new HTML tags parser and entities substitution code

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@10744 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Václav Slavík
2001-07-01 15:09:35 +00:00
parent ec7c3e898a
commit daa616fca0
6 changed files with 619 additions and 479 deletions

View File

@@ -21,24 +21,21 @@
#include "wx/html/htmltag.h"
#include "wx/filesys.h"
class wxHtmlParser;
class wxHtmlTagHandler;
//--------------------------------------------------------------------------------
// wxHtmlParser
// This class handles generic parsing of HTML document : it scans
// the document and divide it into blocks of tags (where one block
// consists of starting and ending tag and of text between these
// 2 tags.
//--------------------------------------------------------------------------------
class WXDLLEXPORT wxMBConv;
class WXDLLEXPORT wxHtmlParser;
class WXDLLEXPORT wxHtmlTagHandler;
class WXDLLEXPORT wxHtmlEntitiesParser;
// This class handles generic parsing of an HTML document: it scans
// the document and divides it into blocks of tags (where one block
// consists of a starting and an ending tag and of the text between
// these 2 tags).
class WXDLLEXPORT wxHtmlParser : public wxObject
{
DECLARE_ABSTRACT_CLASS(wxHtmlParser)
public:
wxHtmlParser() : wxObject(), m_HandlersHash(wxKEY_STRING)
{ m_FS = NULL; m_Cache = NULL; m_HandlersStack = NULL; }
wxHtmlParser();
virtual ~wxHtmlParser();
// Sets the class which will be used for opening files
@@ -106,6 +103,9 @@ protected:
// ignored if no handler is found.
// Derived class is *responsible* for filling in m_Handlers table.
virtual void AddTag(const wxHtmlTag& tag);
// Returns entity parser object, used to substitute HTML &entities;
wxHtmlEntitiesParser *GetEntitiesParser() const { return m_entitiesParser; }
protected:
// source being parsed
@@ -130,24 +130,20 @@ protected:
wxFileSystem *m_FS;
// handlers stack used by PushTagHandler and PopTagHandler
wxList *m_HandlersStack;
// entity parser
wxHtmlEntitiesParser *m_entitiesParser;
};
//--------------------------------------------------------------------------------
// wxHtmlTagHandler
// This class (and derived classes) cooperates with wxHtmlParser.
// Each recognized tag is passed to handler which is capable
// of handling it. Each tag is handled in 3 steps:
// 1. Handler will modifies state of parser
// (using it's public methods)
// 2. Parser parses source between starting and ending tag
// 3. Handler restores original state of the parser
//--------------------------------------------------------------------------------
// This class (and derived classes) cooperates with wxHtmlParser.
// Each recognized tag is passed to a handler which is capable
// of handling it. Each tag is handled in 3 steps:
// 1. Handler modifies the state of the parser
// (using its public methods)
// 2. Parser parses source between starting and ending tag
// 3. Handler restores original state of the parser
class WXDLLEXPORT wxHtmlTagHandler : public wxObject
{
DECLARE_ABSTRACT_CLASS(wxHtmlTagHandler)
@@ -184,6 +180,33 @@ protected:
};
// This class is used to parse HTML entities in strings. It can handle
// both named entities and numeric &#xxxx entries, where xxxx is a
// Unicode code.
//
// The public interface is SetEncoding(), which selects the encoding of
// the output, and Parse(), which performs the substitution.
class WXDLLEXPORT wxHtmlEntitiesParser : public wxObject
{
// wxWidgets RTTI declaration for this class.
DECLARE_DYNAMIC_CLASS(wxHtmlEntitiesParser)
public:
wxHtmlEntitiesParser();
virtual ~wxHtmlEntitiesParser();
// Sets encoding of output string.
// Has no effect if wxUSE_WCHAR_T==0 or wxUSE_UNICODE==1; the members
// that hold the encoding state (m_conv, m_encoding) are only compiled
// in when wxUSE_WCHAR_T && !wxUSE_UNICODE.
void SetEncoding(wxFontEncoding encoding);
// Parses entities in input and replaces them with respective characters
// (with respect to output encoding). The input string is taken by const
// reference and the result is returned as a new wxString.
wxString Parse(const wxString& input);
protected:
// Returns the character that corresponds to the given entity.
// NOTE(review): presumably 'entity' is the entity text without the
// leading '&' and trailing ';', and both named and numeric forms are
// accepted -- confirm against the implementation.
wxChar GetEntityChar(const wxString& entity);
// Returns the character for the given numeric code.
// NOTE(review): presumably 'code' is a Unicode code point that is
// converted to the output encoding where necessary -- confirm against
// the implementation.
wxChar GetCharForCode(unsigned code);
#if wxUSE_WCHAR_T && !wxUSE_UNICODE
// Encoding state; only present in builds that have wchar_t support but
// are not Unicode builds.
// NOTE(review): m_conv looks like the converter used to produce output
// in m_encoding; its ownership is not visible here -- confirm in the
// constructor/destructor.
wxMBConv *m_conv;
wxFontEncoding m_encoding;
#endif
};
#endif