Unicode fixes to wxHTML

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/branches/WX_2_4_BRANCH@17774 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Václav Slavík
2002-11-09 01:04:10 +00:00
parent ca351a69c3
commit 0cc3981cd9
7 changed files with 162 additions and 29 deletions

View File

@@ -118,6 +118,11 @@ public:
// empty
virtual bool RestoreState();
// Parses HTML string 'markup' and extracts charset info from <meta> tag
// if present. Returns empty string if the tag is missing.
// For wxHTML's internal use.
static wxString ExtractCharsetInformation(const wxString& markup);
protected:
// DOM structure
void CreateDOMTree();
@@ -242,7 +247,11 @@ public:
wxChar GetEntityChar(const wxString& entity);
// Returns character that represents given Unicode code
#if wxUSE_UNICODE
wxChar GetCharForCode(unsigned code) { return (wxChar)code; }
#else
wxChar GetCharForCode(unsigned code);
#endif
protected:
#if wxUSE_WCHAR_T && !wxUSE_UNICODE

View File

@@ -117,10 +117,12 @@ public:
const wxHtmlLinkInfo& GetLink() const { return m_Link; }
void SetLink(const wxHtmlLinkInfo& link);
#if !wxUSE_UNICODE
void SetInputEncoding(wxFontEncoding enc);
wxFontEncoding GetInputEncoding() const { return m_InputEnc; }
wxFontEncoding GetOutputEncoding() const { return m_OutputEnc; }
wxEncodingConverter *GetEncodingConverter() const { return m_EncConv; }
#endif
// creates font depending on m_Font* members.
virtual wxFont* CreateCurrentFont();
@@ -161,7 +163,9 @@ private:
wxFont* m_FontsTable[2][2][2][2][7];
wxString m_FontsFacesTable[2][2][2][2][7];
#if !wxUSE_UNICODE
wxFontEncoding m_FontsEncTable[2][2][2][2][7];
#endif
// table of loaded fonts. 1st four indexes are 0 or 1, depending on on/off
// state of these flags (from left to right):
// [bold][italic][underlined][fixed_size]
@@ -172,9 +176,11 @@ private:
wxString m_FontFaceFixed, m_FontFaceNormal;
// html font sizes and faces of fixed and proportional fonts
#if !wxUSE_UNICODE
wxFontEncoding m_InputEnc, m_OutputEnc;
// I/O font encodings
wxEncodingConverter *m_EncConv;
#endif
};

View File

@@ -251,7 +251,7 @@ wxHtmlHelpData::~wxHtmlHelpData()
}
// defined in htmlfilt.cpp
void wxPrivate_ReadString(wxString& str, wxInputStream* s);
void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv);
bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, const wxString& indexfile, const wxString& contentsfile)
{
@@ -267,7 +267,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, c
if (f)
{
buf.clear();
wxPrivate_ReadString(buf, f->GetStream());
wxPrivate_ReadString(buf, f->GetStream(), wxConvLibc /*FIXME?*/);
delete f;
handler->ReadIn(m_Contents, m_ContentsCnt);
parser.Parse(buf);
@@ -280,7 +280,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, c
if (f)
{
buf.clear();
wxPrivate_ReadString(buf, f->GetStream());
wxPrivate_ReadString(buf, f->GetStream(), wxConvLibc /*FIXME?*/);
delete f;
handler->ReadIn(m_Index, m_IndexCnt);
parser.Parse(buf);
@@ -600,7 +600,7 @@ bool wxHtmlHelpData::AddBook(const wxString& book)
wxChar linebuf[300];
wxString tmp;
wxPrivate_ReadString(tmp, s);
wxPrivate_ReadString(tmp, s, wxConvLibc /*FIXME?*/);
lineptr = tmp.c_str();
do
@@ -839,7 +839,8 @@ bool wxSearchEngine::Scan(wxInputStream *stream)
int wrd = wxStrlen(m_Keyword);
bool found = FALSE;
wxString tmp;
wxPrivate_ReadString(tmp, stream);
wxPrivate_ReadString(tmp, stream, wxConvLibc);
// FIXME - use wxHtmlFilters instead of wxPrivate_ReadString !!!!!!
int lng = tmp.length();
const wxChar *buf = tmp.c_str();

View File

@@ -26,11 +26,12 @@
#include "wx/intl.h"
#endif
#include "wx/strconv.h"
#include "wx/html/htmlfilt.h"
#include "wx/html/htmlwin.h"
// utility function: read a wxString from a wxInputStream
void wxPrivate_ReadString(wxString& str, wxInputStream* s)
void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv)
{
size_t streamSize = s->GetSize();
@@ -45,7 +46,7 @@ void wxPrivate_ReadString(wxString& str, wxInputStream* s)
s->Read(buffer, bufSize);
lastRead = s->LastRead();
buffer[lastRead] = 0;
str.Append( wxString::FromAscii(buffer) ); // TODO: What encoding ?
str.Append(wxString(buffer, conv));
}
while (lastRead == bufSize);
}
@@ -54,7 +55,7 @@ void wxPrivate_ReadString(wxString& str, wxInputStream* s)
char* src = new char[streamSize+1];
s->Read(src, streamSize);
src[streamSize] = 0;
str = wxString::FromAscii( src); // TODO: What encoding ?
str = wxString(src, conv);
delete[] src;
}
}
@@ -87,7 +88,7 @@ wxString wxHtmlFilterPlainText::ReadFile(const wxFSFile& file) const
wxString doc, doc2;
if (s == NULL) return wxEmptyString;
wxPrivate_ReadString(doc, s);
wxPrivate_ReadString(doc, s, wxConvISO8859_1);
doc.Replace(wxT("&"), wxT("&amp;"), TRUE);
doc.Replace(wxT("<"), wxT("&lt;"), TRUE);
@@ -172,15 +173,43 @@ wxString wxHtmlFilterHTML::ReadFile(const wxFSFile& file) const
wxLogError(_("Cannot open HTML document: %s"), file.GetLocation().c_str());
return wxEmptyString;
}
wxPrivate_ReadString(doc, s);
// add meta tag if we obtained this through http:
if (file.GetMimeType().Find(_T("; charset=")) == 0)
// NB: We convert input file to wchar_t here in Unicode mode, based on
// either Content-Type header or <meta> tags. In ANSI mode, we don't
// do it as it is done by wxHtmlParser (for this reason, we add <meta>
// tag if we used Content-Type header).
#if wxUSE_UNICODE
int charsetPos;
if ((charsetPos = file.GetMimeType().Find(_T("; charset="))) != wxNOT_FOUND)
{
wxString s(_T("<meta http-equiv=\"Content-Type\" content=\""));
s << file.GetMimeType() << _T("\">");
return s+doc;
wxString charset = file.GetMimeType().Mid(charsetPos + 10);
wxCSConv conv(charset);
wxPrivate_ReadString(doc, s, conv);
}
else
{
wxString tmpdoc;
wxPrivate_ReadString(tmpdoc, s, wxConvISO8859_1);
wxString charset = wxHtmlParser::ExtractCharsetInformation(tmpdoc);
if (charset.empty())
doc = tmpdoc;
else
{
wxCSConv conv(charset);
doc = wxString(tmpdoc.mb_str(wxConvISO8859_1), conv);
}
}
#else // !wxUSE_UNICODE
wxPrivate_ReadString(doc, s, wxConvLibc);
// add meta tag if we obtained this through http:
if (!file.GetMimeType().empty())
{
wxString hdr;
wxString mime = file.GetMimeType();
hdr.Printf(_T("<meta http-equiv=\"Content-Type\" content=\"%s\">"), mime.c_str());
return hdr+doc;
}
#endif
return doc;
}

View File

@@ -500,11 +500,10 @@ extern "C" int LINKAGEMODE wxHtmlEntityCompare(const void *key, const void *item
return wxStrcmp((wxChar*)key, ((wxHtmlEntityInfo*)item)->name);
}
#if !wxUSE_UNICODE
wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
{
#if wxUSE_UNICODE
return (wxChar)code;
#elif wxUSE_WCHAR_T
#if wxUSE_WCHAR_T
char buf[2];
wchar_t wbuf[2];
wbuf[0] = (wchar_t)code;
@@ -517,6 +516,7 @@ wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
return (code < 256) ? (wxChar)code : '?';
#endif
}
#endif
wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity)
{
@@ -822,4 +822,55 @@ wxFSFile *wxHtmlParser::OpenURL(wxHtmlURLType WXUNUSED(type),
return GetFS()->OpenFile(url);
}
//-----------------------------------------------------------------------------
// wxHtmlParser::ExtractCharsetInformation
//-----------------------------------------------------------------------------
// Minimal wxHtmlParser subclass used when only tags are of interest:
// it produces no output object and discards all text it is given.
class wxMetaTagParser : public wxHtmlParser
{
public:
    // There is no product; NULL is always returned.
    virtual wxObject* GetProduct()
    {
        return NULL;
    }

protected:
    // Text is intentionally ignored.
    virtual void AddText(const wxChar* WXUNUSED(txt))
    {
    }
};
// Tag handler registered for <META> tags. It keeps a pointer to a
// caller-supplied string (not owned by the handler) that HandleTag()
// writes to.
class wxMetaTagHandler : public wxHtmlTagHandler
{
public:
    // retval: string that HandleTag() assigns to; the pointer is stored
    // as-is, so the string must stay valid while the handler is in use.
    wxMetaTagHandler(wxString *retval)
        : wxHtmlTagHandler(),
          m_retval(retval)
    {
    }

    // This handler is only interested in META tags.
    virtual wxString GetSupportedTags()
    {
        return wxT("META");
    }

    virtual bool HandleTag(const wxHtmlTag& tag);

private:
    // Destination string supplied to the constructor.
    wxString *m_retval;
};
bool wxMetaTagHandler::HandleTag(const wxHtmlTag& tag)
{
if (tag.HasParam(_T("HTTP-EQUIV")) &&
tag.GetParam(_T("HTTP-EQUIV")) == _T("Content-Type") &&
tag.HasParam(_T("CONTENT")))
{
wxString content = tag.GetParam(_T("CONTENT"));
if (content.Left(19) == _T("text/html; charset="))
{
*m_retval = content.Mid(19);
}
}
return FALSE;
}
// Scans 'markup' for a <meta> tag carrying charset information and returns
// the charset found; the returned string is empty if no such tag was seen.
//
// Works by running a throw-away wxMetaTagParser over the markup with a
// single wxMetaTagHandler that writes its finding into a local string.
/*static*/
wxString wxHtmlParser::ExtractCharsetInformation(const wxString& markup)
{
    wxString foundCharset;

    wxMetaTagParser metaParser;
    // NOTE(review): the handler is not deleted here; presumably the parser
    // takes ownership of handlers passed to AddTagHandler() -- confirm.
    wxMetaTagHandler *metaHandler = new wxMetaTagHandler(&foundCharset);
    metaParser.AddTagHandler(metaHandler);
    metaParser.Parse(markup);

    return foundCharset;
}
#endif

View File

@@ -270,7 +270,7 @@ void wxHtmlPrintout::SetHtmlText(const wxString& html, const wxString &basepath,
}
// defined in htmlfilt.cpp
void wxPrivate_ReadString(wxString& str, wxInputStream* s);
void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv);
void wxHtmlPrintout::SetHtmlFile(const wxString& htmlfile)
{
@@ -285,7 +285,7 @@ void wxHtmlPrintout::SetHtmlFile(const wxString& htmlfile)
wxInputStream *st = ff->GetStream();
wxString doc;
wxPrivate_ReadString(doc, st);
wxPrivate_ReadString(doc, st, wxConvLibc /*FIXME -- use wxHtmlFilter!!*/);
delete ff;

View File

@@ -49,9 +49,11 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindow *wnd) : wxHtmlParser()
m_DC = NULL;
m_CharHeight = m_CharWidth = 0;
m_UseLink = FALSE;
#if !wxUSE_UNICODE
m_EncConv = NULL;
m_InputEnc = wxFONTENCODING_ISO8859_1;
m_OutputEnc = wxFONTENCODING_DEFAULT;
#endif
{
int i, j, k, l, m;
@@ -63,7 +65,9 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindow *wnd) : wxHtmlParser()
{
m_FontsTable[i][j][k][l][m] = NULL;
m_FontsFacesTable[i][j][k][l][m] = wxEmptyString;
#if !wxUSE_UNICODE
m_FontsEncTable[i][j][k][l][m] = wxFONTENCODING_DEFAULT;
#endif
}
#ifdef __WXMSW__
static int default_sizes[7] = {7, 8, 10, 12, 16, 22, 30};
@@ -98,7 +102,9 @@ wxHtmlWinParser::~wxHtmlWinParser()
if (m_FontsTable[i][j][k][l][m] != NULL)
delete m_FontsTable[i][j][k][l][m];
}
#if !wxUSE_UNICODE
delete m_EncConv;
#endif
delete[] m_tmpStrBuf;
}
@@ -120,7 +126,9 @@ void wxHtmlWinParser::SetFonts(wxString normal_face, wxString fixed_face, const
m_FontFaceFixed = fixed_face;
m_FontFaceNormal = normal_face;
#if !wxUSE_UNICODE
SetInputEncoding(m_InputEnc);
#endif
for (i = 0; i < 2; i++)
for (j = 0; j < 2; j++)
@@ -156,8 +164,18 @@ void wxHtmlWinParser::InitParser(const wxString& source)
m_tmpLastWasSpace = FALSE;
OpenContainer();
OpenContainer();
#if 0 //!wxUSE_UNICODE
wxString charset = ExtractCharsetInformation(source);
if (!charset.empty())
{
wxFontEncoding enc = wxFontMapper::Get()->CharsetToEncoding(charset);
if (enc != wxFONTENCODING_SYSTEM)
SetInputEncoding(enc);
}
#endif
m_Container->InsertCell(new wxHtmlColourCell(m_ActualColor));
m_Container->InsertCell(new wxHtmlFontCell(CreateCurrentFont()));
}
@@ -165,7 +183,9 @@ void wxHtmlWinParser::InitParser(const wxString& source)
// Finishes a parsing run: drops the current container pointer, restores
// the default input encoding (non-Unicode builds only) and then lets the
// base class do its own cleanup.
void wxHtmlWinParser::DoneParser()
{
    m_Container = NULL;

#if !wxUSE_UNICODE
    // reset to ISO-8859-1 so that the next call starts from the default
    SetInputEncoding(wxFONTENCODING_ISO8859_1);
#endif

    wxHtmlParser::DoneParser();
}
@@ -254,8 +274,10 @@ void wxHtmlWinParser::AddText(const wxChar* txt)
if (templen == 1) continue;
#endif
templen = 0;
#if !wxUSE_UNICODE
if (m_EncConv)
m_EncConv->Convert(temp);
#endif
size_t len = wxStrlen(temp);
for (size_t j = 0; j < len; j++)
if (temp[j] == nbsp)
@@ -271,8 +293,10 @@ void wxHtmlWinParser::AddText(const wxChar* txt)
if (templen && (templen > 1 || temp[0] != wxT(' ')))
{
temp[templen] = 0;
#if !wxUSE_UNICODE
if (m_EncConv)
m_EncConv->Convert(temp);
#endif
size_t len = wxStrlen(temp);
for (size_t j = 0; j < len; j++)
if (temp[j] == nbsp)
@@ -334,9 +358,15 @@ wxFont* wxHtmlWinParser::CreateCurrentFont()
wxString face = ff ? m_FontFaceFixed : m_FontFaceNormal;
wxString *faceptr = &(m_FontsFacesTable[fb][fi][fu][ff][fs]);
wxFont **fontptr = &(m_FontsTable[fb][fi][fu][ff][fs]);
#if !wxUSE_UNICODE
wxFontEncoding *encptr = &(m_FontsEncTable[fb][fi][fu][ff][fs]);
#endif
if (*fontptr != NULL && (*faceptr != face || *encptr != m_OutputEnc))
if (*fontptr != NULL && (*faceptr != face
#if !wxUSE_UNICODE
|| *encptr != m_OutputEnc
#endif
))
{
delete *fontptr;
*fontptr = NULL;
@@ -345,14 +375,18 @@ wxFont* wxHtmlWinParser::CreateCurrentFont()
if (*fontptr == NULL)
{
*faceptr = face;
*encptr = m_OutputEnc;
*fontptr = new wxFont(
(int) (m_FontsSizes[fs] * m_PixelScale),
ff ? wxMODERN : wxSWISS,
fi ? wxITALIC : wxNORMAL,
fb ? wxBOLD : wxNORMAL,
fu ? TRUE : FALSE, face,
m_OutputEnc);
fu ? TRUE : FALSE, face
#if wxUSE_UNICODE
);
#else
, m_OutputEnc);
*encptr = m_OutputEnc;
#endif
}
m_DC->SetFont(**fontptr);
return (*fontptr);
@@ -372,12 +406,15 @@ void wxHtmlWinParser::SetFontFace(const wxString& face)
if (GetFontFixed()) m_FontFaceFixed = face;
else m_FontFaceNormal = face;
#if !wxUSE_UNICODE
if (m_InputEnc != wxFONTENCODING_DEFAULT)
SetInputEncoding(m_InputEnc);
#endif
}
#if !wxUSE_UNICODE
void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
{
m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT;
@@ -435,7 +472,7 @@ void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
m_EncConv = NULL;
}
}
#endif