Unicode fixes to wxHTML

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/branches/WX_2_4_BRANCH@17774 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Václav Slavík
2002-11-09 01:04:10 +00:00
parent ca351a69c3
commit 0cc3981cd9
7 changed files with 162 additions and 29 deletions

View File

@@ -117,6 +117,11 @@ public:
// Restores parser's state from stack or returns FALSE if the stack is // Restores parser's state from stack or returns FALSE if the stack is
// empty // empty
virtual bool RestoreState(); virtual bool RestoreState();
// Parses HTML string 'markup' and extracts charset info from <meta> tag
// if present. Returns empty string if the tag is missing.
// For wxHTML's internal use.
static wxString ExtractCharsetInformation(const wxString& markup);
protected: protected:
// DOM structure // DOM structure
@@ -242,7 +247,11 @@ public:
wxChar GetEntityChar(const wxString& entity); wxChar GetEntityChar(const wxString& entity);
// Returns character that represents given Unicode code // Returns character that represents given Unicode code
#if wxUSE_UNICODE
wxChar GetCharForCode(unsigned code) { return (wxChar)code; }
#else
wxChar GetCharForCode(unsigned code); wxChar GetCharForCode(unsigned code);
#endif
protected: protected:
#if wxUSE_WCHAR_T && !wxUSE_UNICODE #if wxUSE_WCHAR_T && !wxUSE_UNICODE

View File

@@ -117,10 +117,12 @@ public:
const wxHtmlLinkInfo& GetLink() const { return m_Link; } const wxHtmlLinkInfo& GetLink() const { return m_Link; }
void SetLink(const wxHtmlLinkInfo& link); void SetLink(const wxHtmlLinkInfo& link);
#if !wxUSE_UNICODE
void SetInputEncoding(wxFontEncoding enc); void SetInputEncoding(wxFontEncoding enc);
wxFontEncoding GetInputEncoding() const { return m_InputEnc; } wxFontEncoding GetInputEncoding() const { return m_InputEnc; }
wxFontEncoding GetOutputEncoding() const { return m_OutputEnc; } wxFontEncoding GetOutputEncoding() const { return m_OutputEnc; }
wxEncodingConverter *GetEncodingConverter() const { return m_EncConv; } wxEncodingConverter *GetEncodingConverter() const { return m_EncConv; }
#endif
// creates font depending on m_Font* members. // creates font depending on m_Font* members.
virtual wxFont* CreateCurrentFont(); virtual wxFont* CreateCurrentFont();
@@ -161,7 +163,9 @@ private:
wxFont* m_FontsTable[2][2][2][2][7]; wxFont* m_FontsTable[2][2][2][2][7];
wxString m_FontsFacesTable[2][2][2][2][7]; wxString m_FontsFacesTable[2][2][2][2][7];
#if !wxUSE_UNICODE
wxFontEncoding m_FontsEncTable[2][2][2][2][7]; wxFontEncoding m_FontsEncTable[2][2][2][2][7];
#endif
// table of loaded fonts. 1st four indexes are 0 or 1, depending on on/off // table of loaded fonts. 1st four indexes are 0 or 1, depending on on/off
// state of these flags (from left to right): // state of these flags (from left to right):
// [bold][italic][underlined][fixed_size] // [bold][italic][underlined][fixed_size]
@@ -172,9 +176,11 @@ private:
wxString m_FontFaceFixed, m_FontFaceNormal; wxString m_FontFaceFixed, m_FontFaceNormal;
// html font sizes and faces of fixed and proportional fonts // html font sizes and faces of fixed and proportional fonts
#if !wxUSE_UNICODE
wxFontEncoding m_InputEnc, m_OutputEnc; wxFontEncoding m_InputEnc, m_OutputEnc;
// I/O font encodings // I/O font encodings
wxEncodingConverter *m_EncConv; wxEncodingConverter *m_EncConv;
#endif
}; };

View File

@@ -251,7 +251,7 @@ wxHtmlHelpData::~wxHtmlHelpData()
} }
// defined in htmlfilt.cpp // defined in htmlfilt.cpp
void wxPrivate_ReadString(wxString& str, wxInputStream* s); void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv);
bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, const wxString& indexfile, const wxString& contentsfile) bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, const wxString& indexfile, const wxString& contentsfile)
{ {
@@ -267,7 +267,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, c
if (f) if (f)
{ {
buf.clear(); buf.clear();
wxPrivate_ReadString(buf, f->GetStream()); wxPrivate_ReadString(buf, f->GetStream(), wxConvLibc /*FIXME?*/);
delete f; delete f;
handler->ReadIn(m_Contents, m_ContentsCnt); handler->ReadIn(m_Contents, m_ContentsCnt);
parser.Parse(buf); parser.Parse(buf);
@@ -280,7 +280,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, c
if (f) if (f)
{ {
buf.clear(); buf.clear();
wxPrivate_ReadString(buf, f->GetStream()); wxPrivate_ReadString(buf, f->GetStream(), wxConvLibc /*FIXME?*/);
delete f; delete f;
handler->ReadIn(m_Index, m_IndexCnt); handler->ReadIn(m_Index, m_IndexCnt);
parser.Parse(buf); parser.Parse(buf);
@@ -600,7 +600,7 @@ bool wxHtmlHelpData::AddBook(const wxString& book)
wxChar linebuf[300]; wxChar linebuf[300];
wxString tmp; wxString tmp;
wxPrivate_ReadString(tmp, s); wxPrivate_ReadString(tmp, s, wxConvLibc /*FIXME?*/);
lineptr = tmp.c_str(); lineptr = tmp.c_str();
do do
@@ -839,7 +839,8 @@ bool wxSearchEngine::Scan(wxInputStream *stream)
int wrd = wxStrlen(m_Keyword); int wrd = wxStrlen(m_Keyword);
bool found = FALSE; bool found = FALSE;
wxString tmp; wxString tmp;
wxPrivate_ReadString(tmp, stream); wxPrivate_ReadString(tmp, stream, wxConvLibc);
// FIXME - use wxHtmlFilters instead of wxPrivate_ReadString !!!!!!
int lng = tmp.length(); int lng = tmp.length();
const wxChar *buf = tmp.c_str(); const wxChar *buf = tmp.c_str();

View File

@@ -26,15 +26,16 @@
#include "wx/intl.h" #include "wx/intl.h"
#endif #endif
#include "wx/strconv.h"
#include "wx/html/htmlfilt.h" #include "wx/html/htmlfilt.h"
#include "wx/html/htmlwin.h" #include "wx/html/htmlwin.h"
// utility function: read a wxString from a wxInputStream // utility function: read a wxString from a wxInputStream
void wxPrivate_ReadString(wxString& str, wxInputStream* s) void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv)
{ {
size_t streamSize = s->GetSize(); size_t streamSize = s->GetSize();
if(streamSize == ~(size_t)0) if (streamSize == ~(size_t)0)
{ {
const size_t bufSize = 4095; const size_t bufSize = 4095;
char buffer[bufSize+1]; char buffer[bufSize+1];
@@ -45,17 +46,17 @@ void wxPrivate_ReadString(wxString& str, wxInputStream* s)
s->Read(buffer, bufSize); s->Read(buffer, bufSize);
lastRead = s->LastRead(); lastRead = s->LastRead();
buffer[lastRead] = 0; buffer[lastRead] = 0;
str.Append( wxString::FromAscii(buffer) ); // TODO: What encoding ? str.Append(wxString(buffer, conv));
} }
while(lastRead == bufSize); while (lastRead == bufSize);
} }
else else
{ {
char* src = new char[streamSize+1]; char* src = new char[streamSize+1];
s->Read(src, streamSize); s->Read(src, streamSize);
src[streamSize] = 0; src[streamSize] = 0;
str = wxString::FromAscii( src); // TODO: What encoding ? str = wxString(src, conv);
delete [] src; delete[] src;
} }
} }
@@ -87,7 +88,7 @@ wxString wxHtmlFilterPlainText::ReadFile(const wxFSFile& file) const
wxString doc, doc2; wxString doc, doc2;
if (s == NULL) return wxEmptyString; if (s == NULL) return wxEmptyString;
wxPrivate_ReadString(doc, s); wxPrivate_ReadString(doc, s, wxConvISO8859_1);
doc.Replace(wxT("&"), wxT("&amp;"), TRUE); doc.Replace(wxT("&"), wxT("&amp;"), TRUE);
doc.Replace(wxT("<"), wxT("&lt;"), TRUE); doc.Replace(wxT("<"), wxT("&lt;"), TRUE);
@@ -172,15 +173,43 @@ wxString wxHtmlFilterHTML::ReadFile(const wxFSFile& file) const
wxLogError(_("Cannot open HTML document: %s"), file.GetLocation().c_str()); wxLogError(_("Cannot open HTML document: %s"), file.GetLocation().c_str());
return wxEmptyString; return wxEmptyString;
} }
wxPrivate_ReadString(doc, s);
// add meta tag if we obtained this through http: // NB: We convert input file to wchar_t here in Unicode mode, based on
if (file.GetMimeType().Find(_T("; charset=")) == 0) // either Content-Type header or <meta> tags. In ANSI mode, we don't
// do it as it is done by wxHtmlParser (for this reason, we add <meta>
// tag if we used Content-Type header).
#if wxUSE_UNICODE
int charsetPos;
if ((charsetPos = file.GetMimeType().Find(_T("; charset="))) != wxNOT_FOUND)
{ {
wxString s(_T("<meta http-equiv=\"Content-Type\" content=\"")); wxString charset = file.GetMimeType().Mid(charsetPos + 10);
s << file.GetMimeType() << _T("\">"); wxCSConv conv(charset);
return s+doc; wxPrivate_ReadString(doc, s, conv);
} }
else
{
wxString tmpdoc;
wxPrivate_ReadString(tmpdoc, s, wxConvISO8859_1);
wxString charset = wxHtmlParser::ExtractCharsetInformation(tmpdoc);
if (charset.empty())
doc = tmpdoc;
else
{
wxCSConv conv(charset);
doc = wxString(tmpdoc.mb_str(wxConvISO8859_1), conv);
}
}
#else // !wxUSE_UNICODE
wxPrivate_ReadString(doc, s, wxConvLibc);
// add meta tag if we obtained this through http:
if (!file.GetMimeType().empty())
{
wxString hdr;
wxString mime = file.GetMimeType();
hdr.Printf(_T("<meta http-equiv=\"Content-Type\" content=\"%s\">"), mime.c_str());
return hdr+doc;
}
#endif
return doc; return doc;
} }

View File

@@ -500,11 +500,10 @@ extern "C" int LINKAGEMODE wxHtmlEntityCompare(const void *key, const void *item
return wxStrcmp((wxChar*)key, ((wxHtmlEntityInfo*)item)->name); return wxStrcmp((wxChar*)key, ((wxHtmlEntityInfo*)item)->name);
} }
#if !wxUSE_UNICODE
wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code) wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
{ {
#if wxUSE_UNICODE #if wxUSE_WCHAR_T
return (wxChar)code;
#elif wxUSE_WCHAR_T
char buf[2]; char buf[2];
wchar_t wbuf[2]; wchar_t wbuf[2];
wbuf[0] = (wchar_t)code; wbuf[0] = (wchar_t)code;
@@ -517,6 +516,7 @@ wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
return (code < 256) ? (wxChar)code : '?'; return (code < 256) ? (wxChar)code : '?';
#endif #endif
} }
#endif
wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity) wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity)
{ {
@@ -822,4 +822,55 @@ wxFSFile *wxHtmlParser::OpenURL(wxHtmlURLType WXUNUSED(type),
return GetFS()->OpenFile(url); return GetFS()->OpenFile(url);
} }
//-----------------------------------------------------------------------------
// wxHtmlParser::ExtractCharsetInformation
//-----------------------------------------------------------------------------
// Bare-bones wxHtmlParser subclass: it produces no output object and
// ignores every piece of text handed to it, so only registered tag
// handlers do any work while a document is parsed.
class wxMetaTagParser : public wxHtmlParser
{
public:
    // This parser builds nothing, so there is no product to return.
    wxObject* GetProduct() { return NULL; }

protected:
    // Text between tags is deliberately discarded.
    virtual void AddText(const wxChar* WXUNUSED(txt)) {}
};
// Tag handler registered for <META> tags. It is given a pointer to a
// caller-provided string (kept in m_retval) that HandleTag() may write to;
// the handler only stores the pointer and never deletes it.
class wxMetaTagHandler : public wxHtmlTagHandler
{
public:
    // retval: destination string; stored as-is, not copied.
    wxMetaTagHandler(wxString *retval)
        : wxHtmlTagHandler(),
          m_retval(retval)
    {
    }

    // Only the META tag is of interest to this handler.
    wxString GetSupportedTags() { return wxT("META"); }

    bool HandleTag(const wxHtmlTag& tag);

private:
    wxString *m_retval;   // where the result is written
};
bool wxMetaTagHandler::HandleTag(const wxHtmlTag& tag)
{
if (tag.HasParam(_T("HTTP-EQUIV")) &&
tag.GetParam(_T("HTTP-EQUIV")) == _T("Content-Type") &&
tag.HasParam(_T("CONTENT")))
{
wxString content = tag.GetParam(_T("CONTENT"));
if (content.Left(19) == _T("text/html; charset="))
{
*m_retval = content.Mid(19);
}
}
return FALSE;
}
// Runs a throw-away wxMetaTagParser, equipped with a wxMetaTagHandler, over
// 'markup' and returns whatever charset name the handler recorded. The
// result stays empty when the handler never writes to it.
// NOTE(review): the handler is allocated with new and handed to
// AddTagHandler(); presumably the parser takes ownership -- confirm.
/*static*/
wxString wxHtmlParser::ExtractCharsetInformation(const wxString& markup)
{
    wxString foundCharset;

    wxMetaTagParser metaParser;
    wxMetaTagHandler *metaHandler = new wxMetaTagHandler(&foundCharset);
    metaParser.AddTagHandler(metaHandler);
    metaParser.Parse(markup);

    return foundCharset;
}
#endif #endif

View File

@@ -270,7 +270,7 @@ void wxHtmlPrintout::SetHtmlText(const wxString& html, const wxString &basepath,
} }
// defined in htmlfilt.cpp // defined in htmlfilt.cpp
void wxPrivate_ReadString(wxString& str, wxInputStream* s); void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv);
void wxHtmlPrintout::SetHtmlFile(const wxString& htmlfile) void wxHtmlPrintout::SetHtmlFile(const wxString& htmlfile)
{ {
@@ -285,7 +285,7 @@ void wxHtmlPrintout::SetHtmlFile(const wxString& htmlfile)
wxInputStream *st = ff->GetStream(); wxInputStream *st = ff->GetStream();
wxString doc; wxString doc;
wxPrivate_ReadString(doc, st); wxPrivate_ReadString(doc, st, wxConvLibc /*FIXME -- use wxHtmlFilter!!*/);
delete ff; delete ff;

View File

@@ -49,9 +49,11 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindow *wnd) : wxHtmlParser()
m_DC = NULL; m_DC = NULL;
m_CharHeight = m_CharWidth = 0; m_CharHeight = m_CharWidth = 0;
m_UseLink = FALSE; m_UseLink = FALSE;
#if !wxUSE_UNICODE
m_EncConv = NULL; m_EncConv = NULL;
m_InputEnc = wxFONTENCODING_ISO8859_1; m_InputEnc = wxFONTENCODING_ISO8859_1;
m_OutputEnc = wxFONTENCODING_DEFAULT; m_OutputEnc = wxFONTENCODING_DEFAULT;
#endif
{ {
int i, j, k, l, m; int i, j, k, l, m;
@@ -63,7 +65,9 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindow *wnd) : wxHtmlParser()
{ {
m_FontsTable[i][j][k][l][m] = NULL; m_FontsTable[i][j][k][l][m] = NULL;
m_FontsFacesTable[i][j][k][l][m] = wxEmptyString; m_FontsFacesTable[i][j][k][l][m] = wxEmptyString;
#if !wxUSE_UNICODE
m_FontsEncTable[i][j][k][l][m] = wxFONTENCODING_DEFAULT; m_FontsEncTable[i][j][k][l][m] = wxFONTENCODING_DEFAULT;
#endif
} }
#ifdef __WXMSW__ #ifdef __WXMSW__
static int default_sizes[7] = {7, 8, 10, 12, 16, 22, 30}; static int default_sizes[7] = {7, 8, 10, 12, 16, 22, 30};
@@ -98,7 +102,9 @@ wxHtmlWinParser::~wxHtmlWinParser()
if (m_FontsTable[i][j][k][l][m] != NULL) if (m_FontsTable[i][j][k][l][m] != NULL)
delete m_FontsTable[i][j][k][l][m]; delete m_FontsTable[i][j][k][l][m];
} }
#if !wxUSE_UNICODE
delete m_EncConv; delete m_EncConv;
#endif
delete[] m_tmpStrBuf; delete[] m_tmpStrBuf;
} }
@@ -120,7 +126,9 @@ void wxHtmlWinParser::SetFonts(wxString normal_face, wxString fixed_face, const
m_FontFaceFixed = fixed_face; m_FontFaceFixed = fixed_face;
m_FontFaceNormal = normal_face; m_FontFaceNormal = normal_face;
#if !wxUSE_UNICODE
SetInputEncoding(m_InputEnc); SetInputEncoding(m_InputEnc);
#endif
for (i = 0; i < 2; i++) for (i = 0; i < 2; i++)
for (j = 0; j < 2; j++) for (j = 0; j < 2; j++)
@@ -156,8 +164,18 @@ void wxHtmlWinParser::InitParser(const wxString& source)
m_tmpLastWasSpace = FALSE; m_tmpLastWasSpace = FALSE;
OpenContainer(); OpenContainer();
OpenContainer(); OpenContainer();
#if 0 //!wxUSE_UNICODE
wxString charset = ExtractCharsetInformation(source);
if (!charset.empty())
{
wxFontEncoding enc = wxFontMapper::Get()->CharsetToEncoding(charset);
if (enc != wxFONTENCODING_SYSTEM)
SetInputEncoding(enc);
}
#endif
m_Container->InsertCell(new wxHtmlColourCell(m_ActualColor)); m_Container->InsertCell(new wxHtmlColourCell(m_ActualColor));
m_Container->InsertCell(new wxHtmlFontCell(CreateCurrentFont())); m_Container->InsertCell(new wxHtmlFontCell(CreateCurrentFont()));
} }
@@ -165,7 +183,9 @@ void wxHtmlWinParser::InitParser(const wxString& source)
void wxHtmlWinParser::DoneParser() void wxHtmlWinParser::DoneParser()
{ {
m_Container = NULL; m_Container = NULL;
#if !wxUSE_UNICODE
SetInputEncoding(wxFONTENCODING_ISO8859_1); // for next call SetInputEncoding(wxFONTENCODING_ISO8859_1); // for next call
#endif
wxHtmlParser::DoneParser(); wxHtmlParser::DoneParser();
} }
@@ -254,8 +274,10 @@ void wxHtmlWinParser::AddText(const wxChar* txt)
if (templen == 1) continue; if (templen == 1) continue;
#endif #endif
templen = 0; templen = 0;
#if !wxUSE_UNICODE
if (m_EncConv) if (m_EncConv)
m_EncConv->Convert(temp); m_EncConv->Convert(temp);
#endif
size_t len = wxStrlen(temp); size_t len = wxStrlen(temp);
for (size_t j = 0; j < len; j++) for (size_t j = 0; j < len; j++)
if (temp[j] == nbsp) if (temp[j] == nbsp)
@@ -271,8 +293,10 @@ void wxHtmlWinParser::AddText(const wxChar* txt)
if (templen && (templen > 1 || temp[0] != wxT(' '))) if (templen && (templen > 1 || temp[0] != wxT(' ')))
{ {
temp[templen] = 0; temp[templen] = 0;
#if !wxUSE_UNICODE
if (m_EncConv) if (m_EncConv)
m_EncConv->Convert(temp); m_EncConv->Convert(temp);
#endif
size_t len = wxStrlen(temp); size_t len = wxStrlen(temp);
for (size_t j = 0; j < len; j++) for (size_t j = 0; j < len; j++)
if (temp[j] == nbsp) if (temp[j] == nbsp)
@@ -334,9 +358,15 @@ wxFont* wxHtmlWinParser::CreateCurrentFont()
wxString face = ff ? m_FontFaceFixed : m_FontFaceNormal; wxString face = ff ? m_FontFaceFixed : m_FontFaceNormal;
wxString *faceptr = &(m_FontsFacesTable[fb][fi][fu][ff][fs]); wxString *faceptr = &(m_FontsFacesTable[fb][fi][fu][ff][fs]);
wxFont **fontptr = &(m_FontsTable[fb][fi][fu][ff][fs]); wxFont **fontptr = &(m_FontsTable[fb][fi][fu][ff][fs]);
#if !wxUSE_UNICODE
wxFontEncoding *encptr = &(m_FontsEncTable[fb][fi][fu][ff][fs]); wxFontEncoding *encptr = &(m_FontsEncTable[fb][fi][fu][ff][fs]);
#endif
if (*fontptr != NULL && (*faceptr != face || *encptr != m_OutputEnc)) if (*fontptr != NULL && (*faceptr != face
#if !wxUSE_UNICODE
|| *encptr != m_OutputEnc
#endif
))
{ {
delete *fontptr; delete *fontptr;
*fontptr = NULL; *fontptr = NULL;
@@ -345,14 +375,18 @@ wxFont* wxHtmlWinParser::CreateCurrentFont()
if (*fontptr == NULL) if (*fontptr == NULL)
{ {
*faceptr = face; *faceptr = face;
*encptr = m_OutputEnc;
*fontptr = new wxFont( *fontptr = new wxFont(
(int) (m_FontsSizes[fs] * m_PixelScale), (int) (m_FontsSizes[fs] * m_PixelScale),
ff ? wxMODERN : wxSWISS, ff ? wxMODERN : wxSWISS,
fi ? wxITALIC : wxNORMAL, fi ? wxITALIC : wxNORMAL,
fb ? wxBOLD : wxNORMAL, fb ? wxBOLD : wxNORMAL,
fu ? TRUE : FALSE, face, fu ? TRUE : FALSE, face
m_OutputEnc); #if wxUSE_UNICODE
);
#else
, m_OutputEnc);
*encptr = m_OutputEnc;
#endif
} }
m_DC->SetFont(**fontptr); m_DC->SetFont(**fontptr);
return (*fontptr); return (*fontptr);
@@ -372,12 +406,15 @@ void wxHtmlWinParser::SetFontFace(const wxString& face)
if (GetFontFixed()) m_FontFaceFixed = face; if (GetFontFixed()) m_FontFaceFixed = face;
else m_FontFaceNormal = face; else m_FontFaceNormal = face;
#if !wxUSE_UNICODE
if (m_InputEnc != wxFONTENCODING_DEFAULT) if (m_InputEnc != wxFONTENCODING_DEFAULT)
SetInputEncoding(m_InputEnc); SetInputEncoding(m_InputEnc);
#endif
} }
#if !wxUSE_UNICODE
void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc) void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
{ {
m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT; m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT;
@@ -435,7 +472,7 @@ void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
m_EncConv = NULL; m_EncConv = NULL;
} }
} }
#endif