Unicode fixes to wxHTML
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/branches/WX_2_4_BRANCH@17774 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -118,6 +118,11 @@ public:
|
||||
// empty
|
||||
virtual bool RestoreState();
|
||||
|
||||
// Parses HTML string 'markup' and extracts charset info from <meta> tag
|
||||
// if present. Returns empty string if the tag is missing.
|
||||
// For wxHTML's internal use.
|
||||
static wxString ExtractCharsetInformation(const wxString& markup);
|
||||
|
||||
protected:
|
||||
// DOM structure
|
||||
void CreateDOMTree();
|
||||
@@ -242,7 +247,11 @@ public:
|
||||
wxChar GetEntityChar(const wxString& entity);
|
||||
|
||||
// Returns character that represents given Unicode code
|
||||
#if wxUSE_UNICODE
|
||||
wxChar GetCharForCode(unsigned code) { return (wxChar)code; }
|
||||
#else
|
||||
wxChar GetCharForCode(unsigned code);
|
||||
#endif
|
||||
|
||||
protected:
|
||||
#if wxUSE_WCHAR_T && !wxUSE_UNICODE
|
||||
|
@@ -117,10 +117,12 @@ public:
|
||||
const wxHtmlLinkInfo& GetLink() const { return m_Link; }
|
||||
void SetLink(const wxHtmlLinkInfo& link);
|
||||
|
||||
#if !wxUSE_UNICODE
|
||||
void SetInputEncoding(wxFontEncoding enc);
|
||||
wxFontEncoding GetInputEncoding() const { return m_InputEnc; }
|
||||
wxFontEncoding GetOutputEncoding() const { return m_OutputEnc; }
|
||||
wxEncodingConverter *GetEncodingConverter() const { return m_EncConv; }
|
||||
#endif
|
||||
|
||||
// creates font depending on m_Font* members.
|
||||
virtual wxFont* CreateCurrentFont();
|
||||
@@ -161,7 +163,9 @@ private:
|
||||
|
||||
wxFont* m_FontsTable[2][2][2][2][7];
|
||||
wxString m_FontsFacesTable[2][2][2][2][7];
|
||||
#if !wxUSE_UNICODE
|
||||
wxFontEncoding m_FontsEncTable[2][2][2][2][7];
|
||||
#endif
|
||||
// table of loaded fonts. 1st four indexes are 0 or 1, depending on on/off
|
||||
// state of these flags (from left to right):
|
||||
// [bold][italic][underlined][fixed_size]
|
||||
@@ -172,9 +176,11 @@ private:
|
||||
wxString m_FontFaceFixed, m_FontFaceNormal;
|
||||
// html font sizes and faces of fixed and proportional fonts
|
||||
|
||||
#if !wxUSE_UNICODE
|
||||
wxFontEncoding m_InputEnc, m_OutputEnc;
|
||||
// I/O font encodings
|
||||
wxEncodingConverter *m_EncConv;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
@@ -251,7 +251,7 @@ wxHtmlHelpData::~wxHtmlHelpData()
|
||||
}
|
||||
|
||||
// defined in htmlfilt.cpp
|
||||
void wxPrivate_ReadString(wxString& str, wxInputStream* s);
|
||||
void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv);
|
||||
|
||||
bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, const wxString& indexfile, const wxString& contentsfile)
|
||||
{
|
||||
@@ -267,7 +267,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, c
|
||||
if (f)
|
||||
{
|
||||
buf.clear();
|
||||
wxPrivate_ReadString(buf, f->GetStream());
|
||||
wxPrivate_ReadString(buf, f->GetStream(), wxConvLibc /*FIXME?*/);
|
||||
delete f;
|
||||
handler->ReadIn(m_Contents, m_ContentsCnt);
|
||||
parser.Parse(buf);
|
||||
@@ -280,7 +280,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, c
|
||||
if (f)
|
||||
{
|
||||
buf.clear();
|
||||
wxPrivate_ReadString(buf, f->GetStream());
|
||||
wxPrivate_ReadString(buf, f->GetStream(), wxConvLibc /*FIXME?*/);
|
||||
delete f;
|
||||
handler->ReadIn(m_Index, m_IndexCnt);
|
||||
parser.Parse(buf);
|
||||
@@ -600,7 +600,7 @@ bool wxHtmlHelpData::AddBook(const wxString& book)
|
||||
wxChar linebuf[300];
|
||||
wxString tmp;
|
||||
|
||||
wxPrivate_ReadString(tmp, s);
|
||||
wxPrivate_ReadString(tmp, s, wxConvLibc /*FIXME?*/);
|
||||
lineptr = tmp.c_str();
|
||||
|
||||
do
|
||||
@@ -839,7 +839,8 @@ bool wxSearchEngine::Scan(wxInputStream *stream)
|
||||
int wrd = wxStrlen(m_Keyword);
|
||||
bool found = FALSE;
|
||||
wxString tmp;
|
||||
wxPrivate_ReadString(tmp, stream);
|
||||
wxPrivate_ReadString(tmp, stream, wxConvLibc);
|
||||
// FIXME - use wxHtmlFilters instead of wxPrivate_ReadString !!!!!!
|
||||
int lng = tmp.length();
|
||||
const wxChar *buf = tmp.c_str();
|
||||
|
||||
|
@@ -26,11 +26,12 @@
|
||||
#include "wx/intl.h"
|
||||
#endif
|
||||
|
||||
#include "wx/strconv.h"
|
||||
#include "wx/html/htmlfilt.h"
|
||||
#include "wx/html/htmlwin.h"
|
||||
|
||||
// utility function: read a wxString from a wxInputStream
|
||||
void wxPrivate_ReadString(wxString& str, wxInputStream* s)
|
||||
void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv)
|
||||
{
|
||||
size_t streamSize = s->GetSize();
|
||||
|
||||
@@ -45,7 +46,7 @@ void wxPrivate_ReadString(wxString& str, wxInputStream* s)
|
||||
s->Read(buffer, bufSize);
|
||||
lastRead = s->LastRead();
|
||||
buffer[lastRead] = 0;
|
||||
str.Append( wxString::FromAscii(buffer) ); // TODO: What encoding ?
|
||||
str.Append(wxString(buffer, conv));
|
||||
}
|
||||
while (lastRead == bufSize);
|
||||
}
|
||||
@@ -54,7 +55,7 @@ void wxPrivate_ReadString(wxString& str, wxInputStream* s)
|
||||
char* src = new char[streamSize+1];
|
||||
s->Read(src, streamSize);
|
||||
src[streamSize] = 0;
|
||||
str = wxString::FromAscii( src); // TODO: What encoding ?
|
||||
str = wxString(src, conv);
|
||||
delete[] src;
|
||||
}
|
||||
}
|
||||
@@ -87,7 +88,7 @@ wxString wxHtmlFilterPlainText::ReadFile(const wxFSFile& file) const
|
||||
wxString doc, doc2;
|
||||
|
||||
if (s == NULL) return wxEmptyString;
|
||||
wxPrivate_ReadString(doc, s);
|
||||
wxPrivate_ReadString(doc, s, wxConvISO8859_1);
|
||||
|
||||
doc.Replace(wxT("&"), wxT("&amp;"), TRUE);
|
||||
doc.Replace(wxT("<"), wxT("&lt;"), TRUE);
|
||||
@@ -172,15 +173,43 @@ wxString wxHtmlFilterHTML::ReadFile(const wxFSFile& file) const
|
||||
wxLogError(_("Cannot open HTML document: %s"), file.GetLocation().c_str());
|
||||
return wxEmptyString;
|
||||
}
|
||||
wxPrivate_ReadString(doc, s);
|
||||
|
||||
// add meta tag if we obtained this through http:
|
||||
if (file.GetMimeType().Find(_T("; charset=")) == 0)
|
||||
// NB: We convert input file to wchar_t here in Unicode mode, based on
|
||||
// either Content-Type header or <meta> tags. In ANSI mode, we don't
|
||||
// do it as it is done by wxHtmlParser (for this reason, we add <meta>
|
||||
// tag if we used Content-Type header).
|
||||
#if wxUSE_UNICODE
|
||||
int charsetPos;
|
||||
if ((charsetPos = file.GetMimeType().Find(_T("; charset="))) != wxNOT_FOUND)
|
||||
{
|
||||
wxString s(_T("<meta http-equiv=\"Content-Type\" content=\""));
|
||||
s << file.GetMimeType() << _T("\">");
|
||||
return s+doc;
|
||||
wxString charset = file.GetMimeType().Mid(charsetPos + 10);
|
||||
wxCSConv conv(charset);
|
||||
wxPrivate_ReadString(doc, s, conv);
|
||||
}
|
||||
else
|
||||
{
|
||||
wxString tmpdoc;
|
||||
wxPrivate_ReadString(tmpdoc, s, wxConvISO8859_1);
|
||||
wxString charset = wxHtmlParser::ExtractCharsetInformation(tmpdoc);
|
||||
if (charset.empty())
|
||||
doc = tmpdoc;
|
||||
else
|
||||
{
|
||||
wxCSConv conv(charset);
|
||||
doc = wxString(tmpdoc.mb_str(wxConvISO8859_1), conv);
|
||||
}
|
||||
}
|
||||
#else // !wxUSE_UNICODE
|
||||
wxPrivate_ReadString(doc, s, wxConvLibc);
|
||||
// add meta tag if we obtained this through http:
|
||||
if (!file.GetMimeType().empty())
|
||||
{
|
||||
wxString hdr;
|
||||
wxString mime = file.GetMimeType();
|
||||
hdr.Printf(_T("<meta http-equiv=\"Content-Type\" content=\"%s\">"), mime.c_str());
|
||||
return hdr+doc;
|
||||
}
|
||||
#endif
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
@@ -500,11 +500,10 @@ extern "C" int LINKAGEMODE wxHtmlEntityCompare(const void *key, const void *item
|
||||
return wxStrcmp((wxChar*)key, ((wxHtmlEntityInfo*)item)->name);
|
||||
}
|
||||
|
||||
#if !wxUSE_UNICODE
|
||||
wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
|
||||
{
|
||||
#if wxUSE_UNICODE
|
||||
return (wxChar)code;
|
||||
#elif wxUSE_WCHAR_T
|
||||
#if wxUSE_WCHAR_T
|
||||
char buf[2];
|
||||
wchar_t wbuf[2];
|
||||
wbuf[0] = (wchar_t)code;
|
||||
@@ -517,6 +516,7 @@ wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
|
||||
return (code < 256) ? (wxChar)code : '?';
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity)
|
||||
{
|
||||
@@ -822,4 +822,55 @@ wxFSFile *wxHtmlParser::OpenURL(wxHtmlURLType WXUNUSED(type),
|
||||
return GetFS()->OpenFile(url);
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// wxHtmlParser::ExtractCharsetInformation
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
// Minimal wxHtmlParser subclass used by
// wxHtmlParser::ExtractCharsetInformation() to scan markup for <META> tags.
// It builds no output: GetProduct() yields NULL and text between tags is
// discarded.
class wxMetaTagParser : public wxHtmlParser
|
||||
{
|
||||
public:
|
||||
// This parser produces no object; always returns NULL.
wxObject* GetProduct() { return NULL; }
|
||||
protected:
|
||||
// Text content is ignored (empty body, parameter unused).
virtual void AddText(const wxChar* WXUNUSED(txt)) {}
|
||||
};
|
||||
|
||||
// Tag handler registered for "META" tags. It writes the charset it finds
// (see HandleTag) through the wxString pointer supplied at construction.
class wxMetaTagHandler : public wxHtmlTagHandler
|
||||
{
|
||||
public:
|
||||
// retval: string that HandleTag() writes the charset name into. Only the
// pointer is stored, so the string must stay valid while the handler is
// in use.
wxMetaTagHandler(wxString *retval) : wxHtmlTagHandler(), m_retval(retval) {}
|
||||
// This handler is interested in META tags only.
wxString GetSupportedTags() { return wxT("META"); }
|
||||
// Defined out of line, after this class.
bool HandleTag(const wxHtmlTag& tag);
|
||||
|
||||
private:
|
||||
// Caller-provided output string, set by the constructor.
wxString *m_retval;
|
||||
};
|
||||
|
||||
bool wxMetaTagHandler::HandleTag(const wxHtmlTag& tag)
|
||||
{
|
||||
if (tag.HasParam(_T("HTTP-EQUIV")) &&
|
||||
tag.GetParam(_T("HTTP-EQUIV")) == _T("Content-Type") &&
|
||||
tag.HasParam(_T("CONTENT")))
|
||||
{
|
||||
wxString content = tag.GetParam(_T("CONTENT"));
|
||||
if (content.Left(19) == _T("text/html; charset="))
|
||||
{
|
||||
*m_retval = content.Mid(19);
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
/*static*/
|
||||
// Parses 'markup' with a throw-away wxMetaTagParser whose only tag handler
// is a wxMetaTagHandler, and returns the charset string that handler
// recorded. The result is the default-constructed (empty) string when the
// handler never stored a charset.
wxString wxHtmlParser::ExtractCharsetInformation(const wxString& markup)
|
||||
{
|
||||
// Receives the result; the handler writes into it through a pointer.
wxString charset;
|
||||
wxMetaTagParser parser;
|
||||
// NOTE(review): the handler is heap-allocated and never deleted here;
// presumably the parser takes ownership of added handlers -- confirm.
parser.AddTagHandler(new wxMetaTagHandler(&charset));
|
||||
// The value returned by Parse() is not used; only the handler's side
// effect on 'charset' matters.
parser.Parse(markup);
|
||||
return charset;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -270,7 +270,7 @@ void wxHtmlPrintout::SetHtmlText(const wxString& html, const wxString &basepath,
|
||||
}
|
||||
|
||||
// defined in htmlfilt.cpp
|
||||
void wxPrivate_ReadString(wxString& str, wxInputStream* s);
|
||||
void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv);
|
||||
|
||||
void wxHtmlPrintout::SetHtmlFile(const wxString& htmlfile)
|
||||
{
|
||||
@@ -285,7 +285,7 @@ void wxHtmlPrintout::SetHtmlFile(const wxString& htmlfile)
|
||||
|
||||
wxInputStream *st = ff->GetStream();
|
||||
wxString doc;
|
||||
wxPrivate_ReadString(doc, st);
|
||||
wxPrivate_ReadString(doc, st, wxConvLibc /*FIXME -- use wxHtmlFilter!!*/);
|
||||
|
||||
delete ff;
|
||||
|
||||
|
@@ -49,9 +49,11 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindow *wnd) : wxHtmlParser()
|
||||
m_DC = NULL;
|
||||
m_CharHeight = m_CharWidth = 0;
|
||||
m_UseLink = FALSE;
|
||||
#if !wxUSE_UNICODE
|
||||
m_EncConv = NULL;
|
||||
m_InputEnc = wxFONTENCODING_ISO8859_1;
|
||||
m_OutputEnc = wxFONTENCODING_DEFAULT;
|
||||
#endif
|
||||
|
||||
{
|
||||
int i, j, k, l, m;
|
||||
@@ -63,7 +65,9 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindow *wnd) : wxHtmlParser()
|
||||
{
|
||||
m_FontsTable[i][j][k][l][m] = NULL;
|
||||
m_FontsFacesTable[i][j][k][l][m] = wxEmptyString;
|
||||
#if !wxUSE_UNICODE
|
||||
m_FontsEncTable[i][j][k][l][m] = wxFONTENCODING_DEFAULT;
|
||||
#endif
|
||||
}
|
||||
#ifdef __WXMSW__
|
||||
static int default_sizes[7] = {7, 8, 10, 12, 16, 22, 30};
|
||||
@@ -98,7 +102,9 @@ wxHtmlWinParser::~wxHtmlWinParser()
|
||||
if (m_FontsTable[i][j][k][l][m] != NULL)
|
||||
delete m_FontsTable[i][j][k][l][m];
|
||||
}
|
||||
#if !wxUSE_UNICODE
|
||||
delete m_EncConv;
|
||||
#endif
|
||||
delete[] m_tmpStrBuf;
|
||||
}
|
||||
|
||||
@@ -120,7 +126,9 @@ void wxHtmlWinParser::SetFonts(wxString normal_face, wxString fixed_face, const
|
||||
m_FontFaceFixed = fixed_face;
|
||||
m_FontFaceNormal = normal_face;
|
||||
|
||||
#if !wxUSE_UNICODE
|
||||
SetInputEncoding(m_InputEnc);
|
||||
#endif
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
for (j = 0; j < 2; j++)
|
||||
@@ -156,8 +164,18 @@ void wxHtmlWinParser::InitParser(const wxString& source)
|
||||
m_tmpLastWasSpace = FALSE;
|
||||
|
||||
OpenContainer();
|
||||
|
||||
OpenContainer();
|
||||
|
||||
#if 0 //!wxUSE_UNICODE
|
||||
wxString charset = ExtractCharsetInformation(source);
|
||||
if (!charset.empty())
|
||||
{
|
||||
wxFontEncoding enc = wxFontMapper::Get()->CharsetToEncoding(charset);
|
||||
if (enc != wxFONTENCODING_SYSTEM)
|
||||
SetInputEncoding(enc);
|
||||
}
|
||||
#endif
|
||||
|
||||
m_Container->InsertCell(new wxHtmlColourCell(m_ActualColor));
|
||||
m_Container->InsertCell(new wxHtmlFontCell(CreateCurrentFont()));
|
||||
}
|
||||
@@ -165,7 +183,9 @@ void wxHtmlWinParser::InitParser(const wxString& source)
|
||||
// Cleans up after a parse: clears the current-container pointer, resets the
// input encoding in non-Unicode builds, then calls the base class
// DoneParser().
void wxHtmlWinParser::DoneParser()
|
||||
{
|
||||
m_Container = NULL;
|
||||
// Encoding state only exists in ANSI (non-Unicode) builds.
#if !wxUSE_UNICODE
|
||||
// Go back to ISO8859-1 so the next parse does not start with the encoding
// left over from this document.
SetInputEncoding(wxFONTENCODING_ISO8859_1); // for next call
|
||||
#endif
|
||||
wxHtmlParser::DoneParser();
|
||||
}
|
||||
|
||||
@@ -254,8 +274,10 @@ void wxHtmlWinParser::AddText(const wxChar* txt)
|
||||
if (templen == 1) continue;
|
||||
#endif
|
||||
templen = 0;
|
||||
#if !wxUSE_UNICODE
|
||||
if (m_EncConv)
|
||||
m_EncConv->Convert(temp);
|
||||
#endif
|
||||
size_t len = wxStrlen(temp);
|
||||
for (size_t j = 0; j < len; j++)
|
||||
if (temp[j] == nbsp)
|
||||
@@ -271,8 +293,10 @@ void wxHtmlWinParser::AddText(const wxChar* txt)
|
||||
if (templen && (templen > 1 || temp[0] != wxT(' ')))
|
||||
{
|
||||
temp[templen] = 0;
|
||||
#if !wxUSE_UNICODE
|
||||
if (m_EncConv)
|
||||
m_EncConv->Convert(temp);
|
||||
#endif
|
||||
size_t len = wxStrlen(temp);
|
||||
for (size_t j = 0; j < len; j++)
|
||||
if (temp[j] == nbsp)
|
||||
@@ -334,9 +358,15 @@ wxFont* wxHtmlWinParser::CreateCurrentFont()
|
||||
wxString face = ff ? m_FontFaceFixed : m_FontFaceNormal;
|
||||
wxString *faceptr = &(m_FontsFacesTable[fb][fi][fu][ff][fs]);
|
||||
wxFont **fontptr = &(m_FontsTable[fb][fi][fu][ff][fs]);
|
||||
#if !wxUSE_UNICODE
|
||||
wxFontEncoding *encptr = &(m_FontsEncTable[fb][fi][fu][ff][fs]);
|
||||
#endif
|
||||
|
||||
if (*fontptr != NULL && (*faceptr != face || *encptr != m_OutputEnc))
|
||||
if (*fontptr != NULL && (*faceptr != face
|
||||
#if !wxUSE_UNICODE
|
||||
|| *encptr != m_OutputEnc
|
||||
#endif
|
||||
))
|
||||
{
|
||||
delete *fontptr;
|
||||
*fontptr = NULL;
|
||||
@@ -345,14 +375,18 @@ wxFont* wxHtmlWinParser::CreateCurrentFont()
|
||||
if (*fontptr == NULL)
|
||||
{
|
||||
*faceptr = face;
|
||||
*encptr = m_OutputEnc;
|
||||
*fontptr = new wxFont(
|
||||
(int) (m_FontsSizes[fs] * m_PixelScale),
|
||||
ff ? wxMODERN : wxSWISS,
|
||||
fi ? wxITALIC : wxNORMAL,
|
||||
fb ? wxBOLD : wxNORMAL,
|
||||
fu ? TRUE : FALSE, face,
|
||||
m_OutputEnc);
|
||||
fu ? TRUE : FALSE, face
|
||||
#if wxUSE_UNICODE
|
||||
);
|
||||
#else
|
||||
, m_OutputEnc);
|
||||
*encptr = m_OutputEnc;
|
||||
#endif
|
||||
}
|
||||
m_DC->SetFont(**fontptr);
|
||||
return (*fontptr);
|
||||
@@ -372,12 +406,15 @@ void wxHtmlWinParser::SetFontFace(const wxString& face)
|
||||
if (GetFontFixed()) m_FontFaceFixed = face;
|
||||
else m_FontFaceNormal = face;
|
||||
|
||||
#if !wxUSE_UNICODE
|
||||
if (m_InputEnc != wxFONTENCODING_DEFAULT)
|
||||
SetInputEncoding(m_InputEnc);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if !wxUSE_UNICODE
|
||||
void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
|
||||
{
|
||||
m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT;
|
||||
@@ -435,7 +472,7 @@ void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
|
||||
m_EncConv = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user