diff --git a/include/wx/html/htmlpars.h b/include/wx/html/htmlpars.h
index ea6a99bc5c..4ddc3fda66 100644
--- a/include/wx/html/htmlpars.h
+++ b/include/wx/html/htmlpars.h
@@ -117,6 +117,11 @@ public:
// Restores parser's state from stack or returns FALSE if the stack is
// empty
virtual bool RestoreState();
+
+ // Parses HTML string 'markup' and extracts charset info from <meta> tag
+ // if present. Returns empty string if the tag is missing.
+ // For wxHTML's internal use.
+ static wxString ExtractCharsetInformation(const wxString& markup);
protected:
// DOM structure
@@ -242,7 +247,11 @@ public:
wxChar GetEntityChar(const wxString& entity);
// Returns character that represents given Unicode code
+#if wxUSE_UNICODE
+ wxChar GetCharForCode(unsigned code) { return (wxChar)code; } // Unicode build: the code is simply cast to wxChar
+#else
wxChar GetCharForCode(unsigned code); // non-Unicode build: defined out of line in htmlpars.cpp
+#endif
protected:
#if wxUSE_WCHAR_T && !wxUSE_UNICODE
diff --git a/include/wx/html/winpars.h b/include/wx/html/winpars.h
index 5e6866d069..23905552f7 100644
--- a/include/wx/html/winpars.h
+++ b/include/wx/html/winpars.h
@@ -117,10 +117,12 @@ public:
const wxHtmlLinkInfo& GetLink() const { return m_Link; }
void SetLink(const wxHtmlLinkInfo& link);
+#if !wxUSE_UNICODE
void SetInputEncoding(wxFontEncoding enc);
wxFontEncoding GetInputEncoding() const { return m_InputEnc; }
wxFontEncoding GetOutputEncoding() const { return m_OutputEnc; }
wxEncodingConverter *GetEncodingConverter() const { return m_EncConv; }
+#endif
// creates font depending on m_Font* members.
virtual wxFont* CreateCurrentFont();
@@ -161,7 +163,9 @@ private:
wxFont* m_FontsTable[2][2][2][2][7];
wxString m_FontsFacesTable[2][2][2][2][7];
+#if !wxUSE_UNICODE
wxFontEncoding m_FontsEncTable[2][2][2][2][7];
+#endif
// table of loaded fonts. 1st four indexes are 0 or 1, depending on on/off
// state of these flags (from left to right):
// [bold][italic][underlined][fixed_size]
@@ -172,9 +176,11 @@ private:
wxString m_FontFaceFixed, m_FontFaceNormal;
// html font sizes and faces of fixed and proportional fonts
+#if !wxUSE_UNICODE
wxFontEncoding m_InputEnc, m_OutputEnc;
// I/O font encodings
wxEncodingConverter *m_EncConv;
+#endif
};
diff --git a/src/html/helpdata.cpp b/src/html/helpdata.cpp
index 37691592c7..9c430b0b06 100644
--- a/src/html/helpdata.cpp
+++ b/src/html/helpdata.cpp
@@ -251,7 +251,7 @@ wxHtmlHelpData::~wxHtmlHelpData()
}
// defined in htmlfilt.cpp
-void wxPrivate_ReadString(wxString& str, wxInputStream* s);
+void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv);
bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, const wxString& indexfile, const wxString& contentsfile)
{
@@ -267,7 +267,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, c
if (f)
{
buf.clear();
- wxPrivate_ReadString(buf, f->GetStream());
+ wxPrivate_ReadString(buf, f->GetStream(), wxConvLibc /*FIXME?*/);
delete f;
handler->ReadIn(m_Contents, m_ContentsCnt);
parser.Parse(buf);
@@ -280,7 +280,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, c
if (f)
{
buf.clear();
- wxPrivate_ReadString(buf, f->GetStream());
+ wxPrivate_ReadString(buf, f->GetStream(), wxConvLibc /*FIXME?*/);
delete f;
handler->ReadIn(m_Index, m_IndexCnt);
parser.Parse(buf);
@@ -600,7 +600,7 @@ bool wxHtmlHelpData::AddBook(const wxString& book)
wxChar linebuf[300];
wxString tmp;
- wxPrivate_ReadString(tmp, s);
+ wxPrivate_ReadString(tmp, s, wxConvLibc /*FIXME?*/);
lineptr = tmp.c_str();
do
@@ -839,7 +839,8 @@ bool wxSearchEngine::Scan(wxInputStream *stream)
int wrd = wxStrlen(m_Keyword);
bool found = FALSE;
wxString tmp;
- wxPrivate_ReadString(tmp, stream);
+ wxPrivate_ReadString(tmp, stream, wxConvLibc);
+ // FIXME - use wxHtmlFilters instead of wxPrivate_ReadString !!!!!!
int lng = tmp.length();
const wxChar *buf = tmp.c_str();
diff --git a/src/html/htmlfilt.cpp b/src/html/htmlfilt.cpp
index 7b9deb923b..868139e29a 100644
--- a/src/html/htmlfilt.cpp
+++ b/src/html/htmlfilt.cpp
@@ -26,15 +26,16 @@
#include "wx/intl.h"
#endif
+#include "wx/strconv.h"
#include "wx/html/htmlfilt.h"
#include "wx/html/htmlwin.h"
// utility function: read a wxString from a wxInputStream
-void wxPrivate_ReadString(wxString& str, wxInputStream* s)
+void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv)
{
size_t streamSize = s->GetSize();
- if(streamSize == ~(size_t)0)
+ if (streamSize == ~(size_t)0)
{
const size_t bufSize = 4095;
char buffer[bufSize+1];
@@ -45,17 +46,17 @@ void wxPrivate_ReadString(wxString& str, wxInputStream* s)
s->Read(buffer, bufSize);
lastRead = s->LastRead();
buffer[lastRead] = 0;
- str.Append( wxString::FromAscii(buffer) ); // TODO: What encoding ?
+ str.Append(wxString(buffer, conv));
}
- while(lastRead == bufSize);
+ while (lastRead == bufSize);
}
else
{
char* src = new char[streamSize+1];
s->Read(src, streamSize);
src[streamSize] = 0;
- str = wxString::FromAscii( src); // TODO: What encoding ?
- delete [] src;
+ str = wxString(src, conv);
+ delete[] src;
}
}
@@ -87,7 +88,7 @@ wxString wxHtmlFilterPlainText::ReadFile(const wxFSFile& file) const
wxString doc, doc2;
if (s == NULL) return wxEmptyString;
- wxPrivate_ReadString(doc, s);
+ wxPrivate_ReadString(doc, s, wxConvISO8859_1);
doc.Replace(wxT("&"), wxT("&"), TRUE);
doc.Replace(wxT("<"), wxT("<"), TRUE);
@@ -172,15 +173,43 @@ wxString wxHtmlFilterHTML::ReadFile(const wxFSFile& file) const
wxLogError(_("Cannot open HTML document: %s"), file.GetLocation().c_str());
return wxEmptyString;
}
- wxPrivate_ReadString(doc, s);
- // add meta tag if we obtained this through http:
- if (file.GetMimeType().Find(_T("; charset=")) == 0)
+ // NB: We convert input file to wchar_t here in Unicode mode, based on
+ // either Content-Type header or <meta> tags. In ANSI mode, we don't
+ // do it as it is done by wxHtmlParser (for this reason, we add <meta>
+ // tag if we used Content-Type header).
+#if wxUSE_UNICODE
+ int charsetPos;
+ if ((charsetPos = file.GetMimeType().Find(_T("; charset="))) != wxNOT_FOUND)
{
- wxString s(_T("");
- return s+doc;
+ wxString charset = file.GetMimeType().Mid(charsetPos + 10);
+ wxCSConv conv(charset);
+ wxPrivate_ReadString(doc, s, conv);
}
+ else
+ {
+ wxString tmpdoc;
+ wxPrivate_ReadString(tmpdoc, s, wxConvISO8859_1);
+ wxString charset = wxHtmlParser::ExtractCharsetInformation(tmpdoc);
+ if (charset.empty())
+ doc = tmpdoc;
+ else
+ {
+ wxCSConv conv(charset);
+ doc = wxString(tmpdoc.mb_str(wxConvISO8859_1), conv);
+ }
+ }
+#else // !wxUSE_UNICODE
+ wxPrivate_ReadString(doc, s, wxConvLibc);
+ // add meta tag if we obtained this through http:
+ if (!file.GetMimeType().empty())
+ {
+ wxString hdr;
+ wxString mime = file.GetMimeType();
+ hdr.Printf(_T(""), mime.c_str());
+ return hdr+doc;
+ }
+#endif
return doc;
}
diff --git a/src/html/htmlpars.cpp b/src/html/htmlpars.cpp
index c3a5e3a26b..4a998a6396 100644
--- a/src/html/htmlpars.cpp
+++ b/src/html/htmlpars.cpp
@@ -500,11 +500,10 @@ extern "C" int LINKAGEMODE wxHtmlEntityCompare(const void *key, const void *item
return wxStrcmp((wxChar*)key, ((wxHtmlEntityInfo*)item)->name);
}
+#if !wxUSE_UNICODE
wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
{
-#if wxUSE_UNICODE
- return (wxChar)code;
-#elif wxUSE_WCHAR_T
+#if wxUSE_WCHAR_T
char buf[2];
wchar_t wbuf[2];
wbuf[0] = (wchar_t)code;
@@ -517,6 +516,7 @@ wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
return (code < 256) ? (wxChar)code : '?';
#endif
}
+#endif
wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity)
{
@@ -822,4 +822,55 @@ wxFSFile *wxHtmlParser::OpenURL(wxHtmlURLType WXUNUSED(type),
return GetFS()->OpenFile(url);
}
+
+//-----------------------------------------------------------------------------
+// wxHtmlParser::ExtractCharsetInformation
+//-----------------------------------------------------------------------------
+
+// wxHtmlParser subclass that ignores all text and yields no product; it
+// exists so that tag handlers can be run over a piece of markup.
+class wxMetaTagParser : public wxHtmlParser
+{
+public:
+    // This parser builds nothing, so there is no product to hand back.
+    wxObject* GetProduct()
+    {
+        return NULL;
+    }
+
+protected:
+    // Text found in the document is discarded.
+    virtual void AddText(const wxChar* WXUNUSED(txt))
+    {
+    }
+};
+
+// Tag handler registered for META tags. Its result is reported through
+// the wxString pointer supplied at construction time (see HandleTag).
+class wxMetaTagHandler : public wxHtmlTagHandler
+{
+public:
+    // 'retval' is where HandleTag writes its result. Only the pointer is
+    // stored, so the pointed-to string must outlive the handler's use.
+    wxMetaTagHandler(wxString *retval)
+        : wxHtmlTagHandler(),
+          m_retval(retval)
+    {
+    }
+
+    // The only tag this handler declares support for is META.
+    wxString GetSupportedTags()
+    {
+        return wxT("META");
+    }
+
+    bool HandleTag(const wxHtmlTag& tag);
+
+private:
+    wxString *m_retval;   // destination string written by HandleTag
+};
+
+// Handles one META tag. If the tag has HTTP-EQUIV="Content-Type" and a
+// CONTENT parameter starting with "text/html; charset=", the remainder of
+// CONTENT (the charset name) is stored in the string passed to the
+// constructor; otherwise that string is left untouched.
+//
+// Both the HTTP-EQUIV value and the "text/html; charset=" prefix are
+// compared case-insensitively: documents frequently spell them as
+// "content-type" or "Text/HTML; Charset=", and an exact-case comparison
+// would silently miss the declared charset.
+//
+// Always returns FALSE.
+bool wxMetaTagHandler::HandleTag(const wxHtmlTag& tag)
+{
+    if (tag.HasParam(_T("HTTP-EQUIV")) &&
+        tag.GetParam(_T("HTTP-EQUIV")).IsSameAs(_T("Content-Type"), false) &&
+        tag.HasParam(_T("CONTENT")))
+    {
+        const wxString content = tag.GetParam(_T("CONTENT"));
+        // 19 == length of "text/html; charset="
+        if (content.Left(19).IsSameAs(_T("text/html; charset="), false))
+        {
+            *m_retval = content.Mid(19);
+        }
+    }
+    return FALSE;
+}
+
+
+/*static*/
+wxString wxHtmlParser::ExtractCharsetInformation(const wxString& markup)
+{
+    // 'found' starts out empty and is only written by the META handler,
+    // so it is returned empty when no matching tag is seen in 'markup'.
+    wxString found;
+
+    wxMetaTagParser metaParser;
+    wxMetaTagHandler *handler = new wxMetaTagHandler(&found);
+    metaParser.AddTagHandler(handler);
+
+    metaParser.Parse(markup);
+    return found;
+}
+
+
#endif
diff --git a/src/html/htmprint.cpp b/src/html/htmprint.cpp
index 117e2261c4..40b7565107 100644
--- a/src/html/htmprint.cpp
+++ b/src/html/htmprint.cpp
@@ -270,7 +270,7 @@ void wxHtmlPrintout::SetHtmlText(const wxString& html, const wxString &basepath,
}
// defined in htmlfilt.cpp
-void wxPrivate_ReadString(wxString& str, wxInputStream* s);
+void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv);
void wxHtmlPrintout::SetHtmlFile(const wxString& htmlfile)
{
@@ -285,7 +285,7 @@ void wxHtmlPrintout::SetHtmlFile(const wxString& htmlfile)
wxInputStream *st = ff->GetStream();
wxString doc;
- wxPrivate_ReadString(doc, st);
+ wxPrivate_ReadString(doc, st, wxConvLibc /*FIXME -- use wxHtmlFilter!!*/);
delete ff;
diff --git a/src/html/winpars.cpp b/src/html/winpars.cpp
index 9e3ccc26a4..d11a8ea9fd 100644
--- a/src/html/winpars.cpp
+++ b/src/html/winpars.cpp
@@ -49,9 +49,11 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindow *wnd) : wxHtmlParser()
m_DC = NULL;
m_CharHeight = m_CharWidth = 0;
m_UseLink = FALSE;
+#if !wxUSE_UNICODE
m_EncConv = NULL;
m_InputEnc = wxFONTENCODING_ISO8859_1;
m_OutputEnc = wxFONTENCODING_DEFAULT;
+#endif
{
int i, j, k, l, m;
@@ -63,7 +65,9 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindow *wnd) : wxHtmlParser()
{
m_FontsTable[i][j][k][l][m] = NULL;
m_FontsFacesTable[i][j][k][l][m] = wxEmptyString;
+#if !wxUSE_UNICODE
m_FontsEncTable[i][j][k][l][m] = wxFONTENCODING_DEFAULT;
+#endif
}
#ifdef __WXMSW__
static int default_sizes[7] = {7, 8, 10, 12, 16, 22, 30};
@@ -98,7 +102,9 @@ wxHtmlWinParser::~wxHtmlWinParser()
if (m_FontsTable[i][j][k][l][m] != NULL)
delete m_FontsTable[i][j][k][l][m];
}
+#if !wxUSE_UNICODE
delete m_EncConv;
+#endif
delete[] m_tmpStrBuf;
}
@@ -120,7 +126,9 @@ void wxHtmlWinParser::SetFonts(wxString normal_face, wxString fixed_face, const
m_FontFaceFixed = fixed_face;
m_FontFaceNormal = normal_face;
+#if !wxUSE_UNICODE
SetInputEncoding(m_InputEnc);
+#endif
for (i = 0; i < 2; i++)
for (j = 0; j < 2; j++)
@@ -156,8 +164,18 @@ void wxHtmlWinParser::InitParser(const wxString& source)
m_tmpLastWasSpace = FALSE;
OpenContainer();
-
OpenContainer();
+
+#if 0 //!wxUSE_UNICODE
+ wxString charset = ExtractCharsetInformation(source);
+ if (!charset.empty())
+ {
+ wxFontEncoding enc = wxFontMapper::Get()->CharsetToEncoding(charset);
+ if (enc != wxFONTENCODING_SYSTEM)
+ SetInputEncoding(enc);
+ }
+#endif
+
m_Container->InsertCell(new wxHtmlColourCell(m_ActualColor));
m_Container->InsertCell(new wxHtmlFontCell(CreateCurrentFont()));
}
@@ -165,7 +183,9 @@ void wxHtmlWinParser::InitParser(const wxString& source)
void wxHtmlWinParser::DoneParser()
{
m_Container = NULL;
+#if !wxUSE_UNICODE
SetInputEncoding(wxFONTENCODING_ISO8859_1); // for next call
+#endif
wxHtmlParser::DoneParser();
}
@@ -254,8 +274,10 @@ void wxHtmlWinParser::AddText(const wxChar* txt)
if (templen == 1) continue;
#endif
templen = 0;
+#if !wxUSE_UNICODE
if (m_EncConv)
m_EncConv->Convert(temp);
+#endif
size_t len = wxStrlen(temp);
for (size_t j = 0; j < len; j++)
if (temp[j] == nbsp)
@@ -271,8 +293,10 @@ void wxHtmlWinParser::AddText(const wxChar* txt)
if (templen && (templen > 1 || temp[0] != wxT(' ')))
{
temp[templen] = 0;
+#if !wxUSE_UNICODE
if (m_EncConv)
m_EncConv->Convert(temp);
+#endif
size_t len = wxStrlen(temp);
for (size_t j = 0; j < len; j++)
if (temp[j] == nbsp)
@@ -334,9 +358,15 @@ wxFont* wxHtmlWinParser::CreateCurrentFont()
wxString face = ff ? m_FontFaceFixed : m_FontFaceNormal;
wxString *faceptr = &(m_FontsFacesTable[fb][fi][fu][ff][fs]);
wxFont **fontptr = &(m_FontsTable[fb][fi][fu][ff][fs]);
+#if !wxUSE_UNICODE
wxFontEncoding *encptr = &(m_FontsEncTable[fb][fi][fu][ff][fs]);
+#endif
- if (*fontptr != NULL && (*faceptr != face || *encptr != m_OutputEnc))
+ if (*fontptr != NULL && (*faceptr != face
+#if !wxUSE_UNICODE
+ || *encptr != m_OutputEnc
+#endif
+ ))
{
delete *fontptr;
*fontptr = NULL;
@@ -345,14 +375,18 @@ wxFont* wxHtmlWinParser::CreateCurrentFont()
if (*fontptr == NULL)
{
*faceptr = face;
- *encptr = m_OutputEnc;
*fontptr = new wxFont(
(int) (m_FontsSizes[fs] * m_PixelScale),
ff ? wxMODERN : wxSWISS,
fi ? wxITALIC : wxNORMAL,
fb ? wxBOLD : wxNORMAL,
- fu ? TRUE : FALSE, face,
- m_OutputEnc);
+ fu ? TRUE : FALSE, face
+#if wxUSE_UNICODE
+ );
+#else
+ , m_OutputEnc);
+ *encptr = m_OutputEnc;
+#endif
}
m_DC->SetFont(**fontptr);
return (*fontptr);
@@ -372,12 +406,15 @@ void wxHtmlWinParser::SetFontFace(const wxString& face)
if (GetFontFixed()) m_FontFaceFixed = face;
else m_FontFaceNormal = face;
+#if !wxUSE_UNICODE
if (m_InputEnc != wxFONTENCODING_DEFAULT)
SetInputEncoding(m_InputEnc);
+#endif
}
+#if !wxUSE_UNICODE
void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
{
m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT;
@@ -435,7 +472,7 @@ void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
m_EncConv = NULL;
}
}
-
+#endif