diff --git a/include/wx/html/htmlpars.h b/include/wx/html/htmlpars.h index ea6a99bc5c..4ddc3fda66 100644 --- a/include/wx/html/htmlpars.h +++ b/include/wx/html/htmlpars.h @@ -117,6 +117,11 @@ public: // Restores parser's state from stack or returns FALSE if the stack is // empty virtual bool RestoreState(); + + // Parses HTML string 'markup' and extracts charset info from tag + // if present. Returns empty string if the tag is missing. + // For wxHTML's internal use. + static wxString ExtractCharsetInformation(const wxString& markup); protected: // DOM structure @@ -242,7 +247,11 @@ public: wxChar GetEntityChar(const wxString& entity); // Returns character that represents given Unicode code +#if wxUSE_UNICODE + wxChar GetCharForCode(unsigned code) { return (wxChar)code; } +#else wxChar GetCharForCode(unsigned code); +#endif protected: #if wxUSE_WCHAR_T && !wxUSE_UNICODE diff --git a/include/wx/html/winpars.h b/include/wx/html/winpars.h index 5e6866d069..23905552f7 100644 --- a/include/wx/html/winpars.h +++ b/include/wx/html/winpars.h @@ -117,10 +117,12 @@ public: const wxHtmlLinkInfo& GetLink() const { return m_Link; } void SetLink(const wxHtmlLinkInfo& link); +#if !wxUSE_UNICODE void SetInputEncoding(wxFontEncoding enc); wxFontEncoding GetInputEncoding() const { return m_InputEnc; } wxFontEncoding GetOutputEncoding() const { return m_OutputEnc; } wxEncodingConverter *GetEncodingConverter() const { return m_EncConv; } +#endif // creates font depending on m_Font* members. virtual wxFont* CreateCurrentFont(); @@ -161,7 +163,9 @@ private: wxFont* m_FontsTable[2][2][2][2][7]; wxString m_FontsFacesTable[2][2][2][2][7]; +#if !wxUSE_UNICODE wxFontEncoding m_FontsEncTable[2][2][2][2][7]; +#endif // table of loaded fonts. 1st four indexes are 0 or 1, depending on on/off // state of these flags (from left to right): // [bold][italic][underlined][fixed_size] @@ -172,9 +176,11 @@ private: wxString m_FontFaceFixed, m_FontFaceNormal; // html font sizes and faces of fixed and proportional fonts +#if !wxUSE_UNICODE wxFontEncoding m_InputEnc, m_OutputEnc; // I/O font encodings wxEncodingConverter *m_EncConv; +#endif }; diff --git a/src/html/helpdata.cpp b/src/html/helpdata.cpp index 37691592c7..9c430b0b06 100644 --- a/src/html/helpdata.cpp +++ b/src/html/helpdata.cpp @@ -251,7 +251,7 @@ wxHtmlHelpData::~wxHtmlHelpData() } // defined in htmlfilt.cpp -void wxPrivate_ReadString(wxString& str, wxInputStream* s); +void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv); bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, const wxString& indexfile, const wxString& contentsfile) { @@ -267,7 +267,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, c if (f) { buf.clear(); - wxPrivate_ReadString(buf, f->GetStream()); + wxPrivate_ReadString(buf, f->GetStream(), wxConvLibc /*FIXME?*/); delete f; handler->ReadIn(m_Contents, m_ContentsCnt); parser.Parse(buf); @@ -280,7 +280,7 @@ bool wxHtmlHelpData::LoadMSProject(wxHtmlBookRecord *book, wxFileSystem& fsys, c if (f) { buf.clear(); - wxPrivate_ReadString(buf, f->GetStream()); + wxPrivate_ReadString(buf, f->GetStream(), wxConvLibc /*FIXME?*/); delete f; handler->ReadIn(m_Index, m_IndexCnt); parser.Parse(buf); @@ -600,7 +600,7 @@ bool wxHtmlHelpData::AddBook(const wxString& book) wxChar linebuf[300]; wxString tmp; - wxPrivate_ReadString(tmp, s); + wxPrivate_ReadString(tmp, s, wxConvLibc /*FIXME?*/); lineptr = tmp.c_str(); do @@ -839,7 +839,8 @@ bool wxSearchEngine::Scan(wxInputStream *stream) int wrd = wxStrlen(m_Keyword); bool found = FALSE; wxString tmp; - wxPrivate_ReadString(tmp, stream); + wxPrivate_ReadString(tmp, stream, wxConvLibc); + // FIXME - use wxHtmlFilters instead of wxPrivate_ReadString !!!!!! int lng = tmp.length(); const wxChar *buf = tmp.c_str(); diff --git a/src/html/htmlfilt.cpp b/src/html/htmlfilt.cpp index 7b9deb923b..868139e29a 100644 --- a/src/html/htmlfilt.cpp +++ b/src/html/htmlfilt.cpp @@ -26,15 +26,16 @@ #include "wx/intl.h" #endif +#include "wx/strconv.h" #include "wx/html/htmlfilt.h" #include "wx/html/htmlwin.h" // utility function: read a wxString from a wxInputStream -void wxPrivate_ReadString(wxString& str, wxInputStream* s) +void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv) { size_t streamSize = s->GetSize(); - if(streamSize == ~(size_t)0) + if (streamSize == ~(size_t)0) { const size_t bufSize = 4095; char buffer[bufSize+1]; @@ -45,17 +46,17 @@ void wxPrivate_ReadString(wxString& str, wxInputStream* s) s->Read(buffer, bufSize); lastRead = s->LastRead(); buffer[lastRead] = 0; - str.Append( wxString::FromAscii(buffer) ); // TODO: What encoding ? + str.Append(wxString(buffer, conv)); } - while(lastRead == bufSize); + while (lastRead == bufSize); } else { char* src = new char[streamSize+1]; s->Read(src, streamSize); src[streamSize] = 0; - str = wxString::FromAscii( src); // TODO: What encoding ? - delete [] src; + str = wxString(src, conv); + delete[] src; } } @@ -87,7 +88,7 @@ wxString wxHtmlFilterPlainText::ReadFile(const wxFSFile& file) const wxString doc, doc2; if (s == NULL) return wxEmptyString; - wxPrivate_ReadString(doc, s); + wxPrivate_ReadString(doc, s, wxConvISO8859_1); doc.Replace(wxT("&"), wxT("&"), TRUE); doc.Replace(wxT("<"), wxT("<"), TRUE); @@ -172,15 +173,43 @@ wxString wxHtmlFilterHTML::ReadFile(const wxFSFile& file) const wxLogError(_("Cannot open HTML document: %s"), file.GetLocation().c_str()); return wxEmptyString; } - wxPrivate_ReadString(doc, s); - // add meta tag if we obtained this through http: - if (file.GetMimeType().Find(_T("; charset=")) == 0) + // NB: We convert input file to wchar_t here in Unicode mode, based on + // either Content-Type header or tags. In ANSI mode, we don't + // do it as it is done by wxHtmlParser (for this reason, we add + // tag if we used Content-Type header). +#if wxUSE_UNICODE + int charsetPos; + if ((charsetPos = file.GetMimeType().Find(_T("; charset="))) != wxNOT_FOUND) { - wxString s(_T(""); - return s+doc; + wxString charset = file.GetMimeType().Mid(charsetPos + 10); + wxCSConv conv(charset); + wxPrivate_ReadString(doc, s, conv); } + else + { + wxString tmpdoc; + wxPrivate_ReadString(tmpdoc, s, wxConvISO8859_1); + wxString charset = wxHtmlParser::ExtractCharsetInformation(tmpdoc); + if (charset.empty()) + doc = tmpdoc; + else + { + wxCSConv conv(charset); + doc = wxString(tmpdoc.mb_str(wxConvISO8859_1), conv); + } + } +#else // !wxUSE_UNICODE + wxPrivate_ReadString(doc, s, wxConvLibc); + // add meta tag if we obtained this through http: + if (!file.GetMimeType().empty()) + { + wxString hdr; + wxString mime = file.GetMimeType(); + hdr.Printf(_T(""), mime.c_str()); + return hdr+doc; + } +#endif return doc; } diff --git a/src/html/htmlpars.cpp b/src/html/htmlpars.cpp index c3a5e3a26b..4a998a6396 100644 --- a/src/html/htmlpars.cpp +++ b/src/html/htmlpars.cpp @@ -500,11 +500,10 @@ extern "C" int LINKAGEMODE wxHtmlEntityCompare(const void *key, const void *item return wxStrcmp((wxChar*)key, ((wxHtmlEntityInfo*)item)->name); } +#if !wxUSE_UNICODE wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code) { -#if wxUSE_UNICODE - return (wxChar)code; -#elif wxUSE_WCHAR_T +#if wxUSE_WCHAR_T char buf[2]; wchar_t wbuf[2]; wbuf[0] = (wchar_t)code; @@ -517,6 +516,7 @@ wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code) return (code < 256) ? (wxChar)code : '?'; #endif } +#endif wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity) { @@ -822,4 +822,55 @@ wxFSFile *wxHtmlParser::OpenURL(wxHtmlURLType WXUNUSED(type), return GetFS()->OpenFile(url); } + +//----------------------------------------------------------------------------- +// wxHtmlParser::ExtractCharsetInformation +//----------------------------------------------------------------------------- + +class wxMetaTagParser : public wxHtmlParser +{ +public: + wxObject* GetProduct() { return NULL; } +protected: + virtual void AddText(const wxChar* WXUNUSED(txt)) {} +}; + +class wxMetaTagHandler : public wxHtmlTagHandler +{ +public: + wxMetaTagHandler(wxString *retval) : wxHtmlTagHandler(), m_retval(retval) {} + wxString GetSupportedTags() { return wxT("META"); } + bool HandleTag(const wxHtmlTag& tag); + +private: + wxString *m_retval; +}; + +bool wxMetaTagHandler::HandleTag(const wxHtmlTag& tag) +{ + if (tag.HasParam(_T("HTTP-EQUIV")) && + tag.GetParam(_T("HTTP-EQUIV")) == _T("Content-Type") && + tag.HasParam(_T("CONTENT"))) + { + wxString content = tag.GetParam(_T("CONTENT")); + if (content.Left(19) == _T("text/html; charset=")) + { + *m_retval = content.Mid(19); + } + } + return FALSE; +} + + +/*static*/ +wxString wxHtmlParser::ExtractCharsetInformation(const wxString& markup) +{ + wxString charset; + wxMetaTagParser parser; + parser.AddTagHandler(new wxMetaTagHandler(&charset)); + parser.Parse(markup); + return charset; +} + + #endif diff --git a/src/html/htmprint.cpp b/src/html/htmprint.cpp index 117e2261c4..40b7565107 100644 --- a/src/html/htmprint.cpp +++ b/src/html/htmprint.cpp @@ -270,7 +270,7 @@ void wxHtmlPrintout::SetHtmlText(const wxString& html, const wxString &basepath, } // defined in htmlfilt.cpp -void wxPrivate_ReadString(wxString& str, wxInputStream* s); +void wxPrivate_ReadString(wxString& str, wxInputStream* s, wxMBConv& conv); void wxHtmlPrintout::SetHtmlFile(const wxString& htmlfile) { @@ -285,7 +285,7 @@ void wxHtmlPrintout::SetHtmlFile(const wxString& htmlfile) wxInputStream *st = ff->GetStream(); wxString doc; - wxPrivate_ReadString(doc, st); + wxPrivate_ReadString(doc, st, wxConvLibc /*FIXME -- use wxHtmlFilter!!*/); delete ff; diff --git a/src/html/winpars.cpp b/src/html/winpars.cpp index 9e3ccc26a4..d11a8ea9fd 100644 --- a/src/html/winpars.cpp +++ b/src/html/winpars.cpp @@ -49,9 +49,11 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindow *wnd) : wxHtmlParser() m_DC = NULL; m_CharHeight = m_CharWidth = 0; m_UseLink = FALSE; +#if !wxUSE_UNICODE m_EncConv = NULL; m_InputEnc = wxFONTENCODING_ISO8859_1; m_OutputEnc = wxFONTENCODING_DEFAULT; +#endif { int i, j, k, l, m; @@ -63,7 +65,9 @@ wxHtmlWinParser::wxHtmlWinParser(wxHtmlWindow *wnd) : wxHtmlParser() { m_FontsTable[i][j][k][l][m] = NULL; m_FontsFacesTable[i][j][k][l][m] = wxEmptyString; +#if !wxUSE_UNICODE m_FontsEncTable[i][j][k][l][m] = wxFONTENCODING_DEFAULT; +#endif } #ifdef __WXMSW__ static int default_sizes[7] = {7, 8, 10, 12, 16, 22, 30}; @@ -98,7 +102,9 @@ wxHtmlWinParser::~wxHtmlWinParser() if (m_FontsTable[i][j][k][l][m] != NULL) delete m_FontsTable[i][j][k][l][m]; } +#if !wxUSE_UNICODE delete m_EncConv; +#endif delete[] m_tmpStrBuf; } @@ -120,7 +126,9 @@ void wxHtmlWinParser::SetFonts(wxString normal_face, wxString fixed_face, const m_FontFaceFixed = fixed_face; m_FontFaceNormal = normal_face; +#if !wxUSE_UNICODE SetInputEncoding(m_InputEnc); +#endif for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) @@ -156,8 +164,18 @@ void wxHtmlWinParser::InitParser(const wxString& source) m_tmpLastWasSpace = FALSE; OpenContainer(); - OpenContainer(); + +#if 0 //!wxUSE_UNICODE + wxString charset = ExtractCharsetInformation(source); + if (!charset.empty()) + { + wxFontEncoding enc = wxFontMapper::Get()->CharsetToEncoding(charset); + if (enc != wxFONTENCODING_SYSTEM) + SetInputEncoding(enc); + } +#endif + m_Container->InsertCell(new wxHtmlColourCell(m_ActualColor)); m_Container->InsertCell(new wxHtmlFontCell(CreateCurrentFont())); } @@ -165,7 +183,9 @@ void wxHtmlWinParser::InitParser(const wxString& source) void wxHtmlWinParser::DoneParser() { m_Container = NULL; +#if !wxUSE_UNICODE SetInputEncoding(wxFONTENCODING_ISO8859_1); // for next call +#endif wxHtmlParser::DoneParser(); } @@ -254,8 +274,10 @@ void wxHtmlWinParser::AddText(const wxChar* txt) if (templen == 1) continue; #endif templen = 0; +#if !wxUSE_UNICODE if (m_EncConv) m_EncConv->Convert(temp); +#endif size_t len = wxStrlen(temp); for (size_t j = 0; j < len; j++) if (temp[j] == nbsp) @@ -271,8 +293,10 @@ void wxHtmlWinParser::AddText(const wxChar* txt) if (templen && (templen > 1 || temp[0] != wxT(' '))) { temp[templen] = 0; +#if !wxUSE_UNICODE if (m_EncConv) m_EncConv->Convert(temp); +#endif size_t len = wxStrlen(temp); for (size_t j = 0; j < len; j++) if (temp[j] == nbsp) @@ -334,9 +358,15 @@ wxFont* wxHtmlWinParser::CreateCurrentFont() wxString face = ff ? m_FontFaceFixed : m_FontFaceNormal; wxString *faceptr = &(m_FontsFacesTable[fb][fi][fu][ff][fs]); wxFont **fontptr = &(m_FontsTable[fb][fi][fu][ff][fs]); +#if !wxUSE_UNICODE wxFontEncoding *encptr = &(m_FontsEncTable[fb][fi][fu][ff][fs]); +#endif - if (*fontptr != NULL && (*faceptr != face || *encptr != m_OutputEnc)) + if (*fontptr != NULL && (*faceptr != face +#if !wxUSE_UNICODE + || *encptr != m_OutputEnc +#endif + )) { delete *fontptr; *fontptr = NULL; @@ -345,14 +375,18 @@ wxFont* wxHtmlWinParser::CreateCurrentFont() if (*fontptr == NULL) { *faceptr = face; - *encptr = m_OutputEnc; *fontptr = new wxFont( (int) (m_FontsSizes[fs] * m_PixelScale), ff ? wxMODERN : wxSWISS, fi ? wxITALIC : wxNORMAL, fb ? wxBOLD : wxNORMAL, - fu ? TRUE : FALSE, face, - m_OutputEnc); + fu ? TRUE : FALSE, face +#if wxUSE_UNICODE + ); +#else + , m_OutputEnc); + *encptr = m_OutputEnc; +#endif } m_DC->SetFont(**fontptr); return (*fontptr); @@ -372,12 +406,15 @@ void wxHtmlWinParser::SetFontFace(const wxString& face) if (GetFontFixed()) m_FontFaceFixed = face; else m_FontFaceNormal = face; +#if !wxUSE_UNICODE if (m_InputEnc != wxFONTENCODING_DEFAULT) SetInputEncoding(m_InputEnc); +#endif } +#if !wxUSE_UNICODE void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc) { m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT; @@ -435,7 +472,7 @@ void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc) m_EncConv = NULL; } } - +#endif