///////////////////////////////////////////////////////////////////////////// // Name: strconv.cpp // Purpose: Unicode conversion classes // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin // Modified by: // Created: 29/01/98 // RCS-ID: $Id$ // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vadim Zeitlin // Licence: wxWindows license ///////////////////////////////////////////////////////////////////////////// #ifdef __GNUG__ #pragma implementation "strconv.h" #endif // For compilers that support precompilation, includes "wx.h". #include "wx/wxprec.h" #ifdef __BORLANDC__ #pragma hdrstop #endif #include #include #include #ifdef __SALFORDC__ #include #endif #include "wx/debug.h" #include "wx/strconv.h" //---------------------------------------------------------------------------- // wxConvCurrent //---------------------------------------------------------------------------- WXDLLEXPORT_DATA(wxMBConv *) wxConvCurrent = &wxConvLibc; #if !wxUSE_WCHAR_T //---------------------------------------------------------------------------- // stand-ins in absence of wchar_t //---------------------------------------------------------------------------- WXDLLEXPORT_DATA(wxMBConv) wxConvLibc, wxConvFile; #else //---------------------------------------------------------------------------- // wxMBConv //---------------------------------------------------------------------------- WXDLLEXPORT_DATA(wxMBConv) wxConvLibc; size_t wxMBConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const { return wxMB2WC(buf, psz, n); } size_t wxMBConv::WC2MB(char *buf, const wchar_t *psz, size_t n) const { return wxWC2MB(buf, psz, n); } const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const { if (psz) { size_t nLen = MB2WC((wchar_t *) NULL, psz, 0); wxWCharBuffer buf(nLen); MB2WC((wchar_t *)(const wchar_t *) buf, psz, nLen); return buf; } else return wxWCharBuffer((wchar_t *) NULL); } const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *psz) const { if (psz) { size_t nLen = WC2MB((char *) NULL, psz, 0); wxCharBuffer buf(nLen); WC2MB((char *)(const char *) buf, psz, nLen); return buf; } else return wxCharBuffer((char *) NULL); } //---------------------------------------------------------------------------- // standard file conversion //---------------------------------------------------------------------------- WXDLLEXPORT_DATA(wxMBConvFile) wxConvFile; // just use the libc conversion for now size_t wxMBConvFile::MB2WC(wchar_t *buf, const char *psz, size_t n) const { return wxMB2WC(buf, psz, n); } size_t wxMBConvFile::WC2MB(char *buf, const wchar_t *psz, size_t n) const { return wxWC2MB(buf, psz, n); } #ifdef __WXGTK12__ //---------------------------------------------------------------------------- // standard gdk conversion //---------------------------------------------------------------------------- WXDLLEXPORT_DATA(wxMBConvGdk) wxConvGdk; #include size_t wxMBConvGdk::MB2WC(wchar_t *buf, const char *psz, size_t n) const { if (buf) { return gdk_mbstowcs((GdkWChar *)buf, psz, n); } else { GdkWChar *nbuf = new GdkWChar[n=strlen(psz)]; size_t len = gdk_mbstowcs(nbuf, psz, n); delete [] nbuf; return len; } } size_t wxMBConvGdk::WC2MB(char *buf, const wchar_t *psz, size_t n) const { char *mbstr = gdk_wcstombs((GdkWChar *)psz); size_t len = mbstr ? strlen(mbstr) : 0; if (buf) { if (len > n) len = n; memcpy(buf, psz, len); if (len < n) buf[len] = 0; } return len; } #endif // GTK > 1.0 // ---------------------------------------------------------------------------- // UTF-7 // ---------------------------------------------------------------------------- WXDLLEXPORT_DATA(wxMBConvUTF7) wxConvUTF7; #if 0 static char utf7_setD[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789'(),-./:?"; static char utf7_setO[]="!\"#$%&*;<=>@[]^_`{|}"; static char utf7_setB[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789+/"; #endif // TODO: write actual implementations of UTF-7 here size_t wxMBConvUTF7::MB2WC(wchar_t * WXUNUSED(buf), const char * WXUNUSED(psz), size_t WXUNUSED(n)) const { return 0; } size_t wxMBConvUTF7::WC2MB(char * WXUNUSED(buf), const wchar_t * WXUNUSED(psz), size_t WXUNUSED(n)) const { return 0; } //---------------------------------------------------------------------------- // UTF-8 //---------------------------------------------------------------------------- WXDLLEXPORT_DATA(wxMBConvUTF8) wxConvUTF8; static unsigned long utf8_max[]={0x7f,0x7ff,0xffff,0x1fffff,0x3ffffff,0x7fffffff,0xffffffff}; size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const { size_t len = 0; while (*psz && ((!buf) || (len>cnt); while (cnt--) { cc = *psz++; if ((cc&0xC0)!=0x80) { // invalid UTF-8 sequence return (size_t)-1; } res=(res<<6)|(cc&0x3f); } if (res<=utf8_max[ocnt]) { // illegal UTF-8 encoding return (size_t)-1; } if (buf) *buf++=res; len++; } } } if (buf && (lenutf8_max[cnt]; cnt++); if (!cnt) { // plain ASCII char if (buf) *buf++=cc; len++; } else { len+=cnt+1; if (buf) { *buf++=(-128>>cnt)|((cc>>(cnt*6))&(0x3f>>cnt)); while (cnt--) *buf++=0x80|((cc>>(cnt*6))&0x3f); } } } if (buf && (lennames.Add(token.GetNextToken()); } else if (cmd == T("")) cset->names.Add(token.GetNextToken()); else if (cmd == T("")) comchar = token.GetNextToken(); else if (cmd == T("")) escchar = token.GetNextToken(); else if (cmd == T("")) { delete cset; cset = (wxCharacterSet *) NULL; break; // we don't support multibyte charsets ourselves (yet) } else if (cmd == T("CHARMAP")) { cset->data = (wchar_t *)calloc(256, sizeof(wchar_t)); in_charset = TRUE; } else if (cmd == T("END")) { if (token.GetNextToken() == T("CHARMAP")) in_charset = FALSE; } else if (in_charset) { // format: /x00 NULL (NUL) // /x41 LATIN CAPITAL LETTER A wxString hex = token.GetNextToken(); // skip whitespace (why doesn't wxStringTokenizer do this?) while (wxIsEmpty(hex) && token.HasMoreTokens()) hex = token.GetNextToken(); wxString uni = token.GetNextToken(); // skip whitespace again while (wxIsEmpty(uni) && token.HasMoreTokens()) uni = token.GetNextToken(); if ((hex.Len() > 2) && (wxString(hex.GetChar(0)) == escchar) && (hex.GetChar(1) == T('x')) && (uni.Left(2) == T("=0) { unsigned long uni1 = ::wxHexToDec(uni.Mid(2,2)); unsigned long uni2 = ::wxHexToDec(uni.Mid(4,2)); cset->data[pos] = (uni1 << 16) | uni2; // wxFprintf(stderr,T("char %02x mapped to %04x (%c)\n"),pos,cset->data[pos],cset->data[pos]); } } } } if (cset) { cset->names.Shrink(); wxCharsets.Add(cset); } } } #endif wxCharsets.Shrink(); } static wxCharacterSet *wxFindCharacterSet(const wxChar *charset) { if (!charset) return (wxCharacterSet *)NULL; wxLoadCharacterSets(); for (size_t n=0; n4) { if (wxString(charset,4) == T("8859")) { codeset << T("8859-"); if (*charset == T('-')) charset++; } } } codeset << charset; codeset.MakeUpper(); m_name = wxStrdup(codeset.c_str()); m_deferred = TRUE; #endif } } void wxCSConv::LoadNow() { // wxPrintf(T("Conversion request\n")); if (m_deferred) { if (!m_name) { #ifdef __UNIX__ wxChar *lang = wxGetenv(T("LANG")); wxChar *dot = lang ? wxStrchr(lang, T('.')) : (wxChar *)NULL; if (dot) SetName(dot+1); #endif } m_cset = wxFindCharacterSet(m_name); m_deferred = FALSE; } } size_t wxCSConv::MB2WC(wchar_t *buf, const char *psz, size_t n) const { ((wxCSConv *)this)->LoadNow(); // discard constness if (buf) { if (m_cset) { for (size_t c=0; cdata[(unsigned char)(psz[c])]; } else { // latin-1 (direct) for (size_t c=0; cLoadNow(); // discard constness if (buf) { if (m_cset) { for (size_t c=0; cdata[n] != psz[c]); n++); buf[c] = (n>0xff) ? '?' : n; } } else { // latin-1 (direct) for (size_t c=0; c0xff) ? '?' : psz[c]; } return n; } return wcslen(psz); } #endif //wxUSE_WCHAR_T