diff --git a/include/wx/strconv.h b/include/wx/strconv.h index 12346c9763..470632ca4a 100644 --- a/include/wx/strconv.h +++ b/include/wx/strconv.h @@ -543,6 +543,39 @@ private: wxMBConv *m_convReal; }; +// ---------------------------------------------------------------------------- +// wxWhateverWorksConv: use whatever encoding works for the input +// ---------------------------------------------------------------------------- + +class WXDLLIMPEXP_BASE wxWhateverWorksConv : public wxMBConv +{ +public: + wxWhateverWorksConv() + { + } + + // Try to interpret the string as UTF-8, if it fails fall back to the + // current locale encoding (wxConvLibc) and if this fails as well, + // interpret it as wxConvISO8859_1 (which is used because it never fails + // and this conversion is used when we really, really must produce + // something on output). + virtual size_t + ToWChar(wchar_t *dst, size_t dstLen, + const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + + // Try to encode the string using the current locale encoding (wxConvLibc) + // and fall back to UTF-8 (which never fails) if it doesn't work. Note that + // we never use wxConvISO8859_1 here as we prefer to fall back on UTF-8 + // even for the strings containing only code points representable in 8859-1. 
+ virtual size_t + FromWChar(char *dst, size_t dstLen, + const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + + virtual wxMBConv *Clone() const wxOVERRIDE + { + return new wxWhateverWorksConv(); + } +}; // ---------------------------------------------------------------------------- // declare predefined conversion objects @@ -578,6 +611,12 @@ WX_DECLARE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8) WX_DECLARE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7) #define wxConvUTF7 wxGet_wxConvUTF7() +// conversion used when we may not afford to lose data when outputting Unicode +// strings (should be avoided in the other direction as it can misinterpret the +// input encoding) +WX_DECLARE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks) +#define wxConvWhateverWorks wxGet_wxConvWhateverWorks() + // conversion used for the file names on the systems where they're not Unicode // (basically anything except Windows) // @@ -648,12 +687,15 @@ extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI; // function which would crash if we passed NULL to it), so these functions // always return a valid pointer if their argument is non-NULL - // this function safety is achieved by trying wxConvLibc first, wxConvUTF8 - // next if it fails and, finally, wxConvISO8859_1 which always succeeds - extern WXDLLIMPEXP_BASE wxWCharBuffer wxSafeConvertMB2WX(const char *s); + inline wxWCharBuffer wxSafeConvertMB2WX(const char *s) + { + return wxConvWhateverWorks.cMB2WC(s); + } - // this function uses wxConvLibc and wxConvUTF8 if it fails - extern WXDLLIMPEXP_BASE wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws); + inline wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws) + { + return wxConvWhateverWorks.cWC2MB(ws); + } #else // ANSI // no conversions to do #define wxConvertWX2MB(s) (s) diff --git a/interface/wx/strconv.h b/interface/wx/strconv.h index 9388b49135..86e765d548 100644 --- a/interface/wx/strconv.h +++ b/interface/wx/strconv.h @@ -483,6 +483,30 @@ public: bool IsOk() const; }; +/** + 
Conversion object always producing non-empty output for non-empty input. + + Conversions done using this object never lose data, at the cost of possibly + producing the output in an unwanted encoding or misinterpreting input + encoding. + + To be precise, converting Unicode to multibyte strings using this object + tries to use the current locale encoding first but if this doesn't work, it + falls back to using UTF-8. In the other direction, UTF-8 is tried first, + then the current locale encoding and if this fails too, input is + interpreted as using ISO 8859-1, which never fails. + + It is almost always @e wrong to use this converter for multibyte-to-Unicode + direction as the program should know which encoding the input data is + supposed to use and use the appropriate converter instead. However it may + be useful in the Unicode-to-multibyte direction if the goal is to produce + the output in the current locale encoding if possible, but still output + something, instead of nothing at all, even if the Unicode string is not + representable in this encoding. 
+ + @since 3.1.0 + */ +extern wxMBConv& wxConvWhateverWorks; /** diff --git a/src/common/msgout.cpp b/src/common/msgout.cpp index 48587c5624..9003413e57 100644 --- a/src/common/msgout.cpp +++ b/src/common/msgout.cpp @@ -148,13 +148,8 @@ wxString wxMessageOutputStderr::AppendLineFeedIfNeeded(const wxString& str) void wxMessageOutputStderr::Output(const wxString& str) { const wxString strWithLF = AppendLineFeedIfNeeded(str); - const wxWX2MBbuf buf = strWithLF.mb_str(); - - if ( buf ) - fprintf(m_fp, "%s", (const char*) buf); - else // print at least something - fprintf(m_fp, "%s", (const char*) strWithLF.ToAscii()); + fprintf(m_fp, "%s", (const char*) strWithLF.mb_str(wxConvWhateverWorks)); fflush(m_fp); } diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 9e7da47925..0390dc3795 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -3286,36 +3286,40 @@ bool wxCSConv::IsUTF8() const #endif -#if wxUSE_UNICODE +// ============================================================================ +// wxWhateverWorksConv +// ============================================================================ -wxWCharBuffer wxSafeConvertMB2WX(const char *s) +size_t +wxWhateverWorksConv::ToWChar(wchar_t *dst, size_t dstLen, + const char *src, size_t srcLen) const { - if ( !s ) - return wxWCharBuffer(); + size_t rc = wxConvUTF8.ToWChar(dst, dstLen, src, srcLen); + if ( rc != wxCONV_FAILED ) + return rc; - wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s)); - if ( !wbuf ) - wbuf = wxConvUTF8.cMB2WX(s); - if ( !wbuf ) - wbuf = wxConvISO8859_1.cMB2WX(s); + rc = wxConvLibc.ToWChar(dst, dstLen, src, srcLen); + if ( rc != wxCONV_FAILED ) + return rc; - return wbuf; + rc = wxConvISO8859_1.ToWChar(dst, dstLen, src, srcLen); + + return rc; } -wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws) +size_t +wxWhateverWorksConv::FromWChar(char *dst, size_t dstLen, + const wchar_t *src, size_t srcLen) const { - if ( !ws ) - return wxCharBuffer(); + size_t rc = wxConvLibc.FromWChar(dst, 
dstLen, src, srcLen); + if ( rc != wxCONV_FAILED ) + return rc; - wxCharBuffer buf(wxConvLibc.cWX2MB(ws)); - if ( !buf ) - buf = wxConvUTF8.cWX2MB(ws); + rc = wxConvUTF8.FromWChar(dst, dstLen, src, srcLen); - return buf; + return rc; } -#endif // wxUSE_UNICODE - // ---------------------------------------------------------------------------- // globals // ---------------------------------------------------------------------------- @@ -3330,6 +3334,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws) #undef wxConvLibc #undef wxConvUTF8 #undef wxConvUTF7 +#undef wxConvWhateverWorks #undef wxConvLocal #undef wxConvISO8859_1 @@ -3369,6 +3374,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws) // empty statement (and hope that no compilers warns about this) WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;); WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;); +WX_DEFINE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks, ;); WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM)); WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1)); @@ -3387,5 +3393,5 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = #ifdef __DARWIN__ &wxConvMacUTF8DObj; #else // !__DARWIN__ - wxGet_wxConvLibcPtr(); + wxGet_wxConvWhateverWorksPtr(); #endif // __DARWIN__/!__DARWIN__ diff --git a/src/common/string.cpp b/src/common/string.cpp index 930b488162..feb6d8e501 100644 --- a/src/common/string.cpp +++ b/src/common/string.cpp @@ -191,13 +191,7 @@ static wxStrCacheStatsDumper s_showCacheStats; wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str) { #if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8 - const wxScopedCharBuffer buf(str.AsCharBuf()); - if ( !buf ) - os.clear(wxSTD ios_base::failbit); - else - os << buf.data(); - - return os; + return os << wxConvWhateverWorks.cWX2MB(str); #else return os << str.AsInternal(); #endif diff --git a/src/unix/utilsunx.cpp b/src/unix/utilsunx.cpp index 2a403b1a4d..a0db944983 100644 --- 
a/src/unix/utilsunx.cpp +++ b/src/unix/utilsunx.cpp @@ -400,7 +400,7 @@ public: for ( int i = 0; i < m_argc; i++ ) { - m_argv[i] = wxStrdup(args[i]); + m_argv[i] = wxStrdup(args[i].mb_str(wxConvWhateverWorks)); } } diff --git a/tests/file/filefn.cpp b/tests/file/filefn.cpp index 77a6dc2bfb..659823806e 100644 --- a/tests/file/filefn.cpp +++ b/tests/file/filefn.cpp @@ -78,10 +78,6 @@ private: wxString m_fileNameNonASCII; wxString m_fileNameWork; -#ifndef __DARWIN__ - wxMBConv* m_convFNOld; -#endif - wxDECLARE_NO_COPY_CLASS(FileFunctionsTestCase); }; @@ -98,16 +94,6 @@ CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( FileFunctionsTestCase, "FileFunctionsTest void FileFunctionsTestCase::setUp() { - // Under Unix we need to use UTF-8 for the tests using non-ASCII filenames - // and this is not necessarily the case because the tests don't call - // setlocale(LC_ALL, ""), so ensure it explicitly. This is just a temporary - // hack until we find the solution to make the library work with Unicode - // filenames irrespectively of the current locale. -#ifndef __DARWIN__ - m_convFNOld = wxConvFileName; - wxConvFileName = &wxConvUTF8; -#endif - // Initialize local data wxFileName fn1(wxFileName::GetTempDir(), wxT("wx_file_mask.txt")); @@ -137,10 +123,6 @@ void FileFunctionsTestCase::tearDown() { wxRemoveFile(m_fileNameWork); } - -#ifndef __DARWIN__ - wxConvFileName = m_convFNOld; -#endif } void FileFunctionsTestCase::GetTempFolder()