diff --git a/include/wx/strconv.h b/include/wx/strconv.h index 12346c9763..9aa87f8014 100644 --- a/include/wx/strconv.h +++ b/include/wx/strconv.h @@ -543,6 +543,39 @@ private: wxMBConv *m_convReal; }; +// ---------------------------------------------------------------------------- +// wxWhateverWorksConv: use whatever encoding works for the input +// ---------------------------------------------------------------------------- + +class WXDLLIMPEXP_BASE wxWhateverWorksConv : public wxMBConv +{ +public: + wxWhateverWorksConv() + { + } + + // Try to interpret the string as UTF-8, if it fails fall back to the + // current locale encoding (wxConvLibc) and if this fails as well, + // interpret it as wxConvISO8859_1 (which is used because it never fails + // and this conversion is used when we really, really must produce + // something on output). + virtual size_t + ToWChar(wchar_t *dst, size_t dstLen, + const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + + // Try to encode the string using the current locale encoding (wxConvLibc) + // and fall back to UTF-8 (which never fails) if it doesn't work. Note that + // we never use wxConvISO8859_1 here as we prefer to fall back on UTF-8 + // even for the strings containing only code points representable in 8859-1. 
+ virtual size_t + FromWChar(char *dst, size_t dstLen, + const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE; + + virtual wxMBConv *Clone() const wxOVERRIDE + { + return new wxWhateverWorksConv(); + } +}; // ---------------------------------------------------------------------------- // declare predefined conversion objects @@ -578,6 +611,12 @@ WX_DECLARE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8) WX_DECLARE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7) #define wxConvUTF7 wxGet_wxConvUTF7() +// conversion used when we may not afford to lose data when outputting Unicode +// strings (should be avoided in the other direction as it can misinterpret the +// input encoding) +WX_DECLARE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks) +#define wxConvWhateverWorks wxGet_wxConvWhateverWorks() + // conversion used for the file names on the systems where they're not Unicode // (basically anything except Windows) // diff --git a/interface/wx/strconv.h b/interface/wx/strconv.h index 9388b49135..86e765d548 100644 --- a/interface/wx/strconv.h +++ b/interface/wx/strconv.h @@ -483,6 +483,30 @@ public: bool IsOk() const; }; +/** + Conversion object always producing non-empty output for non-empty input. + + Conversions done using this object never lose data, at the cost of possibly + producing the output in an unwanted encoding or misinterpreting input + encoding. + + To be precise, converting Unicode to multibyte strings using this object + tries to use the current locale encoding first but if this doesn't work, it + falls back to using UTF-8. In the other direction, UTF-8 is tried first, + then the current locale encoding and if this fails too, input is + interpreted as using ISO 8859-1, which never fails. + + It is almost always @e wrong to use this converter for multibyte-to-Unicode + direction as the program should know which encoding the input data is + supposed to use and use the appropriate converter instead. 
However it may + be useful in the Unicode-to-multibyte direction if the goal is to produce + the output in the current locale encoding if possible, but still output + something, instead of nothing at all, even if the Unicode string is not + representable in this encoding. + + @since 3.1.0 + */ +extern wxMBConv& wxConvWhateverWorks; /** diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index 9e7da47925..2756870d55 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -3286,6 +3286,40 @@ bool wxCSConv::IsUTF8() const #endif +// ============================================================================ +// wxWhateverWorksConv +// ============================================================================ + +size_t +wxWhateverWorksConv::ToWChar(wchar_t *dst, size_t dstLen, + const char *src, size_t srcLen) const +{ + size_t rc = wxConvUTF8.ToWChar(dst, dstLen, src, srcLen); + if ( rc != wxCONV_FAILED ) + return rc; + + rc = wxConvLibc.ToWChar(dst, dstLen, src, srcLen); + if ( rc != wxCONV_FAILED ) + return rc; + + rc = wxConvISO8859_1.ToWChar(dst, dstLen, src, srcLen); + + return rc; +} + +size_t +wxWhateverWorksConv::FromWChar(char *dst, size_t dstLen, + const wchar_t *src, size_t srcLen) const +{ + size_t rc = wxConvLibc.FromWChar(dst, dstLen, src, srcLen); + if ( rc != wxCONV_FAILED ) + return rc; + + rc = wxConvUTF8.FromWChar(dst, dstLen, src, srcLen); + + return rc; +} + #if wxUSE_UNICODE wxWCharBuffer wxSafeConvertMB2WX(const char *s) @@ -3330,6 +3364,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws) #undef wxConvLibc #undef wxConvUTF8 #undef wxConvUTF7 +#undef wxConvWhateverWorks #undef wxConvLocal #undef wxConvISO8859_1 @@ -3369,6 +3404,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws) // empty statement (and hope that no compilers warns about this) WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;); WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;); +WX_DEFINE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks, 
;); WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM)); WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1)); @@ -3387,5 +3423,5 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName = #ifdef __DARWIN__ &wxConvMacUTF8DObj; #else // !__DARWIN__ - wxGet_wxConvLibcPtr(); + wxGet_wxConvWhateverWorksPtr(); #endif // __DARWIN__/!__DARWIN__