Add wxWhateverWorksConv and use it for file names under Unix

This ensures that we can create output files with Unicode names even when they're not representable in the current locale encoding, notably when the current locale has never been changed and is still the default "C" one, which supports nothing beyond 7-bit ASCII. Credits for the new class name go to Woody Allen.
2016-02-19 02:41:28 +01:00
parent 837e6d186d
commit a11456c078
3 changed files with 100 additions and 1 deletions
--- a/include/wx/strconv.h
+++ b/include/wx/strconv.h
@@ -543,6 +543,39 @@ private:
    wxMBConv *m_convReal;
 };

+// ----------------------------------------------------------------------------
+// wxWhateverWorksConv: use whatever encoding works for the input
+// ----------------------------------------------------------------------------
+
+class WXDLLIMPEXP_BASE wxWhateverWorksConv : public wxMBConv
+{
+public:
+    wxWhateverWorksConv()
+    {
+    }
+
+    // Try to interpret the string as UTF-8; if that fails, fall back to the
+    // current locale encoding (wxConvLibc) and, if this fails as well,
+    // interpret it as wxConvISO8859_1 (which is used because it never fails
+    // and this conversion is used when we really, really must produce
+    // something on output).
+    virtual size_t
+    ToWChar(wchar_t *dst, size_t dstLen,
+            const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
+
+    // Try to encode the string using the current locale encoding (wxConvLibc)
+    // and fall back to UTF-8 (which never fails) if it doesn't work. Note that
+    // we never use wxConvISO8859_1 here as we prefer to fall back on UTF-8
+    // even for the strings containing only code points representable in 8859-1.
+    virtual size_t
+    FromWChar(char *dst, size_t dstLen,
+              const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
+
+    virtual wxMBConv *Clone() const wxOVERRIDE
+    {
+        return new wxWhateverWorksConv();
+    }
+};

 // ----------------------------------------------------------------------------
 // declare predefined conversion objects
@@ -578,6 +611,12 @@ WX_DECLARE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8)
 WX_DECLARE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7)
 #define wxConvUTF7 wxGet_wxConvUTF7()

+// conversion used when we cannot afford to lose data when outputting Unicode
+// strings (should be avoided in the other direction as it can misinterpret the
+// input encoding)
+WX_DECLARE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks)
+#define wxConvWhateverWorks wxGet_wxConvWhateverWorks()
+
 // conversion used for the file names on the systems where they're not Unicode
 // (basically anything except Windows)
 //
--- a/interface/wx/strconv.h
+++ b/interface/wx/strconv.h
@@ -483,6 +483,30 @@ public:
    bool IsOk() const;
 };

+/**
+    Conversion object always producing non-empty output for non-empty input.
+
+    Conversions done using this object never lose data, at the cost of possibly
+    producing the output in an unwanted encoding or misinterpreting input
+    encoding.
+
+    To be precise, converting Unicode to multibyte strings using this object
+    tries to use the current locale encoding first but if this doesn't work, it
+    falls back to using UTF-8. In the other direction, UTF-8 is tried first,
+    then the current locale encoding and if this fails too, input is
+    interpreted as using ISO 8859-1, which never fails.
+
+    It is almost always @e wrong to use this converter in the
+    multibyte-to-Unicode direction, as the program should know which encoding
+    the input data is supposed to use and use the appropriate converter
+    instead. However, it may be useful in the Unicode-to-multibyte direction if
+    the goal is to produce the output in the current locale encoding when
+    possible, but still output something, instead of nothing at all, even if
+    the Unicode string is not representable in this encoding.
+
+    @since 3.1.0
+ */
+extern wxMBConv& wxConvWhateverWorks;


 /**
--- a/src/common/strconv.cpp
+++ b/src/common/strconv.cpp
@@ -3286,6 +3286,40 @@ bool wxCSConv::IsUTF8() const
 #endif


+// ============================================================================
+// wxWhateverWorksConv
+// ============================================================================
+
+size_t
+wxWhateverWorksConv::ToWChar(wchar_t *dst, size_t dstLen,
+                             const char *src, size_t srcLen) const
+{
+    size_t rc = wxConvUTF8.ToWChar(dst, dstLen, src, srcLen); // UTF-8 first
+    if ( rc != wxCONV_FAILED )
+        return rc;
+
+    rc = wxConvLibc.ToWChar(dst, dstLen, src, srcLen); // then locale encoding
+    if ( rc != wxCONV_FAILED )
+        return rc;
+
+    rc = wxConvISO8859_1.ToWChar(dst, dstLen, src, srcLen); // last resort
+
+    return rc;
+}
+
+size_t
+wxWhateverWorksConv::FromWChar(char *dst, size_t dstLen,
+                               const wchar_t *src, size_t srcLen) const
+{
+    size_t rc = wxConvLibc.FromWChar(dst, dstLen, src, srcLen); // locale first
+    if ( rc != wxCONV_FAILED )
+        return rc;
+
+    rc = wxConvUTF8.FromWChar(dst, dstLen, src, srcLen); // UTF-8 as fallback
+
+    return rc;
+}
+
 #if wxUSE_UNICODE

 wxWCharBuffer wxSafeConvertMB2WX(const char *s)
@@ -3330,6 +3364,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
 #undef wxConvLibc
 #undef wxConvUTF8
 #undef wxConvUTF7
+#undef wxConvWhateverWorks
 #undef wxConvLocal
 #undef wxConvISO8859_1

@@ -3369,6 +3404,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
 //     empty statement (and hope that no compilers warns about this)
 WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
 WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);
+WX_DEFINE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks, ;);

 WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
 WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
@@ -3387,5 +3423,5 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
 #ifdef __DARWIN__
                                    &wxConvMacUTF8DObj;
 #else // !__DARWIN__
-                                    wxGet_wxConvLibcPtr();
+                                    wxGet_wxConvWhateverWorksPtr();
 #endif // __DARWIN__/!__DARWIN__