Add wxWhateverWorksConv and use it for file names under Unix

This ensures that we can create output files with Unicode names even when
they're not representable in the current locale encoding, notably when the
current locale has never been changed and is still the default "C" one, which
doesn't support anything other than 7-bit ASCII.

Credits for the new class name go to Woody Allen.
This commit is contained in:
Vadim Zeitlin
2016-02-19 02:41:28 +01:00
parent 837e6d186d
commit a11456c078
3 changed files with 100 additions and 1 deletions

View File

@@ -543,6 +543,39 @@ private:
wxMBConv *m_convReal;
};
// ----------------------------------------------------------------------------
// wxWhateverWorksConv: use whatever encoding works for the input
// ----------------------------------------------------------------------------
class WXDLLIMPEXP_BASE wxWhateverWorksConv : public wxMBConv
{
public:
    // This class declares no data members of its own, so there is nothing to
    // initialize here.
    wxWhateverWorksConv() { }

    // Multibyte-to-wide conversion.
    //
    // The input is first interpreted as UTF-8; if that fails, the current
    // locale encoding (wxConvLibc) is tried, and if that fails as well, the
    // input is interpreted using wxConvISO8859_1. The latter is chosen as the
    // last resort because it never fails, and this converter is meant for the
    // situations when we really, really must produce something on output.
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
                           const char *src,
                           size_t srcLen = wxNO_LEN) const wxOVERRIDE;

    // Wide-to-multibyte conversion.
    //
    // The string is encoded using the current locale encoding (wxConvLibc) if
    // possible, otherwise UTF-8 (which never fails) is used. Notice that
    // wxConvISO8859_1 is never used in this direction: falling back on UTF-8
    // is preferred even for the strings containing only code points
    // representable in ISO 8859-1.
    virtual size_t FromWChar(char *dst, size_t dstLen,
                             const wchar_t *src,
                             size_t srcLen = wxNO_LEN) const wxOVERRIDE;

    // Return a new heap-allocated converter of the same class.
    virtual wxMBConv *Clone() const wxOVERRIDE { return new wxWhateverWorksConv; }
};
// ----------------------------------------------------------------------------
// declare predefined conversion objects
@@ -578,6 +611,12 @@ WX_DECLARE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8)
WX_DECLARE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7)
#define wxConvUTF7 wxGet_wxConvUTF7()
// conversion used when we cannot afford to lose data when outputting Unicode
// strings (should be avoided in the other direction as it can misinterpret the
// input encoding)
WX_DECLARE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks)
#define wxConvWhateverWorks wxGet_wxConvWhateverWorks()
// conversion used for the file names on the systems where they're not Unicode
// (basically anything except Windows)
//

View File

@@ -483,6 +483,30 @@ public:
bool IsOk() const;
};
/**
    Conversion object always producing non-empty output for non-empty input.

    Conversions done using this object never lose data, at the cost of possibly
    producing the output in an unwanted encoding or misinterpreting input
    encoding.

    To be precise, converting Unicode to multibyte strings using this object
    tries to use the current locale encoding first but if this doesn't work, it
    falls back to using UTF-8. In the other direction, UTF-8 is tried first,
    then the current locale encoding and if this fails too, input is
    interpreted as using ISO 8859-1, which never fails.

    It is almost always @e wrong to use this converter for the
    multibyte-to-Unicode direction as the program should know which encoding
    the input data is supposed to use and use the appropriate converter
    instead. However it may be useful in the Unicode-to-multibyte direction if
    the goal is to produce the output in the current locale encoding if
    possible, but still output something, instead of nothing at all, even if
    the Unicode string is not representable in this encoding.

    @since 3.1.0
 */
extern wxMBConv& wxConvWhateverWorks;
/**

View File

@@ -3286,6 +3286,40 @@ bool wxCSConv::IsUTF8() const
#endif
// ============================================================================
// wxWhateverWorksConv
// ============================================================================
// Decode a multibyte string by trying, in order, UTF-8, the current locale
// encoding (wxConvLibc) and finally ISO 8859-1, and return the result of the
// first conversion which doesn't fail.
//
// All arguments are forwarded unchanged to the underlying converters, so the
// return value is wxCONV_FAILED only if even the last fallback has failed.
size_t
wxWhateverWorksConv::ToWChar(wchar_t *dst, size_t dstLen,
                             const char *src, size_t srcLen) const
{
    size_t converted = wxConvUTF8.ToWChar(dst, dstLen, src, srcLen);
    if ( converted == wxCONV_FAILED )
    {
        // UTF-8 conversion failed: retry using the current locale encoding.
        converted = wxConvLibc.ToWChar(dst, dstLen, src, srcLen);

        if ( converted == wxCONV_FAILED )
        {
            // Last resort: interpret the input as ISO 8859-1.
            converted = wxConvISO8859_1.ToWChar(dst, dstLen, src, srcLen);
        }
    }

    return converted;
}
// Encode a wide string using the current locale encoding (wxConvLibc) if
// possible and fall back to UTF-8 otherwise.
//
// All arguments are forwarded unchanged to the underlying converters and the
// value returned by the last converter tried is returned to the caller.
size_t
wxWhateverWorksConv::FromWChar(char *dst, size_t dstLen,
                               const wchar_t *src, size_t srcLen) const
{
    size_t encoded = wxConvLibc.FromWChar(dst, dstLen, src, srcLen);

    // Only resort to UTF-8 if the locale encoding couldn't be used.
    if ( encoded == wxCONV_FAILED )
        encoded = wxConvUTF8.FromWChar(dst, dstLen, src, srcLen);

    return encoded;
}
#if wxUSE_UNICODE
wxWCharBuffer wxSafeConvertMB2WX(const char *s)
@@ -3330,6 +3364,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
#undef wxConvLibc
#undef wxConvUTF8
#undef wxConvUTF7
#undef wxConvWhateverWorks
#undef wxConvLocal
#undef wxConvISO8859_1
@@ -3369,6 +3404,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
// empty statement (and hope that no compilers warns about this)
WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);
WX_DEFINE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks, ;);
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
@@ -3387,5 +3423,5 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
#ifdef __DARWIN__
&wxConvMacUTF8DObj;
#else // !__DARWIN__
wxGet_wxConvLibcPtr();
wxGet_wxConvWhateverWorksPtr();
#endif // __DARWIN__/!__DARWIN__