Merge pull request #219 from vadz/whatever-conv
Fall back on UTF-8 when converting Unicode to multibyte fails. This is not ideal, but better than just losing data entirely.
This commit is contained in:
@@ -543,6 +543,39 @@ private:
|
||||
wxMBConv *m_convReal;
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// wxWhateverWorksConv: use whatever encoding works for the input
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
class WXDLLIMPEXP_BASE wxWhateverWorksConv : public wxMBConv
|
||||
{
|
||||
public:
|
||||
wxWhateverWorksConv()
|
||||
{
|
||||
}
|
||||
|
||||
// Try to interpret the string as UTF-8, if it fails fall back to the
|
||||
// current locale encoding (wxConvLibc) and if this fails as well,
|
||||
// interpret it as wxConvISO8859_1 (which is used because it never fails
|
||||
// and this conversion is used when we really, really must produce
|
||||
// something on output).
|
||||
virtual size_t
|
||||
ToWChar(wchar_t *dst, size_t dstLen,
|
||||
const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
|
||||
|
||||
// Try to encode the string using the current locale encoding (wxConvLibc)
|
||||
// and fall back to UTF-8 (which never fails) if it doesn't work. Note that
|
||||
// we never use wxConvISO8859_1 here as we prefer to fall back on UTF-8
|
||||
// even for the strings containing only code points representable in 8859-1.
|
||||
virtual size_t
|
||||
FromWChar(char *dst, size_t dstLen,
|
||||
const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
|
||||
|
||||
virtual wxMBConv *Clone() const wxOVERRIDE
|
||||
{
|
||||
return new wxWhateverWorksConv();
|
||||
}
|
||||
};
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// declare predefined conversion objects
|
||||
@@ -578,6 +611,12 @@ WX_DECLARE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8)
|
||||
WX_DECLARE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7)
|
||||
#define wxConvUTF7 wxGet_wxConvUTF7()
|
||||
|
||||
// conversion used when we cannot afford to lose data when outputting Unicode
|
||||
// strings (should be avoided in the other direction as it can misinterpret the
|
||||
// input encoding)
|
||||
WX_DECLARE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks)
|
||||
#define wxConvWhateverWorks wxGet_wxConvWhateverWorks()
|
||||
|
||||
// conversion used for the file names on the systems where they're not Unicode
|
||||
// (basically anything except Windows)
|
||||
//
|
||||
@@ -648,12 +687,15 @@ extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI;
|
||||
// function which would crash if we passed NULL to it), so these functions
|
||||
// always return a valid pointer if their argument is non-NULL
|
||||
|
||||
// this function's safety is achieved by trying wxConvUTF8 first, wxConvLibc
|
||||
// next if it fails and, finally, wxConvISO8859_1 which always succeeds
|
||||
extern WXDLLIMPEXP_BASE wxWCharBuffer wxSafeConvertMB2WX(const char *s);
|
||||
inline wxWCharBuffer wxSafeConvertMB2WX(const char *s)
|
||||
{
|
||||
return wxConvWhateverWorks.cMB2WC(s);
|
||||
}
|
||||
|
||||
// this function tries wxConvLibc first and falls back to wxConvUTF8 if it fails
|
||||
extern WXDLLIMPEXP_BASE wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws);
|
||||
inline wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
|
||||
{
|
||||
return wxConvWhateverWorks.cWC2MB(ws);
|
||||
}
|
||||
#else // ANSI
|
||||
// no conversions to do
|
||||
#define wxConvertWX2MB(s) (s)
|
||||
|
@@ -483,6 +483,30 @@ public:
|
||||
bool IsOk() const;
|
||||
};
|
||||
|
||||
/**
|
||||
Conversion object always producing non-empty output for non-empty input.
|
||||
|
||||
Conversions done using this object never lose data, at the cost of possibly
|
||||
producing the output in an unwanted encoding or misinterpreting input
|
||||
encoding.
|
||||
|
||||
To be precise, converting Unicode to multibyte strings using this object
|
||||
tries to use the current locale encoding first but if this doesn't work, it
|
||||
falls back to using UTF-8. In the other direction, UTF-8 is tried first,
|
||||
then the current locale encoding and if this fails too, input is
|
||||
interpreted as using ISO 8859-1, which never fails.
|
||||
|
||||
It is almost always @e wrong to use this converter for multibyte-to-Unicode
|
||||
direction as the program should know which encoding the input data is
|
||||
supposed to use and use the appropriate converter instead. However it may
|
||||
be useful in the Unicode-to-multibyte direction if the goal is to produce
|
||||
the output in the current locale encoding if possible, but still output
|
||||
something, instead of nothing at all, even if the Unicode string is not
|
||||
representable in this encoding.
|
||||
|
||||
@since 3.1.0
|
||||
*/
|
||||
extern wxMBConv& wxConvWhateverWorks;
|
||||
|
||||
|
||||
/**
|
||||
|
@@ -148,13 +148,8 @@ wxString wxMessageOutputStderr::AppendLineFeedIfNeeded(const wxString& str)
|
||||
void wxMessageOutputStderr::Output(const wxString& str)
|
||||
{
|
||||
const wxString strWithLF = AppendLineFeedIfNeeded(str);
|
||||
const wxWX2MBbuf buf = strWithLF.mb_str();
|
||||
|
||||
if ( buf )
|
||||
fprintf(m_fp, "%s", (const char*) buf);
|
||||
else // print at least something
|
||||
fprintf(m_fp, "%s", (const char*) strWithLF.ToAscii());
|
||||
|
||||
fprintf(m_fp, "%s", (const char*) strWithLF.mb_str(wxConvWhateverWorks));
|
||||
fflush(m_fp);
|
||||
}
|
||||
|
||||
|
@@ -3286,36 +3286,40 @@ bool wxCSConv::IsUTF8() const
|
||||
#endif
|
||||
|
||||
|
||||
#if wxUSE_UNICODE
|
||||
// ============================================================================
|
||||
// wxWhateverWorksConv
|
||||
// ============================================================================
|
||||
|
||||
wxWCharBuffer wxSafeConvertMB2WX(const char *s)
|
||||
size_t
|
||||
wxWhateverWorksConv::ToWChar(wchar_t *dst, size_t dstLen,
|
||||
const char *src, size_t srcLen) const
|
||||
{
|
||||
if ( !s )
|
||||
return wxWCharBuffer();
|
||||
size_t rc = wxConvUTF8.ToWChar(dst, dstLen, src, srcLen);
|
||||
if ( rc != wxCONV_FAILED )
|
||||
return rc;
|
||||
|
||||
wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
|
||||
if ( !wbuf )
|
||||
wbuf = wxConvUTF8.cMB2WX(s);
|
||||
if ( !wbuf )
|
||||
wbuf = wxConvISO8859_1.cMB2WX(s);
|
||||
rc = wxConvLibc.ToWChar(dst, dstLen, src, srcLen);
|
||||
if ( rc != wxCONV_FAILED )
|
||||
return rc;
|
||||
|
||||
return wbuf;
|
||||
rc = wxConvISO8859_1.ToWChar(dst, dstLen, src, srcLen);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
|
||||
size_t
|
||||
wxWhateverWorksConv::FromWChar(char *dst, size_t dstLen,
|
||||
const wchar_t *src, size_t srcLen) const
|
||||
{
|
||||
if ( !ws )
|
||||
return wxCharBuffer();
|
||||
size_t rc = wxConvLibc.FromWChar(dst, dstLen, src, srcLen);
|
||||
if ( rc != wxCONV_FAILED )
|
||||
return rc;
|
||||
|
||||
wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
|
||||
if ( !buf )
|
||||
buf = wxConvUTF8.cWX2MB(ws);
|
||||
rc = wxConvUTF8.FromWChar(dst, dstLen, src, srcLen);
|
||||
|
||||
return buf;
|
||||
return rc;
|
||||
}
|
||||
|
||||
#endif // wxUSE_UNICODE
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// globals
|
||||
// ----------------------------------------------------------------------------
|
||||
@@ -3330,6 +3334,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
|
||||
#undef wxConvLibc
|
||||
#undef wxConvUTF8
|
||||
#undef wxConvUTF7
|
||||
#undef wxConvWhateverWorks
|
||||
#undef wxConvLocal
|
||||
#undef wxConvISO8859_1
|
||||
|
||||
@@ -3369,6 +3374,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
|
||||
// empty statement (and hope that no compiler warns about this)
|
||||
WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
|
||||
WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);
|
||||
WX_DEFINE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks, ;);
|
||||
|
||||
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
|
||||
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
|
||||
@@ -3387,5 +3393,5 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
|
||||
#ifdef __DARWIN__
|
||||
&wxConvMacUTF8DObj;
|
||||
#else // !__DARWIN__
|
||||
wxGet_wxConvLibcPtr();
|
||||
wxGet_wxConvWhateverWorksPtr();
|
||||
#endif // __DARWIN__/!__DARWIN__
|
||||
|
@@ -191,13 +191,7 @@ static wxStrCacheStatsDumper s_showCacheStats;
|
||||
wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
|
||||
{
|
||||
#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
|
||||
const wxScopedCharBuffer buf(str.AsCharBuf());
|
||||
if ( !buf )
|
||||
os.clear(wxSTD ios_base::failbit);
|
||||
else
|
||||
os << buf.data();
|
||||
|
||||
return os;
|
||||
return os << wxConvWhateverWorks.cWX2MB(str);
|
||||
#else
|
||||
return os << str.AsInternal();
|
||||
#endif
|
||||
|
@@ -400,7 +400,7 @@ public:
|
||||
|
||||
for ( int i = 0; i < m_argc; i++ )
|
||||
{
|
||||
m_argv[i] = wxStrdup(args[i]);
|
||||
m_argv[i] = wxStrdup(args[i].mb_str(wxConvWhateverWorks));
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -78,10 +78,6 @@ private:
|
||||
wxString m_fileNameNonASCII;
|
||||
wxString m_fileNameWork;
|
||||
|
||||
#ifndef __DARWIN__
|
||||
wxMBConv* m_convFNOld;
|
||||
#endif
|
||||
|
||||
wxDECLARE_NO_COPY_CLASS(FileFunctionsTestCase);
|
||||
};
|
||||
|
||||
@@ -98,16 +94,6 @@ CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( FileFunctionsTestCase, "FileFunctionsTest
|
||||
|
||||
void FileFunctionsTestCase::setUp()
|
||||
{
|
||||
// Under Unix we need to use UTF-8 for the tests using non-ASCII filenames
|
||||
// and this is not necessarily the case because the tests don't call
|
||||
// setlocale(LC_ALL, ""), so ensure it explicitly. This is just a temporary
|
||||
// hack until we find the solution to make the library work with Unicode
|
||||
// filenames irrespectively of the current locale.
|
||||
#ifndef __DARWIN__
|
||||
m_convFNOld = wxConvFileName;
|
||||
wxConvFileName = &wxConvUTF8;
|
||||
#endif
|
||||
|
||||
// Initialize local data
|
||||
|
||||
wxFileName fn1(wxFileName::GetTempDir(), wxT("wx_file_mask.txt"));
|
||||
@@ -137,10 +123,6 @@ void FileFunctionsTestCase::tearDown()
|
||||
{
|
||||
wxRemoveFile(m_fileNameWork);
|
||||
}
|
||||
|
||||
#ifndef __DARWIN__
|
||||
wxConvFileName = m_convFNOld;
|
||||
#endif
|
||||
}
|
||||
|
||||
void FileFunctionsTestCase::GetTempFolder()
|
||||
|
Reference in New Issue
Block a user