Merge pull request #219 from vadz/whatever-conv
Fall back on UTF-8 when converting Unicode to multibyte fails. This is not ideal, but better than just losing data entirely.
This commit is contained in:
@@ -543,6 +543,39 @@ private:
|
|||||||
wxMBConv *m_convReal;
|
wxMBConv *m_convReal;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
// wxWhateverWorksConv: use whatever encoding works for the input
|
||||||
|
// ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class WXDLLIMPEXP_BASE wxWhateverWorksConv : public wxMBConv
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
wxWhateverWorksConv()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to interpret the string as UTF-8, if it fails fall back to the
|
||||||
|
// current locale encoding (wxConvLibc) and if this fails as well,
|
||||||
|
// interpret it as wxConvISO8859_1 (which is used because it never fails
|
||||||
|
// and this conversion is used when we really, really must produce
|
||||||
|
// something on output).
|
||||||
|
virtual size_t
|
||||||
|
ToWChar(wchar_t *dst, size_t dstLen,
|
||||||
|
const char *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
|
||||||
|
|
||||||
|
// Try to encode the string using the current locale encoding (wxConvLibc)
|
||||||
|
// and fall back to UTF-8 (which never fails) if it doesn't work. Note that
|
||||||
|
// we never use wxConvISO8859_1 here as we prefer to fall back on UTF-8
|
||||||
|
// even for the strings containing only code points representable in 8859-1.
|
||||||
|
virtual size_t
|
||||||
|
FromWChar(char *dst, size_t dstLen,
|
||||||
|
const wchar_t *src, size_t srcLen = wxNO_LEN) const wxOVERRIDE;
|
||||||
|
|
||||||
|
virtual wxMBConv *Clone() const wxOVERRIDE
|
||||||
|
{
|
||||||
|
return new wxWhateverWorksConv();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// declare predefined conversion objects
|
// declare predefined conversion objects
|
||||||
@@ -578,6 +611,12 @@ WX_DECLARE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8)
|
|||||||
WX_DECLARE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7)
|
WX_DECLARE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7)
|
||||||
#define wxConvUTF7 wxGet_wxConvUTF7()
|
#define wxConvUTF7 wxGet_wxConvUTF7()
|
||||||
|
|
||||||
|
// conversion used when we cannot afford to lose data when outputting Unicode
|
||||||
|
// strings (should be avoided in the other direction as it can misinterpret the
|
||||||
|
// input encoding)
|
||||||
|
WX_DECLARE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks)
|
||||||
|
#define wxConvWhateverWorks wxGet_wxConvWhateverWorks()
|
||||||
|
|
||||||
// conversion used for the file names on the systems where they're not Unicode
|
// conversion used for the file names on the systems where they're not Unicode
|
||||||
// (basically anything except Windows)
|
// (basically anything except Windows)
|
||||||
//
|
//
|
||||||
@@ -648,12 +687,15 @@ extern WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI;
|
|||||||
// function which would crash if we passed NULL to it), so these functions
|
// function which would crash if we passed NULL to it), so these functions
|
||||||
// always return a valid pointer if their argument is non-NULL
|
// always return a valid pointer if their argument is non-NULL
|
||||||
|
|
||||||
// this function safety is achieved by trying wxConvLibc first, wxConvUTF8
|
inline wxWCharBuffer wxSafeConvertMB2WX(const char *s)
|
||||||
// next if it fails and, finally, wxConvISO8859_1 which always succeeds
|
{
|
||||||
extern WXDLLIMPEXP_BASE wxWCharBuffer wxSafeConvertMB2WX(const char *s);
|
return wxConvWhateverWorks.cMB2WC(s);
|
||||||
|
}
|
||||||
|
|
||||||
// this function uses wxConvLibc and wxConvUTF8 if it fails
|
inline wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
|
||||||
extern WXDLLIMPEXP_BASE wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws);
|
{
|
||||||
|
return wxConvWhateverWorks.cWC2MB(ws);
|
||||||
|
}
|
||||||
#else // ANSI
|
#else // ANSI
|
||||||
// no conversions to do
|
// no conversions to do
|
||||||
#define wxConvertWX2MB(s) (s)
|
#define wxConvertWX2MB(s) (s)
|
||||||
|
@@ -483,6 +483,30 @@ public:
|
|||||||
bool IsOk() const;
|
bool IsOk() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
Conversion object always producing non-empty output for non-empty input.
|
||||||
|
|
||||||
|
Conversions done using this object never lose data, at the cost of possibly
|
||||||
|
producing the output in an unwanted encoding or misinterpreting input
|
||||||
|
encoding.
|
||||||
|
|
||||||
|
To be precise, converting Unicode to multibyte strings using this object
|
||||||
|
tries to use the current locale encoding first but if this doesn't work, it
|
||||||
|
falls back to using UTF-8. In the other direction, UTF-8 is tried first,
|
||||||
|
then the current locale encoding and if this fails too, input is
|
||||||
|
interpreted as using ISO 8859-1, which never fails.
|
||||||
|
|
||||||
|
It is almost always @e wrong to use this converter for multibyte-to-Unicode
|
||||||
|
direction as the program should know which encoding the input data is
|
||||||
|
supposed to use and use the appropriate converter instead. However it may
|
||||||
|
be useful in the Unicode-to-multibyte direction if the goal is to produce
|
||||||
|
the output in the current locale encoding if possible, but still output
|
||||||
|
something, instead of nothing at all, even if the Unicode string is not
|
||||||
|
representable in this encoding.
|
||||||
|
|
||||||
|
@since 3.1.0
|
||||||
|
*/
|
||||||
|
extern wxMBConv& wxConvWhateverWorks;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -148,13 +148,8 @@ wxString wxMessageOutputStderr::AppendLineFeedIfNeeded(const wxString& str)
|
|||||||
void wxMessageOutputStderr::Output(const wxString& str)
|
void wxMessageOutputStderr::Output(const wxString& str)
|
||||||
{
|
{
|
||||||
const wxString strWithLF = AppendLineFeedIfNeeded(str);
|
const wxString strWithLF = AppendLineFeedIfNeeded(str);
|
||||||
const wxWX2MBbuf buf = strWithLF.mb_str();
|
|
||||||
|
|
||||||
if ( buf )
|
|
||||||
fprintf(m_fp, "%s", (const char*) buf);
|
|
||||||
else // print at least something
|
|
||||||
fprintf(m_fp, "%s", (const char*) strWithLF.ToAscii());
|
|
||||||
|
|
||||||
|
fprintf(m_fp, "%s", (const char*) strWithLF.mb_str(wxConvWhateverWorks));
|
||||||
fflush(m_fp);
|
fflush(m_fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -3286,36 +3286,40 @@ bool wxCSConv::IsUTF8() const
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if wxUSE_UNICODE
|
// ============================================================================
|
||||||
|
// wxWhateverWorksConv
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
wxWCharBuffer wxSafeConvertMB2WX(const char *s)
|
size_t
|
||||||
|
wxWhateverWorksConv::ToWChar(wchar_t *dst, size_t dstLen,
|
||||||
|
const char *src, size_t srcLen) const
|
||||||
{
|
{
|
||||||
if ( !s )
|
size_t rc = wxConvUTF8.ToWChar(dst, dstLen, src, srcLen);
|
||||||
return wxWCharBuffer();
|
if ( rc != wxCONV_FAILED )
|
||||||
|
return rc;
|
||||||
|
|
||||||
wxWCharBuffer wbuf(wxConvLibc.cMB2WX(s));
|
rc = wxConvLibc.ToWChar(dst, dstLen, src, srcLen);
|
||||||
if ( !wbuf )
|
if ( rc != wxCONV_FAILED )
|
||||||
wbuf = wxConvUTF8.cMB2WX(s);
|
return rc;
|
||||||
if ( !wbuf )
|
|
||||||
wbuf = wxConvISO8859_1.cMB2WX(s);
|
|
||||||
|
|
||||||
return wbuf;
|
rc = wxConvISO8859_1.ToWChar(dst, dstLen, src, srcLen);
|
||||||
|
|
||||||
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
|
size_t
|
||||||
|
wxWhateverWorksConv::FromWChar(char *dst, size_t dstLen,
|
||||||
|
const wchar_t *src, size_t srcLen) const
|
||||||
{
|
{
|
||||||
if ( !ws )
|
size_t rc = wxConvLibc.FromWChar(dst, dstLen, src, srcLen);
|
||||||
return wxCharBuffer();
|
if ( rc != wxCONV_FAILED )
|
||||||
|
return rc;
|
||||||
|
|
||||||
wxCharBuffer buf(wxConvLibc.cWX2MB(ws));
|
rc = wxConvUTF8.FromWChar(dst, dstLen, src, srcLen);
|
||||||
if ( !buf )
|
|
||||||
buf = wxConvUTF8.cWX2MB(ws);
|
|
||||||
|
|
||||||
return buf;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // wxUSE_UNICODE
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// globals
|
// globals
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
@@ -3330,6 +3334,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
|
|||||||
#undef wxConvLibc
|
#undef wxConvLibc
|
||||||
#undef wxConvUTF8
|
#undef wxConvUTF8
|
||||||
#undef wxConvUTF7
|
#undef wxConvUTF7
|
||||||
|
#undef wxConvWhateverWorks
|
||||||
#undef wxConvLocal
|
#undef wxConvLocal
|
||||||
#undef wxConvISO8859_1
|
#undef wxConvISO8859_1
|
||||||
|
|
||||||
@@ -3369,6 +3374,7 @@ wxCharBuffer wxSafeConvertWX2MB(const wchar_t *ws)
|
|||||||
// empty statement (and hope that no compilers warns about this)
|
// empty statement (and hope that no compilers warns about this)
|
||||||
WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
|
WX_DEFINE_GLOBAL_CONV(wxMBConvStrictUTF8, wxConvUTF8, ;);
|
||||||
WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);
|
WX_DEFINE_GLOBAL_CONV(wxMBConvUTF7, wxConvUTF7, ;);
|
||||||
|
WX_DEFINE_GLOBAL_CONV(wxWhateverWorksConv, wxConvWhateverWorks, ;);
|
||||||
|
|
||||||
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
|
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvLocal, (wxFONTENCODING_SYSTEM));
|
||||||
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
|
WX_DEFINE_GLOBAL_CONV(wxCSConv, wxConvISO8859_1, (wxFONTENCODING_ISO8859_1));
|
||||||
@@ -3387,5 +3393,5 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvFileName =
|
|||||||
#ifdef __DARWIN__
|
#ifdef __DARWIN__
|
||||||
&wxConvMacUTF8DObj;
|
&wxConvMacUTF8DObj;
|
||||||
#else // !__DARWIN__
|
#else // !__DARWIN__
|
||||||
wxGet_wxConvLibcPtr();
|
wxGet_wxConvWhateverWorksPtr();
|
||||||
#endif // __DARWIN__/!__DARWIN__
|
#endif // __DARWIN__/!__DARWIN__
|
||||||
|
@@ -191,13 +191,7 @@ static wxStrCacheStatsDumper s_showCacheStats;
|
|||||||
wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
|
wxSTD ostream& operator<<(wxSTD ostream& os, const wxCStrData& str)
|
||||||
{
|
{
|
||||||
#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
|
#if wxUSE_UNICODE && !wxUSE_UNICODE_UTF8
|
||||||
const wxScopedCharBuffer buf(str.AsCharBuf());
|
return os << wxConvWhateverWorks.cWX2MB(str);
|
||||||
if ( !buf )
|
|
||||||
os.clear(wxSTD ios_base::failbit);
|
|
||||||
else
|
|
||||||
os << buf.data();
|
|
||||||
|
|
||||||
return os;
|
|
||||||
#else
|
#else
|
||||||
return os << str.AsInternal();
|
return os << str.AsInternal();
|
||||||
#endif
|
#endif
|
||||||
|
@@ -400,7 +400,7 @@ public:
|
|||||||
|
|
||||||
for ( int i = 0; i < m_argc; i++ )
|
for ( int i = 0; i < m_argc; i++ )
|
||||||
{
|
{
|
||||||
m_argv[i] = wxStrdup(args[i]);
|
m_argv[i] = wxStrdup(args[i].mb_str(wxConvWhateverWorks));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -78,10 +78,6 @@ private:
|
|||||||
wxString m_fileNameNonASCII;
|
wxString m_fileNameNonASCII;
|
||||||
wxString m_fileNameWork;
|
wxString m_fileNameWork;
|
||||||
|
|
||||||
#ifndef __DARWIN__
|
|
||||||
wxMBConv* m_convFNOld;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
wxDECLARE_NO_COPY_CLASS(FileFunctionsTestCase);
|
wxDECLARE_NO_COPY_CLASS(FileFunctionsTestCase);
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -98,16 +94,6 @@ CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( FileFunctionsTestCase, "FileFunctionsTest
|
|||||||
|
|
||||||
void FileFunctionsTestCase::setUp()
|
void FileFunctionsTestCase::setUp()
|
||||||
{
|
{
|
||||||
// Under Unix we need to use UTF-8 for the tests using non-ASCII filenames
|
|
||||||
// and this is not necessarily the case because the tests don't call
|
|
||||||
// setlocale(LC_ALL, ""), so ensure it explicitly. This is just a temporary
|
|
||||||
// hack until we find the solution to make the library work with Unicode
|
|
||||||
// filenames irrespectively of the current locale.
|
|
||||||
#ifndef __DARWIN__
|
|
||||||
m_convFNOld = wxConvFileName;
|
|
||||||
wxConvFileName = &wxConvUTF8;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Initialize local data
|
// Initialize local data
|
||||||
|
|
||||||
wxFileName fn1(wxFileName::GetTempDir(), wxT("wx_file_mask.txt"));
|
wxFileName fn1(wxFileName::GetTempDir(), wxT("wx_file_mask.txt"));
|
||||||
@@ -137,10 +123,6 @@ void FileFunctionsTestCase::tearDown()
|
|||||||
{
|
{
|
||||||
wxRemoveFile(m_fileNameWork);
|
wxRemoveFile(m_fileNameWork);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef __DARWIN__
|
|
||||||
wxConvFileName = m_convFNOld;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void FileFunctionsTestCase::GetTempFolder()
|
void FileFunctionsTestCase::GetTempFolder()
|
||||||
|
Reference in New Issue
Block a user