1. changed all "wxMBConv& conv" parameters to "const wxMBConv&"

2. this allows using wxConvAuto() instead of wxConvUTF8 as the default value
   for this parameter in the classes which read text from a file: wxConvAuto
   automatically recognizes the BOM at the start of the file and uses the
   correct conversion
3. don't use the Windows API for UTF-7 conversions as there is no way to make
   it fail on invalid UTF-7 strings; use our own wxMBConvUTF7 instead


git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38570 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2006-04-05 14:37:47 +00:00
parent cc845a6142
commit 830f8f11bc
21 changed files with 413 additions and 103 deletions

View File

@@ -203,21 +203,16 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
size_t lenChunk = MB2WC(NULL, src, 0);
if ( lenChunk == 0 )
{
// nothing left in the input string, conversion succeeded
// nothing left in the input string, conversion succeeded; but
// still account for the trailing NULL
dstWritten++;
break;
}
if ( lenChunk == wxCONV_FAILED )
return wxCONV_FAILED;
// if we already have a previous chunk, leave the NUL separating it
// from this one
if ( dstWritten )
{
dstWritten++;
if ( dst )
dst++;
}
lenChunk++; // for trailing NUL
dstWritten += lenChunk;
@@ -226,8 +221,7 @@ wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
if ( dstWritten > dstLen )
return wxCONV_FAILED;
lenChunk = MB2WC(dst, src, lenChunk + 1 /* for NUL */);
if ( lenChunk == wxCONV_FAILED )
if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
return wxCONV_FAILED;
dst += lenChunk;
@@ -390,11 +384,11 @@ wxMBConv::cMB2WC(const char *in, size_t inLen, size_t *outLen) const
const size_t dstLen = ToWChar(NULL, 0, in, inLen);
if ( dstLen != wxCONV_FAILED )
{
wxWCharBuffer wbuf(dstLen);
wxWCharBuffer wbuf(dstLen - 1);
if ( ToWChar(wbuf.data(), dstLen, in, inLen) )
{
if ( outLen )
*outLen = dstLen;
*outLen = dstLen - 1;
return wbuf;
}
}
@@ -411,11 +405,11 @@ wxMBConv::cWC2MB(const wchar_t *in, size_t inLen, size_t *outLen) const
const size_t dstLen = FromWChar(NULL, 0, in, inLen);
if ( dstLen != wxCONV_FAILED )
{
wxCharBuffer buf(dstLen);
wxCharBuffer buf(dstLen - 1);
if ( FromWChar(buf.data(), dstLen, in, inLen) )
{
if ( outLen )
*outLen = dstLen;
*outLen = dstLen - 1;
return buf;
}
}
@@ -1825,35 +1819,27 @@ public:
// wouldn't work if reading an incomplete MB char didn't result in an
// error
//
// note however that using MB_ERR_INVALID_CHARS with CP_UTF7 results in
// an error (tested under Windows Server 2003) and apparently it is
// done on purpose, i.e. the function accepts any input in this case
// and although I'd prefer to return error on ill-formed output, our
// own wxMBConvUTF7 doesn't detect errors (e.g. lone "+" which is
// explicitly ill-formed according to RFC 2152) neither so we don't
// even have any fallback here...
//
// Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
// Win XP or newer and if it is specified on older versions, conversion
// from CP_UTF8 (which can have flags only 0 or MB_ERR_INVALID_CHARS)
// fails. So we can only use the flag on newer Windows versions.
// Additionally, the flag is not supported by UTF7, symbol and CJK
// encodings. See here:
// Win XP or newer and it is not supported for UTF-[78] so we always
// use our own conversions in this case. See
// http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
// http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
if ( m_CodePage == CP_UTF8 )
{
return wxConvUTF8.MB2WC(buf, psz, n);
}
if ( m_CodePage == CP_UTF7 )
{
return wxConvUTF7.MB2WC(buf, psz, n);
}
int flags = 0;
if ( m_CodePage != CP_UTF7 && m_CodePage != CP_SYMBOL &&
m_CodePage < 50000 &&
IsAtLeastWin2kSP4() )
if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
IsAtLeastWin2kSP4() )
{
flags = MB_ERR_INVALID_CHARS;
}
else if ( m_CodePage == CP_UTF8 )
{
// Avoid round-trip in the special case of UTF-8 by using our
// own UTF-8 conversion code:
return wxMBConvUTF8().MB2WC(buf, psz, n);
}
const size_t len = ::MultiByteToWideChar
(