Merge branch 'text-stream-nuls'
Fixes for wxTextInputStream and wxMBConvUTF{7,16} classes bug fixes.
This commit is contained in:
@@ -25,7 +25,8 @@ typedef wxTextOutputStream& (*__wxTextOutputManip)(wxTextOutputStream&);
|
|||||||
WXDLLIMPEXP_BASE wxTextOutputStream &endl( wxTextOutputStream &stream );
|
WXDLLIMPEXP_BASE wxTextOutputStream &endl( wxTextOutputStream &stream );
|
||||||
|
|
||||||
|
|
||||||
#define wxEOT wxT('\4') // the End-Of-Text control code (used only inside wxTextInputStream)
|
// Obsolete constant defined only for compatibility, not used.
|
||||||
|
#define wxEOT wxT('\4')
|
||||||
|
|
||||||
// If you're scanning through a file using wxTextInputStream, you should check for EOF _before_
|
// If you're scanning through a file using wxTextInputStream, you should check for EOF _before_
|
||||||
// reading the next item (word / number), because otherwise the last item may get lost.
|
// reading the next item (word / number), because otherwise the last item may get lost.
|
||||||
@@ -58,7 +59,7 @@ public:
|
|||||||
double ReadDouble();
|
double ReadDouble();
|
||||||
wxString ReadLine();
|
wxString ReadLine();
|
||||||
wxString ReadWord();
|
wxString ReadWord();
|
||||||
wxChar GetChar() { wxChar c = NextChar(); return (wxChar)(c != wxEOT ? c : 0); }
|
wxChar GetChar();
|
||||||
|
|
||||||
wxString GetStringSeparators() const { return m_separators; }
|
wxString GetStringSeparators() const { return m_separators; }
|
||||||
void SetStringSeparators(const wxString &c) { m_separators = c; }
|
void SetStringSeparators(const wxString &c) { m_separators = c; }
|
||||||
@@ -83,7 +84,22 @@ public:
|
|||||||
protected:
|
protected:
|
||||||
wxInputStream &m_input;
|
wxInputStream &m_input;
|
||||||
wxString m_separators;
|
wxString m_separators;
|
||||||
char m_lastBytes[10]; // stores the bytes that were read for the last character
|
|
||||||
|
// Data possibly (see m_validXXX) read from the stream but not decoded yet.
|
||||||
|
// This is necessary because GetChar() may only return a single character
|
||||||
|
// but we may get more than one character when decoding raw input bytes.
|
||||||
|
char m_lastBytes[10];
|
||||||
|
|
||||||
|
// The bytes [0, m_validEnd) of m_lastBytes contain the bytes read by the
|
||||||
|
// last GetChar() call (this interval may be empty if GetChar() hasn't been
|
||||||
|
// called yet). The bytes [0, m_validBegin) have been already decoded and
|
||||||
|
// returned to caller or stored in m_lastWChar in the particularly
|
||||||
|
// egregious case of decoding a non-BMP character when using UTF-16 for
|
||||||
|
// wchar_t. Finally, the bytes [m_validBegin, m_validEnd) remain to be
|
||||||
|
// decoded and returned during the next call (again, this interval can, and
|
||||||
|
// usually will, be empty too if m_validBegin == m_validEnd).
|
||||||
|
size_t m_validBegin,
|
||||||
|
m_validEnd;
|
||||||
|
|
||||||
#if wxUSE_UNICODE
|
#if wxUSE_UNICODE
|
||||||
wxMBConv *m_conv;
|
wxMBConv *m_conv;
|
||||||
@@ -100,8 +116,6 @@ protected:
|
|||||||
|
|
||||||
bool EatEOL(const wxChar &c);
|
bool EatEOL(const wxChar &c);
|
||||||
void UngetLast(); // should be used instead of wxInputStream::Ungetch() because of Unicode issues
|
void UngetLast(); // should be used instead of wxInputStream::Ungetch() because of Unicode issues
|
||||||
// returns EOT (\4) if there is a stream error, or end of file
|
|
||||||
wxChar NextChar(); // this should be used instead of GetC() because of Unicode issues
|
|
||||||
wxChar NextNonSeparators();
|
wxChar NextNonSeparators();
|
||||||
|
|
||||||
wxDECLARE_NO_COPY_CLASS(wxTextInputStream);
|
wxDECLARE_NO_COPY_CLASS(wxTextInputStream);
|
||||||
|
@@ -103,41 +103,44 @@ static size_t encode_utf16(wxUint32 input, wxUint16 *output)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t decode_utf16(const wxUint16* input, wxUint32& output)
|
// Returns the next UTF-32 character from the wchar_t buffer terminated by the
|
||||||
{
|
// "end" pointer (the caller must ensure that on input "*pSrc < end") and
|
||||||
if ((*input < 0xd800) || (*input > 0xdfff))
|
// advances the pointer to the character after this one.
|
||||||
{
|
|
||||||
output = *input;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
|
|
||||||
{
|
|
||||||
output = *input;
|
|
||||||
return wxCONV_FAILED;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// returns the next UTF-32 character from the wchar_t buffer and advances the
|
|
||||||
// pointer to the character after this one
|
|
||||||
//
|
//
|
||||||
// if an invalid character is found, *pSrc is set to NULL, the caller must
|
// If an invalid or incomplete character is found, *pSrc is set to NULL, the
|
||||||
// check for this
|
// caller must check for this.
|
||||||
static wxUint32 wxDecodeSurrogate(const wxChar16 **pSrc)
|
static wxUint32 wxDecodeSurrogate(const wxChar16 **pSrc, const wxChar16* end)
|
||||||
{
|
{
|
||||||
wxUint32 out;
|
const wxChar16*& src = *pSrc;
|
||||||
const size_t
|
|
||||||
n = decode_utf16(reinterpret_cast<const wxUint16 *>(*pSrc), out);
|
|
||||||
if ( n == wxCONV_FAILED )
|
|
||||||
*pSrc = NULL;
|
|
||||||
else
|
|
||||||
*pSrc += n;
|
|
||||||
|
|
||||||
return out;
|
// Is this a BMP character?
|
||||||
|
const wxUint16 u = *src++;
|
||||||
|
if ((u < 0xd800) || (u > 0xdfff))
|
||||||
|
{
|
||||||
|
// Yes, just return it.
|
||||||
|
return u;
|
||||||
|
}
|
||||||
|
|
||||||
|
// No, we have the first half of a surrogate, check if we also have the
|
||||||
|
// second half (notice that this check does nothing if end == NULL, as it
|
||||||
|
// is allowed to be, and this is correct).
|
||||||
|
if ( src == end )
|
||||||
|
{
|
||||||
|
// No, we don't because this is the end of input.
|
||||||
|
src = NULL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const wxUint16 u2 = *src++;
|
||||||
|
if ( (u2 < 0xdc00) || (u2 > 0xdfff) )
|
||||||
|
{
|
||||||
|
// No, it's not in the low surrogate range.
|
||||||
|
src = NULL;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Yes, decode it and return the corresponding Unicode character.
|
||||||
|
return ((u - 0xd7c0) << 10) + (u2 - 0xdc00);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
@@ -699,6 +702,10 @@ size_t wxMBConvUTF7::ToWChar(wchar_t *dst, size_t dstLen,
|
|||||||
// start of an encoded segment?
|
// start of an encoded segment?
|
||||||
if ( cc == '+' )
|
if ( cc == '+' )
|
||||||
{
|
{
|
||||||
|
// Can't end with a plus sign.
|
||||||
|
if ( src == srcEnd )
|
||||||
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
if ( *src == '-' )
|
if ( *src == '-' )
|
||||||
{
|
{
|
||||||
// just the encoded plus sign, don't switch to shifted mode
|
// just the encoded plus sign, don't switch to shifted mode
|
||||||
@@ -1070,9 +1077,10 @@ wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
|
|||||||
char *out = dstLen ? dst : NULL;
|
char *out = dstLen ? dst : NULL;
|
||||||
size_t written = 0;
|
size_t written = 0;
|
||||||
|
|
||||||
for ( const wchar_t *wp = src; ; wp++ )
|
const wchar_t* const end = srcLen == wxNO_LEN ? NULL : src + srcLen;
|
||||||
|
for ( const wchar_t *wp = src; ; )
|
||||||
{
|
{
|
||||||
if ( (srcLen == wxNO_LEN ? !*wp : !srcLen) )
|
if ( end ? wp == end : !*wp )
|
||||||
{
|
{
|
||||||
// all done successfully, just add the trailing NULL if we are not
|
// all done successfully, just add the trailing NULL if we are not
|
||||||
// using explicit length
|
// using explicit length
|
||||||
@@ -1092,38 +1100,13 @@ wxMBConvStrictUTF8::FromWChar(char *dst, size_t dstLen,
|
|||||||
return written;
|
return written;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( srcLen != wxNO_LEN )
|
|
||||||
srcLen--;
|
|
||||||
|
|
||||||
wxUint32 code;
|
wxUint32 code;
|
||||||
#ifdef WC_UTF16
|
#ifdef WC_UTF16
|
||||||
// Be careful here: decode_utf16() may need to read the next wchar_t
|
code = wxDecodeSurrogate(&wp, end);
|
||||||
// but we might not have any left, so pass it a temporary buffer which
|
if ( !wp )
|
||||||
// always has 2 wide characters and take care to set its second element
|
|
||||||
// to 0, which is invalid as a second half of a surrogate, to ensure
|
|
||||||
// that we return an error when trying to convert a buffer ending with
|
|
||||||
// half of a surrogate.
|
|
||||||
wxUint16 tmp[2];
|
|
||||||
tmp[0] = wp[0];
|
|
||||||
tmp[1] = srcLen != 0 ? wp[1] : 0;
|
|
||||||
switch ( decode_utf16(tmp, code) )
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
// Nothing special to do, just a character from BMP.
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 2:
|
|
||||||
// skip the next char too as we decoded a surrogate
|
|
||||||
wp++;
|
|
||||||
if ( srcLen != wxNO_LEN )
|
|
||||||
srcLen--;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case wxCONV_FAILED:
|
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
}
|
|
||||||
#else // wchar_t is UTF-32
|
#else // wchar_t is UTF-32
|
||||||
code = *wp & 0x7fffffff;
|
code = *wp++ & 0x7fffffff;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
unsigned len;
|
unsigned len;
|
||||||
@@ -1381,20 +1364,15 @@ size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
|
|||||||
|
|
||||||
// The length can be either given explicitly or computed implicitly for the
|
// The length can be either given explicitly or computed implicitly for the
|
||||||
// NUL-terminated strings.
|
// NUL-terminated strings.
|
||||||
const bool isNulTerminated = srcLen == wxNO_LEN;
|
const wchar_t* const end = srcLen == wxNO_LEN ? NULL : psz + srcLen;
|
||||||
while ((isNulTerminated ? *psz : srcLen--) && ((!buf) || (len < n)))
|
while ((end ? psz < end : *psz) && ((!buf) || (len < n)))
|
||||||
{
|
{
|
||||||
wxUint32 cc;
|
wxUint32 cc;
|
||||||
|
|
||||||
#ifdef WC_UTF16
|
#ifdef WC_UTF16
|
||||||
// cast is ok for WC_UTF16
|
cc = wxDecodeSurrogate(&psz, end);
|
||||||
size_t pa = decode_utf16((const wxUint16 *)psz, cc);
|
if ( !psz )
|
||||||
|
return wxCONV_FAILED;
|
||||||
// we could have consumed two input code units if we decoded a
|
|
||||||
// surrogate, so adjust the input pointer and, if necessary, the length
|
|
||||||
psz += (pa == wxCONV_FAILED) ? 1 : pa;
|
|
||||||
if ( pa == 2 && !isNulTerminated )
|
|
||||||
srcLen--;
|
|
||||||
#else
|
#else
|
||||||
cc = (*psz++) & 0x7fffffff;
|
cc = (*psz++) & 0x7fffffff;
|
||||||
#endif
|
#endif
|
||||||
@@ -1455,7 +1433,7 @@ size_t wxMBConvUTF8::FromWChar(char *buf, size_t n,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( isNulTerminated )
|
if ( !end )
|
||||||
{
|
{
|
||||||
// Add the trailing NUL in this case if we have a large enough buffer.
|
// Add the trailing NUL in this case if we have a large enough buffer.
|
||||||
if ( buf && (len < n) )
|
if ( buf && (len < n) )
|
||||||
@@ -1623,7 +1601,7 @@ wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
|
|||||||
const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
|
const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
|
||||||
for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
|
for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
|
||||||
{
|
{
|
||||||
const wxUint32 ch = wxDecodeSurrogate(&inBuff);
|
const wxUint32 ch = wxDecodeSurrogate(&inBuff, inEnd);
|
||||||
if ( !inBuff )
|
if ( !inBuff )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
@@ -1693,29 +1671,26 @@ wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
|
|||||||
const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
|
const wxUint16 *inBuff = reinterpret_cast<const wxUint16 *>(src);
|
||||||
for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
|
for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
|
||||||
{
|
{
|
||||||
wxUint32 ch;
|
|
||||||
wxUint16 tmp[2];
|
wxUint16 tmp[2];
|
||||||
|
const wxUint16* tmpEnd = tmp;
|
||||||
|
|
||||||
tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
|
tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
|
||||||
if ( ++inBuff < inEnd )
|
tmpEnd++;
|
||||||
|
|
||||||
|
if ( inBuff + 1 < inEnd )
|
||||||
{
|
{
|
||||||
// Normal case, we have a next character to decode.
|
// Normal case, we have a next character to decode.
|
||||||
tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
|
tmp[1] = wxUINT16_SWAP_ALWAYS(inBuff[1]);
|
||||||
}
|
tmpEnd++;
|
||||||
else // End of input.
|
|
||||||
{
|
|
||||||
// Setting the second character to 0 ensures we correctly return
|
|
||||||
// wxCONV_FAILED if the first one is the first half of a surrogate
|
|
||||||
// as the second half can't be 0 in this case.
|
|
||||||
tmp[1] = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t numChars = decode_utf16(tmp, ch);
|
const wxUint16* p = tmp;
|
||||||
if ( numChars == wxCONV_FAILED )
|
const wxUint32 ch = wxDecodeSurrogate(&p, tmpEnd);
|
||||||
|
if ( !p )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
if ( numChars == 2 )
|
// Move the real pointer by the same amount as "p" was updated by.
|
||||||
inBuff++;
|
inBuff += p - tmp;
|
||||||
|
|
||||||
outLen++;
|
outLen++;
|
||||||
|
|
||||||
@@ -1861,7 +1836,7 @@ wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
|
|||||||
size_t outLen = 0;
|
size_t outLen = 0;
|
||||||
for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
|
for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
|
||||||
{
|
{
|
||||||
const wxUint32 ch = wxDecodeSurrogate(&src);
|
const wxUint32 ch = wxDecodeSurrogate(&src, srcEnd);
|
||||||
if ( !src )
|
if ( !src )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
@@ -1930,7 +1905,7 @@ wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
|
|||||||
size_t outLen = 0;
|
size_t outLen = 0;
|
||||||
for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
|
for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
|
||||||
{
|
{
|
||||||
const wxUint32 ch = wxDecodeSurrogate(&src);
|
const wxUint32 ch = wxDecodeSurrogate(&src, srcEnd);
|
||||||
if ( !src )
|
if ( !src )
|
||||||
return wxCONV_FAILED;
|
return wxCONV_FAILED;
|
||||||
|
|
||||||
|
@@ -35,7 +35,8 @@ wxTextInputStream::wxTextInputStream(wxInputStream &s,
|
|||||||
const wxMBConv& conv)
|
const wxMBConv& conv)
|
||||||
: m_input(s), m_separators(sep), m_conv(conv.Clone())
|
: m_input(s), m_separators(sep), m_conv(conv.Clone())
|
||||||
{
|
{
|
||||||
memset((void*)m_lastBytes, 0, 10);
|
m_validBegin =
|
||||||
|
m_validEnd = 0;
|
||||||
|
|
||||||
#if SIZEOF_WCHAR_T == 2
|
#if SIZEOF_WCHAR_T == 2
|
||||||
m_lastWChar = 0;
|
m_lastWChar = 0;
|
||||||
@@ -45,7 +46,10 @@ wxTextInputStream::wxTextInputStream(wxInputStream &s,
|
|||||||
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep)
|
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep)
|
||||||
: m_input(s), m_separators(sep)
|
: m_input(s), m_separators(sep)
|
||||||
{
|
{
|
||||||
memset((void*)m_lastBytes, 0, 10);
|
m_validBegin =
|
||||||
|
m_validEnd = 0;
|
||||||
|
|
||||||
|
m_lastBytes[0] = 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -58,14 +62,16 @@ wxTextInputStream::~wxTextInputStream()
|
|||||||
|
|
||||||
void wxTextInputStream::UngetLast()
|
void wxTextInputStream::UngetLast()
|
||||||
{
|
{
|
||||||
size_t byteCount = 0;
|
if ( m_validEnd )
|
||||||
while(m_lastBytes[byteCount]) // pseudo ANSI strlen (even for Unicode!)
|
{
|
||||||
byteCount++;
|
m_input.Ungetch(m_lastBytes, m_validEnd);
|
||||||
m_input.Ungetch(m_lastBytes, byteCount);
|
|
||||||
memset((void*)m_lastBytes, 0, 10);
|
m_validBegin =
|
||||||
|
m_validEnd = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
wxChar wxTextInputStream::NextChar()
|
wxChar wxTextInputStream::GetChar()
|
||||||
{
|
{
|
||||||
#if wxUSE_UNICODE
|
#if wxUSE_UNICODE
|
||||||
#if SIZEOF_WCHAR_T == 2
|
#if SIZEOF_WCHAR_T == 2
|
||||||
@@ -77,17 +83,37 @@ wxChar wxTextInputStream::NextChar()
|
|||||||
m_lastWChar = 0;
|
m_lastWChar = 0;
|
||||||
return wc;
|
return wc;
|
||||||
}
|
}
|
||||||
#endif // !SWIG_ONLY_SCRIPT_API
|
#endif // SIZEOF_WCHAR_T
|
||||||
|
|
||||||
wxChar wbuf[2];
|
// If we have any non-decoded bytes left from the last call, shift them to
|
||||||
memset((void*)m_lastBytes, 0, 10);
|
// be at the beginning of the buffer.
|
||||||
for(size_t inlen = 0; inlen < 9; inlen++)
|
if ( m_validBegin < m_validEnd )
|
||||||
|
{
|
||||||
|
m_validEnd -= m_validBegin;
|
||||||
|
memmove(m_lastBytes, m_lastBytes + m_validBegin, m_validEnd);
|
||||||
|
}
|
||||||
|
else // All bytes were already decoded and consumed.
|
||||||
|
{
|
||||||
|
m_validEnd = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We may need to decode up to 4 characters if we have input starting with
|
||||||
|
// 3 BOM-like bytes, but not actually containing a BOM, as decoding it will
|
||||||
|
// only succeed when 4 bytes are read -- and will yield 4 wide characters.
|
||||||
|
wxChar wbuf[4];
|
||||||
|
for(size_t inlen = 0; inlen < sizeof(m_lastBytes); inlen++)
|
||||||
|
{
|
||||||
|
if ( inlen >= m_validEnd )
|
||||||
{
|
{
|
||||||
// actually read the next character
|
// actually read the next character
|
||||||
m_lastBytes[inlen] = m_input.GetC();
|
m_lastBytes[inlen] = m_input.GetC();
|
||||||
|
|
||||||
if(m_input.LastRead() <= 0)
|
if(m_input.LastRead() <= 0)
|
||||||
return wxEOT;
|
return 0;
|
||||||
|
|
||||||
|
m_validEnd++;
|
||||||
|
}
|
||||||
|
//else: Retry decoding what we already have in the buffer.
|
||||||
|
|
||||||
switch ( m_conv->ToWChar(wbuf, WXSIZEOF(wbuf), m_lastBytes, inlen + 1) )
|
switch ( m_conv->ToWChar(wbuf, WXSIZEOF(wbuf), m_lastBytes, inlen + 1) )
|
||||||
{
|
{
|
||||||
@@ -103,12 +129,17 @@ wxChar wxTextInputStream::NextChar()
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// if we couldn't decode a single character during the last
|
// If we couldn't decode a single character during the last
|
||||||
// loop iteration we shouldn't be able to decode 2 or more of
|
// loop iteration, but decoded more than one of them with just
|
||||||
// them with an extra single byte, something fishy is going on
|
// one extra byte, the only explanation is that we were using a
|
||||||
// (except if we use UTF-16, see below)
|
// wxConvAuto conversion recognizing the initial BOM and that
|
||||||
wxFAIL_MSG("unexpected decoding result");
|
// it couldn't detect the presence or absence of BOM so far,
|
||||||
return wxEOT;
|
// but now finally has enough data to see that there is none.
|
||||||
|
// As we must have fallen back to Latin-1 in this case, return
|
||||||
|
// just the first byte and keep the other ones for the next
|
||||||
|
// time.
|
||||||
|
m_validBegin = 1;
|
||||||
|
return wbuf[0];
|
||||||
|
|
||||||
#if SIZEOF_WCHAR_T == 2
|
#if SIZEOF_WCHAR_T == 2
|
||||||
case 2:
|
case 2:
|
||||||
@@ -118,25 +149,37 @@ wxChar wxTextInputStream::NextChar()
|
|||||||
// remember the second one for the next call, as there is no
|
// remember the second one for the next call, as there is no
|
||||||
// way to fit both of them into a single wxChar in this case.
|
// way to fit both of them into a single wxChar in this case.
|
||||||
m_lastWChar = wbuf[1];
|
m_lastWChar = wbuf[1];
|
||||||
#endif // !SWIG_ONLY_SCRIPT_API
|
#endif // SIZEOF_WCHAR_T == 2
|
||||||
wxFALLTHROUGH;
|
wxFALLTHROUGH;
|
||||||
|
|
||||||
case 1:
|
case 1:
|
||||||
|
m_validBegin = inlen + 1;
|
||||||
|
|
||||||
// we finally decoded a character
|
// we finally decoded a character
|
||||||
return wbuf[0];
|
return wbuf[0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// there should be no encoding which requires more than nine bytes for one
|
// There should be no encoding which requires more than 10 bytes to decode
|
||||||
// character so something must be wrong with our conversion but we have no
|
// at least one character (the most actually seems to be 7: 3 for the
|
||||||
// way to signal it from here
|
// initial BOM, which is ignored, and 4 for the longest possible encoding
|
||||||
return wxEOT;
|
// of a Unicode character in UTF-8), so something must be wrong with our
|
||||||
|
// conversion but we have no way to signal it from here and just return 0
|
||||||
|
// as if we reached the end of the stream.
|
||||||
|
m_validBegin = 0;
|
||||||
|
m_validEnd = sizeof(m_lastBytes);
|
||||||
|
|
||||||
|
return 0;
|
||||||
#else
|
#else
|
||||||
m_lastBytes[0] = m_input.GetC();
|
m_lastBytes[0] = m_input.GetC();
|
||||||
|
|
||||||
if(m_input.LastRead() <= 0)
|
if(m_input.LastRead() <= 0)
|
||||||
return wxEOT;
|
{
|
||||||
|
m_validEnd = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_validEnd = 1;
|
||||||
|
|
||||||
return m_lastBytes[0];
|
return m_lastBytes[0];
|
||||||
#endif
|
#endif
|
||||||
@@ -147,8 +190,9 @@ wxChar wxTextInputStream::NextNonSeparators()
|
|||||||
{
|
{
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
wxChar c = NextChar();
|
wxChar c = GetChar();
|
||||||
if (c == wxEOT) return (wxChar) 0;
|
if (!c)
|
||||||
|
return c;
|
||||||
|
|
||||||
if (c != wxT('\n') &&
|
if (c != wxT('\n') &&
|
||||||
c != wxT('\r') &&
|
c != wxT('\r') &&
|
||||||
@@ -164,8 +208,8 @@ bool wxTextInputStream::EatEOL(const wxChar &c)
|
|||||||
|
|
||||||
if (c == wxT('\r')) // eat on both Mac and DOS
|
if (c == wxT('\r')) // eat on both Mac and DOS
|
||||||
{
|
{
|
||||||
wxChar c2 = NextChar();
|
wxChar c2 = GetChar();
|
||||||
if(c2 == wxEOT) return true; // end of stream reached, had enough :-)
|
if (!c2) return true; // end of stream reached, had enough :-)
|
||||||
|
|
||||||
if (c2 != wxT('\n')) UngetLast(); // Don't eat on Mac
|
if (c2 != wxT('\n')) UngetLast(); // Don't eat on Mac
|
||||||
return true;
|
return true;
|
||||||
@@ -261,8 +305,8 @@ wxString wxTextInputStream::ReadLine()
|
|||||||
|
|
||||||
while ( !m_input.Eof() )
|
while ( !m_input.Eof() )
|
||||||
{
|
{
|
||||||
wxChar c = NextChar();
|
wxChar c = GetChar();
|
||||||
if(c == wxEOT)
|
if (!c)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (EatEOL(c))
|
if (EatEOL(c))
|
||||||
@@ -289,8 +333,8 @@ wxString wxTextInputStream::ReadWord()
|
|||||||
|
|
||||||
while ( !m_input.Eof() )
|
while ( !m_input.Eof() )
|
||||||
{
|
{
|
||||||
c = NextChar();
|
c = GetChar();
|
||||||
if(c == wxEOT)
|
if (!c)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (m_separators.Find(c) >= 0)
|
if (m_separators.Find(c) >= 0)
|
||||||
|
@@ -985,6 +985,7 @@ void MBConvTestCase::NonBMPCharTests()
|
|||||||
TestDecoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
|
TestDecoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
|
||||||
TestEncoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
|
TestEncoder(w, wchars, u8, sizeof(u8)-1, wxConvUTF8, 1);
|
||||||
}
|
}
|
||||||
|
SECTION("wxMBConvUTF16LE")
|
||||||
{
|
{
|
||||||
char u16le[sizeof(u16)];
|
char u16le[sizeof(u16)];
|
||||||
for (size_t i = 0; i < sizeof(u16)/2; ++i) {
|
for (size_t i = 0; i < sizeof(u16)/2; ++i) {
|
||||||
@@ -995,6 +996,7 @@ void MBConvTestCase::NonBMPCharTests()
|
|||||||
TestDecoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
|
TestDecoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
|
||||||
TestEncoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
|
TestEncoder(w, wchars, u16le, sizeof(u16le)-2, conv, 2);
|
||||||
}
|
}
|
||||||
|
SECTION("wxMBConvUTF16BE")
|
||||||
{
|
{
|
||||||
char u16be[sizeof(u16)];
|
char u16be[sizeof(u16)];
|
||||||
for (size_t i = 0; i < sizeof(u16)/2; ++i) {
|
for (size_t i = 0; i < sizeof(u16)/2; ++i) {
|
||||||
@@ -1005,6 +1007,7 @@ void MBConvTestCase::NonBMPCharTests()
|
|||||||
TestDecoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
|
TestDecoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
|
||||||
TestEncoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
|
TestEncoder(w, wchars, u16be, sizeof(u16be)-2, conv, 2);
|
||||||
}
|
}
|
||||||
|
SECTION("wxMBConvUTF32LE")
|
||||||
{
|
{
|
||||||
char u32le[sizeof(u32)];
|
char u32le[sizeof(u32)];
|
||||||
for (size_t i = 0; i < sizeof(u32)/4; ++i) {
|
for (size_t i = 0; i < sizeof(u32)/4; ++i) {
|
||||||
@@ -1017,6 +1020,7 @@ void MBConvTestCase::NonBMPCharTests()
|
|||||||
TestDecoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
|
TestDecoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
|
||||||
TestEncoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
|
TestEncoder(w, wchars, u32le, sizeof(u32le)-4, conv, 4);
|
||||||
}
|
}
|
||||||
|
SECTION("wxMBConvUTF32BE")
|
||||||
{
|
{
|
||||||
char u32be[sizeof(u32)];
|
char u32be[sizeof(u32)];
|
||||||
for (size_t i = 0; i < sizeof(u32)/4; ++i) {
|
for (size_t i = 0; i < sizeof(u32)/4; ++i) {
|
||||||
|
@@ -290,4 +290,40 @@ void TextStreamTestCase::TestInput(const wxMBConv& conv,
|
|||||||
CPPUNIT_ASSERT_EQUAL( 0, memcmp(txtWchar, temp.wc_str(), sizeof(txtWchar)) );
|
CPPUNIT_ASSERT_EQUAL( 0, memcmp(txtWchar, temp.wc_str(), sizeof(txtWchar)) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE("wxTextInputStream::GetChar", "[text][input][stream][char]")
|
||||||
|
{
|
||||||
|
// This is the simplest possible test that used to trigger assertion in
|
||||||
|
// wxTextInputStream::GetChar().
|
||||||
|
SECTION("starts-with-nul")
|
||||||
|
{
|
||||||
|
const wxUint8 buf[] = { 0x00, 0x01, };
|
||||||
|
wxMemoryInputStream mis(buf, sizeof(buf));
|
||||||
|
wxTextInputStream tis(mis);
|
||||||
|
|
||||||
|
REQUIRE( tis.GetChar() == 0x00 );
|
||||||
|
REQUIRE( tis.GetChar() == 0x01 );
|
||||||
|
REQUIRE( tis.GetChar() == 0x00 );
|
||||||
|
CHECK( tis.GetInputStream().Eof() );
|
||||||
|
}
|
||||||
|
|
||||||
|
// This exercises a problematic path in GetChar() as the first 3 bytes of
|
||||||
|
// this stream look like the start of UTF-32BE BOM, but this is not
|
||||||
|
// actually a BOM because the 4th byte is 0xFE and not 0xFF, so the stream
|
||||||
|
// should decode the buffer as Latin-1 once it gets there.
|
||||||
|
SECTION("almost-UTF-32-BOM")
|
||||||
|
{
|
||||||
|
const wxUint8 buf[] = { 0x00, 0x00, 0xFE, 0xFE, 0x01 };
|
||||||
|
wxMemoryInputStream mis(buf, sizeof(buf));
|
||||||
|
wxTextInputStream tis(mis);
|
||||||
|
|
||||||
|
REQUIRE( tis.GetChar() == 0x00 );
|
||||||
|
REQUIRE( tis.GetChar() == 0x00 );
|
||||||
|
REQUIRE( tis.GetChar() == 0xFE );
|
||||||
|
REQUIRE( tis.GetChar() == 0xFE );
|
||||||
|
REQUIRE( tis.GetChar() == 0x01 );
|
||||||
|
REQUIRE( tis.GetChar() == 0x00 );
|
||||||
|
CHECK( tis.GetInputStream().Eof() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif // wxUSE_UNICODE
|
#endif // wxUSE_UNICODE
|
||||||
|
Reference in New Issue
Block a user