made wxTextInputStream Unicode safe and general cleanup (patch 653775)
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@21825 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -15,6 +15,12 @@ and on a typical 32-bit computer, none of these match to the "long" type (wxInt3
|
|||||||
is defined as int on 32-bit architectures) so that you cannot use long. To avoid
|
is defined as int on 32-bit architectures) so that you cannot use long. To avoid
|
||||||
problems (here and elsewhere), make use of wxInt32, wxUint32 and similar types.
|
problems (here and elsewhere), make use of wxInt32, wxUint32 and similar types.
|
||||||
|
|
||||||
|
If you're scanning through a file using wxTextInputStream, you should check for EOF {\bf before}
|
||||||
|
reading the next item (word / number), because otherwise the last item may get lost.
|
||||||
|
You should however be prepared to receive an empty item (empty string / zero number) at the
|
||||||
|
end of file, especially on Windows systems. This is unavoidable because most (but not all) files end
|
||||||
|
with whitespace (i.e. usually a newline).
|
||||||
|
|
||||||
For example:
|
For example:
|
||||||
|
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
@@ -37,14 +43,20 @@ For example:
|
|||||||
|
|
||||||
\membersection{wxTextInputStream::wxTextInputStream}\label{wxtextinputstreamconstr}
|
\membersection{wxTextInputStream::wxTextInputStream}\label{wxtextinputstreamconstr}
|
||||||
|
|
||||||
\func{}{wxTextInputStream}{\param{wxInputStream\&}{ stream}}
|
\func{}{wxTextInputStream}{\param{wxInputStream\&}{ stream}, \param{const wxString\&}{ sep=wxT(" \t")},
|
||||||
|
\param{wxMBConv\&}{ conv = wxConvUTF8} }
|
||||||
|
|
||||||
Constructs a text stream object from an input stream. Only read methods will
|
Constructs a text stream object from an input stream. Only read methods will
|
||||||
be available.
|
be available.
|
||||||
|
|
||||||
\wxheading{Parameters}
|
\wxheading{Parameters}
|
||||||
|
|
||||||
\docparam{stream}{The input stream.}
|
\docparam{stream}{The underlying input stream.}
|
||||||
|
|
||||||
|
\docparam{sep}{The initial string separator characters.}
|
||||||
|
|
||||||
|
\docparam{conv}{{\it In Unicode build only:} The encoding converter used to convert the bytes in the
|
||||||
|
underlying input stream to characters.}
|
||||||
|
|
||||||
\membersection{wxTextInputStream::\destruct{wxTextInputStream}}
|
\membersection{wxTextInputStream::\destruct{wxTextInputStream}}
|
||||||
|
|
||||||
@@ -52,23 +64,64 @@ be available.
|
|||||||
|
|
||||||
Destroys the wxTextInputStream object.
|
Destroys the wxTextInputStream object.
|
||||||
|
|
||||||
\membersection{wxTextInputStream::Read8}
|
\membersection{wxTextInputStream::Read8}\label{wxtextinputstreamread8}
|
||||||
|
|
||||||
\func{wxUint8}{Read8}{\void}
|
\func{wxUint8}{Read8}{\param{int}{ base = 10}}
|
||||||
|
|
||||||
Reads a single byte from the stream.
|
Reads a single unsigned byte from the stream, given in base {\it base}.
|
||||||
|
|
||||||
|
The value of {\it base} must be between $2$ and $36$, inclusive, or
|
||||||
|
be a special value $0$ which means that the usual rules of {\tt C} numbers are
|
||||||
|
applied: if the number starts with {\tt 0x} it is considered to be in base
|
||||||
|
$16$, if it starts with {\tt 0} - in base $8$ and in base $10$ otherwise. Note
|
||||||
|
that you may not want to specify the base $0$ if you are parsing the numbers
|
||||||
|
which may have leading zeroes as they can yield unexpected (to the user not
|
||||||
|
familiar with C) results.
|
||||||
|
|
||||||
|
\membersection{wxTextInputStream::Read8S}
|
||||||
|
|
||||||
|
\func{wxInt8}{Read8S}{\param{int}{ base = 10}}
|
||||||
|
|
||||||
|
Reads a single signed byte from the stream.
|
||||||
|
|
||||||
|
See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
|
||||||
|
description of the {\it base} parameter.
|
||||||
|
|
||||||
\membersection{wxTextInputStream::Read16}
|
\membersection{wxTextInputStream::Read16}
|
||||||
|
|
||||||
\func{wxUint16}{Read16}{\void}
|
\func{wxUint16}{Read16}{\param{int}{ base = 10}}
|
||||||
|
|
||||||
Reads a 16 bit integer from the stream.
|
Reads an unsigned 16 bit integer from the stream.
|
||||||
|
|
||||||
|
See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
|
||||||
|
description of the {\it base} parameter.
|
||||||
|
|
||||||
|
\membersection{wxTextInputStream::Read16S}
|
||||||
|
|
||||||
|
\func{wxInt16}{Read16S}{\param{int}{ base = 10}}
|
||||||
|
|
||||||
|
Reads a signed 16 bit integer from the stream.
|
||||||
|
|
||||||
|
See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
|
||||||
|
description of the {\it base} parameter.
|
||||||
|
|
||||||
\membersection{wxTextInputStream::Read32}
|
\membersection{wxTextInputStream::Read32}
|
||||||
|
|
||||||
\func{wxUint32}{Read32}{\void}
|
\func{wxUint32}{Read32}{\param{int}{ base = 10}}
|
||||||
|
|
||||||
Reads a 32 bit integer from the stream.
|
Reads a 32 bit unsigned integer from the stream.
|
||||||
|
|
||||||
|
See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
|
||||||
|
description of the {\it base} parameter.
|
||||||
|
|
||||||
|
\membersection{wxTextInputStream::Read32S}
|
||||||
|
|
||||||
|
\func{wxInt32}{Read32S}{\param{int}{ base = 10}}
|
||||||
|
|
||||||
|
Reads a 32 bit signed integer from the stream.
|
||||||
|
|
||||||
|
See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
|
||||||
|
description of the {\it base} parameter.
|
||||||
|
|
||||||
\membersection{wxTextInputStream::ReadDouble}
|
\membersection{wxTextInputStream::ReadDouble}
|
||||||
|
|
||||||
|
@@ -29,6 +29,13 @@ typedef wxTextOutputStream& (*__wxTextOutputManip)(wxTextOutputStream&);
|
|||||||
WXDLLIMPEXP_BASE wxTextOutputStream &endl( wxTextOutputStream &stream );
|
WXDLLIMPEXP_BASE wxTextOutputStream &endl( wxTextOutputStream &stream );
|
||||||
|
|
||||||
|
|
||||||
|
#define wxEOT wxT('\4') // the End-Of-Text control code (used only inside wxTextInputStream)
|
||||||
|
|
||||||
|
// If you're scanning through a file using wxTextInputStream, you should check for EOF _before_
|
||||||
|
// reading the next item (word / number), because otherwise the last item may get lost.
|
||||||
|
// You should however be prepared to receive an empty item (empty string / zero number) at the
|
||||||
|
// end of file, especially on Windows systems. This is unavoidable because most (but not all) files end
|
||||||
|
// with whitespace (i.e. usually a newline).
|
||||||
class WXDLLIMPEXP_BASE wxTextInputStream
|
class WXDLLIMPEXP_BASE wxTextInputStream
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@@ -39,11 +46,14 @@ public:
|
|||||||
#endif
|
#endif
|
||||||
~wxTextInputStream();
|
~wxTextInputStream();
|
||||||
|
|
||||||
wxUint32 Read32();
|
wxUint32 Read32(int base = 10); // base may be between 2 and 36, inclusive, or the special 0 (= C format)
|
||||||
wxUint16 Read16();
|
wxUint16 Read16(int base = 10);
|
||||||
wxUint8 Read8();
|
wxUint8 Read8(int base = 10);
|
||||||
|
wxInt32 Read32S(int base = 10);
|
||||||
|
wxInt16 Read16S(int base = 10);
|
||||||
|
wxInt8 Read8S(int base = 10);
|
||||||
double ReadDouble();
|
double ReadDouble();
|
||||||
wxString ReadString(); // deprecated use ReadLine or ReadWord instead
|
wxString ReadString(); // deprecated: use ReadLine or ReadWord instead
|
||||||
wxString ReadLine();
|
wxString ReadLine();
|
||||||
wxString ReadWord();
|
wxString ReadWord();
|
||||||
|
|
||||||
@@ -65,14 +75,17 @@ public:
|
|||||||
protected:
|
protected:
|
||||||
wxInputStream &m_input;
|
wxInputStream &m_input;
|
||||||
wxString m_separators;
|
wxString m_separators;
|
||||||
|
char m_lastBytes[10]; // stores the bytes that were read for the last character
|
||||||
|
|
||||||
#if wxUSE_UNICODE
|
#if wxUSE_UNICODE
|
||||||
wxMBConv &m_conv;
|
wxMBConv &m_conv;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool EatEOL(const wxChar &c);
|
bool EatEOL(const wxChar &c);
|
||||||
|
void UngetLast(); // should be used instead of wxInputStream::Ungetch() because of Unicode issues
|
||||||
|
// returns EOT (\4) if there is a stream error, or end of file
|
||||||
|
wxChar NextChar(); // this should be used instead of GetC() because of Unicode issues
|
||||||
wxChar NextNonSeparators();
|
wxChar NextNonSeparators();
|
||||||
void SkipIfEndOfLine( wxChar c );
|
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
|
@@ -42,11 +42,13 @@
|
|||||||
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep, wxMBConv& conv)
|
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep, wxMBConv& conv)
|
||||||
: m_input(s), m_separators(sep), m_conv(conv)
|
: m_input(s), m_separators(sep), m_conv(conv)
|
||||||
{
|
{
|
||||||
|
memset((void*)m_lastBytes, 0, 10);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep)
|
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep)
|
||||||
: m_input(s), m_separators(sep)
|
: m_input(s), m_separators(sep)
|
||||||
{
|
{
|
||||||
|
memset((void*)m_lastBytes, 0, 10);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -54,13 +56,52 @@ wxTextInputStream::~wxTextInputStream()
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void wxTextInputStream::UngetLast()
|
||||||
|
{
|
||||||
|
size_t byteCount = 0;
|
||||||
|
while(m_lastBytes[byteCount]) // pseudo ANSI strlen (even for Unicode!)
|
||||||
|
byteCount++;
|
||||||
|
m_input.Ungetch(m_lastBytes, byteCount);
|
||||||
|
memset((void*)m_lastBytes, 0, 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
wxChar wxTextInputStream::NextChar()
|
||||||
|
{
|
||||||
|
#if wxUSE_UNICODE
|
||||||
|
wxChar wbuf[2];
|
||||||
|
memset((void*)m_lastBytes, 0, 10);
|
||||||
|
for(size_t inlen = 0; inlen < 9; inlen++)
|
||||||
|
{
|
||||||
|
// actually read the next character
|
||||||
|
m_lastBytes[inlen] = m_input.GetC();
|
||||||
|
|
||||||
|
if(m_input.LastRead() <= 0)
|
||||||
|
return wxEOT;
|
||||||
|
|
||||||
|
int retlen = (int) m_conv.MB2WC(wbuf, m_lastBytes, 2); // returns -1 for failure
|
||||||
|
if(retlen >= 0) // res == 0 could happen for '\0' char
|
||||||
|
return wbuf[0];
|
||||||
|
}
|
||||||
|
// there should be no encoding which requires more than nine bytes for one character...
|
||||||
|
return wxEOT;
|
||||||
|
#else
|
||||||
|
m_lastBytes[0] = m_input.GetC();
|
||||||
|
|
||||||
|
if(m_input.LastRead() <= 0)
|
||||||
|
return wxEOT;
|
||||||
|
|
||||||
|
return m_lastBytes[0];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
wxChar wxTextInputStream::NextNonSeparators()
|
wxChar wxTextInputStream::NextNonSeparators()
|
||||||
{
|
{
|
||||||
wxChar c = (wxChar) 0;
|
wxChar c = (wxChar) 0;
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
if (!m_input) return (wxChar) 0;
|
c = NextChar();
|
||||||
c = m_input.GetC();
|
if (c == wxEOT) return (wxChar) 0;
|
||||||
|
|
||||||
if (c != wxT('\n') &&
|
if (c != wxT('\n') &&
|
||||||
c != wxT('\r') &&
|
c != wxT('\r') &&
|
||||||
@@ -76,162 +117,65 @@ bool wxTextInputStream::EatEOL(const wxChar &c)
|
|||||||
|
|
||||||
if (c == wxT('\r')) // eat on both Mac and DOS
|
if (c == wxT('\r')) // eat on both Mac and DOS
|
||||||
{
|
{
|
||||||
if (!m_input) return TRUE;
|
wxChar c2 = NextChar();
|
||||||
wxChar c2 = m_input.GetC();
|
if(c2 == wxEOT) return TRUE; // end of stream reached, had enough :-)
|
||||||
|
|
||||||
if (c2 != wxT('\n')) m_input.Ungetch( c2 ); // Don't eat on Mac
|
if (c2 != wxT('\n')) UngetLast(); // Don't eat on Mac
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
void wxTextInputStream::SkipIfEndOfLine( wxChar c )
|
wxUint32 wxTextInputStream::Read32(int base)
|
||||||
{
|
{
|
||||||
if (EatEOL(c)) return;
|
wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
|
||||||
else m_input.Ungetch( c ); // no line terminator
|
if(!m_input) return 0;
|
||||||
}
|
|
||||||
|
|
||||||
wxUint32 wxTextInputStream::Read32()
|
wxString word = ReadWord();
|
||||||
{
|
if(word.IsEmpty())
|
||||||
/* I only implemented a simple integer parser */
|
|
||||||
// VZ: what about using strtol()?? (TODO)
|
|
||||||
|
|
||||||
int sign;
|
|
||||||
wxInt32 i;
|
|
||||||
|
|
||||||
if (!m_input) return 0;
|
|
||||||
int c = NextNonSeparators();
|
|
||||||
if (c==(wxChar)0) return 0;
|
|
||||||
|
|
||||||
i = 0;
|
|
||||||
if (! (c == wxT('-') || c == wxT('+') || isdigit(c)) )
|
|
||||||
{
|
|
||||||
m_input.Ungetch(c);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
return wxStrtoul(word.c_str(), 0, base);
|
||||||
|
|
||||||
if (c == wxT('-'))
|
|
||||||
{
|
|
||||||
sign = -1;
|
|
||||||
c = m_input.GetC();
|
|
||||||
} else
|
|
||||||
if (c == wxT('+'))
|
|
||||||
{
|
|
||||||
sign = 1;
|
|
||||||
c = m_input.GetC();
|
|
||||||
} else
|
|
||||||
{
|
|
||||||
sign = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (isdigit(c))
|
|
||||||
{
|
|
||||||
i = i*10 + (c - (int)wxT('0'));
|
|
||||||
c = m_input.GetC();
|
|
||||||
}
|
|
||||||
|
|
||||||
SkipIfEndOfLine( c );
|
|
||||||
|
|
||||||
i *= sign;
|
|
||||||
|
|
||||||
return (wxUint32)i;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
wxUint16 wxTextInputStream::Read16()
|
wxUint16 wxTextInputStream::Read16(int base)
|
||||||
{
|
{
|
||||||
return (wxUint16)Read32();
|
return (wxUint16)Read32(base);
|
||||||
}
|
}
|
||||||
|
|
||||||
wxUint8 wxTextInputStream::Read8()
|
wxUint8 wxTextInputStream::Read8(int base)
|
||||||
{
|
{
|
||||||
return (wxUint8)Read32();
|
return (wxUint8)Read32(base);
|
||||||
|
}
|
||||||
|
|
||||||
|
wxInt32 wxTextInputStream::Read32S(int base)
|
||||||
|
{
|
||||||
|
wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
|
||||||
|
if(!m_input) return 0;
|
||||||
|
|
||||||
|
wxString word = ReadWord();
|
||||||
|
if(word.IsEmpty())
|
||||||
|
return 0;
|
||||||
|
return wxStrtol(word.c_str(), 0, base);
|
||||||
|
}
|
||||||
|
|
||||||
|
wxInt16 wxTextInputStream::Read16S(int base)
|
||||||
|
{
|
||||||
|
return (wxInt16)Read32S(base);
|
||||||
|
}
|
||||||
|
|
||||||
|
wxInt8 wxTextInputStream::Read8S(int base)
|
||||||
|
{
|
||||||
|
return (wxInt8)Read32S(base);
|
||||||
}
|
}
|
||||||
|
|
||||||
double wxTextInputStream::ReadDouble()
|
double wxTextInputStream::ReadDouble()
|
||||||
{
|
{
|
||||||
/* I only implemented a simple float parser
|
if(!m_input) return 0;
|
||||||
* VZ: what about using strtod()?? (TODO)
|
wxString word = ReadWord();
|
||||||
*/
|
if(word.IsEmpty())
|
||||||
|
|
||||||
double f;
|
|
||||||
int theSign;
|
|
||||||
|
|
||||||
if (!m_input)
|
|
||||||
return 0;
|
return 0;
|
||||||
|
return wxStrtod(word.c_str(), 0);
|
||||||
int c = NextNonSeparators();
|
|
||||||
if (c==(wxChar)0) return 0;
|
|
||||||
|
|
||||||
f = 0.0;
|
|
||||||
if (! (c == wxT('.') || c == wxT(',') || c == wxT('-') || c == wxT('+') || isdigit(c)) )
|
|
||||||
{
|
|
||||||
m_input.Ungetch(c);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (c == wxT('-'))
|
|
||||||
{
|
|
||||||
theSign = -1;
|
|
||||||
c = m_input.GetC();
|
|
||||||
} else
|
|
||||||
if (c == wxT('+'))
|
|
||||||
{
|
|
||||||
theSign = 1;
|
|
||||||
c = m_input.GetC();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
theSign = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (isdigit(c))
|
|
||||||
{
|
|
||||||
f = f*10 + (c - wxT('0'));
|
|
||||||
c = m_input.GetC();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (c == wxT('.') || c == wxT(','))
|
|
||||||
{
|
|
||||||
double f_multiplicator = (double) 0.1;
|
|
||||||
|
|
||||||
c = m_input.GetC();
|
|
||||||
|
|
||||||
while (isdigit(c))
|
|
||||||
{
|
|
||||||
f += (c-wxT('0'))*f_multiplicator;
|
|
||||||
f_multiplicator /= 10;
|
|
||||||
c = m_input.GetC();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (c == wxT('e'))
|
|
||||||
{
|
|
||||||
double f_multiplicator = 0.0;
|
|
||||||
int i, e;
|
|
||||||
|
|
||||||
c = m_input.GetC();
|
|
||||||
|
|
||||||
switch (c)
|
|
||||||
{
|
|
||||||
case wxT('-'): f_multiplicator = 0.1; break;
|
|
||||||
case wxT('+'): f_multiplicator = 10.0; break;
|
|
||||||
}
|
|
||||||
|
|
||||||
e = Read8(); // why only max 256 ?
|
|
||||||
|
|
||||||
for (i=0;i<e;i++)
|
|
||||||
f *= f_multiplicator;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
SkipIfEndOfLine( c );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
m_input.Ungetch(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
f *= theSign;
|
|
||||||
return f;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
wxString wxTextInputStream::ReadString()
|
wxString wxTextInputStream::ReadString()
|
||||||
@@ -245,19 +189,9 @@ wxString wxTextInputStream::ReadLine()
|
|||||||
|
|
||||||
while ( !m_input.Eof() )
|
while ( !m_input.Eof() )
|
||||||
{
|
{
|
||||||
#if wxUSE_UNICODE
|
wxChar c = NextChar();
|
||||||
// FIXME: this only works for single byte encodings
|
if(c == wxEOT)
|
||||||
// How to read a single char in an unknown encoding???
|
break;
|
||||||
char buf[10];
|
|
||||||
buf[0] = m_input.GetC();
|
|
||||||
buf[1] = 0;
|
|
||||||
|
|
||||||
wxChar wbuf[2];
|
|
||||||
m_conv.MB2WC( wbuf, buf, 2 );
|
|
||||||
wxChar c = wbuf[0];
|
|
||||||
#else
|
|
||||||
char c = m_input.GetC();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ( !m_input )
|
if ( !m_input )
|
||||||
break;
|
break;
|
||||||
@@ -286,9 +220,8 @@ wxString wxTextInputStream::ReadWord()
|
|||||||
|
|
||||||
while ( !m_input.Eof() )
|
while ( !m_input.Eof() )
|
||||||
{
|
{
|
||||||
c = m_input.GetC();
|
c = NextChar();
|
||||||
|
if(c == wxEOT)
|
||||||
if (!m_input)
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (m_separators.Contains(c))
|
if (m_separators.Contains(c))
|
||||||
@@ -311,13 +244,8 @@ wxTextInputStream& wxTextInputStream::operator>>(wxString& word)
|
|||||||
|
|
||||||
wxTextInputStream& wxTextInputStream::operator>>(char& c)
|
wxTextInputStream& wxTextInputStream::operator>>(char& c)
|
||||||
{
|
{
|
||||||
if (!m_input)
|
|
||||||
{
|
|
||||||
c = 0;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
c = m_input.GetC();
|
c = m_input.GetC();
|
||||||
|
if(m_input.LastRead() <= 0) c = 0;
|
||||||
|
|
||||||
if (EatEOL(c))
|
if (EatEOL(c))
|
||||||
c = '\n';
|
c = '\n';
|
||||||
|
Reference in New Issue
Block a user