made wxTextInputStream Unicode safe and general cleanup (patch 653775)

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@21825 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2003-07-09 22:55:57 +00:00
parent ed83150139
commit 2348a84293
3 changed files with 167 additions and 173 deletions

View File

@@ -15,6 +15,12 @@ and on a typical 32-bit computer, none of these match to the "long" type (wxInt3
is defined as int on 32-bit architectures) so that you cannot use long. To avoid is defined as int on 32-bit architectures) so that you cannot use long. To avoid
problems (here and elsewhere), make use of wxInt32, wxUint32 and similar types. problems (here and elsewhere), make use of wxInt32, wxUint32 and similar types.
If you're scanning through a file using wxTextInputStream, you should check for EOF {\bf before}
reading the next item (word / number), because otherwise the last item may get lost.
You should however be prepared to receive an empty item (empty string / zero number) at the
end of file, especially on Windows systems. This is unavoidable because most (but not all) files end
with whitespace (i.e. usually a newline).
For example: For example:
\begin{verbatim} \begin{verbatim}
@@ -37,14 +43,20 @@ For example:
\membersection{wxTextInputStream::wxTextInputStream}\label{wxtextinputstreamconstr} \membersection{wxTextInputStream::wxTextInputStream}\label{wxtextinputstreamconstr}
\func{}{wxTextInputStream}{\param{wxInputStream\&}{ stream}} \func{}{wxTextInputStream}{\param{wxInputStream\&}{ stream}, \param{const wxString\&}{ sep=wxT(" \t")},
\param{wxMBConv\&}{ conv = wxConvUTF8} }
Constructs a text stream object from an input stream. Only read methods will Constructs a text stream object from an input stream. Only read methods will
be available. be available.
\wxheading{Parameters} \wxheading{Parameters}
\docparam{stream}{The input stream.} \docparam{stream}{The underlying input stream.}
\docparam{sep}{The initial string separator characters.}
\docparam{conv}{{\it In Unicode build only:} The encoding converter used to convert the bytes in the
underlying input stream to characters.}
\membersection{wxTextInputStream::\destruct{wxTextInputStream}} \membersection{wxTextInputStream::\destruct{wxTextInputStream}}
@@ -52,23 +64,64 @@ be available.
Destroys the wxTextInputStream object. Destroys the wxTextInputStream object.
\membersection{wxTextInputStream::Read8} \membersection{wxTextInputStream::Read8}\label{wxtextinputstreamread8}
\func{wxUint8}{Read8}{\void} \func{wxUint8}{Read8}{\param{int}{ base = 10}}
Reads a single byte from the stream. Reads a single unsigned byte from the stream, given in base {\it base}.
The value of {\it base} must be between $2$ and $36$, inclusive, or
be a special value $0$ which means that the usual rules of {\tt C} numbers are
applied: if the number starts with {\tt 0x} it is considered to be in base
$16$, if it starts with {\tt 0} - in base $8$ and in base $10$ otherwise. Note
that you may not want to specify the base $0$ if you are parsing the numbers
which may have leading zeroes as they can yield unexpected (to the user not
familiar with C) results.
\membersection{wxTextInputStream::Read8S}
\func{wxInt8}{Read8S}{\param{int}{ base = 10}}
Reads a single signed byte from the stream.
See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
description of the {\it base} parameter.
\membersection{wxTextInputStream::Read16} \membersection{wxTextInputStream::Read16}
\func{wxUint16}{Read16}{\void} \func{wxUint16}{Read16}{\param{int}{ base = 10}}
Reads a 16 bit integer from the stream. Reads an unsigned 16 bit integer from the stream.
See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
description of the {\it base} parameter.
\membersection{wxTextInputStream::Read16S}
\func{wxInt16}{Read16S}{\param{int}{ base = 10}}
Reads a signed 16 bit integer from the stream.
See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
description of the {\it base} parameter.
\membersection{wxTextInputStream::Read32} \membersection{wxTextInputStream::Read32}
\func{wxUint32}{Read32}{\void} \func{wxUint32}{Read32}{\param{int}{ base = 10}}
Reads a 32 bit integer from the stream. Reads a 32 bit unsigned integer from the stream.
See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
description of the {\it base} parameter.
\membersection{wxTextInputStream::Read32S}
\func{wxInt32}{Read32S}{\param{int}{ base = 10}}
Reads a 32 bit signed integer from the stream.
See \helpref{wxTextInputStream::Read8}{wxtextinputstreamread8} for the
description of the {\it base} parameter.
\membersection{wxTextInputStream::ReadDouble} \membersection{wxTextInputStream::ReadDouble}

View File

@@ -29,6 +29,13 @@ typedef wxTextOutputStream& (*__wxTextOutputManip)(wxTextOutputStream&);
WXDLLIMPEXP_BASE wxTextOutputStream &endl( wxTextOutputStream &stream ); WXDLLIMPEXP_BASE wxTextOutputStream &endl( wxTextOutputStream &stream );
#define wxEOT wxT('\4') // the End-Of-Text control code (used only inside wxTextInputStream)
// If you're scanning through a file using wxTextInputStream, you should check for EOF _before_
// reading the next item (word / number), because otherwise the last item may get lost.
// You should however be prepared to receive an empty item (empty string / zero number) at the
// end of file, especially on Windows systems. This is unavoidable because most (but not all) files end
// with whitespace (i.e. usually a newline).
class WXDLLIMPEXP_BASE wxTextInputStream class WXDLLIMPEXP_BASE wxTextInputStream
{ {
public: public:
@@ -39,11 +46,14 @@ public:
#endif #endif
~wxTextInputStream(); ~wxTextInputStream();
wxUint32 Read32(); wxUint32 Read32(int base = 10); // base may be between 2 and 36, inclusive, or the special 0 (= C format)
wxUint16 Read16(); wxUint16 Read16(int base = 10);
wxUint8 Read8(); wxUint8 Read8(int base = 10);
wxInt32 Read32S(int base = 10);
wxInt16 Read16S(int base = 10);
wxInt8 Read8S(int base = 10);
double ReadDouble(); double ReadDouble();
wxString ReadString(); // deprecated use ReadLine or ReadWord instead wxString ReadString(); // deprecated: use ReadLine or ReadWord instead
wxString ReadLine(); wxString ReadLine();
wxString ReadWord(); wxString ReadWord();
@@ -65,14 +75,17 @@ public:
protected: protected:
wxInputStream &m_input; wxInputStream &m_input;
wxString m_separators; wxString m_separators;
char m_lastBytes[10]; // stores the bytes that were read for the last character
#if wxUSE_UNICODE #if wxUSE_UNICODE
wxMBConv &m_conv; wxMBConv &m_conv;
#endif #endif
bool EatEOL(const wxChar &c); bool EatEOL(const wxChar &c);
void UngetLast(); // should be used instead of wxInputStream::Ungetch() because of Unicode issues
// returns EOT (\4) if there is a stream error, or end of file
wxChar NextChar(); // this should be used instead of GetC() because of Unicode issues
wxChar NextNonSeparators(); wxChar NextNonSeparators();
void SkipIfEndOfLine( wxChar c );
}; };
typedef enum typedef enum

View File

@@ -42,11 +42,13 @@
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep, wxMBConv& conv) wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep, wxMBConv& conv)
: m_input(s), m_separators(sep), m_conv(conv) : m_input(s), m_separators(sep), m_conv(conv)
{ {
memset((void*)m_lastBytes, 0, 10);
} }
#else #else
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep) wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep)
: m_input(s), m_separators(sep) : m_input(s), m_separators(sep)
{ {
memset((void*)m_lastBytes, 0, 10);
} }
#endif #endif
@@ -54,13 +56,52 @@ wxTextInputStream::~wxTextInputStream()
{ {
} }
// Pushes the bytes remembered in m_lastBytes back onto the underlying input
// stream and then clears the buffer.
//
// The number of bytes to push back is found with a strlen-like scan for the
// first zero byte, so this works on raw bytes in both ANSI and Unicode builds.
// NOTE(review): because of that scan, if the first remembered byte is zero
// the computed length is 0 and Ungetch() is asked to push back nothing.
void wxTextInputStream::UngetLast()
{
    // count the stored bytes: everything up to (not including) the first zero
    size_t len = 0;
    for ( ; m_lastBytes[len]; len++ )
        ;

    m_input.Ungetch(m_lastBytes, len);

    // the bytes now belong to the stream again, forget them here
    memset(m_lastBytes, 0, sizeof(m_lastBytes));
}
// Reads the next character from the underlying stream, remembering the raw
// bytes it consumed in m_lastBytes.
//
// Returns wxEOT when a read from the stream yields no data (LastRead() <= 0)
// or, in Unicode builds, when no character could be decoded from the bytes
// gathered so far within the buffer limit.
wxChar wxTextInputStream::NextChar()
{
#if wxUSE_UNICODE
    // start from an all-zero buffer so that the bytes gathered so far are
    // always followed by a terminating zero when handed to the converter
    memset(m_lastBytes, 0, sizeof(m_lastBytes));

    wxChar decoded[2];
    size_t pos = 0;

    // the last slot is never written so the buffer stays zero-terminated
    while ( pos < sizeof(m_lastBytes) - 1 )
    {
        // append one more byte from the stream
        m_lastBytes[pos] = m_input.GetC();

        if ( m_input.LastRead() <= 0 )
            return wxEOT;

        // try to decode what we have so far; MB2WC returns -1 for failure,
        // in which case another byte is fetched and the conversion retried
        const int converted = (int) m_conv.MB2WC(decoded, m_lastBytes, 2);
        if ( converted >= 0 ) // 0 could happen for the '\0' character
            return decoded[0];

        pos++;
    }

    // there should be no encoding which requires more than nine bytes for one character...
    return wxEOT;
#else
    // one byte is one character here
    const char ch = m_input.GetC();
    m_lastBytes[0] = ch;

    if ( m_input.LastRead() <= 0 )
        return wxEOT;

    return ch;
#endif
}
wxChar wxTextInputStream::NextNonSeparators() wxChar wxTextInputStream::NextNonSeparators()
{ {
wxChar c = (wxChar) 0; wxChar c = (wxChar) 0;
for (;;) for (;;)
{ {
if (!m_input) return (wxChar) 0; c = NextChar();
c = m_input.GetC(); if (c == wxEOT) return (wxChar) 0;
if (c != wxT('\n') && if (c != wxT('\n') &&
c != wxT('\r') && c != wxT('\r') &&
@@ -76,162 +117,65 @@ bool wxTextInputStream::EatEOL(const wxChar &c)
if (c == wxT('\r')) // eat on both Mac and DOS if (c == wxT('\r')) // eat on both Mac and DOS
{ {
if (!m_input) return TRUE; wxChar c2 = NextChar();
wxChar c2 = m_input.GetC(); if(c2 == wxEOT) return TRUE; // end of stream reached, had enough :-)
if (c2 != wxT('\n')) m_input.Ungetch( c2 ); // Don't eat on Mac if (c2 != wxT('\n')) UngetLast(); // Don't eat on Mac
return TRUE; return TRUE;
} }
return FALSE; return FALSE;
} }
void wxTextInputStream::SkipIfEndOfLine( wxChar c ) wxUint32 wxTextInputStream::Read32(int base)
{ {
if (EatEOL(c)) return; wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
else m_input.Ungetch( c ); // no line terminator if(!m_input) return 0;
}
wxUint32 wxTextInputStream::Read32() wxString word = ReadWord();
{ if(word.IsEmpty())
/* I only implemented a simple integer parser */
// VZ: what about using strtol()?? (TODO)
int sign;
wxInt32 i;
if (!m_input) return 0;
int c = NextNonSeparators();
if (c==(wxChar)0) return 0;
i = 0;
if (! (c == wxT('-') || c == wxT('+') || isdigit(c)) )
{
m_input.Ungetch(c);
return 0; return 0;
} return wxStrtoul(word.c_str(), 0, base);
if (c == wxT('-'))
{
sign = -1;
c = m_input.GetC();
} else
if (c == wxT('+'))
{
sign = 1;
c = m_input.GetC();
} else
{
sign = 1;
}
while (isdigit(c))
{
i = i*10 + (c - (int)wxT('0'));
c = m_input.GetC();
}
SkipIfEndOfLine( c );
i *= sign;
return (wxUint32)i;
} }
wxUint16 wxTextInputStream::Read16() wxUint16 wxTextInputStream::Read16(int base)
{ {
return (wxUint16)Read32(); return (wxUint16)Read32(base);
} }
wxUint8 wxTextInputStream::Read8() wxUint8 wxTextInputStream::Read8(int base)
{ {
return (wxUint8)Read32(); return (wxUint8)Read32(base);
}
// Reads the next word from the stream and parses it as a signed integer.
//
// base must be 0 or lie between 2 and 36 inclusive (checked by the assert
// below); it is passed through unchanged to wxStrtol().
//
// Returns 0 if the stream is not in a good state or if no word could be read.
wxInt32 wxTextInputStream::Read32S(int base)
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );

    if ( !m_input )
        return 0;

    const wxString token = ReadWord();

    // an empty word yields 0 without attempting any conversion
    return token.IsEmpty() ? 0 : wxStrtol(token.c_str(), 0, base);
}
// Reads a signed integer via Read32S() and narrows the result to 16 bits.
//
// base is forwarded unchanged to Read32S().
wxInt16 wxTextInputStream::Read16S(int base)
{
    const wxInt32 value = Read32S(base);

    // plain cast: no range check is performed on the parsed value
    return (wxInt16)value;
}
wxInt8 wxTextInputStream::Read8S(int base)
{
return (wxInt8)Read32S(base);
} }
double wxTextInputStream::ReadDouble() double wxTextInputStream::ReadDouble()
{ {
/* I only implemented a simple float parser if(!m_input) return 0;
* VZ: what about using strtod()?? (TODO) wxString word = ReadWord();
*/ if(word.IsEmpty())
double f;
int theSign;
if (!m_input)
return 0; return 0;
return wxStrtod(word.c_str(), 0);
int c = NextNonSeparators();
if (c==(wxChar)0) return 0;
f = 0.0;
if (! (c == wxT('.') || c == wxT(',') || c == wxT('-') || c == wxT('+') || isdigit(c)) )
{
m_input.Ungetch(c);
return 0;
}
if (c == wxT('-'))
{
theSign = -1;
c = m_input.GetC();
} else
if (c == wxT('+'))
{
theSign = 1;
c = m_input.GetC();
}
else
{
theSign = 1;
}
while (isdigit(c))
{
f = f*10 + (c - wxT('0'));
c = m_input.GetC();
}
if (c == wxT('.') || c == wxT(','))
{
double f_multiplicator = (double) 0.1;
c = m_input.GetC();
while (isdigit(c))
{
f += (c-wxT('0'))*f_multiplicator;
f_multiplicator /= 10;
c = m_input.GetC();
}
if (c == wxT('e'))
{
double f_multiplicator = 0.0;
int i, e;
c = m_input.GetC();
switch (c)
{
case wxT('-'): f_multiplicator = 0.1; break;
case wxT('+'): f_multiplicator = 10.0; break;
}
e = Read8(); // why only max 256 ?
for (i=0;i<e;i++)
f *= f_multiplicator;
}
else
SkipIfEndOfLine( c );
}
else
{
m_input.Ungetch(c);
}
f *= theSign;
return f;
} }
wxString wxTextInputStream::ReadString() wxString wxTextInputStream::ReadString()
@@ -245,19 +189,9 @@ wxString wxTextInputStream::ReadLine()
while ( !m_input.Eof() ) while ( !m_input.Eof() )
{ {
#if wxUSE_UNICODE wxChar c = NextChar();
// FIXME: this is only works for single byte encodings if(c == wxEOT)
// How-to read a single char in an unkown encoding??? break;
char buf[10];
buf[0] = m_input.GetC();
buf[1] = 0;
wxChar wbuf[2];
m_conv.MB2WC( wbuf, buf, 2 );
wxChar c = wbuf[0];
#else
char c = m_input.GetC();
#endif
if ( !m_input ) if ( !m_input )
break; break;
@@ -286,9 +220,8 @@ wxString wxTextInputStream::ReadWord()
while ( !m_input.Eof() ) while ( !m_input.Eof() )
{ {
c = m_input.GetC(); c = NextChar();
if(c == wxEOT)
if (!m_input)
break; break;
if (m_separators.Contains(c)) if (m_separators.Contains(c))
@@ -311,13 +244,8 @@ wxTextInputStream& wxTextInputStream::operator>>(wxString& word)
wxTextInputStream& wxTextInputStream::operator>>(char& c) wxTextInputStream& wxTextInputStream::operator>>(char& c)
{ {
if (!m_input)
{
c = 0;
return *this;
}
c = m_input.GetC(); c = m_input.GetC();
if(m_input.LastRead() <= 0) c = 0;
if (EatEOL(c)) if (EatEOL(c))
c = '\n'; c = '\n';