Files
wxWidgets/src/common/txtstrm.cpp
2019-04-05 10:12:27 -07:00

723 lines
16 KiB
C++

///////////////////////////////////////////////////////////////////////////////
// Name: src/common/txtstrm.cpp
// Purpose: Text stream classes
// Author: Guilhem Lavaux
// Modified by:
// Created: 28/06/98
// Copyright: (c) Guilhem Lavaux
// Licence: wxWindows licence
/////////////////////////////////////////////////////////////////////////////
// For compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"
#ifdef __BORLANDC__
#pragma hdrstop
#endif
#if wxUSE_STREAMS
#include "wx/txtstrm.h"
#ifndef WX_PRECOMP
#include "wx/crt.h"
#endif
#include <ctype.h>
// ----------------------------------------------------------------------------
// wxTextInputStream
// ----------------------------------------------------------------------------
#if wxUSE_UNICODE
// Create a text reader on top of the stream s.
//
// The characters of sep are used as word separators and a private clone of
// conv, deleted in the destructor, is used to decode the input bytes.
wxTextInputStream::wxTextInputStream(wxInputStream &s,
                                     const wxString &sep,
                                     const wxMBConv& conv)
    : m_input(s),
      m_separators(sep),
      m_conv(conv.Clone())
{
    // No bytes have been read into the buffer yet.
    m_validEnd = 0;
    m_validBegin = 0;

#if SIZEOF_WCHAR_T == 2
    // No wide character is pending from a previous GetChar() call.
    m_lastWChar = 0;
#endif // SIZEOF_WCHAR_T == 2
}
#else
// Create a text reader on top of the stream s, using the characters of sep as
// word separators.
wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep)
    : m_input(s),
      m_separators(sep)
{
    // No bytes have been read into the buffer yet.
    m_validEnd = 0;
    m_validBegin = 0;
    m_lastBytes[0] = 0;
}
#endif
// Release the converter cloned by the constructor (Unicode builds only).
wxTextInputStream::~wxTextInputStream()
{
#if wxUSE_UNICODE
    delete m_conv;
#endif // wxUSE_UNICODE
}
// Push all the bytes currently held in m_lastBytes back to the underlying
// stream and mark the buffer as empty.
void wxTextInputStream::UngetLast()
{
    // Nothing buffered, nothing to give back.
    if ( !m_validEnd )
        return;

    m_input.Ungetch(m_lastBytes, m_validEnd);

    m_validEnd = 0;
    m_validBegin = 0;
}
// Read and return the next character from the input stream.
//
// Returns 0 if nothing more could be read from the stream or, in Unicode
// builds, if the buffered bytes could not be decoded at all.
//
// In Unicode builds the raw bytes are accumulated in m_lastBytes and decoded
// using m_conv: m_validEnd is the number of valid bytes in the buffer and
// m_validBegin the number of those already consumed by the character
// returned from the previous call. In non-Unicode builds a single byte is
// read and returned as is.
wxChar wxTextInputStream::GetChar()
{
#if wxUSE_UNICODE
#if SIZEOF_WCHAR_T == 2
// Return the already read character remaining from the last call to this
// function, if any.
if ( m_lastWChar )
{
const wxChar wc = m_lastWChar;
m_lastWChar = 0;
return wc;
}
#endif // SIZEOF_WCHAR_T
// If we have any non-decoded bytes left from the last call, shift them to
// be at the beginning of the buffer.
if ( m_validBegin < m_validEnd )
{
m_validEnd -= m_validBegin;
memmove(m_lastBytes, m_lastBytes + m_validBegin, m_validEnd);
}
else // All bytes were already decoded and consumed.
{
m_validEnd = 0;
}
// We may need to decode up to 4 characters if we have input starting with
// 3 BOM-like bytes, but not actually containing a BOM, as decoding it will
// only succeed when 4 bytes are read -- and will yield 4 wide characters.
wxChar wbuf[4];
// Try decoding progressively longer prefixes of the buffer, reading one more
// byte from the stream whenever the already buffered ones are exhausted.
for(size_t inlen = 0; inlen < sizeof(m_lastBytes); inlen++)
{
if ( inlen >= m_validEnd )
{
// actually read the next character
m_lastBytes[inlen] = m_input.GetC();
// Nothing was read: report it to the caller by returning 0.
if (m_input.LastRead() == 0)
return 0;
m_validEnd++;
}
//else: Retry decoding what we already have in the buffer.
switch ( m_conv->ToWChar(wbuf, WXSIZEOF(wbuf), m_lastBytes, inlen + 1) )
{
case 0:
// this is a bug in converter object as it should either fail
// or decode non-empty string to something non-empty
wxFAIL_MSG("ToWChar() can't return 0 for non-empty input");
break;
case wxCONV_FAILED:
// the buffer probably doesn't contain enough bytes to decode
// as a complete character, try with more bytes
break;
default:
// If we couldn't decode a single character during the last
// loop iteration, but decoded more than one of them with just
// one extra byte, the only explanation is that we were using a
// wxConvAuto conversion recognizing the initial BOM and that
// it couldn't detect the presence or absence of BOM so far,
// but now finally has enough data to see that there is none.
// As we must have fallen back to Latin-1 in this case, return
// just the first byte and keep the other ones for the next
// time.
m_validBegin = 1;
return wbuf[0];
#if SIZEOF_WCHAR_T == 2
case 2:
// When wchar_t uses UTF-16, we could have decoded a single
// Unicode code point as 2 wchar_t characters and there is
// nothing else to do here but to return the first one now and
// remember the second one for the next call, as there is no
// way to fit both of them into a single wxChar in this case.
m_lastWChar = wbuf[1];
#endif // SIZEOF_WCHAR_T == 2
wxFALLTHROUGH;
case 1:
// All inlen + 1 bytes passed to the converter are consumed by this
// character.
m_validBegin = inlen + 1;
// we finally decoded a character
return wbuf[0];
}
}
// There should be no encoding which requires more than 10 bytes to decode
// at least one character (the most actually seems to be 7: 3 for the
// initial BOM, which is ignored, and 4 for the longest possible encoding
// of a Unicode character in UTF-8), so something must be wrong with our
// conversion but we have no way to signal it from here and just return 0
// as if we reached the end of the stream.
// Mark all the buffered bytes as valid and not consumed.
m_validBegin = 0;
m_validEnd = sizeof(m_lastBytes);
return 0;
#else
// Non-Unicode build: each byte read is returned directly as a character.
m_lastBytes[0] = m_input.GetC();
if(m_input.LastRead() <= 0)
{
m_validEnd = 0;
return 0;
}
// Remember that one byte is buffered so that UngetLast() can push it back.
m_validEnd = 1;
return m_lastBytes[0];
#endif
}
// Skip over line feeds, carriage returns and the characters listed in
// m_separators and return the first other character found.
//
// Returns 0 if GetChar() returned 0 before any such character was seen.
wxChar wxTextInputStream::NextNonSeparators()
{
    wxChar ch;
    do
    {
        ch = GetChar();
    }
    while ( ch &&
            (ch == wxT('\n') ||
             ch == wxT('\r') ||
             m_separators.Find(ch) >= 0) );

    return ch;
}
bool wxTextInputStream::EatEOL(const wxChar &c)
{
if (c == wxT('\n')) return true; // eat on UNIX
if (c == wxT('\r')) // eat on both Mac and DOS
{
wxChar c2 = GetChar();
if (!c2) return true; // end of stream reached, had enough :-)
if (c2 != wxT('\n')) UngetLast(); // Don't eat on Mac
return true;
}
return false;
}
// Read the next word from the stream and parse it as an unsigned 64 bit
// integer in the given base (0 or 2..36).
//
// Returns 0 if the stream is in an error state, no word could be read or the
// word is not a valid number.
wxUint64 wxTextInputStream::Read64(int base)
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    if ( !m_input )
        return 0;

    const wxString token = ReadWord();

    wxUint64 value;
    if ( token.empty() || !token.ToULongLong(&value, base) )
        return 0;

    return value;
}
// Read the next word from the stream and parse it with wxStrtoul() as an
// unsigned integer in the given base (0 or 2..36).
//
// Returns 0 if the stream is in an error state or no word could be read.
wxUint32 wxTextInputStream::Read32(int base)
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    if ( !m_input )
        return 0;

    const wxString token = ReadWord();

    return token.empty() ? 0 : wxStrtoul(token.c_str(), 0, base);
}
// Read an unsigned integer using Read32() and truncate it to 16 bits.
wxUint16 wxTextInputStream::Read16(int base)
{
    const wxUint32 value = Read32(base);
    return static_cast<wxUint16>(value);
}
// Read an unsigned integer using Read32() and truncate it to 8 bits.
wxUint8 wxTextInputStream::Read8(int base)
{
    const wxUint32 value = Read32(base);
    return static_cast<wxUint8>(value);
}
// Read the next word from the stream and parse it as a signed 64 bit integer
// in the given base (0 or 2..36).
//
// Returns 0 if the stream is in an error state, no word could be read or the
// word is not a valid number.
wxInt64 wxTextInputStream::Read64S(int base)
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    if ( !m_input )
        return 0;

    const wxString token = ReadWord();

    wxInt64 value;
    if ( token.empty() || !token.ToLongLong(&value, base) )
        return 0;

    return value;
}
// Read the next word from the stream and parse it with wxStrtol() as a signed
// integer in the given base (0 or 2..36).
//
// Returns 0 if the stream is in an error state or no word could be read.
wxInt32 wxTextInputStream::Read32S(int base)
{
    wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") );

    if ( !m_input )
        return 0;

    const wxString token = ReadWord();

    return token.empty() ? 0 : wxStrtol(token.c_str(), 0, base);
}
// Read a signed integer using Read32S() and truncate it to 16 bits.
wxInt16 wxTextInputStream::Read16S(int base)
{
    const wxInt32 value = Read32S(base);
    return static_cast<wxInt16>(value);
}
// Read a signed integer using Read32S() and truncate it to 8 bits.
wxInt8 wxTextInputStream::Read8S(int base)
{
    const wxInt32 value = Read32S(base);
    return static_cast<wxInt8>(value);
}
double wxTextInputStream::ReadDouble()
{
if(!m_input) return 0;
wxString word = ReadWord();
if(word.empty())
return 0;
return wxStrtod(word.c_str(), 0);
}
// Read characters up to the next end of line or the end of the input and
// return them, without the end of line itself, which is consumed.
wxString wxTextInputStream::ReadLine()
{
    wxString result;

    while ( !m_input.Eof() )
    {
        const wxChar ch = GetChar();

        // Stop when nothing more can be read or the line is complete.
        if ( !ch || EatEOL(ch) )
            break;

        result += ch;
    }

    return result;
}
// Skip any leading separators and end of line characters and return the
// following run of characters up to the next separator, end of line or end
// of input.
//
// Returns an empty string if the stream is in an error state or no word
// character could be read.
wxString wxTextInputStream::ReadWord()
{
    wxString result;

    if ( !m_input )
        return result;

    wxChar ch = NextNonSeparators();
    while ( ch )
    {
        result += ch;

        if ( m_input.Eof() )
            break;

        ch = GetChar();

        // The word ends at the first separator or end of line; a 0 character
        // terminates the loop via its condition.
        if ( ch && (m_separators.Find(ch) >= 0 || EatEOL(ch)) )
            break;
    }

    return result;
}
// Extract the next word, as returned by ReadWord(), into word.
wxTextInputStream& wxTextInputStream::operator>>(wxString& word)
{
    word = ReadWord();

    return *this;
}
// Extract a single byte read directly from the underlying stream into c.
//
// c is set to 0 if nothing could be read and to '\n' if the byte read starts
// an end of line sequence, which is then consumed by EatEOL().
wxTextInputStream& wxTextInputStream::operator>>(char& c)
{
    const char raw = m_input.GetC();
    c = m_input.LastRead() == 0 ? 0 : raw;

    // Normalize any kind of line ending to a single line feed.
    if ( EatEOL(c) )
        c = '\n';

    return *this;
}
#if wxUSE_UNICODE && wxWCHAR_T_IS_REAL_TYPE
// Extract the next decoded character, as returned by GetChar(), into wc.
wxTextInputStream& wxTextInputStream::operator>>(wchar_t& wc)
{
    wc = GetChar();

    return *this;
}
#endif // wxUSE_UNICODE
// Extract a signed 16 bit integer using Read16S() with its default base.
wxTextInputStream& wxTextInputStream::operator>>(wxInt16& i)
{
    i = Read16S();

    return *this;
}
// Extract a signed 32 bit integer using Read32S() with its default base.
wxTextInputStream& wxTextInputStream::operator>>(wxInt32& i)
{
    i = Read32S();

    return *this;
}
// Extract a signed 64 bit integer using Read64S() with its default base.
wxTextInputStream& wxTextInputStream::operator>>(wxInt64& i)
{
    i = Read64S();

    return *this;
}
// Extract an unsigned 16 bit integer using Read16() with its default base.
wxTextInputStream& wxTextInputStream::operator>>(wxUint16& i)
{
    i = Read16();

    return *this;
}
// Extract an unsigned 32 bit integer using Read32() with its default base.
wxTextInputStream& wxTextInputStream::operator>>(wxUint32& i)
{
    i = Read32();

    return *this;
}
// Extract an unsigned 64 bit integer using Read64() with its default base.
wxTextInputStream& wxTextInputStream::operator>>(wxUint64& i)
{
    i = Read64();

    return *this;
}
// Extract a floating point number using ReadDouble().
wxTextInputStream& wxTextInputStream::operator>>(double& i)
{
    i = ReadDouble();

    return *this;
}
// Extract a floating point number using ReadDouble() and narrow it to float.
wxTextInputStream& wxTextInputStream::operator>>(float& f)
{
    const double value = ReadDouble();
    f = static_cast<float>(value);

    return *this;
}
// Create a text writer on top of the stream s using the given end of line
// mode. In Unicode builds a private clone of conv, deleted in the destructor,
// is used to encode the output.
#if wxUSE_UNICODE
wxTextOutputStream::wxTextOutputStream(wxOutputStream& s,
                                       wxEOL mode,
                                       const wxMBConv& conv)
    : m_output(s),
      m_conv(conv.Clone())
#else
wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, wxEOL mode)
    : m_output(s)
#endif
{
    // wxEOL_NATIVE is never stored: it is resolved to the concrete mode of
    // the platform the library is built for.
    if ( mode == wxEOL_NATIVE )
    {
#if defined(__WINDOWS__)
        m_mode = wxEOL_DOS;
#else
        m_mode = wxEOL_UNIX;
#endif
    }
    else
    {
        m_mode = mode;
    }

#if wxUSE_UNICODE && SIZEOF_WCHAR_T == 2
    // No wide character is pending from a previous PutChar() call.
    m_lastWChar = 0;
#endif // SIZEOF_WCHAR_T == 2
}
// Release the converter cloned by the constructor (Unicode builds only).
wxTextOutputStream::~wxTextOutputStream()
{
#if wxUSE_UNICODE
    delete m_conv;
#endif // wxUSE_UNICODE
}
// Change the end of line mode used when writing '\n'.
//
// wxEOL_NATIVE is resolved to wxEOL_DOS under Windows and to wxEOL_UNIX
// everywhere else before being stored.
void wxTextOutputStream::SetMode(wxEOL mode)
{
#if defined(__WINDOWS__)
    const wxEOL nativeMode = wxEOL_DOS;
#else
    const wxEOL nativeMode = wxEOL_UNIX;
#endif

    m_mode = mode == wxEOL_NATIVE ? nativeMode : mode;
}
// Write the decimal representation of an unsigned 64 bit integer.
void wxTextOutputStream::Write64(wxUint64 i)
{
    const wxString text = wxString::Format("%" wxLongLongFmtSpec "u", i);

    WriteString(text);
}
// Write the decimal representation of an unsigned 32 bit integer.
void wxTextOutputStream::Write32(wxUint32 i)
{
    WriteString(wxString::Format(wxT("%u"), i));
}
// Write the decimal representation of an unsigned 16 bit integer.
void wxTextOutputStream::Write16(wxUint16 i)
{
    // Widen explicitly so that the argument matches the "%u" specifier.
    WriteString(wxString::Format(wxT("%u"), static_cast<unsigned>(i)));
}
// Write the decimal representation of an unsigned 8 bit integer.
void wxTextOutputStream::Write8(wxUint8 i)
{
    // Widen explicitly so that the argument matches the "%u" specifier.
    WriteString(wxString::Format(wxT("%u"), static_cast<unsigned>(i)));
}
void wxTextOutputStream::WriteDouble(double d)
{
wxString str;
str.Printf(wxT("%f"), d);
WriteString(str);
}
void wxTextOutputStream::WriteString(const wxString& string)
{
size_t len = string.length();
wxString out;
out.reserve(len);
for ( size_t i = 0; i < len; i++ )
{
const wxChar c = string[i];
if ( c == wxT('\n') )
{
switch ( m_mode )
{
case wxEOL_DOS:
out << wxT("\r\n");
continue;
case wxEOL_MAC:
out << wxT('\r');
continue;
default:
wxFAIL_MSG( wxT("unknown EOL mode in wxTextOutputStream") );
wxFALLTHROUGH;
case wxEOL_UNIX:
// don't treat '\n' specially
;
}
}
out << c;
}
#if wxUSE_UNICODE
// FIXME-UTF8: use wxCharBufferWithLength if/when we have it
wxCharBuffer buffer = m_conv->cWC2MB(out.wc_str(), out.length(), &len);
m_output.Write(buffer, len);
#else
m_output.Write(out.c_str(), out.length() );
#endif
}
// Write a single character to the stream and return this stream object.
//
// When wchar_t is 2 bytes wide (Unicode builds), the character is encoded
// using m_conv and a character which can't be encoded on its own is kept in
// m_lastWChar to be encoded together with the character passed to the next
// call; each '\n' byte of the encoded output is replaced with the end of
// line sequence selected by m_mode. In all the other builds the character is
// simply written using WriteString().
wxTextOutputStream& wxTextOutputStream::PutChar(wxChar c)
{
#if wxUSE_UNICODE
#if SIZEOF_WCHAR_T == 2
wxCharBuffer buffer;
size_t len;
if ( m_lastWChar )
{
// Encode the character remembered by the previous call together with the
// new one.
wxChar buf[2];
buf[0] = m_lastWChar;
buf[1] = c;
buffer = m_conv->cWC2MB(buf, WXSIZEOF(buf), &len);
m_lastWChar = 0;
}
else
{
buffer = m_conv->cWC2MB(&c, 1, &len);
}
if ( !len )
{
// Conversion failed, possibly because we have the first half of a
// surrogate character, so just store it and write it out when the
// second half is written to the stream too later.
//
// Notice that if we already had had a valid m_lastWChar, it is simply
// discarded here which is very bad, but there is no way to signal an
// error from here and this is not worse than the old code behaviour.
m_lastWChar = c;
}
else
{
// Write the encoded bytes one by one, translating the line feeds according
// to the current end of line mode.
for ( size_t n = 0; n < len; n++ )
{
const char c2 = buffer[n];
if ( c2 == '\n' )
{
switch ( m_mode )
{
case wxEOL_DOS:
m_output.Write("\r\n", 2);
continue;
case wxEOL_MAC:
m_output.Write("\r", 1);
continue;
default:
wxFAIL_MSG( wxT("unknown EOL mode in wxTextOutputStream") );
wxFALLTHROUGH;
case wxEOL_UNIX:
// don't treat '\n' specially
;
}
}
m_output.Write(&c2, 1);
}
}
#else // SIZEOF_WCHAR_T == 4
WriteString( wxString(&c, *m_conv, 1) );
#endif // SIZEOF_WCHAR_T == 2 or 4
#else
WriteString( wxString(&c, wxConvLocal, 1) );
#endif
return *this;
}
// Write out anything the converter produces in addition to the terminating
// NUL when encoding an empty string.
//
// Nothing is written if the conversion fails or yields nothing but the
// terminating NUL. This function does nothing in non-Unicode builds.
void wxTextOutputStream::Flush()
{
#if wxUSE_UNICODE
    // First ask the converter how many bytes, including the trailing NUL, it
    // needs for encoding the empty string.
    const size_t len = m_conv->FromWChar(NULL, 0, L"", 1);
    const size_t nulLen = m_conv->GetMBNulLen();

    // wxCONV_FAILED is the greatest size_t value, so it must be checked for
    // explicitly: otherwise a failed conversion would look like a huge
    // amount of data to write and the buffer allocation below would
    // overflow.
    if ( len == wxCONV_FAILED || nulLen == wxCONV_FAILED || len <= nulLen )
        return;

    wxCharBuffer buf(len);
    if ( m_conv->FromWChar(buf.data(), len, L"", 1) == wxCONV_FAILED )
    {
        // Don't write the uninitialized buffer contents to the stream.
        return;
    }

    // The terminating NUL itself is not part of the output.
    m_output.Write(buf, len - nulLen);
#endif // wxUSE_UNICODE
}
// Insert a string into the stream using WriteString().
wxTextOutputStream& wxTextOutputStream::operator<<(const wxString& string)
{
    WriteString(string);

    return *this;
}
// Insert a single narrow character, converted to a string with
// wxString::FromAscii(), into the stream.
wxTextOutputStream& wxTextOutputStream::operator<<(char c)
{
    const wxString text = wxString::FromAscii(c);
    WriteString(text);

    return *this;
}
#if wxUSE_UNICODE && wxWCHAR_T_IS_REAL_TYPE
// Insert a single wide character into the stream using PutChar().
wxTextOutputStream& wxTextOutputStream::operator<<(wchar_t wc)
{
    PutChar(wc);

    return *this;
}
#endif // wxUSE_UNICODE
// Insert a signed 16 bit integer into the stream using Write().
wxTextOutputStream& wxTextOutputStream::operator<<(wxInt16 c)
{
    Write(c);

    return *this;
}
// Insert a signed 32 bit integer into the stream using Write().
wxTextOutputStream& wxTextOutputStream::operator<<(wxInt32 c)
{
    Write(c);

    return *this;
}
// Insert a signed 64 bit integer into the stream using Write().
wxTextOutputStream& wxTextOutputStream::operator<<(wxInt64 c)
{
    Write(c);

    return *this;
}
// Insert an unsigned 16 bit integer into the stream using Write().
wxTextOutputStream& wxTextOutputStream::operator<<(wxUint16 c)
{
    Write(c);

    return *this;
}
// Insert an unsigned 32 bit integer into the stream using Write().
wxTextOutputStream& wxTextOutputStream::operator<<(wxUint32 c)
{
    Write(c);

    return *this;
}
// Insert an unsigned 64 bit integer into the stream using Write().
wxTextOutputStream& wxTextOutputStream::operator<<(wxUint64 c)
{
    Write(c);

    return *this;
}
// Insert a double precision floating point number into the stream using
// Write().
wxTextOutputStream &wxTextOutputStream::operator<<(double f)
{
    Write(f);

    return *this;
}
// Insert a single precision floating point number into the stream using
// Write().
wxTextOutputStream& wxTextOutputStream::operator<<(float f)
{
    Write(f);

    return *this;
}
// Stream manipulator writing an end of line: the '\n' character is passed to
// PutChar() of the given stream, which is then returned.
wxTextOutputStream &endl( wxTextOutputStream &stream )
{
    // PutChar() returns the stream it was called on.
    wxTextOutputStream& result = stream.PutChar(wxT('\n'));

    return result;
}
#endif
// wxUSE_STREAMS