/////////////////////////////////////////////////////////////////////////////// // Name: src/common/txtstrm.cpp // Purpose: Text stream classes // Author: Guilhem Lavaux // Modified by: // Created: 28/06/98 // Copyright: (c) Guilhem Lavaux // Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// // For compilers that support precompilation, includes "wx.h". #include "wx/wxprec.h" #ifdef __BORLANDC__ #pragma hdrstop #endif #if wxUSE_STREAMS #include "wx/txtstrm.h" #ifndef WX_PRECOMP #include "wx/crt.h" #endif #include // ---------------------------------------------------------------------------- // wxTextInputStream // ---------------------------------------------------------------------------- #if wxUSE_UNICODE wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep, const wxMBConv& conv) : m_input(s), m_separators(sep), m_conv(conv.Clone()) { m_validBegin = m_validEnd = 0; #if SIZEOF_WCHAR_T == 2 m_lastWChar = 0; #endif // SIZEOF_WCHAR_T == 2 } #else wxTextInputStream::wxTextInputStream(wxInputStream &s, const wxString &sep) : m_input(s), m_separators(sep) { m_validBegin = m_validEnd = 0; m_lastBytes[0] = 0; } #endif wxTextInputStream::~wxTextInputStream() { #if wxUSE_UNICODE delete m_conv; #endif // wxUSE_UNICODE } void wxTextInputStream::UngetLast() { if ( m_validEnd ) { m_input.Ungetch(m_lastBytes, m_validEnd); m_validBegin = m_validEnd = 0; } } wxChar wxTextInputStream::GetChar() { #if wxUSE_UNICODE #if SIZEOF_WCHAR_T == 2 // Return the already raed character remaining from the last call to this // function, if any. if ( m_lastWChar ) { const wxChar wc = m_lastWChar; m_lastWChar = 0; return wc; } #endif // SIZEOF_WCHAR_T // If we have any non-decoded bytes left from the last call, shift them to // be at the beginning of the buffer. 
if ( m_validBegin < m_validEnd ) { m_validEnd -= m_validBegin; memmove(m_lastBytes, m_lastBytes + m_validBegin, m_validEnd); } else // All bytes were already decoded and consumed. { m_validEnd = 0; } // We may need to decode up to 4 characters if we have input starting with // 3 BOM-like bytes, but not actually containing a BOM, as decoding it will // only succeed when 4 bytes are read -- and will yield 4 wide characters. wxChar wbuf[4]; for(size_t inlen = 0; inlen < sizeof(m_lastBytes); inlen++) { if ( inlen >= m_validEnd ) { // actually read the next character m_lastBytes[inlen] = m_input.GetC(); if (m_input.LastRead() == 0) return 0; m_validEnd++; } //else: Retry decoding what we already have in the buffer. switch ( m_conv->ToWChar(wbuf, WXSIZEOF(wbuf), m_lastBytes, inlen + 1) ) { case 0: // this is a bug in converter object as it should either fail // or decode non-empty string to something non-empty wxFAIL_MSG("ToWChar() can't return 0 for non-empty input"); break; case wxCONV_FAILED: // the buffer probably doesn't contain enough bytes to decode // as a complete character, try with more bytes break; default: // If we couldn't decode a single character during the last // loop iteration, but decoded more than one of them with just // one extra byte, the only explanation is that we were using a // wxConvAuto conversion recognizing the initial BOM and that // it couldn't detect the presence or absence of BOM so far, // but now finally has enough data to see that there is none. // As we must have fallen back to Latin-1 in this case, return // just the first byte and keep the other ones for the next // time. 
m_validBegin = 1; return wbuf[0]; #if SIZEOF_WCHAR_T == 2 case 2: // When wchar_t uses UTF-16, we could have decoded a single // Unicode code point as 2 wchar_t characters and there is // nothing else to do here but to return the first one now and // remember the second one for the next call, as there is no // way to fit both of them into a single wxChar in this case. m_lastWChar = wbuf[1]; #endif // SIZEOF_WCHAR_T == 2 wxFALLTHROUGH; case 1: m_validBegin = inlen + 1; // we finally decoded a character return wbuf[0]; } } // There should be no encoding which requires more than 10 bytes to decode // at least one character (the most actually seems to be 7: 3 for the // initial BOM, which is ignored, and 4 for the longest possible encoding // of a Unicode character in UTF-8), so something must be wrong with our // conversion but we have no way to signal it from here and just return 0 // as if we reached the end of the stream. m_validBegin = 0; m_validEnd = sizeof(m_lastBytes); return 0; #else m_lastBytes[0] = m_input.GetC(); if(m_input.LastRead() <= 0) { m_validEnd = 0; return 0; } m_validEnd = 1; return m_lastBytes[0]; #endif } wxChar wxTextInputStream::NextNonSeparators() { for (;;) { wxChar c = GetChar(); if (!c) return c; if (c != wxT('\n') && c != wxT('\r') && m_separators.Find(c) < 0) return c; } } bool wxTextInputStream::EatEOL(const wxChar &c) { if (c == wxT('\n')) return true; // eat on UNIX if (c == wxT('\r')) // eat on both Mac and DOS { wxChar c2 = GetChar(); if (!c2) return true; // end of stream reached, had enough :-) if (c2 != wxT('\n')) UngetLast(); // Don't eat on Mac return true; } return false; } wxUint64 wxTextInputStream::Read64(int base) { wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); if(!m_input) return 0; wxString word = ReadWord(); if(word.empty()) return 0; wxUint64 res; if(!word.ToULongLong(&res, base)) return 0; return res; } wxUint32 wxTextInputStream::Read32(int base) { wxASSERT_MSG( !base || (base > 1 && base <= 
36), wxT("invalid base") ); if(!m_input) return 0; wxString word = ReadWord(); if(word.empty()) return 0; return wxStrtoul(word.c_str(), 0, base); } wxUint16 wxTextInputStream::Read16(int base) { return (wxUint16)Read32(base); } wxUint8 wxTextInputStream::Read8(int base) { return (wxUint8)Read32(base); } wxInt64 wxTextInputStream::Read64S(int base) { wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); if(!m_input) return 0; wxString word = ReadWord(); if(word.empty()) return 0; wxInt64 res; if(!word.ToLongLong(&res, base)) return 0; return res; } wxInt32 wxTextInputStream::Read32S(int base) { wxASSERT_MSG( !base || (base > 1 && base <= 36), wxT("invalid base") ); if(!m_input) return 0; wxString word = ReadWord(); if(word.empty()) return 0; return wxStrtol(word.c_str(), 0, base); } wxInt16 wxTextInputStream::Read16S(int base) { return (wxInt16)Read32S(base); } wxInt8 wxTextInputStream::Read8S(int base) { return (wxInt8)Read32S(base); } double wxTextInputStream::ReadDouble() { if(!m_input) return 0; wxString word = ReadWord(); if(word.empty()) return 0; return wxStrtod(word.c_str(), 0); } wxString wxTextInputStream::ReadLine() { wxString line; while ( !m_input.Eof() ) { wxChar c = GetChar(); if (!c) break; if (EatEOL(c)) break; line += c; } return line; } wxString wxTextInputStream::ReadWord() { wxString word; if ( !m_input ) return word; wxChar c = NextNonSeparators(); if ( !c ) return word; word += c; while ( !m_input.Eof() ) { c = GetChar(); if (!c) break; if (m_separators.Find(c) >= 0) break; if (EatEOL(c)) break; word += c; } return word; } wxTextInputStream& wxTextInputStream::operator>>(wxString& word) { word = ReadWord(); return *this; } wxTextInputStream& wxTextInputStream::operator>>(char& c) { c = m_input.GetC(); if (m_input.LastRead() == 0) c = 0; if (EatEOL(c)) c = '\n'; return *this; } #if wxUSE_UNICODE && wxWCHAR_T_IS_REAL_TYPE wxTextInputStream& wxTextInputStream::operator>>(wchar_t& wc) { wc = GetChar(); return *this; } #endif // 
// The numeric extraction operators below simply forward to the corresponding
// ReadXXX() function using its default arguments.

// Extracts a signed 16 bit integer, see Read16S().
wxTextInputStream& wxTextInputStream::operator>>(wxInt16& i)
{
    i = Read16S();
    return *this;
}

// Extracts a signed 32 bit integer, see Read32S().
wxTextInputStream& wxTextInputStream::operator>>(wxInt32& i)
{
    i = Read32S();
    return *this;
}

// Extracts a signed 64 bit integer, see Read64S().
wxTextInputStream& wxTextInputStream::operator>>(wxInt64& i)
{
    i = Read64S();
    return *this;
}

// Extracts an unsigned 16 bit integer, see Read16().
wxTextInputStream& wxTextInputStream::operator>>(wxUint16& i)
{
    i = Read16();
    return *this;
}

// Extracts an unsigned 32 bit integer, see Read32().
wxTextInputStream& wxTextInputStream::operator>>(wxUint32& i)
{
    i = Read32();
    return *this;
}

// Extracts an unsigned 64 bit integer, see Read64().
wxTextInputStream& wxTextInputStream::operator>>(wxUint64& i)
{
    i = Read64();
    return *this;
}

// Extracts a double, see ReadDouble().
wxTextInputStream& wxTextInputStream::operator>>(double& i)
{
    i = ReadDouble();
    return *this;
}

// Extracts a float: the value is read as double and then cast to float.
wxTextInputStream& wxTextInputStream::operator>>(float& f)
{
    f = (float)ReadDouble();
    return *this;
}

// ----------------------------------------------------------------------------
// wxTextOutputStream
// ----------------------------------------------------------------------------

// Constructs a text writer on top of the output stream s using the given end
// of line mode. In Unicode build a private clone of conv is made (it is
// deleted in the destructor) and used to encode the text written.
#if wxUSE_UNICODE
wxTextOutputStream::wxTextOutputStream(wxOutputStream& s,
                                       wxEOL mode,
                                       const wxMBConv& conv)
  : m_output(s), m_conv(conv.Clone())
#else
wxTextOutputStream::wxTextOutputStream(wxOutputStream& s, wxEOL mode)
  : m_output(s)
#endif
{
    m_mode = mode;

    // Resolve wxEOL_NATIVE to the concrete mode of the current platform.
    if (m_mode == wxEOL_NATIVE)
    {
#if defined(__WINDOWS__)
        m_mode = wxEOL_DOS;
#else
        m_mode = wxEOL_UNIX;
#endif
    }

#if wxUSE_UNICODE && SIZEOF_WCHAR_T == 2
    m_lastWChar = 0;
#endif // SIZEOF_WCHAR_T == 2
}

// Destroys the converter clone created by the constructor (Unicode build
// only).
wxTextOutputStream::~wxTextOutputStream()
{
#if wxUSE_UNICODE
    delete m_conv;
#endif // wxUSE_UNICODE
}

// Changes the end of line mode, wxEOL_NATIVE is resolved to wxEOL_DOS under
// Windows and to wxEOL_UNIX elsewhere, exactly as in the constructor.
void wxTextOutputStream::SetMode(wxEOL mode)
{
    m_mode = mode;
    if (m_mode == wxEOL_NATIVE)
    {
#if defined(__WINDOWS__)
        m_mode = wxEOL_DOS;
#else
        m_mode = wxEOL_UNIX;
#endif
    }
}

// Writes the decimal representation of an unsigned 64 bit integer.
void wxTextOutputStream::Write64(wxUint64 i)
{
    WriteString(wxString::Format("%" wxLongLongFmtSpec "u", i));
}

// Writes the decimal representation of an unsigned 32 bit integer.
void wxTextOutputStream::Write32(wxUint32 i)
{
    wxString str;
    str.Printf(wxT("%u"), i);

    WriteString(str);
}

// Writes the decimal representation of an unsigned 16 bit integer.
void wxTextOutputStream::Write16(wxUint16 i)
{
    wxString str;
    str.Printf(wxT("%u"), (unsigned)i);

    WriteString(str);
}

// Writes the decimal representation of an unsigned 8 bit integer.
void wxTextOutputStream::Write8(wxUint8 i)
{
    wxString str;
    str.Printf(wxT("%u"), (unsigned)i);

    WriteString(str);
}

// Writes the value formatted using "%f".
void wxTextOutputStream::WriteDouble(double d)
{
    wxString str;

    str.Printf(wxT("%f"), d);
    WriteString(str);
}

// Writes the string to the underlying stream, replacing each '\n' in it
// according to the current end of line mode: "\r\n" for wxEOL_DOS, '\r' for
// wxEOL_MAC and unchanged '\n' for wxEOL_UNIX (and also, after asserting, for
// any other mode). In Unicode build the result is encoded using m_conv.
void wxTextOutputStream::WriteString(const wxString& string)
{
    size_t len = string.length();

    wxString out;
    out.reserve(len);

    for ( size_t i = 0; i < len; i++ )
    {
        const wxChar c = string[i];
        if ( c == wxT('\n') )
        {
            switch ( m_mode )
            {
                case wxEOL_DOS:
                    out << wxT("\r\n");
                    // "continue" applies to the for loop, skipping "out << c"
                    continue;

                case wxEOL_MAC:
                    out << wxT('\r');
                    continue;

                default:
                    wxFAIL_MSG( wxT("unknown EOL mode in wxTextOutputStream") );
                    wxFALLTHROUGH;

                case wxEOL_UNIX:
                    // don't treat '\n' specially
                    ;
            }
        }

        out << c;
    }

#if wxUSE_UNICODE
    // FIXME-UTF8: use wxCharBufferWithLength if/when we have it
    // Note that len is reused here to receive the length of the encoded data.
    wxCharBuffer buffer = m_conv->cWC2MB(out.wc_str(), out.length(), &len);
    m_output.Write(buffer, len);
#else
    m_output.Write(out.c_str(), out.length() );
#endif
}

// Writes a single character to the stream and returns this object.
//
// In Unicode build with 2 byte wchar_t the character is encoded here directly
// and any '\n' bytes of the result are replaced according to the end of line
// mode. If the conversion yields nothing, the character is remembered in
// m_lastWChar and converted together with the next one passed to this
// function. In the other builds the character is written using WriteString().
wxTextOutputStream& wxTextOutputStream::PutChar(wxChar c)
{
#if wxUSE_UNICODE
#if SIZEOF_WCHAR_T == 2
    wxCharBuffer buffer;
    size_t len;
    if ( m_lastWChar )
    {
        // Convert the character pending from the last call together with
        // this one.
        wxChar buf[2];
        buf[0] = m_lastWChar;
        buf[1] = c;
        buffer = m_conv->cWC2MB(buf, WXSIZEOF(buf), &len);
        m_lastWChar = 0;
    }
    else
    {
        buffer = m_conv->cWC2MB(&c, 1, &len);
    }

    if ( !len )
    {
        // Conversion failed, possibly because we have the first half of a
        // surrogate character, so just store it and write it out when the
        // second half is written to the stream too later.
        //
        // Notice that if we already had had a valid m_lastWChar, it is simply
        // discarded here which is very bad, but there is no way to signal an
        // error from here and this is not worse than the old code behaviour.
        m_lastWChar = c;
    }
    else
    {
        // Write the encoded bytes one by one, translating '\n' on the fly.
        for ( size_t n = 0; n < len; n++ )
        {
            const char c2 = buffer[n];
            if ( c2 == '\n' )
            {
                switch ( m_mode )
                {
                    case wxEOL_DOS:
                        m_output.Write("\r\n", 2);
                        // "continue" applies to the for loop, skipping the
                        // Write() call after the switch
                        continue;

                    case wxEOL_MAC:
                        m_output.Write("\r", 1);
                        continue;

                    default:
                        wxFAIL_MSG( wxT("unknown EOL mode in wxTextOutputStream") );
                        wxFALLTHROUGH;

                    case wxEOL_UNIX:
                        // don't treat '\n' specially
                        ;
                }
            }

            m_output.Write(&c2, 1);
        }
    }
#else // SIZEOF_WCHAR_T == 4
    WriteString( wxString(&c, *m_conv, 1) );
#endif // SIZEOF_WCHAR_T == 2 or 4
#else
    WriteString( wxString(&c, wxConvLocal, 1) );
#endif
    return *this;
}

// In Unicode build, asks the converter to encode an empty string including
// its terminating NUL and, if this produces more bytes than the NUL itself
// takes, writes the extra leading bytes to the stream. Does nothing in
// non-Unicode build.
//
// NOTE(review): presumably this is meant to output the final state reset
// sequence of stateful encodings -- confirm against the wxMBConv docs.
void wxTextOutputStream::Flush()
{
#if wxUSE_UNICODE
    const size_t len = m_conv->FromWChar(NULL, 0, L"", 1);
    if ( len > m_conv->GetMBNulLen() )
    {
        wxCharBuffer buf(len);
        m_conv->FromWChar(buf.data(), len, L"", 1);
        // Don't write the trailing NUL itself.
        m_output.Write(buf, len - m_conv->GetMBNulLen());
    }
#endif // wxUSE_UNICODE
}

// Inserts a string, see WriteString().
wxTextOutputStream& wxTextOutputStream::operator<<(const wxString& string)
{
    WriteString( string );
    return *this;
}

// Inserts a single char converted to a string using wxString::FromAscii().
wxTextOutputStream& wxTextOutputStream::operator<<(char c)
{
    WriteString( wxString::FromAscii(c) );

    return *this;
}

#if wxUSE_UNICODE && wxWCHAR_T_IS_REAL_TYPE

// Inserts a single wide character, see PutChar().
wxTextOutputStream& wxTextOutputStream::operator<<(wchar_t wc)
{
    PutChar(wc);

    return *this;
}

#endif // wxUSE_UNICODE

// The numeric insertion operators below all forward to Write(), which is not
// defined in this part of the file (presumably it is declared in the class
// header -- confirm).

// Inserts a signed 16 bit integer.
wxTextOutputStream& wxTextOutputStream::operator<<(wxInt16 c)
{
    Write(c);

    return *this;
}

// Inserts a signed 32 bit integer.
wxTextOutputStream& wxTextOutputStream::operator<<(wxInt32 c)
{
    Write(c);

    return *this;
}

// Inserts a signed 64 bit integer.
wxTextOutputStream& wxTextOutputStream::operator<<(wxInt64 c)
{
    Write(c);

    return *this;
}

// Inserts an unsigned 16 bit integer.
wxTextOutputStream& wxTextOutputStream::operator<<(wxUint16 c)
{
    Write(c);

    return *this;
}

// Inserts an unsigned 32 bit integer.
wxTextOutputStream& wxTextOutputStream::operator<<(wxUint32 c)
{
    Write(c);

    return *this;
}

// Inserts an unsigned 64 bit integer.
wxTextOutputStream& wxTextOutputStream::operator<<(wxUint64 c)
{
    Write(c);

    return *this;
}

// Inserts a double.
wxTextOutputStream &wxTextOutputStream::operator<<(double f)
{
    Write(f);
    return *this;
}

// Inserts a float.
wxTextOutputStream& wxTextOutputStream::operator<<(float f)
{
    Write(f);
    return *this;
}
wxTextOutputStream &endl( wxTextOutputStream &stream ) { return stream.PutChar(wxT('\n')); } #endif // wxUSE_STREAMS