Rewrote wxTextFile::OnRead() to slurp the whole file at once to avoid problems in Unicode build

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38464 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2006-03-31 13:53:20 +00:00
parent 468c5a97ec
commit dbcf443c7d

View File

@@ -35,6 +35,7 @@
#include "wx/textfile.h" #include "wx/textfile.h"
#include "wx/filename.h" #include "wx/filename.h"
#include "wx/buffer.h"
// ============================================================================ // ============================================================================
// wxTextFile class implementation // wxTextFile class implementation
@@ -88,24 +89,24 @@ bool wxTextFile::OnClose()
bool wxTextFile::OnRead(wxMBConv& conv) bool wxTextFile::OnRead(wxMBConv& conv)
{ {
// file should be opened and we must be at its beginning // file should be opened and we must be at its beginning
wxASSERT( m_file.IsOpened() && wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 );
(m_file.GetKind() != wxFILE_KIND_DISK || m_file.Tell() == 0) );
static const size_t BUF_SIZE = 1024; // read the entire file in memory: this is not the most efficient thing to
#if wxUSE_UNICODE // do but there is no good way to avoid it in Unicode build because if we
static const size_t NUL_SIZE = 4; // read the file block by block we can't convert each block to Unicode
#else // separately (the last multibyte char in the block might be only partially
static const size_t NUL_SIZE = 1; // read and so the conversion would fail) and, as the file contents is kept
#endif // in memory by wxTextFile anyhow, it shouldn't be a big problem to read
// the file entirely
const size_t bufSize = m_file.Length() + 4 /* for trailing NULs */;
size_t bufPos = 0;
wxCharBuffer buf(bufSize - 1 /* it adds 1 internally */);
char buf[BUF_SIZE + NUL_SIZE]; char block[1024];
wxChar chLast = '\0'; for ( bool eof = false; !eof; )
wxString str;
for ( ;; )
{ {
// leave space for trailing NUL // try to read up to the size of the entire block
ssize_t nRead = m_file.Read(buf, BUF_SIZE); ssize_t nRead = m_file.Read(block, WXSIZEOF(block));
if ( nRead == wxInvalidOffset ) if ( nRead == wxInvalidOffset )
{ {
@@ -113,42 +114,49 @@ bool wxTextFile::OnRead(wxMBConv& conv)
return false; return false;
} }
if ( nRead == 0 ) eof = nRead == 0;
break; if ( eof )
// save the number characters which we already processed during the
// last loop iteration
const size_t lenOld = str.length();
#if wxUSE_UNICODE
// we have to properly NUL-terminate the string for any encoding it may
// use -- 4 NULs should be enough for everyone (this is why we add 4
// extra bytes to the buffer)
buf[nRead] =
buf[nRead + 1] =
buf[nRead + 2] =
buf[nRead + 3] = '\0';
// append to the remains of the last block, don't overwrite
wxString strbuf(buf, conv);
if ( strbuf.empty() )
{ {
// conversion failed // append 4 trailing NUL bytes: this is needed to ensure that the
return false; // string is going to be NUL-terminated, whatever is the encoding
// used (even UTF-32)
block[0] =
block[1] =
block[2] =
block[3] = '\0';
nRead = 4;
} }
str += strbuf; // this shouldn't happen but don't overwrite the buffer if it does
#else // ANSI wxCHECK_MSG( bufPos + nRead <= bufSize, false,
wxUnusedVar(conv); _T("read more than file length?") );
buf[nRead] = '\0';
str += buf;
#endif // wxUSE_UNICODE/!wxUSE_UNICODE
// append to the buffer
memcpy(buf.data() + bufPos, block, nRead);
bufPos += nRead;
}
const wxString str(buf, conv);
#if wxUSE_UNICODE
if ( str.empty() )
{
wxLogError(_("Failed to convert file contents to Unicode."));
return false;
}
#endif // wxUSE_UNICODE
free(buf.release()); // we don't need this memory any more
// now break the buffer in lines
// last processed character, we need to know if it was a CR or not
wxChar chLast = '\0';
// the beginning of the current line, changes inside the loop // the beginning of the current line, changes inside the loop
wxString::const_iterator lineStart = str.begin(); wxString::const_iterator lineStart = str.begin();
const wxString::const_iterator end = str.end(); const wxString::const_iterator end = str.end();
for ( wxString::const_iterator p = lineStart + lenOld; p != end; p++ ) for ( wxString::const_iterator p = lineStart; p != end; p++ )
{ {
const wxChar ch = *p; const wxChar ch = *p;
switch ( ch ) switch ( ch )
@@ -174,7 +182,7 @@ bool wxTextFile::OnRead(wxMBConv& conv)
AddLine(wxEmptyString, wxTextFileType_Mac); AddLine(wxEmptyString, wxTextFileType_Mac);
lineStart = p + 1; lineStart = p + 1;
} }
//else: we don't what this is yet -- could be a Mac EOL or //else: we don't know what this is yet -- could be a Mac EOL or
// start of DOS EOL so wait for next char // start of DOS EOL so wait for next char
break; break;
@@ -190,14 +198,11 @@ bool wxTextFile::OnRead(wxMBConv& conv)
chLast = ch; chLast = ch;
} }
// remove the part we already processed
str.erase(0, lineStart - str.begin());
}
// anything in the last line? // anything in the last line?
if ( !str.empty() ) if ( lineStart != end )
{ {
AddLine(str, wxTextFileType_None); // no line terminator // add unterminated last line
AddLine(wxString(lineStart, end), wxTextFileType_None);
} }
return true; return true;