Rewrote Read() to slurp the whole file at once to avoid problems in Unicode builds

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38464 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin
2006-03-31 13:53:20 +00:00
parent 468c5a97ec
commit dbcf443c7d

View File

@@ -35,6 +35,7 @@
#include "wx/textfile.h" #include "wx/textfile.h"
#include "wx/filename.h" #include "wx/filename.h"
#include "wx/buffer.h"
// ============================================================================ // ============================================================================
// wxTextFile class implementation // wxTextFile class implementation
@@ -88,24 +89,24 @@ bool wxTextFile::OnClose()
bool wxTextFile::OnRead(wxMBConv& conv) bool wxTextFile::OnRead(wxMBConv& conv)
{ {
// file should be opened and we must be at its beginning // file should be opened and we must be at its beginning
wxASSERT( m_file.IsOpened() && wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 );
(m_file.GetKind() != wxFILE_KIND_DISK || m_file.Tell() == 0) );
static const size_t BUF_SIZE = 1024; // read the entire file in memory: this is not the most efficient thing to
#if wxUSE_UNICODE // do but there is no good way to avoid it in Unicode build because if we
static const size_t NUL_SIZE = 4; // read the file block by block we can't convert each block to Unicode
#else // separately (the last multibyte char in the block might be only partially
static const size_t NUL_SIZE = 1; // read and so the conversion would fail) and, as the file contents is kept
#endif // in memory by wxTextFile anyhow, it shouldn't be a big problem to read
// the file entirely
const size_t bufSize = m_file.Length() + 4 /* for trailing NULs */;
size_t bufPos = 0;
wxCharBuffer buf(bufSize - 1 /* it adds 1 internally */);
char buf[BUF_SIZE + NUL_SIZE]; char block[1024];
wxChar chLast = '\0'; for ( bool eof = false; !eof; )
wxString str;
for ( ;; )
{ {
// leave space for trailing NUL // try to read up to the size of the entire block
ssize_t nRead = m_file.Read(buf, BUF_SIZE); ssize_t nRead = m_file.Read(block, WXSIZEOF(block));
if ( nRead == wxInvalidOffset ) if ( nRead == wxInvalidOffset )
{ {
@@ -113,91 +114,95 @@ bool wxTextFile::OnRead(wxMBConv& conv)
return false; return false;
} }
if ( nRead == 0 ) eof = nRead == 0;
break; if ( eof )
{
// append 4 trailing NUL bytes: this is needed to ensure that the
// string is going to be NUL-terminated, whatever is the encoding
// used (even UTF-32)
block[0] =
block[1] =
block[2] =
block[3] = '\0';
nRead = 4;
}
// save the number characters which we already processed during the // this shouldn't happen but don't overwrite the buffer if it does
// last loop iteration wxCHECK_MSG( bufPos + nRead <= bufSize, false,
const size_t lenOld = str.length(); _T("read more than file length?") );
// append to the buffer
memcpy(buf.data() + bufPos, block, nRead);
bufPos += nRead;
}
const wxString str(buf, conv);
#if wxUSE_UNICODE #if wxUSE_UNICODE
// we have to properly NUL-terminate the string for any encoding it may if ( str.empty() )
// use -- 4 NULs should be enough for everyone (this is why we add 4 {
// extra bytes to the buffer) wxLogError(_("Failed to convert file contents to Unicode."));
buf[nRead] = return false;
buf[nRead + 1] = }
buf[nRead + 2] = #endif // wxUSE_UNICODE
buf[nRead + 3] = '\0';
// append to the remains of the last block, don't overwrite free(buf.release()); // we don't need this memory any more
wxString strbuf(buf, conv);
if ( strbuf.empty() )
// now break the buffer in lines
// last processed character, we need to know if it was a CR or not
wxChar chLast = '\0';
// the beginning of the current line, changes inside the loop
wxString::const_iterator lineStart = str.begin();
const wxString::const_iterator end = str.end();
for ( wxString::const_iterator p = lineStart; p != end; p++ )
{
const wxChar ch = *p;
switch ( ch )
{ {
// conversion failed case '\n':
return false; // could be a DOS or Unix EOL
} if ( chLast == '\r' )
{
AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos);
}
else // bare '\n', Unix style
{
AddLine(wxString(lineStart, p), wxTextFileType_Unix);
}
str += strbuf; lineStart = p + 1;
#else // ANSI break;
wxUnusedVar(conv);
buf[nRead] = '\0';
str += buf;
#endif // wxUSE_UNICODE/!wxUSE_UNICODE
// the beginning of the current line, changes inside the loop
wxString::const_iterator lineStart = str.begin();
const wxString::const_iterator end = str.end();
for ( wxString::const_iterator p = lineStart + lenOld; p != end; p++ )
{
const wxChar ch = *p;
switch ( ch )
{
case '\n':
// could be a DOS or Unix EOL
if ( chLast == '\r' )
{
AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos);
}
else // bare '\n', Unix style
{
AddLine(wxString(lineStart, p), wxTextFileType_Unix);
}
case '\r':
if ( chLast == '\r' )
{
// Mac empty line
AddLine(wxEmptyString, wxTextFileType_Mac);
lineStart = p + 1; lineStart = p + 1;
break; }
//else: we don't know what this is yet -- could be a Mac EOL or
// start of DOS EOL so wait for next char
break;
case '\r': default:
if ( chLast == '\r' ) if ( chLast == '\r' )
{ {
// Mac empty line // Mac line termination
AddLine(wxEmptyString, wxTextFileType_Mac); AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
lineStart = p + 1; lineStart = p;
} }
//else: we don't what this is yet -- could be a Mac EOL or
// start of DOS EOL so wait for next char
break;
default:
if ( chLast == '\r' )
{
// Mac line termination
AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
lineStart = p;
}
}
chLast = ch;
} }
// remove the part we already processed chLast = ch;
str.erase(0, lineStart - str.begin());
} }
// anything in the last line? // anything in the last line?
if ( !str.empty() ) if ( lineStart != end )
{ {
AddLine(str, wxTextFileType_None); // no line terminator // add unterminated last line
AddLine(wxString(lineStart, end), wxTextFileType_None);
} }
return true; return true;