rewrote Read() to slurp all the file at once to avoid problems in Unicode build
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38464 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -35,6 +35,7 @@
|
|||||||
|
|
||||||
#include "wx/textfile.h"
|
#include "wx/textfile.h"
|
||||||
#include "wx/filename.h"
|
#include "wx/filename.h"
|
||||||
|
#include "wx/buffer.h"
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// wxTextFile class implementation
|
// wxTextFile class implementation
|
||||||
@@ -88,24 +89,24 @@ bool wxTextFile::OnClose()
|
|||||||
bool wxTextFile::OnRead(wxMBConv& conv)
|
bool wxTextFile::OnRead(wxMBConv& conv)
|
||||||
{
|
{
|
||||||
// file should be opened and we must be in it's beginning
|
// file should be opened and we must be in it's beginning
|
||||||
wxASSERT( m_file.IsOpened() &&
|
wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 );
|
||||||
(m_file.GetKind() != wxFILE_KIND_DISK || m_file.Tell() == 0) );
|
|
||||||
|
|
||||||
static const size_t BUF_SIZE = 1024;
|
// read the entire file in memory: this is not the most efficient thing to
|
||||||
#if wxUSE_UNICODE
|
// do but there is no good way to avoid it in Unicode build because if we
|
||||||
static const size_t NUL_SIZE = 4;
|
// read the file block by block we can't convert each block to Unicode
|
||||||
#else
|
// separately (the last multibyte char in the block might be only partially
|
||||||
static const size_t NUL_SIZE = 1;
|
// read and so the conversion would fail) and, as the file contents is kept
|
||||||
#endif
|
// in memory by wxTextFile anyhow, it shouldn't be a big problem to read
|
||||||
|
// the file entirely
|
||||||
|
const size_t bufSize = m_file.Length() + 4 /* for trailing NULs */;
|
||||||
|
size_t bufPos = 0;
|
||||||
|
wxCharBuffer buf(bufSize - 1 /* it adds 1 internally */);
|
||||||
|
|
||||||
char buf[BUF_SIZE + NUL_SIZE];
|
char block[1024];
|
||||||
wxChar chLast = '\0';
|
for ( bool eof = false; !eof; )
|
||||||
wxString str;
|
|
||||||
|
|
||||||
for ( ;; )
|
|
||||||
{
|
{
|
||||||
// leave space for trailing NUL
|
// try to read up to the size of the entire block
|
||||||
ssize_t nRead = m_file.Read(buf, BUF_SIZE);
|
ssize_t nRead = m_file.Read(block, WXSIZEOF(block));
|
||||||
|
|
||||||
if ( nRead == wxInvalidOffset )
|
if ( nRead == wxInvalidOffset )
|
||||||
{
|
{
|
||||||
@@ -113,91 +114,95 @@ bool wxTextFile::OnRead(wxMBConv& conv)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( nRead == 0 )
|
eof = nRead == 0;
|
||||||
break;
|
if ( eof )
|
||||||
|
{
|
||||||
|
// append 4 trailing NUL bytes: this is needed to ensure that the
|
||||||
|
// string is going to be NUL-terminated, whatever is the encoding
|
||||||
|
// used (even UTF-32)
|
||||||
|
block[0] =
|
||||||
|
block[1] =
|
||||||
|
block[2] =
|
||||||
|
block[3] = '\0';
|
||||||
|
nRead = 4;
|
||||||
|
}
|
||||||
|
|
||||||
// save the number characters which we already processed during the
|
// this shouldn't happen but don't overwrite the buffer if it does
|
||||||
// last loop iteration
|
wxCHECK_MSG( bufPos + nRead <= bufSize, false,
|
||||||
const size_t lenOld = str.length();
|
_T("read more than file length?") );
|
||||||
|
|
||||||
|
// append to the buffer
|
||||||
|
memcpy(buf.data() + bufPos, block, nRead);
|
||||||
|
bufPos += nRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
const wxString str(buf, conv);
|
||||||
#if wxUSE_UNICODE
|
#if wxUSE_UNICODE
|
||||||
// we have to properly NUL-terminate the string for any encoding it may
|
if ( str.empty() )
|
||||||
// use -- 4 NULs should be enough for everyone (this is why we add 4
|
{
|
||||||
// extra bytes to the buffer)
|
wxLogError(_("Failed to convert file contents to Unicode."));
|
||||||
buf[nRead] =
|
return false;
|
||||||
buf[nRead + 1] =
|
}
|
||||||
buf[nRead + 2] =
|
#endif // wxUSE_UNICODE
|
||||||
buf[nRead + 3] = '\0';
|
|
||||||
|
|
||||||
// append to the remains of the last block, don't overwrite
|
free(buf.release()); // we don't need this memory any more
|
||||||
wxString strbuf(buf, conv);
|
|
||||||
if ( strbuf.empty() )
|
|
||||||
|
// now break the buffer in lines
|
||||||
|
|
||||||
|
// last processed character, we need to know if it was a CR or not
|
||||||
|
wxChar chLast = '\0';
|
||||||
|
|
||||||
|
// the beginning of the current line, changes inside the loop
|
||||||
|
wxString::const_iterator lineStart = str.begin();
|
||||||
|
const wxString::const_iterator end = str.end();
|
||||||
|
for ( wxString::const_iterator p = lineStart; p != end; p++ )
|
||||||
|
{
|
||||||
|
const wxChar ch = *p;
|
||||||
|
switch ( ch )
|
||||||
{
|
{
|
||||||
// conversion failed
|
case '\n':
|
||||||
return false;
|
// could be a DOS or Unix EOL
|
||||||
}
|
if ( chLast == '\r' )
|
||||||
|
{
|
||||||
|
AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos);
|
||||||
|
}
|
||||||
|
else // bare '\n', Unix style
|
||||||
|
{
|
||||||
|
AddLine(wxString(lineStart, p), wxTextFileType_Unix);
|
||||||
|
}
|
||||||
|
|
||||||
str += strbuf;
|
lineStart = p + 1;
|
||||||
#else // ANSI
|
break;
|
||||||
wxUnusedVar(conv);
|
|
||||||
buf[nRead] = '\0';
|
|
||||||
str += buf;
|
|
||||||
#endif // wxUSE_UNICODE/!wxUSE_UNICODE
|
|
||||||
|
|
||||||
|
|
||||||
// the beginning of the current line, changes inside the loop
|
|
||||||
wxString::const_iterator lineStart = str.begin();
|
|
||||||
const wxString::const_iterator end = str.end();
|
|
||||||
for ( wxString::const_iterator p = lineStart + lenOld; p != end; p++ )
|
|
||||||
{
|
|
||||||
const wxChar ch = *p;
|
|
||||||
switch ( ch )
|
|
||||||
{
|
|
||||||
case '\n':
|
|
||||||
// could be a DOS or Unix EOL
|
|
||||||
if ( chLast == '\r' )
|
|
||||||
{
|
|
||||||
AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos);
|
|
||||||
}
|
|
||||||
else // bare '\n', Unix style
|
|
||||||
{
|
|
||||||
AddLine(wxString(lineStart, p), wxTextFileType_Unix);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
case '\r':
|
||||||
|
if ( chLast == '\r' )
|
||||||
|
{
|
||||||
|
// Mac empty line
|
||||||
|
AddLine(wxEmptyString, wxTextFileType_Mac);
|
||||||
lineStart = p + 1;
|
lineStart = p + 1;
|
||||||
break;
|
}
|
||||||
|
//else: we don't know what this is yet -- could be a Mac EOL or
|
||||||
|
// start of DOS EOL so wait for next char
|
||||||
|
break;
|
||||||
|
|
||||||
case '\r':
|
default:
|
||||||
if ( chLast == '\r' )
|
if ( chLast == '\r' )
|
||||||
{
|
{
|
||||||
// Mac empty line
|
// Mac line termination
|
||||||
AddLine(wxEmptyString, wxTextFileType_Mac);
|
AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
|
||||||
lineStart = p + 1;
|
lineStart = p;
|
||||||
}
|
}
|
||||||
//else: we don't what this is yet -- could be a Mac EOL or
|
|
||||||
// start of DOS EOL so wait for next char
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
if ( chLast == '\r' )
|
|
||||||
{
|
|
||||||
// Mac line termination
|
|
||||||
AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
|
|
||||||
lineStart = p;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
chLast = ch;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// remove the part we already processed
|
chLast = ch;
|
||||||
str.erase(0, lineStart - str.begin());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// anything in the last line?
|
// anything in the last line?
|
||||||
if ( !str.empty() )
|
if ( lineStart != end )
|
||||||
{
|
{
|
||||||
AddLine(str, wxTextFileType_None); // no line terminator
|
// add unterminated last line
|
||||||
|
AddLine(wxString(lineStart, end), wxTextFileType_None);
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
Reference in New Issue
Block a user