rewrote Read() to slurp all the file at once to avoid problems in Unicode build

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38464 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2006-03-31 13:53:20 +00:00
parent 468c5a97ec
commit dbcf443c7d
1 changed files with 90 additions and 85 deletions
--- a/src/common/textfile.cpp
+++ b/src/common/textfile.cpp
@@ -35,6 +35,7 @@
 #include "wx/textfile.h"
 #include "wx/filename.h"
 #include "wx/buffer.h"
 // ============================================================================
 // wxTextFile class implementation
@@ -88,24 +89,24 @@ bool wxTextFile::OnClose()
 bool wxTextFile::OnRead(wxMBConv& conv)
 {
    // file should be opened and we must be in it's beginning
-    wxASSERT( m_file.IsOpened() &&
+    wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 );
                (m_file.GetKind() != wxFILE_KIND_DISK || m_file.Tell() == 0) );
-    static const size_t BUF_SIZE = 1024;
+    // read the entire file in memory: this is not the most efficient thing to
-#if wxUSE_UNICODE
+    // do but there is no good way to avoid it in Unicode build because if we
-    static const size_t NUL_SIZE = 4;
+    // read the file block by block we can't convert each block to Unicode
-#else
+    // separately (the last multibyte char in the block might be only partially
-    static const size_t NUL_SIZE = 1;
+    // read and so the conversion would fail) and, as the file contents is kept
-#endif
+    // in memory by wxTextFile anyhow, it shouldn't be a big problem to read
    // the file entirely
    const size_t bufSize = m_file.Length() + 4 /* for trailing NULs */;
    size_t bufPos = 0;
    wxCharBuffer buf(bufSize - 1 /* it adds 1 internally */);
-    char buf[BUF_SIZE + NUL_SIZE];
+    char block[1024];
-    wxChar chLast = '\0';
+    for ( bool eof = false; !eof; )
    wxString str;
    for ( ;; )
    {
-        // leave space for trailing NUL
+        // try to read up to the size of the entire block
-        ssize_t nRead = m_file.Read(buf, BUF_SIZE);
+        ssize_t nRead = m_file.Read(block, WXSIZEOF(block));
        if ( nRead == wxInvalidOffset )
        {
@@ -113,42 +114,49 @@ bool wxTextFile::OnRead(wxMBConv& conv)
            return false;
        }
-        if ( nRead == 0 )
+        eof = nRead == 0;
-            break;
+        if ( eof )
        // save the number characters which we already processed during the
        // last loop iteration
        const size_t lenOld = str.length();
 #if wxUSE_UNICODE
        // we have to properly NUL-terminate the string for any encoding it may
        // use -- 4 NULs should be enough for everyone (this is why we add 4
        // extra bytes to the buffer)
        buf[nRead] =
        buf[nRead + 1] =
        buf[nRead + 2] =
        buf[nRead + 3] = '\0';
        // append to the remains of the last block, don't overwrite
        wxString strbuf(buf, conv);
        if ( strbuf.empty() )
        {
-            // conversion failed
+            // append 4 trailing NUL bytes: this is needed to ensure that the
-            return false;
+            // string is going to be NUL-terminated, whatever is the encoding
            // used (even UTF-32)
            block[0] =
            block[1] =
            block[2] =
            block[3] = '\0';
            nRead = 4;
        }
-        str += strbuf;
+        // this shouldn't happen but don't overwrite the buffer if it does
-#else // ANSI
+        wxCHECK_MSG( bufPos + nRead <= bufSize, false,
-        wxUnusedVar(conv);
+                     _T("read more than file length?") );
        buf[nRead] = '\0';
        str += buf;
 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
        // append to the buffer
        memcpy(buf.data() + bufPos, block, nRead);
        bufPos += nRead;
    }
    const wxString str(buf, conv);
 #if wxUSE_UNICODE
    if ( str.empty() )
    {
        wxLogError(_("Failed to convert file contents to Unicode."));
        return false;
    }
 #endif // wxUSE_UNICODE
    free(buf.release()); // we don't need this memory any more
    // now break the buffer in lines
    // last processed character, we need to know if it was a CR or not
    wxChar chLast = '\0';
    // the beginning of the current line, changes inside the loop
    wxString::const_iterator lineStart = str.begin();
    const wxString::const_iterator end = str.end();
-        for ( wxString::const_iterator p = lineStart + lenOld; p != end; p++ )
+    for ( wxString::const_iterator p = lineStart; p != end; p++ )
    {
        const wxChar ch = *p;
        switch ( ch )
@@ -174,7 +182,7 @@ bool wxTextFile::OnRead(wxMBConv& conv)
                    AddLine(wxEmptyString, wxTextFileType_Mac);
                    lineStart = p + 1;
                }
-                    //else: we don't what this is yet -- could be a Mac EOL or
+                //else: we don't know what this is yet -- could be a Mac EOL or
                //      start of DOS EOL so wait for next char
                break;
@@ -190,14 +198,11 @@ bool wxTextFile::OnRead(wxMBConv& conv)
        chLast = ch;
    }
        // remove the part we already processed
        str.erase(0, lineStart - str.begin());
    }
    // anything in the last line?
-    if ( !str.empty() )
+    if ( lineStart != end )
    {
-        AddLine(str, wxTextFileType_None); // no line terminator
+        // add unterminated last line
        AddLine(wxString(lineStart, end), wxTextFileType_None);
    }
    return true;