Rewrote wxTextFile::OnRead() to slurp the entire file at once to avoid problems in the Unicode build

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@38464 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2006-03-31 13:53:20 +00:00
parent 468c5a97ec
commit dbcf443c7d
1 changed files with 90 additions and 85 deletions
--- a/src/common/textfile.cpp
+++ b/src/common/textfile.cpp
@@ -35,6 +35,7 @@
 #include "wx/textfile.h"
 #include "wx/filename.h"
 #include "wx/buffer.h"
 // ============================================================================
 // wxTextFile class implementation
@@ -88,24 +89,24 @@ bool wxTextFile::OnClose()
 bool wxTextFile::OnRead(wxMBConv& conv)
 {
    // file should be opened and we must be at its beginning
-    wxASSERT( m_file.IsOpened() &&
+    wxASSERT( m_file.IsOpened() && m_file.Tell() == 0 );
                (m_file.GetKind() != wxFILE_KIND_DISK || m_file.Tell() == 0) );
-    static const size_t BUF_SIZE = 1024;
+    // read the entire file in memory: this is not the most efficient thing to
-#if wxUSE_UNICODE
+    // do but there is no good way to avoid it in Unicode build because if we
-    static const size_t NUL_SIZE = 4;
+    // read the file block by block we can't convert each block to Unicode
-#else
+    // separately (the last multibyte char in the block might be only partially
-    static const size_t NUL_SIZE = 1;
+    // read and so the conversion would fail) and, as the file contents is kept
-#endif
+    // in memory by wxTextFile anyhow, it shouldn't be a big problem to read
    // the file entirely
    const size_t bufSize = m_file.Length() + 4 /* for trailing NULs */;
    size_t bufPos = 0;
    wxCharBuffer buf(bufSize - 1 /* it adds 1 internally */);
-    char buf[BUF_SIZE + NUL_SIZE];
+    char block[1024];
-    wxChar chLast = '\0';
+    for ( bool eof = false; !eof; )
    wxString str;
    for ( ;; )
    {
-        // leave space for trailing NUL
+        // try to read up to the size of the entire block
-        ssize_t nRead = m_file.Read(buf, BUF_SIZE);
+        ssize_t nRead = m_file.Read(block, WXSIZEOF(block));
        if ( nRead == wxInvalidOffset )
        {
@@ -113,91 +114,95 @@ bool wxTextFile::OnRead(wxMBConv& conv)
            return false;
        }
-        if ( nRead == 0 )
+        eof = nRead == 0;
-            break;
+        if ( eof )
        {
            // append 4 trailing NUL bytes: this is needed to ensure that the
            // string is going to be NUL-terminated, whatever is the encoding
            // used (even UTF-32)
            block[0] =
            block[1] =
            block[2] =
            block[3] = '\0';
            nRead = 4;
        }
-        // save the number characters which we already processed during the
+        // this shouldn't happen but don't overwrite the buffer if it does
-        // last loop iteration
+        wxCHECK_MSG( bufPos + nRead <= bufSize, false,
-        const size_t lenOld = str.length();
+                     _T("read more than file length?") );
        // append to the buffer
        memcpy(buf.data() + bufPos, block, nRead);
        bufPos += nRead;
    }
    const wxString str(buf, conv);
 #if wxUSE_UNICODE
-        // we have to properly NUL-terminate the string for any encoding it may
+    if ( str.empty() )
-        // use -- 4 NULs should be enough for everyone (this is why we add 4
+    {
-        // extra bytes to the buffer)
+        wxLogError(_("Failed to convert file contents to Unicode."));
-        buf[nRead] =
+        return false;
-        buf[nRead + 1] =
+    }
-        buf[nRead + 2] =
+#endif // wxUSE_UNICODE
        buf[nRead + 3] = '\0';
-        // append to the remains of the last block, don't overwrite
+    free(buf.release()); // we don't need this memory any more
-        wxString strbuf(buf, conv);
+
-        if ( strbuf.empty() )
+
    // now break the buffer in lines
    // last processed character, we need to know if it was a CR or not
    wxChar chLast = '\0';
    // the beginning of the current line, changes inside the loop
    wxString::const_iterator lineStart = str.begin();
    const wxString::const_iterator end = str.end();
    for ( wxString::const_iterator p = lineStart; p != end; p++ )
    {
        const wxChar ch = *p;
        switch ( ch )
        {
-            // conversion failed
+            case '\n':
-            return false;
+                // could be a DOS or Unix EOL
-        }
+                if ( chLast == '\r' )
                {
                    AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos);
                }
                else // bare '\n', Unix style
                {
                    AddLine(wxString(lineStart, p), wxTextFileType_Unix);
                }
-        str += strbuf;
+                lineStart = p + 1;
-#else // ANSI
+                break;
        wxUnusedVar(conv);
        buf[nRead] = '\0';
        str += buf;
 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
        // the beginning of the current line, changes inside the loop
        wxString::const_iterator lineStart = str.begin();
        const wxString::const_iterator end = str.end();
        for ( wxString::const_iterator p = lineStart + lenOld; p != end; p++ )
        {
            const wxChar ch = *p;
            switch ( ch )
            {
                case '\n':
                    // could be a DOS or Unix EOL
                    if ( chLast == '\r' )
                    {
                        AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos);
                    }
                    else // bare '\n', Unix style
                    {
                        AddLine(wxString(lineStart, p), wxTextFileType_Unix);
                    }
            case '\r':
                if ( chLast == '\r' )
                {
                    // Mac empty line
                    AddLine(wxEmptyString, wxTextFileType_Mac);
                    lineStart = p + 1;
-                    break;
+                }
                //else: we don't know what this is yet -- could be a Mac EOL or
                //      start of DOS EOL so wait for next char
                break;
-                case '\r':
+            default:
-                    if ( chLast == '\r' )
+                if ( chLast == '\r' )
-                    {
+                {
-                        // Mac empty line
+                    // Mac line termination
-                        AddLine(wxEmptyString, wxTextFileType_Mac);
+                    AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
-                        lineStart = p + 1;
+                    lineStart = p;
-                    }
+                }
                    //else: we don't know what this is yet -- could be a Mac EOL or
                    //      start of DOS EOL so wait for next char
                    break;
                default:
                    if ( chLast == '\r' )
                    {
                        // Mac line termination
                        AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
                        lineStart = p;
                    }
            }
            chLast = ch;
        }
-        // remove the part we already processed
+        chLast = ch;
        str.erase(0, lineStart - str.begin());
    }
    // anything in the last line?
-    if ( !str.empty() )
+    if ( lineStart != end )
    {
-        AddLine(str, wxTextFileType_None); // no line terminator
+        // add unterminated last line
        AddLine(wxString(lineStart, end), wxTextFileType_None);
    }
    return true;