From a30bee473e0a59ad650551d800e7c96f6419ea01 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Fri, 15 Dec 2017 18:34:43 +0100 Subject: [PATCH 1/3] Make wxFile::ReadAll() work for unseekable files too Calling this function with an unseekable file, such as any file under /sys on Linux systems, would previously just hang as the loop condition was never satisfied when length was -1. Fix this by checking for this case and using a different approach by extending the buffer we read the data into as we go instead of preallocating it all at once. See #9965. --- docs/changes.txt | 1 + interface/wx/file.h | 2 ++ src/common/file.cpp | 71 ++++++++++++++++++++++++++++++++++------- tests/file/filetest.cpp | 24 ++++++++++++++ 4 files changed, 86 insertions(+), 12 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index 2b982c304a..5ac1eb417a 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -106,6 +106,7 @@ All: - Add UTF-8 support to wxZipOutputStream (Tobias Taschner). - Update all bundled 3rd party libraries to their latest versions. - Use unique prefix for all zlib symbols to avoid link conflicts. +- Make wxFile::ReadAll() work for unseekable files too. All (GUI): diff --git a/interface/wx/file.h b/interface/wx/file.h index 31ea6bdd4f..78fb12aa3c 100644 --- a/interface/wx/file.h +++ b/interface/wx/file.h @@ -377,6 +377,8 @@ public: /** Reads the entire contents of the file into a string. + Since wxWidgets 3.1.1 this method also works for unseekable files. + @param str Non-@NULL pointer to a string to read data into. @param conv diff --git a/src/common/file.cpp b/src/common/file.cpp index 0e976273dc..84cf9f78fc 100644 --- a/src/common/file.cpp +++ b/src/common/file.cpp @@ -261,24 +261,71 @@ bool wxFile::ReadAll(wxString *str, const wxMBConv& conv) { wxCHECK_MSG( str, false, wxS("Output string must be non-NULL") ); + static const ssize_t READSIZE = 4096; + + wxCharBuffer buf; + ssize_t length = Length(); - wxCHECK_MSG( (wxFileOffset)length == Length(), false, wxT("huge file not supported") ); - - wxCharBuffer buf(length); - char* p = buf.data(); - for ( ;; ) + if ( length != -1 ) { - static const ssize_t READSIZE = 4096; + wxCHECK_MSG( (wxFileOffset)length == Length(), false, wxT("huge file not supported") ); - ssize_t nread = Read(p, length > READSIZE ? READSIZE : length); - if ( nread == wxInvalidOffset ) + if ( !buf.extend(length) ) return false; - p += nread; - if ( length <= nread ) - break; + char* p = buf.data(); + for ( ;; ) + { + ssize_t nread = Read(p, length > READSIZE ? READSIZE : length); + if ( nread == wxInvalidOffset ) + return false; - length -= nread; + if ( nread == 0 ) + { + // We have reached EOF before reading the entire length of the + // file. This can happen for some special files (e.g. those + // under /sys on Linux systems) or even for regular files if + // another process has truncated the file since we started + // reading it, so deal with it gracefully. + buf.shrink(p - buf.data()); + break; + } + + p += nread; + length -= nread; + + if ( !length ) + { + // Notice that we don't keep reading after getting the expected + // number of bytes, even though in principle a situation + // similar to the one described above, with another process + // extending the file since we started to read it, is possible. + // But returning just the data that was in the file when we + // originally started reading it isn't really wrong in this + // case, so keep things simple and just do it like this. + break; + } + } + } + else // File is not seekable + { + for ( ;; ) + { + const size_t len = buf.length(); + if ( !buf.extend(len + READSIZE) ) + return false; + + ssize_t nread = Read(buf.data() + len, READSIZE); + if ( nread == wxInvalidOffset ) + return false; + + if ( nread < READSIZE ) + { + // We have reached EOF. + buf.shrink(len + nread); + break; + } + } } str->assign(buf, conv); diff --git a/tests/file/filetest.cpp b/tests/file/filetest.cpp index fdad2d8e4a..faeed69dbf 100644 --- a/tests/file/filetest.cpp +++ b/tests/file/filetest.cpp @@ -139,4 +139,28 @@ void FileTestCase::TempFile() CPPUNIT_ASSERT( wxRemoveFile(wxT("test2")) ); } +#ifdef __LINUX__ + +// Check that GetSize() works correctly for special files. +TEST_CASE("wxFile::Special", "[file][linux][special-file]") +{ + // This file is not seekable and has 0 size, but can still be read. + wxFile fileProc("/proc/diskstats"); + CHECK( fileProc.IsOpened() ); + + wxString s; + CHECK( fileProc.ReadAll(&s) ); + CHECK( !s.empty() ); + + // All files in /sys seem to have size of 4KiB currently, even if they + // don't have that much data in them. + wxFile fileSys("/sys/power/state"); + CHECK( fileSys.IsOpened() ); + CHECK( fileSys.ReadAll(&s) ); + CHECK( !s.empty() ); + CHECK( s.length() < 4096 ); +} + +#endif // __LINUX__ + #endif // wxUSE_FILE From cc86de14168d143543a1eb530399ffe774dbde64 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Fri, 15 Dec 2017 18:37:35 +0100 Subject: [PATCH 2/3] Replace file reading code in wxTextFile with wxFile::ReadAll() There is nothing special about wxTextFile justifying having code for dealing with non-seekable files in it, so put this code into wxFile itself and just call its ReadAll() method from here. This is a better fix than 41f6f17d01562aa09bdbcc6b02241b62f1d06b75 originally applied (and which is going to be reverted next) as it doesn't break wxFile::Length() for these files and also avoids triggering an assert if the file we're trying to read was truncated by another process in the meanwhile -- which can happen and doesn't indicate a programming error and so shouldn't result in an assert. Also add a unit test checking that this really works. See #3802, #8354, #9965. --- src/common/textfile.cpp | 115 +------------------------------- tests/textfile/textfiletest.cpp | 26 +++++++- 2 files changed, 28 insertions(+), 113 deletions(-) diff --git a/src/common/textfile.cpp b/src/common/textfile.cpp index 279013c28c..7dc3421d4a 100644 --- a/src/common/textfile.cpp +++ b/src/common/textfile.cpp @@ -93,121 +93,12 @@ bool wxTextFile::OnRead(const wxMBConv& conv) // file should be opened wxASSERT_MSG( m_file.IsOpened(), wxT("can't read closed file") ); - // read the entire file in memory: this is not the most efficient thing to - // do it but there is no good way to avoid it in Unicode build because if - // we read the file block by block we can't convert each block to Unicode - // separately (the last multibyte char in the block might be only partially - // read and so the conversion would fail) and, as the file contents is kept - // in memory by wxTextFile anyhow, it shouldn't be a big problem to read - // the file entirely - size_t bufSize = 0; - - // number of bytes to (try to) read from disk at once - static const size_t BLOCK_SIZE = 4096; - - wxCharBuffer buf; - - // first determine if the file is seekable or not and so whether we can - // determine its length in advance - wxFileOffset fileLength; + wxString str; + if ( !m_file.ReadAll(&str, conv) ) { - wxLogNull logNull; - fileLength = m_file.Length(); - } - - // some non-seekable files under /proc under Linux pretend that they're - // seekable but always return 0; others do return an error - const bool seekable = fileLength != wxInvalidOffset && fileLength != 0; - if ( seekable ) - { - // we know the required length, so set the buffer size in advance - bufSize = fileLength; - if ( !buf.extend(bufSize) ) - return false; - - // if the file is seekable, also check that we're at its beginning - wxASSERT_MSG( m_file.Tell() == 0, wxT("should be at start of file") ); - - char *dst = buf.data(); - for ( size_t nRemaining = bufSize; nRemaining > 0; ) - { - size_t nToRead = BLOCK_SIZE; - - // the file size could have changed, avoid overflowing the buffer - // even if it did - if ( nToRead > nRemaining ) - nToRead = nRemaining; - - ssize_t nRead = m_file.Read(dst, nToRead); - - if ( nRead == wxInvalidOffset ) - { - // read error (error message already given in wxFile::Read) - return false; - } - - if ( nRead == 0 ) - { - // this file can't be empty because we checked for this above - // so this must be the end of file - break; - } - - dst += nRead; - nRemaining -= nRead; - } - - wxASSERT_MSG( dst - buf.data() == (wxFileOffset)bufSize, - wxT("logic error") ); - } - else // file is not seekable - { - char block[BLOCK_SIZE]; - for ( ;; ) - { - ssize_t nRead = m_file.Read(block, WXSIZEOF(block)); - - if ( nRead == wxInvalidOffset ) - { - // read error (error message already given in wxFile::Read) - return false; - } - - if ( nRead == 0 ) - { - // if no bytes have been read, presumably this is a - // valid-but-empty file - if ( bufSize == 0 ) - return true; - - // otherwise we've finished reading the file - break; - } - - // extend the buffer for new data - if ( !buf.extend(bufSize + nRead) ) - return false; - - // and append it to the buffer - memcpy(buf.data() + bufSize, block, nRead); - bufSize += nRead; - } - } - - const wxString str(buf, conv, bufSize); - - // there's no risk of this happening in ANSI build -#if wxUSE_UNICODE - if ( bufSize > 4 && str.empty() ) - { - wxLogError(_("Failed to convert file \"%s\" to Unicode."), GetName()); + wxLogError(_("Failed to read text file \"%s\"."), GetName()); return false; } -#endif // wxUSE_UNICODE - - // we don't need this memory any more - buf.reset(); - // now break the buffer in lines diff --git a/tests/textfile/textfiletest.cpp b/tests/textfile/textfiletest.cpp index 828756db3e..910cbe3cc7 100644 --- a/tests/textfile/textfiletest.cpp +++ b/tests/textfile/textfiletest.cpp @@ -338,5 +338,29 @@ void TextFileTestCase::ReadBig() f[NUM_LINES - 1] ); } -#endif // wxUSE_TEXTFILE +#ifdef __LINUX__ +// Check if using wxTextFile with special files, whose reported size doesn't +// correspond to the real amount of data in them, works. +TEST_CASE("wxTextFile::Special", "[textfile][linux][special-file]") +{ + SECTION("/proc") + { + wxTextFile f; + CHECK( f.Open("/proc/diskstats") ); + CHECK( f.GetLineCount() > 1 ); + } + + SECTION("/sys") + { + wxTextFile f; + CHECK( f.Open("/sys/power/state") ); + REQUIRE( f.GetLineCount() == 1 ); + INFO( "/sys/power/state contains \"" << f[0] << "\"" ); + CHECK( f[0].find("mem") != wxString::npos ); + } +} + +#endif // __LINUX__ + +#endif // wxUSE_TEXTFILE From a03ece4880d68743f0a40710f00b82da223c870f Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Fri, 15 Dec 2017 17:19:09 +0100 Subject: [PATCH 3/3] Remove st_blocks hack from wxFile::Length() under Linux This reverts commit 41f6f17d01562aa09bdbcc6b02241b62f1d06b75 ("return 0 (meaning the file is not seekable, as the docs now explain) instead of 4KB for the files in sysfs under Linux") as it seems to be wrong to return a value different from what "ls -l" or "stat" return here and the original problem was solved in a better way in the previous commit. See #9965. Closes #17818. --- interface/wx/file.h | 11 +++++++---- src/common/file.cpp | 15 --------------- tests/file/filetest.cpp | 5 +++++ tests/filename/filenametest.cpp | 15 +++++++++++++++ 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/interface/wx/file.h b/interface/wx/file.h index 78fb12aa3c..4e354185a0 100644 --- a/interface/wx/file.h +++ b/interface/wx/file.h @@ -95,11 +95,14 @@ public: /** Returns the length of the file. - This method may return ::wxInvalidOffset if the length couldn't be - determined or 0 even for non-empty files if the file is not seekable. + Returns ::wxInvalidOffset if the length couldn't be determined. - In general, the only way to determine if the file for which this function - returns 0 is really empty or not is to try reading from it. + Please also note that there is @e no guarantee that reading that many + bytes from the file will always succeed. While this is true for regular + files (unless the file size has been changed by another process in + between Length() and Read() calls), some special files, such as most + files under @c /sys or @c /proc directories under Linux, don't actually + contain as much data as their size indicates. */ wxFileOffset Length() const; diff --git a/src/common/file.cpp b/src/common/file.cpp index 84cf9f78fc..9446bb32d9 100644 --- a/src/common/file.cpp +++ b/src/common/file.cpp @@ -474,21 +474,6 @@ wxFileOffset wxFile::Length() const { wxASSERT( IsOpened() ); - // we use a special method for Linux systems where files in sysfs (i.e. - // those under /sys typically) return length of 4096 bytes even when - // they're much smaller -- this is a problem as it results in errors later - // when we try reading 4KB from them -#ifdef __LINUX__ - struct stat st; - if ( fstat(m_fd, &st) == 0 ) - { - // returning 0 for the special files indicates to the caller that they - // are not seekable - return st.st_blocks ? st.st_size : 0; - } - //else: failed to stat, try the normal method -#endif // __LINUX__ - wxFileOffset iRc = Tell(); if ( iRc != wxInvalidOffset ) { wxFileOffset iLen = const_cast(this)->SeekEnd(); diff --git a/tests/file/filetest.cpp b/tests/file/filetest.cpp index faeed69dbf..f1876daf94 100644 --- a/tests/file/filetest.cpp +++ b/tests/file/filetest.cpp @@ -144,6 +144,10 @@ void FileTestCase::TempFile() // Check that GetSize() works correctly for special files. TEST_CASE("wxFile::Special", "[file][linux][special-file]") { + // We can't test /proc/kcore here, unlike in the similar + // wxFileName::GetSize() test, as wxFile must be able to open it (at least + // for reading) and usually we don't have the permissions to do it. + // This file is not seekable and has 0 size, but can still be read. wxFile fileProc("/proc/diskstats"); CHECK( fileProc.IsOpened() ); @@ -155,6 +159,7 @@ TEST_CASE("wxFile::Special", "[file][linux][special-file]") // All files in /sys seem to have size of 4KiB currently, even if they // don't have that much data in them. wxFile fileSys("/sys/power/state"); + CHECK( fileSys.Length() == 4096 ); CHECK( fileSys.IsOpened() ); CHECK( fileSys.ReadAll(&s) ); CHECK( !s.empty() ); diff --git a/tests/filename/filenametest.cpp b/tests/filename/filenametest.cpp index 739194323a..762061813c 100644 --- a/tests/filename/filenametest.cpp +++ b/tests/filename/filenametest.cpp @@ -1049,3 +1049,18 @@ void FileNameTestCase::TestShortcuts() } #endif // __WINDOWS__ + +#ifdef __LINUX__ + +// Check that GetSize() works correctly for special files. +TEST_CASE("wxFileName::GetSizeSpecial", "[filename][linux][special-file]") +{ + wxULongLong size = wxFileName::GetSize("/proc/kcore"); + INFO( "size of /proc/kcore=" << size ); + CHECK( size > 0 ); + + // All files in /sys seem to have size of 4KiB currently. + CHECK( wxFileName::GetSize("/sys/power/state") == 4096 ); +} + +#endif // __LINUX__