From e8c540011895d55449afa1b67d073ae8577815a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Slav=C3=ADk?= Date: Mon, 16 Dec 2013 18:54:42 +0000 Subject: [PATCH] wxTextFile: don't lose data with CRCRLF line endings. Previously, when reading files with the completely nonstandard CRCRLF line endings - which do occur in the wild thanks to broken Notepad - all content would be replaced with empty lines. Fix the code to do what many editors do with such files: treat this as a data line followed by an empty one. This is not ideal, but it is better than discarding data - and arguably, silently cleaning up the endings wouldn't be great either (and would add extra complications for what is an obscure and broken case). See http://stackoverflow.com/questions/6998506/text-file-with-0d-0d-0a-line-breaks git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@75387 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- src/common/textfile.cpp | 20 +++++++++++++++++--- tests/textfile/textfiletest.cpp | 21 +++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/common/textfile.cpp b/src/common/textfile.cpp index ddf88c9e46..86b11bfd2f 100644 --- a/src/common/textfile.cpp +++ b/src/common/textfile.cpp @@ -245,9 +245,23 @@ bool wxTextFile::OnRead(const wxMBConv& conv) case '\r': if ( lastWasCR ) { - // Mac empty line - AddLine(wxEmptyString, wxTextFileType_Mac); - lineStart = p + 1; + wxString::const_iterator next = p + 1; + // Peek at the next character to detect weirdly formatted + // files ending in CRCRLF. Without this, we would silently + // lose all the lines; this way, we insert empty lines + // (as some editors do), but don't lose any data. 
+ // See here for more information: + // http://stackoverflow.com/questions/6998506/text-file-with-0d-0d-0a-line-breaks + if ( next != end && *next == '\n' ) + { + AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac); + } + else + { + // Mac empty line + AddLine(wxEmptyString, wxTextFileType_Mac); + } + lineStart = next; } //else: we don't know what this is yet -- could be a Mac EOL or // start of DOS EOL so wait for next char diff --git a/tests/textfile/textfiletest.cpp b/tests/textfile/textfiletest.cpp index d523e6371b..35d9257b4c 100644 --- a/tests/textfile/textfiletest.cpp +++ b/tests/textfile/textfiletest.cpp @@ -49,6 +49,7 @@ private: CPPUNIT_TEST( ReadMac ); CPPUNIT_TEST( ReadMacLast ); CPPUNIT_TEST( ReadMixed ); + CPPUNIT_TEST( ReadCRCRLF ); #if wxUSE_UNICODE CPPUNIT_TEST( ReadUTF8 ); CPPUNIT_TEST( ReadUTF16 ); @@ -64,6 +65,7 @@ private: void ReadMac(); void ReadMacLast(); void ReadMixed(); + void ReadCRCRLF(); #if wxUSE_UNICODE void ReadUTF8(); void ReadUTF16(); @@ -206,6 +208,25 @@ void TextFileTestCase::ReadMixed() CPPUNIT_ASSERT_EQUAL( wxString(wxT("baz")), f.GetLastLine() ); } +void TextFileTestCase::ReadCRCRLF() +{ + // Notepad may create files with CRCRLF line endings (see + // http://stackoverflow.com/questions/6998506/text-file-with-0d-0d-0a-line-breaks). + // Older versions of wx would lose all data when reading such files. + // Test that the data are read, but don't worry about empty lines in between or + // line endings. + CreateTestFile("foo\r\r\nbar\r\r\nbaz\r\r\n"); + + wxTextFile f; + CPPUNIT_ASSERT( f.Open(wxString::FromAscii(GetTestFileName())) ); + + wxString all; + for ( wxString str = f.GetFirstLine(); !f.Eof(); str = f.GetNextLine() ) + all += str; + + CPPUNIT_ASSERT_EQUAL( "foobarbaz", all ); +} + #if wxUSE_UNICODE void TextFileTestCase::ReadUTF8()