Rewrite wxTextFile's newlines parsing to fix multiple bugs.
Remove the complicated parsing state machine that contained bugs with unexpected inputs and was very hard to modify correctly. Replace it with much simpler code that looks ahead, instead of deducing line endings from past characters. The new code never loses lines with data and calls AddLine() on the first newline character it encounters, peeking ahead to determine the line ending type. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@75799 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -208,107 +208,46 @@ bool wxTextFile::OnRead(const wxMBConv& conv)
|
|||||||
|
|
||||||
// now break the buffer in lines
|
// now break the buffer in lines
|
||||||
|
|
||||||
// was the last processed character a CR?
|
|
||||||
bool lastWasCR = false;
|
|
||||||
|
|
||||||
// the beginning of the current line, changes inside the loop
|
// the beginning of the current line, changes inside the loop
|
||||||
wxString::const_iterator lineStart = str.begin();
|
wxString::const_iterator lineStart = str.begin();
|
||||||
const wxString::const_iterator end = str.end();
|
const wxString::const_iterator end = str.end();
|
||||||
for ( wxString::const_iterator p = lineStart; p != end; p++ )
|
for ( wxString::const_iterator p = lineStart; p != end; p++ )
|
||||||
{
|
{
|
||||||
const wxChar ch = *p;
|
const wxChar ch = *p;
|
||||||
switch ( ch )
|
if ( ch == '\r' || ch == '\n' )
|
||||||
{
|
{
|
||||||
case '\n':
|
// Determine the kind of line ending this is.
|
||||||
// could be a DOS or Unix EOL
|
wxTextFileType lineType = wxTextFileType_None;
|
||||||
if ( lastWasCR )
|
if ( ch == '\r' )
|
||||||
{
|
{
|
||||||
if ( p - 1 >= lineStart )
|
wxString::const_iterator next = p + 1;
|
||||||
{
|
if ( next != end && *next == '\n' )
|
||||||
AddLine(wxString(lineStart, p - 1), wxTextFileType_Dos);
|
lineType = wxTextFileType_Dos;
|
||||||
}
|
else
|
||||||
else
|
lineType = wxTextFileType_Mac;
|
||||||
{
|
}
|
||||||
// there were two line endings, so add an empty line:
|
else // ch == '\n'
|
||||||
AddLine(wxEmptyString, wxTextFileType_Dos);
|
{
|
||||||
}
|
lineType = wxTextFileType_Unix;
|
||||||
}
|
}
|
||||||
else // bare '\n', Unix style
|
|
||||||
{
|
|
||||||
AddLine(wxString(lineStart, p), wxTextFileType_Unix);
|
|
||||||
}
|
|
||||||
|
|
||||||
lineStart = p + 1;
|
AddLine(wxString(lineStart, p), lineType);
|
||||||
lastWasCR = false;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case '\r':
|
// DOS EOL is the only one consisting of two chars, not one.
|
||||||
if ( lastWasCR )
|
if ( lineType == wxTextFileType_Dos )
|
||||||
{
|
p++;
|
||||||
wxString::const_iterator next = p + 1;
|
|
||||||
// Peek at the next character to detect weirdly formatted
|
|
||||||
// files ending in CRCRLF. Without this, we would silently
|
|
||||||
// loose all the lines; this way, we insert empty lines
|
|
||||||
// (as some editors do), but don't loose any data.
|
|
||||||
// See here for more information:
|
|
||||||
// http://stackoverflow.com/questions/6998506/text-file-with-0d-0d-0a-line-breaks
|
|
||||||
if ( next != end && *next == '\n' )
|
|
||||||
{
|
|
||||||
AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Mac empty line
|
|
||||||
AddLine(wxEmptyString, wxTextFileType_Mac);
|
|
||||||
}
|
|
||||||
lineStart = next;
|
|
||||||
}
|
|
||||||
//else: we don't know what this is yet -- could be a Mac EOL or
|
|
||||||
// start of DOS EOL so wait for next char
|
|
||||||
|
|
||||||
lastWasCR = true;
|
lineStart = p + 1;
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
if ( lastWasCR )
|
|
||||||
{
|
|
||||||
// Mac line termination
|
|
||||||
if ( p - 1 >= lineStart )
|
|
||||||
{
|
|
||||||
AddLine(wxString(lineStart, p - 1), wxTextFileType_Mac);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// there were two line endings, so add an empty line:
|
|
||||||
AddLine(wxEmptyString, wxTextFileType_Mac);
|
|
||||||
}
|
|
||||||
lineStart = p;
|
|
||||||
}
|
|
||||||
lastWasCR = false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// anything in the last line?
|
// anything in the last line?
|
||||||
if ( lineStart != end )
|
if ( lineStart != end )
|
||||||
{
|
{
|
||||||
// add the last line, notice that it may have been terminated with CR
|
// Add the last line; notice that it is certainly not terminated with a
|
||||||
// as we don't end the line immediately when we see a CR, as it could
|
// newline, otherwise it would be handled above.
|
||||||
// be followed by a LF.
|
|
||||||
wxString lastLine(lineStart, end);
|
wxString lastLine(lineStart, end);
|
||||||
wxTextFileType lastType;
|
AddLine(lastLine, wxTextFileType_None);
|
||||||
if ( lastWasCR )
|
|
||||||
{
|
|
||||||
// last line had Mac EOL, exclude it from the string
|
|
||||||
lastLine.RemoveLast();
|
|
||||||
lastType = wxTextFileType_Mac;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// last line wasn't terminated at all
|
|
||||||
lastType = wxTextFileType_None;
|
|
||||||
}
|
|
||||||
|
|
||||||
AddLine(lastLine, lastType);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
Reference in New Issue
Block a user