Make wxRegEx::Replace run in linear time

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@36159 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Michael Wetherell
2005-11-11 17:22:13 +00:00
parent 0519aac9db
commit f302b6a445

View File

@@ -45,10 +45,22 @@
#include <regex.h> #include <regex.h>
#include "wx/regex.h" #include "wx/regex.h"
// WX_NEED_CONVERT is defined when the regex lib uses 'char' but 'wxChar' is
// wide, i.e. in Unicode builds where __REG_NOFRONT is not defined (the
// configuration in which matching goes through regexec() rather than
// wx_re_exec()).
//
// When it is defined, text has to be converted with wxConvertWX2MB() before
// it is handed to the regex library.
#if wxUSE_UNICODE && !defined(__REG_NOFRONT)
# define WX_NEED_CONVERT
#endif
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// private classes // private classes
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// the character type used by the regular expression engine
//
// wxRegExImpl::Matches() takes its input as 'const wxRegChar *', so callers
// must pass text already in this representation.
#ifndef WX_NEED_CONVERT
// no conversion needed: the engine is given wxChar strings directly
typedef wxChar wxRegChar;
#else
// the engine works on plain 'char' strings while wxChar is wide, so input is
// converted (see wxConvertWX2MB() usage) before being matched
typedef char wxRegChar;
#endif
// the real implementation of wxRegEx // the real implementation of wxRegEx
class wxRegExImpl class wxRegExImpl
{ {
@@ -62,7 +74,7 @@ public:
// RE operations // RE operations
bool Compile(const wxString& expr, int flags = 0); bool Compile(const wxString& expr, int flags = 0);
bool Matches(const wxChar *str, int flags = 0) const; bool Matches(const wxRegChar *str, int flags, size_t len) const;
bool GetMatch(size_t *start, size_t *len, size_t index = 0) const; bool GetMatch(size_t *start, size_t *len, size_t index = 0) const;
size_t GetMatchCount() const; size_t GetMatchCount() const;
int Replace(wxString *pattern, const wxString& replacement, int Replace(wxString *pattern, const wxString& replacement,
@@ -130,7 +142,7 @@ wxRegExImpl::~wxRegExImpl()
wxString wxRegExImpl::GetErrorMsg(int errorcode, bool badconv) const wxString wxRegExImpl::GetErrorMsg(int errorcode, bool badconv) const
{ {
#if wxUSE_UNICODE && !defined(__REG_NOFRONT) #ifdef WX_NEED_CONVERT
// currently only needed when using system library in Unicode mode // currently only needed when using system library in Unicode mode
if ( badconv ) if ( badconv )
{ {
@@ -255,7 +267,7 @@ bool wxRegExImpl::Compile(const wxString& expr, int flags)
return IsValid(); return IsValid();
} }
bool wxRegExImpl::Matches(const wxChar *str, int flags) const bool wxRegExImpl::Matches(const wxRegChar *str, int flags, size_t len) const
{ {
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") ); wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
@@ -278,11 +290,9 @@ bool wxRegExImpl::Matches(const wxChar *str, int flags) const
// do match it // do match it
#ifdef __REG_NOFRONT #ifdef __REG_NOFRONT
bool conv = true; int rc = wx_re_exec(&self->m_RegEx, str, len, NULL, m_nMatches, m_Matches, flagsRE);
int rc = wx_re_exec(&self->m_RegEx, str, wxStrlen(str), NULL, m_nMatches, m_Matches, flagsRE);
#else #else
const wxWX2MBbuf conv = wxConvertWX2MB(str); int rc = str ? regexec(&self->m_RegEx, str, m_nMatches, m_Matches, flagsRE) : REG_BADPAT;
int rc = conv ? regexec(&self->m_RegEx, conv, m_nMatches, m_Matches, flagsRE) : REG_BADPAT;
#endif #endif
switch ( rc ) switch ( rc )
@@ -293,8 +303,8 @@ bool wxRegExImpl::Matches(const wxChar *str, int flags) const
default: default:
// an error occurred // an error occurred
wxLogError(_("Failed to match '%s' in regular expression: %s"), wxLogError(_("Failed to find match for regular expression: %s"),
str, GetErrorMsg(rc, !conv).c_str()); GetErrorMsg(rc, !str).c_str());
// fall through // fall through
case REG_NOMATCH: case REG_NOMATCH:
@@ -336,9 +346,29 @@ int wxRegExImpl::Replace(wxString *text,
wxCHECK_MSG( text, wxNOT_FOUND, _T("NULL text in wxRegEx::Replace") ); wxCHECK_MSG( text, wxNOT_FOUND, _T("NULL text in wxRegEx::Replace") );
wxCHECK_MSG( IsValid(), wxNOT_FOUND, _T("must successfully Compile() first") ); wxCHECK_MSG( IsValid(), wxNOT_FOUND, _T("must successfully Compile() first") );
// the input string
#ifndef WX_NEED_CONVERT
const wxChar *textstr = text->c_str();
size_t textlen = text->length();
#else
const wxWX2MBbuf textstr = wxConvertWX2MB(*text);
if (!textstr)
{
wxLogError(_("Failed to find match for regular expression: %s"),
GetErrorMsg(0, true).c_str());
return 0;
}
size_t textlen = strlen(textstr);
text->clear();
#endif
// the replacement text // the replacement text
wxString textNew; wxString textNew;
// the result, allow 25% extra
wxString result;
result.reserve(5 * textlen / 4);
// attempt at optimization: don't iterate over the string if it doesn't // attempt at optimization: don't iterate over the string if it doesn't
// contain back references at all // contain back references at all
bool mayHaveBackrefs = bool mayHaveBackrefs =
@@ -350,10 +380,6 @@ int wxRegExImpl::Replace(wxString *text,
} }
// the position where we start looking for the match // the position where we start looking for the match
//
// NB: initial version had a nasty bug because it used a wxChar* instead of
// an index but the problem is that replace() in the loop invalidates
// all pointers into the string so we have to use indices instead
size_t matchStart = 0; size_t matchStart = 0;
// number of replacement made: we won't make more than maxMatches of them // number of replacement made: we won't make more than maxMatches of them
@@ -363,7 +389,9 @@ int wxRegExImpl::Replace(wxString *text,
// note that "^" shouldn't match after the first call to Matches() so we // note that "^" shouldn't match after the first call to Matches() so we
// use wxRE_NOTBOL to prevent it from happening // use wxRE_NOTBOL to prevent it from happening
while ( (!maxMatches || countRepl < maxMatches) && while ( (!maxMatches || countRepl < maxMatches) &&
Matches(text->c_str() + matchStart, countRepl ? wxRE_NOTBOL : 0) ) Matches(textstr + matchStart,
countRepl ? wxRE_NOTBOL : 0,
textlen - matchStart) )
{ {
// the string possibly contains back references: we need to calculate // the string possibly contains back references: we need to calculate
// the replacement text anew after each match // the replacement text anew after each match
@@ -407,8 +435,8 @@ int wxRegExImpl::Replace(wxString *text,
} }
else else
{ {
textNew += wxString(text->c_str() + matchStart + start, textNew += wxString(textstr + matchStart + start,
len); *wxConvCurrent, len);
mayHaveBackrefs = true; mayHaveBackrefs = true;
} }
@@ -429,14 +457,31 @@ int wxRegExImpl::Replace(wxString *text,
return wxNOT_FOUND; return wxNOT_FOUND;
} }
// an insurance against implementations that don't grow exponentially
// to ensure building the result takes linear time
if (result.capacity() < result.length() + start + textNew.length())
result.reserve(2 * result.length());
#ifndef WX_NEED_CONVERT
result.append(*text, matchStart, start);
#else
result.append(wxString(textstr + matchStart, *wxConvCurrent, start));
#endif
matchStart += start; matchStart += start;
text->replace(matchStart, len, textNew); result.append(textNew);
countRepl++; countRepl++;
matchStart += textNew.length(); matchStart += len;
} }
#ifndef WX_NEED_CONVERT
result.append(*text, matchStart, wxString::npos);
#else
result.append(wxString(textstr + matchStart, *wxConvCurrent));
#endif
*text = result;
return countRepl; return countRepl;
} }
@@ -449,7 +494,6 @@ void wxRegEx::Init()
m_impl = NULL; m_impl = NULL;
} }
wxRegEx::~wxRegEx() wxRegEx::~wxRegEx()
{ {
delete m_impl; delete m_impl;
@@ -478,7 +522,11 @@ bool wxRegEx::Matches(const wxChar *str, int flags) const
{ {
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") ); wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
return m_impl->Matches(str, flags); #ifndef WX_NEED_CONVERT
return m_impl->Matches(str, flags, wxStrlen(str));
#else
return m_impl->Matches(wxConvertWX2MB(str), flags, wxStrlen(str));
#endif
} }
bool wxRegEx::GetMatch(size_t *start, size_t *len, size_t index) const bool wxRegEx::GetMatch(size_t *start, size_t *len, size_t index) const