Use re_search directly instead of regexec when using the GNU regex lib

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@36211 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Michael Wetherell
2005-11-20 17:25:33 +00:00
parent 7ede73899d
commit ab0f0eddc4
7 changed files with 138 additions and 41 deletions

3
configure vendored
View File

@@ -23979,7 +23979,8 @@ echo "${ECHO_T}$ac_cv_header_regex_h" >&6
fi fi
if test $ac_cv_header_regex_h = yes; then if test $ac_cv_header_regex_h = yes; then
for ac_func in regcomp
for ac_func in regcomp re_search
do do
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
echo "$as_me:$LINENO: checking for $ac_func" >&5 echo "$as_me:$LINENO: checking for $ac_func" >&5

View File

@@ -2342,7 +2342,7 @@ if test "$wxUSE_REGEX" != "no"; then
if test "$wxUSE_REGEX" = "sys" -o "$wxUSE_REGEX" = "yes" ; then if test "$wxUSE_REGEX" = "sys" -o "$wxUSE_REGEX" = "yes" ; then
dnl according to Unix 98 specs, regcomp() is in libc but I believe that dnl according to Unix 98 specs, regcomp() is in libc but I believe that
dnl on some old systems it may be in libregex - check for it too? dnl on some old systems it may be in libregex - check for it too?
AC_CHECK_HEADER(regex.h, [AC_CHECK_FUNCS(regcomp)]) AC_CHECK_HEADER(regex.h, [AC_CHECK_FUNCS(regcomp re_search)])
if test "x$ac_cv_func_regcomp" != "xyes"; then if test "x$ac_cv_func_regcomp" != "xyes"; then
if test "$wxUSE_REGEX" = "sys" ; then if test "$wxUSE_REGEX" = "sys" ; then

View File

@@ -197,12 +197,15 @@ returns {\tt true} if matches and {\tt false} otherwise.
{\it Flags} may be combination of {\tt wxRE\_NOTBOL} and {\tt wxRE\_NOTEOL}. {\it Flags} may be combination of {\tt wxRE\_NOTBOL} and {\tt wxRE\_NOTEOL}.
System regex libraries always assume the text being searched is null Some regex libraries assume that the text given is null terminated, while
terminated and any length given is ignored. others require the length be given as a separate parameter. Therefore for
maximum portability assume that {\it text} cannot contain embedded nulls.
When using the built-in regex library, the first overload obtains the length When the {\it Matches(const wxChar *text, int flags = 0)} form is used,
of the string using wxStrlen, the second from the {\it len} parameter and the a {\it wxStrlen()} will be done internally if the regex library requires the
third from the length of the {\it wxString}. length. When using {\it Matches()} in a loop
the {\it Matches(text, flags, len)} form can be used instead, making it
possible to avoid a {\it wxStrlen()} inside the loop.
May only be called after successful call to \helpref{Compile()}{wxregexcompile}. May only be called after successful call to \helpref{Compile()}{wxregexcompile}.

View File

@@ -94,7 +94,7 @@ public:
// true if matches and false otherwise // true if matches and false otherwise
// //
// flags may be combination of wxRE_NOTBOL and wxRE_NOTEOL // flags may be combination of wxRE_NOTBOL and wxRE_NOTEOL
// len may be the length of text (ignored except by built-in regex lib) // len may be the length of text (ignored by most system regex libs)
// //
// may only be called after successful call to Compile() // may only be called after successful call to Compile()
bool Matches(const wxChar *text, int flags = 0) const; bool Matches(const wxChar *text, int flags = 0) const;

View File

@@ -577,6 +577,11 @@
* case WX_NO_REGEX_ADVANCED should be defined. * case WX_NO_REGEX_ADVANCED should be defined.
*/ */
#undef WX_NO_REGEX_ADVANCED #undef WX_NO_REGEX_ADVANCED
/*
* On GNU systems use re_search instead of regexec, since the latter does a
* strlen on the search text every time it is called, which hurts the
* performance of operations that match repeatedly along the same string.
*/
#undef HAVE_RE_SEARCH
/* /*
* Use SDL for audio (Unix) * Use SDL for audio (Unix)
*/ */

View File

@@ -604,6 +604,11 @@
* case WX_NO_REGEX_ADVANCED should be defined. * case WX_NO_REGEX_ADVANCED should be defined.
*/ */
#undef WX_NO_REGEX_ADVANCED #undef WX_NO_REGEX_ADVANCED
/*
* On GNU systems use re_search instead of regexec, since the latter does a
* strlen on the search text every time it is called, which hurts the
* performance of operations that match repeatedly along the same string.
*/
#undef HAVE_RE_SEARCH
/* /*
* Use SDL for audio (Unix) * Use SDL for audio (Unix)
*/ */

View File

@@ -46,23 +46,84 @@
#include "wx/regex.h" #include "wx/regex.h"
// WXREGEX_USING_BUILTIN defined when using the built-in regex lib // WXREGEX_USING_BUILTIN defined when using the built-in regex lib
// WXREGEX_BUILTIN_ONLY() wrap a parameter only used with the built-in regex // WXREGEX_IF_NEED_LEN() wrap the len parameter only used with the built-in
// WXREGEX_CONVERT_TO_MB indicates when the regex lib is using chars and // or GNU regex
// WXREGEX_CONVERT_TO_MB defined when the regex lib is using chars and
// wxChar is wide, so conversion must be done // wxChar is wide, so conversion must be done
// WXREGEX_CHAR(x) Convert wxChar to wxRegChar
//
#ifdef __REG_NOFRONT #ifdef __REG_NOFRONT
# define WXREGEX_USING_BUILTIN # define WXREGEX_USING_BUILTIN
# define WXREGEX_BUILTIN_ONLY(x) ,x # define WXREGEX_IF_NEED_LEN(x) ,x
# define WXREGEX_CHAR(x) x
#else #else
# define WXREGEX_BUILTIN_ONLY(x) # ifdef HAVE_RE_SEARCH
# define WXREGEX_IF_NEED_LEN(x) ,x
# else
# define WXREGEX_IF_NEED_LEN(x)
# endif
# if wxUSE_UNICODE # if wxUSE_UNICODE
# define WXREGEX_CONVERT_TO_MB # define WXREGEX_CONVERT_TO_MB
# endif # endif
# define WXREGEX_CHAR(x) wxConvertWX2MB(x)
#endif #endif
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// private classes // private classes
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
#ifndef HAVE_RE_SEARCH

// The array of offsets for the matches in its usual POSIX form: a plain
// array of regmatch_t holding n elements.
//
// The object owns the array: it is allocated in the constructor and freed in
// the destructor. Copying is therefore disabled, as a member-wise copy would
// make two objects delete the same array.
class wxRegExMatches
{
public:
    // the type of the match buffer expected by the matching function
    typedef regmatch_t *match_type;

    // allocate room for n match offsets; explicit so that an integer is
    // never silently converted into a match buffer
    explicit wxRegExMatches(size_t n) : m_matches(new regmatch_t[n]) { }
    ~wxRegExMatches() { delete [] m_matches; }

    // start and end offsets of the n-th match, n must be less than the
    // count given to the constructor
    //
    // the casts are needed because rm_so/rm_eo can be 64 bit quantities
    size_t Start(size_t n) const
        { return static_cast<size_t>(m_matches[n].rm_so); }
    size_t End(size_t n) const
        { return static_cast<size_t>(m_matches[n].rm_eo); }

    // the raw buffer to hand to the regex library
    regmatch_t *get() const { return m_matches; }

private:
    // copying is not allowed: declared but intentionally not implemented
    wxRegExMatches(const wxRegExMatches&);
    wxRegExMatches& operator=(const wxRegExMatches&);

    regmatch_t *m_matches;
};

#else // HAVE_RE_SEARCH

// The array of offsets for the matches in the form used by the GNU lib: a
// re_registers struct holding separate start and end arrays of n elements.
//
// The object owns both arrays: they are allocated in the constructor and
// freed in the destructor. Copying is therefore disabled, as a member-wise
// copy would make two objects delete the same arrays.
class wxRegExMatches
{
public:
    // the type of the match buffer expected by the matching function
    typedef re_registers *match_type;

    // allocate room for n match offsets; explicit so that an integer is
    // never silently converted into a match buffer
    explicit wxRegExMatches(size_t n)
    {
        m_matches.num_regs = n;
        m_matches.start = new regoff_t[n];
        m_matches.end = new regoff_t[n];
    }

    ~wxRegExMatches()
    {
        delete [] m_matches.start;
        delete [] m_matches.end;
    }

    // start and end offsets of the n-th match, n must be less than the
    // count given to the constructor
    size_t Start(size_t n) const
        { return static_cast<size_t>(m_matches.start[n]); }
    size_t End(size_t n) const
        { return static_cast<size_t>(m_matches.end[n]); }

    // the raw buffer to hand to the regex library
    re_registers *get() { return &m_matches; }

private:
    // copying is not allowed: declared but intentionally not implemented
    wxRegExMatches(const wxRegExMatches&);
    wxRegExMatches& operator=(const wxRegExMatches&);

    re_registers m_matches;
};

#endif // HAVE_RE_SEARCH
// the character type used by the regular expression engine // the character type used by the regular expression engine
#ifndef WXREGEX_CONVERT_TO_MB #ifndef WXREGEX_CONVERT_TO_MB
typedef wxChar wxRegChar; typedef wxChar wxRegChar;
@@ -84,7 +145,7 @@ public:
// RE operations // RE operations
bool Compile(const wxString& expr, int flags = 0); bool Compile(const wxString& expr, int flags = 0);
bool Matches(const wxRegChar *str, int flags bool Matches(const wxRegChar *str, int flags
WXREGEX_BUILTIN_ONLY(size_t len)) const; WXREGEX_IF_NEED_LEN(size_t len)) const;
bool GetMatch(size_t *start, size_t *len, size_t index = 0) const; bool GetMatch(size_t *start, size_t *len, size_t index = 0) const;
size_t GetMatchCount() const; size_t GetMatchCount() const;
int Replace(wxString *pattern, const wxString& replacement, int Replace(wxString *pattern, const wxString& replacement,
@@ -110,7 +171,7 @@ private:
regfree(&m_RegEx); regfree(&m_RegEx);
} }
delete [] m_Matches; delete m_Matches;
} }
// free the RE if any and reinit the members // free the RE if any and reinit the members
@@ -120,18 +181,18 @@ private:
Init(); Init();
} }
// compiled RE // compiled RE
regex_t m_RegEx; regex_t m_RegEx;
// the subexpressions data // the subexpressions data
regmatch_t *m_Matches; wxRegExMatches *m_Matches;
size_t m_nMatches; size_t m_nMatches;
// true if m_RegEx is valid // true if m_RegEx is valid
bool m_isCompiled; bool m_isCompiled;
}; };
// ============================================================================ // ============================================================================
// implementation // implementation
// ============================================================================ // ============================================================================
@@ -277,9 +338,36 @@ bool wxRegExImpl::Compile(const wxString& expr, int flags)
return IsValid(); return IsValid();
} }
#ifdef HAVE_RE_SEARCH

// Drop-in replacement for regexec() built directly on GNU re_search().
//
// The GNU regexec() is only a wrapper around re_search(). As the POSIX
// interface has no length parameter while re_search() needs one, regexec()
// has to strlen() the search text on every call. That is very costly when
// matching repeatedly along one string, e.g. during a search and replace,
// so when configure detects re_search() it is called directly with the
// length the caller already knows.
//
// preg     the compiled expression
// text     the text to search, len characters long
// matches  the registers receiving the match offsets
// eflags   combination of REG_NOTBOL and REG_NOTEOL
//
// Returns 0 if a match was found and REG_NOMATCH otherwise.
static int ReSearch(const regex_t *preg,
                    const char *text,
                    size_t len,
                    re_registers *matches,
                    int eflags)
{
    // the options of this search are stored in the pattern buffer itself,
    // hence constness has to be cast away
    regex_t * const buf = wx_const_cast(regex_t*, preg);

    // the register arrays are provided by the caller
    buf->regs_allocated = REGS_FIXED;

    buf->not_eol = (eflags & REG_NOTEOL) != 0;
    buf->not_bol = (eflags & REG_NOTBOL) != 0;

    // search the whole text starting from its beginning; any negative
    // result is reported as "no match"
    if ( re_search(buf, text, len, 0, len, matches) < 0 )
        return REG_NOMATCH;

    return 0;
}

#endif // HAVE_RE_SEARCH
bool wxRegExImpl::Matches(const wxRegChar *str, bool wxRegExImpl::Matches(const wxRegChar *str,
int flags int flags
WXREGEX_BUILTIN_ONLY(size_t len)) const WXREGEX_IF_NEED_LEN(size_t len)) const
{ {
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") ); wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
@@ -297,14 +385,18 @@ bool wxRegExImpl::Matches(const wxRegChar *str,
wxRegExImpl *self = wxConstCast(this, wxRegExImpl); wxRegExImpl *self = wxConstCast(this, wxRegExImpl);
if ( !m_Matches && m_nMatches ) if ( !m_Matches && m_nMatches )
{ {
self->m_Matches = new regmatch_t[m_nMatches]; self->m_Matches = new wxRegExMatches(m_nMatches);
} }
wxRegExMatches::match_type matches = m_Matches ? m_Matches->get() : NULL;
// do match it // do match it
#ifdef WXREGEX_USING_BUILTIN #if defined WXREGEX_USING_BUILTIN
int rc = wx_re_exec(&self->m_RegEx, str, len, NULL, m_nMatches, m_Matches, flagsRE); int rc = wx_re_exec(&self->m_RegEx, str, len, NULL, m_nMatches, matches, flagsRE);
#elif defined HAVE_RE_SEARCH
int rc = str ? ReSearch(&self->m_RegEx, str, len, matches, flagsRE) : REG_BADPAT;
#else #else
int rc = str ? regexec(&self->m_RegEx, str, m_nMatches, m_Matches, flagsRE) : REG_BADPAT; int rc = str ? regexec(&self->m_RegEx, str, m_nMatches, matches, flagsRE) : REG_BADPAT;
#endif #endif
switch ( rc ) switch ( rc )
@@ -332,13 +424,10 @@ bool wxRegExImpl::GetMatch(size_t *start, size_t *len, size_t index) const
wxCHECK_MSG( m_Matches, false, _T("must call Matches() first") ); wxCHECK_MSG( m_Matches, false, _T("must call Matches() first") );
wxCHECK_MSG( index < m_nMatches, false, _T("invalid match index") ); wxCHECK_MSG( index < m_nMatches, false, _T("invalid match index") );
const regmatch_t& match = m_Matches[index];
// we need the casts because rm_so can be a 64 bit quantity
if ( start ) if ( start )
*start = wx_truncate_cast(size_t, match.rm_so); *start = m_Matches->Start(index);
if ( len ) if ( len )
*len = wx_truncate_cast(size_t, match.rm_eo - match.rm_so); *len = m_Matches->End(index) - m_Matches->Start(index);
return true; return true;
} }
@@ -363,7 +452,7 @@ int wxRegExImpl::Replace(wxString *text,
const wxChar *textstr = text->c_str(); const wxChar *textstr = text->c_str();
size_t textlen = text->length(); size_t textlen = text->length();
#else #else
const wxWX2MBbuf textstr = wxConvertWX2MB(*text); const wxWX2MBbuf textstr = WXREGEX_CHAR(*text);
if (!textstr) if (!textstr)
{ {
wxLogError(_("Failed to find match for regular expression: %s"), wxLogError(_("Failed to find match for regular expression: %s"),
@@ -403,7 +492,7 @@ int wxRegExImpl::Replace(wxString *text,
while ( (!maxMatches || countRepl < maxMatches) && while ( (!maxMatches || countRepl < maxMatches) &&
Matches(textstr + matchStart, Matches(textstr + matchStart,
countRepl ? wxRE_NOTBOL : 0 countRepl ? wxRE_NOTBOL : 0
WXREGEX_BUILTIN_ONLY(textlen - matchStart)) ) WXREGEX_IF_NEED_LEN(textlen - matchStart)) )
{ {
// the string possibly contains back references: we need to calculate // the string possibly contains back references: we need to calculate
// the replacement text anew after each match // the replacement text anew after each match
@@ -535,22 +624,16 @@ bool wxRegEx::Matches(const wxChar *str, int flags, size_t len) const
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") ); wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
(void)len; (void)len;
#ifdef WXREGEX_CONVERT_TO_MB return m_impl->Matches(WXREGEX_CHAR(str), flags WXREGEX_IF_NEED_LEN(len));
return m_impl->Matches(wxConvertWX2MB(str), flags);
#else
return m_impl->Matches(str, flags WXREGEX_BUILTIN_ONLY(len));
#endif
} }
bool wxRegEx::Matches(const wxChar *str, int flags) const bool wxRegEx::Matches(const wxChar *str, int flags) const
{ {
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") ); wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
#ifdef WXREGEX_CONVERT_TO_MB return m_impl->Matches(WXREGEX_CHAR(str),
return m_impl->Matches(wxConvertWX2MB(str), flags); flags
#else WXREGEX_IF_NEED_LEN(wxStrlen(str)));
return m_impl->Matches(str, flags WXREGEX_BUILTIN_ONLY(wxStrlen(str)));
#endif
} }
bool wxRegEx::GetMatch(size_t *start, size_t *len, size_t index) const bool wxRegEx::GetMatch(size_t *start, size_t *len, size_t index) const