Use re_search directly instead of regexec when using the GNU regex lib
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@36211 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
3
configure
vendored
3
configure
vendored
@@ -23979,7 +23979,8 @@ echo "${ECHO_T}$ac_cv_header_regex_h" >&6
|
||||
fi
|
||||
if test $ac_cv_header_regex_h = yes; then
|
||||
|
||||
for ac_func in regcomp
|
||||
|
||||
for ac_func in regcomp re_search
|
||||
do
|
||||
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
|
||||
echo "$as_me:$LINENO: checking for $ac_func" >&5
|
||||
|
@@ -2342,7 +2342,7 @@ if test "$wxUSE_REGEX" != "no"; then
|
||||
if test "$wxUSE_REGEX" = "sys" -o "$wxUSE_REGEX" = "yes" ; then
|
||||
dnl according to Unix 98 specs, regcomp() is in libc but I believe that
|
||||
dnl on some old systems it may be in libregex - check for it too?
|
||||
AC_CHECK_HEADER(regex.h, [AC_CHECK_FUNCS(regcomp)])
|
||||
AC_CHECK_HEADER(regex.h, [AC_CHECK_FUNCS(regcomp re_search)])
|
||||
|
||||
if test "x$ac_cv_func_regcomp" != "xyes"; then
|
||||
if test "$wxUSE_REGEX" = "sys" ; then
|
||||
|
@@ -197,12 +197,15 @@ returns {\tt true} if matches and {\tt false} otherwise.
|
||||
|
||||
{\it Flags} may be combination of {\tt wxRE\_NOTBOL} and {\tt wxRE\_NOTEOL}.
|
||||
|
||||
System regex libraries always assume the text being searched is null
|
||||
terminated and any length given is ignored.
|
||||
Some regex libraries assume that the text given is null terminated, while
|
||||
others require the length be given as a separate parameter. Therefore for
|
||||
maximum portability assume that {\it text} cannot contain embedded nulls.
|
||||
|
||||
When using the built-in regex library, the first overload obtains the length
|
||||
of the string using wxStrlen, the second from the {\it len} parameter and the
|
||||
third from the length of the {\it wxString}.
|
||||
When the {\it Matches(const wxChar *text, int flags = 0)} form is used,
|
||||
a {\it wxStrlen()} will be done internally if the regex library requires the
|
||||
length. When using {\it Matches()} in a loop
|
||||
the {\it Matches(text, flags, len)} form can be used instead, making it
|
||||
possible to avoid a {\it wxStrlen()} inside the loop.
|
||||
|
||||
May only be called after a successful call to \helpref{Compile()}{wxregexcompile}.
|
||||
|
||||
|
@@ -94,7 +94,7 @@ public:
|
||||
// true if matches and false otherwise
|
||||
//
|
||||
// flags may be combination of wxRE_NOTBOL and wxRE_NOTEOL
|
||||
// len may be the length of text (ignored except by built-in regex lib)
|
||||
// len may be the length of text (ignored by most system regex libs)
|
||||
//
|
||||
// may only be called after a successful call to Compile()
|
||||
bool Matches(const wxChar *text, int flags = 0) const;
|
||||
|
@@ -577,6 +577,11 @@
|
||||
* case WX_NO_REGEX_ADVANCED should be defined.
|
||||
*/
|
||||
#undef WX_NO_REGEX_ADVANCED
|
||||
/*
|
||||
* On GNU systems use re_search instead of regexec, since the latter does a
|
||||
* strlen on the search text affecting the performance of some operations.
|
||||
*/
|
||||
#undef HAVE_RE_SEARCH
|
||||
/*
|
||||
* Use SDL for audio (Unix)
|
||||
*/
|
||||
|
@@ -604,6 +604,11 @@
|
||||
* case WX_NO_REGEX_ADVANCED should be defined.
|
||||
*/
|
||||
#undef WX_NO_REGEX_ADVANCED
|
||||
/*
|
||||
* On GNU systems use re_search instead of regexec, since the latter does a
|
||||
* strlen on the search text affecting the performance of some operations.
|
||||
*/
|
||||
#undef HAVE_RE_SEARCH
|
||||
/*
|
||||
* Use SDL for audio (Unix)
|
||||
*/
|
||||
|
@@ -46,23 +46,84 @@
|
||||
#include "wx/regex.h"
|
||||
|
||||
// WXREGEX_USING_BUILTIN defined when using the built-in regex lib
|
||||
// WXREGEX_BUILTIN_ONLY() wrap a parameter only used with the built-in regex
|
||||
// WXREGEX_CONVERT_TO_MB indicates when the regex lib is using chars and
|
||||
// WXREGEX_IF_NEED_LEN() wrap the len parameter only used with the built-in
|
||||
// or GNU regex
|
||||
// WXREGEX_CONVERT_TO_MB defined when the regex lib is using chars and
|
||||
// wxChar is wide, so conversion must be done
|
||||
// WXREGEX_CHAR(x) Convert wxChar to wxRegChar
|
||||
//
|
||||
#ifdef __REG_NOFRONT
|
||||
# define WXREGEX_USING_BUILTIN
|
||||
# define WXREGEX_BUILTIN_ONLY(x) ,x
|
||||
# define WXREGEX_IF_NEED_LEN(x) ,x
|
||||
# define WXREGEX_CHAR(x) x
|
||||
#else
|
||||
# define WXREGEX_BUILTIN_ONLY(x)
|
||||
# ifdef HAVE_RE_SEARCH
|
||||
# define WXREGEX_IF_NEED_LEN(x) ,x
|
||||
# else
|
||||
# define WXREGEX_IF_NEED_LEN(x)
|
||||
# endif
|
||||
# if wxUSE_UNICODE
|
||||
# define WXREGEX_CONVERT_TO_MB
|
||||
# endif
|
||||
# define WXREGEX_CHAR(x) wxConvertWX2MB(x)
|
||||
#endif
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// private classes
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#ifndef HAVE_RE_SEARCH
|
||||
|
||||
// Owns the array of offsets for the matches in its usual POSIX form: a
// heap-allocated regmatch_t array with one entry per (sub)expression.
//
// This variant is used when re_search() is not available (HAVE_RE_SEARCH is
// not defined), i.e. when matching is done with the POSIX-style API.
class wxRegExMatches
{
public:
    // the pointer type handed to the matching function
    typedef regmatch_t *match_type;

    // allocate room for n entries; the entries are not initialized here, they
    // only become meaningful once a matching function has filled them in
    explicit wxRegExMatches(size_t n) : m_matches(new regmatch_t[n]) { }

    ~wxRegExMatches() { delete [] m_matches; }

    // start and end offsets of the n-th entry (n is not range checked);
    // End(n) - Start(n) is what GetMatch() reports as the match length
    //
    // the casts are explicit because rm_so/rm_eo can be a 64 bit quantity
    size_t Start(size_t n) const
        { return static_cast<size_t>(m_matches[n].rm_so); }
    size_t End(size_t n) const
        { return static_cast<size_t>(m_matches[n].rm_eo); }

    // raw array to pass to the matching function, still owned by this object
    regmatch_t *get() const { return m_matches; }

private:
    regmatch_t *m_matches;

    // copying would leave two objects calling delete [] on the same array, so
    // the copy operations are declared private and intentionally not defined
    wxRegExMatches(const wxRegExMatches&);
    wxRegExMatches& operator=(const wxRegExMatches&);
};
|
||||
|
||||
#else // HAVE_RE_SEARCH
|
||||
|
||||
// Owns the offsets for the matches in the form used by the GNU regex lib: a
// re_registers struct holding two parallel heap-allocated arrays (start and
// end offsets) plus their common size.
//
// This variant is used when HAVE_RE_SEARCH is defined, i.e. when matching is
// done by calling re_search() directly.
class wxRegExMatches
{
public:
    // the pointer type handed to the matching function
    typedef re_registers *match_type;

    // allocate room for n (start, end) pairs; the offsets themselves are not
    // initialized here, they only become meaningful after a search
    explicit wxRegExMatches(size_t n)
    {
        m_matches.num_regs = n;
        m_matches.start = new regoff_t[n];
        m_matches.end = new regoff_t[n];
    }

    ~wxRegExMatches()
    {
        delete [] m_matches.start;
        delete [] m_matches.end;
    }

    // start and end offsets of the n-th entry (n is not range checked);
    // End(n) - Start(n) is what GetMatch() reports as the match length
    size_t Start(size_t n) const
        { return static_cast<size_t>(m_matches.start[n]); }
    size_t End(size_t n) const
        { return static_cast<size_t>(m_matches.end[n]); }

    // the registers struct to pass to the search function; both arrays it
    // points to remain owned by this object
    re_registers *get() { return &m_matches; }

private:
    re_registers m_matches;

    // copying would leave two objects calling delete [] on the same arrays,
    // so the copy operations are declared private and intentionally not
    // defined
    wxRegExMatches(const wxRegExMatches&);
    wxRegExMatches& operator=(const wxRegExMatches&);
};
|
||||
|
||||
#endif // HAVE_RE_SEARCH
|
||||
|
||||
// the character type used by the regular expression engine
|
||||
#ifndef WXREGEX_CONVERT_TO_MB
|
||||
typedef wxChar wxRegChar;
|
||||
@@ -84,7 +145,7 @@ public:
|
||||
// RE operations
|
||||
bool Compile(const wxString& expr, int flags = 0);
|
||||
bool Matches(const wxRegChar *str, int flags
|
||||
WXREGEX_BUILTIN_ONLY(size_t len)) const;
|
||||
WXREGEX_IF_NEED_LEN(size_t len)) const;
|
||||
bool GetMatch(size_t *start, size_t *len, size_t index = 0) const;
|
||||
size_t GetMatchCount() const;
|
||||
int Replace(wxString *pattern, const wxString& replacement,
|
||||
@@ -110,7 +171,7 @@ private:
|
||||
regfree(&m_RegEx);
|
||||
}
|
||||
|
||||
delete [] m_Matches;
|
||||
delete m_Matches;
|
||||
}
|
||||
|
||||
// free the RE if any and reinit the members
|
||||
@@ -120,18 +181,18 @@ private:
|
||||
Init();
|
||||
}
|
||||
|
||||
|
||||
// compiled RE
|
||||
regex_t m_RegEx;
|
||||
regex_t m_RegEx;
|
||||
|
||||
// the subexpressions data
|
||||
regmatch_t *m_Matches;
|
||||
size_t m_nMatches;
|
||||
wxRegExMatches *m_Matches;
|
||||
size_t m_nMatches;
|
||||
|
||||
// true if m_RegEx is valid
|
||||
bool m_isCompiled;
|
||||
bool m_isCompiled;
|
||||
};
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// implementation
|
||||
// ============================================================================
|
||||
@@ -277,9 +338,36 @@ bool wxRegExImpl::Compile(const wxString& expr, int flags)
|
||||
return IsValid();
|
||||
}
|
||||
|
||||
#ifdef HAVE_RE_SEARCH
|
||||
|
||||
// On GNU, regexec is implemented as a wrapper around re_search. re_search
|
||||
// requires a length parameter which the POSIX regexec does not have,
|
||||
// therefore regexec must do a strlen on the search text each time it is
|
||||
// called. This can drastically affect performance when matching is done in
|
||||
// a loop along a string, such as during a search and replace. Therefore if
|
||||
// re_search is detected by configure, it is used directly.
|
||||
//
|
||||
static int ReSearch(const regex_t *preg,
|
||||
const char *text,
|
||||
size_t len,
|
||||
re_registers *matches,
|
||||
int eflags)
|
||||
{
|
||||
regex_t *pattern = wx_const_cast(regex_t*, preg);
|
||||
|
||||
pattern->not_bol = (eflags & REG_NOTBOL) != 0;
|
||||
pattern->not_eol = (eflags & REG_NOTEOL) != 0;
|
||||
pattern->regs_allocated = REGS_FIXED;
|
||||
|
||||
int ret = re_search(pattern, text, len, 0, len, matches);
|
||||
return ret >= 0 ? 0 : REG_NOMATCH;
|
||||
}
|
||||
|
||||
#endif // HAVE_RE_SEARCH
|
||||
|
||||
bool wxRegExImpl::Matches(const wxRegChar *str,
|
||||
int flags
|
||||
WXREGEX_BUILTIN_ONLY(size_t len)) const
|
||||
WXREGEX_IF_NEED_LEN(size_t len)) const
|
||||
{
|
||||
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
|
||||
|
||||
@@ -297,14 +385,18 @@ bool wxRegExImpl::Matches(const wxRegChar *str,
|
||||
wxRegExImpl *self = wxConstCast(this, wxRegExImpl);
|
||||
if ( !m_Matches && m_nMatches )
|
||||
{
|
||||
self->m_Matches = new regmatch_t[m_nMatches];
|
||||
self->m_Matches = new wxRegExMatches(m_nMatches);
|
||||
}
|
||||
|
||||
wxRegExMatches::match_type matches = m_Matches ? m_Matches->get() : NULL;
|
||||
|
||||
// do match it
|
||||
#ifdef WXREGEX_USING_BUILTIN
|
||||
int rc = wx_re_exec(&self->m_RegEx, str, len, NULL, m_nMatches, m_Matches, flagsRE);
|
||||
#if defined WXREGEX_USING_BUILTIN
|
||||
int rc = wx_re_exec(&self->m_RegEx, str, len, NULL, m_nMatches, matches, flagsRE);
|
||||
#elif defined HAVE_RE_SEARCH
|
||||
int rc = str ? ReSearch(&self->m_RegEx, str, len, matches, flagsRE) : REG_BADPAT;
|
||||
#else
|
||||
int rc = str ? regexec(&self->m_RegEx, str, m_nMatches, m_Matches, flagsRE) : REG_BADPAT;
|
||||
int rc = str ? regexec(&self->m_RegEx, str, m_nMatches, matches, flagsRE) : REG_BADPAT;
|
||||
#endif
|
||||
|
||||
switch ( rc )
|
||||
@@ -332,13 +424,10 @@ bool wxRegExImpl::GetMatch(size_t *start, size_t *len, size_t index) const
|
||||
wxCHECK_MSG( m_Matches, false, _T("must call Matches() first") );
|
||||
wxCHECK_MSG( index < m_nMatches, false, _T("invalid match index") );
|
||||
|
||||
const regmatch_t& match = m_Matches[index];
|
||||
|
||||
// we need the casts because rm_so can be a 64 bit quantity
|
||||
if ( start )
|
||||
*start = wx_truncate_cast(size_t, match.rm_so);
|
||||
*start = m_Matches->Start(index);
|
||||
if ( len )
|
||||
*len = wx_truncate_cast(size_t, match.rm_eo - match.rm_so);
|
||||
*len = m_Matches->End(index) - m_Matches->Start(index);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -363,7 +452,7 @@ int wxRegExImpl::Replace(wxString *text,
|
||||
const wxChar *textstr = text->c_str();
|
||||
size_t textlen = text->length();
|
||||
#else
|
||||
const wxWX2MBbuf textstr = wxConvertWX2MB(*text);
|
||||
const wxWX2MBbuf textstr = WXREGEX_CHAR(*text);
|
||||
if (!textstr)
|
||||
{
|
||||
wxLogError(_("Failed to find match for regular expression: %s"),
|
||||
@@ -403,7 +492,7 @@ int wxRegExImpl::Replace(wxString *text,
|
||||
while ( (!maxMatches || countRepl < maxMatches) &&
|
||||
Matches(textstr + matchStart,
|
||||
countRepl ? wxRE_NOTBOL : 0
|
||||
WXREGEX_BUILTIN_ONLY(textlen - matchStart)) )
|
||||
WXREGEX_IF_NEED_LEN(textlen - matchStart)) )
|
||||
{
|
||||
// the string possibly contains back references: we need to calculate
|
||||
// the replacement text anew after each match
|
||||
@@ -535,22 +624,16 @@ bool wxRegEx::Matches(const wxChar *str, int flags, size_t len) const
|
||||
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
|
||||
(void)len;
|
||||
|
||||
#ifdef WXREGEX_CONVERT_TO_MB
|
||||
return m_impl->Matches(wxConvertWX2MB(str), flags);
|
||||
#else
|
||||
return m_impl->Matches(str, flags WXREGEX_BUILTIN_ONLY(len));
|
||||
#endif
|
||||
return m_impl->Matches(WXREGEX_CHAR(str), flags WXREGEX_IF_NEED_LEN(len));
|
||||
}
|
||||
|
||||
bool wxRegEx::Matches(const wxChar *str, int flags) const
|
||||
{
|
||||
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
|
||||
|
||||
#ifdef WXREGEX_CONVERT_TO_MB
|
||||
return m_impl->Matches(wxConvertWX2MB(str), flags);
|
||||
#else
|
||||
return m_impl->Matches(str, flags WXREGEX_BUILTIN_ONLY(wxStrlen(str)));
|
||||
#endif
|
||||
return m_impl->Matches(WXREGEX_CHAR(str),
|
||||
flags
|
||||
WXREGEX_IF_NEED_LEN(wxStrlen(str)));
|
||||
}
|
||||
|
||||
bool wxRegEx::GetMatch(size_t *start, size_t *len, size_t index) const
|
||||
|
Reference in New Issue
Block a user