Use re_search directly instead of regexec when using the GNU regex lib
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@36211 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
3
configure
vendored
3
configure
vendored
@@ -23979,7 +23979,8 @@ echo "${ECHO_T}$ac_cv_header_regex_h" >&6
|
|||||||
fi
|
fi
|
||||||
if test $ac_cv_header_regex_h = yes; then
|
if test $ac_cv_header_regex_h = yes; then
|
||||||
|
|
||||||
for ac_func in regcomp
|
|
||||||
|
for ac_func in regcomp re_search
|
||||||
do
|
do
|
||||||
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
|
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
|
||||||
echo "$as_me:$LINENO: checking for $ac_func" >&5
|
echo "$as_me:$LINENO: checking for $ac_func" >&5
|
||||||
|
@@ -2342,7 +2342,7 @@ if test "$wxUSE_REGEX" != "no"; then
|
|||||||
if test "$wxUSE_REGEX" = "sys" -o "$wxUSE_REGEX" = "yes" ; then
|
if test "$wxUSE_REGEX" = "sys" -o "$wxUSE_REGEX" = "yes" ; then
|
||||||
dnl according to Unix 98 specs, regcomp() is in libc but I believe that
|
dnl according to Unix 98 specs, regcomp() is in libc but I believe that
|
||||||
dnl on some old systems it may be in libregex - check for it too?
|
dnl on some old systems it may be in libregex - check for it too?
|
||||||
AC_CHECK_HEADER(regex.h, [AC_CHECK_FUNCS(regcomp)])
|
AC_CHECK_HEADER(regex.h, [AC_CHECK_FUNCS(regcomp re_search)])
|
||||||
|
|
||||||
if test "x$ac_cv_func_regcomp" != "xyes"; then
|
if test "x$ac_cv_func_regcomp" != "xyes"; then
|
||||||
if test "$wxUSE_REGEX" = "sys" ; then
|
if test "$wxUSE_REGEX" = "sys" ; then
|
||||||
|
@@ -197,12 +197,15 @@ returns {\tt true} if matches and {\tt false} otherwise.
|
|||||||
|
|
||||||
{\it Flags} may be combination of {\tt wxRE\_NOTBOL} and {\tt wxRE\_NOTEOL}.
|
{\it Flags} may be combination of {\tt wxRE\_NOTBOL} and {\tt wxRE\_NOTEOL}.
|
||||||
|
|
||||||
System regex libraries always assume the text being searched is null
|
Some regex libraries assume that the text given is null terminated, while
|
||||||
terminated and any length given is ignored.
|
others require the length be given as a separate parameter. Therefore for
|
||||||
|
maximum portability assume that {\it text} cannot contain embedded nulls.
|
||||||
|
|
||||||
When using the built-in regex library, the first overload obtains the length
|
When the {\it Matches(const wxChar *text, int flags = 0)} form is used,
|
||||||
of the string using wxStrlen, the second from the {\it len} parameter and the
|
a {\it wxStrlen()} will be done internally if the regex library requires the
|
||||||
third from the length of the {\it wxString}.
|
length. When using {\it Matches()} in a loop
|
||||||
|
the {\it Matches(text, flags, len)} form can be used instead, making it
|
||||||
|
possible to avoid a {\it wxStrlen()} inside the loop.
|
||||||
|
|
||||||
May only be called after successful call to \helpref{Compile()}{wxregexcompile}.
|
May only be called after successful call to \helpref{Compile()}{wxregexcompile}.
|
||||||
|
|
||||||
|
@@ -94,7 +94,7 @@ public:
|
|||||||
// true if matches and false otherwise
|
// true if matches and false otherwise
|
||||||
//
|
//
|
||||||
// flags may be combination of wxRE_NOTBOL and wxRE_NOTEOL
|
// flags may be combination of wxRE_NOTBOL and wxRE_NOTEOL
|
||||||
// len may be the length of text (ignored except by built-in regex lib)
|
// len may be the length of text (ignored by most system regex libs)
|
||||||
//
|
//
|
||||||
// may only be called after successful call to Compile()
|
// may only be called after successful call to Compile()
|
||||||
bool Matches(const wxChar *text, int flags = 0) const;
|
bool Matches(const wxChar *text, int flags = 0) const;
|
||||||
|
@@ -577,6 +577,11 @@
|
|||||||
* case WX_NO_REGEX_ADVANCED should be defined.
|
* case WX_NO_REGEX_ADVANCED should be defined.
|
||||||
*/
|
*/
|
||||||
#undef WX_NO_REGEX_ADVANCED
|
#undef WX_NO_REGEX_ADVANCED
|
||||||
|
/*
|
||||||
|
* On GNU systems use re_search instead of regexec, since the latter does a
|
||||||
|
* strlen on the search text affecting the performance of some operations.
|
||||||
|
*/
|
||||||
|
#undef HAVE_RE_SEARCH
|
||||||
/*
|
/*
|
||||||
* Use SDL for audio (Unix)
|
* Use SDL for audio (Unix)
|
||||||
*/
|
*/
|
||||||
|
@@ -604,6 +604,11 @@
|
|||||||
* case WX_NO_REGEX_ADVANCED should be defined.
|
* case WX_NO_REGEX_ADVANCED should be defined.
|
||||||
*/
|
*/
|
||||||
#undef WX_NO_REGEX_ADVANCED
|
#undef WX_NO_REGEX_ADVANCED
|
||||||
|
/*
|
||||||
|
* On GNU systems use re_search instead of regexec, since the latter does a
|
||||||
|
* strlen on the search text affecting the performance of some operations.
|
||||||
|
*/
|
||||||
|
#undef HAVE_RE_SEARCH
|
||||||
/*
|
/*
|
||||||
* Use SDL for audio (Unix)
|
* Use SDL for audio (Unix)
|
||||||
*/
|
*/
|
||||||
|
@@ -46,23 +46,84 @@
|
|||||||
#include "wx/regex.h"
|
#include "wx/regex.h"
|
||||||
|
|
||||||
// WXREGEX_USING_BUILTIN defined when using the built-in regex lib
|
// WXREGEX_USING_BUILTIN defined when using the built-in regex lib
|
||||||
// WXREGEX_BUILTIN_ONLY() wrap a parameter only used with the built-in regex
|
// WXREGEX_IF_NEED_LEN() wrap the len parameter only used with the built-in
|
||||||
// WXREGEX_CONVERT_TO_MB indicates when the regex lib is using chars and
|
// or GNU regex
|
||||||
|
// WXREGEX_CONVERT_TO_MB defined when the regex lib is using chars and
|
||||||
// wxChar is wide, so conversion must be done
|
// wxChar is wide, so conversion must be done
|
||||||
|
// WXREGEX_CHAR(x) Convert wxChar to wxRegChar
|
||||||
|
//
|
||||||
#ifdef __REG_NOFRONT
|
#ifdef __REG_NOFRONT
|
||||||
# define WXREGEX_USING_BUILTIN
|
# define WXREGEX_USING_BUILTIN
|
||||||
# define WXREGEX_BUILTIN_ONLY(x) ,x
|
# define WXREGEX_IF_NEED_LEN(x) ,x
|
||||||
|
# define WXREGEX_CHAR(x) x
|
||||||
#else
|
#else
|
||||||
# define WXREGEX_BUILTIN_ONLY(x)
|
# ifdef HAVE_RE_SEARCH
|
||||||
|
# define WXREGEX_IF_NEED_LEN(x) ,x
|
||||||
|
# else
|
||||||
|
# define WXREGEX_IF_NEED_LEN(x)
|
||||||
|
# endif
|
||||||
# if wxUSE_UNICODE
|
# if wxUSE_UNICODE
|
||||||
# define WXREGEX_CONVERT_TO_MB
|
# define WXREGEX_CONVERT_TO_MB
|
||||||
# endif
|
# endif
|
||||||
|
# define WXREGEX_CHAR(x) wxConvertWX2MB(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// private classes
|
// private classes
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#ifndef HAVE_RE_SEARCH
|
||||||
|
|
||||||
|
// the array of offsets for the matches, the usual POSIX regmatch_t array.
|
||||||
|
class wxRegExMatches
{
public:
    // raw storage type handed to the regex library's match function
    typedef regmatch_t *match_type;

    // Allocates storage for n match entries. The entries are left
    // uninitialised; they only hold meaningful values once the regex
    // library has filled them in.
    explicit wxRegExMatches(size_t n) : m_matches(new regmatch_t[n]) { }

    ~wxRegExMatches() { delete [] m_matches; }

    // Start and end offsets of the n-th entry; n must be less than the count
    // passed to the constructor (it is not checked here).
    //
    // rm_so/rm_eo are signed offsets, so the conversion to size_t is spelt
    // out explicitly rather than left to an implicit narrowing conversion.
    size_t Start(size_t n) const
        { return static_cast<size_t>(m_matches[n].rm_so); }
    size_t End(size_t n) const
        { return static_cast<size_t>(m_matches[n].rm_eo); }

    // Access to the underlying array, which remains owned by this object.
    regmatch_t *get() const { return m_matches; }

private:
    regmatch_t *m_matches;

    // This object owns m_matches, so a member-wise copy would make two
    // objects delete the same array. Copying is therefore disabled: both
    // operations are declared private and intentionally left undefined.
    wxRegExMatches(const wxRegExMatches&);
    wxRegExMatches& operator=(const wxRegExMatches&);
};
|
||||||
|
|
||||||
|
#else // HAVE_RE_SEARCH
|
||||||
|
|
||||||
|
// the array of offsets for the matches, the struct used by the GNU lib
|
||||||
|
class wxRegExMatches
{
public:
    // raw storage type handed to the regex library's search function
    typedef re_registers *match_type;

    // Allocates start/end arrays for n registers and records n in the
    // structure. The array contents are left uninitialised; they only hold
    // meaningful values once the regex library has filled them in.
    explicit wxRegExMatches(size_t n)
    {
        m_matches.num_regs = n;
        m_matches.start = new regoff_t[n];
        m_matches.end = new regoff_t[n];
    }

    ~wxRegExMatches()
    {
        delete [] m_matches.start;
        delete [] m_matches.end;
    }

    // Start and end offsets of the n-th register; n must be less than the
    // count passed to the constructor (it is not checked here).
    //
    // regoff_t is a signed type, so the conversion to size_t is spelt out
    // explicitly rather than left to an implicit conversion.
    size_t Start(size_t n) const
        { return static_cast<size_t>(m_matches.start[n]); }
    size_t End(size_t n) const
        { return static_cast<size_t>(m_matches.end[n]); }

    // Access to the underlying structure, which remains owned by this object.
    re_registers *get() { return &m_matches; }

private:
    re_registers m_matches;

    // This object owns both arrays inside m_matches, so a member-wise copy
    // would make two objects delete the same memory. Copying is therefore
    // disabled: both operations are declared private and left undefined.
    wxRegExMatches(const wxRegExMatches&);
    wxRegExMatches& operator=(const wxRegExMatches&);
};
|
||||||
|
|
||||||
|
#endif // HAVE_RE_SEARCH
|
||||||
|
|
||||||
// the character type used by the regular expression engine
|
// the character type used by the regular expression engine
|
||||||
#ifndef WXREGEX_CONVERT_TO_MB
|
#ifndef WXREGEX_CONVERT_TO_MB
|
||||||
typedef wxChar wxRegChar;
|
typedef wxChar wxRegChar;
|
||||||
@@ -84,7 +145,7 @@ public:
|
|||||||
// RE operations
|
// RE operations
|
||||||
bool Compile(const wxString& expr, int flags = 0);
|
bool Compile(const wxString& expr, int flags = 0);
|
||||||
bool Matches(const wxRegChar *str, int flags
|
bool Matches(const wxRegChar *str, int flags
|
||||||
WXREGEX_BUILTIN_ONLY(size_t len)) const;
|
WXREGEX_IF_NEED_LEN(size_t len)) const;
|
||||||
bool GetMatch(size_t *start, size_t *len, size_t index = 0) const;
|
bool GetMatch(size_t *start, size_t *len, size_t index = 0) const;
|
||||||
size_t GetMatchCount() const;
|
size_t GetMatchCount() const;
|
||||||
int Replace(wxString *pattern, const wxString& replacement,
|
int Replace(wxString *pattern, const wxString& replacement,
|
||||||
@@ -110,7 +171,7 @@ private:
|
|||||||
regfree(&m_RegEx);
|
regfree(&m_RegEx);
|
||||||
}
|
}
|
||||||
|
|
||||||
delete [] m_Matches;
|
delete m_Matches;
|
||||||
}
|
}
|
||||||
|
|
||||||
// free the RE if any and reinit the members
|
// free the RE if any and reinit the members
|
||||||
@@ -120,18 +181,18 @@ private:
|
|||||||
Init();
|
Init();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// compiled RE
|
// compiled RE
|
||||||
regex_t m_RegEx;
|
regex_t m_RegEx;
|
||||||
|
|
||||||
// the subexpressions data
|
// the subexpressions data
|
||||||
regmatch_t *m_Matches;
|
wxRegExMatches *m_Matches;
|
||||||
size_t m_nMatches;
|
size_t m_nMatches;
|
||||||
|
|
||||||
// true if m_RegEx is valid
|
// true if m_RegEx is valid
|
||||||
bool m_isCompiled;
|
bool m_isCompiled;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// implementation
|
// implementation
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -277,9 +338,36 @@ bool wxRegExImpl::Compile(const wxString& expr, int flags)
|
|||||||
return IsValid();
|
return IsValid();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_RE_SEARCH
|
||||||
|
|
||||||
|
// On GNU, regexec is implemented as a wrapper around re_search. re_search
|
||||||
|
// requires a length parameter which the POSIX regexec does not have,
|
||||||
|
// therefore regexec must do a strlen on the search text each time it is
|
||||||
|
// called. This can drastically affect performance when matching is done in
|
||||||
|
// a loop along a string, such as during a search and replace. Therefore if
|
||||||
|
// re_search is detected by configure, it is used directly.
|
||||||
|
//
|
||||||
|
static int ReSearch(const regex_t *preg,
|
||||||
|
const char *text,
|
||||||
|
size_t len,
|
||||||
|
re_registers *matches,
|
||||||
|
int eflags)
|
||||||
|
{
|
||||||
|
regex_t *pattern = wx_const_cast(regex_t*, preg);
|
||||||
|
|
||||||
|
pattern->not_bol = (eflags & REG_NOTBOL) != 0;
|
||||||
|
pattern->not_eol = (eflags & REG_NOTEOL) != 0;
|
||||||
|
pattern->regs_allocated = REGS_FIXED;
|
||||||
|
|
||||||
|
int ret = re_search(pattern, text, len, 0, len, matches);
|
||||||
|
return ret >= 0 ? 0 : REG_NOMATCH;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // HAVE_RE_SEARCH
|
||||||
|
|
||||||
bool wxRegExImpl::Matches(const wxRegChar *str,
|
bool wxRegExImpl::Matches(const wxRegChar *str,
|
||||||
int flags
|
int flags
|
||||||
WXREGEX_BUILTIN_ONLY(size_t len)) const
|
WXREGEX_IF_NEED_LEN(size_t len)) const
|
||||||
{
|
{
|
||||||
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
|
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
|
||||||
|
|
||||||
@@ -297,14 +385,18 @@ bool wxRegExImpl::Matches(const wxRegChar *str,
|
|||||||
wxRegExImpl *self = wxConstCast(this, wxRegExImpl);
|
wxRegExImpl *self = wxConstCast(this, wxRegExImpl);
|
||||||
if ( !m_Matches && m_nMatches )
|
if ( !m_Matches && m_nMatches )
|
||||||
{
|
{
|
||||||
self->m_Matches = new regmatch_t[m_nMatches];
|
self->m_Matches = new wxRegExMatches(m_nMatches);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wxRegExMatches::match_type matches = m_Matches ? m_Matches->get() : NULL;
|
||||||
|
|
||||||
// do match it
|
// do match it
|
||||||
#ifdef WXREGEX_USING_BUILTIN
|
#if defined WXREGEX_USING_BUILTIN
|
||||||
int rc = wx_re_exec(&self->m_RegEx, str, len, NULL, m_nMatches, m_Matches, flagsRE);
|
int rc = wx_re_exec(&self->m_RegEx, str, len, NULL, m_nMatches, matches, flagsRE);
|
||||||
|
#elif defined HAVE_RE_SEARCH
|
||||||
|
int rc = str ? ReSearch(&self->m_RegEx, str, len, matches, flagsRE) : REG_BADPAT;
|
||||||
#else
|
#else
|
||||||
int rc = str ? regexec(&self->m_RegEx, str, m_nMatches, m_Matches, flagsRE) : REG_BADPAT;
|
int rc = str ? regexec(&self->m_RegEx, str, m_nMatches, matches, flagsRE) : REG_BADPAT;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
switch ( rc )
|
switch ( rc )
|
||||||
@@ -332,13 +424,10 @@ bool wxRegExImpl::GetMatch(size_t *start, size_t *len, size_t index) const
|
|||||||
wxCHECK_MSG( m_Matches, false, _T("must call Matches() first") );
|
wxCHECK_MSG( m_Matches, false, _T("must call Matches() first") );
|
||||||
wxCHECK_MSG( index < m_nMatches, false, _T("invalid match index") );
|
wxCHECK_MSG( index < m_nMatches, false, _T("invalid match index") );
|
||||||
|
|
||||||
const regmatch_t& match = m_Matches[index];
|
|
||||||
|
|
||||||
// we need the casts because rm_so can be a 64 bit quantity
|
|
||||||
if ( start )
|
if ( start )
|
||||||
*start = wx_truncate_cast(size_t, match.rm_so);
|
*start = m_Matches->Start(index);
|
||||||
if ( len )
|
if ( len )
|
||||||
*len = wx_truncate_cast(size_t, match.rm_eo - match.rm_so);
|
*len = m_Matches->End(index) - m_Matches->Start(index);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -363,7 +452,7 @@ int wxRegExImpl::Replace(wxString *text,
|
|||||||
const wxChar *textstr = text->c_str();
|
const wxChar *textstr = text->c_str();
|
||||||
size_t textlen = text->length();
|
size_t textlen = text->length();
|
||||||
#else
|
#else
|
||||||
const wxWX2MBbuf textstr = wxConvertWX2MB(*text);
|
const wxWX2MBbuf textstr = WXREGEX_CHAR(*text);
|
||||||
if (!textstr)
|
if (!textstr)
|
||||||
{
|
{
|
||||||
wxLogError(_("Failed to find match for regular expression: %s"),
|
wxLogError(_("Failed to find match for regular expression: %s"),
|
||||||
@@ -403,7 +492,7 @@ int wxRegExImpl::Replace(wxString *text,
|
|||||||
while ( (!maxMatches || countRepl < maxMatches) &&
|
while ( (!maxMatches || countRepl < maxMatches) &&
|
||||||
Matches(textstr + matchStart,
|
Matches(textstr + matchStart,
|
||||||
countRepl ? wxRE_NOTBOL : 0
|
countRepl ? wxRE_NOTBOL : 0
|
||||||
WXREGEX_BUILTIN_ONLY(textlen - matchStart)) )
|
WXREGEX_IF_NEED_LEN(textlen - matchStart)) )
|
||||||
{
|
{
|
||||||
// the string possibly contains back references: we need to calculate
|
// the string possibly contains back references: we need to calculate
|
||||||
// the replacement text anew after each match
|
// the replacement text anew after each match
|
||||||
@@ -535,22 +624,16 @@ bool wxRegEx::Matches(const wxChar *str, int flags, size_t len) const
|
|||||||
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
|
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
|
||||||
(void)len;
|
(void)len;
|
||||||
|
|
||||||
#ifdef WXREGEX_CONVERT_TO_MB
|
return m_impl->Matches(WXREGEX_CHAR(str), flags WXREGEX_IF_NEED_LEN(len));
|
||||||
return m_impl->Matches(wxConvertWX2MB(str), flags);
|
|
||||||
#else
|
|
||||||
return m_impl->Matches(str, flags WXREGEX_BUILTIN_ONLY(len));
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool wxRegEx::Matches(const wxChar *str, int flags) const
|
bool wxRegEx::Matches(const wxChar *str, int flags) const
|
||||||
{
|
{
|
||||||
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
|
wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
|
||||||
|
|
||||||
#ifdef WXREGEX_CONVERT_TO_MB
|
return m_impl->Matches(WXREGEX_CHAR(str),
|
||||||
return m_impl->Matches(wxConvertWX2MB(str), flags);
|
flags
|
||||||
#else
|
WXREGEX_IF_NEED_LEN(wxStrlen(str)));
|
||||||
return m_impl->Matches(str, flags WXREGEX_BUILTIN_ONLY(wxStrlen(str)));
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool wxRegEx::GetMatch(size_t *start, size_t *len, size_t index) const
|
bool wxRegEx::GetMatch(size_t *start, size_t *len, size_t index) const
|
||||||
|
Reference in New Issue
Block a user