diff --git a/include/wx/regex.h b/include/wx/regex.h
index ac70ff3907..dc3b34c4a5 100644
--- a/include/wx/regex.h
+++ b/include/wx/regex.h
@@ -144,6 +144,9 @@ public:
static wxString QuoteMeta(const wxString& str);
+ // return the extended RE corresponding to the given basic RE
+ static wxString ConvertFromBasic(const wxString& bre);
+
// dtor not virtual, don't derive from this class
~wxRegEx();
diff --git a/interface/wx/regex.h b/interface/wx/regex.h
index 0a6f24558c..cc3dc92acf 100644
--- a/interface/wx/regex.h
+++ b/interface/wx/regex.h
@@ -276,5 +276,15 @@ public:
@since 3.1.3
*/
static wxString QuoteMeta(const wxString& str);
+
+ /**
+ Converts a basic regular expression to an extended regex syntax.
+
+ This function can be used to convert @a bre using deprecated wxRE_BASIC
+ syntax to default (extended) syntax.
+
+ @since 3.1.6
+ */
+ static wxString ConvertFromBasic(const wxString& bre);
};
diff --git a/misc/suppressions/codespell-lines b/misc/suppressions/codespell-lines
index 44401db6a6..3e1c2e8461 100644
--- a/misc/suppressions/codespell-lines
+++ b/misc/suppressions/codespell-lines
@@ -24,3 +24,6 @@ expressions (BRE). EREs are roughly those of the traditional @e egrep,
// 2019), i.e. SEH translator seems to work just fine without /EHa too, so
// Purpose: helpers for the structured exception handling (SEH) under Win32
* MinGW-w64 versions 7.3 and 8.1 (32-bit binaries use SJLJ exceptions, 64-bit ones use SEH, and all binaries use Win32 threads).
+ static wxString ConvertFromBasic(const wxString& bre);
+ This function can be used to convert @a bre using deprecated wxRE_BASIC
+ static wxString ConvertFromBasic(const wxString& bre);
diff --git a/src/common/regex.cpp b/src/common/regex.cpp
index c0cf525976..2b784a7314 100644
--- a/src/common/regex.cpp
+++ b/src/common/regex.cpp
@@ -41,31 +41,27 @@
// WXREGEX_USING_BUILTIN defined when using the built-in regex lib
// WXREGEX_USING_RE_SEARCH defined when using re_search in the GNU regex lib
-// WXREGEX_IF_NEED_LEN() wrap the len parameter only used with the built-in
-// or GNU regex
// WXREGEX_CONVERT_TO_MB defined when the regex lib is using chars and
-// wxChar is wide, so conversion must be done
-// WXREGEX_CHAR(x) Convert wxChar to wxRegChar
+// wxChar is wide, so conversion to UTF-8 must be done
//
#ifdef __REG_NOFRONT
# define WXREGEX_USING_BUILTIN
-# define WXREGEX_IF_NEED_LEN(x) ,x
-# if wxUSE_UNICODE
-# define WXREGEX_CHAR(x) (x).wc_str()
-# else
-# define WXREGEX_CHAR(x) (x).mb_str()
-# endif
#else
# ifdef HAVE_RE_SEARCH
-# define WXREGEX_IF_NEED_LEN(x) ,x
# define WXREGEX_USING_RE_SEARCH
# else
-# define WXREGEX_IF_NEED_LEN(x)
+ // We can't use length, so just drop it in this wrapper.
+ inline int
+ wx_regexec(const regex_t* preg, const char* string, size_t,
+ size_t nmatch, regmatch_t* pmatch, int eflags)
+ {
+ return regexec(preg, string, nmatch, pmatch, eflags);
+ }
# endif
# if wxUSE_UNICODE
# define WXREGEX_CONVERT_TO_MB
# endif
-# define WXREGEX_CHAR(x) (x).mb_str()
+# define wx_regcomp regcomp
# define wx_regfree regfree
# define wx_regerror regerror
#endif
@@ -157,8 +153,7 @@ public:
// RE operations
bool Compile(const wxString& expr, int flags = 0);
- bool Matches(const wxRegChar *str, int flags
- WXREGEX_IF_NEED_LEN(size_t len)) const;
+ bool Matches(const wxRegChar *str, int flags, size_t len) const;
bool GetMatch(size_t *start, size_t *len, size_t index = 0) const;
size_t GetMatchCount() const;
int Replace(wxString *pattern, const wxString& replacement,
@@ -166,7 +161,7 @@ public:
private:
// return the string containing the error message for the given err code
- wxString GetErrorMsg(int errorcode, bool badconv) const;
+ wxString GetErrorMsg(int errorcode) const;
// init the members
void Init()
@@ -224,33 +219,22 @@ wxRegExImpl::~wxRegExImpl()
Free();
}
-wxString wxRegExImpl::GetErrorMsg(int errorcode, bool badconv) const
+wxString wxRegExImpl::GetErrorMsg(int errorcode) const
{
-#ifdef WXREGEX_CONVERT_TO_MB
- // currently only needed when using system library in Unicode mode
- if ( badconv )
- {
- return _("conversion to 8-bit encoding failed");
- }
-#else
- // 'use' badconv to avoid a compiler warning
- (void)badconv;
-#endif
-
wxString szError;
// first get the string length needed
int len = wx_regerror(errorcode, &m_RegEx, NULL, 0);
if ( len > 0 )
{
- char* szcmbError = new char[++len];
+ wxCharBuffer errbuf(len);
- (void)wx_regerror(errorcode, &m_RegEx, szcmbError, len);
+ (void)wx_regerror(errorcode, &m_RegEx, errbuf.data(), errbuf.length());
- szError = wxConvLibc.cMB2WX(szcmbError);
- delete [] szcmbError;
+ szError = wxConvLibc.cMB2WX(errbuf);
}
- else // regerror() returned 0
+
+ if ( szError.empty() ) // regerror() returned 0 or conversion failed
{
szError = _("unknown error");
}
@@ -258,6 +242,281 @@ wxString wxRegExImpl::GetErrorMsg(int errorcode, bool badconv) const
return szError;
}
+// Helper function for processing bracket expressions inside a regex.
+//
+// Advance the iterator until the closing bracket matching the opening one the
+// iterator currently points to, i.e.:
+//
+// Precondition: *it == '['
+// Postcondition: *it == ']' or it == end if failed to find matching ']'
+static
+wxString::const_iterator
+SkipBracketExpression(wxString::const_iterator it, wxString::const_iterator end)
+{
+ wxASSERT_MSG( *it == '[', "must be at the start of bracket expression" );
+
+ // Initial ']', possibly after the preceding '^', is different because it
+ // stands for a literal ']' and not the end of the bracket expression, so
+ // check for it first.
+ ++it;
+ if ( it != end && *it == '^' )
+ ++it;
+ if ( it != end && *it == ']' )
+ ++it;
+
+ // Any ']' from now on ends the bracket expression.
+ for ( ; it != end; ++it )
+ {
+ const wxUniChar c = *it;
+
+ if ( c == ']' )
+ break;
+
+ if ( c == '[' )
+ {
+ // Bare '[' on its own is not special, but collating elements and
+ // character classes are, so check for them and advance past them
+ // if necessary to avoid misinterpreting the matching closing ']'.
+ if ( ++it == end )
+ break;
+
+ const wxUniChar c = *it;
+ if ( c == ':' || c == '.' || c == '=' )
+ {
+ for ( ++it; it != end; ++it )
+ {
+ if ( *it == c )
+ {
+ if ( ++it == end )
+ break;
+
+ if ( *it == ']' )
+ break;
+ }
+ }
+
+ if ( it == end )
+ break;
+ }
+ }
+ }
+
+ return it;
+}
+
+/* static */
+wxString wxRegEx::ConvertFromBasic(const wxString& bre)
+{
+ /*
+ Quoting regex(7):
+
+ Obsolete ("basic") regular expressions differ in several respects.
+ '|', '+', and '?' are ordinary characters and there is no equivalent
+ for their functionality. The delimiters for bounds are "\{" and "\}",
+ with '{' and '}' by themselves ordinary characters. The parentheses
+ for nested subexpressions are "\(" and "\)", with '(' and ')' by
+ themselves ordinary characters. '^' is an ordinary character except at
+ the beginning of the RE or(!) the beginning of a parenthesized
+ subexpression, '$' is an ordinary character except at the end of the RE
+ or(!) the end of a parenthesized subexpression, and '*' is an ordinary
+ character if it appears at the beginning of the RE or the beginning of
+ a parenthesized subexpression (after a possible leading '^').
+
+ Finally, there is one new type of atom, a back reference: '\' followed
+ by a nonzero decimal digit d matches the same sequence of characters
+ matched by the dth parenthesized subexpression [...]
+ */
+ wxString ere;
+ ere.reserve(bre.length());
+
+ enum SinceStart
+ {
+ SinceStart_None, // Just at the beginning.
+ SinceStart_OnlyCaret, // Had just "^" since the beginning.
+ SinceStart_Some // Had something else since the beginning.
+ };
+
+ struct State
+ {
+ explicit State(SinceStart sinceStart_)
+ {
+ isBackslash = false;
+ sinceStart = sinceStart_;
+ }
+
+ bool isBackslash;
+ SinceStart sinceStart;
+ };
+
+ State previous(SinceStart_None);
+ for ( wxString::const_iterator it = bre.begin(),
+ end = bre.end();
+ it != end;
+ ++it )
+ {
+ const wxUniChar c = *it;
+
+ // What should be done with the current character?
+ enum Disposition
+ {
+ Disposition_Skip, // Nothing.
+ Disposition_Append, // Append to output.
+ Disposition_Escape // ... after escaping it with backslash.
+ } disposition = Disposition_Append;
+
+ State current(SinceStart_Some);
+
+ if ( previous.isBackslash )
+ {
+ // By default, keep the backslash present in the BRE, it's still
+ // needed in the ERE too.
+ disposition = Disposition_Escape;
+
+ switch ( c.GetValue() )
+ {
+ case '(':
+ // It's the start of a new subexpression.
+ current.sinceStart = SinceStart_None;
+ wxFALLTHROUGH;
+
+ case ')':
+ case '{':
+ case '}':
+ // Do not escape to ensure they remain special in the ERE
+ // as the escaped versions were special in the BRE.
+ disposition = Disposition_Append;
+ break;
+ }
+ }
+ else // This character is not escaped.
+ {
+ switch ( c.GetValue() )
+ {
+ case '\\':
+ current.isBackslash = true;
+
+ // Don't do anything with it yet, we'll deal with it later.
+ disposition = Disposition_Skip;
+ break;
+
+ case '^':
+ // Escape unless it appears at the start.
+ switch ( previous.sinceStart )
+ {
+ case SinceStart_None:
+ // Don't escape, but do update the state.
+ current.sinceStart = SinceStart_OnlyCaret;
+ break;
+
+ case SinceStart_OnlyCaret:
+ case SinceStart_Some:
+ disposition = Disposition_Escape;
+ break;
+ }
+ break;
+
+ case '*':
+ // Escape unless it appears at the start or right after "^".
+ switch ( previous.sinceStart )
+ {
+ case SinceStart_None:
+ case SinceStart_OnlyCaret:
+ disposition = Disposition_Escape;
+ break;
+
+ case SinceStart_Some:
+ break;
+ }
+ break;
+
+ case '$':
+ // Escape unless it appears at the end or just before "\)".
+ disposition = Disposition_Escape;
+ {
+ wxString::const_iterator next = it;
+ ++next;
+ if ( next == end )
+ {
+ // It is at the end, so has special meaning.
+ disposition = Disposition_Append;
+ }
+ else // Not at the end, but maybe at subexpression end?
+ {
+ if ( *next == '\\' )
+ {
+ ++next;
+ if ( next != end && *next == ')' )
+ disposition = Disposition_Append;
+ }
+ }
+ }
+ break;
+
+ case '|':
+ case '+':
+ case '?':
+ case '(':
+ case ')':
+ case '{':
+ case '}':
+ // Escape these characters which are not special in a BRE,
+ // but would be special in a ERE if left unescaped.
+ disposition = Disposition_Escape;
+ break;
+
+ case '[':
+ // Rules are very different for the characters inside the
+ // bracket expressions and we don't have to change anything
+ // for them as the syntax is the same for BREs and EREs, so
+ // just process the entire expression at once.
+ {
+ const wxString::const_iterator start = it;
+ it = SkipBracketExpression(it, end);
+
+ // Copy everything inside without any changes.
+ ere += wxString(start, it);
+
+ if ( it == end )
+ {
+ // If we reached the end without finding the
+ // matching ']' there is nothing remaining anyhow.
+ return ere;
+ }
+
+ // Note that default Disposition_Append here is fine,
+ // we'll append the closing ']' to "ere" below.
+ }
+ break;
+ }
+ }
+
+ switch ( disposition )
+ {
+ case Disposition_Skip:
+ break;
+
+ case Disposition_Escape:
+ ere += '\\';
+ wxFALLTHROUGH;
+
+ case Disposition_Append:
+ // Note: don't use "c" here, iterator may have been advanced
+ // inside the loop.
+ ere += *it;
+ break;
+ }
+
+ previous = current;
+ }
+
+ // It's an error if a RE ends with a backslash, but we still need to
+ // preserve this error in the resulting RE.
+ if ( previous.isBackslash )
+ ere += '\\';
+
+ return ere;
+}
+
bool wxRegExImpl::Compile(const wxString& expr, int flags)
{
Reinit();
@@ -290,22 +549,24 @@ bool wxRegExImpl::Compile(const wxString& expr, int flags)
if ( flags & wxRE_NEWLINE )
flagsRE |= REG_NEWLINE;
+#ifndef WXREGEX_CONVERT_TO_MB
+ const wxChar *exprstr = expr.wx_str();
+#else
+ const wxScopedCharBuffer exprbuf = expr.utf8_str();
+ const char* const exprstr = exprbuf.data();
+#endif
+
// compile it
#ifdef WXREGEX_USING_BUILTIN
- bool conv = true;
- // FIXME-UTF8: use wc_str() after removing ANSI build
- int errorcode = wx_re_comp(&m_RegEx, expr.c_str(), expr.length(), flagsRE);
+ int errorcode = wx_re_comp(&m_RegEx, exprstr, expr.length(), flagsRE);
#else
- // FIXME-UTF8: this is potentially broken, we shouldn't even try it
- // and should always use builtin regex library (or PCRE?)
- const wxWX2MBbuf conv = expr.mbc_str();
- int errorcode = conv ? regcomp(&m_RegEx, conv, flagsRE) : REG_BADPAT;
+ int errorcode = wx_regcomp(&m_RegEx, exprstr, flagsRE);
#endif
if ( errorcode )
{
wxLogError(_("Invalid regular expression '%s': %s"),
- expr.c_str(), GetErrorMsg(errorcode, !conv).c_str());
+ expr, GetErrorMsg(errorcode));
m_isCompiled = false;
}
@@ -384,8 +645,8 @@ static int ReSearch(const regex_t *preg,
#endif // WXREGEX_USING_RE_SEARCH
bool wxRegExImpl::Matches(const wxRegChar *str,
- int flags
- WXREGEX_IF_NEED_LEN(size_t len)) const
+ int flags,
+ size_t len) const
{
wxCHECK_MSG( IsValid(), false, wxT("must successfully Compile() first") );
@@ -412,9 +673,9 @@ bool wxRegExImpl::Matches(const wxRegChar *str,
#if defined WXREGEX_USING_BUILTIN
int rc = wx_re_exec(&self->m_RegEx, str, len, NULL, m_nMatches, matches, flagsRE);
#elif defined WXREGEX_USING_RE_SEARCH
- int rc = str ? ReSearch(&self->m_RegEx, str, len, matches, flagsRE) : REG_BADPAT;
+ int rc = ReSearch(&self->m_RegEx, str, len, matches, flagsRE);
#else
- int rc = str ? regexec(&self->m_RegEx, str, m_nMatches, matches, flagsRE) : REG_BADPAT;
+ int rc = wx_regexec(&self->m_RegEx, str, len, m_nMatches, matches, flagsRE);
#endif
switch ( rc )
@@ -426,7 +687,7 @@ bool wxRegExImpl::Matches(const wxRegChar *str,
default:
// an error occurred
wxLogError(_("Failed to find match for regular expression: %s"),
- GetErrorMsg(rc, !str).c_str());
+ GetErrorMsg(rc));
wxFALLTHROUGH;
case REG_NOMATCH:
@@ -470,15 +731,9 @@ int wxRegExImpl::Replace(wxString *text,
const wxChar *textstr = text->c_str();
size_t textlen = text->length();
#else
- const wxWX2MBbuf textstr = WXREGEX_CHAR(*text);
- if (!textstr)
- {
- wxLogError(_("Failed to find match for regular expression: %s"),
- GetErrorMsg(0, true).c_str());
- return 0;
- }
- size_t textlen = strlen(textstr);
- text->clear();
+ const wxScopedCharBuffer textbuf = text->utf8_str();
+ const char* const textstr = textbuf.data();
+ size_t textlen = textbuf.length();
#endif
// the replacement text
@@ -508,14 +763,9 @@ int wxRegExImpl::Replace(wxString *text,
// note that "^" shouldn't match after the first call to Matches() so we
// use wxRE_NOTBOL to prevent it from happening
while ( (!maxMatches || countRepl < maxMatches) &&
- Matches(
-#ifndef WXREGEX_CONVERT_TO_MB
- textstr + matchStart,
-#else
- textstr.data() + matchStart,
-#endif
- countRepl ? wxRE_NOTBOL : 0
- WXREGEX_IF_NEED_LEN(textlen - matchStart)) )
+ Matches(textstr + matchStart,
+ countRepl ? wxRE_NOTBOL : 0,
+ textlen - matchStart) )
{
// the string possibly contains back references: we need to calculate
// the replacement text anew after each match
@@ -559,14 +809,8 @@ int wxRegExImpl::Replace(wxString *text,
}
else
{
- textNew += wxString(
-#ifndef WXREGEX_CONVERT_TO_MB
- textstr
-#else
- textstr.data()
-#endif
- + matchStart + start,
- *wxConvCurrent, len);
+ textNew += wxString(textstr + matchStart + start,
+ wxConvUTF8, len);
mayHaveBackrefs = true;
}
@@ -592,11 +836,7 @@ int wxRegExImpl::Replace(wxString *text,
if (result.capacity() < result.length() + start + textNew.length())
result.reserve(2 * result.length());
-#ifndef WXREGEX_CONVERT_TO_MB
- result.append(*text, matchStart, start);
-#else
- result.append(wxString(textstr.data() + matchStart, *wxConvCurrent, start));
-#endif
+ result.append(wxString(textstr + matchStart, wxConvUTF8, start));
matchStart += start;
result.append(textNew);
@@ -605,11 +845,7 @@ int wxRegExImpl::Replace(wxString *text,
matchStart += len;
}
-#ifndef WXREGEX_CONVERT_TO_MB
- result.append(*text, matchStart, wxString::npos);
-#else
- result.append(wxString(textstr.data() + matchStart, *wxConvCurrent));
-#endif
+ result.append(wxString(textstr + matchStart, wxConvUTF8));
*text = result;
return countRepl;
@@ -651,8 +887,15 @@ bool wxRegEx::Matches(const wxString& str, int flags) const
{
wxCHECK_MSG( IsValid(), false, wxT("must successfully Compile() first") );
- return m_impl->Matches(WXREGEX_CHAR(str), flags
- WXREGEX_IF_NEED_LEN(str.length()));
+#ifndef WXREGEX_CONVERT_TO_MB
+ const wxChar* const textstr = str.c_str();
+ const size_t textlen = str.length();
+#else
+ const wxScopedCharBuffer textstr = str.utf8_str();
+ const size_t textlen = textstr.length();
+#endif
+
+ return m_impl->Matches(textstr, flags, textlen);
}
bool wxRegEx::GetMatch(size_t *start, size_t *len, size_t index) const
@@ -668,7 +911,11 @@ wxString wxRegEx::GetMatch(const wxString& text, size_t index) const
if ( !GetMatch(&start, &len, index) )
return wxEmptyString;
+#ifndef WXREGEX_CONVERT_TO_MB
return text.Mid(start, len);
+#else
+ return wxString::FromUTF8(text.utf8_str().data() + start, len);
+#endif
}
size_t wxRegEx::GetMatchCount() const
diff --git a/tests/benchmarks/Makefile.in b/tests/benchmarks/Makefile.in
index 1945ccfe18..e7842cc239 100644
--- a/tests/benchmarks/Makefile.in
+++ b/tests/benchmarks/Makefile.in
@@ -57,6 +57,7 @@ BENCH_OBJECTS = \
bench_ipcclient.o \
bench_log.o \
bench_mbconv.o \
+ bench_regex.o \
bench_strings.o \
bench_tls.o \
bench_printfbench.o
@@ -299,6 +300,9 @@ bench_log.o: $(srcdir)/log.cpp
bench_mbconv.o: $(srcdir)/mbconv.cpp
$(CXXC) -c -o $@ $(BENCH_CXXFLAGS) $(srcdir)/mbconv.cpp
+bench_regex.o: $(srcdir)/regex.cpp
+ $(CXXC) -c -o $@ $(BENCH_CXXFLAGS) $(srcdir)/regex.cpp
+
bench_strings.o: $(srcdir)/strings.cpp
$(CXXC) -c -o $@ $(BENCH_CXXFLAGS) $(srcdir)/strings.cpp
diff --git a/tests/benchmarks/bench.bkl b/tests/benchmarks/bench.bkl
index f3efa94295..ed17be45fc 100644
--- a/tests/benchmarks/bench.bkl
+++ b/tests/benchmarks/bench.bkl
@@ -16,6 +16,7 @@
ipcclient.cpp
log.cpp
mbconv.cpp
+ regex.cpp
strings.cpp
tls.cpp
printfbench.cpp
diff --git a/tests/benchmarks/bench_vc8_bench.vcproj b/tests/benchmarks/bench_vc8_bench.vcproj
index 59ea520090..80b87940c3 100644
--- a/tests/benchmarks/bench_vc8_bench.vcproj
+++ b/tests/benchmarks/bench_vc8_bench.vcproj
@@ -838,6 +838,10 @@
RelativePath=".\printfbench.cpp"
>
+
+
diff --git a/tests/benchmarks/bench_vc9_bench.vcproj b/tests/benchmarks/bench_vc9_bench.vcproj
index 04a316aada..049dd37fb9 100644
--- a/tests/benchmarks/bench_vc9_bench.vcproj
+++ b/tests/benchmarks/bench_vc9_bench.vcproj
@@ -810,6 +810,10 @@
RelativePath=".\printfbench.cpp"
>
+
+
diff --git a/tests/benchmarks/makefile.gcc b/tests/benchmarks/makefile.gcc
index 3bfb0a06cf..df40d939ce 100644
--- a/tests/benchmarks/makefile.gcc
+++ b/tests/benchmarks/makefile.gcc
@@ -36,6 +36,7 @@ BENCH_OBJECTS = \
$(OBJS)\bench_ipcclient.o \
$(OBJS)\bench_log.o \
$(OBJS)\bench_mbconv.o \
+ $(OBJS)\bench_regex.o \
$(OBJS)\bench_strings.o \
$(OBJS)\bench_tls.o \
$(OBJS)\bench_printfbench.o
@@ -310,6 +311,9 @@ $(OBJS)\bench_log.o: ./log.cpp
$(OBJS)\bench_mbconv.o: ./mbconv.cpp
$(CXX) -c -o $@ $(BENCH_CXXFLAGS) $(CPPDEPS) $<
+$(OBJS)\bench_regex.o: ./regex.cpp
+ $(CXX) -c -o $@ $(BENCH_CXXFLAGS) $(CPPDEPS) $<
+
$(OBJS)\bench_strings.o: ./strings.cpp
$(CXX) -c -o $@ $(BENCH_CXXFLAGS) $(CPPDEPS) $<
diff --git a/tests/benchmarks/makefile.vc b/tests/benchmarks/makefile.vc
index 948b2813d8..9b0a130818 100644
--- a/tests/benchmarks/makefile.vc
+++ b/tests/benchmarks/makefile.vc
@@ -37,6 +37,7 @@ BENCH_OBJECTS = \
$(OBJS)\bench_ipcclient.obj \
$(OBJS)\bench_log.obj \
$(OBJS)\bench_mbconv.obj \
+ $(OBJS)\bench_regex.obj \
$(OBJS)\bench_strings.obj \
$(OBJS)\bench_tls.obj \
$(OBJS)\bench_printfbench.obj
@@ -698,6 +699,9 @@ $(OBJS)\bench_log.obj: .\log.cpp
$(OBJS)\bench_mbconv.obj: .\mbconv.cpp
$(CXX) /c /nologo /TP /Fo$@ $(BENCH_CXXFLAGS) .\mbconv.cpp
+$(OBJS)\bench_regex.obj: .\regex.cpp
+ $(CXX) /c /nologo /TP /Fo$@ $(BENCH_CXXFLAGS) .\regex.cpp
+
$(OBJS)\bench_strings.obj: .\strings.cpp
$(CXX) /c /nologo /TP /Fo$@ $(BENCH_CXXFLAGS) .\strings.cpp
diff --git a/tests/benchmarks/regex.cpp b/tests/benchmarks/regex.cpp
new file mode 100644
index 0000000000..c2ca70fa98
--- /dev/null
+++ b/tests/benchmarks/regex.cpp
@@ -0,0 +1,74 @@
+/////////////////////////////////////////////////////////////////////////////
+// Name: tests/benchmarks/regex.cpp
+// Purpose: wxRegEx benchmarks
+// Author: Vadim Zeitlin
+// Created: 2018-11-15
+// Copyright: (c) 2018 Vadim Zeitlin
+// Licence: wxWindows licence
+/////////////////////////////////////////////////////////////////////////////
+
+#include "wx/ffile.h"
+#include "wx/regex.h"
+
+#include "bench.h"
+
+// ----------------------------------------------------------------------------
+// Benchmark relative costs of compiling and matching for a simple regex
+// ----------------------------------------------------------------------------
+
+static const char* const RE_SIMPLE = ".";
+
+BENCHMARK_FUNC(RECompile)
+{
+ return wxRegEx(RE_SIMPLE).IsValid();
+}
+
+BENCHMARK_FUNC(REMatch)
+{
+ static wxRegEx re(RE_SIMPLE);
+ return re.Matches("foo");
+}
+
+BENCHMARK_FUNC(RECompileAndMatch)
+{
+ return wxRegEx(RE_SIMPLE).Matches("foo");
+}
+
+// ----------------------------------------------------------------------------
+// Benchmark the cost of using a more complicated regex
+// ----------------------------------------------------------------------------
+
+namespace
+{
+
+// Use the contents of an already existing test file.
+const wxString& GetTestText()
+{
+ static wxString text;
+ if ( text.empty() )
+ {
+ wxFFile("htmltest.html").ReadAll(&text);
+ }
+
+ return text;
+}
+
+} // anonymous namespace
+
+BENCHMARK_FUNC(REFindTD)
+{
+ // This is too simplistic, but good enough for benchmarking.
+ static wxRegEx re("[^<]* | ", wxRE_ICASE | wxRE_NEWLINE);
+
+ int matches = 0;
+ for ( const wxChar* p = GetTestText().c_str(); re.Matches(p); ++matches )
+ {
+ size_t start, len;
+ if ( !re.GetMatch(&start, &len) )
+ return false;
+
+ p += start + len;
+ }
+
+ return matches == 21; // result of "grep -c"
+}
diff --git a/tests/regex/regextest.cpp b/tests/regex/regextest.cpp
index 321fd29c85..4902c852d1 100644
--- a/tests/regex/regextest.cpp
+++ b/tests/regex/regextest.cpp
@@ -234,12 +234,18 @@ void RegExTestCase::doTest(int flavor)
// 'e' - test that the pattern fails to compile
if (m_mode == 'e') {
CHECK( !re.IsValid() );
- } else {
- CHECK( re.IsValid() );
- }
- if (!re.IsValid())
+ // Never continue with this kind of test.
return;
+ } else {
+ // Note: we don't use REQUIRE here because this would abort the entire
+ // test case on error instead of skipping just the rest of this regex
+ // test.
+ CHECK( re.IsValid() );
+
+ if (!re.IsValid())
+ return;
+ }
bool matches = re.Matches(m_data, m_matchFlags);
diff --git a/tests/regex/wxregextest.cpp b/tests/regex/wxregextest.cpp
index 50a58c7d14..3d9780d12d 100644
--- a/tests/regex/wxregextest.cpp
+++ b/tests/regex/wxregextest.cpp
@@ -79,28 +79,33 @@ CheckMatch(const char* pattern,
INFO( "Pattern: " << pattern << FlagStr(flags) << ", match: " << text );
wxRegEx re(pattern, compileFlags);
- REQUIRE( re.IsValid() );
-
- bool ok = re.Matches(text, matchFlags);
-
- if (expected) {
- REQUIRE( ok );
-
- wxStringTokenizer tkz(wxString(expected, *wxConvCurrent),
- wxT("\t"), wxTOKEN_RET_EMPTY);
- size_t i;
-
- for (i = 0; i < re.GetMatchCount() && tkz.HasMoreTokens(); i++) {
- INFO( "Match #" << i );
- CHECK( re.GetMatch(text, i) == tkz.GetNextToken() );
- }
-
- if ((flags & wxRE_NOSUB) == 0)
- CHECK(re.GetMatchCount() == i);
+ if ( !re.IsValid() )
+ {
+ FAIL("Regex compilation failed");
+ return;
}
- else {
- CHECK( !ok );
+
+ if ( !re.Matches(text, matchFlags) )
+ {
+ CHECK( !expected );
+ return;
}
+
+ CHECK( expected );
+ if ( !expected )
+ return;
+
+ wxStringTokenizer tkz(wxString(expected, *wxConvCurrent),
+ wxT("\t"), wxTOKEN_RET_EMPTY);
+ size_t i;
+
+ for (i = 0; i < re.GetMatchCount() && tkz.HasMoreTokens(); i++) {
+ INFO( "Match #" << i );
+ CHECK( re.GetMatch(text, i) == tkz.GetNextToken() );
+ }
+
+ if ((flags & wxRE_NOSUB) == 0)
+ CHECK(re.GetMatchCount() == i);
}
TEST_CASE("wxRegEx::Match", "[regex][match]")
@@ -165,4 +170,18 @@ TEST_CASE("wxRegEx::QuoteMeta", "[regex][meta]")
CHECK( wxRegEx::QuoteMeta(":foo.*bar") == ":foo\\.\\*bar" );
}
+TEST_CASE("wxRegEx::ConvertFromBasic", "[regex][basic]")
+{
+ CHECK( wxRegEx::ConvertFromBasic("\\(a\\)b") == "(a)b" );
+ CHECK( wxRegEx::ConvertFromBasic("a\\{0,1\\}b") == "a{0,1}b" );
+ CHECK( wxRegEx::ConvertFromBasic("*") == "\\*" );
+ CHECK( wxRegEx::ConvertFromBasic("**") == "\\**" );
+ CHECK( wxRegEx::ConvertFromBasic("^*") == "^\\*" );
+ CHECK( wxRegEx::ConvertFromBasic("^^") == "^\\^" );
+ CHECK( wxRegEx::ConvertFromBasic("x$y") == "x\\$y" );
+ CHECK( wxRegEx::ConvertFromBasic("$$") == "\\$$" );
+ CHECK( wxRegEx::ConvertFromBasic("\\(x$\\)") == "(x$)" );
+ CHECK( wxRegEx::ConvertFromBasic("[^$\\)]") == "[^$\\)]" );
+}
+
#endif // wxUSE_REGEX