diff --git a/include/wx/regex.h b/include/wx/regex.h index ac70ff3907..dc3b34c4a5 100644 --- a/include/wx/regex.h +++ b/include/wx/regex.h @@ -144,6 +144,9 @@ public: static wxString QuoteMeta(const wxString& str); + // return the extended RE corresponding to the given basic RE + static wxString ConvertFromBasic(const wxString& bre); + // dtor not virtual, don't derive from this class ~wxRegEx(); diff --git a/interface/wx/regex.h b/interface/wx/regex.h index 0a6f24558c..cc3dc92acf 100644 --- a/interface/wx/regex.h +++ b/interface/wx/regex.h @@ -276,5 +276,15 @@ public: @since 3.1.3 */ static wxString QuoteMeta(const wxString& str); + + /** + Converts a basic regular expression to an extended regex syntax. + + This function can be used to convert @a bre using deprecated wxRE_BASIC + syntax to default (extended) syntax. + + @since 3.1.6 + */ + static wxString ConvertFromBasic(const wxString& bre); }; diff --git a/misc/suppressions/codespell-lines b/misc/suppressions/codespell-lines index 44401db6a6..3e1c2e8461 100644 --- a/misc/suppressions/codespell-lines +++ b/misc/suppressions/codespell-lines @@ -24,3 +24,6 @@ expressions (BRE). EREs are roughly those of the traditional @e egrep, // 2019), i.e. SEH translator seems to work just fine without /EHa too, so // Purpose: helpers for the structured exception handling (SEH) under Win32 * MinGW-w64 versions 7.3 and 8.1 (32-bit binaries use SJLJ exceptions, 64-bit ones use SEH, and all binaries use Win32 threads). 
+ static wxString ConvertFromBasic(const wxString& bre); + This function can be used to convert @a bre using deprecated wxRE_BASIC + static wxString ConvertFromBasic(const wxString& bre); diff --git a/src/common/regex.cpp b/src/common/regex.cpp index c0cf525976..2b784a7314 100644 --- a/src/common/regex.cpp +++ b/src/common/regex.cpp @@ -41,31 +41,27 @@ // WXREGEX_USING_BUILTIN defined when using the built-in regex lib // WXREGEX_USING_RE_SEARCH defined when using re_search in the GNU regex lib -// WXREGEX_IF_NEED_LEN() wrap the len parameter only used with the built-in -// or GNU regex // WXREGEX_CONVERT_TO_MB defined when the regex lib is using chars and -// wxChar is wide, so conversion must be done -// WXREGEX_CHAR(x) Convert wxChar to wxRegChar +// wxChar is wide, so conversion to UTF-8 must be done // #ifdef __REG_NOFRONT # define WXREGEX_USING_BUILTIN -# define WXREGEX_IF_NEED_LEN(x) ,x -# if wxUSE_UNICODE -# define WXREGEX_CHAR(x) (x).wc_str() -# else -# define WXREGEX_CHAR(x) (x).mb_str() -# endif #else # ifdef HAVE_RE_SEARCH -# define WXREGEX_IF_NEED_LEN(x) ,x # define WXREGEX_USING_RE_SEARCH # else -# define WXREGEX_IF_NEED_LEN(x) + // We can't use length, so just drop it in this wrapper. 
+ inline int + wx_regexec(const regex_t* preg, const char* string, size_t, + size_t nmatch, regmatch_t* pmatch, int eflags) + { + return regexec(preg, string, nmatch, pmatch, eflags); + } # endif # if wxUSE_UNICODE # define WXREGEX_CONVERT_TO_MB # endif -# define WXREGEX_CHAR(x) (x).mb_str() +# define wx_regcomp regcomp # define wx_regfree regfree # define wx_regerror regerror #endif @@ -157,8 +153,7 @@ public: // RE operations bool Compile(const wxString& expr, int flags = 0); - bool Matches(const wxRegChar *str, int flags - WXREGEX_IF_NEED_LEN(size_t len)) const; + bool Matches(const wxRegChar *str, int flags, size_t len) const; bool GetMatch(size_t *start, size_t *len, size_t index = 0) const; size_t GetMatchCount() const; int Replace(wxString *pattern, const wxString& replacement, @@ -166,7 +161,7 @@ public: private: // return the string containing the error message for the given err code - wxString GetErrorMsg(int errorcode, bool badconv) const; + wxString GetErrorMsg(int errorcode) const; // init the members void Init() @@ -224,33 +219,22 @@ wxRegExImpl::~wxRegExImpl() Free(); } -wxString wxRegExImpl::GetErrorMsg(int errorcode, bool badconv) const +wxString wxRegExImpl::GetErrorMsg(int errorcode) const { -#ifdef WXREGEX_CONVERT_TO_MB - // currently only needed when using system library in Unicode mode - if ( badconv ) - { - return _("conversion to 8-bit encoding failed"); - } -#else - // 'use' badconv to avoid a compiler warning - (void)badconv; -#endif - wxString szError; // first get the string length needed int len = wx_regerror(errorcode, &m_RegEx, NULL, 0); if ( len > 0 ) { - char* szcmbError = new char[++len]; + wxCharBuffer errbuf(len); - (void)wx_regerror(errorcode, &m_RegEx, szcmbError, len); + (void)wx_regerror(errorcode, &m_RegEx, errbuf.data(), errbuf.length()); - szError = wxConvLibc.cMB2WX(szcmbError); - delete [] szcmbError; + szError = wxConvLibc.cMB2WX(errbuf); } - else // regerror() returned 0 + + if ( szError.empty() ) // regerror() 
returned 0 or conversion failed
     {
         szError = _("unknown error");
     }
@@ -258,6 +242,303 @@ wxString wxRegExImpl::GetErrorMsg(int errorcode, bool badconv) const
     return szError;
 }
 
+// Helper function for processing bracket expressions inside a regex.
+//
+// Advance the iterator until the closing bracket matching the opening one the
+// iterator currently points to, i.e.:
+//
+// Precondition: *it == '['
+// Postcondition: *it == ']' or it == end if failed to find matching ']'
+//
+// Character classes "[:alpha:]", collating elements "[.x.]" and equivalence
+// classes "[=x=]" nested inside the bracket expression are skipped as a
+// whole, so that the ']' terminating them is not mistaken for the end of the
+// bracket expression itself.
+static
+wxString::const_iterator
+SkipBracketExpression(wxString::const_iterator it, wxString::const_iterator end)
+{
+    wxASSERT_MSG( *it == '[', "must be at the start of bracket expression" );
+
+    // Initial ']', possibly after the preceding '^', is different because it
+    // stands for a literal ']' and not the end of the bracket expression, so
+    // check for it first.
+    ++it;
+    if ( it != end && *it == '^' )
+        ++it;
+    if ( it != end && *it == ']' )
+        ++it;
+
+    // Any ']' from now on ends the bracket expression.
+    for ( ; it != end; ++it )
+    {
+        const wxUniChar c = *it;
+
+        if ( c == ']' )
+            break;
+
+        if ( c != '[' )
+            continue;
+
+        // Bare '[' on its own is not special, but collating elements and
+        // character classes are, so check for them and advance past them
+        // if necessary to avoid misinterpreting the matching closing ']'.
+        //
+        // Only peek at the next character here: if it doesn't start a nested
+        // element, it must still be examined by the next loop iteration, as
+        // it can be the closing ']' itself, e.g. in "[[]".
+        wxString::const_iterator next = it;
+        if ( ++next == end )
+            return end;
+
+        const wxUniChar delim = *next;
+        if ( delim != ':' && delim != '.' && delim != '=' )
+            continue;
+
+        // Look for the matching delimiter immediately followed by ']'.
+        it = next;
+        for ( ++it; it != end; ++it )
+        {
+            if ( *it != delim )
+                continue;
+
+            next = it;
+            if ( ++next == end )
+                return end;
+
+            if ( *next == ']' )
+            {
+                // Leave the iterator on the ']' ending the nested element,
+                // the outer loop moves past it.
+                it = next;
+                break;
+            }
+
+            // Otherwise the character after the delimiter is not consumed:
+            // it can be the delimiter starting the real terminator, as the
+            // last '.' in "[...]".
+        }
+
+        if ( it == end )
+            break;
+    }
+
+    return it;
+}
+
+/* static */
+wxString wxRegEx::ConvertFromBasic(const wxString& bre)
+{
+    /*
+        Quoting regex(7):
+
+        Obsolete ("basic") regular expressions differ in several respects.
+        '|', '+', and '?' are ordinary characters and there is no equivalent
+        for their functionality. The delimiters for bounds are "\{" and "\}",
+        with '{' and '}' by themselves ordinary characters.
The parentheses + for nested subexpressions are "\(" and "\)", with '(' and ')' by + themselves ordinary characters. '^' is an ordinary character except at + the beginning of the RE or(!) the beginning of a parenthesized + subexpression, '$' is an ordinary character except at the end of the RE + or(!) the end of a parenthesized subexpression, and '*' is an ordinary + character if it appears at the beginning of the RE or the beginning of + a parenthesized subexpression (after a possible leading '^'). + + Finally, there is one new type of atom, a back reference: '\' followed + by a nonzero decimal digit d matches the same sequence of characters + matched by the dth parenthesized subexpression [...] + */ + wxString ere; + ere.reserve(bre.length()); + + enum SinceStart + { + SinceStart_None, // Just at the beginning. + SinceStart_OnlyCaret, // Had just "^" since the beginning. + SinceStart_Some // Had something else since the beginning. + }; + + struct State + { + explicit State(SinceStart sinceStart_) + { + isBackslash = false; + sinceStart = sinceStart_; + } + + bool isBackslash; + SinceStart sinceStart; + }; + + State previous(SinceStart_None); + for ( wxString::const_iterator it = bre.begin(), + end = bre.end(); + it != end; + ++it ) + { + const wxUniChar c = *it; + + // What should be done with the current character? + enum Disposition + { + Disposition_Skip, // Nothing. + Disposition_Append, // Append to output. + Disposition_Escape // ... after escaping it with backslash. + } disposition = Disposition_Append; + + State current(SinceStart_Some); + + if ( previous.isBackslash ) + { + // By default, keep the backslash present in the BRE, it's still + // needed in the ERE too. + disposition = Disposition_Escape; + + switch ( c.GetValue() ) + { + case '(': + // It's the start of a new subexpression. 
+ current.sinceStart = SinceStart_None; + wxFALLTHROUGH; + + case ')': + case '{': + case '}': + // Do not escape to ensure they remain special in the ERE + // as the escaped versions were special in the BRE. + disposition = Disposition_Append; + break; + } + } + else // This character is not escaped. + { + switch ( c.GetValue() ) + { + case '\\': + current.isBackslash = true; + + // Don't do anything with it yet, we'll deal with it later. + disposition = Disposition_Skip; + break; + + case '^': + // Escape unless it appears at the start. + switch ( previous.sinceStart ) + { + case SinceStart_None: + // Don't escape, but do update the state. + current.sinceStart = SinceStart_OnlyCaret; + break; + + case SinceStart_OnlyCaret: + case SinceStart_Some: + disposition = Disposition_Escape; + break; + } + break; + + case '*': + // Escape unless it appears at the start or right after "^". + switch ( previous.sinceStart ) + { + case SinceStart_None: + case SinceStart_OnlyCaret: + disposition = Disposition_Escape; + break; + + case SinceStart_Some: + break; + } + break; + + case '$': + // Escape unless it appears at the end or just before "\)". + disposition = Disposition_Escape; + { + wxString::const_iterator next = it; + ++next; + if ( next == end ) + { + // It is at the end, so has special meaning. + disposition = Disposition_Append; + } + else // Not at the end, but maybe at subexpression end? + { + if ( *next == '\\' ) + { + ++next; + if ( next != end && *next == ')' ) + disposition = Disposition_Append; + } + } + } + break; + + case '|': + case '+': + case '?': + case '(': + case ')': + case '{': + case '}': + // Escape these characters which are not special in a BRE, + // but would be special in a ERE if left unescaped. 
+ disposition = Disposition_Escape; + break; + + case '[': + // Rules are very different for the characters inside the + // bracket expressions and we don't have to change anything + // for them as the syntax is the same for BREs and EREs, so + // just process the entire expression at once. + { + const wxString::const_iterator start = it; + it = SkipBracketExpression(it, end); + + // Copy everything inside without any changes. + ere += wxString(start, it); + + if ( it == end ) + { + // If we reached the end without finding the + // matching ']' there is nothing remaining anyhow. + return ere; + } + + // Note that default Disposition_Append here is fine, + // we'll append the closing ']' to "ere" below. + } + break; + } + } + + switch ( disposition ) + { + case Disposition_Skip: + break; + + case Disposition_Escape: + ere += '\\'; + wxFALLTHROUGH; + + case Disposition_Append: + // Note: don't use "c" here, iterator may have been advanced + // inside the loop. + ere += *it; + break; + } + + previous = current; + } + + // It's an error if a RE ends with a backslash, but we still need to + // preserve this error in the resulting RE. 
+ if ( previous.isBackslash ) + ere += '\\'; + + return ere; +} + bool wxRegExImpl::Compile(const wxString& expr, int flags) { Reinit(); @@ -290,22 +549,24 @@ bool wxRegExImpl::Compile(const wxString& expr, int flags) if ( flags & wxRE_NEWLINE ) flagsRE |= REG_NEWLINE; +#ifndef WXREGEX_CONVERT_TO_MB + const wxChar *exprstr = expr.wx_str(); +#else + const wxScopedCharBuffer exprbuf = expr.utf8_str(); + const char* const exprstr = exprbuf.data(); +#endif + // compile it #ifdef WXREGEX_USING_BUILTIN - bool conv = true; - // FIXME-UTF8: use wc_str() after removing ANSI build - int errorcode = wx_re_comp(&m_RegEx, expr.c_str(), expr.length(), flagsRE); + int errorcode = wx_re_comp(&m_RegEx, exprstr, expr.length(), flagsRE); #else - // FIXME-UTF8: this is potentially broken, we shouldn't even try it - // and should always use builtin regex library (or PCRE?) - const wxWX2MBbuf conv = expr.mbc_str(); - int errorcode = conv ? regcomp(&m_RegEx, conv, flagsRE) : REG_BADPAT; + int errorcode = wx_regcomp(&m_RegEx, exprstr, flagsRE); #endif if ( errorcode ) { wxLogError(_("Invalid regular expression '%s': %s"), - expr.c_str(), GetErrorMsg(errorcode, !conv).c_str()); + expr, GetErrorMsg(errorcode)); m_isCompiled = false; } @@ -384,8 +645,8 @@ static int ReSearch(const regex_t *preg, #endif // WXREGEX_USING_RE_SEARCH bool wxRegExImpl::Matches(const wxRegChar *str, - int flags - WXREGEX_IF_NEED_LEN(size_t len)) const + int flags, + size_t len) const { wxCHECK_MSG( IsValid(), false, wxT("must successfully Compile() first") ); @@ -412,9 +673,9 @@ bool wxRegExImpl::Matches(const wxRegChar *str, #if defined WXREGEX_USING_BUILTIN int rc = wx_re_exec(&self->m_RegEx, str, len, NULL, m_nMatches, matches, flagsRE); #elif defined WXREGEX_USING_RE_SEARCH - int rc = str ? ReSearch(&self->m_RegEx, str, len, matches, flagsRE) : REG_BADPAT; + int rc = ReSearch(&self->m_RegEx, str, len, matches, flagsRE); #else - int rc = str ? 
regexec(&self->m_RegEx, str, m_nMatches, matches, flagsRE) : REG_BADPAT; + int rc = wx_regexec(&self->m_RegEx, str, len, m_nMatches, matches, flagsRE); #endif switch ( rc ) @@ -426,7 +687,7 @@ bool wxRegExImpl::Matches(const wxRegChar *str, default: // an error occurred wxLogError(_("Failed to find match for regular expression: %s"), - GetErrorMsg(rc, !str).c_str()); + GetErrorMsg(rc)); wxFALLTHROUGH; case REG_NOMATCH: @@ -470,15 +731,9 @@ int wxRegExImpl::Replace(wxString *text, const wxChar *textstr = text->c_str(); size_t textlen = text->length(); #else - const wxWX2MBbuf textstr = WXREGEX_CHAR(*text); - if (!textstr) - { - wxLogError(_("Failed to find match for regular expression: %s"), - GetErrorMsg(0, true).c_str()); - return 0; - } - size_t textlen = strlen(textstr); - text->clear(); + const wxScopedCharBuffer textbuf = text->utf8_str(); + const char* const textstr = textbuf.data(); + size_t textlen = textbuf.length(); #endif // the replacement text @@ -508,14 +763,9 @@ int wxRegExImpl::Replace(wxString *text, // note that "^" shouldn't match after the first call to Matches() so we // use wxRE_NOTBOL to prevent it from happening while ( (!maxMatches || countRepl < maxMatches) && - Matches( -#ifndef WXREGEX_CONVERT_TO_MB - textstr + matchStart, -#else - textstr.data() + matchStart, -#endif - countRepl ? wxRE_NOTBOL : 0 - WXREGEX_IF_NEED_LEN(textlen - matchStart)) ) + Matches(textstr + matchStart, + countRepl ? 
wxRE_NOTBOL : 0, + textlen - matchStart) ) { // the string possibly contains back references: we need to calculate // the replacement text anew after each match @@ -559,14 +809,8 @@ int wxRegExImpl::Replace(wxString *text, } else { - textNew += wxString( -#ifndef WXREGEX_CONVERT_TO_MB - textstr -#else - textstr.data() -#endif - + matchStart + start, - *wxConvCurrent, len); + textNew += wxString(textstr + matchStart + start, + wxConvUTF8, len); mayHaveBackrefs = true; } @@ -592,11 +836,7 @@ int wxRegExImpl::Replace(wxString *text, if (result.capacity() < result.length() + start + textNew.length()) result.reserve(2 * result.length()); -#ifndef WXREGEX_CONVERT_TO_MB - result.append(*text, matchStart, start); -#else - result.append(wxString(textstr.data() + matchStart, *wxConvCurrent, start)); -#endif + result.append(wxString(textstr + matchStart, wxConvUTF8, start)); matchStart += start; result.append(textNew); @@ -605,11 +845,7 @@ int wxRegExImpl::Replace(wxString *text, matchStart += len; } -#ifndef WXREGEX_CONVERT_TO_MB - result.append(*text, matchStart, wxString::npos); -#else - result.append(wxString(textstr.data() + matchStart, *wxConvCurrent)); -#endif + result.append(wxString(textstr + matchStart, wxConvUTF8)); *text = result; return countRepl; @@ -651,8 +887,15 @@ bool wxRegEx::Matches(const wxString& str, int flags) const { wxCHECK_MSG( IsValid(), false, wxT("must successfully Compile() first") ); - return m_impl->Matches(WXREGEX_CHAR(str), flags - WXREGEX_IF_NEED_LEN(str.length())); +#ifndef WXREGEX_CONVERT_TO_MB + const wxChar* const textstr = str.c_str(); + const size_t textlen = str.length(); +#else + const wxScopedCharBuffer textstr = str.utf8_str(); + const size_t textlen = textstr.length(); +#endif + + return m_impl->Matches(textstr, flags, textlen); } bool wxRegEx::GetMatch(size_t *start, size_t *len, size_t index) const @@ -668,7 +911,11 @@ wxString wxRegEx::GetMatch(const wxString& text, size_t index) const if ( !GetMatch(&start, &len, index) ) 
return wxEmptyString; +#ifndef WXREGEX_CONVERT_TO_MB return text.Mid(start, len); +#else + return wxString::FromUTF8(text.utf8_str().data() + start, len); +#endif } size_t wxRegEx::GetMatchCount() const diff --git a/tests/benchmarks/Makefile.in b/tests/benchmarks/Makefile.in index 1945ccfe18..e7842cc239 100644 --- a/tests/benchmarks/Makefile.in +++ b/tests/benchmarks/Makefile.in @@ -57,6 +57,7 @@ BENCH_OBJECTS = \ bench_ipcclient.o \ bench_log.o \ bench_mbconv.o \ + bench_regex.o \ bench_strings.o \ bench_tls.o \ bench_printfbench.o @@ -299,6 +300,9 @@ bench_log.o: $(srcdir)/log.cpp bench_mbconv.o: $(srcdir)/mbconv.cpp $(CXXC) -c -o $@ $(BENCH_CXXFLAGS) $(srcdir)/mbconv.cpp +bench_regex.o: $(srcdir)/regex.cpp + $(CXXC) -c -o $@ $(BENCH_CXXFLAGS) $(srcdir)/regex.cpp + bench_strings.o: $(srcdir)/strings.cpp $(CXXC) -c -o $@ $(BENCH_CXXFLAGS) $(srcdir)/strings.cpp diff --git a/tests/benchmarks/bench.bkl b/tests/benchmarks/bench.bkl index f3efa94295..ed17be45fc 100644 --- a/tests/benchmarks/bench.bkl +++ b/tests/benchmarks/bench.bkl @@ -16,6 +16,7 @@ ipcclient.cpp log.cpp mbconv.cpp + regex.cpp strings.cpp tls.cpp printfbench.cpp diff --git a/tests/benchmarks/bench_vc8_bench.vcproj b/tests/benchmarks/bench_vc8_bench.vcproj index 59ea520090..80b87940c3 100644 --- a/tests/benchmarks/bench_vc8_bench.vcproj +++ b/tests/benchmarks/bench_vc8_bench.vcproj @@ -838,6 +838,10 @@ RelativePath=".\printfbench.cpp" > + + diff --git a/tests/benchmarks/bench_vc9_bench.vcproj b/tests/benchmarks/bench_vc9_bench.vcproj index 04a316aada..049dd37fb9 100644 --- a/tests/benchmarks/bench_vc9_bench.vcproj +++ b/tests/benchmarks/bench_vc9_bench.vcproj @@ -810,6 +810,10 @@ RelativePath=".\printfbench.cpp" > + + diff --git a/tests/benchmarks/makefile.gcc b/tests/benchmarks/makefile.gcc index 3bfb0a06cf..df40d939ce 100644 --- a/tests/benchmarks/makefile.gcc +++ b/tests/benchmarks/makefile.gcc @@ -36,6 +36,7 @@ BENCH_OBJECTS = \ $(OBJS)\bench_ipcclient.o \ $(OBJS)\bench_log.o \ 
$(OBJS)\bench_mbconv.o \ + $(OBJS)\bench_regex.o \ $(OBJS)\bench_strings.o \ $(OBJS)\bench_tls.o \ $(OBJS)\bench_printfbench.o @@ -310,6 +311,9 @@ $(OBJS)\bench_log.o: ./log.cpp $(OBJS)\bench_mbconv.o: ./mbconv.cpp $(CXX) -c -o $@ $(BENCH_CXXFLAGS) $(CPPDEPS) $< +$(OBJS)\bench_regex.o: ./regex.cpp + $(CXX) -c -o $@ $(BENCH_CXXFLAGS) $(CPPDEPS) $< + $(OBJS)\bench_strings.o: ./strings.cpp $(CXX) -c -o $@ $(BENCH_CXXFLAGS) $(CPPDEPS) $< diff --git a/tests/benchmarks/makefile.vc b/tests/benchmarks/makefile.vc index 948b2813d8..9b0a130818 100644 --- a/tests/benchmarks/makefile.vc +++ b/tests/benchmarks/makefile.vc @@ -37,6 +37,7 @@ BENCH_OBJECTS = \ $(OBJS)\bench_ipcclient.obj \ $(OBJS)\bench_log.obj \ $(OBJS)\bench_mbconv.obj \ + $(OBJS)\bench_regex.obj \ $(OBJS)\bench_strings.obj \ $(OBJS)\bench_tls.obj \ $(OBJS)\bench_printfbench.obj @@ -698,6 +699,9 @@ $(OBJS)\bench_log.obj: .\log.cpp $(OBJS)\bench_mbconv.obj: .\mbconv.cpp $(CXX) /c /nologo /TP /Fo$@ $(BENCH_CXXFLAGS) .\mbconv.cpp +$(OBJS)\bench_regex.obj: .\regex.cpp + $(CXX) /c /nologo /TP /Fo$@ $(BENCH_CXXFLAGS) .\regex.cpp + $(OBJS)\bench_strings.obj: .\strings.cpp $(CXX) /c /nologo /TP /Fo$@ $(BENCH_CXXFLAGS) .\strings.cpp diff --git a/tests/benchmarks/regex.cpp b/tests/benchmarks/regex.cpp new file mode 100644 index 0000000000..c2ca70fa98 --- /dev/null +++ b/tests/benchmarks/regex.cpp @@ -0,0 +1,74 @@ +///////////////////////////////////////////////////////////////////////////// +// Name: tests/benchmarks/regex.cpp +// Purpose: wxRegEx benchmarks +// Author: Vadim Zeitlin +// Created: 2018-11-15 +// Copyright: (c) 2018 Vadim Zeitlin +// Licence: wxWindows licence +///////////////////////////////////////////////////////////////////////////// + +#include "wx/ffile.h" +#include "wx/regex.h" + +#include "bench.h" + +// ---------------------------------------------------------------------------- +// Benchmark relative costs of compiling and matching for a simple regex +// 
----------------------------------------------------------------------------
+
+static const char* const RE_SIMPLE = ".";
+
+BENCHMARK_FUNC(RECompile)
+{
+    return wxRegEx(RE_SIMPLE).IsValid();
+}
+
+BENCHMARK_FUNC(REMatch)
+{
+    static wxRegEx re(RE_SIMPLE);
+    return re.Matches("foo");
+}
+
+BENCHMARK_FUNC(RECompileAndMatch)
+{
+    return wxRegEx(RE_SIMPLE).Matches("foo");
+}
+
+// ----------------------------------------------------------------------------
+// Benchmark the cost of using a more complicated regex
+// ----------------------------------------------------------------------------
+
+namespace
+{
+
+// Use the contents of an already existing test file.
+const wxString& GetTestText()
+{
+    static wxString text;
+    if ( text.empty() )
+    {
+        wxFFile("htmltest.html").ReadAll(&text);
+    }
+
+    return text;
+}
+
+} // anonymous namespace
+
+BENCHMARK_FUNC(REFindTD)
+{
+    // This is too simplistic, but good enough for benchmarking.
+    static wxRegEx re("<td>[^<]*</td>", wxRE_ICASE | wxRE_NEWLINE);
+
+    int matches = 0;
+    for ( const wxChar* p = GetTestText().c_str(); re.Matches(p); ++matches )
+    {
+        size_t start, len;
+        if ( !re.GetMatch(&start, &len) )
+            return false;
+
+        p += start + len;
+    }
+
+    return matches == 21; // result of "grep -c"
+}
diff --git a/tests/regex/regextest.cpp b/tests/regex/regextest.cpp
index 321fd29c85..4902c852d1 100644
--- a/tests/regex/regextest.cpp
+++ b/tests/regex/regextest.cpp
@@ -234,12 +234,18 @@ void RegExTestCase::doTest(int flavor)
     // 'e' - test that the pattern fails to compile
     if (m_mode == 'e') {
         CHECK( !re.IsValid() );
-    } else {
-        CHECK( re.IsValid() );
-    }
 
-    if (!re.IsValid())
+        // Never continue with this kind of test.
         return;
+    } else {
+        // Note: we don't use REQUIRE here because this would abort the entire
+        // test case on error instead of skipping just the rest of this regex
+        // test.
+ CHECK( re.IsValid() ); + + if (!re.IsValid()) + return; + } bool matches = re.Matches(m_data, m_matchFlags); diff --git a/tests/regex/wxregextest.cpp b/tests/regex/wxregextest.cpp index 50a58c7d14..3d9780d12d 100644 --- a/tests/regex/wxregextest.cpp +++ b/tests/regex/wxregextest.cpp @@ -79,28 +79,33 @@ CheckMatch(const char* pattern, INFO( "Pattern: " << pattern << FlagStr(flags) << ", match: " << text ); wxRegEx re(pattern, compileFlags); - REQUIRE( re.IsValid() ); - - bool ok = re.Matches(text, matchFlags); - - if (expected) { - REQUIRE( ok ); - - wxStringTokenizer tkz(wxString(expected, *wxConvCurrent), - wxT("\t"), wxTOKEN_RET_EMPTY); - size_t i; - - for (i = 0; i < re.GetMatchCount() && tkz.HasMoreTokens(); i++) { - INFO( "Match #" << i ); - CHECK( re.GetMatch(text, i) == tkz.GetNextToken() ); - } - - if ((flags & wxRE_NOSUB) == 0) - CHECK(re.GetMatchCount() == i); + if ( !re.IsValid() ) + { + FAIL("Regex compilation failed"); + return; } - else { - CHECK( !ok ); + + if ( !re.Matches(text, matchFlags) ) + { + CHECK( !expected ); + return; } + + CHECK( expected ); + if ( !expected ) + return; + + wxStringTokenizer tkz(wxString(expected, *wxConvCurrent), + wxT("\t"), wxTOKEN_RET_EMPTY); + size_t i; + + for (i = 0; i < re.GetMatchCount() && tkz.HasMoreTokens(); i++) { + INFO( "Match #" << i ); + CHECK( re.GetMatch(text, i) == tkz.GetNextToken() ); + } + + if ((flags & wxRE_NOSUB) == 0) + CHECK(re.GetMatchCount() == i); } TEST_CASE("wxRegEx::Match", "[regex][match]") @@ -165,4 +170,18 @@ TEST_CASE("wxRegEx::QuoteMeta", "[regex][meta]") CHECK( wxRegEx::QuoteMeta(":foo.*bar") == ":foo\\.\\*bar" ); } +TEST_CASE("wxRegEx::ConvertFromBasic", "[regex][basic]") +{ + CHECK( wxRegEx::ConvertFromBasic("\\(a\\)b") == "(a)b" ); + CHECK( wxRegEx::ConvertFromBasic("a\\{0,1\\}b") == "a{0,1}b" ); + CHECK( wxRegEx::ConvertFromBasic("*") == "\\*" ); + CHECK( wxRegEx::ConvertFromBasic("**") == "\\**" ); + CHECK( wxRegEx::ConvertFromBasic("^*") == "^\\*" ); + CHECK( 
wxRegEx::ConvertFromBasic("^^") == "^\\^" ); + CHECK( wxRegEx::ConvertFromBasic("x$y") == "x\\$y" ); + CHECK( wxRegEx::ConvertFromBasic("$$") == "\\$$" ); + CHECK( wxRegEx::ConvertFromBasic("\\(x$\\)") == "(x$)" ); + CHECK( wxRegEx::ConvertFromBasic("[^$\\)]") == "[^$\\)]" ); +} + #endif // wxUSE_REGEX