added regex test suite
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@26104 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -37,7 +37,8 @@ TEST_CXXFLAGS = -D__WX$(TOOLKIT)__ $(__WXUNIV_DEFINE_p) -I$(srcdir) \
|
|||||||
$(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
|
$(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
|
||||||
TEST_OBJECTS = \
|
TEST_OBJECTS = \
|
||||||
test_test.o \
|
test_test.o \
|
||||||
test_main.o
|
test_main.o \
|
||||||
|
test_regex.o
|
||||||
|
|
||||||
### Conditionally set variables: ###
|
### Conditionally set variables: ###
|
||||||
|
|
||||||
@@ -106,6 +107,9 @@ test_test.o: $(srcdir)/test.cpp
|
|||||||
test_main.o: $(srcdir)/mbconv/main.cpp
|
test_main.o: $(srcdir)/mbconv/main.cpp
|
||||||
$(CXXC) -c -o $@ $(TEST_CXXFLAGS) $<
|
$(CXXC) -c -o $@ $(TEST_CXXFLAGS) $<
|
||||||
|
|
||||||
|
test_regex.o: $(srcdir)/regex/regex.cpp
|
||||||
|
$(CXXC) -c -o $@ $(TEST_CXXFLAGS) $<
|
||||||
|
|
||||||
|
|
||||||
# Include dependency info, if present:
|
# Include dependency info, if present:
|
||||||
@IF_GNU_MAKE@-include .deps/*.d
|
@IF_GNU_MAKE@-include .deps/*.d
|
||||||
|
@@ -31,7 +31,8 @@ TEST_CXXFLAGS = $(__RUNTIME_LIBS_6) -I$(BCCDIR)\include $(__DEBUGINFO) \
|
|||||||
$(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
|
$(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
|
||||||
TEST_OBJECTS = \
|
TEST_OBJECTS = \
|
||||||
$(OBJS)\test_test.obj \
|
$(OBJS)\test_test.obj \
|
||||||
$(OBJS)\test_main.obj
|
$(OBJS)\test_main.obj \
|
||||||
|
$(OBJS)\test_regex.obj
|
||||||
|
|
||||||
### Conditionally set variables: ###
|
### Conditionally set variables: ###
|
||||||
|
|
||||||
@@ -156,3 +157,6 @@ $(OBJS)\test_test.obj: .\test.cpp
|
|||||||
|
|
||||||
$(OBJS)\test_main.obj: .\mbconv\main.cpp
|
$(OBJS)\test_main.obj: .\mbconv\main.cpp
|
||||||
$(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $**
|
$(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $**
|
||||||
|
|
||||||
|
$(OBJS)\test_regex.obj: .\regex\regex.cpp
|
||||||
|
$(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $**
|
||||||
|
@@ -22,7 +22,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO) $(__OPTIMIZEFLAG_2) $(GCCFLAGS) -DHAVE_W32API_H \
|
|||||||
$(__EXCEPTIONSFLAG_4) $(__EXCEPTIONSFLAG_5) $(CPPFLAGS) $(CXXFLAGS)
|
$(__EXCEPTIONSFLAG_4) $(__EXCEPTIONSFLAG_5) $(CPPFLAGS) $(CXXFLAGS)
|
||||||
TEST_OBJECTS = \
|
TEST_OBJECTS = \
|
||||||
$(OBJS)\test_test.o \
|
$(OBJS)\test_test.o \
|
||||||
$(OBJS)\test_main.o
|
$(OBJS)\test_main.o \
|
||||||
|
$(OBJS)\test_regex.o
|
||||||
|
|
||||||
### Conditionally set variables: ###
|
### Conditionally set variables: ###
|
||||||
|
|
||||||
@@ -151,4 +152,7 @@ $(OBJS)\test_test.o: ./test.cpp
|
|||||||
$(OBJS)\test_main.o: ./mbconv/main.cpp
|
$(OBJS)\test_main.o: ./mbconv/main.cpp
|
||||||
$(CXX) -c -o $@ $(TEST_CXXFLAGS) $<
|
$(CXX) -c -o $@ $(TEST_CXXFLAGS) $<
|
||||||
|
|
||||||
|
$(OBJS)\test_regex.o: ./regex/regex.cpp
|
||||||
|
$(CXX) -c -o $@ $(TEST_CXXFLAGS) $<
|
||||||
|
|
||||||
.PHONY: all clean
|
.PHONY: all clean
|
||||||
|
@@ -24,7 +24,8 @@ TEST_CXXFLAGS = /M$(__RUNTIME_LIBS_7)$(__DEBUGRUNTIME_3) /DWIN32 \
|
|||||||
$(__EXCEPTIONSFLAG_9) $(CPPFLAGS) $(CXXFLAGS)
|
$(__EXCEPTIONSFLAG_9) $(CPPFLAGS) $(CXXFLAGS)
|
||||||
TEST_OBJECTS = \
|
TEST_OBJECTS = \
|
||||||
$(OBJS)\test_test.obj \
|
$(OBJS)\test_test.obj \
|
||||||
$(OBJS)\test_main.obj
|
$(OBJS)\test_main.obj \
|
||||||
|
$(OBJS)\test_regex.obj
|
||||||
|
|
||||||
### Conditionally set variables: ###
|
### Conditionally set variables: ###
|
||||||
|
|
||||||
@@ -212,3 +213,6 @@ $(OBJS)\test_test.obj: .\test.cpp
|
|||||||
|
|
||||||
$(OBJS)\test_main.obj: .\mbconv\main.cpp
|
$(OBJS)\test_main.obj: .\mbconv\main.cpp
|
||||||
$(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $**
|
$(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $**
|
||||||
|
|
||||||
|
$(OBJS)\test_regex.obj: .\regex\regex.cpp
|
||||||
|
$(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $**
|
||||||
|
@@ -172,7 +172,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO_0) $(__OPTIMIZEFLAG_2) -bm $(__RUNTIME_LIBS_5) &
|
|||||||
$(__EXCEPTIONSFLAG_7) $(CPPFLAGS) $(CXXFLAGS)
|
$(__EXCEPTIONSFLAG_7) $(CPPFLAGS) $(CXXFLAGS)
|
||||||
TEST_OBJECTS = &
|
TEST_OBJECTS = &
|
||||||
$(OBJS)\test_test.obj &
|
$(OBJS)\test_test.obj &
|
||||||
$(OBJS)\test_main.obj
|
$(OBJS)\test_main.obj &
|
||||||
|
$(OBJS)\test_regex.obj
|
||||||
|
|
||||||
|
|
||||||
all : $(OBJS)
|
all : $(OBJS)
|
||||||
@@ -206,3 +207,6 @@ $(OBJS)\test_test.obj : .AUTODEPEND .\test.cpp
|
|||||||
|
|
||||||
$(OBJS)\test_main.obj : .AUTODEPEND .\mbconv\main.cpp
|
$(OBJS)\test_main.obj : .AUTODEPEND .\mbconv\main.cpp
|
||||||
$(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $<
|
$(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $<
|
||||||
|
|
||||||
|
$(OBJS)\test_regex.obj : .AUTODEPEND .\regex\regex.cpp
|
||||||
|
$(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $<
|
||||||
|
1135
tests/regex/reg.test
Normal file
1135
tests/regex/reg.test
Normal file
File diff suppressed because it is too large
Load Diff
421
tests/regex/regex.cpp
Normal file
421
tests/regex/regex.cpp
Normal file
@@ -0,0 +1,421 @@
|
|||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Name: tests/regex/regex.cpp
|
||||||
|
// Purpose: Test the built-in regex lib and wxRegEx
|
||||||
|
// Author: Mike Wetherell
|
||||||
|
// RCS-ID: $Id$
|
||||||
|
// Copyright: (c) 2004 Mike Wetherell
|
||||||
|
// Licence: wxWidgets licence
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
//
|
||||||
|
// Notes:
|
||||||
|
//
|
||||||
|
// To run just one section, say wx_1, do this:
|
||||||
|
// test regex.wx_1
|
||||||
|
//
|
||||||
|
// To run all the regex tests:
|
||||||
|
// test regex
|
||||||
|
//
|
||||||
|
// Some tests must be skipped since they use features which we do not make
|
||||||
|
// available through wxRegEx. To see the list of tests that have been skipped
|
||||||
|
// turn on verbose logging, e.g.:
|
||||||
|
// test --verbose regex
|
||||||
|
//
|
||||||
|
// The tests here are for the builtin library, tests for wxRegEx in general
|
||||||
|
// should go in another module.
|
||||||
|
//
|
||||||
|
// The tests are generated from Henry Spencer's reg.test, additional tests
|
||||||
|
// can be added in wxreg.test. These test files are then turned into a C++
|
||||||
|
// include file 'regex.inc' (included below) using a script 'regex.pl'.
|
||||||
|
//
|
||||||
|
|
||||||
|
#if defined(__GNUG__) && !defined(__APPLE__)
|
||||||
|
#pragma implementation
|
||||||
|
#pragma interface
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// For compilers that support precompilation, includes "wx/wx.h".
|
||||||
|
#include "wx/wxprec.h"
|
||||||
|
|
||||||
|
#ifdef __BORLANDC__
|
||||||
|
#pragma hdrstop
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// for all others, include the necessary headers
|
||||||
|
#ifndef WX_PRECOMP
|
||||||
|
#include "wx/wx.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "wx/regex.h"
|
||||||
|
#include "wx/cppunit.h"
|
||||||
|
#include <iomanip>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace CppUnit;
|
||||||
|
|
||||||
|
// many of the tests are specific to the builtin regex lib, so only attempt
|
||||||
|
// to do them when using the builtin regex lib.
|
||||||
|
//
|
||||||
|
#ifdef wxHAS_REGEX_ADVANCED
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// The test case - an instance represents a single test
|
||||||
|
|
||||||
|
class RegExTestCase : public TestCase
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// constructor - create a single testcase
|
||||||
|
RegExTestCase(
|
||||||
|
const string& name,
|
||||||
|
const char *mode,
|
||||||
|
const char *id,
|
||||||
|
const char *flags,
|
||||||
|
const char *pattern,
|
||||||
|
const char *data,
|
||||||
|
const vector<const char *>& expected);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
// run this testcase
|
||||||
|
void runTest();
|
||||||
|
|
||||||
|
private:
|
||||||
|
// workers
|
||||||
|
wxString Conv(const char *str);
|
||||||
|
void parseFlags(const wxString& flags);
|
||||||
|
void doTest(int flavor);
|
||||||
|
static size_t matchCount(const wxString& expr, int flags);
|
||||||
|
static wxString quote(const wxString& arg);
|
||||||
|
const wxChar *convError() const { return _T("<cannot convert>"); }
|
||||||
|
|
||||||
|
// assertions - adds some information about the test that failed
|
||||||
|
void fail(const wxString& msg) const;
|
||||||
|
void failIf(bool condition, const wxString& msg) const
|
||||||
|
{ if (condition) fail(msg); }
|
||||||
|
|
||||||
|
// mode, id, flags, pattern, test data, expected results...
|
||||||
|
int m_mode;
|
||||||
|
wxString m_id;
|
||||||
|
wxString m_flags;
|
||||||
|
wxString m_pattern;
|
||||||
|
wxString m_data;
|
||||||
|
wxArrayString m_expected;
|
||||||
|
|
||||||
|
// the flag decoded
|
||||||
|
int m_compileFlags;
|
||||||
|
int m_matchFlags;
|
||||||
|
bool m_basic;
|
||||||
|
bool m_extended;
|
||||||
|
bool m_advanced;
|
||||||
|
};
|
||||||
|
|
||||||
|
// constructor - throws Exception on failure
|
||||||
|
//
|
||||||
|
RegExTestCase::RegExTestCase(
|
||||||
|
const string& name,
|
||||||
|
const char *mode,
|
||||||
|
const char *id,
|
||||||
|
const char *flags,
|
||||||
|
const char *pattern,
|
||||||
|
const char *data,
|
||||||
|
const vector<const char *>& expected)
|
||||||
|
:
|
||||||
|
TestCase(name),
|
||||||
|
m_mode(mode[0]),
|
||||||
|
m_id(Conv(id)),
|
||||||
|
m_flags(Conv(flags)),
|
||||||
|
m_pattern(Conv(pattern)),
|
||||||
|
m_data(Conv(data)),
|
||||||
|
m_compileFlags(0),
|
||||||
|
m_matchFlags(0),
|
||||||
|
m_basic(false),
|
||||||
|
m_extended(false),
|
||||||
|
m_advanced(false)
|
||||||
|
{
|
||||||
|
bool badconv = m_pattern == convError() || m_data == convError();
|
||||||
|
vector<const char *>::const_iterator it;
|
||||||
|
|
||||||
|
for (it = expected.begin(); it != expected.end(); ++it) {
|
||||||
|
m_expected.push_back(Conv(*it));
|
||||||
|
badconv = badconv || *m_expected.rbegin() == convError();
|
||||||
|
}
|
||||||
|
|
||||||
|
failIf(badconv, _T("cannot convert to default character encoding"));
|
||||||
|
|
||||||
|
// the flags need further parsing...
|
||||||
|
parseFlags(m_flags);
|
||||||
|
|
||||||
|
#ifndef wxHAS_REGEX_ADVANCED
|
||||||
|
failIf(!m_basic && !m_extended, _T("advanced regexs not available"));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// convert a string from UTF8 to the internal encoding
|
||||||
|
//
|
||||||
|
wxString RegExTestCase::Conv(const char *str)
|
||||||
|
{
|
||||||
|
const wxWCharBuffer wstr = wxConvUTF8.cMB2WC(str);
|
||||||
|
const wxWC2WXbuf buf = wxConvCurrent->cWC2WX(wstr);
|
||||||
|
|
||||||
|
if (!buf || wcscmp(wxConvCurrent->cWX2WC(buf), wstr) != 0)
|
||||||
|
return convError();
|
||||||
|
else
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse flags
|
||||||
|
//
|
||||||
|
void RegExTestCase::parseFlags(const wxString& flags)
|
||||||
|
{
|
||||||
|
for (const wxChar *p = flags; *p; p++) {
|
||||||
|
switch (*p) {
|
||||||
|
// noop
|
||||||
|
case '-': break;
|
||||||
|
|
||||||
|
// we don't fully support these flags, but they don't stop us
|
||||||
|
// checking for success or failure of the match, so treat as noop
|
||||||
|
case 'A': case 'B': case 'E': case 'H':
|
||||||
|
case 'I': case 'L': case 'M': case 'N':
|
||||||
|
case 'P': case 'Q': case 'R': case 'S':
|
||||||
|
case 'T': case 'U': case '%':
|
||||||
|
break;
|
||||||
|
|
||||||
|
// match options
|
||||||
|
case '^': m_matchFlags |= wxRE_NOTBOL; break;
|
||||||
|
case '$': m_matchFlags |= wxRE_NOTEOL; break;
|
||||||
|
#if wxUSE_UNICODE
|
||||||
|
case '*': break;
|
||||||
|
#endif
|
||||||
|
// compile options
|
||||||
|
case '&': m_advanced = m_basic = true; break;
|
||||||
|
case 'b': m_basic = true; break;
|
||||||
|
case 'e': m_extended = true; break;
|
||||||
|
case 'i': m_compileFlags |= wxRE_ICASE; break;
|
||||||
|
case 'o': m_compileFlags |= wxRE_NOSUB; break;
|
||||||
|
case 'n': m_compileFlags |= wxRE_NEWLINE; break;
|
||||||
|
case 't': if (strchr("ep", m_mode)) break; // else fall through...
|
||||||
|
|
||||||
|
// anything else we must skip the test
|
||||||
|
default:
|
||||||
|
fail(wxString::Format(
|
||||||
|
_T("requires unsupported flag '%c'"), *p));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try test for all flavours of expression specified
|
||||||
|
//
|
||||||
|
void RegExTestCase::runTest()
|
||||||
|
{
|
||||||
|
if (m_basic)
|
||||||
|
doTest(wxRE_BASIC);
|
||||||
|
if (m_extended)
|
||||||
|
doTest(wxRE_EXTENDED);
|
||||||
|
#ifdef wxHAS_REGEX_ADVANCED
|
||||||
|
if (m_advanced || (!m_basic && !m_extended))
|
||||||
|
doTest(wxRE_ADVANCED);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try the test for a single flavour of expression
|
||||||
|
//
|
||||||
|
void RegExTestCase::doTest(int flavor)
|
||||||
|
{
|
||||||
|
wxRegEx re(m_pattern, m_compileFlags | flavor);
|
||||||
|
|
||||||
|
// 'e' - test that the pattern fails to compile
|
||||||
|
if (m_mode == 'e')
|
||||||
|
return failIf(re.IsValid(), _T("compile suceeded (should fail)"));
|
||||||
|
failIf(!re.IsValid(), _T("compile failed"));
|
||||||
|
|
||||||
|
bool matches = re.Matches(m_data, m_matchFlags);
|
||||||
|
|
||||||
|
// 'f' or 'p' - test that the pattern does not match
|
||||||
|
if (m_mode == 'f' || m_mode == 'p')
|
||||||
|
return failIf(matches, _T("match suceeded (should fail)"));
|
||||||
|
|
||||||
|
// otherwise 'm' or 'i' - test the pattern does match
|
||||||
|
failIf(!matches, _T("match failed"));
|
||||||
|
|
||||||
|
// Check that wxRegEx is going to allocate a large enough array for the
|
||||||
|
// results we are supposed to get
|
||||||
|
failIf(m_expected.size() > matchCount(m_pattern, m_compileFlags | flavor),
|
||||||
|
_T("wxRegEx has not allocated a large enough array for the ")
|
||||||
|
_T("number of results expected"));
|
||||||
|
|
||||||
|
wxString result;
|
||||||
|
size_t start, len;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < m_expected.size(); i++) {
|
||||||
|
failIf(!re.GetMatch(&start, &len, i), wxString::Format(
|
||||||
|
_T("wxRegEx::GetMatch failed for match %d"), i));
|
||||||
|
|
||||||
|
// m - check the match returns the strings given
|
||||||
|
if (m_mode == 'm')
|
||||||
|
if (start < INT_MAX)
|
||||||
|
result = m_data.substr(start, len);
|
||||||
|
else
|
||||||
|
result = _T("");
|
||||||
|
|
||||||
|
// i - check the match returns the offsets given
|
||||||
|
else if (m_mode == 'i')
|
||||||
|
if (start < INT_MAX)
|
||||||
|
result = wxString::Format(_T("%d %d"), start, start + len - 1);
|
||||||
|
else
|
||||||
|
result = _T("-1 -1");
|
||||||
|
|
||||||
|
failIf(result != m_expected[i], wxString::Format(
|
||||||
|
_T("match(%d) == %s, expected == %s"), i,
|
||||||
|
quote(result).c_str(), quote(m_expected[i]).c_str()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// assertion - adds some information about the test that failed
|
||||||
|
//
|
||||||
|
void RegExTestCase::fail(const wxString& msg) const
|
||||||
|
{
|
||||||
|
wxString str;
|
||||||
|
wxArrayString::const_iterator it;
|
||||||
|
|
||||||
|
str << (wxChar)m_mode << _T(" ") << m_id << _T(" ") << m_flags << _T(" ")
|
||||||
|
<< quote(m_pattern) << _T(" ") << quote(m_data);
|
||||||
|
|
||||||
|
for (it = m_expected.begin(); it != m_expected.end(); ++it)
|
||||||
|
str << _T(" ") << quote(*it);
|
||||||
|
|
||||||
|
if (str.length() > 77)
|
||||||
|
str = str.substr(0, 74) + _T("...");
|
||||||
|
|
||||||
|
str << _T("\n ") << msg;
|
||||||
|
|
||||||
|
// no lossy convs so using utf8
|
||||||
|
CPPUNIT_FAIL(string(str.mb_str(wxConvUTF8)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// quote a string so that it can be displayed (static)
|
||||||
|
//
|
||||||
|
wxString RegExTestCase::quote(const wxString& arg)
|
||||||
|
{
|
||||||
|
const wxChar *needEscape = _T("\a\b\t\n\v\f\r\"\\");
|
||||||
|
const wxChar *escapes = _T("abtnvfr\"\\");
|
||||||
|
wxString str;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < arg.length(); i++) {
|
||||||
|
wxUChar ch = arg[i];
|
||||||
|
const wxChar *p = wxStrchr(needEscape, ch);
|
||||||
|
|
||||||
|
if (p)
|
||||||
|
str += wxString::Format(_T("\\%c"), escapes[p - needEscape]);
|
||||||
|
else if (wxIscntrl(ch))
|
||||||
|
str += wxString::Format(_T("\\%03o"), ch);
|
||||||
|
else
|
||||||
|
str += ch;
|
||||||
|
}
|
||||||
|
|
||||||
|
return str.length() == arg.length() && str.find(' ') == wxString::npos ?
|
||||||
|
str : _T("\"") + str + _T("\"");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count the number of subexpressions (taken from wxRegExImpl::Compile)
|
||||||
|
//
|
||||||
|
size_t RegExTestCase::matchCount(const wxString& expr, int flags)
|
||||||
|
{
|
||||||
|
// there is always one for the whole expression
|
||||||
|
size_t nMatches = 1;
|
||||||
|
|
||||||
|
// and some more for bracketed subexperessions
|
||||||
|
for ( const wxChar *cptr = expr; *cptr; cptr++ )
|
||||||
|
{
|
||||||
|
if ( *cptr == _T('\\') )
|
||||||
|
{
|
||||||
|
// in basic RE syntax groups are inside \(...\)
|
||||||
|
if ( *++cptr == _T('(') && (flags & wxRE_BASIC) )
|
||||||
|
{
|
||||||
|
nMatches++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ( *cptr == _T('(') && !(flags & wxRE_BASIC) )
|
||||||
|
{
|
||||||
|
// we know that the previous character is not an unquoted
|
||||||
|
// backslash because it would have been eaten above, so we
|
||||||
|
// have a bar '(' and this indicates a group start for the
|
||||||
|
// extended syntax
|
||||||
|
nMatches++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nMatches;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Test suite
|
||||||
|
//
|
||||||
|
// In a non-unicode build the regex is affected by the current locale, so
|
||||||
|
// this derived TestSuite is used. It sets the locale in its run() method
|
||||||
|
// for the duration of the regex tests.
|
||||||
|
|
||||||
|
class RegExTestSuite : public TestSuite
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
RegExTestSuite(string name);
|
||||||
|
void run(TestResult *result);
|
||||||
|
void add(const char *mode, const char *id, const char *flags,
|
||||||
|
const char *pattern, const char *data, const char *expected, ...);
|
||||||
|
};
|
||||||
|
|
||||||
|
// constructor, sets the locale so that it is set when the tests are added
|
||||||
|
//
|
||||||
|
RegExTestSuite::RegExTestSuite(string name) : TestSuite(name)
|
||||||
|
{
|
||||||
|
setlocale(LC_ALL, "");
|
||||||
|
}
|
||||||
|
|
||||||
|
// run the test suite, sets the locale again since it may have been changed
|
||||||
|
// by another test since this suite was created
|
||||||
|
//
|
||||||
|
void RegExTestSuite::run(TestResult *result)
|
||||||
|
{
|
||||||
|
setlocale(LC_ALL, "");
|
||||||
|
TestSuite::run(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add a testcase to the suite
|
||||||
|
//
|
||||||
|
void RegExTestSuite::add(
|
||||||
|
const char *mode,
|
||||||
|
const char *id,
|
||||||
|
const char *flags,
|
||||||
|
const char *pattern,
|
||||||
|
const char *data,
|
||||||
|
const char *expected, ...)
|
||||||
|
{
|
||||||
|
string name = getName() + "." + id;
|
||||||
|
|
||||||
|
vector<const char *> expected_results;
|
||||||
|
va_list ap;
|
||||||
|
|
||||||
|
for (va_start(ap, expected); expected; expected = va_arg(ap, const char *))
|
||||||
|
expected_results.push_back(expected);
|
||||||
|
|
||||||
|
va_end(ap);
|
||||||
|
|
||||||
|
try {
|
||||||
|
addTest(new RegExTestCase(
|
||||||
|
name, mode, id, flags, pattern, data, expected_results));
|
||||||
|
}
|
||||||
|
catch (Exception& e) {
|
||||||
|
wxLogInfo(wxString::Format(_T("skipping: %s\n %s\n"),
|
||||||
|
wxString(name.c_str(), wxConvUTF8).c_str(),
|
||||||
|
wxString(e.what(), wxConvUTF8).c_str()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Include the generated tests
|
||||||
|
//
|
||||||
|
#include "regex.inc"
|
||||||
|
|
||||||
|
|
||||||
|
#endif // wxHAS_REGEX_ADVANCED
|
1361
tests/regex/regex.inc
Normal file
1361
tests/regex/regex.inc
Normal file
File diff suppressed because it is too large
Load Diff
437
tests/regex/regex.pl
Executable file
437
tests/regex/regex.pl
Executable file
@@ -0,0 +1,437 @@
|
|||||||
|
#!/usr/bin/env perl
# Warnings are enabled with the pragma below rather than with '-w' on the
# interpreter line: Linux hands everything after the interpreter path to
# env as one single argument, so 'env perl -w' would look for a program
# literally named "perl -w" and the script would not start.
use warnings;
|
||||||
|
#############################################################################
|
||||||
|
# Name: regex.pl
|
||||||
|
# Purpose: Generate test code for wxRegEx from 'reg.test'
|
||||||
|
# Author: Mike Wetherell
|
||||||
|
# RCS-ID: $Id$
|
||||||
|
# Copyright: (c) Mike Wetherell
|
||||||
|
# Licence: wxWidgets licence
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
#
|
||||||
|
# Notes:
|
||||||
|
# See './regex.pl -h' for usage
|
||||||
|
#
|
||||||
|
# Output at the moment is C++ using the cppunit testing framework. The
|
||||||
|
# language/framework specifics are separated, with the following 5
|
||||||
|
# subs as an interface: 'begin_output', 'begin_section', 'write_test',
|
||||||
|
# 'end_section' and 'end_output'. So for a different language/framework,
|
||||||
|
# implement 5 new similar subs.
|
||||||
|
#
|
||||||
|
# I've avoided using 'use encoding "UTF-8"', since this wasn't available
|
||||||
|
# in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
|
||||||
|
# earlier than perl 5.6.0 aren't going to work.
|
||||||
|
#
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use File::Basename;
|
||||||
|
#use encoding "UTF-8"; # enable in the future when perl 5.6.x is just a memory
|
||||||
|
|
||||||
|
# if 0 output is wide characters, if 1 output is utf8 encoded
|
||||||
|
my $utf = 1;
|
||||||
|
|
||||||
|
# quote a parameter (C++ helper)
|
||||||
|
#
|
||||||
|
sub quotecxx {
|
||||||
|
my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
|
||||||
|
"\n" => "n", "\r" => "r", "\t" => "t",
|
||||||
|
"\013" => "v", '"' => '"', "\\" => "\\" );
|
||||||
|
|
||||||
|
# working around lack of 'use encoding'
|
||||||
|
$_ = pack "U0C*", unpack "C*", $_;
|
||||||
|
use utf8;
|
||||||
|
|
||||||
|
s/[\000-\037"\\\177-\x{ffff}]/
|
||||||
|
if ($esc{$&}) {
|
||||||
|
"\\$esc{$&}";
|
||||||
|
} elsif (ord($&) > 0x9f) {
|
||||||
|
if ($utf) {
|
||||||
|
$&;
|
||||||
|
} else {
|
||||||
|
sprintf "\\u%04x", ord($&);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
sprintf "\\%03o", ord($&);
|
||||||
|
}
|
||||||
|
/ge;
|
||||||
|
|
||||||
|
# working around lack of 'use encoding'
|
||||||
|
no utf8;
|
||||||
|
$_ = pack "C*", unpack "C*", $_;
|
||||||
|
|
||||||
|
return ($utf ? '"' : 'L"') . $_ . '"'
|
||||||
|
}
|
||||||
|
|
||||||
|
# start writing the output code (C++ interface)
|
||||||
|
#
|
||||||
|
sub begin_output {
|
||||||
|
my ($from, $instructions) = @_;
|
||||||
|
|
||||||
|
# embed it in the comment
|
||||||
|
$from = "\n$from";
|
||||||
|
$from =~ s/^(?: )?/ * /mg;
|
||||||
|
|
||||||
|
# $instructions contains information about the flags etc.
|
||||||
|
if ($instructions) {
|
||||||
|
$instructions = "\n$instructions";
|
||||||
|
$instructions =~ s/^(?: )?/ * /mg;
|
||||||
|
}
|
||||||
|
|
||||||
|
my $u = $utf ? " (UTF-8 encoded)" : "";
|
||||||
|
|
||||||
|
print <<EOT;
|
||||||
|
/*
|
||||||
|
* Test data for wxRegEx$u
|
||||||
|
$from$instructions */
|
||||||
|
|
||||||
|
EOT
|
||||||
|
}
|
||||||
|
|
||||||
|
my @classes;
|
||||||
|
|
||||||
|
# start a new section (C++ interface)
|
||||||
|
#
|
||||||
|
sub begin_section {
|
||||||
|
my ($id, $title) = @_;
|
||||||
|
my $class = "regextest_$id";
|
||||||
|
$class =~ s/\W/_/g;
|
||||||
|
push @classes, [$id, $class];
|
||||||
|
|
||||||
|
print <<EOT;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* $id $title
|
||||||
|
*/
|
||||||
|
|
||||||
|
class $class : public RegExTestSuite
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
$class() : RegExTestSuite("regex.$id") { }
|
||||||
|
static Test *suite();
|
||||||
|
};
|
||||||
|
|
||||||
|
Test *$class\::suite()
|
||||||
|
{
|
||||||
|
RegExTestSuite *suite = new $class;
|
||||||
|
|
||||||
|
EOT
|
||||||
|
}
|
||||||
|
|
||||||
|
# output a test line (C++ interface)
|
||||||
|
#
|
||||||
|
sub write_test {
    # Emit one 'suite->add(...)' call. Each argument is turned into a C++
    # string literal by quotecxx, which works on $_.
    my @args = @_;
    $_ = quotecxx for @args;

    # The argument list of add() is variadic and ends at the first null
    # pointer. A bare NULL may be a plain integer constant in C++ and is
    # then not guaranteed to be passed with the size of a pointer, so the
    # terminator is written out with an explicit pointer type.
    print " suite->add(" . (join ', ', @args) . ", (const char *)NULL);\n";
}
|
||||||
|
|
||||||
|
# end a section (C++ interface)
|
||||||
|
#
|
||||||
|
sub end_section {
|
||||||
|
my ($id, $class) = @{$classes[$#classes]};
|
||||||
|
|
||||||
|
print <<EOT;
|
||||||
|
|
||||||
|
return suite;
|
||||||
|
}
|
||||||
|
|
||||||
|
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($class, "regex.$id");
|
||||||
|
|
||||||
|
EOT
|
||||||
|
}
|
||||||
|
|
||||||
|
# finish off the output (C++ interface)
|
||||||
|
#
|
||||||
|
sub end_output {
|
||||||
|
print <<EOT;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A suite containing all the above suites
|
||||||
|
*/
|
||||||
|
|
||||||
|
class regextest : public TestSuite
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
regextest() : TestSuite("regex") { }
|
||||||
|
static Test *suite();
|
||||||
|
};
|
||||||
|
|
||||||
|
Test *regextest::suite()
|
||||||
|
{
|
||||||
|
TestSuite *suite = new regextest;
|
||||||
|
|
||||||
|
EOT
|
||||||
|
print " suite->addTest(".$_->[1]."::suite());\n" for @classes;
|
||||||
|
|
||||||
|
print <<EOT;
|
||||||
|
|
||||||
|
return suite;
|
||||||
|
}
|
||||||
|
|
||||||
|
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
|
||||||
|
CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
|
||||||
|
EOT
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parse a tcl string. Handles curly quoting and double quoting.
|
||||||
|
#
|
||||||
|
sub parsetcl {
|
||||||
|
my ($curly, $quote);
|
||||||
|
# recursively defined expression that can parse balanced braces
|
||||||
|
# warning: uses experimental features of perl, see perlop(1)
|
||||||
|
$curly = qr/\{(?:(?>(?:\\[{}]|[^{}])+)|(??{$curly}))*\}/;
|
||||||
|
$quote = qr/"(?:\\"|[^"])*"/;
|
||||||
|
my @tokens = shift =~ /($curly|$quote|\S+)/g;
|
||||||
|
|
||||||
|
# now remove braces/quotes and unescape any escapes
|
||||||
|
for (@tokens) {
|
||||||
|
if (s/^{(.*)}$/$1/) {
|
||||||
|
# for curly quoting, only unescape \{ and \}
|
||||||
|
s/\\([{}])/$1/g;
|
||||||
|
} else {
|
||||||
|
s/^"(.*)"$/$1/;
|
||||||
|
|
||||||
|
# unescape any escapes
|
||||||
|
my %esc = ( "a" => "\a", "b" => "\b", "f" => "\f",
|
||||||
|
"n" => "\n", "r" => "\r", "t" => "\t",
|
||||||
|
"v" => "\013" );
|
||||||
|
my $x = qr/[[:xdigit:]]/;
|
||||||
|
|
||||||
|
s/\\([0-7]{1,3}|x$x+|u$x{1,4}|.)/
|
||||||
|
if ($1 =~ m{^([0-7]+)}) {
|
||||||
|
chr(oct($1));
|
||||||
|
} elsif ($1 =~ m{^x($x+)}) {
|
||||||
|
pack("C0U", hex($1) & 0xff);
|
||||||
|
} elsif ($1 =~ m{^u($x+)}) {
|
||||||
|
pack("C0U", hex($1));
|
||||||
|
} elsif ($esc{$1}) {
|
||||||
|
$esc{$1};
|
||||||
|
} else {
|
||||||
|
$1;
|
||||||
|
}
|
||||||
|
/ge;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return @tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
# helpers which keep track of whether begin_section has been called, so that
|
||||||
|
# end_section can be called when appropriate
|
||||||
|
#
|
||||||
|
my @doing = ("0", "");
|
||||||
|
my $in_section = 0;
|
||||||
|
|
||||||
|
sub handle_doing {
    # A new heading closes the section being written, if any. The heading
    # itself is only remembered; the section is opened by handle_test when
    # its first test arrives.
    if ($in_section) {
        end_section();
        $in_section = 0;
    }

    @doing = @_;
}
|
||||||
|
|
||||||
|
sub handle_test {
    # Open the section for the remembered heading before its first test.
    unless ($in_section) {
        begin_section(@doing);
        $in_section = 1;
    }

    write_test(@_);
}
|
||||||
|
|
||||||
|
sub handle_end {
    # Close a section that is still open, then write the trailer.
    if ($in_section) {
        end_section();
        $in_section = 0;
    }

    end_output();
}
|
||||||
|
|
||||||
|
# 'main' - start by parsing the command lines options.
|
||||||
|
#
|
||||||
|
my $badoption = !@ARGV;
|
||||||
|
my $utfdefault = $utf;
|
||||||
|
my $outputname;
|
||||||
|
|
||||||
|
for (my $i = 0; $i < @ARGV; ) {
|
||||||
|
if ($ARGV[$i] !~ m{^-.}) {
|
||||||
|
$i++;
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($ARGV[$i] eq '--') {
|
||||||
|
splice @ARGV, $i, 1;
|
||||||
|
last;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($ARGV[$i] =~ s{^-(.*)o(.*)$}{-$1}i) { # -o : output file
|
||||||
|
$outputname = $2 || splice @ARGV, $i + 1, 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (split //, substr($ARGV[$i], 1)) {
|
||||||
|
if (/u/i) { # -u : utf-8 output
|
||||||
|
$utf = 1;
|
||||||
|
} elsif (/w/i) { # -w : wide char output
|
||||||
|
$utf = 0;
|
||||||
|
} else {
|
||||||
|
$badoption = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
splice @ARGV, $i, 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Display help
|
||||||
|
#
|
||||||
|
if ($badoption) {
|
||||||
|
my $prog = basename $0;
|
||||||
|
my ($w, $u) = (" (default)", " ");
|
||||||
|
($w, $u) = ($u, $w) if $utfdefault;
|
||||||
|
|
||||||
|
print <<EOT;
|
||||||
|
Usage: $prog [-u|-w] [-o OUTPUT] [FILE...]
|
||||||
|
Generate test code for wxRegEx from 'reg.test'
|
||||||
|
Example: $prog -o regex.inc reg.test wxreg.test
|
||||||
|
|
||||||
|
-w$w Output will be wide characters.
|
||||||
|
-u$u Output will be UTF-8 encoded.
|
||||||
|
|
||||||
|
Input files should be in UTF-8. If no input files are specified input is
|
||||||
|
read from stdin. If no output file is specified output is written to stdout.
|
||||||
|
See the comments in reg.test (in src/regex) for details of the input file
|
||||||
|
format.
|
||||||
|
EOT
|
||||||
|
exit 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Open the output file
|
||||||
|
#
|
||||||
|
# Redirect STDOUT to the requested file. The three-argument form keeps
# characters such as '>' or '|' in the name from being read as an open
# mode, and a failure stops the run instead of silently writing the
# generated code to the terminal.
if ($outputname) {
    open(STDOUT, '>', $outputname)
        or die basename($0) . ": cannot open '$outputname' for writing: $!\n";
}
|
||||||
|
|
||||||
|
# Read in the files and initially parse just the comments for copyright
|
||||||
|
# information and instructions on the tests
|
||||||
|
#
|
||||||
|
my @input; # slurped input files stripped of comments
|
||||||
|
my $files = ""; # copyright info from the input comments
|
||||||
|
my $instructions = ""; # test instructions from the input comments
|
||||||
|
|
||||||
|
do {
|
||||||
|
# Name of the file about to be read, or undef when reading stdin. Written
# as a conditional expression because 'my $x = ... if COND' is documented
# as having undefined behaviour.
my $inputname = @ARGV ? basename($ARGV[0]) : undef;
|
||||||
|
|
||||||
|
# slurp input
|
||||||
|
undef $/;
|
||||||
|
my $in = <>;
|
||||||
|
|
||||||
|
# remove escaped newlines
|
||||||
|
$in =~ s/(?<!\\)\\\n//g;
|
||||||
|
|
||||||
|
# record the copyrights of the input files
|
||||||
|
for ($in =~ /^#[\t ]*(.*copyright.*)$/mig) {
|
||||||
|
s/[\s:]+/ /g;
|
||||||
|
$files .= " ";
|
||||||
|
$files .= $inputname . ": " if $inputname && $inputname ne '-';
|
||||||
|
$files .= "$_\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parse the comments for instructions on the tests, which look like this:
|
||||||
|
# i successful match with -indices (used in checking things like
|
||||||
|
# nonparticipating subexpressions)
|
||||||
|
if (!$instructions) {
|
||||||
|
my $sp = qr{\t| +}; # tab or three or more spaces
|
||||||
|
my @instructions = $in =~
|
||||||
|
/\n(
|
||||||
|
(?:
|
||||||
|
\#$sp\S?$sp\S[^\n]+\n # instruction line
|
||||||
|
(?:\#$sp$sp\S[^\n]+\n)* # continuation lines (if any)
|
||||||
|
)+
|
||||||
|
)/gx;
|
||||||
|
|
||||||
|
if (@instructions) {
|
||||||
|
$instructions[0] = "Test types:\n$instructions[0]";
|
||||||
|
if (@instructions > 1) {
|
||||||
|
$instructions[1] = "Flag characters:\n$instructions[1]";
|
||||||
|
}
|
||||||
|
$instructions = join "\n", @instructions;
|
||||||
|
$instructions =~ s/^#([^\t]?)/ $1/mg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# @input is the input of all files (stripped of comments)
|
||||||
|
$in =~ s/^#.*$//mg;
|
||||||
|
push @input, $in;
|
||||||
|
|
||||||
|
} while $ARGV[0];
|
||||||
|
|
||||||
|
# Make a string naming the generator, the input files and copyright info
|
||||||
|
#
|
||||||
|
my $from = "Generated " . localtime() . " by " . basename $0;
|
||||||
|
$from =~ s/[\s]+/ /g;
|
||||||
|
if ($files) {
|
||||||
|
if ($files =~ /:/) {
|
||||||
|
$from .= " from the following files:";
|
||||||
|
} else {
|
||||||
|
$from .= " from work with the following copyright:";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$from = join("\n", $from =~ /(.{0,76}(?:\s|$))/g); # word-wrap
|
||||||
|
$from .= "\n$files" if $files;
|
||||||
|
|
||||||
|
# Now start to print the code
|
||||||
|
#
|
||||||
|
begin_output $from, $instructions;
|
||||||
|
|
||||||
|
# numbers for 'extra' sections
|
||||||
|
my $extra = 1;
|
||||||
|
|
||||||
|
for (@input)
|
||||||
|
{
|
||||||
|
# Print the main tests
|
||||||
|
#
|
||||||
|
# Test lines look like this:
|
||||||
|
# m 3 b {\(a\)b} ab ab a
|
||||||
|
#
|
||||||
|
# Also looks for heading lines, e.g.:
|
||||||
|
# doing 4 "parentheses"
|
||||||
|
#
|
||||||
|
for (split "\n") {
|
||||||
|
if (/^doing\s+(\S+)\s+(\S.*)/) {
|
||||||
|
handle_doing parsetcl "$1 $2";
|
||||||
|
} elsif (/^[efimp]\s/) {
|
||||||
|
handle_test parsetcl $_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extra tests
|
||||||
|
#
|
||||||
|
# The expression below matches something like this:
|
||||||
|
# test reg-33.8 {Bug 505048} {
|
||||||
|
# regexp -inline {\A\s*[^b]*b} ab
|
||||||
|
# } ab
|
||||||
|
#
|
||||||
|
# The three subexpressions then return these parts:
|
||||||
|
# $extras[$i] = '{Bug 505048}',
|
||||||
|
# $extras[$i + 1] = '-inline {\A\s*[^b]*b} ab'
|
||||||
|
# $extras[$i + 2] = 'ab'
|
||||||
|
#
|
||||||
|
my @extras = /\ntest\s+\S+\s*(\{.*?\})\s*\{\n # line 1
|
||||||
|
\s*regexp\s+([^\n]+)\n # line 2
|
||||||
|
\}\s*(\S[^\n]*)/gx; # line 3
|
||||||
|
|
||||||
|
handle_doing "extra_" . $extra++, "checks for bug fixes" if @extras;
|
||||||
|
|
||||||
|
for (my $i = 0; $i < @extras; $i += 3) {
|
||||||
|
my $id = $extras[$i];
|
||||||
|
|
||||||
|
# further parse the middle line into options and the rest (i.e. $args)
|
||||||
|
my ($opts, $args) = $extras[$i + 1] =~ /^\s*((?:-\S+\s+)*)([^\s-].*)/;
|
||||||
|
|
||||||
|
my @args = parsetcl $args;
|
||||||
|
$#args = 1; # only want the first two
|
||||||
|
|
||||||
|
# now handle the options
|
||||||
|
my $test = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
|
||||||
|
my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';
|
||||||
|
|
||||||
|
# get them all in the right order and print
|
||||||
|
unshift @args, $test, parsetcl($id), '-';
|
||||||
|
push @args, parsetcl(parsetcl($results)) if $results;
|
||||||
|
handle_test @args;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# finish
|
||||||
|
#
|
||||||
|
handle_end;
|
71
tests/regex/wxreg.test
Normal file
71
tests/regex/wxreg.test
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
#############################################################################
|
||||||
|
# Name: wxreg.test
|
||||||
|
# Purpose: Additional tests for the regex lib and wxRegEx
|
||||||
|
# Author: Mike Wetherell
|
||||||
|
# RCS-ID: $Id$
|
||||||
|
# Copyright: (c) 2004 Mike Wetherell.
|
||||||
|
# Licence: wxWidgets licence
|
||||||
|
#############################################################################
|
||||||
|
|
||||||
|
#
|
||||||
|
# The layout of this file is the same as src/regex/reg.test. See the comments
|
||||||
|
# in that file for full details. The encoding used in here is UTF-8.
|
||||||
|
#
|
||||||
|
# These tests test the character classifications over the ascii range pretty
|
||||||
|
# thoroughly, since hopefully these will be similar for all platforms and
|
||||||
|
# locales where wxWidgets runs.
|
||||||
|
#
|
||||||
|
# Also includes some tests involving Western European and Cyrillic characters.
|
||||||
|
# In Unicode mode, all these tests should succeed, which verifies that the
|
||||||
|
# classifications aren't limited to a single 8-bit character set.
|
||||||
|
#
|
||||||
|
# In non-unicode mode, if the test can't be translated into the character
|
||||||
|
# encoding of the current locale, the test will be skipped. So each test may
|
||||||
|
# succeed or be skipped.
|
||||||
|
#
|
||||||
|
|
||||||
|
doing wx_1 "character classification: ascii"
|
||||||
|
m 1 & {[^[:alnum:]]} "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
|
||||||
|
m 2 & {[[:alnum:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~X" "X"
|
||||||
|
m 3 & {[^[:alpha:]]} "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
|
||||||
|
m 4 & {[[:alpha:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~X" "X"
|
||||||
|
m 5 & {[^[:cntrl:]]} "\a\b\t\n\v\f\r!" "!"
|
||||||
|
m 6 & {[[:cntrl:]]} " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
|
||||||
|
m 7 & {[^[:digit:]]} "0123456789!" "!"
|
||||||
|
m 8 & {[[:digit:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ 0" "0"
|
||||||
|
m 9 & {[^[:graph:]]} "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
|
||||||
|
m 10 & {[[:graph:]]} "\a\b\t\n\v\f\r !" "!"
|
||||||
|
m 11 & {[^[:lower:]]} "abcdefghijklmnopqrstuvwxyz!" "!"
|
||||||
|
m 12 & {[[:lower:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`{|}~x" "x"
|
||||||
|
m 13 & {[^[:print:]]} "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n" "\n"
|
||||||
|
m 14 & {[[:print:]]} "\a\b\n\v\f\rX" "X"
|
||||||
|
m 15 & {[^[:punct:]]} "!\"#%&'()*,-./:;?@[\\]_{}X" "X"
|
||||||
|
m 16 & {[[:punct:]]} "\a\b\t\n\v\f\r 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
|
||||||
|
m 17 & {[^[:space:]]} "\t\n\v\f\r X" "X"
|
||||||
|
m 18 & {[[:space:]]} "\a\b!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
|
||||||
|
m 19 & {[^[:upper:]]} "ABCDEFGHIJKLMNOPQRSTUVWXYZ!" "!"
|
||||||
|
m 20 & {[[:upper:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~X" "X"
|
||||||
|
m 21 & {[^[:xdigit:]]} "0123456789ABCDEFabcdef!" "!"
|
||||||
|
m 22 & {[[:xdigit:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@GHIJKLMNOPQRSTUVWXYZ[\\]^_`ghijklmnopqrstuvwxyz{|}~a" "a"
|
||||||
|
i 23 &i "AbCdEfGhIjKlMnOpQrStUvWxYz" "aBcDeFgHiJkLmNoPqRsTuVwXyZ" "0 25"
|
||||||
|
|
||||||
|
doing wx_2 "character classification: western european"
|
||||||
|
m 1 & {[^[:alpha:]]} "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!" "!"
|
||||||
|
m 2 & {[[:alpha:]]} " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷X" "X"
|
||||||
|
m 3 & {[^[:lower:]]} "ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!" "!"
|
||||||
|
m 4 & {[[:lower:]]} " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞ÷x" "x"
|
||||||
|
m 5 & {[^[:upper:]]} "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ!" "!"
|
||||||
|
m 6 & {[[:upper:]]} " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×ßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿX" "X"
|
||||||
|
i 7 &i* "ÀáÂãÄåÆçÈéÊëÌíÎïÐñÒóÔõÖøÙúÛüÝþ" "àÁâÃäÅæÇèÉêËìÍîÏðÑòÓôÕöØùÚûÜýÞ" "0 29"
|
||||||
|
|
||||||
|
doing wx_3 "character classification: cyrillic"
|
||||||
|
m 1 & {[^[:alpha:]]} "ёЁюабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!" "!"
|
||||||
|
m 2 & {[^[:lower:]]} "ёюабцдефгхийклмнопярстужвьызшэщчъ!" "!"
|
||||||
|
m 3 & {[[:lower:]]} "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪx" "x"
|
||||||
|
m 4 & {[^[:upper:]]} "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!" "!"
|
||||||
|
m 5 & {[[:upper:]]} "ёюабцдефгхийклмнопярстужвьызшэщчъX" "X"
|
||||||
|
i 6 &i* "ЁюАбЦдЕфГхИйКлМнОпЯрСтУжВьЫзШэЩчЪ" "ёЮаБцДеФгХиЙкЛмНоПяРсТуЖвЬыЗшЭщЧъ" "0 32"
|
||||||
|
|
||||||
|
#doing bugs "known bugs"
|
||||||
|
#m 1 - {(\w+).*?(\d\d:\d\d)} "from 10:30 until 12:00" "from" "10:30"
|
||||||
|
|
@@ -10,6 +10,7 @@
|
|||||||
<sources>
|
<sources>
|
||||||
test.cpp
|
test.cpp
|
||||||
mbconv/main.cpp
|
mbconv/main.cpp
|
||||||
|
regex/regex.cpp
|
||||||
</sources>
|
</sources>
|
||||||
<wx-lib>base</wx-lib>
|
<wx-lib>base</wx-lib>
|
||||||
</exe>
|
</exe>
|
||||||
|
@@ -439,6 +439,10 @@ SOURCE=.\mbconv\main.cpp
|
|||||||
# End Source File
|
# End Source File
|
||||||
# Begin Source File
|
# Begin Source File
|
||||||
|
|
||||||
|
SOURCE=.\regex\regex.cpp
|
||||||
|
# End Source File
|
||||||
|
# Begin Source File
|
||||||
|
|
||||||
SOURCE=.\test.cpp
|
SOURCE=.\test.cpp
|
||||||
# End Source File
|
# End Source File
|
||||||
# End Group
|
# End Group
|
||||||
|
Reference in New Issue
Block a user