added regex test suite

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@26104 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Václav Slavík
2004-03-05 23:14:23 +00:00
parent 99e839da3c
commit e70833fb1d
12 changed files with 3455 additions and 5 deletions

View File

@@ -37,7 +37,8 @@ TEST_CXXFLAGS = -D__WX$(TOOLKIT)__ $(__WXUNIV_DEFINE_p) -I$(srcdir) \
$(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
TEST_OBJECTS = \ TEST_OBJECTS = \
test_test.o \ test_test.o \
test_main.o test_main.o \
test_regex.o
### Conditionally set variables: ### ### Conditionally set variables: ###
@@ -106,6 +107,9 @@ test_test.o: $(srcdir)/test.cpp
test_main.o: $(srcdir)/mbconv/main.cpp test_main.o: $(srcdir)/mbconv/main.cpp
$(CXXC) -c -o $@ $(TEST_CXXFLAGS) $< $(CXXC) -c -o $@ $(TEST_CXXFLAGS) $<
test_regex.o: $(srcdir)/regex/regex.cpp
$(CXXC) -c -o $@ $(TEST_CXXFLAGS) $<
# Include dependency info, if present: # Include dependency info, if present:
@IF_GNU_MAKE@-include .deps/*.d @IF_GNU_MAKE@-include .deps/*.d

View File

@@ -31,7 +31,8 @@ TEST_CXXFLAGS = $(__RUNTIME_LIBS_6) -I$(BCCDIR)\include $(__DEBUGINFO) \
$(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
TEST_OBJECTS = \ TEST_OBJECTS = \
$(OBJS)\test_test.obj \ $(OBJS)\test_test.obj \
$(OBJS)\test_main.obj $(OBJS)\test_main.obj \
$(OBJS)\test_regex.obj
### Conditionally set variables: ### ### Conditionally set variables: ###
@@ -156,3 +157,6 @@ $(OBJS)\test_test.obj: .\test.cpp
$(OBJS)\test_main.obj: .\mbconv\main.cpp $(OBJS)\test_main.obj: .\mbconv\main.cpp
$(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $** $(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $**
$(OBJS)\test_regex.obj: .\regex\regex.cpp
$(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $**

View File

@@ -22,7 +22,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO) $(__OPTIMIZEFLAG_2) $(GCCFLAGS) -DHAVE_W32API_H \
$(__EXCEPTIONSFLAG_4) $(__EXCEPTIONSFLAG_5) $(CPPFLAGS) $(CXXFLAGS) $(__EXCEPTIONSFLAG_4) $(__EXCEPTIONSFLAG_5) $(CPPFLAGS) $(CXXFLAGS)
TEST_OBJECTS = \ TEST_OBJECTS = \
$(OBJS)\test_test.o \ $(OBJS)\test_test.o \
$(OBJS)\test_main.o $(OBJS)\test_main.o \
$(OBJS)\test_regex.o
### Conditionally set variables: ### ### Conditionally set variables: ###
@@ -151,4 +152,7 @@ $(OBJS)\test_test.o: ./test.cpp
$(OBJS)\test_main.o: ./mbconv/main.cpp $(OBJS)\test_main.o: ./mbconv/main.cpp
$(CXX) -c -o $@ $(TEST_CXXFLAGS) $< $(CXX) -c -o $@ $(TEST_CXXFLAGS) $<
$(OBJS)\test_regex.o: ./regex/regex.cpp
$(CXX) -c -o $@ $(TEST_CXXFLAGS) $<
.PHONY: all clean .PHONY: all clean

View File

@@ -24,7 +24,8 @@ TEST_CXXFLAGS = /M$(__RUNTIME_LIBS_7)$(__DEBUGRUNTIME_3) /DWIN32 \
$(__EXCEPTIONSFLAG_9) $(CPPFLAGS) $(CXXFLAGS) $(__EXCEPTIONSFLAG_9) $(CPPFLAGS) $(CXXFLAGS)
TEST_OBJECTS = \ TEST_OBJECTS = \
$(OBJS)\test_test.obj \ $(OBJS)\test_test.obj \
$(OBJS)\test_main.obj $(OBJS)\test_main.obj \
$(OBJS)\test_regex.obj
### Conditionally set variables: ### ### Conditionally set variables: ###
@@ -212,3 +213,6 @@ $(OBJS)\test_test.obj: .\test.cpp
$(OBJS)\test_main.obj: .\mbconv\main.cpp $(OBJS)\test_main.obj: .\mbconv\main.cpp
$(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $** $(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $**
$(OBJS)\test_regex.obj: .\regex\regex.cpp
$(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $**

View File

@@ -172,7 +172,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO_0) $(__OPTIMIZEFLAG_2) -bm $(__RUNTIME_LIBS_5) &
$(__EXCEPTIONSFLAG_7) $(CPPFLAGS) $(CXXFLAGS) $(__EXCEPTIONSFLAG_7) $(CPPFLAGS) $(CXXFLAGS)
TEST_OBJECTS = & TEST_OBJECTS = &
$(OBJS)\test_test.obj & $(OBJS)\test_test.obj &
$(OBJS)\test_main.obj $(OBJS)\test_main.obj &
$(OBJS)\test_regex.obj
all : $(OBJS) all : $(OBJS)
@@ -206,3 +207,6 @@ $(OBJS)\test_test.obj : .AUTODEPEND .\test.cpp
$(OBJS)\test_main.obj : .AUTODEPEND .\mbconv\main.cpp $(OBJS)\test_main.obj : .AUTODEPEND .\mbconv\main.cpp
$(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $< $(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $<
$(OBJS)\test_regex.obj : .AUTODEPEND .\regex\regex.cpp
$(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $<

1135
tests/regex/reg.test Normal file

File diff suppressed because it is too large Load Diff

421
tests/regex/regex.cpp Normal file
View File

@@ -0,0 +1,421 @@
///////////////////////////////////////////////////////////////////////////////
// Name: tests/regex/regex.cpp
// Purpose: Test the built-in regex lib and wxRegEx
// Author: Mike Wetherell
// RCS-ID: $Id$
// Copyright: (c) 2004 Mike Wetherell
// Licence: wxWidgets licence
///////////////////////////////////////////////////////////////////////////////
//
// Notes:
//
// To run just one section, say wx_1, do this:
// test regex.wx_1
//
// To run all the regex tests:
// test regex
//
// Some tests must be skipped since they use features which we do not make
// available through wxRegEx. To see the list of tests that have been skipped
// turn on verbose logging, e.g.:
// test --verbose regex
//
// The tests here are for the builtin library, tests for wxRegEx in general
// should go in another module.
//
// The tests are generated from Henry Spencer's reg.test, additional tests
// can be added in wxreg.test. These test files are then turned into a C++
// include file 'regex.inc' (included below) using a script 'regex.pl'.
//
#if defined(__GNUG__) && !defined(__APPLE__)
#pragma implementation
#pragma interface
#endif
// For compilers that support precompilation, includes "wx/wx.h".
#include "wx/wxprec.h"
#ifdef __BORLANDC__
#pragma hdrstop
#endif
// for all others, include the necessary headers
#ifndef WX_PRECOMP
#include "wx/wx.h"
#endif
#include "wx/regex.h"
#include "wx/cppunit.h"
#include <iomanip>
#include <stdexcept>
using namespace std;
using namespace CppUnit;
// many of the tests are specific to the builtin regex lib, so only attempt
// to do them when using the builtin regex lib.
//
#ifdef wxHAS_REGEX_ADVANCED
///////////////////////////////////////////////////////////////////////////////
// The test case - an instance represents a single test
//
// Each instance holds one line of test data: a mode character, an id, a
// flags string, a pattern, the data to match and the expected results.
class RegExTestCase : public TestCase
{
public:
// constructor - create a single testcase
// id, flags, pattern, data and the expected strings are passed through
// Conv(); only the first character of 'mode' is kept.
RegExTestCase(
const string& name,
const char *mode,
const char *id,
const char *flags,
const char *pattern,
const char *data,
const vector<const char *>& expected);
protected:
// run this testcase
void runTest();
private:
// workers
// Conv: convert a UTF-8 string, yields convError() when it cannot be done
wxString Conv(const char *str);
// parseFlags: decode a flags string into m_compileFlags, m_matchFlags and
// the m_basic/m_extended/m_advanced switches
void parseFlags(const wxString& flags);
// doTest: compile m_pattern as the given flavour and check the outcome
// against what m_mode says should happen
void doTest(int flavor);
// matchCount: number of subexpressions in expr, counting the whole
// expression as one
static size_t matchCount(const wxString& expr, int flags);
// quote: make a string displayable (escapes, plus double quotes if needed)
static wxString quote(const wxString& arg);
// convError: the marker string Conv() returns on failure
const wxChar *convError() const { return _T("<cannot convert>"); }
// assertions - adds some information about the test that failed
// fail() reports through CPPUNIT_FAIL; failIf() does so only if 'condition'
void fail(const wxString& msg) const;
void failIf(bool condition, const wxString& msg) const
{ if (condition) fail(msg); }
// mode, id, flags, pattern, test data, expected results...
// m_mode is the first character of the mode string; doTest() distinguishes
// 'e', 'f', 'p', 'm' and 'i'
int m_mode;
wxString m_id;
wxString m_flags;
wxString m_pattern;
wxString m_data;
wxArrayString m_expected;
// the flag decoded
// (all filled in by parseFlags(): wxRE_* bits for wxRegEx::Compile and
// wxRegEx::Matches, and which regex flavours the test should be tried with)
int m_compileFlags;
int m_matchFlags;
bool m_basic;
bool m_extended;
bool m_advanced;
};
// constructor - converts the raw fields of one test line and decodes its
// flags; reports through fail() (i.e. throws Exception) when the test cannot
// be represented or asks for something that is not supported
//
RegExTestCase::RegExTestCase(
    const string& name,
    const char *mode,
    const char *id,
    const char *flags,
    const char *pattern,
    const char *data,
    const vector<const char *>& expected)
  : TestCase(name),
    m_mode(mode[0]),
    m_id(Conv(id)),
    m_flags(Conv(flags)),
    m_pattern(Conv(pattern)),
    m_data(Conv(data)),
    m_compileFlags(0),
    m_matchFlags(0),
    m_basic(false),
    m_extended(false),
    m_advanced(false)
{
    // Conv() hands back convError() for anything it could not convert
    bool badconv = false;
    if (m_pattern == convError() || m_data == convError())
        badconv = true;

    // convert every expected result, remembering whether any of them failed
    for (size_t n = 0; n < expected.size(); n++) {
        const wxString converted = Conv(expected[n]);
        m_expected.push_back(converted);
        if (!badconv && converted == convError())
            badconv = true;
    }

    failIf(badconv, _T("cannot convert to default character encoding"));

    // the flags need further parsing...
    parseFlags(m_flags);

#ifndef wxHAS_REGEX_ADVANCED
    failIf(!m_basic && !m_extended, _T("advanced regexs not available"));
#endif
}
// convert a string from UTF8 to the internal encoding
//
// The result is only accepted if converting it back gives the same wide
// string again; otherwise convError() is returned.
//
wxString RegExTestCase::Conv(const char *str)
{
    const wxWCharBuffer wide = wxConvUTF8.cMB2WC(str);
    const wxWC2WXbuf native = wxConvCurrent->cWC2WX(wide);

    // no conversion at all
    if (!native)
        return convError();

    // conversion lost something on the way
    if (wcscmp(wxConvCurrent->cWX2WC(native), wide) != 0)
        return convError();

    return native;
}
// Parse flags
//
// Walks the flags string one character at a time, turning each flag into a
// compile option, a match option or a flavour switch. A flag that is not
// handled here is reported through fail().
//
void RegExTestCase::parseFlags(const wxString& flags)
{
    const wxChar *p = flags;

    while (*p) {
        switch (*p) {
            // which flavours to try
            case 'b': m_basic = true; break;
            case 'e': m_extended = true; break;
            case '&': m_basic = true; m_advanced = true; break;

            // compile options
            case 'i': m_compileFlags |= wxRE_ICASE; break;
            case 'n': m_compileFlags |= wxRE_NEWLINE; break;
            case 'o': m_compileFlags |= wxRE_NOSUB; break;

            // match options
            case '$': m_matchFlags |= wxRE_NOTEOL; break;
            case '^': m_matchFlags |= wxRE_NOTBOL; break;

            // noop, and flags we don't fully support: these don't stop us
            // checking for success or failure of the match, so they are
            // treated as noops too
            case '-':
            case 'A': case 'B': case 'E': case 'H':
            case 'I': case 'L': case 'M': case 'N':
            case 'P': case 'Q': case 'R': case 'S':
            case 'T': case 'U': case '%':
#if wxUSE_UNICODE
            case '*':
#endif
                break;

            // 't' is acceptable for the 'e' and 'p' modes only
            case 't':
                if (strchr("ep", m_mode))
                    break;
                // else fall through...

            // anything else we must skip the test
            default:
                fail(wxString::Format(
                    _T("requires unsupported flag '%c'"), *p));
        }

        p++;
    }
}
// Run the test once for every flavour of expression the flags asked for;
// when neither basic nor extended was requested the advanced flavour is used
//
void RegExTestCase::runTest()
{
    if (m_basic) {
        doTest(wxRE_BASIC);
    }

    if (m_extended) {
        doTest(wxRE_EXTENDED);
    }

#ifdef wxHAS_REGEX_ADVANCED
    const bool noFlavourGiven = !(m_basic || m_extended);

    if (m_advanced || noFlavourGiven) {
        doTest(wxRE_ADVANCED);
    }
#endif
}
// Try the test for a single flavour of expression
//
// flavor is one of the wxRE_BASIC / wxRE_EXTENDED / wxRE_ADVANCED values and
// is combined with the compile flags decoded from the test's flag string.
// What counts as a pass depends on m_mode:
//   'e'         the pattern must fail to compile
//   'f' or 'p'  the pattern must compile but not match
//   'm'         the pattern must match, and each match must equal the
//               corresponding expected string
//   'i'         the pattern must match, and each match's "first last"
//               offsets must equal the corresponding expected string
// Any deviation is reported through fail()/failIf().
//
void RegExTestCase::doTest(int flavor)
{
    wxRegEx re(m_pattern, m_compileFlags | flavor);

    // 'e' - test that the pattern fails to compile
    if (m_mode == 'e') {
        failIf(re.IsValid(), _T("compile suceeded (should fail)"));
        return;
    }
    failIf(!re.IsValid(), _T("compile failed"));

    bool matches = re.Matches(m_data, m_matchFlags);

    // 'f' or 'p' - test that the pattern does not match
    if (m_mode == 'f' || m_mode == 'p') {
        failIf(matches, _T("match suceeded (should fail)"));
        return;
    }

    // otherwise 'm' or 'i' - test the pattern does match
    failIf(!matches, _T("match failed"));

    // Check that wxRegEx is going to allocate a large enough array for the
    // results we are supposed to get
    failIf(m_expected.size() > matchCount(m_pattern, m_compileFlags | flavor),
           _T("wxRegEx has not allocated a large enough array for the ")
           _T("number of results expected"));

    wxString result;
    size_t start, len;

    for (size_t i = 0; i < m_expected.size(); i++) {
        // the size_t values are cast before being formatted: "%d" reads an
        // int from the variable argument list
        failIf(!re.GetMatch(&start, &len, i), wxString::Format(
                _T("wxRegEx::GetMatch failed for match %d"), (int)i));

        // a start offset of INT_MAX or more is taken to mean that there is
        // nothing to report for this subexpression
        const bool hasMatch = start < INT_MAX;

        if (m_mode == 'm') {
            // m - check the match returns the strings given
            if (hasMatch)
                result = m_data.substr(start, len);
            else
                result = _T("");
        }
        else if (m_mode == 'i') {
            // i - check the match returns the offsets given
            if (hasMatch)
                result = wxString::Format(_T("%d %d"),
                                          (int)start, (int)(start + len - 1));
            else
                result = _T("-1 -1");
        }

        failIf(result != m_expected[i], wxString::Format(
                _T("match(%d) == %s, expected == %s"), (int)i,
                quote(result).c_str(), quote(m_expected[i]).c_str()));
    }
}
// assertion - adds some information about the test that failed
//
void RegExTestCase::fail(const wxString& msg) const
{
wxString str;
wxArrayString::const_iterator it;
str << (wxChar)m_mode << _T(" ") << m_id << _T(" ") << m_flags << _T(" ")
<< quote(m_pattern) << _T(" ") << quote(m_data);
for (it = m_expected.begin(); it != m_expected.end(); ++it)
str << _T(" ") << quote(*it);
if (str.length() > 77)
str = str.substr(0, 74) + _T("...");
str << _T("\n ") << msg;
// no lossy convs so using utf8
CPPUNIT_FAIL(string(str.mb_str(wxConvUTF8)));
}
// quote a string so that it can be displayed (static)
//
// Characters with a C escape (\a \b \t \n \v \f \r \" \\) are written as
// that escape, NUL and other control characters as a three digit octal
// escape, everything else is copied. The result is wrapped in double quotes
// when anything had to be escaped or when it contains a space.
//
wxString RegExTestCase::quote(const wxString& arg)
{
    const wxChar *needEscape = _T("\a\b\t\n\v\f\r\"\\");
    const wxChar *escapes = _T("abtnvfr\"\\");

    wxString str;

    for (size_t i = 0; i < arg.length(); i++) {
        wxUChar ch = arg[i];

        // never look up NUL: a strchr style search finds it at the
        // terminator of needEscape, which would index escapes[] at its own
        // terminator and emit a backslash followed by a NUL character
        const wxChar *p = ch ? wxStrchr(needEscape, ch) : NULL;

        if (p)
            str += wxString::Format(_T("\\%c"), escapes[p - needEscape]);
        else if (ch == 0 || wxIscntrl(ch))
            str += wxString::Format(_T("\\%03o"), ch);
        else
            str += ch;
    }

    // unchanged length means nothing was escaped
    return str.length() == arg.length() && str.find(' ') == wxString::npos ?
                str : _T("\"") + str + _T("\"");
}
// Count the number of subexpressions (taken from wxRegExImpl::Compile)
//
// expr is the pattern and flags the compile flags it will be compiled with;
// only the wxRE_BASIC bit is looked at, to decide whether groups are written
// \(...\) or (...). The count includes one for the whole expression.
//
size_t RegExTestCase::matchCount(const wxString& expr, int flags)
{
    // there is always one for the whole expression
    size_t nMatches = 1;

    // and some more for bracketed subexpressions
    for ( const wxChar *cptr = expr; *cptr; cptr++ )
    {
        if ( *cptr == _T('\\') )
        {
            // skip to the escaped character; if the pattern ends in a
            // backslash there is none, and we must stop here or the loop
            // increment would step over the terminating NUL
            if ( *++cptr == _T('\0') )
                break;

            // in basic RE syntax groups are inside \(...\)
            if ( *cptr == _T('(') && (flags & wxRE_BASIC) )
            {
                nMatches++;
            }
        }
        else if ( *cptr == _T('(') && !(flags & wxRE_BASIC) )
        {
            // we know that the previous character is not an unquoted
            // backslash because it would have been eaten above, so we
            // have a bare '(' and this indicates a group start for the
            // extended syntax
            nMatches++;
        }
    }

    return nMatches;
}
///////////////////////////////////////////////////////////////////////////////
// Test suite
//
// In a non-unicode build the regex is affected by the current locale, so
// this derived TestSuite is used. It sets the locale in its constructor and
// again in its run() method, before the regex tests are run.
class RegExTestSuite : public TestSuite
{
public:
RegExTestSuite(string name);
void run(TestResult *result);
// add one test; 'expected' and the arguments after it are the expected
// results, the list being ended by a null pointer
void add(const char *mode, const char *id, const char *flags,
const char *pattern, const char *data, const char *expected, ...);
};
// constructor, sets the locale so that it is set when the tests are added
//
// setlocale(LC_ALL, "") selects the locale named by the environment.
// NOTE(review): presumably needed this early because RegExTestCase's
// constructor converts the test strings with wxConvCurrent - confirm.
//
RegExTestSuite::RegExTestSuite(string name) : TestSuite(name)
{
setlocale(LC_ALL, "");
}
// run the test suite, sets the locale again since it may have been changed
// by another test since this suite was created
//
// The locale is left as set here when the suite has finished running.
//
void RegExTestSuite::run(TestResult *result)
{
setlocale(LC_ALL, "");
TestSuite::run(result);
}
// Add a testcase to the suite
//
// mode, id, flags, pattern and data are the fields of one test line.
// 'expected' is the first expected result and may be followed by more; the
// list must be ended by a null 'const char *'. If there are no expected
// results 'expected' itself is null.
//
// A test whose construction throws Exception (it cannot be converted or it
// needs an unsupported flag) is not added; it is logged as skipped instead.
//
void RegExTestSuite::add(
    const char *mode,
    const char *id,
    const char *flags,
    const char *pattern,
    const char *data,
    const char *expected, ...)
{
    string name = getName() + "." + id;

    // collect the null terminated list of expected results
    vector<const char *> expected_results;
    va_list ap;

    for (va_start(ap, expected); expected; expected = va_arg(ap, const char *))
        expected_results.push_back(expected);

    va_end(ap);

    try {
        addTest(new RegExTestCase(
            name, mode, id, flags, pattern, data, expected_results));
    }
    catch (Exception& e) {
        // the name and the reason are passed as arguments and not as part of
        // the format string: they contain text from the test file, in which
        // a '%' would otherwise be read as a conversion specification
        wxLogInfo(_T("skipping: %s\n %s\n"),
            wxString(name.c_str(), wxConvUTF8).c_str(),
            wxString(e.what(), wxConvUTF8).c_str());
    }
}
// Include the generated tests
//
#include "regex.inc"
#endif // wxHAS_REGEX_ADVANCED

1361
tests/regex/regex.inc Normal file

File diff suppressed because it is too large Load Diff

437
tests/regex/regex.pl Executable file
View File

@@ -0,0 +1,437 @@
#!/usr/bin/env perl
# Warnings are enabled with the pragma rather than with '-w' on the line
# above: where everything after the interpreter path is passed to env as a
# single argument, env would look for a program called "perl -w".
use warnings;
#############################################################################
# Name: regex.pl
# Purpose: Generate test code for wxRegEx from 'reg.test'
# Author: Mike Wetherell
# RCS-ID: $Id$
# Copyright: (c) Mike Wetherell
# Licence: wxWidgets licence
#############################################################################
#
# Notes:
# See './regex.pl -h' for usage
#
# Output at the moment is C++ using the cppunit testing framework. The
# language/framework specifics are separated, with the following 5
# subs as an interface: 'begin_output', 'begin_section', 'write_test',
# 'end_section' and 'end_output'. So for a different language/framework,
# implement 5 new similar subs.
#
# I've avoided using 'use encoding "UTF-8"', since this wasn't available
# in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
# earlier than perl 5.6.0 aren't going to work.
#
use strict;
use File::Basename;
#use encoding "UTF-8"; # enable in the future when perl 5.6.x is just a memory
# if 0 output is wide characters, if 1 output is utf8 encoded
my $utf = 1;
# quote a parameter (C++ helper)
#
# Works on $_ rather than on an argument: $_ is rewritten in place and the
# quoted form is also returned. The result is a C++ string literal, "..."
# when $utf is set and L"..." otherwise.
#
# Inside the literal:
# - characters listed in %esc become a backslash and their escape letter
# - other characters above 0x9f are left as they are when $utf is set,
# otherwise they are written as \uXXXX
# - the remaining matched characters are written as three digit octal escapes
#
sub quotecxx {
my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
"\n" => "n", "\r" => "r", "\t" => "t",
"\013" => "v", '"' => '"', "\\" => "\\" );
# working around lack of 'use encoding'
# (repack the bytes of $_ so that the substitution below sees characters)
$_ = pack "U0C*", unpack "C*", $_;
use utf8;
s/[\000-\037"\\\177-\x{ffff}]/
if ($esc{$&}) {
"\\$esc{$&}";
} elsif (ord($&) > 0x9f) {
if ($utf) {
$&;
} else {
sprintf "\\u%04x", ord($&);
}
} else {
sprintf "\\%03o", ord($&);
}
/ge;
# working around lack of 'use encoding'
# (and back to a string of bytes for output)
no utf8;
$_ = pack "C*", unpack "C*", $_;
return ($utf ? '"' : 'L"') . $_ . '"'
}
# start writing the output code (C++ interface)
#
# Prints the comment block that heads the generated file.
# $from - text naming the generator and the input files
# $instructions - description of the test types and flags, may be empty
#
sub begin_output {
my ($from, $instructions) = @_;
# embed it in the comment
# (the substitution rewrites the start of every line of the text)
$from = "\n$from";
$from =~ s/^(?: )?/ * /mg;
# $instructions contains information about the flags etc.
if ($instructions) {
$instructions = "\n$instructions";
$instructions =~ s/^(?: )?/ * /mg;
}
# say so in the heading when the output is UTF-8 encoded
my $u = $utf ? " (UTF-8 encoded)" : "";
print <<EOT;
/*
* Test data for wxRegEx$u
$from$instructions */
EOT
}
# every section started so far, as [id, class name] pairs; end_section and
# end_output read this list
my @classes;
# start a new section (C++ interface)
#
# Prints the declaration of a RegExTestSuite subclass for the section and the
# opening of its suite() function; write_test then adds the body and
# end_section closes it.
# $id - section id, used in the suite name "regex.$id"
# $title - section title, only used in the comment
#
sub begin_section {
my ($id, $title) = @_;
# the class name is the id with anything that is not a word character
# replaced by an underscore
my $class = "regextest_$id";
$class =~ s/\W/_/g;
push @classes, [$id, $class];
print <<EOT;
/*
* $id $title
*/
class $class : public RegExTestSuite
{
public:
$class() : RegExTestSuite("regex.$id") { }
static Test *suite();
};
Test *$class\::suite()
{
RegExTestSuite *suite = new $class;
EOT
}
# output a test line (C++ interface)
#
# The arguments are the fields of one test (mode, id, flags, pattern, data
# and any expected results). Each is turned into a C++ string literal and
# the lot is printed as one call of suite->add().
#
sub write_test {
    my @args = @_;
    $_ = quotecxx for @args;

    # The list is ended by a null pointer which add() may read back through
    # its variable argument list as a 'const char *'. A bare NULL can be a
    # plain integer zero in C++, so the terminator is given the pointer type
    # explicitly.
    print " suite->add(" . (join ', ', @args) . ", (const char *)NULL);\n";
}
# end a section (C++ interface)
#
# Closes the suite() function opened by begin_section and prints the
# registration of the section's suite under the name "regex.<id>".
#
sub end_section {
# the section being ended is the one most recently begun
my ($id, $class) = @{$classes[$#classes]};
print <<EOT;
return suite;
}
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($class, "regex.$id");
EOT
}
# finish off the output (C++ interface)
#
# Prints a 'regextest' suite whose suite() function adds the suite of every
# section recorded in @classes, followed by its registrations.
#
sub end_output {
print <<EOT;
/*
* A suite containing all the above suites
*/
class regextest : public TestSuite
{
public:
regextest() : TestSuite("regex") { }
static Test *suite();
};
Test *regextest::suite()
{
TestSuite *suite = new regextest;
EOT
# one addTest() line per section, in the order the sections were begun
print " suite->addTest(".$_->[1]."::suite());\n" for @classes;
print <<EOT;
return suite;
}
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
EOT
}
# Parse a tcl string. Handles curly quoting and double quoting.
#
# Takes one string and returns the list of words in it. A word is a
# {...} group (braces may nest), a "..." string or a run of non-space
# characters. The enclosing braces or quotes are removed from the words
# returned.
#
sub parsetcl {
my ($curly, $quote);
# recursively defined expression that can parse balanced braces
# warning: uses experimental features of perl, see perlop(1)
$curly = qr/\{(?:(?>(?:\\[{}]|[^{}])+)|(??{$curly}))*\}/;
$quote = qr/"(?:\\"|[^"])*"/;
my @tokens = shift =~ /($curly|$quote|\S+)/g;
# now remove braces/quotes and unescape any escapes
for (@tokens) {
if (s/^{(.*)}$/$1/) {
# for curly quoting, only unescape \{ and \}
s/\\([{}])/$1/g;
} else {
s/^"(.*)"$/$1/;
# unescape any escapes
# (this is also done for words that had no quotes around them)
my %esc = ( "a" => "\a", "b" => "\b", "f" => "\f",
"n" => "\n", "r" => "\r", "t" => "\t",
"v" => "\013" );
my $x = qr/[[:xdigit:]]/;
# in order: octal (up to 3 digits), \x hex (only the low 8 bits of the
# value are used), \u hex (up to 4 digits), a letter from %esc, and
# lastly any other character, which stands for itself
s/\\([0-7]{1,3}|x$x+|u$x{1,4}|.)/
if ($1 =~ m{^([0-7]+)}) {
chr(oct($1));
} elsif ($1 =~ m{^x($x+)}) {
pack("C0U", hex($1) & 0xff);
} elsif ($1 =~ m{^u($x+)}) {
pack("C0U", hex($1));
} elsif ($esc{$1}) {
$esc{$1};
} else {
$1;
}
/ge;
}
}
return @tokens;
}
# The three handlers below sit between the parser and the output subs. They
# remember whether a section is currently open, so that begin_section is
# only called once a section turns out to contain a test and end_section is
# called whenever an open section has to be closed.
#
my @doing = ("0", "");      # id and title to use for the next section
my $in_section = 0;         # true while a section is open

# a heading line was read: close any open section, note the new id and title
sub handle_doing {
    if ($in_section) {
        end_section();
        $in_section = 0;
    }
    @doing = @_;
}

# a test was read: open the pending section if need be, then write the test
sub handle_test {
    unless ($in_section) {
        begin_section(@doing);
        $in_section = 1;
    }
    write_test(@_);
}

# no more input: close any open section and finish off the output
sub handle_end {
    if ($in_section) {
        end_section();
        $in_section = 0;
    }
    end_output();
}
# 'main' - start by parsing the command line options.
#
# Options are removed from @ARGV as they are dealt with, so that only the
# input file names are left in it afterwards. Running with no arguments at
# all, or with an option that is not known, sets $badoption.
#
my $badoption = !@ARGV;
my $utfdefault = $utf;
my $outputname;
for (my $i = 0; $i < @ARGV; ) {
# not an option (a lone '-' included): leave it in place and move on
if ($ARGV[$i] !~ m{^-.}) {
$i++;
next;
}
# '--' ends the options: drop it and stop looking
if ($ARGV[$i] eq '--') {
splice @ARGV, $i, 1;
last;
}
# the output name is whatever follows the 'o', or failing that the next
# argument; the 'o' and what follows it are cut out of the option
if ($ARGV[$i] =~ s{^-(.*)o(.*)$}{-$1}i) { # -o : output file
$outputname = $2 || splice @ARGV, $i + 1, 1;
}
# each remaining letter of the argument is an option of its own
for (split //, substr($ARGV[$i], 1)) {
if (/u/i) { # -u : utf-8 output
$utf = 1;
} elsif (/w/i) { # -w : wide char output
$utf = 0;
} else {
$badoption = 1;
}
}
# done with this option; $i now indexes the argument that followed it
splice @ARGV, $i, 1;
}
# Display help
#
# Shown when $badoption was set while parsing the options; the script then
# exits with status 0 without generating anything.
#
if ($badoption) {
my $prog = basename $0;
# mark as "(default)" whichever of -w and -u matches the initial $utf
my ($w, $u) = (" (default)", " ");
($w, $u) = ($u, $w) if $utfdefault;
print <<EOT;
Usage: $prog [-u|-w] [-o OUTPUT] [FILE...]
Generate test code for wxRegEx from 'reg.test'
Example: $prog -o regex.inc reg.test wxreg.test
-w$w Output will be wide characters.
-u$u Output will be UTF-8 encoded.
Input files should be in UTF-8. If no input files are specified input is
read from stdin. If no output file is specified output is written to stdout.
See the comments in reg.test (in src/regex) for details of the input file
format.
EOT
exit 0;
}
# Open the output file
#
# From here on everything printed goes to the file named with -o, if one was
# given. Stop with a message if it cannot be opened, rather than carry on
# with an unusable STDOUT.
#
if ($outputname) {
    open STDOUT, ">$outputname"
        or die basename($0) . ": cannot open '$outputname' for writing: $!\n";
}
# Read in the files and initially parse just the comments for copyright
# information and instructions on the tests
#
# Each pass of the loop reads one whole input file (or all of stdin when no
# files were named) and adds its text, with the comments removed, to @input.
#
my @input;              # slurped input files stripped of comments
my $files = "";         # copyright info from the input comments
my $instructions = "";  # test instructions from the input comments

do {
    # name of the file about to be read, undefined when reading from stdin
    # (a plain assignment: the value of a 'my' declared with a statement
    # modifier is not defined when the condition is false)
    my $inputname = @ARGV ? basename($ARGV[0]) : undef;

    # slurp input
    local $/;
    my $in = <>;
    $in = "" unless defined $in;    # nothing could be read

    # remove escaped newlines
    $in =~ s/(?<!\\)\\\n//g;

    # record the copyrights of the input files
    for ($in =~ /^#[\t ]*(.*copyright.*)$/mig) {
        s/[\s:]+/ /g;
        $files .= " ";
        $files .= $inputname . ": " if $inputname && $inputname ne '-';
        $files .= "$_\n";
    }

    # Parse the comments for instructions on the tests, which look like this:
    # i successful match with -indices (used in checking things like
    # nonparticipating subexpressions)
    # Only the first file in which any are found is used.
    if (!$instructions) {
        # NOTE(review): the comment below says three or more spaces; confirm
        # that the spacing inside the pattern agrees with it
        my $sp = qr{\t| +}; # tab or three or more spaces
        my @instructions = $in =~
            /\n(
                (?:
                    \#$sp\S?$sp\S[^\n]+\n       # instruction line
                    (?:\#$sp$sp\S[^\n]+\n)*     # continuation lines (if any)
                )+
            )/gx;

        if (@instructions) {
            # the first group of instructions found describes the test
            # types, the second (if any) the flag characters
            $instructions[0] = "Test types:\n$instructions[0]";
            if (@instructions > 1) {
                $instructions[1] = "Flag characters:\n$instructions[1]";
            }
            $instructions = join "\n", @instructions;
            $instructions =~ s/^#([^\t]?)/ $1/mg;
        }
    }

    # @input is the input of all files (stripped of comments)
    $in =~ s/^#.*$//mg;
    push @input, $in;
} while $ARGV[0];
# Make a string naming the generator, the input files and copyright info
#
# The first part says when and by what the output was generated; it is put
# on lines of limited width. The copyright lines collected in $files, if
# any, are then added below it as they are.
#
my $from = sprintf "Generated %s by %s", scalar(localtime()), basename($0);
$from =~ s/[\s]+/ /g;

if ($files) {
    # a colon is only present when the lines were prefixed with a file name
    my $named = $files =~ /:/;
    $from .= $named ? " from the following files:"
                    : " from work with the following copyright:";
}

# word-wrap
my @wrapped = $from =~ /(.{0,76}(?:\s|$))/g;
$from = join "\n", @wrapped;

if ($files) {
    $from .= "\n$files";
}
# Now start to print the code
#
# The heading comes first, then each input file is gone through twice: once
# line by line for headings and ordinary tests, and once as a whole for the
# 'extra' tests written as tcl 'test' commands.
#
begin_output $from, $instructions;
# numbers for 'extra' sections
my $extra = 1;
for (@input)
{
# Print the main tests
#
# Test lines look like this:
# m 3 b {\(a\)b} ab ab a
#
# Also looks for heading lines, e.g.:
# doing 4 "parentheses"
#
for (split "\n") {
if (/^doing\s+(\S+)\s+(\S.*)/) {
handle_doing parsetcl "$1 $2";
} elsif (/^[efimp]\s/) {
handle_test parsetcl $_;
}
}
# Extra tests
#
# The expression below matches something like this:
# test reg-33.8 {Bug 505048} {
# regexp -inline {\A\s*[^b]*b} ab
# } ab
#
# The three subexpressions then return these parts:
# $extras[$i] = '{Bug 505048}',
# $extras[$i + 1] = '-inline {\A\s*[^b]*b} ab'
# $extras[$i + 2] = 'ab'
#
my @extras = /\ntest\s+\S+\s*(\{.*?\})\s*\{\n # line 1
\s*regexp\s+([^\n]+)\n # line 2
\}\s*(\S[^\n]*)/gx; # line 3
# the extras of each file get a section of their own, extra_1, extra_2...
handle_doing "extra_" . $extra++, "checks for bug fixes" if @extras;
for (my $i = 0; $i < @extras; $i += 3) {
my $id = $extras[$i];
# further parse the middle line into options and the rest (i.e. $args)
my ($opts, $args) = $extras[$i + 1] =~ /^\s*((?:-\S+\s+)*)([^\s-].*)/;
my @args = parsetcl $args;
$#args = 1; # only want the first two
# now handle the options
# -indices gives an 'i' test; otherwise it is an 'm' test when a result is
# given and an 'f' test when it is not. The results themselves are only
# checked for -inline.
my $test = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';
# get them all in the right order and print
# (mode, id, '-' for no flags, pattern, data, then the expected results)
unshift @args, $test, parsetcl($id), '-';
push @args, parsetcl(parsetcl($results)) if $results;
handle_test @args;
}
}
# finish
#
handle_end;

71
tests/regex/wxreg.test Normal file
View File

@@ -0,0 +1,71 @@
#############################################################################
# Name: wxreg.test
# Purpose: Additional tests for the regex lib and wxRegEx
# Author: Mike Wetherell
# RCS-ID: $Id$
# Copyright: (c) 2004 Mike Wetherell.
# Licence: wxWidgets licence
#############################################################################
#
# The layout of this file is the same as src/regex/reg.test. See the comments
# in that file for full details. The encoding used in here is UTF-8.
#
# These tests test the character classifications over the ascii range pretty
# thoroughly, since hopefully these will be similar for all platforms and
# locales where wxWidgets runs.
#
# Also does some tests involving western european and cyrillic characters.
# In Unicode mode, all these tests should succeed, which verifies that the
# classifications aren't limited to a single 8-bit character set.
#
# In non-unicode mode, if the test can't be translated into the character
# encoding of the current locale, the test will be skipped. So either may
# succeed or be skipped.
#
doing wx_1 "character classification: ascii"
m 1 & {[^[:alnum:]]} "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
m 2 & {[[:alnum:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~X" "X"
m 3 & {[^[:alpha:]]} "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
m 4 & {[[:alpha:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~X" "X"
m 5 & {[^[:cntrl:]]} "\a\b\t\n\v\f\r!" "!"
m 6 & {[[:cntrl:]]} " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
m 7 & {[^[:digit:]]} "0123456789!" "!"
m 8 & {[[:digit:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ 0" "0"
m 9 & {[^[:graph:]]} "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
m 10 & {[[:graph:]]} "\a\b\t\n\v\f\r !" "!"
m 11 & {[^[:lower:]]} "abcdefghijklmnopqrstuvwxyz!" "!"
m 12 & {[[:lower:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`{|}~x" "x"
m 13 & {[^[:print:]]} "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n" "\n"
m 14 & {[[:print:]]} "\a\b\n\v\f\rX" "X"
m 15 & {[^[:punct:]]} "!\"#%&'()*,-./:;?@[\\]_{}X" "X"
m 16 & {[[:punct:]]} "\a\b\t\n\v\f\r 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
m 17 & {[^[:space:]]} "\t\n\v\f\r X" "X"
m 18 & {[[:space:]]} "\a\b!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
m 19 & {[^[:upper:]]} "ABCDEFGHIJKLMNOPQRSTUVWXYZ!" "!"
m 20 & {[[:upper:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~X" "X"
m 21 & {[^[:xdigit:]]} "0123456789ABCDEFabcdef!" "!"
m 22 & {[[:xdigit:]]} "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@GHIJKLMNOPQRSTUVWXYZ[\\]^_`ghijklmnopqrstuvwxyz{|}~a" "a"
i 23 &i "AbCdEfGhIjKlMnOpQrStUvWxYz" "aBcDeFgHiJkLmNoPqRsTuVwXyZ" "0 25"
doing wx_2 "character classification: western european"
m 1 & {[^[:alpha:]]} "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!" "!"
m 2 & {[[:alpha:]]} " ¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×÷X" "X"
m 3 & {[^[:lower:]]} "ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!" "!"
m 4 & {[[:lower:]]} " ¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞ÷x" "x"
m 5 & {[^[:upper:]]} "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ!" "!"
m 6 & {[[:upper:]]} " ¡¢£¤¥¦§¨©«¬­®¯°±²³´¶·¸¹»¼½¾¿×ßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿX" "X"
i 7 &i* "ÀáÂãÄåÆçÈéÊëÌíÎïÐñÒóÔõÖøÙúÛüÝþ" "àÁâÃäÅæÇèÉêËìÍîÏðÑòÓôÕöØùÚûÜýÞ" "0 29"
doing wx_3 "character classification: cyrillic"
m 1 & {[^[:alpha:]]} "ёЁюабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!" "!"
m 2 & {[^[:lower:]]} "ёюабцдефгхийклмнопярстужвьызшэщчъ!" "!"
m 3 & {[[:lower:]]} "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪx" "x"
m 4 & {[^[:upper:]]} "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!" "!"
m 5 & {[[:upper:]]} "ёюабцдефгхийклмнопярстужвьызшэщчъX" "X"
i 6 &i* "ЁюАбЦдЕфГхИйКлМнОпЯрСтУжВьЫзШэЩчЪ" "ёЮаБцДеФгХиЙкЛмНоПяРсТуЖвЬыЗшЭщЧъ" "0 32"
#doing bugs "known bugs"
#m 1 - {(\w+).*?(\d\d:\d\d)} "from 10:30 until 12:00" "from" "10:30"

View File

@@ -10,6 +10,7 @@
<sources> <sources>
test.cpp test.cpp
mbconv/main.cpp mbconv/main.cpp
regex/regex.cpp
</sources> </sources>
<wx-lib>base</wx-lib> <wx-lib>base</wx-lib>
</exe> </exe>

View File

@@ -439,6 +439,10 @@ SOURCE=.\mbconv\main.cpp
# End Source File # End Source File
# Begin Source File # Begin Source File
SOURCE=.\regex\regex.cpp
# End Source File
# Begin Source File
SOURCE=.\test.cpp SOURCE=.\test.cpp
# End Source File # End Source File
# End Group # End Group