added regex test suite

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@26104 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2004-03-05 23:14:23 +00:00
parent 99e839da3c
commit e70833fb1d
12 changed files with 3455 additions and 5 deletions
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -37,7 +37,8 @@ TEST_CXXFLAGS = -D__WX$(TOOLKIT)__ $(__WXUNIV_DEFINE_p) -I$(srcdir) \
 	$(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
 TEST_OBJECTS =  \
 	test_test.o \
-	test_main.o
+	test_main.o \
 	test_regex.o
 ### Conditionally set variables: ###
@@ -106,6 +107,9 @@ test_test.o: $(srcdir)/test.cpp
 test_main.o: $(srcdir)/mbconv/main.cpp
 	$(CXXC) -c -o $@ $(TEST_CXXFLAGS) $<
 test_regex.o: $(srcdir)/regex/regex.cpp
 	$(CXXC) -c -o $@ $(TEST_CXXFLAGS) $<
 # Include dependency info, if present:
@IF_GNU_MAKE@-include .deps/*.d
--- a/tests/makefile.bcc
+++ b/tests/makefile.bcc
@@ -31,7 +31,8 @@ TEST_CXXFLAGS = $(__RUNTIME_LIBS_6) -I$(BCCDIR)\include $(__DEBUGINFO) \
 	$(__DLLFLAG_p) $(CPPUNIT_CFLAGS) $(CPPFLAGS) $(CXXFLAGS)
 TEST_OBJECTS =  \
 	$(OBJS)\test_test.obj \
-	$(OBJS)\test_main.obj
+	$(OBJS)\test_main.obj \
 	$(OBJS)\test_regex.obj
 ### Conditionally set variables: ###
@@ -156,3 +157,6 @@ $(OBJS)\test_test.obj: .\test.cpp
 $(OBJS)\test_main.obj: .\mbconv\main.cpp
 	$(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $**
 $(OBJS)\test_regex.obj: .\regex\regex.cpp
 	$(CXX) -q -c -P -o$@ $(TEST_CXXFLAGS) $**
--- a/tests/makefile.gcc
+++ b/tests/makefile.gcc
@@ -22,7 +22,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO) $(__OPTIMIZEFLAG_2) $(GCCFLAGS) -DHAVE_W32API_H \
 	$(__EXCEPTIONSFLAG_4) $(__EXCEPTIONSFLAG_5) $(CPPFLAGS) $(CXXFLAGS)
 TEST_OBJECTS =  \
 	$(OBJS)\test_test.o \
-	$(OBJS)\test_main.o
+	$(OBJS)\test_main.o \
 	$(OBJS)\test_regex.o
 ### Conditionally set variables: ###
@@ -151,4 +152,7 @@ $(OBJS)\test_test.o: ./test.cpp
 $(OBJS)\test_main.o: ./mbconv/main.cpp
 	$(CXX) -c -o $@ $(TEST_CXXFLAGS) $<
 $(OBJS)\test_regex.o: ./regex/regex.cpp
 	$(CXX) -c -o $@ $(TEST_CXXFLAGS) $<
 .PHONY: all clean
--- a/tests/makefile.vc
+++ b/tests/makefile.vc
@@ -24,7 +24,8 @@ TEST_CXXFLAGS = /M$(__RUNTIME_LIBS_7)$(__DEBUGRUNTIME_3) /DWIN32 \
 	$(__EXCEPTIONSFLAG_9) $(CPPFLAGS) $(CXXFLAGS)
 TEST_OBJECTS =  \
 	$(OBJS)\test_test.obj \
-	$(OBJS)\test_main.obj
+	$(OBJS)\test_main.obj \
 	$(OBJS)\test_regex.obj
 ### Conditionally set variables: ###
@@ -212,3 +213,6 @@ $(OBJS)\test_test.obj: .\test.cpp
 $(OBJS)\test_main.obj: .\mbconv\main.cpp
 	$(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $**
 $(OBJS)\test_regex.obj: .\regex\regex.cpp
 	$(CXX) /c /nologo /TP /Fo$@ $(TEST_CXXFLAGS) $**
--- a/tests/makefile.wat
+++ b/tests/makefile.wat
@@ -172,7 +172,8 @@ TEST_CXXFLAGS = $(__DEBUGINFO_0) $(__OPTIMIZEFLAG_2) -bm $(__RUNTIME_LIBS_5) &
 	$(__EXCEPTIONSFLAG_7) $(CPPFLAGS) $(CXXFLAGS)
 TEST_OBJECTS =  &
 	$(OBJS)\test_test.obj &
-	$(OBJS)\test_main.obj
+	$(OBJS)\test_main.obj &
 	$(OBJS)\test_regex.obj
 all : $(OBJS)
@@ -206,3 +207,6 @@ $(OBJS)\test_test.obj :  .AUTODEPEND .\test.cpp
 $(OBJS)\test_main.obj :  .AUTODEPEND .\mbconv\main.cpp
 	$(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $<
 $(OBJS)\test_regex.obj :  .AUTODEPEND .\regex\regex.cpp
 	$(CXX) -zq -fo=$^@ $(TEST_CXXFLAGS) $<
--- a/tests/regex/reg.test
+++ b/tests/regex/reg.test
--- a/tests/regex/regex.cpp
+++ b/tests/regex/regex.cpp
@@ -0,0 +1,421 @@
 ///////////////////////////////////////////////////////////////////////////////
 // Name:        tests/regex/regex.cpp
 // Purpose:     Test the built-in regex lib and wxRegEx
 // Author:      Mike Wetherell
 // RCS-ID:      $Id$
 // Copyright:   (c) 2004 Mike Wetherell
 // Licence:     wxWidgets licence
 ///////////////////////////////////////////////////////////////////////////////
 //
 // Notes:
 //
 // To run just one section, say wx_1, do this:
 //  test regex.wx_1
 //
 // To run all the regex tests:
 //  test regex
 // 
 // Some tests must be skipped since they use features which we do not make
 // available through wxRegEx. To see the list of tests that have been skipped
 // turn on verbose logging, e.g.:
 //  test --verbose regex
 // 
 // The tests here are for the builtin library, tests for wxRegEx in general
 // should go in another module.
 //
 // The tests are generated from Henry Spencer's reg.test; additional tests
 // can be added in wxreg.test. These test files are then turned into a C++
 // include file 'regex.inc' (included below) using the script 'regex.pl'.
 // 
 #if defined(__GNUG__) && !defined(__APPLE__)
    #pragma implementation
    #pragma interface
 #endif
 // For compilers that support precompilation, includes "wx/wx.h".
 #include "wx/wxprec.h"
 #ifdef __BORLANDC__
    #pragma hdrstop
 #endif
 // for all others, include the necessary headers
 #ifndef WX_PRECOMP
    #include "wx/wx.h"
 #endif
 #include "wx/regex.h"
 #include "wx/cppunit.h"
 #include <iomanip>
 #include <stdexcept>
 using namespace std;
 using namespace CppUnit;
 // many of the tests are specific to the builtin regex lib, so they are only
 // attempted when the builtin regex lib is in use.
 //
 #ifdef wxHAS_REGEX_ADVANCED
 ///////////////////////////////////////////////////////////////////////////////
 // The test case - an instance represents a single test, i.e. one entry of
 // the test data: a mode, an id, a flag string, a pattern, the data to match
 // the pattern against and the results expected from the match.
 class RegExTestCase : public TestCase
 {
 public:
    // constructor - create a single testcase
    //
    // id, flags, pattern, data and the expected results are converted with
    // Conv(). The constructor reports a failure through fail() when one of
    // the pattern, data or expected results can't be converted, or when
    // parseFlags() meets a flag it can't handle.
    RegExTestCase(
        const string& name,
        const char *mode,
        const char *id,
        const char *flags,
        const char *pattern,
        const char *data,
        const vector<const char *>& expected);
 protected:
    // run this testcase - calls doTest() once per flavour selected by the
    // flags
    void runTest();
 private:
    // workers
    // convert a UTF-8 string, gives convError() if it can't be converted
    wxString Conv(const char *str);
    // decode the flag characters into the m_... members declared below
    void parseFlags(const wxString& flags);
    // compile and match the pattern using one flavour (wxRE_BASIC etc.)
    void doTest(int flavor);
    // number of results a pattern can produce (whole match + one per group)
    static size_t matchCount(const wxString& expr, int flags);
    // make a string displayable: escapes added, quoted if necessary
    static wxString quote(const wxString& arg);
    // the marker string that Conv() returns for a failed conversion
    const wxChar *convError() const { return _T("<cannot convert>"); }
    // assertions - adds some information about the test that failed
    void fail(const wxString& msg) const;
    void failIf(bool condition, const wxString& msg) const
        { if (condition) fail(msg); }
    // mode, id, flags, pattern, test data, expected results...
    // (m_mode holds just the first character of the mode string)
    int m_mode;
    wxString m_id;
    wxString m_flags;
    wxString m_pattern;
    wxString m_data;
    wxArrayString m_expected;
    // the flag decoded
    // wxRE_... values for compiling the expression and for matching it
    int m_compileFlags;
    int m_matchFlags;
    // the flavours of expression that runTest() should try
    bool m_basic;
    bool m_extended;
    bool m_advanced;
 };
 // Build one test case from the raw fields of a test entry.
 //
 // The id, flags, pattern, data and expected results are run through Conv();
 // the mode is reduced to its first character. Problems are reported with
 // fail(), so the constructor exits with an exception when a string can't be
 // converted or when a flag isn't supported.
 //
 RegExTestCase::RegExTestCase(
    const string& name,
    const char *mode,
    const char *id,
    const char *flags,
    const char *pattern,
    const char *data,
    const vector<const char *>& expected)
  :
    TestCase(name),
    m_mode(mode[0]),
    m_id(Conv(id)),
    m_flags(Conv(flags)),
    m_pattern(Conv(pattern)),
    m_data(Conv(data)),
    m_compileFlags(0),
    m_matchFlags(0),
    m_basic(false),
    m_extended(false),
    m_advanced(false)
 {
    // Conv() hands back the convError() marker for anything it can't convert
    bool badconv = false;

    if (m_pattern == convError())
        badconv = true;
    else if (m_data == convError())
        badconv = true;

    // convert all the expected results, noting whether any of them failed
    for (size_t n = 0; n < expected.size(); n++) {
        m_expected.push_back(Conv(expected[n]));

        if (!badconv && m_expected[m_expected.size() - 1] == convError())
            badconv = true;
    }

    failIf(badconv, _T("cannot convert to default character encoding"));

    // the flags need further parsing...
    parseFlags(m_flags);

 #ifndef wxHAS_REGEX_ADVANCED
    failIf(!m_basic && !m_extended, _T("advanced regexs not available"));
 #endif
 }
 // Convert a UTF-8 string to the internal encoding.
 //
 // The result is converted back to wide characters and compared with the
 // decoded input; if the conversion fails or the round trip doesn't give the
 // same text, the convError() marker is returned instead.
 //
 wxString RegExTestCase::Conv(const char *str)
 {
    const wxWCharBuffer wide = wxConvUTF8.cMB2WC(str);
    const wxWC2WXbuf converted = wxConvCurrent->cWC2WX(wide);

    if (!converted)
        return convError();

    if (wcscmp(wxConvCurrent->cWX2WC(converted), wide) != 0)
        return convError();

    return converted;
 }
 // Decode the flag characters of a test.
 //
 // Sets the compile and match flags and selects the flavours of expression
 // to try. A flag that can't be handled is reported through fail().
 //
 void RegExTestCase::parseFlags(const wxString& flags)
 {
    const wxChar *p = flags;

    while (*p) {
        switch (*p) {
            // flavours of expression to try
            case 'b': m_basic = true; break;
            case 'e': m_extended = true; break;
            case '&': m_advanced = m_basic = true; break;

            // options for compiling the expression
            case 'i': m_compileFlags |= wxRE_ICASE; break;
            case 'n': m_compileFlags |= wxRE_NEWLINE; break;
            case 'o': m_compileFlags |= wxRE_NOSUB; break;

            // options for matching
            case '^': m_matchFlags |= wxRE_NOTBOL; break;
            case '$': m_matchFlags |= wxRE_NOTEOL; break;

            // '-' means nothing at all. The others are flags we don't
            // fully support, but they don't stop us checking for success
            // or failure of the match, so they are ignored too
            case '-':
            case 'A': case 'B': case 'E': case 'H':
            case 'I': case 'L': case 'M': case 'N':
            case 'P': case 'Q': case 'R': case 'S':
            case 'T': case 'U': case '%':
                break;

 #if wxUSE_UNICODE
            case '*': break;
 #endif

            // 't' is only acceptable for the modes 'e' and 'p'
            case 't':
                if (strchr("ep", m_mode))
                    break;
                // else fall through...

            // anything else we must skip the test
            default:
                fail(wxString::Format(
                     _T("requires unsupported flag '%c'"), *p));
        }

        p++;
    }
 }
 // Run the test once for every flavour of expression the flags asked for.
 // When neither basic nor extended was requested the advanced flavour is
 // used.
 //
 void RegExTestCase::runTest()
 {
    const bool noneRequested = !m_basic && !m_extended;

    if (m_basic)
        doTest(wxRE_BASIC);

    if (m_extended)
        doTest(wxRE_EXTENDED);

 #ifdef wxHAS_REGEX_ADVANCED
    if (m_advanced || noneRequested)
        doTest(wxRE_ADVANCED);
 #endif
 }
 // Try the test for a single flavour of expression
 //
 // flavor is one of wxRE_BASIC, wxRE_EXTENDED or wxRE_ADVANCED and is
 // combined with the compile flags decoded from the test's flag string.
 // What is checked depends on the mode:
 //  'e'         the pattern must fail to compile
 //  'f' or 'p'  the pattern must compile but not match the data
 //  'm'         the pattern must match, results are compared as strings
 //  'i'         the pattern must match, results are compared as offsets
 // Any discrepancy is reported through fail().
 //
 void RegExTestCase::doTest(int flavor)
 {
    wxRegEx re(m_pattern, m_compileFlags | flavor);

    // 'e' - test that the pattern fails to compile
    if (m_mode == 'e') {
        failIf(re.IsValid(), _T("compile suceeded (should fail)"));
        return;
    }
    failIf(!re.IsValid(), _T("compile failed"));

    bool matches = re.Matches(m_data, m_matchFlags);

    // 'f' or 'p' - test that the pattern does not match
    if (m_mode == 'f' || m_mode == 'p') {
        failIf(matches, _T("match suceeded (should fail)"));
        return;
    }

    // otherwise 'm' or 'i' - test the pattern does match
    failIf(!matches, _T("match failed"));

    // Check that wxRegEx is going to allocate a large enough array for the
    // results we are supposed to get
    failIf(m_expected.size() > matchCount(m_pattern, m_compileFlags | flavor),
           _T("wxRegEx has not allocated a large enough array for the ")
           _T("number of results expected"));

    wxString result;
    size_t start, len;

    for (size_t i = 0; i < m_expected.size(); i++) {
        // size_t values are converted to int before being given to a %d
        // conversion, the two types need not have the same size
        failIf(!re.GetMatch(&start, &len, i), wxString::Format(
                _T("wxRegEx::GetMatch failed for match %d"),
                static_cast<int>(i)));

        // a start that is not below INT_MAX is treated as "no match" for
        // this subexpression
        if (m_mode == 'm') {
            // m - check the match returns the strings given
            if (start < INT_MAX)
                result = m_data.substr(start, len);
            else
                result = _T("");
        }
        else if (m_mode == 'i') {
            // i - check the match returns the offsets given, the second
            // offset being that of the last character matched (so it is
            // start - 1 for an empty match)
            if (start < INT_MAX)
                result = wxString::Format(_T("%d %d"),
                            static_cast<int>(start),
                            static_cast<int>(start + len) - 1);
            else
                result = _T("-1 -1");
        }

        failIf(result != m_expected[i], wxString::Format(
                _T("match(%d) == %s, expected == %s"), static_cast<int>(i),
                quote(result).c_str(), quote(m_expected[i]).c_str()));
    }
 }
 // assertion - adds some information about the test that failed
 //
 void RegExTestCase::fail(const wxString& msg) const
 {
    wxString str;
    wxArrayString::const_iterator it;
    str << (wxChar)m_mode << _T(" ") << m_id << _T(" ") << m_flags << _T(" ")
        << quote(m_pattern) << _T(" ") << quote(m_data);
    for (it = m_expected.begin(); it != m_expected.end(); ++it)
        str << _T(" ") << quote(*it);
    if (str.length() > 77)
        str = str.substr(0, 74) + _T("...");
    str << _T("\n ") << msg;
    // no lossy convs so using utf8
    CPPUNIT_FAIL(string(str.mb_str(wxConvUTF8)));
 }
 // Make a string suitable for display (static)
 //
 // Characters with a C escape sequence are shown as that sequence, other
 // control characters as octal escapes. If anything had to be escaped, or
 // the string contains a space, the result is put in double quotes.
 //
 wxString RegExTestCase::quote(const wxString& arg)
 {
    // the n-th character of 'special' is displayed as a backslash followed
    // by the n-th character of 'letters'
    const wxChar *special = _T("\a\b\t\n\v\f\r\"\\");
    const wxChar *letters = _T("abtnvfr\"\\");

    wxString quoted;

    for (size_t pos = 0; pos < arg.length(); pos++) {
        wxUChar ch = arg[pos];
        const wxChar *found = wxStrchr(special, ch);

        if (found) {
            quoted += wxString::Format(_T("\\%c"), letters[found - special]);
            continue;
        }

        if (wxIscntrl(ch))
            quoted += wxString::Format(_T("\\%03o"), ch);
        else
            quoted += ch;
    }

    // same length means that nothing was escaped
    if (quoted.length() == arg.length() && quoted.find(' ') == wxString::npos)
        return quoted;

    return _T("\"") + quoted + _T("\"");
 }
 // Count the number of subexpressions (taken from wxRegExImpl::Compile)
 //
 // Returns one for the whole expression plus one for each group opened in
 // expr: "\(" counts when flags contains wxRE_BASIC, an unescaped "(" counts
 // otherwise.
 //
 size_t RegExTestCase::matchCount(const wxString& expr, int flags)
 {
    // there is always one for the whole expression
    size_t nMatches = 1;

    // and some more for bracketed subexpressions
    for ( const wxChar *cptr = expr; *cptr; cptr++ )
    {
        if ( *cptr == _T('\\') )
        {
            // a backslash at the very end has nothing to escape: stop here,
            // otherwise the loop increment would step over the terminating
            // NUL and the scan would continue beyond the end of the string
            if ( *++cptr == _T('\0') )
                break;

            // in basic RE syntax groups are inside \(...\)
            if ( *cptr == _T('(') && (flags & wxRE_BASIC) )
            {
                nMatches++;
            }
        }
        else if ( *cptr == _T('(') && !(flags & wxRE_BASIC) )
        {
            // we know that the previous character is not an unquoted
            // backslash because it would have been eaten above, so we
            // have a bare '(' and this indicates a group start for the
            // extended syntax
            nMatches++;
        }
    }

    return nMatches;
 }
 ///////////////////////////////////////////////////////////////////////////////
 // Test suite
 //
 // In a non-unicode build the regex is affected by the current locale, so
 // this derived TestSuite is used. It sets the locale in its run() method
 // for the duration of the regex tests.
 class RegExTestSuite : public TestSuite
 {
 public:
    // create an empty suite with the given name (also sets the locale)
    RegExTestSuite(string name);
    // set the locale, then run the tests of the suite
    void run(TestResult *result);
    // add one test; 'expected' and the variable arguments after it are the
    // expected results, the list is ended by a null pointer
    void add(const char *mode, const char *id, const char *flags,
             const char *pattern, const char *data, const char *expected, ...);
 };
 // constructor, sets the locale so that it is set when the tests are added
 // (the test cases convert their strings to the current encoding as soon as
 // they are constructed)
 //
 RegExTestSuite::RegExTestSuite(string name) : TestSuite(name)
 {
    // "" selects the locale specified by the environment
    setlocale(LC_ALL, "");
 }
 // run the test suite, sets the locale again since it may have been changed
 // by another test since this suite was created
 //
 void RegExTestSuite::run(TestResult *result)
 {
    // "" selects the locale specified by the environment
    setlocale(LC_ALL, "");
    TestSuite::run(result);
 }
 // Add a testcase to the suite
 //
 void RegExTestSuite::add(
    const char *mode,
    const char *id,
    const char *flags,
    const char *pattern,
    const char *data,
    const char *expected, ...)
 {
    string name = getName() + "." + id;
    vector<const char *> expected_results;
    va_list ap;
    for (va_start(ap, expected); expected; expected = va_arg(ap, const char *))
        expected_results.push_back(expected);
    va_end(ap);
    try {
        addTest(new RegExTestCase(
            name, mode, id, flags, pattern, data, expected_results));
    }
    catch (Exception& e) {
        wxLogInfo(wxString::Format(_T("skipping: %s\n %s\n"),
            wxString(name.c_str(), wxConvUTF8).c_str(),
            wxString(e.what(), wxConvUTF8).c_str()));
    }
 }
 // Include the generated tests
 //
 #include "regex.inc"
 #endif // wxHAS_REGEX_ADVANCED
--- a/tests/regex/regex.inc
+++ b/tests/regex/regex.inc
--- a/tests/regex/regex.pl
+++ b/tests/regex/regex.pl
@@ -0,0 +1,437 @@
 #!/usr/bin/env perl
 #############################################################################
 # Name:        regex.pl
 # Purpose:     Generate test code for wxRegEx from 'reg.test'
 # Author:      Mike Wetherell
 # RCS-ID:      $Id$
 # Copyright:   (c) Mike Wetherell
 # Licence:     wxWidgets licence
 #############################################################################
 #
 # Notes:
 #   See './regex.pl -h' for usage
 #
 #   Output at the moment is C++ using the cppunit testing framework. The
 #   language/framework specifics are separated, with the following 5
 #   subs as an interface: 'begin_output', 'begin_section', 'write_test',
 #   'end_section' and 'end_output'. So for a different language/framework,
 #   implement 5 new similar subs.
 #
 #   I've avoided using 'use encoding "UTF-8"', since this wasn't available
 #   in perl 5.6.x. Instead I've used some hacks like 'pack "U0C*"'. Versions
 #   earlier than perl 5.6.0 aren't going to work.
 #
 #   Warnings are enabled with 'use warnings' and not with a '-w' on the '#!'
 #   line: many systems pass everything after the interpreter name as one
 #   single argument, so env would be asked to run a program called 'perl -w'.
 #
 use strict;
 use warnings;
 use File::Basename;
 #use encoding "UTF-8";  # enable in the future when perl 5.6.x is just a memory
 # if 0 output is wide characters, if 1 output is utf8 encoded
 my $utf = 1;
 # quote a parameter (C++ helper)
 #
 # Takes no arguments: the text to quote is read from $_ (which is modified)
 # and the quoted C++ string literal is returned. The literal is a narrow
 # string when $utf is set, otherwise a wide one (L"...").
 #
 sub quotecxx {
    # characters that have a C escape sequence of their own
    my %esc = ( "\a" => "a", "\b" => "b", "\f" => "f",
                "\n" => "n", "\r" => "r", "\t" => "t",
                "\013" => "v", '"' => '"', "\\" => "\\" );
    # working around lack of 'use encoding'
    $_ = pack "U0C*", unpack "C*", $_;
    use utf8;
    # Replace control characters, double quotes, backslashes and everything
    # from \177 up: those in %esc by their escape sequence, characters above
    # 0x9f by themselves ($utf) or by \uXXXX, the rest by an octal escape
    s/[\000-\037"\\\177-\x{ffff}]/
        if ($esc{$&}) {
            "\\$esc{$&}";
        } elsif (ord($&) > 0x9f) {
            if ($utf) {
                $&;
            } else {
                sprintf "\\u%04x", ord($&);
            }
        } else {
            sprintf "\\%03o", ord($&);
        }
    /ge;
    # working around lack of 'use encoding'
    no utf8;
    $_ = pack "C*", unpack "C*", $_;
    return ($utf ? '"' : 'L"') . $_ . '"'
 }
 # start writing the output code (C++ interface)
 #
 # Prints the comment that heads the generated file. $from is the text
 # naming the generator and its input, $instructions (may be empty) the
 # description of the test types and flags; both become part of the comment.
 #
 sub begin_output {
    my ($from, $instructions) = @_;
    # embed it in the comment
    # (each line is given a ' * ' prefix, which replaces three leading
    # spaces if the line has them)
    $from = "\n$from";
    $from =~ s/^(?:   )?/ * /mg;
    # $instructions contains information about the flags etc.
    if ($instructions) {
        $instructions = "\n$instructions";
        $instructions =~ s/^(?:   )?/ * /mg;
    }
    my $u = $utf ? " (UTF-8 encoded)" : "";
    print <<EOT;
 /*
 * Test data for wxRegEx$u
 $from$instructions */
 EOT
 }
 my @classes;
 # start a new section (C++ interface)
 #
 # Prints the declaration of a suite class for section $id (described by
 # $title) and the beginning of its suite() function. The section is also
 # recorded in @classes, where end_section and end_output look it up.
 #
 sub begin_section {
    my ($id, $title) = @_;
    # the class name must be a valid C++ identifier
    my $class = "regextest_$id";
    $class =~ s/\W/_/g;
    push @classes, [$id, $class];
    print <<EOT;
 /*
 * $id $title
 */
 class $class : public RegExTestSuite
 {
 public:
    $class() : RegExTestSuite("regex.$id") { }
    static Test *suite();
 };
 Test *$class\::suite()
 {
    RegExTestSuite *suite = new $class;
 EOT
 }
 # output a test line (C++ interface)
 #
 # The arguments are the fields of the test: mode, id, flags, pattern, data
 # and then the expected results (if any). Each one is quoted as a C++ string
 # literal and they are all passed to a call of suite->add().
 #
 sub write_test {
    my @args = @_;
    # quotecxx works on $_, which is an alias of each element in turn
    $_ = quotecxx for @args;
    # add() takes a variable number of arguments and reads them all as
    # 'const char *', so the terminator is cast to that type: a bare NULL
    # can be a plain integer zero in C++, which is not a pointer
    print "    suite->add(" . (join ', ', @args) . ", (const char *)NULL);\n";
 }
 # end a section (C++ interface)
 #
 # Prints the end of the suite() function begun by begin_section, and
 # registers the suite under the name "regex.<id>".
 #
 sub end_section {
    # the section being ended is the last one recorded by begin_section
    my ($id, $class) = @{$classes[$#classes]};
    print <<EOT;
    return suite;
 }
 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION($class, "regex.$id");
 EOT
 }
 # finish off the output (C++ interface)
 #
 # Prints a suite named "regex" to which the suites of all the sections in
 # @classes are added, and registers it.
 #
 sub end_output {
    print <<EOT;
 /*
 * A suite containing all the above suites
 */
 class regextest : public TestSuite
 {
 public:
    regextest() : TestSuite("regex") { }
    static Test *suite();
 };
 Test *regextest::suite()
 {
    TestSuite *suite = new regextest;
 EOT
    # one addTest line for each section
    print "    suite->addTest(".$_->[1]."::suite());\n" for @classes;
    print <<EOT;
    return suite;
 }
 CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(regextest, "regex");
 CPPUNIT_TEST_SUITE_REGISTRATION(regextest);
 EOT
 }
 # Parse a tcl string. Handles curly quoting and double quoting.
 #
 # The string given is split into tokens: a balanced {...} group, a "..."
 # string or a run of non-space characters. The list of tokens is returned
 # with the enclosing braces/quotes removed and the escapes translated.
 #
 sub parsetcl {
    my ($curly, $quote);
    # recursively defined expression that can parse balanced braces
    # warning: uses experimental features of perl, see perlop(1)
    $curly = qr/\{(?:(?>(?:\\[{}]|[^{}])+)|(??{$curly}))*\}/;
    $quote = qr/"(?:\\"|[^"])*"/;
    my @tokens = shift =~ /($curly|$quote|\S+)/g;
    # now remove braces/quotes and unescape any escapes
    for (@tokens) {
        if (s/^{(.*)}$/$1/) {
            # for curly quoting, only unescape \{ and \}
            s/\\([{}])/$1/g;
        } else {
            s/^"(.*)"$/$1/;
            # unescape any escapes
            my %esc = ( "a" => "\a", "b" => "\b", "f" => "\f",
                        "n" => "\n", "r" => "\r", "t" => "\t",
                        "v" => "\013" );
            my $x = qr/[[:xdigit:]]/;
            # in order: octal escape, \x escape (only the low 8 bits of the
            # value are kept), \u escape, the letters in %esc, and any other
            # escaped character, which stands for itself
            s/\\([0-7]{1,3}|x$x+|u$x{1,4}|.)/
                if ($1 =~ m{^([0-7]+)}) {
                    chr(oct($1));
                } elsif ($1 =~ m{^x($x+)}) {
                    pack("C0U", hex($1) & 0xff);
                } elsif ($1 =~ m{^u($x+)}) {
                    pack("C0U", hex($1));
                } elsif ($esc{$1}) {
                    $esc{$1};
                } else {
                    $1;
                }
            /ge;
        }
    }
    return @tokens;
 }
 # helpers which keep track of whether begin_section has been called, so that
 # end_section can be called when appropriate
 #
 my @doing = ("0", "");
 my $in_section = 0;
 # A heading has been read: finish the section being written (if there is
 # one) and remember the id and title given, for the next begin_section.
 sub handle_doing {
    if ($in_section) {
        end_section();
        $in_section = 0;
    }
    @doing = @_;
 }
 # A test has been read: start the section first if this hasn't been done
 # yet, then write the test with the fields given.
 sub handle_test {
    unless ($in_section) {
        begin_section(@doing);
        $in_section = 1;
    }
    write_test(@_);
 }
 # The end of the input has been reached: finish the section being written
 # (if there is one) and then the output as a whole.
 sub handle_end {
    if ($in_section) {
        end_section();
    }
    $in_section = 0;
    end_output();
 }
 # 'main' - start by parsing the command line options.
 #
 # Options are removed from @ARGV as they are handled, so that only the names
 # of the input files are left in it. Running the script with no arguments
 # at all counts as a bad option, i.e. the help is displayed.
 #
 my $badoption = !@ARGV;
 my $utfdefault = $utf;
 my $outputname;
 for (my $i = 0; $i < @ARGV; ) {
    # not an option: leave it in @ARGV (a lone '-' is not an option either)
    if ($ARGV[$i] !~ m{^-.}) {
        $i++;
        next;
    }
    # '--' ends the options
    if ($ARGV[$i] eq '--') {
        splice @ARGV, $i, 1;
        last;
    }
    # The file name is either the rest of this argument or the whole of the
    # next one. The option is found with a non-greedy match so that it is
    # the first 'o' of the argument: a name given in the same argument may
    # contain an 'o' itself (e.g. -ofoo.out).
    if ($ARGV[$i] =~ s{^-(.*?)o(.*)$}{-$1}i) {      # -o : output file
        $outputname = $2 || splice @ARGV, $i + 1, 1;
    }
    # what is left of the argument is a group of single letter options
    for (split //, substr($ARGV[$i], 1)) {
        if (/u/i) {                                 # -u : utf-8 output
            $utf = 1;
        } elsif (/w/i) {                            # -w : wide char output
            $utf = 0;
        } else {
            $badoption = 1;
        }
    }
    splice @ARGV, $i, 1;
 }
 # Display help
 #
 # Shown when an option was not recognised or when the script was run with
 # no arguments; the script exits afterwards.
 #
 if ($badoption) {
    my $prog = basename $0;
    # put the " (default)" marker next to whichever of -w and -u is the
    # default, and padding of the same width next to the other
    my ($w, $u) = (" (default)", "          ");
    ($w, $u) = ($u, $w) if $utfdefault;
    print <<EOT;
 Usage: $prog [-u|-w] [-o OUTPUT] [FILE...]
 Generate test code for wxRegEx from 'reg.test'
 Example: $prog -o regex.inc reg.test wxreg.test 
 -w$w   Output will be wide characters.
 -u$u   Output will be UTF-8 encoded.
 Input files should be in UTF-8. If no input files are specified input is
 read from stdin. If no output file is specified output is written to stdout.
 See the comments in reg.test (in src/regex) for details of the input file
 format.
 EOT
    exit 0;
 }
 # Open the output file
 #
 # STDOUT is reopened on the file, so that the plain 'print's of the rest of
 # the script write to it. The three argument form of open is used so that
 # the name is never interpreted as anything but a file name, and the script
 # stops with a message if the file can't be opened.
 #
 if ($outputname) {
    open STDOUT, '>', $outputname
        or die "$0: cannot open '$outputname' for writing: $!\n";
 }
 # Read in the files and initially parse just the comments for copyright
 # information and instructions on the tests
 #
 my @input;                  # slurped input files stripped of comments
 my $files = "";             # copyright info from the input comments
 my $instructions = "";      # test instructions from the input comments
 do {
    # name of the file about to be read (undefined when reading stdin); a
    # 'my' must not be combined with an 'if' statement modifier, perl leaves
    # the result of that undefined
    my $inputname = @ARGV ? basename($ARGV[0]) : undef;
    # slurp input
    undef $/;
    my $in = <>;
    # remove escaped newlines
    $in =~ s/(?<!\\)\\\n//g;
    # record the copyrights of the input files
    for ($in =~ /^#[\t ]*(.*copyright.*)$/mig) {
        s/[\s:]+/ /g;
        $files .= "  ";
        $files .= $inputname . ": " if $inputname && $inputname ne '-';
        $files .= "$_\n";
    }
    # Parse the comments for instructions on the tests, which look like this:
    #    i    successful match with -indices (used in checking things like
    #         nonparticipating subexpressions)
    # (only done until a file which has some instructions is found)
    if (!$instructions) {
        my $sp = qr{\t|   +};                   # tab or three or more spaces
        my @instructions = $in =~
            /\n(
                (?:
                    \#$sp\S?$sp\S[^\n]+\n       # instruction line
                    (?:\#$sp$sp\S[^\n]+\n)*     # continuation lines (if any)
                )+
            )/gx;
        if (@instructions) {
            # the first group of instruction lines is taken to describe the
            # test types and the second one the flags
            $instructions[0] = "Test types:\n$instructions[0]";
            if (@instructions > 1) {
                $instructions[1] = "Flag characters:\n$instructions[1]";
            }
            $instructions = join "\n", @instructions;
            $instructions =~ s/^#([^\t]?)/ $1/mg;
        }
    }
    # @input is the input of all files (stripped of comments)
    $in =~ s/^#.*$//mg;
    push @input, $in;
 } while $ARGV[0];
 # Build the text that heads the output: the time of generation and the name
 # of this script, followed by the copyright lines collected from the input
 # files (if there were any)
 #
 my $from = join " ", "Generated", scalar(localtime()), "by", basename($0);
 $from =~ s/[\s]+/ /g;
 if ($files) {
    # the entries of $files contain a ':' when they start with a file name
    $from .= ($files =~ /:/)
        ? " from the following files:"
        : " from work with the following copyright:";
 }
 $from = join("\n", $from =~ /(.{0,76}(?:\s|$))/g);  # word-wrap
 if ($files) {
    $from .= "\n$files";
 }
 # Now start to print the code
 #
 begin_output $from, $instructions;
 # numbers for 'extra' sections
 my $extra = 1;
 # each element of @input is the whole text of one input file
 for (@input)
 {
    # Print the main tests
    #
    # Test lines look like this:
    # m  3  b       {\(a\)b}        ab      ab      a
    #
    # Also looks for heading lines, e.g.:
    # doing 4 "parentheses"
    #
    for (split "\n") {
        if (/^doing\s+(\S+)\s+(\S.*)/) {
            handle_doing parsetcl "$1 $2";
        } elsif (/^[efimp]\s/) {
            handle_test parsetcl $_;
        }
    }
    # Extra tests
    #
    # The expression below matches something like this:
    #   test reg-33.8 {Bug 505048} {
    #       regexp -inline {\A\s*[^b]*b} ab
    #   } ab
    #
    # The three subexpressions then return these parts:
    #   $extras[$i]     = '{Bug 505048}',
    #   $extras[$i + 1] = '-inline {\A\s*[^b]*b} ab'
    #   $extras[$i + 2] = 'ab'
    #
    # (the match is done on the text of the whole file, since $_ is the
    # element of @input again here)
    my @extras = /\ntest\s+\S+\s*(\{.*?\})\s*\{\n       # line 1
                  \s*regexp\s+([^\n]+)\n                # line 2
                  \}\s*(\S[^\n]*)/gx;                   # line 3
    # the extra tests of a file go in a section of their own
    handle_doing "extra_" . $extra++, "checks for bug fixes" if @extras;
    for (my $i = 0; $i < @extras; $i += 3) {
        my $id = $extras[$i];
        # further parse the middle line into options and the rest (i.e. $args)
        my ($opts, $args) = $extras[$i + 1] =~ /^\s*((?:-\S+\s+)*)([^\s-].*)/;
        my @args = parsetcl $args;
        $#args = 1;     # only want the first two
        # now handle the options
        # -indices gives an 'i' test; otherwise it is an 'm' test if a
        # result is given and an 'f' test if not. The expected results are
        # only used when -inline was given.
        my $test    = $opts =~ /-indices/ ? 'i' : $extras[$i + 2] ? 'm' : 'f';
        my $results = $opts =~ /-inline/ && $test ne 'f' ? $extras[$i+2] : '';
        # get them all in the right order and print
        # (mode, id, '-' for "no flags", pattern, data, expected results)
        unshift @args, $test, parsetcl($id), '-';
        push @args, parsetcl(parsetcl($results)) if $results;
        handle_test @args;
    }
 }
 # finish
 #
 handle_end;
--- a/tests/regex/wxreg.test
+++ b/tests/regex/wxreg.test
@@ -0,0 +1,71 @@
 #############################################################################
 # Name:        wxreg.test
 # Purpose:     Additional tests for the regex lib and wxRegEx
 # Author:      Mike Wetherell
 # RCS-ID:      $Id$
 # Copyright:   (c) 2004 Mike Wetherell.
 # Licence:     wxWidgets licence
 #############################################################################
 #
 # The layout of this file is the same as src/regex/reg.test. See the comments
 # in that file for full details. The encoding used in here is UTF-8.
 #
 # These tests test the character classifications over the ascii range pretty
 # thoroughly, since hopefully these will be similar for all platforms and
 # locales where wxWidgets runs.
 #
 # Also does some tests involving western european and cyrillic characters.
 # In Unicode mode, all these tests should succeed, which verifies that the
 # classifications aren't limited to a single 8-bit character set.
 #
 # In non-unicode mode, if the test can't be translated into the character
 # encoding of the current locale, the test will be skipped. So either may
 # succeed or be skipped.
 #
 doing wx_1 "character classification: ascii"
 m   1   &   {[^[:alnum:]]}  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
 m   2   &   {[[:alnum:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~X" "X"
 m   3   &   {[^[:alpha:]]}  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
 m   4   &   {[[:alpha:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~X" "X"
 m   5   &   {[^[:cntrl:]]}  "\a\b\t\n\v\f\r!" "!"
 m   6   &   {[[:cntrl:]]}   " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
 m   7   &   {[^[:digit:]]}  "0123456789!" "!"
 m   8   &   {[[:digit:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ 0" "0"
 m   9   &   {[^[:graph:]]}  "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
 m   10  &   {[[:graph:]]}   "\a\b\t\n\v\f\r !" "!"
 m   11  &   {[^[:lower:]]}  "abcdefghijklmnopqrstuvwxyz!" "!"
 m   12  &   {[[:lower:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`{|}~x" "x"
 m   13  &   {[^[:print:]]}  "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n" "\n"
 m   14  &   {[[:print:]]}   "\a\b\n\v\f\rX" "X"
 m   15  &   {[^[:punct:]]}  "!\"#%&'()*,-./:;?@[\\]_{}X" "X"
 m   16  &   {[[:punct:]]}   "\a\b\t\n\v\f\r 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!" "!"
 m   17  &   {[^[:space:]]}  "\t\n\v\f\r X" "X"
 m   18  &   {[[:space:]]}   "\a\b!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n" "\n"
 m   19  &   {[^[:upper:]]}  "ABCDEFGHIJKLMNOPQRSTUVWXYZ!" "!"
 m   20  &   {[[:upper:]]}   "\a\b\t\n\v\f\r !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~X" "X"
 m   21  &   {[^[:xdigit:]]} "0123456789ABCDEFabcdef!" "!"
 m   22  &   {[[:xdigit:]]}  "\a\b\t\n\v\f\r !\"#$%&'()*+,-./:;<=>?@GHIJKLMNOPQRSTUVWXYZ[\\]^_`ghijklmnopqrstuvwxyz{|}~a" "a"
 i   23  &i  "AbCdEfGhIjKlMnOpQrStUvWxYz" "aBcDeFgHiJkLmNoPqRsTuVwXyZ" "0 25"
 doing wx_2 "character classification: western european"
 m   1   &   {[^[:alpha:]]}  "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!" "!"
 m   2   &   {[[:alpha:]]}   " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×÷X" "X"
 m   3   &   {[^[:lower:]]}  "ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ!" "!"
 m   4   &   {[[:lower:]]}   " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞ÷x" "x"
 m   5   &   {[^[:upper:]]}  "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ!" "!"
 m   6   &   {[[:upper:]]}   " ¡¢£¤¥¦§¨©«¬®¯°±²³´¶·¸¹»¼½¾¿×ßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿX" "X"
 i   7   &i* "ÀáÂãÄåÆçÈéÊëÌíÎïÐñÒóÔõÖøÙúÛüÝþ" "àÁâÃäÅæÇèÉêËìÍîÏðÑòÓôÕöØùÚûÜýÞ" "0 29"
 doing wx_3 "character classification: cyrillic"
 m   1   &   {[^[:alpha:]]}  "ёЁюабцдефгхийклмнопярстужвьызшэщчъЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!" "!"
 m   2   &   {[^[:lower:]]}  "ёюабцдефгхийклмнопярстужвьызшэщчъ!" "!"
 m   3   &   {[[:lower:]]}   "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪx" "x"
 m   4   &   {[^[:upper:]]}  "ЁЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧЪ!" "!"
 m   5   &   {[[:upper:]]}   "ёюабцдефгхийклмнопярстужвьызшэщчъX" "X"
 i   6   &i* "ЁюАбЦдЕфГхИйКлМнОпЯрСтУжВьЫзШэЩчЪ" "ёЮаБцДеФгХиЙкЛмНоПяРсТуЖвЬыЗшЭщЧъ" "0 32"
 #doing bugs "known bugs"
 #m  1    -   {(\w+).*?(\d\d:\d\d)} "from 10:30 until 12:00" "from" "10:30"
--- a/tests/test.bkl
+++ b/tests/test.bkl
@@ -10,6 +10,7 @@
        <sources>
            test.cpp
            mbconv/main.cpp
            regex/regex.cpp
        </sources>
        <wx-lib>base</wx-lib>
    </exe>
--- a/tests/test.dsp
+++ b/tests/test.dsp
@@ -439,6 +439,10 @@ SOURCE=.\mbconv\main.cpp
 # End Source File
 # Begin Source File
 SOURCE=.\regex\regex.cpp
 # End Source File
 # Begin Source File
 SOURCE=.\test.cpp
 # End Source File
 # End Group