Import regex from tcl 8.4.5

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/branches/RXSPENCER@3951 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Ryan Norton
1999-10-13 02:22:18 +00:00
parent 9bd536df18
commit a6c3a78d25
4 changed files with 1303 additions and 1177 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,75 +1,18 @@
/* { REG_OKAY, "REG_OKAY", "no errors detected" },
* $Id$ { REG_NOMATCH, "REG_NOMATCH", "failed to match" },
*/ { REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" },
{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
{ { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
REG_OKAY, "REG_OKAY", "no errors detected" { REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" },
}, { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
{ REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" },
{ { REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" },
REG_NOMATCH, "REG_NOMATCH", "failed to match" { REG_EBRACE, "REG_EBRACE", "braces {} not balanced" },
}, { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
{ REG_ERANGE, "REG_ERANGE", "invalid character range" },
{ { REG_ESPACE, "REG_ESPACE", "out of memory" },
REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" { REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" },
}, { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" },
{ { REG_MIXED, "REG_MIXED", "character widths of regex and string differ" },
REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" { REG_BADOPT, "REG_BADOPT", "invalid embedded option" },
},
{
REG_ECTYPE, "REG_ECTYPE", "invalid character class"
},
{
REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence"
},
{
REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"
},
{
REG_EBRACK, "REG_EBRACK", "brackets [] not balanced"
},
{
REG_EPAREN, "REG_EPAREN", "parentheses () not balanced"
},
{
REG_EBRACE, "REG_EBRACE", "braces {} not balanced"
},
{
REG_BADBR, "REG_BADBR", "invalid repetition count(s)"
},
{
REG_ERANGE, "REG_ERANGE", "invalid character range"
},
{
REG_ESPACE, "REG_ESPACE", "out of memory"
},
{
REG_BADRPT, "REG_BADRPT", "quantifier operand invalid"
},
{
REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"
},
{
REG_INVARG, "REG_INVARG", "invalid argument to regex function"
},
{
REG_MIXED, "REG_MIXED", "character widths of regex and string differ"
},
{
REG_BADOPT, "REG_BADOPT", "invalid embedded option"
},

View File

@@ -1,74 +1,341 @@
#ifndef _REGEX_H_ #ifndef _REGEX_H_
#define _REGEX_H_ /* never again */ #define _REGEX_H_ /* never again */
/* ========= begin header generated by ./mkh ========= */ /*
* regular expressions
*
* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
*
* Development of this software was funded, in part, by Cray Research Inc.,
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
* Corporation, none of whom are responsible for the results. The author
* thanks all of them.
*
* Redistribution and use in source and binary forms -- with or without
* modification -- are permitted for any purpose, provided that
* redistributions in source form retain this entire copyright notice and
* indicate the origin and nature of any modifications.
*
* I'd appreciate being given credit for this package in the documentation
* of software which uses it, but that is not a requirement.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
*
* Prototypes etc. marked with "^" within comments get gathered up (and
* possibly edited) by the regfwd program and inserted near the bottom of
* this file.
*
* We offer the option of declaring one wide-character version of the
* RE functions as well as the char versions. To do that, define
* __REG_WIDE_T to the type of wide characters (unfortunately, there
* is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and
* __REG_WIDE_EXEC to the names to be used for the compile and execute
* functions (suggestion: re_Xcomp and re_Xexec, where X is a letter
* suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode).
* For cranky old compilers, it may be necessary to do something like:
* #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d)
* #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g)
* rather than just #defining the names as parameterless macros.
*
* For some specialized purposes, it may be desirable to suppress the
* declarations of the "front end" functions, regcomp() and regexec(),
* or of the char versions of the compile and execute functions. To
* suppress the front-end functions, define __REG_NOFRONT. To suppress
* the char versions, define __REG_NOCHAR.
*
* The right place to do those defines (and some others you may want, see
* below) would be <sys/types.h>. If you don't have control of that file,
* the right place to add your own defines to this file is marked below.
* This is normally done automatically, by the makefile and regmkhdr, based
* on the contents of regcustom.h.
*/
/*
* voodoo for C++
*/
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
/* === regex2.h === */
typedef off_t regoff_t;
/*
* Add your own defines, if needed, here.
*/
/*
* Location where a chunk of regcustom.h is automatically spliced into
* this file (working from its prototype, regproto.h).
*/
/* --- begin --- */
/* ensure certain things don't sneak in from system headers */
#ifdef __REG_WIDE_T
#undef __REG_WIDE_T
#endif
#ifdef __REG_WIDE_COMPILE
#undef __REG_WIDE_COMPILE
#endif
#ifdef __REG_WIDE_EXEC
#undef __REG_WIDE_EXEC
#endif
#ifdef __REG_REGOFF_T
#undef __REG_REGOFF_T
#endif
#ifdef __REG_VOID_T
#undef __REG_VOID_T
#endif
#ifdef __REG_CONST
#undef __REG_CONST
#endif
#ifdef __REG_NOFRONT
#undef __REG_NOFRONT
#endif
#ifdef __REG_NOCHAR
#undef __REG_NOCHAR
#endif
/* interface types */
#define __REG_WIDE_T Tcl_UniChar
#define __REG_REGOFF_T long /* not really right, but good enough... */
#define __REG_VOID_T VOID
#define __REG_CONST CONST
/* names and declarations */
#define __REG_WIDE_COMPILE TclReComp
#define __REG_WIDE_EXEC TclReExec
#define __REG_NOFRONT /* don't want regcomp() and regexec() */
#define __REG_NOCHAR /* or the char versions */
#define regfree TclReFree
#define regerror TclReError
/* --- end --- */
/*
* interface types etc.
*/
/*
* regoff_t has to be large enough to hold either off_t or ssize_t,
* and must be signed; it's only a guess that long is suitable, so we
* offer <sys/types.h> an override.
*/
#ifdef __REG_REGOFF_T
typedef __REG_REGOFF_T regoff_t;
#else
typedef long regoff_t;
#endif
/*
* For benefit of old compilers, we offer <sys/types.h> the option of
* overriding the `void' type used to declare nonexistent return types.
*/
#ifdef __REG_VOID_T
typedef __REG_VOID_T re_void;
#else
typedef void re_void;
#endif
/*
* Also for benefit of old compilers, <sys/types.h> can supply a macro
* which expands to a substitute for `const'.
*/
#ifndef __REG_CONST
#define __REG_CONST const
#endif
/*
* other interface types
*/
/* the biggie, a compiled RE (or rather, a front end to same) */
typedef struct { typedef struct {
int re_magic; int re_magic; /* magic number */
size_t re_nsub; /* number of parenthesized subexpressions */ size_t re_nsub; /* number of subexpressions */
const char *re_endp; /* end pointer for REG_PEND */ long re_info; /* information about RE */
struct re_guts *re_g; /* none of your business :-) */ # define REG_UBACKREF 000001
# define REG_ULOOKAHEAD 000002
# define REG_UBOUNDS 000004
# define REG_UBRACES 000010
# define REG_UBSALNUM 000020
# define REG_UPBOTCH 000040
# define REG_UBBS 000100
# define REG_UNONPOSIX 000200
# define REG_UUNSPEC 000400
# define REG_UUNPORT 001000
# define REG_ULOCALE 002000
# define REG_UEMPTYMATCH 004000
# define REG_UIMPOSSIBLE 010000
# define REG_USHORTEST 020000
int re_csize; /* sizeof(character) */
char *re_endp; /* backward compatibility kludge */
/* the rest is opaque pointers to hidden innards */
char *re_guts; /* `char *' is more portable than `void *' */
char *re_fns;
} regex_t; } regex_t;
/* result reporting (may acquire more fields later) */
typedef struct { typedef struct {
regoff_t rm_so; /* start of match */ regoff_t rm_so; /* start of substring */
regoff_t rm_eo; /* end of match */ regoff_t rm_eo; /* end of substring */
} regmatch_t; } regmatch_t;
/* supplementary control and reporting */
/* === regcomp.c === */ typedef struct {
extern int regcomp(regex_t *, const char *, int); regmatch_t rm_extend; /* see REG_EXPECT */
#define REG_BASIC 0000 } rm_detail_t;
#define REG_EXTENDED 0001
#define REG_ICASE 0002
#define REG_NOSUB 0004
#define REG_NEWLINE 0010
#define REG_NOSPEC 0020
#define REG_PEND 0040
#define REG_DUMP 0200
/* === regerror.c === */
#define REG_OKAY 0 /*
#define REG_NOMATCH 1 * compilation
#define REG_BADPAT 2 ^ #ifndef __REG_NOCHAR
#define REG_ECOLLATE 3 ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
#define REG_ECTYPE 4 ^ #endif
#define REG_EESCAPE 5 ^ #ifndef __REG_NOFRONT
#define REG_ESUBREG 6 ^ int regcomp(regex_t *, __REG_CONST char *, int);
#define REG_EBRACK 7 ^ #endif
#define REG_EPAREN 8 ^ #ifdef __REG_WIDE_T
#define REG_EBRACE 9 ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
#define REG_BADBR 10 ^ #endif
#define REG_ERANGE 11 */
#define REG_ESPACE 12 #define REG_BASIC 000000 /* BREs (convenience) */
#define REG_BADRPT 13 #define REG_EXTENDED 000001 /* EREs */
#define REG_EMPTY 14 #define REG_ADVF 000002 /* advanced features in EREs */
#define REG_ASSERT 15 #define REG_ADVANCED 000003 /* AREs (which are also EREs) */
#define REG_INVARG 16 #define REG_QUOTE 000004 /* no special characters, none */
#define REG_ATOI 255 /* convert name to number (!) */ #define REG_NOSPEC REG_QUOTE /* historical synonym */
#define REG_ITOA 0400 /* convert number to name (!) */ #define REG_ICASE 000010 /* ignore case */
extern size_t regerror(int, const regex_t *, char *, size_t); #define REG_NOSUB 000020 /* don't care about subexpressions */
#define REG_EXPANDED 000040 /* expanded format, white space & comments */
#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */
#define REG_NLANCH 000200 /* ^ matches after \n, $ before */
#define REG_NEWLINE 000300 /* newlines are line terminators */
#define REG_PEND 000400 /* ugh -- backward-compatibility hack */
#define REG_EXPECT 001000 /* report details on partial/limited matches */
#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */
#define REG_DUMP 004000 /* none of your business :-) */
#define REG_FAKE 010000 /* none of your business :-) */
#define REG_PROGRESS 020000 /* none of your business :-) */
/* === regexec.c === */
extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int); /*
#define REG_NOTBOL 00001 * execution
#define REG_NOTEOL 00002 ^ #ifndef __REG_NOCHAR
#define REG_STARTEND 00004 ^ int re_exec(regex_t *, __REG_CONST char *, size_t,
#define REG_TRACE 00400 /* tracing of execution */ ^ rm_detail_t *, size_t, regmatch_t [], int);
#define REG_LARGE 01000 /* force large representation */ ^ #endif
#define REG_BACKR 02000 /* force use of backref code */ ^ #ifndef __REG_NOFRONT
^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
^ #endif
^ #ifdef __REG_WIDE_T
^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
^ rm_detail_t *, size_t, regmatch_t [], int);
^ #endif
*/
#define REG_NOTBOL 0001 /* BOS is not BOL */
#define REG_NOTEOL 0002 /* EOS is not EOL */
#define REG_STARTEND 0004 /* backward compatibility kludge */
#define REG_FTRACE 0010 /* none of your business */
#define REG_MTRACE 0020 /* none of your business */
#define REG_SMALL 0040 /* none of your business */
/* === regfree.c === */
extern void regfree(regex_t *);
/*
* misc generics (may be more functions here eventually)
^ re_void regfree(regex_t *);
*/
/*
* error reporting
* Be careful if modifying the list of error codes -- the table used by
* regerror() is generated automatically from this file!
*
* Note that there is no wide-char variant of regerror at this time; what
* kind of character is used for error reports is independent of what kind
* is used in matching.
*
^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
*/
#define REG_OKAY 0 /* no errors detected */
#define REG_NOMATCH 1 /* failed to match */
#define REG_BADPAT 2 /* invalid regexp */
#define REG_ECOLLATE 3 /* invalid collating element */
#define REG_ECTYPE 4 /* invalid character class */
#define REG_EESCAPE 5 /* invalid escape \ sequence */
#define REG_ESUBREG 6 /* invalid backreference number */
#define REG_EBRACK 7 /* brackets [] not balanced */
#define REG_EPAREN 8 /* parentheses () not balanced */
#define REG_EBRACE 9 /* braces {} not balanced */
#define REG_BADBR 10 /* invalid repetition count(s) */
#define REG_ERANGE 11 /* invalid character range */
#define REG_ESPACE 12 /* out of memory */
#define REG_BADRPT 13 /* quantifier operand invalid */
#define REG_ASSERT 15 /* "can't happen" -- you found a bug */
#define REG_INVARG 16 /* invalid argument to regex function */
#define REG_MIXED 17 /* character widths of regex and string differ */
#define REG_BADOPT 18 /* invalid embedded option */
/* two specials for debugging and testing */
#define REG_ATOI 101 /* convert error-code name to number */
#define REG_ITOA 102 /* convert error-code number to name */
/*
* the prototypes, as possibly munched by regfwd
*/
/* =====^!^===== begin forwards =====^!^===== */
/* automatically gathered by fwd; do not hand-edit */
/* === regproto.h === */
#ifndef __REG_NOCHAR
int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int));
#endif
#ifndef __REG_NOFRONT
int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int));
#endif
#ifdef __REG_WIDE_T
int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int));
#endif
#ifndef __REG_NOCHAR
int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
#endif
#ifndef __REG_NOFRONT
int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int));
#endif
#ifdef __REG_WIDE_T
int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
#endif
re_void regfree _ANSI_ARGS_((regex_t *));
extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
/* automatically gathered by fwd; do not hand-edit */
/* =====^!^===== end forwards =====^!^===== */
/*
* more C++ voodoo
*/
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
/* ========= end header generated by ./mkh ========= */
#endif #endif

View File

@@ -1,21 +1,21 @@
/* /*
* Internal interface definitions, etc., for the reg package * Internal interface definitions, etc., for the reg package
* *
* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
* *
* Development of this software was funded, in part, by Cray Research Inc., * Development of this software was funded, in part, by Cray Research Inc.,
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
* Corporation, none of whom are responsible for the results. The author * Corporation, none of whom are responsible for the results. The author
* thanks all of them. * thanks all of them.
* *
* Redistribution and use in source and binary forms -- with or without * Redistribution and use in source and binary forms -- with or without
* modification -- are permitted for any purpose, provided that * modification -- are permitted for any purpose, provided that
* redistributions in source form retain this entire copyright notice and * redistributions in source form retain this entire copyright notice and
* indicate the origin and nature of any modifications. * indicate the origin and nature of any modifications.
* *
* I'd appreciate being given credit for this package in the documentation * I'd appreciate being given credit for this package in the documentation
* of software which uses it, but that is not a requirement. * of software which uses it, but that is not a requirement.
* *
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
@@ -26,8 +26,6 @@
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $Id$
*/ */
@@ -45,38 +43,63 @@
* Things that regcustom.h might override. * Things that regcustom.h might override.
*/ */
/* standard header files (NULL is a reasonable indicator for them) */
#ifndef NULL
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#endif
/* assertions */ /* assertions */
#ifndef assert #ifndef assert
#ifndef REG_DEBUG # ifndef REG_DEBUG
# ifndef NDEBUG # define NDEBUG /* no assertions */
# define NDEBUG /* no assertions */
# endif # endif
#endif
#include <assert.h> #include <assert.h>
#endif #endif
/* voids */ /* voids */
#ifndef VOID
#define VOID void /* for function return values */
#endif
#ifndef DISCARD #ifndef DISCARD
#define DISCARD void /* for throwing values away */ #define DISCARD VOID /* for throwing values away */
#endif
#ifndef PVOID
#define PVOID VOID * /* generic pointer */
#endif #endif
#ifndef VS #ifndef VS
#define VS(x) ((void *)(x)) /* cast something to generic ptr */ #define VS(x) ((PVOID)(x)) /* cast something to generic ptr */
#endif
#ifndef NOPARMS
#define NOPARMS VOID /* for empty parm lists */
#endif
/* const */
#ifndef CONST
#define CONST const /* for old compilers, might be empty */
#endif #endif
/* function-pointer declarator */ /* function-pointer declarator */
#ifndef FUNCPTR #ifndef FUNCPTR
#define FUNCPTR(name, args) (*name) args #if __STDC__ >= 1
#define FUNCPTR(name, args) (*name)args
#else
#define FUNCPTR(name, args) (*name)()
#endif
#endif #endif
/* memory allocation */ /* memory allocation */
#ifndef MALLOC #ifndef MALLOC
#define MALLOC(n) malloc(n) #define MALLOC(n) malloc(n)
#endif #endif
#ifndef REALLOC #ifndef REALLOC
#define REALLOC(p, n) realloc(VS(p), n) #define REALLOC(p, n) realloc(VS(p), n)
#endif #endif
#ifndef FREE #ifndef FREE
#define FREE(p) free(VS(p)) #define FREE(p) free(VS(p))
#endif #endif
/* want size of a char in bits, and max value in bounded quantifiers */ /* want size of a char in bits, and max value in bounded quantifiers */
@@ -84,7 +107,7 @@
#include <limits.h> #include <limits.h>
#endif #endif
#ifndef _POSIX2_RE_DUP_MAX #ifndef _POSIX2_RE_DUP_MAX
#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */ #define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */
#endif #endif
@@ -93,13 +116,13 @@
* misc * misc
*/ */
#define NOTREACHED 0 #define NOTREACHED 0
#define xxx 1 #define xxx 1
#define DUPMAX _POSIX2_RE_DUP_MAX #define DUPMAX _POSIX2_RE_DUP_MAX
#define INFINITY (DUPMAX+1) #define INFINITY (DUPMAX+1)
#define REMAGIC 0xfed7 /* magic number for main struct */ #define REMAGIC 0xfed7 /* magic number for main struct */
@@ -108,12 +131,12 @@
*/ */
#ifdef REG_DEBUG #ifdef REG_DEBUG
/* FDEBUG does finite-state tracing */ /* FDEBUG does finite-state tracing */
#define FDEBUG(arglist) { if (v->eflags&REG_FTRACE) printf arglist; } #define FDEBUG(arglist) { if (v->eflags&REG_FTRACE) printf arglist; }
/* MDEBUG does higher-level tracing */ /* MDEBUG does higher-level tracing */
#define MDEBUG(arglist) { if (v->eflags&REG_MTRACE) printf arglist; } #define MDEBUG(arglist) { if (v->eflags&REG_MTRACE) printf arglist; }
#else #else
#define FDEBUG(arglist) {} #define FDEBUG(arglist) {}
#define MDEBUG(arglist) {} #define MDEBUG(arglist) {}
#endif #endif
@@ -121,25 +144,24 @@
/* /*
* bitmap manipulation * bitmap manipulation
*/ */
#define UBITS (CHAR_BIT * sizeof(unsigned)) #define UBITS (CHAR_BIT * sizeof(unsigned))
#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) #define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS))
#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) #define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS)))
/* /*
* We dissect a chr into byts for colormap table indexing. Here we define * We dissect a chr into byts for colormap table indexing. Here we define
* a byt, which will be the same as a byte on most machines... The exact * a byt, which will be the same as a byte on most machines... The exact
* size of a byt is not critical, but about 8 bits is good, and extraction * size of a byt is not critical, but about 8 bits is good, and extraction
* of 8-bit chunks is sometimes especially fast. * of 8-bit chunks is sometimes especially fast.
*/ */
#ifndef BYTBITS #ifndef BYTBITS
#define BYTBITS 8 /* bits in a byt */ #define BYTBITS 8 /* bits in a byt */
#endif #endif
#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt #define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */
* value */ #define BYTMASK (BYTTAB-1) /* bit mask for byt */
#define BYTMASK (BYTTAB-1) /* bit mask for byt */ #define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS)
#define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS)
/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */ /* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */
@@ -148,86 +170,79 @@
* As soon as possible, we map chrs into equivalence classes -- "colors" -- * As soon as possible, we map chrs into equivalence classes -- "colors" --
* which are of much more manageable number. * which are of much more manageable number.
*/ */
typedef short color; /* colors of characters */ typedef short color; /* colors of characters */
typedef int pcolor; /* what color promotes to */ typedef int pcolor; /* what color promotes to */
#define COLORLESS (-1) /* impossible color */
#define COLORLESS (-1) /* impossible color */ #define WHITE 0 /* default color, parent of all others */
#define WHITE 0 /* default color, parent of all others */
/* /*
* A colormap is a tree -- more precisely, a DAG -- indexed at each level * A colormap is a tree -- more precisely, a DAG -- indexed at each level
* by a byt of the chr, to map the chr to a color efficiently. Because * by a byt of the chr, to map the chr to a color efficiently. Because
* lower sections of the tree can be shared, it can exploit the usual * lower sections of the tree can be shared, it can exploit the usual
* sparseness of such a mapping table. The tree is always NBYTS levels * sparseness of such a mapping table. The tree is always NBYTS levels
* deep (in the past it was shallower during construction but was "filled" * deep (in the past it was shallower during construction but was "filled"
* to full depth at the end of that); areas that are unaltered as yet point * to full depth at the end of that); areas that are unaltered as yet point
* to "fill blocks" which are entirely WHITE in color. * to "fill blocks" which are entirely WHITE in color.
*/ */
/* the tree itself */ /* the tree itself */
struct colors struct colors {
{ color ccolor[BYTTAB];
color ccolor[BYTTAB];
}; };
struct ptrs struct ptrs {
{
union tree *pptr[BYTTAB]; union tree *pptr[BYTTAB];
}; };
union tree union tree {
{
struct colors colors; struct colors colors;
struct ptrs ptrs; struct ptrs ptrs;
}; };
#define tcolor colors.ccolor
#define tcolor colors.ccolor #define tptr ptrs.pptr
#define tptr ptrs.pptr
/* internal per-color structure for the color machinery */ /* internal per-color structure for the color machinery */
struct colordesc struct colordesc {
{ uchr nchrs; /* number of chars of this color */
uchr nchrs; /* number of chars of this color */ color sub; /* open subcolor (if any); free chain ptr */
color sub; /* open subcolor (if any); free chain ptr */ # define NOSUB COLORLESS
#define NOSUB COLORLESS struct arc *arcs; /* color chain */
struct arc *arcs; /* color chain */ int flags;
int flags; # define FREECOL 01 /* currently free */
#define FREECOL 01 /* currently free */ # define PSEUDO 02 /* pseudocolor, no real chars */
#define PSEUDO 02 /* pseudocolor, no real chars */ # define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL)
#define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) union tree *block; /* block of solid color, if any */
union tree *block; /* block of solid color, if any */
}; };
/* the color map itself */ /* the color map itself */
struct colormap struct colormap {
{ int magic;
int magic; # define CMMAGIC 0x876
#define CMMAGIC 0x876 struct vars *v; /* for compile error reporting */
struct vars *v; /* for compile error reporting */ size_t ncds; /* number of colordescs */
size_t ncds; /* number of colordescs */ size_t max; /* highest in use */
size_t max; /* highest in use */ color free; /* beginning of free chain (if non-0) */
color free; /* beginning of free chain (if non-0) */
struct colordesc *cd; struct colordesc *cd;
#define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) # define CDEND(cm) (&(cm)->cd[(cm)->max + 1])
#define NINLINECDS ((size_t)10) # define NINLINECDS ((size_t)10)
struct colordesc cdspace[NINLINECDS]; struct colordesc cdspace[NINLINECDS];
union tree tree[NBYTS]; /* tree top, plus fill blocks */ union tree tree[NBYTS]; /* tree top, plus fill blocks */
}; };
/* optimization magic to do fast chr->color mapping */ /* optimization magic to do fast chr->color mapping */
#define B0(c) ((c) & BYTMASK) #define B0(c) ((c) & BYTMASK)
#define B1(c) (((c)>>BYTBITS) & BYTMASK) #define B1(c) (((c)>>BYTBITS) & BYTMASK)
#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) #define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK)
#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) #define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK)
#if NBYTS == 1 #if NBYTS == 1
#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) #define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)])
#endif #endif
/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */ /* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */
#if NBYTS == 2 #if NBYTS == 2
#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) #define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
#endif #endif
#if NBYTS == 4 #if NBYTS == 4
#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) #define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
#endif #endif
@@ -236,23 +251,22 @@ struct colormap
* Interface definitions for locale-interface functions in locale.c. * Interface definitions for locale-interface functions in locale.c.
* Multi-character collating elements (MCCEs) cause most of the trouble. * Multi-character collating elements (MCCEs) cause most of the trouble.
*/ */
struct cvec struct cvec {
{ int nchrs; /* number of chrs */
int nchrs; /* number of chrs */ int chrspace; /* number of chrs possible */
int chrspace; /* number of chrs possible */ chr *chrs; /* pointer to vector of chrs */
chr *chrs; /* pointer to vector of chrs */ int nranges; /* number of ranges (chr pairs) */
int nranges; /* number of ranges (chr pairs) */ int rangespace; /* number of chrs possible */
int rangespace; /* number of chrs possible */ chr *ranges; /* pointer to vector of chr pairs */
chr *ranges; /* pointer to vector of chr pairs */ int nmcces; /* number of MCCEs */
int nmcces; /* number of MCCEs */ int mccespace; /* number of MCCEs possible */
int mccespace; /* number of MCCEs possible */ int nmccechrs; /* number of chrs used for MCCEs */
int nmccechrs; /* number of chrs used for MCCEs */ chr *mcces[1]; /* pointers to 0-terminated MCCEs */
chr *mcces[1]; /* pointers to 0-terminated MCCEs */ /* and both batches of chrs are on the end */
/* and both batches of chrs are on the end */
}; };
/* caution: this value cannot be changed easily */ /* caution: this value cannot be changed easily */
#define MAXMCCE 2 /* length of longest MCCE */ #define MAXMCCE 2 /* length of longest MCCE */
@@ -264,59 +278,54 @@ struct cvec
*/ */
struct state; struct state;
struct arc struct arc {
{ int type;
int type; # define ARCFREE '\0'
#define ARCFREE '\0' color co;
color co; struct state *from; /* where it's from (and contained within) */
struct state *from; /* where it's from (and contained within) */ struct state *to; /* where it's to */
struct state *to; /* where it's to */ struct arc *outchain; /* *from's outs chain or free chain */
struct arc *outchain; /* *from's outs chain or free chain */ # define freechain outchain
#define freechain outchain struct arc *inchain; /* *to's ins chain */
struct arc *inchain; /* *to's ins chain */ struct arc *colorchain; /* color's arc chain */
struct arc *colorchain; /* color's arc chain */
}; };
struct arcbatch struct arcbatch { /* for bulk allocation of arcs */
{ /* for bulk allocation of arcs */
struct arcbatch *next; struct arcbatch *next;
#define ABSIZE 10 # define ABSIZE 10
struct arc a[ABSIZE]; struct arc a[ABSIZE];
}; };
struct state struct state {
{ int no;
int no; # define FREESTATE (-1)
#define FREESTATE (-1) char flag; /* marks special states */
char flag; /* marks special states */ int nins; /* number of inarcs */
int nins; /* number of inarcs */ struct arc *ins; /* chain of inarcs */
struct arc *ins; /* chain of inarcs */ int nouts; /* number of outarcs */
int nouts; /* number of outarcs */ struct arc *outs; /* chain of outarcs */
struct arc *outs; /* chain of outarcs */ struct arc *free; /* chain of free arcs */
struct arc *free; /* chain of free arcs */ struct state *tmp; /* temporary for traversal algorithms */
struct state *tmp; /* temporary for traversal algorithms */ struct state *next; /* chain for traversing all */
struct state *next; /* chain for traversing all */ struct state *prev; /* back chain */
struct state *prev; /* back chain */ struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */
struct arcbatch oas; /* first arcbatch, avoid malloc in easy int noas; /* number of arcs used in first arcbatch */
* case */
int noas; /* number of arcs used in first arcbatch */
}; };
struct nfa struct nfa {
{ struct state *pre; /* pre-initial state */
struct state *pre; /* pre-initial state */ struct state *init; /* initial state */
struct state *init; /* initial state */ struct state *final; /* final state */
struct state *final; /* final state */ struct state *post; /* post-final state */
struct state *post; /* post-final state */ int nstates; /* for numbering states */
int nstates; /* for numbering states */ struct state *states; /* state-chain header */
struct state *states; /* state-chain header */ struct state *slast; /* tail of the chain */
struct state *slast; /* tail of the chain */ struct state *free; /* free list */
struct state *free; /* free list */ struct colormap *cm; /* the color map */
struct colormap *cm; /* the color map */ color bos[2]; /* colors, if any, assigned to BOS and BOL */
color bos[2]; /* colors, if any, assigned to BOS and BOL */ color eos[2]; /* colors, if any, assigned to EOS and EOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */ struct vars *v; /* simplifies compile error reporting */
struct vars *v; /* simplifies compile error reporting */ struct nfa *parent; /* parent NFA, if any */
struct nfa *parent; /* parent NFA, if any */
}; };
@@ -324,64 +333,58 @@ struct nfa
/* /*
* definitions for compacted NFA * definitions for compacted NFA
*/ */
struct carc struct carc {
{ color co; /* COLORLESS is list terminator */
color co; /* COLORLESS is list terminator */ int to; /* state number */
int to; /* state number */
}; };
struct cnfa struct cnfa {
{ int nstates; /* number of states */
int nstates; /* number of states */ int ncolors; /* number of colors */
int ncolors; /* number of colors */ int flags;
int flags; # define HASLACONS 01 /* uses lookahead constraints */
#define HASLACONS 01 /* uses lookahead constraints */ int pre; /* setup state number */
int pre; /* setup state number */ int post; /* teardown state number */
int post; /* teardown state number */ color bos[2]; /* colors, if any, assigned to BOS and BOL */
color bos[2]; /* colors, if any, assigned to BOS and BOL */ color eos[2]; /* colors, if any, assigned to EOS and EOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */ struct carc **states; /* vector of pointers to outarc lists */
struct carc **states; /* vector of pointers to outarc lists */ struct carc *arcs; /* the area for the lists */
struct carc *arcs; /* the area for the lists */
}; };
#define ZAPCNFA(cnfa) ((cnfa).nstates = 0)
#define ZAPCNFA(cnfa) ((cnfa).nstates = 0) #define NULLCNFA(cnfa) ((cnfa).nstates == 0)
#define NULLCNFA(cnfa) ((cnfa).nstates == 0)
/* /*
* subexpression tree * subexpression tree
*/ */
struct subre struct subre {
{ char op; /* '|', '.' (concat), 'b' (backref), '(', '=' */
char op; /* '|', '.' (concat), 'b' (backref), '(', char flags;
* '=' */ # define LONGER 01 /* prefers longer match */
char flags; # define SHORTER 02 /* prefers shorter match */
#define LONGER 01 /* prefers longer match */ # define MIXED 04 /* mixed preference below */
#define SHORTER 02 /* prefers shorter match */ # define CAP 010 /* capturing parens below */
#define MIXED 04 /* mixed preference below */ # define BACKR 020 /* back reference below */
#define CAP 010 /* capturing parens below */ # define INUSE 0100 /* in use in final tree */
#define BACKR 020 /* back reference below */ # define LOCAL 03 /* bits which may not propagate up */
#define INUSE 0100 /* in use in final tree */ # define LMIX(f) ((f)<<2) /* LONGER -> MIXED */
#define LOCAL 03 /* bits which may not propagate up */ # define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */
#define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ # define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
#define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ # define MESSY(f) ((f)&(MIXED|CAP|BACKR))
#define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) # define PREF(f) ((f)&LOCAL)
#define MESSY(f) ((f)&(MIXED|CAP|BACKR)) # define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
#define PREF(f) ((f)&LOCAL) # define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
#define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) short retry; /* index into retry memory */
#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) int subno; /* subexpression number (for 'b' and '(') */
short retry; /* index into retry memory */ short min; /* min repetitions, for backref only */
int subno; /* subexpression number (for 'b' and '(') */ short max; /* max repetitions, for backref only */
short min; /* min repetitions, for backref only */ struct subre *left; /* left child, if any (also freelist chain) */
short max; /* max repetitions, for backref only */ struct subre *right; /* right child, if any */
struct subre *left; /* left child, if any (also freelist struct state *begin; /* outarcs from here... */
* chain) */ struct state *end; /* ...ending in inarcs here */
struct subre *right; /* right child, if any */ struct cnfa cnfa; /* compacted NFA, if any */
struct state *begin; /* outarcs from here... */ struct subre *chain; /* for bookkeeping and error cleanup */
struct state *end; /* ...ending in inarcs here */
struct cnfa cnfa; /* compacted NFA, if any */
struct subre *chain; /* for bookkeeping and error cleanup */
}; };
@@ -390,9 +393,8 @@ struct subre
* table of function pointers for generic manipulation functions * table of function pointers for generic manipulation functions
* A regex_t's re_fns points to one of these. * A regex_t's re_fns points to one of these.
*/ */
struct fns struct fns {
{ VOID FUNCPTR(free, (regex_t *));
void FUNCPTR(free, (regex_t *));
}; };
@@ -400,18 +402,17 @@ struct fns
/* /*
* the insides of a regex_t, hidden behind a void * * the insides of a regex_t, hidden behind a void *
*/ */
struct guts struct guts {
{ int magic;
int magic; # define GUTSMAGIC 0xfed9
#define GUTSMAGIC 0xfed9 int cflags; /* copy of compile flags */
int cflags; /* copy of compile flags */ long info; /* copy of re_info */
long info; /* copy of re_info */ size_t nsub; /* copy of re_nsub */
size_t nsub; /* copy of re_nsub */
struct subre *tree; struct subre *tree;
struct cnfa search; /* for fast preliminary search */ struct cnfa search; /* for fast preliminary search */
int ntree; int ntree;
struct colormap cmap; struct colormap cmap;
int FUNCPTR(compare, (const chr *, const chr *, size_t)); int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t));
struct subre *lacons; /* lookahead-constraint vector */ struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */ int nlacons; /* size of lacons */
}; };