Import regex from tcl 8.4.5

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/branches/RXSPENCER@3951 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Ryan Norton
1999-10-13 02:22:18 +00:00
parent 9bd536df18
commit a6c3a78d25
4 changed files with 1303 additions and 1177 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,75 +1,18 @@
/*
* $Id$
*/
{
REG_OKAY, "REG_OKAY", "no errors detected"
},
{
REG_NOMATCH, "REG_NOMATCH", "failed to match"
},
{
REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)"
},
{
REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"
},
{
REG_ECTYPE, "REG_ECTYPE", "invalid character class"
},
{
REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence"
},
{
REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"
},
{
REG_EBRACK, "REG_EBRACK", "brackets [] not balanced"
},
{
REG_EPAREN, "REG_EPAREN", "parentheses () not balanced"
},
{
REG_EBRACE, "REG_EBRACE", "braces {} not balanced"
},
{
REG_BADBR, "REG_BADBR", "invalid repetition count(s)"
},
{
REG_ERANGE, "REG_ERANGE", "invalid character range"
},
{
REG_ESPACE, "REG_ESPACE", "out of memory"
},
{
REG_BADRPT, "REG_BADRPT", "quantifier operand invalid"
},
{
REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"
},
{
REG_INVARG, "REG_INVARG", "invalid argument to regex function"
},
{
REG_MIXED, "REG_MIXED", "character widths of regex and string differ"
},
{
REG_BADOPT, "REG_BADOPT", "invalid embedded option"
},
{ REG_OKAY, "REG_OKAY", "no errors detected" },
{ REG_NOMATCH, "REG_NOMATCH", "failed to match" },
{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" },
{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
{ REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" },
{ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
{ REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" },
{ REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" },
{ REG_EBRACE, "REG_EBRACE", "braces {} not balanced" },
{ REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
{ REG_ERANGE, "REG_ERANGE", "invalid character range" },
{ REG_ESPACE, "REG_ESPACE", "out of memory" },
{ REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" },
{ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" },
{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" },
{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" },

View File

@@ -1,74 +1,341 @@
#ifndef _REGEX_H_
#define _REGEX_H_ /* never again */
/* ========= begin header generated by ./mkh ========= */
/*
* regular expressions
*
* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
*
* Development of this software was funded, in part, by Cray Research Inc.,
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
* Corporation, none of whom are responsible for the results. The author
* thanks all of them.
*
* Redistribution and use in source and binary forms -- with or without
* modification -- are permitted for any purpose, provided that
* redistributions in source form retain this entire copyright notice and
* indicate the origin and nature of any modifications.
*
* I'd appreciate being given credit for this package in the documentation
* of software which uses it, but that is not a requirement.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
*
* Prototypes etc. marked with "^" within comments get gathered up (and
* possibly edited) by the regfwd program and inserted near the bottom of
* this file.
*
* We offer the option of declaring one wide-character version of the
* RE functions as well as the char versions. To do that, define
* __REG_WIDE_T to the type of wide characters (unfortunately, there
* is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and
* __REG_WIDE_EXEC to the names to be used for the compile and execute
* functions (suggestion: re_Xcomp and re_Xexec, where X is a letter
* suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode).
* For cranky old compilers, it may be necessary to do something like:
* #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d)
* #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g)
* rather than just #defining the names as parameterless macros.
*
* For some specialized purposes, it may be desirable to suppress the
* declarations of the "front end" functions, regcomp() and regexec(),
* or of the char versions of the compile and execute functions. To
* suppress the front-end functions, define __REG_NOFRONT. To suppress
* the char versions, define __REG_NOCHAR.
*
* The right place to do those defines (and some others you may want, see
* below) would be <sys/types.h>. If you don't have control of that file,
* the right place to add your own defines to this file is marked below.
* This is normally done automatically, by the makefile and regmkhdr, based
* on the contents of regcustom.h.
*/
/*
* voodoo for C++
*/
#ifdef __cplusplus
extern "C" {
#endif
/* === regex2.h === */
typedef off_t regoff_t;
/*
* Add your own defines, if needed, here.
*/
/*
* Location where a chunk of regcustom.h is automatically spliced into
* this file (working from its prototype, regproto.h).
*/
/* --- begin --- */
/* ensure certain things don't sneak in from system headers */
#ifdef __REG_WIDE_T
#undef __REG_WIDE_T
#endif
#ifdef __REG_WIDE_COMPILE
#undef __REG_WIDE_COMPILE
#endif
#ifdef __REG_WIDE_EXEC
#undef __REG_WIDE_EXEC
#endif
#ifdef __REG_REGOFF_T
#undef __REG_REGOFF_T
#endif
#ifdef __REG_VOID_T
#undef __REG_VOID_T
#endif
#ifdef __REG_CONST
#undef __REG_CONST
#endif
#ifdef __REG_NOFRONT
#undef __REG_NOFRONT
#endif
#ifdef __REG_NOCHAR
#undef __REG_NOCHAR
#endif
/* interface types */
#define __REG_WIDE_T Tcl_UniChar
#define __REG_REGOFF_T long /* not really right, but good enough... */
#define __REG_VOID_T VOID
#define __REG_CONST CONST
/* names and declarations */
#define __REG_WIDE_COMPILE TclReComp
#define __REG_WIDE_EXEC TclReExec
#define __REG_NOFRONT /* don't want regcomp() and regexec() */
#define __REG_NOCHAR /* or the char versions */
#define regfree TclReFree
#define regerror TclReError
/* --- end --- */
/*
* interface types etc.
*/
/*
* regoff_t has to be large enough to hold either off_t or ssize_t,
* and must be signed; it's only a guess that long is suitable, so we
* offer <sys/types.h> an override.
*/
#ifdef __REG_REGOFF_T
typedef __REG_REGOFF_T regoff_t;
#else
typedef long regoff_t;
#endif
/*
* For benefit of old compilers, we offer <sys/types.h> the option of
* overriding the `void' type used to declare nonexistent return types.
*/
#ifdef __REG_VOID_T
typedef __REG_VOID_T re_void;
#else
typedef void re_void;
#endif
/*
* Also for benefit of old compilers, <sys/types.h> can supply a macro
* which expands to a substitute for `const'.
*/
#ifndef __REG_CONST
#define __REG_CONST const
#endif
/*
* other interface types
*/
/* the biggie, a compiled RE (or rather, a front end to same) */
typedef struct {
int re_magic;
size_t re_nsub; /* number of parenthesized subexpressions */
const char *re_endp; /* end pointer for REG_PEND */
struct re_guts *re_g; /* none of your business :-) */
int re_magic; /* magic number */
size_t re_nsub; /* number of subexpressions */
long re_info; /* information about RE */
# define REG_UBACKREF 000001
# define REG_ULOOKAHEAD 000002
# define REG_UBOUNDS 000004
# define REG_UBRACES 000010
# define REG_UBSALNUM 000020
# define REG_UPBOTCH 000040
# define REG_UBBS 000100
# define REG_UNONPOSIX 000200
# define REG_UUNSPEC 000400
# define REG_UUNPORT 001000
# define REG_ULOCALE 002000
# define REG_UEMPTYMATCH 004000
# define REG_UIMPOSSIBLE 010000
# define REG_USHORTEST 020000
int re_csize; /* sizeof(character) */
char *re_endp; /* backward compatibility kludge */
/* the rest is opaque pointers to hidden innards */
char *re_guts; /* `char *' is more portable than `void *' */
char *re_fns;
} regex_t;
/* result reporting (may acquire more fields later) */
typedef struct {
regoff_t rm_so; /* start of match */
regoff_t rm_eo; /* end of match */
regoff_t rm_so; /* start of substring */
regoff_t rm_eo; /* end of substring */
} regmatch_t;
/* === regcomp.c === */
extern int regcomp(regex_t *, const char *, int);
#define REG_BASIC 0000
#define REG_EXTENDED 0001
#define REG_ICASE 0002
#define REG_NOSUB 0004
#define REG_NEWLINE 0010
#define REG_NOSPEC 0020
#define REG_PEND 0040
#define REG_DUMP 0200
/* supplementary control and reporting */
typedef struct {
regmatch_t rm_extend; /* see REG_EXPECT */
} rm_detail_t;
/* === regerror.c === */
#define REG_OKAY 0
#define REG_NOMATCH 1
#define REG_BADPAT 2
#define REG_ECOLLATE 3
#define REG_ECTYPE 4
#define REG_EESCAPE 5
#define REG_ESUBREG 6
#define REG_EBRACK 7
#define REG_EPAREN 8
#define REG_EBRACE 9
#define REG_BADBR 10
#define REG_ERANGE 11
#define REG_ESPACE 12
#define REG_BADRPT 13
#define REG_EMPTY 14
#define REG_ASSERT 15
#define REG_INVARG 16
#define REG_ATOI 255 /* convert name to number (!) */
#define REG_ITOA 0400 /* convert number to name (!) */
extern size_t regerror(int, const regex_t *, char *, size_t);
/*
* compilation
^ #ifndef __REG_NOCHAR
^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
^ #endif
^ #ifndef __REG_NOFRONT
^ int regcomp(regex_t *, __REG_CONST char *, int);
^ #endif
^ #ifdef __REG_WIDE_T
^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
^ #endif
*/
#define REG_BASIC 000000 /* BREs (convenience) */
#define REG_EXTENDED 000001 /* EREs */
#define REG_ADVF 000002 /* advanced features in EREs */
#define REG_ADVANCED 000003 /* AREs (which are also EREs) */
#define REG_QUOTE 000004 /* no special characters, none */
#define REG_NOSPEC REG_QUOTE /* historical synonym */
#define REG_ICASE 000010 /* ignore case */
#define REG_NOSUB 000020 /* don't care about subexpressions */
#define REG_EXPANDED 000040 /* expanded format, white space & comments */
#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */
#define REG_NLANCH 000200 /* ^ matches after \n, $ before */
#define REG_NEWLINE 000300 /* newlines are line terminators */
#define REG_PEND 000400 /* ugh -- backward-compatibility hack */
#define REG_EXPECT 001000 /* report details on partial/limited matches */
#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */
#define REG_DUMP 004000 /* none of your business :-) */
#define REG_FAKE 010000 /* none of your business :-) */
#define REG_PROGRESS 020000 /* none of your business :-) */
/* === regexec.c === */
extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
#define REG_NOTBOL 00001
#define REG_NOTEOL 00002
#define REG_STARTEND 00004
#define REG_TRACE 00400 /* tracing of execution */
#define REG_LARGE 01000 /* force large representation */
#define REG_BACKR 02000 /* force use of backref code */
/*
* execution
^ #ifndef __REG_NOCHAR
^ int re_exec(regex_t *, __REG_CONST char *, size_t,
^ rm_detail_t *, size_t, regmatch_t [], int);
^ #endif
^ #ifndef __REG_NOFRONT
^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
^ #endif
^ #ifdef __REG_WIDE_T
^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
^ rm_detail_t *, size_t, regmatch_t [], int);
^ #endif
*/
#define REG_NOTBOL 0001 /* BOS is not BOL */
#define REG_NOTEOL 0002 /* EOS is not EOL */
#define REG_STARTEND 0004 /* backward compatibility kludge */
#define REG_FTRACE 0010 /* none of your business */
#define REG_MTRACE 0020 /* none of your business */
#define REG_SMALL 0040 /* none of your business */
/* === regfree.c === */
extern void regfree(regex_t *);
/*
* misc generics (may be more functions here eventually)
^ re_void regfree(regex_t *);
*/
/*
* error reporting
* Be careful if modifying the list of error codes -- the table used by
* regerror() is generated automatically from this file!
*
* Note that there is no wide-char variant of regerror at this time; what
* kind of character is used for error reports is independent of what kind
* is used in matching.
*
^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
*/
#define REG_OKAY 0 /* no errors detected */
#define REG_NOMATCH 1 /* failed to match */
#define REG_BADPAT 2 /* invalid regexp */
#define REG_ECOLLATE 3 /* invalid collating element */
#define REG_ECTYPE 4 /* invalid character class */
#define REG_EESCAPE 5 /* invalid escape \ sequence */
#define REG_ESUBREG 6 /* invalid backreference number */
#define REG_EBRACK 7 /* brackets [] not balanced */
#define REG_EPAREN 8 /* parentheses () not balanced */
#define REG_EBRACE 9 /* braces {} not balanced */
#define REG_BADBR 10 /* invalid repetition count(s) */
#define REG_ERANGE 11 /* invalid character range */
#define REG_ESPACE 12 /* out of memory */
#define REG_BADRPT 13 /* quantifier operand invalid */
#define REG_ASSERT 15 /* "can't happen" -- you found a bug */
#define REG_INVARG 16 /* invalid argument to regex function */
#define REG_MIXED 17 /* character widths of regex and string differ */
#define REG_BADOPT 18 /* invalid embedded option */
/* two specials for debugging and testing */
#define REG_ATOI 101 /* convert error-code name to number */
#define REG_ITOA 102 /* convert error-code number to name */
/*
* the prototypes, as possibly munched by regfwd
*/
/* =====^!^===== begin forwards =====^!^===== */
/* automatically gathered by fwd; do not hand-edit */
/* === regproto.h === */
#ifndef __REG_NOCHAR
int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int));
#endif
#ifndef __REG_NOFRONT
int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int));
#endif
#ifdef __REG_WIDE_T
int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int));
#endif
#ifndef __REG_NOCHAR
int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
#endif
#ifndef __REG_NOFRONT
int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int));
#endif
#ifdef __REG_WIDE_T
int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
#endif
re_void regfree _ANSI_ARGS_((regex_t *));
extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
/* automatically gathered by fwd; do not hand-edit */
/* =====^!^===== end forwards =====^!^===== */
/*
* more C++ voodoo
*/
#ifdef __cplusplus
}
#endif
/* ========= end header generated by ./mkh ========= */
#endif

View File

@@ -1,21 +1,21 @@
/*
* Internal interface definitions, etc., for the reg package
*
* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
*
* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
*
* Development of this software was funded, in part, by Cray Research Inc.,
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
* Corporation, none of whom are responsible for the results. The author
* thanks all of them.
*
* thanks all of them.
*
* Redistribution and use in source and binary forms -- with or without
* modification -- are permitted for any purpose, provided that
* redistributions in source form retain this entire copyright notice and
* indicate the origin and nature of any modifications.
*
*
* I'd appreciate being given credit for this package in the documentation
* of software which uses it, but that is not a requirement.
*
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
@@ -26,8 +26,6 @@
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $Id$
*/
@@ -45,38 +43,63 @@
* Things that regcustom.h might override.
*/
/* standard header files (NULL is a reasonable indicator for them) */
#ifndef NULL
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#endif
/* assertions */
#ifndef assert
#ifndef REG_DEBUG
# ifndef NDEBUG
# define NDEBUG /* no assertions */
# ifndef REG_DEBUG
# define NDEBUG /* no assertions */
# endif
#endif
#include <assert.h>
#endif
/* voids */
#ifndef VOID
#define VOID void /* for function return values */
#endif
#ifndef DISCARD
#define DISCARD void /* for throwing values away */
#define DISCARD VOID /* for throwing values away */
#endif
#ifndef PVOID
#define PVOID VOID * /* generic pointer */
#endif
#ifndef VS
#define VS(x) ((void *)(x)) /* cast something to generic ptr */
#define VS(x) ((PVOID)(x)) /* cast something to generic ptr */
#endif
#ifndef NOPARMS
#define NOPARMS VOID /* for empty parm lists */
#endif
/* const */
#ifndef CONST
#define CONST const /* for old compilers, might be empty */
#endif
/* function-pointer declarator */
#ifndef FUNCPTR
#define FUNCPTR(name, args) (*name) args
#if __STDC__ >= 1
#define FUNCPTR(name, args) (*name)args
#else
#define FUNCPTR(name, args) (*name)()
#endif
#endif
/* memory allocation */
#ifndef MALLOC
#define MALLOC(n) malloc(n)
#define MALLOC(n) malloc(n)
#endif
#ifndef REALLOC
#define REALLOC(p, n) realloc(VS(p), n)
#define REALLOC(p, n) realloc(VS(p), n)
#endif
#ifndef FREE
#define FREE(p) free(VS(p))
#define FREE(p) free(VS(p))
#endif
/* want size of a char in bits, and max value in bounded quantifiers */
@@ -84,7 +107,7 @@
#include <limits.h>
#endif
#ifndef _POSIX2_RE_DUP_MAX
#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */
#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */
#endif
@@ -93,13 +116,13 @@
* misc
*/
#define NOTREACHED 0
#define xxx 1
#define NOTREACHED 0
#define xxx 1
#define DUPMAX _POSIX2_RE_DUP_MAX
#define INFINITY (DUPMAX+1)
#define DUPMAX _POSIX2_RE_DUP_MAX
#define INFINITY (DUPMAX+1)
#define REMAGIC 0xfed7 /* magic number for main struct */
#define REMAGIC 0xfed7 /* magic number for main struct */
@@ -108,12 +131,12 @@
*/
#ifdef REG_DEBUG
/* FDEBUG does finite-state tracing */
#define FDEBUG(arglist) { if (v->eflags&REG_FTRACE) printf arglist; }
#define FDEBUG(arglist) { if (v->eflags&REG_FTRACE) printf arglist; }
/* MDEBUG does higher-level tracing */
#define MDEBUG(arglist) { if (v->eflags&REG_MTRACE) printf arglist; }
#define MDEBUG(arglist) { if (v->eflags&REG_MTRACE) printf arglist; }
#else
#define FDEBUG(arglist) {}
#define MDEBUG(arglist) {}
#define FDEBUG(arglist) {}
#define MDEBUG(arglist) {}
#endif
@@ -121,25 +144,24 @@
/*
* bitmap manipulation
*/
#define UBITS (CHAR_BIT * sizeof(unsigned))
#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS))
#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS)))
#define UBITS (CHAR_BIT * sizeof(unsigned))
#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS))
#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS)))
/*
* We dissect a chr into byts for colormap table indexing. Here we define
* a byt, which will be the same as a byte on most machines... The exact
* We dissect a chr into byts for colormap table indexing. Here we define
* a byt, which will be the same as a byte on most machines... The exact
* size of a byt is not critical, but about 8 bits is good, and extraction
* of 8-bit chunks is sometimes especially fast.
*/
#ifndef BYTBITS
#define BYTBITS 8 /* bits in a byt */
#define BYTBITS 8 /* bits in a byt */
#endif
#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt
* value */
#define BYTMASK (BYTTAB-1) /* bit mask for byt */
#define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS)
#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */
#define BYTMASK (BYTTAB-1) /* bit mask for byt */
#define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS)
/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */
@@ -148,86 +170,79 @@
* As soon as possible, we map chrs into equivalence classes -- "colors" --
* which are of much more manageable number.
*/
typedef short color; /* colors of characters */
typedef int pcolor; /* what color promotes to */
#define COLORLESS (-1) /* impossible color */
#define WHITE 0 /* default color, parent of all others */
typedef short color; /* colors of characters */
typedef int pcolor; /* what color promotes to */
#define COLORLESS (-1) /* impossible color */
#define WHITE 0 /* default color, parent of all others */
/*
* A colormap is a tree -- more precisely, a DAG -- indexed at each level
* by a byt of the chr, to map the chr to a color efficiently. Because
* by a byt of the chr, to map the chr to a color efficiently. Because
* lower sections of the tree can be shared, it can exploit the usual
* sparseness of such a mapping table. The tree is always NBYTS levels
* sparseness of such a mapping table. The tree is always NBYTS levels
* deep (in the past it was shallower during construction but was "filled"
* to full depth at the end of that); areas that are unaltered as yet point
* to "fill blocks" which are entirely WHITE in color.
*/
/* the tree itself */
struct colors
{
color ccolor[BYTTAB];
struct colors {
color ccolor[BYTTAB];
};
struct ptrs
{
struct ptrs {
union tree *pptr[BYTTAB];
};
union tree
{
union tree {
struct colors colors;
struct ptrs ptrs;
};
#define tcolor colors.ccolor
#define tptr ptrs.pptr
#define tcolor colors.ccolor
#define tptr ptrs.pptr
/* internal per-color structure for the color machinery */
struct colordesc
{
uchr nchrs; /* number of chars of this color */
color sub; /* open subcolor (if any); free chain ptr */
#define NOSUB COLORLESS
struct arc *arcs; /* color chain */
int flags;
#define FREECOL 01 /* currently free */
#define PSEUDO 02 /* pseudocolor, no real chars */
#define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL)
union tree *block; /* block of solid color, if any */
struct colordesc {
uchr nchrs; /* number of chars of this color */
color sub; /* open subcolor (if any); free chain ptr */
# define NOSUB COLORLESS
struct arc *arcs; /* color chain */
int flags;
# define FREECOL 01 /* currently free */
# define PSEUDO 02 /* pseudocolor, no real chars */
# define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL)
union tree *block; /* block of solid color, if any */
};
/* the color map itself */
struct colormap
{
int magic;
#define CMMAGIC 0x876
struct vars *v; /* for compile error reporting */
size_t ncds; /* number of colordescs */
size_t max; /* highest in use */
color free; /* beginning of free chain (if non-0) */
struct colormap {
int magic;
# define CMMAGIC 0x876
struct vars *v; /* for compile error reporting */
size_t ncds; /* number of colordescs */
size_t max; /* highest in use */
color free; /* beginning of free chain (if non-0) */
struct colordesc *cd;
#define CDEND(cm) (&(cm)->cd[(cm)->max + 1])
#define NINLINECDS ((size_t)10)
# define CDEND(cm) (&(cm)->cd[(cm)->max + 1])
# define NINLINECDS ((size_t)10)
struct colordesc cdspace[NINLINECDS];
union tree tree[NBYTS]; /* tree top, plus fill blocks */
union tree tree[NBYTS]; /* tree top, plus fill blocks */
};
/* optimization magic to do fast chr->color mapping */
#define B0(c) ((c) & BYTMASK)
#define B1(c) (((c)>>BYTBITS) & BYTMASK)
#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK)
#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK)
#define B0(c) ((c) & BYTMASK)
#define B1(c) (((c)>>BYTBITS) & BYTMASK)
#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK)
#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK)
#if NBYTS == 1
#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)])
#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)])
#endif
/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */
#if NBYTS == 2
#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
#endif
#if NBYTS == 4
#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
#endif
@@ -236,23 +251,22 @@ struct colormap
* Interface definitions for locale-interface functions in locale.c.
* Multi-character collating elements (MCCEs) cause most of the trouble.
*/
struct cvec
{
int nchrs; /* number of chrs */
int chrspace; /* number of chrs possible */
chr *chrs; /* pointer to vector of chrs */
int nranges; /* number of ranges (chr pairs) */
int rangespace; /* number of chrs possible */
chr *ranges; /* pointer to vector of chr pairs */
int nmcces; /* number of MCCEs */
int mccespace; /* number of MCCEs possible */
int nmccechrs; /* number of chrs used for MCCEs */
chr *mcces[1]; /* pointers to 0-terminated MCCEs */
/* and both batches of chrs are on the end */
struct cvec {
int nchrs; /* number of chrs */
int chrspace; /* number of chrs possible */
chr *chrs; /* pointer to vector of chrs */
int nranges; /* number of ranges (chr pairs) */
int rangespace; /* number of chrs possible */
chr *ranges; /* pointer to vector of chr pairs */
int nmcces; /* number of MCCEs */
int mccespace; /* number of MCCEs possible */
int nmccechrs; /* number of chrs used for MCCEs */
chr *mcces[1]; /* pointers to 0-terminated MCCEs */
/* and both batches of chrs are on the end */
};
/* caution: this value cannot be changed easily */
#define MAXMCCE 2 /* length of longest MCCE */
#define MAXMCCE 2 /* length of longest MCCE */
@@ -264,59 +278,54 @@ struct cvec
*/
struct state;
struct arc
{
int type;
#define ARCFREE '\0'
color co;
struct state *from; /* where it's from (and contained within) */
struct state *to; /* where it's to */
struct arc *outchain; /* *from's outs chain or free chain */
#define freechain outchain
struct arc *inchain; /* *to's ins chain */
struct arc *colorchain; /* color's arc chain */
struct arc {
int type;
# define ARCFREE '\0'
color co;
struct state *from; /* where it's from (and contained within) */
struct state *to; /* where it's to */
struct arc *outchain; /* *from's outs chain or free chain */
# define freechain outchain
struct arc *inchain; /* *to's ins chain */
struct arc *colorchain; /* color's arc chain */
};
struct arcbatch
{ /* for bulk allocation of arcs */
struct arcbatch { /* for bulk allocation of arcs */
struct arcbatch *next;
#define ABSIZE 10
struct arc a[ABSIZE];
# define ABSIZE 10
struct arc a[ABSIZE];
};
struct state
{
int no;
#define FREESTATE (-1)
char flag; /* marks special states */
int nins; /* number of inarcs */
struct arc *ins; /* chain of inarcs */
int nouts; /* number of outarcs */
struct arc *outs; /* chain of outarcs */
struct arc *free; /* chain of free arcs */
struct state *tmp; /* temporary for traversal algorithms */
struct state *next; /* chain for traversing all */
struct state *prev; /* back chain */
struct arcbatch oas; /* first arcbatch, avoid malloc in easy
* case */
int noas; /* number of arcs used in first arcbatch */
struct state {
int no;
# define FREESTATE (-1)
char flag; /* marks special states */
int nins; /* number of inarcs */
struct arc *ins; /* chain of inarcs */
int nouts; /* number of outarcs */
struct arc *outs; /* chain of outarcs */
struct arc *free; /* chain of free arcs */
struct state *tmp; /* temporary for traversal algorithms */
struct state *next; /* chain for traversing all */
struct state *prev; /* back chain */
struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */
int noas; /* number of arcs used in first arcbatch */
};
struct nfa
{
struct state *pre; /* pre-initial state */
struct state *init; /* initial state */
struct state *final; /* final state */
struct state *post; /* post-final state */
int nstates; /* for numbering states */
struct state *states; /* state-chain header */
struct state *slast; /* tail of the chain */
struct state *free; /* free list */
struct colormap *cm; /* the color map */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
struct vars *v; /* simplifies compile error reporting */
struct nfa *parent; /* parent NFA, if any */
struct nfa {
struct state *pre; /* pre-initial state */
struct state *init; /* initial state */
struct state *final; /* final state */
struct state *post; /* post-final state */
int nstates; /* for numbering states */
struct state *states; /* state-chain header */
struct state *slast; /* tail of the chain */
struct state *free; /* free list */
struct colormap *cm; /* the color map */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
struct vars *v; /* simplifies compile error reporting */
struct nfa *parent; /* parent NFA, if any */
};
@@ -324,64 +333,58 @@ struct nfa
/*
* definitions for compacted NFA
*/
struct carc
{
color co; /* COLORLESS is list terminator */
int to; /* state number */
struct carc {
color co; /* COLORLESS is list terminator */
int to; /* state number */
};
struct cnfa
{
int nstates; /* number of states */
int ncolors; /* number of colors */
int flags;
#define HASLACONS 01 /* uses lookahead constraints */
int pre; /* setup state number */
int post; /* teardown state number */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
struct carc **states; /* vector of pointers to outarc lists */
struct carc *arcs; /* the area for the lists */
struct cnfa {
int nstates; /* number of states */
int ncolors; /* number of colors */
int flags;
# define HASLACONS 01 /* uses lookahead constraints */
int pre; /* setup state number */
int post; /* teardown state number */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
struct carc **states; /* vector of pointers to outarc lists */
struct carc *arcs; /* the area for the lists */
};
#define ZAPCNFA(cnfa) ((cnfa).nstates = 0)
#define NULLCNFA(cnfa) ((cnfa).nstates == 0)
#define ZAPCNFA(cnfa) ((cnfa).nstates = 0)
#define NULLCNFA(cnfa) ((cnfa).nstates == 0)
/*
* subexpression tree
*/
struct subre
{
char op; /* '|', '.' (concat), 'b' (backref), '(',
* '=' */
char flags;
#define LONGER 01 /* prefers longer match */
#define SHORTER 02 /* prefers shorter match */
#define MIXED 04 /* mixed preference below */
#define CAP 010 /* capturing parens below */
#define BACKR 020 /* back reference below */
#define INUSE 0100 /* in use in final tree */
#define LOCAL 03 /* bits which may not propagate up */
#define LMIX(f) ((f)<<2) /* LONGER -> MIXED */
#define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */
#define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
#define MESSY(f) ((f)&(MIXED|CAP|BACKR))
#define PREF(f) ((f)&LOCAL)
#define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
short retry; /* index into retry memory */
int subno; /* subexpression number (for 'b' and '(') */
short min; /* min repetitions, for backref only */
short max; /* max repetitions, for backref only */
struct subre *left; /* left child, if any (also freelist
* chain) */
struct subre *right; /* right child, if any */
struct state *begin; /* outarcs from here... */
struct state *end; /* ...ending in inarcs here */
struct cnfa cnfa; /* compacted NFA, if any */
struct subre *chain; /* for bookkeeping and error cleanup */
struct subre {
char op; /* '|', '.' (concat), 'b' (backref), '(', '=' */
char flags;
# define LONGER 01 /* prefers longer match */
# define SHORTER 02 /* prefers shorter match */
# define MIXED 04 /* mixed preference below */
# define CAP 010 /* capturing parens below */
# define BACKR 020 /* back reference below */
# define INUSE 0100 /* in use in final tree */
# define LOCAL 03 /* bits which may not propagate up */
# define LMIX(f) ((f)<<2) /* LONGER -> MIXED */
# define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */
# define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
# define MESSY(f) ((f)&(MIXED|CAP|BACKR))
# define PREF(f) ((f)&LOCAL)
# define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
# define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
short retry; /* index into retry memory */
int subno; /* subexpression number (for 'b' and '(') */
short min; /* min repetitions, for backref only */
short max; /* max repetitions, for backref only */
struct subre *left; /* left child, if any (also freelist chain) */
struct subre *right; /* right child, if any */
struct state *begin; /* outarcs from here... */
struct state *end; /* ...ending in inarcs here */
struct cnfa cnfa; /* compacted NFA, if any */
struct subre *chain; /* for bookkeeping and error cleanup */
};
@@ -390,9 +393,8 @@ struct subre
* table of function pointers for generic manipulation functions
* A regex_t's re_fns points to one of these.
*/
struct fns
{
void FUNCPTR(free, (regex_t *));
struct fns {
VOID FUNCPTR(free, (regex_t *));
};
@@ -400,18 +402,17 @@ struct fns
/*
* the insides of a regex_t, hidden behind a void *
*/
struct guts
{
int magic;
#define GUTSMAGIC 0xfed9
int cflags; /* copy of compile flags */
long info; /* copy of re_info */
size_t nsub; /* copy of re_nsub */
struct guts {
int magic;
# define GUTSMAGIC 0xfed9
int cflags; /* copy of compile flags */
long info; /* copy of re_info */
size_t nsub; /* copy of re_nsub */
struct subre *tree;
struct cnfa search; /* for fast preliminary search */
int ntree;
struct cnfa search; /* for fast preliminary search */
int ntree;
struct colormap cmap;
int FUNCPTR(compare, (const chr *, const chr *, size_t));
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t));
struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */
};