From a6c3a78d25754aabf5608d271c04b302963787d3 Mon Sep 17 00:00:00 2001 From: Ryan Norton Date: Wed, 13 Oct 1999 02:22:18 +0000 Subject: [PATCH] Import regex from tcl 8.4.5 git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/branches/RXSPENCER@3951 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- src/regex/regc_lex.c | 1599 ++++++++++++++++++++---------------------- src/regex/regerrs.h | 93 +-- src/regex/regex.h | 371 ++++++++-- src/regex/regguts.h | 417 +++++------ 4 files changed, 1303 insertions(+), 1177 deletions(-) diff --git a/src/regex/regc_lex.c b/src/regex/regc_lex.c index a24290d1a1..1acc3f4cae 100644 --- a/src/regex/regc_lex.c +++ b/src/regex/regc_lex.c @@ -2,21 +2,21 @@ * lexical analyzer * This file is #included by regcomp.c. * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -28,183 +28,177 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Header$ - * */ /* scanning macros (know about v) */ -#define ATEOS() (v->now >= v->stop) -#define HAVE(n) (v->stop - v->now >= (n)) -#define NEXT1(c) (!ATEOS() && *v->now == CHR(c)) -#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b)) -#define NEXT3(a,b,c) (HAVE(3) && *v->now == CHR(a) && \ +#define ATEOS() (v->now >= v->stop) +#define HAVE(n) (v->stop - v->now >= (n)) +#define NEXT1(c) (!ATEOS() && *v->now == CHR(c)) +#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b)) +#define NEXT3(a,b,c) (HAVE(3) && *v->now == CHR(a) && \ *(v->now+1) == CHR(b) && \ *(v->now+2) == CHR(c)) -#define SET(c) (v->nexttype = (c)) -#define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n)) -#define RET(c) return (SET(c), 1) -#define RETV(c, n) return (SETV(c, n), 1) -#define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */ -#define LASTTYPE(t) (v->lasttype == (t)) +#define SET(c) (v->nexttype = (c)) +#define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n)) +#define RET(c) return (SET(c), 1) +#define RETV(c, n) return (SETV(c, n), 1) +#define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */ +#define LASTTYPE(t) (v->lasttype == (t)) /* lexical contexts */ -#define L_ERE 1 /* mainline ERE/ARE */ -#define L_BRE 2 /* mainline BRE */ -#define L_Q 3 /* REG_QUOTE */ -#define L_EBND 4 /* ERE/ARE bound */ -#define L_BBND 5 /* BRE bound */ -#define L_BRACK 6 /* brackets */ -#define L_CEL 7 /* collating element */ -#define L_ECL 8 /* equivalence class */ -#define L_CCL 9 /* character class */ -#define INTOCON(c) (v->lexcon = (c)) -#define INCON(con) (v->lexcon == (con)) +#define L_ERE 1 /* mainline ERE/ARE */ +#define L_BRE 2 /* mainline BRE */ +#define L_Q 3 /* REG_QUOTE */ +#define L_EBND 4 /* ERE/ARE bound */ +#define L_BBND 5 /* BRE bound */ +#define L_BRACK 6 /* brackets */ +#define L_CEL 7 /* collating element */ +#define L_ECL 8 /* equivalence class */ +#define L_CCL 9 /* character class */ +#define INTOCON(c) (v->lexcon = (c)) +#define INCON(con) (v->lexcon == (con)) /* construct pointer past end of chr array */ -#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr)) +#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr)) /* - * lexstart - set up lexical stuff, scan leading options + - lexstart - set up lexical stuff, scan leading options + ^ static VOID lexstart(struct vars *); */ -static void -lexstart(struct vars * v) +static VOID +lexstart(v) +struct vars *v; { - prefixes(v); /* may turn on new type bits etc. */ + prefixes(v); /* may turn on new type bits etc. */ NOERR(); - if (v->cflags & REG_QUOTE) - { - assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE))); + if (v->cflags®_QUOTE) { + assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))); INTOCON(L_Q); - } - else if (v->cflags & REG_EXTENDED) - { - assert(!(v->cflags & REG_QUOTE)); + } else if (v->cflags®_EXTENDED) { + assert(!(v->cflags®_QUOTE)); INTOCON(L_ERE); - } - else - { - assert(!(v->cflags & (REG_QUOTE | REG_ADVF))); + } else { + assert(!(v->cflags&(REG_QUOTE|REG_ADVF))); INTOCON(L_BRE); } v->nexttype = EMPTY; /* remember we were at the start */ - next(v); /* set up the first token */ + next(v); /* set up the first token */ } /* - * prefixes - implement various special prefixes + - prefixes - implement various special prefixes + ^ static VOID prefixes(struct vars *); */ -static void -prefixes(struct vars * v) +static VOID +prefixes(v) +struct vars *v; { /* literal string doesn't get any of this stuff */ - if (v->cflags & REG_QUOTE) + if (v->cflags®_QUOTE) return; - /* initial "***" gets special things */ + /* initial "***" gets special things */ if (HAVE(4) && NEXT3('*', '*', '*')) - switch (*(v->now + 3)) - { - case CHR('?'): /* "***?" error, msg shows version */ - ERR(REG_BADPAT); - return; /* proceed no further */ - break; - case CHR('='): /* "***=" shifts to literal string */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_QUOTE; - v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE); - v->now += 4; - return; /* and there can be no more prefixes */ - break; - case CHR(':'): /* "***:" shifts to AREs */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_ADVANCED; - v->now += 4; - break; - default: /* otherwise *** is just an error */ - ERR(REG_BADRPT); - return; - break; + switch (*(v->now + 3)) { + case CHR('?'): /* "***?" error, msg shows version */ + ERR(REG_BADPAT); + return; /* proceed no further */ + break; + case CHR('='): /* "***=" shifts to literal string */ + NOTE(REG_UNONPOSIX); + v->cflags |= REG_QUOTE; + v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE); + v->now += 4; + return; /* and there can be no more prefixes */ + break; + case CHR(':'): /* "***:" shifts to AREs */ + NOTE(REG_UNONPOSIX); + v->cflags |= REG_ADVANCED; + v->now += 4; + break; + default: /* otherwise *** is just an error */ + ERR(REG_BADRPT); + return; + break; } /* BREs and EREs don't get embedded options */ - if ((v->cflags & REG_ADVANCED) != REG_ADVANCED) + if ((v->cflags®_ADVANCED) != REG_ADVANCED) return; /* embedded options (AREs only) */ - if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) - { + if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) { NOTE(REG_UNONPOSIX); v->now += 2; for (; !ATEOS() && iscalpha(*v->now); v->now++) - switch (*v->now) - { - case CHR('b'): /* BREs (but why???) */ - v->cflags &= ~(REG_ADVANCED | REG_QUOTE); - break; - case CHR('c'): /* case sensitive */ - v->cflags &= ~REG_ICASE; - break; - case CHR('e'): /* plain EREs */ - v->cflags |= REG_EXTENDED; - v->cflags &= ~(REG_ADVF | REG_QUOTE); - break; - case CHR('i'): /* case insensitive */ - v->cflags |= REG_ICASE; - break; - case CHR('m'): /* Perloid synonym for n */ - case CHR('n'): /* \n affects ^ $ . [^ */ - v->cflags |= REG_NEWLINE; - break; - case CHR('p'): /* ~Perl, \n affects . [^ */ - v->cflags |= REG_NLSTOP; - v->cflags &= ~REG_NLANCH; - break; - case CHR('q'): /* literal string */ - v->cflags |= REG_QUOTE; - v->cflags &= ~REG_ADVANCED; - break; - case CHR('s'): /* single line, \n ordinary */ - v->cflags &= ~REG_NEWLINE; - break; - case CHR('t'): /* tight syntax */ - v->cflags &= ~REG_EXPANDED; - break; - case CHR('w'): /* weird, \n affects ^ $ only */ - v->cflags &= ~REG_NLSTOP; - v->cflags |= REG_NLANCH; - break; - case CHR('x'): /* expanded syntax */ - v->cflags |= REG_EXPANDED; - break; - default: - ERR(REG_BADOPT); - return; + switch (*v->now) { + case CHR('b'): /* BREs (but why???) */ + v->cflags &= ~(REG_ADVANCED|REG_QUOTE); + break; + case CHR('c'): /* case sensitive */ + v->cflags &= ~REG_ICASE; + break; + case CHR('e'): /* plain EREs */ + v->cflags |= REG_EXTENDED; + v->cflags &= ~(REG_ADVF|REG_QUOTE); + break; + case CHR('i'): /* case insensitive */ + v->cflags |= REG_ICASE; + break; + case CHR('m'): /* Perloid synonym for n */ + case CHR('n'): /* \n affects ^ $ . [^ */ + v->cflags |= REG_NEWLINE; + break; + case CHR('p'): /* ~Perl, \n affects . [^ */ + v->cflags |= REG_NLSTOP; + v->cflags &= ~REG_NLANCH; + break; + case CHR('q'): /* literal string */ + v->cflags |= REG_QUOTE; + v->cflags &= ~REG_ADVANCED; + break; + case CHR('s'): /* single line, \n ordinary */ + v->cflags &= ~REG_NEWLINE; + break; + case CHR('t'): /* tight syntax */ + v->cflags &= ~REG_EXPANDED; + break; + case CHR('w'): /* weird, \n affects ^ $ only */ + v->cflags &= ~REG_NLSTOP; + v->cflags |= REG_NLANCH; + break; + case CHR('x'): /* expanded syntax */ + v->cflags |= REG_EXPANDED; + break; + default: + ERR(REG_BADOPT); + return; } - if (!NEXT1(')')) - { + if (!NEXT1(')')) { ERR(REG_BADOPT); return; } v->now++; - if (v->cflags & REG_QUOTE) - v->cflags &= ~(REG_EXPANDED | REG_NEWLINE); + if (v->cflags®_QUOTE) + v->cflags &= ~(REG_EXPANDED|REG_NEWLINE); } } /* - * lexnest - "call a subroutine", interpolating string at the lexical level - * + - lexnest - "call a subroutine", interpolating string at the lexical level * Note, this is not a very general facility. There are a number of * implicit assumptions about what sorts of strings can be subroutines. + ^ static VOID lexnest(struct vars *, chr *, chr *); */ -static void -lexnest(struct vars * v, - chr *beginp, /* start of interpolation */ - chr *endp) /* one past end of interpolation */ +static VOID +lexnest(v, beginp, endp) +struct vars *v; +chr *beginp; /* start of interpolation */ +chr *endp; /* one past end of interpolation */ { - assert(v->savenow == NULL); /* only one level of nesting */ + assert(v->savenow == NULL); /* only one level of nesting */ v->savenow = v->now; v->savestop = v->stop; v->now = beginp; @@ -214,124 +208,123 @@ lexnest(struct vars * v, /* * string constants to interpolate as expansions of things like \d */ -static chr backd[] = { /* \d */ +static chr backd[] = { /* \d */ CHR('['), CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR(':'), CHR(']'), CHR(']') }; -static chr backD[] = { /* \D */ +static chr backD[] = { /* \D */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR(':'), CHR(']'), CHR(']') }; -static chr brbackd[] = { /* \d within brackets */ +static chr brbackd[] = { /* \d within brackets */ CHR('['), CHR(':'), CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), CHR(':'), CHR(']') }; -static chr backs[] = { /* \s */ +static chr backs[] = { /* \s */ CHR('['), CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']'), CHR(']') }; -static chr backS[] = { /* \S */ +static chr backS[] = { /* \S */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']'), CHR(']') }; -static chr brbacks[] = { /* \s within brackets */ +static chr brbacks[] = { /* \s within brackets */ CHR('['), CHR(':'), CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), CHR(':'), CHR(']') }; -static chr backw[] = { /* \w */ +static chr backw[] = { /* \w */ CHR('['), CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_'), CHR(']') }; -static chr backW[] = { /* \W */ +static chr backW[] = { /* \W */ CHR('['), CHR('^'), CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_'), CHR(']') }; -static chr brbackw[] = { /* \w within brackets */ +static chr brbackw[] = { /* \w within brackets */ CHR('['), CHR(':'), CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), CHR(':'), CHR(']'), CHR('_') }; /* - * lexword - interpolate a bracket expression for word characters + - lexword - interpolate a bracket expression for word characters * Possibly ought to inquire whether there is a "word" character class. + ^ static VOID lexword(struct vars *); */ -static void -lexword(struct vars * v) +static VOID +lexword(v) +struct vars *v; { lexnest(v, backw, ENDOF(backw)); } /* - * next - get next token + - next - get next token + ^ static int next(struct vars *); */ -static int /* 1 normal, 0 failure */ -next(struct vars * v) +static int /* 1 normal, 0 failure */ +next(v) +struct vars *v; { - chr c; + chr c; /* errors yield an infinite sequence of failures */ if (ISERR()) - return 0; /* the error has set nexttype to EOS */ + return 0; /* the error has set nexttype to EOS */ /* remember flavor of last token */ v->lasttype = v->nexttype; /* REG_BOSONLY */ - if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY)) - { + if (v->nexttype == EMPTY && (v->cflags®_BOSONLY)) { /* at start of a REG_BOSONLY RE */ RETV(SBEGIN, 0); /* same as \A */ } /* if we're nested and we've hit end, return to outer level */ - if (v->savenow != NULL && ATEOS()) - { + if (v->savenow != NULL && ATEOS()) { v->now = v->savenow; v->stop = v->savestop; v->savenow = v->savestop = NULL; } /* skip white space etc. if appropriate (not in literal or []) */ - if (v->cflags & REG_EXPANDED) - switch (v->lexcon) - { - case L_ERE: - case L_BRE: - case L_EBND: - case L_BBND: - skip(v); - break; + if (v->cflags®_EXPANDED) + switch (v->lexcon) { + case L_ERE: + case L_BRE: + case L_EBND: + case L_BBND: + skip(v); + break; } /* handle EOS, depending on context */ - if (ATEOS()) - { - switch (v->lexcon) - { - case L_ERE: - case L_BRE: - case L_Q: - RET(EOS); - break; - case L_EBND: - case L_BBND: - FAILW(REG_EBRACE); - break; - case L_BRACK: - case L_CEL: - case L_ECL: - case L_CCL: - FAILW(REG_EBRACK); - break; + if (ATEOS()) { + switch (v->lexcon) { + case L_ERE: + case L_BRE: + case L_Q: + RET(EOS); + break; + case L_EBND: + case L_BBND: + FAILW(REG_EBRACE); + break; + case L_BRACK: + case L_CEL: + case L_ECL: + case L_CCL: + FAILW(REG_EBRACK); + break; } assert(NOTREACHED); } @@ -340,365 +333,314 @@ next(struct vars * v) c = *v->now++; /* deal with the easy contexts, punt EREs to code below */ - switch (v->lexcon) - { - case L_BRE: /* punt BREs to separate function */ - return brenext(v, c); + switch (v->lexcon) { + case L_BRE: /* punt BREs to separate function */ + return brenext(v, c); + break; + case L_ERE: /* see below */ + break; + case L_Q: /* literal strings are easy */ + RETV(PLAIN, c); + break; + case L_BBND: /* bounds are fairly simple */ + case L_EBND: + switch (c) { + case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): + case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): + case CHR('8'): case CHR('9'): + RETV(DIGIT, (chr)DIGITVAL(c)); break; - case L_ERE: /* see below */ + case CHR(','): + RET(','); break; - case L_Q: /* literal strings are easy */ - RETV(PLAIN, c); - break; - case L_BBND: /* bounds are fairly simple */ - case L_EBND: - switch (c) - { - case CHR('0'): - case CHR('1'): - case CHR('2'): - case CHR('3'): - case CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - RETV(DIGIT, (chr) DIGITVAL(c)); - break; - case CHR(','): - RET(','); - break; - case CHR('}'): /* ERE bound ends with } */ - if (INCON(L_EBND)) - { - INTOCON(L_ERE); - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('}', 0); - } - RETV('}', 1); - } - else - FAILW(REG_BADBR); - break; - case CHR('\\'): /* BRE bound ends with \} */ - if (INCON(L_BBND) && NEXT1('}')) - { - v->now++; - INTOCON(L_BRE); - RET('}'); - } - else - FAILW(REG_BADBR); - break; - default: - FAILW(REG_BADBR); - break; - } - assert(NOTREACHED); - break; - case L_BRACK: /* brackets are not too hard */ - switch (c) - { - case CHR(']'): - if (LASTTYPE('[')) - RETV(PLAIN, c); - else - { - INTOCON((v->cflags & REG_EXTENDED) ? - L_ERE : L_BRE); - RET(']'); - } - break; - case CHR('\\'): - NOTE(REG_UBBS); - if (!(v->cflags & REG_ADVF)) - RETV(PLAIN, c); + case CHR('}'): /* ERE bound ends with } */ + if (INCON(L_EBND)) { + INTOCON(L_ERE); + if ((v->cflags®_ADVF) && NEXT1('?')) { + v->now++; NOTE(REG_UNONPOSIX); - if (ATEOS()) - FAILW(REG_EESCAPE); - (DISCARD) lexescape(v); - switch (v->nexttype) - { /* not all escapes okay here */ - case PLAIN: - return 1; - break; - case CCLASS: - switch (v->nextvalue) - { - case 'd': - lexnest(v, brbackd, ENDOF(brbackd)); - break; - case 's': - lexnest(v, brbacks, ENDOF(brbacks)); - break; - case 'w': - lexnest(v, brbackw, ENDOF(brbackw)); - break; - default: - FAILW(REG_EESCAPE); - break; - } - /* lexnest done, back up and try again */ - v->nexttype = v->lasttype; - return next(v); - break; - } - /* not one of the acceptable escapes */ - FAILW(REG_EESCAPE); - break; - case CHR('-'): - if (LASTTYPE('[') || NEXT1(']')) - RETV(PLAIN, c); - else - RETV(RANGE, c); - break; - case CHR('['): - if (ATEOS()) - FAILW(REG_EBRACK); - switch (*v->now++) - { - case CHR('.'): - INTOCON(L_CEL); - /* might or might not be locale-specific */ - RET(COLLEL); - break; - case CHR('='): - INTOCON(L_ECL); - NOTE(REG_ULOCALE); - RET(ECLASS); - break; - case CHR(':'): - INTOCON(L_CCL); - NOTE(REG_ULOCALE); - RET(CCLASS); - break; - default: /* oops */ - v->now--; - RETV(PLAIN, c); - break; - } - assert(NOTREACHED); - break; - default: - RETV(PLAIN, c); - break; - } - assert(NOTREACHED); + RETV('}', 0); + } + RETV('}', 1); + } else + FAILW(REG_BADBR); break; - case L_CEL: /* collating elements are easy */ - if (c == CHR('.') && NEXT1(']')) - { + case CHR('\\'): /* BRE bound ends with \} */ + if (INCON(L_BBND) && NEXT1('}')) { v->now++; - INTOCON(L_BRACK); - RETV(END, '.'); - } - else - RETV(PLAIN, c); - break; - case L_ECL: /* ditto equivalence classes */ - if (c == CHR('=') && NEXT1(']')) - { - v->now++; - INTOCON(L_BRACK); - RETV(END, '='); - } - else - RETV(PLAIN, c); - break; - case L_CCL: /* ditto character classes */ - if (c == CHR(':') && NEXT1(']')) - { - v->now++; - INTOCON(L_BRACK); - RETV(END, ':'); - } - else - RETV(PLAIN, c); + INTOCON(L_BRE); + RET('}'); + } else + FAILW(REG_BADBR); break; default: + FAILW(REG_BADBR); + break; + } + assert(NOTREACHED); + break; + case L_BRACK: /* brackets are not too hard */ + switch (c) { + case CHR(']'): + if (LASTTYPE('[')) + RETV(PLAIN, c); + else { + INTOCON((v->cflags®_EXTENDED) ? + L_ERE : L_BRE); + RET(']'); + } + break; + case CHR('\\'): + NOTE(REG_UBBS); + if (!(v->cflags®_ADVF)) + RETV(PLAIN, c); + NOTE(REG_UNONPOSIX); + if (ATEOS()) + FAILW(REG_EESCAPE); + (DISCARD)lexescape(v); + switch (v->nexttype) { /* not all escapes okay here */ + case PLAIN: + return 1; + break; + case CCLASS: + switch (v->nextvalue) { + case 'd': + lexnest(v, brbackd, ENDOF(brbackd)); + break; + case 's': + lexnest(v, brbacks, ENDOF(brbacks)); + break; + case 'w': + lexnest(v, brbackw, ENDOF(brbackw)); + break; + default: + FAILW(REG_EESCAPE); + break; + } + /* lexnest done, back up and try again */ + v->nexttype = v->lasttype; + return next(v); + break; + } + /* not one of the acceptable escapes */ + FAILW(REG_EESCAPE); + break; + case CHR('-'): + if (LASTTYPE('[') || NEXT1(']')) + RETV(PLAIN, c); + else + RETV(RANGE, c); + break; + case CHR('['): + if (ATEOS()) + FAILW(REG_EBRACK); + switch (*v->now++) { + case CHR('.'): + INTOCON(L_CEL); + /* might or might not be locale-specific */ + RET(COLLEL); + break; + case CHR('='): + INTOCON(L_ECL); + NOTE(REG_ULOCALE); + RET(ECLASS); + break; + case CHR(':'): + INTOCON(L_CCL); + NOTE(REG_ULOCALE); + RET(CCLASS); + break; + default: /* oops */ + v->now--; + RETV(PLAIN, c); + break; + } assert(NOTREACHED); break; + default: + RETV(PLAIN, c); + break; + } + assert(NOTREACHED); + break; + case L_CEL: /* collating elements are easy */ + if (c == CHR('.') && NEXT1(']')) { + v->now++; + INTOCON(L_BRACK); + RETV(END, '.'); + } else + RETV(PLAIN, c); + break; + case L_ECL: /* ditto equivalence classes */ + if (c == CHR('=') && NEXT1(']')) { + v->now++; + INTOCON(L_BRACK); + RETV(END, '='); + } else + RETV(PLAIN, c); + break; + case L_CCL: /* ditto character classes */ + if (c == CHR(':') && NEXT1(']')) { + v->now++; + INTOCON(L_BRACK); + RETV(END, ':'); + } else + RETV(PLAIN, c); + break; + default: + assert(NOTREACHED); + break; } /* that got rid of everything except EREs and AREs */ assert(INCON(L_ERE)); /* deal with EREs and AREs, except for backslashes */ - switch (c) - { - case CHR('|'): - RET('|'); - break; - case CHR('*'): - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('*', 0); - } - RETV('*', 1); - break; - case CHR('+'): - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('+', 0); - } - RETV('+', 1); - break; - case CHR('?'): - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('?', 0); - } - RETV('?', 1); - break; - case CHR('{'): /* bounds start or plain character */ - if (v->cflags & REG_EXPANDED) - skip(v); - if (ATEOS() || !iscdigit(*v->now)) - { - NOTE(REG_UBRACES); - NOTE(REG_UUNSPEC); - RETV(PLAIN, c); - } - else - { - NOTE(REG_UBOUNDS); - INTOCON(L_EBND); - RET('{'); + switch (c) { + case CHR('|'): + RET('|'); + break; + case CHR('*'): + if ((v->cflags®_ADVF) && NEXT1('?')) { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('*', 0); + } + RETV('*', 1); + break; + case CHR('+'): + if ((v->cflags®_ADVF) && NEXT1('?')) { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('+', 0); + } + RETV('+', 1); + break; + case CHR('?'): + if ((v->cflags®_ADVF) && NEXT1('?')) { + v->now++; + NOTE(REG_UNONPOSIX); + RETV('?', 0); + } + RETV('?', 1); + break; + case CHR('{'): /* bounds start or plain character */ + if (v->cflags®_EXPANDED) + skip(v); + if (ATEOS() || !iscdigit(*v->now)) { + NOTE(REG_UBRACES); + NOTE(REG_UUNSPEC); + RETV(PLAIN, c); + } else { + NOTE(REG_UBOUNDS); + INTOCON(L_EBND); + RET('{'); + } + assert(NOTREACHED); + break; + case CHR('('): /* parenthesis, or advanced extension */ + if ((v->cflags®_ADVF) && NEXT1('?')) { + NOTE(REG_UNONPOSIX); + v->now++; + switch (*v->now++) { + case CHR(':'): /* non-capturing paren */ + RETV('(', 0); + break; + case CHR('#'): /* comment */ + while (!ATEOS() && *v->now != CHR(')')) + v->now++; + if (!ATEOS()) + v->now++; + assert(v->nexttype == v->lasttype); + return next(v); + break; + case CHR('='): /* positive lookahead */ + NOTE(REG_ULOOKAHEAD); + RETV(LACON, 1); + break; + case CHR('!'): /* negative lookahead */ + NOTE(REG_ULOOKAHEAD); + RETV(LACON, 0); + break; + default: + FAILW(REG_BADRPT); + break; } assert(NOTREACHED); - break; - case CHR('('): /* parenthesis, or advanced extension */ - if ((v->cflags & REG_ADVF) && NEXT1('?')) - { - NOTE(REG_UNONPOSIX); - v->now++; - switch (*v->now++) - { - case CHR(':'): /* non-capturing paren */ - RETV('(', 0); - break; - case CHR('#'): /* comment */ - while (!ATEOS() && *v->now != CHR(')')) - v->now++; - if (!ATEOS()) - v->now++; - assert(v->nexttype == v->lasttype); - return next(v); - break; - case CHR('='): /* positive lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 1); - break; - case CHR('!'): /* negative lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 0); - break; - default: - FAILW(REG_BADRPT); - break; - } - assert(NOTREACHED); - } - if (v->cflags & REG_NOSUB) - RETV('(', 0); /* all parens non-capturing */ - else - RETV('(', 1); - break; - case CHR(')'): - if (LASTTYPE('(')) - NOTE(REG_UUNSPEC); - RETV(')', c); - break; - case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ - if (HAVE(6) && *(v->now + 0) == CHR('[') && - *(v->now + 1) == CHR(':') && - (*(v->now + 2) == CHR('<') || - *(v->now + 2) == CHR('>')) && - *(v->now + 3) == CHR(':') && - *(v->now + 4) == CHR(']') && - *(v->now + 5) == CHR(']')) - { - c = *(v->now + 2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ? '<' : '>'); - } - INTOCON(L_BRACK); - if (NEXT1('^')) - { - v->now++; - RETV('[', 0); - } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); - break; - case CHR('^'): - RET('^'); - break; - case CHR('$'): - RET('$'); - break; - case CHR('\\'): /* mostly punt backslashes to code below */ - if (ATEOS()) - FAILW(REG_EESCAPE); - break; - default: /* ordinary character */ - RETV(PLAIN, c); - break; + } + if (v->cflags®_NOSUB) + RETV('(', 0); /* all parens non-capturing */ + else + RETV('(', 1); + break; + case CHR(')'): + if (LASTTYPE('(')) { + NOTE(REG_UUNSPEC); + } + RETV(')', c); + break; + case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ + if (HAVE(6) && *(v->now+0) == CHR('[') && + *(v->now+1) == CHR(':') && + (*(v->now+2) == CHR('<') || + *(v->now+2) == CHR('>')) && + *(v->now+3) == CHR(':') && + *(v->now+4) == CHR(']') && + *(v->now+5) == CHR(']')) { + c = *(v->now+2); + v->now += 6; + NOTE(REG_UNONPOSIX); + RET((c == CHR('<')) ? '<' : '>'); + } + INTOCON(L_BRACK); + if (NEXT1('^')) { + v->now++; + RETV('[', 0); + } + RETV('[', 1); + break; + case CHR('.'): + RET('.'); + break; + case CHR('^'): + RET('^'); + break; + case CHR('$'): + RET('$'); + break; + case CHR('\\'): /* mostly punt backslashes to code below */ + if (ATEOS()) + FAILW(REG_EESCAPE); + break; + default: /* ordinary character */ + RETV(PLAIN, c); + break; } /* ERE/ARE backslash handling; backslash already eaten */ assert(!ATEOS()); - if (!(v->cflags & REG_ADVF)) - { /* only AREs have non-trivial escapes */ - if (iscalnum(*v->now)) - { + if (!(v->cflags®_ADVF)) { /* only AREs have non-trivial escapes */ + if (iscalnum(*v->now)) { NOTE(REG_UBSALNUM); NOTE(REG_UUNSPEC); } RETV(PLAIN, *v->now++); } - (DISCARD) lexescape(v); + (DISCARD)lexescape(v); if (ISERR()) FAILW(REG_EESCAPE); - if (v->nexttype == CCLASS) - { /* fudge at lexical level */ - switch (v->nextvalue) - { - case 'd': - lexnest(v, backd, ENDOF(backd)); - break; - case 'D': - lexnest(v, backD, ENDOF(backD)); - break; - case 's': - lexnest(v, backs, ENDOF(backs)); - break; - case 'S': - lexnest(v, backS, ENDOF(backS)); - break; - case 'w': - lexnest(v, backw, ENDOF(backw)); - break; - case 'W': - lexnest(v, backW, ENDOF(backW)); - break; - default: - assert(NOTREACHED); - FAILW(REG_ASSERT); - break; + if (v->nexttype == CCLASS) { /* fudge at lexical level */ + switch (v->nextvalue) { + case 'd': lexnest(v, backd, ENDOF(backd)); break; + case 'D': lexnest(v, backD, ENDOF(backD)); break; + case 's': lexnest(v, backs, ENDOF(backs)); break; + case 'S': lexnest(v, backS, ENDOF(backS)); break; + case 'w': lexnest(v, backw, ENDOF(backw)); break; + case 'W': lexnest(v, backW, ENDOF(backW)); break; + default: + assert(NOTREACHED); + FAILW(REG_ASSERT); + break; } /* lexnest done, back up and try again */ v->nexttype = v->lasttype; @@ -709,23 +651,24 @@ next(struct vars * v) } /* - * lexescape - parse an ARE backslash escape (backslash already eaten) + - lexescape - parse an ARE backslash escape (backslash already eaten) * Note slightly nonstandard use of the CCLASS type code. + ^ static int lexescape(struct vars *); */ -static int /* not actually used, but convenient for - * RETV */ -lexescape(struct vars * v) +static int /* not actually used, but convenient for RETV */ +lexescape(v) +struct vars *v; { - chr c; - static chr alert[] = { + chr c; + static chr alert[] = { CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') }; - static chr esc[] = { + static chr esc[] = { CHR('E'), CHR('S'), CHR('C') }; - chr *save; + chr *save; - assert(v->cflags & REG_ADVF); + assert(v->cflags®_ADVF); assert(!ATEOS()); c = *v->now++; @@ -733,295 +676,255 @@ lexescape(struct vars * v) RETV(PLAIN, c); NOTE(REG_UNONPOSIX); - switch (c) - { - case CHR('a'): - RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); - break; - case CHR('A'): - RETV(SBEGIN, 0); - break; - case CHR('b'): - RETV(PLAIN, CHR('\b')); - break; - case CHR('B'): - RETV(PLAIN, CHR('\\')); - break; - case CHR('c'): - NOTE(REG_UUNPORT); - if (ATEOS()) - FAILW(REG_EESCAPE); - RETV(PLAIN, (chr) (*v->now++ & 037)); - break; - case CHR('d'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'd'); - break; - case CHR('D'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'D'); - break; - case CHR('e'): - NOTE(REG_UUNPORT); - RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); - break; - case CHR('f'): - RETV(PLAIN, CHR('\f')); - break; - case CHR('m'): - RET('<'); - break; - case CHR('M'): - RET('>'); - break; - case CHR('n'): - RETV(PLAIN, CHR('\n')); - break; - case CHR('r'): - RETV(PLAIN, CHR('\r')); - break; - case CHR('s'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 's'); - break; - case CHR('S'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'S'); - break; - case CHR('t'): - RETV(PLAIN, CHR('\t')); - break; - case CHR('u'): - c = lexdigits(v, 16, 4, 4); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('U'): - c = lexdigits(v, 16, 8, 8); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('v'): - RETV(PLAIN, CHR('\v')); - break; - case CHR('w'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'w'); - break; - case CHR('W'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'W'); - break; - case CHR('x'): - NOTE(REG_UUNPORT); - c = lexdigits(v, 16, 1, 255); /* REs >255 long outside - * spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('y'): - NOTE(REG_ULOCALE); - RETV(WBDRY, 0); - break; - case CHR('Y'): - NOTE(REG_ULOCALE); - RETV(NWBDRY, 0); - break; - case CHR('Z'): - RETV(SEND, 0); - break; - case CHR('1'): - case CHR('2'): - case CHR('3'): - case CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - save = v->now; - v->now--; /* put first digit back */ - c = lexdigits(v, 10, 1, 255); /* REs >255 long outside - * spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - /* ugly heuristic (first test is "exactly 1 digit?") */ - if (v->now - save == 0 || (int) c <= v->nsubexp) - { - NOTE(REG_UBACKREF); - RETV(BACKREF, (chr) c); - } - /* oops, doesn't look like it's a backref after all... */ - v->now = save; - /* and fall through into octal number */ - case CHR('0'): - NOTE(REG_UUNPORT); - v->now--; /* put first digit back */ - c = lexdigits(v, 8, 1, 3); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - default: - assert(iscalpha(c)); - FAILW(REG_EESCAPE); /* unknown alphabetic escape */ - break; + switch (c) { + case CHR('a'): + RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); + break; + case CHR('A'): + RETV(SBEGIN, 0); + break; + case CHR('b'): + RETV(PLAIN, CHR('\b')); + break; + case CHR('B'): + RETV(PLAIN, CHR('\\')); + break; + case CHR('c'): + NOTE(REG_UUNPORT); + if (ATEOS()) + FAILW(REG_EESCAPE); + RETV(PLAIN, (chr)(*v->now++ & 037)); + break; + case CHR('d'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'd'); + break; + case CHR('D'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'D'); + break; + case CHR('e'): + NOTE(REG_UUNPORT); + RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); + break; + case CHR('f'): + RETV(PLAIN, CHR('\f')); + break; + case CHR('m'): + RET('<'); + break; + case CHR('M'): + RET('>'); + break; + case CHR('n'): + RETV(PLAIN, CHR('\n')); + break; + case CHR('r'): + RETV(PLAIN, CHR('\r')); + break; + case CHR('s'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 's'); + break; + case CHR('S'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'S'); + break; + case CHR('t'): + RETV(PLAIN, CHR('\t')); + break; + case CHR('u'): + c = lexdigits(v, 16, 4, 4); + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + case CHR('U'): + c = lexdigits(v, 16, 8, 8); + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + case CHR('v'): + RETV(PLAIN, CHR('\v')); + break; + case CHR('w'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'w'); + break; + case CHR('W'): + NOTE(REG_ULOCALE); + RETV(CCLASS, 'W'); + break; + case CHR('x'): + NOTE(REG_UUNPORT); + c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + case CHR('y'): + NOTE(REG_ULOCALE); + RETV(WBDRY, 0); + break; + case CHR('Y'): + NOTE(REG_ULOCALE); + RETV(NWBDRY, 0); + break; + case CHR('Z'): + RETV(SEND, 0); + break; + case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): + case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): + case CHR('9'): + save = v->now; + v->now--; /* put first digit back */ + c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */ + if (ISERR()) + FAILW(REG_EESCAPE); + /* ugly heuristic (first test is "exactly 1 digit?") */ + if (v->now - save == 0 || (int)c <= v->nsubexp) { + NOTE(REG_UBACKREF); + RETV(BACKREF, (chr)c); + } + /* oops, doesn't look like it's a backref after all... */ + v->now = save; + /* and fall through into octal number */ + case CHR('0'): + NOTE(REG_UUNPORT); + v->now--; /* put first digit back */ + c = lexdigits(v, 8, 1, 3); + if (ISERR()) + FAILW(REG_EESCAPE); + RETV(PLAIN, c); + break; + default: + assert(iscalpha(c)); + FAILW(REG_EESCAPE); /* unknown alphabetic escape */ + break; } assert(NOTREACHED); } /* - * lexdigits - slurp up digits and return chr value + - lexdigits - slurp up digits and return chr value + ^ static chr lexdigits(struct vars *, int, int, int); */ -static chr /* chr value; errors signalled via ERR */ -lexdigits(struct vars * v, - int base, - int minlen, - int maxlen) +static chr /* chr value; errors signalled via ERR */ +lexdigits(v, base, minlen, maxlen) +struct vars *v; +int base; +int minlen; +int maxlen; { - uchr n; /* unsigned to avoid overflow misbehavior */ - int len; - chr c; - int d; - const uchr ub = (uchr) base; + uchr n; /* unsigned to avoid overflow misbehavior */ + int len; + chr c; + int d; + CONST uchr ub = (uchr) base; n = 0; - for (len = 0; len < maxlen && !ATEOS(); len++) - { + for (len = 0; len < maxlen && !ATEOS(); len++) { c = *v->now++; - switch (c) - { - case CHR('0'): - case CHR('1'): - case CHR('2'): - case CHR('3'): - case CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - d = DIGITVAL(c); - break; - case CHR('a'): - case CHR('A'): - d = 10; - break; - case CHR('b'): - case CHR('B'): - d = 11; - break; - case CHR('c'): - case CHR('C'): - d = 12; - break; - case CHR('d'): - case CHR('D'): - d = 13; - break; - case CHR('e'): - case CHR('E'): - d = 14; - break; - case CHR('f'): - case CHR('F'): - d = 15; - break; - default: - v->now--; /* oops, not a digit at all */ - d = -1; - break; + switch (c) { + case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): + case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): + case CHR('8'): case CHR('9'): + d = DIGITVAL(c); + break; + case CHR('a'): case CHR('A'): d = 10; break; + case CHR('b'): case CHR('B'): d = 11; break; + case CHR('c'): case CHR('C'): d = 12; break; + case CHR('d'): case CHR('D'): d = 13; break; + case CHR('e'): case CHR('E'): d = 14; break; + case CHR('f'): case CHR('F'): d = 15; break; + default: + v->now--; /* oops, not a digit at all */ + d = -1; + break; } - if (d >= base) - { /* not a plausible digit */ + if (d >= base) { /* not a plausible digit */ v->now--; d = -1; } if (d < 0) - break; /* NOTE BREAK OUT */ - n = n * ub + (uchr) d; + break; /* NOTE BREAK OUT */ + n = n*ub + (uchr)d; } if (len < minlen) ERR(REG_EESCAPE); - return (chr) n; + return (chr)n; } /* - * brenext - get next BRE token - * + - brenext - get next BRE token * This is much like EREs except for all the stupid backslashes and the * context-dependency of some things. + ^ static int brenext(struct vars *, pchr); */ -static int /* 1 normal, 0 failure */ -brenext(struct vars * v, - chr pc) +static int /* 1 normal, 0 failure */ +brenext(v, pc) +struct vars *v; +pchr pc; { - chr c = (chr) pc; + chr c = (chr)pc; - switch (c) - { - case CHR('*'): - if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) - RETV(PLAIN, c); - RET('*'); - break; - case CHR('['): - if (HAVE(6) && *(v->now + 0) == CHR('[') && - *(v->now + 1) == CHR(':') && - (*(v->now + 2) == CHR('<') || - *(v->now + 2) == CHR('>')) && - *(v->now + 3) == CHR(':') && - *(v->now + 4) == CHR(']') && - *(v->now + 5) == CHR(']')) - { - c = *(v->now + 2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ? '<' : '>'); - } - INTOCON(L_BRACK); - if (NEXT1('^')) - { - v->now++; - RETV('[', 0); - } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); - break; - case CHR('^'): - if (LASTTYPE(EMPTY)) - RET('^'); - if (LASTTYPE('(')) - { - NOTE(REG_UUNSPEC); - RET('^'); - } + switch (c) { + case CHR('*'): + if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) RETV(PLAIN, c); - break; - case CHR('$'): - if (v->cflags & REG_EXPANDED) - skip(v); - if (ATEOS()) - RET('$'); - if (NEXT2('\\', ')')) - { - NOTE(REG_UUNSPEC); - RET('$'); - } - RETV(PLAIN, c); - break; - case CHR('\\'): - break; /* see below */ - default: - RETV(PLAIN, c); - break; + RET('*'); + break; + case CHR('['): + if (HAVE(6) && *(v->now+0) == CHR('[') && + *(v->now+1) == CHR(':') && + (*(v->now+2) == CHR('<') || + *(v->now+2) == CHR('>')) && + *(v->now+3) == CHR(':') && + *(v->now+4) == CHR(']') && + *(v->now+5) == CHR(']')) { + c = *(v->now+2); + v->now += 6; + NOTE(REG_UNONPOSIX); + RET((c == CHR('<')) ? '<' : '>'); + } + INTOCON(L_BRACK); + if (NEXT1('^')) { + v->now++; + RETV('[', 0); + } + RETV('[', 1); + break; + case CHR('.'): + RET('.'); + break; + case CHR('^'): + if (LASTTYPE(EMPTY)) + RET('^'); + if (LASTTYPE('(')) { + NOTE(REG_UUNSPEC); + RET('^'); + } + RETV(PLAIN, c); + break; + case CHR('$'): + if (v->cflags®_EXPANDED) + skip(v); + if (ATEOS()) + RET('$'); + if (NEXT2('\\', ')')) { + NOTE(REG_UUNSPEC); + RET('$'); + } + RETV(PLAIN, c); + break; + case CHR('\\'): + break; /* see below */ + default: + RETV(PLAIN, c); + break; } assert(c == CHR('\\')); @@ -1030,64 +933,57 @@ brenext(struct vars * v, FAILW(REG_EESCAPE); c = *v->now++; - switch (c) - { - case CHR('{'): - INTOCON(L_BBND); - NOTE(REG_UBOUNDS); - RET('{'); - break; - case CHR('('): - RETV('(', 1); - break; - case CHR(')'): - RETV(')', c); - break; - case CHR('<'): - NOTE(REG_UNONPOSIX); - RET('<'); - break; - case CHR('>'): - NOTE(REG_UNONPOSIX); - RET('>'); - break; - case CHR('1'): - case CHR('2'): - case CHR('3'): - case CHR('4'): - case CHR('5'): - case CHR('6'): - case CHR('7'): - case CHR('8'): - case CHR('9'): - NOTE(REG_UBACKREF); - RETV(BACKREF, (chr) DIGITVAL(c)); - break; - default: - if (iscalnum(c)) - { - NOTE(REG_UBSALNUM); - NOTE(REG_UUNSPEC); - } - RETV(PLAIN, c); - break; + switch (c) { + case CHR('{'): + INTOCON(L_BBND); + NOTE(REG_UBOUNDS); + RET('{'); + break; + case CHR('('): + RETV('(', 1); + break; + case CHR(')'): + RETV(')', c); + break; + case CHR('<'): + NOTE(REG_UNONPOSIX); + RET('<'); + break; + case CHR('>'): + NOTE(REG_UNONPOSIX); + RET('>'); + break; + case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): + case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): + case CHR('9'): + NOTE(REG_UBACKREF); + RETV(BACKREF, (chr)DIGITVAL(c)); + break; + default: + if (iscalnum(c)) { + NOTE(REG_UBSALNUM); + NOTE(REG_UUNSPEC); + } + RETV(PLAIN, c); + break; } assert(NOTREACHED); } /* - * skip - skip white space and comments in expanded form + - skip - skip white space and comments in expanded form + ^ static VOID skip(struct vars *); */ -static void -skip(struct vars * v) +static VOID +skip(v) +struct vars *v; { - chr *start = v->now; + chr *start = v->now; - assert(v->cflags & REG_EXPANDED); + assert(v->cflags®_EXPANDED); - for (;;) - { + for (;;) { while (!ATEOS() && iscspace(*v->now)) v->now++; if (ATEOS() || *v->now != CHR('#')) @@ -1103,31 +999,50 @@ skip(struct vars * v) } /* - * newline - return the chr for a newline - * + - newline - return the chr for a newline * This helps confine use of CHR to this source file. + ^ static chr newline(NOPARMS); */ static chr -newline(void) +newline() { return CHR('\n'); } /* - * chrnamed - return the chr known by a given (chr string) name - * + - ch - return the chr sequence for regc_locale.c's fake collating element ch + * This helps confine use of CHR to this source file. Beware that the caller + * knows how long the sequence is. + ^ #ifdef REG_DEBUG + ^ static chr *ch(NOPARMS); + ^ #endif + */ +#ifdef REG_DEBUG +static chr * +ch() +{ + static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') }; + + return chstr; +} +#endif + +/* + - chrnamed - return the chr known by a given (chr string) name * The code is a bit clumsy, but this routine gets only such specialized * use that it hardly matters. + ^ static chr chrnamed(struct vars *, chr *, chr *, pchr); */ static chr -chrnamed(struct vars * v, - chr *startp, /* start of name */ - chr *endp, /* just past end of name */ - chr lastresort) /* what to return if name lookup fails */ +chrnamed(v, startp, endp, lastresort) +struct vars *v; +chr *startp; /* start of name */ +chr *endp; /* just past end of name */ +pchr lastresort; /* what to return if name lookup fails */ { - celt c; - int errsave; - int e; + celt c; + int errsave; + int e; struct cvec *cv; errsave = v->err; @@ -1137,10 +1052,10 @@ chrnamed(struct vars * v, v->err = errsave; if (e != 0) - return (chr) lastresort; + return (chr)lastresort; cv = range(v, c, c, 0); if (cv->nchrs == 0) - return (chr) lastresort; + return (chr)lastresort; return cv->chrs[0]; } diff --git a/src/regex/regerrs.h b/src/regex/regerrs.h index f99dbf4f73..a3d98b6818 100644 --- a/src/regex/regerrs.h +++ b/src/regex/regerrs.h @@ -1,75 +1,18 @@ -/* - * $Id$ - */ - -{ - REG_OKAY, "REG_OKAY", "no errors detected" -}, - -{ - REG_NOMATCH, "REG_NOMATCH", "failed to match" -}, - -{ - REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" -}, - -{ - REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" -}, - -{ - REG_ECTYPE, "REG_ECTYPE", "invalid character class" -}, - -{ - REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" -}, - -{ - REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" -}, - -{ - REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" -}, - -{ - REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" -}, - -{ - REG_EBRACE, "REG_EBRACE", "braces {} not balanced" -}, - -{ - REG_BADBR, "REG_BADBR", "invalid repetition count(s)" -}, - -{ - REG_ERANGE, "REG_ERANGE", "invalid character range" -}, - -{ - REG_ESPACE, "REG_ESPACE", "out of memory" -}, - -{ - REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" -}, - -{ - REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" -}, - -{ - REG_INVARG, "REG_INVARG", "invalid argument to regex function" -}, - -{ - REG_MIXED, "REG_MIXED", "character widths of regex and string differ" -}, - -{ - REG_BADOPT, "REG_BADOPT", "invalid embedded option" -}, +{ REG_OKAY, "REG_OKAY", "no errors detected" }, +{ REG_NOMATCH, "REG_NOMATCH", "failed to match" }, +{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" }, +{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, +{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, +{ REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" }, +{ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, +{ REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" }, +{ REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" }, +{ REG_EBRACE, "REG_EBRACE", "braces {} not balanced" }, +{ REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, +{ REG_ERANGE, "REG_ERANGE", "invalid character range" }, +{ REG_ESPACE, "REG_ESPACE", "out of memory" }, +{ REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" }, +{ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, +{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" }, +{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" }, +{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" }, diff --git a/src/regex/regex.h b/src/regex/regex.h index d094d072d5..8289a500eb 100644 --- a/src/regex/regex.h +++ b/src/regex/regex.h @@ -1,74 +1,341 @@ #ifndef _REGEX_H_ #define _REGEX_H_ /* never again */ -/* ========= begin header generated by ./mkh ========= */ +/* + * regular expressions + * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * + * Prototypes etc. marked with "^" within comments get gathered up (and + * possibly edited) by the regfwd program and inserted near the bottom of + * this file. + * + * We offer the option of declaring one wide-character version of the + * RE functions as well as the char versions. To do that, define + * __REG_WIDE_T to the type of wide characters (unfortunately, there + * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and + * __REG_WIDE_EXEC to the names to be used for the compile and execute + * functions (suggestion: re_Xcomp and re_Xexec, where X is a letter + * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode). + * For cranky old compilers, it may be necessary to do something like: + * #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d) + * #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g) + * rather than just #defining the names as parameterless macros. + * + * For some specialized purposes, it may be desirable to suppress the + * declarations of the "front end" functions, regcomp() and regexec(), + * or of the char versions of the compile and execute functions. To + * suppress the front-end functions, define __REG_NOFRONT. To suppress + * the char versions, define __REG_NOCHAR. + * + * The right place to do those defines (and some others you may want, see + * below) would be . If you don't have control of that file, + * the right place to add your own defines to this file is marked below. + * This is normally done automatically, by the makefile and regmkhdr, based + * on the contents of regcustom.h. + */ + + + +/* + * voodoo for C++ + */ #ifdef __cplusplus extern "C" { #endif -/* === regex2.h === */ -typedef off_t regoff_t; + + +/* + * Add your own defines, if needed, here. + */ + + + +/* + * Location where a chunk of regcustom.h is automatically spliced into + * this file (working from its prototype, regproto.h). + */ +/* --- begin --- */ +/* ensure certain things don't sneak in from system headers */ +#ifdef __REG_WIDE_T +#undef __REG_WIDE_T +#endif +#ifdef __REG_WIDE_COMPILE +#undef __REG_WIDE_COMPILE +#endif +#ifdef __REG_WIDE_EXEC +#undef __REG_WIDE_EXEC +#endif +#ifdef __REG_REGOFF_T +#undef __REG_REGOFF_T +#endif +#ifdef __REG_VOID_T +#undef __REG_VOID_T +#endif +#ifdef __REG_CONST +#undef __REG_CONST +#endif +#ifdef __REG_NOFRONT +#undef __REG_NOFRONT +#endif +#ifdef __REG_NOCHAR +#undef __REG_NOCHAR +#endif +/* interface types */ +#define __REG_WIDE_T Tcl_UniChar +#define __REG_REGOFF_T long /* not really right, but good enough... */ +#define __REG_VOID_T VOID +#define __REG_CONST CONST +/* names and declarations */ +#define __REG_WIDE_COMPILE TclReComp +#define __REG_WIDE_EXEC TclReExec +#define __REG_NOFRONT /* don't want regcomp() and regexec() */ +#define __REG_NOCHAR /* or the char versions */ +#define regfree TclReFree +#define regerror TclReError +/* --- end --- */ + + +/* + * interface types etc. + */ + +/* + * regoff_t has to be large enough to hold either off_t or ssize_t, + * and must be signed; it's only a guess that long is suitable, so we + * offer an override. + */ +#ifdef __REG_REGOFF_T +typedef __REG_REGOFF_T regoff_t; +#else +typedef long regoff_t; +#endif + +/* + * For benefit of old compilers, we offer the option of + * overriding the `void' type used to declare nonexistent return types. + */ +#ifdef __REG_VOID_T +typedef __REG_VOID_T re_void; +#else +typedef void re_void; +#endif + +/* + * Also for benefit of old compilers, can supply a macro + * which expands to a substitute for `const'. + */ +#ifndef __REG_CONST +#define __REG_CONST const +#endif + + + +/* + * other interface types + */ + +/* the biggie, a compiled RE (or rather, a front end to same) */ typedef struct { - int re_magic; - size_t re_nsub; /* number of parenthesized subexpressions */ - const char *re_endp; /* end pointer for REG_PEND */ - struct re_guts *re_g; /* none of your business :-) */ + int re_magic; /* magic number */ + size_t re_nsub; /* number of subexpressions */ + long re_info; /* information about RE */ +# define REG_UBACKREF 000001 +# define REG_ULOOKAHEAD 000002 +# define REG_UBOUNDS 000004 +# define REG_UBRACES 000010 +# define REG_UBSALNUM 000020 +# define REG_UPBOTCH 000040 +# define REG_UBBS 000100 +# define REG_UNONPOSIX 000200 +# define REG_UUNSPEC 000400 +# define REG_UUNPORT 001000 +# define REG_ULOCALE 002000 +# define REG_UEMPTYMATCH 004000 +# define REG_UIMPOSSIBLE 010000 +# define REG_USHORTEST 020000 + int re_csize; /* sizeof(character) */ + char *re_endp; /* backward compatibility kludge */ + /* the rest is opaque pointers to hidden innards */ + char *re_guts; /* `char *' is more portable than `void *' */ + char *re_fns; } regex_t; + +/* result reporting (may acquire more fields later) */ typedef struct { - regoff_t rm_so; /* start of match */ - regoff_t rm_eo; /* end of match */ + regoff_t rm_so; /* start of substring */ + regoff_t rm_eo; /* end of substring */ } regmatch_t; - -/* === regcomp.c === */ -extern int regcomp(regex_t *, const char *, int); -#define REG_BASIC 0000 -#define REG_EXTENDED 0001 -#define REG_ICASE 0002 -#define REG_NOSUB 0004 -#define REG_NEWLINE 0010 -#define REG_NOSPEC 0020 -#define REG_PEND 0040 -#define REG_DUMP 0200 +/* supplementary control and reporting */ +typedef struct { + regmatch_t rm_extend; /* see REG_EXPECT */ +} rm_detail_t; -/* === regerror.c === */ -#define REG_OKAY 0 -#define REG_NOMATCH 1 -#define REG_BADPAT 2 -#define REG_ECOLLATE 3 -#define REG_ECTYPE 4 -#define REG_EESCAPE 5 -#define REG_ESUBREG 6 -#define REG_EBRACK 7 -#define REG_EPAREN 8 -#define REG_EBRACE 9 -#define REG_BADBR 10 -#define REG_ERANGE 11 -#define REG_ESPACE 12 -#define REG_BADRPT 13 -#define REG_EMPTY 14 -#define REG_ASSERT 15 -#define REG_INVARG 16 -#define REG_ATOI 255 /* convert name to number (!) */ -#define REG_ITOA 0400 /* convert number to name (!) */ -extern size_t regerror(int, const regex_t *, char *, size_t); + +/* + * compilation + ^ #ifndef __REG_NOCHAR + ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int); + ^ #endif + ^ #ifndef __REG_NOFRONT + ^ int regcomp(regex_t *, __REG_CONST char *, int); + ^ #endif + ^ #ifdef __REG_WIDE_T + ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); + ^ #endif + */ +#define REG_BASIC 000000 /* BREs (convenience) */ +#define REG_EXTENDED 000001 /* EREs */ +#define REG_ADVF 000002 /* advanced features in EREs */ +#define REG_ADVANCED 000003 /* AREs (which are also EREs) */ +#define REG_QUOTE 000004 /* no special characters, none */ +#define REG_NOSPEC REG_QUOTE /* historical synonym */ +#define REG_ICASE 000010 /* ignore case */ +#define REG_NOSUB 000020 /* don't care about subexpressions */ +#define REG_EXPANDED 000040 /* expanded format, white space & comments */ +#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */ +#define REG_NLANCH 000200 /* ^ matches after \n, $ before */ +#define REG_NEWLINE 000300 /* newlines are line terminators */ +#define REG_PEND 000400 /* ugh -- backward-compatibility hack */ +#define REG_EXPECT 001000 /* report details on partial/limited matches */ +#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */ +#define REG_DUMP 004000 /* none of your business :-) */ +#define REG_FAKE 010000 /* none of your business :-) */ +#define REG_PROGRESS 020000 /* none of your business :-) */ -/* === regexec.c === */ -extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int); -#define REG_NOTBOL 00001 -#define REG_NOTEOL 00002 -#define REG_STARTEND 00004 -#define REG_TRACE 00400 /* tracing of execution */ -#define REG_LARGE 01000 /* force large representation */ -#define REG_BACKR 02000 /* force use of backref code */ + +/* + * execution + ^ #ifndef __REG_NOCHAR + ^ int re_exec(regex_t *, __REG_CONST char *, size_t, + ^ rm_detail_t *, size_t, regmatch_t [], int); + ^ #endif + ^ #ifndef __REG_NOFRONT + ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); + ^ #endif + ^ #ifdef __REG_WIDE_T + ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, + ^ rm_detail_t *, size_t, regmatch_t [], int); + ^ #endif + */ +#define REG_NOTBOL 0001 /* BOS is not BOL */ +#define REG_NOTEOL 0002 /* EOS is not EOL */ +#define REG_STARTEND 0004 /* backward compatibility kludge */ +#define REG_FTRACE 0010 /* none of your business */ +#define REG_MTRACE 0020 /* none of your business */ +#define REG_SMALL 0040 /* none of your business */ -/* === regfree.c === */ -extern void regfree(regex_t *); +/* + * misc generics (may be more functions here eventually) + ^ re_void regfree(regex_t *); + */ + + + +/* + * error reporting + * Be careful if modifying the list of error codes -- the table used by + * regerror() is generated automatically from this file! + * + * Note that there is no wide-char variant of regerror at this time; what + * kind of character is used for error reports is independent of what kind + * is used in matching. + * + ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); + */ +#define REG_OKAY 0 /* no errors detected */ +#define REG_NOMATCH 1 /* failed to match */ +#define REG_BADPAT 2 /* invalid regexp */ +#define REG_ECOLLATE 3 /* invalid collating element */ +#define REG_ECTYPE 4 /* invalid character class */ +#define REG_EESCAPE 5 /* invalid escape \ sequence */ +#define REG_ESUBREG 6 /* invalid backreference number */ +#define REG_EBRACK 7 /* brackets [] not balanced */ +#define REG_EPAREN 8 /* parentheses () not balanced */ +#define REG_EBRACE 9 /* braces {} not balanced */ +#define REG_BADBR 10 /* invalid repetition count(s) */ +#define REG_ERANGE 11 /* invalid character range */ +#define REG_ESPACE 12 /* out of memory */ +#define REG_BADRPT 13 /* quantifier operand invalid */ +#define REG_ASSERT 15 /* "can't happen" -- you found a bug */ +#define REG_INVARG 16 /* invalid argument to regex function */ +#define REG_MIXED 17 /* character widths of regex and string differ */ +#define REG_BADOPT 18 /* invalid embedded option */ +/* two specials for debugging and testing */ +#define REG_ATOI 101 /* convert error-code name to number */ +#define REG_ITOA 102 /* convert error-code number to name */ + + + +/* + * the prototypes, as possibly munched by regfwd + */ +/* =====^!^===== begin forwards =====^!^===== */ +/* automatically gathered by fwd; do not hand-edit */ +/* === regproto.h === */ +#ifndef __REG_NOCHAR +int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int)); +#endif +#ifndef __REG_NOFRONT +int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int)); +#endif +#ifdef __REG_WIDE_T +int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int)); +#endif +#ifndef __REG_NOCHAR +int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); +#endif +#ifndef __REG_NOFRONT +int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int)); +#endif +#ifdef __REG_WIDE_T +int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); +#endif +re_void regfree _ANSI_ARGS_((regex_t *)); +extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t)); +/* automatically gathered by fwd; do not hand-edit */ +/* =====^!^===== end forwards =====^!^===== */ + + + +/* + * more C++ voodoo + */ #ifdef __cplusplus } #endif -/* ========= end header generated by ./mkh ========= */ + + + #endif diff --git a/src/regex/regguts.h b/src/regex/regguts.h index aa12dbf445..36e5092367 100644 --- a/src/regex/regguts.h +++ b/src/regex/regguts.h @@ -1,21 +1,21 @@ /* * Internal interface definitions, etc., for the reg package * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * * Development of this software was funded, in part, by Cray Research Inc., * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * + * thanks all of them. + * * Redistribution and use in source and binary forms -- with or without * modification -- are permitted for any purpose, provided that * redistributions in source form retain this entire copyright notice and * indicate the origin and nature of any modifications. - * + * * I'd appreciate being given credit for this package in the documentation * of software which uses it, but that is not a requirement. - * + * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL @@ -26,8 +26,6 @@ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $Id$ */ @@ -45,38 +43,63 @@ * Things that regcustom.h might override. */ +/* standard header files (NULL is a reasonable indicator for them) */ +#ifndef NULL +#include +#include +#include +#include +#include +#endif + /* assertions */ #ifndef assert -#ifndef REG_DEBUG -# ifndef NDEBUG -# define NDEBUG /* no assertions */ +# ifndef REG_DEBUG +# define NDEBUG /* no assertions */ # endif -#endif #include #endif /* voids */ +#ifndef VOID +#define VOID void /* for function return values */ +#endif #ifndef DISCARD -#define DISCARD void /* for throwing values away */ +#define DISCARD VOID /* for throwing values away */ +#endif +#ifndef PVOID +#define PVOID VOID * /* generic pointer */ #endif #ifndef VS -#define VS(x) ((void *)(x)) /* cast something to generic ptr */ +#define VS(x) ((PVOID)(x)) /* cast something to generic ptr */ +#endif +#ifndef NOPARMS +#define NOPARMS VOID /* for empty parm lists */ +#endif + +/* const */ +#ifndef CONST +#define CONST const /* for old compilers, might be empty */ #endif /* function-pointer declarator */ #ifndef FUNCPTR -#define FUNCPTR(name, args) (*name) args +#if __STDC__ >= 1 +#define FUNCPTR(name, args) (*name)args +#else +#define FUNCPTR(name, args) (*name)() +#endif #endif /* memory allocation */ #ifndef MALLOC -#define MALLOC(n) malloc(n) +#define MALLOC(n) malloc(n) #endif #ifndef REALLOC -#define REALLOC(p, n) realloc(VS(p), n) +#define REALLOC(p, n) realloc(VS(p), n) #endif #ifndef FREE -#define FREE(p) free(VS(p)) +#define FREE(p) free(VS(p)) #endif /* want size of a char in bits, and max value in bounded quantifiers */ @@ -84,7 +107,7 @@ #include #endif #ifndef _POSIX2_RE_DUP_MAX -#define _POSIX2_RE_DUP_MAX 255 /* normally from */ +#define _POSIX2_RE_DUP_MAX 255 /* normally from */ #endif @@ -93,13 +116,13 @@ * misc */ -#define NOTREACHED 0 -#define xxx 1 +#define NOTREACHED 0 +#define xxx 1 -#define DUPMAX _POSIX2_RE_DUP_MAX -#define INFINITY (DUPMAX+1) +#define DUPMAX _POSIX2_RE_DUP_MAX +#define INFINITY (DUPMAX+1) -#define REMAGIC 0xfed7 /* magic number for main struct */ +#define REMAGIC 0xfed7 /* magic number for main struct */ @@ -108,12 +131,12 @@ */ #ifdef REG_DEBUG /* FDEBUG does finite-state tracing */ -#define FDEBUG(arglist) { if (v->eflags®_FTRACE) printf arglist; } +#define FDEBUG(arglist) { if (v->eflags®_FTRACE) printf arglist; } /* MDEBUG does higher-level tracing */ -#define MDEBUG(arglist) { if (v->eflags®_MTRACE) printf arglist; } +#define MDEBUG(arglist) { if (v->eflags®_MTRACE) printf arglist; } #else -#define FDEBUG(arglist) {} -#define MDEBUG(arglist) {} +#define FDEBUG(arglist) {} +#define MDEBUG(arglist) {} #endif @@ -121,25 +144,24 @@ /* * bitmap manipulation */ -#define UBITS (CHAR_BIT * sizeof(unsigned)) -#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) -#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) +#define UBITS (CHAR_BIT * sizeof(unsigned)) +#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) +#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) /* - * We dissect a chr into byts for colormap table indexing. Here we define - * a byt, which will be the same as a byte on most machines... The exact + * We dissect a chr into byts for colormap table indexing. Here we define + * a byt, which will be the same as a byte on most machines... The exact * size of a byt is not critical, but about 8 bits is good, and extraction * of 8-bit chunks is sometimes especially fast. */ #ifndef BYTBITS -#define BYTBITS 8 /* bits in a byt */ +#define BYTBITS 8 /* bits in a byt */ #endif -#define BYTTAB (1<flags&FREECOL) - union tree *block; /* block of solid color, if any */ +struct colordesc { + uchr nchrs; /* number of chars of this color */ + color sub; /* open subcolor (if any); free chain ptr */ +# define NOSUB COLORLESS + struct arc *arcs; /* color chain */ + int flags; +# define FREECOL 01 /* currently free */ +# define PSEUDO 02 /* pseudocolor, no real chars */ +# define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) + union tree *block; /* block of solid color, if any */ }; /* the color map itself */ -struct colormap -{ - int magic; -#define CMMAGIC 0x876 - struct vars *v; /* for compile error reporting */ - size_t ncds; /* number of colordescs */ - size_t max; /* highest in use */ - color free; /* beginning of free chain (if non-0) */ +struct colormap { + int magic; +# define CMMAGIC 0x876 + struct vars *v; /* for compile error reporting */ + size_t ncds; /* number of colordescs */ + size_t max; /* highest in use */ + color free; /* beginning of free chain (if non-0) */ struct colordesc *cd; -#define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) -#define NINLINECDS ((size_t)10) +# define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) +# define NINLINECDS ((size_t)10) struct colordesc cdspace[NINLINECDS]; - union tree tree[NBYTS]; /* tree top, plus fill blocks */ + union tree tree[NBYTS]; /* tree top, plus fill blocks */ }; /* optimization magic to do fast chr->color mapping */ -#define B0(c) ((c) & BYTMASK) -#define B1(c) (((c)>>BYTBITS) & BYTMASK) -#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) -#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) +#define B0(c) ((c) & BYTMASK) +#define B1(c) (((c)>>BYTBITS) & BYTMASK) +#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) +#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) #if NBYTS == 1 -#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) +#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) #endif /* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */ #if NBYTS == 2 -#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) +#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) #endif #if NBYTS == 4 -#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) +#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) #endif @@ -236,23 +251,22 @@ struct colormap * Interface definitions for locale-interface functions in locale.c. * Multi-character collating elements (MCCEs) cause most of the trouble. */ -struct cvec -{ - int nchrs; /* number of chrs */ - int chrspace; /* number of chrs possible */ - chr *chrs; /* pointer to vector of chrs */ - int nranges; /* number of ranges (chr pairs) */ - int rangespace; /* number of chrs possible */ - chr *ranges; /* pointer to vector of chr pairs */ - int nmcces; /* number of MCCEs */ - int mccespace; /* number of MCCEs possible */ - int nmccechrs; /* number of chrs used for MCCEs */ - chr *mcces[1]; /* pointers to 0-terminated MCCEs */ - /* and both batches of chrs are on the end */ +struct cvec { + int nchrs; /* number of chrs */ + int chrspace; /* number of chrs possible */ + chr *chrs; /* pointer to vector of chrs */ + int nranges; /* number of ranges (chr pairs) */ + int rangespace; /* number of chrs possible */ + chr *ranges; /* pointer to vector of chr pairs */ + int nmcces; /* number of MCCEs */ + int mccespace; /* number of MCCEs possible */ + int nmccechrs; /* number of chrs used for MCCEs */ + chr *mcces[1]; /* pointers to 0-terminated MCCEs */ + /* and both batches of chrs are on the end */ }; /* caution: this value cannot be changed easily */ -#define MAXMCCE 2 /* length of longest MCCE */ +#define MAXMCCE 2 /* length of longest MCCE */ @@ -264,59 +278,54 @@ struct cvec */ struct state; -struct arc -{ - int type; -#define ARCFREE '\0' - color co; - struct state *from; /* where it's from (and contained within) */ - struct state *to; /* where it's to */ - struct arc *outchain; /* *from's outs chain or free chain */ -#define freechain outchain - struct arc *inchain; /* *to's ins chain */ - struct arc *colorchain; /* color's arc chain */ +struct arc { + int type; +# define ARCFREE '\0' + color co; + struct state *from; /* where it's from (and contained within) */ + struct state *to; /* where it's to */ + struct arc *outchain; /* *from's outs chain or free chain */ +# define freechain outchain + struct arc *inchain; /* *to's ins chain */ + struct arc *colorchain; /* color's arc chain */ }; -struct arcbatch -{ /* for bulk allocation of arcs */ +struct arcbatch { /* for bulk allocation of arcs */ struct arcbatch *next; -#define ABSIZE 10 - struct arc a[ABSIZE]; +# define ABSIZE 10 + struct arc a[ABSIZE]; }; -struct state -{ - int no; -#define FREESTATE (-1) - char flag; /* marks special states */ - int nins; /* number of inarcs */ - struct arc *ins; /* chain of inarcs */ - int nouts; /* number of outarcs */ - struct arc *outs; /* chain of outarcs */ - struct arc *free; /* chain of free arcs */ - struct state *tmp; /* temporary for traversal algorithms */ - struct state *next; /* chain for traversing all */ - struct state *prev; /* back chain */ - struct arcbatch oas; /* first arcbatch, avoid malloc in easy - * case */ - int noas; /* number of arcs used in first arcbatch */ +struct state { + int no; +# define FREESTATE (-1) + char flag; /* marks special states */ + int nins; /* number of inarcs */ + struct arc *ins; /* chain of inarcs */ + int nouts; /* number of outarcs */ + struct arc *outs; /* chain of outarcs */ + struct arc *free; /* chain of free arcs */ + struct state *tmp; /* temporary for traversal algorithms */ + struct state *next; /* chain for traversing all */ + struct state *prev; /* back chain */ + struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */ + int noas; /* number of arcs used in first arcbatch */ }; -struct nfa -{ - struct state *pre; /* pre-initial state */ - struct state *init; /* initial state */ - struct state *final; /* final state */ - struct state *post; /* post-final state */ - int nstates; /* for numbering states */ - struct state *states; /* state-chain header */ - struct state *slast; /* tail of the chain */ - struct state *free; /* free list */ - struct colormap *cm; /* the color map */ - color bos[2]; /* colors, if any, assigned to BOS and BOL */ - color eos[2]; /* colors, if any, assigned to EOS and EOL */ - struct vars *v; /* simplifies compile error reporting */ - struct nfa *parent; /* parent NFA, if any */ +struct nfa { + struct state *pre; /* pre-initial state */ + struct state *init; /* initial state */ + struct state *final; /* final state */ + struct state *post; /* post-final state */ + int nstates; /* for numbering states */ + struct state *states; /* state-chain header */ + struct state *slast; /* tail of the chain */ + struct state *free; /* free list */ + struct colormap *cm; /* the color map */ + color bos[2]; /* colors, if any, assigned to BOS and BOL */ + color eos[2]; /* colors, if any, assigned to EOS and EOL */ + struct vars *v; /* simplifies compile error reporting */ + struct nfa *parent; /* parent NFA, if any */ }; @@ -324,64 +333,58 @@ struct nfa /* * definitions for compacted NFA */ -struct carc -{ - color co; /* COLORLESS is list terminator */ - int to; /* state number */ +struct carc { + color co; /* COLORLESS is list terminator */ + int to; /* state number */ }; -struct cnfa -{ - int nstates; /* number of states */ - int ncolors; /* number of colors */ - int flags; -#define HASLACONS 01 /* uses lookahead constraints */ - int pre; /* setup state number */ - int post; /* teardown state number */ - color bos[2]; /* colors, if any, assigned to BOS and BOL */ - color eos[2]; /* colors, if any, assigned to EOS and EOL */ - struct carc **states; /* vector of pointers to outarc lists */ - struct carc *arcs; /* the area for the lists */ +struct cnfa { + int nstates; /* number of states */ + int ncolors; /* number of colors */ + int flags; +# define HASLACONS 01 /* uses lookahead constraints */ + int pre; /* setup state number */ + int post; /* teardown state number */ + color bos[2]; /* colors, if any, assigned to BOS and BOL */ + color eos[2]; /* colors, if any, assigned to EOS and EOL */ + struct carc **states; /* vector of pointers to outarc lists */ + struct carc *arcs; /* the area for the lists */ }; - -#define ZAPCNFA(cnfa) ((cnfa).nstates = 0) -#define NULLCNFA(cnfa) ((cnfa).nstates == 0) +#define ZAPCNFA(cnfa) ((cnfa).nstates = 0) +#define NULLCNFA(cnfa) ((cnfa).nstates == 0) /* * subexpression tree */ -struct subre -{ - char op; /* '|', '.' (concat), 'b' (backref), '(', - * '=' */ - char flags; -#define LONGER 01 /* prefers longer match */ -#define SHORTER 02 /* prefers shorter match */ -#define MIXED 04 /* mixed preference below */ -#define CAP 010 /* capturing parens below */ -#define BACKR 020 /* back reference below */ -#define INUSE 0100 /* in use in final tree */ -#define LOCAL 03 /* bits which may not propagate up */ -#define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ -#define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ -#define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) -#define MESSY(f) ((f)&(MIXED|CAP|BACKR)) -#define PREF(f) ((f)&LOCAL) -#define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) -#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) - short retry; /* index into retry memory */ - int subno; /* subexpression number (for 'b' and '(') */ - short min; /* min repetitions, for backref only */ - short max; /* max repetitions, for backref only */ - struct subre *left; /* left child, if any (also freelist - * chain) */ - struct subre *right; /* right child, if any */ - struct state *begin; /* outarcs from here... */ - struct state *end; /* ...ending in inarcs here */ - struct cnfa cnfa; /* compacted NFA, if any */ - struct subre *chain; /* for bookkeeping and error cleanup */ +struct subre { + char op; /* '|', '.' (concat), 'b' (backref), '(', '=' */ + char flags; +# define LONGER 01 /* prefers longer match */ +# define SHORTER 02 /* prefers shorter match */ +# define MIXED 04 /* mixed preference below */ +# define CAP 010 /* capturing parens below */ +# define BACKR 020 /* back reference below */ +# define INUSE 0100 /* in use in final tree */ +# define LOCAL 03 /* bits which may not propagate up */ +# define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ +# define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ +# define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) +# define MESSY(f) ((f)&(MIXED|CAP|BACKR)) +# define PREF(f) ((f)&LOCAL) +# define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) +# define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) + short retry; /* index into retry memory */ + int subno; /* subexpression number (for 'b' and '(') */ + short min; /* min repetitions, for backref only */ + short max; /* max repetitions, for backref only */ + struct subre *left; /* left child, if any (also freelist chain) */ + struct subre *right; /* right child, if any */ + struct state *begin; /* outarcs from here... */ + struct state *end; /* ...ending in inarcs here */ + struct cnfa cnfa; /* compacted NFA, if any */ + struct subre *chain; /* for bookkeeping and error cleanup */ }; @@ -390,9 +393,8 @@ struct subre * table of function pointers for generic manipulation functions * A regex_t's re_fns points to one of these. */ -struct fns -{ - void FUNCPTR(free, (regex_t *)); +struct fns { + VOID FUNCPTR(free, (regex_t *)); }; @@ -400,18 +402,17 @@ struct fns /* * the insides of a regex_t, hidden behind a void * */ -struct guts -{ - int magic; -#define GUTSMAGIC 0xfed9 - int cflags; /* copy of compile flags */ - long info; /* copy of re_info */ - size_t nsub; /* copy of re_nsub */ +struct guts { + int magic; +# define GUTSMAGIC 0xfed9 + int cflags; /* copy of compile flags */ + long info; /* copy of re_info */ + size_t nsub; /* copy of re_nsub */ struct subre *tree; - struct cnfa search; /* for fast preliminary search */ - int ntree; + struct cnfa search; /* for fast preliminary search */ + int ntree; struct colormap cmap; - int FUNCPTR(compare, (const chr *, const chr *, size_t)); - struct subre *lacons; /* lookahead-constraint vector */ - int nlacons; /* size of lacons */ + int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t)); + struct subre *lacons; /* lookahead-constraint vector */ + int nlacons; /* size of lacons */ };