Import regex from tcl 8.4.5

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/branches/RXSPENCER@3951 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Ryan Norton
1999-10-13 02:22:18 +00:00
parent 9bd536df18
commit a6c3a78d25
4 changed files with 1303 additions and 1177 deletions

View File

@@ -28,8 +28,6 @@
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* *
* $Header$
*
*/ */
/* scanning macros (know about v) */ /* scanning macros (know about v) */
@@ -64,27 +62,24 @@
#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr)) #define ENDOF(array) ((array) + sizeof(array)/sizeof(chr))
/* /*
* lexstart - set up lexical stuff, scan leading options - lexstart - set up lexical stuff, scan leading options
^ static VOID lexstart(struct vars *);
*/ */
static void static VOID
lexstart(struct vars * v) lexstart(v)
struct vars *v;
{ {
prefixes(v); /* may turn on new type bits etc. */ prefixes(v); /* may turn on new type bits etc. */
NOERR(); NOERR();
if (v->cflags & REG_QUOTE) if (v->cflags&REG_QUOTE) {
{ assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE)));
INTOCON(L_Q); INTOCON(L_Q);
} } else if (v->cflags&REG_EXTENDED) {
else if (v->cflags & REG_EXTENDED) assert(!(v->cflags&REG_QUOTE));
{
assert(!(v->cflags & REG_QUOTE));
INTOCON(L_ERE); INTOCON(L_ERE);
} } else {
else assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
{
assert(!(v->cflags & (REG_QUOTE | REG_ADVF)));
INTOCON(L_BRE); INTOCON(L_BRE);
} }
@@ -93,19 +88,20 @@ lexstart(struct vars * v)
} }
/* /*
* prefixes - implement various special prefixes - prefixes - implement various special prefixes
^ static VOID prefixes(struct vars *);
*/ */
static void static VOID
prefixes(struct vars * v) prefixes(v)
struct vars *v;
{ {
/* literal string doesn't get any of this stuff */ /* literal string doesn't get any of this stuff */
if (v->cflags & REG_QUOTE) if (v->cflags&REG_QUOTE)
return; return;
/* initial "***" gets special things */ /* initial "***" gets special things */
if (HAVE(4) && NEXT3('*', '*', '*')) if (HAVE(4) && NEXT3('*', '*', '*'))
switch (*(v->now + 3)) switch (*(v->now + 3)) {
{
case CHR('?'): /* "***?" error, msg shows version */ case CHR('?'): /* "***?" error, msg shows version */
ERR(REG_BADPAT); ERR(REG_BADPAT);
return; /* proceed no further */ return; /* proceed no further */
@@ -113,7 +109,7 @@ prefixes(struct vars * v)
case CHR('='): /* "***=" shifts to literal string */ case CHR('='): /* "***=" shifts to literal string */
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
v->cflags |= REG_QUOTE; v->cflags |= REG_QUOTE;
v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE); v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
v->now += 4; v->now += 4;
return; /* and there can be no more prefixes */ return; /* and there can be no more prefixes */
break; break;
@@ -129,26 +125,24 @@ prefixes(struct vars * v)
} }
/* BREs and EREs don't get embedded options */ /* BREs and EREs don't get embedded options */
if ((v->cflags & REG_ADVANCED) != REG_ADVANCED) if ((v->cflags&REG_ADVANCED) != REG_ADVANCED)
return; return;
/* embedded options (AREs only) */ /* embedded options (AREs only) */
if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
{
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
v->now += 2; v->now += 2;
for (; !ATEOS() && iscalpha(*v->now); v->now++) for (; !ATEOS() && iscalpha(*v->now); v->now++)
switch (*v->now) switch (*v->now) {
{
case CHR('b'): /* BREs (but why???) */ case CHR('b'): /* BREs (but why???) */
v->cflags &= ~(REG_ADVANCED | REG_QUOTE); v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
break; break;
case CHR('c'): /* case sensitive */ case CHR('c'): /* case sensitive */
v->cflags &= ~REG_ICASE; v->cflags &= ~REG_ICASE;
break; break;
case CHR('e'): /* plain EREs */ case CHR('e'): /* plain EREs */
v->cflags |= REG_EXTENDED; v->cflags |= REG_EXTENDED;
v->cflags &= ~(REG_ADVF | REG_QUOTE); v->cflags &= ~(REG_ADVF|REG_QUOTE);
break; break;
case CHR('i'): /* case insensitive */ case CHR('i'): /* case insensitive */
v->cflags |= REG_ICASE; v->cflags |= REG_ICASE;
@@ -182,27 +176,27 @@ prefixes(struct vars * v)
ERR(REG_BADOPT); ERR(REG_BADOPT);
return; return;
} }
if (!NEXT1(')')) if (!NEXT1(')')) {
{
ERR(REG_BADOPT); ERR(REG_BADOPT);
return; return;
} }
v->now++; v->now++;
if (v->cflags & REG_QUOTE) if (v->cflags&REG_QUOTE)
v->cflags &= ~(REG_EXPANDED | REG_NEWLINE); v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
} }
} }
/* /*
* lexnest - "call a subroutine", interpolating string at the lexical level - lexnest - "call a subroutine", interpolating string at the lexical level
*
* Note, this is not a very general facility. There are a number of * Note, this is not a very general facility. There are a number of
* implicit assumptions about what sorts of strings can be subroutines. * implicit assumptions about what sorts of strings can be subroutines.
^ static VOID lexnest(struct vars *, chr *, chr *);
*/ */
static void static VOID
lexnest(struct vars * v, lexnest(v, beginp, endp)
chr *beginp, /* start of interpolation */ struct vars *v;
chr *endp) /* one past end of interpolation */ chr *beginp; /* start of interpolation */
chr *endp; /* one past end of interpolation */
{ {
assert(v->savenow == NULL); /* only one level of nesting */ assert(v->savenow == NULL); /* only one level of nesting */
v->savenow = v->now; v->savenow = v->now;
@@ -261,20 +255,24 @@ static chr brbackw[] = { /* \w within brackets */
}; };
/* /*
* lexword - interpolate a bracket expression for word characters - lexword - interpolate a bracket expression for word characters
* Possibly ought to inquire whether there is a "word" character class. * Possibly ought to inquire whether there is a "word" character class.
^ static VOID lexword(struct vars *);
*/ */
static void static VOID
lexword(struct vars * v) lexword(v)
struct vars *v;
{ {
lexnest(v, backw, ENDOF(backw)); lexnest(v, backw, ENDOF(backw));
} }
/* /*
* next - get next token - next - get next token
^ static int next(struct vars *);
*/ */
static int /* 1 normal, 0 failure */ static int /* 1 normal, 0 failure */
next(struct vars * v) next(v)
struct vars *v;
{ {
chr c; chr c;
@@ -286,24 +284,21 @@ next(struct vars * v)
v->lasttype = v->nexttype; v->lasttype = v->nexttype;
/* REG_BOSONLY */ /* REG_BOSONLY */
if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY)) if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
{
/* at start of a REG_BOSONLY RE */ /* at start of a REG_BOSONLY RE */
RETV(SBEGIN, 0); /* same as \A */ RETV(SBEGIN, 0); /* same as \A */
} }
/* if we're nested and we've hit end, return to outer level */ /* if we're nested and we've hit end, return to outer level */
if (v->savenow != NULL && ATEOS()) if (v->savenow != NULL && ATEOS()) {
{
v->now = v->savenow; v->now = v->savenow;
v->stop = v->savestop; v->stop = v->savestop;
v->savenow = v->savestop = NULL; v->savenow = v->savestop = NULL;
} }
/* skip white space etc. if appropriate (not in literal or []) */ /* skip white space etc. if appropriate (not in literal or []) */
if (v->cflags & REG_EXPANDED) if (v->cflags&REG_EXPANDED)
switch (v->lexcon) switch (v->lexcon) {
{
case L_ERE: case L_ERE:
case L_BRE: case L_BRE:
case L_EBND: case L_EBND:
@@ -313,10 +308,8 @@ next(struct vars * v)
} }
/* handle EOS, depending on context */ /* handle EOS, depending on context */
if (ATEOS()) if (ATEOS()) {
{ switch (v->lexcon) {
switch (v->lexcon)
{
case L_ERE: case L_ERE:
case L_BRE: case L_BRE:
case L_Q: case L_Q:
@@ -340,8 +333,7 @@ next(struct vars * v)
c = *v->now++; c = *v->now++;
/* deal with the easy contexts, punt EREs to code below */ /* deal with the easy contexts, punt EREs to code below */
switch (v->lexcon) switch (v->lexcon) {
{
case L_BRE: /* punt BREs to separate function */ case L_BRE: /* punt BREs to separate function */
return brenext(v, c); return brenext(v, c);
break; break;
@@ -352,46 +344,33 @@ next(struct vars * v)
break; break;
case L_BBND: /* bounds are fairly simple */ case L_BBND: /* bounds are fairly simple */
case L_EBND: case L_EBND:
switch (c) switch (c) {
{ case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
case CHR('0'): case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
case CHR('1'): case CHR('8'): case CHR('9'):
case CHR('2'): RETV(DIGIT, (chr)DIGITVAL(c));
case CHR('3'):
case CHR('4'):
case CHR('5'):
case CHR('6'):
case CHR('7'):
case CHR('8'):
case CHR('9'):
RETV(DIGIT, (chr) DIGITVAL(c));
break; break;
case CHR(','): case CHR(','):
RET(','); RET(',');
break; break;
case CHR('}'): /* ERE bound ends with } */ case CHR('}'): /* ERE bound ends with } */
if (INCON(L_EBND)) if (INCON(L_EBND)) {
{
INTOCON(L_ERE); INTOCON(L_ERE);
if ((v->cflags & REG_ADVF) && NEXT1('?')) if ((v->cflags&REG_ADVF) && NEXT1('?')) {
{
v->now++; v->now++;
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
RETV('}', 0); RETV('}', 0);
} }
RETV('}', 1); RETV('}', 1);
} } else
else
FAILW(REG_BADBR); FAILW(REG_BADBR);
break; break;
case CHR('\\'): /* BRE bound ends with \} */ case CHR('\\'): /* BRE bound ends with \} */
if (INCON(L_BBND) && NEXT1('}')) if (INCON(L_BBND) && NEXT1('}')) {
{
v->now++; v->now++;
INTOCON(L_BRE); INTOCON(L_BRE);
RET('}'); RET('}');
} } else
else
FAILW(REG_BADBR); FAILW(REG_BADBR);
break; break;
default: default:
@@ -401,34 +380,30 @@ next(struct vars * v)
assert(NOTREACHED); assert(NOTREACHED);
break; break;
case L_BRACK: /* brackets are not too hard */ case L_BRACK: /* brackets are not too hard */
switch (c) switch (c) {
{
case CHR(']'): case CHR(']'):
if (LASTTYPE('[')) if (LASTTYPE('['))
RETV(PLAIN, c); RETV(PLAIN, c);
else else {
{ INTOCON((v->cflags&REG_EXTENDED) ?
INTOCON((v->cflags & REG_EXTENDED) ?
L_ERE : L_BRE); L_ERE : L_BRE);
RET(']'); RET(']');
} }
break; break;
case CHR('\\'): case CHR('\\'):
NOTE(REG_UBBS); NOTE(REG_UBBS);
if (!(v->cflags & REG_ADVF)) if (!(v->cflags&REG_ADVF))
RETV(PLAIN, c); RETV(PLAIN, c);
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
if (ATEOS()) if (ATEOS())
FAILW(REG_EESCAPE); FAILW(REG_EESCAPE);
(DISCARD) lexescape(v); (DISCARD)lexescape(v);
switch (v->nexttype) switch (v->nexttype) { /* not all escapes okay here */
{ /* not all escapes okay here */
case PLAIN: case PLAIN:
return 1; return 1;
break; break;
case CCLASS: case CCLASS:
switch (v->nextvalue) switch (v->nextvalue) {
{
case 'd': case 'd':
lexnest(v, brbackd, ENDOF(brbackd)); lexnest(v, brbackd, ENDOF(brbackd));
break; break;
@@ -459,8 +434,7 @@ next(struct vars * v)
case CHR('['): case CHR('['):
if (ATEOS()) if (ATEOS())
FAILW(REG_EBRACK); FAILW(REG_EBRACK);
switch (*v->now++) switch (*v->now++) {
{
case CHR('.'): case CHR('.'):
INTOCON(L_CEL); INTOCON(L_CEL);
/* might or might not be locale-specific */ /* might or might not be locale-specific */
@@ -490,33 +464,27 @@ next(struct vars * v)
assert(NOTREACHED); assert(NOTREACHED);
break; break;
case L_CEL: /* collating elements are easy */ case L_CEL: /* collating elements are easy */
if (c == CHR('.') && NEXT1(']')) if (c == CHR('.') && NEXT1(']')) {
{
v->now++; v->now++;
INTOCON(L_BRACK); INTOCON(L_BRACK);
RETV(END, '.'); RETV(END, '.');
} } else
else
RETV(PLAIN, c); RETV(PLAIN, c);
break; break;
case L_ECL: /* ditto equivalence classes */ case L_ECL: /* ditto equivalence classes */
if (c == CHR('=') && NEXT1(']')) if (c == CHR('=') && NEXT1(']')) {
{
v->now++; v->now++;
INTOCON(L_BRACK); INTOCON(L_BRACK);
RETV(END, '='); RETV(END, '=');
} } else
else
RETV(PLAIN, c); RETV(PLAIN, c);
break; break;
case L_CCL: /* ditto character classes */ case L_CCL: /* ditto character classes */
if (c == CHR(':') && NEXT1(']')) if (c == CHR(':') && NEXT1(']')) {
{
v->now++; v->now++;
INTOCON(L_BRACK); INTOCON(L_BRACK);
RETV(END, ':'); RETV(END, ':');
} } else
else
RETV(PLAIN, c); RETV(PLAIN, c);
break; break;
default: default:
@@ -528,14 +496,12 @@ next(struct vars * v)
assert(INCON(L_ERE)); assert(INCON(L_ERE));
/* deal with EREs and AREs, except for backslashes */ /* deal with EREs and AREs, except for backslashes */
switch (c) switch (c) {
{
case CHR('|'): case CHR('|'):
RET('|'); RET('|');
break; break;
case CHR('*'): case CHR('*'):
if ((v->cflags & REG_ADVF) && NEXT1('?')) if ((v->cflags&REG_ADVF) && NEXT1('?')) {
{
v->now++; v->now++;
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
RETV('*', 0); RETV('*', 0);
@@ -543,8 +509,7 @@ next(struct vars * v)
RETV('*', 1); RETV('*', 1);
break; break;
case CHR('+'): case CHR('+'):
if ((v->cflags & REG_ADVF) && NEXT1('?')) if ((v->cflags&REG_ADVF) && NEXT1('?')) {
{
v->now++; v->now++;
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
RETV('+', 0); RETV('+', 0);
@@ -552,8 +517,7 @@ next(struct vars * v)
RETV('+', 1); RETV('+', 1);
break; break;
case CHR('?'): case CHR('?'):
if ((v->cflags & REG_ADVF) && NEXT1('?')) if ((v->cflags&REG_ADVF) && NEXT1('?')) {
{
v->now++; v->now++;
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
RETV('?', 0); RETV('?', 0);
@@ -561,16 +525,13 @@ next(struct vars * v)
RETV('?', 1); RETV('?', 1);
break; break;
case CHR('{'): /* bounds start or plain character */ case CHR('{'): /* bounds start or plain character */
if (v->cflags & REG_EXPANDED) if (v->cflags&REG_EXPANDED)
skip(v); skip(v);
if (ATEOS() || !iscdigit(*v->now)) if (ATEOS() || !iscdigit(*v->now)) {
{
NOTE(REG_UBRACES); NOTE(REG_UBRACES);
NOTE(REG_UUNSPEC); NOTE(REG_UUNSPEC);
RETV(PLAIN, c); RETV(PLAIN, c);
} } else {
else
{
NOTE(REG_UBOUNDS); NOTE(REG_UBOUNDS);
INTOCON(L_EBND); INTOCON(L_EBND);
RET('{'); RET('{');
@@ -578,12 +539,10 @@ next(struct vars * v)
assert(NOTREACHED); assert(NOTREACHED);
break; break;
case CHR('('): /* parenthesis, or advanced extension */ case CHR('('): /* parenthesis, or advanced extension */
if ((v->cflags & REG_ADVF) && NEXT1('?')) if ((v->cflags&REG_ADVF) && NEXT1('?')) {
{
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
v->now++; v->now++;
switch (*v->now++) switch (*v->now++) {
{
case CHR(':'): /* non-capturing paren */ case CHR(':'): /* non-capturing paren */
RETV('(', 0); RETV('(', 0);
break; break;
@@ -609,33 +568,32 @@ next(struct vars * v)
} }
assert(NOTREACHED); assert(NOTREACHED);
} }
if (v->cflags & REG_NOSUB) if (v->cflags&REG_NOSUB)
RETV('(', 0); /* all parens non-capturing */ RETV('(', 0); /* all parens non-capturing */
else else
RETV('(', 1); RETV('(', 1);
break; break;
case CHR(')'): case CHR(')'):
if (LASTTYPE('(')) if (LASTTYPE('(')) {
NOTE(REG_UUNSPEC); NOTE(REG_UUNSPEC);
}
RETV(')', c); RETV(')', c);
break; break;
case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
if (HAVE(6) && *(v->now + 0) == CHR('[') && if (HAVE(6) && *(v->now+0) == CHR('[') &&
*(v->now + 1) == CHR(':') && *(v->now+1) == CHR(':') &&
(*(v->now + 2) == CHR('<') || (*(v->now+2) == CHR('<') ||
*(v->now + 2) == CHR('>')) && *(v->now+2) == CHR('>')) &&
*(v->now + 3) == CHR(':') && *(v->now+3) == CHR(':') &&
*(v->now + 4) == CHR(']') && *(v->now+4) == CHR(']') &&
*(v->now + 5) == CHR(']')) *(v->now+5) == CHR(']')) {
{ c = *(v->now+2);
c = *(v->now + 2);
v->now += 6; v->now += 6;
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
RET((c == CHR('<')) ? '<' : '>'); RET((c == CHR('<')) ? '<' : '>');
} }
INTOCON(L_BRACK); INTOCON(L_BRACK);
if (NEXT1('^')) if (NEXT1('^')) {
{
v->now++; v->now++;
RETV('[', 0); RETV('[', 0);
} }
@@ -661,40 +619,24 @@ next(struct vars * v)
/* ERE/ARE backslash handling; backslash already eaten */ /* ERE/ARE backslash handling; backslash already eaten */
assert(!ATEOS()); assert(!ATEOS());
if (!(v->cflags & REG_ADVF)) if (!(v->cflags&REG_ADVF)) { /* only AREs have non-trivial escapes */
{ /* only AREs have non-trivial escapes */ if (iscalnum(*v->now)) {
if (iscalnum(*v->now))
{
NOTE(REG_UBSALNUM); NOTE(REG_UBSALNUM);
NOTE(REG_UUNSPEC); NOTE(REG_UUNSPEC);
} }
RETV(PLAIN, *v->now++); RETV(PLAIN, *v->now++);
} }
(DISCARD) lexescape(v); (DISCARD)lexescape(v);
if (ISERR()) if (ISERR())
FAILW(REG_EESCAPE); FAILW(REG_EESCAPE);
if (v->nexttype == CCLASS) if (v->nexttype == CCLASS) { /* fudge at lexical level */
{ /* fudge at lexical level */ switch (v->nextvalue) {
switch (v->nextvalue) case 'd': lexnest(v, backd, ENDOF(backd)); break;
{ case 'D': lexnest(v, backD, ENDOF(backD)); break;
case 'd': case 's': lexnest(v, backs, ENDOF(backs)); break;
lexnest(v, backd, ENDOF(backd)); case 'S': lexnest(v, backS, ENDOF(backS)); break;
break; case 'w': lexnest(v, backw, ENDOF(backw)); break;
case 'D': case 'W': lexnest(v, backW, ENDOF(backW)); break;
lexnest(v, backD, ENDOF(backD));
break;
case 's':
lexnest(v, backs, ENDOF(backs));
break;
case 'S':
lexnest(v, backS, ENDOF(backS));
break;
case 'w':
lexnest(v, backw, ENDOF(backw));
break;
case 'W':
lexnest(v, backW, ENDOF(backW));
break;
default: default:
assert(NOTREACHED); assert(NOTREACHED);
FAILW(REG_ASSERT); FAILW(REG_ASSERT);
@@ -709,12 +651,13 @@ next(struct vars * v)
} }
/* /*
* lexescape - parse an ARE backslash escape (backslash already eaten) - lexescape - parse an ARE backslash escape (backslash already eaten)
* Note slightly nonstandard use of the CCLASS type code. * Note slightly nonstandard use of the CCLASS type code.
^ static int lexescape(struct vars *);
*/ */
static int /* not actually used, but convenient for static int /* not actually used, but convenient for RETV */
* RETV */ lexescape(v)
lexescape(struct vars * v) struct vars *v;
{ {
chr c; chr c;
static chr alert[] = { static chr alert[] = {
@@ -725,7 +668,7 @@ lexescape(struct vars * v)
}; };
chr *save; chr *save;
assert(v->cflags & REG_ADVF); assert(v->cflags&REG_ADVF);
assert(!ATEOS()); assert(!ATEOS());
c = *v->now++; c = *v->now++;
@@ -733,8 +676,7 @@ lexescape(struct vars * v)
RETV(PLAIN, c); RETV(PLAIN, c);
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
switch (c) switch (c) {
{
case CHR('a'): case CHR('a'):
RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
break; break;
@@ -751,7 +693,7 @@ lexescape(struct vars * v)
NOTE(REG_UUNPORT); NOTE(REG_UUNPORT);
if (ATEOS()) if (ATEOS())
FAILW(REG_EESCAPE); FAILW(REG_EESCAPE);
RETV(PLAIN, (chr) (*v->now++ & 037)); RETV(PLAIN, (chr)(*v->now++ & 037));
break; break;
case CHR('d'): case CHR('d'):
NOTE(REG_ULOCALE); NOTE(REG_ULOCALE);
@@ -816,8 +758,7 @@ lexescape(struct vars * v)
break; break;
case CHR('x'): case CHR('x'):
NOTE(REG_UUNPORT); NOTE(REG_UUNPORT);
c = lexdigits(v, 16, 1, 255); /* REs >255 long outside c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
* spec */
if (ISERR()) if (ISERR())
FAILW(REG_EESCAPE); FAILW(REG_EESCAPE);
RETV(PLAIN, c); RETV(PLAIN, c);
@@ -833,26 +774,18 @@ lexescape(struct vars * v)
case CHR('Z'): case CHR('Z'):
RETV(SEND, 0); RETV(SEND, 0);
break; break;
case CHR('1'): case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
case CHR('2'): case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
case CHR('3'):
case CHR('4'):
case CHR('5'):
case CHR('6'):
case CHR('7'):
case CHR('8'):
case CHR('9'): case CHR('9'):
save = v->now; save = v->now;
v->now--; /* put first digit back */ v->now--; /* put first digit back */
c = lexdigits(v, 10, 1, 255); /* REs >255 long outside c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
* spec */
if (ISERR()) if (ISERR())
FAILW(REG_EESCAPE); FAILW(REG_EESCAPE);
/* ugly heuristic (first test is "exactly 1 digit?") */ /* ugly heuristic (first test is "exactly 1 digit?") */
if (v->now - save == 0 || (int) c <= v->nsubexp) if (v->now - save == 0 || (int)c <= v->nsubexp) {
{
NOTE(REG_UBACKREF); NOTE(REG_UBACKREF);
RETV(BACKREF, (chr) c); RETV(BACKREF, (chr)c);
} }
/* oops, doesn't look like it's a backref after all... */ /* oops, doesn't look like it's a backref after all... */
v->now = save; v->now = save;
@@ -874,119 +807,91 @@ lexescape(struct vars * v)
} }
/* /*
* lexdigits - slurp up digits and return chr value - lexdigits - slurp up digits and return chr value
^ static chr lexdigits(struct vars *, int, int, int);
*/ */
static chr /* chr value; errors signalled via ERR */ static chr /* chr value; errors signalled via ERR */
lexdigits(struct vars * v, lexdigits(v, base, minlen, maxlen)
int base, struct vars *v;
int minlen, int base;
int maxlen) int minlen;
int maxlen;
{ {
uchr n; /* unsigned to avoid overflow misbehavior */ uchr n; /* unsigned to avoid overflow misbehavior */
int len; int len;
chr c; chr c;
int d; int d;
const uchr ub = (uchr) base; CONST uchr ub = (uchr) base;
n = 0; n = 0;
for (len = 0; len < maxlen && !ATEOS(); len++) for (len = 0; len < maxlen && !ATEOS(); len++) {
{
c = *v->now++; c = *v->now++;
switch (c) switch (c) {
{ case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
case CHR('0'): case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
case CHR('1'): case CHR('8'): case CHR('9'):
case CHR('2'):
case CHR('3'):
case CHR('4'):
case CHR('5'):
case CHR('6'):
case CHR('7'):
case CHR('8'):
case CHR('9'):
d = DIGITVAL(c); d = DIGITVAL(c);
break; break;
case CHR('a'): case CHR('a'): case CHR('A'): d = 10; break;
case CHR('A'): case CHR('b'): case CHR('B'): d = 11; break;
d = 10; case CHR('c'): case CHR('C'): d = 12; break;
break; case CHR('d'): case CHR('D'): d = 13; break;
case CHR('b'): case CHR('e'): case CHR('E'): d = 14; break;
case CHR('B'): case CHR('f'): case CHR('F'): d = 15; break;
d = 11;
break;
case CHR('c'):
case CHR('C'):
d = 12;
break;
case CHR('d'):
case CHR('D'):
d = 13;
break;
case CHR('e'):
case CHR('E'):
d = 14;
break;
case CHR('f'):
case CHR('F'):
d = 15;
break;
default: default:
v->now--; /* oops, not a digit at all */ v->now--; /* oops, not a digit at all */
d = -1; d = -1;
break; break;
} }
if (d >= base) if (d >= base) { /* not a plausible digit */
{ /* not a plausible digit */
v->now--; v->now--;
d = -1; d = -1;
} }
if (d < 0) if (d < 0)
break; /* NOTE BREAK OUT */ break; /* NOTE BREAK OUT */
n = n * ub + (uchr) d; n = n*ub + (uchr)d;
} }
if (len < minlen) if (len < minlen)
ERR(REG_EESCAPE); ERR(REG_EESCAPE);
return (chr) n; return (chr)n;
} }
/* /*
* brenext - get next BRE token - brenext - get next BRE token
*
* This is much like EREs except for all the stupid backslashes and the * This is much like EREs except for all the stupid backslashes and the
* context-dependency of some things. * context-dependency of some things.
^ static int brenext(struct vars *, pchr);
*/ */
static int /* 1 normal, 0 failure */ static int /* 1 normal, 0 failure */
brenext(struct vars * v, brenext(v, pc)
chr pc) struct vars *v;
pchr pc;
{ {
chr c = (chr) pc; chr c = (chr)pc;
switch (c) switch (c) {
{
case CHR('*'): case CHR('*'):
if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
RETV(PLAIN, c); RETV(PLAIN, c);
RET('*'); RET('*');
break; break;
case CHR('['): case CHR('['):
if (HAVE(6) && *(v->now + 0) == CHR('[') && if (HAVE(6) && *(v->now+0) == CHR('[') &&
*(v->now + 1) == CHR(':') && *(v->now+1) == CHR(':') &&
(*(v->now + 2) == CHR('<') || (*(v->now+2) == CHR('<') ||
*(v->now + 2) == CHR('>')) && *(v->now+2) == CHR('>')) &&
*(v->now + 3) == CHR(':') && *(v->now+3) == CHR(':') &&
*(v->now + 4) == CHR(']') && *(v->now+4) == CHR(']') &&
*(v->now + 5) == CHR(']')) *(v->now+5) == CHR(']')) {
{ c = *(v->now+2);
c = *(v->now + 2);
v->now += 6; v->now += 6;
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
RET((c == CHR('<')) ? '<' : '>'); RET((c == CHR('<')) ? '<' : '>');
} }
INTOCON(L_BRACK); INTOCON(L_BRACK);
if (NEXT1('^')) if (NEXT1('^')) {
{
v->now++; v->now++;
RETV('[', 0); RETV('[', 0);
} }
@@ -998,20 +903,18 @@ brenext(struct vars * v,
case CHR('^'): case CHR('^'):
if (LASTTYPE(EMPTY)) if (LASTTYPE(EMPTY))
RET('^'); RET('^');
if (LASTTYPE('(')) if (LASTTYPE('(')) {
{
NOTE(REG_UUNSPEC); NOTE(REG_UUNSPEC);
RET('^'); RET('^');
} }
RETV(PLAIN, c); RETV(PLAIN, c);
break; break;
case CHR('$'): case CHR('$'):
if (v->cflags & REG_EXPANDED) if (v->cflags&REG_EXPANDED)
skip(v); skip(v);
if (ATEOS()) if (ATEOS())
RET('$'); RET('$');
if (NEXT2('\\', ')')) if (NEXT2('\\', ')')) {
{
NOTE(REG_UUNSPEC); NOTE(REG_UUNSPEC);
RET('$'); RET('$');
} }
@@ -1030,8 +933,7 @@ brenext(struct vars * v,
FAILW(REG_EESCAPE); FAILW(REG_EESCAPE);
c = *v->now++; c = *v->now++;
switch (c) switch (c) {
{
case CHR('{'): case CHR('{'):
INTOCON(L_BBND); INTOCON(L_BBND);
NOTE(REG_UBOUNDS); NOTE(REG_UBOUNDS);
@@ -1051,21 +953,14 @@ brenext(struct vars * v,
NOTE(REG_UNONPOSIX); NOTE(REG_UNONPOSIX);
RET('>'); RET('>');
break; break;
case CHR('1'): case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
case CHR('2'): case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
case CHR('3'):
case CHR('4'):
case CHR('5'):
case CHR('6'):
case CHR('7'):
case CHR('8'):
case CHR('9'): case CHR('9'):
NOTE(REG_UBACKREF); NOTE(REG_UBACKREF);
RETV(BACKREF, (chr) DIGITVAL(c)); RETV(BACKREF, (chr)DIGITVAL(c));
break; break;
default: default:
if (iscalnum(c)) if (iscalnum(c)) {
{
NOTE(REG_UBSALNUM); NOTE(REG_UBSALNUM);
NOTE(REG_UUNSPEC); NOTE(REG_UUNSPEC);
} }
@@ -1077,17 +972,18 @@ brenext(struct vars * v,
} }
/* /*
* skip - skip white space and comments in expanded form - skip - skip white space and comments in expanded form
^ static VOID skip(struct vars *);
*/ */
static void static VOID
skip(struct vars * v) skip(v)
struct vars *v;
{ {
chr *start = v->now; chr *start = v->now;
assert(v->cflags & REG_EXPANDED); assert(v->cflags&REG_EXPANDED);
for (;;) for (;;) {
{
while (!ATEOS() && iscspace(*v->now)) while (!ATEOS() && iscspace(*v->now))
v->now++; v->now++;
if (ATEOS() || *v->now != CHR('#')) if (ATEOS() || *v->now != CHR('#'))
@@ -1103,27 +999,46 @@ skip(struct vars * v)
} }
/* /*
* newline - return the chr for a newline - newline - return the chr for a newline
*
* This helps confine use of CHR to this source file. * This helps confine use of CHR to this source file.
^ static chr newline(NOPARMS);
*/ */
static chr static chr
newline(void) newline()
{ {
return CHR('\n'); return CHR('\n');
} }
/* /*
* chrnamed - return the chr known by a given (chr string) name - ch - return the chr sequence for regc_locale.c's fake collating element ch
* * This helps confine use of CHR to this source file. Beware that the caller
* knows how long the sequence is.
^ #ifdef REG_DEBUG
^ static chr *ch(NOPARMS);
^ #endif
*/
#ifdef REG_DEBUG
static chr *
ch()
{
static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
return chstr;
}
#endif
/*
- chrnamed - return the chr known by a given (chr string) name
* The code is a bit clumsy, but this routine gets only such specialized * The code is a bit clumsy, but this routine gets only such specialized
* use that it hardly matters. * use that it hardly matters.
^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
*/ */
static chr static chr
chrnamed(struct vars * v, chrnamed(v, startp, endp, lastresort)
chr *startp, /* start of name */ struct vars *v;
chr *endp, /* just past end of name */ chr *startp; /* start of name */
chr lastresort) /* what to return if name lookup fails */ chr *endp; /* just past end of name */
pchr lastresort; /* what to return if name lookup fails */
{ {
celt c; celt c;
int errsave; int errsave;
@@ -1137,10 +1052,10 @@ chrnamed(struct vars * v,
v->err = errsave; v->err = errsave;
if (e != 0) if (e != 0)
return (chr) lastresort; return (chr)lastresort;
cv = range(v, c, c, 0); cv = range(v, c, c, 0);
if (cv->nchrs == 0) if (cv->nchrs == 0)
return (chr) lastresort; return (chr)lastresort;
return cv->chrs[0]; return cv->chrs[0];
} }

View File

@@ -1,75 +1,18 @@
/* { REG_OKAY, "REG_OKAY", "no errors detected" },
* $Id$ { REG_NOMATCH, "REG_NOMATCH", "failed to match" },
*/ { REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" },
{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
{ { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
REG_OKAY, "REG_OKAY", "no errors detected" { REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" },
}, { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
{ REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" },
{ { REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" },
REG_NOMATCH, "REG_NOMATCH", "failed to match" { REG_EBRACE, "REG_EBRACE", "braces {} not balanced" },
}, { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
{ REG_ERANGE, "REG_ERANGE", "invalid character range" },
{ { REG_ESPACE, "REG_ESPACE", "out of memory" },
REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" { REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" },
}, { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" },
{ { REG_MIXED, "REG_MIXED", "character widths of regex and string differ" },
REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" { REG_BADOPT, "REG_BADOPT", "invalid embedded option" },
},
{
REG_ECTYPE, "REG_ECTYPE", "invalid character class"
},
{
REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence"
},
{
REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"
},
{
REG_EBRACK, "REG_EBRACK", "brackets [] not balanced"
},
{
REG_EPAREN, "REG_EPAREN", "parentheses () not balanced"
},
{
REG_EBRACE, "REG_EBRACE", "braces {} not balanced"
},
{
REG_BADBR, "REG_BADBR", "invalid repetition count(s)"
},
{
REG_ERANGE, "REG_ERANGE", "invalid character range"
},
{
REG_ESPACE, "REG_ESPACE", "out of memory"
},
{
REG_BADRPT, "REG_BADRPT", "quantifier operand invalid"
},
{
REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"
},
{
REG_INVARG, "REG_INVARG", "invalid argument to regex function"
},
{
REG_MIXED, "REG_MIXED", "character widths of regex and string differ"
},
{
REG_BADOPT, "REG_BADOPT", "invalid embedded option"
},

View File

@@ -1,74 +1,341 @@
#ifndef _REGEX_H_ #ifndef _REGEX_H_
#define _REGEX_H_ /* never again */ #define _REGEX_H_ /* never again */
/* ========= begin header generated by ./mkh ========= */ /*
* regular expressions
*
* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
*
* Development of this software was funded, in part, by Cray Research Inc.,
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
* Corporation, none of whom are responsible for the results. The author
* thanks all of them.
*
* Redistribution and use in source and binary forms -- with or without
* modification -- are permitted for any purpose, provided that
* redistributions in source form retain this entire copyright notice and
* indicate the origin and nature of any modifications.
*
* I'd appreciate being given credit for this package in the documentation
* of software which uses it, but that is not a requirement.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
*
* Prototypes etc. marked with "^" within comments get gathered up (and
* possibly edited) by the regfwd program and inserted near the bottom of
* this file.
*
* We offer the option of declaring one wide-character version of the
* RE functions as well as the char versions. To do that, define
* __REG_WIDE_T to the type of wide characters (unfortunately, there
* is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and
* __REG_WIDE_EXEC to the names to be used for the compile and execute
* functions (suggestion: re_Xcomp and re_Xexec, where X is a letter
* suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode).
* For cranky old compilers, it may be necessary to do something like:
* #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d)
* #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g)
* rather than just #defining the names as parameterless macros.
*
* For some specialized purposes, it may be desirable to suppress the
* declarations of the "front end" functions, regcomp() and regexec(),
* or of the char versions of the compile and execute functions. To
* suppress the front-end functions, define __REG_NOFRONT. To suppress
* the char versions, define __REG_NOCHAR.
*
* The right place to do those defines (and some others you may want, see
* below) would be <sys/types.h>. If you don't have control of that file,
* the right place to add your own defines to this file is marked below.
* This is normally done automatically, by the makefile and regmkhdr, based
* on the contents of regcustom.h.
*/
/*
* voodoo for C++
*/
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
/* === regex2.h === */
typedef off_t regoff_t;
/*
* Add your own defines, if needed, here.
*/
/*
* Location where a chunk of regcustom.h is automatically spliced into
* this file (working from its prototype, regproto.h).
*/
/* --- begin --- */
/* ensure certain things don't sneak in from system headers */
/*
 * Each customization macro is cleared first, so that only the values
 * chosen in the second half of this spliced-in section take effect.
 */
#ifdef __REG_WIDE_T
#undef __REG_WIDE_T
#endif
#ifdef __REG_WIDE_COMPILE
#undef __REG_WIDE_COMPILE
#endif
#ifdef __REG_WIDE_EXEC
#undef __REG_WIDE_EXEC
#endif
#ifdef __REG_REGOFF_T
#undef __REG_REGOFF_T
#endif
#ifdef __REG_VOID_T
#undef __REG_VOID_T
#endif
#ifdef __REG_CONST
#undef __REG_CONST
#endif
#ifdef __REG_NOFRONT
#undef __REG_NOFRONT
#endif
#ifdef __REG_NOCHAR
#undef __REG_NOCHAR
#endif
/* interface types */
/*
 * NOTE(review): Tcl_UniChar, VOID and CONST are not defined in the visible
 * part of this header; presumably the includer supplies them -- confirm.
 */
/* character type used by the wide compile/execute prototypes */
#define __REG_WIDE_T Tcl_UniChar
/* becomes regoff_t */
#define __REG_REGOFF_T long /* not really right, but good enough... */
/* becomes re_void, the return type declared for regfree */
#define __REG_VOID_T VOID
/* qualifier applied to pattern/string pointers in the prototypes */
#define __REG_CONST CONST
/* names and declarations */
/* the wide-character compile and execute functions are declared under these names */
#define __REG_WIDE_COMPILE TclReComp
#define __REG_WIDE_EXEC TclReExec
#define __REG_NOFRONT /* don't want regcomp() and regexec() */
#define __REG_NOCHAR /* or the char versions */
/* rename the remaining entry points, so the prototypes declare TclReFree/TclReError */
#define regfree TclReFree
#define regerror TclReError
/* --- end --- */
/*
* interface types etc.
*/
/*
* regoff_t has to be large enough to hold either off_t or ssize_t,
* and must be signed; it's only a guess that long is suitable, so we
* offer <sys/types.h> an override.
*/
#ifdef __REG_REGOFF_T
/* an override was supplied: use it verbatim as the offset type */
typedef __REG_REGOFF_T regoff_t;
#else
/* no override: fall back to the guess that long is wide enough and signed */
typedef long regoff_t;
#endif
/*
* For benefit of old compilers, we offer <sys/types.h> the option of
* overriding the `void' type used to declare nonexistent return types.
*/
#ifdef __REG_VOID_T
/* an override was supplied: re_void is whatever the customization chose */
typedef __REG_VOID_T re_void;
#else
/* no override: plain void; re_void is the return type declared for regfree */
typedef void re_void;
#endif
/*
* Also for benefit of old compilers, <sys/types.h> can supply a macro
* which expands to a substitute for `const'.
*/
#ifndef __REG_CONST
/* no substitute supplied: use the real keyword in the prototypes */
#define __REG_CONST const
#endif
/*
* other interface types
*/
/* the biggie, a compiled RE (or rather, a front end to same) */
typedef struct { typedef struct {
int re_magic; int re_magic; /* magic number */
size_t re_nsub; /* number of parenthesized subexpressions */ size_t re_nsub; /* number of subexpressions */
const char *re_endp; /* end pointer for REG_PEND */ long re_info; /* information about RE */
struct re_guts *re_g; /* none of your business :-) */ # define REG_UBACKREF 000001
# define REG_ULOOKAHEAD 000002
# define REG_UBOUNDS 000004
# define REG_UBRACES 000010
# define REG_UBSALNUM 000020
# define REG_UPBOTCH 000040
# define REG_UBBS 000100
# define REG_UNONPOSIX 000200
# define REG_UUNSPEC 000400
# define REG_UUNPORT 001000
# define REG_ULOCALE 002000
# define REG_UEMPTYMATCH 004000
# define REG_UIMPOSSIBLE 010000
# define REG_USHORTEST 020000
int re_csize; /* sizeof(character) */
char *re_endp; /* backward compatibility kludge */
/* the rest is opaque pointers to hidden innards */
char *re_guts; /* `char *' is more portable than `void *' */
char *re_fns;
} regex_t; } regex_t;
/* result reporting (may acquire more fields later) */
typedef struct { typedef struct {
regoff_t rm_so; /* start of match */ regoff_t rm_so; /* start of substring */
regoff_t rm_eo; /* end of match */ regoff_t rm_eo; /* end of substring */
} regmatch_t; } regmatch_t;
/* supplementary control and reporting */
/* === regcomp.c === */ typedef struct {
extern int regcomp(regex_t *, const char *, int); regmatch_t rm_extend; /* see REG_EXPECT */
#define REG_BASIC 0000 } rm_detail_t;
#define REG_EXTENDED 0001
#define REG_ICASE 0002
#define REG_NOSUB 0004
#define REG_NEWLINE 0010
#define REG_NOSPEC 0020
#define REG_PEND 0040
#define REG_DUMP 0200
/* === regerror.c === */
#define REG_OKAY 0 /*
#define REG_NOMATCH 1 * compilation
#define REG_BADPAT 2 ^ #ifndef __REG_NOCHAR
#define REG_ECOLLATE 3 ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
#define REG_ECTYPE 4 ^ #endif
#define REG_EESCAPE 5 ^ #ifndef __REG_NOFRONT
#define REG_ESUBREG 6 ^ int regcomp(regex_t *, __REG_CONST char *, int);
#define REG_EBRACK 7 ^ #endif
#define REG_EPAREN 8 ^ #ifdef __REG_WIDE_T
#define REG_EBRACE 9 ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
#define REG_BADBR 10 ^ #endif
#define REG_ERANGE 11 */
#define REG_ESPACE 12 #define REG_BASIC 000000 /* BREs (convenience) */
#define REG_BADRPT 13 #define REG_EXTENDED 000001 /* EREs */
#define REG_EMPTY 14 #define REG_ADVF 000002 /* advanced features in EREs */
#define REG_ASSERT 15 #define REG_ADVANCED 000003 /* AREs (which are also EREs) */
#define REG_INVARG 16 #define REG_QUOTE 000004 /* no special characters, none */
#define REG_ATOI 255 /* convert name to number (!) */ #define REG_NOSPEC REG_QUOTE /* historical synonym */
#define REG_ITOA 0400 /* convert number to name (!) */ #define REG_ICASE 000010 /* ignore case */
extern size_t regerror(int, const regex_t *, char *, size_t); #define REG_NOSUB 000020 /* don't care about subexpressions */
#define REG_EXPANDED 000040 /* expanded format, white space & comments */
#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */
#define REG_NLANCH 000200 /* ^ matches after \n, $ before */
#define REG_NEWLINE 000300 /* newlines are line terminators */
#define REG_PEND 000400 /* ugh -- backward-compatibility hack */
#define REG_EXPECT 001000 /* report details on partial/limited matches */
#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */
#define REG_DUMP 004000 /* none of your business :-) */
#define REG_FAKE 010000 /* none of your business :-) */
#define REG_PROGRESS 020000 /* none of your business :-) */
/* === regexec.c === */
extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int); /*
#define REG_NOTBOL 00001 * execution
#define REG_NOTEOL 00002 ^ #ifndef __REG_NOCHAR
#define REG_STARTEND 00004 ^ int re_exec(regex_t *, __REG_CONST char *, size_t,
#define REG_TRACE 00400 /* tracing of execution */ ^ rm_detail_t *, size_t, regmatch_t [], int);
#define REG_LARGE 01000 /* force large representation */ ^ #endif
#define REG_BACKR 02000 /* force use of backref code */ ^ #ifndef __REG_NOFRONT
^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
^ #endif
^ #ifdef __REG_WIDE_T
^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
^ rm_detail_t *, size_t, regmatch_t [], int);
^ #endif
*/
#define REG_NOTBOL 0001 /* BOS is not BOL */
#define REG_NOTEOL 0002 /* EOS is not EOL */
#define REG_STARTEND 0004 /* backward compatibility kludge */
#define REG_FTRACE 0010 /* none of your business */
#define REG_MTRACE 0020 /* none of your business */
#define REG_SMALL 0040 /* none of your business */
/* === regfree.c === */
extern void regfree(regex_t *);
/*
* misc generics (may be more functions here eventually)
^ re_void regfree(regex_t *);
*/
/*
* error reporting
* Be careful if modifying the list of error codes -- the table used by
* regerror() is generated automatically from this file!
*
* Note that there is no wide-char variant of regerror at this time; what
* kind of character is used for error reports is independent of what kind
* is used in matching.
*
^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
*/
#define REG_OKAY 0 /* no errors detected */
#define REG_NOMATCH 1 /* failed to match */
#define REG_BADPAT 2 /* invalid regexp */
#define REG_ECOLLATE 3 /* invalid collating element */
#define REG_ECTYPE 4 /* invalid character class */
#define REG_EESCAPE 5 /* invalid escape \ sequence */
#define REG_ESUBREG 6 /* invalid backreference number */
#define REG_EBRACK 7 /* brackets [] not balanced */
#define REG_EPAREN 8 /* parentheses () not balanced */
#define REG_EBRACE 9 /* braces {} not balanced */
#define REG_BADBR 10 /* invalid repetition count(s) */
#define REG_ERANGE 11 /* invalid character range */
#define REG_ESPACE 12 /* out of memory */
#define REG_BADRPT 13 /* quantifier operand invalid */
#define REG_ASSERT 15 /* "can't happen" -- you found a bug */
#define REG_INVARG 16 /* invalid argument to regex function */
#define REG_MIXED 17 /* character widths of regex and string differ */
#define REG_BADOPT 18 /* invalid embedded option */
/* two specials for debugging and testing */
#define REG_ATOI 101 /* convert error-code name to number */
#define REG_ITOA 102 /* convert error-code number to name */
/*
* the prototypes, as possibly munched by regfwd
*/
/* =====^!^===== begin forwards =====^!^===== */
/* automatically gathered by fwd; do not hand-edit */
/* === regproto.h === */
#ifndef __REG_NOCHAR
int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int));
#endif
#ifndef __REG_NOFRONT
int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int));
#endif
#ifdef __REG_WIDE_T
int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int));
#endif
#ifndef __REG_NOCHAR
int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
#endif
#ifndef __REG_NOFRONT
int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int));
#endif
#ifdef __REG_WIDE_T
int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
#endif
re_void regfree _ANSI_ARGS_((regex_t *));
extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
/* automatically gathered by fwd; do not hand-edit */
/* =====^!^===== end forwards =====^!^===== */
/*
* more C++ voodoo
*/
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
/* ========= end header generated by ./mkh ========= */
#endif #endif

View File

@@ -26,8 +26,6 @@
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $Id$
*/ */
@@ -45,27 +43,52 @@
* Things that regcustom.h might override. * Things that regcustom.h might override.
*/ */
/* standard header files (NULL is a reasonable indicator for them) */
/*
 * The single NULL test guards all five includes: if NULL is already
 * defined when this point is reached, none of them is pulled in here,
 * so whoever defined NULL must also have made the rest available.
 * NOTE(review): presumably regcustom.h is what includes headers first --
 * confirm.
 */
#ifndef NULL
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#endif
/* assertions */ /* assertions */
#ifndef assert #ifndef assert
#ifndef REG_DEBUG # ifndef REG_DEBUG
# ifndef NDEBUG
# define NDEBUG /* no assertions */ # define NDEBUG /* no assertions */
# endif # endif
#endif
#include <assert.h> #include <assert.h>
#endif #endif
/* voids */ /* voids */
#ifndef VOID
#define VOID void /* for function return values */
#endif
#ifndef DISCARD #ifndef DISCARD
#define DISCARD void /* for throwing values away */ #define DISCARD VOID /* for throwing values away */
#endif
#ifndef PVOID
#define PVOID VOID * /* generic pointer */
#endif #endif
#ifndef VS #ifndef VS
#define VS(x) ((void *)(x)) /* cast something to generic ptr */ #define VS(x) ((PVOID)(x)) /* cast something to generic ptr */
#endif
#ifndef NOPARMS
#define NOPARMS VOID /* for empty parm lists */
#endif
/* const */
#ifndef CONST
#define CONST const /* for old compilers, might be empty */
#endif #endif
/* function-pointer declarator */ /* function-pointer declarator */
#ifndef FUNCPTR #ifndef FUNCPTR
#define FUNCPTR(name, args) (*name) args #if __STDC__ >= 1
#define FUNCPTR(name, args) (*name)args
#else
#define FUNCPTR(name, args) (*name)()
#endif
#endif #endif
/* memory allocation */ /* memory allocation */
@@ -136,8 +159,7 @@
#ifndef BYTBITS #ifndef BYTBITS
#define BYTBITS 8 /* bits in a byt */ #define BYTBITS 8 /* bits in a byt */
#endif #endif
#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt #define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */
* value */
#define BYTMASK (BYTTAB-1) /* bit mask for byt */ #define BYTMASK (BYTTAB-1) /* bit mask for byt */
#define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS) #define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS)
/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */ /* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */
@@ -150,7 +172,6 @@
*/ */
typedef short color; /* colors of characters */ typedef short color; /* colors of characters */
typedef int pcolor; /* what color promotes to */ typedef int pcolor; /* what color promotes to */
#define COLORLESS (-1) /* impossible color */ #define COLORLESS (-1) /* impossible color */
#define WHITE 0 /* default color, parent of all others */ #define WHITE 0 /* default color, parent of all others */
@@ -167,49 +188,43 @@ typedef int pcolor; /* what color promotes to */
*/ */
/* the tree itself */ /* the tree itself */
struct colors struct colors {
{
color ccolor[BYTTAB]; color ccolor[BYTTAB];
}; };
struct ptrs struct ptrs {
{
union tree *pptr[BYTTAB]; union tree *pptr[BYTTAB];
}; };
union tree union tree {
{
struct colors colors; struct colors colors;
struct ptrs ptrs; struct ptrs ptrs;
}; };
#define tcolor colors.ccolor #define tcolor colors.ccolor
#define tptr ptrs.pptr #define tptr ptrs.pptr
/* internal per-color structure for the color machinery */ /* internal per-color structure for the color machinery */
struct colordesc struct colordesc {
{
uchr nchrs; /* number of chars of this color */ uchr nchrs; /* number of chars of this color */
color sub; /* open subcolor (if any); free chain ptr */ color sub; /* open subcolor (if any); free chain ptr */
#define NOSUB COLORLESS # define NOSUB COLORLESS
struct arc *arcs; /* color chain */ struct arc *arcs; /* color chain */
int flags; int flags;
#define FREECOL 01 /* currently free */ # define FREECOL 01 /* currently free */
#define PSEUDO 02 /* pseudocolor, no real chars */ # define PSEUDO 02 /* pseudocolor, no real chars */
#define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL) # define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL)
union tree *block; /* block of solid color, if any */ union tree *block; /* block of solid color, if any */
}; };
/* the color map itself */ /* the color map itself */
struct colormap struct colormap {
{
int magic; int magic;
#define CMMAGIC 0x876 # define CMMAGIC 0x876
struct vars *v; /* for compile error reporting */ struct vars *v; /* for compile error reporting */
size_t ncds; /* number of colordescs */ size_t ncds; /* number of colordescs */
size_t max; /* highest in use */ size_t max; /* highest in use */
color free; /* beginning of free chain (if non-0) */ color free; /* beginning of free chain (if non-0) */
struct colordesc *cd; struct colordesc *cd;
#define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) # define CDEND(cm) (&(cm)->cd[(cm)->max + 1])
#define NINLINECDS ((size_t)10) # define NINLINECDS ((size_t)10)
struct colordesc cdspace[NINLINECDS]; struct colordesc cdspace[NINLINECDS];
union tree tree[NBYTS]; /* tree top, plus fill blocks */ union tree tree[NBYTS]; /* tree top, plus fill blocks */
}; };
@@ -236,8 +251,7 @@ struct colormap
* Interface definitions for locale-interface functions in locale.c. * Interface definitions for locale-interface functions in locale.c.
* Multi-character collating elements (MCCEs) cause most of the trouble. * Multi-character collating elements (MCCEs) cause most of the trouble.
*/ */
struct cvec struct cvec {
{
int nchrs; /* number of chrs */ int nchrs; /* number of chrs */
int chrspace; /* number of chrs possible */ int chrspace; /* number of chrs possible */
chr *chrs; /* pointer to vector of chrs */ chr *chrs; /* pointer to vector of chrs */
@@ -264,30 +278,27 @@ struct cvec
*/ */
struct state; struct state;
struct arc struct arc {
{
int type; int type;
#define ARCFREE '\0' # define ARCFREE '\0'
color co; color co;
struct state *from; /* where it's from (and contained within) */ struct state *from; /* where it's from (and contained within) */
struct state *to; /* where it's to */ struct state *to; /* where it's to */
struct arc *outchain; /* *from's outs chain or free chain */ struct arc *outchain; /* *from's outs chain or free chain */
#define freechain outchain # define freechain outchain
struct arc *inchain; /* *to's ins chain */ struct arc *inchain; /* *to's ins chain */
struct arc *colorchain; /* color's arc chain */ struct arc *colorchain; /* color's arc chain */
}; };
struct arcbatch struct arcbatch { /* for bulk allocation of arcs */
{ /* for bulk allocation of arcs */
struct arcbatch *next; struct arcbatch *next;
#define ABSIZE 10 # define ABSIZE 10
struct arc a[ABSIZE]; struct arc a[ABSIZE];
}; };
struct state struct state {
{
int no; int no;
#define FREESTATE (-1) # define FREESTATE (-1)
char flag; /* marks special states */ char flag; /* marks special states */
int nins; /* number of inarcs */ int nins; /* number of inarcs */
struct arc *ins; /* chain of inarcs */ struct arc *ins; /* chain of inarcs */
@@ -297,13 +308,11 @@ struct state
struct state *tmp; /* temporary for traversal algorithms */ struct state *tmp; /* temporary for traversal algorithms */
struct state *next; /* chain for traversing all */ struct state *next; /* chain for traversing all */
struct state *prev; /* back chain */ struct state *prev; /* back chain */
struct arcbatch oas; /* first arcbatch, avoid malloc in easy struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */
* case */
int noas; /* number of arcs used in first arcbatch */ int noas; /* number of arcs used in first arcbatch */
}; };
struct nfa struct nfa {
{
struct state *pre; /* pre-initial state */ struct state *pre; /* pre-initial state */
struct state *init; /* initial state */ struct state *init; /* initial state */
struct state *final; /* final state */ struct state *final; /* final state */
@@ -324,18 +333,16 @@ struct nfa
/* /*
* definitions for compacted NFA * definitions for compacted NFA
*/ */
struct carc struct carc {
{
color co; /* COLORLESS is list terminator */ color co; /* COLORLESS is list terminator */
int to; /* state number */ int to; /* state number */
}; };
struct cnfa struct cnfa {
{
int nstates; /* number of states */ int nstates; /* number of states */
int ncolors; /* number of colors */ int ncolors; /* number of colors */
int flags; int flags;
#define HASLACONS 01 /* uses lookahead constraints */ # define HASLACONS 01 /* uses lookahead constraints */
int pre; /* setup state number */ int pre; /* setup state number */
int post; /* teardown state number */ int post; /* teardown state number */
color bos[2]; /* colors, if any, assigned to BOS and BOL */ color bos[2]; /* colors, if any, assigned to BOS and BOL */
@@ -343,7 +350,6 @@ struct cnfa
struct carc **states; /* vector of pointers to outarc lists */ struct carc **states; /* vector of pointers to outarc lists */
struct carc *arcs; /* the area for the lists */ struct carc *arcs; /* the area for the lists */
}; };
#define ZAPCNFA(cnfa) ((cnfa).nstates = 0) #define ZAPCNFA(cnfa) ((cnfa).nstates = 0)
#define NULLCNFA(cnfa) ((cnfa).nstates == 0) #define NULLCNFA(cnfa) ((cnfa).nstates == 0)
@@ -352,31 +358,28 @@ struct cnfa
/* /*
* subexpression tree * subexpression tree
*/ */
struct subre struct subre {
{ char op; /* '|', '.' (concat), 'b' (backref), '(', '=' */
char op; /* '|', '.' (concat), 'b' (backref), '(',
* '=' */
char flags; char flags;
#define LONGER 01 /* prefers longer match */ # define LONGER 01 /* prefers longer match */
#define SHORTER 02 /* prefers shorter match */ # define SHORTER 02 /* prefers shorter match */
#define MIXED 04 /* mixed preference below */ # define MIXED 04 /* mixed preference below */
#define CAP 010 /* capturing parens below */ # define CAP 010 /* capturing parens below */
#define BACKR 020 /* back reference below */ # define BACKR 020 /* back reference below */
#define INUSE 0100 /* in use in final tree */ # define INUSE 0100 /* in use in final tree */
#define LOCAL 03 /* bits which may not propagate up */ # define LOCAL 03 /* bits which may not propagate up */
#define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ # define LMIX(f) ((f)<<2) /* LONGER -> MIXED */
#define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ # define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */
#define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) # define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
#define MESSY(f) ((f)&(MIXED|CAP|BACKR)) # define MESSY(f) ((f)&(MIXED|CAP|BACKR))
#define PREF(f) ((f)&LOCAL) # define PREF(f) ((f)&LOCAL)
#define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) # define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) # define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
short retry; /* index into retry memory */ short retry; /* index into retry memory */
int subno; /* subexpression number (for 'b' and '(') */ int subno; /* subexpression number (for 'b' and '(') */
short min; /* min repetitions, for backref only */ short min; /* min repetitions, for backref only */
short max; /* max repetitions, for backref only */ short max; /* max repetitions, for backref only */
struct subre *left; /* left child, if any (also freelist struct subre *left; /* left child, if any (also freelist chain) */
* chain) */
struct subre *right; /* right child, if any */ struct subre *right; /* right child, if any */
struct state *begin; /* outarcs from here... */ struct state *begin; /* outarcs from here... */
struct state *end; /* ...ending in inarcs here */ struct state *end; /* ...ending in inarcs here */
@@ -390,9 +393,8 @@ struct subre
* table of function pointers for generic manipulation functions * table of function pointers for generic manipulation functions
* A regex_t's re_fns points to one of these. * A regex_t's re_fns points to one of these.
*/ */
struct fns struct fns {
{ VOID FUNCPTR(free, (regex_t *));
void FUNCPTR(free, (regex_t *));
}; };
@@ -400,10 +402,9 @@ struct fns
/* /*
* the insides of a regex_t, hidden behind a void * * the insides of a regex_t, hidden behind a void *
*/ */
struct guts struct guts {
{
int magic; int magic;
#define GUTSMAGIC 0xfed9 # define GUTSMAGIC 0xfed9
int cflags; /* copy of compile flags */ int cflags; /* copy of compile flags */
long info; /* copy of re_info */ long info; /* copy of re_info */
size_t nsub; /* copy of re_nsub */ size_t nsub; /* copy of re_nsub */
@@ -411,7 +412,7 @@ struct guts
struct cnfa search; /* for fast preliminary search */ struct cnfa search; /* for fast preliminary search */
int ntree; int ntree;
struct colormap cmap; struct colormap cmap;
int FUNCPTR(compare, (const chr *, const chr *, size_t)); int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t));
struct subre *lacons; /* lookahead-constraint vector */ struct subre *lacons; /* lookahead-constraint vector */
int nlacons; /* size of lacons */ int nlacons; /* size of lacons */
}; };