Import regex from tcl 8.4.5
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/branches/RXSPENCER@3951 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -28,8 +28,6 @@
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* $Header$
|
||||
*
|
||||
*/
|
||||
|
||||
/* scanning macros (know about v) */
|
||||
@@ -64,26 +62,23 @@
|
||||
#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr))
|
||||
|
||||
/*
|
||||
* lexstart - set up lexical stuff, scan leading options
|
||||
- lexstart - set up lexical stuff, scan leading options
|
||||
^ static VOID lexstart(struct vars *);
|
||||
*/
|
||||
static void
|
||||
lexstart(struct vars * v)
|
||||
static VOID
|
||||
lexstart(v)
|
||||
struct vars *v;
|
||||
{
|
||||
prefixes(v); /* may turn on new type bits etc. */
|
||||
NOERR();
|
||||
|
||||
if (v->cflags & REG_QUOTE)
|
||||
{
|
||||
if (v->cflags&REG_QUOTE) {
|
||||
assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
|
||||
INTOCON(L_Q);
|
||||
}
|
||||
else if (v->cflags & REG_EXTENDED)
|
||||
{
|
||||
} else if (v->cflags&REG_EXTENDED) {
|
||||
assert(!(v->cflags&REG_QUOTE));
|
||||
INTOCON(L_ERE);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
|
||||
INTOCON(L_BRE);
|
||||
}
|
||||
@@ -93,10 +88,12 @@ lexstart(struct vars * v)
|
||||
}
|
||||
|
||||
/*
|
||||
* prefixes - implement various special prefixes
|
||||
- prefixes - implement various special prefixes
|
||||
^ static VOID prefixes(struct vars *);
|
||||
*/
|
||||
static void
|
||||
prefixes(struct vars * v)
|
||||
static VOID
|
||||
prefixes(v)
|
||||
struct vars *v;
|
||||
{
|
||||
/* literal string doesn't get any of this stuff */
|
||||
if (v->cflags&REG_QUOTE)
|
||||
@@ -104,8 +101,7 @@ prefixes(struct vars * v)
|
||||
|
||||
/* initial "***" gets special things */
|
||||
if (HAVE(4) && NEXT3('*', '*', '*'))
|
||||
switch (*(v->now + 3))
|
||||
{
|
||||
switch (*(v->now + 3)) {
|
||||
case CHR('?'): /* "***?" error, msg shows version */
|
||||
ERR(REG_BADPAT);
|
||||
return; /* proceed no further */
|
||||
@@ -133,13 +129,11 @@ prefixes(struct vars * v)
|
||||
return;
|
||||
|
||||
/* embedded options (AREs only) */
|
||||
if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2)))
|
||||
{
|
||||
if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
|
||||
NOTE(REG_UNONPOSIX);
|
||||
v->now += 2;
|
||||
for (; !ATEOS() && iscalpha(*v->now); v->now++)
|
||||
switch (*v->now)
|
||||
{
|
||||
switch (*v->now) {
|
||||
case CHR('b'): /* BREs (but why???) */
|
||||
v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
|
||||
break;
|
||||
@@ -182,8 +176,7 @@ prefixes(struct vars * v)
|
||||
ERR(REG_BADOPT);
|
||||
return;
|
||||
}
|
||||
if (!NEXT1(')'))
|
||||
{
|
||||
if (!NEXT1(')')) {
|
||||
ERR(REG_BADOPT);
|
||||
return;
|
||||
}
|
||||
@@ -194,15 +187,16 @@ prefixes(struct vars * v)
|
||||
}
|
||||
|
||||
/*
|
||||
* lexnest - "call a subroutine", interpolating string at the lexical level
|
||||
*
|
||||
- lexnest - "call a subroutine", interpolating string at the lexical level
|
||||
* Note, this is not a very general facility. There are a number of
|
||||
* implicit assumptions about what sorts of strings can be subroutines.
|
||||
^ static VOID lexnest(struct vars *, chr *, chr *);
|
||||
*/
|
||||
static void
|
||||
lexnest(struct vars * v,
|
||||
chr *beginp, /* start of interpolation */
|
||||
chr *endp) /* one past end of interpolation */
|
||||
static VOID
|
||||
lexnest(v, beginp, endp)
|
||||
struct vars *v;
|
||||
chr *beginp; /* start of interpolation */
|
||||
chr *endp; /* one past end of interpolation */
|
||||
{
|
||||
assert(v->savenow == NULL); /* only one level of nesting */
|
||||
v->savenow = v->now;
|
||||
@@ -261,20 +255,24 @@ static chr brbackw[] = { /* \w within brackets */
|
||||
};
|
||||
|
||||
/*
|
||||
* lexword - interpolate a bracket expression for word characters
|
||||
- lexword - interpolate a bracket expression for word characters
|
||||
* Possibly ought to inquire whether there is a "word" character class.
|
||||
^ static VOID lexword(struct vars *);
|
||||
*/
|
||||
static void
|
||||
lexword(struct vars * v)
|
||||
static VOID
|
||||
lexword(v)
|
||||
struct vars *v;
|
||||
{
|
||||
lexnest(v, backw, ENDOF(backw));
|
||||
}
|
||||
|
||||
/*
|
||||
* next - get next token
|
||||
- next - get next token
|
||||
^ static int next(struct vars *);
|
||||
*/
|
||||
static int /* 1 normal, 0 failure */
|
||||
next(struct vars * v)
|
||||
next(v)
|
||||
struct vars *v;
|
||||
{
|
||||
chr c;
|
||||
|
||||
@@ -286,15 +284,13 @@ next(struct vars * v)
|
||||
v->lasttype = v->nexttype;
|
||||
|
||||
/* REG_BOSONLY */
|
||||
if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY))
|
||||
{
|
||||
if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
|
||||
/* at start of a REG_BOSONLY RE */
|
||||
RETV(SBEGIN, 0); /* same as \A */
|
||||
}
|
||||
|
||||
/* if we're nested and we've hit end, return to outer level */
|
||||
if (v->savenow != NULL && ATEOS())
|
||||
{
|
||||
if (v->savenow != NULL && ATEOS()) {
|
||||
v->now = v->savenow;
|
||||
v->stop = v->savestop;
|
||||
v->savenow = v->savestop = NULL;
|
||||
@@ -302,8 +298,7 @@ next(struct vars * v)
|
||||
|
||||
/* skip white space etc. if appropriate (not in literal or []) */
|
||||
if (v->cflags&REG_EXPANDED)
|
||||
switch (v->lexcon)
|
||||
{
|
||||
switch (v->lexcon) {
|
||||
case L_ERE:
|
||||
case L_BRE:
|
||||
case L_EBND:
|
||||
@@ -313,10 +308,8 @@ next(struct vars * v)
|
||||
}
|
||||
|
||||
/* handle EOS, depending on context */
|
||||
if (ATEOS())
|
||||
{
|
||||
switch (v->lexcon)
|
||||
{
|
||||
if (ATEOS()) {
|
||||
switch (v->lexcon) {
|
||||
case L_ERE:
|
||||
case L_BRE:
|
||||
case L_Q:
|
||||
@@ -340,8 +333,7 @@ next(struct vars * v)
|
||||
c = *v->now++;
|
||||
|
||||
/* deal with the easy contexts, punt EREs to code below */
|
||||
switch (v->lexcon)
|
||||
{
|
||||
switch (v->lexcon) {
|
||||
case L_BRE: /* punt BREs to separate function */
|
||||
return brenext(v, c);
|
||||
break;
|
||||
@@ -352,46 +344,33 @@ next(struct vars * v)
|
||||
break;
|
||||
case L_BBND: /* bounds are fairly simple */
|
||||
case L_EBND:
|
||||
switch (c)
|
||||
{
|
||||
case CHR('0'):
|
||||
case CHR('1'):
|
||||
case CHR('2'):
|
||||
case CHR('3'):
|
||||
case CHR('4'):
|
||||
case CHR('5'):
|
||||
case CHR('6'):
|
||||
case CHR('7'):
|
||||
case CHR('8'):
|
||||
case CHR('9'):
|
||||
switch (c) {
|
||||
case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
|
||||
case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
|
||||
case CHR('8'): case CHR('9'):
|
||||
RETV(DIGIT, (chr)DIGITVAL(c));
|
||||
break;
|
||||
case CHR(','):
|
||||
RET(',');
|
||||
break;
|
||||
case CHR('}'): /* ERE bound ends with } */
|
||||
if (INCON(L_EBND))
|
||||
{
|
||||
if (INCON(L_EBND)) {
|
||||
INTOCON(L_ERE);
|
||||
if ((v->cflags & REG_ADVF) && NEXT1('?'))
|
||||
{
|
||||
if ((v->cflags&REG_ADVF) && NEXT1('?')) {
|
||||
v->now++;
|
||||
NOTE(REG_UNONPOSIX);
|
||||
RETV('}', 0);
|
||||
}
|
||||
RETV('}', 1);
|
||||
}
|
||||
else
|
||||
} else
|
||||
FAILW(REG_BADBR);
|
||||
break;
|
||||
case CHR('\\'): /* BRE bound ends with \} */
|
||||
if (INCON(L_BBND) && NEXT1('}'))
|
||||
{
|
||||
if (INCON(L_BBND) && NEXT1('}')) {
|
||||
v->now++;
|
||||
INTOCON(L_BRE);
|
||||
RET('}');
|
||||
}
|
||||
else
|
||||
} else
|
||||
FAILW(REG_BADBR);
|
||||
break;
|
||||
default:
|
||||
@@ -401,13 +380,11 @@ next(struct vars * v)
|
||||
assert(NOTREACHED);
|
||||
break;
|
||||
case L_BRACK: /* brackets are not too hard */
|
||||
switch (c)
|
||||
{
|
||||
switch (c) {
|
||||
case CHR(']'):
|
||||
if (LASTTYPE('['))
|
||||
RETV(PLAIN, c);
|
||||
else
|
||||
{
|
||||
else {
|
||||
INTOCON((v->cflags&REG_EXTENDED) ?
|
||||
L_ERE : L_BRE);
|
||||
RET(']');
|
||||
@@ -421,14 +398,12 @@ next(struct vars * v)
|
||||
if (ATEOS())
|
||||
FAILW(REG_EESCAPE);
|
||||
(DISCARD)lexescape(v);
|
||||
switch (v->nexttype)
|
||||
{ /* not all escapes okay here */
|
||||
switch (v->nexttype) { /* not all escapes okay here */
|
||||
case PLAIN:
|
||||
return 1;
|
||||
break;
|
||||
case CCLASS:
|
||||
switch (v->nextvalue)
|
||||
{
|
||||
switch (v->nextvalue) {
|
||||
case 'd':
|
||||
lexnest(v, brbackd, ENDOF(brbackd));
|
||||
break;
|
||||
@@ -459,8 +434,7 @@ next(struct vars * v)
|
||||
case CHR('['):
|
||||
if (ATEOS())
|
||||
FAILW(REG_EBRACK);
|
||||
switch (*v->now++)
|
||||
{
|
||||
switch (*v->now++) {
|
||||
case CHR('.'):
|
||||
INTOCON(L_CEL);
|
||||
/* might or might not be locale-specific */
|
||||
@@ -490,33 +464,27 @@ next(struct vars * v)
|
||||
assert(NOTREACHED);
|
||||
break;
|
||||
case L_CEL: /* collating elements are easy */
|
||||
if (c == CHR('.') && NEXT1(']'))
|
||||
{
|
||||
if (c == CHR('.') && NEXT1(']')) {
|
||||
v->now++;
|
||||
INTOCON(L_BRACK);
|
||||
RETV(END, '.');
|
||||
}
|
||||
else
|
||||
} else
|
||||
RETV(PLAIN, c);
|
||||
break;
|
||||
case L_ECL: /* ditto equivalence classes */
|
||||
if (c == CHR('=') && NEXT1(']'))
|
||||
{
|
||||
if (c == CHR('=') && NEXT1(']')) {
|
||||
v->now++;
|
||||
INTOCON(L_BRACK);
|
||||
RETV(END, '=');
|
||||
}
|
||||
else
|
||||
} else
|
||||
RETV(PLAIN, c);
|
||||
break;
|
||||
case L_CCL: /* ditto character classes */
|
||||
if (c == CHR(':') && NEXT1(']'))
|
||||
{
|
||||
if (c == CHR(':') && NEXT1(']')) {
|
||||
v->now++;
|
||||
INTOCON(L_BRACK);
|
||||
RETV(END, ':');
|
||||
}
|
||||
else
|
||||
} else
|
||||
RETV(PLAIN, c);
|
||||
break;
|
||||
default:
|
||||
@@ -528,14 +496,12 @@ next(struct vars * v)
|
||||
assert(INCON(L_ERE));
|
||||
|
||||
/* deal with EREs and AREs, except for backslashes */
|
||||
switch (c)
|
||||
{
|
||||
switch (c) {
|
||||
case CHR('|'):
|
||||
RET('|');
|
||||
break;
|
||||
case CHR('*'):
|
||||
if ((v->cflags & REG_ADVF) && NEXT1('?'))
|
||||
{
|
||||
if ((v->cflags&REG_ADVF) && NEXT1('?')) {
|
||||
v->now++;
|
||||
NOTE(REG_UNONPOSIX);
|
||||
RETV('*', 0);
|
||||
@@ -543,8 +509,7 @@ next(struct vars * v)
|
||||
RETV('*', 1);
|
||||
break;
|
||||
case CHR('+'):
|
||||
if ((v->cflags & REG_ADVF) && NEXT1('?'))
|
||||
{
|
||||
if ((v->cflags&REG_ADVF) && NEXT1('?')) {
|
||||
v->now++;
|
||||
NOTE(REG_UNONPOSIX);
|
||||
RETV('+', 0);
|
||||
@@ -552,8 +517,7 @@ next(struct vars * v)
|
||||
RETV('+', 1);
|
||||
break;
|
||||
case CHR('?'):
|
||||
if ((v->cflags & REG_ADVF) && NEXT1('?'))
|
||||
{
|
||||
if ((v->cflags&REG_ADVF) && NEXT1('?')) {
|
||||
v->now++;
|
||||
NOTE(REG_UNONPOSIX);
|
||||
RETV('?', 0);
|
||||
@@ -563,14 +527,11 @@ next(struct vars * v)
|
||||
case CHR('{'): /* bounds start or plain character */
|
||||
if (v->cflags&REG_EXPANDED)
|
||||
skip(v);
|
||||
if (ATEOS() || !iscdigit(*v->now))
|
||||
{
|
||||
if (ATEOS() || !iscdigit(*v->now)) {
|
||||
NOTE(REG_UBRACES);
|
||||
NOTE(REG_UUNSPEC);
|
||||
RETV(PLAIN, c);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
NOTE(REG_UBOUNDS);
|
||||
INTOCON(L_EBND);
|
||||
RET('{');
|
||||
@@ -578,12 +539,10 @@ next(struct vars * v)
|
||||
assert(NOTREACHED);
|
||||
break;
|
||||
case CHR('('): /* parenthesis, or advanced extension */
|
||||
if ((v->cflags & REG_ADVF) && NEXT1('?'))
|
||||
{
|
||||
if ((v->cflags&REG_ADVF) && NEXT1('?')) {
|
||||
NOTE(REG_UNONPOSIX);
|
||||
v->now++;
|
||||
switch (*v->now++)
|
||||
{
|
||||
switch (*v->now++) {
|
||||
case CHR(':'): /* non-capturing paren */
|
||||
RETV('(', 0);
|
||||
break;
|
||||
@@ -615,8 +574,9 @@ next(struct vars * v)
|
||||
RETV('(', 1);
|
||||
break;
|
||||
case CHR(')'):
|
||||
if (LASTTYPE('('))
|
||||
if (LASTTYPE('(')) {
|
||||
NOTE(REG_UUNSPEC);
|
||||
}
|
||||
RETV(')', c);
|
||||
break;
|
||||
case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
|
||||
@@ -626,16 +586,14 @@ next(struct vars * v)
|
||||
*(v->now+2) == CHR('>')) &&
|
||||
*(v->now+3) == CHR(':') &&
|
||||
*(v->now+4) == CHR(']') &&
|
||||
*(v->now + 5) == CHR(']'))
|
||||
{
|
||||
*(v->now+5) == CHR(']')) {
|
||||
c = *(v->now+2);
|
||||
v->now += 6;
|
||||
NOTE(REG_UNONPOSIX);
|
||||
RET((c == CHR('<')) ? '<' : '>');
|
||||
}
|
||||
INTOCON(L_BRACK);
|
||||
if (NEXT1('^'))
|
||||
{
|
||||
if (NEXT1('^')) {
|
||||
v->now++;
|
||||
RETV('[', 0);
|
||||
}
|
||||
@@ -661,10 +619,8 @@ next(struct vars * v)
|
||||
|
||||
/* ERE/ARE backslash handling; backslash already eaten */
|
||||
assert(!ATEOS());
|
||||
if (!(v->cflags & REG_ADVF))
|
||||
{ /* only AREs have non-trivial escapes */
|
||||
if (iscalnum(*v->now))
|
||||
{
|
||||
if (!(v->cflags&REG_ADVF)) { /* only AREs have non-trivial escapes */
|
||||
if (iscalnum(*v->now)) {
|
||||
NOTE(REG_UBSALNUM);
|
||||
NOTE(REG_UUNSPEC);
|
||||
}
|
||||
@@ -673,28 +629,14 @@ next(struct vars * v)
|
||||
(DISCARD)lexescape(v);
|
||||
if (ISERR())
|
||||
FAILW(REG_EESCAPE);
|
||||
if (v->nexttype == CCLASS)
|
||||
{ /* fudge at lexical level */
|
||||
switch (v->nextvalue)
|
||||
{
|
||||
case 'd':
|
||||
lexnest(v, backd, ENDOF(backd));
|
||||
break;
|
||||
case 'D':
|
||||
lexnest(v, backD, ENDOF(backD));
|
||||
break;
|
||||
case 's':
|
||||
lexnest(v, backs, ENDOF(backs));
|
||||
break;
|
||||
case 'S':
|
||||
lexnest(v, backS, ENDOF(backS));
|
||||
break;
|
||||
case 'w':
|
||||
lexnest(v, backw, ENDOF(backw));
|
||||
break;
|
||||
case 'W':
|
||||
lexnest(v, backW, ENDOF(backW));
|
||||
break;
|
||||
if (v->nexttype == CCLASS) { /* fudge at lexical level */
|
||||
switch (v->nextvalue) {
|
||||
case 'd': lexnest(v, backd, ENDOF(backd)); break;
|
||||
case 'D': lexnest(v, backD, ENDOF(backD)); break;
|
||||
case 's': lexnest(v, backs, ENDOF(backs)); break;
|
||||
case 'S': lexnest(v, backS, ENDOF(backS)); break;
|
||||
case 'w': lexnest(v, backw, ENDOF(backw)); break;
|
||||
case 'W': lexnest(v, backW, ENDOF(backW)); break;
|
||||
default:
|
||||
assert(NOTREACHED);
|
||||
FAILW(REG_ASSERT);
|
||||
@@ -709,12 +651,13 @@ next(struct vars * v)
|
||||
}
|
||||
|
||||
/*
|
||||
* lexescape - parse an ARE backslash escape (backslash already eaten)
|
||||
- lexescape - parse an ARE backslash escape (backslash already eaten)
|
||||
* Note slightly nonstandard use of the CCLASS type code.
|
||||
^ static int lexescape(struct vars *);
|
||||
*/
|
||||
static int /* not actually used, but convenient for
|
||||
* RETV */
|
||||
lexescape(struct vars * v)
|
||||
static int /* not actually used, but convenient for RETV */
|
||||
lexescape(v)
|
||||
struct vars *v;
|
||||
{
|
||||
chr c;
|
||||
static chr alert[] = {
|
||||
@@ -733,8 +676,7 @@ lexescape(struct vars * v)
|
||||
RETV(PLAIN, c);
|
||||
|
||||
NOTE(REG_UNONPOSIX);
|
||||
switch (c)
|
||||
{
|
||||
switch (c) {
|
||||
case CHR('a'):
|
||||
RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
|
||||
break;
|
||||
@@ -816,8 +758,7 @@ lexescape(struct vars * v)
|
||||
break;
|
||||
case CHR('x'):
|
||||
NOTE(REG_UUNPORT);
|
||||
c = lexdigits(v, 16, 1, 255); /* REs >255 long outside
|
||||
* spec */
|
||||
c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
|
||||
if (ISERR())
|
||||
FAILW(REG_EESCAPE);
|
||||
RETV(PLAIN, c);
|
||||
@@ -833,24 +774,16 @@ lexescape(struct vars * v)
|
||||
case CHR('Z'):
|
||||
RETV(SEND, 0);
|
||||
break;
|
||||
case CHR('1'):
|
||||
case CHR('2'):
|
||||
case CHR('3'):
|
||||
case CHR('4'):
|
||||
case CHR('5'):
|
||||
case CHR('6'):
|
||||
case CHR('7'):
|
||||
case CHR('8'):
|
||||
case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
|
||||
case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
|
||||
case CHR('9'):
|
||||
save = v->now;
|
||||
v->now--; /* put first digit back */
|
||||
c = lexdigits(v, 10, 1, 255); /* REs >255 long outside
|
||||
* spec */
|
||||
c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
|
||||
if (ISERR())
|
||||
FAILW(REG_EESCAPE);
|
||||
/* ugly heuristic (first test is "exactly 1 digit?") */
|
||||
if (v->now - save == 0 || (int) c <= v->nsubexp)
|
||||
{
|
||||
if (v->now - save == 0 || (int)c <= v->nsubexp) {
|
||||
NOTE(REG_UBACKREF);
|
||||
RETV(BACKREF, (chr)c);
|
||||
}
|
||||
@@ -874,70 +807,44 @@ lexescape(struct vars * v)
|
||||
}
|
||||
|
||||
/*
|
||||
* lexdigits - slurp up digits and return chr value
|
||||
- lexdigits - slurp up digits and return chr value
|
||||
^ static chr lexdigits(struct vars *, int, int, int);
|
||||
*/
|
||||
static chr /* chr value; errors signalled via ERR */
|
||||
lexdigits(struct vars * v,
|
||||
int base,
|
||||
int minlen,
|
||||
int maxlen)
|
||||
lexdigits(v, base, minlen, maxlen)
|
||||
struct vars *v;
|
||||
int base;
|
||||
int minlen;
|
||||
int maxlen;
|
||||
{
|
||||
uchr n; /* unsigned to avoid overflow misbehavior */
|
||||
int len;
|
||||
chr c;
|
||||
int d;
|
||||
const uchr ub = (uchr) base;
|
||||
CONST uchr ub = (uchr) base;
|
||||
|
||||
n = 0;
|
||||
for (len = 0; len < maxlen && !ATEOS(); len++)
|
||||
{
|
||||
for (len = 0; len < maxlen && !ATEOS(); len++) {
|
||||
c = *v->now++;
|
||||
switch (c)
|
||||
{
|
||||
case CHR('0'):
|
||||
case CHR('1'):
|
||||
case CHR('2'):
|
||||
case CHR('3'):
|
||||
case CHR('4'):
|
||||
case CHR('5'):
|
||||
case CHR('6'):
|
||||
case CHR('7'):
|
||||
case CHR('8'):
|
||||
case CHR('9'):
|
||||
switch (c) {
|
||||
case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
|
||||
case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
|
||||
case CHR('8'): case CHR('9'):
|
||||
d = DIGITVAL(c);
|
||||
break;
|
||||
case CHR('a'):
|
||||
case CHR('A'):
|
||||
d = 10;
|
||||
break;
|
||||
case CHR('b'):
|
||||
case CHR('B'):
|
||||
d = 11;
|
||||
break;
|
||||
case CHR('c'):
|
||||
case CHR('C'):
|
||||
d = 12;
|
||||
break;
|
||||
case CHR('d'):
|
||||
case CHR('D'):
|
||||
d = 13;
|
||||
break;
|
||||
case CHR('e'):
|
||||
case CHR('E'):
|
||||
d = 14;
|
||||
break;
|
||||
case CHR('f'):
|
||||
case CHR('F'):
|
||||
d = 15;
|
||||
break;
|
||||
case CHR('a'): case CHR('A'): d = 10; break;
|
||||
case CHR('b'): case CHR('B'): d = 11; break;
|
||||
case CHR('c'): case CHR('C'): d = 12; break;
|
||||
case CHR('d'): case CHR('D'): d = 13; break;
|
||||
case CHR('e'): case CHR('E'): d = 14; break;
|
||||
case CHR('f'): case CHR('F'): d = 15; break;
|
||||
default:
|
||||
v->now--; /* oops, not a digit at all */
|
||||
d = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (d >= base)
|
||||
{ /* not a plausible digit */
|
||||
if (d >= base) { /* not a plausible digit */
|
||||
v->now--;
|
||||
d = -1;
|
||||
}
|
||||
@@ -952,19 +859,19 @@ lexdigits(struct vars * v,
|
||||
}
|
||||
|
||||
/*
|
||||
* brenext - get next BRE token
|
||||
*
|
||||
- brenext - get next BRE token
|
||||
* This is much like EREs except for all the stupid backslashes and the
|
||||
* context-dependency of some things.
|
||||
^ static int brenext(struct vars *, pchr);
|
||||
*/
|
||||
static int /* 1 normal, 0 failure */
|
||||
brenext(struct vars * v,
|
||||
chr pc)
|
||||
brenext(v, pc)
|
||||
struct vars *v;
|
||||
pchr pc;
|
||||
{
|
||||
chr c = (chr)pc;
|
||||
|
||||
switch (c)
|
||||
{
|
||||
switch (c) {
|
||||
case CHR('*'):
|
||||
if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
|
||||
RETV(PLAIN, c);
|
||||
@@ -977,16 +884,14 @@ brenext(struct vars * v,
|
||||
*(v->now+2) == CHR('>')) &&
|
||||
*(v->now+3) == CHR(':') &&
|
||||
*(v->now+4) == CHR(']') &&
|
||||
*(v->now + 5) == CHR(']'))
|
||||
{
|
||||
*(v->now+5) == CHR(']')) {
|
||||
c = *(v->now+2);
|
||||
v->now += 6;
|
||||
NOTE(REG_UNONPOSIX);
|
||||
RET((c == CHR('<')) ? '<' : '>');
|
||||
}
|
||||
INTOCON(L_BRACK);
|
||||
if (NEXT1('^'))
|
||||
{
|
||||
if (NEXT1('^')) {
|
||||
v->now++;
|
||||
RETV('[', 0);
|
||||
}
|
||||
@@ -998,8 +903,7 @@ brenext(struct vars * v,
|
||||
case CHR('^'):
|
||||
if (LASTTYPE(EMPTY))
|
||||
RET('^');
|
||||
if (LASTTYPE('('))
|
||||
{
|
||||
if (LASTTYPE('(')) {
|
||||
NOTE(REG_UUNSPEC);
|
||||
RET('^');
|
||||
}
|
||||
@@ -1010,8 +914,7 @@ brenext(struct vars * v,
|
||||
skip(v);
|
||||
if (ATEOS())
|
||||
RET('$');
|
||||
if (NEXT2('\\', ')'))
|
||||
{
|
||||
if (NEXT2('\\', ')')) {
|
||||
NOTE(REG_UUNSPEC);
|
||||
RET('$');
|
||||
}
|
||||
@@ -1030,8 +933,7 @@ brenext(struct vars * v,
|
||||
FAILW(REG_EESCAPE);
|
||||
|
||||
c = *v->now++;
|
||||
switch (c)
|
||||
{
|
||||
switch (c) {
|
||||
case CHR('{'):
|
||||
INTOCON(L_BBND);
|
||||
NOTE(REG_UBOUNDS);
|
||||
@@ -1051,21 +953,14 @@ brenext(struct vars * v,
|
||||
NOTE(REG_UNONPOSIX);
|
||||
RET('>');
|
||||
break;
|
||||
case CHR('1'):
|
||||
case CHR('2'):
|
||||
case CHR('3'):
|
||||
case CHR('4'):
|
||||
case CHR('5'):
|
||||
case CHR('6'):
|
||||
case CHR('7'):
|
||||
case CHR('8'):
|
||||
case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
|
||||
case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
|
||||
case CHR('9'):
|
||||
NOTE(REG_UBACKREF);
|
||||
RETV(BACKREF, (chr)DIGITVAL(c));
|
||||
break;
|
||||
default:
|
||||
if (iscalnum(c))
|
||||
{
|
||||
if (iscalnum(c)) {
|
||||
NOTE(REG_UBSALNUM);
|
||||
NOTE(REG_UUNSPEC);
|
||||
}
|
||||
@@ -1077,17 +972,18 @@ brenext(struct vars * v,
|
||||
}
|
||||
|
||||
/*
|
||||
* skip - skip white space and comments in expanded form
|
||||
- skip - skip white space and comments in expanded form
|
||||
^ static VOID skip(struct vars *);
|
||||
*/
|
||||
static void
|
||||
skip(struct vars * v)
|
||||
static VOID
|
||||
skip(v)
|
||||
struct vars *v;
|
||||
{
|
||||
chr *start = v->now;
|
||||
|
||||
assert(v->cflags&REG_EXPANDED);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
for (;;) {
|
||||
while (!ATEOS() && iscspace(*v->now))
|
||||
v->now++;
|
||||
if (ATEOS() || *v->now != CHR('#'))
|
||||
@@ -1103,27 +999,46 @@ skip(struct vars * v)
|
||||
}
|
||||
|
||||
/*
|
||||
* newline - return the chr for a newline
|
||||
*
|
||||
- newline - return the chr for a newline
|
||||
* This helps confine use of CHR to this source file.
|
||||
^ static chr newline(NOPARMS);
|
||||
*/
|
||||
static chr
|
||||
newline(void)
|
||||
newline()
|
||||
{
|
||||
return CHR('\n');
|
||||
}
|
||||
|
||||
/*
|
||||
* chrnamed - return the chr known by a given (chr string) name
|
||||
*
|
||||
- ch - return the chr sequence for regc_locale.c's fake collating element ch
|
||||
* This helps confine use of CHR to this source file. Beware that the caller
|
||||
* knows how long the sequence is.
|
||||
^ #ifdef REG_DEBUG
|
||||
^ static chr *ch(NOPARMS);
|
||||
^ #endif
|
||||
*/
|
||||
#ifdef REG_DEBUG
|
||||
static chr *
|
||||
ch()
|
||||
{
|
||||
static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
|
||||
|
||||
return chstr;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
- chrnamed - return the chr known by a given (chr string) name
|
||||
* The code is a bit clumsy, but this routine gets only such specialized
|
||||
* use that it hardly matters.
|
||||
^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
|
||||
*/
|
||||
static chr
|
||||
chrnamed(struct vars * v,
|
||||
chr *startp, /* start of name */
|
||||
chr *endp, /* just past end of name */
|
||||
chr lastresort) /* what to return if name lookup fails */
|
||||
chrnamed(v, startp, endp, lastresort)
|
||||
struct vars *v;
|
||||
chr *startp; /* start of name */
|
||||
chr *endp; /* just past end of name */
|
||||
pchr lastresort; /* what to return if name lookup fails */
|
||||
{
|
||||
celt c;
|
||||
int errsave;
|
||||
|
@@ -1,75 +1,18 @@
|
||||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
{
|
||||
REG_OKAY, "REG_OKAY", "no errors detected"
|
||||
},
|
||||
|
||||
{
|
||||
REG_NOMATCH, "REG_NOMATCH", "failed to match"
|
||||
},
|
||||
|
||||
{
|
||||
REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)"
|
||||
},
|
||||
|
||||
{
|
||||
REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"
|
||||
},
|
||||
|
||||
{
|
||||
REG_ECTYPE, "REG_ECTYPE", "invalid character class"
|
||||
},
|
||||
|
||||
{
|
||||
REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence"
|
||||
},
|
||||
|
||||
{
|
||||
REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"
|
||||
},
|
||||
|
||||
{
|
||||
REG_EBRACK, "REG_EBRACK", "brackets [] not balanced"
|
||||
},
|
||||
|
||||
{
|
||||
REG_EPAREN, "REG_EPAREN", "parentheses () not balanced"
|
||||
},
|
||||
|
||||
{
|
||||
REG_EBRACE, "REG_EBRACE", "braces {} not balanced"
|
||||
},
|
||||
|
||||
{
|
||||
REG_BADBR, "REG_BADBR", "invalid repetition count(s)"
|
||||
},
|
||||
|
||||
{
|
||||
REG_ERANGE, "REG_ERANGE", "invalid character range"
|
||||
},
|
||||
|
||||
{
|
||||
REG_ESPACE, "REG_ESPACE", "out of memory"
|
||||
},
|
||||
|
||||
{
|
||||
REG_BADRPT, "REG_BADRPT", "quantifier operand invalid"
|
||||
},
|
||||
|
||||
{
|
||||
REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"
|
||||
},
|
||||
|
||||
{
|
||||
REG_INVARG, "REG_INVARG", "invalid argument to regex function"
|
||||
},
|
||||
|
||||
{
|
||||
REG_MIXED, "REG_MIXED", "character widths of regex and string differ"
|
||||
},
|
||||
|
||||
{
|
||||
REG_BADOPT, "REG_BADOPT", "invalid embedded option"
|
||||
},
|
||||
{ REG_OKAY, "REG_OKAY", "no errors detected" },
|
||||
{ REG_NOMATCH, "REG_NOMATCH", "failed to match" },
|
||||
{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" },
|
||||
{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
|
||||
{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
|
||||
{ REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" },
|
||||
{ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
|
||||
{ REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" },
|
||||
{ REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" },
|
||||
{ REG_EBRACE, "REG_EBRACE", "braces {} not balanced" },
|
||||
{ REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
|
||||
{ REG_ERANGE, "REG_ERANGE", "invalid character range" },
|
||||
{ REG_ESPACE, "REG_ESPACE", "out of memory" },
|
||||
{ REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" },
|
||||
{ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
|
||||
{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" },
|
||||
{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" },
|
||||
{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" },
|
||||
|
@@ -1,74 +1,341 @@
|
||||
#ifndef _REGEX_H_
|
||||
#define _REGEX_H_ /* never again */
|
||||
/* ========= begin header generated by ./mkh ========= */
|
||||
/*
|
||||
* regular expressions
|
||||
*
|
||||
* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
|
||||
*
|
||||
* Development of this software was funded, in part, by Cray Research Inc.,
|
||||
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
|
||||
* Corporation, none of whom are responsible for the results. The author
|
||||
* thanks all of them.
|
||||
*
|
||||
* Redistribution and use in source and binary forms -- with or without
|
||||
* modification -- are permitted for any purpose, provided that
|
||||
* redistributions in source form retain this entire copyright notice and
|
||||
* indicate the origin and nature of any modifications.
|
||||
*
|
||||
* I'd appreciate being given credit for this package in the documentation
|
||||
* of software which uses it, but that is not a requirement.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*
|
||||
*
|
||||
* Prototypes etc. marked with "^" within comments get gathered up (and
|
||||
* possibly edited) by the regfwd program and inserted near the bottom of
|
||||
* this file.
|
||||
*
|
||||
* We offer the option of declaring one wide-character version of the
|
||||
* RE functions as well as the char versions. To do that, define
|
||||
* __REG_WIDE_T to the type of wide characters (unfortunately, there
|
||||
* is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and
|
||||
* __REG_WIDE_EXEC to the names to be used for the compile and execute
|
||||
* functions (suggestion: re_Xcomp and re_Xexec, where X is a letter
|
||||
* suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode).
|
||||
* For cranky old compilers, it may be necessary to do something like:
|
||||
* #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d)
|
||||
* #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g)
|
||||
* rather than just #defining the names as parameterless macros.
|
||||
*
|
||||
* For some specialized purposes, it may be desirable to suppress the
|
||||
* declarations of the "front end" functions, regcomp() and regexec(),
|
||||
* or of the char versions of the compile and execute functions. To
|
||||
* suppress the front-end functions, define __REG_NOFRONT. To suppress
|
||||
* the char versions, define __REG_NOCHAR.
|
||||
*
|
||||
* The right place to do those defines (and some others you may want, see
|
||||
* below) would be <sys/types.h>. If you don't have control of that file,
|
||||
* the right place to add your own defines to this file is marked below.
|
||||
* This is normally done automatically, by the makefile and regmkhdr, based
|
||||
* on the contents of regcustom.h.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* voodoo for C++
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* === regex2.h === */
|
||||
typedef off_t regoff_t;
|
||||
|
||||
|
||||
/*
|
||||
* Add your own defines, if needed, here.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Location where a chunk of regcustom.h is automatically spliced into
|
||||
* this file (working from its prototype, regproto.h).
|
||||
*/
|
||||
/* --- begin --- */
|
||||
/* ensure certain things don't sneak in from system headers */
|
||||
#ifdef __REG_WIDE_T
|
||||
#undef __REG_WIDE_T
|
||||
#endif
|
||||
#ifdef __REG_WIDE_COMPILE
|
||||
#undef __REG_WIDE_COMPILE
|
||||
#endif
|
||||
#ifdef __REG_WIDE_EXEC
|
||||
#undef __REG_WIDE_EXEC
|
||||
#endif
|
||||
#ifdef __REG_REGOFF_T
|
||||
#undef __REG_REGOFF_T
|
||||
#endif
|
||||
#ifdef __REG_VOID_T
|
||||
#undef __REG_VOID_T
|
||||
#endif
|
||||
#ifdef __REG_CONST
|
||||
#undef __REG_CONST
|
||||
#endif
|
||||
#ifdef __REG_NOFRONT
|
||||
#undef __REG_NOFRONT
|
||||
#endif
|
||||
#ifdef __REG_NOCHAR
|
||||
#undef __REG_NOCHAR
|
||||
#endif
|
||||
/* interface types */
|
||||
#define __REG_WIDE_T Tcl_UniChar
|
||||
#define __REG_REGOFF_T long /* not really right, but good enough... */
|
||||
#define __REG_VOID_T VOID
|
||||
#define __REG_CONST CONST
|
||||
/* names and declarations */
|
||||
#define __REG_WIDE_COMPILE TclReComp
|
||||
#define __REG_WIDE_EXEC TclReExec
|
||||
#define __REG_NOFRONT /* don't want regcomp() and regexec() */
|
||||
#define __REG_NOCHAR /* or the char versions */
|
||||
#define regfree TclReFree
|
||||
#define regerror TclReError
|
||||
/* --- end --- */
|
||||
|
||||
|
||||
/*
|
||||
* interface types etc.
|
||||
*/
|
||||
|
||||
/*
|
||||
* regoff_t has to be large enough to hold either off_t or ssize_t,
|
||||
* and must be signed; it's only a guess that long is suitable, so we
|
||||
* offer <sys/types.h> an override.
|
||||
*/
|
||||
#ifdef __REG_REGOFF_T
|
||||
typedef __REG_REGOFF_T regoff_t;
|
||||
#else
|
||||
typedef long regoff_t;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* For benefit of old compilers, we offer <sys/types.h> the option of
|
||||
* overriding the `void' type used to declare nonexistent return types.
|
||||
*/
|
||||
#ifdef __REG_VOID_T
|
||||
typedef __REG_VOID_T re_void;
|
||||
#else
|
||||
typedef void re_void;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Also for benefit of old compilers, <sys/types.h> can supply a macro
|
||||
* which expands to a substitute for `const'.
|
||||
*/
|
||||
#ifndef __REG_CONST
|
||||
#define __REG_CONST const
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* other interface types
|
||||
*/
|
||||
|
||||
/* the biggie, a compiled RE (or rather, a front end to same) */
|
||||
typedef struct {
|
||||
int re_magic;
|
||||
size_t re_nsub; /* number of parenthesized subexpressions */
|
||||
const char *re_endp; /* end pointer for REG_PEND */
|
||||
struct re_guts *re_g; /* none of your business :-) */
|
||||
int re_magic; /* magic number */
|
||||
size_t re_nsub; /* number of subexpressions */
|
||||
long re_info; /* information about RE */
|
||||
# define REG_UBACKREF 000001
|
||||
# define REG_ULOOKAHEAD 000002
|
||||
# define REG_UBOUNDS 000004
|
||||
# define REG_UBRACES 000010
|
||||
# define REG_UBSALNUM 000020
|
||||
# define REG_UPBOTCH 000040
|
||||
# define REG_UBBS 000100
|
||||
# define REG_UNONPOSIX 000200
|
||||
# define REG_UUNSPEC 000400
|
||||
# define REG_UUNPORT 001000
|
||||
# define REG_ULOCALE 002000
|
||||
# define REG_UEMPTYMATCH 004000
|
||||
# define REG_UIMPOSSIBLE 010000
|
||||
# define REG_USHORTEST 020000
|
||||
int re_csize; /* sizeof(character) */
|
||||
char *re_endp; /* backward compatibility kludge */
|
||||
/* the rest is opaque pointers to hidden innards */
|
||||
char *re_guts; /* `char *' is more portable than `void *' */
|
||||
char *re_fns;
|
||||
} regex_t;
|
||||
|
||||
/* result reporting (may acquire more fields later) */
|
||||
typedef struct {
|
||||
regoff_t rm_so; /* start of match */
|
||||
regoff_t rm_eo; /* end of match */
|
||||
regoff_t rm_so; /* start of substring */
|
||||
regoff_t rm_eo; /* end of substring */
|
||||
} regmatch_t;
|
||||
|
||||
|
||||
/* === regcomp.c === */
|
||||
extern int regcomp(regex_t *, const char *, int);
|
||||
#define REG_BASIC 0000
|
||||
#define REG_EXTENDED 0001
|
||||
#define REG_ICASE 0002
|
||||
#define REG_NOSUB 0004
|
||||
#define REG_NEWLINE 0010
|
||||
#define REG_NOSPEC 0020
|
||||
#define REG_PEND 0040
|
||||
#define REG_DUMP 0200
|
||||
/* supplementary control and reporting */
|
||||
typedef struct {
|
||||
regmatch_t rm_extend; /* see REG_EXPECT */
|
||||
} rm_detail_t;
|
||||
|
||||
|
||||
/* === regerror.c === */
|
||||
#define REG_OKAY 0
|
||||
#define REG_NOMATCH 1
|
||||
#define REG_BADPAT 2
|
||||
#define REG_ECOLLATE 3
|
||||
#define REG_ECTYPE 4
|
||||
#define REG_EESCAPE 5
|
||||
#define REG_ESUBREG 6
|
||||
#define REG_EBRACK 7
|
||||
#define REG_EPAREN 8
|
||||
#define REG_EBRACE 9
|
||||
#define REG_BADBR 10
|
||||
#define REG_ERANGE 11
|
||||
#define REG_ESPACE 12
|
||||
#define REG_BADRPT 13
|
||||
#define REG_EMPTY 14
|
||||
#define REG_ASSERT 15
|
||||
#define REG_INVARG 16
|
||||
#define REG_ATOI 255 /* convert name to number (!) */
|
||||
#define REG_ITOA 0400 /* convert number to name (!) */
|
||||
extern size_t regerror(int, const regex_t *, char *, size_t);
|
||||
|
||||
/*
|
||||
* compilation
|
||||
^ #ifndef __REG_NOCHAR
|
||||
^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
|
||||
^ #endif
|
||||
^ #ifndef __REG_NOFRONT
|
||||
^ int regcomp(regex_t *, __REG_CONST char *, int);
|
||||
^ #endif
|
||||
^ #ifdef __REG_WIDE_T
|
||||
^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
|
||||
^ #endif
|
||||
*/
|
||||
#define REG_BASIC 000000 /* BREs (convenience) */
|
||||
#define REG_EXTENDED 000001 /* EREs */
|
||||
#define REG_ADVF 000002 /* advanced features in EREs */
|
||||
#define REG_ADVANCED 000003 /* AREs (which are also EREs) */
|
||||
#define REG_QUOTE 000004 /* no special characters, none */
|
||||
#define REG_NOSPEC REG_QUOTE /* historical synonym */
|
||||
#define REG_ICASE 000010 /* ignore case */
|
||||
#define REG_NOSUB 000020 /* don't care about subexpressions */
|
||||
#define REG_EXPANDED 000040 /* expanded format, white space & comments */
|
||||
#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */
|
||||
#define REG_NLANCH 000200 /* ^ matches after \n, $ before */
|
||||
#define REG_NEWLINE 000300 /* newlines are line terminators */
|
||||
#define REG_PEND 000400 /* ugh -- backward-compatibility hack */
|
||||
#define REG_EXPECT 001000 /* report details on partial/limited matches */
|
||||
#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */
|
||||
#define REG_DUMP 004000 /* none of your business :-) */
|
||||
#define REG_FAKE 010000 /* none of your business :-) */
|
||||
#define REG_PROGRESS 020000 /* none of your business :-) */
|
||||
|
||||
|
||||
/* === regexec.c === */
|
||||
extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
|
||||
#define REG_NOTBOL 00001
|
||||
#define REG_NOTEOL 00002
|
||||
#define REG_STARTEND 00004
|
||||
#define REG_TRACE 00400 /* tracing of execution */
|
||||
#define REG_LARGE 01000 /* force large representation */
|
||||
#define REG_BACKR 02000 /* force use of backref code */
|
||||
|
||||
/*
|
||||
* execution
|
||||
^ #ifndef __REG_NOCHAR
|
||||
^ int re_exec(regex_t *, __REG_CONST char *, size_t,
|
||||
^ rm_detail_t *, size_t, regmatch_t [], int);
|
||||
^ #endif
|
||||
^ #ifndef __REG_NOFRONT
|
||||
^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
|
||||
^ #endif
|
||||
^ #ifdef __REG_WIDE_T
|
||||
^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
|
||||
^ rm_detail_t *, size_t, regmatch_t [], int);
|
||||
^ #endif
|
||||
*/
|
||||
#define REG_NOTBOL 0001 /* BOS is not BOL */
|
||||
#define REG_NOTEOL 0002 /* EOS is not EOL */
|
||||
#define REG_STARTEND 0004 /* backward compatibility kludge */
|
||||
#define REG_FTRACE 0010 /* none of your business */
|
||||
#define REG_MTRACE 0020 /* none of your business */
|
||||
#define REG_SMALL 0040 /* none of your business */
|
||||
|
||||
|
||||
/* === regfree.c === */
|
||||
extern void regfree(regex_t *);
|
||||
|
||||
/*
|
||||
* misc generics (may be more functions here eventually)
|
||||
^ re_void regfree(regex_t *);
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* error reporting
|
||||
* Be careful if modifying the list of error codes -- the table used by
|
||||
* regerror() is generated automatically from this file!
|
||||
*
|
||||
* Note that there is no wide-char variant of regerror at this time; what
|
||||
* kind of character is used for error reports is independent of what kind
|
||||
* is used in matching.
|
||||
*
|
||||
^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
|
||||
*/
|
||||
#define REG_OKAY 0 /* no errors detected */
|
||||
#define REG_NOMATCH 1 /* failed to match */
|
||||
#define REG_BADPAT 2 /* invalid regexp */
|
||||
#define REG_ECOLLATE 3 /* invalid collating element */
|
||||
#define REG_ECTYPE 4 /* invalid character class */
|
||||
#define REG_EESCAPE 5 /* invalid escape \ sequence */
|
||||
#define REG_ESUBREG 6 /* invalid backreference number */
|
||||
#define REG_EBRACK 7 /* brackets [] not balanced */
|
||||
#define REG_EPAREN 8 /* parentheses () not balanced */
|
||||
#define REG_EBRACE 9 /* braces {} not balanced */
|
||||
#define REG_BADBR 10 /* invalid repetition count(s) */
|
||||
#define REG_ERANGE 11 /* invalid character range */
|
||||
#define REG_ESPACE 12 /* out of memory */
|
||||
#define REG_BADRPT 13 /* quantifier operand invalid */
|
||||
#define REG_ASSERT 15 /* "can't happen" -- you found a bug */
|
||||
#define REG_INVARG 16 /* invalid argument to regex function */
|
||||
#define REG_MIXED 17 /* character widths of regex and string differ */
|
||||
#define REG_BADOPT 18 /* invalid embedded option */
|
||||
/* two specials for debugging and testing */
|
||||
#define REG_ATOI 101 /* convert error-code name to number */
|
||||
#define REG_ITOA 102 /* convert error-code number to name */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* the prototypes, as possibly munched by regfwd
|
||||
*/
|
||||
/* =====^!^===== begin forwards =====^!^===== */
|
||||
/* automatically gathered by fwd; do not hand-edit */
|
||||
/* === regproto.h === */
|
||||
#ifndef __REG_NOCHAR
|
||||
int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int));
|
||||
#endif
|
||||
#ifndef __REG_NOFRONT
|
||||
int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int));
|
||||
#endif
|
||||
#ifdef __REG_WIDE_T
|
||||
int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int));
|
||||
#endif
|
||||
#ifndef __REG_NOCHAR
|
||||
int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
|
||||
#endif
|
||||
#ifndef __REG_NOFRONT
|
||||
int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int));
|
||||
#endif
|
||||
#ifdef __REG_WIDE_T
|
||||
int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
|
||||
#endif
|
||||
re_void regfree _ANSI_ARGS_((regex_t *));
|
||||
extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
|
||||
/* automatically gathered by fwd; do not hand-edit */
|
||||
/* =====^!^===== end forwards =====^!^===== */
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* more C++ voodoo
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
/* ========= end header generated by ./mkh ========= */
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
@@ -26,8 +26,6 @@
|
||||
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
|
||||
@@ -45,27 +43,52 @@
|
||||
* Things that regcustom.h might override.
|
||||
*/
|
||||
|
||||
/* standard header files (NULL is a reasonable indicator for them) */
|
||||
#ifndef NULL
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
/* assertions */
|
||||
#ifndef assert
|
||||
# ifndef REG_DEBUG
|
||||
# ifndef NDEBUG
|
||||
# define NDEBUG /* no assertions */
|
||||
# endif
|
||||
#endif
|
||||
#include <assert.h>
|
||||
#endif
|
||||
|
||||
/* voids */
|
||||
#ifndef VOID
|
||||
#define VOID void /* for function return values */
|
||||
#endif
|
||||
#ifndef DISCARD
|
||||
#define DISCARD void /* for throwing values away */
|
||||
#define DISCARD VOID /* for throwing values away */
|
||||
#endif
|
||||
#ifndef PVOID
|
||||
#define PVOID VOID * /* generic pointer */
|
||||
#endif
|
||||
#ifndef VS
|
||||
#define VS(x) ((void *)(x)) /* cast something to generic ptr */
|
||||
#define VS(x) ((PVOID)(x)) /* cast something to generic ptr */
|
||||
#endif
|
||||
#ifndef NOPARMS
|
||||
#define NOPARMS VOID /* for empty parm lists */
|
||||
#endif
|
||||
|
||||
/* const */
|
||||
#ifndef CONST
|
||||
#define CONST const /* for old compilers, might be empty */
|
||||
#endif
|
||||
|
||||
/* function-pointer declarator */
|
||||
#ifndef FUNCPTR
|
||||
#if __STDC__ >= 1
|
||||
#define FUNCPTR(name, args) (*name)args
|
||||
#else
|
||||
#define FUNCPTR(name, args) (*name)()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* memory allocation */
|
||||
@@ -136,8 +159,7 @@
|
||||
#ifndef BYTBITS
|
||||
#define BYTBITS 8 /* bits in a byt */
|
||||
#endif
|
||||
#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt
|
||||
* value */
|
||||
#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */
|
||||
#define BYTMASK (BYTTAB-1) /* bit mask for byt */
|
||||
#define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS)
|
||||
/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */
|
||||
@@ -150,7 +172,6 @@
|
||||
*/
|
||||
typedef short color; /* colors of characters */
|
||||
typedef int pcolor; /* what color promotes to */
|
||||
|
||||
#define COLORLESS (-1) /* impossible color */
|
||||
#define WHITE 0 /* default color, parent of all others */
|
||||
|
||||
@@ -167,26 +188,21 @@ typedef int pcolor; /* what color promotes to */
|
||||
*/
|
||||
|
||||
/* the tree itself */
|
||||
struct colors
|
||||
{
|
||||
struct colors {
|
||||
color ccolor[BYTTAB];
|
||||
};
|
||||
struct ptrs
|
||||
{
|
||||
struct ptrs {
|
||||
union tree *pptr[BYTTAB];
|
||||
};
|
||||
union tree
|
||||
{
|
||||
union tree {
|
||||
struct colors colors;
|
||||
struct ptrs ptrs;
|
||||
};
|
||||
|
||||
#define tcolor colors.ccolor
|
||||
#define tptr ptrs.pptr
|
||||
|
||||
/* internal per-color structure for the color machinery */
|
||||
struct colordesc
|
||||
{
|
||||
struct colordesc {
|
||||
uchr nchrs; /* number of chars of this color */
|
||||
color sub; /* open subcolor (if any); free chain ptr */
|
||||
# define NOSUB COLORLESS
|
||||
@@ -199,8 +215,7 @@ struct colordesc
|
||||
};
|
||||
|
||||
/* the color map itself */
|
||||
struct colormap
|
||||
{
|
||||
struct colormap {
|
||||
int magic;
|
||||
# define CMMAGIC 0x876
|
||||
struct vars *v; /* for compile error reporting */
|
||||
@@ -236,8 +251,7 @@ struct colormap
|
||||
* Interface definitions for locale-interface functions in locale.c.
|
||||
* Multi-character collating elements (MCCEs) cause most of the trouble.
|
||||
*/
|
||||
struct cvec
|
||||
{
|
||||
struct cvec {
|
||||
int nchrs; /* number of chrs */
|
||||
int chrspace; /* number of chrs possible */
|
||||
chr *chrs; /* pointer to vector of chrs */
|
||||
@@ -264,8 +278,7 @@ struct cvec
|
||||
*/
|
||||
struct state;
|
||||
|
||||
struct arc
|
||||
{
|
||||
struct arc {
|
||||
int type;
|
||||
# define ARCFREE '\0'
|
||||
color co;
|
||||
@@ -277,15 +290,13 @@ struct arc
|
||||
struct arc *colorchain; /* color's arc chain */
|
||||
};
|
||||
|
||||
struct arcbatch
|
||||
{ /* for bulk allocation of arcs */
|
||||
struct arcbatch { /* for bulk allocation of arcs */
|
||||
struct arcbatch *next;
|
||||
# define ABSIZE 10
|
||||
struct arc a[ABSIZE];
|
||||
};
|
||||
|
||||
struct state
|
||||
{
|
||||
struct state {
|
||||
int no;
|
||||
# define FREESTATE (-1)
|
||||
char flag; /* marks special states */
|
||||
@@ -297,13 +308,11 @@ struct state
|
||||
struct state *tmp; /* temporary for traversal algorithms */
|
||||
struct state *next; /* chain for traversing all */
|
||||
struct state *prev; /* back chain */
|
||||
struct arcbatch oas; /* first arcbatch, avoid malloc in easy
|
||||
* case */
|
||||
struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */
|
||||
int noas; /* number of arcs used in first arcbatch */
|
||||
};
|
||||
|
||||
struct nfa
|
||||
{
|
||||
struct nfa {
|
||||
struct state *pre; /* pre-initial state */
|
||||
struct state *init; /* initial state */
|
||||
struct state *final; /* final state */
|
||||
@@ -324,14 +333,12 @@ struct nfa
|
||||
/*
|
||||
* definitions for compacted NFA
|
||||
*/
|
||||
struct carc
|
||||
{
|
||||
struct carc {
|
||||
color co; /* COLORLESS is list terminator */
|
||||
int to; /* state number */
|
||||
};
|
||||
|
||||
struct cnfa
|
||||
{
|
||||
struct cnfa {
|
||||
int nstates; /* number of states */
|
||||
int ncolors; /* number of colors */
|
||||
int flags;
|
||||
@@ -343,7 +350,6 @@ struct cnfa
|
||||
struct carc **states; /* vector of pointers to outarc lists */
|
||||
struct carc *arcs; /* the area for the lists */
|
||||
};
|
||||
|
||||
#define ZAPCNFA(cnfa) ((cnfa).nstates = 0)
|
||||
#define NULLCNFA(cnfa) ((cnfa).nstates == 0)
|
||||
|
||||
@@ -352,10 +358,8 @@ struct cnfa
|
||||
/*
|
||||
* subexpression tree
|
||||
*/
|
||||
struct subre
|
||||
{
|
||||
char op; /* '|', '.' (concat), 'b' (backref), '(',
|
||||
* '=' */
|
||||
struct subre {
|
||||
char op; /* '|', '.' (concat), 'b' (backref), '(', '=' */
|
||||
char flags;
|
||||
# define LONGER 01 /* prefers longer match */
|
||||
# define SHORTER 02 /* prefers shorter match */
|
||||
@@ -375,8 +379,7 @@ struct subre
|
||||
int subno; /* subexpression number (for 'b' and '(') */
|
||||
short min; /* min repetitions, for backref only */
|
||||
short max; /* max repetitions, for backref only */
|
||||
struct subre *left; /* left child, if any (also freelist
|
||||
* chain) */
|
||||
struct subre *left; /* left child, if any (also freelist chain) */
|
||||
struct subre *right; /* right child, if any */
|
||||
struct state *begin; /* outarcs from here... */
|
||||
struct state *end; /* ...ending in inarcs here */
|
||||
@@ -390,9 +393,8 @@ struct subre
|
||||
* table of function pointers for generic manipulation functions
|
||||
* A regex_t's re_fns points to one of these.
|
||||
*/
|
||||
struct fns
|
||||
{
|
||||
void FUNCPTR(free, (regex_t *));
|
||||
struct fns {
|
||||
VOID FUNCPTR(free, (regex_t *));
|
||||
};
|
||||
|
||||
|
||||
@@ -400,8 +402,7 @@ struct fns
|
||||
/*
|
||||
* the insides of a regex_t, hidden behind a void *
|
||||
*/
|
||||
struct guts
|
||||
{
|
||||
struct guts {
|
||||
int magic;
|
||||
# define GUTSMAGIC 0xfed9
|
||||
int cflags; /* copy of compile flags */
|
||||
@@ -411,7 +412,7 @@ struct guts
|
||||
struct cnfa search; /* for fast preliminary search */
|
||||
int ntree;
|
||||
struct colormap cmap;
|
||||
int FUNCPTR(compare, (const chr *, const chr *, size_t));
|
||||
int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t));
|
||||
struct subre *lacons; /* lookahead-constraint vector */
|
||||
int nlacons; /* size of lacons */
|
||||
};
|
||||
|
Reference in New Issue
Block a user