From 8e30ac7429cfd46e15de22c35831a5f9d4f4e757 Mon Sep 17 00:00:00 2001 From: Bryan Petty Date: Fri, 13 Jul 2001 13:31:24 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create tag 'ALPHA_3_8'. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/tags/ALPHA_3_8@10995 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- src/regex/regc_color.c | 778 -------------------- src/regex/regc_lex.c | 1061 --------------------------- src/regex/regc_nfa.c | 1575 ---------------------------------------- src/regex/rege_dfa.c | 677 ----------------- src/regex/regerror.c | 181 ++--- src/regex/regerrs.h | 18 - src/regex/regex.h | 341 --------- src/regex/regexec.c | 1154 ++++------------------------- src/regex/regfree.c | 76 +- src/regex/regfronts.c | 83 --- src/regex/regguts.h | 418 ----------- src/regex/tclUniData.c | 904 ----------------------- 12 files changed, 256 insertions(+), 7010 deletions(-) delete mode 100644 src/regex/regc_color.c delete mode 100644 src/regex/regc_lex.c delete mode 100644 src/regex/regc_nfa.c delete mode 100644 src/regex/rege_dfa.c delete mode 100644 src/regex/regerrs.h delete mode 100644 src/regex/regex.h delete mode 100644 src/regex/regfronts.c delete mode 100644 src/regex/regguts.h delete mode 100644 src/regex/tclUniData.c diff --git a/src/regex/regc_color.c b/src/regex/regc_color.c deleted file mode 100644 index 5aed21c630..0000000000 --- a/src/regex/regc_color.c +++ /dev/null @@ -1,778 +0,0 @@ -/* - * colorings of characters - * This file is #included by regcomp.c. - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * - * - * Note that there are some incestuous relationships between this code and - * NFA arc maintenance, which perhaps ought to be cleaned up sometime. - */ - - - -#define CISERR() VISERR(cm->v) -#define CERR(e) VERR(cm->v, (e)) - - - -/* - - initcm - set up new colormap - ^ static VOID initcm(struct vars *, struct colormap *); - */ -static VOID -initcm(v, cm) -struct vars *v; -struct colormap *cm; -{ - int i; - int j; - union tree *t; - union tree *nextt; - struct colordesc *cd; - - cm->magic = CMMAGIC; - cm->v = v; - - cm->ncds = NINLINECDS; - cm->cd = cm->cdspace; - cm->max = 0; - cm->free = 0; - - cd = cm->cd; /* cm->cd[WHITE] */ - cd->sub = NOSUB; - cd->arcs = NULL; - cd->flags = 0; - cd->nchrs = CHR_MAX - CHR_MIN + 1; - - /* upper levels of tree */ - for (t = &cm->tree[0], j = NBYTS-1; j > 0; t = nextt, j--) { - nextt = t + 1; - for (i = BYTTAB-1; i >= 0; i--) - t->tptr[i] = nextt; - } - /* bottom level is solid white */ - t = &cm->tree[NBYTS-1]; - for (i = BYTTAB-1; i >= 0; i--) - t->tcolor[i] = WHITE; - cd->block = t; -} - -/* - - freecm - free dynamically-allocated things in a colormap - ^ static VOID freecm(struct colormap *); - */ -static VOID -freecm(cm) -struct colormap *cm; -{ - size_t i; - union tree *cb; - - cm->magic = 0; - if (NBYTS > 1) - cmtreefree(cm, cm->tree, 0); - for (i = 1; i <= cm->max; i++) /* skip WHITE */ - if (!UNUSEDCOLOR(&cm->cd[i])) { - cb = cm->cd[i].block; - if (cb != NULL) - FREE(cb); - } - if (cm->cd != cm->cdspace) - FREE(cm->cd); -} - -/* - - cmtreefree - free a non-terminal part of a colormap tree - ^ static VOID cmtreefree(struct colormap *, union tree *, int); - */ -static VOID -cmtreefree(cm, tree, level) -struct colormap *cm; -union tree *tree; -int level; /* level number (top == 0) of this block */ -{ - int i; - union tree *t; - union tree *fillt = &cm->tree[level+1]; - union tree *cb; - - assert(level < NBYTS-1); /* this level has pointers */ - for (i = BYTTAB-1; i >= 0; i--) { - t = tree->tptr[i]; - assert(t != NULL); - if (t != fillt) { - if (level < NBYTS-2) { /* more pointer blocks below */ - cmtreefree(cm, t, level+1); - FREE(t); - } else { /* color block below */ - cb = cm->cd[t->tcolor[0]].block; - if (t != cb) /* not a solid block */ - FREE(t); - } - } - } -} - -/* - - setcolor - set the color of a character in a colormap - ^ static color setcolor(struct colormap *, pchr, pcolor); - */ -static color /* previous color */ -setcolor(cm, c, co) -struct colormap *cm; -pchr c; -pcolor co; -{ - uchr uc = c; - int shift; - int level; - int b; - int bottom; - union tree *t; - union tree *newt; - union tree *fillt; - union tree *lastt; - union tree *cb; - color prev; - - assert(cm->magic == CMMAGIC); - if (CISERR() || co == COLORLESS) - return COLORLESS; - - t = cm->tree; - for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) { - b = (uc >> shift) & BYTMASK; - lastt = t; - t = lastt->tptr[b]; - assert(t != NULL); - fillt = &cm->tree[level+1]; - bottom = (shift <= BYTBITS) ? 1 : 0; - cb = (bottom) ? cm->cd[t->tcolor[0]].block : fillt; - if (t == fillt || t == cb) { /* must allocate a new block */ - newt = (union tree *)MALLOC((bottom) ? - sizeof(struct colors) : sizeof(struct ptrs)); - if (newt == NULL) { - CERR(REG_ESPACE); - return COLORLESS; - } - if (bottom) - memcpy(VS(newt->tcolor), VS(t->tcolor), - BYTTAB*sizeof(color)); - else - memcpy(VS(newt->tptr), VS(t->tptr), - BYTTAB*sizeof(union tree *)); - t = newt; - lastt->tptr[b] = t; - } - } - - b = uc & BYTMASK; - prev = t->tcolor[b]; - t->tcolor[b] = (color)co; - return prev; -} - -/* - - maxcolor - report largest color number in use - ^ static color maxcolor(struct colormap *); - */ -static color -maxcolor(cm) -struct colormap *cm; -{ - if (CISERR()) - return COLORLESS; - - return (color)cm->max; -} - -/* - - newcolor - find a new color (must be subject of setcolor at once) - * Beware: may relocate the colordescs. - ^ static color newcolor(struct colormap *); - */ -static color /* COLORLESS for error */ -newcolor(cm) -struct colormap *cm; -{ - struct colordesc *cd; - struct colordesc *new; - size_t n; - - if (CISERR()) - return COLORLESS; - - if (cm->free != 0) { - assert(cm->free > 0); - assert((size_t)cm->free < cm->ncds); - cd = &cm->cd[cm->free]; - assert(UNUSEDCOLOR(cd)); - assert(cd->arcs == NULL); - cm->free = cd->sub; - } else if (cm->max < cm->ncds - 1) { - cm->max++; - cd = &cm->cd[cm->max]; - } else { - /* oops, must allocate more */ - n = cm->ncds * 2; - if (cm->cd == cm->cdspace) { - new = (struct colordesc *)MALLOC(n * - sizeof(struct colordesc)); - if (new != NULL) - memcpy(VS(new), VS(cm->cdspace), cm->ncds * - sizeof(struct colordesc)); - } else - new = (struct colordesc *)REALLOC(cm->cd, - n * sizeof(struct colordesc)); - if (new == NULL) { - CERR(REG_ESPACE); - return COLORLESS; - } - cm->cd = new; - cm->ncds = n; - assert(cm->max < cm->ncds - 1); - cm->max++; - cd = &cm->cd[cm->max]; - } - - cd->nchrs = 0; - cd->sub = NOSUB; - cd->arcs = NULL; - cd->flags = 0; - cd->block = NULL; - - return (color)(cd - cm->cd); -} - -/* - - freecolor - free a color (must have no arcs or subcolor) - ^ static VOID freecolor(struct colormap *, pcolor); - */ -static VOID -freecolor(cm, co) -struct colormap *cm; -pcolor co; -{ - struct colordesc *cd = &cm->cd[co]; - color pco, nco; /* for freelist scan */ - - assert(co >= 0); - if (co == WHITE) - return; - - assert(cd->arcs == NULL); - assert(cd->sub == NOSUB); - assert(cd->nchrs == 0); - cd->flags = FREECOL; - if (cd->block != NULL) { - FREE(cd->block); - cd->block = NULL; /* just paranoia */ - } - - if ((size_t)co == cm->max) { - while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max])) - cm->max--; - assert(cm->free >= 0); - while ((size_t)cm->free > cm->max) - cm->free = cm->cd[cm->free].sub; - if (cm->free > 0) { - assert(cm->free < cm->max); - pco = cm->free; - nco = cm->cd[pco].sub; - while (nco > 0) - if ((size_t)nco > cm->max) { - /* take this one out of freelist */ - nco = cm->cd[nco].sub; - cm->cd[pco].sub = nco; - } else { - assert(nco < cm->max); - pco = nco; - nco = cm->cd[pco].sub; - } - } - } else { - cd->sub = cm->free; - cm->free = (color)(cd - cm->cd); - } -} - -/* - - pseudocolor - allocate a false color, to be managed by other means - ^ static color pseudocolor(struct colormap *); - */ -static color -pseudocolor(cm) -struct colormap *cm; -{ - color co; - - co = newcolor(cm); - if (CISERR()) - return COLORLESS; - cm->cd[co].nchrs = 1; - cm->cd[co].flags = PSEUDO; - return co; -} - -/* - - subcolor - allocate a new subcolor (if necessary) to this chr - ^ static color subcolor(struct colormap *, pchr c); - */ -static color -subcolor(cm, c) -struct colormap *cm; -pchr c; -{ - color co; /* current color of c */ - color sco; /* new subcolor */ - - co = GETCOLOR(cm, c); - sco = newsub(cm, co); - if (CISERR()) - return COLORLESS; - assert(sco != COLORLESS); - - if (co == sco) /* already in an open subcolor */ - return co; /* rest is redundant */ - cm->cd[co].nchrs--; - cm->cd[sco].nchrs++; - setcolor(cm, c, sco); - return sco; -} - -/* - - newsub - allocate a new subcolor (if necessary) for a color - ^ static color newsub(struct colormap *, pcolor); - */ -static color -newsub(cm, co) -struct colormap *cm; -pcolor co; -{ - color sco; /* new subcolor */ - - sco = cm->cd[co].sub; - if (sco == NOSUB) { /* color has no open subcolor */ - if (cm->cd[co].nchrs == 1) /* optimization */ - return co; - sco = newcolor(cm); /* must create subcolor */ - if (sco == COLORLESS) { - assert(CISERR()); - return COLORLESS; - } - cm->cd[co].sub = sco; - cm->cd[sco].sub = sco; /* open subcolor points to self */ - } - assert(sco != NOSUB); - - return sco; -} - -/* - - subrange - allocate new subcolors to this range of chrs, fill in arcs - ^ static VOID subrange(struct vars *, pchr, pchr, struct state *, - ^ struct state *); - */ -static VOID -subrange(v, from, to, lp, rp) -struct vars *v; -pchr from; -pchr to; -struct state *lp; -struct state *rp; -{ - uchr uf; - int i; - - assert(from <= to); - - /* first, align "from" on a tree-block boundary */ - uf = (uchr)from; - i = (int)( ((uf + BYTTAB-1) & (uchr)~BYTMASK) - uf ); - for (; from <= to && i > 0; i--, from++) - newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); - if (from > to) /* didn't reach a boundary */ - return; - - /* deal with whole blocks */ - for (; to - from >= BYTTAB; from += BYTTAB) - subblock(v, from, lp, rp); - - /* clean up any remaining partial table */ - for (; from <= to; from++) - newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp); -} - -/* - - subblock - allocate new subcolors for one tree block of chrs, fill in arcs - ^ static VOID subblock(struct vars *, pchr, struct state *, struct state *); - */ -static VOID -subblock(v, start, lp, rp) -struct vars *v; -pchr start; /* first of BYTTAB chrs */ -struct state *lp; -struct state *rp; -{ - uchr uc = start; - struct colormap *cm = v->cm; - int shift; - int level; - int i; - int b; - union tree *t; - union tree *cb; - union tree *fillt; - union tree *lastt; - int previ; - int ndone; - color co; - color sco; - - assert((uc % BYTTAB) == 0); - - /* find its color block, making new pointer blocks as needed */ - t = cm->tree; - fillt = NULL; - for (level = 0, shift = BYTBITS * (NBYTS - 1); shift > 0; - level++, shift -= BYTBITS) { - b = (uc >> shift) & BYTMASK; - lastt = t; - t = lastt->tptr[b]; - assert(t != NULL); - fillt = &cm->tree[level+1]; - if (t == fillt && shift > BYTBITS) { /* need new ptr block */ - t = (union tree *)MALLOC(sizeof(struct ptrs)); - if (t == NULL) { - CERR(REG_ESPACE); - return; - } - memcpy(VS(t->tptr), VS(fillt->tptr), - BYTTAB*sizeof(union tree *)); - lastt->tptr[b] = t; - } - } - - /* special cases: fill block or solid block */ - co = t->tcolor[0]; - cb = cm->cd[co].block; - if (t == fillt || t == cb) { - /* either way, we want a subcolor solid block */ - sco = newsub(cm, co); - t = cm->cd[sco].block; - if (t == NULL) { /* must set it up */ - t = (union tree *)MALLOC(sizeof(struct colors)); - if (t == NULL) { - CERR(REG_ESPACE); - return; - } - for (i = 0; i < BYTTAB; i++) - t->tcolor[i] = sco; - cm->cd[sco].block = t; - } - /* find loop must have run at least once */ - lastt->tptr[b] = t; - newarc(v->nfa, PLAIN, sco, lp, rp); - cm->cd[co].nchrs -= BYTTAB; - cm->cd[sco].nchrs += BYTTAB; - return; - } - - /* general case, a mixed block to be altered */ - i = 0; - while (i < BYTTAB) { - co = t->tcolor[i]; - sco = newsub(cm, co); - newarc(v->nfa, PLAIN, sco, lp, rp); - previ = i; - do { - t->tcolor[i++] = sco; - } while (i < BYTTAB && t->tcolor[i] == co); - ndone = i - previ; - cm->cd[co].nchrs -= ndone; - cm->cd[sco].nchrs += ndone; - } -} - -/* - - okcolors - promote subcolors to full colors - ^ static VOID okcolors(struct nfa *, struct colormap *); - */ -static VOID -okcolors(nfa, cm) -struct nfa *nfa; -struct colormap *cm; -{ - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - struct colordesc *scd; - struct arc *a; - color co; - color sco; - - for (cd = cm->cd, co = 0; cd < end; cd++, co++) { - sco = cd->sub; - if (UNUSEDCOLOR(cd) || sco == NOSUB) { - /* has no subcolor, no further action */ - } else if (sco == co) { - /* is subcolor, let parent deal with it */ - } else if (cd->nchrs == 0) { - /* parent empty, its arcs change color to subcolor */ - cd->sub = NOSUB; - scd = &cm->cd[sco]; - assert(scd->nchrs > 0); - assert(scd->sub == sco); - scd->sub = NOSUB; - while ((a = cd->arcs) != NULL) { - assert(a->co == co); - /* uncolorchain(cm, a); */ - cd->arcs = a->colorchain; - a->co = sco; - /* colorchain(cm, a); */ - a->colorchain = scd->arcs; - scd->arcs = a; - } - freecolor(cm, co); - } else { - /* parent's arcs must gain parallel subcolor arcs */ - cd->sub = NOSUB; - scd = &cm->cd[sco]; - assert(scd->nchrs > 0); - assert(scd->sub == sco); - scd->sub = NOSUB; - for (a = cd->arcs; a != NULL; a = a->colorchain) { - assert(a->co == co); - newarc(nfa, a->type, sco, a->from, a->to); - } - } - } -} - -/* - - colorchain - add this arc to the color chain of its color - ^ static VOID colorchain(struct colormap *, struct arc *); - */ -static VOID -colorchain(cm, a) -struct colormap *cm; -struct arc *a; -{ - struct colordesc *cd = &cm->cd[a->co]; - - a->colorchain = cd->arcs; - cd->arcs = a; -} - -/* - - uncolorchain - delete this arc from the color chain of its color - ^ static VOID uncolorchain(struct colormap *, struct arc *); - */ -static VOID -uncolorchain(cm, a) -struct colormap *cm; -struct arc *a; -{ - struct colordesc *cd = &cm->cd[a->co]; - struct arc *aa; - - aa = cd->arcs; - if (aa == a) /* easy case */ - cd->arcs = a->colorchain; - else { - for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain) - continue; - assert(aa != NULL); - aa->colorchain = a->colorchain; - } - a->colorchain = NULL; /* paranoia */ -} - -/* - - singleton - is this character in its own color? - ^ static int singleton(struct colormap *, pchr c); - */ -static int /* predicate */ -singleton(cm, c) -struct colormap *cm; -pchr c; -{ - color co; /* color of c */ - - co = GETCOLOR(cm, c); - if (cm->cd[co].nchrs == 1 && cm->cd[co].sub == NOSUB) - return 1; - return 0; -} - -/* - - rainbow - add arcs of all full colors (but one) between specified states - ^ static VOID rainbow(struct nfa *, struct colormap *, int, pcolor, - ^ struct state *, struct state *); - */ -static VOID -rainbow(nfa, cm, type, but, from, to) -struct nfa *nfa; -struct colormap *cm; -int type; -pcolor but; /* COLORLESS if no exceptions */ -struct state *from; -struct state *to; -{ - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - color co; - - for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) - if (!UNUSEDCOLOR(cd) && cd->sub != co && co != but && - !(cd->flags&PSEUDO)) - newarc(nfa, type, co, from, to); -} - -/* - - colorcomplement - add arcs of complementary colors - * The calling sequence ought to be reconciled with cloneouts(). - ^ static VOID colorcomplement(struct nfa *, struct colormap *, int, - ^ struct state *, struct state *, struct state *); - */ -static VOID -colorcomplement(nfa, cm, type, of, from, to) -struct nfa *nfa; -struct colormap *cm; -int type; -struct state *of; /* complements of this guy's PLAIN outarcs */ -struct state *from; -struct state *to; -{ - struct colordesc *cd; - struct colordesc *end = CDEND(cm); - color co; - - assert(of != from); - for (cd = cm->cd, co = 0; cd < end && !CISERR(); cd++, co++) - if (!UNUSEDCOLOR(cd) && !(cd->flags&PSEUDO)) - if (findarc(of, PLAIN, co) == NULL) - newarc(nfa, type, co, from, to); -} - - - -#ifdef REG_DEBUG -/* - ^ #ifdef REG_DEBUG - */ - -/* - - dumpcolors - debugging output - ^ static VOID dumpcolors(struct colormap *, FILE *); - */ -static VOID -dumpcolors(cm, f) -struct colormap *cm; -FILE *f; -{ - struct colordesc *cd; - struct colordesc *end; - color co; - chr c; - char *has; - - fprintf(f, "max %ld\n", (long)cm->max); - if (NBYTS > 1) - fillcheck(cm, cm->tree, 0, f); - end = CDEND(cm); - for (cd = cm->cd + 1, co = 1; cd < end; cd++, co++) /* skip 0 */ - if (!UNUSEDCOLOR(cd)) { - assert(cd->nchrs > 0); - has = (cd->block != NULL) ? "#" : ""; - if (cd->flags&PSEUDO) - fprintf(f, "#%2ld%s(ps): ", (long)co, has); - else - fprintf(f, "#%2ld%s(%2d): ", (long)co, - has, cd->nchrs); - /* it's hard to do this more efficiently */ - for (c = CHR_MIN; c < CHR_MAX; c++) - if (GETCOLOR(cm, c) == co) - dumpchr(c, f); - assert(c == CHR_MAX); - if (GETCOLOR(cm, c) == co) - dumpchr(c, f); - fprintf(f, "\n"); - } -} - -/* - - fillcheck - check proper filling of a tree - ^ static VOID fillcheck(struct colormap *, union tree *, int, FILE *); - */ -static VOID -fillcheck(cm, tree, level, f) -struct colormap *cm; -union tree *tree; -int level; /* level number (top == 0) of this block */ -FILE *f; -{ - int i; - union tree *t; - union tree *fillt = &cm->tree[level+1]; - - assert(level < NBYTS-1); /* this level has pointers */ - for (i = BYTTAB-1; i >= 0; i--) { - t = tree->tptr[i]; - if (t == NULL) - fprintf(f, "NULL found in filled tree!\n"); - else if (t == fillt) - {} - else if (level < NBYTS-2) /* more pointer blocks below */ - fillcheck(cm, t, level+1, f); - } -} - -/* - - dumpchr - print a chr - * Kind of char-centric but works well enough for debug use. - ^ static VOID dumpchr(pchr, FILE *); - */ -static VOID -dumpchr(c, f) -pchr c; -FILE *f; -{ - if (c == '\\') - fprintf(f, "\\\\"); - else if (c > ' ' && c <= '~') - putc((char)c, f); - else - fprintf(f, "\\u%04lx", (long)c); -} - -/* - ^ #endif - */ -#endif /* ifdef REG_DEBUG */ diff --git a/src/regex/regc_lex.c b/src/regex/regc_lex.c deleted file mode 100644 index 1acc3f4cae..0000000000 --- a/src/regex/regc_lex.c +++ /dev/null @@ -1,1061 +0,0 @@ -/* - * lexical analyzer - * This file is #included by regcomp.c. - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -/* scanning macros (know about v) */ -#define ATEOS() (v->now >= v->stop) -#define HAVE(n) (v->stop - v->now >= (n)) -#define NEXT1(c) (!ATEOS() && *v->now == CHR(c)) -#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b)) -#define NEXT3(a,b,c) (HAVE(3) && *v->now == CHR(a) && \ - *(v->now+1) == CHR(b) && \ - *(v->now+2) == CHR(c)) -#define SET(c) (v->nexttype = (c)) -#define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n)) -#define RET(c) return (SET(c), 1) -#define RETV(c, n) return (SETV(c, n), 1) -#define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */ -#define LASTTYPE(t) (v->lasttype == (t)) - -/* lexical contexts */ -#define L_ERE 1 /* mainline ERE/ARE */ -#define L_BRE 2 /* mainline BRE */ -#define L_Q 3 /* REG_QUOTE */ -#define L_EBND 4 /* ERE/ARE bound */ -#define L_BBND 5 /* BRE bound */ -#define L_BRACK 6 /* brackets */ -#define L_CEL 7 /* collating element */ -#define L_ECL 8 /* equivalence class */ -#define L_CCL 9 /* character class */ -#define INTOCON(c) (v->lexcon = (c)) -#define INCON(con) (v->lexcon == (con)) - -/* construct pointer past end of chr array */ -#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr)) - -/* - - lexstart - set up lexical stuff, scan leading options - ^ static VOID lexstart(struct vars *); - */ -static VOID -lexstart(v) -struct vars *v; -{ - prefixes(v); /* may turn on new type bits etc. */ - NOERR(); - - if (v->cflags®_QUOTE) { - assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))); - INTOCON(L_Q); - } else if (v->cflags®_EXTENDED) { - assert(!(v->cflags®_QUOTE)); - INTOCON(L_ERE); - } else { - assert(!(v->cflags&(REG_QUOTE|REG_ADVF))); - INTOCON(L_BRE); - } - - v->nexttype = EMPTY; /* remember we were at the start */ - next(v); /* set up the first token */ -} - -/* - - prefixes - implement various special prefixes - ^ static VOID prefixes(struct vars *); - */ -static VOID -prefixes(v) -struct vars *v; -{ - /* literal string doesn't get any of this stuff */ - if (v->cflags®_QUOTE) - return; - - /* initial "***" gets special things */ - if (HAVE(4) && NEXT3('*', '*', '*')) - switch (*(v->now + 3)) { - case CHR('?'): /* "***?" error, msg shows version */ - ERR(REG_BADPAT); - return; /* proceed no further */ - break; - case CHR('='): /* "***=" shifts to literal string */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_QUOTE; - v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE); - v->now += 4; - return; /* and there can be no more prefixes */ - break; - case CHR(':'): /* "***:" shifts to AREs */ - NOTE(REG_UNONPOSIX); - v->cflags |= REG_ADVANCED; - v->now += 4; - break; - default: /* otherwise *** is just an error */ - ERR(REG_BADRPT); - return; - break; - } - - /* BREs and EREs don't get embedded options */ - if ((v->cflags®_ADVANCED) != REG_ADVANCED) - return; - - /* embedded options (AREs only) */ - if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) { - NOTE(REG_UNONPOSIX); - v->now += 2; - for (; !ATEOS() && iscalpha(*v->now); v->now++) - switch (*v->now) { - case CHR('b'): /* BREs (but why???) */ - v->cflags &= ~(REG_ADVANCED|REG_QUOTE); - break; - case CHR('c'): /* case sensitive */ - v->cflags &= ~REG_ICASE; - break; - case CHR('e'): /* plain EREs */ - v->cflags |= REG_EXTENDED; - v->cflags &= ~(REG_ADVF|REG_QUOTE); - break; - case CHR('i'): /* case insensitive */ - v->cflags |= REG_ICASE; - break; - case CHR('m'): /* Perloid synonym for n */ - case CHR('n'): /* \n affects ^ $ . [^ */ - v->cflags |= REG_NEWLINE; - break; - case CHR('p'): /* ~Perl, \n affects . [^ */ - v->cflags |= REG_NLSTOP; - v->cflags &= ~REG_NLANCH; - break; - case CHR('q'): /* literal string */ - v->cflags |= REG_QUOTE; - v->cflags &= ~REG_ADVANCED; - break; - case CHR('s'): /* single line, \n ordinary */ - v->cflags &= ~REG_NEWLINE; - break; - case CHR('t'): /* tight syntax */ - v->cflags &= ~REG_EXPANDED; - break; - case CHR('w'): /* weird, \n affects ^ $ only */ - v->cflags &= ~REG_NLSTOP; - v->cflags |= REG_NLANCH; - break; - case CHR('x'): /* expanded syntax */ - v->cflags |= REG_EXPANDED; - break; - default: - ERR(REG_BADOPT); - return; - } - if (!NEXT1(')')) { - ERR(REG_BADOPT); - return; - } - v->now++; - if (v->cflags®_QUOTE) - v->cflags &= ~(REG_EXPANDED|REG_NEWLINE); - } -} - -/* - - lexnest - "call a subroutine", interpolating string at the lexical level - * Note, this is not a very general facility. There are a number of - * implicit assumptions about what sorts of strings can be subroutines. - ^ static VOID lexnest(struct vars *, chr *, chr *); - */ -static VOID -lexnest(v, beginp, endp) -struct vars *v; -chr *beginp; /* start of interpolation */ -chr *endp; /* one past end of interpolation */ -{ - assert(v->savenow == NULL); /* only one level of nesting */ - v->savenow = v->now; - v->savestop = v->stop; - v->now = beginp; - v->stop = endp; -} - -/* - * string constants to interpolate as expansions of things like \d - */ -static chr backd[] = { /* \d */ - CHR('['), CHR('['), CHR(':'), - CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']'), CHR(']') -}; -static chr backD[] = { /* \D */ - CHR('['), CHR('^'), CHR('['), CHR(':'), - CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']'), CHR(']') -}; -static chr brbackd[] = { /* \d within brackets */ - CHR('['), CHR(':'), - CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'), - CHR(':'), CHR(']') -}; -static chr backs[] = { /* \s */ - CHR('['), CHR('['), CHR(':'), - CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']'), CHR(']') -}; -static chr backS[] = { /* \S */ - CHR('['), CHR('^'), CHR('['), CHR(':'), - CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']'), CHR(']') -}; -static chr brbacks[] = { /* \s within brackets */ - CHR('['), CHR(':'), - CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'), - CHR(':'), CHR(']') -}; -static chr backw[] = { /* \w */ - CHR('['), CHR('['), CHR(':'), - CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_'), CHR(']') -}; -static chr backW[] = { /* \W */ - CHR('['), CHR('^'), CHR('['), CHR(':'), - CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_'), CHR(']') -}; -static chr brbackw[] = { /* \w within brackets */ - CHR('['), CHR(':'), - CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'), - CHR(':'), CHR(']'), CHR('_') -}; - -/* - - lexword - interpolate a bracket expression for word characters - * Possibly ought to inquire whether there is a "word" character class. - ^ static VOID lexword(struct vars *); - */ -static VOID -lexword(v) -struct vars *v; -{ - lexnest(v, backw, ENDOF(backw)); -} - -/* - - next - get next token - ^ static int next(struct vars *); - */ -static int /* 1 normal, 0 failure */ -next(v) -struct vars *v; -{ - chr c; - - /* errors yield an infinite sequence of failures */ - if (ISERR()) - return 0; /* the error has set nexttype to EOS */ - - /* remember flavor of last token */ - v->lasttype = v->nexttype; - - /* REG_BOSONLY */ - if (v->nexttype == EMPTY && (v->cflags®_BOSONLY)) { - /* at start of a REG_BOSONLY RE */ - RETV(SBEGIN, 0); /* same as \A */ - } - - /* if we're nested and we've hit end, return to outer level */ - if (v->savenow != NULL && ATEOS()) { - v->now = v->savenow; - v->stop = v->savestop; - v->savenow = v->savestop = NULL; - } - - /* skip white space etc. if appropriate (not in literal or []) */ - if (v->cflags®_EXPANDED) - switch (v->lexcon) { - case L_ERE: - case L_BRE: - case L_EBND: - case L_BBND: - skip(v); - break; - } - - /* handle EOS, depending on context */ - if (ATEOS()) { - switch (v->lexcon) { - case L_ERE: - case L_BRE: - case L_Q: - RET(EOS); - break; - case L_EBND: - case L_BBND: - FAILW(REG_EBRACE); - break; - case L_BRACK: - case L_CEL: - case L_ECL: - case L_CCL: - FAILW(REG_EBRACK); - break; - } - assert(NOTREACHED); - } - - /* okay, time to actually get a character */ - c = *v->now++; - - /* deal with the easy contexts, punt EREs to code below */ - switch (v->lexcon) { - case L_BRE: /* punt BREs to separate function */ - return brenext(v, c); - break; - case L_ERE: /* see below */ - break; - case L_Q: /* literal strings are easy */ - RETV(PLAIN, c); - break; - case L_BBND: /* bounds are fairly simple */ - case L_EBND: - switch (c) { - case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): - case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): - case CHR('8'): case CHR('9'): - RETV(DIGIT, (chr)DIGITVAL(c)); - break; - case CHR(','): - RET(','); - break; - case CHR('}'): /* ERE bound ends with } */ - if (INCON(L_EBND)) { - INTOCON(L_ERE); - if ((v->cflags®_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('}', 0); - } - RETV('}', 1); - } else - FAILW(REG_BADBR); - break; - case CHR('\\'): /* BRE bound ends with \} */ - if (INCON(L_BBND) && NEXT1('}')) { - v->now++; - INTOCON(L_BRE); - RET('}'); - } else - FAILW(REG_BADBR); - break; - default: - FAILW(REG_BADBR); - break; - } - assert(NOTREACHED); - break; - case L_BRACK: /* brackets are not too hard */ - switch (c) { - case CHR(']'): - if (LASTTYPE('[')) - RETV(PLAIN, c); - else { - INTOCON((v->cflags®_EXTENDED) ? - L_ERE : L_BRE); - RET(']'); - } - break; - case CHR('\\'): - NOTE(REG_UBBS); - if (!(v->cflags®_ADVF)) - RETV(PLAIN, c); - NOTE(REG_UNONPOSIX); - if (ATEOS()) - FAILW(REG_EESCAPE); - (DISCARD)lexescape(v); - switch (v->nexttype) { /* not all escapes okay here */ - case PLAIN: - return 1; - break; - case CCLASS: - switch (v->nextvalue) { - case 'd': - lexnest(v, brbackd, ENDOF(brbackd)); - break; - case 's': - lexnest(v, brbacks, ENDOF(brbacks)); - break; - case 'w': - lexnest(v, brbackw, ENDOF(brbackw)); - break; - default: - FAILW(REG_EESCAPE); - break; - } - /* lexnest done, back up and try again */ - v->nexttype = v->lasttype; - return next(v); - break; - } - /* not one of the acceptable escapes */ - FAILW(REG_EESCAPE); - break; - case CHR('-'): - if (LASTTYPE('[') || NEXT1(']')) - RETV(PLAIN, c); - else - RETV(RANGE, c); - break; - case CHR('['): - if (ATEOS()) - FAILW(REG_EBRACK); - switch (*v->now++) { - case CHR('.'): - INTOCON(L_CEL); - /* might or might not be locale-specific */ - RET(COLLEL); - break; - case CHR('='): - INTOCON(L_ECL); - NOTE(REG_ULOCALE); - RET(ECLASS); - break; - case CHR(':'): - INTOCON(L_CCL); - NOTE(REG_ULOCALE); - RET(CCLASS); - break; - default: /* oops */ - v->now--; - RETV(PLAIN, c); - break; - } - assert(NOTREACHED); - break; - default: - RETV(PLAIN, c); - break; - } - assert(NOTREACHED); - break; - case L_CEL: /* collating elements are easy */ - if (c == CHR('.') && NEXT1(']')) { - v->now++; - INTOCON(L_BRACK); - RETV(END, '.'); - } else - RETV(PLAIN, c); - break; - case L_ECL: /* ditto equivalence classes */ - if (c == CHR('=') && NEXT1(']')) { - v->now++; - INTOCON(L_BRACK); - RETV(END, '='); - } else - RETV(PLAIN, c); - break; - case L_CCL: /* ditto character classes */ - if (c == CHR(':') && NEXT1(']')) { - v->now++; - INTOCON(L_BRACK); - RETV(END, ':'); - } else - RETV(PLAIN, c); - break; - default: - assert(NOTREACHED); - break; - } - - /* that got rid of everything except EREs and AREs */ - assert(INCON(L_ERE)); - - /* deal with EREs and AREs, except for backslashes */ - switch (c) { - case CHR('|'): - RET('|'); - break; - case CHR('*'): - if ((v->cflags®_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('*', 0); - } - RETV('*', 1); - break; - case CHR('+'): - if ((v->cflags®_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('+', 0); - } - RETV('+', 1); - break; - case CHR('?'): - if ((v->cflags®_ADVF) && NEXT1('?')) { - v->now++; - NOTE(REG_UNONPOSIX); - RETV('?', 0); - } - RETV('?', 1); - break; - case CHR('{'): /* bounds start or plain character */ - if (v->cflags®_EXPANDED) - skip(v); - if (ATEOS() || !iscdigit(*v->now)) { - NOTE(REG_UBRACES); - NOTE(REG_UUNSPEC); - RETV(PLAIN, c); - } else { - NOTE(REG_UBOUNDS); - INTOCON(L_EBND); - RET('{'); - } - assert(NOTREACHED); - break; - case CHR('('): /* parenthesis, or advanced extension */ - if ((v->cflags®_ADVF) && NEXT1('?')) { - NOTE(REG_UNONPOSIX); - v->now++; - switch (*v->now++) { - case CHR(':'): /* non-capturing paren */ - RETV('(', 0); - break; - case CHR('#'): /* comment */ - while (!ATEOS() && *v->now != CHR(')')) - v->now++; - if (!ATEOS()) - v->now++; - assert(v->nexttype == v->lasttype); - return next(v); - break; - case CHR('='): /* positive lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 1); - break; - case CHR('!'): /* negative lookahead */ - NOTE(REG_ULOOKAHEAD); - RETV(LACON, 0); - break; - default: - FAILW(REG_BADRPT); - break; - } - assert(NOTREACHED); - } - if (v->cflags®_NOSUB) - RETV('(', 0); /* all parens non-capturing */ - else - RETV('(', 1); - break; - case CHR(')'): - if (LASTTYPE('(')) { - NOTE(REG_UUNSPEC); - } - RETV(')', c); - break; - case CHR('['): /* easy except for [[:<:]] and [[:>:]] */ - if (HAVE(6) && *(v->now+0) == CHR('[') && - *(v->now+1) == CHR(':') && - (*(v->now+2) == CHR('<') || - *(v->now+2) == CHR('>')) && - *(v->now+3) == CHR(':') && - *(v->now+4) == CHR(']') && - *(v->now+5) == CHR(']')) { - c = *(v->now+2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ? '<' : '>'); - } - INTOCON(L_BRACK); - if (NEXT1('^')) { - v->now++; - RETV('[', 0); - } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); - break; - case CHR('^'): - RET('^'); - break; - case CHR('$'): - RET('$'); - break; - case CHR('\\'): /* mostly punt backslashes to code below */ - if (ATEOS()) - FAILW(REG_EESCAPE); - break; - default: /* ordinary character */ - RETV(PLAIN, c); - break; - } - - /* ERE/ARE backslash handling; backslash already eaten */ - assert(!ATEOS()); - if (!(v->cflags®_ADVF)) { /* only AREs have non-trivial escapes */ - if (iscalnum(*v->now)) { - NOTE(REG_UBSALNUM); - NOTE(REG_UUNSPEC); - } - RETV(PLAIN, *v->now++); - } - (DISCARD)lexescape(v); - if (ISERR()) - FAILW(REG_EESCAPE); - if (v->nexttype == CCLASS) { /* fudge at lexical level */ - switch (v->nextvalue) { - case 'd': lexnest(v, backd, ENDOF(backd)); break; - case 'D': lexnest(v, backD, ENDOF(backD)); break; - case 's': lexnest(v, backs, ENDOF(backs)); break; - case 'S': lexnest(v, backS, ENDOF(backS)); break; - case 'w': lexnest(v, backw, ENDOF(backw)); break; - case 'W': lexnest(v, backW, ENDOF(backW)); break; - default: - assert(NOTREACHED); - FAILW(REG_ASSERT); - break; - } - /* lexnest done, back up and try again */ - v->nexttype = v->lasttype; - return next(v); - } - /* otherwise, lexescape has already done the work */ - return !ISERR(); -} - -/* - - lexescape - parse an ARE backslash escape (backslash already eaten) - * Note slightly nonstandard use of the CCLASS type code. - ^ static int lexescape(struct vars *); - */ -static int /* not actually used, but convenient for RETV */ -lexescape(v) -struct vars *v; -{ - chr c; - static chr alert[] = { - CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') - }; - static chr esc[] = { - CHR('E'), CHR('S'), CHR('C') - }; - chr *save; - - assert(v->cflags®_ADVF); - - assert(!ATEOS()); - c = *v->now++; - if (!iscalnum(c)) - RETV(PLAIN, c); - - NOTE(REG_UNONPOSIX); - switch (c) { - case CHR('a'): - RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007'))); - break; - case CHR('A'): - RETV(SBEGIN, 0); - break; - case CHR('b'): - RETV(PLAIN, CHR('\b')); - break; - case CHR('B'): - RETV(PLAIN, CHR('\\')); - break; - case CHR('c'): - NOTE(REG_UUNPORT); - if (ATEOS()) - FAILW(REG_EESCAPE); - RETV(PLAIN, (chr)(*v->now++ & 037)); - break; - case CHR('d'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'd'); - break; - case CHR('D'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'D'); - break; - case CHR('e'): - NOTE(REG_UUNPORT); - RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033'))); - break; - case CHR('f'): - RETV(PLAIN, CHR('\f')); - break; - case CHR('m'): - RET('<'); - break; - case CHR('M'): - RET('>'); - break; - case CHR('n'): - RETV(PLAIN, CHR('\n')); - break; - case CHR('r'): - RETV(PLAIN, CHR('\r')); - break; - case CHR('s'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 's'); - break; - case CHR('S'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'S'); - break; - case CHR('t'): - RETV(PLAIN, CHR('\t')); - break; - case CHR('u'): - c = lexdigits(v, 16, 4, 4); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('U'): - c = lexdigits(v, 16, 8, 8); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('v'): - RETV(PLAIN, CHR('\v')); - break; - case CHR('w'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'w'); - break; - case CHR('W'): - NOTE(REG_ULOCALE); - RETV(CCLASS, 'W'); - break; - case CHR('x'): - NOTE(REG_UUNPORT); - c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - case CHR('y'): - NOTE(REG_ULOCALE); - RETV(WBDRY, 0); - break; - case CHR('Y'): - NOTE(REG_ULOCALE); - RETV(NWBDRY, 0); - break; - case CHR('Z'): - RETV(SEND, 0); - break; - case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): - case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): - case CHR('9'): - save = v->now; - v->now--; /* put first digit back */ - c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */ - if (ISERR()) - FAILW(REG_EESCAPE); - /* ugly heuristic (first test is "exactly 1 digit?") */ - if (v->now - save == 0 || (int)c <= v->nsubexp) { - NOTE(REG_UBACKREF); - RETV(BACKREF, (chr)c); - } - /* oops, doesn't look like it's a backref after all... */ - v->now = save; - /* and fall through into octal number */ - case CHR('0'): - NOTE(REG_UUNPORT); - v->now--; /* put first digit back */ - c = lexdigits(v, 8, 1, 3); - if (ISERR()) - FAILW(REG_EESCAPE); - RETV(PLAIN, c); - break; - default: - assert(iscalpha(c)); - FAILW(REG_EESCAPE); /* unknown alphabetic escape */ - break; - } - assert(NOTREACHED); -} - -/* - - lexdigits - slurp up digits and return chr value - ^ static chr lexdigits(struct vars *, int, int, int); - */ -static chr /* chr value; errors signalled via ERR */ -lexdigits(v, base, minlen, maxlen) -struct vars *v; -int base; -int minlen; -int maxlen; -{ - uchr n; /* unsigned to avoid overflow misbehavior */ - int len; - chr c; - int d; - CONST uchr ub = (uchr) base; - - n = 0; - for (len = 0; len < maxlen && !ATEOS(); len++) { - c = *v->now++; - switch (c) { - case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'): - case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'): - case CHR('8'): case CHR('9'): - d = DIGITVAL(c); - break; - case CHR('a'): case CHR('A'): d = 10; break; - case CHR('b'): case CHR('B'): d = 11; break; - case CHR('c'): case CHR('C'): d = 12; break; - case CHR('d'): case CHR('D'): d = 13; break; - case CHR('e'): case CHR('E'): d = 14; break; - case CHR('f'): case CHR('F'): d = 15; break; - default: - v->now--; /* oops, not a digit at all */ - d = -1; - break; - } - - if (d >= base) { /* not a plausible digit */ - v->now--; - d = -1; - } - if (d < 0) - break; /* NOTE BREAK OUT */ - n = n*ub + (uchr)d; - } - if (len < minlen) - ERR(REG_EESCAPE); - - return (chr)n; -} - -/* - - brenext - get next BRE token - * This is much like EREs except for all the stupid backslashes and the - * context-dependency of some things. - ^ static int brenext(struct vars *, pchr); - */ -static int /* 1 normal, 0 failure */ -brenext(v, pc) -struct vars *v; -pchr pc; -{ - chr c = (chr)pc; - - switch (c) { - case CHR('*'): - if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) - RETV(PLAIN, c); - RET('*'); - break; - case CHR('['): - if (HAVE(6) && *(v->now+0) == CHR('[') && - *(v->now+1) == CHR(':') && - (*(v->now+2) == CHR('<') || - *(v->now+2) == CHR('>')) && - *(v->now+3) == CHR(':') && - *(v->now+4) == CHR(']') && - *(v->now+5) == CHR(']')) { - c = *(v->now+2); - v->now += 6; - NOTE(REG_UNONPOSIX); - RET((c == CHR('<')) ? '<' : '>'); - } - INTOCON(L_BRACK); - if (NEXT1('^')) { - v->now++; - RETV('[', 0); - } - RETV('[', 1); - break; - case CHR('.'): - RET('.'); - break; - case CHR('^'): - if (LASTTYPE(EMPTY)) - RET('^'); - if (LASTTYPE('(')) { - NOTE(REG_UUNSPEC); - RET('^'); - } - RETV(PLAIN, c); - break; - case CHR('$'): - if (v->cflags®_EXPANDED) - skip(v); - if (ATEOS()) - RET('$'); - if (NEXT2('\\', ')')) { - NOTE(REG_UUNSPEC); - RET('$'); - } - RETV(PLAIN, c); - break; - case CHR('\\'): - break; /* see below */ - default: - RETV(PLAIN, c); - break; - } - - assert(c == CHR('\\')); - - if (ATEOS()) - FAILW(REG_EESCAPE); - - c = *v->now++; - switch (c) { - case CHR('{'): - INTOCON(L_BBND); - NOTE(REG_UBOUNDS); - RET('{'); - break; - case CHR('('): - RETV('(', 1); - break; - case CHR(')'): - RETV(')', c); - break; - case CHR('<'): - NOTE(REG_UNONPOSIX); - RET('<'); - break; - case CHR('>'): - NOTE(REG_UNONPOSIX); - RET('>'); - break; - case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'): - case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'): - case CHR('9'): - NOTE(REG_UBACKREF); - RETV(BACKREF, (chr)DIGITVAL(c)); - break; - default: - if (iscalnum(c)) { - NOTE(REG_UBSALNUM); - NOTE(REG_UUNSPEC); - } - RETV(PLAIN, c); - break; - } - - assert(NOTREACHED); -} - -/* - - skip - skip white space and comments in expanded form - ^ static VOID skip(struct vars *); - */ -static VOID -skip(v) -struct vars *v; -{ - chr *start = v->now; - - assert(v->cflags®_EXPANDED); - - for (;;) { - while (!ATEOS() && iscspace(*v->now)) - v->now++; - if (ATEOS() || *v->now != CHR('#')) - break; /* NOTE BREAK OUT */ - assert(NEXT1('#')); - while (!ATEOS() && *v->now != CHR('\n')) - v->now++; - /* leave the newline to be picked up by the iscspace loop */ - } - - if (v->now != start) - NOTE(REG_UNONPOSIX); -} - -/* - - newline - return the chr for a newline - * This helps confine use of CHR to this source file. - ^ static chr newline(NOPARMS); - */ -static chr -newline() -{ - return CHR('\n'); -} - -/* - - ch - return the chr sequence for regc_locale.c's fake collating element ch - * This helps confine use of CHR to this source file. Beware that the caller - * knows how long the sequence is. - ^ #ifdef REG_DEBUG - ^ static chr *ch(NOPARMS); - ^ #endif - */ -#ifdef REG_DEBUG -static chr * -ch() -{ - static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') }; - - return chstr; -} -#endif - -/* - - chrnamed - return the chr known by a given (chr string) name - * The code is a bit clumsy, but this routine gets only such specialized - * use that it hardly matters. - ^ static chr chrnamed(struct vars *, chr *, chr *, pchr); - */ -static chr -chrnamed(v, startp, endp, lastresort) -struct vars *v; -chr *startp; /* start of name */ -chr *endp; /* just past end of name */ -pchr lastresort; /* what to return if name lookup fails */ -{ - celt c; - int errsave; - int e; - struct cvec *cv; - - errsave = v->err; - v->err = 0; - c = element(v, startp, endp); - e = v->err; - v->err = errsave; - - if (e != 0) - return (chr)lastresort; - - cv = range(v, c, c, 0); - if (cv->nchrs == 0) - return (chr)lastresort; - return cv->chrs[0]; -} diff --git a/src/regex/regc_nfa.c b/src/regex/regc_nfa.c deleted file mode 100644 index 9881cd4304..0000000000 --- a/src/regex/regc_nfa.c +++ /dev/null @@ -1,1575 +0,0 @@ -/* - * NFA utilities. - * This file is #included by regcomp.c. - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * - * - * One or two things that technically ought to be in here - * are actually in color.c, thanks to some incestuous relationships in - * the color chains. - */ - -#define NISERR() VISERR(nfa->v) -#define NERR(e) VERR(nfa->v, (e)) - - -/* - - newnfa - set up an NFA - ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *); - */ -static struct nfa * /* the NFA, or NULL */ -newnfa(v, cm, parent) -struct vars *v; -struct colormap *cm; -struct nfa *parent; /* NULL if primary NFA */ -{ - struct nfa *nfa; - - nfa = (struct nfa *)MALLOC(sizeof(struct nfa)); - if (nfa == NULL) - return NULL; - - nfa->states = NULL; - nfa->slast = NULL; - nfa->free = NULL; - nfa->nstates = 0; - nfa->cm = cm; - nfa->v = v; - nfa->bos[0] = nfa->bos[1] = COLORLESS; - nfa->eos[0] = nfa->eos[1] = COLORLESS; - nfa->post = newfstate(nfa, '@'); /* number 0 */ - nfa->pre = newfstate(nfa, '>'); /* number 1 */ - nfa->parent = parent; - - nfa->init = newstate(nfa); /* may become invalid later */ - nfa->final = newstate(nfa); - if (ISERR()) { - freenfa(nfa); - return NULL; - } - rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init); - newarc(nfa, '^', 1, nfa->pre, nfa->init); - newarc(nfa, '^', 0, nfa->pre, nfa->init); - rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post); - newarc(nfa, '$', 1, nfa->final, nfa->post); - newarc(nfa, '$', 0, nfa->final, nfa->post); - - if (ISERR()) { - freenfa(nfa); - return NULL; - } - return nfa; -} - -/* - - freenfa - free an entire NFA - ^ static VOID freenfa(struct nfa *); - */ -static VOID -freenfa(nfa) -struct nfa *nfa; -{ - struct state *s; - - while ((s = nfa->states) != NULL) { - s->nins = s->nouts = 0; /* don't worry about arcs */ - freestate(nfa, s); - } - while ((s = nfa->free) != NULL) { - nfa->free = s->next; - destroystate(nfa, s); - } - - nfa->slast = NULL; - nfa->nstates = -1; - nfa->pre = NULL; - nfa->post = NULL; - FREE(nfa); -} - -/* - - newstate - allocate an NFA state, with zero flag value - ^ static struct state *newstate(struct nfa *); - */ -static struct state * /* NULL on error */ -newstate(nfa) -struct nfa *nfa; -{ - struct state *s; - - if (nfa->free != NULL) { - s = nfa->free; - nfa->free = s->next; - } else { - s = (struct state *)MALLOC(sizeof(struct state)); - if (s == NULL) { - NERR(REG_ESPACE); - return NULL; - } - s->oas.next = NULL; - s->free = NULL; - s->noas = 0; - } - - assert(nfa->nstates >= 0); - s->no = nfa->nstates++; - s->flag = 0; - if (nfa->states == NULL) - nfa->states = s; - s->nins = 0; - s->ins = NULL; - s->nouts = 0; - s->outs = NULL; - s->tmp = NULL; - s->next = NULL; - if (nfa->slast != NULL) { - assert(nfa->slast->next == NULL); - nfa->slast->next = s; - } - s->prev = nfa->slast; - nfa->slast = s; - return s; -} - -/* - - newfstate - allocate an NFA state with a specified flag value - ^ static struct state *newfstate(struct nfa *, int flag); - */ -static struct state * /* NULL on error */ -newfstate(nfa, flag) -struct nfa *nfa; -int flag; -{ - struct state *s; - - s = newstate(nfa); - if (s != NULL) - s->flag = (char)flag; - return s; -} - -/* - - dropstate - delete a state's inarcs and outarcs and free it - ^ static VOID dropstate(struct nfa *, struct state *); - */ -static VOID -dropstate(nfa, s) -struct nfa *nfa; -struct state *s; -{ - struct arc *a; - - while ((a = s->ins) != NULL) - freearc(nfa, a); - while ((a = s->outs) != NULL) - freearc(nfa, a); - freestate(nfa, s); -} - -/* - - freestate - free a state, which has no in-arcs or out-arcs - ^ static VOID freestate(struct nfa *, struct state *); - */ -static VOID -freestate(nfa, s) -struct nfa *nfa; -struct state *s; -{ - assert(s != NULL); - assert(s->nins == 0 && s->nouts == 0); - - s->no = FREESTATE; - s->flag = 0; - if (s->next != NULL) - s->next->prev = s->prev; - else { - assert(s == nfa->slast); - nfa->slast = s->prev; - } - if (s->prev != NULL) - s->prev->next = s->next; - else { - assert(s == nfa->states); - nfa->states = s->next; - } - s->prev = NULL; - s->next = nfa->free; /* don't delete it, put it on the free list */ - nfa->free = s; -} - -/* - - destroystate - really get rid of an already-freed state - ^ static VOID destroystate(struct nfa *, struct state *); - */ -static VOID -destroystate(nfa, s) -struct nfa *nfa; -struct state *s; -{ - struct arcbatch *ab; - struct arcbatch *abnext; - - assert(s->no == FREESTATE); - for (ab = s->oas.next; ab != NULL; ab = abnext) { - abnext = ab->next; - FREE(ab); - } - s->ins = NULL; - s->outs = NULL; - s->next = NULL; - FREE(s); -} - -/* - - newarc - set up a new arc within an NFA - ^ static VOID newarc(struct nfa *, int, pcolor, struct state *, - ^ struct state *); - */ -static VOID -newarc(nfa, t, co, from, to) -struct nfa *nfa; -int t; -pcolor co; -struct state *from; -struct state *to; -{ - struct arc *a; - - assert(from != NULL && to != NULL); - - /* check for duplicates */ - for (a = from->outs; a != NULL; a = a->outchain) - if (a->to == to && a->co == co && a->type == t) - return; - - a = allocarc(nfa, from); - if (NISERR()) - return; - assert(a != NULL); - - a->type = t; - a->co = (color)co; - a->to = to; - a->from = from; - - /* - * Put the new arc on the beginning, not the end, of the chains. - * Not only is this easier, it has the very useful side effect that - * deleting the most-recently-added arc is the cheapest case rather - * than the most expensive one. - */ - a->inchain = to->ins; - to->ins = a; - a->outchain = from->outs; - from->outs = a; - - from->nouts++; - to->nins++; - - if (COLORED(a) && nfa->parent == NULL) - colorchain(nfa->cm, a); - - return; -} - -/* - - allocarc - allocate a new out-arc within a state - ^ static struct arc *allocarc(struct nfa *, struct state *); - */ -static struct arc * /* NULL for failure */ -allocarc(nfa, s) -struct nfa *nfa; -struct state *s; -{ - struct arc *a; - struct arcbatch *new; - int i; - - /* shortcut */ - if (s->free == NULL && s->noas < ABSIZE) { - a = &s->oas.a[s->noas]; - s->noas++; - return a; - } - - /* if none at hand, get more */ - if (s->free == NULL) { - new = (struct arcbatch *)MALLOC(sizeof(struct arcbatch)); - if (new == NULL) { - NERR(REG_ESPACE); - return NULL; - } - new->next = s->oas.next; - s->oas.next = new; - - for (i = 0; i < ABSIZE; i++) { - new->a[i].type = 0; - new->a[i].freechain = &new->a[i+1]; - } - new->a[ABSIZE-1].freechain = NULL; - s->free = &new->a[0]; - } - assert(s->free != NULL); - - a = s->free; - s->free = a->freechain; - return a; -} - -/* - - freearc - free an arc - ^ static VOID freearc(struct nfa *, struct arc *); - */ -static VOID -freearc(nfa, victim) -struct nfa *nfa; -struct arc *victim; -{ - struct state *from = victim->from; - struct state *to = victim->to; - struct arc *a; - - assert(victim->type != 0); - - /* take it off color chain if necessary */ - if (COLORED(victim) && nfa->parent == NULL) - uncolorchain(nfa->cm, victim); - - /* take it off source's out-chain */ - assert(from != NULL); - assert(from->outs != NULL); - a = from->outs; - if (a == victim) /* simple case: first in chain */ - from->outs = victim->outchain; - else { - for (; a != NULL && a->outchain != victim; a = a->outchain) - continue; - assert(a != NULL); - a->outchain = victim->outchain; - } - from->nouts--; - - /* take it off target's in-chain */ - assert(to != NULL); - assert(to->ins != NULL); - a = to->ins; - if (a == victim) /* simple case: first in chain */ - to->ins = victim->inchain; - else { - for (; a != NULL && a->inchain != victim; a = a->inchain) - continue; - assert(a != NULL); - a->inchain = victim->inchain; - } - to->nins--; - - /* clean up and place on free list */ - victim->type = 0; - victim->from = NULL; /* precautions... */ - victim->to = NULL; - victim->inchain = NULL; - victim->outchain = NULL; - victim->freechain = from->free; - from->free = victim; -} - -/* - - findarc - find arc, if any, from given source with given type and color - * If there is more than one such arc, the result is random. - ^ static struct arc *findarc(struct state *, int, pcolor); - */ -static struct arc * -findarc(s, type, co) -struct state *s; -int type; -pcolor co; -{ - struct arc *a; - - for (a = s->outs; a != NULL; a = a->outchain) - if (a->type == type && a->co == co) - return a; - return NULL; -} - -/* - - cparc - allocate a new arc within an NFA, copying details from old one - ^ static VOID cparc(struct nfa *, struct arc *, struct state *, - ^ struct state *); - */ -static VOID -cparc(nfa, oa, from, to) -struct nfa *nfa; -struct arc *oa; -struct state *from; -struct state *to; -{ - newarc(nfa, oa->type, oa->co, from, to); -} - -/* - - moveins - move all in arcs of a state to another state - * You might think this could be done better by just updating the - * existing arcs, and you would be right if it weren't for the desire - * for duplicate suppression, which makes it easier to just make new - * ones to exploit the suppression built into newarc. - ^ static VOID moveins(struct nfa *, struct state *, struct state *); - */ -static VOID -moveins(nfa, old, new) -struct nfa *nfa; -struct state *old; -struct state *new; -{ - struct arc *a; - - assert(old != new); - - while ((a = old->ins) != NULL) { - cparc(nfa, a, a->from, new); - freearc(nfa, a); - } - assert(old->nins == 0); - assert(old->ins == NULL); -} - -/* - - copyins - copy all in arcs of a state to another state - ^ static VOID copyins(struct nfa *, struct state *, struct state *); - */ -static VOID -copyins(nfa, old, new) -struct nfa *nfa; -struct state *old; -struct state *new; -{ - struct arc *a; - - assert(old != new); - - for (a = old->ins; a != NULL; a = a->inchain) - cparc(nfa, a, a->from, new); -} - -/* - - moveouts - move all out arcs of a state to another state - ^ static VOID moveouts(struct nfa *, struct state *, struct state *); - */ -static VOID -moveouts(nfa, old, new) -struct nfa *nfa; -struct state *old; -struct state *new; -{ - struct arc *a; - - assert(old != new); - - while ((a = old->outs) != NULL) { - cparc(nfa, a, new, a->to); - freearc(nfa, a); - } -} - -/* - - copyouts - copy all out arcs of a state to another state - ^ static VOID copyouts(struct nfa *, struct state *, struct state *); - */ -static VOID -copyouts(nfa, old, new) -struct nfa *nfa; -struct state *old; -struct state *new; -{ - struct arc *a; - - assert(old != new); - - for (a = old->outs; a != NULL; a = a->outchain) - cparc(nfa, a, new, a->to); -} - -/* - - cloneouts - copy out arcs of a state to another state pair, modifying type - ^ static VOID cloneouts(struct nfa *, struct state *, struct state *, - ^ struct state *, int); - */ -static VOID -cloneouts(nfa, old, from, to, type) -struct nfa *nfa; -struct state *old; -struct state *from; -struct state *to; -int type; -{ - struct arc *a; - - assert(old != from); - - for (a = old->outs; a != NULL; a = a->outchain) - newarc(nfa, type, a->co, from, to); -} - -/* - - delsub - delete a sub-NFA, updating subre pointers if necessary - * This uses a recursive traversal of the sub-NFA, marking already-seen - * states using their tmp pointer. - ^ static VOID delsub(struct nfa *, struct state *, struct state *); - */ -static VOID -delsub(nfa, lp, rp) -struct nfa *nfa; -struct state *lp; /* the sub-NFA goes from here... */ -struct state *rp; /* ...to here, *not* inclusive */ -{ - assert(lp != rp); - - rp->tmp = rp; /* mark end */ - - deltraverse(nfa, lp, lp); - assert(lp->nouts == 0 && rp->nins == 0); /* did the job */ - assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */ - - rp->tmp = NULL; /* unmark end */ - lp->tmp = NULL; /* and begin, marked by deltraverse */ -} - -/* - - deltraverse - the recursive heart of delsub - * This routine's basic job is to destroy all out-arcs of the state. - ^ static VOID deltraverse(struct nfa *, struct state *, struct state *); - */ -static VOID -deltraverse(nfa, leftend, s) -struct nfa *nfa; -struct state *leftend; -struct state *s; -{ - struct arc *a; - struct state *to; - - if (s->nouts == 0) - return; /* nothing to do */ - if (s->tmp != NULL) - return; /* already in progress */ - - s->tmp = s; /* mark as in progress */ - - while ((a = s->outs) != NULL) { - to = a->to; - deltraverse(nfa, leftend, to); - assert(to->nouts == 0 || to->tmp != NULL); - freearc(nfa, a); - if (to->nins == 0 && to->tmp == NULL) { - assert(to->nouts == 0); - freestate(nfa, to); - } - } - - assert(s->no != FREESTATE); /* we're still here */ - assert(s == leftend || s->nins != 0); /* and still reachable */ - assert(s->nouts == 0); /* but have no outarcs */ - - s->tmp = NULL; /* we're done here */ -} - -/* - - dupnfa - duplicate sub-NFA - * Another recursive traversal, this time using tmp to point to duplicates - * as well as mark already-seen states. (You knew there was a reason why - * it's a state pointer, didn't you? :-)) - ^ static VOID dupnfa(struct nfa *, struct state *, struct state *, - ^ struct state *, struct state *); - */ -static VOID -dupnfa(nfa, start, stop, from, to) -struct nfa *nfa; -struct state *start; /* duplicate of subNFA starting here */ -struct state *stop; /* and stopping here */ -struct state *from; /* stringing duplicate from here */ -struct state *to; /* to here */ -{ - if (start == stop) { - newarc(nfa, EMPTY, 0, from, to); - return; - } - - stop->tmp = to; - duptraverse(nfa, start, from); - /* done, except for clearing out the tmp pointers */ - - stop->tmp = NULL; - cleartraverse(nfa, start); -} - -/* - - duptraverse - recursive heart of dupnfa - ^ static VOID duptraverse(struct nfa *, struct state *, struct state *); - */ -static VOID -duptraverse(nfa, s, stmp) -struct nfa *nfa; -struct state *s; -struct state *stmp; /* s's duplicate, or NULL */ -{ - struct arc *a; - - if (s->tmp != NULL) - return; /* already done */ - - s->tmp = (stmp == NULL) ? newstate(nfa) : stmp; - if (s->tmp == NULL) { - assert(NISERR()); - return; - } - - for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) { - duptraverse(nfa, a->to, (struct state *)NULL); - assert(a->to->tmp != NULL); - cparc(nfa, a, s->tmp, a->to->tmp); - } -} - -/* - - cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set - ^ static VOID cleartraverse(struct nfa *, struct state *); - */ -static VOID -cleartraverse(nfa, s) -struct nfa *nfa; -struct state *s; -{ - struct arc *a; - - if (s->tmp == NULL) - return; - s->tmp = NULL; - - for (a = s->outs; a != NULL; a = a->outchain) - cleartraverse(nfa, a->to); -} - -/* - - specialcolors - fill in special colors for an NFA - ^ static VOID specialcolors(struct nfa *); - */ -static VOID -specialcolors(nfa) -struct nfa *nfa; -{ - /* false colors for BOS, BOL, EOS, EOL */ - if (nfa->parent == NULL) { - nfa->bos[0] = pseudocolor(nfa->cm); - nfa->bos[1] = pseudocolor(nfa->cm); - nfa->eos[0] = pseudocolor(nfa->cm); - nfa->eos[1] = pseudocolor(nfa->cm); - } else { - assert(nfa->parent->bos[0] != COLORLESS); - nfa->bos[0] = nfa->parent->bos[0]; - assert(nfa->parent->bos[1] != COLORLESS); - nfa->bos[1] = nfa->parent->bos[1]; - assert(nfa->parent->eos[0] != COLORLESS); - nfa->eos[0] = nfa->parent->eos[0]; - assert(nfa->parent->eos[1] != COLORLESS); - nfa->eos[1] = nfa->parent->eos[1]; - } -} - -/* - - optimize - optimize an NFA - ^ static long optimize(struct nfa *, FILE *); - */ -static long /* re_info bits */ -optimize(nfa, f) -struct nfa *nfa; -FILE *f; /* for debug output; NULL none */ -{ - int verbose = (f != NULL) ? 1 : 0; - - if (verbose) - fprintf(f, "\ninitial cleanup:\n"); - cleanup(nfa); /* may simplify situation */ - if (verbose) - dumpnfa(nfa, f); - if (verbose) - fprintf(f, "\nempties:\n"); - fixempties(nfa, f); /* get rid of EMPTY arcs */ - if (verbose) - fprintf(f, "\nconstraints:\n"); - pullback(nfa, f); /* pull back constraints backward */ - pushfwd(nfa, f); /* push fwd constraints forward */ - if (verbose) - fprintf(f, "\nfinal cleanup:\n"); - cleanup(nfa); /* final tidying */ - return analyze(nfa); /* and analysis */ -} - -/* - - pullback - pull back constraints backward to (with luck) eliminate them - ^ static VOID pullback(struct nfa *, FILE *); - */ -static VOID -pullback(nfa, f) -struct nfa *nfa; -FILE *f; /* for debug output; NULL none */ -{ - struct state *s; - struct state *nexts; - struct arc *a; - struct arc *nexta; - int progress; - - /* find and pull until there are no more */ - do { - progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) { - nexta = a->outchain; - if (a->type == '^' || a->type == BEHIND) - if (pull(nfa, a)) - progress = 1; - assert(nexta == NULL || s->no != FREESTATE); - } - } - if (progress && f != NULL) - dumpnfa(nfa, f); - } while (progress && !NISERR()); - if (NISERR()) - return; - - for (a = nfa->pre->outs; a != NULL; a = nexta) { - nexta = a->outchain; - if (a->type == '^') { - assert(a->co == 0 || a->co == 1); - newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to); - freearc(nfa, a); - } - } -} - -/* - - pull - pull a back constraint backward past its source state - * A significant property of this function is that it deletes at most - * one state -- the constraint's from state -- and only if the constraint - * was that state's last outarc. - ^ static int pull(struct nfa *, struct arc *); - */ -static int /* 0 couldn't, 1 could */ -pull(nfa, con) -struct nfa *nfa; -struct arc *con; -{ - struct state *from = con->from; - struct state *to = con->to; - struct arc *a; - struct arc *nexta; - struct state *s; - - if (from == to) { /* circular constraint is pointless */ - freearc(nfa, con); - return 1; - } - if (from->flag) /* can't pull back beyond start */ - return 0; - if (from->nins == 0) { /* unreachable */ - freearc(nfa, con); - return 1; - } - - /* first, clone from state if necessary to avoid other outarcs */ - if (from->nouts > 1) { - s = newstate(nfa); - if (NISERR()) - return 0; - assert(to != from); /* con is not an inarc */ - copyins(nfa, from, s); /* duplicate inarcs */ - cparc(nfa, con, s, to); /* move constraint arc */ - freearc(nfa, con); - from = s; - con = from->outs; - } - assert(from->nouts == 1); - - /* propagate the constraint into the from state's inarcs */ - for (a = from->ins; a != NULL; a = nexta) { - nexta = a->inchain; - switch (combine(con, a)) { - case INCOMPATIBLE: /* destroy the arc */ - freearc(nfa, a); - break; - case SATISFIED: /* no action needed */ - break; - case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, a, s, to); /* anticipate move */ - cparc(nfa, con, a->from, s); - if (NISERR()) - return 0; - freearc(nfa, a); - break; - default: - assert(NOTREACHED); - break; - } - } - - /* remaining inarcs, if any, incorporate the constraint */ - moveins(nfa, from, to); - dropstate(nfa, from); /* will free the constraint */ - return 1; -} - -/* - - pushfwd - push forward constraints forward to (with luck) eliminate them - ^ static VOID pushfwd(struct nfa *, FILE *); - */ -static VOID -pushfwd(nfa, f) -struct nfa *nfa; -FILE *f; /* for debug output; NULL none */ -{ - struct state *s; - struct state *nexts; - struct arc *a; - struct arc *nexta; - int progress; - - /* find and push until there are no more */ - do { - progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - for (a = s->ins; a != NULL && !NISERR(); a = nexta) { - nexta = a->inchain; - if (a->type == '$' || a->type == AHEAD) - if (push(nfa, a)) - progress = 1; - assert(nexta == NULL || s->no != FREESTATE); - } - } - if (progress && f != NULL) - dumpnfa(nfa, f); - } while (progress && !NISERR()); - if (NISERR()) - return; - - for (a = nfa->post->ins; a != NULL; a = nexta) { - nexta = a->inchain; - if (a->type == '$') { - assert(a->co == 0 || a->co == 1); - newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to); - freearc(nfa, a); - } - } -} - -/* - - push - push a forward constraint forward past its destination state - * A significant property of this function is that it deletes at most - * one state -- the constraint's to state -- and only if the constraint - * was that state's last inarc. - ^ static int push(struct nfa *, struct arc *); - */ -static int /* 0 couldn't, 1 could */ -push(nfa, con) -struct nfa *nfa; -struct arc *con; -{ - struct state *from = con->from; - struct state *to = con->to; - struct arc *a; - struct arc *nexta; - struct state *s; - - if (to == from) { /* circular constraint is pointless */ - freearc(nfa, con); - return 1; - } - if (to->flag) /* can't push forward beyond end */ - return 0; - if (to->nouts == 0) { /* dead end */ - freearc(nfa, con); - return 1; - } - - /* first, clone to state if necessary to avoid other inarcs */ - if (to->nins > 1) { - s = newstate(nfa); - if (NISERR()) - return 0; - copyouts(nfa, to, s); /* duplicate outarcs */ - cparc(nfa, con, from, s); /* move constraint */ - freearc(nfa, con); - to = s; - con = to->ins; - } - assert(to->nins == 1); - - /* propagate the constraint into the to state's outarcs */ - for (a = to->outs; a != NULL; a = nexta) { - nexta = a->outchain; - switch (combine(con, a)) { - case INCOMPATIBLE: /* destroy the arc */ - freearc(nfa, a); - break; - case SATISFIED: /* no action needed */ - break; - case COMPATIBLE: /* swap the two arcs, more or less */ - s = newstate(nfa); - if (NISERR()) - return 0; - cparc(nfa, con, s, a->to); /* anticipate move */ - cparc(nfa, a, from, s); - if (NISERR()) - return 0; - freearc(nfa, a); - break; - default: - assert(NOTREACHED); - break; - } - } - - /* remaining outarcs, if any, incorporate the constraint */ - moveouts(nfa, to, from); - dropstate(nfa, to); /* will free the constraint */ - return 1; -} - -/* - - combine - constraint lands on an arc, what happens? - ^ #def INCOMPATIBLE 1 // destroys arc - ^ #def SATISFIED 2 // constraint satisfied - ^ #def COMPATIBLE 3 // compatible but not satisfied yet - ^ static int combine(struct arc *, struct arc *); - */ -static int -combine(con, a) -struct arc *con; -struct arc *a; -{ -# define CA(ct,at) (((ct)<type, a->type)) { - case CA('^', PLAIN): /* newlines are handled separately */ - case CA('$', PLAIN): - return INCOMPATIBLE; - break; - case CA(AHEAD, PLAIN): /* color constraints meet colors */ - case CA(BEHIND, PLAIN): - if (con->co == a->co) - return SATISFIED; - return INCOMPATIBLE; - break; - case CA('^', '^'): /* collision, similar constraints */ - case CA('$', '$'): - case CA(AHEAD, AHEAD): - case CA(BEHIND, BEHIND): - if (con->co == a->co) /* true duplication */ - return SATISFIED; - return INCOMPATIBLE; - break; - case CA('^', BEHIND): /* collision, dissimilar constraints */ - case CA(BEHIND, '^'): - case CA('$', AHEAD): - case CA(AHEAD, '$'): - return INCOMPATIBLE; - break; - case CA('^', '$'): /* constraints passing each other */ - case CA('^', AHEAD): - case CA(BEHIND, '$'): - case CA(BEHIND, AHEAD): - case CA('$', '^'): - case CA('$', BEHIND): - case CA(AHEAD, '^'): - case CA(AHEAD, BEHIND): - case CA('^', LACON): - case CA(BEHIND, LACON): - case CA('$', LACON): - case CA(AHEAD, LACON): - return COMPATIBLE; - break; - } - assert(NOTREACHED); - return INCOMPATIBLE; /* for benefit of blind compilers */ -} - -/* - - fixempties - get rid of EMPTY arcs - ^ static VOID fixempties(struct nfa *, FILE *); - */ -static VOID -fixempties(nfa, f) -struct nfa *nfa; -FILE *f; /* for debug output; NULL none */ -{ - struct state *s; - struct state *nexts; - struct arc *a; - struct arc *nexta; - int progress; - - /* find and eliminate empties until there are no more */ - do { - progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) { - nexts = s->next; - for (a = s->outs; a != NULL && !NISERR(); a = nexta) { - nexta = a->outchain; - if (a->type == EMPTY && unempty(nfa, a)) - progress = 1; - assert(nexta == NULL || s->no != FREESTATE); - } - } - if (progress && f != NULL) - dumpnfa(nfa, f); - } while (progress && !NISERR()); -} - -/* - - unempty - optimize out an EMPTY arc, if possible - * Actually, as it stands this function always succeeds, but the return - * value is kept with an eye on possible future changes. - ^ static int unempty(struct nfa *, struct arc *); - */ -static int /* 0 couldn't, 1 could */ -unempty(nfa, a) -struct nfa *nfa; -struct arc *a; -{ - struct state *from = a->from; - struct state *to = a->to; - int usefrom; /* work on from, as opposed to to? */ - - assert(a->type == EMPTY); - assert(from != nfa->pre && to != nfa->post); - - if (from == to) { /* vacuous loop */ - freearc(nfa, a); - return 1; - } - - /* decide which end to work on */ - usefrom = 1; /* default: attack from */ - if (from->nouts > to->nins) - usefrom = 0; - else if (from->nouts == to->nins) { - /* decide on secondary issue: move/copy fewest arcs */ - if (from->nins > to->nouts) - usefrom = 0; - } - - freearc(nfa, a); - if (usefrom) { - if (from->nouts == 0) { - /* was the state's only outarc */ - moveins(nfa, from, to); - freestate(nfa, from); - } else - copyins(nfa, from, to); - } else { - if (to->nins == 0) { - /* was the state's only inarc */ - moveouts(nfa, to, from); - freestate(nfa, to); - } else - copyouts(nfa, to, from); - } - - return 1; -} - -/* - - cleanup - clean up NFA after optimizations - ^ static VOID cleanup(struct nfa *); - */ -static VOID -cleanup(nfa) -struct nfa *nfa; -{ - struct state *s; - struct state *nexts; - int n; - - /* clear out unreachable or dead-end states */ - /* use pre to mark reachable, then post to mark can-reach-post */ - markreachable(nfa, nfa->pre, (struct state *)NULL, nfa->pre); - markcanreach(nfa, nfa->post, nfa->pre, nfa->post); - for (s = nfa->states; s != NULL; s = nexts) { - nexts = s->next; - if (s->tmp != nfa->post && !s->flag) - dropstate(nfa, s); - } - assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post); - cleartraverse(nfa, nfa->pre); - assert(nfa->post->nins == 0 || nfa->post->tmp == NULL); - /* the nins==0 (final unreachable) case will be caught later */ - - /* renumber surviving states */ - n = 0; - for (s = nfa->states; s != NULL; s = s->next) - s->no = n++; - nfa->nstates = n; -} - -/* - - markreachable - recursive marking of reachable states - ^ static VOID markreachable(struct nfa *, struct state *, struct state *, - ^ struct state *); - */ -static VOID -markreachable(nfa, s, okay, mark) -struct nfa *nfa; -struct state *s; -struct state *okay; /* consider only states with this mark */ -struct state *mark; /* the value to mark with */ -{ - struct arc *a; - - if (s->tmp != okay) - return; - s->tmp = mark; - - for (a = s->outs; a != NULL; a = a->outchain) - markreachable(nfa, a->to, okay, mark); -} - -/* - - markcanreach - recursive marking of states which can reach here - ^ static VOID markcanreach(struct nfa *, struct state *, struct state *, - ^ struct state *); - */ -static VOID -markcanreach(nfa, s, okay, mark) -struct nfa *nfa; -struct state *s; -struct state *okay; /* consider only states with this mark */ -struct state *mark; /* the value to mark with */ -{ - struct arc *a; - - if (s->tmp != okay) - return; - s->tmp = mark; - - for (a = s->ins; a != NULL; a = a->inchain) - markcanreach(nfa, a->from, okay, mark); -} - -/* - - analyze - ascertain potentially-useful facts about an optimized NFA - ^ static long analyze(struct nfa *); - */ -static long /* re_info bits to be ORed in */ -analyze(nfa) -struct nfa *nfa; -{ - struct arc *a; - struct arc *aa; - - if (nfa->pre->outs == NULL) - return REG_UIMPOSSIBLE; - for (a = nfa->pre->outs; a != NULL; a = a->outchain) - for (aa = a->to->outs; aa != NULL; aa = aa->outchain) - if (aa->to == nfa->post) - return REG_UEMPTYMATCH; - return 0; -} - -/* - - compact - compact an NFA - ^ static VOID compact(struct nfa *, struct cnfa *); - */ -static VOID -compact(nfa, cnfa) -struct nfa *nfa; -struct cnfa *cnfa; -{ - struct state *s; - struct arc *a; - size_t nstates; - size_t narcs; - struct carc *ca; - struct carc *first; - - assert (!NISERR()); - - nstates = 0; - narcs = 0; - for (s = nfa->states; s != NULL; s = s->next) { - nstates++; - narcs += 1 + s->nouts + 1; - /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ - } - - cnfa->states = (struct carc **)MALLOC(nstates * sizeof(struct carc *)); - cnfa->arcs = (struct carc *)MALLOC(narcs * sizeof(struct carc)); - if (cnfa->states == NULL || cnfa->arcs == NULL) { - if (cnfa->states != NULL) - FREE(cnfa->states); - if (cnfa->arcs != NULL) - FREE(cnfa->arcs); - NERR(REG_ESPACE); - return; - } - cnfa->nstates = nstates; - cnfa->pre = nfa->pre->no; - cnfa->post = nfa->post->no; - cnfa->bos[0] = nfa->bos[0]; - cnfa->bos[1] = nfa->bos[1]; - cnfa->eos[0] = nfa->eos[0]; - cnfa->eos[1] = nfa->eos[1]; - cnfa->ncolors = maxcolor(nfa->cm) + 1; - cnfa->flags = 0; - - ca = cnfa->arcs; - for (s = nfa->states; s != NULL; s = s->next) { - assert((size_t)s->no < nstates); - cnfa->states[s->no] = ca; - ca->co = 0; /* clear and skip flags "arc" */ - ca++; - first = ca; - for (a = s->outs; a != NULL; a = a->outchain) - switch (a->type) { - case PLAIN: - ca->co = a->co; - ca->to = a->to->no; - ca++; - break; - case LACON: - assert(s->no != cnfa->pre); - ca->co = (color)(cnfa->ncolors + a->co); - ca->to = a->to->no; - ca++; - cnfa->flags |= HASLACONS; - break; - default: - assert(NOTREACHED); - break; - } - carcsort(first, ca-1); - ca->co = COLORLESS; - ca->to = 0; - ca++; - } - assert(ca == &cnfa->arcs[narcs]); - assert(cnfa->nstates != 0); - - /* mark no-progress states */ - for (a = nfa->pre->outs; a != NULL; a = a->outchain) - cnfa->states[a->to->no]->co = 1; - cnfa->states[nfa->pre->no]->co = 1; -} - -/* - - carcsort - sort compacted-NFA arcs by color - * Really dumb algorithm, but if the list is long enough for that to matter, - * you're in real trouble anyway. - ^ static VOID carcsort(struct carc *, struct carc *); - */ -static VOID -carcsort(first, last) -struct carc *first; -struct carc *last; -{ - struct carc *p; - struct carc *q; - struct carc tmp; - - if (last - first <= 1) - return; - - for (p = first; p <= last; p++) - for (q = p; q <= last; q++) - if (p->co > q->co || - (p->co == q->co && p->to > q->to)) { - assert(p != q); - tmp = *p; - *p = *q; - *q = tmp; - } -} - -/* - - freecnfa - free a compacted NFA - ^ static VOID freecnfa(struct cnfa *); - */ -static VOID -freecnfa(cnfa) -struct cnfa *cnfa; -{ - assert(cnfa->nstates != 0); /* not empty already */ - cnfa->nstates = 0; - FREE(cnfa->states); - FREE(cnfa->arcs); -} - -/* - - dumpnfa - dump an NFA in human-readable form - ^ static VOID dumpnfa(struct nfa *, FILE *); - */ -static VOID -dumpnfa(nfa, f) -struct nfa *nfa; -FILE *f; -{ -#ifdef REG_DEBUG - struct state *s; - - fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); - if (nfa->bos[0] != COLORLESS) - fprintf(f, ", bos [%ld]", (long)nfa->bos[0]); - if (nfa->bos[1] != COLORLESS) - fprintf(f, ", bol [%ld]", (long)nfa->bos[1]); - if (nfa->eos[0] != COLORLESS) - fprintf(f, ", eos [%ld]", (long)nfa->eos[0]); - if (nfa->eos[1] != COLORLESS) - fprintf(f, ", eol [%ld]", (long)nfa->eos[1]); - fprintf(f, "\n"); - for (s = nfa->states; s != NULL; s = s->next) - dumpstate(s, f); - if (nfa->parent == NULL) - dumpcolors(nfa->cm, f); - fflush(f); -#endif -} - -#ifdef REG_DEBUG /* subordinates of dumpnfa */ -/* - ^ #ifdef REG_DEBUG - */ - -/* - - dumpstate - dump an NFA state in human-readable form - ^ static VOID dumpstate(struct state *, FILE *); - */ -static VOID -dumpstate(s, f) -struct state *s; -FILE *f; -{ - struct arc *a; - - fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "", - (s->flag) ? s->flag : '.'); - if (s->prev != NULL && s->prev->next != s) - fprintf(f, "\tstate chain bad\n"); - if (s->nouts == 0) - fprintf(f, "\tno out arcs\n"); - else - dumparcs(s, f); - fflush(f); - for (a = s->ins; a != NULL; a = a->inchain) { - if (a->to != s) - fprintf(f, "\tlink from %d to %d on %d's in-chain\n", - a->from->no, a->to->no, s->no); - } -} - -/* - - dumparcs - dump out-arcs in human-readable form - ^ static VOID dumparcs(struct state *, FILE *); - */ -static VOID -dumparcs(s, f) -struct state *s; -FILE *f; -{ - int pos; - - assert(s->nouts > 0); - /* printing arcs in reverse order is usually clearer */ - pos = dumprarcs(s->outs, s, f, 1); - if (pos != 1) - fprintf(f, "\n"); -} - -/* - - dumprarcs - dump remaining outarcs, recursively, in reverse order - ^ static int dumprarcs(struct arc *, struct state *, FILE *, int); - */ -static int /* resulting print position */ -dumprarcs(a, s, f, pos) -struct arc *a; -struct state *s; -FILE *f; -int pos; /* initial print position */ -{ - if (a->outchain != NULL) - pos = dumprarcs(a->outchain, s, f, pos); - dumparc(a, s, f); - if (pos == 5) { - fprintf(f, "\n"); - pos = 1; - } else - pos++; - return pos; -} - -/* - - dumparc - dump one outarc in readable form, including prefixing tab - ^ static VOID dumparc(struct arc *, struct state *, FILE *); - */ -static VOID -dumparc(a, s, f) -struct arc *a; -struct state *s; -FILE *f; -{ - struct arc *aa; - struct arcbatch *ab; - - fprintf(f, "\t"); - switch (a->type) { - case PLAIN: - fprintf(f, "[%ld]", (long)a->co); - break; - case AHEAD: - fprintf(f, ">%ld>", (long)a->co); - break; - case BEHIND: - fprintf(f, "<%ld<", (long)a->co); - break; - case LACON: - fprintf(f, ":%ld:", (long)a->co); - break; - case '^': - case '$': - fprintf(f, "%c%d", a->type, (int)a->co); - break; - case EMPTY: - break; - default: - fprintf(f, "0x%x/0%lo", a->type, (long)a->co); - break; - } - if (a->from != s) - fprintf(f, "?%d?", a->from->no); - for (ab = &a->from->oas; ab != NULL; ab = ab->next) { - for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) - if (aa == a) - break; /* NOTE BREAK OUT */ - if (aa < &ab->a[ABSIZE]) /* propagate break */ - break; /* NOTE BREAK OUT */ - } - if (ab == NULL) - fprintf(f, "?!?"); /* not in allocated space */ - fprintf(f, "->"); - if (a->to == NULL) { - fprintf(f, "NULL"); - return; - } - fprintf(f, "%d", a->to->no); - for (aa = a->to->ins; aa != NULL; aa = aa->inchain) - if (aa == a) - break; /* NOTE BREAK OUT */ - if (aa == NULL) - fprintf(f, "?!?"); /* missing from in-chain */ -} - -/* - ^ #endif - */ -#endif /* ifdef REG_DEBUG */ - -/* - - dumpcnfa - dump a compacted NFA in human-readable form - ^ static VOID dumpcnfa(struct cnfa *, FILE *); - */ -static VOID -dumpcnfa(cnfa, f) -struct cnfa *cnfa; -FILE *f; -{ -#ifdef REG_DEBUG - int st; - - fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post); - if (cnfa->bos[0] != COLORLESS) - fprintf(f, ", bos [%ld]", (long)cnfa->bos[0]); - if (cnfa->bos[1] != COLORLESS) - fprintf(f, ", bol [%ld]", (long)cnfa->bos[1]); - if (cnfa->eos[0] != COLORLESS) - fprintf(f, ", eos [%ld]", (long)cnfa->eos[0]); - if (cnfa->eos[1] != COLORLESS) - fprintf(f, ", eol [%ld]", (long)cnfa->eos[1]); - if (cnfa->flags&HASLACONS) - fprintf(f, ", haslacons"); - fprintf(f, "\n"); - for (st = 0; st < cnfa->nstates; st++) - dumpcstate(st, cnfa->states[st], cnfa, f); - fflush(f); -#endif -} - -#ifdef REG_DEBUG /* subordinates of dumpcnfa */ -/* - ^ #ifdef REG_DEBUG - */ - -/* - - dumpcstate - dump a compacted-NFA state in human-readable form - ^ static VOID dumpcstate(int, struct carc *, struct cnfa *, FILE *); - */ -static VOID -dumpcstate(st, ca, cnfa, f) -int st; -struct carc *ca; -struct cnfa *cnfa; -FILE *f; -{ - int i; - int pos; - - fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); - pos = 1; - for (i = 1; ca[i].co != COLORLESS; i++) { - if (ca[i].co < cnfa->ncolors) - fprintf(f, "\t[%ld]->%d", (long)ca[i].co, ca[i].to); - else - fprintf(f, "\t:%ld:->%d", (long)ca[i].co-cnfa->ncolors, - ca[i].to); - if (pos == 5) { - fprintf(f, "\n"); - pos = 1; - } else - pos++; - } - if (i == 1 || pos != 1) - fprintf(f, "\n"); - fflush(f); -} - -/* - ^ #endif - */ -#endif /* ifdef REG_DEBUG */ diff --git a/src/regex/rege_dfa.c b/src/regex/rege_dfa.c deleted file mode 100644 index 313892cc8f..0000000000 --- a/src/regex/rege_dfa.c +++ /dev/null @@ -1,677 +0,0 @@ -/* - * DFA routines - * This file is #included by regexec.c. - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -/* - - longest - longest-preferred matching engine - ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *); - */ -static chr * /* endpoint, or NULL */ -longest(v, d, start, stop, hitstopp) -struct vars *v; /* used only for debug and exec flags */ -struct dfa *d; -chr *start; /* where the match should start */ -chr *stop; /* match must end at or before here */ -int *hitstopp; /* record whether hit v->stop, if non-NULL */ -{ - chr *cp; - chr *realstop = (stop == v->stop) ? stop : stop + 1; - color co; - struct sset *css; - struct sset *ss; - chr *post; - int i; - struct colormap *cm = d->cm; - - /* initialize */ - css = initialize(v, d, start); - cp = start; - if (hitstopp != NULL) - *hitstopp = 0; - - /* startup */ - FDEBUG(("+++ startup +++\n")); - if (cp == v->start) { - co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long)co)); - } else { - co = GETCOLOR(cm, *(cp - 1)); - FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); - } - css = miss(v, d, css, co, cp, start); - if (css == NULL) - return NULL; - css->lastseen = cp; - - /* main loop */ - if (v->eflags®_FTRACE) - while (cp < realstop) { - FDEBUG(("+++ at c%d +++\n", css - d->ssets)); - co = GETCOLOR(cm, *cp); - FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); - ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp+1, start); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - } - else - while (cp < realstop) { - co = GETCOLOR(cm, *cp); - ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp+1, start); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - } - - /* shutdown */ - FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets)); - if (cp == v->stop && stop == v->stop) { - if (hitstopp != NULL) - *hitstopp = 1; - co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long)co)); - ss = miss(v, d, css, co, cp, start); - /* special case: match ended at eol? */ - if (ss != NULL && (ss->flags&POSTSTATE)) - return cp; - else if (ss != NULL) - ss->lastseen = cp; /* to be tidy */ - } - - /* find last match, if any */ - post = d->lastpost; - for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) - if ((ss->flags&POSTSTATE) && post != ss->lastseen && - (post == NULL || post < ss->lastseen)) - post = ss->lastseen; - if (post != NULL) /* found one */ - return post - 1; - - return NULL; -} - -/* - - shortest - shortest-preferred matching engine - ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, - ^ chr **, int *); - */ -static chr * /* endpoint, or NULL */ -shortest(v, d, start, min, max, coldp, hitstopp) -struct vars *v; -struct dfa *d; -chr *start; /* where the match should start */ -chr *min; /* match must end at or after here */ -chr *max; /* match must end at or before here */ -chr **coldp; /* store coldstart pointer here, if nonNULL */ -int *hitstopp; /* record whether hit v->stop, if non-NULL */ -{ - chr *cp; - chr *realmin = (min == v->stop) ? min : min + 1; - chr *realmax = (max == v->stop) ? max : max + 1; - color co; - struct sset *css; - struct sset *ss; - struct colormap *cm = d->cm; - - /* initialize */ - css = initialize(v, d, start); - cp = start; - if (hitstopp != NULL) - *hitstopp = 0; - - /* startup */ - FDEBUG(("--- startup ---\n")); - if (cp == v->start) { - co = d->cnfa->bos[(v->eflags®_NOTBOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long)co)); - } else { - co = GETCOLOR(cm, *(cp - 1)); - FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co)); - } - css = miss(v, d, css, co, cp, start); - if (css == NULL) - return NULL; - css->lastseen = cp; - ss = css; - - /* main loop */ - if (v->eflags®_FTRACE) - while (cp < realmax) { - FDEBUG(("--- at c%d ---\n", css - d->ssets)); - co = GETCOLOR(cm, *cp); - FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co)); - ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp+1, start); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - if ((ss->flags&POSTSTATE) && cp >= realmin) - break; /* NOTE BREAK OUT */ - } - else - while (cp < realmax) { - co = GETCOLOR(cm, *cp); - ss = css->outs[co]; - if (ss == NULL) { - ss = miss(v, d, css, co, cp+1, start); - if (ss == NULL) - break; /* NOTE BREAK OUT */ - } - cp++; - ss->lastseen = cp; - css = ss; - if ((ss->flags&POSTSTATE) && cp >= realmin) - break; /* NOTE BREAK OUT */ - } - - if (ss == NULL) - return NULL; - - if (coldp != NULL) /* report last no-progress state set, if any */ - *coldp = lastcold(v, d); - - if ((ss->flags&POSTSTATE) && cp > min) { - assert(cp >= realmin); - cp--; - } else if (cp == v->stop && max == v->stop) { - co = d->cnfa->eos[(v->eflags®_NOTEOL) ? 0 : 1]; - FDEBUG(("color %ld\n", (long)co)); - ss = miss(v, d, css, co, cp, start); - /* match might have ended at eol */ - if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL) - *hitstopp = 1; - } - - if (ss == NULL || !(ss->flags&POSTSTATE)) - return NULL; - - return cp; -} - -/* - - lastcold - determine last point at which no progress had been made - ^ static chr *lastcold(struct vars *, struct dfa *); - */ -static chr * /* endpoint, or NULL */ -lastcold(v, d) -struct vars *v; -struct dfa *d; -{ - struct sset *ss; - chr *nopr; - int i; - - nopr = d->lastnopr; - if (nopr == NULL) - nopr = v->start; - for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) - if ((ss->flags&NOPROGRESS) && nopr < ss->lastseen) - nopr = ss->lastseen; - return nopr; -} - -/* - - newdfa - set up a fresh DFA - ^ static struct dfa *newdfa(struct vars *, struct cnfa *, - ^ struct colormap *, struct smalldfa *); - */ -static struct dfa * -newdfa(v, cnfa, cm, small) -struct vars *v; -struct cnfa *cnfa; -struct colormap *cm; -struct smalldfa *small; /* preallocated space, may be NULL */ -{ - struct dfa *d; - size_t nss = cnfa->nstates * 2; - int wordsper = (cnfa->nstates + UBITS - 1) / UBITS; - struct smalldfa *smallwas = small; - - assert(cnfa != NULL && cnfa->nstates != 0); - - if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) { - assert(wordsper == 1); - if (small == NULL) { - small = (struct smalldfa *)MALLOC( - sizeof(struct smalldfa)); - if (small == NULL) { - ERR(REG_ESPACE); - return NULL; - } - } - d = &small->dfa; - d->ssets = small->ssets; - d->statesarea = small->statesarea; - d->work = &d->statesarea[nss]; - d->outsarea = small->outsarea; - d->incarea = small->incarea; - d->cptsmalloced = 0; - d->mallocarea = (smallwas == NULL) ? (char *)small : NULL; - } else { - d = (struct dfa *)MALLOC(sizeof(struct dfa)); - if (d == NULL) { - ERR(REG_ESPACE); - return NULL; - } - d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset)); - d->statesarea = (unsigned *)MALLOC((nss+WORK) * wordsper * - sizeof(unsigned)); - d->work = &d->statesarea[nss * wordsper]; - d->outsarea = (struct sset **)MALLOC(nss * cnfa->ncolors * - sizeof(struct sset *)); - d->incarea = (struct arcp *)MALLOC(nss * cnfa->ncolors * - sizeof(struct arcp)); - d->cptsmalloced = 1; - d->mallocarea = (char *)d; - if (d->ssets == NULL || d->statesarea == NULL || - d->outsarea == NULL || d->incarea == NULL) { - freedfa(d); - ERR(REG_ESPACE); - return NULL; - } - } - - d->nssets = (v->eflags®_SMALL) ? 7 : nss; - d->nssused = 0; - d->nstates = cnfa->nstates; - d->ncolors = cnfa->ncolors; - d->wordsper = wordsper; - d->cnfa = cnfa; - d->cm = cm; - d->lastpost = NULL; - d->lastnopr = NULL; - d->search = d->ssets; - - /* initialization of sset fields is done as needed */ - - return d; -} - -/* - - freedfa - free a DFA - ^ static VOID freedfa(struct dfa *); - */ -static VOID -freedfa(d) -struct dfa *d; -{ - if (d->cptsmalloced) { - if (d->ssets != NULL) - FREE(d->ssets); - if (d->statesarea != NULL) - FREE(d->statesarea); - if (d->outsarea != NULL) - FREE(d->outsarea); - if (d->incarea != NULL) - FREE(d->incarea); - } - - if (d->mallocarea != NULL) - FREE(d->mallocarea); -} - -/* - - hash - construct a hash code for a bitvector - * There are probably better ways, but they're more expensive. - ^ static unsigned hash(unsigned *, int); - */ -static unsigned -hash(uv, n) -unsigned *uv; -int n; -{ - int i; - unsigned h; - - h = 0; - for (i = 0; i < n; i++) - h ^= uv[i]; - return h; -} - -/* - - initialize - hand-craft a cache entry for startup, otherwise get ready - ^ static struct sset *initialize(struct vars *, struct dfa *, chr *); - */ -static struct sset * -initialize(v, d, start) -struct vars *v; /* used only for debug flags */ -struct dfa *d; -chr *start; -{ - struct sset *ss; - int i; - - /* is previous one still there? */ - if (d->nssused > 0 && (d->ssets[0].flags&STARTER)) - ss = &d->ssets[0]; - else { /* no, must (re)build it */ - ss = getvacant(v, d, start, start); - for (i = 0; i < d->wordsper; i++) - ss->states[i] = 0; - BSET(ss->states, d->cnfa->pre); - ss->hash = HASH(ss->states, d->wordsper); - assert(d->cnfa->pre != d->cnfa->post); - ss->flags = STARTER|LOCKED|NOPROGRESS; - /* lastseen dealt with below */ - } - - for (i = 0; i < d->nssused; i++) - d->ssets[i].lastseen = NULL; - ss->lastseen = start; /* maybe untrue, but harmless */ - d->lastpost = NULL; - d->lastnopr = NULL; - return ss; -} - -/* - - miss - handle a cache miss - ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *, - ^ pcolor, chr *, chr *); - */ -static struct sset * /* NULL if goes to empty set */ -miss(v, d, css, co, cp, start) -struct vars *v; /* used only for debug flags */ -struct dfa *d; -struct sset *css; -pcolor co; -chr *cp; /* next chr */ -chr *start; /* where the attempt got started */ -{ - struct cnfa *cnfa = d->cnfa; - int i; - unsigned h; - struct carc *ca; - struct sset *p; - int ispost; - int noprogress; - int gotstate; - int dolacons; - int sawlacons; - - /* for convenience, we can be called even if it might not be a miss */ - if (css->outs[co] != NULL) { - FDEBUG(("hit\n")); - return css->outs[co]; - } - FDEBUG(("miss\n")); - - /* first, what set of states would we end up in? */ - for (i = 0; i < d->wordsper; i++) - d->work[i] = 0; - ispost = 0; - noprogress = 1; - gotstate = 0; - for (i = 0; i < d->nstates; i++) - if (ISBSET(css->states, i)) - for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) - if (ca->co == co) { - BSET(d->work, ca->to); - gotstate = 1; - if (ca->to == cnfa->post) - ispost = 1; - if (!cnfa->states[ca->to]->co) - noprogress = 0; - FDEBUG(("%d -> %d\n", i, ca->to)); - } - dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0; - sawlacons = 0; - while (dolacons) { /* transitive closure */ - dolacons = 0; - for (i = 0; i < d->nstates; i++) - if (ISBSET(d->work, i)) - for (ca = cnfa->states[i]+1; ca->co != COLORLESS; - ca++) { - if (ca->co <= cnfa->ncolors) - continue; /* NOTE CONTINUE */ - sawlacons = 1; - if (ISBSET(d->work, ca->to)) - continue; /* NOTE CONTINUE */ - if (!lacon(v, cnfa, cp, ca->co)) - continue; /* NOTE CONTINUE */ - BSET(d->work, ca->to); - dolacons = 1; - if (ca->to == cnfa->post) - ispost = 1; - if (!cnfa->states[ca->to]->co) - noprogress = 0; - FDEBUG(("%d :> %d\n", i, ca->to)); - } - } - if (!gotstate) - return NULL; - h = HASH(d->work, d->wordsper); - - /* next, is that in the cache? */ - for (p = d->ssets, i = d->nssused; i > 0; p++, i--) - if (HIT(h, d->work, p, d->wordsper)) { - FDEBUG(("cached c%d\n", p - d->ssets)); - break; /* NOTE BREAK OUT */ - } - if (i == 0) { /* nope, need a new cache entry */ - p = getvacant(v, d, cp, start); - assert(p != css); - for (i = 0; i < d->wordsper; i++) - p->states[i] = d->work[i]; - p->hash = h; - p->flags = (ispost) ? POSTSTATE : 0; - if (noprogress) - p->flags |= NOPROGRESS; - /* lastseen to be dealt with by caller */ - } - - if (!sawlacons) { /* lookahead conds. always cache miss */ - FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets)); - css->outs[co] = p; - css->inchain[co] = p->ins; - p->ins.ss = css; - p->ins.co = (color)co; - } - return p; -} - -/* - - lacon - lookahead-constraint checker for miss() - ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor); - */ -static int /* predicate: constraint satisfied? */ -lacon(v, pcnfa, cp, co) -struct vars *v; -struct cnfa *pcnfa; /* parent cnfa */ -chr *cp; -pcolor co; /* "color" of the lookahead constraint */ -{ - int n; - struct subre *sub; - struct dfa *d; - struct smalldfa sd; - chr *end; - - n = co - pcnfa->ncolors; - assert(n < v->g->nlacons && v->g->lacons != NULL); - FDEBUG(("=== testing lacon %d\n", n)); - sub = &v->g->lacons[n]; - d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd); - if (d == NULL) { - ERR(REG_ESPACE); - return 0; - } - end = longest(v, d, cp, v->stop, (int *)NULL); - freedfa(d); - FDEBUG(("=== lacon %d match %d\n", n, (end != NULL))); - return (sub->subno) ? (end != NULL) : (end == NULL); -} - -/* - - getvacant - get a vacant state set - * This routine clears out the inarcs and outarcs, but does not otherwise - * clear the innards of the state set -- that's up to the caller. - ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *); - */ -static struct sset * -getvacant(v, d, cp, start) -struct vars *v; /* used only for debug flags */ -struct dfa *d; -chr *cp; -chr *start; -{ - int i; - struct sset *ss; - struct sset *p; - struct arcp ap; - struct arcp lastap; - color co; - - ss = pickss(v, d, cp, start); - assert(!(ss->flags&LOCKED)); - - /* clear out its inarcs, including self-referential ones */ - ap = ss->ins; - while ((p = ap.ss) != NULL) { - co = ap.co; - FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co)); - p->outs[co] = NULL; - ap = p->inchain[co]; - p->inchain[co].ss = NULL; /* paranoia */ - } - ss->ins.ss = NULL; - - /* take it off the inarc chains of the ssets reached by its outarcs */ - for (i = 0; i < d->ncolors; i++) { - p = ss->outs[i]; - assert(p != ss); /* not self-referential */ - if (p == NULL) - continue; /* NOTE CONTINUE */ - FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets)); - if (p->ins.ss == ss && p->ins.co == i) - p->ins = ss->inchain[i]; - else { - assert(p->ins.ss != NULL); - for (ap = p->ins; ap.ss != NULL && - !(ap.ss == ss && ap.co == i); - ap = ap.ss->inchain[ap.co]) - lastap = ap; - assert(ap.ss != NULL); - lastap.ss->inchain[lastap.co] = ss->inchain[i]; - } - ss->outs[i] = NULL; - ss->inchain[i].ss = NULL; - } - - /* if ss was a success state, may need to remember location */ - if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost && - (d->lastpost == NULL || d->lastpost < ss->lastseen)) - d->lastpost = ss->lastseen; - - /* likewise for a no-progress state */ - if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr && - (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) - d->lastnopr = ss->lastseen; - - return ss; -} - -/* - - pickss - pick the next stateset to be used - ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *); - */ -static struct sset * -pickss(v, d, cp, start) -struct vars *v; /* used only for debug flags */ -struct dfa *d; -chr *cp; -chr *start; -{ - int i; - struct sset *ss; - struct sset *end; - chr *ancient; - - /* shortcut for cases where cache isn't full */ - if (d->nssused < d->nssets) { - i = d->nssused; - d->nssused++; - ss = &d->ssets[i]; - FDEBUG(("new c%d\n", i)); - /* set up innards */ - ss->states = &d->statesarea[i * d->wordsper]; - ss->flags = 0; - ss->ins.ss = NULL; - ss->ins.co = WHITE; /* give it some value */ - ss->outs = &d->outsarea[i * d->ncolors]; - ss->inchain = &d->incarea[i * d->ncolors]; - for (i = 0; i < d->ncolors; i++) { - ss->outs[i] = NULL; - ss->inchain[i].ss = NULL; - } - return ss; - } - - /* look for oldest, or old enough anyway */ - if (cp - start > d->nssets*2/3) /* oldest 33% are expendable */ - ancient = cp - d->nssets*2/3; - else - ancient = start; - for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) - if ((ss->lastseen == NULL || ss->lastseen < ancient) && - !(ss->flags&LOCKED)) { - d->search = ss + 1; - FDEBUG(("replacing c%d\n", ss - d->ssets)); - return ss; - } - for (ss = d->ssets, end = d->search; ss < end; ss++) - if ((ss->lastseen == NULL || ss->lastseen < ancient) && - !(ss->flags&LOCKED)) { - d->search = ss + 1; - FDEBUG(("replacing c%d\n", ss - d->ssets)); - return ss; - } - - /* nobody's old enough?!? -- something's really wrong */ - FDEBUG(("can't find victim to replace!\n")); - assert(NOTREACHED); - ERR(REG_ASSERT); - return d->ssets; -} diff --git a/src/regex/regerror.c b/src/regex/regerror.c index aca13aade0..e53dafcfbf 100644 --- a/src/regex/regerror.c +++ b/src/regex/regerror.c @@ -1,109 +1,126 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "regerror.ih" + /* - * regerror - error-code expansion - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * + = #define REG_OKAY 0 + = #define REG_NOMATCH 1 + = #define REG_BADPAT 2 + = #define REG_ECOLLATE 3 + = #define REG_ECTYPE 4 + = #define REG_EESCAPE 5 + = #define REG_ESUBREG 6 + = #define REG_EBRACK 7 + = #define REG_EPAREN 8 + = #define REG_EBRACE 9 + = #define REG_BADBR 10 + = #define REG_ERANGE 11 + = #define REG_ESPACE 12 + = #define REG_BADRPT 13 + = #define REG_EMPTY 14 + = #define REG_ASSERT 15 + = #define REG_INVARG 16 + = #define REG_ATOI 255 // convert name to number (!) + = #define REG_ITOA 0400 // convert number to name (!) */ - -#include "regguts.h" - -/* unknown-error explanation */ -static char unk[] = "*** unknown regex error code 0x%x ***"; - -/* struct to map among codes, code names, and explanations */ static struct rerr { int code; char *name; char *explain; } rerrs[] = { - /* the actual table is built from regex.h */ -# include "regerrs.h" - { -1, "", "oops" }, /* explanation special-cased in code */ + REG_OKAY, "REG_OKAY", "no errors detected", + REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match", + REG_BADPAT, "REG_BADPAT", "invalid regular expression", + REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element", + REG_ECTYPE, "REG_ECTYPE", "invalid character class", + REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)", + REG_ESUBREG, "REG_ESUBREG", "invalid backreference number", + REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced", + REG_EPAREN, "REG_EPAREN", "parentheses not balanced", + REG_EBRACE, "REG_EBRACE", "braces not balanced", + REG_BADBR, "REG_BADBR", "invalid repetition count(s)", + REG_ERANGE, "REG_ERANGE", "invalid character range", + REG_ESPACE, "REG_ESPACE", "out of memory", + REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid", + REG_EMPTY, "REG_EMPTY", "empty (sub)expression", + REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug", + REG_INVARG, "REG_INVARG", "invalid argument to regex routine", + -1, "", "*** unknown regexp error code ***", }; /* - regerror - the interface to error numbers + = extern size_t regerror(int, const regex_t *, char *, size_t); */ /* ARGSUSED */ -size_t /* actual space needed (including NUL) */ +size_t regerror(errcode, preg, errbuf, errbuf_size) -int errcode; /* error code, or REG_ATOI or REG_ITOA */ -CONST regex_t *preg; /* associated regex_t (unused at present) */ -char *errbuf; /* result buffer (unless errbuf_size==0) */ -size_t errbuf_size; /* available space in errbuf, can be 0 */ +int errcode; +const regex_t *preg; +char *errbuf; +size_t errbuf_size; { - struct rerr *r; - char *msg; - char convbuf[sizeof(unk)+50]; /* 50 = plenty for int */ - size_t len; - int icode; + register struct rerr *r; + register size_t len; + register int target = errcode &~ REG_ITOA; + register char *s; + char convbuf[50]; - switch (errcode) { - case REG_ATOI: /* convert name to number */ + if (errcode == REG_ATOI) + s = regatoi(preg, convbuf); + else { for (r = rerrs; r->code >= 0; r++) - if (strcmp(r->name, errbuf) == 0) + if (r->code == target) break; - sprintf(convbuf, "%d", r->code); /* -1 for unknown */ - msg = convbuf; - break; - case REG_ITOA: /* convert number to name */ - icode = atoi(errbuf); /* not our problem if this fails */ - for (r = rerrs; r->code >= 0; r++) - if (r->code == icode) - break; - if (r->code >= 0) - msg = r->name; - else { /* unknown; tell him the number */ - sprintf(convbuf, "REG_%u", (unsigned)icode); - msg = convbuf; - } - break; - default: /* a real, normal error code */ - for (r = rerrs; r->code >= 0; r++) - if (r->code == errcode) - break; - if (r->code >= 0) - msg = r->explain; - else { /* unknown; say so */ - sprintf(convbuf, unk, errcode); - msg = convbuf; - } - break; + + if (errcode®_ITOA) { + if (r->code >= 0) + (void) strcpy(convbuf, r->name); + else + sprintf(convbuf, "REG_0x%x", target); + assert(strlen(convbuf) < sizeof(convbuf)); + s = convbuf; + } else + s = r->explain; } - len = strlen(msg) + 1; /* space needed, including NUL */ + len = strlen(s) + 1; if (errbuf_size > 0) { if (errbuf_size > len) - strcpy(errbuf, msg); - else { /* truncate to fit */ - strncpy(errbuf, msg, errbuf_size-1); + (void) strcpy(errbuf, s); + else { + (void) strncpy(errbuf, s, errbuf_size-1); errbuf[errbuf_size-1] = '\0'; } } - return len; + return(len); +} + +/* + - regatoi - internal routine to implement REG_ATOI + == static char *regatoi(const regex_t *preg, char *localbuf); + */ +static char * +regatoi(preg, localbuf) +const regex_t *preg; +char *localbuf; +{ + register struct rerr *r; + + for (r = rerrs; r->code >= 0; r++) + if (strcmp(r->name, preg->re_endp) == 0) + break; + if (r->code < 0) + return("0"); + + sprintf(localbuf, "%d", r->code); + return(localbuf); } diff --git a/src/regex/regerrs.h b/src/regex/regerrs.h deleted file mode 100644 index a3d98b6818..0000000000 --- a/src/regex/regerrs.h +++ /dev/null @@ -1,18 +0,0 @@ -{ REG_OKAY, "REG_OKAY", "no errors detected" }, -{ REG_NOMATCH, "REG_NOMATCH", "failed to match" }, -{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" }, -{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" }, -{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" }, -{ REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" }, -{ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" }, -{ REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" }, -{ REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" }, -{ REG_EBRACE, "REG_EBRACE", "braces {} not balanced" }, -{ REG_BADBR, "REG_BADBR", "invalid repetition count(s)" }, -{ REG_ERANGE, "REG_ERANGE", "invalid character range" }, -{ REG_ESPACE, "REG_ESPACE", "out of memory" }, -{ REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" }, -{ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" }, -{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" }, -{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" }, -{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" }, diff --git a/src/regex/regex.h b/src/regex/regex.h deleted file mode 100644 index 8289a500eb..0000000000 --- a/src/regex/regex.h +++ /dev/null @@ -1,341 +0,0 @@ -#ifndef _REGEX_H_ -#define _REGEX_H_ /* never again */ -/* - * regular expressions - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * - * - * Prototypes etc. marked with "^" within comments get gathered up (and - * possibly edited) by the regfwd program and inserted near the bottom of - * this file. - * - * We offer the option of declaring one wide-character version of the - * RE functions as well as the char versions. To do that, define - * __REG_WIDE_T to the type of wide characters (unfortunately, there - * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and - * __REG_WIDE_EXEC to the names to be used for the compile and execute - * functions (suggestion: re_Xcomp and re_Xexec, where X is a letter - * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode). - * For cranky old compilers, it may be necessary to do something like: - * #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d) - * #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g) - * rather than just #defining the names as parameterless macros. - * - * For some specialized purposes, it may be desirable to suppress the - * declarations of the "front end" functions, regcomp() and regexec(), - * or of the char versions of the compile and execute functions. To - * suppress the front-end functions, define __REG_NOFRONT. To suppress - * the char versions, define __REG_NOCHAR. - * - * The right place to do those defines (and some others you may want, see - * below) would be . If you don't have control of that file, - * the right place to add your own defines to this file is marked below. - * This is normally done automatically, by the makefile and regmkhdr, based - * on the contents of regcustom.h. - */ - - - -/* - * voodoo for C++ - */ -#ifdef __cplusplus -extern "C" { -#endif - - - -/* - * Add your own defines, if needed, here. - */ - - - -/* - * Location where a chunk of regcustom.h is automatically spliced into - * this file (working from its prototype, regproto.h). - */ -/* --- begin --- */ -/* ensure certain things don't sneak in from system headers */ -#ifdef __REG_WIDE_T -#undef __REG_WIDE_T -#endif -#ifdef __REG_WIDE_COMPILE -#undef __REG_WIDE_COMPILE -#endif -#ifdef __REG_WIDE_EXEC -#undef __REG_WIDE_EXEC -#endif -#ifdef __REG_REGOFF_T -#undef __REG_REGOFF_T -#endif -#ifdef __REG_VOID_T -#undef __REG_VOID_T -#endif -#ifdef __REG_CONST -#undef __REG_CONST -#endif -#ifdef __REG_NOFRONT -#undef __REG_NOFRONT -#endif -#ifdef __REG_NOCHAR -#undef __REG_NOCHAR -#endif -/* interface types */ -#define __REG_WIDE_T Tcl_UniChar -#define __REG_REGOFF_T long /* not really right, but good enough... */ -#define __REG_VOID_T VOID -#define __REG_CONST CONST -/* names and declarations */ -#define __REG_WIDE_COMPILE TclReComp -#define __REG_WIDE_EXEC TclReExec -#define __REG_NOFRONT /* don't want regcomp() and regexec() */ -#define __REG_NOCHAR /* or the char versions */ -#define regfree TclReFree -#define regerror TclReError -/* --- end --- */ - - -/* - * interface types etc. - */ - -/* - * regoff_t has to be large enough to hold either off_t or ssize_t, - * and must be signed; it's only a guess that long is suitable, so we - * offer an override. - */ -#ifdef __REG_REGOFF_T -typedef __REG_REGOFF_T regoff_t; -#else -typedef long regoff_t; -#endif - -/* - * For benefit of old compilers, we offer the option of - * overriding the `void' type used to declare nonexistent return types. - */ -#ifdef __REG_VOID_T -typedef __REG_VOID_T re_void; -#else -typedef void re_void; -#endif - -/* - * Also for benefit of old compilers, can supply a macro - * which expands to a substitute for `const'. - */ -#ifndef __REG_CONST -#define __REG_CONST const -#endif - - - -/* - * other interface types - */ - -/* the biggie, a compiled RE (or rather, a front end to same) */ -typedef struct { - int re_magic; /* magic number */ - size_t re_nsub; /* number of subexpressions */ - long re_info; /* information about RE */ -# define REG_UBACKREF 000001 -# define REG_ULOOKAHEAD 000002 -# define REG_UBOUNDS 000004 -# define REG_UBRACES 000010 -# define REG_UBSALNUM 000020 -# define REG_UPBOTCH 000040 -# define REG_UBBS 000100 -# define REG_UNONPOSIX 000200 -# define REG_UUNSPEC 000400 -# define REG_UUNPORT 001000 -# define REG_ULOCALE 002000 -# define REG_UEMPTYMATCH 004000 -# define REG_UIMPOSSIBLE 010000 -# define REG_USHORTEST 020000 - int re_csize; /* sizeof(character) */ - char *re_endp; /* backward compatibility kludge */ - /* the rest is opaque pointers to hidden innards */ - char *re_guts; /* `char *' is more portable than `void *' */ - char *re_fns; -} regex_t; - -/* result reporting (may acquire more fields later) */ -typedef struct { - regoff_t rm_so; /* start of substring */ - regoff_t rm_eo; /* end of substring */ -} regmatch_t; - -/* supplementary control and reporting */ -typedef struct { - regmatch_t rm_extend; /* see REG_EXPECT */ -} rm_detail_t; - - - -/* - * compilation - ^ #ifndef __REG_NOCHAR - ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int); - ^ #endif - ^ #ifndef __REG_NOFRONT - ^ int regcomp(regex_t *, __REG_CONST char *, int); - ^ #endif - ^ #ifdef __REG_WIDE_T - ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int); - ^ #endif - */ -#define REG_BASIC 000000 /* BREs (convenience) */ -#define REG_EXTENDED 000001 /* EREs */ -#define REG_ADVF 000002 /* advanced features in EREs */ -#define REG_ADVANCED 000003 /* AREs (which are also EREs) */ -#define REG_QUOTE 000004 /* no special characters, none */ -#define REG_NOSPEC REG_QUOTE /* historical synonym */ -#define REG_ICASE 000010 /* ignore case */ -#define REG_NOSUB 000020 /* don't care about subexpressions */ -#define REG_EXPANDED 000040 /* expanded format, white space & comments */ -#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */ -#define REG_NLANCH 000200 /* ^ matches after \n, $ before */ -#define REG_NEWLINE 000300 /* newlines are line terminators */ -#define REG_PEND 000400 /* ugh -- backward-compatibility hack */ -#define REG_EXPECT 001000 /* report details on partial/limited matches */ -#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */ -#define REG_DUMP 004000 /* none of your business :-) */ -#define REG_FAKE 010000 /* none of your business :-) */ -#define REG_PROGRESS 020000 /* none of your business :-) */ - - - -/* - * execution - ^ #ifndef __REG_NOCHAR - ^ int re_exec(regex_t *, __REG_CONST char *, size_t, - ^ rm_detail_t *, size_t, regmatch_t [], int); - ^ #endif - ^ #ifndef __REG_NOFRONT - ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int); - ^ #endif - ^ #ifdef __REG_WIDE_T - ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, - ^ rm_detail_t *, size_t, regmatch_t [], int); - ^ #endif - */ -#define REG_NOTBOL 0001 /* BOS is not BOL */ -#define REG_NOTEOL 0002 /* EOS is not EOL */ -#define REG_STARTEND 0004 /* backward compatibility kludge */ -#define REG_FTRACE 0010 /* none of your business */ -#define REG_MTRACE 0020 /* none of your business */ -#define REG_SMALL 0040 /* none of your business */ - - - -/* - * misc generics (may be more functions here eventually) - ^ re_void regfree(regex_t *); - */ - - - -/* - * error reporting - * Be careful if modifying the list of error codes -- the table used by - * regerror() is generated automatically from this file! - * - * Note that there is no wide-char variant of regerror at this time; what - * kind of character is used for error reports is independent of what kind - * is used in matching. - * - ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t); - */ -#define REG_OKAY 0 /* no errors detected */ -#define REG_NOMATCH 1 /* failed to match */ -#define REG_BADPAT 2 /* invalid regexp */ -#define REG_ECOLLATE 3 /* invalid collating element */ -#define REG_ECTYPE 4 /* invalid character class */ -#define REG_EESCAPE 5 /* invalid escape \ sequence */ -#define REG_ESUBREG 6 /* invalid backreference number */ -#define REG_EBRACK 7 /* brackets [] not balanced */ -#define REG_EPAREN 8 /* parentheses () not balanced */ -#define REG_EBRACE 9 /* braces {} not balanced */ -#define REG_BADBR 10 /* invalid repetition count(s) */ -#define REG_ERANGE 11 /* invalid character range */ -#define REG_ESPACE 12 /* out of memory */ -#define REG_BADRPT 13 /* quantifier operand invalid */ -#define REG_ASSERT 15 /* "can't happen" -- you found a bug */ -#define REG_INVARG 16 /* invalid argument to regex function */ -#define REG_MIXED 17 /* character widths of regex and string differ */ -#define REG_BADOPT 18 /* invalid embedded option */ -/* two specials for debugging and testing */ -#define REG_ATOI 101 /* convert error-code name to number */ -#define REG_ITOA 102 /* convert error-code number to name */ - - - -/* - * the prototypes, as possibly munched by regfwd - */ -/* =====^!^===== begin forwards =====^!^===== */ -/* automatically gathered by fwd; do not hand-edit */ -/* === regproto.h === */ -#ifndef __REG_NOCHAR -int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int)); -#endif -#ifndef __REG_NOFRONT -int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int)); -#endif -#ifdef __REG_WIDE_T -int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int)); -#endif -#ifndef __REG_NOCHAR -int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); -#endif -#ifndef __REG_NOFRONT -int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int)); -#endif -#ifdef __REG_WIDE_T -int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); -#endif -re_void regfree _ANSI_ARGS_((regex_t *)); -extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t)); -/* automatically gathered by fwd; do not hand-edit */ -/* =====^!^===== end forwards =====^!^===== */ - - - -/* - * more C++ voodoo - */ -#ifdef __cplusplus -} -#endif - - - -#endif diff --git a/src/regex/regexec.c b/src/regex/regexec.c index 41d49bdab5..dcb11b285c 100644 --- a/src/regex/regexec.c +++ b/src/regex/regexec.c @@ -1,1038 +1,138 @@ /* - * re_*exec and friends - match REs + * the outer shell of regexec() * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * This file includes engine.c *twice*, after muchos fiddling with the + * macros that code uses. This lets the same code operate on two different + * representations for state sets. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "regex2.h" + +static int nope = 0; /* for use in asserts; shuts lint up */ + +/* macros for manipulating states, small version */ +#define states unsigned +#define states1 unsigned /* for later use in regexec() decision */ +#define CLEAR(v) ((v) = 0) +#define SET0(v, n) ((v) &= ~((unsigned)1 << (n))) +#define SET1(v, n) ((v) |= (unsigned)1 << (n)) +#define ISSET(v, n) ((v) & ((unsigned)1 << (n))) +#define ASSIGN(d, s) ((d) = (s)) +#define EQ(a, b) ((a) == (b)) +#define STATEVARS int dummy /* dummy version */ +#define STATESETUP(m, n) /* nothing */ +#define STATETEARDOWN(m) /* nothing */ +#define SETUP(v) ((v) = 0) +#define onestate unsigned +#define INIT(o, n) ((o) = (unsigned)1 << (n)) +#define INC(o) ((o) <<= 1) +#define ISSTATEIN(v, o) ((v) & (o)) +/* some abbreviations; note that some of these know variable names! */ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n)) +#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n)) +#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n))) +/* function names */ +#define SNAMES /* engine.c looks after details */ + +#include "engine.c" + +/* now undo things */ +#undef states +#undef CLEAR +#undef SET0 +#undef SET1 +#undef ISSET +#undef ASSIGN +#undef EQ +#undef STATEVARS +#undef STATESETUP +#undef STATETEARDOWN +#undef SETUP +#undef onestate +#undef INIT +#undef INC +#undef ISSTATEIN +#undef FWD +#undef BACK +#undef ISSETBACK +#undef SNAMES + +/* macros for manipulating states, large version */ +#define states char * +#define CLEAR(v) memset(v, 0, m->g->nstates) +#define SET0(v, n) ((v)[n] = 0) +#define SET1(v, n) ((v)[n] = 1) +#define ISSET(v, n) ((v)[n]) +#define ASSIGN(d, s) memcpy(d, s, m->g->nstates) +#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) +#define STATEVARS int vn; char *space +#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ + if ((m)->space == NULL) return(REG_ESPACE); \ + (m)->vn = 0; } +#define STATETEARDOWN(m) { free((m)->space); } +#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) +#define onestate int +#define INIT(o, n) ((o) = (n)) +#define INC(o) ((o)++) +#define ISSTATEIN(v, o) ((v)[o]) +/* some abbreviations; note that some of these know variable names! */ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) +#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) +#define ISSETBACK(v, n) ((v)[here - (n)]) +/* function names */ +#define LNAMES /* flag */ + +#include "engine.c" + +/* + - regexec - interface for matching + = extern int regexec(const regex_t *, const char *, size_t, \ + = regmatch_t [], int); + = #define REG_NOTBOL 00001 + = #define REG_NOTEOL 00002 + = #define REG_STARTEND 00004 + = #define REG_TRACE 00400 // tracing of execution + = #define REG_LARGE 01000 // force large representation + = #define REG_BACKR 02000 // force use of backref code * + * We put this here so we can exploit knowledge of the state representation + * when choosing which matcher to call. Also, by this point the matchers + * have been prototyped. */ - -#include "regguts.h" - - - -/* lazy-DFA representation */ -struct arcp { /* "pointer" to an outarc */ - struct sset *ss; - color co; -}; - -struct sset { /* state set */ - unsigned *states; /* pointer to bitvector */ - unsigned hash; /* hash of bitvector */ -# define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw)) -# define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \ - memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0)) - int flags; -# define STARTER 01 /* the initial state set */ -# define POSTSTATE 02 /* includes the goal state */ -# define LOCKED 04 /* locked in cache */ -# define NOPROGRESS 010 /* zero-progress state set */ - struct arcp ins; /* chain of inarcs pointing here */ - chr *lastseen; /* last entered on arrival here */ - struct sset **outs; /* outarc vector indexed by color */ - struct arcp *inchain; /* chain-pointer vector for outarcs */ -}; - -struct dfa { - int nssets; /* size of cache */ - int nssused; /* how many entries occupied yet */ - int nstates; /* number of states */ - int ncolors; /* length of outarc and inchain vectors */ - int wordsper; /* length of state-set bitvectors */ - struct sset *ssets; /* state-set cache */ - unsigned *statesarea; /* bitvector storage */ - unsigned *work; /* pointer to work area within statesarea */ - struct sset **outsarea; /* outarc-vector storage */ - struct arcp *incarea; /* inchain storage */ - struct cnfa *cnfa; - struct colormap *cm; - chr *lastpost; /* location of last cache-flushed success */ - chr *lastnopr; /* location of last cache-flushed NOPROGRESS */ - struct sset *search; /* replacement-search-pointer memory */ - int cptsmalloced; /* were the areas individually malloced? */ - char *mallocarea; /* self, or master malloced area, or NULL */ -}; - -#define WORK 1 /* number of work bitvectors needed */ - -/* setup for non-malloc allocation for small cases */ -#define FEWSTATES 20 /* must be less than UBITS */ -#define FEWCOLORS 15 -struct smalldfa { - struct dfa dfa; - struct sset ssets[FEWSTATES*2]; - unsigned statesarea[FEWSTATES*2 + WORK]; - struct sset *outsarea[FEWSTATES*2 * FEWCOLORS]; - struct arcp incarea[FEWSTATES*2 * FEWCOLORS]; -}; -#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */ - - - -/* internal variables, bundled for easy passing around */ -struct vars { - regex_t *re; - struct guts *g; - int eflags; /* copies of arguments */ - size_t nmatch; - regmatch_t *pmatch; - rm_detail_t *details; - chr *start; /* start of string */ - chr *stop; /* just past end of string */ - int err; /* error code if any (0 none) */ - regoff_t *mem; /* memory vector for backtracking */ - struct smalldfa dfa1; - struct smalldfa dfa2; -}; -#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */ -#define ISERR() VISERR(v) -#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e))) -#define ERR(e) VERR(v, e) /* record an error */ -#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */ -#define OFF(p) ((p) - v->start) -#define LOFF(p) ((long)OFF(p)) - - - -/* - * forward declarations - */ -/* =====^!^===== begin forwards =====^!^===== */ -/* automatically gathered by fwd; do not hand-edit */ -/* === regexec.c === */ -int exec _ANSI_ARGS_((regex_t *, CONST chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int)); -static int find _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); -static int cfind _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *)); -static int cfindloop _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **)); -static VOID zapsubs _ANSI_ARGS_((regmatch_t *, size_t)); -static VOID zapmem _ANSI_ARGS_((struct vars *, struct subre *)); -static VOID subset _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int dissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int condissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int altdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int cdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int ccondissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int crevdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int cbrdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -static int caltdissect _ANSI_ARGS_((struct vars *, struct subre *, chr *, chr *)); -/* === rege_dfa.c === */ -static chr *longest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, int *)); -static chr *shortest _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *)); -static chr *lastcold _ANSI_ARGS_((struct vars *, struct dfa *)); -static struct dfa *newdfa _ANSI_ARGS_((struct vars *, struct cnfa *, struct colormap *, struct smalldfa *)); -static VOID freedfa _ANSI_ARGS_((struct dfa *)); -static unsigned hash _ANSI_ARGS_((unsigned *, int)); -static struct sset *initialize _ANSI_ARGS_((struct vars *, struct dfa *, chr *)); -static struct sset *miss _ANSI_ARGS_((struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *)); -static int lacon _ANSI_ARGS_((struct vars *, struct cnfa *, chr *, pcolor)); -static struct sset *getvacant _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); -static struct sset *pickss _ANSI_ARGS_((struct vars *, struct dfa *, chr *, chr *)); -/* automatically gathered by fwd; do not hand-edit */ -/* =====^!^===== end forwards =====^!^===== */ - - - -/* - - exec - match regular expression - ^ int exec(regex_t *, CONST chr *, size_t, rm_detail_t *, - ^ size_t, regmatch_t [], int); - */ -int -exec(re, string, len, details, nmatch, pmatch, flags) -regex_t *re; -CONST chr *string; -size_t len; -rm_detail_t *details; +int /* 0 success, REG_NOMATCH failure */ +regexec(preg, string, nmatch, pmatch, eflags) +const regex_t *preg; +const char *string; size_t nmatch; regmatch_t pmatch[]; -int flags; +int eflags; { - struct vars var; - register struct vars *v = &var; - int st; - size_t n; - int backref; -# define LOCALMAT 20 - regmatch_t mat[LOCALMAT]; -# define LOCALMEM 40 - regoff_t mem[LOCALMEM]; + register struct re_guts *g = preg->re_g; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) +#else +# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) +#endif - /* sanity checks */ - if (re == NULL || string == NULL || re->re_magic != REMAGIC) - return REG_INVARG; - if (re->re_csize != sizeof(chr)) - return REG_MIXED; + if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) + return(REG_BADPAT); + assert(!(g->iflags&BAD)); + if (g->iflags&BAD) /* backstop for no-debug case */ + return(REG_BADPAT); + eflags = GOODFLAGS(eflags); - /* setup */ - v->re = re; - v->g = (struct guts *)re->re_guts; - if ((v->g->cflags®_EXPECT) && details == NULL) - return REG_INVARG; - if (v->g->info®_UIMPOSSIBLE) - return REG_NOMATCH; - backref = (v->g->info®_UBACKREF) ? 1 : 0; - v->eflags = flags; - if (v->g->cflags®_NOSUB) - nmatch = 0; /* override client */ - v->nmatch = nmatch; - if (backref) { - /* need work area */ - if (v->g->nsub + 1 <= LOCALMAT) - v->pmatch = mat; - else - v->pmatch = (regmatch_t *)MALLOC((v->g->nsub + 1) * - sizeof(regmatch_t)); - if (v->pmatch == NULL) - return REG_ESPACE; - v->nmatch = v->g->nsub + 1; - } else - v->pmatch = pmatch; - v->details = details; - v->start = (chr *)string; - v->stop = (chr *)string + len; - v->err = 0; - if (backref) { - /* need retry memory */ - assert(v->g->ntree >= 0); - n = (size_t)v->g->ntree; - if (n <= LOCALMEM) - v->mem = mem; - else - v->mem = (regoff_t *)MALLOC(n*sizeof(regoff_t)); - if (v->mem == NULL) { - if (v->pmatch != pmatch && v->pmatch != mat) - FREE(v->pmatch); - return REG_ESPACE; - } - } else - v->mem = NULL; - - /* do it */ - assert(v->g->tree != NULL); - if (backref) - st = cfind(v, &v->g->tree->cnfa, &v->g->cmap); + if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) + return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); else - st = find(v, &v->g->tree->cnfa, &v->g->cmap); - - /* copy (portion of) match vector over if necessary */ - if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) { - zapsubs(pmatch, nmatch); - n = (nmatch < v->nmatch) ? nmatch : v->nmatch; - memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t)); - } - - /* clean up */ - if (v->pmatch != pmatch && v->pmatch != mat) - FREE(v->pmatch); - if (v->mem != NULL && v->mem != mem) - FREE(v->mem); - return st; + return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); } - -/* - - find - find a match for the main NFA (no-complications case) - ^ static int find(struct vars *, struct cnfa *, struct colormap *); - */ -static int -find(v, cnfa, cm) -struct vars *v; -struct cnfa *cnfa; -struct colormap *cm; -{ - struct dfa *s; - struct dfa *d; - chr *begin; - chr *end = NULL; - chr *cold; - chr *open; /* open and close of range of possible starts */ - chr *close; - int hitend; - int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0; - - /* first, a shot with the search RE */ - s = newdfa(v, &v->g->search, cm, &v->dfa1); - assert(!(ISERR() && s != NULL)); - NOERR(); - MDEBUG(("\nsearch at %ld\n", LOFF(v->start))); - cold = NULL; - close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)NULL); - freedfa(s); - NOERR(); - if (v->g->cflags®_EXPECT) { - assert(v->details != NULL); - if (cold != NULL) - v->details->rm_extend.rm_so = OFF(cold); - else - v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ - } - if (close == NULL) /* not found */ - return REG_NOMATCH; - if (v->nmatch == 0) /* found, don't need exact location */ - return REG_OKAY; - - /* find starting point and match */ - assert(cold != NULL); - open = cold; - cold = NULL; - MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close))); - d = newdfa(v, cnfa, cm, &v->dfa1); - assert(!(ISERR() && d != NULL)); - NOERR(); - for (begin = open; begin <= close; begin++) { - MDEBUG(("\nfind trying at %ld\n", LOFF(begin))); - if (shorter) - end = shortest(v, d, begin, begin, v->stop, - (chr **)NULL, &hitend); - else - end = longest(v, d, begin, v->stop, &hitend); - NOERR(); - if (hitend && cold == NULL) - cold = begin; - if (end != NULL) - break; /* NOTE BREAK OUT */ - } - assert(end != NULL); /* search RE succeeded so loop should */ - freedfa(d); - - /* and pin down details */ - assert(v->nmatch > 0); - v->pmatch[0].rm_so = OFF(begin); - v->pmatch[0].rm_eo = OFF(end); - if (v->g->cflags®_EXPECT) { - if (cold != NULL) - v->details->rm_extend.rm_so = OFF(cold); - else - v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ - } - if (v->nmatch == 1) /* no need for submatches */ - return REG_OKAY; - - /* submatches */ - zapsubs(v->pmatch, v->nmatch); - return dissect(v, v->g->tree, begin, end); -} - -/* - - cfind - find a match for the main NFA (with complications) - ^ static int cfind(struct vars *, struct cnfa *, struct colormap *); - */ -static int -cfind(v, cnfa, cm) -struct vars *v; -struct cnfa *cnfa; -struct colormap *cm; -{ - struct dfa *s; - struct dfa *d; - chr *cold; - int ret; - - s = newdfa(v, &v->g->search, cm, &v->dfa1); - NOERR(); - d = newdfa(v, cnfa, cm, &v->dfa2); - if (ISERR()) { - assert(d == NULL); - freedfa(s); - return v->err; - } - - ret = cfindloop(v, cnfa, cm, d, s, &cold); - - freedfa(d); - freedfa(s); - NOERR(); - if (v->g->cflags®_EXPECT) { - assert(v->details != NULL); - if (cold != NULL) - v->details->rm_extend.rm_so = OFF(cold); - else - v->details->rm_extend.rm_so = OFF(v->stop); - v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */ - } - return ret; -} - -/* - - cfindloop - the heart of cfind - ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *, - ^ struct dfa *, struct dfa *, chr **); - */ -static int -cfindloop(v, cnfa, cm, d, s, coldp) -struct vars *v; -struct cnfa *cnfa; -struct colormap *cm; -struct dfa *d; -struct dfa *s; -chr **coldp; /* where to put coldstart pointer */ -{ - chr *begin; - chr *end; - chr *cold; - chr *open; /* open and close of range of possible starts */ - chr *close; - chr *estart; - chr *estop; - int er; - int shorter = v->g->tree->flags&SHORTER; - int hitend; - - assert(d != NULL && s != NULL); - cold = NULL; - close = v->start; - do { - MDEBUG(("\ncsearch at %ld\n", LOFF(close))); - close = shortest(v, s, close, close, v->stop, &cold, (int *)NULL); - if (close == NULL) - break; /* NOTE BREAK */ - assert(cold != NULL); - open = cold; - cold = NULL; - MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close))); - for (begin = open; begin <= close; begin++) { - MDEBUG(("\ncfind trying at %ld\n", LOFF(begin))); - estart = begin; - estop = v->stop; - for (;;) { - if (shorter) - end = shortest(v, d, begin, estart, - estop, (chr **)NULL, &hitend); - else - end = longest(v, d, begin, estop, - &hitend); - if (hitend && cold == NULL) - cold = begin; - if (end == NULL) - break; /* NOTE BREAK OUT */ - MDEBUG(("tentative end %ld\n", LOFF(end))); - zapsubs(v->pmatch, v->nmatch); - zapmem(v, v->g->tree); - er = cdissect(v, v->g->tree, begin, end); - if (er == REG_OKAY) { - if (v->nmatch > 0) { - v->pmatch[0].rm_so = OFF(begin); - v->pmatch[0].rm_eo = OFF(end); - } - *coldp = cold; - return REG_OKAY; - } - if (er != REG_NOMATCH) { - ERR(er); - return er; - } - if ((shorter) ? end == estop : end == begin) { - /* no point in trying again */ - *coldp = cold; - return REG_NOMATCH; - } - /* go around and try again */ - if (shorter) - estart = end + 1; - else - estop = end - 1; - } - } - } while (close < v->stop); - - *coldp = cold; - return REG_NOMATCH; -} - -/* - - zapsubs - initialize the subexpression matches to "no match" - ^ static VOID zapsubs(regmatch_t *, size_t); - */ -static VOID -zapsubs(p, n) -regmatch_t *p; -size_t n; -{ - size_t i; - - for (i = n-1; i > 0; i--) { - p[i].rm_so = -1; - p[i].rm_eo = -1; - } -} - -/* - - zapmem - initialize the retry memory of a subtree to zeros - ^ static VOID zapmem(struct vars *, struct subre *); - */ -static VOID -zapmem(v, t) -struct vars *v; -struct subre *t; -{ - if (t == NULL) - return; - - assert(v->mem != NULL); - v->mem[t->retry] = 0; - if (t->op == '(') { - assert(t->subno > 0); - v->pmatch[t->subno].rm_so = -1; - v->pmatch[t->subno].rm_eo = -1; - } - - if (t->left != NULL) - zapmem(v, t->left); - if (t->right != NULL) - zapmem(v, t->right); -} - -/* - - subset - set any subexpression relevant to a successful subre - ^ static VOID subset(struct vars *, struct subre *, chr *, chr *); - */ -static VOID -subset(v, sub, begin, end) -struct vars *v; -struct subre *sub; -chr *begin; -chr *end; -{ - int n = sub->subno; - - assert(n > 0); - if ((size_t)n >= v->nmatch) - return; - - MDEBUG(("setting %d\n", n)); - v->pmatch[n].rm_so = OFF(begin); - v->pmatch[n].rm_eo = OFF(end); -} - -/* - - dissect - determine subexpression matches (uncomplicated case) - ^ static int dissect(struct vars *, struct subre *, chr *, chr *); - */ -static int /* regexec return code */ -dissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - assert(t != NULL); - MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end))); - - switch (t->op) { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return altdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! */ - return REG_ASSERT; - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return condissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - subset(v, t, begin, end); - return dissect(v, t->left, begin, end); - break; - default: - return REG_ASSERT; - break; - } -} - -/* - - condissect - determine concatenation subexpression matches (uncomplicated) - ^ static int condissect(struct vars *, struct subre *, chr *, chr *); - */ -static int /* regexec return code */ -condissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - struct dfa *d2; - chr *mid; - int i; - int shorter = (t->left->flags&SHORTER) ? 1 : 0; - chr *stop = (shorter) ? end : begin; - - assert(t->op == '.'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - assert(t->right != NULL && t->right->cnfa.nstates > 0); - - d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); - NOERR(); - d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2); - if (ISERR()) { - assert(d2 == NULL); - freedfa(d); - return v->err; - } - - /* pick a tentative midpoint */ - if (shorter) - mid = shortest(v, d, begin, begin, end, (chr **)NULL, - (int *)NULL); - else - mid = longest(v, d, begin, end, (int *)NULL); - if (mid == NULL) { - freedfa(d); - freedfa(d2); - return REG_ASSERT; - } - MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); - - /* iterate until satisfaction or failure */ - while (longest(v, d2, mid, end, (int *)NULL) != end) { - /* that midpoint didn't work, find a new one */ - if (mid == stop) { - /* all possibilities exhausted! */ - MDEBUG(("no midpoint!\n")); - freedfa(d); - freedfa(d2); - return REG_ASSERT; - } - if (shorter) - mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, - (int *)NULL); - else - mid = longest(v, d, begin, mid-1, (int *)NULL); - if (mid == NULL) { - /* failed to find a new one! */ - MDEBUG(("failed midpoint!\n")); - freedfa(d); - freedfa(d2); - return REG_ASSERT; - } - MDEBUG(("new midpoint %ld\n", LOFF(mid))); - } - - /* satisfaction */ - MDEBUG(("successful\n")); - freedfa(d); - freedfa(d2); - i = dissect(v, t->left, begin, mid); - if (i != REG_OKAY) - return i; - return dissect(v, t->right, mid, end); -} - -/* - - altdissect - determine alternative subexpression matches (uncomplicated) - ^ static int altdissect(struct vars *, struct subre *, chr *, chr *); - */ -static int /* regexec return code */ -altdissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - int i; - - assert(t != NULL); - assert(t->op == '|'); - - for (i = 0; t != NULL; t = t->right, i++) { - MDEBUG(("trying %dth\n", i)); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1); - if (ISERR()) - return v->err; - if (longest(v, d, begin, end, (int *)NULL) == end) { - MDEBUG(("success\n")); - freedfa(d); - return dissect(v, t->left, begin, end); - } - freedfa(d); - } - return REG_ASSERT; /* none of them matched?!? */ -} - -/* - - cdissect - determine subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, - * plus 1 so that 0 uniquely means "clean slate". - ^ static int cdissect(struct vars *, struct subre *, chr *, chr *); - */ -static int /* regexec return code */ -cdissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - int er; - - assert(t != NULL); - MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); - - switch (t->op) { - case '=': /* terminal node */ - assert(t->left == NULL && t->right == NULL); - return REG_OKAY; /* no action, parent did the work */ - break; - case '|': /* alternation */ - assert(t->left != NULL); - return caltdissect(v, t, begin, end); - break; - case 'b': /* back ref -- shouldn't be calling us! */ - assert(t->left == NULL && t->right == NULL); - return cbrdissect(v, t, begin, end); - break; - case '.': /* concatenation */ - assert(t->left != NULL && t->right != NULL); - return ccondissect(v, t, begin, end); - break; - case '(': /* capturing */ - assert(t->left != NULL && t->right == NULL); - assert(t->subno > 0); - er = cdissect(v, t->left, begin, end); - if (er == REG_OKAY) - subset(v, t, begin, end); - return er; - break; - default: - return REG_ASSERT; - break; - } -} - -/* - - ccondissect - concatenation subexpression matches (with complications) - * The retry memory stores the offset of the trial midpoint from begin, - * plus 1 so that 0 uniquely means "clean slate". - ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *); - */ -static int /* regexec return code */ -ccondissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - struct dfa *d2; - chr *mid; - int er; - - assert(t->op == '.'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - assert(t->right != NULL && t->right->cnfa.nstates > 0); - - if (t->left->flags&SHORTER) /* reverse scan */ - return crevdissect(v, t, begin, end); - - d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - return v->err; - d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) { - freedfa(d); - return v->err; - } - MDEBUG(("cconcat %d\n", t->retry)); - - /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) { - mid = longest(v, d, begin, end, (int *)NULL); - if (mid == NULL) { - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - } else { - mid = begin + (v->mem[t->retry] - 1); - MDEBUG(("working midpoint %ld\n", LOFF(mid))); - } - - /* iterate until satisfaction or failure */ - for (;;) { - /* try this midpoint on for size */ - er = cdissect(v, t->left, begin, mid); - if (er == REG_OKAY && - longest(v, d2, mid, end, (int *)NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) { - freedfa(d); - freedfa(d2); - return er; - } - - /* that midpoint didn't work, find a new one */ - if (mid == begin) { - /* all possibilities exhausted */ - MDEBUG(("%d no midpoint\n", t->retry)); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - mid = longest(v, d, begin, mid-1, (int *)NULL); - if (mid == NULL) { - /* failed to find a new one */ - MDEBUG(("%d failed midpoint\n", t->retry)); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - zapmem(v, t->left); - zapmem(v, t->right); - } - - /* satisfaction */ - MDEBUG(("successful\n")); - freedfa(d); - freedfa(d2); - return REG_OKAY; -} - -/* - - crevdissect - determine backref shortest-first subexpression matches - * The retry memory stores the offset of the trial midpoint from begin, - * plus 1 so that 0 uniquely means "clean slate". - ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *); - */ -static int /* regexec return code */ -crevdissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - struct dfa *d2; - chr *mid; - int er; - - assert(t->op == '.'); - assert(t->left != NULL && t->left->cnfa.nstates > 0); - assert(t->right != NULL && t->right->cnfa.nstates > 0); - assert(t->left->flags&SHORTER); - - /* concatenation -- need to split the substring between parts */ - d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - return v->err; - d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) { - freedfa(d); - return v->err; - } - MDEBUG(("crev %d\n", t->retry)); - - /* pick a tentative midpoint */ - if (v->mem[t->retry] == 0) { - mid = shortest(v, d, begin, begin, end, (chr **)NULL, (int *)NULL); - if (mid == NULL) { - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - } else { - mid = begin + (v->mem[t->retry] - 1); - MDEBUG(("working midpoint %ld\n", LOFF(mid))); - } - - /* iterate until satisfaction or failure */ - for (;;) { - /* try this midpoint on for size */ - er = cdissect(v, t->left, begin, mid); - if (er == REG_OKAY && - longest(v, d2, mid, end, (int *)NULL) == end && - (er = cdissect(v, t->right, mid, end)) == - REG_OKAY) - break; /* NOTE BREAK OUT */ - if (er != REG_OKAY && er != REG_NOMATCH) { - freedfa(d); - freedfa(d2); - return er; - } - - /* that midpoint didn't work, find a new one */ - if (mid == end) { - /* all possibilities exhausted */ - MDEBUG(("%d no midpoint\n", t->retry)); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - mid = shortest(v, d, begin, mid+1, end, (chr **)NULL, (int *)NULL); - if (mid == NULL) { - /* failed to find a new one */ - MDEBUG(("%d failed midpoint\n", t->retry)); - freedfa(d); - freedfa(d2); - return REG_NOMATCH; - } - MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid))); - v->mem[t->retry] = (mid - begin) + 1; - zapmem(v, t->left); - zapmem(v, t->right); - } - - /* satisfaction */ - MDEBUG(("successful\n")); - freedfa(d); - freedfa(d2); - return REG_OKAY; -} - -/* - - cbrdissect - determine backref subexpression matches - ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *); - */ -static int /* regexec return code */ -cbrdissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - int i; - int n = t->subno; - size_t len; - chr *paren; - chr *p; - chr *stop; - int min = t->min; - int max = t->max; - - assert(t != NULL); - assert(t->op == 'b'); - assert(n >= 0); - assert((size_t)n < v->nmatch); - - MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max)); - - if (v->pmatch[n].rm_so == -1) - return REG_NOMATCH; - paren = v->start + v->pmatch[n].rm_so; - len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so; - - /* no room to maneuver -- retries are pointless */ - if (v->mem[t->retry]) - return REG_NOMATCH; - v->mem[t->retry] = 1; - - /* special-case zero-length string */ - if (len == 0) { - if (begin == end) - return REG_OKAY; - return REG_NOMATCH; - } - - /* and too-short string */ - assert(end >= begin); - if ((size_t)(end - begin) < len) - return REG_NOMATCH; - stop = end - len; - - /* count occurrences */ - i = 0; - for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) { - if ((*v->g->compare)(paren, p, len) != 0) - break; - i++; - } - MDEBUG(("cbackref found %d\n", i)); - - /* and sort it out */ - if (p != end) /* didn't consume all of it */ - return REG_NOMATCH; - if (min <= i && (i <= max || max == INFINITY)) - return REG_OKAY; - return REG_NOMATCH; /* out of range */ -} - -/* - - caltdissect - determine alternative subexpression matches (w. complications) - ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *); - */ -static int /* regexec return code */ -caltdissect(v, t, begin, end) -struct vars *v; -struct subre *t; -chr *begin; /* beginning of relevant substring */ -chr *end; /* end of same */ -{ - struct dfa *d; - int er; -# define UNTRIED 0 /* not yet tried at all */ -# define TRYING 1 /* top matched, trying submatches */ -# define TRIED 2 /* top didn't match or submatches exhausted */ - - if (t == NULL) - return REG_NOMATCH; - assert(t->op == '|'); - if (v->mem[t->retry] == TRIED) - return caltdissect(v, t->right, begin, end); - - MDEBUG(("calt n%d\n", t->retry)); - assert(t->left != NULL); - - if (v->mem[t->retry] == UNTRIED) { - d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC); - if (ISERR()) - return v->err; - if (longest(v, d, begin, end, (int *)NULL) != end) { - freedfa(d); - v->mem[t->retry] = TRIED; - return caltdissect(v, t->right, begin, end); - } - freedfa(d); - MDEBUG(("calt matched\n")); - v->mem[t->retry] = TRYING; - } - - er = cdissect(v, t->left, begin, end); - if (er != REG_NOMATCH) - return er; - - v->mem[t->retry] = TRIED; - return caltdissect(v, t->right, begin, end); -} - - - -#include "rege_dfa.c" diff --git a/src/regex/regfree.c b/src/regex/regfree.c index 17a73896f5..9a6acf1733 100644 --- a/src/regex/regfree.c +++ b/src/regex/regfree.c @@ -1,53 +1,37 @@ -/* - * regfree - free an RE - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * - * - * You might think that this could be incorporated into regcomp.c, and - * that would be a reasonable idea... except that this is a generic - * function (with a generic name), applicable to all compiled REs - * regardless of the size of their characters, whereas the stuff in - * regcomp.c gets compiled once per character size. - */ +#include +#include +#include +#include -#include "regguts.h" +#include "utils.h" +#include "regex2.h" /* - - regfree - free an RE (generic function, punts to RE-specific function) - * - * Ignoring invocation with NULL is a convenience. + - regfree - free everything + = extern void regfree(regex_t *); */ -VOID -regfree(re) -regex_t *re; +void +regfree(preg) +regex_t *preg; { - if (re == NULL) + register struct re_guts *g; + + if (preg->re_magic != MAGIC1) /* oops */ + return; /* nice to complain, but hard */ + + g = preg->re_g; + if (g == NULL || g->magic != MAGIC2) /* oops again */ return; - (*((struct fns *)re->re_fns)->free)(re); + preg->re_magic = 0; /* mark it invalid */ + g->magic = 0; /* mark it invalid */ + + if (g->strip != NULL) + free((char *)g->strip); + if (g->sets != NULL) + free((char *)g->sets); + if (g->setbits != NULL) + free((char *)g->setbits); + if (g->must != NULL) + free(g->must); + free((char *)g); } diff --git a/src/regex/regfronts.c b/src/regex/regfronts.c deleted file mode 100644 index 82f48e2abc..0000000000 --- a/src/regex/regfronts.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * regcomp and regexec - front ends to re_ routines - * - * Mostly for implementation of backward-compatibility kludges. Note - * that these routines exist ONLY in char versions. - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include "regguts.h" - -/* - - regcomp - compile regular expression - */ -int -regcomp(re, str, flags) -regex_t *re; -CONST char *str; -int flags; -{ - size_t len; - int f = flags; - - if (f®_PEND) { - len = re->re_endp - str; - f &= ~REG_PEND; - } else - len = strlen(str); - - return re_comp(re, str, len, f); -} - -/* - - regexec - execute regular expression - */ -int -regexec(re, str, nmatch, pmatch, flags) -regex_t *re; -CONST char *str; -size_t nmatch; -regmatch_t pmatch[]; -int flags; -{ - CONST char *start; - size_t len; - int f = flags; - - if (f®_STARTEND) { - start = str + pmatch[0].rm_so; - len = pmatch[0].rm_eo - pmatch[0].rm_so; - f &= ~REG_STARTEND; - } else { - start = str; - len = strlen(str); - } - - return re_exec(re, start, len, nmatch, pmatch, f); -} diff --git a/src/regex/regguts.h b/src/regex/regguts.h deleted file mode 100644 index 36e5092367..0000000000 --- a/src/regex/regguts.h +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Internal interface definitions, etc., for the reg package - * - * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. - * - * Development of this software was funded, in part, by Cray Research Inc., - * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics - * Corporation, none of whom are responsible for the results. The author - * thanks all of them. - * - * Redistribution and use in source and binary forms -- with or without - * modification -- are permitted for any purpose, provided that - * redistributions in source form retain this entire copyright notice and - * indicate the origin and nature of any modifications. - * - * I'd appreciate being given credit for this package in the documentation - * of software which uses it, but that is not a requirement. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - - -/* - * Environmental customization. It should not (I hope) be necessary to - * alter the file you are now reading -- regcustom.h should handle it all, - * given care here and elsewhere. - */ -#include "regcustom.h" - - - -/* - * Things that regcustom.h might override. - */ - -/* standard header files (NULL is a reasonable indicator for them) */ -#ifndef NULL -#include -#include -#include -#include -#include -#endif - -/* assertions */ -#ifndef assert -# ifndef REG_DEBUG -# define NDEBUG /* no assertions */ -# endif -#include -#endif - -/* voids */ -#ifndef VOID -#define VOID void /* for function return values */ -#endif -#ifndef DISCARD -#define DISCARD VOID /* for throwing values away */ -#endif -#ifndef PVOID -#define PVOID VOID * /* generic pointer */ -#endif -#ifndef VS -#define VS(x) ((PVOID)(x)) /* cast something to generic ptr */ -#endif -#ifndef NOPARMS -#define NOPARMS VOID /* for empty parm lists */ -#endif - -/* const */ -#ifndef CONST -#define CONST const /* for old compilers, might be empty */ -#endif - -/* function-pointer declarator */ -#ifndef FUNCPTR -#if __STDC__ >= 1 -#define FUNCPTR(name, args) (*name)args -#else -#define FUNCPTR(name, args) (*name)() -#endif -#endif - -/* memory allocation */ -#ifndef MALLOC -#define MALLOC(n) malloc(n) -#endif -#ifndef REALLOC -#define REALLOC(p, n) realloc(VS(p), n) -#endif -#ifndef FREE -#define FREE(p) free(VS(p)) -#endif - -/* want size of a char in bits, and max value in bounded quantifiers */ -#ifndef CHAR_BIT -#include -#endif -#ifndef _POSIX2_RE_DUP_MAX -#define _POSIX2_RE_DUP_MAX 255 /* normally from */ -#endif - - - -/* - * misc - */ - -#define NOTREACHED 0 -#define xxx 1 - -#define DUPMAX _POSIX2_RE_DUP_MAX -#define INFINITY (DUPMAX+1) - -#define REMAGIC 0xfed7 /* magic number for main struct */ - - - -/* - * debugging facilities - */ -#ifdef REG_DEBUG -/* FDEBUG does finite-state tracing */ -#define FDEBUG(arglist) { if (v->eflags®_FTRACE) printf arglist; } -/* MDEBUG does higher-level tracing */ -#define MDEBUG(arglist) { if (v->eflags®_MTRACE) printf arglist; } -#else -#define FDEBUG(arglist) {} -#define MDEBUG(arglist) {} -#endif - - - -/* - * bitmap manipulation - */ -#define UBITS (CHAR_BIT * sizeof(unsigned)) -#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS)) -#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS))) - - - -/* - * We dissect a chr into byts for colormap table indexing. Here we define - * a byt, which will be the same as a byte on most machines... The exact - * size of a byt is not critical, but about 8 bits is good, and extraction - * of 8-bit chunks is sometimes especially fast. - */ -#ifndef BYTBITS -#define BYTBITS 8 /* bits in a byt */ -#endif -#define BYTTAB (1<flags&FREECOL) - union tree *block; /* block of solid color, if any */ -}; - -/* the color map itself */ -struct colormap { - int magic; -# define CMMAGIC 0x876 - struct vars *v; /* for compile error reporting */ - size_t ncds; /* number of colordescs */ - size_t max; /* highest in use */ - color free; /* beginning of free chain (if non-0) */ - struct colordesc *cd; -# define CDEND(cm) (&(cm)->cd[(cm)->max + 1]) -# define NINLINECDS ((size_t)10) - struct colordesc cdspace[NINLINECDS]; - union tree tree[NBYTS]; /* tree top, plus fill blocks */ -}; - -/* optimization magic to do fast chr->color mapping */ -#define B0(c) ((c) & BYTMASK) -#define B1(c) (((c)>>BYTBITS) & BYTMASK) -#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK) -#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK) -#if NBYTS == 1 -#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)]) -#endif -/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */ -#if NBYTS == 2 -#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)]) -#endif -#if NBYTS == 4 -#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)]) -#endif - - - -/* - * Interface definitions for locale-interface functions in locale.c. - * Multi-character collating elements (MCCEs) cause most of the trouble. - */ -struct cvec { - int nchrs; /* number of chrs */ - int chrspace; /* number of chrs possible */ - chr *chrs; /* pointer to vector of chrs */ - int nranges; /* number of ranges (chr pairs) */ - int rangespace; /* number of chrs possible */ - chr *ranges; /* pointer to vector of chr pairs */ - int nmcces; /* number of MCCEs */ - int mccespace; /* number of MCCEs possible */ - int nmccechrs; /* number of chrs used for MCCEs */ - chr *mcces[1]; /* pointers to 0-terminated MCCEs */ - /* and both batches of chrs are on the end */ -}; - -/* caution: this value cannot be changed easily */ -#define MAXMCCE 2 /* length of longest MCCE */ - - - -/* - * definitions for NFA internal representation - * - * Having a "from" pointer within each arc may seem redundant, but it - * saves a lot of hassle. - */ -struct state; - -struct arc { - int type; -# define ARCFREE '\0' - color co; - struct state *from; /* where it's from (and contained within) */ - struct state *to; /* where it's to */ - struct arc *outchain; /* *from's outs chain or free chain */ -# define freechain outchain - struct arc *inchain; /* *to's ins chain */ - struct arc *colorchain; /* color's arc chain */ -}; - -struct arcbatch { /* for bulk allocation of arcs */ - struct arcbatch *next; -# define ABSIZE 10 - struct arc a[ABSIZE]; -}; - -struct state { - int no; -# define FREESTATE (-1) - char flag; /* marks special states */ - int nins; /* number of inarcs */ - struct arc *ins; /* chain of inarcs */ - int nouts; /* number of outarcs */ - struct arc *outs; /* chain of outarcs */ - struct arc *free; /* chain of free arcs */ - struct state *tmp; /* temporary for traversal algorithms */ - struct state *next; /* chain for traversing all */ - struct state *prev; /* back chain */ - struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */ - int noas; /* number of arcs used in first arcbatch */ -}; - -struct nfa { - struct state *pre; /* pre-initial state */ - struct state *init; /* initial state */ - struct state *final; /* final state */ - struct state *post; /* post-final state */ - int nstates; /* for numbering states */ - struct state *states; /* state-chain header */ - struct state *slast; /* tail of the chain */ - struct state *free; /* free list */ - struct colormap *cm; /* the color map */ - color bos[2]; /* colors, if any, assigned to BOS and BOL */ - color eos[2]; /* colors, if any, assigned to EOS and EOL */ - struct vars *v; /* simplifies compile error reporting */ - struct nfa *parent; /* parent NFA, if any */ -}; - - - -/* - * definitions for compacted NFA - */ -struct carc { - color co; /* COLORLESS is list terminator */ - int to; /* state number */ -}; - -struct cnfa { - int nstates; /* number of states */ - int ncolors; /* number of colors */ - int flags; -# define HASLACONS 01 /* uses lookahead constraints */ - int pre; /* setup state number */ - int post; /* teardown state number */ - color bos[2]; /* colors, if any, assigned to BOS and BOL */ - color eos[2]; /* colors, if any, assigned to EOS and EOL */ - struct carc **states; /* vector of pointers to outarc lists */ - struct carc *arcs; /* the area for the lists */ -}; -#define ZAPCNFA(cnfa) ((cnfa).nstates = 0) -#define NULLCNFA(cnfa) ((cnfa).nstates == 0) - - - -/* - * subexpression tree - */ -struct subre { - char op; /* '|', '.' (concat), 'b' (backref), '(', '=' */ - char flags; -# define LONGER 01 /* prefers longer match */ -# define SHORTER 02 /* prefers shorter match */ -# define MIXED 04 /* mixed preference below */ -# define CAP 010 /* capturing parens below */ -# define BACKR 020 /* back reference below */ -# define INUSE 0100 /* in use in final tree */ -# define LOCAL 03 /* bits which may not propagate up */ -# define LMIX(f) ((f)<<2) /* LONGER -> MIXED */ -# define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */ -# define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) -# define MESSY(f) ((f)&(MIXED|CAP|BACKR)) -# define PREF(f) ((f)&LOCAL) -# define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) -# define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2)) - short retry; /* index into retry memory */ - int subno; /* subexpression number (for 'b' and '(') */ - short min; /* min repetitions, for backref only */ - short max; /* max repetitions, for backref only */ - struct subre *left; /* left child, if any (also freelist chain) */ - struct subre *right; /* right child, if any */ - struct state *begin; /* outarcs from here... */ - struct state *end; /* ...ending in inarcs here */ - struct cnfa cnfa; /* compacted NFA, if any */ - struct subre *chain; /* for bookkeeping and error cleanup */ -}; - - - -/* - * table of function pointers for generic manipulation functions - * A regex_t's re_fns points to one of these. - */ -struct fns { - VOID FUNCPTR(free, (regex_t *)); -}; - - - -/* - * the insides of a regex_t, hidden behind a void * - */ -struct guts { - int magic; -# define GUTSMAGIC 0xfed9 - int cflags; /* copy of compile flags */ - long info; /* copy of re_info */ - size_t nsub; /* copy of re_nsub */ - struct subre *tree; - struct cnfa search; /* for fast preliminary search */ - int ntree; - struct colormap cmap; - int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t)); - struct subre *lacons; /* lookahead-constraint vector */ - int nlacons; /* size of lacons */ -}; diff --git a/src/regex/tclUniData.c b/src/regex/tclUniData.c deleted file mode 100644 index 9f0c6e05ae..0000000000 --- a/src/regex/tclUniData.c +++ /dev/null @@ -1,904 +0,0 @@ -/* - * tclUniData.c -- - * - * Declarations of Unicode character information tables. This file is - * automatically generated by the tools/uniParse.tcl script. Do not - * modify this file by hand. - * - * Copyright (c) 1998 by Scriptics Corporation. - * All rights reserved. - * - * RCS: @(#) $Id$ - */ - -/* - * A 16-bit Unicode character is split into two parts in order to index - * into the following tables. The lower OFFSET_BITS comprise an offset - * into a page of characters. The upper bits comprise the page number. - */ - -#define OFFSET_BITS 5 - -/* - * The pageMap is indexed by page number and returns an alternate page number - * that identifies a unique page of characters. Many Unicode characters map - * to the same alternate page number. - */ - -static unsigned char pageMap[] = { - 0, 1, 2, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 7, 15, 16, 17, - 18, 19, 20, 21, 22, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 7, 32, - 7, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 47, - 48, 49, 50, 51, 52, 35, 47, 53, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 58, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 80, 81, - 84, 85, 80, 86, 87, 88, 89, 90, 91, 92, 35, 93, 94, 95, 35, 96, 97, - 98, 99, 100, 101, 102, 35, 47, 103, 104, 35, 35, 105, 106, 107, 47, - 47, 108, 47, 47, 109, 47, 110, 111, 47, 112, 47, 113, 114, 115, 116, - 114, 47, 117, 118, 35, 47, 47, 119, 90, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 120, 121, 47, 47, 122, - 35, 35, 35, 35, 47, 123, 124, 125, 126, 47, 127, 128, 47, 129, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 7, 7, 7, 7, 130, 7, 7, 131, 132, 133, 134, - 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, - 149, 150, 151, 152, 153, 154, 155, 156, 156, 156, 156, 156, 156, 156, - 157, 158, 159, 160, 161, 162, 35, 35, 35, 160, 163, 164, 165, 166, - 167, 168, 169, 160, 160, 160, 160, 170, 171, 172, 173, 174, 160, 160, - 175, 35, 35, 35, 35, 176, 177, 178, 179, 180, 181, 35, 35, 160, 160, - 160, 160, 160, 160, 160, 160, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 182, 160, 160, 155, 160, 160, 160, 160, 160, 160, 170, 183, 184, 185, - 90, 47, 186, 90, 47, 187, 188, 189, 47, 47, 190, 128, 35, 35, 191, - 192, 193, 194, 192, 195, 196, 197, 160, 160, 160, 198, 160, 160, 199, - 197, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 200, 35, 35, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 201, 35, 35, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 202, 203, 204, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 205, 35, 35, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, - 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, - 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, - 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, - 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 47, 47, 47, 47, 47, 47, 47, 47, 47, 208, 35, 35, 35, 35, - 35, 35, 209, 210, 211, 47, 47, 212, 213, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 214, 215, 47, 216, 47, 217, 218, 35, 219, 220, 221, 47, - 47, 47, 222, 223, 2, 224, 225, 226, 227, 228, 229, 230, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 231, 35, 232, 233, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, - 47, 208, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 47, 234, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 235, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, 207, - 207, 207, 207, 236, 207, 207, 207, 207, 207, 207, 207, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, - 35, 35, 35, 35, 35 -}; - -/* - * The groupMap is indexed by combining the alternate page number with - * the page offset and returns a group number that identifies a unique - * set of character attributes. - */ - -static unsigned char groupMap[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 4, 3, 3, 3, 5, 6, 3, 7, 3, 8, - 3, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 3, 7, 7, 7, 3, 3, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 5, 3, 6, 11, 12, 11, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 5, 7, 6, 7, 1, 2, 3, 4, 4, 4, 4, 14, 14, 11, 14, 15, 16, - 7, 8, 14, 11, 14, 7, 17, 17, 11, 18, 14, 3, 11, 17, 15, 19, 17, 17, - 17, 3, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 7, 10, 10, 10, 10, 10, 10, 10, 15, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 7, 13, 13, 13, 13, 13, 13, 13, 20, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 23, 24, 21, 22, 21, - 22, 21, 22, 15, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, - 22, 21, 22, 15, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 25, - 21, 22, 21, 22, 21, 22, 26, 15, 27, 21, 22, 21, 22, 28, 21, 22, 29, - 29, 21, 22, 15, 30, 31, 32, 21, 22, 29, 33, 34, 35, 36, 21, 22, 15, - 15, 35, 37, 15, 38, 21, 22, 21, 22, 21, 22, 39, 21, 22, 39, 15, 15, - 21, 22, 39, 21, 22, 40, 40, 21, 22, 21, 22, 41, 21, 22, 15, 42, 21, - 22, 15, 43, 42, 42, 42, 42, 44, 45, 46, 44, 45, 46, 44, 45, 46, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 47, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, - 15, 44, 45, 46, 21, 22, 48, 49, 21, 22, 21, 22, 21, 22, 21, 22, 0, - 0, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, - 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 50, 51, 15, 52, 52, 15, 53, 15, - 54, 15, 15, 15, 15, 52, 15, 15, 55, 15, 15, 15, 15, 56, 57, 15, 15, - 15, 15, 15, 57, 15, 15, 58, 15, 15, 59, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 60, 15, 15, 60, 15, 15, 15, 15, 60, 15, 61, 61, 15, 15, - 15, 15, 15, 15, 62, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 63, - 63, 63, 63, 63, 63, 63, 63, 63, 11, 11, 63, 63, 63, 63, 63, 63, 63, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 63, 63, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 63, 63, 63, 63, - 63, 11, 11, 11, 11, 11, 11, 11, 11, 11, 63, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 65, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, - 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, - 0, 0, 0, 0, 63, 0, 0, 0, 3, 0, 0, 0, 0, 0, 11, 11, 66, 3, 67, 67, 67, - 0, 68, 0, 69, 69, 15, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 70, 71, - 71, 71, 15, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 72, 13, 13, 13, 13, 13, 13, 13, 13, 13, 73, 74, 74, 0, - 75, 76, 77, 77, 77, 78, 79, 15, 0, 0, 21, 22, 21, 22, 21, 22, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 80, 81, 47, - 15, 82, 83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84, 84, 84, 84, 84, 84, 84, - 84, 84, 84, 84, 84, 84, 84, 84, 84, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, - 81, 81, 81, 81, 21, 22, 14, 64, 64, 64, 64, 0, 85, 85, 0, 0, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, - 22, 77, 21, 22, 21, 22, 0, 0, 21, 22, 0, 0, 21, 22, 0, 0, 0, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, - 21, 22, 0, 0, 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86, 86, 86, 86, - 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, - 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, - 0, 0, 63, 3, 3, 3, 3, 3, 3, 0, 87, 87, 87, 87, 87, 87, 87, 87, 87, - 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, - 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 15, 0, 3, 8, 0, 0, - 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 64, 3, 64, 3, 64, - 64, 3, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 0, 0, 0, 0, 0, 42, 42, 42, 3, 3, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 3, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 0, 0, 0, 0, 0, 63, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 3, 3, 3, 0, 0, 64, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 3, 42, 64, - 64, 64, 64, 64, 64, 64, 85, 85, 64, 64, 64, 64, 64, 64, 63, 63, 64, - 64, 14, 64, 64, 64, 64, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 42, 42, - 42, 14, 14, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 88, 42, - 64, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, - 64, 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 0, 0, 64, 42, 89, 89, 89, 64, 64, 64, 64, 64, 64, - 64, 64, 89, 89, 89, 89, 64, 0, 0, 42, 64, 64, 64, 64, 0, 0, 0, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 64, 64, 3, 3, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, - 89, 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 42, 42, 0, 0, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 0, 42, 0, 0, 0, 42, - 42, 42, 42, 0, 0, 64, 0, 89, 89, 89, 64, 64, 64, 64, 0, 0, 89, 89, - 0, 0, 89, 89, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 0, 0, 0, 0, 42, 42, - 0, 42, 42, 42, 64, 64, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 42, 42, - 4, 4, 17, 17, 17, 17, 17, 17, 14, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 42, - 42, 42, 42, 42, 42, 0, 0, 0, 0, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, - 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 0, 42, 42, 0, 42, 42, 0, 0, - 64, 0, 89, 89, 89, 64, 64, 0, 0, 0, 0, 64, 64, 0, 0, 64, 64, 64, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 0, 42, 0, 0, 0, 0, 0, - 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 64, 64, 42, 42, 42, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 89, 0, 42, 42, 42, 42, 42, 42, 42, - 0, 42, 0, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, - 42, 42, 0, 42, 42, 0, 42, 42, 42, 42, 42, 0, 0, 64, 42, 89, 89, 89, - 64, 64, 64, 64, 64, 0, 64, 64, 89, 0, 89, 89, 64, 0, 0, 42, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 0, 42, - 42, 0, 0, 42, 42, 42, 42, 0, 0, 64, 42, 89, 64, 89, 64, 64, 64, 0, - 0, 0, 89, 89, 0, 0, 89, 89, 64, 0, 0, 0, 0, 0, 0, 0, 0, 64, 89, 0, - 0, 0, 0, 42, 42, 0, 42, 42, 42, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 89, - 0, 42, 42, 42, 42, 42, 42, 0, 0, 0, 42, 42, 42, 0, 42, 42, 42, 42, - 0, 0, 0, 42, 42, 0, 42, 0, 42, 42, 0, 0, 0, 42, 42, 0, 0, 0, 42, 42, - 42, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 0, 0, 0, - 0, 89, 89, 64, 89, 89, 0, 0, 0, 89, 89, 89, 0, 89, 89, 89, 64, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 89, 89, 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, - 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 0, 42, 42, 42, 42, 42, 0, 0, 0, 0, 64, 64, 64, 89, 89, - 89, 89, 0, 64, 64, 64, 0, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 64, - 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, - 89, 0, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 0, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, - 42, 42, 0, 0, 0, 0, 89, 64, 89, 89, 89, 89, 89, 0, 64, 89, 89, 0, 89, - 89, 64, 64, 0, 0, 0, 0, 0, 0, 0, 89, 89, 0, 0, 0, 0, 0, 0, 0, 42, 0, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 89, 89, 89, 64, 64, - 64, 0, 0, 89, 89, 89, 0, 89, 89, 89, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 89, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 0, 0, - 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 64, 0, 0, 0, 0, 89, 89, 89, 64, - 64, 64, 0, 64, 0, 89, 89, 89, 89, 89, 89, 89, 89, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89, 89, 3, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 64, 42, 42, 64, 64, 64, 64, 64, 64, 64, 0, 0, 0, 0, 4, 42, 42, - 42, 42, 42, 42, 63, 64, 64, 64, 64, 64, 64, 64, 64, 3, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 3, 3, 0, 0, 0, 0, 0, 42, 42, 0, 42, 0, 0, 42, 42, - 0, 42, 0, 0, 42, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 0, 42, 42, 42, 42, - 42, 42, 42, 0, 42, 42, 42, 0, 42, 0, 42, 0, 0, 42, 42, 0, 42, 42, 42, - 42, 64, 42, 42, 64, 64, 64, 64, 64, 64, 0, 64, 64, 42, 0, 0, 42, 42, - 42, 42, 42, 0, 63, 0, 64, 64, 64, 64, 64, 64, 0, 0, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 0, 0, 42, 42, 0, 0, 42, 14, 14, 14, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 14, 14, 14, 14, 14, 64, 64, 14, 14, 14, - 14, 14, 14, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 14, 64, 14, 64, 14, 64, 5, 6, 5, 6, 89, 89, 42, 42, 42, - 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 89, 64, 64, 64, 64, 64, 3, 64, 64, 42, - 42, 42, 42, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 0, 14, 14, 14, 14, 14, 14, 14, 14, 64, 14, 14, 14, 14, 14, 14, 0, 0, - 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 0, 42, - 42, 42, 42, 42, 0, 42, 42, 0, 89, 64, 64, 64, 64, 89, 64, 0, 0, 0, - 64, 64, 89, 64, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, - 3, 3, 3, 3, 3, 42, 42, 42, 42, 42, 42, 89, 89, 64, 64, 0, 0, 0, 0, - 0, 0, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, - 77, 77, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 0, 0, 0, 0, 3, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, - 0, 0, 0, 0, 42, 42, 42, 42, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 42, 42, 42, - 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42, - 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42, 0, 42, - 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, - 42, 0, 42, 0, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 42, 0, - 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, - 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 3, 3, 42, 42, 42, 42, 42, - 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 5, 6, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 3, 3, 3, 90, 90, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 89, 89, 89, 64, 64, 64, 64, 64, 64, 64, 89, 89, 89, 89, 89, - 89, 89, 89, 64, 89, 89, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 3, 3, 3, 3, 3, 3, 3, 4, 3, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, - 3, 3, 3, 3, 8, 3, 3, 3, 3, 88, 88, 88, 88, 0, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 0, 0, 0, 0, 0, 0, 42, 42, 42, 63, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, - 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 64, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 22, 21, 22, 21, 22, 21, - 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 15, 15, - 15, 15, 15, 91, 0, 0, 0, 0, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, - 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 21, 22, 0, - 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, - 93, 93, 93, 92, 92, 92, 92, 92, 92, 0, 0, 93, 93, 93, 93, 93, 93, 0, - 0, 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, - 92, 92, 92, 92, 92, 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, 92, - 92, 92, 92, 92, 92, 0, 0, 93, 93, 93, 93, 93, 93, 0, 0, 15, 92, 15, - 92, 15, 92, 15, 92, 0, 93, 0, 93, 0, 93, 0, 93, 92, 92, 92, 92, 92, - 92, 92, 92, 93, 93, 93, 93, 93, 93, 93, 93, 94, 94, 95, 95, 95, 95, - 96, 96, 97, 97, 98, 98, 99, 99, 0, 0, 92, 92, 92, 92, 92, 92, 92, 92, - 100, 100, 100, 100, 100, 100, 100, 100, 92, 92, 92, 92, 92, 92, 92, - 92, 100, 100, 100, 100, 100, 100, 100, 100, 92, 92, 92, 92, 92, 92, - 92, 92, 100, 100, 100, 100, 100, 100, 100, 100, 92, 92, 15, 101, 15, - 0, 15, 15, 93, 93, 102, 102, 103, 11, 104, 11, 11, 11, 15, 101, 15, - 0, 15, 15, 105, 105, 105, 105, 103, 11, 11, 11, 92, 92, 15, 15, 0, - 0, 15, 15, 93, 93, 106, 106, 0, 11, 11, 11, 92, 92, 15, 15, 15, 107, - 15, 15, 93, 93, 108, 108, 109, 11, 11, 11, 0, 0, 15, 101, 15, 0, 15, - 15, 110, 110, 111, 111, 103, 11, 11, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 88, 88, 88, 88, 8, 8, 8, 8, 8, 8, 3, 3, 16, 19, 5, 16, 16, - 19, 5, 16, 3, 3, 3, 3, 3, 3, 3, 3, 112, 113, 88, 88, 88, 88, 88, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 16, 19, 3, 3, 3, 3, 12, 12, 3, 3, 3, 7, - 5, 6, 0, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 88, 88, 88, 88, 88, 17, - 0, 0, 0, 17, 17, 17, 17, 17, 17, 7, 7, 7, 5, 6, 15, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 7, 7, 7, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 85, 85, 85, 85, 64, 85, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 77, - 14, 14, 14, 14, 77, 14, 14, 15, 77, 77, 77, 15, 15, 77, 77, 77, 15, - 14, 77, 14, 14, 14, 77, 77, 77, 77, 77, 14, 14, 14, 14, 14, 14, 77, - 14, 114, 14, 77, 14, 115, 116, 77, 77, 14, 15, 77, 77, 14, 77, 15, - 42, 42, 42, 42, 15, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, - 117, 117, 117, 117, 117, 118, 118, 118, 118, 118, 118, 118, 118, 118, - 118, 118, 118, 118, 118, 118, 118, 90, 90, 90, 90, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 14, 14, 14, 14, 14, 7, 7, 14, 14, - 14, 14, 7, 14, 14, 7, 14, 14, 7, 14, 14, 14, 14, 14, 14, 14, 7, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 7, 7, 14, 14, 7, - 14, 7, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 7, 7, 7, 7, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 7, 7, 14, 14, 14, 14, 14, 14, 14, 5, 6, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 119, 119, 119, 119, 119, 119, - 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, 119, - 119, 119, 119, 119, 119, 119, 120, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, - 120, 120, 120, 120, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 7, 14, 14, 14, 14, 14, 14, 14, 14, 14, 7, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 7, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, - 14, 14, 14, 0, 14, 14, 14, 14, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 0, 14, 0, 14, 14, 14, 14, 0, 0, 0, 14, 0, 14, 14, - 14, 14, 14, 14, 14, 0, 0, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 14, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 0, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, - 0, 0, 0, 2, 3, 3, 3, 14, 63, 42, 90, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, - 14, 14, 5, 6, 5, 6, 5, 6, 5, 6, 8, 5, 6, 6, 14, 90, 90, 90, 90, 90, - 90, 90, 90, 90, 64, 64, 64, 64, 64, 64, 8, 63, 63, 63, 63, 63, 14, - 14, 90, 90, 90, 0, 0, 0, 14, 14, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 64, 64, - 11, 11, 63, 63, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 12, 63, - 63, 63, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 14, 14, 17, 17, 17, - 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 0, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 14, 14, 14, 0, 14, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121, 121, 121, 121, 121, 121, 121, - 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, - 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 121, 122, 122, 122, - 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, - 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, - 122, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, - 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 0, - 0, 0, 0, 0, 42, 64, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 7, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 42, 42, 42, 42, - 42, 0, 42, 0, 42, 42, 0, 42, 42, 0, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, 0, 64, 64, 64, 64, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 8, 8, 12, 12, 5, 6, 5, 6, 5, - 6, 5, 6, 5, 6, 5, 6, 5, 6, 5, 6, 0, 0, 0, 0, 3, 3, 3, 3, 12, 12, 12, - 3, 3, 3, 0, 3, 3, 3, 3, 8, 5, 6, 5, 6, 5, 6, 3, 3, 3, 7, 8, 7, 7, 7, - 0, 3, 4, 3, 3, 0, 0, 0, 0, 42, 42, 42, 0, 42, 0, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 0, 0, 88, 0, 3, 3, 3, 4, 3, 3, 3, 5, 6, 3, 7, 3, 8, 3, 3, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 3, 3, 7, 7, 7, 3, 11, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 5, 7, 6, 7, 0, 0, 3, 5, 6, 3, 12, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 63, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 63, - 63, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 0, 0, 0, - 42, 42, 42, 42, 42, 42, 0, 0, 42, 42, 42, 42, 42, 42, 0, 0, 42, 42, - 42, 42, 42, 42, 0, 0, 42, 42, 42, 0, 0, 0, 4, 4, 7, 11, 14, 4, 4, 0, - 14, 7, 7, 7, 7, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 88, 88, 14, - 14, 42, 17, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123, 123, 123, - 126, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 89, 64, 14, 14, 14, - 14, 14, 0, 0, 77, 77, 15, 15, 77, 15, 15, 77, 77, 15, 77, 77, 15, 77, - 77, 15, 15, 77, 15, 15, 77, 77, 15, 77, 77, 15, 77, 77, 15, 15, 77, - 15, 15, 77, 77, 15, 77, 77, 15, 77, 77, 15, 15, 77, 77, 15, 15, 77, - 15, 15, 77, 77, 15, 15, 77, 15, 15, 77, 77, 15, 15, 9, 9, 9, 42, 42, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 88, 0, 88, 88, 88, 88, 88, 88, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122, 122, - 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, - 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, - 122 -}; - -/* - * Each group represents a unique set of character attributes. The attributes - * are encoded into a 32-bit value as follows: - * - * Bits 0-4 Character category: see the constants listed below. - * - * Bits 5-7 Case delta type: 000 = identity - * 010 = add delta for lower - * 011 = add delta for lower, add 1 for title - * 100 = sutract delta for title/upper - * 101 = sub delta for upper, sub 1 for title - * 110 = sub delta for upper, add delta for lower - * - * Bits 8-21 Reserved for future use. - * - * Bits 22-31 Case delta: delta for case conversions. This should be the - * highest field so we can easily sign extend. - */ - -static int groups[] = { - 0, 15, 12, 25, 27, 21, 22, 26, 20, 9, 134217793, 28, 19, 134217858, - 29, 2, 23, 11, 1178599554, 24, -507510654, 4194369, 4194434, -834666431, - 973078658, -507510719, 1258291330, 880803905, 864026689, 859832385, - 331350081, 847249473, 851443777, 868220993, -406847358, 884998209, - 876609601, 893386817, 897581121, 914358337, 910164033, 918552641, - 5, -234880894, 8388705, 4194499, 8388770, 331350146, -406847423, - -234880959, 880803970, 864026754, 859832450, 847249538, 851443842, - 868221058, 876609666, 884998274, 893386882, 897581186, 914358402, - 910164098, 918552706, 4, 6, -352321402, 159383617, 155189313, - 268435521, 264241217, 159383682, 155189378, 130023554, 268435586, - 264241282, 260046978, 239075458, 1, 197132418, 226492546, 360710274, - 335544450, -251658175, 402653314, 335544385, 7, 201326657, 201326722, - 16, 8, 10, 247464066, -33554302, -33554367, -310378366, -360710014, - -419430270, -536870782, -469761918, -528482174, -33554365, -37748606, - -310378431, -37748669, 155189378, -360710079, -419430335, -29359998, - -469761983, -29360063, -536870847, -528482239, 13, 14, -1463812031, - -801111999, -293601215, 67108938, 67109002, 109051997, 109052061, - 18, 17, 8388673, 12582977, 8388738, 12583042 -}; - -/* - * The following constants are used to determine the category of a - * Unicode character. - */ - -#define UNICODE_CATEGORY_MASK 0X1F - -enum { - UNASSIGNED, - UPPERCASE_LETTER, - LOWERCASE_LETTER, - TITLECASE_LETTER, - MODIFIER_LETTER, - OTHER_LETTER, - NON_SPACING_MARK, - ENCLOSING_MARK, - COMBINING_SPACING_MARK, - DECIMAL_DIGIT_NUMBER, - LETTER_NUMBER, - OTHER_NUMBER, - SPACE_SEPARATOR, - LINE_SEPARATOR, - PARAGRAPH_SEPARATOR, - CONTROL, - FORMAT, - PRIVATE_USE, - SURROGATE, - CONNECTOR_PUNCTUATION, - DASH_PUNCTUATION, - OPEN_PUNCTUATION, - CLOSE_PUNCTUATION, - INITIAL_QUOTE_PUNCTUATION, - FINAL_QUOTE_PUNCTUATION, - OTHER_PUNCTUATION, - MATH_SYMBOL, - CURRENCY_SYMBOL, - MODIFIER_SYMBOL, - OTHER_SYMBOL -}; - -/* - * The following macros extract the fields of the character info. The - * GetDelta() macro is complicated because we can't rely on the C compiler - * to do sign extension on right shifts. - */ - -#define GetCaseType(info) (((info) & 0xE0) >> 5) -#define GetCategory(info) ((info) & 0x1F) -#define GetDelta(info) (((info) > 0) ? ((info) >> 22) : (~(~((info)) >> 22))) - -/* - * This macro extracts the information about a character from the - * Unicode character tables. - */ - -#define GetUniCharInfo(ch) (groups[groupMap[(pageMap[(((int)(ch)) & 0xffff) >> OFFSET_BITS] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))]]) -