Import regex from Tcl 8.4.5

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/branches/RXSPENCER@3951 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
1999-10-13 02:22:18 +00:00
parent 9bd536df18
commit a6c3a78d25
4 changed files with 1303 additions and 1177 deletions
--- a/src/regex/regc_lex.c
+++ b/src/regex/regc_lex.c
@@ -28,8 +28,6 @@
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $Header$
 *
 */
 /* scanning macros (know about v) */
@@ -64,27 +62,24 @@
 #define	ENDOF(array)	((array) + sizeof(array)/sizeof(chr))
 /*
- * lexstart - set up lexical stuff, scan leading options
+ - lexstart - set up lexical stuff, scan leading options
 ^ static VOID lexstart(struct vars *);
 */
-static void
+static VOID
-lexstart(struct vars * v)
+lexstart(v)
 struct vars *v;
 {
 	prefixes(v);			/* may turn on new type bits etc. */
 	NOERR();
-	if (v->cflags & REG_QUOTE)
+	if (v->cflags&REG_QUOTE) {
-	{
+		assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
 		assert(!(v->cflags & (REG_ADVANCED | REG_EXPANDED | REG_NEWLINE)));
 		INTOCON(L_Q);
-	}
+	} else if (v->cflags&REG_EXTENDED) {
-	else if (v->cflags & REG_EXTENDED)
+		assert(!(v->cflags&REG_QUOTE));
 	{
 		assert(!(v->cflags & REG_QUOTE));
 		INTOCON(L_ERE);
-	}
+	} else {
-	else
+		assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
 	{
 		assert(!(v->cflags & (REG_QUOTE | REG_ADVF)));
 		INTOCON(L_BRE);
 	}
@@ -93,19 +88,20 @@ lexstart(struct vars * v)
 }
 /*
- * prefixes - implement various special prefixes
+ - prefixes - implement various special prefixes
 ^ static VOID prefixes(struct vars *);
 */
-static void
+static VOID
-prefixes(struct vars * v)
+prefixes(v)
 struct vars *v;
 {
 	/* literal string doesn't get any of this stuff */
-	if (v->cflags & REG_QUOTE)
+	if (v->cflags&REG_QUOTE)
 		return;
 	/* initial "***" gets special things */	
 	if (HAVE(4) && NEXT3('*', '*', '*'))
-		switch (*(v->now + 3))
+		switch (*(v->now + 3)) {
 		{
 		case CHR('?'):		/* "***?" error, msg shows version */
 			ERR(REG_BADPAT);
 			return;		/* proceed no further */
@@ -113,7 +109,7 @@ prefixes(struct vars * v)
 		case CHR('='):		/* "***=" shifts to literal string */
 			NOTE(REG_UNONPOSIX);
 			v->cflags |= REG_QUOTE;
-				v->cflags &= ~(REG_ADVANCED | REG_EXPANDED | REG_NEWLINE);
+			v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
 			v->now += 4;
 			return;		/* and there can be no more prefixes */
 			break;
@@ -129,26 +125,24 @@ prefixes(struct vars * v)
 		}
 	/* BREs and EREs don't get embedded options */
-	if ((v->cflags & REG_ADVANCED) != REG_ADVANCED)
+	if ((v->cflags&REG_ADVANCED) != REG_ADVANCED)
 		return;
 	/* embedded options (AREs only) */
-	if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2)))
+	if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
 	{
 		NOTE(REG_UNONPOSIX);
 		v->now += 2;
 		for (; !ATEOS() && iscalpha(*v->now); v->now++)
-			switch (*v->now)
+			switch (*v->now) {
 			{
 			case CHR('b'):		/* BREs (but why???) */
-					v->cflags &= ~(REG_ADVANCED | REG_QUOTE);
+				v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
 				break;
 			case CHR('c'):		/* case sensitive */
 				v->cflags &= ~REG_ICASE;
 				break;
 			case CHR('e'):		/* plain EREs */
 				v->cflags |= REG_EXTENDED;
-					v->cflags &= ~(REG_ADVF | REG_QUOTE);
+				v->cflags &= ~(REG_ADVF|REG_QUOTE);
 				break;
 			case CHR('i'):		/* case insensitive */
 				v->cflags |= REG_ICASE;
@@ -182,27 +176,27 @@ prefixes(struct vars * v)
 				ERR(REG_BADOPT);
 				return;
 			}
-		if (!NEXT1(')'))
+		if (!NEXT1(')')) {
 		{
 			ERR(REG_BADOPT);
 			return;
 		}
 		v->now++;
-		if (v->cflags & REG_QUOTE)
+		if (v->cflags&REG_QUOTE)
-			v->cflags &= ~(REG_EXPANDED | REG_NEWLINE);
+			v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
 	}
 }
 /*
- * lexnest - "call a subroutine", interpolating string at the lexical level
+ - lexnest - "call a subroutine", interpolating string at the lexical level
 *
 * Note, this is not a very general facility.  There are a number of
 * implicit assumptions about what sorts of strings can be subroutines.
 ^ static VOID lexnest(struct vars *, chr *, chr *);
 */
-static void
+static VOID
-lexnest(struct vars * v,
+lexnest(v, beginp, endp)
-		chr *beginp,			/* start of interpolation */
+struct vars *v;
-		chr *endp)				/* one past end of interpolation */
+chr *beginp;				/* start of interpolation */
 chr *endp;				/* one past end of interpolation */
 {
 	assert(v->savenow == NULL);	/* only one level of nesting */
 	v->savenow = v->now;
@@ -261,20 +255,24 @@ static chr	brbackw[] = {		/* \w within brackets */
 };
 /*
- * lexword - interpolate a bracket expression for word characters
+ - lexword - interpolate a bracket expression for word characters
 * Possibly ought to inquire whether there is a "word" character class.
 ^ static VOID lexword(struct vars *);
 */
-static void
+static VOID
-lexword(struct vars * v)
+lexword(v)
 struct vars *v;
 {
 	lexnest(v, backw, ENDOF(backw));
 }
 /*
- * next - get next token
+ - next - get next token
 ^ static int next(struct vars *);
 */
 static int			/* 1 normal, 0 failure */
-next(struct vars * v)
+next(v)
 struct vars *v;
 {
 	chr c;
@@ -286,24 +284,21 @@ next(struct vars * v)
 	v->lasttype = v->nexttype;
 	/* REG_BOSONLY */
-	if (v->nexttype == EMPTY && (v->cflags & REG_BOSONLY))
+	if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
 	{
 		/* at start of a REG_BOSONLY RE */
 		RETV(SBEGIN, 0);		/* same as \A */
 	}
 	/* if we're nested and we've hit end, return to outer level */
-	if (v->savenow != NULL && ATEOS())
+	if (v->savenow != NULL && ATEOS()) {
 	{
 		v->now = v->savenow;
 		v->stop = v->savestop;
 		v->savenow = v->savestop = NULL;
 	}
 	/* skip white space etc. if appropriate (not in literal or []) */
-	if (v->cflags & REG_EXPANDED)
+	if (v->cflags&REG_EXPANDED)
-		switch (v->lexcon)
+		switch (v->lexcon) {
 		{
 		case L_ERE:
 		case L_BRE:
 		case L_EBND:
@@ -313,10 +308,8 @@ next(struct vars * v)
 		}
 	/* handle EOS, depending on context */
-	if (ATEOS())
+	if (ATEOS()) {
-	{
+		switch (v->lexcon) {
 		switch (v->lexcon)
 		{
 		case L_ERE:
 		case L_BRE:
 		case L_Q:
@@ -340,8 +333,7 @@ next(struct vars * v)
 	c = *v->now++;
 	/* deal with the easy contexts, punt EREs to code below */
-	switch (v->lexcon)
+	switch (v->lexcon) {
 	{
 	case L_BRE:			/* punt BREs to separate function */
 		return brenext(v, c);
 		break;
@@ -352,46 +344,33 @@ next(struct vars * v)
 		break;
 	case L_BBND:			/* bounds are fairly simple */
 	case L_EBND:
-			switch (c)
+		switch (c) {
-			{
+		case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
-				case CHR('0'):
+		case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
-				case CHR('1'):
+		case CHR('8'): case CHR('9'):
-				case CHR('2'):
+			RETV(DIGIT, (chr)DIGITVAL(c));
 				case CHR('3'):
 				case CHR('4'):
 				case CHR('5'):
 				case CHR('6'):
 				case CHR('7'):
 				case CHR('8'):
 				case CHR('9'):
 					RETV(DIGIT, (chr) DIGITVAL(c));
 			break;
 		case CHR(','):
 			RET(',');
 			break;
 		case CHR('}'):		/* ERE bound ends with } */
-					if (INCON(L_EBND))
+			if (INCON(L_EBND)) {
 					{
 				INTOCON(L_ERE);
-						if ((v->cflags & REG_ADVF) && NEXT1('?'))
+				if ((v->cflags&REG_ADVF) && NEXT1('?')) {
 						{
 					v->now++;
 					NOTE(REG_UNONPOSIX);
 					RETV('}', 0);
 				}
 				RETV('}', 1);
-					}
+			} else
 					else
 				FAILW(REG_BADBR);
 			break;
 		case CHR('\\'):		/* BRE bound ends with \} */
-					if (INCON(L_BBND) && NEXT1('}'))
+			if (INCON(L_BBND) && NEXT1('}')) {
 					{
 				v->now++;
 				INTOCON(L_BRE);
 				RET('}');
-					}
+			} else
 					else
 				FAILW(REG_BADBR);
 			break;
 		default:
@@ -401,34 +380,30 @@ next(struct vars * v)
 		assert(NOTREACHED);
 		break;
 	case L_BRACK:			/* brackets are not too hard */
-			switch (c)
+		switch (c) {
 			{
 		case CHR(']'):
 			if (LASTTYPE('['))
 				RETV(PLAIN, c);
-					else
+			else {
-					{
+				INTOCON((v->cflags&REG_EXTENDED) ?
 						INTOCON((v->cflags & REG_EXTENDED) ?
 							L_ERE : L_BRE);
 				RET(']');
 			}
 			break;
 		case CHR('\\'):
 			NOTE(REG_UBBS);
-					if (!(v->cflags & REG_ADVF))
+			if (!(v->cflags&REG_ADVF))
 				RETV(PLAIN, c);
 			NOTE(REG_UNONPOSIX);
 			if (ATEOS())
 				FAILW(REG_EESCAPE);
-					(DISCARD) lexescape(v);
+			(DISCARD)lexescape(v);
-					switch (v->nexttype)
+			switch (v->nexttype) {	/* not all escapes okay here */
 					{			/* not all escapes okay here */
 			case PLAIN:
 				return 1;
 				break;
 			case CCLASS:
-							switch (v->nextvalue)
+				switch (v->nextvalue) {
 							{
 				case 'd':
 					lexnest(v, brbackd, ENDOF(brbackd));
 					break;
@@ -459,8 +434,7 @@ next(struct vars * v)
 		case CHR('['):
 			if (ATEOS())
 				FAILW(REG_EBRACK);
-					switch (*v->now++)
+			switch (*v->now++) {
 					{
 			case CHR('.'):
 				INTOCON(L_CEL);
 				/* might or might not be locale-specific */
@@ -490,33 +464,27 @@ next(struct vars * v)
 		assert(NOTREACHED);
 		break;
 	case L_CEL:			/* collating elements are easy */
-			if (c == CHR('.') && NEXT1(']'))
+		if (c == CHR('.') && NEXT1(']')) {
 			{
 			v->now++;
 			INTOCON(L_BRACK);
 			RETV(END, '.');
-			}
+		} else
 			else
 			RETV(PLAIN, c);
 		break;
 	case L_ECL:			/* ditto equivalence classes */
-			if (c == CHR('=') && NEXT1(']'))
+		if (c == CHR('=') && NEXT1(']')) {
 			{
 			v->now++;
 			INTOCON(L_BRACK);
 			RETV(END, '=');
-			}
+		} else
 			else
 			RETV(PLAIN, c);
 		break;
 	case L_CCL:			/* ditto character classes */
-			if (c == CHR(':') && NEXT1(']'))
+		if (c == CHR(':') && NEXT1(']')) {
 			{
 			v->now++;
 			INTOCON(L_BRACK);
 			RETV(END, ':');
-			}
+		} else
 			else
 			RETV(PLAIN, c);
 		break;
 	default:
@@ -528,14 +496,12 @@ next(struct vars * v)
 	assert(INCON(L_ERE));
 	/* deal with EREs and AREs, except for backslashes */
-	switch (c)
+	switch (c) {
 	{
 	case CHR('|'):
 		RET('|');
 		break;
 	case CHR('*'):
-			if ((v->cflags & REG_ADVF) && NEXT1('?'))
+		if ((v->cflags&REG_ADVF) && NEXT1('?')) {
 			{
 			v->now++;
 			NOTE(REG_UNONPOSIX);
 			RETV('*', 0);
@@ -543,8 +509,7 @@ next(struct vars * v)
 		RETV('*', 1);
 		break;
 	case CHR('+'):
-			if ((v->cflags & REG_ADVF) && NEXT1('?'))
+		if ((v->cflags&REG_ADVF) && NEXT1('?')) {
 			{
 			v->now++;
 			NOTE(REG_UNONPOSIX);
 			RETV('+', 0);
@@ -552,8 +517,7 @@ next(struct vars * v)
 		RETV('+', 1);
 		break;
 	case CHR('?'):
-			if ((v->cflags & REG_ADVF) && NEXT1('?'))
+		if ((v->cflags&REG_ADVF) && NEXT1('?')) {
 			{
 			v->now++;
 			NOTE(REG_UNONPOSIX);
 			RETV('?', 0);
@@ -561,16 +525,13 @@ next(struct vars * v)
 		RETV('?', 1);
 		break;
 	case CHR('{'):		/* bounds start or plain character */
-			if (v->cflags & REG_EXPANDED)
+		if (v->cflags&REG_EXPANDED)
 			skip(v);
-			if (ATEOS() || !iscdigit(*v->now))
+		if (ATEOS() || !iscdigit(*v->now)) {
 			{
 			NOTE(REG_UBRACES);
 			NOTE(REG_UUNSPEC);
 			RETV(PLAIN, c);
-			}
+		} else {
 			else
 			{
 			NOTE(REG_UBOUNDS);
 			INTOCON(L_EBND);
 			RET('{');
@@ -578,12 +539,10 @@ next(struct vars * v)
 		assert(NOTREACHED);
 		break;
 	case CHR('('):		/* parenthesis, or advanced extension */
-			if ((v->cflags & REG_ADVF) && NEXT1('?'))
+		if ((v->cflags&REG_ADVF) && NEXT1('?')) {
 			{
 			NOTE(REG_UNONPOSIX);
 			v->now++;
-				switch (*v->now++)
+			switch (*v->now++) {
 				{
 			case CHR(':'):		/* non-capturing paren */
 				RETV('(', 0);
 				break;
@@ -609,33 +568,32 @@ next(struct vars * v)
 			}
 			assert(NOTREACHED);
 		}
-			if (v->cflags & REG_NOSUB)
+		if (v->cflags&REG_NOSUB)
 			RETV('(', 0);		/* all parens non-capturing */
 		else
 			RETV('(', 1);
 		break;
 	case CHR(')'):
-			if (LASTTYPE('('))
+		if (LASTTYPE('(')) {
 			NOTE(REG_UUNSPEC);
 		}
 		RETV(')', c);
 		break;
 	case CHR('['):		/* easy except for [[:<:]] and [[:>:]] */
-			if (HAVE(6) && *(v->now + 0) == CHR('[') &&
+		if (HAVE(6) &&	*(v->now+0) == CHR('[') &&
-				*(v->now + 1) == CHR(':') &&
+				*(v->now+1) == CHR(':') &&
-				(*(v->now + 2) == CHR('<') ||
+				(*(v->now+2) == CHR('<') ||
-				 *(v->now + 2) == CHR('>')) &&
+						*(v->now+2) == CHR('>')) &&
-				*(v->now + 3) == CHR(':') &&
+				*(v->now+3) == CHR(':') &&
-				*(v->now + 4) == CHR(']') &&
+				*(v->now+4) == CHR(']') &&
-				*(v->now + 5) == CHR(']'))
+				*(v->now+5) == CHR(']')) {
-			{
+			c = *(v->now+2);
 				c = *(v->now + 2);
 			v->now += 6;
 			NOTE(REG_UNONPOSIX);
 			RET((c == CHR('<')) ? '<' : '>');
 		}
 		INTOCON(L_BRACK);
-			if (NEXT1('^'))
+		if (NEXT1('^')) {
 			{
 			v->now++;
 			RETV('[', 0);
 		}
@@ -661,40 +619,24 @@ next(struct vars * v)
 	/* ERE/ARE backslash handling; backslash already eaten */
 	assert(!ATEOS());
-	if (!(v->cflags & REG_ADVF))
+	if (!(v->cflags&REG_ADVF)) {	/* only AREs have non-trivial escapes */
-	{							/* only AREs have non-trivial escapes */
+		if (iscalnum(*v->now)) {
 		if (iscalnum(*v->now))
 		{
 			NOTE(REG_UBSALNUM);
 			NOTE(REG_UUNSPEC);
 		}
 		RETV(PLAIN, *v->now++);
 	}
-	(DISCARD) lexescape(v);
+	(DISCARD)lexescape(v);
 	if (ISERR())
 		FAILW(REG_EESCAPE);
-	if (v->nexttype == CCLASS)
+	if (v->nexttype == CCLASS) {	/* fudge at lexical level */
-	{							/* fudge at lexical level */
+		switch (v->nextvalue) {
-		switch (v->nextvalue)
+		case 'd':	lexnest(v, backd, ENDOF(backd)); break;
-		{
+		case 'D':	lexnest(v, backD, ENDOF(backD)); break;
-			case 'd':
+		case 's':	lexnest(v, backs, ENDOF(backs)); break;
-				lexnest(v, backd, ENDOF(backd));
+		case 'S':	lexnest(v, backS, ENDOF(backS)); break;
-				break;
+		case 'w':	lexnest(v, backw, ENDOF(backw)); break;
-			case 'D':
+		case 'W':	lexnest(v, backW, ENDOF(backW)); break;
 				lexnest(v, backD, ENDOF(backD));
 				break;
 			case 's':
 				lexnest(v, backs, ENDOF(backs));
 				break;
 			case 'S':
 				lexnest(v, backS, ENDOF(backS));
 				break;
 			case 'w':
 				lexnest(v, backw, ENDOF(backw));
 				break;
 			case 'W':
 				lexnest(v, backW, ENDOF(backW));
 				break;
 		default:
 			assert(NOTREACHED);
 			FAILW(REG_ASSERT);
@@ -709,12 +651,13 @@ next(struct vars * v)
 }
 /*
- * lexescape - parse an ARE backslash escape (backslash already eaten)
+ - lexescape - parse an ARE backslash escape (backslash already eaten)
 * Note slightly nonstandard use of the CCLASS type code.
 ^ static int lexescape(struct vars *);
 */
-static int						/* not actually used, but convenient for
+static int			/* not actually used, but convenient for RETV */
-								 * RETV */
+lexescape(v)
-lexescape(struct vars * v)
+struct vars *v;
 {
 	chr c;
 	static chr alert[] = {
@@ -725,7 +668,7 @@ lexescape(struct vars * v)
 	};
 	chr *save;
-	assert(v->cflags & REG_ADVF);
+	assert(v->cflags&REG_ADVF);
 	assert(!ATEOS());
 	c = *v->now++;
@@ -733,8 +676,7 @@ lexescape(struct vars * v)
 		RETV(PLAIN, c);
 	NOTE(REG_UNONPOSIX);
-	switch (c)
+	switch (c) {
 	{
 	case CHR('a'):
 		RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
 		break;
@@ -751,7 +693,7 @@ lexescape(struct vars * v)
 		NOTE(REG_UUNPORT);
 		if (ATEOS())
 			FAILW(REG_EESCAPE);
-			RETV(PLAIN, (chr) (*v->now++ & 037));
+		RETV(PLAIN, (chr)(*v->now++ & 037));
 		break;
 	case CHR('d'):
 		NOTE(REG_ULOCALE);
@@ -816,8 +758,7 @@ lexescape(struct vars * v)
 		break;
 	case CHR('x'):
 		NOTE(REG_UUNPORT);
-			c = lexdigits(v, 16, 1, 255);		/* REs >255 long outside
+		c = lexdigits(v, 16, 1, 255);	/* REs >255 long outside spec */
 												 * spec */
 		if (ISERR())
 			FAILW(REG_EESCAPE);
 		RETV(PLAIN, c);
@@ -833,26 +774,18 @@ lexescape(struct vars * v)
 	case CHR('Z'):
 		RETV(SEND, 0);
 		break;
-		case CHR('1'):
+	case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
-		case CHR('2'):
+	case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
 		case CHR('3'):
 		case CHR('4'):
 		case CHR('5'):
 		case CHR('6'):
 		case CHR('7'):
 		case CHR('8'):
 	case CHR('9'):
 		save = v->now;
 		v->now--;	/* put first digit back */
-			c = lexdigits(v, 10, 1, 255);		/* REs >255 long outside
+		c = lexdigits(v, 10, 1, 255);	/* REs >255 long outside spec */
 												 * spec */
 		if (ISERR())
 			FAILW(REG_EESCAPE);
 		/* ugly heuristic (first test is "exactly 1 digit?") */
-			if (v->now - save == 0 || (int) c <= v->nsubexp)
+		if (v->now - save == 0 || (int)c <= v->nsubexp) {
 			{
 			NOTE(REG_UBACKREF);
-				RETV(BACKREF, (chr) c);
+			RETV(BACKREF, (chr)c);
 		}
 		/* oops, doesn't look like it's a backref after all... */
 		v->now = save;
@@ -874,119 +807,91 @@ lexescape(struct vars * v)
 }
 /*
- * lexdigits - slurp up digits and return chr value
+ - lexdigits - slurp up digits and return chr value
 ^ static chr lexdigits(struct vars *, int, int, int);
 */
 static chr			/* chr value; errors signalled via ERR */
-lexdigits(struct vars * v,
+lexdigits(v, base, minlen, maxlen)
-		  int base,
+struct vars *v;
-		  int minlen,
+int base;
-		  int maxlen)
+int minlen;
 int maxlen;
 {
 	uchr n;			/* unsigned to avoid overflow misbehavior */
 	int len;
 	chr c;
 	int d;
-	const uchr	ub = (uchr) base;
+	CONST uchr ub = (uchr) base;
 	n = 0;
-	for (len = 0; len < maxlen && !ATEOS(); len++)
+	for (len = 0; len < maxlen && !ATEOS(); len++) {
 	{
 		c = *v->now++;
-		switch (c)
+		switch (c) {
-		{
+		case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
-			case CHR('0'):
+		case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
-			case CHR('1'):
+		case CHR('8'): case CHR('9'):
 			case CHR('2'):
 			case CHR('3'):
 			case CHR('4'):
 			case CHR('5'):
 			case CHR('6'):
 			case CHR('7'):
 			case CHR('8'):
 			case CHR('9'):
 			d = DIGITVAL(c);
 			break;
-			case CHR('a'):
+		case CHR('a'): case CHR('A'): d = 10; break;
-			case CHR('A'):
+		case CHR('b'): case CHR('B'): d = 11; break;
-				d = 10;
+		case CHR('c'): case CHR('C'): d = 12; break;
-				break;
+		case CHR('d'): case CHR('D'): d = 13; break;
-			case CHR('b'):
+		case CHR('e'): case CHR('E'): d = 14; break;
-			case CHR('B'):
+		case CHR('f'): case CHR('F'): d = 15; break;
 				d = 11;
 				break;
 			case CHR('c'):
 			case CHR('C'):
 				d = 12;
 				break;
 			case CHR('d'):
 			case CHR('D'):
 				d = 13;
 				break;
 			case CHR('e'):
 			case CHR('E'):
 				d = 14;
 				break;
 			case CHR('f'):
 			case CHR('F'):
 				d = 15;
 				break;
 		default:
 			v->now--;	/* oops, not a digit at all */
 			d = -1;
 			break;
 		}
-		if (d >= base)
+		if (d >= base) {	/* not a plausible digit */
 		{						/* not a plausible digit */
 			v->now--;
 			d = -1;
 		}
 		if (d < 0)
 			break;		/* NOTE BREAK OUT */
-		n = n * ub + (uchr) d;
+		n = n*ub + (uchr)d;
 	}
 	if (len < minlen)
 		ERR(REG_EESCAPE);
-	return (chr) n;
+	return (chr)n;
 }
 /*
- * brenext - get next BRE token
+ - brenext - get next BRE token
 *
 * This is much like EREs except for all the stupid backslashes and the
 * context-dependency of some things.
 ^ static int brenext(struct vars *, pchr);
 */
 static int			/* 1 normal, 0 failure */
-brenext(struct vars * v,
+brenext(v, pc)
-		chr pc)
+struct vars *v;
 pchr pc;
 {
-	chr			c = (chr) pc;
+	chr c = (chr)pc;
-	switch (c)
+	switch (c) {
 	{
 	case CHR('*'):
 		if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^'))
 			RETV(PLAIN, c);
 		RET('*');
 		break;
 	case CHR('['):
-			if (HAVE(6) && *(v->now + 0) == CHR('[') &&
+		if (HAVE(6) &&	*(v->now+0) == CHR('[') &&
-				*(v->now + 1) == CHR(':') &&
+				*(v->now+1) == CHR(':') &&
-				(*(v->now + 2) == CHR('<') ||
+				(*(v->now+2) == CHR('<') ||
-				 *(v->now + 2) == CHR('>')) &&
+						*(v->now+2) == CHR('>')) &&
-				*(v->now + 3) == CHR(':') &&
+				*(v->now+3) == CHR(':') &&
-				*(v->now + 4) == CHR(']') &&
+				*(v->now+4) == CHR(']') &&
-				*(v->now + 5) == CHR(']'))
+				*(v->now+5) == CHR(']')) {
-			{
+			c = *(v->now+2);
 				c = *(v->now + 2);
 			v->now += 6;
 			NOTE(REG_UNONPOSIX);
 			RET((c == CHR('<')) ? '<' : '>');
 		}
 		INTOCON(L_BRACK);
-			if (NEXT1('^'))
+		if (NEXT1('^')) {
 			{
 			v->now++;
 			RETV('[', 0);
 		}
@@ -998,20 +903,18 @@ brenext(struct vars * v,
 	case CHR('^'):
 		if (LASTTYPE(EMPTY))
 			RET('^');
-			if (LASTTYPE('('))
+		if (LASTTYPE('(')) {
 			{
 			NOTE(REG_UUNSPEC);
 			RET('^');
 		}
 		RETV(PLAIN, c);
 		break;
 	case CHR('$'):
-			if (v->cflags & REG_EXPANDED)
+		if (v->cflags&REG_EXPANDED)
 			skip(v);
 		if (ATEOS())
 			RET('$');
-			if (NEXT2('\\', ')'))
+		if (NEXT2('\\', ')')) {
 			{
 			NOTE(REG_UUNSPEC);
 			RET('$');
 		}
@@ -1030,8 +933,7 @@ brenext(struct vars * v,
 		FAILW(REG_EESCAPE);
 	c = *v->now++;
-	switch (c)
+	switch (c) {
 	{
 	case CHR('{'):
 		INTOCON(L_BBND);
 		NOTE(REG_UBOUNDS);
@@ -1051,21 +953,14 @@ brenext(struct vars * v,
 		NOTE(REG_UNONPOSIX);
 		RET('>');
 		break;
-		case CHR('1'):
+	case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
-		case CHR('2'):
+	case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
 		case CHR('3'):
 		case CHR('4'):
 		case CHR('5'):
 		case CHR('6'):
 		case CHR('7'):
 		case CHR('8'):
 	case CHR('9'):
 		NOTE(REG_UBACKREF);
-			RETV(BACKREF, (chr) DIGITVAL(c));
+		RETV(BACKREF, (chr)DIGITVAL(c));
 		break;
 	default:
-			if (iscalnum(c))
+		if (iscalnum(c)) {
 			{
 			NOTE(REG_UBSALNUM);
 			NOTE(REG_UUNSPEC);
 		}
@@ -1077,17 +972,18 @@ brenext(struct vars * v,
 }
 /*
- * skip - skip white space and comments in expanded form
+ - skip - skip white space and comments in expanded form
 ^ static VOID skip(struct vars *);
 */
-static void
+static VOID
-skip(struct vars * v)
+skip(v)
 struct vars *v;
 {
 	chr *start = v->now;
-	assert(v->cflags & REG_EXPANDED);
+	assert(v->cflags&REG_EXPANDED);
-	for (;;)
+	for (;;) {
 	{
 		while (!ATEOS() && iscspace(*v->now))
 			v->now++;
 		if (ATEOS() || *v->now != CHR('#'))
@@ -1103,27 +999,46 @@ skip(struct vars * v)
 }
 /*
- * newline - return the chr for a newline
+ - newline - return the chr for a newline
 *
 * This helps confine use of CHR to this source file.
 ^ static chr newline(NOPARMS);
 */
 static chr
-newline(void)
+newline()
 {
 	return CHR('\n');
 }
 /*
- * chrnamed - return the chr known by a given (chr string) name
+ - ch - return the chr sequence for regc_locale.c's fake collating element ch
- *
+ * This helps confine use of CHR to this source file.  Beware that the caller
 * knows how long the sequence is.
 ^ #ifdef REG_DEBUG
 ^ static chr *ch(NOPARMS);
 ^ #endif
 */
 #ifdef REG_DEBUG
 static chr *
 ch()
 {
 	static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
 	return chstr;
 }
 #endif
 /*
 - chrnamed - return the chr known by a given (chr string) name
 * The code is a bit clumsy, but this routine gets only such specialized
 * use that it hardly matters.
 ^ static chr chrnamed(struct vars *, chr *, chr *, pchr);
 */
 static chr
-chrnamed(struct vars * v,
+chrnamed(v, startp, endp, lastresort)
-		 chr *startp,			/* start of name */
+struct vars *v;
-		 chr *endp,				/* just past end of name */
+chr *startp;			/* start of name */
-		 chr lastresort)		/* what to return if name lookup fails */
+chr *endp;			/* just past end of name */
 pchr lastresort;		/* what to return if name lookup fails */
 {
 	celt c;
 	int errsave;
@@ -1137,10 +1052,10 @@ chrnamed(struct vars * v,
 	v->err = errsave;
 	if (e != 0)
-		return (chr) lastresort;
+		return (chr)lastresort;
 	cv = range(v, c, c, 0);
 	if (cv->nchrs == 0)
-		return (chr) lastresort;
+		return (chr)lastresort;
 	return cv->chrs[0];
 }
--- a/src/regex/regerrs.h
+++ b/src/regex/regerrs.h
@@ -1,75 +1,18 @@
-/*
+{ REG_OKAY,	"REG_OKAY",	"no errors detected" },
- * $Id$
+{ REG_NOMATCH,	"REG_NOMATCH",	"failed to match" },
- */
+{ REG_BADPAT,	"REG_BADPAT",	"invalid regexp (reg version 0.8)" },
-
+{ REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element" },
-{
+{ REG_ECTYPE,	"REG_ECTYPE",	"invalid character class" },
-	REG_OKAY, "REG_OKAY", "no errors detected"
+{ REG_EESCAPE,	"REG_EESCAPE",	"invalid escape \\ sequence" },
-},
+{ REG_ESUBREG,	"REG_ESUBREG",	"invalid backreference number" },
-
+{ REG_EBRACK,	"REG_EBRACK",	"brackets [] not balanced" },
-{
+{ REG_EPAREN,	"REG_EPAREN",	"parentheses () not balanced" },
-	REG_NOMATCH, "REG_NOMATCH", "failed to match"
+{ REG_EBRACE,	"REG_EBRACE",	"braces {} not balanced" },
-},
+{ REG_BADBR,	"REG_BADBR",	"invalid repetition count(s)" },
-
+{ REG_ERANGE,	"REG_ERANGE",	"invalid character range" },
-{
+{ REG_ESPACE,	"REG_ESPACE",	"out of memory" },
-	REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)"
+{ REG_BADRPT,	"REG_BADRPT",	"quantifier operand invalid" },
-},
+{ REG_ASSERT,	"REG_ASSERT",	"\"can't happen\" -- you found a bug" },
-
+{ REG_INVARG,	"REG_INVARG",	"invalid argument to regex function" },
-{
+{ REG_MIXED,	"REG_MIXED",	"character widths of regex and string differ" },
-	REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element"
+{ REG_BADOPT,	"REG_BADOPT",	"invalid embedded option" },
 },
 {
 	REG_ECTYPE, "REG_ECTYPE", "invalid character class"
 },
 {
 	REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence"
 },
 {
 	REG_ESUBREG, "REG_ESUBREG", "invalid backreference number"
 },
 {
 	REG_EBRACK, "REG_EBRACK", "brackets [] not balanced"
 },
 {
 	REG_EPAREN, "REG_EPAREN", "parentheses () not balanced"
 },
 {
 	REG_EBRACE, "REG_EBRACE", "braces {} not balanced"
 },
 {
 	REG_BADBR, "REG_BADBR", "invalid repetition count(s)"
 },
 {
 	REG_ERANGE, "REG_ERANGE", "invalid character range"
 },
 {
 	REG_ESPACE, "REG_ESPACE", "out of memory"
 },
 {
 	REG_BADRPT, "REG_BADRPT", "quantifier operand invalid"
 },
 {
 	REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug"
 },
 {
 	REG_INVARG, "REG_INVARG", "invalid argument to regex function"
 },
 {
 	REG_MIXED, "REG_MIXED", "character widths of regex and string differ"
 },
 {
 	REG_BADOPT, "REG_BADOPT", "invalid embedded option"
 },
--- a/src/regex/regex.h
+++ b/src/regex/regex.h
@@ -1,74 +1,341 @@
 #ifndef _REGEX_H_
 #define	_REGEX_H_	/* never again */
-/* ========= begin header generated by ./mkh ========= */
+/*
 * regular expressions
 *
 * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
 * 
 * Development of this software was funded, in part, by Cray Research Inc.,
 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
 * Corporation, none of whom are responsible for the results.  The author
 * thanks all of them. 
 * 
 * Redistribution and use in source and binary forms -- with or without
 * modification -- are permitted for any purpose, provided that
 * redistributions in source form retain this entire copyright notice and
 * indicate the origin and nature of any modifications.
 *
 * I'd appreciate being given credit for this package in the documentation
 * of software which uses it, but that is not a requirement.
 * 
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 *
 * Prototypes etc. marked with "^" within comments get gathered up (and
 * possibly edited) by the regfwd program and inserted near the bottom of
 * this file.
 *
 * We offer the option of declaring one wide-character version of the
 * RE functions as well as the char versions.  To do that, define
 * __REG_WIDE_T to the type of wide characters (unfortunately, there
 * is no consensus that wchar_t is suitable) and __REG_WIDE_COMPILE and
 * __REG_WIDE_EXEC to the names to be used for the compile and execute
 * functions (suggestion:  re_Xcomp and re_Xexec, where X is a letter
 * suggestive of the wide type, e.g. re_ucomp and re_uexec for Unicode).
 * For cranky old compilers, it may be necessary to do something like:
 * #define	__REG_WIDE_COMPILE(a,b,c,d)	re_Xcomp(a,b,c,d)
 * #define	__REG_WIDE_EXEC(a,b,c,d,e,f,g)	re_Xexec(a,b,c,d,e,f,g)
 * rather than just #defining the names as parameterless macros.
 *
 * For some specialized purposes, it may be desirable to suppress the
 * declarations of the "front end" functions, regcomp() and regexec(),
 * or of the char versions of the compile and execute functions.  To
 * suppress the front-end functions, define __REG_NOFRONT.  To suppress
 * the char versions, define __REG_NOCHAR.
 *
 * The right place to do those defines (and some others you may want, see
 * below) would be <sys/types.h>.  If you don't have control of that file,
 * the right place to add your own defines to this file is marked below.
 * This is normally done automatically, by the makefile and regmkhdr, based
 * on the contents of regcustom.h.
 */
 /*
 * voodoo for C++
 */
 #ifdef __cplusplus
 extern "C" {
 #endif
-/* === regex2.h === */
+
-typedef off_t regoff_t;
+
 /*
 * Add your own defines, if needed, here.
 */
 /*
 * Location where a chunk of regcustom.h is automatically spliced into
 * this file (working from its prototype, regproto.h).
 */
 /* --- begin --- */
 /* ensure certain things don't sneak in from system headers */
 #ifdef __REG_WIDE_T
 #undef __REG_WIDE_T
 #endif
 #ifdef __REG_WIDE_COMPILE
 #undef __REG_WIDE_COMPILE
 #endif
 #ifdef __REG_WIDE_EXEC
 #undef __REG_WIDE_EXEC
 #endif
 #ifdef __REG_REGOFF_T
 #undef __REG_REGOFF_T
 #endif
 #ifdef __REG_VOID_T
 #undef __REG_VOID_T
 #endif
 #ifdef __REG_CONST
 #undef __REG_CONST
 #endif
 #ifdef __REG_NOFRONT
 #undef __REG_NOFRONT
 #endif
 #ifdef __REG_NOCHAR
 #undef __REG_NOCHAR
 #endif
 /* interface types */
 #define	__REG_WIDE_T	Tcl_UniChar
 #define	__REG_REGOFF_T	long	/* not really right, but good enough... */
 #define	__REG_VOID_T	VOID
 #define	__REG_CONST	CONST
 /* names and declarations */
 #define	__REG_WIDE_COMPILE	TclReComp
 #define	__REG_WIDE_EXEC		TclReExec
 #define	__REG_NOFRONT		/* don't want regcomp() and regexec() */
 #define	__REG_NOCHAR		/* or the char versions */
 #define	regfree		TclReFree
 #define	regerror	TclReError
 /* --- end --- */
 /*
 * interface types etc.
 */
 /*
 * regoff_t has to be large enough to hold either off_t or ssize_t,
 * and must be signed; it's only a guess that long is suitable, so we
 * offer <sys/types.h> an override.
 */
 #ifdef __REG_REGOFF_T
 typedef __REG_REGOFF_T regoff_t;
 #else
 typedef long regoff_t;
 #endif
 /*
 * For benefit of old compilers, we offer <sys/types.h> the option of
 * overriding the `void' type used to declare nonexistent return types.
 */
 #ifdef __REG_VOID_T
 typedef __REG_VOID_T re_void;
 #else
 typedef void re_void;
 #endif
 /*
 * Also for benefit of old compilers, <sys/types.h> can supply a macro
 * which expands to a substitute for `const'.
 */
 #ifndef __REG_CONST
 #define	__REG_CONST	const
 #endif
 /*
 * other interface types
 */
 /* the biggie, a compiled RE (or rather, a front end to same) */
 typedef struct {
-	int re_magic;
+	int re_magic;		/* magic number */
-	size_t re_nsub;		/* number of parenthesized subexpressions */
+	size_t re_nsub;		/* number of subexpressions */
-	const char *re_endp;	/* end pointer for REG_PEND */
+	long re_info;		/* information about RE */
-	struct re_guts *re_g;	/* none of your business :-) */
+#		define	REG_UBACKREF		000001
 #		define	REG_ULOOKAHEAD		000002
 #		define	REG_UBOUNDS		000004
 #		define	REG_UBRACES		000010
 #		define	REG_UBSALNUM		000020
 #		define	REG_UPBOTCH		000040
 #		define	REG_UBBS		000100
 #		define	REG_UNONPOSIX		000200
 #		define	REG_UUNSPEC		000400
 #		define	REG_UUNPORT		001000
 #		define	REG_ULOCALE		002000
 #		define	REG_UEMPTYMATCH		004000
 #		define	REG_UIMPOSSIBLE		010000
 #		define	REG_USHORTEST		020000
 	int re_csize;		/* sizeof(character) */
 	char *re_endp;		/* backward compatibility kludge */
 	/* the rest is opaque pointers to hidden innards */
 	char *re_guts;		/* `char *' is more portable than `void *' */
 	char *re_fns;
 } regex_t;
 /* result reporting (may acquire more fields later) */
 typedef struct {
-	regoff_t rm_so;		/* start of match */
+	regoff_t rm_so;		/* start of substring */
-	regoff_t rm_eo;		/* end of match */
+	regoff_t rm_eo;		/* end of substring */
 } regmatch_t;
-
+/* supplementary control and reporting */
-/* === regcomp.c === */
+typedef struct {
-extern int regcomp(regex_t *, const char *, int);
+	regmatch_t rm_extend;	/* see REG_EXPECT */
-#define	REG_BASIC	0000
+} rm_detail_t;
-#define	REG_EXTENDED	0001
-#define	REG_ICASE	0002
-#define	REG_NOSUB	0004
-#define	REG_NEWLINE	0010
-#define	REG_NOSPEC	0020
-#define	REG_PEND	0040
-#define	REG_DUMP	0200
-/* === regerror.c === */
+
-#define	REG_OKAY	 0
+/*
-#define	REG_NOMATCH	 1
+ * compilation
-#define	REG_BADPAT	 2
+ ^ #ifndef __REG_NOCHAR
-#define	REG_ECOLLATE	 3
+ ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
-#define	REG_ECTYPE	 4
+ ^ #endif
-#define	REG_EESCAPE	 5
+ ^ #ifndef __REG_NOFRONT
-#define	REG_ESUBREG	 6
+ ^ int regcomp(regex_t *, __REG_CONST char *, int);
-#define	REG_EBRACK	 7
+ ^ #endif
-#define	REG_EPAREN	 8
+ ^ #ifdef __REG_WIDE_T
-#define	REG_EBRACE	 9
+ ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
-#define	REG_BADBR	10
+ ^ #endif
-#define	REG_ERANGE	11
+ */
-#define	REG_ESPACE	12
+#define	REG_BASIC	000000	/* BREs (convenience) */
-#define	REG_BADRPT	13
+#define	REG_EXTENDED	000001	/* EREs */
-#define	REG_EMPTY	14
+#define	REG_ADVF	000002	/* advanced features in EREs */
-#define	REG_ASSERT	15
+#define	REG_ADVANCED	000003	/* AREs (which are also EREs) */
-#define	REG_INVARG	16
+#define	REG_QUOTE	000004	/* no special characters, none */
-#define	REG_ATOI	255	/* convert name to number (!) */
+#define	REG_NOSPEC	REG_QUOTE	/* historical synonym */
-#define	REG_ITOA	0400	/* convert number to name (!) */
+#define	REG_ICASE	000010	/* ignore case */
-extern size_t regerror(int, const regex_t *, char *, size_t);
+#define	REG_NOSUB	000020	/* don't care about subexpressions */
 #define	REG_EXPANDED	000040	/* expanded format, white space & comments */
 #define	REG_NLSTOP	000100	/* \n doesn't match . or [^ ] */
 #define	REG_NLANCH	000200	/* ^ matches after \n, $ before */
 #define	REG_NEWLINE	000300	/* newlines are line terminators */
 #define	REG_PEND	000400	/* ugh -- backward-compatibility hack */
 #define	REG_EXPECT	001000	/* report details on partial/limited matches */
 #define	REG_BOSONLY	002000	/* temporary kludge for BOS-only matches */
 #define	REG_DUMP	004000	/* none of your business :-) */
 #define	REG_FAKE	010000	/* none of your business :-) */
 #define	REG_PROGRESS	020000	/* none of your business :-) */
-/* === regexec.c === */
+
-extern int regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
+/*
-#define	REG_NOTBOL	00001
+ * execution
-#define	REG_NOTEOL	00002
+ ^ #ifndef __REG_NOCHAR
-#define	REG_STARTEND	00004
+ ^ int re_exec(regex_t *, __REG_CONST char *, size_t,
-#define	REG_TRACE	00400	/* tracing of execution */
+ ^				rm_detail_t *, size_t, regmatch_t [], int);
-#define	REG_LARGE	01000	/* force large representation */
+ ^ #endif
-#define	REG_BACKR	02000	/* force use of backref code */
+ ^ #ifndef __REG_NOFRONT
 ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
 ^ #endif
 ^ #ifdef __REG_WIDE_T
 ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
 ^				rm_detail_t *, size_t, regmatch_t [], int);
 ^ #endif
 */
 #define	REG_NOTBOL	0001	/* BOS is not BOL */
 #define	REG_NOTEOL	0002	/* EOS is not EOL */
 #define	REG_STARTEND	0004	/* backward compatibility kludge */
 #define	REG_FTRACE	0010	/* none of your business */
 #define	REG_MTRACE	0020	/* none of your business */
 #define	REG_SMALL	0040	/* none of your business */
-/* === regfree.c === */
-extern void regfree(regex_t *);
 /*
 * misc generics (may be more functions here eventually)
 ^ re_void regfree(regex_t *);
 */
 /*
 * error reporting
 * Be careful if modifying the list of error codes -- the table used by
 * regerror() is generated automatically from this file!
 *
 * Note that there is no wide-char variant of regerror at this time; what
 * kind of character is used for error reports is independent of what kind
 * is used in matching.
 *
 ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
 */
 #define	REG_OKAY	 0	/* no errors detected */
 #define	REG_NOMATCH	 1	/* failed to match */
 #define	REG_BADPAT	 2	/* invalid regexp */
 #define	REG_ECOLLATE	 3	/* invalid collating element */
 #define	REG_ECTYPE	 4	/* invalid character class */
 #define	REG_EESCAPE	 5	/* invalid escape \ sequence */
 #define	REG_ESUBREG	 6	/* invalid backreference number */
 #define	REG_EBRACK	 7	/* brackets [] not balanced */
 #define	REG_EPAREN	 8	/* parentheses () not balanced */
 #define	REG_EBRACE	 9	/* braces {} not balanced */
 #define	REG_BADBR	10	/* invalid repetition count(s) */
 #define	REG_ERANGE	11	/* invalid character range */
 #define	REG_ESPACE	12	/* out of memory */
 #define	REG_BADRPT	13	/* quantifier operand invalid */
 #define	REG_ASSERT	15	/* "can't happen" -- you found a bug */
 #define	REG_INVARG	16	/* invalid argument to regex function */
 #define	REG_MIXED	17	/* character widths of regex and string differ */
 #define	REG_BADOPT	18	/* invalid embedded option */
 /* two specials for debugging and testing */
 #define	REG_ATOI	101	/* convert error-code name to number */
 #define	REG_ITOA	102	/* convert error-code number to name */
 /*
 * the prototypes, as possibly munched by regfwd
 */
 /* =====^!^===== begin forwards =====^!^===== */
 /* automatically gathered by fwd; do not hand-edit */
 /* === regproto.h === */
 #ifndef __REG_NOCHAR
 int re_comp _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, int));
 #endif
 #ifndef __REG_NOFRONT
 int regcomp _ANSI_ARGS_((regex_t *, __REG_CONST char *, int));
 #endif
 #ifdef __REG_WIDE_T
 int __REG_WIDE_COMPILE _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int));
 #endif
 #ifndef __REG_NOCHAR
 int re_exec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
 #endif
 #ifndef __REG_NOFRONT
 int regexec _ANSI_ARGS_((regex_t *, __REG_CONST char *, size_t, regmatch_t [], int));
 #endif
 #ifdef __REG_WIDE_T
 int __REG_WIDE_EXEC _ANSI_ARGS_((regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int));
 #endif
 re_void regfree _ANSI_ARGS_((regex_t *));
 extern size_t regerror _ANSI_ARGS_((int, __REG_CONST regex_t *, char *, size_t));
 /* automatically gathered by fwd; do not hand-edit */
 /* =====^!^===== end forwards =====^!^===== */
 /*
 * more C++ voodoo
 */
 #ifdef __cplusplus
 }
 #endif
-/* ========= end header generated by ./mkh ========= */
+
 #endif
--- a/src/regex/regguts.h
+++ b/src/regex/regguts.h
@@ -26,8 +26,6 @@
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $Id$
 */
@@ -45,27 +43,52 @@
 * Things that regcustom.h might override.
 */
 /* standard header files (NULL is a reasonable indicator for them) */
 #ifndef NULL
 #include <stdio.h>
 #include <stdlib.h>
 #include <ctype.h>
 #include <limits.h>
 #include <string.h>
 #endif
 /* assertions */
 #ifndef assert
-#ifndef REG_DEBUG
+#	ifndef REG_DEBUG
 #	ifndef NDEBUG
 #	define	NDEBUG		/* no assertions */
 #	endif
 #endif
 #include <assert.h>
 #endif
 /* voids */
 #ifndef VOID
 #define	VOID	void			/* for function return values */
 #endif
 #ifndef DISCARD
-#define DISCARD void			/* for throwing values away */
+#define	DISCARD	VOID			/* for throwing values away */
 #endif
 #ifndef PVOID
 #define	PVOID	VOID *			/* generic pointer */
 #endif
 #ifndef VS
-#define VS(x)	((void *)(x))	/* cast something to generic ptr */
+#define	VS(x)	((PVOID)(x))		/* cast something to generic ptr */
 #endif
 #ifndef NOPARMS
 #define	NOPARMS	VOID			/* for empty parm lists */
 #endif
 /* const */
 #ifndef CONST
 #define	CONST	const			/* for old compilers, might be empty */
 #endif
 /* function-pointer declarator */
 #ifndef FUNCPTR
-#define FUNCPTR(name, args) (*name) args
+#if __STDC__ >= 1
 #define	FUNCPTR(name, args)	(*name)args
 #else
 #define	FUNCPTR(name, args)	(*name)()
 #endif
 #endif
 /* memory allocation */
@@ -136,8 +159,7 @@
 #ifndef BYTBITS
 #define	BYTBITS	8		/* bits in a byt */
 #endif
-#define BYTTAB	(1<<BYTBITS)	/* size of table with one entry per byt
+#define	BYTTAB	(1<<BYTBITS)	/* size of table with one entry per byt value */
-								 * value */
 #define	BYTMASK	(BYTTAB-1)	/* bit mask for byt */
 #define	NBYTS	((CHRBITS+BYTBITS-1)/BYTBITS)
 /* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */
@@ -150,7 +172,6 @@
 */
 typedef short color;		/* colors of characters */
 typedef int pcolor;		/* what color promotes to */
 #define	COLORLESS	(-1)	/* impossible color */
 #define	WHITE		0	/* default color, parent of all others */
@@ -167,49 +188,43 @@ typedef int pcolor;				/* what color promotes to */
 */
 /* the tree itself */
-struct colors
+struct colors {
-{
 	color ccolor[BYTTAB];
 };
-struct ptrs
+struct ptrs {
-{
 	union tree *pptr[BYTTAB];
 };
-union tree
+union tree {
-{
 	struct colors colors;
 	struct ptrs ptrs;
 };
 #define	tcolor	colors.ccolor
 #define	tptr	ptrs.pptr
 /* internal per-color structure for the color machinery */
-struct colordesc
+struct colordesc {
-{
 	uchr nchrs;		/* number of chars of this color */
 	color sub;		/* open subcolor (if any); free chain ptr */
-#define  NOSUB	 COLORLESS
+#		define	NOSUB	COLORLESS
 	struct arc *arcs;	/* color chain */
 	int flags;
-#define  FREECOL 01				/* currently free */
+#		define	FREECOL	01	/* currently free */
-#define  PSEUDO  02				/* pseudocolor, no real chars */
+#		define	PSEUDO	02	/* pseudocolor, no real chars */
-#define  UNUSEDCOLOR(cd) ((cd)->flags&FREECOL)
+#	define	UNUSEDCOLOR(cd)	((cd)->flags&FREECOL)
 	union tree *block;	/* block of solid color, if any */
 };
 /* the color map itself */
-struct colormap
+struct colormap {
-{
 	int magic;
-#define  CMMAGIC 0x876
+#		define	CMMAGIC	0x876
 	struct vars *v;			/* for compile error reporting */
 	size_t ncds;			/* number of colordescs */
 	size_t max;			/* highest in use */
 	color free;			/* beginning of free chain (if non-0) */
 	struct colordesc *cd;
-#define  CDEND(cm)	 (&(cm)->cd[(cm)->max + 1])
+#	define	CDEND(cm)	(&(cm)->cd[(cm)->max + 1])
-#define  NINLINECDS  ((size_t)10)
+#		define	NINLINECDS	((size_t)10)
 	struct colordesc cdspace[NINLINECDS];
 	union tree tree[NBYTS];		/* tree top, plus fill blocks */
 };
@@ -236,8 +251,7 @@ struct colormap
 * Interface definitions for locale-interface functions in locale.c.
 * Multi-character collating elements (MCCEs) cause most of the trouble.
 */
-struct cvec
+struct cvec {
-{
 	int nchrs;		/* number of chrs */
 	int chrspace;		/* number of chrs possible */
 	chr *chrs;		/* pointer to vector of chrs */
@@ -264,30 +278,27 @@ struct cvec
 */
 struct state;
-struct arc
+struct arc {
-{
 	int type;
-#define  ARCFREE '\0'
+#		define	ARCFREE	'\0'
 	color co;
 	struct state *from;	/* where it's from (and contained within) */
 	struct state *to;	/* where it's to */
 	struct arc *outchain;	/* *from's outs chain or free chain */
-#define  freechain	 outchain
+#	define	freechain	outchain
 	struct arc *inchain;	/* *to's ins chain */
 	struct arc *colorchain;	/* color's arc chain */
 };
-struct arcbatch
+struct arcbatch {		/* for bulk allocation of arcs */
-{								/* for bulk allocation of arcs */
 	struct arcbatch *next;
-#define  ABSIZE  10
+#	define	ABSIZE	10
 	struct arc a[ABSIZE];
 };
-struct state
+struct state {
-{
 	int no;
-#define  FREESTATE	 (-1)
+#		define	FREESTATE	(-1)
 	char flag;		/* marks special states */
 	int nins;		/* number of inarcs */
 	struct arc *ins;	/* chain of inarcs */
@@ -297,13 +308,11 @@ struct state
 	struct state *tmp;	/* temporary for traversal algorithms */
 	struct state *next;	/* chain for traversing all */
 	struct state *prev;	/* back chain */
-	struct arcbatch oas;		/* first arcbatch, avoid malloc in easy
+	struct arcbatch oas;	/* first arcbatch, avoid malloc in easy case */
-								 * case */
 	int noas;		/* number of arcs used in first arcbatch */
 };
-struct nfa
+struct nfa {
-{
 	struct state *pre;	/* pre-initial state */
 	struct state *init;	/* initial state */
 	struct state *final;	/* final state */
@@ -324,18 +333,16 @@ struct nfa
 /*
 * definitions for compacted NFA
 */
-struct carc
+struct carc {
-{
 	color co;		/* COLORLESS is list terminator */
 	int to;			/* state number */
 };
-struct cnfa
+struct cnfa {
-{
 	int nstates;		/* number of states */
 	int ncolors;		/* number of colors */
 	int flags;
-#define  HASLACONS	 01			/* uses lookahead constraints */
+#		define	HASLACONS	01	/* uses lookahead constraints */
 	int pre;		/* setup state number */
 	int post;		/* teardown state number */
 	color bos[2];		/* colors, if any, assigned to BOS and BOL */
@@ -343,7 +350,6 @@ struct cnfa
 	struct carc **states;	/* vector of pointers to outarc lists */
 	struct carc *arcs;	/* the area for the lists */
 };
 #define	ZAPCNFA(cnfa)	((cnfa).nstates = 0)
 #define	NULLCNFA(cnfa)	((cnfa).nstates == 0)
@@ -352,31 +358,28 @@ struct cnfa
 /*
 * subexpression tree
 */
-struct subre
+struct subre {
-{
+	char op;		/* '|', '.' (concat), 'b' (backref), '(', '=' */
-	char		op;				/* '|', '.' (concat), 'b' (backref), '(',
-								 * '=' */
 	char flags;
-#define  LONGER  01				/* prefers longer match */
+#		define	LONGER	01	/* prefers longer match */
-#define  SHORTER 02				/* prefers shorter match */
+#		define	SHORTER	02	/* prefers shorter match */
-#define  MIXED	 04				/* mixed preference below */
+#		define	MIXED	04	/* mixed preference below */
-#define  CAP 010				/* capturing parens below */
+#		define	CAP	010	/* capturing parens below */
-#define  BACKR	 020			/* back reference below */
+#		define	BACKR	020	/* back reference below */
-#define  INUSE	 0100			/* in use in final tree */
+#		define	INUSE	0100	/* in use in final tree */
-#define  LOCAL	 03				/* bits which may not propagate up */
+#		define	LOCAL	03	/* bits which may not propagate up */
-#define  LMIX(f) ((f)<<2)		/* LONGER -> MIXED */
+#		define	LMIX(f)	((f)<<2)	/* LONGER -> MIXED */
-#define  SMIX(f) ((f)<<1)		/* SHORTER -> MIXED */
+#		define	SMIX(f)	((f)<<1)	/* SHORTER -> MIXED */
-#define  UP(f)	 (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
+#		define	UP(f)	(((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
-#define  MESSY(f)	 ((f)&(MIXED|CAP|BACKR))
+#		define	MESSY(f)	((f)&(MIXED|CAP|BACKR))
-#define  PREF(f) ((f)&LOCAL)
+#		define	PREF(f)	((f)&LOCAL)
-#define  PREF2(f1, f2)	 ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
+#		define	PREF2(f1, f2)	((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
-#define  COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
+#		define	COMBINE(f1, f2)	(UP((f1)|(f2)) | PREF2(f1, f2))
 	short retry;		/* index into retry memory */
 	int subno;		/* subexpression number (for 'b' and '(') */
 	short min;		/* min repetitions, for backref only */
 	short max;		/* max repetitions, for backref only */
-	struct subre *left;			/* left child, if any (also freelist
+	struct subre *left;	/* left child, if any (also freelist chain) */
-								 * chain) */
 	struct subre *right;	/* right child, if any */
 	struct state *begin;	/* outarcs from here... */
 	struct state *end;	/* ...ending in inarcs here */
@@ -390,9 +393,8 @@ struct subre
 * table of function pointers for generic manipulation functions
 * A regex_t's re_fns points to one of these.
 */
-struct fns
+struct fns {
-{
+	VOID FUNCPTR(free, (regex_t *));
-	void		FUNCPTR(free, (regex_t *));
 };
@@ -400,10 +402,9 @@ struct fns
 /*
 * the insides of a regex_t, hidden behind a void *
 */
-struct guts
+struct guts {
-{
 	int magic;
-#define  GUTSMAGIC	 0xfed9
+#		define	GUTSMAGIC	0xfed9
 	int cflags;		/* copy of compile flags */
 	long info;		/* copy of re_info */
 	size_t nsub;		/* copy of re_nsub */
@@ -411,7 +412,7 @@ struct guts
 	struct cnfa search;	/* for fast preliminary search */
 	int ntree;
 	struct colormap cmap;
-	int			FUNCPTR(compare, (const chr *, const chr *, size_t));
+	int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t));
 	struct subre *lacons;	/* lookahead-constraint vector */
 	int nlacons;		/* size of lacons */
 };