Tcl regex lib
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@16311 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
@@ -1,20 +1,166 @@
|
|||||||
Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved.
|
This regular expression package was originally developed by Henry Spencer.
|
||||||
This software is not subject to any license of the American Telephone
|
It bears the following copyright notice:
|
||||||
and Telegraph Company or of the Regents of the University of California.
|
|
||||||
|
|
||||||
Permission is granted to anyone to use this software for any purpose on
|
**********************************************************************
|
||||||
any computer system, and to alter it and redistribute it, subject
|
|
||||||
to the following restrictions:
|
|
||||||
|
|
||||||
1. The author is not responsible for the consequences of use of this
|
Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
|
||||||
software, no matter how awful, even if they arise from flaws in it.
|
|
||||||
|
|
||||||
2. The origin of this software must not be misrepresented, either by
|
Development of this software was funded, in part, by Cray Research Inc.,
|
||||||
explicit claim or by omission. Since few users ever read sources,
|
UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
|
||||||
credits must appear in the documentation.
|
Corporation, none of whom are responsible for the results. The author
|
||||||
|
thanks all of them.
|
||||||
|
|
||||||
3. Altered versions must be plainly marked as such, and must not be
|
Redistribution and use in source and binary forms -- with or without
|
||||||
misrepresented as being the original software. Since few users
|
modification -- are permitted for any purpose, provided that
|
||||||
ever read sources, credits must appear in the documentation.
|
redistributions in source form retain this entire copyright notice and
|
||||||
|
indicate the origin and nature of any modifications.
|
||||||
|
|
||||||
|
I'd appreciate being given credit for this package in the documentation
|
||||||
|
of software which uses it, but that is not a requirement.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||||
|
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||||
|
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||||
|
OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************
|
||||||
|
|
||||||
|
PostgreSQL adopted the code out of Tcl 8.4.1. Portions of regc_locale.c
|
||||||
|
and re_syntax.n were developed by Tcl developers other than Henry; these
|
||||||
|
files bear the Tcl copyright and license notice:
|
||||||
|
|
||||||
|
**********************************************************************
|
||||||
|
|
||||||
|
This software is copyrighted by the Regents of the University of
|
||||||
|
California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
|
||||||
|
Corporation and other parties. The following terms apply to all files
|
||||||
|
associated with the software unless explicitly disclaimed in
|
||||||
|
individual files.
|
||||||
|
|
||||||
|
The authors hereby grant permission to use, copy, modify, distribute,
|
||||||
|
and license this software and its documentation for any purpose, provided
|
||||||
|
that existing copyright notices are retained in all copies and that this
|
||||||
|
notice is included verbatim in any distributions. No written agreement,
|
||||||
|
license, or royalty fee is required for any of the authorized uses.
|
||||||
|
Modifications to this software may be copyrighted by their authors
|
||||||
|
and need not follow the licensing terms described here, provided that
|
||||||
|
the new terms are clearly indicated on the first page of each file where
|
||||||
|
they apply.
|
||||||
|
|
||||||
|
IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
|
||||||
|
FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
|
||||||
|
ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
|
||||||
|
DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
|
||||||
|
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
|
||||||
|
IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
|
||||||
|
NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
|
||||||
|
MODIFICATIONS.
|
||||||
|
|
||||||
|
GOVERNMENT USE: If you are acquiring this software on behalf of the
|
||||||
|
U.S. government, the Government shall have only "Restricted Rights"
|
||||||
|
in the software and related documentation as defined in the Federal
|
||||||
|
Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
|
||||||
|
are acquiring the software on behalf of the Department of Defense, the
|
||||||
|
software shall be classified as "Commercial Computer Software" and the
|
||||||
|
Government shall have only "Restricted Rights" as defined in Clause
|
||||||
|
252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
|
||||||
|
authors grant the U.S. Government and others acting in its behalf
|
||||||
|
permission to use and distribute the software in accordance with the
|
||||||
|
terms specified in this license.
|
||||||
|
|
||||||
|
**********************************************************************
|
||||||
|
|
||||||
|
Subsequent modifications to the code by the PostgreSQL project follow
|
||||||
|
the same license terms as the rest of PostgreSQL.
|
||||||
|
(License follows)
|
||||||
|
****************************************************************************
|
||||||
|
PostgreSQL Database Management System
|
||||||
|
(formerly known as Postgres, then as Postgres95)
|
||||||
|
|
||||||
|
Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||||
|
|
||||||
|
Portions Copyright (c) 1994, The Regents of the University of California
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and distribute this software and its
|
||||||
|
documentation for any purpose, without fee, and without a written agreement
|
||||||
|
is hereby granted, provided that the above copyright notice and this
|
||||||
|
paragraph and the following two paragraphs appear in all copies.
|
||||||
|
|
||||||
|
IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
|
||||||
|
DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
|
||||||
|
LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
|
||||||
|
DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
|
||||||
|
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||||
|
AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||||
|
ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO
|
||||||
|
PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||||
|
****************************************************************************
|
||||||
|
And if that's not enough, changes made from wxWindows are put under the
|
||||||
|
wxWindows license:
|
||||||
|
****************************************************************************
|
||||||
|
wxWindows Library Licence, Version 3
|
||||||
|
====================================
|
||||||
|
|
||||||
|
Copyright (C) 1998 Julian Smart, Robert Roebling [, ...]
|
||||||
|
|
||||||
|
Everyone is permitted to copy and distribute verbatim copies
|
||||||
|
of this licence document, but changing it is not allowed.
|
||||||
|
|
||||||
|
WXWINDOWS LIBRARY LICENCE
|
||||||
|
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||||
|
|
||||||
|
This library is free software; you can redistribute it and/or modify it
|
||||||
|
under the terms of the GNU Library General Public Licence as published by
|
||||||
|
the Free Software Foundation; either version 2 of the Licence, or (at
|
||||||
|
your option) any later version.
|
||||||
|
|
||||||
|
This library is distributed in the hope that it will be useful, but
|
||||||
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library
|
||||||
|
General Public Licence for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Library General Public Licence
|
||||||
|
along with this software, usually in a file named COPYING.LIB. If not,
|
||||||
|
write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
|
||||||
|
Boston, MA 02111-1307 USA.
|
||||||
|
|
||||||
|
EXCEPTION NOTICE
|
||||||
|
|
||||||
|
1. As a special exception, the copyright holders of this library give
|
||||||
|
permission for additional uses of the text contained in this release of
|
||||||
|
the library as licenced under the wxWindows Library Licence, applying
|
||||||
|
either version 3 of the Licence, or (at your option) any later version of
|
||||||
|
the Licence as published by the copyright holders of version 3 of the
|
||||||
|
Licence document.
|
||||||
|
|
||||||
|
2. The exception is that you may use, copy, link, modify and distribute
|
||||||
|
under the user's own terms, binary object code versions of works based
|
||||||
|
on the Library.
|
||||||
|
|
||||||
|
3. If you copy code from files distributed under the terms of the GNU
|
||||||
|
General Public Licence or the GNU Library General Public Licence into a
|
||||||
|
copy of this library, as this licence permits, the exception does not
|
||||||
|
apply to the code that you add in this way. To avoid misleading anyone as
|
||||||
|
to the status of such modified files, you must delete this exception
|
||||||
|
notice from such code and/or adjust the licensing conditions notice
|
||||||
|
accordingly.
|
||||||
|
|
||||||
|
4. If you write modifications of your own for this library, it is your
|
||||||
|
choice whether to permit this exception to apply to your modifications.
|
||||||
|
If you do not wish that, you must delete the exception notice from such
|
||||||
|
code and/or adjust the licensing conditions notice accordingly.
|
||||||
|
****************************************************************************
|
||||||
|
|
||||||
4. This notice may not be removed or altered.
|
|
||||||
|
@@ -1,130 +1,28 @@
|
|||||||
# You probably want to take -DREDEBUG out of CFLAGS, and put something like
|
#-------------------------------------------------------------------------
|
||||||
# -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of
|
#
|
||||||
# internal assertion checking and some debugging facilities).
|
# Makefile--
|
||||||
# Put -Dconst= in for a pre-ANSI compiler.
|
# Makefile for backend/regex
|
||||||
# Do not take -DPOSIX_MISTAKE out.
|
#
|
||||||
# REGCFLAGS isn't important to you (it's for my use in some special contexts).
|
# IDENTIFICATION
|
||||||
CFLAGS=-I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS)
|
# $Header: /projects/cvsroot/pgsql-server/src/backend/regex/Makefile,v 1.20 2003/02/05 17:41:32 tgl Exp $
|
||||||
|
#
|
||||||
|
#-------------------------------------------------------------------------
|
||||||
|
|
||||||
# If you have a pre-ANSI compiler, put -o into MKHFLAGS. If you want
|
subdir = src/backend/regex
|
||||||
# the Berkeley __P macro, put -b in.
|
top_builddir = ../../..
|
||||||
MKHFLAGS=
|
include $(top_builddir)/src/Makefile.global
|
||||||
|
|
||||||
# Flags for linking but not compiling, if any.
|
OBJS = regcomp.o regerror.o regexec.o regfree.o
|
||||||
LDFLAGS=
|
|
||||||
|
|
||||||
# Extra libraries for linking, if any.
|
all: SUBSYS.o
|
||||||
LIBS=
|
|
||||||
|
|
||||||
# Internal stuff, should not need changing.
|
SUBSYS.o: $(OBJS)
|
||||||
OBJPRODN=regcomp.o regexec.o regerror.o regfree.o
|
$(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)
|
||||||
OBJS=$(OBJPRODN) split.o debug.o re_main.o
|
|
||||||
H=cclass.h cname.h regex2.h utils.h
|
|
||||||
REGSRC=regcomp.c regerror.c regexec.c regfree.c
|
|
||||||
ALLSRC=$(REGSRC) engine.c debug.c re_main.c split.c
|
|
||||||
|
|
||||||
# Stuff that matters only if you're trying to lint the package.
|
# mark inclusion dependencies between .c files explicitly
|
||||||
LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG
|
regcomp.o: regcomp.c regc_lex.c regc_color.c regc_nfa.c regc_cvec.c regc_locale.c
|
||||||
LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c re_main.c
|
|
||||||
JUNKLINT=possible pointer alignment|null effect
|
|
||||||
|
|
||||||
# arrangements to build forward-reference header files
|
regexec.o: regexec.c rege_dfa.c
|
||||||
.SUFFIXES: .ih .h
|
|
||||||
.c.ih:
|
|
||||||
sh ./mkh $(MKHFLAGS) -p $< >$@
|
|
||||||
|
|
||||||
default: r
|
clean:
|
||||||
|
rm -f SUBSYS.o $(OBJS)
|
||||||
lib: purge $(OBJPRODN)
|
|
||||||
rm -f libregex.a
|
|
||||||
ar crv libregex.a $(OBJPRODN)
|
|
||||||
|
|
||||||
purge:
|
|
||||||
rm -f *.o
|
|
||||||
|
|
||||||
# stuff to build regex.h
|
|
||||||
REGEXH=regex.h
|
|
||||||
REGEXHSRC=regex2.h $(REGSRC)
|
|
||||||
$(REGEXH): $(REGEXHSRC) mkh
|
|
||||||
sh ./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp
|
|
||||||
cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h
|
|
||||||
rm -f regex.tmp
|
|
||||||
|
|
||||||
# dependencies
|
|
||||||
$(OBJPRODN) debug.o: utils.h regex.h regex2.h
|
|
||||||
regcomp.o: cclass.h cname.h regcomp.ih
|
|
||||||
regexec.o: engine.c engine.ih
|
|
||||||
regerror.o: regerror.ih
|
|
||||||
debug.o: debug.ih
|
|
||||||
re_main.o: re_main.ih
|
|
||||||
|
|
||||||
# tester
|
|
||||||
re: $(OBJS)
|
|
||||||
$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@
|
|
||||||
|
|
||||||
# regression test
|
|
||||||
r: re tests
|
|
||||||
./re <tests
|
|
||||||
./re -el <tests
|
|
||||||
./re -er <tests
|
|
||||||
|
|
||||||
# 57 variants, and other stuff, for development use -- not useful to you
|
|
||||||
ra: ./re tests
|
|
||||||
-./re <tests
|
|
||||||
-./re -el <tests
|
|
||||||
-./re -er <tests
|
|
||||||
|
|
||||||
rx: ./re tests
|
|
||||||
./re -x <tests
|
|
||||||
./re -x -el <tests
|
|
||||||
./re -x -er <tests
|
|
||||||
|
|
||||||
t: ./re tests
|
|
||||||
-time ./re <tests
|
|
||||||
-time ./re -cs <tests
|
|
||||||
-time ./re -el <tests
|
|
||||||
-time ./re -cs -el <tests
|
|
||||||
|
|
||||||
l: $(LINTC)
|
|
||||||
lint $(LINTFLAGS) -h $(LINTC) 2>&1 | egrep -v '$(JUNKLINT)' | tee lint
|
|
||||||
|
|
||||||
fullprint:
|
|
||||||
ti README WHATSNEW notes todo | list
|
|
||||||
ti *.h | list
|
|
||||||
list *.c
|
|
||||||
list regex.3 regex.7
|
|
||||||
|
|
||||||
print:
|
|
||||||
ti README WHATSNEW notes todo | list
|
|
||||||
ti *.h | list
|
|
||||||
list reg*.c engine.c
|
|
||||||
|
|
||||||
|
|
||||||
mf.tmp: Makefile
|
|
||||||
sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@
|
|
||||||
|
|
||||||
DTRH=cclass.h cname.h regex2.h utils.h
|
|
||||||
PRE=COPYRIGHT README WHATSNEW
|
|
||||||
POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch]
|
|
||||||
FILES=$(PRE) Makefile $(POST)
|
|
||||||
DTR=$(PRE) Makefile=mf.tmp $(POST)
|
|
||||||
dtr: $(FILES) mf.tmp
|
|
||||||
makedtr $(DTR) >$@
|
|
||||||
rm mf.tmp
|
|
||||||
|
|
||||||
cio: $(FILES)
|
|
||||||
cio $(FILES)
|
|
||||||
|
|
||||||
rdf: $(FILES)
|
|
||||||
rcsdiff -c $(FILES) 2>&1 | p
|
|
||||||
|
|
||||||
# various forms of cleanup
|
|
||||||
tidy:
|
|
||||||
rm -f junk* core core.* *.core dtr *.tmp lint
|
|
||||||
|
|
||||||
clean: tidy
|
|
||||||
rm -f *.o *.s *.ih re libregex.a
|
|
||||||
|
|
||||||
# don't do this one unless you know what you're doing
|
|
||||||
spotless: clean
|
|
||||||
rm -f mkh regex.h
|
|
||||||
|
@@ -1,20 +0,0 @@
|
|||||||
/*
 * character-class table
 *
 * Each entry maps a class name to the string of single characters that
 * belong to it (chars).  The third member (multis) is the empty string in
 * every entry of this table -- presumably it is reserved for
 * multi-character members; confirm against the code that reads it.
 *
 * The table is terminated by an entry whose name is NULL.
 */
static struct cclass {
	char *name;		/* class name, NULL in the terminating entry */
	char *chars;		/* the member characters, as a C string */
	char *multis;		/* always "" in this table */
} cclasses[] = {
	{ "alnum",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",	"" },
	{ "alpha",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",	"" },
	{ "blank",	" \t",	"" },
	{ "cntrl",	"\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37\177",	"" },
	{ "digit",	"0123456789",	"" },
	{ "graph",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",	"" },
	{ "lower",	"abcdefghijklmnopqrstuvwxyz",	"" },
	{ "print",	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",	"" },
	{ "punct",	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",	"" },
	{ "space",	"\t\n\v\f\r ",	"" },
	{ "upper",	"ABCDEFGHIJKLMNOPQRSTUVWXYZ",	"" },
	{ "xdigit",	"0123456789ABCDEFabcdef",	"" },
	{ NULL,		NULL,	"" }	/* end-of-table sentinel */
};
|
|
@@ -1,102 +0,0 @@
|
|||||||
/*
 * character-name table
 *
 * Each entry maps a symbolic character name to the single character code
 * it stands for.  Several characters have more than one name (for example
 * "BEL" and "alert" both map to '\007').
 *
 * The table is terminated by an entry whose name is NULL.
 */
static struct cname {
	char *name;		/* symbolic name, NULL in the terminating entry */
	char code;		/* the character the name denotes */
} cnames[] = {
	{ "NUL",			'\0' },
	{ "SOH",			'\001' },
	{ "STX",			'\002' },
	{ "ETX",			'\003' },
	{ "EOT",			'\004' },
	{ "ENQ",			'\005' },
	{ "ACK",			'\006' },
	{ "BEL",			'\007' },
	{ "alert",			'\007' },
	{ "BS",				'\010' },
	{ "backspace",			'\b' },
	{ "HT",				'\011' },
	{ "tab",			'\t' },
	{ "LF",				'\012' },
	{ "newline",			'\n' },
	{ "VT",				'\013' },
	{ "vertical-tab",		'\v' },
	{ "FF",				'\014' },
	{ "form-feed",			'\f' },
	{ "CR",				'\015' },
	{ "carriage-return",		'\r' },
	{ "SO",				'\016' },
	{ "SI",				'\017' },
	{ "DLE",			'\020' },
	{ "DC1",			'\021' },
	{ "DC2",			'\022' },
	{ "DC3",			'\023' },
	{ "DC4",			'\024' },
	{ "NAK",			'\025' },
	{ "SYN",			'\026' },
	{ "ETB",			'\027' },
	{ "CAN",			'\030' },
	{ "EM",				'\031' },
	{ "SUB",			'\032' },
	{ "ESC",			'\033' },
	{ "IS4",			'\034' },
	{ "FS",				'\034' },
	{ "IS3",			'\035' },
	{ "GS",				'\035' },
	{ "IS2",			'\036' },
	{ "RS",				'\036' },
	{ "IS1",			'\037' },
	{ "US",				'\037' },
	{ "space",			' ' },
	{ "exclamation-mark",		'!' },
	{ "quotation-mark",		'"' },
	{ "number-sign",		'#' },
	{ "dollar-sign",		'$' },
	{ "percent-sign",		'%' },
	{ "ampersand",			'&' },
	{ "apostrophe",			'\'' },
	{ "left-parenthesis",		'(' },
	{ "right-parenthesis",		')' },
	{ "asterisk",			'*' },
	{ "plus-sign",			'+' },
	{ "comma",			',' },
	{ "hyphen",			'-' },
	{ "hyphen-minus",		'-' },
	{ "period",			'.' },
	{ "full-stop",			'.' },
	{ "slash",			'/' },
	{ "solidus",			'/' },
	{ "zero",			'0' },
	{ "one",			'1' },
	{ "two",			'2' },
	{ "three",			'3' },
	{ "four",			'4' },
	{ "five",			'5' },
	{ "six",			'6' },
	{ "seven",			'7' },
	{ "eight",			'8' },
	{ "nine",			'9' },
	{ "colon",			':' },
	{ "semicolon",			';' },
	{ "less-than-sign",		'<' },
	{ "equals-sign",		'=' },
	{ "greater-than-sign",		'>' },
	{ "question-mark",		'?' },
	{ "commercial-at",		'@' },
	{ "left-square-bracket",	'[' },
	{ "backslash",			'\\' },
	{ "reverse-solidus",		'\\' },
	{ "right-square-bracket",	']' },
	{ "circumflex",			'^' },
	{ "circumflex-accent",		'^' },
	{ "underscore",			'_' },
	{ "low-line",			'_' },
	{ "grave-accent",		'`' },
	{ "left-brace",			'{' },
	{ "left-curly-bracket",		'{' },
	{ "vertical-line",		'|' },
	{ "right-brace",		'}' },
	{ "right-curly-bracket",	'}' },
	{ "tilde",			'~' },
	{ "DEL",			'\177' },
	{ NULL,				0 },	/* end-of-table sentinel */
};
|
|
@@ -1,242 +0,0 @@
|
|||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <ctype.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include "regex.h"
|
|
||||||
|
|
||||||
#include "utils.h"
|
|
||||||
#include "regex2.h"
|
|
||||||
#include "debug.ih"
|
|
||||||
|
|
||||||
/*
|
|
||||||
- regprint - print a regexp for debugging
|
|
||||||
== void regprint(regex_t *r, FILE *d);
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
regprint(r, d)
|
|
||||||
regex_t *r;
|
|
||||||
FILE *d;
|
|
||||||
{
|
|
||||||
register struct re_guts *g = r->re_g;
|
|
||||||
register int i;
|
|
||||||
register int c;
|
|
||||||
register int last;
|
|
||||||
int nincat[NC];
|
|
||||||
|
|
||||||
fprintf(d, "%ld states, %d categories", (long)g->nstates,
|
|
||||||
g->ncategories);
|
|
||||||
fprintf(d, ", first %ld last %ld", (long)g->firststate,
|
|
||||||
(long)g->laststate);
|
|
||||||
if (g->iflags&USEBOL)
|
|
||||||
fprintf(d, ", USEBOL");
|
|
||||||
if (g->iflags&USEEOL)
|
|
||||||
fprintf(d, ", USEEOL");
|
|
||||||
if (g->iflags&BAD)
|
|
||||||
fprintf(d, ", BAD");
|
|
||||||
if (g->nsub > 0)
|
|
||||||
fprintf(d, ", nsub=%ld", (long)g->nsub);
|
|
||||||
if (g->must != NULL)
|
|
||||||
fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen,
|
|
||||||
g->must);
|
|
||||||
if (g->backrefs)
|
|
||||||
fprintf(d, ", backrefs");
|
|
||||||
if (g->nplus > 0)
|
|
||||||
fprintf(d, ", nplus %ld", (long)g->nplus);
|
|
||||||
fprintf(d, "\n");
|
|
||||||
s_print(g, d);
|
|
||||||
for (i = 0; i < g->ncategories; i++) {
|
|
||||||
nincat[i] = 0;
|
|
||||||
for (c = CHAR_MIN; c <= CHAR_MAX; c++)
|
|
||||||
if (g->categories[c] == i)
|
|
||||||
nincat[i]++;
|
|
||||||
}
|
|
||||||
fprintf(d, "cc0#%d", nincat[0]);
|
|
||||||
for (i = 1; i < g->ncategories; i++)
|
|
||||||
if (nincat[i] == 1) {
|
|
||||||
for (c = CHAR_MIN; c <= CHAR_MAX; c++)
|
|
||||||
if (g->categories[c] == i)
|
|
||||||
break;
|
|
||||||
fprintf(d, ", %d=%s", i, regchar(c));
|
|
||||||
}
|
|
||||||
fprintf(d, "\n");
|
|
||||||
for (i = 1; i < g->ncategories; i++)
|
|
||||||
if (nincat[i] != 1) {
|
|
||||||
fprintf(d, "cc%d\t", i);
|
|
||||||
last = -1;
|
|
||||||
for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */
|
|
||||||
if (c <= CHAR_MAX && g->categories[c] == i) {
|
|
||||||
if (last < 0) {
|
|
||||||
fprintf(d, "%s", regchar(c));
|
|
||||||
last = c;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (last >= 0) {
|
|
||||||
if (last != c-1)
|
|
||||||
fprintf(d, "-%s",
|
|
||||||
regchar(c-1));
|
|
||||||
last = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fprintf(d, "\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
- s_print - print the strip for debugging
|
|
||||||
== static void s_print(register struct re_guts *g, FILE *d);
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
s_print(g, d)
|
|
||||||
register struct re_guts *g;
|
|
||||||
FILE *d;
|
|
||||||
{
|
|
||||||
register sop *s;
|
|
||||||
register cset *cs;
|
|
||||||
register int i;
|
|
||||||
register int done = 0;
|
|
||||||
register sop opnd;
|
|
||||||
register int col = 0;
|
|
||||||
register int last;
|
|
||||||
register sopno offset = 2;
|
|
||||||
# define GAP() { if (offset % 5 == 0) { \
|
|
||||||
if (col > 40) { \
|
|
||||||
fprintf(d, "\n\t"); \
|
|
||||||
col = 0; \
|
|
||||||
} else { \
|
|
||||||
fprintf(d, " "); \
|
|
||||||
col++; \
|
|
||||||
} \
|
|
||||||
} else \
|
|
||||||
col++; \
|
|
||||||
offset++; \
|
|
||||||
}
|
|
||||||
|
|
||||||
if (OP(g->strip[0]) != OEND)
|
|
||||||
fprintf(d, "missing initial OEND!\n");
|
|
||||||
for (s = &g->strip[1]; !done; s++) {
|
|
||||||
opnd = OPND(*s);
|
|
||||||
switch (OP(*s)) {
|
|
||||||
case OEND:
|
|
||||||
fprintf(d, "\n");
|
|
||||||
done = 1;
|
|
||||||
break;
|
|
||||||
case OCHAR:
|
|
||||||
if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
|
|
||||||
fprintf(d, "\\%c", (char)opnd);
|
|
||||||
else
|
|
||||||
fprintf(d, "%s", regchar((char)opnd));
|
|
||||||
break;
|
|
||||||
case OBOL:
|
|
||||||
fprintf(d, "^");
|
|
||||||
break;
|
|
||||||
case OEOL:
|
|
||||||
fprintf(d, "$");
|
|
||||||
break;
|
|
||||||
case OBOW:
|
|
||||||
fprintf(d, "\\{");
|
|
||||||
break;
|
|
||||||
case OEOW:
|
|
||||||
fprintf(d, "\\}");
|
|
||||||
break;
|
|
||||||
case OANY:
|
|
||||||
fprintf(d, ".");
|
|
||||||
break;
|
|
||||||
case OANYOF:
|
|
||||||
fprintf(d, "[(%ld)", (long)opnd);
|
|
||||||
cs = &g->sets[opnd];
|
|
||||||
last = -1;
|
|
||||||
for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */
|
|
||||||
if (CHIN(cs, i) && i < g->csetsize) {
|
|
||||||
if (last < 0) {
|
|
||||||
fprintf(d, "%s", regchar(i));
|
|
||||||
last = i;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (last >= 0) {
|
|
||||||
if (last != i-1)
|
|
||||||
fprintf(d, "-%s",
|
|
||||||
regchar(i-1));
|
|
||||||
last = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fprintf(d, "]");
|
|
||||||
break;
|
|
||||||
case OBACK_:
|
|
||||||
fprintf(d, "(\\<%ld>", (long)opnd);
|
|
||||||
break;
|
|
||||||
case O_BACK:
|
|
||||||
fprintf(d, "<%ld>\\)", (long)opnd);
|
|
||||||
break;
|
|
||||||
case OPLUS_:
|
|
||||||
fprintf(d, "(+");
|
|
||||||
if (OP(*(s+opnd)) != O_PLUS)
|
|
||||||
fprintf(d, "<%ld>", (long)opnd);
|
|
||||||
break;
|
|
||||||
case O_PLUS:
|
|
||||||
if (OP(*(s-opnd)) != OPLUS_)
|
|
||||||
fprintf(d, "<%ld>", (long)opnd);
|
|
||||||
fprintf(d, "+)");
|
|
||||||
break;
|
|
||||||
case OQUEST_:
|
|
||||||
fprintf(d, "(?");
|
|
||||||
if (OP(*(s+opnd)) != O_QUEST)
|
|
||||||
fprintf(d, "<%ld>", (long)opnd);
|
|
||||||
break;
|
|
||||||
case O_QUEST:
|
|
||||||
if (OP(*(s-opnd)) != OQUEST_)
|
|
||||||
fprintf(d, "<%ld>", (long)opnd);
|
|
||||||
fprintf(d, "?)");
|
|
||||||
break;
|
|
||||||
case OLPAREN:
|
|
||||||
fprintf(d, "((<%ld>", (long)opnd);
|
|
||||||
break;
|
|
||||||
case ORPAREN:
|
|
||||||
fprintf(d, "<%ld>))", (long)opnd);
|
|
||||||
break;
|
|
||||||
case OCH_:
|
|
||||||
fprintf(d, "<");
|
|
||||||
if (OP(*(s+opnd)) != OOR2)
|
|
||||||
fprintf(d, "<%ld>", (long)opnd);
|
|
||||||
break;
|
|
||||||
case OOR1:
|
|
||||||
if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_)
|
|
||||||
fprintf(d, "<%ld>", (long)opnd);
|
|
||||||
fprintf(d, "|");
|
|
||||||
break;
|
|
||||||
case OOR2:
|
|
||||||
fprintf(d, "|");
|
|
||||||
if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH)
|
|
||||||
fprintf(d, "<%ld>", (long)opnd);
|
|
||||||
break;
|
|
||||||
case O_CH:
|
|
||||||
if (OP(*(s-opnd)) != OOR1)
|
|
||||||
fprintf(d, "<%ld>", (long)opnd);
|
|
||||||
fprintf(d, ">");
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
fprintf(d, "!%d(%d)!", OP(*s), opnd);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (!done)
|
|
||||||
GAP();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 - regchar - make a character printable
 == static char *regchar(int ch);
 *
 * Returns a printable representation of ch: the character itself when it
 * is printable (or a space), otherwise a backslash followed by its octal
 * value.  The result lives in a static buffer that is overwritten by the
 * next call, so use or copy it before calling regchar() again.
 */
static char *			/* -> representation */
regchar(int ch)
{
	static char buf[10];
	/*
	 * Callers pass plain char values, which are negative for high-bit
	 * characters when char is signed.  Reduce to the unsigned char range
	 * first: isprint() is undefined for negative arguments other than
	 * EOF, and "%o" of a negative int prints 11 digits, which overflowed
	 * buf.  After the conversion the longest output is "\377".
	 */
	unsigned int uc = (unsigned char)ch;

	if (isprint((int)uc) || uc == ' ')
		sprintf(buf, "%c", (int)uc);
	else
		sprintf(buf, "\\%o", uc);
	return(buf);
}
|
|
1019
src/regex/engine.c
1019
src/regex/engine.c
File diff suppressed because it is too large
Load Diff
@@ -1,510 +0,0 @@
|
|||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <regex.h>
|
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
#include "main.ih"
|
|
||||||
|
|
||||||
/*
 * State shared by the test driver.
 */
char *progname;			/* argv[0], used in the usage message */
int debug = 0;			/* incremented once per -x option */
int line = 0;			/* current input line number in regress() */
int status = 0;			/* exit status; set to 1 when a check fails */

int copts = REG_EXTENDED;	/* regcomp() flags, replaced by -c */
int eopts = 0;			/* regexec() flags, replaced by -e */
regoff_t startoff = 0;		/* -S value: rm_so used with REG_STARTEND */
regoff_t endoff = 0;		/* -E value: subtracted from the string length for rm_eo */

/* defined in other source files of the package */
extern int split();
extern void regprint();
|
|
||||||
|
|
||||||
/*
|
|
||||||
- main - do the simple case, hand off to regress() for regression
|
|
||||||
*/
|
|
||||||
main(argc, argv)
|
|
||||||
int argc;
|
|
||||||
char *argv[];
|
|
||||||
{
|
|
||||||
regex_t re;
|
|
||||||
# define NS 10
|
|
||||||
regmatch_t subs[NS];
|
|
||||||
char erbuf[100];
|
|
||||||
int err;
|
|
||||||
size_t len;
|
|
||||||
int c;
|
|
||||||
int errflg = 0;
|
|
||||||
register int i;
|
|
||||||
extern int optind;
|
|
||||||
extern char *optarg;
|
|
||||||
|
|
||||||
progname = argv[0];
|
|
||||||
|
|
||||||
while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
|
|
||||||
switch (c) {
|
|
||||||
case 'c': /* compile options */
|
|
||||||
copts = options('c', optarg);
|
|
||||||
break;
|
|
||||||
case 'e': /* execute options */
|
|
||||||
eopts = options('e', optarg);
|
|
||||||
break;
|
|
||||||
case 'S': /* start offset */
|
|
||||||
startoff = (regoff_t)atoi(optarg);
|
|
||||||
break;
|
|
||||||
case 'E': /* end offset */
|
|
||||||
endoff = (regoff_t)atoi(optarg);
|
|
||||||
break;
|
|
||||||
case 'x': /* Debugging. */
|
|
||||||
debug++;
|
|
||||||
break;
|
|
||||||
case '?':
|
|
||||||
default:
|
|
||||||
errflg++;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (errflg) {
|
|
||||||
fprintf(stderr, "usage: %s ", progname);
|
|
||||||
fprintf(stderr, "[-c copt][-C][-d] [re]\n");
|
|
||||||
exit(2);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (optind >= argc) {
|
|
||||||
regress(stdin);
|
|
||||||
exit(status);
|
|
||||||
}
|
|
||||||
|
|
||||||
err = regcomp(&re, argv[optind++], copts);
|
|
||||||
if (err) {
|
|
||||||
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
|
||||||
fprintf(stderr, "error %s, %d/%d `%s'\n",
|
|
||||||
eprint(err), len, sizeof(erbuf), erbuf);
|
|
||||||
exit(status);
|
|
||||||
}
|
|
||||||
regprint(&re, stdout);
|
|
||||||
|
|
||||||
if (optind >= argc) {
|
|
||||||
regfree(&re);
|
|
||||||
exit(status);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (eopts®_STARTEND) {
|
|
||||||
subs[0].rm_so = startoff;
|
|
||||||
subs[0].rm_eo = strlen(argv[optind]) - endoff;
|
|
||||||
}
|
|
||||||
err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
|
|
||||||
if (err) {
|
|
||||||
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
|
||||||
fprintf(stderr, "error %s, %d/%d `%s'\n",
|
|
||||||
eprint(err), len, sizeof(erbuf), erbuf);
|
|
||||||
exit(status);
|
|
||||||
}
|
|
||||||
if (!(copts®_NOSUB)) {
|
|
||||||
len = (int)(subs[0].rm_eo - subs[0].rm_so);
|
|
||||||
if (subs[0].rm_so != -1) {
|
|
||||||
if (len != 0)
|
|
||||||
printf("match `%.*s'\n", len,
|
|
||||||
argv[optind] + subs[0].rm_so);
|
|
||||||
else
|
|
||||||
printf("match `'@%.1s\n",
|
|
||||||
argv[optind] + subs[0].rm_so);
|
|
||||||
}
|
|
||||||
for (i = 1; i < NS; i++)
|
|
||||||
if (subs[i].rm_so != -1)
|
|
||||||
printf("(%d) `%.*s'\n", i,
|
|
||||||
(int)(subs[i].rm_eo - subs[i].rm_so),
|
|
||||||
argv[optind] + subs[i].rm_so);
|
|
||||||
}
|
|
||||||
exit(status);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
- regress - main loop of regression test
|
|
||||||
== void regress(FILE *in);
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
regress(in)
|
|
||||||
FILE *in;
|
|
||||||
{
|
|
||||||
char inbuf[1000];
|
|
||||||
# define MAXF 10
|
|
||||||
char *f[MAXF];
|
|
||||||
int nf;
|
|
||||||
int i;
|
|
||||||
char erbuf[100];
|
|
||||||
size_t ne;
|
|
||||||
char *badpat = "invalid regular expression";
|
|
||||||
# define SHORT 10
|
|
||||||
char *bpname = "REG_BADPAT";
|
|
||||||
regex_t re;
|
|
||||||
|
|
||||||
while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
|
|
||||||
line++;
|
|
||||||
if (inbuf[0] == '#' || inbuf[0] == '\n')
|
|
||||||
continue; /* NOTE CONTINUE */
|
|
||||||
inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
|
|
||||||
if (debug)
|
|
||||||
fprintf(stdout, "%d:\n", line);
|
|
||||||
nf = split(inbuf, f, MAXF, "\t\t");
|
|
||||||
if (nf < 3) {
|
|
||||||
fprintf(stderr, "bad input, line %d\n", line);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
for (i = 0; i < nf; i++)
|
|
||||||
if (strcmp(f[i], "\"\"") == 0)
|
|
||||||
f[i] = "";
|
|
||||||
if (nf <= 3)
|
|
||||||
f[3] = NULL;
|
|
||||||
if (nf <= 4)
|
|
||||||
f[4] = NULL;
|
|
||||||
try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
|
|
||||||
if (opt('&', f[1])) /* try with either type of RE */
|
|
||||||
try(f[0], f[1], f[2], f[3], f[4],
|
|
||||||
options('c', f[1]) &~ REG_EXTENDED);
|
|
||||||
}
|
|
||||||
|
|
||||||
ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
|
|
||||||
if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
|
|
||||||
fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
|
|
||||||
erbuf, badpat);
|
|
||||||
status = 1;
|
|
||||||
}
|
|
||||||
ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
|
|
||||||
if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
|
|
||||||
ne != strlen(badpat)+1) {
|
|
||||||
fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
|
|
||||||
erbuf, SHORT-1, badpat);
|
|
||||||
status = 1;
|
|
||||||
}
|
|
||||||
ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
|
|
||||||
if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
|
|
||||||
fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
|
|
||||||
erbuf, bpname);
|
|
||||||
status = 1;
|
|
||||||
}
|
|
||||||
re.re_endp = bpname;
|
|
||||||
ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
|
|
||||||
if (atoi(erbuf) != (int)REG_BADPAT) {
|
|
||||||
fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
|
|
||||||
erbuf, (long)REG_BADPAT);
|
|
||||||
status = 1;
|
|
||||||
} else if (ne != strlen(erbuf)+1) {
|
|
||||||
fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
|
|
||||||
erbuf, (long)REG_BADPAT);
|
|
||||||
status = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
- try - try it, and report on problems
|
|
||||||
== void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
try(f0, f1, f2, f3, f4, opts)
|
|
||||||
char *f0;
|
|
||||||
char *f1;
|
|
||||||
char *f2;
|
|
||||||
char *f3;
|
|
||||||
char *f4;
|
|
||||||
int opts; /* may not match f1 */
|
|
||||||
{
|
|
||||||
regex_t re;
|
|
||||||
# define NSUBS 10
|
|
||||||
regmatch_t subs[NSUBS];
|
|
||||||
# define NSHOULD 15
|
|
||||||
char *should[NSHOULD];
|
|
||||||
int nshould;
|
|
||||||
char erbuf[100];
|
|
||||||
int err;
|
|
||||||
int len;
|
|
||||||
char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
|
|
||||||
register int i;
|
|
||||||
char *grump;
|
|
||||||
char f0copy[1000];
|
|
||||||
char f2copy[1000];
|
|
||||||
|
|
||||||
strcpy(f0copy, f0);
|
|
||||||
re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
|
|
||||||
fixstr(f0copy);
|
|
||||||
err = regcomp(&re, f0copy, opts);
|
|
||||||
if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
|
|
||||||
/* unexpected error or wrong error */
|
|
||||||
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
|
||||||
fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
|
|
||||||
line, type, eprint(err), len,
|
|
||||||
sizeof(erbuf), erbuf);
|
|
||||||
status = 1;
|
|
||||||
} else if (err == 0 && opt('C', f1)) {
|
|
||||||
/* unexpected success */
|
|
||||||
fprintf(stderr, "%d: %s should have given REG_%s\n",
|
|
||||||
line, type, f2);
|
|
||||||
status = 1;
|
|
||||||
err = 1; /* so we won't try regexec */
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err != 0) {
|
|
||||||
regfree(&re);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
strcpy(f2copy, f2);
|
|
||||||
fixstr(f2copy);
|
|
||||||
|
|
||||||
if (options('e', f1)®_STARTEND) {
|
|
||||||
if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
|
|
||||||
fprintf(stderr, "%d: bad STARTEND syntax\n", line);
|
|
||||||
subs[0].rm_so = strchr(f2, '(') - f2 + 1;
|
|
||||||
subs[0].rm_eo = strchr(f2, ')') - f2;
|
|
||||||
}
|
|
||||||
err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
|
|
||||||
|
|
||||||
if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
|
|
||||||
/* unexpected error or wrong error */
|
|
||||||
len = regerror(err, &re, erbuf, sizeof(erbuf));
|
|
||||||
fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
|
|
||||||
line, type, eprint(err), len,
|
|
||||||
sizeof(erbuf), erbuf);
|
|
||||||
status = 1;
|
|
||||||
} else if (err != 0) {
|
|
||||||
/* nothing more to check */
|
|
||||||
} else if (f3 == NULL) {
|
|
||||||
/* unexpected success */
|
|
||||||
fprintf(stderr, "%d: %s exec should have failed\n",
|
|
||||||
line, type);
|
|
||||||
status = 1;
|
|
||||||
err = 1; /* just on principle */
|
|
||||||
} else if (opts®_NOSUB) {
|
|
||||||
/* nothing more to check */
|
|
||||||
} else if ((grump = check(f2, subs[0], f3)) != NULL) {
|
|
||||||
fprintf(stderr, "%d: %s %s\n", line, type, grump);
|
|
||||||
status = 1;
|
|
||||||
err = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (err != 0 || f4 == NULL) {
|
|
||||||
regfree(&re);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 1; i < NSHOULD; i++)
|
|
||||||
should[i] = NULL;
|
|
||||||
nshould = split(f4, should+1, NSHOULD-1, ",");
|
|
||||||
if (nshould == 0) {
|
|
||||||
nshould = 1;
|
|
||||||
should[1] = "";
|
|
||||||
}
|
|
||||||
for (i = 1; i < NSUBS; i++) {
|
|
||||||
grump = check(f2, subs[i], should[i]);
|
|
||||||
if (grump != NULL) {
|
|
||||||
fprintf(stderr, "%d: %s $%d %s\n", line,
|
|
||||||
type, i, grump);
|
|
||||||
status = 1;
|
|
||||||
err = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
regfree(&re);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
- options - pick options out of a regression-test string
|
|
||||||
== int options(int type, char *s);
|
|
||||||
*/
|
|
||||||
int
|
|
||||||
options(type, s)
|
|
||||||
int type; /* 'c' compile, 'e' exec */
|
|
||||||
char *s;
|
|
||||||
{
|
|
||||||
register char *p;
|
|
||||||
register int o = (type == 'c') ? copts : eopts;
|
|
||||||
register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
|
|
||||||
|
|
||||||
for (p = s; *p != '\0'; p++)
|
|
||||||
if (strchr(legal, *p) != NULL)
|
|
||||||
switch (*p) {
|
|
||||||
case 'b':
|
|
||||||
o &= ~REG_EXTENDED;
|
|
||||||
break;
|
|
||||||
case 'i':
|
|
||||||
o |= REG_ICASE;
|
|
||||||
break;
|
|
||||||
case 's':
|
|
||||||
o |= REG_NOSUB;
|
|
||||||
break;
|
|
||||||
case 'n':
|
|
||||||
o |= REG_NEWLINE;
|
|
||||||
break;
|
|
||||||
case 'm':
|
|
||||||
o &= ~REG_EXTENDED;
|
|
||||||
o |= REG_NOSPEC;
|
|
||||||
break;
|
|
||||||
case 'p':
|
|
||||||
o |= REG_PEND;
|
|
||||||
break;
|
|
||||||
case '^':
|
|
||||||
o |= REG_NOTBOL;
|
|
||||||
break;
|
|
||||||
case '$':
|
|
||||||
o |= REG_NOTEOL;
|
|
||||||
break;
|
|
||||||
case '#':
|
|
||||||
o |= REG_STARTEND;
|
|
||||||
break;
|
|
||||||
case 't': /* trace */
|
|
||||||
o |= REG_TRACE;
|
|
||||||
break;
|
|
||||||
case 'l': /* force long representation */
|
|
||||||
o |= REG_LARGE;
|
|
||||||
break;
|
|
||||||
case 'r': /* force backref use */
|
|
||||||
o |= REG_BACKR;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return(o);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 */
/*
 * Returns 1 when strchr() finds character `c' in the option string `s',
 * 0 otherwise.
 */
int				/* predicate */
opt(int c, char *s)
{
	if (strchr(s, c) == NULL)
		return 0;
	return 1;
}
|
|
||||||
|
|
||||||
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 */
/*
 * Rewrites the string in place: 'N' becomes newline, 'T' becomes tab,
 * 'S' becomes space and 'Z' becomes NUL.  The scan stops at the first NUL
 * that was already present; a NUL planted for a 'Z' does not stop it,
 * because the pointer has moved on before the next test.  A NULL argument
 * is ignored.
 */
void
fixstr(register char *p)
{
	if (p == NULL)
		return;

	while (*p != '\0') {
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
		p++;
	}
}
|
|
||||||
|
|
||||||
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 */
/*
 * Compares one reported match `sub' (offsets into `str') with what the
 * test expected.
 *
 *	should == NULL or "-"	no match expected
 *	should == "@text"	an empty match expected, located where `text'
 *				begins ("@" alone means at end of string)
 *	anything else		the exact text the match should cover
 *
 * Returns NULL when the match is as expected, otherwise a complaint.  The
 * complaint is either a string literal or the function's static buffer,
 * which the next call overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	register int len;
	register int shlen;
	register int show;
	register char *p;
	static char grump[500];
	register char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
							(long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		sprintf(grump, "start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;
	/* cap how much matched text is echoed so grump cannot overflow */
	show = (int)sizeof(grump) - 40;
	if (len < show)
		show = len;

	/* check for not supposed to match -- must precede strlen(should) */
	if (should == NULL) {
		sprintf(grump, "matched `%.*s'", show, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		sprintf(grump, "matched `%.*s' instead", show, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, shlen) != 0) {
		sprintf(grump, "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
- eprint - convert error number to name
|
|
||||||
== static char *eprint(int err);
|
|
||||||
*/
|
|
||||||
static char *
|
|
||||||
eprint(err)
|
|
||||||
int err;
|
|
||||||
{
|
|
||||||
static char epbuf[100];
|
|
||||||
size_t len;
|
|
||||||
|
|
||||||
len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
|
|
||||||
assert(len <= sizeof(epbuf));
|
|
||||||
return(epbuf);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
- efind - convert error name to number
|
|
||||||
== static int efind(char *name);
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
efind(name)
|
|
||||||
char *name;
|
|
||||||
{
|
|
||||||
static char efbuf[100];
|
|
||||||
size_t n;
|
|
||||||
regex_t re;
|
|
||||||
|
|
||||||
sprintf(efbuf, "REG_%s", name);
|
|
||||||
assert(strlen(efbuf) < sizeof(efbuf));
|
|
||||||
re.re_endp = efbuf;
|
|
||||||
(void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
|
|
||||||
return(atoi(efbuf));
|
|
||||||
}
|
|
189
src/regex/regc_cvec.c
Normal file
189
src/regex/regc_cvec.c
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
/*
|
||||||
|
* Utility functions for handling cvecs
|
||||||
|
* This file is #included by regcomp.c.
|
||||||
|
*
|
||||||
|
* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
|
||||||
|
*
|
||||||
|
* Development of this software was funded, in part, by Cray Research Inc.,
|
||||||
|
* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
|
||||||
|
* Corporation, none of whom are responsible for the results. The author
|
||||||
|
* thanks all of them.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms -- with or without
|
||||||
|
* modification -- are permitted for any purpose, provided that
|
||||||
|
* redistributions in source form retain this entire copyright notice and
|
||||||
|
* indicate the origin and nature of any modifications.
|
||||||
|
*
|
||||||
|
* I'd appreciate being given credit for this package in the documentation
|
||||||
|
* of software which uses it, but that is not a requirement.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
||||||
|
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||||
|
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
* HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||||
|
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||||
|
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||||
|
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* $Header$
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* newcvec - allocate a new cvec
|
||||||
|
*/
|
||||||
|
static struct cvec *
|
||||||
|
newcvec(int nchrs, /* to hold this many chrs... */
|
||||||
|
int nranges, /* ... and this many ranges... */
|
||||||
|
int nmcces) /* ... and this many MCCEs */
|
||||||
|
{
|
||||||
|
size_t n;
|
||||||
|
size_t nc;
|
||||||
|
struct cvec *cv;
|
||||||
|
|
||||||
|
nc = (size_t) nchrs + (size_t) nmcces *(MAXMCCE + 1) + (size_t) nranges *2;
|
||||||
|
|
||||||
|
n = sizeof(struct cvec) + (size_t) (nmcces - 1) * sizeof(chr *)
|
||||||
|
+ nc * sizeof(chr);
|
||||||
|
cv = (struct cvec *) MALLOC(n);
|
||||||
|
if (cv == NULL)
|
||||||
|
return NULL;
|
||||||
|
cv->chrspace = nchrs;
|
||||||
|
cv->chrs = (chr *) &cv->mcces[nmcces]; /* chrs just after MCCE
|
||||||
|
* ptrs */
|
||||||
|
cv->mccespace = nmcces;
|
||||||
|
cv->ranges = cv->chrs + nchrs + nmcces * (MAXMCCE + 1);
|
||||||
|
cv->rangespace = nranges;
|
||||||
|
return clearcvec(cv);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* clearcvec - clear a possibly-new cvec
|
||||||
|
* Returns pointer as convenience.
|
||||||
|
*/
|
||||||
|
static struct cvec *
|
||||||
|
clearcvec(struct cvec * cv)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
assert(cv != NULL);
|
||||||
|
cv->nchrs = 0;
|
||||||
|
assert(cv->chrs == (chr *) &cv->mcces[cv->mccespace]);
|
||||||
|
cv->nmcces = 0;
|
||||||
|
cv->nmccechrs = 0;
|
||||||
|
cv->nranges = 0;
|
||||||
|
for (i = 0; i < cv->mccespace; i++)
|
||||||
|
cv->mcces[i] = NULL;
|
||||||
|
|
||||||
|
return cv;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* addchr - add a chr to a cvec
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
addchr(struct cvec * cv, /* character vector */
|
||||||
|
chr c) /* character to add */
|
||||||
|
{
|
||||||
|
assert(cv->nchrs < cv->chrspace - cv->nmccechrs);
|
||||||
|
cv->chrs[cv->nchrs++] = (chr) c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* addrange - add a range to a cvec
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
addrange(struct cvec * cv, /* character vector */
|
||||||
|
chr from, /* first character of range */
|
||||||
|
chr to) /* last character of range */
|
||||||
|
{
|
||||||
|
assert(cv->nranges < cv->rangespace);
|
||||||
|
cv->ranges[cv->nranges * 2] = (chr) from;
|
||||||
|
cv->ranges[cv->nranges * 2 + 1] = (chr) to;
|
||||||
|
cv->nranges++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* addmcce - add an MCCE to a cvec
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
addmcce(struct cvec * cv, /* character vector */
|
||||||
|
chr *startp, /* beginning of text */
|
||||||
|
chr *endp) /* just past end of text */
|
||||||
|
{
|
||||||
|
int len;
|
||||||
|
int i;
|
||||||
|
chr *s;
|
||||||
|
chr *d;
|
||||||
|
|
||||||
|
if (startp == NULL && endp == NULL)
|
||||||
|
return;
|
||||||
|
len = endp - startp;
|
||||||
|
assert(len > 0);
|
||||||
|
assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs);
|
||||||
|
assert(cv->nmcces < cv->mccespace);
|
||||||
|
d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1];
|
||||||
|
cv->mcces[cv->nmcces++] = d;
|
||||||
|
for (s = startp, i = len; i > 0; s++, i--)
|
||||||
|
*d++ = *s;
|
||||||
|
*d++ = 0; /* endmarker */
|
||||||
|
assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]);
|
||||||
|
cv->nmccechrs += len + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* haschr - does a cvec contain this chr?
|
||||||
|
*/
|
||||||
|
static int /* predicate */
|
||||||
|
haschr(struct cvec * cv, /* character vector */
|
||||||
|
chr c) /* character to test for */
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
chr *p;
|
||||||
|
|
||||||
|
for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--)
|
||||||
|
{
|
||||||
|
if (*p == c)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--)
|
||||||
|
{
|
||||||
|
if ((*p <= c) && (c <= *(p + 1)))
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* getcvec - get a cvec, remembering it as v->cv
|
||||||
|
*/
|
||||||
|
static struct cvec *
|
||||||
|
getcvec(struct vars * v, /* context */
|
||||||
|
int nchrs, /* to hold this many chrs... */
|
||||||
|
int nranges, /* ... and this many ranges... */
|
||||||
|
int nmcces) /* ... and this many MCCEs */
|
||||||
|
{
|
||||||
|
if (v->cv != NULL && nchrs <= v->cv->chrspace &&
|
||||||
|
nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace)
|
||||||
|
return clearcvec(v->cv);
|
||||||
|
|
||||||
|
if (v->cv != NULL)
|
||||||
|
freecvec(v->cv);
|
||||||
|
v->cv = newcvec(nchrs, nranges, nmcces);
|
||||||
|
if (v->cv == NULL)
|
||||||
|
ERR(REG_ESPACE);
|
||||||
|
|
||||||
|
return v->cv;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * freecvec - free a cvec
 *
 * Releases the cvec by passing it to FREE().
 */
static void
freecvec(struct cvec * cv)
{
	FREE(cv);
}
|
@@ -1,235 +0,0 @@
|
|||||||
.TH REGEX 7 "25 Oct 1995"
|
|
||||||
.BY "Henry Spencer"
|
|
||||||
.SH NAME
|
|
||||||
regex \- POSIX 1003.2 regular expressions
|
|
||||||
.SH DESCRIPTION
|
|
||||||
Regular expressions (``RE''s),
|
|
||||||
as defined in POSIX 1003.2, come in two forms:
|
|
||||||
modern REs (roughly those of
|
|
||||||
.IR egrep ;
|
|
||||||
1003.2 calls these ``extended'' REs)
|
|
||||||
and obsolete REs (roughly those of
|
|
||||||
.IR ed ;
|
|
||||||
1003.2 ``basic'' REs).
|
|
||||||
Obsolete REs mostly exist for backward compatibility in some old programs;
|
|
||||||
they will be discussed at the end.
|
|
||||||
1003.2 leaves some aspects of RE syntax and semantics open;
|
|
||||||
`\(dg' marks decisions on these aspects that
|
|
||||||
may not be fully portable to other 1003.2 implementations.
|
|
||||||
.PP
|
|
||||||
A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR,
|
|
||||||
separated by `|'.
|
|
||||||
It matches anything that matches one of the branches.
|
|
||||||
.PP
|
|
||||||
A branch is one\(dg or more \fIpieces\fR, concatenated.
|
|
||||||
It matches a match for the first, followed by a match for the second, etc.
|
|
||||||
.PP
|
|
||||||
A piece is an \fIatom\fR possibly followed
|
|
||||||
by a single\(dg `*', `+', `?', or \fIbound\fR.
|
|
||||||
An atom followed by `*' matches a sequence of 0 or more matches of the atom.
|
|
||||||
An atom followed by `+' matches a sequence of 1 or more matches of the atom.
|
|
||||||
An atom followed by `?' matches a sequence of 0 or 1 matches of the atom.
|
|
||||||
.PP
|
|
||||||
A \fIbound\fR is `{' followed by an unsigned decimal integer,
|
|
||||||
possibly followed by `,'
|
|
||||||
possibly followed by another unsigned decimal integer,
|
|
||||||
always followed by `}'.
|
|
||||||
The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive,
|
|
||||||
and if there are two of them, the first may not exceed the second.
|
|
||||||
An atom followed by a bound containing one integer \fIi\fR
|
|
||||||
and no comma matches
|
|
||||||
a sequence of exactly \fIi\fR matches of the atom.
|
|
||||||
An atom followed by a bound
|
|
||||||
containing one integer \fIi\fR and a comma matches
|
|
||||||
a sequence of \fIi\fR or more matches of the atom.
|
|
||||||
An atom followed by a bound
|
|
||||||
containing two integers \fIi\fR and \fIj\fR matches
|
|
||||||
a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom.
|
|
||||||
.PP
|
|
||||||
An atom is a regular expression enclosed in `()' (matching a match for the
|
|
||||||
regular expression),
|
|
||||||
an empty set of `()' (matching the null string)\(dg,
|
|
||||||
a \fIbracket expression\fR (see below), `.'
|
|
||||||
(matching any single character), `^' (matching the null string at the
|
|
||||||
beginning of a line), `$' (matching the null string at the
|
|
||||||
end of a line), a `\e' followed by one of the characters
|
|
||||||
`^.[$()|*+?{\e'
|
|
||||||
(matching that character taken as an ordinary character),
|
|
||||||
a `\e' followed by any other character\(dg
|
|
||||||
(matching that character taken as an ordinary character,
|
|
||||||
as if the `\e' had not been present\(dg),
|
|
||||||
or a single character with no other significance (matching that character).
|
|
||||||
A `{' followed by a character other than a digit is an ordinary
|
|
||||||
character, not the beginning of a bound\(dg.
|
|
||||||
It is illegal to end an RE with `\e'.
|
|
||||||
.PP
|
|
||||||
A \fIbracket expression\fR is a list of characters enclosed in `[]'.
|
|
||||||
It normally matches any single character from the list (but see below).
|
|
||||||
If the list begins with `^',
|
|
||||||
it matches any single character
|
|
||||||
(but see below) \fInot\fR from the rest of the list.
|
|
||||||
If two characters in the list are separated by `\-', this is shorthand
|
|
||||||
for the full \fIrange\fR of characters between those two (inclusive) in the
|
|
||||||
collating sequence,
|
|
||||||
e.g. `[0\-9]' in ASCII matches any decimal digit.
|
|
||||||
It is illegal\(dg for two ranges to share an
|
|
||||||
endpoint, e.g. `a\-c\-e'.
|
|
||||||
Ranges are very collating-sequence-dependent,
|
|
||||||
and portable programs should avoid relying on them.
|
|
||||||
.PP
|
|
||||||
To include a literal `]' in the list, make it the first character
|
|
||||||
(following a possible `^').
|
|
||||||
To include a literal `\-', make it the first or last character,
|
|
||||||
or the second endpoint of a range.
|
|
||||||
To use a literal `\-' as the first endpoint of a range,
|
|
||||||
enclose it in `[.' and `.]' to make it a collating element (see below).
|
|
||||||
With the exception of these and some combinations using `[' (see next
|
|
||||||
paragraphs), all other special characters, including `\e', lose their
|
|
||||||
special significance within a bracket expression.
|
|
||||||
.PP
|
|
||||||
Within a bracket expression, a collating element (a character,
|
|
||||||
a multi-character sequence that collates as if it were a single character,
|
|
||||||
or a collating-sequence name for either)
|
|
||||||
enclosed in `[.' and `.]' stands for the
|
|
||||||
sequence of characters of that collating element.
|
|
||||||
The sequence is a single element of the bracket expression's list.
|
|
||||||
A bracket expression containing a multi-character collating element
|
|
||||||
can thus match more than one character,
|
|
||||||
e.g. if the collating sequence includes a `ch' collating element,
|
|
||||||
then the RE `[[.ch.]]*c' matches the first five characters
|
|
||||||
of `chchcc'.
|
|
||||||
.PP
|
|
||||||
Within a bracket expression, a collating element enclosed in `[=' and
|
|
||||||
`=]' is an equivalence class, standing for the sequences of characters
|
|
||||||
of all collating elements equivalent to that one, including itself.
|
|
||||||
(If there are no other equivalent collating elements,
|
|
||||||
the treatment is as if the enclosing delimiters were `[.' and `.]'.)
|
|
||||||
For example, if o and \o'o^' are the members of an equivalence class,
|
|
||||||
then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous.
|
|
||||||
An equivalence class may not\(dg be an endpoint
|
|
||||||
of a range.
|
|
||||||
.PP
|
|
||||||
Within a bracket expression, the name of a \fIcharacter class\fR enclosed
|
|
||||||
in `[:' and `:]' stands for the list of all characters belonging to that
|
|
||||||
class.
|
|
||||||
Standard character class names are:
|
|
||||||
.PP
|
|
||||||
.RS
|
|
||||||
.nf
|
|
||||||
.ta 3c 6c 9c
|
|
||||||
alnum digit punct
|
|
||||||
alpha graph space
|
|
||||||
blank lower upper
|
|
||||||
cntrl print xdigit
|
|
||||||
.fi
|
|
||||||
.RE
|
|
||||||
.PP
|
|
||||||
These stand for the character classes defined in
|
|
||||||
.IR ctype (3).
|
|
||||||
A locale may provide others.
|
|
||||||
A character class may not be used as an endpoint of a range.
|
|
||||||
.PP
|
|
||||||
There are two special cases\(dg of bracket expressions:
|
|
||||||
the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at
|
|
||||||
the beginning and end of a word respectively.
|
|
||||||
A word is defined as a sequence of
|
|
||||||
word characters
|
|
||||||
which is neither preceded nor followed by
|
|
||||||
word characters.
|
|
||||||
A word character is an
|
|
||||||
.I alnum
|
|
||||||
character (as defined by
|
|
||||||
.IR ctype (3))
|
|
||||||
or an underscore.
|
|
||||||
This is an extension,
|
|
||||||
compatible with but not specified by POSIX 1003.2,
|
|
||||||
and should be used with
|
|
||||||
caution in software intended to be portable to other systems.
|
|
||||||
.PP
|
|
||||||
In the event that an RE could match more than one substring of a given
|
|
||||||
string,
|
|
||||||
the RE matches the one starting earliest in the string.
|
|
||||||
If the RE could match more than one substring starting at that point,
|
|
||||||
it matches the longest.
|
|
||||||
Subexpressions also match the longest possible substrings, subject to
|
|
||||||
the constraint that the whole match be as long as possible,
|
|
||||||
with subexpressions starting earlier in the RE taking priority over
|
|
||||||
ones starting later.
|
|
||||||
Note that higher-level subexpressions thus take priority over
|
|
||||||
their lower-level component subexpressions.
|
|
||||||
.PP
|
|
||||||
Match lengths are measured in characters, not collating elements.
|
|
||||||
A null string is considered longer than no match at all.
|
|
||||||
For example,
|
|
||||||
`bb*' matches the three middle characters of `abbbc',
|
|
||||||
`(wee|week)(knights|nights)' matches all ten characters of `weeknights',
|
|
||||||
when `(.*).*' is matched against `abc' the parenthesized subexpression
|
|
||||||
matches all three characters, and
|
|
||||||
when `(a*)*' is matched against `bc' both the whole RE and the parenthesized
|
|
||||||
subexpression match the null string.
|
|
||||||
.PP
|
|
||||||
If case-independent matching is specified,
|
|
||||||
the effect is much as if all case distinctions had vanished from the
|
|
||||||
alphabet.
|
|
||||||
When an alphabetic that exists in multiple cases appears as an
|
|
||||||
ordinary character outside a bracket expression, it is effectively
|
|
||||||
transformed into a bracket expression containing both cases,
|
|
||||||
e.g. `x' becomes `[xX]'.
|
|
||||||
When it appears inside a bracket expression, all case counterparts
|
|
||||||
of it are added to the bracket expression, so that (e.g.) `[x]'
|
|
||||||
becomes `[xX]' and `[^x]' becomes `[^xX]'.
|
|
||||||
.PP
|
|
||||||
No particular limit is imposed on the length of REs\(dg.
|
|
||||||
Programs intended to be portable should not employ REs longer
|
|
||||||
than 256 bytes,
|
|
||||||
as an implementation can refuse to accept such REs and remain
|
|
||||||
POSIX-compliant.
|
|
||||||
.PP
|
|
||||||
Obsolete (``basic'') regular expressions differ in several respects.
|
|
||||||
`|', `+', and `?' are ordinary characters and there is no equivalent
|
|
||||||
for their functionality.
|
|
||||||
The delimiters for bounds are `\e{' and `\e}',
|
|
||||||
with `{' and `}' by themselves ordinary characters.
|
|
||||||
The parentheses for nested subexpressions are `\e(' and `\e)',
|
|
||||||
with `(' and `)' by themselves ordinary characters.
|
|
||||||
`^' is an ordinary character except at the beginning of the
|
|
||||||
RE or\(dg the beginning of a parenthesized subexpression,
|
|
||||||
`$' is an ordinary character except at the end of the
|
|
||||||
RE or\(dg the end of a parenthesized subexpression,
|
|
||||||
and `*' is an ordinary character if it appears at the beginning of the
|
|
||||||
RE or the beginning of a parenthesized subexpression
|
|
||||||
(after a possible leading `^').
|
|
||||||
Finally, there is one new type of atom, a \fIback reference\fR:
|
|
||||||
`\e' followed by a non-zero decimal digit \fId\fR
|
|
||||||
matches the same sequence of characters
|
|
||||||
matched by the \fId\fRth parenthesized subexpression
|
|
||||||
(numbering subexpressions by the positions of their opening parentheses,
|
|
||||||
left to right),
|
|
||||||
so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'.
|
|
||||||
.SH SEE ALSO
|
|
||||||
regex(3)
|
|
||||||
.PP
|
|
||||||
POSIX 1003.2, section 2.8 (Regular Expression Notation).
|
|
||||||
.SH HISTORY
|
|
||||||
Written by Henry Spencer, based on the 1003.2 spec.
|
|
||||||
.SH BUGS
|
|
||||||
Having two kinds of REs is a botch.
|
|
||||||
.PP
|
|
||||||
The current 1003.2 spec says that `)' is an ordinary character in
|
|
||||||
the absence of an unmatched `(';
|
|
||||||
this was an unintentional result of a wording error,
|
|
||||||
and change is likely.
|
|
||||||
Avoid relying on it.
|
|
||||||
.PP
|
|
||||||
Back references are a dreadful botch,
|
|
||||||
posing major problems for efficient implementations.
|
|
||||||
They are also somewhat vaguely defined
|
|
||||||
(does
|
|
||||||
`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?).
|
|
||||||
Avoid using them.
|
|
||||||
.PP
|
|
||||||
1003.2's specification of case-independent matching is vague.
|
|
||||||
The ``one case implies all cases'' definition given above
|
|
||||||
is current consensus among implementors as to the right interpretation.
|
|
||||||
.PP
|
|
||||||
The syntax for word boundaries is incredibly ugly.
|
|
@@ -1,134 +0,0 @@
|
|||||||
/*
|
|
||||||
* First, the stuff that ends up in the outside-world include file
|
|
||||||
= typedef off_t regoff_t;
|
|
||||||
= typedef struct {
|
|
||||||
= int re_magic;
|
|
||||||
= size_t re_nsub; // number of parenthesized subexpressions
|
|
||||||
= const char *re_endp; // end pointer for REG_PEND
|
|
||||||
= struct re_guts *re_g; // none of your business :-)
|
|
||||||
= } regex_t;
|
|
||||||
= typedef struct {
|
|
||||||
= regoff_t rm_so; // start of match
|
|
||||||
= regoff_t rm_eo; // end of match
|
|
||||||
= } regmatch_t;
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
* internals of regex_t
|
|
||||||
*/
|
|
||||||
#define MAGIC1 ((('r'^0200)<<8) | 'e')
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The internal representation is a *strip*, a sequence of
|
|
||||||
* operators ending with an endmarker. (Some terminology etc. is a
|
|
||||||
* historical relic of earlier versions which used multiple strips.)
|
|
||||||
* Certain oddities in the representation are there to permit running
|
|
||||||
* the machinery backwards; in particular, any deviation from sequential
|
|
||||||
* flow must be marked at both its source and its destination. Some
|
|
||||||
* fine points:
|
|
||||||
*
|
|
||||||
* - OPLUS_ and O_PLUS are *inside* the loop they create.
|
|
||||||
* - OQUEST_ and O_QUEST are *outside* the bypass they create.
|
|
||||||
* - OCH_ and O_CH are *outside* the multi-way branch they create, while
|
|
||||||
* OOR1 and OOR2 are respectively the end and the beginning of one of
|
|
||||||
* the branches. Note that there is an implicit OOR2 following OCH_
|
|
||||||
* and an implicit OOR1 preceding O_CH.
|
|
||||||
*
|
|
||||||
* In state representations, an operator's bit is on to signify a state
|
|
||||||
* immediately *preceding* "execution" of that operator.
|
|
||||||
*/
|
|
||||||
typedef long sop; /* strip operator */
|
|
||||||
typedef long sopno;
|
|
||||||
#define OPRMASK 0x7c000000
|
|
||||||
#define OPDMASK 0x03ffffff
|
|
||||||
#define OPSHIFT (26)
|
|
||||||
#define OP(n) ((n)&OPRMASK)
|
|
||||||
#define OPND(n) ((n)&OPDMASK)
|
|
||||||
#define SOP(op, opnd) ((op)|(opnd))
|
|
||||||
/* operators meaning operand */
|
|
||||||
/* (back, fwd are offsets) */
|
|
||||||
#define OEND (1<<OPSHIFT) /* endmarker - */
|
|
||||||
#define OCHAR (2<<OPSHIFT) /* character unsigned char */
|
|
||||||
#define OBOL (3<<OPSHIFT) /* left anchor - */
|
|
||||||
#define OEOL (4<<OPSHIFT) /* right anchor - */
|
|
||||||
#define OANY (5<<OPSHIFT) /* . - */
|
|
||||||
#define OANYOF (6<<OPSHIFT) /* [...] set number */
|
|
||||||
#define OBACK_ (7<<OPSHIFT) /* begin \d paren number */
|
|
||||||
#define O_BACK (8<<OPSHIFT) /* end \d paren number */
|
|
||||||
#define OPLUS_ (9<<OPSHIFT) /* + prefix fwd to suffix */
|
|
||||||
#define O_PLUS (10<<OPSHIFT) /* + suffix back to prefix */
|
|
||||||
#define OQUEST_ (11<<OPSHIFT) /* ? prefix fwd to suffix */
|
|
||||||
#define O_QUEST (12<<OPSHIFT) /* ? suffix back to prefix */
|
|
||||||
#define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */
|
|
||||||
#define ORPAREN (14<<OPSHIFT) /* ) back to ( */
|
|
||||||
#define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */
|
|
||||||
#define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
|
|
||||||
#define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
|
|
||||||
#define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */
|
|
||||||
#define OBOW (19<<OPSHIFT) /* begin word - */
|
|
||||||
#define OEOW (20<<OPSHIFT) /* end word - */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Structure for [] character-set representation. Character sets are
|
|
||||||
* done as bit vectors, grouped 8 to a byte vector for compactness.
|
|
||||||
* The individual set therefore has both a pointer to the byte vector
|
|
||||||
* and a mask to pick out the relevant bit of each byte. A hash code
|
|
||||||
* simplifies testing whether two sets could be identical.
|
|
||||||
*
|
|
||||||
* This will get trickier for multicharacter collating elements. As
|
|
||||||
* preliminary hooks for dealing with such things, we also carry along
|
|
||||||
* a string of multi-character elements, and decide the size of the
|
|
||||||
* vectors at run time.
|
|
||||||
*/
|
|
||||||
/* one [] character set: a byte-vector pointer plus the bit that selects this set */
typedef struct {
|
|
||||||
uch *ptr; /* -> uch [csetsize] */
|
|
||||||
uch mask; /* bit within array */
|
|
||||||
uch hash; /* hash code */
|
|
||||||
size_t smultis; /* NOTE(review): apparently the allocated size of multis[] -- confirm */
|
|
||||||
char *multis; /* -> char[smultis] ab\0cd\0ef\0\0 */
|
|
||||||
} cset;
|
|
||||||
/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
|
|
||||||
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
|
|
||||||
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
|
|
||||||
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
|
|
||||||
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
|
|
||||||
#define MCsub(p, cs, cp) mcsub(p, cs, cp)
|
|
||||||
#define MCin(p, cs, cp) mcin(p, cs, cp)
|
|
||||||
|
|
||||||
/* stuff for character categories */
|
|
||||||
typedef unsigned char cat_t;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* main compiled-expression structure
|
|
||||||
*/
|
|
||||||
struct re_guts {
|
|
||||||
int magic;
|
|
||||||
# define MAGIC2 ((('R'^0200)<<8)|'E')
|
|
||||||
sop *strip; /* malloced area for strip */
|
|
||||||
int csetsize; /* number of bits in a cset vector */
|
|
||||||
int ncsets; /* number of csets in use */
|
|
||||||
cset *sets; /* -> cset [ncsets] */
|
|
||||||
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
|
|
||||||
int cflags; /* copy of regcomp() cflags argument */
|
|
||||||
sopno nstates; /* = number of sops */
|
|
||||||
sopno firststate; /* the initial OEND (normally 0) */
|
|
||||||
sopno laststate; /* the final OEND */
|
|
||||||
int iflags; /* internal flags */
|
|
||||||
# define USEBOL 01 /* used ^ */
|
|
||||||
# define USEEOL 02 /* used $ */
|
|
||||||
# define BAD 04 /* something wrong */
|
|
||||||
int nbol; /* number of ^ used */
|
|
||||||
int neol; /* number of $ used */
|
|
||||||
int ncategories; /* how many character categories */
|
|
||||||
cat_t *categories; /* ->catspace[-CHAR_MIN] */
|
|
||||||
char *must; /* match must contain this string */
|
|
||||||
int mlen; /* length of must */
|
|
||||||
size_t nsub; /* copy of re_nsub */
|
|
||||||
int backrefs; /* does it use back references? */
|
|
||||||
sopno nplus; /* how deep does it nest +s? */
|
|
||||||
/* catspace must be last */
|
|
||||||
cat_t catspace[1]; /* actually [NC] */
|
|
||||||
};
|
|
||||||
|
|
||||||
/* misc utilities */
|
|
||||||
#define OUT (CHAR_MAX+1) /* a non-character value */
|
|
||||||
/*
 * ISWORD - is c a "word" character (alphanumeric or underscore)?
 * The <ctype.h> functions are only defined for EOF and values representable
 * as unsigned char, so the argument is converted first; passing a negative
 * plain char straight to isalnum() is undefined behaviour.
 * Note that c is evaluated twice.
 */
#define	ISWORD(c)	(isalnum((unsigned char)(c)) || (c) == '_')
|
|
@@ -1,316 +0,0 @@
|
|||||||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
|
||||||
|
|
||||||
/*
 - split - divide a string into fields, like awk split()
 = int split(char *string, char *fields[], int nfields, char *sep);
 *
 * The string is cut up in place: separators that end a stored field are
 * overwritten with NULs and fields[] receives pointers into string.
 *
 * sep selects the splitting rule:
 *	""	white space: leading blanks/tabs are skipped, fields are
 *		separated by runs of blanks/tabs, and trailing white space
 *		does not produce an empty final field
 *	"c"	every single occurrence of c separates two fields, so
 *		adjacent separators yield empty fields
 *	"ab.."	any run of characters drawn from sep is one separator
 *
 * At most nfields pointers are stored.  When the string holds more fields
 * than that, the last slot points at the unsplit remainder (in white-space
 * mode its trailing white space is trimmed only when nothing else follows
 * the last stored field) and the extra fields are merely counted.
 * When nfields <= 0 nothing is stored, the string is left untouched, and
 * the fields are only counted.
 *
 * Returns the total number of fields found, including any that did not
 * fit; 0 for an empty string.
 */
int				/* number of fields, including overflow */
split(char *string, char *fields[], int nfields, char *sep)
{
	char *p = string;
	char c;			/* latest character */
	char sepc = sep[0];
	char sepc2;
	int fn;
	char **fp = fields;
	char *sepp;
	int trimtrail;

	/* white space */
	if (sepc == '\0') {
		while ((c = *p++) == ' ' || c == '\t')
			continue;
		p--;
		trimtrail = 1;
		sep = " \t";	/* note, code below knows this is 2 long */
		sepc = ' ';
	} else
		trimtrail = 0;
	sepc2 = sep[1];		/* now we can safely pick this up */

	/* catch empties */
	if (*p == '\0')
		return(0);

	/* single separator */
	if (sepc2 == '\0') {
		if (nfields > 0) {
			fn = nfields;
			for (;;) {
				*fp++ = p;
				fn--;
				if (fn == 0)
					break;
				while ((c = *p++) != sepc)
					if (c == '\0')
						return(nfields - fn);
				*(p-1) = '\0';
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else
			fn = 1;	/* no room at all: the text at p is field 1 */
		for (;;) {
			while ((c = *p++) != sepc)
				if (c == '\0')
					return(fn);
			fn++;
		}
		/* not reached */
	}

	/* two separators */
	if (sep[2] == '\0') {
		if (nfields > 0) {
			fn = nfields;
			for (;;) {
				*fp++ = p;
				fn--;
				while ((c = *p++) != sepc && c != sepc2)
					if (c == '\0') {
						/* white-space mode: an empty last field doesn't count */
						if (trimtrail && **(fp-1) == '\0')
							fn++;
						return(nfields - fn);
					}
				if (fn == 0)
					break;
				*(p-1) = '\0';
				while ((c = *p++) == sepc || c == sepc2)
					continue;
				p--;
			}
			/* we have overflowed the fields vector -- just count them */
			fn = nfields;
		} else {
			/* no room at all: skim field 1 (known non-empty), then count */
			while ((c = *p++) != sepc && c != sepc2)
				if (c == '\0')
					return(1);
			fn = 1;
		}
		/* here c is the separator that ended the last field looked at */
		while (c != '\0') {
			while ((c = *p++) == sepc || c == sepc2)
				continue;
			p--;
			fn++;
			while ((c = *p++) != '\0' && c != sepc && c != sepc2)
				continue;
		}
		/* might have to trim trailing white space */
		if (trimtrail) {
			p--;
			while ((c = *--p) == sepc || c == sepc2)
				continue;
			p++;
			if (*p != '\0') {
				/* the last "field" counted was only trailing white space */
				if (nfields > 0 && fn == nfields+1)
					*p = '\0';
				fn--;
			}
		}
		return(fn);
	}

	/* n separators */
	fn = 0;
	for (;;) {
		if (fn < nfields)
			*fp++ = p;
		fn++;
		for (;;) {
			c = *p++;
			if (c == '\0')
				return(fn);
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc != '\0')	/* it was a separator */
				break;
		}
		if (fn < nfields)
			*(p-1) = '\0';
		for (;;) {
			c = *p++;
			sepp = sep;
			while ((sepc = *sepp++) != '\0' && sepc != c)
				continue;
			if (sepc == '\0')	/* it wasn't a separator */
				break;
		}
		p--;
	}

	/* not reached */
}
|
|
||||||
|
|
||||||
#ifdef TEST_SPLIT
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* test program
|
|
||||||
* pgm runs regression
|
|
||||||
* pgm sep splits stdin lines by sep
|
|
||||||
* pgm str sep splits str by sep
|
|
||||||
* pgm str sep n splits str by sep n times
|
|
||||||
*/
|
|
||||||
#define	MNF	10		/* capacity of fields[] for the timing runs */

/* defined further down in this file */
int dosplit(char *string, char *seps);
int regress(void);

/*
 * main - test driver; what it does depends only on the argument count
 * (see the usage summary in the comment above).  With more than three
 * arguments it repeats just the buffer copy, without calling split() --
 * presumably a baseline for timing the "str sep n" form.
 */
int
main(int argc, char *argv[])
{
	char buf[512];
	char *fields[MNF];
	long reps;

	if (argc > 4) {
		for (reps = strtol(argv[3], (char **)NULL, 10); reps > 0; reps--) {
			/* bounded copy: argv[1] may be longer than buf */
			buf[0] = '\0';
			(void) strncat(buf, argv[1], sizeof(buf) - 1);
		}
	} else if (argc > 3) {
		for (reps = strtol(argv[3], (char **)NULL, 10); reps > 0; reps--) {
			buf[0] = '\0';
			(void) strncat(buf, argv[1], sizeof(buf) - 1);
			(void) split(buf, fields, MNF, argv[2]);
		}
	} else if (argc > 2) {
		dosplit(argv[1], argv[2]);
	} else if (argc > 1) {
		while (fgets(buf, sizeof(buf), stdin) != NULL) {
			/*
			 * Stomp the newline, if there is one: an overlong
			 * line or an unterminated last line has none, and
			 * must not lose its final character.
			 */
			buf[strcspn(buf, "\n")] = '\0';
			dosplit(buf, argv[1]);
		}
	} else {
		regress();
	}

	return(0);
}
|
|
||||||
|
|
||||||
#define	NF	5		/* capacity of fields[] in dosplit() */

/* defined further down in this file */
int print(int nf, int nfp, char *fields[]);

/*
 * dosplit - split string by seps into at most NF fields and print the result
 * The string is modified in place by split().  Always returns 0.
 */
int
dosplit(char *string, char *seps)
{
	char *fields[NF];
	int nf = split(string, fields, NF, seps);

	print(nf, NF, fields);
	return(0);
}
|
|
||||||
|
|
||||||
/*
 * print - show the outcome of one split() call on stdout
 * nf is the field count split() returned, nfp the capacity of fields[];
 * only the first min(nf, nfp) entries are printed, since the rest were
 * never stored.  The line is always newline-terminated, including when
 * nf is 0 or exceeds nfp.  Always returns 0.
 */
int
print(int nf, int nfp, char *fields[])
{
	int bound = (nf > nfp) ? nfp : nf;
	int fn;

	printf("%d:\t", nf);
	for (fn = 0; fn < bound; fn++)
		printf("%s\"%s\"", (fn > 0) ? ", " : "", fields[fn]);
	printf("\n");
	return(0);
}
|
|
||||||
|
|
||||||
#define	RNF	5		/* some table entries know this */

/*
 * Regression table for split(): input string, separator spec, expected
 * return value, and the expected contents of the first min(nf, RNF) fields.
 * A NULL str ends the table.
 */
struct {
	char *str;
	char *seps;
	int nf;
	char *fi[RNF];
} tests[] = {
	/* single separator: adjacent separators give empty fields */
	{ "",		" ",	0,	{ "" } },
	{ " ",		" ",	2,	{ "", "" } },
	{ "x",		" ",	1,	{ "x" } },
	{ "xy",		" ",	1,	{ "xy" } },
	{ "x y",	" ",	2,	{ "x", "y" } },
	{ "abc def  g ",	" ",	5,	{ "abc", "def", "", "g", "" } },
	{ "  a bcd",	" ",	4,	{ "", "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " } },

	/* two separators: a run of them is one separator */
	{ "",		" _",	0,	{ "" } },
	{ " ",		" _",	2,	{ "", "" } },
	{ "x",		" _",	1,	{ "x" } },
	{ "x y",	" _",	2,	{ "x", "y" } },
	{ "ab _ cd",	" _",	2,	{ "ab", "cd" } },
	{ " a_b c ",	" _",	5,	{ "", "a", "b", "c", "" } },
	{ "a b c_d e f",	" _",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" _",	6,	{ "", "a", "b", "c", "d " } },

	/* three separators */
	{ "",		" _~",	0,	{ "" } },
	{ " ",		" _~",	2,	{ "", "" } },
	{ "x",		" _~",	1,	{ "x" } },
	{ "x y",	" _~",	2,	{ "x", "y" } },
	{ "ab _~ cd",	" _~",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c d~e f",	" _~",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a b c d ",	" _~",	6,	{ "", "a", "b", "c", "d " } },

	/* four separators */
	{ "",		" _~-",	0,	{ "" } },
	{ " ",		" _~-",	2,	{ "", "" } },
	{ "x",		" _~-",	1,	{ "x" } },
	{ "x y",	" _~-",	2,	{ "x", "y" } },
	{ "ab _~- cd",	" _~-",	2,	{ "ab", "cd" } },
	{ " a_b c~",	" _~-",	5,	{ "", "a", "b", "c", "" } },
	{ "a b_c-d~e f",	" _~-",	6,	{ "a", "b", "c", "d", "e f" } },
	{ "~a-b c d ",	" _~-",	6,	{ "", "a", "b", "c", "d " } },

	/*
	 * NOTE(review): as written this group repeats the single-separator
	 * case; it may originally have used a doubled-space separator (two
	 * equal separators) -- confirm against the upstream source.
	 */
	{ "",		" ",	0,	{ "" } },
	{ " ",		" ",	2,	{ "", "" } },
	{ "x",		" ",	1,	{ "x" } },
	{ "xy",		" ",	1,	{ "xy" } },
	{ "x y",	" ",	2,	{ "x", "y" } },
	{ "abc def g ",	" ",	4,	{ "abc", "def", "g", "" } },
	{ " a bcd",	" ",	3,	{ "", "a", "bcd" } },
	{ "a b c d e f",	" ",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d ",	" ",	6,	{ "", "a", "b", "c", "d " } },

	/* white-space mode */
	{ "",		"",	0,	{ "" } },
	{ " ",		"",	0,	{ "" } },
	{ "x",		"",	1,	{ "x" } },
	{ "xy",		"",	1,	{ "xy" } },
	{ "x y",	"",	2,	{ "x", "y" } },
	{ "abc def g ",	"",	3,	{ "abc", "def", "g" } },
	{ "\t a bcd",	"",	2,	{ "a", "bcd" } },
	{ " a \tb\t c ",	"",	3,	{ "a", "b", "c" } },
	{ "a b c d e ",	"",	5,	{ "a", "b", "c", "d", "e" } },
	{ "a b\tc d e f",	"",	6,	{ "a", "b", "c", "d", "e f" } },
	{ " a b c d e f ",	"",	6,	{ "a", "b", "c", "d", "e f " } },

	{ NULL,		NULL,	0,	{ NULL } },
};
|
|
||||||
|
|
||||||
regress()
|
|
||||||
{
|
|
||||||
char buf[512];
|
|
||||||
register int n;
|
|
||||||
char *fields[RNF+1];
|
|
||||||
register int nf;
|
|
||||||
register int i;
|
|
||||||
register int printit;
|
|
||||||
register char *f;
|
|
||||||
|
|
||||||
for (n = 0; tests[n].str != NULL; n++) {
|
|
||||||
(void) strcpy(buf, tests[n].str);
|
|
||||||
fields[RNF] = NULL;
|
|
||||||
nf = split(buf, fields, RNF, tests[n].seps);
|
|
||||||
printit = 0;
|
|
||||||
if (nf != tests[n].nf) {
|
|
||||||
printf("split `%s' by `%s' gave %d fields, not %d\n",
|
|
||||||
tests[n].str, tests[n].seps, nf, tests[n].nf);
|
|
||||||
printit = 1;
|
|
||||||
} else if (fields[RNF] != NULL) {
|
|
||||||
printf("split() went beyond array end\n");
|
|
||||||
printit = 1;
|
|
||||||
} else {
|
|
||||||
for (i = 0; i < nf && i < RNF; i++) {
|
|
||||||
f = fields[i];
|
|
||||||
if (f == NULL)
|
|
||||||
f = "(NULL)";
|
|
||||||
if (strcmp(f, tests[n].fi[i]) != 0) {
|
|
||||||
printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
|
|
||||||
tests[n].str, tests[n].seps,
|
|
||||||
i, fields[i], tests[n].fi[i]);
|
|
||||||
printit = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (printit)
|
|
||||||
print(nf, RNF, fields);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
@@ -1,22 +0,0 @@
|
|||||||
/* utility definitions */
|
|
||||||
#ifdef _POSIX2_RE_DUP_MAX
|
|
||||||
#define DUPMAX _POSIX2_RE_DUP_MAX
|
|
||||||
#else
|
|
||||||
#define DUPMAX 255
|
|
||||||
#endif
|
|
||||||
#define INFINITY (DUPMAX + 1)
|
|
||||||
#define NC (CHAR_MAX - CHAR_MIN + 1)
|
|
||||||
typedef unsigned char uch;
|
|
||||||
|
|
||||||
/* switch off assertions (if not already off) if no REDEBUG */
|
|
||||||
#ifndef REDEBUG
|
|
||||||
#ifndef NDEBUG
|
|
||||||
#define NDEBUG /* no assertions please */
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
/* for old systems with bcopy() but no memmove() */
|
|
||||||
#ifdef USEBCOPY
|
|
||||||
#define memmove(d, s, c) bcopy(s, d, c)
|
|
||||||
#endif
|
|
Reference in New Issue
Block a user