summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2010-07-16 12:09:58 +0300
committerArnold D. Robbins <arnold@skeeve.com>2010-07-16 12:09:58 +0300
commitcae8bc6ced84c12590e3554a06a952283735363a (patch)
treeca4f38bfcb1312bfb62fc693564d68f3e9b3e973
parentdbd583bd2b8a6dd40c622875a4e197360cb5aba7 (diff)
downloadgawk-cae8bc6ced84c12590e3554a06a952283735363a.tar.gz
Move to 2.14.gawk-2.14
-rw-r--r--ACKNOWLEDGMENT12
-rw-r--r--FUTURES38
-rw-r--r--Makefile.bsd44alpha8
-rw-r--r--Makefile.in116
-rw-r--r--NEWS (renamed from CHANGES)98
-rw-r--r--PORTS8
-rw-r--r--POSIX95
-rw-r--r--PROBLEMS2
-rw-r--r--README115
-rw-r--r--README.VMS31
-rw-r--r--README.atari37
-rw-r--r--README.linux3
-rw-r--r--README.sun386i45
-rw-r--r--README.ultrix18
-rw-r--r--alloca.c10
-rw-r--r--array.c30
-rw-r--r--atari/Makefile.st187
-rw-r--r--atari/mkconf.g2
-rw-r--r--atari/system.c2
-rw-r--r--atari/tmpnam.c4
-rw-r--r--awk.h192
-rw-r--r--awk.y114
-rw-r--r--awktab.c (renamed from awk.tab.c)1327
-rw-r--r--builtin.c123
-rw-r--r--config.in270
-rw-r--r--config/atari2
-rw-r--r--config/bsd44alpha6
-rw-r--r--config/convex7
-rw-r--r--config/linux.h295
-rw-r--r--config/mach9
-rw-r--r--config/msdos2
-rw-r--r--config/next201
-rw-r--r--config/next216
-rw-r--r--config/osf13
-rw-r--r--config/sunos41-gnulibc0
-rw-r--r--config/v10config.h (renamed from config.h.in)38
-rw-r--r--config/vms-conf.h105
-rw-r--r--config/vms-posix11
-rwxr-xr-xconfigure4
-rw-r--r--dfa.c57
-rw-r--r--dfa.h32
-rw-r--r--eval.c115
-rw-r--r--field.c162
-rw-r--r--gawk.1102
-rw-r--r--gawk.texi (renamed from gawk.texinfo)2859
-rw-r--r--getopt.c662
-rw-r--r--getopt.h121
-rw-r--r--io.c162
-rw-r--r--iop.c59
-rw-r--r--main.c157
-rw-r--r--missing.c22
-rw-r--r--missing/getopt.c93
-rw-r--r--missing/random.c3
-rw-r--r--missing/strcase.c12
-rw-r--r--missing/strftime.378
-rw-r--r--missing/strftime.c250
-rw-r--r--missing/strtod.c2
-rw-r--r--missing/strtol.c120
-rw-r--r--missing/system.c25
-rw-r--r--missing/vprintf.c47
-rwxr-xr-xmkconf32
-rw-r--r--msg.c18
-rw-r--r--node.c49
-rw-r--r--patchlevel.h2
-rw-r--r--pc/config.h65
-rw-r--r--pc/make.bat48
-rw-r--r--protos.h10
-rw-r--r--re.c47
-rw-r--r--regex.c5160
-rw-r--r--regex.h702
-rw-r--r--support/makeinfo.patch233
-rw-r--r--support/texindex.c2
-rw-r--r--support/texinfo.tex364
-rw-r--r--test/Makefile133
-rw-r--r--test/anchgsub.awk1
-rw-r--r--test/anchgsub.good1
-rw-r--r--test/anchgsub.in1
-rw-r--r--test/argarray.awk11
-rw-r--r--test/argarray.good6
-rw-r--r--test/arrayref13
-rw-r--r--test/arrayref.good2
-rw-r--r--test/asgext.awk1
-rw-r--r--test/asgext.good6
-rw-r--r--test/asgext.in3
-rw-r--r--test/awkpath.good1
-rw-r--r--test/compare.awk13
-rw-r--r--test/compare.good5
-rw-r--r--test/compare.in4
-rw-r--r--test/csi1.out574
-rw-r--r--test/data9
-rw-r--r--test/fieldwidths.good1
-rw-r--r--test/fontdata.txt120
-rw-r--r--test/fsbs.good1
-rw-r--r--test/fsbs.in1
-rw-r--r--test/fsrs.awk8
-rw-r--r--test/fsrs.good5
-rw-r--r--test/fsrs.in7
-rw-r--r--test/fstabplus2
-rw-r--r--test/fstabplus.good1
-rw-r--r--test/getline.awk1
-rw-r--r--test/getline.good2
-rw-r--r--test/header.awk5
-rw-r--r--test/igncfs.awk8
-rw-r--r--test/igncfs.good2
-rw-r--r--test/igncfs.in2
-rw-r--r--test/ignorecase.good1
-rw-r--r--test/include.awk13
-rw-r--r--test/inftest.awk5
-rw-r--r--test/inftest.good105
-rw-r--r--test/lastnpages47
-rw-r--r--test/lib/awkpath.awk1
-rw-r--r--test/longwrds.awk20
-rw-r--r--test/longwrds.good21
-rw-r--r--test/manpage200
-rw-r--r--test/manyfiles.awk1
-rw-r--r--test/messages.awk9
-rw-r--r--test/negexp.good1
-rw-r--r--test/nfset.awk1
-rw-r--r--test/nfset.good5
-rw-r--r--test/nfset.in5
-rw-r--r--test/numfunc.awk19
-rw-r--r--test/out1.good1
-rw-r--r--test/out2.good2
-rw-r--r--test/out3.good1
-rw-r--r--test/plus-minus8
-rwxr-xr-xtest/posix69
-rw-r--r--test/posix.good16
-rwxr-xr-xtest/pound-bang3
-rw-r--r--test/pound-bang.good1
-rw-r--r--test/rand.awk6
-rw-r--r--test/reg/exp-eq.awk1
-rw-r--r--test/reg/exp-eq.good3
-rw-r--r--test/reg/exp-eq.in3
-rw-r--r--test/reg/exp.awk1
-rw-r--r--test/reg/exp.good2
-rw-r--r--test/reg/exp.in0
-rw-r--r--test/reg/func.awk1
-rw-r--r--test/reg/func.good1
-rw-r--r--test/reg/func.in0
-rw-r--r--test/reg/func2.awk2
-rw-r--r--test/reg/func2.good2
-rw-r--r--test/reg/func2.in0
-rw-r--r--test/reg/log.awk1
-rw-r--r--test/reg/log.good4
-rw-r--r--test/reg/log.in0
-rwxr-xr-xtest/regtest18
-rw-r--r--test/reparse.awk7
-rw-r--r--test/reparse.good3
-rw-r--r--test/reparse.in1
-rw-r--r--test/reverse.awk13
-rw-r--r--test/rs.data15
-rw-r--r--test/rs.good4
-rw-r--r--test/splitargv.awk7
-rw-r--r--test/splitargv.good7
-rw-r--r--test/splitargv.in7
-rw-r--r--test/sqrt.awk4
-rw-r--r--test/swaplns.awk7
-rw-r--r--test/swaplns.good9
-rw-r--r--test/up_down.awk15
-rw-r--r--test/zap_cpp.awk13
-rw-r--r--version.c4
-rw-r--r--vms/descrip.mms65
-rw-r--r--vms/gawk.hlp57
-rw-r--r--vms/unixlib.h5
-rw-r--r--vms/vms.h6
-rw-r--r--vms/vms_args.c46
-rw-r--r--vms/vms_cli.c3
-rw-r--r--vms/vms_fwrite.c6
-rw-r--r--vms/vms_gawk.c4
-rw-r--r--vms/vms_misc.c58
-rw-r--r--vms/vms_popen.c6
-rw-r--r--vms/vmsbuild.com44
172 files changed, 9204 insertions, 8010 deletions
diff --git a/ACKNOWLEDGMENT b/ACKNOWLEDGMENT
index 276c0133..b6c3b0b0 100644
--- a/ACKNOWLEDGMENT
+++ b/ACKNOWLEDGMENT
@@ -4,20 +4,18 @@ and fixes and suggestions. Unfortunately, we have not been organized
enough to keep track of all the names -- for that we apologize.
Another group of people have assisted even more by porting Gawk to new
-platforms and providing a great deal of feedback. They are (for
-2.12):
+platforms and providing a great deal of feedback. They are:
Hal Peterson <hrp@pecan.cray.com> (Cray)
Pat Rankin <gawk.rankin@EQL.Caltech.Edu> (VMS)
Michal Jaegermann <NTOMCZAK@vm.ucs.UAlberta.CA> (Atari, NeXT, DEC 3100)
Mike Lijewski <mjlx@eagle.cnsf.cornell.edu> (IBM RS6000)
+ Scott Deifik <scottd@amgen.com> (MSDOS 2.14)
+ Kent Williams (MSDOS 2.11)
+ Conrad Kwok (MSDOS earlier versions)
+ Scott Garfinkle (MSDOS earlier versions)
Last, but far from least, we would like to thank Brian Kernighan who
has helped to clear up many dark corners of the language and provided a
restraining touch when we have been overly tempted by "feeping
creaturism".
-
-Support for MSC 5.1 under MS-DOS was supplied for 2.11 by Kent
-Williams, who can be reached at williams@umaxc.weeg.uiowa.edu. It
-relies heavily on the earlier work done for 2.10 by Conrad Kwok and
-Scott Garfinkle.
diff --git a/FUTURES b/FUTURES
index 1c63dcd7..d119b6d0 100644
--- a/FUTURES
+++ b/FUTURES
@@ -28,7 +28,7 @@ Still to go in 2.13
4/17/91: DONE: Table driven built-in subroutine argument count checking
-Update regex.h flags for AWK and feed back to FSF
+6/1/92: DONE: Update regex.h flags for AWK and feed back to FSF
Feedback alloca.s changes to FSF
@@ -41,32 +41,38 @@ Feedback alloca.s changes to FSF
For 2.14
========
+6/18/92: DONE: "next file" keyword sequence
+
+10/28/91: DONE: Warn or fatal if identifier used as both variable and array
+
+12/22/91: DONE: Variables string or numeric or both
+
+In 2.15 or later
+================
+
more lint: check for use of builtin vars. only in new awk.
Extensible hashing and on-disk storage of awk arrays
"restart" keyword
-"nextfile" keyword
-
Add |&
-10/28/91: DONE: Warn or fatal if identifier used as both variable and array
-
Split() with null string as third arg to split up strings
+Analogously, setting FS="" would split the input record into individual
+characters.
+
Allow RS to be a regexp.
RECLEN variable for fixed length records
-Make awk '/foo/' files... run like egrep
+Make awk '/foo/' files... run at egrep speeds
-10/28/91: DONE: Extensive manual cleanup:
+Extensive manual cleanup:
Use of texinfo 2.0 features
- Variables string or numeric or both
-
Lots more examples
Add an error messages section to the manual
@@ -80,18 +86,20 @@ Do a reference card
? Have strftime() pay attention to the value of ENVIRON["TZ"]
-In 2.15 or later
-================
+A way to mix library files and command line strings for source code (-s?)
Allow OFMT to be other than a floating point format.
-SFIO based printf and other i/o
+? SFIO based printf and other i/o
Allow redefining of builtin functions?
+Incorporate newer dfa.c and regex.c
+
Make regex + dfa less dependant on gawk header file includes
-A general sub function edsub(line, pat, sub, global-flag)
+General sub functions edit(line, pat, sub) and gedit(line, pat, sub) that
+ return the substituted strings and allow \1 etc. in the sub string.
Add lint checking everywhere
@@ -104,3 +112,7 @@ Create a gawk compiler?
Do an optimization pass over parse tree?
Provide awk profiling and debugging.
+
+Clean up code by isolating system-specific functions in separate files.
+
+Move to autoconf-based configure system.
diff --git a/Makefile.bsd44alpha b/Makefile.bsd44alpha
new file mode 100644
index 00000000..269f14b8
--- /dev/null
+++ b/Makefile.bsd44alpha
@@ -0,0 +1,8 @@
+PROG= awk
+SRCS= main.c eval.c builtin.c msg.c iop.c io.c field.c array.c \
+ node.c version.c missing.c re.c awk.c regex.c dfa.c
+DPADD= ${LIBM}
+LDADD= -lm
+CFLAGS+= -g
+
+.include <bsd.prog.mk>
diff --git a/Makefile.in b/Makefile.in
index 19f008ec..58eb9394 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,6 +1,6 @@
# Makefile for GNU Awk.
#
-# Copyright (C) 1986, 1988-1991 the Free Software Foundation, Inc.
+# Copyright (C) 1986, 1988-1992 the Free Software Foundation, Inc.
#
# This file is part of GAWK, the GNU implementation of the
# AWK Progamming Language.
@@ -24,8 +24,8 @@
DESTDIR=
BINDIR= /usr/local/bin
-MANDIR= /usr/man/manl
-MANEXT= l
+MANDIR= /usr/local/man/man1
+MANEXT= 1
# CFLAGS: options to the C compiler
#
@@ -34,18 +34,19 @@ MANEXT= l
# -pg include new (gmon) profiling info
#
# The provided "configure" is used to turn a config file (samples in
-# the "config" directory into commands to edit config.h.in into
+# the "config" directory into commands to edit config.in into
# a suitable config.h and to edit Makefile.in into Makefile.
# To port GAWK, create an appropriate config file using the ones in
-# the config directory as examples and using the comments in config.h.in
+# the config directory as examples and using the comments in config.in
# as a guide.
#
-CC= cc
+CC= gcc
+##MAKE_CC## CC = cc
-OPTIMIZE= -g #-O -fstrength-reduce
+OPTIMIZE= -g -O
PROFILE= #-pg
-DEBUG= #-DMALLOCDEBUG #-DMEMDEBUG #-DDEBUG #-DFUNC_TRACE #-DMPROF
+DEBUG= #-DMALLOCDEBUG #-DDEBUG #-DFUNC_TRACE #-DMPROF
LINKSTATIC= #-Bstatic
WARN= #-W -Wunused -Wimplicit -Wreturn-type -Wcomment # for gcc only
@@ -60,7 +61,6 @@ LIBS =
# Cray 2 running Unicos 5.0.7
##MAKE_LIBNET## LIBS = -lnet
-##MAKE_NeXT## FLAGS = -DGFMT_WORKAROUND
# Systems with alloca in /lib/libPW.a
##MAKE_ALLOCA_PW## LIBS = -lPW
@@ -78,17 +78,24 @@ LIBS =
FLAGS=
##MAKE_RS6000## FLAGS = -qchars=signed
+# VMS POSIX, VAXC V3.2
+##MAKE_VMS-Posix## FLAGS = -UVMS -D__STDC__=0
+
# HP/Apollo running cc version 6.7 or earlier
##MAKE_Apollo## FLAGS = -U__STDC__ -A run,sys5.3
##MAKE_Apollo## LIBS = -A sys,any
-CFLAGS= $(FLAGS) $(DEBUG) $(LINKSTATIC) $(PROFILE) $(OPTIMIZE) $(WARN)
+# Use -s -Xlinker -object flags when you are satisfied that
+# the program compiles correctly
+##MAKE_NeXT## FLAGS = -DGFMT_WORKAROUND #-s -Xlinker -object
+
+CFLAGS= -DGAWK $(FLAGS) $(DEBUG) $(LINKSTATIC) $(PROFILE) $(OPTIMIZE) $(WARN)
# object files
AWKOBJS = main.o eval.o builtin.o msg.o iop.o io.o field.o array.o \
- node.o version.o missing.o re.o
+ node.o version.o missing.o re.o getopt.o
-ALLOBJS = $(AWKOBJS) awk.tab.o
+ALLOBJS = $(AWKOBJS) awktab.o
# GNUOBJS
# GNU stuff that gawk uses as library routines.
@@ -96,30 +103,29 @@ GNUOBJS= regex.o dfa.o $(ALLOCA)
# source and documentation files
SRC = main.c eval.c builtin.c msg.c version.c \
- iop.c io.c field.c array.c node.c missing.c re.c
+ iop.c io.c field.c array.c node.c missing.c re.c getopt.c
-ALLSRC= $(SRC) awk.tab.c
+ALLSRC= $(SRC) awktab.c
-AWKSRC= awk.h awk.y $(ALLSRC) patchlevel.h protos.h config.h.in
+AWKSRC= awk.h awk.y $(ALLSRC) patchlevel.h protos.h config.in getopt.h
GNUSRC = alloca.c alloca.s dfa.c dfa.h regex.c regex.h
-COPIES = missing/getopt.c missing/system.c missing/tzset.c \
+COPIES = missing/system.c missing/tzset.c \
missing/memcmp.c missing/memcpy.c missing/memset.c \
missing/random.c missing/strcase.c missing/strchr.c \
- missing/strerror.c missing/strtod.c missing/vprintf.c \
- missing/strftime.c missing/strftime.3 missing/strtol.c
+ missing/strerror.c missing/strtod.c \
+ missing/strftime.c missing/strftime.3
SUPPORT = support/texindex.c support/texinfo.tex
-DOCS= gawk.1 gawk.texinfo
+DOCS= gawk.1 gawk.texi
-INFOFILES= gawk-info gawk-info-1 gawk-info-2 gawk-info-3 gawk-info-4 \
- gawk-info-5 gawk-info-6 gawk.aux gawk.cp gawk.cps gawk.fn \
- gawk.fns gawk.ky gawk.kys gawk.pg gawk.pgs gawk.toc \
- gawk.tp gawk.tps gawk.vr gawk.vrs
+INFOFILES= gawk.info gawk.info-* \
+ gawk.aux gawk.cp gawk.cps gawk.fn gawk.fns gawk.ky gawk.kys \
+ gawk.pg gawk.pgs gawk.toc gawk.tp gawk.tps gawk.vr gawk.vrs
-MISC = CHANGES COPYING FUTURES Makefile.in PROBLEMS README* PORTS mkconf \
+MISC = NEWS COPYING FUTURES Makefile.* PROBLEMS README* PORTS POSIX \
mungeconf configure ACKNOWLEDGMENT LIMITATIONS
OTHERS= pc/* atari/* vms/*
@@ -130,32 +136,29 @@ ALLFILES= $(AWKSRC) $(GNUSRC) $(COPIES) $(MISC) $(DOCS) $(ALLDOC) $(OTHERS) \
$(SUPPORT)
# Release of gawk. There can be no leading or trailing white space here!
-REL=2.13
+REL=2.14
# rules to build gawk
gawk: $(ALLOBJS) $(GNUOBJS) $(REOBJS)
$(CC) -o gawk $(CFLAGS) $(ALLOBJS) $(GNUOBJS) $(REOBJS) -lm $(LIBS)
-$(AWKOBJS): awk.h config.h
-
-dfa.o: awk.h config.h dfa.h
+$(AWKOBJS) regex.o dfa.o: awk.h dfa.h regex.h
-regex.o: config.h regex.h
- $(CC) $(CFLAGS) -DREGEX_MALLOC -DGAWK -c regex.c
+getopt.o: getopt.h
main.o: patchlevel.h
-awk.tab.o: awk.h awk.tab.c
-
-awk.tab.c: awk.y
+awktab.c: awk.y
$(PARSER) -v awk.y
- sed '/^extern char .malloc(), .realloc();$$/d' y.tab.c >awk.tab.c
+##MAKE_VMS-Posix## mv ytab.c awktab.c
+##MAKE_VMS-Posix## dummy.awk_tab.target:
+ sed '/^extern char .malloc(), .realloc();$$/d' y.tab.c >awktab.c
rm y.tab.c
-config.h: config.h.in
+config.h: config.in
@echo You must provide a config.h!
@echo Run \"./configure\" to build it for known systems
- @echo or copy config.h.in to config.h and edit it.; exit 1
+ @echo or copy config.in to config.h and edit it.; exit 1
install: gawk
install -s gawk $(DESTDIR)$(BINDIR)
@@ -186,28 +189,29 @@ xref:
cxref -c $(FLAGS) $(ALLSRC) | grep -v ' /' >xref
clean:
- rm -f *.o core awk.output gmon.out make.out y.output
+ rm -rf *.o core awk.output gmon.out make.out y.output \
+ *.orig *.rej */*.orig */*.rej
cleaner: clean
- rm -f gawk awk.tab.c
+ rm -f gawk awktab.c
clobber: clean
rm -f $(ALLDOC) gawk.log
-gawk.dvi: gawk.texinfo
- tex gawk.texinfo ; texindex gawk.??
- tex gawk.texinfo ; texindex gawk.??
- tex gawk.texinfo
+gawk.dvi: gawk.texi
+ tex gawk.texi; texindex gawk.??
+ tex gawk.texi; texindex gawk.??
+ tex gawk.texi
-$(INFOFILES): gawk.texinfo
- makeinfo gawk.texinfo
+gawk.info: gawk.texi
+ makeinfo gawk.texi
gawk-test-$(REL).tar.Z::
-rm -f gawk-test-$(REL).tar.Z
tar -cf - test | compress >gawk-test-$(REL).tar.Z
-dist: $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $(COPIES) $(SUPPORT)
- configure msdos
+dist: $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $(COPIES) $(SUPPORT) clean
+ ./configure msdos
mv config.h pc
-rm -rf gawk-$(REL) gawk-$(REL).*.tar.Z
-mkdir gawk-$(REL)
@@ -224,17 +228,17 @@ dist: $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $(COPIES) $(SUPPORT)
cp -p config/* gawk-$(REL)/config
-mkdir gawk-$(REL)/support
cp -p support/* gawk-$(REL)/support
- ln -s ../test gawk-$(REL)
- tar -cfh - gawk-$(REL) | compress > gawk-$(REL).`gawk '{print $$3}' patchlevel.h`.tar.Z
+ tar -cf - test | (cd gawk-$(REL); tar xpf - )
+ tar -cf - gawk-$(REL) | compress > gawk-$(REL).`gawk '{print $$3}' patchlevel.h`.tar.Z
-gawk-doc-$(REL).tar.Z: $(ALLDOC)
+gawk-doc-$(REL).tar.Z: gawk.info gawk.dvi gawk.1
-rm -rf gawk-doc-$(REL) gawk-doc-$(REL).tar.Z
-mkdir gawk-doc-$(REL)
cp -p $(INFOFILES) gawk.dvi gawk-doc-$(REL)
nroff -man gawk.1 > gawk-doc-$(REL)/gawk.1.pr
- tar -cf - gawk-$(REL)-doc | compress > gawk-doc-$(REL).tar.Z
+ tar -cf - gawk-doc-$(REL) | compress > gawk-doc-$(REL).tar.Z
-gawk-ps-$(REL).tar.Z: gawk-ps-$(REL).tar.Z
+gawk-ps-$(REL).tar.Z: gawk.dvi gawk.1
-rm -rf gawk-ps-$(REL) gawk-ps-$(REL).tar.Z
-mkdir gawk-ps-$(REL)
dvips -o !cat gawk.dvi > gawk-ps-$(REL)/gawk.postscript
@@ -244,13 +248,5 @@ gawk-ps-$(REL).tar.Z: gawk-ps-$(REL).tar.Z
release: gawk-src-$(REL).tar.Z gawk-doc-$(REL).tar.Z gawk-ps-$(REL).tar.Z \
gawk-test-$(REL).tar.Z
-diff:
- for i in RCS/*; do rcsdiff -c -b $$i > `basename $$i ,v`.diff; done
-
-test::
- make gawk
+test: gawk
cd test; make -k
-
-bigtest::
- make gawk
- cd test; make -k bigtest
diff --git a/CHANGES b/NEWS
index fa862323..191de0bf 100644
--- a/CHANGES
+++ b/NEWS
@@ -1,19 +1,77 @@
-Changes from 2.13.2 to 2.13.3
------------------------------
+Changes from 2.13.2 to 2.14
+---------------------------
Updated manual!
+Added "next file" to skip efficiently to the next input file.
+
+Fixed potential of overflowing buffer in do_sprintf().
+
+Plugged small memory leak in sub_common().
+
+EOF on a redirect is now "sticky" -- it can only be cleared by close()ing
+ the pipe or file.
+
+Now works if used via a #! /bin/gawk line at the top of an executable file
+ when that line ends with whitespace.
+
+Added some checks to the grammar to catch redefinition of builtin functions.
+ This could eventually be the basis for an extension to allow redefining
+ functions, but in the mean time it's a good error catching facility.
+
+Negative integer exponents now work.
+
+Modified do_system() to make sure it had a non-null string to be passed
+ to system(3). Thus, system("") will flush any pending output but not go
+ through the overhead of forking an un-needed shell.
+
+A fix to floating point comparisons so that NaNs compare right on IEEE systems.
+
+Added code to make sure we're not opening directories for reading and such.
+
+Added code to do better diagnoses of weird or null file names.
+
+Allow continue outside of a loop, unless in strict posix mode. Lint option
+ will issue warning.
+
+New missing/strftime.c. There has been one change that affects gawk. Posix
+ now defines a %V conversion so the vms conversion has been changed to %v.
+ If this version is used with gawk -Wlint and they use %V in a call to
+ strftime, they'll get a warning.
+
Error messages now conform to GNU standard (I hope).
+Changed comparisons to conform to the description found in the file POSIX.
+ This is inconsistent with the current POSIX draft, but that is broken.
+ Hopefully the final POSIX standard will conform to this version.
+ (Alas, this will have to wait for 1003.2b, which will be a revision to
+ the 1003.2 standard. That standard has been frozen with the broken
+ comparison rules.)
+
The length of a string was a short and now is a size_t.
Updated VMS help.
-Added a few new tests to the test suite.
+Added quite a few new tests to the test suite and deleted many due to lack of
+ written releases. Test output is only removed if it is identical to the
+ "good" output.
+
+Fixed a couple of bugs for reference to $0 when $0 is "" -- particularly in
+ a BEGIN block.
+
+Fixed premature freeing in construct "$0 = $0".
+
+Removed the call to wait_any() in gawk_popen(), since on at least some systems,
+ if gawk's input was from a pipe, the predecessor process in the pipe was a
+ child of gawk and this caused a deadlock.
Regexp can (once again) match a newline, if given explicitly.
-Fixed VMS pipe simulation.
+nextopen() makes sure file name is null terminated.
+
+Fixed VMS pipe simulation. Improved VMS I/O performance.
+
+Catch . used in variable names.
Fixed bug in getline without redirect from a file -- it was quitting after the
first EOF, rather than trying the next file.
@@ -25,12 +83,19 @@ Fixed bug in treatment of backslash at the end of a string -- it was bombing
Moved setting of regexp syntax to before the option parsing in main(), to
handle things like -v FS='[.,;]'
-Fixed bug when NF is set by user -- fields_arr must be expanded if necessary.
+Fixed bug when NF is set by user -- fields_arr must be expanded if necessary
+ and "new" fields must be initialized.
-Fixed bug for [g]sub() where no match found and for zero-length string.
+Fixed several bugs in [g]sub() for no match found or the match is 0-length.
+
+Fixed bug where in gsub() a pattern anchored at the beginning would still
+ substitute throughout the string.
make test does not assume the . is in PATH.
+Fixed bug when a field beyond the end of the record was requested after
+ $0 was altered (directly or indirectly).
+
Fixed bug for assignment to field beyond end of record -- the assigned value
was not found on subsequent reference to that field.
@@ -51,8 +116,29 @@ Fixed problem with x += x
Use of scalar as array and vice versa is now detected.
+IGNORECASE now obeyed for FS (even if FS is a single alphabetic character).
+
Switch to GPL version 2.
+Renamed awk.tab.c to awktab.c for MSDOS and VMS tar programs.
+
+Renamed this file (CHANGES) to NEWS.
+
+Use fmod() instead of modf() and provide FMOD_MISSING #define to undo
+ this change.
+
+Correct the volatile declarations in eval.c.
+
+Avoid errant closing of the file descriptors for stdin, stdout and stderr.
+
+Be more flexible about where semi-colons can occur in programs.
+
+Check for write errors on all output, not just on close().
+
+Eliminate the need for missing/{strtol.c,vprintf.c}.
+
+Use GNU getopt and eliminate missing/getopt.c.
+
More "lint" checking.
diff --git a/PORTS b/PORTS
index b7d1123f..bd2678dd 100644
--- a/PORTS
+++ b/PORTS
@@ -1,4 +1,4 @@
-This version of gawk has been successfully compiled and run "make test"
+A recent version of gawk has been successfully compiled and run "make test"
on the following:
Sun 4/490 running 4.1
@@ -12,6 +12,8 @@ SGI running IRIX 3.3 using gcc (fails with cc)
Sequent Balance running Dynix V3.1
Cray Y-MP8 running Unicos 6.0.11
Cray 2 running Unicos 6.1 (modulo trailing zeroes in chem)
-VMS 5.x (should also work on 4.6 and 4.7)
+VAX/VMS V5.x (should also work on 4.6 and 4.7)
+VMS POSIX V1.0, V1.1
+OpenVMS AXP V1.0
MSDOS - Microsoft C 5.1, compiles and runs very simple testing
-CLOSE: 4.3reno
+BSD 4.4alpha
diff --git a/POSIX b/POSIX
new file mode 100644
index 00000000..f2405420
--- /dev/null
+++ b/POSIX
@@ -0,0 +1,95 @@
+Right now, the numeric vs. string comparisons are screwed up in draft
+11.2. What prompted me to check it out was the note in gnu.bug.utils
+which observed that gawk was doing the comparison $1 == "000"
+numerically. I think that we can agree that intuitively, this should
+be done as a string comparison. Version 2.13.2 of gawk follows the
+current POSIX draft. Following is how I (now) think this
+stuff should be done.
+
+1. A numeric literal or the result of a numeric operation has the NUMERIC
+ attribute.
+
+2. A string literal or the result of a string operation has the STRING
+ attribute.
+
+3. Fields, getline input, FILENAME, ARGV elements, ENVIRON elements and the
+ elements of an array created by split() that are numeric strings
+ have the STRNUM attribute. Otherwise, they have the STRING attribute.
+ Uninitialized variables also have the STRNUM attribute.
+
+4. Attributes propagate across assignments, but are not changed by
+ any use. (Although a use may cause the entity to acquire an additional
+ value such that it has both a numeric and string value -- this leaves the
+ attribute unchanged.)
+
+When two operands are compared, either string comparison or numeric comparison
+may be used, depending on the attributes of the operands, according to the
+following (symmetric) matrix:
+
+ +----------------------------------------------
+ | STRING NUMERIC STRNUM
+--------+----------------------------------------------
+ |
+STRING | string string string
+ |
+NUMERIC | string numeric numeric
+ |
+STRNUM | string numeric numeric
+--------+----------------------------------------------
+
+So, the following program should print all OKs.
+
+echo '0e2 0a 0 0b
+0e2 0a 0 0b' |
+$AWK '
+NR == 1 {
+ num = 0
+ str = "0e2"
+
+ print ++test ": " ( (str == "0e2") ? "OK" : "OOPS" )
+ print ++test ": " ( ("0e2" != 0) ? "OK" : "OOPS" )
+ print ++test ": " ( ("0" != $2) ? "OK" : "OOPS" )
+ print ++test ": " ( ("0e2" == $1) ? "OK" : "OOPS" )
+
+ print ++test ": " ( (0 == "0") ? "OK" : "OOPS" )
+ print ++test ": " ( (0 == num) ? "OK" : "OOPS" )
+ print ++test ": " ( (0 != $2) ? "OK" : "OOPS" )
+ print ++test ": " ( (0 == $1) ? "OK" : "OOPS" )
+
+ print ++test ": " ( ($1 != "0") ? "OK" : "OOPS" )
+ print ++test ": " ( ($1 == num) ? "OK" : "OOPS" )
+ print ++test ": " ( ($2 != 0) ? "OK" : "OOPS" )
+ print ++test ": " ( ($2 != $1) ? "OK" : "OOPS" )
+ print ++test ": " ( ($3 == 0) ? "OK" : "OOPS" )
+ print ++test ": " ( ($3 == $1) ? "OK" : "OOPS" )
+ print ++test ": " ( ($2 != $4) ? "OK" : "OOPS" ) # 15
+}
+{
+ a = "+2"
+ b = 2
+ if (NR % 2)
+ c = a + b
+ print ++test ": " ( (a != b) ? "OK" : "OOPS" ) # 16 and 22
+
+ d = "2a"
+ b = 2
+ if (NR % 2)
+ c = d + b
+ print ++test ": " ( (d != b) ? "OK" : "OOPS" )
+
+ print ++test ": " ( (d + 0 == b) ? "OK" : "OOPS" )
+
+ e = "2"
+ print ++test ": " ( (e == b "") ? "OK" : "OOPS" )
+
+ a = "2.13"
+ print ++test ": " ( (a == 2.13) ? "OK" : "OOPS" )
+
+ a = "2.130000"
+ print ++test ": " ( (a != 2.13) ? "OK" : "OOPS" )
+
+ if (NR == 2) {
+ CONVFMT = "%.6f"
+ print ++test ": " ( (a == 2.13) ? "OK" : "OOPS" )
+ }
+}'
diff --git a/PROBLEMS b/PROBLEMS
index de1cf33f..0f9a620d 100644
--- a/PROBLEMS
+++ b/PROBLEMS
@@ -1,4 +1,4 @@
-This is a list of known problems in gawk 2.13.
+This is a list of known problems in gawk 2.14.
Hopefully they will all be fixed in the next major release of gawk.
Please keep in mind that the code is still undergoing significant evolution.
diff --git a/README b/README
index 8277acc2..5161a00a 100644
--- a/README
+++ b/README
@@ -1,68 +1,36 @@
README:
-This is GNU Awk 2.13. It should be upwardly compatible with the
-System V Release 4 awk. It is almost completely compliant with draft 11
+This is GNU Awk 2.14. It should be upwardly compatible with the
+System V Release 4 awk. It is almost completely compliant with draft 11.3
of POSIX 1003.2.
-This release is essentially a bug fix and tuning release.
+This release is essentially a bug fix release.
See the installation instructions, below.
Known problems are given in the PROBLEMS file. Work to be done is
described briefly in the FUTURES file. Verified ports are listed in
-the PORTS file. Please read the LIMITATIONS and ACKNOWLEDGMENT files.
+the PORTS file. Changes in this version are summarized in the NEWS file.
+Please read the LIMITATIONS and ACKNOWLEDGMENT files.
-To format the documentation, with TeX, you must use texinfo.tex 2.53
+Read the file POSIX for a discussion of how the standard says comparisons
+should be done vs. how they really should be done and how gawk does them.
+
+To format the documentation with TeX, you must use texinfo.tex 2.53
or later. Otherwise footnotes look unacceptable.
-If you wish to use remake the Info files, you should use makeinfo. We
-used makeinfo 2.10. Note that this version of makeinfo requires a patch,
-which is supplied in support/makeinfo.patch. We don't know if this patch
-will make it into the next release of makeinfo or not.
+If you wish to remake the Info files, you should use makeinfo. The 2.15
+version of makeinfo works with no errors.
The man page is up to date.
-If you do not have nroff or troff, you can use `awf' included in
-the test suite to format the manual page with only a few small problems.
-
-Summary of Changes from 2.11.1
-
-Configuration is via a config file which is used by the "configure" script
-to create Makefile and config.h. Sample configuration files for various systems
-are included in the config directory.
-
-Non-POSIX options are arguments to -W; all non-compliant options are
-deprecated.
-
-New option "-W lint" to do extra checking. The coverage
-will expand a bit in future releases.
-
-Numeric to string conversion is done via the builtin variable CONVFMT
-rather than OFMT, in conformance with the POSIX draft standard. It is
-initialized with the same value as OFMT, so the vast majority of programs
-should see no change in behaviour.
-
-Awk program source no longer has any line length limits.
-
-New builtin functions systime() and strftime() provided.
-
-Error messages improved.
-
-FIELDWIDTHS variable gives a space-separated list of numbers specifying the
-widths of input fields, to accomodate fixed-format input.
-
-Numerous bug fixes and portability improvements.
-
-Performance is about 50% better than 2.11.1, although the improvements
-are uneven.
-
-The code has been extensively tested with test coverage monitored.
-A test suite is now included. This will be expanded in future releases.
INSTALLATION:
-The Makefile may need some tailoring. The only changes necessary should
+Check whether there is a system-specific README file for your system.
+
+Makefile.in may need some tailoring. The only changes necessary should
be to change installation targets or to change compiler flags.
-The changes to make in the Makefile are commented and should be obvious.
+The changes to make in Makefile.in are commented and should be obvious.
All other changes should be made in a config file. Samples for
various systems are included in the config directory. Starting with
@@ -72,7 +40,7 @@ standard conforming systems. We have included substitute versions of
routines not universally available. Simply add the appropriate define
for the missing feature(s) on your system.
-If you have neither bison nor yacc, use the awk.tab.c file here. It was
+If you have neither bison nor yacc, use the awktab.c file here. It was
generated with bison, and should have no AT&T code in it. (Note that
modifying awk.y without bison or yacc will be difficult, at best. You might
want to get a copy of bison from the FSF too.)
@@ -81,22 +49,28 @@ If no config file is included for your system, start by copying one
for a similar system. One way of determining the defines needed is to
try to load gawk with nothing defined and see what routines are
unresolved by the loader. This should give you a good idea of how to
-proceed. We would like to receive a copy of any new config files.
+proceed.
+
+The next release will use the FSF autoconfig program, so we are no longer
+soliciting new config files.
If you have an MS-DOS system, use the stuff in the pc directory.
-For an Atari theere is an atari directory and similarly one for VMS.
+For an Atari there is an atari directory and similarly one for VMS.
+
+Chapter 16 of The GAWK Manual discusses configuration in detail.
-After successful compilation, do 'make test' to run a small test suite.
-There should be no output from the 'cmp' invocations. If there is, please
-investigate and report the problem. More extensive testing can be invoked
-with 'make bigtest'. There are many interesting programs in the test suite!
+After successful compilation, do 'make test' to run a small test
+suite. There should be no output from the 'cmp' invocations except in
+the cases where there are small differences in floating point values.
+If there are other differences, please investigate and report the
+problem. More extensive testing can be invoked with 'make bigtest'.
PRINTING THE MANUAL
-The 'support' directory contains texinfo.tex 2.53, which will be necessary
-for printing the manual, and the texindex.c program from the emacs distribution
-which is also necessary. See the makefile for the steps needed to get a
-DVI file from the manual.
+The 'support' directory contains texinfo.tex 2.65, which will be necessary
+for printing the manual, and the texindex.c program from the texinfo
+distribution which is also necessary. See the makefile for the steps needed
+to get a DVI file from the manual.
CAVEATS
@@ -104,7 +78,7 @@ The existence of a patchlevel.h file does *N*O*T* imply a commitment on
our part to issue bug fixes or patches. It is there in case we should
decide to do so.
-BUG REPORTS AND FIXES:
+BUG REPORTS AND FIXES (Un*x systems):
Please coordinate changes through David Trueman and/or Arnold Robbins.
@@ -112,8 +86,8 @@ David Trueman
Department of Mathematics, Statistics and Computing Science,
Dalhousie University, Halifax, Nova Scotia, Canada
-UUCP {uunet utai watmath}!dalcs!david
-INTERNET david@cs.dal.ca
+UUCP: {uunet utai watmath}!dalcs!david
+INTERNET: david@cs.dal.ca
Arnold Robbins
1736 Reindeer Drive
@@ -121,3 +95,22 @@ Atlanta, GA, 30329, USA
INTERNET: arnold@skeeve.atl.ga.us
UUCP: { gatech, emory, emoryu1 }!skeeve!arnold
+
+BUG REPORTS AND FIXES (non-Unix ports):
+
+MS-DOS:
+ Scott Deifik
+ AMGEN Inc.
+ Amgen Center, Bldg.17-Dept.393
+ Thousand Oaks, CA 91320-1789
+ Tel-805-499-5725 ext.4677
+ Fax-805-498-0358
+ scottd@amgen.com
+
+VMS:
+ Pat Rankin
+ rankin@eql.caltech.edu (e-mail only)
+
+Atari ST:
+ Michal Jaegermann
+ NTOMCZAK@vm.ucs.UAlberta.CA (e-mail only)
diff --git a/README.VMS b/README.VMS
index bbe9fa43..da5dc2d4 100644
--- a/README.VMS
+++ b/README.VMS
@@ -19,10 +19,10 @@ VAX C V2.x -- (version 2.3 or 2.4; older ones won't work); edit either
GNU C -- edit vmsbuild.com or descrip.mms; the changes are different
from those for VAX C V2.x, but equally straightforward. No
changes to config.h should be needed.
+DEC C -- edit vmsbuild.com or descrip.mms according to their comments.
- Tested under VMS V5.3 and V5.4-2 using VAX C V3.2, V3.1, and V2.3
-and also GNU C V1.39. Should work without modifications for VMS V4.6
-and up.
+ Tested under VAX/VMS V5.5-1 using VAX C V3.2, GNU C 1.40 and 2.3.
+Should work without modifications for VMS V4.6 and up.
Installing GAWK on VMS:
@@ -81,3 +81,28 @@ separated list of directory specifications. When defining it, the
value should be quoted so that it retains a single translation, not a
multi-translation RMS searchlist.
+
+Building and using GAWK under VMS POSIX:
+
+ Ignore the instructions above, although vms/gawk.hlp should still
+be made available in a help library. Make sure that the two scripts,
+'configure' and 'mungeconf', are executable; use `chmod +x' on them if
+necessary. Then execute the following two commands:
+ |psx> configure vms-posix
+ |psx> make awktab.c gawk
+The first command will construct files "config.h" and "Makefile" out of
+templates. The second command will compile and link 'gawk'. Due to
+a 'make' bug in VMS POSIX 1.0 and V1.1, the file "awktab.c" must be
+given as an explicit target or it will not be built and the final link
+step will fail. Ignore the warning "Could not find lib m in lib list";
+it is harmless, caused by the Makefile's explicit use of -lm as a linker
+option which is not needed under VMS POSIX. Under V1.1 (but not V1.0)
+a problem with the yacc skeleton /etc/yyparse.c will cause a compiler
+warning for awktab.c, followed by a linker warning about compilation
+warnings in the resulting object module. These warnings can be ignored.
+
+ Another 'make' bug interferes with exercising various components
+of the test suite, but all the actual tests should execute correctly.
+(The main exception being book/wordfreq, which gives different results
+due to VMS POSIX 'sort' rather than to 'gawk'.)
+
diff --git a/README.atari b/README.atari
new file mode 100644
index 00000000..5185afcd
--- /dev/null
+++ b/README.atari
@@ -0,0 +1,37 @@
+Gawk on Atari has been compiled and tested using gcc compiler
+(versions 1.4 and 2.2.2) both with and without -mshort flag. Other
+compilers can be used but if sizeof(pointer) != sizeof(int) this
+code will not compile correctly with non-ANSI compiler (prototypes
+and library).
+
+Compiled executables were tested and, with minor modifications due
+to differences in environment and/or shell, successfully completed
+at least the following tests:
+awf, pearls, resub, chem, swaplns, delete, messages, argarray,
+longwrds, getline, inftest, spiece2top, fstabplus, compare, arrayref,
+rs, fsrs, rand, fsbs, negexp, ugh2, asgext, anchgsub, splitargv,
+fieldwidths, ignorecase, posix, manyfiles, igncfs, lisp, regtest,
+awkpath, reparse, nfset.
+
+Nearly all of these tests require no adjustments to run, but a
+modified test suite with a driving Makefile (for gulam) is available
+on request from Michal Jaegermann, ntomczak@vm.ucs.ualberta.ca,
+via e-mail.
+
+TOS and MiNT
+-----------
+Setup for Atari assumes that gawk will be used under TOS. In
+particular atari/system.c source is for a system function which nicely
+cooperates with gulam shell and pipes are simulated with temporary
+files. If you plan to run gawk under MiNT then you likely want a
+different system function and real pipes. For that purpose do not
+define SYSTEM_MISSING in a configuration file and do not define
+PIPES_SIMULATED in the io.c file. Please note that in the latter case gawk
+calls, in gawk_popen(), the function execl() with a hard-coded name of
+"/bin/sh". You will likely want to change that to get a name and some
+arguments from an environment variable. This was not done here in
+order to avoid changes which may prove troublesome in a general
+distribution. Subdirectory ./atari also contains a file textrd.c with
+a bug fix for old versions of gcc libraries. This bug is currently
+fixed and the file is not used, but it is left as a convenience for
+those who may not yet have updated their libraries.
diff --git a/README.linux b/README.linux
new file mode 100644
index 00000000..813fc623
--- /dev/null
+++ b/README.linux
@@ -0,0 +1,3 @@
+A proper config file was not submitted for linux, so just copy
+config/linux.h to config.h and make the small changes noted in
+the comment at the top of that file.
diff --git a/README.sun386i b/README.sun386i
new file mode 100644
index 00000000..e9daf26c
--- /dev/null
+++ b/README.sun386i
@@ -0,0 +1,45 @@
+Date: Mon, 16 Mar 1992 14:49:10 -0400
+From: <beebe@math.utah.edu>
+To: david@cs.dal.ca, arnold@skeeve.atl.ga.us
+
+...
+
+On the Sun 386i, floating-point numbers are printed without a leading
+zero digit. This causes the cmp step to fail for the chem target. I
+revised the Makefile, and at the same time, inserted an RM macro in
+place of the many rm calls. The complete updated Makefile is appended
+below. This change should be applied to all systems to avoid the
+leading-zero problem in the future.
+
+On the Sun 386i, my initial "make sunos40" resulted in a load failure
+with
+
+>> Undefined:
+>> strncasecmp
+>> strftime
+
+I therefore modified the Makefile to read
+
+OPTIMIZE= -g -O -fstrength-reduce -DSTRFTIME_MISSING -DSTRCASE_MISSING
+
+and loading got further, but still failed:
+
+>> tzset: ld: /lib/libc.a(localtime.o): multiply defined
+>> *** Error code 1
+>> make: Fatal error: Command failed for target `gawk'
+>> Current working directory /home/share/gnu/src/gawk-2.13
+>> *** Error code 1
+>> make: Fatal error: Command failed for target `test'
+
+Investigation with nm showed that localtime() is only referenced by
+builtin.o, so I simply did
+
+ chmod +x gawk
+ make test bigtest
+
+and was able to complete the test after the leading-zero digit fix
+was applied to the Makefile.
+
+The Sun 386i is not receiving further development by Sun, and SunOS
+4.0.3 is the last O/S release for it, so perhaps you don't want to do
+anything other than note the problem in the README file.
diff --git a/README.ultrix b/README.ultrix
index 8a0ce552..b2e5d840 100644
--- a/README.ultrix
+++ b/README.ultrix
@@ -3,3 +3,21 @@ regex.c' is causing an infinite loop in an optimizer. Other sources
compile fine with -O flag. If you are going to use this flag either
add a special rule to Makefile for a compilation of regex.c, or issue
'cc -c regex.c' before hitting 'make'.
+
+From: Steve Simmons <scs@wotan.iti.org>
+Subject: Non-bug report on gawk 2.13.2
+To: david@cs.dal.ca, arnold@skeeve.atl.ga.us
+Date: Thu, 25 Jul 1991 13:45:38 -0300
+
+Just fyi -- it passes tests with flying colors under Ultrix 4.2. The
+README.ultrix file applies more than ever. You might want to add
+these paragraphs to it:
+
+ As of Ultrix 4.2 the optimise works for regex.c, but you must give an
+ additional switch to get everything optimised. Using '-Olimit 1500'
+ does the job. Without the switch gawk will compile and run correctly,
+ but you will get complaints about lost optimisations in builtin.c,
+ awk.tab.c and regex.c.
+
+ The configure for ultrix4.1 works just fine for ultrix4.2
+
diff --git a/alloca.c b/alloca.c
index c29fbda3..866f3d5d 100644
--- a/alloca.c
+++ b/alloca.c
@@ -42,8 +42,8 @@ you
lose
-- must know STACK_DIRECTION at compile-time
#endif /* STACK_DIRECTION undefined */
-#endif static
-#endif emacs
+#endif /* static */
+#endif /* emacs */
#ifdef __STDC__
typedef void *pointer; /* generic pointer type */
@@ -55,6 +55,8 @@ typedef char *pointer; /* generic pointer type */
extern void free();
extern pointer xmalloc();
+extern int write();
+extern void exit();
/*
Define STACK_DIRECTION if you know the direction of stack
@@ -154,8 +156,8 @@ alloca (size) /* returns pointer to storage */
register header *hp; /* traverses linked list */
for (hp = last_alloca_header; hp != NULL;)
- if (STACK_DIR > 0 && hp->h.deep > depth
- || STACK_DIR < 0 && hp->h.deep < depth)
+ if ((STACK_DIR > 0 && hp->h.deep > depth)
+ || (STACK_DIR < 0 && hp->h.deep < depth))
{
register header *np = hp->h.next;
diff --git a/array.c b/array.c
index de965ff6..a5f5c754 100644
--- a/array.c
+++ b/array.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -103,7 +103,7 @@ hash(s, len)
register char *s;
register int len;
{
- register unsigned int h = 0, g;
+ register unsigned long h = 0, g;
while (len--) {
h = (h << 4) + *s++;
@@ -128,26 +128,18 @@ NODE *symbol;
register NODE *subs;
int hash1;
{
- register NODE *bucket;
- int chained = 0;
+ register NODE *bucket, *prev = 0;
for (bucket = symbol->var_array[hash1]; bucket; bucket = bucket->ahnext) {
if (cmp_nodes(bucket->ahname, subs) == 0) {
- if (chained) { /* move found to front of chain */
- register NODE *this, *prev;
- for (prev = this = symbol->var_array[hash1];
- this; prev = this, this = this->ahnext) {
- if (this == bucket) {
- prev->ahnext = this->ahnext;
- this->ahnext = symbol->var_array[hash1];
- symbol->var_array[hash1] = this;
- }
- }
+ if (prev) { /* move found to front of chain */
+ prev->ahnext = bucket->ahnext;
+ bucket->ahnext = symbol->var_array[hash1];
+ symbol->var_array[hash1] = bucket;
}
return bucket;
- }
- if (bucket)
- chained = 1;
+ } else
+ prev = bucket; /* save previous list entry */
}
return NULL;
}
@@ -208,6 +200,10 @@ NODE *symbol, *subs;
return &(bucket->ahvalue);
}
}
+
+ /* It's not there, install it. */
+ if (do_lint && subs->stlen == 0)
+ warning("subscript of array is null string");
getnode(bucket);
bucket->type = Node_ahash;
bucket->ahname = dupnode(subs);
diff --git a/atari/Makefile.st b/atari/Makefile.st
index b1ca4988..38e3eadc 100644
--- a/atari/Makefile.st
+++ b/atari/Makefile.st
@@ -1,4 +1,5 @@
-# Makefile for GNU Awk - ST version
+# Makefile for GNU Awk - sample ST version.
+# This makefile is for the ST version of the gcc compiler and associated libraries.
#
# This is a subset of the full Makefile cut down for Atari ST
# gcc compiler is assumed
@@ -9,9 +10,9 @@
# You need sed.ttp for an automatic creation of config.h file!
# Check gulam script mkconf.g in atari directory.
# In a pinch you may create one by checking config/atari file and
-# editing config.h-dist by hand.
+# editing config.in by hand.
#
-# Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+# Copyright (C) 1986, 1988-1992 the Free Software Foundation, Inc.
#
# This file is part of GAWK, the GNU implementation of the
# AWK Progamming Language.
@@ -26,147 +27,119 @@
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
-# You should have received a copy of the GNU General Public License
-# along with GAWK; see the file COPYING. If not, write to
-# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-# User tunable macros
-
-DESTDIR=
-BINDIR= c:\bin
-MANDIR= c:\man\manl
-MANEXT= l
-RM=rm -f
-
-# CFLAGS: options to the C compiler
-#
-# -O optimize
-# -mshort use integers which are 16-bits wide (a "correct" size on ST)
-#
-# Set LIBS to any libraries that are machine specific
-
-
-# Comment out the next line if you don't have gcc.
-# Also choose just one of -g and -O.
-CC= gcc
-# if cross-compiler
-#CC= cgcc
-LIBS = -lpml16
-
-OPTIMIZE= -O -fstrength-reduce -fcombine-regs -fomit-frame-pointer
-#OPTIMIZE= -O
-AFLAGS = -G -x -mshort # -Wall
-DEBUG= #-DMALLOCDEBUG #-DMEMDEBUG #-DDEBUG #-DFUNC_TRACE #-DMPROF
-WARN= #-W -Wunused -Wimplicit -Wreturn-type -Wcomment # for gcc only
-
-# Parser to use on grammar -- if you don't have bison use the first one
+# cross-compiler
+CC= cgcc
+# native
+CC= gcc
+# comment out the following two lines if you do not want to use
+# 16-bit wide ints
+WIDTH = -mshort
+EXT=16
+
+# for gcc 1.40
+OPTIMIZE= -O -fstrength-reduce -fcombine-regs -fomit-frame-pointer
+# for gcc 2.2.2
+#OPTIMIZE= -O2 -fstrength-reduce -fomit-frame-pointer
+PROFILE= #-pg
+DEBUG= #-DMALLOCDEBUG #-DDEBUG #-DFUNC_TRACE #-DMPROF
+LINKSTATIC= #-Bstatic
+WARN= #-W -Wunused -Wimplicit -Wreturn-type -Wcomment # for gcc only
+
+# Parser to use on grammar - any one of the following will work
#PARSER = yacc
PARSER = byacc
#PARSER = bison -y
+# basename of parser output - adjust to your parser requirements
+POUTPUT = awk_tab
-#
-# With the exception of names of parser output files
-# need to customize this file below this point.
-#
+# Set LIBS to any libraries that are machine specific
+LIBS = -lpml$(EXT)
-FLAGS= $(DEBUG) $(AFLAGS)
-CFLAGS= $(FLAGS) $(LINKSTATIC) $(PROFILE) $(OPTIMIZE) $(WARN)
+FLAGS=-G -Xlinker -x $(WIDTH)
-# object files
-AWKOBJS = main.o eval.o builtin.o msg.o iop.o io.o field.o \
- array.o node.o missing.o re.o version.o
+CFLAGS= -DGAWK $(FLAGS) $(DEBUG) $(LINKSTATIC) $(PROFILE) $(OPTIMIZE) $(WARN)
-# basename of parser output
-#POUTPUT = awk.tab
-POUTPUT = awk_tab
+# object files
+AWKOBJS = main.o eval.o builtin.o msg.o iop.o io.o field.o array.o \
+ node.o version.o missing.o re.o # getopt.o
-ALLOBJS = $(AWKOBJS) $(POUTPUT).o
+ALLOBJS = $(AWKOBJS) awktab.o
# GNUOBJS
-# GNU stuff that gawk uses as library routines.
+# GNU stuff that gawk uses as library routines.
GNUOBJS= regex.o dfa.o $(ALLOCA)
# source and documentation files
-SRC = main.c eval.c builtin.c msg.c \
- iop.c io.c field.c array.c node.c missing.c re.c version.c
+SRC = main.c eval.c builtin.c msg.c version.c \
+ iop.c io.c field.c array.c node.c missing.c re.c getopt.c
-ALLSRC= $(SRC) $(POUTPUT).c
-#ALLSRC= $(SRC) awk_tab.c
+ALLSRC= $(SRC) awktab.c
-AWKSRC= awk.h awk.y $(ALLSRC) patchlevel.h protos.h config.h-dist
+AWKSRC= awk.h awk.y $(ALLSRC) patchlevel.h protos.h config.in getopt.h
GNUSRC = alloca.c alloca.s dfa.c dfa.h regex.c regex.h
-COPIES = missing/getopt.c missing/system.c missing/tzset.c \
+COPIES = missing/system.c missing/tzset.c \
missing/memcmp.c missing/memcpy.c missing/memset.c \
missing/random.c missing/strcase.c missing/strchr.c \
- missing/strerror.c missing/strtod.c missing/vprintf.c \
- missing/strftime.c missing/strftime.3 missing/strtol.c
+ missing/strerror.c missing/strtod.c \
+ missing/strftime.c missing/strftime.3
SUPPORT = support/texindex.c support/texinfo.tex
-DOCS= gawk.1 gawk.texinfo
+DOCS= gawk.1 gawk.texi
+
+INFOFILES= gawk.info gawk.info-* \
+ gawk.aux gawk.cp gawk.cps gawk.fn gawk.fns gawk.ky gawk.kys \
+ gawk.pg gawk.pgs gawk.toc gawk.tp gawk.tps gawk.vr gawk.vrs
+
+MISC = NEWS COPYING FUTURES Makefile.* PROBLEMS README* PORTS POSIX \
+ mungeconf configure ACKNOWLEDGMENT LIMITATIONS
-#INFOFILES= gawk-info gawk-info-1 gawk-info-2 gawk-info-3 gawk-info-4 \
-# gawk-info-5 gawk-info-6 gawk.aux gawk.cp gawk.cps gawk.fn \
-# gawk.fns gawk.ky gawk.kys gawk.pg gawk.pgs gawk.toc \
-# gawk.tp gawk.tps gawk.vr gawk.vrs
+OTHERS= pc/* atari/* vms/*
ALLDOC= gawk.dvi $(INFOFILES)
+ALLFILES= $(AWKSRC) $(GNUSRC) $(COPIES) $(MISC) $(DOCS) $(ALLDOC) $(OTHERS) \
+ $(SUPPORT)
+
+# set this for a version of toglclr you are using
+TOGLFLAGS = -fload
+
# rules to build gawk
-gawk: $(ALLOBJS) $(GNUOBJS) $(REOBJS)
- $(CC) -o gawk.ttp $(CFLAGS) $(ALLOBJS) $(GNUOBJS) $(REOBJS) $(LIBS)
- toglclr gawk.ttp
+gawk.ttp: $(ALLOBJS) $(GNUOBJS) $(REOBJS)
+ $(CC) -o $@ $(CFLAGS) $(ALLOBJS) $(GNUOBJS) $(REOBJS) $(LIBS)
+ toglclr $(TOGLFLAGS) $@
-$(AWKOBJS): awk.h config.h
+$(AWKOBJS) regex.o dfa.o: awk.h dfa.h regex.h
-dfa.o: awk.h config.h dfa.h
-regex.o: awk.h config.h regex.h
-main.o: patchlevel.h
+getopt.o: getopt.h
-$(POUTPUT).o: awk.h $(POUTPUT).c
+main.o: patchlevel.h
-$(POUTPUT).c: awk.y awk.h
- $(PARSER) awk.y
- sed '/^extern char .malloc(), .realloc();$$/d' ytab.c > $(POTPUT).c
- rm ytab.c
+awktab.c: awk.y
+ $(PARSER) -v awk.y
+ sed '/^extern char .malloc(), .realloc();$$/d' $(POUTPUT).c >awktab.c
+ rm $(POUTPUT).c
-config.h: config.h-dist
- pushd atari
- mkconf.g
- popd
+config.h: config.in
+ @echo You must provide a config.h!
+ @echo Run \"./configure\" to build it for known systems
+ @echo or copy config.in to config.h and edit it.; exit 1
-# One of these rules should have already been selected by running mkconf.
+gawk.dvi: gawk.texi
+ tex gawk.texi; texindex gawk.??
+ tex gawk.texi
-##MAKE_ALLOCA_S## alloca.o: alloca.s
-##MAKE_ALLOCA_S## /lib/cpp < alloca.s | sed '/^#/d' > t.s
-##MAKE_ALLOCA_S## as t.s -o alloca.o
-##MAKE_ALLOCA_S## rm t.s
-##MAKE_ALLOCA_C## alloca.o: alloca.c
-install: gawk
- cp gawk.ttp $(DESTDIR)$(BINDIR)
- cp gawk.1 $(DESTDIR)$(MANDIR)/gawk.$(MANEXT)
+gawk.info: gawk.texi
+ makeinfo gawk.texi
clean:
- $(RM) gawk.ttp *.o core awk.output $(POUTPUT).c gmon.out make.out \
- y.output
+ rm *.o *.orig *.rej */*.orig */*.rej
+
+cleaner: clean
+ rm gawk.ttp awktab.c
clobber: clean
- $(RM) $(ALLDOC) gawk.log
-
-gawk.dvi: gawk.texinfo
- tex gawk.texinfo ; texindex gawk.??
- tex gawk.texinfo ; texindex gawk.??
- tex gawk.texinfo
-
-#$(INFOFILES): gawk.texinfo
-# makeinfo gawk.texinfo
-
-# Many tests in test directory depend on having Bourne shell around.
-# Before using this target edit test Makefile and test scripts adapting
-# them to your local conditions.
-test: gawk
- pushd test
- make
- popd
+ rm $(ALLDOC) gawk.log
diff --git a/atari/mkconf.g b/atari/mkconf.g
index b7db88a9..f551218f 100644
--- a/atari/mkconf.g
+++ b/atari/mkconf.g
@@ -1,6 +1,6 @@
#
# gulam script to produce configuration file for Atari ST;
-# performs the same job as mkconf, but only for this specific configuration;
+# performs the same job as configure, but only for this specific configuration;
# it is assumed that it is located in a subdirectory .\atari
#
if { -e ..\config\atari }
diff --git a/atari/system.c b/atari/system.c
index 26943bb4..41552e5b 100644
--- a/atari/system.c
+++ b/atari/system.c
@@ -47,7 +47,7 @@ system(const char *command)
#endif
char cmdln[1024];
char *args[64];
- char *getenv();
+ char *getenv(const char *);
if(!command)
return(ERROR);
diff --git a/atari/tmpnam.c b/atari/tmpnam.c
index bd68c4c0..b5ab45bd 100644
--- a/atari/tmpnam.c
+++ b/atari/tmpnam.c
@@ -15,9 +15,7 @@
extern char * getenv(const char *);
extern char * mktemp(char *);
-extern char * strcpy(char *, const char *);
-extern char * strcat(char *, const char *);
-extern size_t strlen(const char *s);
+char * tempnam(const char *path, const char *base);
static char pattern[] = "\\gwkXXXXX";
char *tmpnam(buf)
diff --git a/awk.h b/awk.h
index 08cc42ab..5fcf7c2f 100644
--- a/awk.h
+++ b/awk.h
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -25,23 +25,31 @@
/* ------------------------------ Includes ------------------------------ */
#include <stdio.h>
+#include <limits.h>
#include <ctype.h>
#include <setjmp.h>
#include <varargs.h>
#include <time.h>
#include <errno.h>
-#include <signal.h>
+#if !defined(errno) && !defined(MSDOS)
+extern int errno;
+#endif
+#ifdef __GNU_LIBRARY__
+#include <signum.h>
+#endif
/* ----------------- System dependencies (with more includes) -----------*/
-#ifndef VAXC
+#if !defined(VMS) || (!defined(VAXC) && !defined(__DECC))
#include <sys/types.h>
#include <sys/stat.h>
-#else /* VMS w/ Digital's "VAX C" compiler */
+#else /* VMS w/ VAXC or DECC */
#include <types.h>
#include <stat.h>
#include <file.h> /* avoid <fcntl.h> in io.c */
-#endif /*VAXC*/
+#endif
+
+#include <signal.h>
#include "config.h"
@@ -77,8 +85,10 @@ typedef unsigned int size_t;
#else
#if defined(atarist) || defined(VMS)
#include <unixlib.h>
-#else
+#else /* atarist || VMS */
+#ifndef MSDOS
#include <unistd.h>
+#endif /* MSDOS */
#endif /* atarist || VMS */
#endif /* Next */
#else /* STDC_HEADERS */
@@ -90,6 +100,7 @@ extern char * getenv P((char *name));
extern double atof P((char *s));
#endif
+#ifndef __GNUC__
#ifdef sparc
/* nasty nasty SunOS-ism */
#include <alloca.h>
@@ -97,10 +108,11 @@ extern double atof P((char *s));
extern char *alloca();
#endif
#else /* not sparc */
-#if (!defined(atarist)) && (!defined(NeXT)) && (!defined(alloca))
+#if !defined(alloca) && !defined(ALLOCA_PROTO)
extern char *alloca();
-#endif /* atarist */
+#endif
#endif /* sparc */
+#endif /* __GNUC__ */
#ifdef HAVE_UNDERSCORE_SETJMP
/* nasty nasty berkelixm */
@@ -109,32 +121,47 @@ extern char *alloca();
#endif
/*
- * if you don't have vprintf, but you are BSD, the version defined in
- * vprintf.c should do the trick. Otherwise, try this and cross your fingers.
+ * if you don't have vprintf, try this and cross your fingers.
*/
-#if defined(VPRINTF_MISSING) && !defined(DOPRNT_MISSING) && !defined(BSDSTDIO)
+#if defined(VPRINTF_MISSING)
#define vfprintf(fp,fmt,arg) _doprnt((fmt), (arg), (fp))
#endif
#ifdef VMS
/* some macros to redirect to code in vms/vms_misc.c */
#define exit vms_exit
+#define open vms_open
#define strerror vms_strerror
#define strdup vms_strdup
extern void exit P((int));
+extern int open P((const char *,int,...));
extern char *strerror P((int));
extern char *strdup P((const char *str));
+extern int vms_devopen P((const char *,int));
# ifndef NO_TTY_FWRITE
#define fwrite tty_fwrite
#define fclose tty_fclose
extern size_t fwrite P((const void *,size_t,size_t,FILE *));
extern int fclose P((FILE *));
# endif
+extern FILE *popen P((const char *,const char *));
+extern int pclose P((FILE *));
extern void vms_arg_fixup P((int *,char ***));
+/* some things not in STDC_HEADERS */
+extern int gnu_strftime P((char *,size_t,const char *,const struct tm *));
+extern int unlink P((const char *));
+extern int getopt P((int,char **,char *));
+extern int isatty P((int));
+#ifndef fileno
+extern int fileno P((FILE *));
+#endif
+extern int close(), dup(), dup2(), fstat(), read(), stat();
#endif /*VMS*/
-#ifndef _MSC_VER
-extern int errno; /* not necessary on many systems, but it can't hurt */
+#ifdef MSDOS
+#include <io.h>
+extern FILE *popen P((char *, char *));
+extern int pclose P((FILE *));
#endif
#define GNU_REGEX
@@ -288,7 +315,8 @@ typedef enum {
Node_OFS,
Node_ORS,
Node_OFMT,
- Node_CONVFMT
+ Node_CONVFMT,
+ Node_K_nextfile
} NODETYPE;
/*
@@ -350,17 +378,16 @@ typedef struct exp_node {
} sub;
NODETYPE type;
unsigned short flags;
-# define MEM 0x7
# define MALLOC 1 /* can be free'd */
# define TEMP 2 /* should be free'd */
# define PERM 4 /* can't be free'd */
-# define VAL 0x18
-# define NUM 8 /* numeric value is current */
+# define STRING 8 /* assigned as string */
# define STR 16 /* string value is current */
-# define NUMERIC 32 /* entire string is numeric */
+# define NUM 32 /* numeric value is current */
# define NUMBER 64 /* assigned as number */
-# define STRING 128 /* assigned as string */
-# define MAYBE_NUM 256
+# define MAYBE_NUM 128 /* user input: if NUMERIC then
+ * a NUMBER
+ */
} NODE;
#define lnode sub.nodep.l.lptr
@@ -424,7 +451,7 @@ typedef struct iobuf {
char *end;
size_t size; /* this will be determined by an fstat() call */
int cnt;
- size_t secsiz;
+ long secsiz;
int flag;
# define IOP_IS_TTY 1
} IOBUF;
@@ -443,6 +470,7 @@ struct redirect {
# define RED_APPEND 16
# define RED_NOBUF 32
# define RED_USED 64
+# define RED_EOF 128
char *value;
FILE *fp;
IOBUF *iop;
@@ -460,11 +488,7 @@ struct redirect {
/* Return means return from a function call; leave value in ret_node */
#define TAG_RETURN 3
-#if defined(MSDOS) || (defined(atarist)) && (defined(__MSHORT__))
-#define HUGE 0x7fff
-#else
-#define HUGE 0x7fffffff
-#endif
+#define HUGE INT_MAX
/* -------------------------- External variables -------------------------- */
/* gawk builtin variables */
@@ -505,6 +529,8 @@ extern int field0_valid;
extern int strict;
extern int do_posix;
extern int do_lint;
+extern int in_begin_rule;
+extern int in_end_rule;
/* ------------------------- Pseudo-functions ------------------------- */
@@ -531,48 +557,33 @@ extern int do_lint;
r_tree_eval((_t))))))
#endif
-#define make_number(x) mk_number((x), (MALLOC|NUM|NUMERIC|NUMBER))
-#define tmp_number(x) mk_number((x), (MALLOC|TEMP|NUM|NUMERIC|NUMBER))
+#define make_number(x) mk_number((x), (MALLOC|NUM|NUMBER))
+#define tmp_number(x) mk_number((x), (MALLOC|TEMP|NUM|NUMBER))
-#define free_temp(n) if ((n)->flags&TEMP) { unref(n); } else
+#define free_temp(n) do {if ((n)->flags&TEMP) { unref(n); }} while (0)
#define make_string(s,l) make_str_node((s), SZTC (l),0)
#define SCAN 1
#define ALREADY_MALLOCED 2
-#define cant_happen() fatal("line %d, file: %s; bailing out", \
- __LINE__, basename(__FILE__));
-#ifdef MEMDEBUG
-#define memmsg(X,Y,Z,ZZ) \
- fprintf(stdout, "malloc: %s: %s: %ld 0x%08lx\n", Z, X, (long)Y, ZZ)
-#if defined(__STDC__) && !defined(NO_TOKEN_PASTING)
-#define free(s) fprintf(stdout, "free: %s: 0x%08lx\n", #s, (long)s), do_free(s)
-#else
-#define free(s) fprintf(stdout, "free: s: 0x%08lx\n", (long)s), do_free(s)
-#endif
-#else /* MEMDEBUG */
-#define memmsg(x,y,z,zz)
-#endif /* MEMDEBUG */
+#define cant_happen() fatal("internal error line %d, file: %s", \
+ __LINE__, __FILE__);
#if defined(__STDC__) && !defined(NO_TOKEN_PASTING)
-#define emalloc(var,ty,x,str) if ((var=(ty)malloc((MALLOC_ARG_T)(x)))==NULL)\
- fatal("%s: %s: can't allocate memory (%s)",\
- (str), #var, strerror(errno));\
- else memmsg(#var, x, str, var)
-#define erealloc(var,ty,x,str) if((var=(ty)realloc((char *)var,\
- (MALLOC_ARG_T)(x)))==NULL)\
- fatal("%s: %s: can't allocate memory (%s)",\
- (str), #var, strerror(errno));\
- else memmsg("re:" #var, x, str, var)
+#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), #var, strerror(errno)),0))
+#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\
+ (MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), #var, strerror(errno)),0))
#else /* __STDC__ */
-#define emalloc(var,ty,x,str) if ((var=(ty)malloc((MALLOC_ARG_T)(x)))==NULL)\
- fatal("%s: %s: can't allocate memory (%s)",\
- (str), "var", strerror(errno));\
- else memmsg("var", x, str, var)
-#define erealloc(var,ty,x,str) if((var=(ty)realloc((char *)var,\
- (MALLOC_ARG_T)(x)))==NULL)\
- fatal("%s: %s: can't allocate memory (%s)",\
- (str), "var", strerror(errno));\
- else memmsg("re: var", x, str, var)
+#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), "var", strerror(errno)),0))
+#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\
+ (MALLOC_ARG_T)(x))) ||\
+ (fatal("%s: %s: can't allocate memory (%s)",\
+ (str), "var", strerror(errno)),0))
#endif /* __STDC__ */
#ifdef DEBUG
@@ -596,18 +607,6 @@ extern double _msc51bug;
/* ------------- Function prototypes or defs (as appropriate) ------------- */
-extern void set_NF();
-extern void set_FIELDWIDTHS();
-extern void set_NR();
-extern void set_FNR();
-extern void set_FS();
-extern void set_RS();
-extern void set_IGNORECASE();
-extern void set_OFMT();
-extern void set_CONVFMT();
-extern void set_OFS();
-extern void set_ORS();
-
/* array.c */
extern NODE *concat_exp P((NODE *tree));
extern void assoc_clear P((NODE *symbol));
@@ -664,7 +663,7 @@ extern NODE *do_prvars P((void));
extern NODE *do_bp P((void));
extern void do_free P((char *s));
/* dfa.c */
-extern void regsyntax P((int bits, int fold));
+extern void regsyntax P((long bits, int fold));
extern void regparse P((const char *s, size_t len, struct regexp *r));
extern void reganalyze P((struct regexp *r, int searchflag));
extern void regstate P((int s, struct regexp *r, int trans[]));
@@ -673,13 +672,17 @@ extern char *regexecute P((struct regexp *r, char *begin,
extern void reginit P((struct regexp *r));
extern void regcompile P((const char *s, size_t len,
struct regexp *r, int searchflag));
-extern void regfree P((struct regexp *r));
+extern void reg_free P((struct regexp *r));
/* eval.c */
-extern int interpret P((NODE *tree));
+extern int interpret P((NODE *volatile tree));
extern NODE *r_tree_eval P((NODE *tree));
extern int cmp_nodes P((NODE *t1, NODE *t2));
extern NODE **get_lhs P((NODE *ptr, Func_ptr *assign));
extern void set_IGNORECASE P((void));
+void set_OFS P((void));
+void set_ORS P((void));
+void set_OFMT P((void));
+void set_CONVFMT P((void));
/* field.c */
extern void init_fields P((void));
extern void set_record P((char *buf, int cnt, int freeold));
@@ -701,6 +704,7 @@ extern int close_io P((void));
extern int devopen P((char *name, char *mode));
extern int pathopen P((char *file));
extern NODE *do_getline P((NODE *tree));
+extern void do_nextfile P((void));
/* iop.c */
extern int optimal_bufsize P((int fd));
extern IOBUF *iop_alloc P((int fd));
@@ -711,16 +715,18 @@ extern Regexp *mk_re_parse P((char *s, int ignorecase));
extern void load_environ P((void));
extern char *arg_assign P((char *arg));
extern SIGTYPE catchsig P((int sig, int code));
-extern const char *basename P((const char *));
/* msg.c */
-#if 0 /* old varargs.h stuff */
-extern void msg P((int va_alist));
-extern void warning P((int va_alist));
-extern void fatal P((int va_alist));
+#ifdef MSDOS
+extern void err P((char *s, char *emsg, char *va_list, ...));
+extern void msg P((char *va_alist, ...));
+extern void warning P((char *va_alist, ...));
+extern void fatal P((char *va_alist, ...));
+#else
+extern void err ();
+extern void msg ();
+extern void warning ();
+extern void fatal ();
#endif
-void msg ();
-void warning ();
-void fatal ();
/* node.c */
extern AWKNUM r_force_number P((NODE *n));
extern NODE *r_force_string P((NODE *s));
@@ -736,23 +742,13 @@ extern void unref P((NODE *tmp));
extern int parse_escape P((char **string_ptr));
/* re.c */
extern Regexp *make_regexp P((NODE *s, int ignorecase, int dfa));
-extern int research P((Regexp *rp, char *str, int len, int need_start));
+extern int research P((Regexp *rp, char *str, int start, int len, int need_start));
extern void refree P((Regexp *rp));
-extern void regerror P((const char *s));
+extern void reg_error P((const char *s));
extern Regexp *re_update P((NODE *t));
-/* regex.c */
-extern int re_set_syntax P((int syntax));
-extern char *re_compile_pattern P((char *pattern,
- size_t size,
- struct re_pattern_buffer *bufp ));
-
-extern int re_search P((struct re_pattern_buffer *pbufp,
- char *string,
- int size,
- int startpos,
- int range,
- struct re_registers *regs ));
-extern void re_compile_fastmap P((struct re_pattern_buffer *bufp));
+extern void resyntax P((int syntax));
+extern void resetup P((void));
+
/* strcase.c */
extern int strcasecmp P((const char *s1, const char *s2));
extern int strncasecmp P((const char *s1, const char *s2, register size_t n));
diff --git a/awk.y b/awk.y
index d2d2dbc5..ee3e60e5 100644
--- a/awk.y
+++ b/awk.y
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -29,6 +29,7 @@
#endif
#define YYMAXDEPTH 300
+#define YYSSIZE YYMAXDEPTH
#include "awk.h"
@@ -36,7 +37,7 @@ static void yyerror (); /* va_alist */
static char *get_src_buf P((void));
static int yylex P((void));
static NODE *node_common P((NODETYPE op));
-static NODE *snode P((NODE *subn, NODETYPE op, int index));
+static NODE *snode P((NODE *subn, NODETYPE op, int sindex));
static NODE *mkrangenode P((NODE *cpair));
static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
static NODE *append_right P((NODE *list, NODE *new));
@@ -93,6 +94,7 @@ extern NODE *end_block;
%type <nodeval> input_redir output_redir
%type <nodetypeval> print
%type <sval> func_name
+%type <lval> lex_builtin
%token <sval> FUNC_CALL NAME REGEXP
%token <lval> ERROR
@@ -239,6 +241,18 @@ func_name
{ $$ = $1; }
| FUNC_CALL
{ $$ = $1; }
+ | lex_builtin
+ {
+ yyerror("%s() is a built-in function, it cannot be redefined",
+ tokstart);
+ errcount++;
+ /* yyerrok; */
+ }
+ ;
+
+lex_builtin
+ : LEX_BUILTIN
+ | LEX_LENGTH
;
function_prologue
@@ -292,9 +306,9 @@ regexp
;
action
- : l_brace statements r_brace opt_semi
+ : l_brace statements r_brace opt_semi opt_nls
{ $$ = $2 ; }
- | l_brace r_brace opt_semi
+ | l_brace r_brace opt_semi opt_nls
{ $$ = NULL; }
;
@@ -366,10 +380,19 @@ statement
$$ = node ($2, $1, $3);
}
- | LEX_NEXT
- { if (! io_allowed) yyerror("next used in BEGIN or END action"); }
- statement_term
- { $$ = node ((NODE *)NULL, Node_K_next, (NODE *)NULL); }
+ | LEX_NEXT opt_exp statement_term
+ { NODETYPE type;
+
+ if (! io_allowed) yyerror("next used in BEGIN or END action");
+ if ($2 && $2 == lookup("file")) {
+ if (do_lint)
+ warning("`next file' is a gawk extension");
+ else if (strict || do_posix)
+ yyerror("`next file' is a gawk extension");
+ type = Node_K_nextfile;
+ } else type = Node_K_next;
+ $$ = node ((NODE *)NULL, type, (NODE *)NULL);
+ }
| LEX_EXIT opt_exp statement_term
{ $$ = node ($2, Node_K_exit, (NODE *)NULL); }
| LEX_RETURN
@@ -514,7 +537,11 @@ expression_list
exp : variable ASSIGNOP
{ want_assign = 0; }
exp
- { $$ = node ($1, $2, $4); }
+ {
+ if (do_lint && $4->type == Node_regex) /* $4 is the right-hand operand; $3 is the mid-rule action */
+ warning("Regular expression on right of assignment.");
+ $$ = node ($1, $2, $4);
+ }
| '(' expression_list r_paren LEX_IN NAME
{ $$ = node (variable($5,1), Node_in_array, $2); }
| exp '|' LEX_GETLINE opt_variable
@@ -551,7 +578,11 @@ exp : variable ASSIGNOP
| exp LEX_IN NAME
{ $$ = node (variable($3,1), Node_in_array, $1); }
| exp RELOP exp
- { $$ = node ($1, $2, $3); }
+ {
+ if (do_lint && $3->type == Node_regex) /* $3 is the right-hand operand of RELOP */
+ warning("Regular expression on right of comparison.");
+ $$ = node ($1, $2, $3);
+ }
| exp '<' exp
{ $$ = node ($1, Node_less, $3); }
| exp '>' exp
@@ -620,19 +651,22 @@ non_post_simp_exp
{ $$ = node ($2, Node_not,(NODE *) NULL); }
| '(' exp r_paren
{ $$ = $2; }
- | LEX_BUILTIN '(' opt_expression_list r_paren
- { $$ = snode ($3, Node_builtin, (int) $1); }
+ | LEX_BUILTIN
+ {
+ if (! io_allowed && strcmp(tokstart, "nextfile") == 0)
+ yyerror("nextfile() is illegal in BEGIN and END");
+ }
+ '(' opt_expression_list r_paren
+ { $$ = snode ($4, Node_builtin, (int) $1); }
| LEX_LENGTH '(' opt_expression_list r_paren
{ $$ = snode ($3, Node_builtin, (int) $1); }
| LEX_LENGTH
{
if (do_lint)
- warning("call of length without parentheses is not portable");
+ warning("call of `length' without parentheses is not portable");
$$ = snode ((NODE *)NULL, Node_builtin, (int) $1);
- if (do_posix) {
- yyerror("POSIX requires parentheses for call to `length'");
- yyerrok;
- }
+ if (do_posix)
+ warning( "call of `length' without parentheses is deprecated by POSIX");
}
| FUNC_CALL '(' opt_expression_list r_paren
{
@@ -726,7 +760,7 @@ struct token {
# define VERSION 0xFF00 /* old awk is zero */
# define NOT_OLD 0x0100 /* feature not in old awk */
# define NOT_POSIX 0x0200 /* feature not in POSIX */
-# define GAWK 0x0400 /* gawk extension */
+# define GAWKX 0x0400 /* gawk extension */
NODE *(*ptr) (); /* function that implements this keyword */
};
@@ -757,7 +791,7 @@ static struct token tokentab[] = {
{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
-{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
+{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
{"if", Node_K_if, LEX_IF, 0, 0},
{"in", Node_illegal, LEX_IN, 0, 0},
{"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
@@ -774,12 +808,12 @@ static struct token tokentab[] = {
{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split},
{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
-{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
-{"strftime", Node_builtin, LEX_BUILTIN, GAWK|A(1)|A(2), do_strftime},
-{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
+{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
+{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_strftime},
+{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
-{"systime", Node_builtin, LEX_BUILTIN, GAWK|A(0), do_systime},
+{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime},
{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
{"while", Node_K_while, LEX_WHILE, 0, 0},
@@ -829,9 +863,9 @@ va_dcl
}
va_start(args);
mesg = va_arg(args, char *);
- (void) vsprintf(bp, mesg, args);
+ strcpy(bp, mesg);
+ err("", buf, args);
va_end(args);
- msg(buf);
exit(2);
}
@@ -881,7 +915,7 @@ get_src_buf()
lexptr_begin = buf;
lexend = lexptr + 1;
} else
- lexptr = lexptr_begin = NULL;
+ lexeme = lexptr = lexptr_begin = NULL;
return lexptr;
}
if (!samefile) {
@@ -889,7 +923,7 @@ get_src_buf()
if (source == NULL) {
if (buf)
free(buf);
- return lexptr = lexptr_begin = NULL;
+ return lexeme = lexptr = lexptr_begin = NULL;
}
fd = pathopen(source);
if (fd == -1)
@@ -999,7 +1033,7 @@ yylex()
want_regexp = 0;
token = tokstart;
- while (c = nextc()) {
+ while ((c = nextc()) != 0) {
switch (c) {
case '[':
in_brack = 1;
@@ -1266,6 +1300,13 @@ retry:
pushback();
return '-';
+ case '.':
+ c = nextc();
+ pushback();
+ if (!isdigit(c))
+ return '.';
+ else
+ c = '.'; /* FALL THROUGH */
case '0':
case '1':
case '2':
@@ -1276,7 +1317,6 @@ retry:
case '7':
case '8':
case '9':
- case '.':
/* It's a number */
for (;;) {
int gotnumber = 0;
@@ -1396,7 +1436,7 @@ retry:
low = 0;
high = (sizeof (tokentab) / sizeof (tokentab[0])) - 1;
while (low <= high) {
- int i, c;
+ int i/* , c */;
mid = (low + high) / 2;
c = *tokstart - tokentab[mid].operator[0];
@@ -1408,7 +1448,7 @@ retry:
low = mid + 1;
} else {
if (do_lint) {
- if (tokentab[mid].flags & GAWK)
+ if (tokentab[mid].flags & GAWKX)
warning("%s() is a gawk extension",
tokentab[mid].operator);
if (tokentab[mid].flags & NOT_POSIX)
@@ -1418,7 +1458,7 @@ retry:
warning("%s is not supported in old awk",
tokentab[mid].operator);
}
- if ((strict && (tokentab[mid].flags & GAWK))
+ if ((strict && (tokentab[mid].flags & GAWKX))
|| (do_posix && (tokentab[mid].flags & NOT_POSIX)))
break;
if (tokentab[mid].class == LEX_BUILTIN
@@ -1480,9 +1520,9 @@ NODETYPE op;
* Checks for arg. count and supplies defaults where possible.
*/
static NODE *
-snode(subn, op, index)
+snode(subn, op, idx)
NODETYPE op;
-int index;
+int idx;
NODE *subn;
{
register NODE *r;
@@ -1500,12 +1540,12 @@ NODE *subn;
}
/* check against how many args. are allowed for this builtin */
- args_allowed = tokentab[index].flags & ARGS;
+ args_allowed = tokentab[idx].flags & ARGS;
if (args_allowed && !(args_allowed & A(nexp)))
fatal("%s() cannot have %d argument%c",
- tokentab[index].operator, nexp, nexp == 1 ? ' ' : 's');
+ tokentab[idx].operator, nexp, nexp == 1 ? ' ' : 's');
- r->proc = tokentab[index].ptr;
+ r->proc = tokentab[idx].ptr;
/* special case processing for a few builtins */
if (nexp == 0 && r->proc == do_length) {
diff --git a/awk.tab.c b/awktab.c
index f19b16ca..0eb809a1 100644
--- a/awk.tab.c
+++ b/awktab.c
@@ -5,6 +5,7 @@
#endif
#define YYMAXDEPTH 300
+#define YYSSIZE YYMAXDEPTH
#include "awk.h"
@@ -12,7 +13,7 @@ static void yyerror (); /* va_alist */
static char *get_src_buf P((void));
static int yylex P((void));
static NODE *node_common P((NODETYPE op));
-static NODE *snode P((NODE *subn, NODETYPE op, int index));
+static NODE *snode P((NODE *subn, NODETYPE op, int sindex));
static NODE *mkrangenode P((NODE *cpair));
static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
static NODE *append_right P((NODE *list, NODE *new));
@@ -47,7 +48,7 @@ extern int errcount;
extern NODE *begin_block;
extern NODE *end_block;
-# line 75 "awk.y"
+# line 76 "awk.y"
typedef union {
long lval;
AWKNUM fval;
@@ -103,7 +104,7 @@ extern int yyerrflag;
YYSTYPE yylval, yyval;
# define YYERRCODE 256
-# line 717 "awk.y"
+# line 751 "awk.y"
struct token {
@@ -116,7 +117,7 @@ struct token {
# define VERSION 0xFF00 /* old awk is zero */
# define NOT_OLD 0x0100 /* feature not in old awk */
# define NOT_POSIX 0x0200 /* feature not in POSIX */
-# define GAWK 0x0400 /* gawk extension */
+# define GAWKX 0x0400 /* gawk extension */
NODE *(*ptr) (); /* function that implements this keyword */
};
@@ -147,7 +148,7 @@ static struct token tokentab[] = {
{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
-{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
+{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
{"if", Node_K_if, LEX_IF, 0, 0},
{"in", Node_illegal, LEX_IN, 0, 0},
{"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
@@ -164,12 +165,12 @@ static struct token tokentab[] = {
{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split},
{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
-{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
-{"strftime", Node_builtin, LEX_BUILTIN, GAWK|A(1)|A(2), do_strftime},
-{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
+{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
+{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_strftime},
+{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
-{"systime", Node_builtin, LEX_BUILTIN, GAWK|A(0), do_systime},
+{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime},
{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
{"while", Node_K_while, LEX_WHILE, 0, 0},
@@ -219,9 +220,9 @@ va_dcl
}
va_start(args);
mesg = va_arg(args, char *);
- (void) vsprintf(bp, mesg, args);
+ strcpy(bp, mesg);
+ err("", buf, args);
va_end(args);
- msg(buf);
exit(2);
}
@@ -271,7 +272,7 @@ get_src_buf()
lexptr_begin = buf;
lexend = lexptr + 1;
} else
- lexptr = lexptr_begin = NULL;
+ lexeme = lexptr = lexptr_begin = NULL;
return lexptr;
}
if (!samefile) {
@@ -279,7 +280,7 @@ get_src_buf()
if (source == NULL) {
if (buf)
free(buf);
- return lexptr = lexptr_begin = NULL;
+ return lexeme = lexptr = lexptr_begin = NULL;
}
fd = pathopen(source);
if (fd == -1)
@@ -389,7 +390,7 @@ yylex()
want_regexp = 0;
token = tokstart;
- while (c = nextc()) {
+ while ((c = nextc()) != 0) {
switch (c) {
case '[':
in_brack = 1;
@@ -656,6 +657,13 @@ retry:
pushback();
return '-';
+ case '.':
+ c = nextc();
+ pushback();
+ if (!isdigit(c))
+ return '.';
+ else
+ c = '.'; /* FALL THROUGH */
case '0':
case '1':
case '2':
@@ -666,7 +674,6 @@ retry:
case '7':
case '8':
case '9':
- case '.':
/* It's a number */
for (;;) {
int gotnumber = 0;
@@ -786,7 +793,7 @@ retry:
low = 0;
high = (sizeof (tokentab) / sizeof (tokentab[0])) - 1;
while (low <= high) {
- int i, c;
+ int i/* , c */;
mid = (low + high) / 2;
c = *tokstart - tokentab[mid].operator[0];
@@ -798,7 +805,7 @@ retry:
low = mid + 1;
} else {
if (do_lint) {
- if (tokentab[mid].flags & GAWK)
+ if (tokentab[mid].flags & GAWKX)
warning("%s() is a gawk extension",
tokentab[mid].operator);
if (tokentab[mid].flags & NOT_POSIX)
@@ -808,7 +815,7 @@ retry:
warning("%s is not supported in old awk",
tokentab[mid].operator);
}
- if ((strict && (tokentab[mid].flags & GAWK))
+ if ((strict && (tokentab[mid].flags & GAWKX))
|| (do_posix && (tokentab[mid].flags & NOT_POSIX)))
break;
if (tokentab[mid].class == LEX_BUILTIN
@@ -870,9 +877,9 @@ NODETYPE op;
* Checks for arg. count and supplies defaults where possible.
*/
static NODE *
-snode(subn, op, index)
+snode(subn, op, idx)
NODETYPE op;
-int index;
+int idx;
NODE *subn;
{
register NODE *r;
@@ -890,12 +897,12 @@ NODE *subn;
}
/* check against how many args. are allowed for this builtin */
- args_allowed = tokentab[index].flags & ARGS;
+ args_allowed = tokentab[idx].flags & ARGS;
if (args_allowed && !(args_allowed & A(nexp)))
fatal("%s() cannot have %d argument%c",
- tokentab[index].operator, nexp, nexp == 1 ? ' ' : 's');
+ tokentab[idx].operator, nexp, nexp == 1 ? ' ' : 's');
- r->proc = tokentab[index].ptr;
+ r->proc = tokentab[idx].ptr;
/* special case processing for a few builtins */
if (nexp == 0 && r->proc == do_length) {
@@ -1146,399 +1153,416 @@ int yyexca[] ={
0, -1,
-2, 0,
-1, 5,
- 0, 59,
+ 0, 61,
-2, 0,
-1, 77,
- 264, 76,
- 267, 76,
- 62, 76,
- 124, 76,
- 59, 76,
- -2, 0,
--1, 111,
- 41, 84,
+ 264, 78,
+ 267, 78,
+ 62, 78,
+ 124, 78,
+ 59, 78,
-2, 0,
-1, 112,
- 41, 84,
+ 41, 86,
-2, 0,
-1, 113,
- 41, 84,
+ 41, 86,
-2, 0,
-1, 126,
266, 0,
- -2, 99,
+ -2, 101,
-1, 128,
263, 0,
60, 0,
62, 0,
124, 0,
- -2, 103,
+ -2, 105,
-1, 129,
263, 0,
60, 0,
62, 0,
124, 0,
- -2, 104,
+ -2, 106,
-1, 130,
263, 0,
60, 0,
62, 0,
124, 0,
- -2, 105,
+ -2, 107,
-1, 149,
- 264, 77,
- 267, 77,
- 62, 77,
- 124, 77,
- 59, 77,
+ 264, 79,
+ 267, 79,
+ 62, 79,
+ 124, 79,
+ 59, 79,
+ -2, 0,
+-1, 188,
+ 41, 86,
-2, 0,
--1, 186,
- 41, 85,
+-1, 190,
+ 41, 87,
-2, 0,
--1, 220,
- 41, 67,
+-1, 224,
+ 41, 69,
-2, 0,
--1, 248,
+-1, 253,
266, 0,
- -2, 116,
--1, 250,
- 263, 0,
-2, 118,
--1, 258,
- 41, 68,
+-1, 255,
+ 263, 0,
+ -2, 120,
+-1, 263,
+ 41, 70,
-2, 0,
};
-# define YYNPROD 158
-# define YYLAST 1843
+# define YYNPROD 161
+# define YYLAST 1998
int yyact[]={
- 62, 212, 20, 13, 107, 24, 13, 87, 225, 17,
- 88, 89, 36, 123, 35, 82, 25, 291, 91, 235,
- 45, 45, 4, 37, 168, 88, 89, 286, 45, 285,
- 264, 88, 89, 200, 260, 24, 259, 261, 52, 184,
- 166, 165, 249, 161, 63, 127, 199, 22, 100, 186,
- 158, 173, 82, 122, 65, 124, 125, 126, 63, 128,
- 129, 130, 131, 20, 218, 82, 24, 90, 107, 171,
- 17, 82, 63, 36, 45, 35, 64, 25, 63, 174,
- 159, 136, 63, 93, 22, 171, 228, 63, 263, 45,
- 220, 185, 22, 270, 68, 202, 163, 144, 142, 175,
- 113, 103, 112, 111, 6, 252, 101, 227, 183, 167,
- 39, 102, 183, 183, 183, 160, 26, 157, 110, 133,
- 20, 86, 82, 24, 11, 140, 97, 17, 121, 24,
- 36, 46, 35, 98, 25, 48, 36, 41, 35, 164,
- 25, 108, 82, 194, 82, 159, 45, 141, 91, 77,
- 257, 71, 148, 22, 149, 68, 258, 182, 100, 10,
- 27, 159, 5, 1, 219, 50, 118, 12, 221, 222,
- 224, 120, 20, 0, 203, 24, 189, 0, 97, 17,
- 0, 24, 36, 0, 35, 98, 25, 0, 36, 0,
- 35, 0, 233, 0, 0, 197, 193, 198, 45, 236,
- 0, 240, 241, 242, 187, 188, 0, 190, 0, 0,
- 22, 217, 0, 0, 0, 192, 0, 0, 0, 183,
- 0, 0, 0, 0, 0, 135, 30, 23, 4, 4,
- 33, 34, 0, 0, 0, 201, 0, 213, 265, 0,
- 83, 0, 80, 81, 72, 73, 74, 75, 76, 84,
- 85, 78, 79, 0, 18, 0, 169, 23, 31, 32,
- 28, 29, 22, 82, 276, 253, 20, 159, 255, 24,
- 169, 232, 82, 17, 0, 0, 36, 0, 35, 159,
- 25, 0, 0, 273, 169, 267, 67, 30, 23, 82,
- 274, 33, 34, 59, 169, 60, 82, 82, 82, 204,
- 0, 83, 19, 80, 81, 72, 73, 74, 75, 76,
- 84, 85, 78, 79, 0, 18, 0, 279, 0, 31,
- 32, 28, 29, 95, 0, 0, 0, 0, 0, 289,
- 103, 0, 0, 0, 0, 101, 104, 0, 105, 0,
- 102, 0, 0, 67, 30, 23, 0, 0, 33, 34,
- 30, 23, 0, 0, 33, 34, 0, 53, 83, 0,
- 80, 81, 72, 73, 74, 75, 76, 84, 85, 78,
- 79, 0, 18, 0, 0, 0, 31, 32, 28, 29,
- 154, 0, 31, 32, 28, 29, 0, 100, 0, 0,
- 0, 0, 0, 0, 0, 211, 30, 23, 0, 0,
- 33, 34, 30, 23, 0, 0, 33, 34, 0, 0,
+ 62, 216, 107, 13, 87, 229, 13, 88, 89, 166,
+ 165, 123, 240, 297, 4, 82, 45, 37, 91, 45,
+ 292, 265, 291, 264, 204, 88, 89, 187, 88, 89,
+ 270, 24, 171, 266, 254, 155, 63, 161, 24, 127,
+ 176, 65, 98, 168, 169, 36, 52, 35, 157, 25,
+ 63, 174, 82, 122, 63, 124, 125, 126, 22, 128,
+ 129, 130, 131, 93, 136, 82, 189, 203, 66, 63,
+ 100, 82, 103, 222, 107, 64, 177, 101, 63, 158,
+ 158, 45, 102, 22, 103, 276, 206, 63, 232, 101,
+ 104, 174, 105, 269, 102, 224, 188, 163, 16, 178,
+ 144, 142, 113, 112, 6, 11, 133, 26, 186, 111,
+ 39, 257, 140, 186, 186, 170, 48, 94, 231, 99,
+ 160, 110, 82, 109, 86, 46, 41, 121, 159, 100,
+ 114, 115, 108, 134, 99, 99, 167, 164, 77, 262,
+ 71, 100, 82, 198, 82, 158, 141, 118, 91, 148,
+ 149, 263, 120, 10, 27, 20, 5, 1, 24, 50,
+ 12, 158, 17, 192, 223, 36, 0, 35, 0, 25,
+ 0, 225, 226, 228, 0, 0, 152, 0, 0, 0,
+ 191, 45, 207, 197, 0, 0, 0, 193, 0, 186,
+ 134, 0, 0, 201, 0, 237, 99, 241, 195, 99,
+ 99, 99, 99, 99, 99, 245, 246, 247, 0, 221,
+ 134, 0, 199, 0, 0, 0, 190, 217, 0, 0,
+ 0, 0, 94, 186, 4, 20, 205, 4, 24, 0,
+ 0, 0, 17, 0, 90, 36, 0, 35, 0, 25,
+ 0, 0, 0, 271, 0, 22, 0, 68, 172, 152,
+ 0, 45, 94, 23, 99, 233, 0, 236, 258, 30,
+ 23, 0, 280, 33, 34, 213, 172, 0, 211, 82,
+ 282, 0, 20, 158, 0, 24, 99, 0, 82, 17,
+ 0, 172, 36, 0, 35, 158, 25, 153, 212, 209,
+ 172, 31, 32, 28, 29, 82, 279, 0, 45, 208,
+ 0, 0, 82, 82, 82, 0, 152, 152, 152, 152,
+ 152, 273, 152, 152, 152, 22, 20, 68, 0, 24,
+ 0, 285, 155, 17, 185, 24, 36, 0, 35, 98,
+ 25, 0, 36, 295, 35, 0, 25, 281, 0, 0,
+ 0, 0, 45, 0, 0, 0, 289, 152, 0, 152,
+ 152, 152, 152, 0, 152, 152, 152, 0, 0, 0,
+ 0, 0, 22, 299, 202, 0, 0, 0, 0, 0,
+ 304, 305, 306, 152, 152, 0, 0, 0, 135, 30,
+ 23, 0, 0, 33, 34, 0, 152, 0, 0, 0,
+ 0, 0, 0, 83, 0, 80, 81, 72, 73, 74,
+ 75, 76, 84, 85, 78, 79, 22, 18, 0, 0,
+ 0, 31, 32, 28, 29, 20, 0, 0, 24, 0,
+ 215, 0, 17, 0, 0, 36, 0, 35, 0, 25,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 260,
+ 0, 45, 59, 0, 60, 61, 0, 0, 67, 30,
+ 23, 0, 0, 33, 34, 0, 0, 0, 0, 0,
+ 0, 0, 0, 83, 0, 80, 81, 72, 73, 74,
+ 75, 76, 84, 85, 78, 79, 0, 18, 0, 0,
+ 0, 31, 32, 28, 29, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 67, 30, 23, 150, 0,
+ 33, 34, 21, 0, 0, 0, 53, 0, 0, 0,
83, 0, 80, 81, 72, 73, 74, 75, 76, 84,
- 85, 78, 79, 16, 18, 0, 0, 0, 31, 32,
- 28, 29, 0, 0, 31, 32, 28, 29, 0, 0,
- 0, 0, 94, 0, 99, 0, 0, 0, 109, 0,
- 0, 0, 0, 154, 20, 114, 115, 24, 214, 99,
- 99, 17, 0, 0, 36, 0, 35, 0, 25, 0,
- 0, 0, 0, 150, 0, 0, 0, 0, 0, 0,
- 45, 59, 0, 60, 61, 0, 0, 0, 0, 0,
- 20, 23, 0, 24, 33, 34, 58, 17, 171, 0,
- 36, 152, 35, 0, 25, 0, 154, 154, 154, 154,
- 154, 0, 154, 154, 154, 0, 0, 59, 0, 60,
- 61, 99, 31, 32, 99, 99, 99, 99, 99, 99,
- 20, 0, 0, 24, 0, 0, 0, 17, 0, 0,
- 36, 0, 35, 0, 25, 53, 154, 94, 154, 154,
- 154, 154, 0, 154, 154, 154, 45, 59, 0, 60,
- 61, 0, 0, 0, 0, 0, 20, 0, 0, 24,
- 0, 154, 154, 17, 152, 0, 36, 94, 35, 99,
- 25, 53, 0, 0, 154, 0, 0, 0, 0, 0,
- 0, 191, 0, 59, 0, 60, 61, 0, 99, 243,
- 245, 246, 247, 248, 0, 250, 251, 70, 0, 0,
- 14, 0, 20, 14, 0, 24, 0, 0, 14, 17,
- 51, 53, 36, 63, 35, 0, 25, 152, 152, 152,
- 152, 152, 0, 152, 152, 152, 0, 0, 0, 59,
- 0, 60, 61, 0, 0, 0, 0, 0, 269, 14,
- 0, 0, 0, 0, 14, 0, 0, 53, 0, 0,
- 0, 0, 0, 0, 281, 0, 0, 152, 0, 152,
- 152, 152, 152, 0, 152, 152, 152, 0, 30, 23,
- 0, 0, 33, 34, 58, 0, 0, 56, 4, 0,
- 0, 0, 152, 152, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 53, 0, 152, 18, 57, 54, 55,
- 31, 32, 28, 29, 30, 23, 0, 0, 33, 34,
- 58, 0, 0, 56, 0, 0, 155, 0, 0, 24,
- 0, 0, 0, 98, 0, 0, 36, 0, 35, 0,
- 25, 0, 18, 57, 54, 55, 31, 32, 28, 29,
- 0, 268, 0, 0, 30, 23, 210, 0, 33, 34,
- 58, 0, 155, 56, 0, 24, 0, 0, 0, 98,
- 0, 0, 36, 0, 35, 0, 25, 0, 0, 0,
- 0, 0, 18, 57, 54, 55, 31, 32, 28, 29,
- 30, 23, 210, 0, 33, 34, 58, 0, 0, 56,
- 20, 0, 0, 24, 0, 0, 0, 17, 0, 0,
- 36, 0, 35, 0, 25, 0, 0, 0, 18, 57,
- 54, 55, 31, 32, 28, 29, 0, 59, 0, 60,
- 61, 0, 0, 0, 0, 0, 30, 23, 21, 0,
- 33, 34, 58, 0, 0, 56, 0, 0, 155, 0,
- 0, 24, 0, 0, 0, 98, 0, 0, 36, 96,
- 35, 0, 25, 0, 18, 57, 54, 55, 31, 32,
- 28, 29, 0, 20, 116, 117, 24, 0, 0, 0,
+ 85, 78, 79, 96, 18, 0, 0, 0, 31, 32,
+ 28, 29, 0, 0, 0, 0, 0, 0, 116, 117,
+ 30, 23, 0, 0, 33, 34, 2, 23, 0, 0,
+ 33, 34, 38, 0, 83, 0, 80, 81, 72, 73,
+ 74, 75, 76, 84, 85, 78, 79, 0, 18, 106,
+ 0, 0, 31, 32, 28, 29, 0, 0, 31, 32,
+ 156, 0, 0, 0, 0, 20, 0, 0, 24, 0,
+ 0, 119, 17, 0, 0, 36, 0, 35, 0, 25,
+ 96, 0, 0, 179, 180, 181, 182, 183, 184, 0,
+ 132, 0, 59, 0, 60, 138, 139, 0, 0, 0,
+ 143, 0, 0, 0, 0, 0, 0, 0, 248, 250,
+ 251, 252, 253, 0, 255, 256, 0, 0, 0, 30,
+ 23, 0, 0, 33, 34, 58, 0, 0, 56, 4,
+ 0, 0, 20, 156, 0, 24, 0, 0, 96, 17,
+ 174, 0, 36, 0, 35, 0, 25, 18, 57, 54,
+ 55, 31, 32, 28, 29, 0, 53, 0, 275, 59,
+ 230, 60, 61, 196, 0, 0, 70, 0, 0, 14,
+ 0, 0, 14, 20, 0, 287, 24, 14, 0, 51,
17, 0, 0, 36, 0, 35, 0, 25, 0, 0,
- 0, 53, 0, 0, 0, 0, 0, 0, 0, 0,
- 59, 0, 60, 0, 0, 0, 0, 0, 0, 0,
- 155, 0, 0, 24, 0, 0, 156, 98, 20, 0,
- 36, 24, 35, 0, 25, 17, 0, 0, 36, 0,
- 35, 0, 25, 0, 0, 0, 96, 0, 0, 176,
- 177, 178, 179, 180, 181, 59, 0, 60, 0, 0,
- 30, 23, 0, 0, 33, 34, 209, 0, 0, 207,
- 0, 0, 0, 20, 53, 0, 24, 0, 0, 0,
- 17, 0, 0, 36, 0, 35, 0, 25, 153, 208,
- 205, 206, 31, 32, 28, 29, 30, 23, 0, 156,
- 33, 34, 209, 20, 96, 207, 24, 0, 0, 0,
- 17, 0, 0, 36, 0, 35, 0, 25, 0, 53,
- 0, 0, 0, 226, 153, 208, 205, 206, 31, 32,
- 28, 29, 0, 0, 30, 23, 0, 0, 33, 34,
- 58, 0, 0, 56, 0, 0, 0, 0, 0, 0,
- 0, 0, 156, 156, 156, 156, 156, 0, 156, 156,
- 156, 0, 18, 57, 54, 55, 31, 32, 28, 29,
- 20, 0, 0, 24, 0, 0, 0, 17, 0, 0,
- 36, 0, 35, 23, 25, 0, 33, 34, 209, 0,
- 0, 0, 156, 22, 156, 156, 156, 156, 0, 156,
- 156, 156, 0, 0, 0, 0, 0, 30, 23, 0,
- 0, 33, 34, 58, 31, 32, 56, 156, 156, 155,
- 0, 0, 24, 0, 0, 0, 98, 0, 0, 36,
- 156, 35, 0, 25, 0, 18, 57, 54, 0, 31,
- 32, 28, 29, 0, 0, 23, 0, 0, 33, 34,
+ 156, 156, 156, 156, 156, 0, 156, 156, 156, 45,
+ 59, 0, 60, 61, 0, 0, 0, 0, 14, 0,
+ 0, 0, 0, 14, 0, 0, 0, 0, 20, 0,
+ 0, 24, 238, 53, 0, 17, 0, 0, 36, 0,
+ 35, 156, 25, 156, 156, 156, 156, 0, 156, 156,
+ 156, 0, 0, 194, 0, 59, 0, 60, 61, 0,
+ 0, 0, 0, 0, 0, 0, 0, 156, 156, 0,
+ 0, 0, 20, 0, 53, 24, 268, 0, 0, 17,
+ 156, 0, 36, 63, 35, 0, 25, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 277, 59,
+ 23, 60, 61, 33, 34, 58, 0, 0, 0, 0,
+ 0, 20, 0, 0, 24, 290, 0, 0, 17, 53,
+ 294, 36, 0, 35, 0, 25, 0, 0, 0, 0,
+ 298, 31, 32, 301, 302, 0, 0, 303, 59, 0,
+ 60, 61, 0, 0, 0, 0, 0, 0, 155, 0,
+ 0, 24, 0, 0, 0, 98, 0, 0, 36, 0,
+ 35, 0, 25, 53, 0, 0, 30, 23, 0, 0,
+ 33, 34, 58, 274, 0, 56, 0, 0, 214, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 18, 57, 54, 55, 31, 32,
+ 28, 29, 53, 0, 0, 0, 0, 30, 23, 0,
+ 0, 33, 34, 58, 0, 0, 56, 20, 0, 0,
+ 24, 0, 0, 0, 17, 0, 0, 36, 0, 35,
+ 0, 25, 0, 0, 0, 18, 57, 54, 55, 31,
+ 32, 28, 29, 0, 59, 0, 60, 0, 0, 0,
0, 0, 30, 23, 0, 0, 33, 34, 58, 155,
- 22, 56, 24, 0, 0, 0, 98, 0, 0, 36,
- 0, 35, 2, 25, 0, 0, 31, 32, 38, 0,
+ 0, 56, 24, 0, 0, 155, 98, 0, 24, 36,
+ 0, 35, 98, 25, 0, 36, 0, 35, 0, 25,
+ 18, 57, 54, 55, 31, 32, 28, 29, 0, 214,
+ 0, 0, 0, 19, 0, 0, 30, 23, 0, 0,
+ 33, 34, 58, 20, 0, 56, 24, 0, 53, 0,
+ 17, 0, 0, 36, 95, 35, 0, 25, 0, 0,
+ 0, 0, 0, 0, 18, 57, 54, 55, 31, 32,
+ 28, 29, 0, 0, 0, 30, 23, 0, 0, 33,
+ 34, 58, 20, 0, 56, 24, 0, 0, 0, 17,
+ 0, 0, 36, 0, 35, 0, 25, 0, 0, 0,
+ 0, 0, 0, 18, 57, 54, 55, 31, 32, 28,
+ 29, 154, 30, 23, 0, 0, 33, 34, 213, 0,
+ 0, 211, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 22, 0, 0, 0, 0, 0, 0,
+ 153, 212, 209, 210, 31, 32, 28, 29, 20, 0,
+ 0, 24, 0, 0, 0, 17, 0, 0, 36, 0,
+ 35, 0, 25, 0, 155, 0, 0, 24, 0, 0,
+ 0, 98, 22, 0, 36, 59, 35, 60, 25, 0,
+ 0, 30, 23, 0, 154, 33, 34, 58, 0, 218,
+ 56, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 18,
+ 57, 54, 0, 31, 32, 28, 29, 0, 0, 0,
+ 0, 0, 0, 30, 23, 0, 0, 33, 34, 213,
+ 23, 0, 211, 33, 34, 213, 0, 0, 0, 53,
+ 0, 154, 154, 154, 154, 154, 0, 154, 154, 154,
+ 0, 153, 212, 209, 210, 31, 32, 28, 29, 0,
+ 0, 31, 32, 0, 0, 0, 40, 30, 23, 0,
+ 0, 33, 34, 0, 0, 0, 0, 4, 0, 8,
+ 9, 0, 154, 0, 154, 154, 154, 154, 0, 154,
+ 154, 154, 0, 0, 15, 18, 0, 0, 0, 31,
+ 32, 28, 29, 0, 0, 7, 30, 23, 154, 154,
+ 33, 34, 0, 0, 0, 0, 0, 20, 8, 9,
+ 24, 154, 0, 0, 17, 0, 0, 36, 0, 35,
+ 0, 25, 0, 15, 18, 0, 0, 0, 31, 32,
+ 28, 29, 20, 45, 0, 24, 0, 0, 0, 17,
+ 0, 0, 36, 0, 35, 155, 25, 0, 24, 0,
+ 0, 0, 98, 0, 0, 36, 0, 35, 0, 25,
+ 0, 0, 30, 23, 0, 0, 33, 34, 58, 0,
+ 0, 56, 0, 0, 0, 0, 0, 0, 30, 23,
+ 0, 0, 33, 34, 213, 0, 0, 211, 0, 0,
18, 57, 0, 0, 31, 32, 28, 29, 0, 0,
- 0, 0, 0, 0, 0, 106, 20, 0, 23, 24,
- 0, 33, 34, 17, 0, 0, 36, 0, 35, 0,
- 25, 0, 0, 0, 0, 0, 0, 119, 0, 0,
- 0, 0, 45, 0, 0, 0, 40, 30, 23, 31,
- 32, 33, 34, 0, 0, 0, 132, 4, 0, 8,
- 9, 138, 139, 0, 0, 0, 143, 0, 0, 0,
- 0, 0, 0, 0, 15, 18, 0, 0, 0, 31,
- 32, 28, 29, 20, 0, 0, 24, 0, 0, 0,
- 17, 155, 0, 36, 24, 35, 0, 25, 98, 0,
- 0, 36, 0, 35, 0, 25, 0, 0, 0, 0,
- 0, 66, 0, 7, 30, 23, 0, 0, 33, 34,
- 0, 0, 0, 0, 0, 0, 8, 9, 0, 0,
+ 0, 0, 0, 0, 0, 0, 153, 212, 0, 0,
+ 31, 32, 28, 29, 20, 0, 0, 24, 0, 0,
+ 175, 17, 155, 0, 36, 24, 35, 0, 25, 147,
+ 0, 0, 36, 0, 35, 0, 25, 0, 0, 0,
+ 20, 0, 0, 24, 0, 0, 0, 17, 0, 0,
+ 36, 0, 35, 0, 25, 0, 0, 0, 0, 0,
+ 0, 155, 0, 0, 24, 0, 0, 0, 98, 0,
+ 0, 36, 0, 35, 0, 25, 20, 0, 0, 24,
+ 0, 0, 0, 17, 0, 0, 36, 0, 35, 0,
+ 25, 0, 0, 0, 0, 0, 0, 0, 97, 0,
+ 0, 24, 0, 0, 0, 98, 0, 0, 36, 0,
+ 35, 173, 25, 97, 0, 0, 24, 0, 0, 0,
+ 98, 0, 0, 36, 0, 35, 0, 0, 0, 0,
+ 0, 30, 23, 0, 20, 33, 34, 24, 0, 0,
+ 0, 17, 0, 0, 36, 0, 35, 0, 25, 0,
+ 0, 0, 0, 0, 0, 92, 30, 23, 0, 18,
+ 33, 34, 0, 31, 32, 28, 29, 0, 249, 30,
+ 23, 0, 0, 33, 34, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 18, 0, 0, 0, 31, 32,
+ 28, 29, 0, 0, 0, 0, 0, 153, 0, 0,
+ 0, 31, 32, 28, 29, 0, 0, 0, 0, 0,
+ 234, 0, 235, 0, 0, 0, 0, 0, 0, 239,
+ 0, 0, 0, 243, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 227, 30, 23,
+ 0, 0, 33, 34, 261, 151, 30, 23, 69, 0,
+ 33, 34, 0, 0, 267, 0, 0, 44, 44, 44,
+ 0, 0, 0, 0, 30, 23, 18, 0, 33, 34,
+ 31, 32, 28, 29, 153, 0, 0, 0, 31, 32,
+ 28, 29, 0, 278, 3, 30, 23, 0, 0, 33,
+ 34, 283, 18, 43, 43, 43, 31, 32, 28, 29,
+ 30, 200, 0, 293, 33, 34, 296, 0, 0, 0,
+ 0, 0, 0, 153, 137, 0, 300, 31, 32, 28,
+ 29, 0, 30, 23, 44, 44, 33, 34, 18, 0,
+ 0, 44, 31, 32, 28, 29, 0, 30, 23, 0,
+ 42, 33, 34, 0, 0, 0, 0, 0, 0, 0,
+ 47, 49, 0, 0, 31, 32, 28, 29, 0, 23,
+ 43, 43, 33, 34, 0, 0, 0, 43, 0, 31,
+ 32, 28, 29, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 137, 0, 0, 0, 0, 0, 0, 0,
+ 31, 32, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 44, 0, 44, 0,
+ 0, 0, 0, 0, 0, 0, 145, 146, 0, 0,
+ 0, 0, 0, 162, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 15, 18, 0, 0, 0, 31, 32, 28, 29,
+ 0, 137, 43, 0, 43, 0, 0, 0, 0, 0,
+ 242, 0, 44, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 30, 23, 172, 0, 33, 34, 209,
- 20, 0, 207, 24, 0, 0, 134, 17, 0, 0,
- 36, 0, 35, 0, 25, 0, 0, 0, 0, 0,
- 0, 153, 208, 205, 0, 31, 32, 28, 29, 0,
- 0, 0, 0, 30, 23, 0, 0, 33, 34, 209,
- 155, 0, 207, 24, 0, 0, 0, 147, 0, 0,
- 36, 0, 35, 0, 25, 0, 0, 262, 0, 0,
- 0, 153, 208, 134, 0, 31, 32, 28, 29, 0,
- 30, 23, 0, 0, 33, 34, 0, 20, 0, 271,
- 24, 0, 0, 134, 17, 195, 170, 36, 0, 35,
- 0, 25, 0, 0, 0, 284, 0, 0, 18, 0,
- 288, 0, 31, 32, 28, 29, 0, 0, 0, 0,
- 292, 0, 0, 295, 296, 155, 0, 297, 24, 0,
- 42, 0, 98, 0, 0, 36, 0, 35, 0, 25,
- 47, 49, 0, 0, 0, 0, 92, 30, 23, 0,
- 0, 33, 34, 0, 244, 30, 23, 0, 0, 33,
- 34, 0, 20, 0, 0, 24, 0, 0, 0, 17,
- 0, 0, 36, 0, 35, 18, 25, 0, 0, 31,
- 32, 28, 29, 153, 0, 0, 0, 31, 32, 28,
- 29, 229, 0, 230, 231, 0, 0, 0, 0, 0,
- 234, 0, 0, 0, 238, 0, 145, 146, 0, 0,
- 0, 0, 0, 162, 275, 0, 0, 69, 0, 0,
- 0, 0, 0, 283, 0, 256, 44, 44, 44, 0,
- 0, 0, 0, 223, 30, 23, 0, 0, 33, 34,
- 293, 3, 0, 0, 0, 0, 0, 298, 299, 300,
- 43, 43, 43, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 18, 272, 0, 0, 31, 32, 28, 29,
- 277, 0, 0, 151, 30, 23, 0, 0, 33, 34,
- 0, 0, 287, 137, 0, 290, 0, 0, 215, 216,
- 0, 0, 0, 44, 44, 294, 0, 0, 0, 0,
- 44, 0, 153, 0, 0, 0, 31, 32, 28, 29,
- 0, 30, 23, 0, 0, 33, 34, 43, 43, 0,
- 0, 0, 0, 0, 43, 0, 0, 0, 0, 0,
- 239, 0, 0, 0, 0, 0, 0, 0, 0, 18,
- 0, 0, 0, 31, 32, 28, 29, 0, 254, 30,
- 23, 137, 0, 33, 34, 0, 0, 0, 0, 0,
+ 44, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 43, 0,
+ 0, 272, 0, 0, 0, 0, 0, 0, 219, 0,
+ 220, 0, 0, 0, 0, 0, 43, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 44, 44, 153, 0, 0,
- 0, 31, 32, 28, 29, 0, 30, 196, 0, 0,
- 33, 34, 0, 0, 0, 0, 0, 0, 280, 43,
- 43, 282, 0, 0, 0, 0, 0, 137, 0, 0,
- 0, 0, 0, 0, 18, 237, 0, 44, 31, 32,
- 28, 29, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 44, 0, 0, 0, 0,
- 0, 43, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 266, 0, 0, 0, 43,
+ 284, 0, 44, 0, 0, 44, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 244, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 43, 0,
+ 0, 43, 259, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 278, 0, 44, 0, 0, 44, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 43,
- 0, 0, 43 };
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 286, 0, 0, 288 };
int yypact[]={
- -245, -1000, 1027, -244, -1000, 960, -1000, -1000, -38, -38,
- -39, -1000, -76, 579, 30, -1000, -258, 1220, -1, -1000,
- 93, 293, -245, -23, 145, -1000, -1000, -1000, 63, 62,
- 60, -1, -1, -1000, -1000, 145, 145, -1000, -1000, -1000,
- -1000, -76, -1000, -244, -245, -1000, -76, -1000, -1000, -1000,
- -1000, 87, 1384, -272, 1384, 1384, 1384, -213, 1384, 1384,
- 1384, 1384, 930, -245, -31, 15, -1000, -1000, -245, -245,
- 30, -1000, 58, -245, 57, -38, -38, 1347, -1000, 1384,
- -1000, -215, 421, 56, -1000, -1000, -217, -1000, -1000, -1000,
- 28, 457, -1000, 19, -1000, -1000, -46, 145, 1384, -279,
- 145, 145, 145, 145, 145, 145, -1000, 1220, -1000, -1000,
- -220, 1220, 1220, 1220, -1000, -1000, -46, -46, -1000, -1000,
- -1000, -31, 767, -1, 885, 840, 233, -1000, 930, 930,
- 930, 533, -1000, 15, -1000, -1000, -1000, -1000, -1000, -1000,
- -1000, -31, 1384, 139, 1459, -1000, -1000, 1220, -29, 43,
- 729, -1000, -264, -1, -1000, 93, 293, -38, -38, 767,
- 1384, -27, -1000, 1384, 50, -1000, -1000, 1384, 1307, 1384,
- -278, -1000, -1000, -1000, 145, 457, -46, -46, -46, -46,
- 64, 64, 14, 767, 39, 44, 38, 44, 44, 15,
- -1000, 1384, -1000, -1000, 457, -256, -87, 15, 28, -38,
- 1384, 1384, 1384, 1228, 1422, 1422, 1422, 1422, -216, 1422,
- 1422, 877, -1000, 19, -1000, -1000, -1000, -38, 1220, 457,
- -222, 767, 767, -1000, 767, -221, 293, -1000, -1000, -1000,
- -1000, -1000, -1000, 767, -245, 48, -228, 1153, -29, -1000,
- 767, 767, 767, 729, -1000, 729, 1116, 1076, 815, -1000,
- 877, 693, 1422, -1000, -1000, 0, -245, 44, 34, -1000,
- -1000, -1000, 139, 1384, 44, 497, 1384, -38, 1422, 729,
- -38, 139, -245, -229, -1000, -1000, 457, -245, 1384, 44,
- -1000, 729, -1000, -255, -1000, -1000, -1000, -245, 139, 44,
- -245, -245, -1000, -1000, -245, 139, 139, 139, -1000, -1000,
- -1000 };
+ -253, -1000, 1019, -250, -1000, 980, -1000, -1000, -43, -43,
+ -40, -1000, -65, 749, 192, -1000, -261, 1279, -5, -1000,
+ 1445, 47, -253, -17, 1460, -1000, -1000, -1000, -1000, 63,
+ 62, -5, -5, -1000, -1000, 1460, 1460, -1000, -1000, -1000,
+ -1000, -65, -1000, -250, -253, -1000, -65, -1000, -1000, -1000,
+ -1000, 239, 1387, -274, 1387, 1387, 1387, -219, 1387, 1387,
+ 1387, 1387, 1481, -253, 122, 22, -1000, -1000, -253, -253,
+ 192, -1000, 61, -253, 60, -43, -43, 1369, 1387, 1387,
+ -1000, -221, 382, 57, -1000, -1000, -248, -1000, -1000, -1000,
+ 10, 619, -1000, 16, -1000, -1000, -24, 1460, 1387, -282,
+ 1460, 1460, 1460, 1460, 1460, 1460, -1000, 1279, -1000, -1000,
+ -232, 56, 1279, 1279, -1000, -1000, -24, -24, -1000, -1000,
+ -1000, 122, 788, -5, 1085, 894, 552, -1000, 1481, 1481,
+ 1481, 705, -1000, 22, -1000, -1000, -253, -1000, -1000, -1000,
+ -1000, 122, 1387, 283, 1423, -1000, -1000, 1279, -38, 43,
+ 936, -1000, -264, -5, -1000, 1445, 47, -43, 788, -43,
+ 1387, -18, -1000, 1387, 55, -1000, -1000, -1000, -1000, -1000,
+ 1387, 1361, 1387, -281, -1000, -1000, -1000, 1460, 619, -24,
+ -24, -24, -24, 35, 35, 25, 788, 41, 1279, 50,
+ 34, 50, 22, -1000, 1387, -253, -1000, -1000, 619, -263,
+ -89, 22, 10, -43, 1387, 1387, 1387, 1292, 1408, 1408,
+ 1408, 1408, -224, 1408, 1408, 289, -1000, 16, -1000, -1000,
+ -1000, -43, 1279, 619, -235, 788, 788, -1000, 788, -225,
+ 47, -1000, -1000, 50, -1000, -1000, -1000, 788, -1000, -253,
+ 53, -228, 1254, -38, -1000, 788, 788, 788, 936, -1000,
+ 936, 1101, 2, 942, -1000, 289, 825, 1408, -1000, -1000,
+ -8, -253, 50, 6, -1000, -1000, -1000, -1000, 283, 1387,
+ 50, 660, 1387, -43, 1408, 936, -43, 283, -253, -236,
+ -1000, -1000, 619, -253, 1387, 50, -1000, 936, -1000, -259,
+ -1000, -1000, -1000, -253, 283, 50, -253, -253, -1000, -1000,
+ -253, 283, 283, 283, -1000, -1000, -1000 };
int yypgo[]={
- 0, 167, 165, 395, 0, 163, 162, 104, 838, 116,
- 160, 159, 124, 423, 156, 154, 152, 49, 91, 76,
- 1281, 151, 150, 50, 83, 302, 51, 46, 149, 139,
- 1162, 137, 131, 1460, 121, 1335, 607, 54, 81, 24,
- 118, 1571, 1547, 117, 115, 109, 105 };
+ 0, 160, 159, 420, 0, 157, 156, 104, 502, 107,
+ 154, 153, 105, 98, 151, 150, 149, 216, 66, 75,
+ 68, 140, 139, 48, 63, 1003, 40, 67, 138, 137,
+ 136, 546, 126, 125, 1720, 124, 1400, 686, 41, 64,
+ 32, 121, 1664, 1628, 120, 115, 111, 109 };
int yyr1[]={
- 0, 5, 6, 6, 6, 6, 31, 7, 32, 7,
- 7, 7, 7, 7, 7, 7, 29, 29, 34, 1,
- 2, 11, 11, 40, 25, 12, 12, 19, 19, 19,
- 19, 33, 33, 20, 20, 20, 20, 20, 20, 20,
- 20, 20, 20, 20, 20, 20, 43, 20, 20, 44,
- 20, 20, 20, 28, 28, 21, 21, 41, 41, 30,
- 30, 26, 26, 27, 27, 27, 27, 22, 22, 14,
- 14, 14, 14, 14, 23, 23, 16, 16, 15, 15,
- 15, 15, 15, 15, 18, 18, 17, 17, 17, 17,
- 17, 17, 45, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 46,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 8, 8, 8, 8, 8, 8, 8, 8,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 10, 10, 10, 24, 24, 13, 13, 13,
- 13, 36, 37, 35, 38, 38, 42, 39 };
+ 0, 5, 6, 6, 6, 6, 32, 7, 33, 7,
+ 7, 7, 7, 7, 7, 7, 29, 29, 29, 30,
+ 30, 35, 1, 2, 11, 11, 41, 25, 12, 12,
+ 19, 19, 19, 19, 34, 34, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 44, 20, 20, 20, 28, 28, 21, 21, 42,
+ 42, 31, 31, 26, 26, 27, 27, 27, 27, 22,
+ 22, 14, 14, 14, 14, 14, 23, 23, 16, 16,
+ 15, 15, 15, 15, 15, 15, 18, 18, 17, 17,
+ 17, 17, 17, 17, 45, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 46, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 8, 8, 8, 8, 8, 8,
+ 8, 8, 9, 9, 47, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 10, 10, 10, 24, 24,
+ 13, 13, 13, 13, 37, 38, 36, 39, 39, 43,
+ 40 };
int yyr2[]={
0, 7, 3, 5, 3, 5, 1, 7, 1, 7,
- 5, 5, 5, 3, 5, 5, 3, 3, 1, 15,
- 9, 3, 7, 1, 9, 9, 7, 3, 5, 3,
- 5, 2, 4, 5, 5, 7, 3, 13, 17, 17,
- 21, 19, 5, 5, 13, 9, 1, 7, 7, 1,
- 9, 13, 5, 3, 3, 13, 19, 3, 4, 0,
- 2, 1, 5, 1, 5, 5, 5, 1, 3, 3,
- 7, 3, 5, 7, 1, 3, 1, 3, 3, 7,
- 3, 5, 7, 7, 1, 3, 3, 7, 3, 5,
- 7, 7, 1, 9, 11, 9, 7, 7, 7, 7,
- 3, 5, 7, 7, 7, 7, 11, 3, 5, 1,
- 9, 7, 7, 7, 3, 5, 7, 7, 7, 11,
- 3, 5, 2, 2, 7, 7, 7, 7, 7, 7,
- 5, 7, 9, 9, 3, 9, 5, 5, 3, 3,
- 5, 5, 5, 5, 2, 1, 3, 3, 9, 5,
- 5, 4, 5, 3, 0, 2, 3, 5 };
+ 5, 5, 5, 3, 5, 5, 3, 3, 3, 2,
+ 2, 1, 15, 9, 3, 7, 1, 9, 11, 9,
+ 3, 5, 3, 5, 2, 4, 5, 5, 7, 3,
+ 13, 17, 17, 21, 19, 5, 5, 13, 9, 7,
+ 7, 1, 9, 13, 5, 3, 3, 13, 19, 3,
+ 4, 0, 2, 1, 5, 1, 5, 5, 5, 1,
+ 3, 3, 7, 3, 5, 7, 1, 3, 1, 3,
+ 3, 7, 3, 5, 7, 7, 1, 3, 3, 7,
+ 3, 5, 7, 7, 1, 9, 11, 9, 7, 7,
+ 7, 7, 3, 5, 7, 7, 7, 7, 11, 3,
+ 5, 1, 9, 7, 7, 7, 3, 5, 7, 7,
+ 7, 11, 3, 5, 2, 2, 7, 7, 7, 7,
+ 7, 7, 5, 7, 1, 11, 9, 3, 9, 5,
+ 5, 3, 3, 5, 5, 5, 5, 2, 1, 3,
+ 3, 9, 5, 5, 4, 5, 3, 0, 2, 3,
+ 5 };
int yychk[]={
- -1000, -5, -30, -41, 267, -6, -7, 256, 269, 270,
- -11, -12, -1, -4, -36, 284, -13, 40, 285, -25,
+ -1000, -5, -31, -42, 267, -6, -7, 256, 269, 270,
+ -11, -12, -1, -4, -37, 284, -13, 40, 285, -25,
33, -8, 123, 258, 36, 47, -9, -10, 291, 292,
- 257, 289, 290, 261, 262, 45, 43, 267, -30, -7,
- 256, -31, -33, -41, -42, 59, -32, -33, -12, -33,
- -2, -36, -39, 124, 287, 288, 266, 286, 263, 60,
- 62, 63, -4, 44, -19, -37, -20, 256, 125, -42,
- -36, -21, 275, 276, 277, 278, 279, -28, 282, 283,
- 273, 274, -4, 271, 280, 281, -34, 265, 289, 290,
+ 257, 289, 290, 261, 262, 45, 43, 267, -31, -7,
+ 256, -32, -34, -42, -43, 59, -33, -34, -12, -34,
+ -2, -37, -40, 124, 287, 288, 266, 286, 263, 60,
+ 62, 63, -4, 44, -19, -38, -20, 256, 125, -43,
+ -37, -21, 275, 276, 277, 278, 279, -28, 282, 283,
+ 273, 274, -4, 271, 280, 281, -35, 265, 289, 290,
-17, -4, 256, -24, -13, -25, -8, 33, 40, -13,
- 94, 42, 47, 37, 43, 45, -30, 91, -9, -13,
- -40, 40, 40, 40, -13, -13, -8, -8, -12, -30,
+ 94, 42, 47, 37, 43, 45, -31, 91, -9, -13,
+ -41, -47, 40, 40, -13, -13, -8, -8, -12, -31,
-12, -19, -4, 285, -4, -4, -4, 258, -4, -4,
- -4, -4, -30, -37, -20, 256, -38, -42, -30, -30,
- -37, -19, 40, -30, 40, -33, -33, 40, -16, -15,
- -3, 256, -13, 285, -25, 33, -8, -43, -23, -4,
- -44, 258, -33, 40, -29, 258, 257, -45, -39, 256,
- -35, 41, -35, -26, 60, -4, -8, -8, -8, -8,
- -8, -8, -17, -4, 259, -18, -17, -18, -18, -37,
- -24, 58, -38, -37, -4, -20, 258, -23, -17, -27,
- 62, 264, 124, -39, 256, 287, 288, 266, 286, 263,
- 63, -3, 265, -24, -25, -33, -33, -23, 91, -4,
- 40, -4, -4, 256, -4, 286, -8, 93, 47, -35,
- -35, -35, -38, -4, -35, 275, 286, -42, -35, -33,
- -4, -4, -4, -3, 256, -3, -3, -3, -3, 258,
- -3, -3, -46, -26, -33, -17, -35, -22, -14, 258,
- 256, 258, -30, 40, 258, -4, -42, -27, 58, -3,
- 93, -30, -35, -39, 256, -20, -4, -35, -42, -23,
- -33, -3, -33, -20, -30, 258, 256, -35, -30, -23,
- -35, 272, -30, -20, -35, -30, -30, -30, -20, -20,
- -20 };
+ -4, -4, -31, -38, -20, 256, -39, -43, -31, -31,
+ -38, -19, 40, -31, 40, -34, -34, 40, -16, -15,
+ -3, 256, -13, 285, -25, 33, -8, -23, -4, -23,
+ -44, 258, -34, 40, -29, 258, 257, -30, 291, 292,
+ -45, -40, 256, -36, 41, -36, -26, 60, -4, -8,
+ -8, -8, -8, -8, -8, -17, -4, 259, 40, -18,
+ -17, -18, -38, -24, 58, -39, -31, -38, -4, -20,
+ 258, -23, -17, -27, 62, 264, 124, -40, 256, 287,
+ 288, 266, 286, 263, 63, -3, 265, -24, -25, -34,
+ -34, -23, 91, -4, 40, -4, -4, 256, -4, 286,
+ -8, 93, 47, -18, -36, -36, -39, -4, -31, -36,
+ 275, 286, -43, -36, -34, -4, -4, -4, -3, 256,
+ -3, -3, -3, -3, 258, -3, -3, -46, -26, -34,
+ -17, -36, -22, -14, 258, 256, 258, -36, -31, 40,
+ 258, -4, -43, -27, 58, -3, 93, -31, -36, -40,
+ 256, -20, -4, -36, -43, -23, -34, -3, -34, -20,
+ -31, 258, 256, -36, -31, -23, -36, 272, -31, -20,
+ -36, -31, -31, -31, -20, -20, -20 };
int yydef[]={
- 59, -2, 0, 60, 57, -2, 2, 4, 6, 8,
- 0, 13, 0, 21, 0, 18, 144, 0, 145, 100,
- 0, 107, 59, 147, 0, 23, 122, 123, 0, 134,
- 0, 0, 0, 138, 139, 0, 0, 58, 1, 3,
- 5, 0, 10, 31, 59, 156, 0, 11, 12, 14,
+ 61, -2, 0, 62, 59, -2, 2, 4, 6, 8,
+ 0, 13, 0, 24, 0, 21, 147, 0, 148, 102,
+ 0, 109, 61, 150, 0, 26, 124, 125, 134, 137,
+ 0, 0, 0, 141, 142, 0, 0, 60, 1, 3,
+ 5, 0, 10, 34, 61, 159, 0, 11, 12, 14,
15, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 108, 59, 0, 154, 27, 29, 59, 59,
- 0, 36, 0, 59, 0, 0, 0, -2, 46, 74,
- 49, 0, 0, 0, 53, 54, 0, 92, 142, 143,
- 0, 86, 88, 61, 146, 101, 130, 0, 0, 144,
- 0, 0, 0, 0, 0, 0, 151, 0, 149, 150,
- 0, -2, -2, -2, 136, 137, 140, 141, 7, 32,
- 9, 0, 22, 145, 97, 98, -2, 102, -2, -2,
- -2, 0, 157, 154, 28, 30, 26, 155, 152, 33,
- 34, 0, 0, 0, 74, 42, 43, 0, 63, -2,
- 78, 80, 144, 145, 114, 0, 120, 0, 0, 75,
- 74, 0, 52, 0, 0, 16, 17, 0, 0, 89,
- 0, 153, 131, 96, 0, 0, 124, 125, 126, 127,
- 128, 129, 0, 86, 0, 0, -2, 0, 0, 154,
- 95, 0, 25, 35, 0, 0, 147, 0, 0, 0,
- 0, 0, 0, 0, 81, 0, 0, 0, 0, 0,
- 0, 121, 109, 61, 115, 47, 48, 0, 0, 0,
- -2, 93, 87, 91, 90, 0, 62, 148, 24, 132,
- 133, 135, 20, 106, 59, 0, 0, 0, 63, 45,
- 64, 65, 66, 79, 83, 82, 111, 112, -2, 117,
- -2, 0, 0, 113, 50, 0, 59, 0, -2, 69,
- 71, 94, 0, 0, 0, 0, 74, 0, 0, 110,
- 0, 0, 59, 0, 72, 37, 0, 59, 74, 0,
- 44, 119, 51, 55, 19, 70, 73, 59, 0, 0,
- 59, 59, 38, 39, 59, 0, 0, 0, 41, 56,
- 40 };
+ 0, 0, 110, 61, 0, 157, 30, 32, 61, 61,
+ 0, 39, 0, 61, 0, 0, 0, -2, 76, 76,
+ 51, 0, 0, 0, 55, 56, 0, 94, 145, 146,
+ 0, 88, 90, 63, 149, 103, 132, 0, 0, 147,
+ 0, 0, 0, 0, 0, 0, 154, 0, 152, 153,
+ 0, 0, -2, -2, 139, 140, 143, 144, 7, 35,
+ 9, 0, 25, 148, 99, 100, -2, 104, -2, -2,
+ -2, 0, 160, 157, 31, 33, 61, 158, 155, 36,
+ 37, 0, 0, 0, 76, 45, 46, 0, 65, -2,
+ 80, 82, 147, 148, 116, 0, 122, 0, 77, 0,
+ 76, 0, 54, 0, 0, 16, 17, 18, 19, 20,
+ 0, 0, 91, 0, 156, 133, 98, 0, 0, 126,
+ 127, 128, 129, 130, 131, 0, 88, 0, -2, 0,
+ -2, 0, 157, 97, 0, 61, 29, 38, 0, 0,
+ 150, 0, 0, 0, 0, 0, 0, 0, 83, 0,
+ 0, 0, 0, 0, 0, 123, 111, 63, 117, 49,
+ 50, 0, 0, 0, -2, 95, 89, 93, 92, 0,
+ 64, 151, 27, 0, 136, 138, 23, 108, 28, 61,
+ 0, 0, 0, 65, 48, 66, 67, 68, 81, 85,
+ 84, 113, 114, -2, 119, -2, 0, 0, 115, 52,
+ 0, 61, 0, -2, 71, 73, 96, 135, 0, 0,
+ 0, 0, 76, 0, 0, 112, 0, 0, 61, 0,
+ 74, 40, 0, 61, 76, 0, 47, 121, 53, 57,
+ 22, 72, 75, 61, 0, 0, 61, 61, 41, 42,
+ 61, 0, 0, 0, 44, 58, 43 };
typedef struct { char *t_name; int t_val; } yytoktype;
#ifndef YYDEBUG
# define YYDEBUG 0 /* don't allow debugging */
@@ -1623,6 +1647,9 @@ char * yyreds[] =
"rule : function_prologue function_body",
"func_name : NAME",
"func_name : FUNC_CALL",
+ "func_name : lex_builtin",
+ "lex_builtin : LEX_BUILTIN",
+ "lex_builtin : LEX_LENGTH",
"function_prologue : LEX_FUNCTION",
"function_prologue : LEX_FUNCTION func_name '(' opt_param_list r_paren opt_nls",
"function_body : l_brace statements r_brace opt_semi",
@@ -1630,8 +1657,8 @@ char * yyreds[] =
"pattern : exp comma exp",
"regexp : '/'",
"regexp : '/' REGEXP '/'",
- "action : l_brace statements r_brace opt_semi",
- "action : l_brace r_brace opt_semi",
+ "action : l_brace statements r_brace opt_semi opt_nls",
+ "action : l_brace r_brace opt_semi opt_nls",
"statements : statement",
"statements : statements statement",
"statements : error",
@@ -1651,8 +1678,7 @@ char * yyreds[] =
"statement : LEX_CONTINUE statement_term",
"statement : print '(' expression_list r_paren output_redir statement_term",
"statement : print opt_rexpression_list output_redir statement_term",
- "statement : LEX_NEXT",
- "statement : LEX_NEXT statement_term",
+ "statement : LEX_NEXT opt_exp statement_term",
"statement : LEX_EXIT opt_exp statement_term",
"statement : LEX_RETURN",
"statement : LEX_RETURN opt_exp statement_term",
@@ -1737,6 +1763,7 @@ char * yyreds[] =
"simp_exp : simp_exp '-' simp_exp",
"non_post_simp_exp : '!' simp_exp",
"non_post_simp_exp : '(' exp r_paren",
+ "non_post_simp_exp : LEX_BUILTIN",
"non_post_simp_exp : LEX_BUILTIN '(' opt_expression_list r_paren",
"non_post_simp_exp : LEX_LENGTH '(' opt_expression_list r_paren",
"non_post_simp_exp : LEX_LENGTH",
@@ -2206,10 +2233,10 @@ yyparse()
{
case 1:
-# line 135 "awk.y"
+# line 137 "awk.y"
{ expression_value = yypvt[-1].nodeval; } break;
case 2:
-# line 140 "awk.y"
+# line 142 "awk.y"
{
if (yypvt[-0].nodeval != NULL)
yyval.nodeval = yypvt[-0].nodeval;
@@ -2218,7 +2245,7 @@ case 2:
yyerrok;
} break;
case 3:
-# line 149 "awk.y"
+# line 151 "awk.y"
{
if (yypvt[-0].nodeval == NULL)
yyval.nodeval = yypvt[-1].nodeval;
@@ -2234,16 +2261,16 @@ case 3:
yyerrok;
} break;
case 4:
-# line 163 "awk.y"
+# line 165 "awk.y"
{ yyval.nodeval = NULL; } break;
case 5:
-# line 164 "awk.y"
+# line 166 "awk.y"
{ yyval.nodeval = NULL; } break;
case 6:
-# line 168 "awk.y"
+# line 170 "awk.y"
{ io_allowed = 0; } break;
case 7:
-# line 170 "awk.y"
+# line 172 "awk.y"
{
if (begin_block) {
if (begin_block->type != Node_rule_list)
@@ -2259,10 +2286,10 @@ case 7:
yyerrok;
} break;
case 8:
-# line 184 "awk.y"
+# line 186 "awk.y"
{ io_allowed = 0; } break;
case 9:
-# line 186 "awk.y"
+# line 188 "awk.y"
{
if (end_block) {
if (end_block->type != Node_rule_list)
@@ -2278,27 +2305,27 @@ case 9:
yyerrok;
} break;
case 10:
-# line 201 "awk.y"
+# line 203 "awk.y"
{
warning("BEGIN blocks must have an action part");
errcount++;
yyerrok;
} break;
case 11:
-# line 207 "awk.y"
+# line 209 "awk.y"
{
warning("END blocks must have an action part");
errcount++;
yyerrok;
} break;
case 12:
-# line 213 "awk.y"
+# line 215 "awk.y"
{ yyval.nodeval = node (yypvt[-1].nodeval, Node_rule_node, yypvt[-0].nodeval); yyerrok; } break;
case 13:
-# line 215 "awk.y"
+# line 217 "awk.y"
{ yyval.nodeval = node ((NODE *)NULL, Node_rule_node, yypvt[-0].nodeval); yyerrok; } break;
case 14:
-# line 217 "awk.y"
+# line 219 "awk.y"
{
yyval.nodeval = node (yypvt[-1].nodeval,
Node_rule_node,
@@ -2312,46 +2339,54 @@ case 14:
yyerrok;
} break;
case 15:
-# line 230 "awk.y"
+# line 232 "awk.y"
{
func_install(yypvt[-1].nodeval, yypvt[-0].nodeval);
yyval.nodeval = NULL;
yyerrok;
} break;
case 16:
-# line 239 "awk.y"
+# line 241 "awk.y"
{ yyval.sval = yypvt[-0].sval; } break;
case 17:
-# line 241 "awk.y"
+# line 243 "awk.y"
{ yyval.sval = yypvt[-0].sval; } break;
case 18:
-# line 246 "awk.y"
+# line 245 "awk.y"
+{
+ yyerror("%s() is a built-in function, it cannot be redefined",
+ tokstart);
+ errcount++;
+ /* yyerrok; */
+ } break;
+case 21:
+# line 260 "awk.y"
{
param_counter = 0;
} break;
-case 19:
-# line 250 "awk.y"
+case 22:
+# line 264 "awk.y"
{
yyval.nodeval = append_right(make_param(yypvt[-4].sval), yypvt[-2].nodeval);
can_return = 1;
} break;
-case 20:
-# line 258 "awk.y"
+case 23:
+# line 272 "awk.y"
{
yyval.nodeval = yypvt[-2].nodeval;
can_return = 0;
} break;
-case 21:
-# line 267 "awk.y"
+case 24:
+# line 281 "awk.y"
{ yyval.nodeval = yypvt[-0].nodeval; } break;
-case 22:
-# line 269 "awk.y"
+case 25:
+# line 283 "awk.y"
{ yyval.nodeval = mkrangenode ( node(yypvt[-2].nodeval, Node_cond_pair, yypvt[-0].nodeval) ); } break;
-case 23:
-# line 278 "awk.y"
+case 26:
+# line 292 "awk.y"
{ ++want_regexp; } break;
-case 24:
-# line 280 "awk.y"
+case 27:
+# line 294 "awk.y"
{
NODE *n;
@@ -2364,17 +2399,17 @@ case 24:
n->re_cnt = 1;
yyval.nodeval = n;
} break;
-case 25:
-# line 296 "awk.y"
-{ yyval.nodeval = yypvt[-2].nodeval ; } break;
-case 26:
-# line 298 "awk.y"
+case 28:
+# line 310 "awk.y"
+{ yyval.nodeval = yypvt[-3].nodeval ; } break;
+case 29:
+# line 312 "awk.y"
{ yyval.nodeval = NULL; } break;
-case 27:
-# line 303 "awk.y"
+case 30:
+# line 317 "awk.y"
{ yyval.nodeval = yypvt[-0].nodeval; } break;
-case 28:
-# line 305 "awk.y"
+case 31:
+# line 319 "awk.y"
{
if (yypvt[-1].nodeval == NULL || yypvt[-1].nodeval->type != Node_statement_list)
yypvt[-1].nodeval = node(yypvt[-1].nodeval, Node_statement_list,(NODE *)NULL);
@@ -2382,58 +2417,58 @@ case 28:
node( yypvt[-0].nodeval, Node_statement_list, (NODE *)NULL));
yyerrok;
} break;
-case 29:
-# line 313 "awk.y"
-{ yyval.nodeval = NULL; } break;
-case 30:
-# line 315 "awk.y"
+case 32:
+# line 327 "awk.y"
{ yyval.nodeval = NULL; } break;
case 33:
-# line 325 "awk.y"
+# line 329 "awk.y"
{ yyval.nodeval = NULL; } break;
-case 34:
-# line 327 "awk.y"
+case 36:
+# line 339 "awk.y"
{ yyval.nodeval = NULL; } break;
-case 35:
-# line 329 "awk.y"
+case 37:
+# line 341 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 38:
+# line 343 "awk.y"
{ yyval.nodeval = yypvt[-1].nodeval; } break;
-case 36:
-# line 331 "awk.y"
+case 39:
+# line 345 "awk.y"
{ yyval.nodeval = yypvt[-0].nodeval; } break;
-case 37:
-# line 333 "awk.y"
+case 40:
+# line 347 "awk.y"
{ yyval.nodeval = node (yypvt[-3].nodeval, Node_K_while, yypvt[-0].nodeval); } break;
-case 38:
-# line 335 "awk.y"
+case 41:
+# line 349 "awk.y"
{ yyval.nodeval = node (yypvt[-2].nodeval, Node_K_do, yypvt[-5].nodeval); } break;
-case 39:
-# line 337 "awk.y"
+case 42:
+# line 351 "awk.y"
{
yyval.nodeval = node (yypvt[-0].nodeval, Node_K_arrayfor, make_for_loop(variable(yypvt[-5].sval,1),
(NODE *)NULL, variable(yypvt[-3].sval,1)));
} break;
-case 40:
-# line 342 "awk.y"
+case 43:
+# line 356 "awk.y"
{
yyval.nodeval = node(yypvt[-0].nodeval, Node_K_for, (NODE *)make_for_loop(yypvt[-7].nodeval, yypvt[-5].nodeval, yypvt[-3].nodeval));
} break;
-case 41:
-# line 346 "awk.y"
+case 44:
+# line 360 "awk.y"
{
yyval.nodeval = node (yypvt[-0].nodeval, Node_K_for,
(NODE *)make_for_loop(yypvt[-6].nodeval, (NODE *)NULL, yypvt[-3].nodeval));
} break;
-case 42:
-# line 352 "awk.y"
+case 45:
+# line 366 "awk.y"
{ yyval.nodeval = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); } break;
-case 43:
-# line 355 "awk.y"
+case 46:
+# line 369 "awk.y"
{ yyval.nodeval = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); } break;
-case 44:
-# line 357 "awk.y"
+case 47:
+# line 371 "awk.y"
{ yyval.nodeval = node (yypvt[-3].nodeval, yypvt[-5].nodetypeval, yypvt[-1].nodeval); } break;
-case 45:
-# line 359 "awk.y"
+case 48:
+# line 373 "awk.y"
{
if (yypvt[-3].nodetypeval == Node_K_print && yypvt[-2].nodeval == NULL)
yypvt[-2].nodeval = node(node(make_number(0.0),
@@ -2444,187 +2479,199 @@ case 45:
yyval.nodeval = node (yypvt[-2].nodeval, yypvt[-3].nodetypeval, yypvt[-1].nodeval);
} break;
-case 46:
-# line 370 "awk.y"
-{ if (! io_allowed) yyerror("next used in BEGIN or END action"); } break;
-case 47:
-# line 372 "awk.y"
-{ yyval.nodeval = node ((NODE *)NULL, Node_K_next, (NODE *)NULL); } break;
-case 48:
-# line 374 "awk.y"
-{ yyval.nodeval = node (yypvt[-1].nodeval, Node_K_exit, (NODE *)NULL); } break;
case 49:
-# line 376 "awk.y"
-{ if (! can_return) yyerror("return used outside function context"); } break;
+# line 384 "awk.y"
+{ NODETYPE type;
+
+ if (! io_allowed) yyerror("next used in BEGIN or END action");
+ if (yypvt[-1].nodeval && yypvt[-1].nodeval == lookup("file")) {
+ if (do_lint)
+ warning("`next file' is a gawk extension");
+ else if (strict || do_posix)
+ yyerror("`next file' is a gawk extension");
+ type = Node_K_nextfile;
+ } else type = Node_K_next;
+ yyval.nodeval = node ((NODE *)NULL, type, (NODE *)NULL);
+ } break;
case 50:
-# line 378 "awk.y"
-{ yyval.nodeval = node (yypvt[-1].nodeval, Node_K_return, (NODE *)NULL); } break;
+# line 397 "awk.y"
+{ yyval.nodeval = node (yypvt[-1].nodeval, Node_K_exit, (NODE *)NULL); } break;
case 51:
-# line 380 "awk.y"
-{ yyval.nodeval = node (variable(yypvt[-4].sval,1), Node_K_delete, yypvt[-2].nodeval); } break;
+# line 399 "awk.y"
+{ if (! can_return) yyerror("return used outside function context"); } break;
case 52:
-# line 382 "awk.y"
-{ yyval.nodeval = yypvt[-1].nodeval; } break;
+# line 401 "awk.y"
+{ yyval.nodeval = node (yypvt[-1].nodeval, Node_K_return, (NODE *)NULL); } break;
case 53:
-# line 387 "awk.y"
-{ yyval.nodetypeval = yypvt[-0].nodetypeval; } break;
+# line 403 "awk.y"
+{ yyval.nodeval = node (variable(yypvt[-4].sval,1), Node_K_delete, yypvt[-2].nodeval); } break;
case 54:
-# line 389 "awk.y"
-{ yyval.nodetypeval = yypvt[-0].nodetypeval; } break;
+# line 405 "awk.y"
+{ yyval.nodeval = yypvt[-1].nodeval; } break;
case 55:
-# line 394 "awk.y"
+# line 410 "awk.y"
+{ yyval.nodetypeval = yypvt[-0].nodetypeval; } break;
+case 56:
+# line 412 "awk.y"
+{ yyval.nodetypeval = yypvt[-0].nodetypeval; } break;
+case 57:
+# line 417 "awk.y"
{
yyval.nodeval = node(yypvt[-3].nodeval, Node_K_if,
node(yypvt[-0].nodeval, Node_if_branches, (NODE *)NULL));
} break;
-case 56:
-# line 400 "awk.y"
+case 58:
+# line 423 "awk.y"
{ yyval.nodeval = node (yypvt[-6].nodeval, Node_K_if,
node (yypvt[-3].nodeval, Node_if_branches, yypvt[-0].nodeval)); } break;
-case 57:
-# line 406 "awk.y"
+case 59:
+# line 429 "awk.y"
{ want_assign = 0; } break;
-case 61:
-# line 417 "awk.y"
-{ yyval.nodeval = NULL; } break;
-case 62:
-# line 419 "awk.y"
-{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_input, (NODE *)NULL); } break;
case 63:
-# line 424 "awk.y"
+# line 440 "awk.y"
{ yyval.nodeval = NULL; } break;
case 64:
-# line 426 "awk.y"
-{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_output, (NODE *)NULL); } break;
+# line 442 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_input, (NODE *)NULL); } break;
case 65:
-# line 428 "awk.y"
-{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_append, (NODE *)NULL); } break;
+# line 447 "awk.y"
+{ yyval.nodeval = NULL; } break;
case 66:
-# line 430 "awk.y"
-{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_pipe, (NODE *)NULL); } break;
+# line 449 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_output, (NODE *)NULL); } break;
case 67:
-# line 435 "awk.y"
-{ yyval.nodeval = NULL; } break;
+# line 451 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_append, (NODE *)NULL); } break;
case 68:
-# line 437 "awk.y"
-{ yyval.nodeval = yypvt[-0].nodeval; } break;
+# line 453 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_pipe, (NODE *)NULL); } break;
case 69:
-# line 442 "awk.y"
-{ yyval.nodeval = make_param(yypvt[-0].sval); } break;
+# line 458 "awk.y"
+{ yyval.nodeval = NULL; } break;
case 70:
-# line 444 "awk.y"
-{ yyval.nodeval = append_right(yypvt[-2].nodeval, make_param(yypvt[-0].sval)); yyerrok; } break;
+# line 460 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
case 71:
-# line 446 "awk.y"
-{ yyval.nodeval = NULL; } break;
+# line 465 "awk.y"
+{ yyval.nodeval = make_param(yypvt[-0].sval); } break;
case 72:
-# line 448 "awk.y"
-{ yyval.nodeval = NULL; } break;
+# line 467 "awk.y"
+{ yyval.nodeval = append_right(yypvt[-2].nodeval, make_param(yypvt[-0].sval)); yyerrok; } break;
case 73:
-# line 450 "awk.y"
+# line 469 "awk.y"
{ yyval.nodeval = NULL; } break;
case 74:
-# line 456 "awk.y"
+# line 471 "awk.y"
{ yyval.nodeval = NULL; } break;
case 75:
-# line 458 "awk.y"
-{ yyval.nodeval = yypvt[-0].nodeval; } break;
+# line 473 "awk.y"
+{ yyval.nodeval = NULL; } break;
case 76:
-# line 463 "awk.y"
+# line 479 "awk.y"
{ yyval.nodeval = NULL; } break;
case 77:
-# line 465 "awk.y"
+# line 481 "awk.y"
{ yyval.nodeval = yypvt[-0].nodeval; } break;
case 78:
-# line 470 "awk.y"
-{ yyval.nodeval = node (yypvt[-0].nodeval, Node_expression_list, (NODE *)NULL); } break;
+# line 486 "awk.y"
+{ yyval.nodeval = NULL; } break;
case 79:
-# line 472 "awk.y"
+# line 488 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 80:
+# line 493 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_expression_list, (NODE *)NULL); } break;
+case 81:
+# line 495 "awk.y"
{
yyval.nodeval = append_right(yypvt[-2].nodeval,
node( yypvt[-0].nodeval, Node_expression_list, (NODE *)NULL));
yyerrok;
} break;
-case 80:
-# line 478 "awk.y"
-{ yyval.nodeval = NULL; } break;
-case 81:
-# line 480 "awk.y"
-{ yyval.nodeval = NULL; } break;
case 82:
-# line 482 "awk.y"
+# line 501 "awk.y"
{ yyval.nodeval = NULL; } break;
case 83:
-# line 484 "awk.y"
+# line 503 "awk.y"
{ yyval.nodeval = NULL; } break;
case 84:
-# line 489 "awk.y"
+# line 505 "awk.y"
{ yyval.nodeval = NULL; } break;
case 85:
-# line 491 "awk.y"
-{ yyval.nodeval = yypvt[-0].nodeval; } break;
+# line 507 "awk.y"
+{ yyval.nodeval = NULL; } break;
case 86:
-# line 496 "awk.y"
-{ yyval.nodeval = node (yypvt[-0].nodeval, Node_expression_list, (NODE *)NULL); } break;
+# line 512 "awk.y"
+{ yyval.nodeval = NULL; } break;
case 87:
-# line 498 "awk.y"
+# line 514 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 88:
+# line 519 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_expression_list, (NODE *)NULL); } break;
+case 89:
+# line 521 "awk.y"
{
yyval.nodeval = append_right(yypvt[-2].nodeval,
node( yypvt[-0].nodeval, Node_expression_list, (NODE *)NULL));
yyerrok;
} break;
-case 88:
-# line 504 "awk.y"
-{ yyval.nodeval = NULL; } break;
-case 89:
-# line 506 "awk.y"
-{ yyval.nodeval = NULL; } break;
case 90:
-# line 508 "awk.y"
+# line 527 "awk.y"
{ yyval.nodeval = NULL; } break;
case 91:
-# line 510 "awk.y"
+# line 529 "awk.y"
{ yyval.nodeval = NULL; } break;
case 92:
-# line 515 "awk.y"
-{ want_assign = 0; } break;
+# line 531 "awk.y"
+{ yyval.nodeval = NULL; } break;
case 93:
-# line 517 "awk.y"
-{ yyval.nodeval = node (yypvt[-3].nodeval, yypvt[-2].nodetypeval, yypvt[-0].nodeval); } break;
+# line 533 "awk.y"
+{ yyval.nodeval = NULL; } break;
case 94:
-# line 519 "awk.y"
-{ yyval.nodeval = node (variable(yypvt[-0].sval,1), Node_in_array, yypvt[-3].nodeval); } break;
+# line 538 "awk.y"
+{ want_assign = 0; } break;
case 95:
-# line 521 "awk.y"
+# line 540 "awk.y"
+{
+ if (do_lint && yypvt[-0].nodeval->type == Node_regex)
+ warning("Regular expression on left of assignment.");
+ yyval.nodeval = node (yypvt[-3].nodeval, yypvt[-2].nodetypeval, yypvt[-0].nodeval);
+ } break;
+case 96:
+# line 546 "awk.y"
+{ yyval.nodeval = node (variable(yypvt[-0].sval,1), Node_in_array, yypvt[-3].nodeval); } break;
+case 97:
+# line 548 "awk.y"
{
yyval.nodeval = node (yypvt[-0].nodeval, Node_K_getline,
node (yypvt[-3].nodeval, Node_redirect_pipein, (NODE *)NULL));
} break;
-case 96:
-# line 526 "awk.y"
+case 98:
+# line 553 "awk.y"
{
if (do_lint && ! io_allowed && yypvt[-0].nodeval == NULL)
warning("non-redirected getline undefined inside BEGIN or END action");
yyval.nodeval = node (yypvt[-1].nodeval, Node_K_getline, yypvt[-0].nodeval);
} break;
-case 97:
-# line 532 "awk.y"
+case 99:
+# line 559 "awk.y"
{ yyval.nodeval = node (yypvt[-2].nodeval, Node_and, yypvt[-0].nodeval); } break;
-case 98:
-# line 534 "awk.y"
+case 100:
+# line 561 "awk.y"
{ yyval.nodeval = node (yypvt[-2].nodeval, Node_or, yypvt[-0].nodeval); } break;
-case 99:
-# line 536 "awk.y"
+case 101:
+# line 563 "awk.y"
{
if (yypvt[-2].nodeval->type == Node_regex)
warning("Regular expression on left of MATCH operator.");
yyval.nodeval = node (yypvt[-2].nodeval, yypvt[-1].nodetypeval, mk_rexp(yypvt[-0].nodeval));
} break;
-case 100:
-# line 542 "awk.y"
+case 102:
+# line 569 "awk.y"
{ yyval.nodeval = yypvt[-0].nodeval; } break;
-case 101:
-# line 544 "awk.y"
+case 103:
+# line 571 "awk.y"
{
yyval.nodeval = node(node(make_number(0.0),
Node_field_spec,
@@ -2632,156 +2679,164 @@ case 101:
Node_nomatch,
yypvt[-0].nodeval);
} break;
-case 102:
-# line 552 "awk.y"
-{ yyval.nodeval = node (variable(yypvt[-0].sval,1), Node_in_array, yypvt[-2].nodeval); } break;
-case 103:
-# line 554 "awk.y"
-{ yyval.nodeval = node (yypvt[-2].nodeval, yypvt[-1].nodetypeval, yypvt[-0].nodeval); } break;
case 104:
-# line 556 "awk.y"
-{ yyval.nodeval = node (yypvt[-2].nodeval, Node_less, yypvt[-0].nodeval); } break;
+# line 579 "awk.y"
+{ yyval.nodeval = node (variable(yypvt[-0].sval,1), Node_in_array, yypvt[-2].nodeval); } break;
case 105:
-# line 558 "awk.y"
-{ yyval.nodeval = node (yypvt[-2].nodeval, Node_greater, yypvt[-0].nodeval); } break;
+# line 581 "awk.y"
+{
+ if (do_lint && yypvt[-0].nodeval->type == Node_regex)
+ warning("Regular expression on left of comparison.");
+ yyval.nodeval = node (yypvt[-2].nodeval, yypvt[-1].nodetypeval, yypvt[-0].nodeval);
+ } break;
case 106:
-# line 560 "awk.y"
-{ yyval.nodeval = node(yypvt[-4].nodeval, Node_cond_exp, node(yypvt[-2].nodeval, Node_if_branches, yypvt[-0].nodeval));} break;
+# line 587 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_less, yypvt[-0].nodeval); } break;
case 107:
-# line 562 "awk.y"
-{ yyval.nodeval = yypvt[-0].nodeval; } break;
+# line 589 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_greater, yypvt[-0].nodeval); } break;
case 108:
-# line 564 "awk.y"
-{ yyval.nodeval = node (yypvt[-1].nodeval, Node_concat, yypvt[-0].nodeval); } break;
+# line 591 "awk.y"
+{ yyval.nodeval = node(yypvt[-4].nodeval, Node_cond_exp, node(yypvt[-2].nodeval, Node_if_branches, yypvt[-0].nodeval));} break;
case 109:
-# line 569 "awk.y"
-{ want_assign = 0; } break;
+# line 593 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
case 110:
-# line 571 "awk.y"
-{ yyval.nodeval = node (yypvt[-3].nodeval, yypvt[-2].nodetypeval, yypvt[-0].nodeval); } break;
+# line 595 "awk.y"
+{ yyval.nodeval = node (yypvt[-1].nodeval, Node_concat, yypvt[-0].nodeval); } break;
case 111:
-# line 573 "awk.y"
-{ yyval.nodeval = node (yypvt[-2].nodeval, Node_and, yypvt[-0].nodeval); } break;
+# line 600 "awk.y"
+{ want_assign = 0; } break;
case 112:
-# line 575 "awk.y"
-{ yyval.nodeval = node (yypvt[-2].nodeval, Node_or, yypvt[-0].nodeval); } break;
+# line 602 "awk.y"
+{ yyval.nodeval = node (yypvt[-3].nodeval, yypvt[-2].nodetypeval, yypvt[-0].nodeval); } break;
case 113:
-# line 577 "awk.y"
+# line 604 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_and, yypvt[-0].nodeval); } break;
+case 114:
+# line 606 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_or, yypvt[-0].nodeval); } break;
+case 115:
+# line 608 "awk.y"
{
if (do_lint && ! io_allowed && yypvt[-0].nodeval == NULL)
warning("non-redirected getline undefined inside BEGIN or END action");
yyval.nodeval = node (yypvt[-1].nodeval, Node_K_getline, yypvt[-0].nodeval);
} break;
-case 114:
-# line 583 "awk.y"
+case 116:
+# line 614 "awk.y"
{ yyval.nodeval = yypvt[-0].nodeval; } break;
-case 115:
-# line 585 "awk.y"
+case 117:
+# line 616 "awk.y"
{ yyval.nodeval = node((NODE *) NULL, Node_nomatch, yypvt[-0].nodeval); } break;
-case 116:
-# line 587 "awk.y"
+case 118:
+# line 618 "awk.y"
{ yyval.nodeval = node (yypvt[-2].nodeval, yypvt[-1].nodetypeval, mk_rexp(yypvt[-0].nodeval)); } break;
-case 117:
-# line 589 "awk.y"
+case 119:
+# line 620 "awk.y"
{ yyval.nodeval = node (variable(yypvt[-0].sval,1), Node_in_array, yypvt[-2].nodeval); } break;
-case 118:
-# line 591 "awk.y"
+case 120:
+# line 622 "awk.y"
{ yyval.nodeval = node (yypvt[-2].nodeval, yypvt[-1].nodetypeval, yypvt[-0].nodeval); } break;
-case 119:
-# line 593 "awk.y"
+case 121:
+# line 624 "awk.y"
{ yyval.nodeval = node(yypvt[-4].nodeval, Node_cond_exp, node(yypvt[-2].nodeval, Node_if_branches, yypvt[-0].nodeval));} break;
-case 120:
-# line 595 "awk.y"
+case 122:
+# line 626 "awk.y"
{ yyval.nodeval = yypvt[-0].nodeval; } break;
-case 121:
-# line 597 "awk.y"
+case 123:
+# line 628 "awk.y"
{ yyval.nodeval = node (yypvt[-1].nodeval, Node_concat, yypvt[-0].nodeval); } break;
-case 124:
-# line 605 "awk.y"
-{ yyval.nodeval = node (yypvt[-2].nodeval, Node_exp, yypvt[-0].nodeval); } break;
-case 125:
-# line 607 "awk.y"
-{ yyval.nodeval = node (yypvt[-2].nodeval, Node_times, yypvt[-0].nodeval); } break;
case 126:
-# line 609 "awk.y"
-{ yyval.nodeval = node (yypvt[-2].nodeval, Node_quotient, yypvt[-0].nodeval); } break;
+# line 636 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_exp, yypvt[-0].nodeval); } break;
case 127:
-# line 611 "awk.y"
-{ yyval.nodeval = node (yypvt[-2].nodeval, Node_mod, yypvt[-0].nodeval); } break;
+# line 638 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_times, yypvt[-0].nodeval); } break;
case 128:
-# line 613 "awk.y"
-{ yyval.nodeval = node (yypvt[-2].nodeval, Node_plus, yypvt[-0].nodeval); } break;
+# line 640 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_quotient, yypvt[-0].nodeval); } break;
case 129:
-# line 615 "awk.y"
-{ yyval.nodeval = node (yypvt[-2].nodeval, Node_minus, yypvt[-0].nodeval); } break;
+# line 642 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_mod, yypvt[-0].nodeval); } break;
case 130:
-# line 620 "awk.y"
-{ yyval.nodeval = node (yypvt[-0].nodeval, Node_not,(NODE *) NULL); } break;
+# line 644 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_plus, yypvt[-0].nodeval); } break;
case 131:
-# line 622 "awk.y"
-{ yyval.nodeval = yypvt[-1].nodeval; } break;
+# line 646 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_minus, yypvt[-0].nodeval); } break;
case 132:
-# line 624 "awk.y"
-{ yyval.nodeval = snode (yypvt[-1].nodeval, Node_builtin, (int) yypvt[-3].lval); } break;
+# line 651 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_not,(NODE *) NULL); } break;
case 133:
-# line 626 "awk.y"
-{ yyval.nodeval = snode (yypvt[-1].nodeval, Node_builtin, (int) yypvt[-3].lval); } break;
+# line 653 "awk.y"
+{ yyval.nodeval = yypvt[-1].nodeval; } break;
case 134:
-# line 628 "awk.y"
+# line 655 "awk.y"
+{
+ if (! io_allowed && strcmp(tokstart, "nextfile") == 0)
+ yyerror("nextfile() is illegal in BEGIN and END");
+ } break;
+case 135:
+# line 660 "awk.y"
+{ yyval.nodeval = snode (yypvt[-1].nodeval, Node_builtin, (int) yypvt[-4].lval); } break;
+case 136:
+# line 662 "awk.y"
+{ yyval.nodeval = snode (yypvt[-1].nodeval, Node_builtin, (int) yypvt[-3].lval); } break;
+case 137:
+# line 664 "awk.y"
{
if (do_lint)
- warning("call of length without parentheses is not portable");
+ warning("call of `length' without parentheses is not portable");
yyval.nodeval = snode ((NODE *)NULL, Node_builtin, (int) yypvt[-0].lval);
- if (do_posix) {
- yyerror("POSIX requires parentheses for call to `length'");
- yyerrok;
- }
+ if (do_posix)
+ warning( "call of `length' without parentheses is deprecated by POSIX");
} break;
-case 135:
-# line 638 "awk.y"
+case 138:
+# line 672 "awk.y"
{
yyval.nodeval = node (yypvt[-1].nodeval, Node_func_call, make_string(yypvt[-3].sval, strlen(yypvt[-3].sval)));
} break;
-case 136:
-# line 642 "awk.y"
+case 139:
+# line 676 "awk.y"
{ yyval.nodeval = node (yypvt[-0].nodeval, Node_preincrement, (NODE *)NULL); } break;
-case 137:
-# line 644 "awk.y"
+case 140:
+# line 678 "awk.y"
{ yyval.nodeval = node (yypvt[-0].nodeval, Node_predecrement, (NODE *)NULL); } break;
-case 138:
-# line 646 "awk.y"
+case 141:
+# line 680 "awk.y"
{ yyval.nodeval = yypvt[-0].nodeval; } break;
-case 139:
-# line 648 "awk.y"
+case 142:
+# line 682 "awk.y"
{ yyval.nodeval = yypvt[-0].nodeval; } break;
-case 140:
-# line 651 "awk.y"
+case 143:
+# line 685 "awk.y"
{ if (yypvt[-0].nodeval->type == Node_val) {
yypvt[-0].nodeval->numbr = -(force_number(yypvt[-0].nodeval));
yyval.nodeval = yypvt[-0].nodeval;
} else
yyval.nodeval = node (yypvt[-0].nodeval, Node_unary_minus, (NODE *)NULL);
} break;
-case 141:
-# line 658 "awk.y"
+case 144:
+# line 692 "awk.y"
{ yyval.nodeval = yypvt[-0].nodeval; } break;
-case 142:
-# line 663 "awk.y"
+case 145:
+# line 697 "awk.y"
{ yyval.nodeval = node (yypvt[-1].nodeval, Node_postincrement, (NODE *)NULL); } break;
-case 143:
-# line 665 "awk.y"
+case 146:
+# line 699 "awk.y"
{ yyval.nodeval = node (yypvt[-1].nodeval, Node_postdecrement, (NODE *)NULL); } break;
-case 145:
-# line 671 "awk.y"
+case 148:
+# line 705 "awk.y"
{ yyval.nodeval = NULL; } break;
-case 146:
-# line 673 "awk.y"
+case 149:
+# line 707 "awk.y"
{ yyval.nodeval = yypvt[-0].nodeval; } break;
-case 147:
-# line 678 "awk.y"
+case 150:
+# line 712 "awk.y"
{ yyval.nodeval = variable(yypvt[-0].sval,1); } break;
-case 148:
-# line 680 "awk.y"
+case 151:
+# line 714 "awk.y"
{
if (yypvt[-1].nodeval->rnode == NULL) {
yyval.nodeval = node (variable(yypvt[-3].sval,1), Node_subscript, yypvt[-1].nodeval->lnode);
@@ -2789,23 +2844,23 @@ case 148:
} else
yyval.nodeval = node (variable(yypvt[-3].sval,1), Node_subscript, yypvt[-1].nodeval);
} break;
-case 149:
-# line 688 "awk.y"
-{ yyval.nodeval = node (yypvt[-0].nodeval, Node_field_spec, (NODE *)NULL); } break;
-case 150:
-# line 690 "awk.y"
-{ yyval.nodeval = node (yypvt[-0].nodeval, Node_field_spec, (NODE *)NULL); } break;
case 152:
-# line 698 "awk.y"
-{ yyerrok; } break;
+# line 722 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_field_spec, (NODE *)NULL); } break;
case 153:
-# line 702 "awk.y"
+# line 724 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_field_spec, (NODE *)NULL); } break;
+case 155:
+# line 732 "awk.y"
{ yyerrok; } break;
case 156:
-# line 711 "awk.y"
+# line 736 "awk.y"
+{ yyerrok; } break;
+case 159:
+# line 745 "awk.y"
{ yyerrok; want_assign = 0; } break;
-case 157:
-# line 714 "awk.y"
+case 160:
+# line 748 "awk.y"
{ yyerrok; } break;
}
goto yystack; /* reset registers in driver code */
diff --git a/builtin.c b/builtin.c
index c2e28e52..5022ca4c 100644
--- a/builtin.c
+++ b/builtin.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -23,9 +23,11 @@
* the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include "awk.h"
-#ifndef atarist
+
+#ifndef SRANDOM_PROTO
extern void srandom P((int seed));
#endif
extern char *initstate P((unsigned seed, char *state, int n));
@@ -67,7 +69,7 @@ NODE *tree;
NODE *tmp;
double d, res;
#ifndef exp
- double exp();
+ double exp P((double));
#endif
tmp= tree_eval(tree->lnode);
@@ -103,8 +105,8 @@ NODE *tree;
while (l1) {
if (l2 > l1)
break;
- if (casetable[*p1] == casetable[*p2]
- && strncasecmp(p1, p2, l2) == 0) {
+ if (casetable[(int)*p1] == casetable[(int)*p2]
+ && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
ret = 1 + s1->stlen - l1;
break;
}
@@ -115,7 +117,8 @@ NODE *tree;
while (l1) {
if (l2 > l1)
break;
- if (STREQN(p1, p2, l2)) {
+ if (*p1 == *p2
+ && (l2 == 1 || STREQN(p1, p2, l2))) {
ret = 1 + s1->stlen - l1;
break;
}
@@ -133,8 +136,8 @@ do_int(tree)
NODE *tree;
{
NODE *tmp;
- double floor();
- double ceil();
+ double floor P((double));
+ double ceil P((double));
double d;
tmp = tree_eval(tree->lnode);
@@ -166,7 +169,7 @@ NODE *tree;
{
NODE *tmp;
#ifndef log
- double log();
+ double log P((double));
#endif
double d, arg;
@@ -248,7 +251,7 @@ NODE *tree;
emalloc(obuf, char *, 120, "do_sprintf");
osiz = 120;
- ofre = osiz;
+ ofre = osiz - 1;
olen = 0;
sfmt = tree_eval(tree->lnode);
sfmt = force_string(sfmt);
@@ -331,7 +334,7 @@ retry:
goto retry;
case 'c':
parse_next_arg();
- if (arg->flags & NUMERIC) {
+ if (arg->flags & NUMBER) {
#ifdef sun386
tmp_uval = arg->numbr;
uval= (unsigned long) tmp_uval;
@@ -596,12 +599,11 @@ register NODE *tree;
tree = do_sprintf(tree->lnode);
(void) fwrite(tree->stptr, sizeof(char), tree->stlen, fp);
free_temp(tree);
- if ((fp == stdout && output_is_tty) || (rp && (rp->flag & RED_NOBUF))) {
+ if ((fp == stdout && output_is_tty) || (rp && (rp->flag & RED_NOBUF)))
fflush(fp);
- if (ferror(fp)) {
- warning("error writing output: %s", strerror(errno));
- clearerr(fp);
- }
+ if (ferror(fp)) {
+ warning("error writing output: %s", strerror(errno));
+ clearerr(fp);
}
}
@@ -611,7 +613,7 @@ NODE *tree;
{
NODE *tmp;
double arg;
- extern double sqrt();
+ extern double sqrt P((double));
tmp = tree_eval(tree->lnode);
arg = (double) force_number(tmp);
@@ -660,20 +662,20 @@ NODE *tree;
{
NODE *t1, *t2;
struct tm *tm;
- long clock;
+ time_t fclock;
char buf[100];
int ret;
t1 = force_string(tree_eval(tree->lnode));
if (tree->rnode == NULL) /* second arg. missing, default */
- (void) time(&clock);
+ (void) time(&fclock);
else {
t2 = tree_eval(tree->rnode->lnode);
- clock = (long) force_number(t2);
+ fclock = (time_t) force_number(t2);
free_temp(t2);
}
- tm = localtime(&clock);
+ tm = localtime(&fclock);
ret = strftime(buf, 100, t1->stptr, tm);
@@ -684,10 +686,10 @@ NODE *
do_systime(tree)
NODE *tree;
{
- long clock;
+ time_t lclock;
- (void) time(&clock);
- return tmp_number((AWKNUM) clock);
+ (void) time(&lclock);
+ return tmp_number((AWKNUM) lclock);
}
NODE *
@@ -695,12 +697,16 @@ do_system(tree)
NODE *tree;
{
NODE *tmp;
- int ret;
+ int ret = 0;
+ char *cmd;
(void) flush_io (); /* so output is synchronous with gawk's */
tmp = tree_eval(tree->lnode);
- ret = system(force_string(tmp)->stptr);
- ret = (ret >> 8) & 0xff;
+ cmd = force_string(tmp)->stptr;
+ if (cmd && *cmd) {
+ ret = system(cmd);
+ ret = (ret >> 8) & 0xff;
+ }
free_temp(tmp);
return tmp_number((AWKNUM) ret);
}
@@ -749,7 +755,7 @@ register NODE *tree;
putc(*s++, fp);
#else
if (OFSlen)
- fwrite(s, sizeof(char), OFSlen, fp);
+ (void) fwrite(s, sizeof(char), OFSlen, fp);
#endif /* VMS && !NO_TTY_FWRITE */
}
}
@@ -757,17 +763,16 @@ register NODE *tree;
#if (!defined(VMS)) || defined(NO_TTY_FWRITE)
while (*s)
putc(*s++, fp);
- if ((fp == stdout && output_is_tty) || (rp && (rp->flag & RED_NOBUF))) {
+ if ((fp == stdout && output_is_tty) || (rp && (rp->flag & RED_NOBUF)))
#else
if (ORSlen)
- fwrite(s, sizeof(char), ORSlen, fp);
- if ((rp && (rp->flag & RED_NOBUF))) {
+ (void) fwrite(s, sizeof(char), ORSlen, fp);
+ if ((rp && (rp->flag & RED_NOBUF)))
#endif /* VMS && !NO_TTY_FWRITE */
fflush(fp);
- if (ferror(fp)) {
- warning("error writing output: %s", strerror(errno));
- clearerr(fp);
- }
+ if (ferror(fp)) {
+ warning("error writing output: %s", strerror(errno));
+ clearerr(fp);
}
}
@@ -810,7 +815,7 @@ do_atan2(tree)
NODE *tree;
{
NODE *t1, *t2;
- extern double atan2();
+ extern double atan2 P((double, double));
double d1, d2;
t1 = tree_eval(tree->lnode);
@@ -827,7 +832,7 @@ do_sin(tree)
NODE *tree;
{
NODE *tmp;
- extern double sin();
+ extern double sin P((double));
double d;
tmp = tree_eval(tree->lnode);
@@ -841,7 +846,7 @@ do_cos(tree)
NODE *tree;
{
NODE *tmp;
- extern double cos();
+ extern double cos P((double));
double d;
tmp = tree_eval(tree->lnode);
@@ -882,7 +887,7 @@ NODE *tree;
(void) setstate(state);
if (!tree)
- srandom((int) (save_seed = (long) time((long *) 0)));
+ srandom((int) (save_seed = (long) time((time_t *) 0)));
else {
tmp = tree_eval(tree->lnode);
srandom((int) (save_seed = (long) force_number(tmp)));
@@ -904,7 +909,7 @@ NODE *tree;
t1 = force_string(tree_eval(tree->lnode));
tree = tree->rnode->lnode;
rp = re_update(tree);
- rstart = research(rp, t1->stptr, t1->stlen, 1);
+ rstart = research(rp, t1->stptr, 0, t1->stlen, 1);
if (rstart >= 0) { /* match succeded */
rstart++; /* 1-based indexing */
rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr);
@@ -939,7 +944,6 @@ int global;
int repllen;
int sofar;
int ampersands;
- int inplace = 0;
int matches = 0;
Regexp *rp;
NODE *s; /* subst. pattern */
@@ -957,15 +961,18 @@ int global;
tree = tree->rnode;
tmp = tree->lnode;
- if (tmp->type == Node_val)
- lhs = NULL;
t = force_string(tree_eval(tmp));
/* do the search early to avoid work on non-match */
- if (research(rp, t->stptr, t->stlen, 1) == -1)
- return tmp_number((AWKNUM) 0);
+ if (research(rp, t->stptr, 0, t->stlen, 1) == -1 ||
+ (RESTART(rp, t->stptr) >= t->stlen) && (matches = 1)) {
+ free_temp(t);
+ return tmp_number((AWKNUM) matches);
+ }
- if (lhs != NULL)
+ if (tmp->type == Node_val)
+ lhs = NULL;
+ else
lhs = get_lhs(tmp, &after_assign);
t->flags |= STRING;
/*
@@ -989,11 +996,7 @@ int global;
repl = s->stptr;
replend = repl + s->stlen;
repllen = replend - repl;
- if (repllen == 0) { /* replacement is null string */
- buf = text; /* so do subs. in place */
- inplace = 1;
- } else
- emalloc(buf, char *, buflen, "do_sub");
+ emalloc(buf, char *, buflen, "do_sub");
ampersands = 0;
for (scan = repl; scan < replend; scan++) {
if (*scan == '&') {
@@ -1006,8 +1009,8 @@ int global;
bp = buf;
for (;;) {
matches++;
- matchstart = text + RESTART(rp, t->stptr);
- matchend = text + REEND(rp, t->stptr);
+ matchstart = t->stptr + RESTART(rp, t->stptr);
+ matchend = t->stptr + REEND(rp, t->stptr);
/*
* create the result, copying in parts of the original
@@ -1032,17 +1035,18 @@ int global;
*bp++ = *scan;
} else
*bp++ = *scan;
- if (global && matchstart == matchend) {
+ if (global && matchstart == matchend && matchend < text + textlen - 1) {
*bp++ = *text;
matchend++;
}
textlen = text + textlen - matchend;
text = matchend;
- if (!global || research(rp, text, textlen, 1) == -1)
+ if (!global || textlen <= 0 ||
+ research(rp, t->stptr, text-t->stptr, textlen, 1) == -1)
break;
}
sofar = bp - buf;
- if (!inplace && buflen - sofar - textlen - 1) {
+ if (buflen - sofar - textlen - 1) {
buflen = sofar + textlen + 2;
erealloc(buf, char *, buflen, "do_sub");
bp = buf + sofar;
@@ -1050,10 +1054,7 @@ int global;
for (scan = matchend; scan < text + textlen; scan++)
*bp++ = *scan;
textlen = bp - buf;
- if (inplace)
- erealloc(buf, char *, textlen + 2, "do_sub");
- else
- free(t->stptr);
+ free(t->stptr);
t->stptr = buf;
t->stlen = textlen;
@@ -1065,7 +1066,7 @@ int global;
}
if (after_assign)
(*after_assign)();
- t->flags &= ~(NUM|NUMERIC);
+ t->flags &= ~(NUM|NUMBER);
}
return tmp_number((AWKNUM) matches);
}
diff --git a/config.in b/config.in
new file mode 100644
index 00000000..3a7ae0e4
--- /dev/null
+++ b/config.in
@@ -0,0 +1,270 @@
+/*
+ * config.h -- configuration definitions for gawk.
+ *
+ * __SYSTEM__
+ */
+
+/*
+ * Copyright (C) 1991, 1992 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file isolates configuration dependencies for gnu awk.
+ * You should know something about your system, perhaps by having
+ * a manual handy, when you edit this file. You should copy config.h-dist
+ * to config.h, and edit config.h. Do not modify config.h-dist, so that
+ * it will be easy to apply any patches that may be distributed.
+ *
+ * The general idea is that systems conforming to the various standards
+ * should need to do the least amount of changing. Defining the various
+ * items in this file usually means that your system is missing that
+ * particular feature.
+ *
+ * The order of preference in standard conformance is ANSI C, POSIX,
+ * and the SVID.
+ *
+ * If you have no clue as to what's going on with your system, try
+ * compiling gawk without editing this file and see what shows up
+ * missing in the link stage. From there, you can probably figure out
+ * which defines to turn on.
+ */
+
+/**************************/
+/* Miscellaneous features */
+/**************************/
+
+/*
+ * BLKSIZE_MISSING
+ *
+ * Check your /usr/include/sys/stat.h file. If the stat structure
+ * does not have a member named st_blksize, define this. (This will
+ * most likely be the case on most System V systems prior to V.4.)
+ */
+/* #define BLKSIZE_MISSING 1 */
+
+/*
+ * SIGTYPE
+ *
+ * The return type of the routines passed to the signal function.
+ * Modern systems use `void', older systems use `int'.
+ * If left undefined, it will default to void.
+ */
+/* #define SIGTYPE int */
+
+/*
+ * SIZE_T_MISSING
+ *
+ * If your system has no typedef for size_t, define this to get a default
+ */
+/* #define SIZE_T_MISSING 1 */
+
+/*
+ * CHAR_UNSIGNED
+ *
+ * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
+ * then define this for use in regex.c
+ */
+/* #define CHAR_UNSIGNED 1 */
+
+/*
+ * HAVE_UNDERSCORE_SETJMP
+ *
+ * Check in your /usr/include/setjmp.h file. If there are routines
+ * there named _setjmp and _longjmp, then you should define this.
+ * Typically only systems derived from Berkeley Unix have this.
+ */
+/* #define HAVE_UNDERSCORE_SETJMP 1 */
+
+/***********************************************/
+/* Missing library subroutines or system calls */
+/***********************************************/
+
+/*
+ * MEMCMP_MISSING
+ * MEMCPY_MISSING
+ * MEMSET_MISSING
+ *
+ * These three routines are for manipulating blocks of memory. Most
+ * likely they will either all three be present or all three be missing,
+ * so they're grouped together.
+ */
+/* #define MEMCMP_MISSING 1 */
+/* #define MEMCPY_MISSING 1 */
+/* #define MEMSET_MISSING 1 */
+
+/*
+ * RANDOM_MISSING
+ *
+ * Your system does not have the random(3) suite of random number
+ * generating routines. These are different than the old rand(3)
+ * routines!
+ */
+/* #define RANDOM_MISSING 1 */
+
+/*
+ * STRCASE_MISSING
+ *
+ * Your system does not have the strcasecmp() and strncasecmp()
+ * routines that originated in Berkeley Unix.
+ */
+/* #define STRCASE_MISSING 1 */
+
+/*
+ * STRCHR_MISSING
+ *
+ * Your system does not have the strchr() and strrchr() functions.
+ */
+/* #define STRCHR_MISSING 1 */
+
+/*
+ * STRERROR_MISSING
+ *
+ * Your system lacks the ANSI C strerror() routine for returning the
+ * strings associated with errno values.
+ */
+/* #define STRERROR_MISSING 1 */
+
+/*
+ * STRTOD_MISSING
+ *
+ * Your system does not have the strtod() routine for converting
+ * strings to double precision floating point values.
+ */
+/* #define STRTOD_MISSING 1 */
+
+/*
+ * STRFTIME_MISSING
+ *
+ * Your system lacks the ANSI C strftime() routine for formatting
+ * broken down time values.
+ */
+/* #define STRFTIME_MISSING 1 */
+
+/*
+ * TZSET_MISSING
+ *
+ * If you have a 4.2 BSD vintage system, then the strftime() routine
+ * supplied in the missing directory won't be enough, because it relies on the
+ * tzset() routine from System V / Posix. Fortunately, there is an
+ * emulation for tzset() too that should do the trick. If you don't
+ * have tzset(), define this.
+ */
+/* #define TZSET_MISSING 1 */
+
+/*
+ * TZNAME_MISSING
+ *
+ * Some systems do not support the external variables tzname and daylight.
+ * If this is the case *and* strftime() is missing, define this.
+ */
+/* #define TZNAME_MISSING 1 */
+
+/*
+ * STDC_HEADERS
+ *
+ * If your system does have ANSI compliant header files that
+ * provide prototypes for library routines, then define this.
+ */
+/* #define STDC_HEADERS 1 */
+
+/*
+ * NO_TOKEN_PASTING
+ *
+ * If your compiler defines __STDC__ but does not support token
+ * pasting (tok##tok), then define this.
+ */
+/* #define NO_TOKEN_PASTING 1 */
+
+/*****************************************************************/
+/* Stuff related to the Standard I/O Library. */
+/*****************************************************************/
+/* Much of this is (still, unfortunately) black magic in nature. */
+/* You may have to use some or all of these together to get gawk */
+/* to work correctly. */
+/*****************************************************************/
+
+/*
+ * NON_STD_SPRINTF
+ *
+ * Look in your /usr/include/stdio.h file. If the return type of the
+ * sprintf() function is NOT `int', define this.
+ */
+/* #define NON_STD_SPRINTF 1 */
+
+/*
+ * VPRINTF_MISSING
+ *
+ * Define this if your system lacks vprintf() and the other routines
+ * that go with it. This will trigger an attempt to use _doprnt().
+ * If you don't have that, this attempt will fail and you are on your own.
+ */
+/* #define VPRINTF_MISSING 1 */
+
+/*
+ * Casts from size_t to int and back. These will become unnecessary
+ * at some point in the future, but for now are required where the
+ * two types are a different representation.
+ */
+/* #define SZTC */
+/* #define INTC */
+
+/*
+ * SYSTEM_MISSING
+ *
+ * Define this if your library does not provide a system function
+ * or you are not entirely happy with it and would rather use
+ * a provided replacement (atari only).
+ */
+/* #define SYSTEM_MISSING 1 */
+
+/*
+ * FMOD_MISSING
+ *
+ * Define this if your system lacks the fmod() function and modf() will
+ * be used instead.
+ */
+/* #define FMOD_MISSING 1 */
+
+
+/*******************************/
+/* Gawk configuration options. */
+/*******************************/
+
+/*
+ * DEFPATH
+ *
+ * The default search path for the -f option of gawk. It is used
+ * if the AWKPATH environment variable is undefined. The default
+ * definition is provided here. Most likely you should not change
+ * this.
+ */
+
+/* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
+/* #define ENVSEP ':' */
+
+/*
+ * alloca already has a prototype defined - don't redefine it
+ */
+/* #define ALLOCA_PROTO 1 */
+
+/*
+ * srandom already has a prototype defined - don't redefine it
+ */
+/* #define SRANDOM_PROTO 1 */
diff --git a/config/atari b/config/atari
index 36bf23f6..29a80bb3 100644
--- a/config/atari
+++ b/config/atari
@@ -3,6 +3,8 @@ BLKSIZE_MISSING 1 /* Not really - but it may work better that way */
STRCASE_MISSING 1
STDC_HEADERS 1
SYSTEM_MISSING 1
+SRANDOM_PROTO 1
+ALLOCA_PROTO 1
DEFPATH ".,c:\\\\lib\\\\awk,c:\\\\gnu\\\\lib\\\\awk"
ENVSEP ','
SZTC (size_t)
diff --git a/config/bsd44alpha b/config/bsd44alpha
new file mode 100644
index 00000000..4dfcb79d
--- /dev/null
+++ b/config/bsd44alpha
@@ -0,0 +1,6 @@
+For generic 4.4 alpha
+STRTOD_MISSING 1
+HAVE_UNDERSCORE_SETJMP 1
+STDC_HEADERS 1
+ALLOCA_PROTO 1
+SRANDOM_PROTO 1
diff --git a/config/convex b/config/convex
new file mode 100644
index 00000000..4e8c2d8e
--- /dev/null
+++ b/config/convex
@@ -0,0 +1,7 @@
+ConvexOS 9.1, Convex C 4.1. I used cc -O1
+HAVE_UNDERSCORE_SETJMP 1
+STRERROR_MISSING 1
+STRCASE_MISSING 1
+STRTOD_MISSING 1
+STDC_HEADERS 1
+CHAR_UNSIGNED 1
diff --git a/config/linux.h b/config/linux.h
new file mode 100644
index 00000000..f0b6856d
--- /dev/null
+++ b/config/linux.h
@@ -0,0 +1,295 @@
+/*
+ * config.h -- configuration definitions for gawk.
+ *
+ * Linux 0.96a+
+ *
+ * Linux version 3 Jun 1992, faith@cs.unc.edu (using gcc 2.1)
+ * Revised: Thu Jun 4 22:24:01 1992 by root
+ *
+ * NOTE: Select, in Makefile-dist, the bison option and turn on gcc switches.
+ * In builtin.c, place a
+ * #ifndef linux
+ * #endif
+ * wrapper around the externs for srandom(), initstate(), setstate(), and
+ * random().
+ */
+
+/*
+ * Copyright (C) 1991, the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file isolates configuration dependencies for gnu awk.
+ * You should know something about your system, perhaps by having
+ * a manual handy, when you edit this file. You should copy config.h-dist
+ * to config.h, and edit config.h. Do not modify config.h-dist, so that
+ * it will be easy to apply any patches that may be distributed.
+ *
+ * The general idea is that systems conforming to the various standards
+ * should need to do the least amount of changing. Defining the various
+ * items in this file usually means that your system is missing that
+ * particular feature.
+ *
+ * The order of preference in standard conformance is ANSI C, POSIX,
+ * and the SVID.
+ *
+ * If you have no clue as to what's going on with your system, try
+ * compiling gawk without editing this file and see what shows up
+ * missing in the link stage. From there, you can probably figure out
+ * which defines to turn on.
+ */
+
+/**************************/
+/* Miscellaneous features */
+/**************************/
+
+/*
+ * BLKSIZE_MISSING
+ *
+ * Check your /usr/include/sys/stat.h file. If the stat structure
+ * does not have a member named st_blksize, define this. (This will
+ * most likely be the case on most System V systems prior to V.4.)
+ */
+#define BLKSIZE_MISSING 1
+
+/*
+ * SIGTYPE
+ *
+ * The return type of the routines passed to the signal function.
+ * Modern systems use `void', older systems use `int'.
+ * If left undefined, it will default to void.
+ */
+/* #define SIGTYPE int */
+
+/*
+ * SIZE_T_MISSING
+ *
+ * If your system has no typedef for size_t, define this to get a default
+ */
+/* #define SIZE_T_MISSING 1 */
+
+/*
+ * CHAR_UNSIGNED
+ *
+ * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
+ * then define this for use in regex.c
+ */
+/* #define CHAR_UNSIGNED 1 */
+
+/*
+ * HAVE_UNDERSCORE_SETJMP
+ *
+ * Check in your /usr/include/setjmp.h file. If there are routines
+ * there named _setjmp and _longjmp, then you should define this.
+ * Typically only systems derived from Berkeley Unix have this.
+ */
+/* #define HAVE_UNDERSCORE_SETJMP 1 */
+
+/***********************************************/
+/* Missing library subroutines or system calls */
+/***********************************************/
+
+/*
+ * GETOPT_MISSING
+ *
+ * Define this if your library does not have the getopt(3) library
+ * routine for parsing command line arguments.
+ */
+/* #define GETOPT_MISSING 1 */
+
+/*
+ * MEMCMP_MISSING
+ * MEMCPY_MISSING
+ * MEMSET_MISSING
+ *
+ * These three routines are for manipulating blocks of memory. Most
+ * likely they will either all three be present or all three be missing,
+ * so they're grouped together.
+ */
+/* #define MEMCMP_MISSING 1 */
+/* #define MEMCPY_MISSING 1 */
+/* #define MEMSET_MISSING 1 */
+
+/*
+ * RANDOM_MISSING
+ *
+ * Your system does not have the random(3) suite of random number
+ * generating routines. These are different than the old rand(3)
+ * routines!
+ */
+/* #define RANDOM_MISSING 1 */
+
+/*
+ * STRCASE_MISSING
+ *
+ * Your system does not have the strcasecmp() and strncasecmp()
+ * routines that originated in Berkeley Unix.
+ */
+#define STRCASE_MISSING 1
+
+/*
+ * STRCHR_MISSING
+ *
+ * Your system does not have the strchr() and strrchr() functions.
+ */
+/* #define STRCHR_MISSING 1 */
+
+/*
+ * STRERROR_MISSING
+ *
+ * Your system lacks the ANSI C strerror() routine for returning the
+ * strings associated with errno values.
+ */
+/* #define STRERROR_MISSING 1 */
+
+/*
+ * STRTOD_MISSING
+ *
+ * Your system does not have the strtod() routine for converting
+ * strings to double precision floating point values.
+ */
+/* #define STRTOD_MISSING 1 */
+
+/*
+ * STRTOL_MISSING
+ *
+ * Your system does not have the strtol() routine for converting
+ * strings to long integers.
+ */
+/* #define STRTOL_MISSING 1 */
+
+/*
+ * STRFTIME_MISSING
+ *
+ * Your system lacks the ANSI C strftime() routine for formatting
+ * broken down time values.
+ */
+/* #define STRFTIME_MISSING 1 */
+
+/*
+ * TZSET_MISSING
+ *
+ * If you have a 4.2 BSD vintage system, then the strftime() routine
+ * supplied in the missing directory won't be enough, because it relies on the
+ * tzset() routine from System V / Posix. Fortunately, there is an
+ * emulation for tzset() too that should do the trick. If you don't
+ * have tzset(), define this.
+ */
+/* #define TZSET_MISSING 1 */
+
+/*
+ * TZNAME_MISSING
+ *
+ * Some systems do not support the external variables tzname and daylight.
+ * If this is the case *and* strftime() is missing, define this.
+ */
+/* #define TZNAME_MISSING 1 */
+
+/*
+ * STDC_HEADERS
+ *
+ * If your system does have ANSI compliant header files that
+ * provide prototypes for library routines, then define this.
+ */
+#define STDC_HEADERS 1
+
+/*
+ * NO_TOKEN_PASTING
+ *
+ * If your compiler defines __STDC__ but does not support token
+ * pasting (tok##tok), then define this.
+ */
+/* #define NO_TOKEN_PASTING 1 */
+
+/*****************************************************************/
+/* Stuff related to the Standard I/O Library. */
+/*****************************************************************/
+/* Much of this is (still, unfortunately) black magic in nature. */
+/* You may have to use some or all of these together to get gawk */
+/* to work correctly. */
+/*****************************************************************/
+
+/*
+ * NON_STD_SPRINTF
+ *
+ * Look in your /usr/include/stdio.h file. If the return type of the
+ * sprintf() function is NOT `int', define this.
+ */
+/* #define NON_STD_SPRINTF 1 */
+
+/*
+ * VPRINTF_MISSING
+ *
+ * Define this if your system lacks vprintf() and the other routines
+ * that go with it.
+ */
+/* #define VPRINTF_MISSING 1 */
+
+/*
+ * BSDSTDIO
+ *
+ * Define this if your standard i/o library is internally compatible
+ * with the one shipped with Berkeley Unix systems (4.n, n <= 3-reno).
+ * If you've defined VPRINTF_MISSING, you probably will need this too.
+ */
+/* #define BSDSTDIO 1 */
+
+/*
+ * DOPRNT_MISSING
+ *
+ * Define this if your standard i/o library does not have the _doprnt()
+ * routine. This is used in an attempt to simulate the vfprintf()
+ * routine.
+ */
+#define DOPRNT_MISSING 1
+
+/*
+ * Casts from size_t to int and back. These will become unnecessary
+ * at some point in the future, but for now are required where the
+ * two types are a different representation.
+ */
+/* #define SZTC */
+/* #define INTC */
+
+/*
+ * SYSTEM_MISSING
+ *
+ * Define this if your library does not provide a system function
+ * or you are not entirely happy with it and would rather use
+ * a provided replacement (atari only).
+ */
+/* #define SYSTEM_MISSING 1 */
+
+
+/*******************************/
+/* Gawk configuration options. */
+/*******************************/
+
+/*
+ * DEFPATH
+ *
+ * The default search path for the -f option of gawk. It is used
+ * if the AWKPATH environment variable is undefined. The default
+ * definition is provided here. Most likely you should not change
+ * this.
+ */
+
+/* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
+/* #define ENVSEP ':' */
diff --git a/config/mach b/config/mach
new file mode 100644
index 00000000..fc630e37
--- /dev/null
+++ b/config/mach
@@ -0,0 +1,9 @@
+For Mach-386 2.6 system. Should work on other Mach 2.5 or 2.6 systems.
+SIGTYPE int
+HAVE_UNDERSCORE_SETJMP 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+STRTOD_MISSING 1
+STRTOL_MISSING 1
+TZNAME_MISSING 1
+BSDSTDIO 1
diff --git a/config/msdos b/config/msdos
index cb2d4c67..ea3f059f 100644
--- a/config/msdos
+++ b/config/msdos
@@ -1,9 +1,9 @@
MS-DOS systems using MSC 5.1
BLKSIZE_MISSING 1
-SIZE_T_MISSING 1
GCVT_MISSING 1
GETOPT_MISSING 1
RANDOM_MISSING 1
STRCASE_MISSING 1
STRFTIME_MISSING 1
STRTOL_MISSING 1
+STDC_HEADERS 1
diff --git a/config/next20 b/config/next20
index 6151e496..4e903ce7 100644
--- a/config/next20
+++ b/config/next20
@@ -1,6 +1,7 @@
NeXT running 2.0
STRTOD_MISSING 1 /* NeXT strtod() is buggy */
STDC_HEADERS 1
+ALLOCA_PROTO 1
SZTC (size_t)
INTC (int)
MAKE_NeXT
diff --git a/config/next21 b/config/next21
new file mode 100644
index 00000000..a95e6762
--- /dev/null
+++ b/config/next21
@@ -0,0 +1,6 @@
+NeXT running 2.1
+STDC_HEADERS 1
+ALLOCA_PROTO 1
+SZTC (size_t)
+INTC (int)
+MAKE_NeXT
diff --git a/config/osf1 b/config/osf1
new file mode 100644
index 00000000..de9fb527
--- /dev/null
+++ b/config/osf1
@@ -0,0 +1,3 @@
+For generic OSF/1
+STDC_HEADERS 1
+HAVE_UNDERSCORE_SETJMP 1
diff --git a/config/sunos41-gnulibc b/config/sunos41-gnulibc
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/config/sunos41-gnulibc
diff --git a/config.h.in b/config/v10config.h
index d2b63c06..5c6ddb15 100644
--- a/config.h.in
+++ b/config/v10config.h
@@ -1,7 +1,7 @@
/*
* config.h -- configuration definitions for gawk.
*
- * __SYSTEM__
+ * Vax Running 10th Edition Unix
*/
/*
@@ -57,7 +57,7 @@
* does not have a member named st_blksize, define this. (This will
* most likely be the case on most System V systems prior to V.4.)
*/
-/* #define BLKSIZE_MISSING 1 */
+#define BLKSIZE_MISSING 1
/*
* SIGTYPE
@@ -66,7 +66,7 @@
* Modern systems use `void', older systems use `int'.
* If left undefined, it will default to void.
*/
-/* #define SIGTYPE int */
+#define SIGTYPE SIG_TYP /* defined in <signal.h> */
/*
* SIZE_T_MISSING
@@ -124,7 +124,7 @@
* generating routines. These are different than the old rand(3)
* routines!
*/
-/* #define RANDOM_MISSING 1 */
+#define RANDOM_MISSING 1
/*
* STRCASE_MISSING
@@ -132,7 +132,7 @@
* Your system does not have the strcasemp() and strncasecmp()
* routines that originated in Berkeley Unix.
*/
-/* #define STRCASE_MISSING 1 */
+#define STRCASE_MISSING 1
/*
* STRCHR_MISSING
@@ -147,7 +147,15 @@
* Your system lacks the ANSI C strerror() routine for returning the
* strings associated with errno values.
*/
-/* #define STRERROR_MISSING 1 */
+#define STRERROR_MISSING 1
+
+/*
+ * STRFTIME_MISSING
+ *
+ * Your system lacks the ANSI C strftime() routine for formatting
+ * broken down time values.
+ */
+#define STRFTIME_MISSING 1
/*
* STRTOD_MISSING
@@ -166,14 +174,6 @@
/* #define STRTOL_MISSING 1 */
/*
- * STRFTIME_MISSING
- *
- * Your system lacks the ANSI C strftime() routine for formatting
- * broken down time values.
- */
-/* #define STRFTIME_MISSING 1 */
-
-/*
* TZSET_MISSING
*
* If you have a 4.2 BSD vintage system, then the strftime() routine
@@ -182,15 +182,7 @@
* emulation for tzset() too that should do the trick. If you don't
* have tzset(), define this.
*/
-/* #define TZSET_MISSING 1 */
-
-/*
- * TZNAME_MISSING
- *
- * Some systems do not support the external variables tzname and daylight.
- * If this is the case *and* strftime() is missing, define this.
- */
-/* #define TZNAME_MISSING 1 */
+#define TZSET_MISSING 1
/*
* STDC_HEADERS
diff --git a/config/vms-conf.h b/config/vms-conf.h
index d68f0a91..b64d0b5f 100644
--- a/config/vms-conf.h
+++ b/config/vms-conf.h
@@ -5,14 +5,14 @@
*/
/*
- * Copyright (C) 1991, the Free Software Foundation, Inc.
+ * Copyright (C) 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 1, or (at your option)
+ * the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* GAWK is distributed in the hope that it will be useful,
@@ -22,7 +22,7 @@
*
* You should have received a copy of the GNU General Public License
* along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**************************/
@@ -76,14 +76,6 @@
/***********************************************/
/*
- * GETOPT_MISSING
- * VMS: missing
- * Define this if your library does not have the getopt(3) library
- * routine for parsing command line arguments.
- */
-#define GETOPT_MISSING 1
-
-/*
* MEMCMP_MISSING
* MEMCPY_MISSING
* MEMSET_MISSING
@@ -129,14 +121,6 @@
/* #define STRERROR_MISSING 1 */
/*
- * STRFTIME_MISSING
- * VMS: missing (as of V5.4)
- * Your system lacks the ANSI C strftime() routine for formatting
- * broken down time values.
- */
-#define STRFTIME_MISSING 1
-
-/*
* STRTOD_MISSING
* VMS: <stdlib.h> (introduced V4.6)
* Your system does not have the strtod() routine for converting
@@ -145,12 +129,12 @@
/* #define STRTOD_MISSING 1 */
/*
- * STRTOL_MISSING
- * VMS: <stdlib.h> (introduced V4.6)
- * Your system does not have the strtol() routine for converting
- * strings to long integers.
+ * STRFTIME_MISSING
+ * VMS: missing (as of V5.4) [see below; do not change STRFTIME_MISSING]
+ * Your system lacks the ANSI C strftime() routine for formatting
+ * broken down time values.
*/
-/* #define STRTOL_MISSING 1 */
+#define STRFTIME_MISSING 1
/*
* TZSET_MISSING
@@ -164,6 +148,14 @@
/* #define TZSET_MISSING 1 */
/*
+ * TZNAME_MISSING
+ *
+ * Some systems do not support the external variables tzname and daylight.
+ * If this is the case *and* strftime() is missing, define this.
+ */
+/* #define TZNAME_MISSING 1 */
+
+/*
* STDC_HEADERS
* VMS: close enough (as of V4.6, VAX C V2.3) [GCC, see below]
* If your system does have ANSI compliant header files that
@@ -199,29 +191,12 @@
* VPRINTF_MISSING
* VMS: ok (introduced V4.6)
* Define this if your system lacks vprintf() and the other routines
- * that go with it.
+ * that go with it. This will trigger an attempt to use _doprnt().
+ * If you don't have that, this attempt will fail and you are on your own.
*/
/* #define VPRINTF_MISSING 1 */
/*
- * BSDSTDIO
- * VMS: forgot it
- * Define this if your standard i/o library is internally compatible
- * with the one shipped with Berkeley Unix systems (4.n, n <= 3-reno).
- * If you've defined VPRINTF_MISSING, you probably will need this too.
- */
-/* #define BSDSTDIO 1 */
-
-/*
- * DOPRNT_MISSING
- * VMS: missing--doesn't matter
- * Define this if your standard i/o library does not have the _doprnt()
- * routine. This is used in an attempt to simulate the vfprintf()
- * routine.
- */
-/* #define DOPRNT_MISSING 1 */
-
-/*
* Casts from size_t to int and back. These will become unnecessary
* at some point in the future, but for now are required where the
* two types are a different representation.
@@ -238,6 +213,14 @@
*/
/* #define SYSTEM_MISSING 1 */
+/*
+ * FMOD_MISSING
+ * VMS: ok (introduced V4.6)
+ * Define this if your system lacks the fmod() function; modf() will
+ * be used instead.
+ */
+/* #define FMOD_MISSING 1 */
+
/*******************************/
/* Gawk configuration options. */
@@ -256,6 +239,16 @@
#define ENVSEP ','
/*
+ * alloca already has a prototype defined - don't redefine it
+ */
+/* #define ALLOCA_PROTO 1 */
+
+/*
+ * srandom already has a prototype defined - don't redefine it
+ */
+/* #define SRANDOM_PROTO 1 */
+
+/*
* Extended source file access.
*/
#define DEFAULT_FILETYPE ".awk"
@@ -281,11 +274,22 @@
#if defined(VAXC) && !defined(__STDC__)
#define __STDC__ 0
#define NO_TOKEN_PASTING
+#ifndef __DECC /* DEC C does not support #pragma builtins even in VAXC mode */
#define VAXC_BUILTINS
+#endif
/* #define YYDEBUG 0 */
#endif
/*
+ * DEC C
+ *
+ * Digital's ANSI compiler.
+ */
+#ifdef __DECC
+ /* nothing special at the moment */
+#endif
+
+/*
* GNU C
*
* Versions of GCC (actually GAS) earlier than 1.38 don't produce the
@@ -300,8 +304,19 @@
* because most of the ANSI-C required header files are missing.
*/
#ifdef __GNUC__
-#define const
-#undef STDC_HEADERS
+/* #define const */
+/* #undef STDC_HEADERS */
+#ifndef STDC_HEADERS
#define alloca __builtin_alloca
#define environ $$PsectAttributes_NOSHR$$environ /* awful GAS kludge */
#endif
+#endif
+
+#ifdef STRFTIME_MISSING
+/*
+ * Always use the version of strftime() in missing/strftime.c instead of
+ * the [as yet undocumented/unsupported] one in VAXCRTL. Renaming it here
+ * guarantees that it won't clash with the library routine.
+ */
+#define strftime gnu_strftime
+#endif
diff --git a/config/vms-posix b/config/vms-posix
new file mode 100644
index 00000000..dbf50b70
--- /dev/null
+++ b/config/vms-posix
@@ -0,0 +1,11 @@
+VMS POSIX (not to be confused with native VMS...)
+STDC_HEADERS 1
+RANDOM_MISSING 1
+STRCASE_MISSING 1
+NO_TOKEN_PASTING 1
+MAKE_ALLOCA_C
+MAKE_VMS-Posix
+MAKE_CC
+#define DEFAULT_FILETYPE ".awk"
+#define getopt gnu_getopt
+#define opterr gnu_opterr
diff --git a/configure b/configure
index 3a39811c..7f48e0fd 100755
--- a/configure
+++ b/configure
@@ -11,7 +11,7 @@ case "$#" in
esac
if [ -f config/$1 ]; then
- sh ./mungeconf config/$1 config.h.in >config.h
+ sh ./mungeconf config/$1 config.in >config.h
# echo #echo lines to stdout
sed -n '/^#echo /s///p' config/$1
@@ -27,6 +27,6 @@ if [ -f config/$1 ]; then
else
echo "\`$1' is not a known configuration."
echo "Either construct one based on the examples in the config directory,"
- echo "or copy config.h.in to config.h and edit it."
+ echo "or copy config.in to config.h and edit it."
exit 1
fi
diff --git a/dfa.c b/dfa.c
index b33ef8e7..7777c66c 100644
--- a/dfa.c
+++ b/dfa.c
@@ -176,7 +176,7 @@ xcalloc(n, s)
ptr_t r = calloc(n, s);
if (NULL == r)
- regerror("Memory exhausted"); /* regerror does not return */
+ reg_error("Memory exhausted"); /* reg_error does not return */
return r;
}
@@ -188,7 +188,7 @@ xmalloc(n)
assert(n != 0);
if (NULL == r)
- regerror("Memory exhausted");
+ reg_error("Memory exhausted");
return r;
}
@@ -201,7 +201,7 @@ xrealloc(p, n)
assert(n != 0);
if (NULL == r)
- regerror("Memory exhausted");
+ reg_error("Memory exhausted");
return r;
}
@@ -317,7 +317,7 @@ static case_fold;
/* Entry point to set syntax options. */
void
regsyntax(bits, fold)
- int bits;
+ long bits;
int fold;
{
syntax_bits_set = 1;
@@ -341,7 +341,7 @@ static closure_allowed; /* True if backward context allows closures
{ \
if (! lexleft) \
if (eoferr != NULL) \
- regerror(eoferr); \
+ reg_error(eoferr); \
else \
return _END; \
(c) = (unsigned char) *lexptr++; \
@@ -361,7 +361,7 @@ lex()
case '^':
if (! (syntax_bits & RE_CONTEXT_INDEP_OPS)
&& (!caret_allowed ||
- (syntax_bits & RE_TIGHT_VBAR) && lexptr - 1 != lexstart))
+ ((syntax_bits & RE_TIGHT_VBAR) && lexptr - 1 != lexstart)))
goto normal_char;
caret_allowed = 0;
return syntax_bits & RE_TIGHT_VBAR ? _ALLBEGLINE : _BEGLINE;
@@ -656,9 +656,9 @@ static void regexp();
static void
atom()
{
- if (tok >= 0 && tok < _NOTCHAR || tok >= _SET || tok == _BACKREF
+ if (tok >= 0 && (tok < _NOTCHAR || tok >= _SET || tok == _BACKREF
|| tok == _BEGLINE || tok == _ENDLINE || tok == _BEGWORD
- || tok == _ENDWORD || tok == _LIMWORD || tok == _NOTLIMWORD)
+ || tok == _ENDWORD || tok == _LIMWORD || tok == _NOTLIMWORD))
{
addtok(tok);
tok = lex();
@@ -668,7 +668,7 @@ atom()
tok = lex();
regexp();
if (tok != _RPAREN)
- regerror("Unbalanced (");
+ reg_error("Unbalanced (");
tok = lex();
}
else
@@ -725,7 +725,7 @@ regparse(s, len, r)
closure_allowed = 0;
if (! syntax_bits_set)
- regerror("No syntax specified");
+ reg_error("No syntax specified");
tok = lex();
depth = r->depth;
@@ -748,7 +748,7 @@ regparse(s, len, r)
}
if (tok != _END)
- regerror("Unbalanced )");
+ reg_error("Unbalanced )");
addtok(_END - r->nregexps);
addtok(_CAT);
@@ -857,7 +857,7 @@ state_index(r, s, newline, letter)
int newline;
int letter;
{
- int hash = 0;
+ int lhash = 0;
int constraint;
int i, j;
@@ -865,12 +865,12 @@ state_index(r, s, newline, letter)
letter = letter ? 1 : 0;
for (i = 0; i < s->nelem; ++i)
- hash ^= s->elems[i].index + s->elems[i].constraint;
+ lhash ^= s->elems[i].index + s->elems[i].constraint;
/* Try to find a state that exactly matches the proposed one. */
for (i = 0; i < r->sindex; ++i)
{
- if (hash != r->states[i].hash || s->nelem != r->states[i].elems.nelem
+ if (lhash != r->states[i].hash || s->nelem != r->states[i].elems.nelem
|| newline != r->states[i].newline || letter != r->states[i].letter)
continue;
for (j = 0; j < s->nelem; ++j)
@@ -884,7 +884,7 @@ state_index(r, s, newline, letter)
/* We'll have to create a new state. */
REALLOC_IF_NECESSARY(r->states, _dfa_state, r->salloc, r->sindex);
- r->states[i].hash = hash;
+ r->states[i].hash = lhash;
MALLOC(r->states[i].elems.elems, _position, s->nelem);
copy(s, &r->states[i].elems);
r->states[i].newline = newline;
@@ -1638,7 +1638,7 @@ regexecute(r, begin, end, newline, count, backref)
if (! r->tralloc)
build_state_zero(r);
- s = 0;
+ s = s1 = 0;
p = (unsigned char *) begin;
trans = r->trans;
*end = '\n';
@@ -1646,7 +1646,7 @@ regexecute(r, begin, end, newline, count, backref)
for (;;)
{
/* The dreaded inner loop. */
- if (t = trans[s])
+ if ((t = trans[s]) != 0)
do
{
s1 = t[*p++];
@@ -1654,7 +1654,7 @@ regexecute(r, begin, end, newline, count, backref)
goto last_was_s;
s = t[*p++];
}
- while (t = trans[s]);
+ while ((t = trans[s]) != 0);
goto last_was_s1;
last_was_s:
tmp = s, s = s1, s1 = tmp;
@@ -1730,27 +1730,27 @@ regcompile(s, len, r, searchflag)
{
if (case_fold) /* dummy folding in service of regmust() */
{
- char *copy;
+ char *regcopy;
int i;
- copy = malloc(len);
- if (!copy)
- regerror("out of memory");
+ regcopy = malloc(len);
+ if (!regcopy)
+ reg_error("out of memory");
/* This is a complete kludge and could potentially break
\<letter> escapes . . . */
case_fold = 0;
for (i = 0; i < len; ++i)
if (ISUPPER(s[i]))
- copy[i] = tolower(s[i]);
+ regcopy[i] = tolower(s[i]);
else
- copy[i] = s[i];
+ regcopy[i] = s[i];
reginit(r);
r->mustn = 0;
r->must[0] = '\0';
- regparse(copy, len, r);
- free(copy);
+ regparse(regcopy, len, r);
+ free(regcopy);
regmust(r);
reganalyze(r, searchflag);
case_fold = 1;
@@ -1769,7 +1769,7 @@ regcompile(s, len, r, searchflag)
/* Free the storage held by the components of a regexp. */
void
-regfree(r)
+reg_free(r)
struct regexp *r;
{
int i;
@@ -2111,7 +2111,7 @@ register struct regexp * r;
{
register must * musts;
register must * mp;
- register char * result;
+ register char * result = "";
register int ri;
register int i;
register _token t;
@@ -2136,7 +2136,6 @@ register struct regexp * r;
mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
mp[i].in[0] = NULL;
}
- result = "";
for (ri = 0; ri < reg->tindex; ++ri) {
switch (t = reg->tokens[ri]) {
case _ALLBEGLINE:
diff --git a/dfa.h b/dfa.h
index 69a0651a..b4869808 100644
--- a/dfa.h
+++ b/dfa.h
@@ -129,23 +129,23 @@ what you give them. Help stamp out software-hoarding! */
parentheses are needed for literal searching.
0 means backslash-parentheses are grouping, and plain parentheses
are for literal searching. */
-#define RE_NO_BK_PARENS 1
+#define RE_NO_BK_PARENS 1L
/* 1 means plain | serves as the "or"-operator, and \| is a literal.
0 means \| serves as the "or"-operator, and | is a literal. */
-#define RE_NO_BK_VBAR (1 << 1)
+#define RE_NO_BK_VBAR (1L << 1)
/* 0 means plain + or ? serves as an operator, and \+, \? are literals.
1 means \+, \? are operators and plain +, ? are literals. */
-#define RE_BK_PLUS_QM (1 << 2)
+#define RE_BK_PLUS_QM (1L << 2)
/* 1 means | binds tighter than ^ or $.
0 means the contrary. */
-#define RE_TIGHT_VBAR (1 << 3)
+#define RE_TIGHT_VBAR (1L << 3)
/* 1 means treat \n as an _OR operator
0 means treat it as a normal character */
-#define RE_NEWLINE_OR (1 << 4)
+#define RE_NEWLINE_OR (1L << 4)
/* 0 means that a special characters (such as *, ^, and $) always have
their special meaning regardless of the surrounding context.
@@ -154,12 +154,12 @@ what you give them. Help stamp out software-hoarding! */
^ - only special at the beginning, or after ( or |
$ - only special at the end, or before ) or |
*, +, ? - only special when not after the beginning, (, or | */
-#define RE_CONTEXT_INDEP_OPS (1 << 5)
+#define RE_CONTEXT_INDEP_OPS (1L << 5)
/* 1 means that \ in a character class escapes the next character (typically
a hyphen. It also is overloaded to mean that hyphen at the end of the range
is allowable and means that the hyphen is to be taken literally. */
-#define RE_AWK_CLASS_HACK (1 << 6)
+#define RE_AWK_CLASS_HACK (1L << 6)
/* Now define combinations of bits for the standard possibilities. */
#ifdef notdef
@@ -334,9 +334,9 @@ typedef short _token;
Prevl and currl similarly depend upon whether the previous and current
characters are word-constituent letters. */
#define _MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
- ((constraint) & 1 << ((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4)
+ ((constraint) & (1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4)))
#define _MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
- ((constraint) & 1 << ((prevl) ? 2 : 0) + ((currl) ? 1 : 0))
+ ((constraint) & (1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0))))
#define _SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
(_MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
&& _MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
@@ -484,7 +484,7 @@ struct regexp
/* Regsyntax() takes two arguments; the first sets the syntax bits described
earlier in this file, and the second sets the case-folding flag. */
-extern void regsyntax(int, int);
+extern void regsyntax(long, int);
/* Compile the given string of the given length into the given struct regexp.
Final argument is a flag specifying whether to build a searching or an
@@ -506,7 +506,7 @@ extern void regcompile(const char *, size_t, struct regexp *, int);
extern char *regexecute(struct regexp *, char *, char *, int, int *, int *);
/* Free the storage held by the components of a struct regexp. */
-extern void regfree(struct regexp *);
+extern void reg_free(struct regexp *);
/* Entry points for people who know what they're doing. */
@@ -528,12 +528,12 @@ extern void regstate(int, struct regexp *, int []);
/* Regerror() is called by the regexp routines whenever an error occurs. It
takes a single argument, a NUL-terminated string describing the error.
- The default regerror() prints the error message to stderr and exits.
- The user can provide a different regfree() if so desired. */
-extern void regerror(const char *);
+ The default reg_error() prints the error message to stderr and exits.
+ The user can provide a different reg_error() if so desired. */
+extern void reg_error(const char *);
#else /* ! __STDC__ */
-extern void regsyntax(), regcompile(), regfree(), reginit(), regparse();
-extern void reganalyze(), regstate(), regerror();
+extern void regsyntax(), regcompile(), reg_free(), reginit(), regparse();
+extern void reganalyze(), regstate(), reg_error();
extern char *regexecute();
#endif
diff --git a/eval.c b/eval.c
index 58c39fc0..09ca96b2 100644
--- a/eval.c
+++ b/eval.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -27,6 +27,7 @@
extern double pow P((double x, double y));
extern double modf P((double x, double *yp));
+extern double fmod P((double x, double y));
static int eval_condition P((NODE *tree));
static NODE *op_assign P((NODE *tree));
@@ -125,16 +126,16 @@ char casetable[] = {
*/
int
interpret(tree)
-register NODE *tree;
+register NODE *volatile tree;
{
- volatile jmp_buf loop_tag_stack; /* shallow binding stack for loop_tag */
- static jmp_buf rule_tag;/* tag the rule currently being run, for NEXT
- * and EXIT statements. It is static because
- * there are no nested rules */
- register NODE *t = NULL;/* temporary */
- volatile NODE **lhs; /* lhs == Left Hand Side for assigns, etc */
- volatile NODE *stable_tree;
- int traverse = 1; /* True => loop thru tree (Node_rule_list) */
+ jmp_buf volatile loop_tag_stack; /* shallow binding stack for loop_tag */
+ static jmp_buf rule_tag; /* tag the rule currently being run, for NEXT
+ * and EXIT statements. It is static because
+ * there are no nested rules */
+ register NODE *volatile t = NULL; /* temporary */
+ NODE **volatile lhs; /* lhs == Left Hand Side for assigns, etc */
+ NODE *volatile stable_tree;
+ int volatile traverse = 1; /* True => loop thru tree (Node_rule_list) */
if (tree == NULL)
return 1;
@@ -253,7 +254,7 @@ register NODE *tree;
#define hakvar forloop->init
#define arrvar forloop->incr
PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
- lhs = (volatile NODE **) get_lhs(tree->hakvar, &after_assign);
+ lhs = get_lhs(tree->hakvar, &after_assign);
t = tree->arrvar;
if (t->type == Node_param_list)
t = stack_ptr[t->param_cnt];
@@ -289,9 +290,23 @@ register NODE *tree;
break;
case Node_K_continue:
- if (loop_tag_valid == 0)
- fatal("unexpected continue");
- longjmp(loop_tag, TAG_CONTINUE);
+ if (loop_tag_valid == 0) {
+ /*
+ * AT&T nawk treats continue outside of loops like
+ * next. Allow it if not posix, and complain if
+ * lint.
+ */
+ static int warned = 0;
+
+ if (do_lint && ! warned) {
+ warning("use of `continue' outside of loop is not portable");
+ warned = 1;
+ }
+ if (do_posix)
+ fatal("use of `continue' outside of loop is not allowed");
+ longjmp(rule_tag, TAG_CONTINUE);
+ } else
+ longjmp(loop_tag, TAG_CONTINUE);
break;
case Node_K_print:
@@ -310,6 +325,10 @@ register NODE *tree;
longjmp(rule_tag, TAG_CONTINUE);
break;
+ case Node_K_nextfile:
+ do_nextfile();
+ break;
+
case Node_K_exit:
/*
* In A,K,&W, p. 49, it says that an exit statement "...
@@ -339,6 +358,8 @@ register NODE *tree;
* Appears to be an expression statement. Throw away the
* value.
*/
+ if (do_lint && tree->type == Node_var)
+ warning("statement has no effect");
t = tree_eval(tree);
free_temp(t);
break;
@@ -373,7 +394,8 @@ register NODE *tree;
return tree->var_value;
}
if (tree->type == Node_param_list)
- return (stack_ptr[(_t)->param_cnt])->var_value;
+/* return (stack_ptr[(_t)->param_cnt])->var_value; */
+ return (stack_ptr[(tree)->param_cnt])->var_value;
#endif
switch (tree->type) {
case Node_and:
@@ -448,8 +470,13 @@ register NODE *tree;
r = tree_eval(tree->rnode);
lhs = get_lhs(tree->lnode, &after_assign);
- unref(*lhs);
- *lhs = dupnode(r);
+ if (r != *lhs) {
+ NODE *save;
+
+ save = *lhs;
+ *lhs = dupnode(r);
+ unref(save);
+ }
free_temp(r);
if (after_assign)
(*after_assign)();
@@ -548,7 +575,7 @@ register NODE *tree;
free_temp(t2);
switch (tree->type) {
case Node_exp:
- if ((lx = x2) == x2) { /* integer exponent */
+ if ((lx = x2) == x2 && lx >= 0) { /* integer exponent */
if (lx == 0)
x = 1;
else if (lx == 1)
@@ -585,8 +612,12 @@ register NODE *tree;
case Node_mod:
if (x2 == 0)
fatal("division by zero attempted in mod");
+#ifndef FMOD_MISSING
+ return tmp_number(fmod (x1, x2));
+#else
(void) modf(x1 / x2, &x);
return tmp_number(x1 - x * x2);
+#endif
case Node_plus:
return tmp_number(x1 + x2);
@@ -646,7 +677,7 @@ register NODE *tree;
t1 = tree_eval(tree);
if (t1->flags & MAYBE_NUM)
(void) force_number(t1);
- if (t1->flags & NUMERIC)
+ if (t1->flags & NUMBER)
ret = t1->numbr != 0.0;
else
ret = t1->stlen != 0;
@@ -661,10 +692,8 @@ int
cmp_nodes(t1, t2)
register NODE *t1, *t2;
{
- AWKNUM diff;
register int ret;
register int len1, len2;
- int donum;
if (t1 == t2)
return 0;
@@ -672,24 +701,9 @@ register NODE *t1, *t2;
(void) force_number(t1);
if (t2->flags & MAYBE_NUM)
(void) force_number(t2);
-#ifdef maybe
- if ((t1->flags & NUMERIC) && (t2->flags & NUMERIC)) {
-#else
- donum = 0;
- if ((t1->flags & NUMBER)) {
- (void) force_number(t2);
- if (t2->flags & NUMERIC)
- donum = 1;
- } else if ((t2->flags & NUMBER)) {
- (void) force_number(t1);
- if (t1->flags & NUMERIC)
- donum = 1;
- }
- if (donum) {
-#endif
- diff = t1->numbr - t2->numbr;
- if (diff == 0) return 0;
- else if (diff < 0) return -1;
+ if ((t1->flags & NUMBER) && (t2->flags & NUMBER)) {
+ if (t1->numbr == t2->numbr) return 0;
+ else if (t1->numbr - t2->numbr < 0) return -1;
else return 1;
}
(void) force_string(t1);
@@ -790,9 +804,13 @@ register NODE *tree;
case Node_assign_mod:
if (rval == (AWKNUM) 0)
fatal("division by zero attempted in %=");
+#ifndef FMOD_MISSING
+ *lhs = make_number(fmod(lval, rval));
+#else
(void) modf(lval / rval, &t1);
t2 = lval - rval * t1;
*lhs = make_number(t2);
+#endif
break;
case Node_assign_plus:
@@ -819,11 +837,11 @@ NODE *arg_list; /* Node_expression_list of calling args. */
{
register NODE *arg, *argp, *r;
NODE *n, *f;
- volatile jmp_buf func_tag_stack;
- volatile jmp_buf loop_tag_stack;
- volatile int save_loop_tag_valid = 0;
- volatile NODE **save_stack, *save_ret_node;
- NODE **local_stack = NULL, **sp;
+ jmp_buf volatile func_tag_stack;
+ jmp_buf volatile loop_tag_stack;
+ int volatile save_loop_tag_valid = 0;
+ NODE **volatile save_stack, *save_ret_node;
+ NODE **volatile local_stack = NULL, **sp;
int count;
extern NODE *ret_node;
@@ -897,10 +915,10 @@ NODE *arg_list; /* Node_expression_list of calling args. */
PUSH_BINDING(loop_tag_stack, loop_tag, junk);
loop_tag_valid = 0;
}
- save_stack = (volatile NODE **) stack_ptr;
+ save_stack = stack_ptr;
stack_ptr = local_stack;
PUSH_BINDING(func_tag_stack, func_tag, func_tag_valid);
- save_ret_node = (volatile NODE *) ret_node;
+ save_ret_node = ret_node;
ret_node = Nnull_string; /* default return value */
if (setjmp(func_tag) == 0)
(void) interpret(f->rnode);
@@ -1114,7 +1132,7 @@ register NODE *tree;
tree = tree->rnode;
}
rp = re_update(tree);
- i = research(rp, t1->stptr, t1->stlen, 0);
+ i = research(rp, t1->stptr, 0, t1->stlen, 0);
i = (i == -1) ^ (match == 1);
free_temp(t1);
return tmp_number((AWKNUM) i);
@@ -1130,6 +1148,7 @@ set_IGNORECASE()
warning("IGNORECASE not supported in compatibility mode");
}
IGNORECASE = (force_number(IGNORECASE_node->var_value) != 0.0);
+ set_FS();
}
void
@@ -1149,6 +1168,8 @@ set_ORS()
}
static NODE **fmt_list = NULL;
+static int fmt_ok P((NODE *n));
+static int fmt_index P((NODE *n));
static int
fmt_ok(n)
diff --git a/field.c b/field.c
index 26e26dac..d50d793c 100644
--- a/field.c
+++ b/field.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -25,7 +25,6 @@
#include "awk.h"
-
static int (*parse_field) P((int, char **, int, char *,
Regexp *, void (*)(), NODE *));
static void rebuild_record P((void));
@@ -38,6 +37,9 @@ static int sc_parse_field P((int, char **, int, char *,
static int fw_parse_field P((int, char **, int, char *,
Regexp *, void (*)(), NODE *));
static void set_element P((int, char *, int, NODE *));
+static void grow_fields_arr P((int num));
+static void set_field P((int num, char *str, int len, NODE *dummy));
+
static Regexp *FS_regexp = NULL;
static char *parse_extent; /* marks where to restart parse of record */
@@ -50,8 +52,7 @@ static char *save_fs; /* save current value of FS when line is read,
*/
NODE **fields_arr; /* array of pointers to the field nodes */
-int field0_valid = 1; /* $(>0) has not been changed yet */
-NODE *field0;
+int field0_valid; /* $(>0) has not been changed yet */
int default_FS;
static NODE **nodes; /* permanent repository of field nodes */
static int *FIELDWIDTHS = NULL;
@@ -59,20 +60,20 @@ static int *FIELDWIDTHS = NULL;
void
init_fields()
{
+ NODE *n;
+
emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
emalloc(nodes, NODE **, sizeof(NODE *), "init_fields");
- emalloc(field0, NODE *, sizeof(NODE), "init_fields");
- field0->type = Node_val;
- field0->stref = 0;
- field0->stptr = "";
- field0->stlen = 0;
- field0->flags = (STRING|STR|PERM); /* never free buf */
- fields_arr[0] = field0;
+ getnode(n);
+ *n = *Nnull_string;
+ fields_arr[0] = nodes[0] = n;
parse_extent = fields_arr[0]->stptr;
save_FS = dupnode(FS_node->var_value);
save_fs = save_FS->stptr;
+ field0_valid = 1;
}
+
static void
grow_fields_arr(num)
int num;
@@ -84,9 +85,8 @@ int num;
erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "set_field");
for (t = nf_high_water+1; t <= num; t++) {
getnode(n);
- n->type = Node_val;
- nodes[t] = n;
- fields_arr[t] = nodes[t];
+ *n = *Nnull_string;
+ fields_arr[t] = nodes[t] = n;
}
nf_high_water = num;
}
@@ -132,6 +132,8 @@ rebuild_record()
ptr--;
}
tlen += (NF - 1) * ofslen;
+ if (tlen < 0)
+ tlen = 0;
emalloc(ops, char *, tlen + 2, "fix_fields");
cops = ops;
ops[0] = '\0';
@@ -183,11 +185,11 @@ int freeold;
save_FS = dupnode(FS_node->var_value);
save_fs = save_FS->stptr;
}
- field0->stptr = buf;
- field0->stlen = cnt;
- field0->stref = 1;
- field0->flags = (STRING|STR|PERM|MAYBE_NUM);
- fields_arr[0] = field0;
+ nodes[0]->stptr = buf;
+ nodes[0]->stlen = cnt;
+ nodes[0]->stref = 1;
+ nodes[0]->flags = (STRING|STR|PERM|MAYBE_NUM);
+ fields_arr[0] = nodes[0];
}
fields_arr[0]->flags |= MAYBE_NUM;
field0_valid = 1;
@@ -203,9 +205,15 @@ reset_record()
void
set_NF()
{
+ register int i;
+
NF = (int) force_number(NF_node->var_value);
if (NF > nf_high_water)
grow_fields_arr(NF);
+ for (i = parse_high_water + 1; i <= NF; i++) {
+ unref(fields_arr[i]);
+ fields_arr[i] = Nnull_string;
+ }
field0_valid = 0;
}
@@ -239,7 +247,7 @@ NODE *n;
scan++;
field = scan;
while (scan < end
- && research(rp, scan, (int)(end - scan), 1) != -1
+ && research(rp, scan, 0, (int)(end - scan), 1) != -1
&& nf < up_to) {
if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
scan++;
@@ -283,12 +291,17 @@ NODE *n;
register int nf = parse_high_water;
register char *field;
register char *end = scan + len;
+ char sav;
if (up_to == HUGE)
nf = 0;
if (len == 0)
return nf;
+ /* before doing anything save the char at *end */
+ sav = *end;
+ /* because it will be destroyed now: */
+
*end = ' '; /* sentinel character */
for (; nf < up_to; scan++) {
/*
@@ -305,13 +318,17 @@ NODE *n;
if (scan == end)
break;
}
+
+ /* everything done, restore original char at *end */
+ *end = sav;
+
*buf = scan;
return nf;
}
/*
* this is called both from get_field() and from do_split()
- * via (*pase_field)(). This variation is for when FS is a single character
+ * via (*parse_field)(). This variation is for when FS is a single character
* other than space.
*/
static int
@@ -329,11 +346,17 @@ NODE *n;
register int nf = parse_high_water;
register char *field;
register char *end = scan + len;
+ char sav;
if (up_to == HUGE)
nf = 0;
if (len == 0)
return nf;
+
+ /* before doing anything save the char at *end */
+ sav = *end;
+ /* because it will be destroyed now: */
+
*end = fschar; /* sentinel character */
for (; nf < up_to; scan++) {
field = scan;
@@ -344,6 +367,10 @@ NODE *n;
if (scan == end)
break;
}
+
+ /* everything done, restore original char at *end */
+ *end = sav;
+
*buf = scan;
return nf;
}
@@ -389,8 +416,6 @@ get_field(requested, assign)
register int requested;
Func_ptr *assign; /* this field is on the LHS of an assign */
{
- int parsed;
-
/*
* if requesting whole line but some other field has been altered,
* then the whole line must be rebuilt
@@ -416,32 +441,36 @@ Func_ptr *assign; /* this field is on the LHS of an assign */
/* assert(requested > 0); */
if (assign)
- field0_valid = 0;
- if (requested <= parse_high_water) /* we have already parsed this field */
+ field0_valid = 0; /* $0 needs reconstruction */
+
+ if (requested <= parse_high_water) /* already parsed this field */
return &fields_arr[requested];
- if (parse_high_water == 0) /* starting at the beginning */
- parse_extent = fields_arr[0]->stptr;
- /*
- * parse up to requested fields, calling set_field() for each, and saving
- * in parse_extent the point where the parse left off
- */
- parsed = (*parse_field)(requested, &parse_extent,
- fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr),
- save_fs, FS_regexp, set_field, (NODE *)NULL);
- parse_high_water = parsed;
- /*
- * if we reached the end of the record, set NF to the number of fields
- * so far. Note that requested might actually refer to a field that
- * is beyond the end of the record, but we won't set NF to that value at
- * this point, since this is only a reference to the field and NF
- * only gets set if the field is assigned to -- in this case parsed has
- * been set to requested above
- */
- if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
- NF = parsed;
- if (requested == HUGE-1)
- requested = parsed;
- if (parsed < requested) { /* requested field beyond end of record; */
+
+ if (NF == -1) { /* have not yet parsed to end of record */
+ /*
+ * parse up to requested fields, calling set_field() for each,
+ * saving in parse_extent the point where the parse left off
+ */
+ if (parse_high_water == 0) /* starting at the beginning */
+ parse_extent = fields_arr[0]->stptr;
+ parse_high_water = (*parse_field)(requested, &parse_extent,
+ fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr),
+ save_fs, FS_regexp, set_field, (NODE *)NULL);
+
+ /*
+ * if we reached the end of the record, set NF to the number of
+ * fields so far. Note that requested might actually refer to
+ * a field that is beyond the end of the record, but we won't
+ * set NF to that value at this point, since this is only a
+ * reference to the field and NF only gets set if the field
+ * is assigned to -- this case is handled below
+ */
+ if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
+ NF = parse_high_water;
+ if (requested == HUGE-1) /* HUGE-1 means set NF */
+ requested = parse_high_water;
+ }
+ if (parse_high_water < requested) { /* requested beyond end of record */
if (assign) { /* expand record */
register int i;
@@ -449,7 +478,7 @@ Func_ptr *assign; /* this field is on the LHS of an assign */
grow_fields_arr(requested);
/* fill in fields that don't exist */
- for (i = parsed + 1; i <= requested; i++)
+ for (i = parse_high_water + 1; i <= requested; i++)
fields_arr[i] = Nnull_string;
NF = requested;
@@ -482,7 +511,8 @@ NODE *tree;
NODE *t1, *t2, *t3, *tmp;
register char *splitc = "";
char *s;
- int (*parseit)();
+ int (*parseit)P((int, char **, int, char *,
+ Regexp *, void (*)(), NODE *));
Regexp *rp = NULL;
t1 = tree_eval(tree->lnode);
@@ -518,7 +548,7 @@ NODE *tree;
}
s = t1->stptr;
- tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, t1->stlen,
+ tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int)t1->stlen,
splitc, rp, set_element, t2));
free_temp(t1);
return tmp;
@@ -545,9 +575,16 @@ set_FS()
if (tmp->stptr[0] == ' ') {
(void) strcpy(buf, "[ \n]+");
default_FS = 1;
- } else if (tmp->stptr[0] != '\n')
- sprintf(buf, "[%c\n]", tmp->stptr[0]);
- else {
+ } else if (tmp->stptr[0] != '\n') {
+ if (IGNORECASE == 0)
+ sprintf(buf, "[%c\n]", tmp->stptr[0]);
+ else {
+ char c = tmp->stptr[0];
+
+ sprintf(buf, "[%c%c\n]",
+ c, islower(c) ? toupper(c) : isupper(c) ? tolower(c) : c );
+ }
+ } else {
parse_field = sc_parse_field;
FS = tmp->stptr;
}
@@ -562,12 +599,21 @@ set_FS()
parse_field = re_parse_field;
else if (*FS == ' ' && tmp->stlen == 1)
default_FS = 1;
- else if (*FS != ' ' && tmp->stlen == 1)
- parse_field = sc_parse_field;
+ else if (*FS != ' ' && tmp->stlen == 1) {
+ if (IGNORECASE == 0)
+ parse_field = sc_parse_field;
+ else {
+ char c = *FS;
+
+ sprintf(buf, "[%c%c]",
+ c, islower(c) ? toupper(c) : isupper(c) ? tolower(c) : c );
+ parse_field = re_parse_field;
+ }
+ }
}
if (parse_field == re_parse_field) {
tmp = tmp_string(FS, strlen(FS));
- FS_regexp = make_regexp(tmp, 0, 1);
+ FS_regexp = make_regexp(tmp, IGNORECASE, 1);
free_temp(tmp);
} else
FS_regexp = NULL;
@@ -609,7 +655,7 @@ set_FIELDWIDTHS()
fw_alloc *= 2;
erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
}
- FIELDWIDTHS[i] = (int) strtol(scan, &end, 10);
+ FIELDWIDTHS[i] = (int) strtod(scan, &end);
if (end == scan)
break;
scan = end;
diff --git a/gawk.1 b/gawk.1
index 204b5bc7..aefd2320 100644
--- a/gawk.1
+++ b/gawk.1
@@ -1,7 +1,7 @@
.ds PX \s-1POSIX\s+1
.ds UX \s-1UNIX\s+1
.ds AN \s-1ANSI\s+1
-.TH GAWK 1 "Jun 5 1991" "Free Software Foundation" "Utility Commands"
+.TH GAWK 1 "Jul 20 1992" "Free Software Foundation" "Utility Commands"
.SH NAME
gawk \- pattern scanning and processing language
.SH SYNOPSIS
@@ -123,6 +123,9 @@ mode. In compatibility mode,
behaves identically to \*(UX
.IR awk ;
none of the GNU-specific extensions are recognized.
+See
+.BR "GNU EXTENSIONS" ,
+below, for more information.
.TP
.PD 0
.B copyleft
@@ -319,7 +322,7 @@ Assigning a new value to
.B FS
overrides the use of
.BR FIELDWIDTHS ,
-and restores the default behaviour.
+and restores the default behavior.
.PP
Each field in the input line may be referenced by its position,
.BR $1 ,
@@ -721,7 +724,7 @@ inclusive. It does not combine with any other sort of pattern expression.
Regular expressions are the extended kind found in
.IR egrep .
They are composed of characters as follows:
-.TP \w'[^abc...]'u+1n
+.TP \w'\fB[^\fIabc...\fB]\fR'u+2n
.I c
matches the non-metacharacter
.IR c .
@@ -932,6 +935,17 @@ AWK program. If the end of the input data is reached, the
.B END
block(s), if any, are executed.
.TP
+.B "next file"
+Stop processing the current input file. The next input record read
+comes from the next input file.
+.B FILENAME
+is updated,
+.B FNR
+is reset to 1, and processing starts over with the first pattern in the
+AWK program. If the end of the input data is reached, the
+.B END
+block(s), if any, are executed.
+.TP
.B print
Prints the current record.
.TP
@@ -971,7 +985,7 @@ pipes into
.BR getline .
.BR Getline
will return 0 on end of file, and \-1 on an error.
-.SS The \fIprintf\fP Statement
+.SS The \fIprintf\fP\^ Statement
.PP
The AWK versions of the
.B printf
@@ -1082,7 +1096,7 @@ recognizes certain special filenames internally. These filenames
allow access to open file descriptors inherited from
.IR gawk 's
parent process (usually the shell). The filenames are:
-.TP \w'\fB/dev/fd/\^\fIn\fR'u+1n
+.TP \w'\fB/dev/stdout\fR'u+1n
.B /dev/stdin
The standard input.
.TP
@@ -1093,7 +1107,7 @@ The standard output.
The standard error output.
.TP
.BI /dev/fd/\^ n
-The file denoted by the open file descriptor
+The file associated with the open file descriptor
.IR n .
.PP
These are particularly useful for error messages. For example:
@@ -1255,7 +1269,7 @@ translated to their corresponding upper-case counterparts.
Non-alphabetic characters are left unchanged.
.SS Time Functions
.PP
-Since one of the primary uses of AWK programs in processing log files
+Since one of the primary uses of AWK programs is processing log files
that contain time stamp information,
.I gawk
provides the following two functions for obtaining time stamps and
@@ -1421,7 +1435,7 @@ Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger,
Addison-Wesley, 1988. ISBN 0-201-07981-X.
.PP
.IR "The GAWK Manual" ,
-published by the Free Software Foundation, 1991.
+Edition 0.14, published by the Free Software Foundation, 1992.
.SH POSIX COMPATIBILITY
A primary goal for
.I gawk
@@ -1452,7 +1466,7 @@ block was run. Applications came to depend on this ``feature.''
When
.I awk
was changed to match its documentation, this option was added to
-accomodate applications that depended upon the old behaviour.
+accommodate applications that depended upon the old behavior.
(This feature was agreed upon by both the AT&T and GNU developers.)
.PP
The
@@ -1546,6 +1560,11 @@ No path search is performed for files named via the
option. Therefore the
.B AWKPATH
environment variable is not special.
+.TP
+\(bu
+The use of
+.B "next file"
+to abandon processing of the current input file.
.RE
.PP
The AWK book does not define the return value of the
@@ -1572,6 +1591,9 @@ option is ``t'', then
.B FS
will be set to the tab character.
Since this is a rather ugly special case, it is not the default behavior.
+This behavior also does not occur if
+.B \-Wposix
+has been specified.
.ig
.PP
If
@@ -1590,6 +1612,52 @@ This option should only be of interest to the
maintainers, and may not even be compiled into
.IR gawk .
..
+.SH HISTORICAL FEATURES
+There are two features of historical AWK implementations that
+.I gawk
+supports.
+First, it is possible to call the
+.B length()
+built-in function not only with no argument, but even without parentheses!
+Thus,
+.RS
+.PP
+.ft B
+a = length
+.ft R
+.RE
+.PP
+is the same as either of
+.RS
+.PP
+.ft B
+a = length()
+.br
+a = length($0)
+.ft R
+.RE
+.PP
+This feature is marked as ``deprecated'' in the \*(PX standard, and
+.I gawk
+will issue a warning about its use if
+.B \-Wlint
+is specified on the command line.
+.PP
+The other feature is the use of the
+.B continue
+statement outside the body of a
+.BR while ,
+.BR for ,
+or
+.B do
+loop. Traditional AWK implementations have treated such usage as
+equivalent to the
+.B next
+statement.
+.I Gawk
+will support this usage if
+.B \-Wposix
+has not been specified.
.SH BUGS
The
.B \-F
@@ -1598,9 +1666,9 @@ it remains only for backwards compatibility.
.SH VERSION INFORMATION
This man page documents
.IR gawk ,
-version 2.13.
+version 2.14.
.PP
-For the 2.13 version of
+For the 2.14 version of
.IR gawk ,
the
.BR \-c ,
@@ -1615,9 +1683,7 @@ and
options of the 2.11 version are recognized. However,
.I gawk
will print a warning message,
-and these options will go away in the 2.14 version.
-.PP
-The 2.12 version was a development version that was not officially released.
+and these options will go away in the 2.15 version.
.SH AUTHORS
The original version of \*(UX
.I awk
@@ -1632,11 +1698,15 @@ to be compatible with the original version of
.I awk
distributed in Seventh Edition \*(UX.
John Woods contributed a number of bug fixes.
-David Trueman of Dalhousie University, with contributions
-from Arnold Robbins at Emory University and AudioFAX, made
+David Trueman, with contributions
+from Arnold Robbins, made
.I gawk
compatible with the new version of \*(UX
.IR awk .
+.PP
+The initial DOS port was done by Conrad Kwok and Scott Garfinkle.
+Scott Deifik is the current DOS maintainer. Pat Rankin did the
+port to VMS, and Michal Jaegermann did the port to the Atari ST.
.SH ACKNOWLEDGEMENTS
Brian Kernighan of Bell Labs
provided valuable assistance during testing and debugging.
diff --git a/gawk.texinfo b/gawk.texi
index 00a8ada5..f7b0fa33 100644
--- a/gawk.texinfo
+++ b/gawk.texi
@@ -2,6 +2,7 @@
@c %**start of header (This is for running Texinfo on a region.)
@setfilename gawk.info
@settitle The GAWK Manual
+@c @smallbook
@c %**end of header (This is for running Texinfo on a region.)
@ifinfo
@@ -13,15 +14,29 @@
@syncodeindex vr cp
@end iftex
+@c If "finalout" is commented out, the printed output will show
+@c black boxes that mark lines that are too long. Thus, it is
+@c unwise to comment it out when running a master in case there are
+@c overfulls which are deemed okay.
+
@iftex
@finalout
@end iftex
+@c ===> NOTE! <==
+@c Determine the edition number in *four* places by hand:
+@c 1. First ifinfo section 2. title page 3. copyright page 4. top node
+@c To find the locations, search for !!set
+
@ifinfo
This file documents @code{awk}, a program that you can use to select
particular records in a file and perform operations upon them.
-Copyright (C) 1989,1991 Free Software Foundation, Inc.
+This is Edition 0.14 of @cite{The GAWK Manual}, @*
+for the 2.14 version of the GNU implementation @*
+of AWK.
+
+Copyright (C) 1989, 1991, 1992 Free Software Foundation, Inc.
Permission is granted to make and distribute verbatim copies of
this manual provided the copyright notice and this permission notice
@@ -45,16 +60,13 @@ except that this permission notice may be stated in a translation approved
by the Foundation.
@end ifinfo
-@c @smallbook
-@c For printing as a small manual, uncomment the above line. Then change
-@c every `@example' to `@smallexample' and every `@end example' to
-@c `@end smallexample'. That's all.
-
@setchapternewpage odd
+
+@c !!set edition, date, version
@titlepage
@title The GAWK Manual
-@subtitle Edition 0.13 Beta
-@subtitle October 1991
+@subtitle Edition 0.14
+@subtitle November 1992
@author Diane Barlow Close
@author Arnold D. Robbins
@author Paul H. Rubin
@@ -65,16 +77,17 @@ by the Foundation.
@page
@vskip 0pt plus 1filll
-Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
+Copyright @copyright{} 1989, 1991, 1992 Free Software Foundation, Inc.
@sp 2
-
-This is Edition 0.13 Beta of @cite{The GAWK Manual}, @*
-for the 2.13.3 version of the GNU implementation @*
+
+@c !!set edition, date, version
+This is Edition 0.14 of @cite{The GAWK Manual}, @*
+for the 2.14 version of the GNU implementation @*
of AWK.
@sp 2
Published by the Free Software Foundation @*
-675 Massachusetts Avenue, @*
+675 Massachusetts Avenue @*
Cambridge, MA 02139 USA @*
Printed copies are available for $15 each.
@@ -101,76 +114,67 @@ by the Foundation.
@c node, in `unnumbered' sections, then the chapter, `What is gawk'.
This file documents @code{awk}, a program that you can use to select
-particular records in a file and perform operations upon them; it
-contains the following chapters:
-@end ifinfo
-
-@menu
-* Preface:: What you can do with @code{awk}; brief history
- and acknowledgements.
-
-* Copying:: Your right to copy and distribute @code{gawk}.
-
-* This Manual:: Using this manual.
- Includes sample input files that you can use.
-* Getting Started:: A basic introduction to using @code{awk}.
- How to run an @code{awk} program. Command line syntax.
-
-* Reading Files:: How to read files and manipulate fields.
-
-* Printing:: How to print using @code{awk}. Describes the
- @code{print} and @code{printf} statements.
- Also describes redirection of output.
-
-* One-liners:: Short, sample @code{awk} programs.
-
-* Patterns:: The various types of patterns explained in detail.
-
-* Actions:: The various types of actions are introduced here.
- Describes expressions and the various operators in
- detail. Also describes comparison expressions.
-
-* Expressions:: Expressions are the basic building blocks of statements.
-
-* Statements:: The various control statements are described in
- detail.
-
-* Arrays:: The description and use of arrays. Also includes
- array-oriented control statements.
-
-* Built-in:: The built-in functions are summarized here.
-
-* User-defined:: User-defined functions are described in detail.
-
-* Var: Built-in Variables. The built-in variables are summarized here.
-
-* Command Line:: How to run @code{gawk}.
-
-* Language History:: The evolution of the @code{awk} language.
-
-* Installation:: Installing @code{gawk} under various operating systems.
-
-* Gawk Summary:: @code{gawk} Options and Language Summary.
-
-* Sample Program:: A sample @code{awk} program with a complete explanation.
-
-* Bugs:: Reporting Problems and Bugs.
+particular records in a file and perform operations upon them.
-* Notes:: Something about the implementation of @code{gawk}.
+@c !!set edition, date, version
+This is Edition 0.14 of @cite{The GAWK Manual}, @*
+for the 2.14 version of the GNU implementation @*
+of AWK.
-* Glossary:: An explanation of some unfamiliar terms.
+@end ifinfo
-* Index::
+@menu
+* Preface:: What you can do with @code{awk}; brief history
+ and acknowledgements.
+* Copying:: Your right to copy and distribute @code{gawk}.
+* This Manual:: Using this manual.
+ Includes sample input files that you can use.
+* Getting Started:: A basic introduction to using @code{awk}.
+ How to run an @code{awk} program.
+ Command line syntax.
+* Reading Files:: How to read files and manipulate fields.
+* Printing:: How to print using @code{awk}. Describes the
+ @code{print} and @code{printf} statements.
+ Also describes redirection of output.
+* One-liners:: Short, sample @code{awk} programs.
+* Patterns:: The various types of patterns
+ explained in detail.
+* Actions:: The various types of actions are
+ introduced here. Describes
+ expressions and the various operators in
+ detail. Also describes comparison expressions.
+* Expressions:: Expressions are the basic building
+ blocks of statements.
+* Statements:: The various control statements are
+ described in detail.
+* Arrays:: The description and use of arrays.
+ Also includes array-oriented control
+ statements.
+* Built-in:: The built-in functions are summarized here.
+* User-defined:: User-defined functions are described in detail.
+* Built-in Variables:: Built-in Variables
+* Command Line:: How to run @code{gawk}.
+* Language History:: The evolution of the @code{awk} language.
+* Installation:: Installing @code{gawk} under
+ various operating systems.
+* Gawk Summary:: @code{gawk} Options and Language Summary.
+* Sample Program:: A sample @code{awk} program with a
+ complete explanation.
+* Bugs:: Reporting Problems and Bugs.
+* Notes:: Something about the
+ implementation of @code{gawk}.
+* Glossary:: An explanation of some unfamiliar terms.
+* Index::
@end menu
-@node Preface, Copying, Top , Top
+@node Preface, Copying, Top, Top
@comment node-name, next, previous, up
@unnumbered Preface
@iftex
@cindex what is @code{awk}
@end iftex
-If you are like many computer users, you frequently would like to make
+If you are like many computer users, you would frequently like to make
changes in various text files wherever certain patterns appear, or
extract data from parts of certain lines while discarding the rest. To
write a program to do this in a language such as C or Pascal is a
@@ -183,10 +187,10 @@ with just a few lines of code.
The GNU implementation of @code{awk} is called @code{gawk}; it is fully
upward compatible with the System V Release 4 version of
-@code{awk}. @code{gawk} is also upward compatible with the @sc{POSIX}
+@code{awk}. @code{gawk} is also upward compatible with the @sc{posix}
(draft) specification of the @code{awk} language. This means that all
properly written @code{awk} programs should work with @code{gawk}.
-So we usually don't distinguish between @code{gawk} and other @code{awk}
+Thus, we usually don't distinguish between @code{gawk} and other @code{awk}
implementations in this manual.@refill
@cindex uses of @code{awk}
@@ -196,19 +200,19 @@ system commands such as @code{ls}. Using @code{awk} you can: @refill
@itemize @bullet
@item
-manage small, personal databases,
+manage small, personal databases
@item
-generate reports,
+generate reports
@item
-validate data,
+validate data
@item
-produce indexes, and perform other document preparation tasks,
+produce indexes, and perform other document preparation tasks
@item
even experiment with algorithms that can be adapted later to other computer
-languages!
+languages
@end itemize
@iftex
@@ -219,10 +223,11 @@ expert user, and for the on-line Info version of the manual.
@end iftex
@menu
-* History:: The history of @code{gawk} and @code{awk}. Acknowledgements.
+* History:: The history of @code{gawk} and
+ @code{awk}. Acknowledgements.
@end menu
-@node History, , Preface, Preface
+@node History, , Preface, Preface
@comment node-name, next, previous, up
@unnumberedsec History of @code{awk} and @code{gawk}
@@ -236,7 +241,7 @@ streams, and computed regular expressions.
This new version became generally available with System V Release 3.1.
The version in System V Release 4 added some new features and also cleaned
up the behavior in some of the ``dark corners'' of the language.
-The specification for @code{awk} in the @sc{POSIX} Command Language
+The specification for @code{awk} in the @sc{posix} Command Language
and Utilities standard further clarified the language based on feedback
from both the @code{gawk} designers, and the original @code{awk}
designers.@refill
@@ -245,22 +250,23 @@ The GNU implementation, @code{gawk}, was written in 1986 by Paul Rubin
and Jay Fenlason, with advice from Richard Stallman. John Woods
contributed parts of the code as well. In 1988 and 1989, David Trueman, with
help from Arnold Robbins, thoroughly reworked @code{gawk} for compatibility
-with the newer @code{awk}. Current development focuses on bug fixes,
+with the newer @code{awk}. Current development (1992) focuses on bug fixes,
performance improvements, and standards compliance.
-Many people need to be thanked for their assistance in producing this
+We need to thank many people for their assistance in producing this
manual. Jay Fenlason contributed many ideas and sample programs. Richard
-Mlynarik and Robert Chassell gave helpful comments on early drafts of this
+Mlynarik and Robert J. Chassell gave helpful comments on early drafts of this
manual. The paper @cite{A Supplemental Document for @code{awk}} by John W.
Pierce of the Chemistry Department at UC San Diego, pinpointed several
issues relevant both to @code{awk} implementation and to this manual, that
would otherwise have escaped us. David Trueman, Pat Rankin, and Michal
-Jaegermann also contributed portions of the manual.@refill
+Jaegermann also contributed sections of the manual.@refill
The following people provided many helpful comments on this edition of
the manual: Rick Adams, Michael Brennan, Rich Burridge, Diane Close,
Christopher (``Topher'') Eliot, Michael Lijewski, Pat Rankin, Miriam Robbins,
-and Michal Tomczak.
+and Michal Jaegermann. Robert J. Chassell provided much valuable advice on
+the use of Texinfo.
Finally, we would like to thank Brian Kernighan of Bell Labs for invaluable
assistance during the testing and debugging of @code{gawk}, and for
@@ -374,7 +380,7 @@ of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
-@enumerate A
+@enumerate a
@item
You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
@@ -423,7 +429,7 @@ You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
-@enumerate A
+@enumerate a
@item
Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
@@ -648,13 +654,13 @@ You should also get your employer (if you work as a programmer) or your
school, if any, to sign a ``copyright disclaimer'' for the program, if
necessary. Here is a sample; alter the names:
-@example
+@smallexample
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
@var{signature of Ty Coon}, 1 April 1989
Ty Coon, President of Vice
-@end example
+@end smallexample
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
@@ -663,7 +669,7 @@ library. If this is what you want to do, use the GNU Library General
Public License instead of this License.
@node This Manual, Getting Started, Copying, Top
-@chapter Using This Manual
+@chapter Using this Manual
@cindex manual, using this
@cindex using this manual
@cindex language, @code{awk}
@@ -674,23 +680,24 @@ Public License instead of this License.
The term @code{awk} refers to a particular program, and to the language you
use to tell this program what to do. When we need to be careful, we call
the program ``the @code{awk} utility'' and the language ``the @code{awk}
-language.'' The term @code{gawk} refers to a version of @code{awk}, developed
-as part the GNU project. The purpose of this manual is to explain the
+language.'' The term @code{gawk} refers to a version of @code{awk} developed
+as part of the GNU project. The purpose of this manual is to explain
+both the
@code{awk} language and how to run the @code{awk} utility.@refill
While concentrating on the features of @code{gawk}, the manual will also
attempt to describe important differences between @code{gawk} and other
@code{awk} implementations. In particular, any features that are not
-in the @sc{POSIX} standard for @code{awk} will be noted. @refill
+in the @sc{posix} standard for @code{awk} will be noted. @refill
The term @dfn{@code{awk} program} refers to a program written by you in
the @code{awk} programming language.@refill
-@xref{Getting Started}, for the bare essentials you need to know to
-start using @code{awk}.
+@xref{Getting Started, ,Getting Started with @code{awk}}, for the bare
+essentials you need to know to start using @code{awk}.
Some useful ``one-liners'' are included to give you a feel for the
-@code{awk} language (@pxref{One-liners}).
+@code{awk} language (@pxref{One-liners, ,Useful ``One-liners''}).
@ignore
@strong{I deleted four paragraphs here because they would confuse the
@@ -713,21 +720,19 @@ If you find terms that you aren't familiar with, try looking them
up in the glossary (@pxref{Glossary}).@refill
The entire @code{awk} language is summarized for quick reference in
-@ref{Gawk Summary}. Look there if you just need to refresh your memory
-about a particular feature.@refill
+@ref{Gawk Summary, ,@code{gawk} Summary}. Look there if you just need
+to refresh your memory about a particular feature.@refill
Most of the time complete @code{awk} programs are used as examples, but in
some of the more advanced sections, only the part of the @code{awk} program
that illustrates the concept being described is shown.@refill
@menu
-This chapter contains the following sections:
-
-* Sample Data Files:: Sample data files for use in the @code{awk} programs
- illustrated in this manual.
+* Sample Data Files:: Sample data files for use in the @code{awk}
+ programs illustrated in this manual.
@end menu
-@node Sample Data Files, , This Manual, This Manual
+@node Sample Data Files, , This Manual, This Manual
@section Data Files for the Examples
@cindex input file, sample
@@ -747,7 +752,6 @@ means the board operates 24 hours a day. A @samp{B} in the last
column means the board operates evening and weekend hours, only. A
@samp{C} means the board operates only on weekends.
-@group
@example
aardvark 555-5553 1200/300 B
alpo-net 555-3412 2400/1200/300 A
@@ -761,18 +765,16 @@ macfoo 555-6480 1200/300 A
sdace 555-3430 2400/1200/300 A
sabafoo 555-2127 1200/300 C
@end example
-@end group
@cindex @file{inventory-shipped} file
The second data file, called @file{inventory-shipped}, represents
-information about shipments during the year. Each line of this file is
-also one record. Each record contains the month of the year, the number
+information about shipments during the year.
+Each record contains the month of the year, the number
of green crates shipped, the number of red boxes shipped, the number of
orange bags shipped, and the number of blue packages shipped,
respectively. There are 16 entries, covering the 12 months of one year
and 4 months of the next year.@refill
-@group
@example
Jan 13 25 15 115
Feb 15 32 24 226
@@ -792,21 +794,21 @@ Feb 26 58 80 652
Mar 24 75 70 495
Apr 21 70 74 514
@end example
-@end group
@ifinfo
If you are reading this in GNU Emacs using Info, you can copy the regions
of text showing these sample files into your own test files. This way you
can try out the examples shown in the remainder of this document. You do
this by using the command @kbd{M-x write-region} to copy text from the Info
-file into a file for use with @code{awk} (see your @cite{GNU Emacs Manual}
+file into a file for use with @code{awk}
+(@pxref{Misc File Ops, , , emacs, GNU Emacs Manual},
for more information). Using this information, create your own
@file{BBS-list} and @file{inventory-shipped} files, and practice what you
learn in this manual.
@end ifinfo
@node Getting Started, Reading Files, This Manual, Top
-@chapter Getting Started With @code{awk}
+@chapter Getting Started with @code{awk}
@cindex script, definition of
@cindex rule, definition of
@cindex program, definition of
@@ -821,9 +823,9 @@ input file is reached.@refill
When you run @code{awk}, you specify an @code{awk} @dfn{program} which
tells @code{awk} what to do. The program consists of a series of
@dfn{rules}. (It may also contain @dfn{function definitions}, but that
-is an advanced feature, so let's ignore it for now.
-@xref{User-defined}.) Each rule specifies one pattern to search for,
-and one action to perform when that pattern is found.
+is an advanced feature, so we will ignore it for now.
+@xref{User-defined, ,User-defined Functions}.) Each rule specifies one
+pattern to search for, and one action to perform when that pattern is found.
Syntactically, a rule consists of a pattern followed by an action. The
action is enclosed in curly braces to separate it from the pattern.
@@ -837,13 +839,15 @@ program looks like this:
@end example
@menu
-* Very Simple:: A very simple example.
-* Two Rules:: A less simple one-line example with two rules.
-* More Complex:: A more complex example.
-* Running gawk:: How to run @code{gawk} programs; includes command line syntax.
-* Comments:: Adding documentation to @code{gawk} programs.
-* Statements/Lines:: Subdividing or combining statements into lines.
-* When:: When to use @code{gawk} and when to use other things.
+* Very Simple:: A very simple example.
+* Two Rules:: A less simple one-line example with two rules.
+* More Complex:: A more complex example.
+* Running gawk:: How to run @code{gawk} programs;
+ includes command line syntax.
+* Comments:: Adding documentation to @code{gawk} programs.
+* Statements/Lines:: Subdividing or combining statements into lines.
+* When:: When to use @code{gawk} and
+ when to use other things.
@end menu
@node Very Simple, Two Rules, Getting Started, Getting Started
@@ -852,7 +856,7 @@ program looks like this:
@cindex @samp{print $0}
The following command runs a simple @code{awk} program that searches the
input file @file{BBS-list} for the string of characters: @samp{foo}. (A
-string of characters is usually called, quite simply, a @dfn{string}.
+string of characters is usually called a @dfn{string}.
The term @dfn{string} is perhaps based on similar usage in English, such
as ``a string of pearls,'' or, ``a string of cars in a train.'')
@@ -863,24 +867,26 @@ awk '/foo/ @{ print $0 @}' BBS-list
@noindent
When lines containing @samp{foo} are found, they are printed, because
@w{@samp{print $0}} means print the current line. (Just @samp{print} by
-itself also means the same thing, so we could have written that
+itself means the same thing, so we could have written that
instead.)
You will notice that slashes, @samp{/}, surround the string @samp{foo}
in the actual @code{awk} program. The slashes indicate that @samp{foo}
is a pattern to search for. This type of pattern is called a
@dfn{regular expression}, and is covered in more detail later
-(@pxref{Regexp}). There are single-quotes around the @code{awk} program
-so that the shell won't interpret any of it as special shell
-characters.@refill
+(@pxref{Regexp, ,Regular Expressions as Patterns}). There are
+single-quotes around the @code{awk} program so that the shell won't
+interpret any of it as special shell characters.@refill
Here is what this program prints:
@example
+@group
fooey 555-1234 2400/1200/300 B
foot 555-6699 1200/300 B
macfoo 555-6480 1200/300 A
sabafoo 555-2127 1200/300 C
+@end group
@end example
@cindex action, default
@@ -909,8 +915,9 @@ which they appear in the @code{awk} program. If no patterns match, then
no actions are run.
After processing all the rules (perhaps none) that match the line,
-@code{awk} reads the next line (however, @pxref{Next Statement}).
-This continues until the end of the file is reached.@refill
+@code{awk} reads the next line (however,
+@pxref{Next Statement, ,The @code{next} Statement}). This continues
+until the end of the file is reached.@refill
For example, the @code{awk} program:
@@ -978,11 +985,11 @@ ls -l | awk '$5 == "Nov" @{ sum += $4 @}
This command prints the total number of bytes in all the files in the
current directory that were last modified in November (of any year).
(In the C shell you would need to type a semicolon and then a backslash
-at the end of the first line; in a @sc{POSIX}-compliant shell, such as the
+at the end of the first line; in a @sc{posix}-compliant shell, such as the
Bourne shell or the Bourne-Again shell, you can type the example as shown.)
-The @w{@samp{ls -l}} part of this example is a command that gives you a full
-listing of all the files in a directory, including file size and date.
+The @w{@samp{ls -l}} part of this example is a command that gives you a
+listing of the files in a directory, including file size and date.
Its output looks like this:@refill
@example
@@ -1019,11 +1026,11 @@ After the last line of output from @code{ls} has been processed, the
printed. In this example, the value of @code{sum} would be 80600.@refill
These more advanced @code{awk} techniques are covered in later sections
-(@pxref{Actions}). Before you can move on to more advanced @code{awk}
-programming, you have to know how @code{awk} interprets your input and
-displays your output. By manipulating fields and using @code{print}
-statements, you can produce some very useful and spectacular looking
-reports.@refill
+(@pxref{Actions, ,Overview of Actions}). Before you can move on to more
+advanced @code{awk} programming, you have to know how @code{awk} interprets
+your input and displays your output. By manipulating fields and using
+@code{print} statements, you can produce some very useful and
+spectacular-looking reports.@refill
@node Running gawk, Comments, More Complex, Getting Started
@section How to Run @code{awk} Programs
@@ -1064,10 +1071,11 @@ awk -f @var{program-file} @var{input-file1} @var{input-file2} @dots{}
@end example
@menu
-* One-shot:: Running a short throw-away @code{awk} program.
-* Read Terminal:: Using no input files (input from terminal instead).
-* Long:: Putting permanent @code{awk} programs in files.
-* Executable Scripts:: Making self-contained @code{awk} programs.
+* One-shot:: Running a short throw-away @code{awk} program.
+* Read Terminal:: Using no input files (input from
+ terminal instead).
+* Long:: Putting permanent @code{awk} programs in files.
+* Executable Scripts:: Making self-contained @code{awk} programs.
@end menu
@node One-shot, Read Terminal, Running gawk, Running gawk
@@ -1152,7 +1160,7 @@ as matching the pattern @samp{th}. Notice that it did not recognize
@samp{Thomas} as matching the pattern. The @code{awk} language is
@dfn{case sensitive}, and matches patterns exactly. (However, you can
override this with the variable @code{IGNORECASE}.
-@xref{Case-sensitivity}.)
+@xref{Case-sensitivity, ,Case-sensitivity in Matching}.)
@node Long, Executable Scripts, Read Terminal, Running gawk
@subsection Running Long Programs
@@ -1192,9 +1200,9 @@ awk '/th/'
@end example
@noindent
-which was explained earlier (@pxref{Read Terminal}). Note that you
-don't usually need single quotes around the file name that you specify
-with @samp{-f}, because most file names don't contain any of the shell's
+which was explained earlier (@pxref{Read Terminal, ,Running @code{awk} without Input Files}).
+Note that you don't usually need single quotes around the file name that you
+specify with @samp{-f}, because most file names don't contain any of the shell's
special characters. Notice that in @file{th-prog}, the @code{awk}
program did not have single quotes around it. The quotes are only needed
for programs that are provided on the @code{awk} command line.
@@ -1216,8 +1224,8 @@ affect the execution of the @code{awk} program, but it does make
Once you have learned @code{awk}, you may want to write self-contained
@code{awk} scripts, using the @samp{#!} script mechanism. You can do
this on many Unix systems @footnote{The @samp{#!} mechanism works on
-Unix systems derived from BSD Unix, System V Release 4, and some System
-V Release 3 systems.} and (someday) on GNU.@refill
+Unix systems derived from Berkeley Unix, System V Release 4, and some System
+V Release 3 systems.} (and someday on GNU).@refill
For example, you could create a text file named @file{hello}, containing
the following (where @samp{BEGIN} is a feature we have not yet
@@ -1297,11 +1305,11 @@ character, @samp{#}, and continues to the end of the line. The
@code{awk} language ignores the rest of a line following a sharp sign.
For example, we could have put the following into @file{th-prog}:@refill
-@example
+@smallexample
# This program finds records containing the pattern @samp{th}. This is how
# you continue comments on additional lines.
/th/
-@end example
+@end smallexample
You can put comment lines into keyboard-composed throw-away @code{awk}
programs also, but this usually isn't very useful; the purpose of a
@@ -1331,7 +1339,8 @@ lines by inserting a newline after any of the following:@refill
A newline at any other point is considered the end of the statement.
(Splitting lines after @samp{?} and @samp{:} is a minor @code{gawk}
extension. The @samp{?} and @samp{:} referred to here are part of the
-three operand coditional expression described in @ref{Conditional Exp}.)
+three operand conditional expression described in
+@ref{Conditional Exp, ,Conditional Expressions}.)@refill
@cindex backslash continuation
@cindex continuation of lines
@@ -1361,7 +1370,7 @@ expression or a string.@refill
@strong{Warning: backslash continuation does not work as described above
with the C shell.} Continuation with backslash works for @code{awk}
programs in files, and also for one-shot programs @emph{provided} you
-are using a @sc{POSIX}-compliant shell, such as the Bourne shell or the
+are using a @sc{posix}-compliant shell, such as the Bourne shell or the
Bourne-again shell. But the C shell used on Berkeley Unix behaves
differently! There, you must use two backslashes in a row, followed by
a newline.@refill
@@ -1383,18 +1392,18 @@ separated with a semicolon is a recent change in the @code{awk}
language; it was done for consistency with the treatment of statements
within an action.
-@node When, , Statements/Lines, Getting Started
+@node When, , Statements/Lines, Getting Started
@section When to Use @code{awk}
@cindex when to use @code{awk}
@cindex applications of @code{awk}
-What use is all of this to me, you might ask? Using additional utility
-programs, more advanced patterns, field separators, arithmetic
+You may wonder how @code{awk} can be useful to you. Using additional
+utility programs, more advanced patterns, field separators, arithmetic
statements, and other selection criteria, you can produce much more
complex output. The @code{awk} language is very useful for producing
reports from large amounts of raw data, such as summarizing information
-from the output of other utility programs like @code{ls}.
-(@xref{More Complex, , A More Complex Example}.)
+from the output of other utility programs like @code{ls}.
+(@xref{More Complex, ,A More Complex Example}.)
Programs written with @code{awk} are usually much smaller than they would
be in other languages. This makes @code{awk} programs easy to compose and
@@ -1431,32 +1440,31 @@ In the typical @code{awk} program, all input is read either from the
standard input (by default the keyboard, but often a pipe from another
command) or from files whose names you specify on the @code{awk} command
line. If you specify input files, @code{awk} reads them in order, reading
-all the data frome one before going on to the next. The name of the current
+all the data from one before going on to the next. The name of the current
input file can be found in the built-in variable @code{FILENAME}
(@pxref{Built-in Variables}).@refill
-The input is read in units called @dfn{records}, and processed by the
+The input is read in units called records, and processed by the
rules one record at a time. By default, each record is one line. Each
-record is split automatically into @dfn{fields}, to make it more
+record is split automatically into fields, to make it more
convenient for a rule to work on its parts.
On rare occasions you will need to use the @code{getline} command,
-which can do explicit input from any number of files (@pxref{Getline}).
+which can do explicit input from any number of files
+(@pxref{Getline, ,Explicit Input with @code{getline}}).@refill
@menu
-* Records:: Controlling how data is split into records.
-* Fields:: An introduction to fields.
-* Non-Constant Fields:: Non-constant Field Numbers.
-* Changing Fields:: Changing the Contents of a Field.
-* Field Separators:: The field separator and how to change it.
-* Constant Size:: Reading constant width data.
-* Multiple Line:: Reading multi-line records.
-
-* Getline:: Reading files under explicit program control
- using the @code{getline} function.
-
-* Close Input:: Closing an input file (so you can read from
- the beginning once more).
+* Records:: Controlling how data is split into records.
+* Fields:: An introduction to fields.
+* Non-Constant Fields:: Non-constant Field Numbers.
+* Changing Fields:: Changing the Contents of a Field.
+* Field Separators:: The field separator and how to change it.
+* Constant Size:: Reading constant width data.
+* Multiple Line:: Reading multi-line records.
+* Getline:: Reading files under explicit program control
+ using the @code{getline} function.
+* Close Input:: Closing an input file (so you can read from
+ the beginning once more).
@end menu
@node Records, Fields, Reading Files, Reading Files
@@ -1487,23 +1495,25 @@ full value of all its other built-in variables.@refill
Someday this should be true!
The value of @code{RS} is not limited to a one-character string. It can
-be any regular expression (@pxref{Regexp}). In general, each record
+be any regular expression (@pxref{Regexp, ,Regular Expressions as Patterns}).
+In general, each record
ends at the next string that matches the regular expression; the next
record starts at the end of the matching string. This general rule is
actually at work in the usual case, where @code{RS} contains just a
newline: a record ends at the beginning of the next matching string (the
next newline in the input) and the following record starts just after
the end of this string (at the first character of the following line).
-The newline, since it matches @code{RS}, is not part of either record.
+The newline, since it matches @code{RS}, is not part of either record.@refill
@end ignore
You can change the value of @code{RS} in the @code{awk} program with the
-assignment operator, @samp{=} (@pxref{Assignment Ops}). The new
-record-separator character should be enclosed in quotation marks to make
+assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+The new record-separator character should be enclosed in quotation marks to make
a string constant. Often the right time to do this is at the beginning
of execution, before any input has been processed, so that the very
first record will be read with the proper separator. To do this, use
-the special @code{BEGIN} pattern (@pxref{BEGIN/END}). For
+the special @code{BEGIN} pattern
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}). For
example:@refill
@example
@@ -1520,7 +1530,8 @@ its output, the effect of this @code{awk} program is to copy the input
with each slash changed to a newline.
Another way to change the record separator is on the command line,
-using the variable-assignment feature (@pxref{Command Line}).
+using the variable-assignment feature
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
@example
awk '@{ print $0 @}' RS="/" BBS-list
@@ -1536,7 +1547,7 @@ even if the last character in the file is not the character in @code{RS}.
@c merge the preceding paragraph and this stuff into one paragraph
@c and put it in an `expert info' section.
This produces correct behavior in the vast majority of cases, although
-the following (extreme) pipeline prints a suprising @samp{1}. (There
+the following (extreme) pipeline prints a surprising @samp{1}. (There
is one field, consisting of a newline.)
@example
@@ -1547,7 +1558,7 @@ echo | awk 'BEGIN @{ RS = "a" @} ; @{ print NF @}'
The empty string, @code{""} (a string of no characters), has a special meaning
as the value of @code{RS}: it means that records are separated only
-by blank lines. @xref{Multiple Line}, for more details.
+by blank lines. @xref{Multiple Line, ,Multiple-Line Records}, for more details.
@cindex number of records, @code{NR} or @code{FNR}
@vindex NR
@@ -1605,9 +1616,9 @@ field.@refill
No matter how many fields there are, the last field in a record can be
represented by @code{$NF}. So, in the example above, @code{$NF} would
be the same as @code{$7}, which is @samp{example.}. Why this works is
-explained below (@pxref{Non-Constant Fields}). If you try to refer to a
-field beyond the last one, such as @code{$8} when the record has only 7
-fields, you get the empty string.@refill
+explained below (@pxref{Non-Constant Fields, ,Non-constant Field Numbers}).
+If you try to refer to a field beyond the last one, such as @code{$8}
+when the record has only 7 fields, you get the empty string.@refill
@vindex NF
@cindex number of fields, @code{NF}
@@ -1627,8 +1638,8 @@ awk '$1 ~ /foo/ @{ print $0 @}' BBS-list
@noindent
This example prints each record in the file @file{BBS-list} whose first
field contains the string @samp{foo}. The operator @samp{~} is called a
-@dfn{matching operator} (@pxref{Comparison Ops}); it tests whether a
-string (here, the field @code{$1}) matches a given regular
+@dfn{matching operator} (@pxref{Comparison Ops, ,Comparison Expressions});
+it tests whether a string (here, the field @code{$1}) matches a given regular
expression.@refill
By contrast, the following example:
@@ -1699,30 +1710,31 @@ modifies the input file.)
Consider this example:
-@example
+@smallexample
awk '@{ $3 = $2 - 10; print $2, $3 @}' inventory-shipped
-@end example
+@end smallexample
@noindent
The @samp{-} sign represents subtraction, so this program reassigns
field three, @code{$3}, to be the value of field two minus ten,
-@code{$2 - 10}. (@xref{Arithmetic Ops}.) Then field two, and the
-new value for field three, are printed.
+@code{$2 - 10}. (@xref{Arithmetic Ops, ,Arithmetic Operators}.)
+Then field two, and the new value for field three, are printed.
In order for this to work, the text in field @code{$2} must make sense
as a number; the string of characters must be converted to a number in
order for the computer to do arithmetic on it. The number resulting
from the subtraction is converted back to a string of characters which
-then becomes field three. @xref{Conversion}.
+then becomes field three.
+@xref{Conversion, ,Conversion of Strings and Numbers}.@refill
When you change the value of a field (as perceived by @code{awk}), the
text of the input record is recalculated to contain the new field where
the old one was. Therefore, @code{$0} changes to reflect the altered
field. Thus,
-@example
+@smallexample
awk '@{ $2 = $2 - 10; print $0 @}' inventory-shipped
-@end example
+@end smallexample
@noindent
prints a copy of the input file, with 10 subtracted from the second
@@ -1731,9 +1743,9 @@ field of each line.
You can also assign contents to fields that are out of range. For
example:
-@example
+@smallexample
awk '@{ $6 = ($5 + $4 + $3 + $2) ; print $6 @}' inventory-shipped
-@end example
+@end smallexample
@noindent
We've just created @code{$6}, whose value is the sum of fields
@@ -1750,49 +1762,49 @@ existing fields.
This recomputation affects and is affected by several features not yet
discussed, in particular, the @dfn{output field separator}, @code{OFS},
which is used to separate the fields (@pxref{Output Separators}), and
-@code{NF} (the number of fields; @pxref{Fields}). For example, the
-value of @code{NF} is set to the number of the highest field you
-create.@refill
+@code{NF} (the number of fields; @pxref{Fields, ,Examining Fields}).
+For example, the value of @code{NF} is set to the number of the highest
+field you create.@refill
Note, however, that merely @emph{referencing} an out-of-range field
does @emph{not} change the value of either @code{$0} or @code{NF}.
Referencing an out-of-range field merely produces a null string. For
example:@refill
-@example
+@smallexample
if ($(NF+1) != "")
print "can't happen"
else
print "everything is normal"
-@end example
+@end smallexample
@noindent
should print @samp{everything is normal}, because @code{NF+1} is certain
-to be out of range. (@xref{If Statement}, for more information about
-@code{awk}'s @code{if-else} statements.)
+to be out of range. (@xref{If Statement, ,The @code{if} Statement},
+for more information about @code{awk}'s @code{if-else} statements.)@refill
It is important to note that assigning to a field will change the
value of @code{$0}, but will not change the value of @code{NF},
even when you assign the null string to a field. For example:
-@example
+@smallexample
echo a b c d | awk '@{ OFS = ":"; $2 = "" ; print ; print NF @}'
-@end example
+@end smallexample
@noindent
prints
-@example
+@smallexample
a::c:d
4
-@end example
+@end smallexample
@noindent
The field is still there; it just has an empty value. You can tell
because there are two colons in a row.
@node Field Separators, Constant Size, Changing Fields, Reading Files
-@section Specifying How Fields Are Separated
+@section Specifying how Fields are Separated
@vindex FS
@cindex fields, separating
@cindex field separator, @code{FS}
@@ -1801,7 +1813,7 @@ because there are two colons in a row.
(This section is rather long; it describes one of the most fundamental
operations in @code{awk}. If you are a novice with @code{awk}, we
recommend that you re-read this section after you have studied the
-section on regular expressions, @ref{Regexp}.)
+section on regular expressions, @ref{Regexp, ,Regular Expressions as Patterns}.)
The way @code{awk} splits an input record into fields is controlled by
the @dfn{field separator}, which is a single character or a regular
@@ -1822,12 +1834,14 @@ Shell programmers take note! @code{awk} does not use the name @code{IFS}
which is used by the shell.@refill
You can change the value of @code{FS} in the @code{awk} program with the
-assignment operator, @samp{=} (@pxref{Assignment Ops}). Often the right
-time to do this is at the beginning of execution, before any input has
-been processed, so that the very first record will be read with the
-proper separator. To do this, use the special @code{BEGIN} pattern
-(@pxref{BEGIN/END}). For example, here we set the value of @code{FS} to
-the string @code{","}:
+assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+Often the right time to do this is at the beginning of execution,
+before any input has been processed, so that the very first record
+will be read with the proper separator. To do this, use the special
+@code{BEGIN} pattern
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).
+For example, here we set the value of @code{FS} to the string
+@code{","}:@refill
@example
awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}'
@@ -1896,7 +1910,8 @@ single spaces to separate fields the way single commas were used above.
You can set @code{FS} to @w{@code{"[@ ]"}}. This regular expression
matches a single space and nothing else.
-@cindex field separator, setting on command line
+@c the following index entry is an overfull hbox. --mew 30jan1992
+@cindex field separator: on command line
@cindex command line, setting @code{FS} on
@code{FS} can be set on the command line. You use the @samp{-F} argument to
do so. For example:
@@ -1928,17 +1943,18 @@ awk -F\\\\ '@dots{}' files @dots{}
@noindent
Since @samp{\} is used for quoting in the shell, @code{awk} will see
@samp{-F\\}. Then @code{awk} processes the @samp{\\} for escape
-characters (@pxref{Constants}), finally yielding a single @samp{\} to
-be used for the field separator.
+characters (@pxref{Constants, ,Constant Expressions}), finally yielding
+a single @samp{\} to be used for the field separator.
@c end expert info
-As a special case, in compatibility mode (@pxref{Command Line}), if the
+As a special case, in compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), if the
argument to @samp{-F} is @samp{t}, then @code{FS} is set to the tab
character. (This is because if you type @samp{-F\t}, without the quotes,
at the shell, the @samp{\} gets deleted, so @code{awk} figures that you
really want your fields to be separated with tabs, and not @samp{t}s.
Use @samp{-v FS="t"} on the command line if you really do want to separate
-your fields with @samp{t}s.)
+your fields with @samp{t}s.)@refill
For example, let's use an @code{awk} program file called @file{baud.awk}
that contains the pattern @code{/300/}, and the action @samp{print $1}.
@@ -2001,7 +2017,7 @@ colons. The second field represents a user's encrypted password, but if
the field is empty, that user has no password.
@c begin expert info
-According to the @sc{POSIX} standard, @code{awk} is supposed to behave
+According to the @sc{posix} standard, @code{awk} is supposed to behave
as if each record is split into fields at the time that it is read.
In particular, this means that you can change the value of @code{FS}
after a record is read, but before any of the fields are referenced.
@@ -2036,25 +2052,27 @@ root:nSijPlPhZZwgE:0:0:Root:/:
@c begin expert info
There is an important difference between the two cases of @samp{FS = @w{" "}}
-and @samp{FS = @w{"[ \t]+"}} (which is a regular expression matching one or
-more blanks or tabs). For both values of @code{FS}, fields are separated
-by runs of blanks and/or tabs. However, when the value of @code{FS} is
-@code{" "}, @code{awk} will strip leading and trailing whitespace from the
-record, and then decide where the fields are.
+(a single blank) and @samp{FS = @w{"[ \t]+"}} (which is a regular expression
+matching one or more blanks or tabs). For both values of @code{FS}, fields
+are separated by runs of blanks and/or tabs. However, when the value of
+@code{FS} is @code{" "}, @code{awk} will strip leading and trailing whitespace
+from the record, and then decide where the fields are.
+
+For example, the following pipeline prints @samp{b}:
@example
echo ' a b c d ' | awk '@{ print $2 @}'
@end example
@noindent
-prints @samp{b}. However, this example
+However, the following prints @samp{a}:
@example
echo ' a b c d ' | awk 'BEGIN @{ FS = "[ \t]+" @} ; @{ print $2 @}'
@end example
@noindent
-prints @samp{a}. In this case, the first field is null.
+In this case, the first field is null.
The stripping of leading and trailing whitespace also comes into
play whenever @code{$0} is recomputed. For instance, this pipeline
@@ -2074,7 +2092,7 @@ a b c d
@noindent
The first @code{print} statement prints the record as it was read,
with leading whitespace intact. The assignment to @code{$2} rebuilds
-@code{$0} by concatentating @code{$1} through @code{$NF} together,
+@code{$0} by concatenating @code{$1} through @code{$NF} together,
separated by the value of @code{OFS}. Since the leading whitespace
was ignored when finding @code{$1}, it is not part of the new @code{$0}.
Finally, the last @code{print} statement prints the new @code{$0}.
@@ -2099,7 +2117,7 @@ Leading and trailing matches of @var{regexp} delimit empty fields.
@end table
@node Constant Size, Multiple Line, Field Separators, Reading Files
-@section Reading Constant Width Data
+@section Reading Fixed-width Data
(This section discusses an advanced, experimental feature. If you are
a novice @code{awk} user, you may wish to skip it on the first reading.)
@@ -2128,11 +2146,11 @@ The following data is the output of the @code{w} utility. It is useful
to illustrate the use of @code{FIELDWIDTHS}.
@example
- 10:06pm up 21 days, 14:04, 23 users, load average: 1.21, 1.36, 1.43
+ 10:06pm up 21 days, 14:04, 23 users
User tty login@ idle JCPU PCPU what
hzuo ttyV0 8:58pm 9 5 vi p24.tex
hzang ttyV3 6:37pm 50 -csh
-eklye ttyV5 9:53pm 7 1 em thesis_11jul91.tex
+eklye ttyV5 9:53pm 7 1 em thes.tex
dportein ttyV6 8:17pm 1:47 -csh
gierd ttyD3 10:00pm 1 elm
dave ttyD4 9:47pm 4 4 w
@@ -2148,14 +2166,14 @@ haven't been introduced yet.)@refill
@example
BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @}
NR > 2 @{
- idle = $4
- sub(/^ */, "", idle) # strip leading spaces
- if (idle == "") idle = 0
- if (idle ~ /:/) @{ split(idle, t, ":"); idle = t[1] * 60 + t[2] @}
- if (idle ~ /days/) @{ idle *= 24 * 60 * 60 @}
+ idle = $4
+ sub(/^ */, "", idle) # strip leading spaces
+ if (idle == "") idle = 0
+ if (idle ~ /:/) @{ split(idle, t, ":"); idle = t[1] * 60 + t[2] @}
+ if (idle ~ /days/) @{ idle *= 24 * 60 * 60 @}
- print $1, $2, idle
- @}
+ print $1, $2, idle
+@}
@end example
Here is the result of running the program on the data:
@@ -2202,7 +2220,7 @@ What should separate records?
One technique is to use an unusual character or string to separate
records. For example, you could use the formfeed character (written
-@samp{\f} in @code{awk}, as in C) to separate them, making each record
+@code{\f} in @code{awk}, as in C) to separate them, making each record
a page of the file. To do this, just set the variable @code{RS} to
@code{"\f"} (a string containing the formfeed character). Any
other character could equally well be used, as long as it won't be part
@@ -2228,6 +2246,7 @@ encountered. And the next record doesn't start until the first nonblank
line that follows---no matter how many blank lines appear in a row, they
are considered one record-separator. (End of file is also considered
a record separator.)@refill
+@c !!! This use of `end of file' is confusing. Needs to be clarified.
The second step is to separate the fields in the record. One way to do
this is to put each field on a separate line: to do this, just set the
@@ -2245,7 +2264,8 @@ you get useful behavior in the default case (i.e., @w{@code{FS == " "}}).
This feature can be a problem if you really don't want the
newline character to separate fields, since there is no way to
prevent it. However, you can work around this by using the @code{split}
-function to break up the record manually (@pxref{String Functions}).
+function to break up the record manually
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
@ignore
Here are two ways to use records separated by blank lines and break each
@@ -2331,7 +2351,7 @@ awk '@{
This @code{awk} program deletes all C-style comments, @samp{/* @dots{}
*/}, from the input. By replacing the @samp{print $0} with other
statements, you could perform more complicated processing on the
-decommented input, such as searching it for matches of a regular
+decommented input, like searching for matches of a regular
expression. (This program has a subtle problem---can you spot it?)
@c the program to remove comments doesn't work if one
@@ -2339,16 +2359,17 @@ expression. (This program has a subtle problem---can you spot it?)
@c idea for restart would be useful here). --- brennan@boeing.com
This form of the @code{getline} command sets @code{NF} (the number of
-fields; @pxref{Fields}), @code{NR} (the number of records read so far;
-@pxref{Records}), @code{FNR} (the number of records read from this input
-file), and the value of @code{$0}.
+fields; @pxref{Fields, ,Examining Fields}), @code{NR} (the number of
+records read so far; @pxref{Records, ,How Input is Split into Records}),
+@code{FNR} (the number of records read from this input file), and the
+value of @code{$0}.
@strong{Note:} the new value of @code{$0} is used in testing
the patterns of any subsequent rules. The original value
of @code{$0} that triggered the rule which executed @code{getline}
is lost. By contrast, the @code{next} statement reads a new record
but immediately begins processing it normally, starting with the first
-rule in the program. @xref{Next Statement}.
+rule in the program. @xref{Next Statement, ,The @code{next} Statement}.
@item getline @var{var}
This form of @code{getline} reads a record into the variable @var{var}.
@@ -2385,6 +2406,7 @@ free
Here's the program:
@example
+@group
awk '@{
if ((getline tmp) > 0) @{
print tmp
@@ -2392,6 +2414,7 @@ awk '@{
@} else
print $0
@}'
+@end group
@end example
The @code{getline} function used in this way sets only the variables
@@ -2465,7 +2488,7 @@ the @samp{@@include} line.@refill
The @code{close} function is called to ensure that if two identical
@samp{@@include} lines appear in the input, the entire specified file is
-included twice. @xref{Close Input}.@refill
+included twice. @xref{Close Input, ,Closing Input Files and Pipes}.@refill
One deficiency of this program is that it does not process nested
@samp{@@include} statements the way a true macro preprocessor would.
@@ -2495,8 +2518,8 @@ awk '@{
@noindent
The @code{close} function is called to ensure that if two identical
-@samp{@@execute} lines appear in the input, the command is run again for
-each one. @xref{Close Input}.
+@samp{@@execute} lines appear in the input, the command is run for
+each one. @xref{Close Input, ,Closing Input Files and Pipes}.
Given the input:
@@ -2539,7 +2562,6 @@ following program reads the current date and time into the variable
@code{current_time}, using the @code{date} utility, and then
prints it.@refill
-@group
@example
awk 'BEGIN @{
"date" | getline current_time
@@ -2547,13 +2569,12 @@ awk 'BEGIN @{
print "Report printed on " current_time
@}'
@end example
-@end group
In this version of @code{getline}, none of the built-in variables are
changed, and the record is not split into fields.
@end table
-@node Close Input,, Getline, Reading Files
+@node Close Input, , Getline, Reading Files
@section Closing Input Files and Pipes
@cindex closing input files and pipes
@findex close
@@ -2610,14 +2631,16 @@ statement. For fancier formatting use the @code{printf} statement.
Both are described in this chapter.
@menu
-* Print:: The @code{print} statement.
-* Print Examples:: Simple examples of @code{print} statements.
-* Output Separators:: The output separators and how to change them.
-* OFMT:: Controlling Numeric Output With @code{print}.
-* Printf:: The @code{printf} statement.
-* Redirection:: How to redirect output to multiple files and pipes.
-* Special Files:: File name interpretation in @code{gawk}. @code{gawk}
- allows access to inherited file descriptors.
+* Print:: The @code{print} statement.
+* Print Examples:: Simple examples of @code{print} statements.
+* Output Separators:: The output separators and how to change them.
+* OFMT:: Controlling Numeric Output with @code{print}.
+* Printf:: The @code{printf} statement.
+* Redirection:: How to redirect output to multiple
+ files and pipes.
+* Special Files:: File name interpretation in @code{gawk}.
+ @code{gawk} allows access to
+ inherited file descriptors.
@end menu
@node Print, Print Examples, Printing, Printing
@@ -2637,19 +2660,21 @@ print @var{item1}, @var{item2}, @dots{}
The entire list of items may optionally be enclosed in parentheses. The
parentheses are necessary if any of the item expressions uses a
relational operator; otherwise it could be confused with a redirection
-(@pxref{Redirection}). The relational operators are @samp{==},
+(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
+The relational operators are @samp{==},
@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and
-@samp{!~} (@pxref{Comparison Ops}).@refill
+@samp{!~} (@pxref{Comparison Ops, ,Comparison Expressions}).@refill
The items printed can be constant strings or numbers, fields of the
current record (such as @code{$1}), variables, or any @code{awk}
expressions. The @code{print} statement is completely general for
-computing @emph{what} values to print. With two exceptions
-(@pxref{Output Separators},
-and @pxref{OFMT}), what you can't do is
-specify @emph{how} to print them---how many columns to use, whether to
-use exponential notation or not, and so on. For that, you need the
-@code{printf} statement (@pxref{Printf}).@refill
+computing @emph{what} values to print. With two exceptions,
+you cannot specify @emph{how} to print them---how many
+columns, whether to use exponential notation or not, and so on.
+(@xref{Output Separators}, and
+@ref{OFMT, ,Controlling Numeric Output with @code{print}}.)
+For that, you need the @code{printf} statement
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
The simple statement @samp{print} with no items is equivalent to
@samp{print $0}: it prints the entire current record. To print a blank
@@ -2716,17 +2741,19 @@ awk '@{ print $1 $2 @}' inventory-shipped
prints:
@example
+@group
Jan13
Feb15
Mar15
@dots{}
+@end group
@end example
Neither example's output makes much sense to someone unfamiliar with the
file @file{inventory-shipped}. A heading line at the beginning would make
it clearer. Let's add some headings to our table of months (@code{$1}) and
green crates shipped (@code{$2}). We do this using the @code{BEGIN} pattern
-(@pxref{BEGIN/END}) to force the headings to be printed only once:
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}) to force the headings to be printed only once:
@example
awk 'BEGIN @{ print "Month Crates"
@@ -2737,16 +2764,16 @@ awk 'BEGIN @{ print "Month Crates"
@noindent
Did you already guess what happens? This program prints the following:
-@group
@example
+@group
Month Crates
----- ------
Jan 13
Feb 15
Mar 15
@dots{}
-@end example
@end group
+@end example
@noindent
The headings and the table data don't line up! We can fix this by printing
@@ -2762,8 +2789,8 @@ You can imagine that this way of lining up columns can get pretty
complicated when you have many columns to fix. Counting spaces for two
or three columns can be simple, but more than this and you can get
``lost'' quite easily. This is why the @code{printf} statement was
-created (@pxref{Printf}); one of its specialties is lining up columns of
-data.@refill
+created (@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing});
+one of its specialties is lining up columns of data.@refill
@node Output Separators, OFMT, Print Examples, Printing
@section Output Separators
@@ -2789,18 +2816,21 @@ character; thus, normally each @code{print} statement makes a separate line.
You can change how output fields and records are separated by assigning
new values to the variables @code{OFS} and/or @code{ORS}. The usual
-place to do this is in the @code{BEGIN} rule (@pxref{BEGIN/END}), so
+place to do this is in the @code{BEGIN} rule
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}), so
that it happens before any input is processed. You may also do this
with assignments on the command line, before the names of your input
-files.
+files.@refill
The following example prints the first and second fields of each input
record separated by a semicolon, with a blank line added after each
line:@refill
@example
+@group
awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @}
@{ print $1, $2 @}' BBS-list
+@end group
@end example
If the value of @code{ORS} does not contain a newline, all your output
@@ -2808,7 +2838,7 @@ will be run together on a single line, unless you output newlines some
other way.
@node OFMT, Printf, Output Separators, Printing
-@section Controlling Numeric Output With @code{print}
+@section Controlling Numeric Output with @code{print}
@vindex OFMT
When you use the @code{print} statement to print numeric values,
@code{awk} internally converts the number to a string of characters,
@@ -2817,7 +2847,8 @@ to do this conversion. For now, it suffices to say that the @code{sprintf}
function accepts a @dfn{format specification} that tells it how to format
numbers (or strings), and that there are a number of different ways that
numbers can be formatted. The different format specifications are discussed
-more fully in @ref{Printf}.@refill
+more fully in
+@ref{Printf, ,Using @code{printf} Statements for Fancier Printing}.@refill
The built-in variable @code{OFMT} contains the default format specification
that @code{print} uses with @code{sprintf} when it wants to convert a
@@ -2826,15 +2857,17 @@ as the value of @code{OFMT}, you can change how @code{print} will print
your numbers. As a brief example:
@example
+@group
awk 'BEGIN @{ OFMT = "%d" # print numbers as integers
print 17.23 @}'
+@end group
@end example
@noindent
will print @samp{17}.
@node Printf, Redirection, OFMT, Printing
-@section Using @code{printf} Statements For Fancier Printing
+@section Using @code{printf} Statements for Fancier Printing
@cindex formatted output
@cindex output, formatted
@@ -2848,10 +2881,10 @@ the @dfn{format string}, which controls how and where to print the other
arguments.
@menu
-* Basic Printf:: Syntax of the @code{printf} statement.
-* Control Letters:: Format-control letters.
-* Format Modifiers:: Format-specification modifiers.
-* Printf Examples:: Several examples.
+* Basic Printf:: Syntax of the @code{printf} statement.
+* Control Letters:: Format-control letters.
+* Format Modifiers:: Format-specification modifiers.
+* Printf Examples:: Several examples.
@end menu
@node Basic Printf, Control Letters, Printf, Printf
@@ -2868,9 +2901,10 @@ printf @var{format}, @var{item1}, @var{item2}, @dots{}
The entire list of items may optionally be enclosed in parentheses. The
parentheses are necessary if any of the item expressions uses a
relational operator; otherwise it could be confused with a redirection
-(@pxref{Redirection}). The relational operators are @samp{==},
+(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
+The relational operators are @samp{==},
@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and
-@samp{!~} (@pxref{Comparison Ops}).@refill
+@samp{!~} (@pxref{Comparison Ops, ,Comparison Expressions}).@refill
@cindex format string
The difference between @code{printf} and @code{print} is the argument
@@ -2878,7 +2912,7 @@ The difference between @code{printf} and @code{print} is the argument
specifies how to output each of the other arguments. It is called
the @dfn{format string}.
-The format string is the same as in the @sc{ANSI} C library function
+The format string is the same as in the @sc{ansi} C library function
@code{printf}. Most of @var{format} is text to be output verbatim.
Scattered among this text are @dfn{format specifiers}, one per item.
Each format specifier says to output the next item at that place in the
@@ -2925,8 +2959,8 @@ printf "%4.3e", 1950
@end example
@noindent
-prints @samp{1.950e+03}, with a total of 4 significant figures of
-which 3 follow the decimal point. The @samp{4.3} are @dfn{modifiers},
+prints @samp{1.950e+03}, with a total of four significant figures of
+which three follow the decimal point. The @samp{4.3} are @dfn{modifiers},
discussed below.
@item f
@@ -3071,7 +3105,7 @@ printf "<%" w "." p "s>\n", s
@noindent
This is not particularly easy to read, however.
-@node Printf Examples, , Format Modifiers, Printf
+@node Printf Examples, , Format Modifiers, Printf
@subsection Examples of Using @code{printf}
Here is how to use @code{printf} to make an aligned table:
@@ -3087,6 +3121,7 @@ prints the phone numbers (@code{$2}) afterward on the line. This
produces an aligned two-column table of names and phone numbers:@refill
@example
+@group
aardvark 555-5553
alpo-net 555-3412
barfly 555-7685
@@ -3098,6 +3133,7 @@ foot 555-6699
macfoo 555-6480
sdace 555-3430
sabafoo 555-2127
+@end group
@end example
Did you notice that we did not specify that the phone numbers be printed
@@ -3111,13 +3147,16 @@ last things on their lines. We don't need to put spaces after them.
We could make our table look even nicer by adding headings to the tops
of the columns. To do this, use the @code{BEGIN} pattern
-(@pxref{BEGIN/END}) to force the header to be printed only once, at the
-beginning of the @code{awk} program:
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns})
+to force the header to be printed only once, at the beginning of
+the @code{awk} program:@refill
@example
+@group
awk 'BEGIN @{ print "Name Number"
print "---- ------" @}
@{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end group
@end example
Did you notice that we mixed @code{print} and @code{printf} statements in
@@ -3125,9 +3164,11 @@ the above example? We could have used just @code{printf} statements to get
the same results:
@example
+@group
awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number"
printf "%-10s %s\n", "----", "------" @}
@{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end group
@end example
@noindent
@@ -3147,7 +3188,8 @@ awk 'BEGIN @{ format = "%-10s %s\n"
See if you can use the @code{printf} statement to line up the headings and
table data for our @file{inventory-shipped} example covered earlier in the
-section on the @code{print} statement (@pxref{Print}).
+section on the @code{print} statement
+(@pxref{Print, ,The @code{print} Statement}).@refill
@node Redirection, Special Files, Printf, Printing
@section Redirecting Output of @code{print} and @code{printf}
@@ -3180,7 +3222,7 @@ also.@refill
This type of redirection prints the items onto the output file
@var{output-file}. The file name @var{output-file} can be any
expression. Its value is changed to a string and then used as a
-file name (@pxref{Expressions}).@refill
+file name (@pxref{Expressions, ,Expressions as Action Statements}).@refill
When this type of redirection is used, the @var{output-file} is erased
before the first output is written to it. Subsequent writes do not
@@ -3192,10 +3234,10 @@ BBS names to a file @file{name-list} and a list of phone numbers to a
file @file{phone-list}. Each output file contains one name or number
per line.
-@example
+@smallexample
awk '@{ print $2 > "phone-list"
print $1 > "name-list" @}' BBS-list
-@end example
+@end smallexample
@item print @var{items} >> @var{output-file}
This type of redirection prints the items onto the output file
@@ -3219,10 +3261,10 @@ shell command to be run.
For example, this produces two files, one unsorted list of BBS names
and one list sorted in reverse alphabetical order:
-@example
+@smallexample
awk '@{ print $1 > "names.unsorted"
print $1 | "sort -r > names.sorted" @}' BBS-list
-@end example
+@end smallexample
Here the unsorted list is written with an ordinary redirection while
the sorted list is written by piping through the @code{sort} utility.
@@ -3231,15 +3273,21 @@ Here is an example that uses redirection to mail a message to a mailing
list @samp{bug-system}. This might be useful when trouble is encountered
in an @code{awk} script run periodically for system maintenance.
-@example
-print "Awk script failed:", $0 | "mail bug-system"
-print "at record number", FNR, "of", FILENAME | "mail bug-system"
-close("mail bug-system")
-@end example
+@smallexample
+report = "mail bug-system"
+print "Awk script failed:", $0 | report
+print "at record number", FNR, "of", FILENAME | report
+close(report)
+@end smallexample
We call the @code{close} function here because it's a good idea to close
the pipe as soon as all the intended output has been sent to it.
-@xref{Close Output}, for more information on this.
+@xref{Close Output, ,Closing Output Files and Pipes}, for more information
+on this. This example also illustrates the use of a variable to represent
+a @var{file} or @var{command}: it is not necessary to always
+use a string constant. Using a variable is generally a good idea,
+since @code{awk} requires you to spell the string value identically
+every time.
@end table
Redirecting output using @samp{>}, @samp{>>}, or @samp{|} asks the system
@@ -3247,7 +3295,7 @@ to open a file or pipe only if the particular @var{file} or @var{command}
you've specified has not already been written to by your program, or if
it has been closed since it was last written to.@refill
-@node Close Output, , File/Pipe Redirection, Redirection
+@node Close Output, , File/Pipe Redirection, Redirection
@subsection Closing Output Files and Pipes
@cindex closing output files and pipes
@findex close
@@ -3292,7 +3340,8 @@ Here are some reasons why you might need to close an output file:
@item
To write a file and read it back later on in the same @code{awk}
program. Close the file when you are finished writing it; then
-you can start reading it with @code{getline} (@pxref{Getline}).
+you can start reading it with @code{getline}
+(@pxref{Getline, ,Explicit Input with @code{getline}}).@refill
@item
To write numerous files, successively, in the same @code{awk}
@@ -3318,7 +3367,7 @@ a single message of several lines. By contrast, if you close the pipe
after each line of output, then each line makes a separate message.
@end itemize
-@node Special Files, , Redirection, Printing
+@node Special Files, , Redirection, Printing
@section Standard I/O Streams
@cindex standard input
@cindex standard output
@@ -3336,14 +3385,14 @@ streams, standard output and standard error, is so that they can be
redirected separately.
@iftex
-@cindex differences between @code{gawk} and @code{awk}
+@cindex differences: @code{gawk} and @code{awk}
@end iftex
In other implementations of @code{awk}, the only way to write an error
message to standard error in an @code{awk} program is as follows:
-@example
+@smallexample
print "Serious error detected!\n" | "cat 1>&2"
-@end example
+@end smallexample
@noindent
This works by opening a pipeline to a shell command which can access the
@@ -3353,11 +3402,13 @@ separate process. So people writing @code{awk} programs have often
neglected to do this. Instead, they have sent the error messages to the
terminal, like this:
-@example
+@smallexample
+@group
NF != 4 @{
printf("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/tty"
@}
-@end example
+@end group
+@end smallexample
@noindent
This has the same effect most of the time, but not always: although the
@@ -3399,16 +3450,17 @@ respectively, but they are more self-explanatory.
The proper way to write an error message in a @code{gawk} program
is to use @file{/dev/stderr}, like this:
-@example
+@smallexample
NF != 4 @{
printf("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/stderr"
@}
-@end example
+@end smallexample
Recognition of these special file names is disabled if @code{gawk} is in
-compatibility mode (@pxref{Command Line}).
+compatibility mode (@pxref{Command Line, ,Invoking @code{awk}}).
-@strong{Note}: Unless your system actually has a @file{/dev/fd} directory,
+@quotation
+@strong{Caution}: Unless your system actually has a @file{/dev/fd} directory,
the interpretation of these file names is done by @code{gawk} itself.
For example, using @samp{/dev/fd/4} for output will actually write on
file descriptor 4, and not on a new file descriptor that was @code{dup}'ed
@@ -3416,6 +3468,7 @@ from file descriptor 4. Most of the time this does not matter; however, it
is important to @emph{not} close any of the files related to file descriptors
0, 1, and 2. If you do close one of these files, unpredictable behavior
will result.
+@end quotation
@node One-liners, Patterns, Printing, Top
@chapter Useful ``One-liners''
@@ -3427,6 +3480,16 @@ programs contain constructs that haven't been covered yet. The description
of the program will give you a good idea of what is going on, but please
read the rest of the manual to become an @code{awk} expert!
+@c Per suggestions from Michal Jaegermann
+@ifinfo
+Since you are reading this in Info, each line of the example code is
+enclosed in quotes, to represent text that you would type literally.
+The examples themselves represent shell commands that use single quotes
+to keep the shell from interpreting the contents of the program.
+When reading the examples, focus on the text between the open and close
+quotes.
+@end ifinfo
+
@table @code
@item awk '@{ if (NF > max) max = NF @}
@itemx @ @ @ @ @ END @{ print max @}'
@@ -3473,7 +3536,7 @@ This program counts lines in a file.
This program also counts lines in a file, but lets @code{awk} do the work.
@item awk '@{ print NR, $0 @}'
-This program concatenates and line numbers all its input files,
+This program adds line numbers to all its input files,
similar to @samp{cat -n}.
@end table
@@ -3486,71 +3549,57 @@ executed when its pattern matches the current input record. This
chapter tells all about how to write patterns.
@menu
-* Kinds of Patterns:: A list of all kinds of patterns.
- The following subsections describe them in detail.
-
-* Empty:: The empty pattern, which matches every record.
-
-* Regexp:: Regular expressions such as @samp{/foo/}.
-
-* Comparison Patterns:: Comparison expressions such as @code{$1 > 10}.
-
-* Boolean Patterns:: Combining comparison expressions.
-
-* Expression Patterns:: Any expression can be used as a pattern.
-
-* Ranges:: Using pairs of patterns to specify record ranges.
-
-* BEGIN/END:: Specifying initialization and cleanup rules.
+* Kinds of Patterns:: A list of all kinds of patterns.
+ The following subsections describe
+ them in detail.
+* Regexp:: Regular expressions such as @samp{/foo/}.
+* Comparison Patterns:: Comparison expressions such as @code{$1 > 10}.
+* Boolean Patterns:: Combining comparison expressions.
+* Expression Patterns:: Any expression can be used as a pattern.
+* Ranges:: Pairs of patterns specify record ranges.
+* BEGIN/END:: Specifying initialization and cleanup rules.
+* Empty:: The empty pattern, which matches every record.
@end menu
-@node Kinds of Patterns, Empty, Patterns, Patterns
+@node Kinds of Patterns, Regexp, Patterns, Patterns
@section Kinds of Patterns
@cindex patterns, types of
Here is a summary of the types of patterns supported in @code{awk}.
+@c At the next rewrite, check to see that this order matches the
+@c order in the text. It might not matter to a reader, but it's good
+@c style. Also, it might be nice to mention all the topics of sections
+@c that follow in this list; that way people can scan and know when to
+@c expect a specific topic. Specifically please also make an entry
+@c for Boolean operators as patterns in the right place. --mew
@table @code
@item /@var{regular expression}/
A regular expression as a pattern. It matches when the text of the
input record fits the regular expression.
-(@xref{Regexp, , Regular Expressions as Patterns}.)@refill
+(@xref{Regexp, ,Regular Expressions as Patterns}.)@refill
@item @var{expression}
A single expression. It matches when its value
is nonzero (if a number) or nonnull (if a string).
-(@xref{Expression Patterns}.)@refill
+(@xref{Expression Patterns, ,Expressions as Patterns}.)@refill
@item @var{pat1}, @var{pat2}
A pair of patterns separated by a comma, specifying a range of records.
-(@xref{Ranges, , Specifying Record Ranges With Patterns}.)
+(@xref{Ranges, ,Specifying Record Ranges with Patterns}.)
@item BEGIN
@itemx END
Special patterns to supply start-up or clean-up information to
-@code{awk}. (@xref{BEGIN/END}.)
+@code{awk}. (@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.)
@item @var{null}
The empty pattern matches every input record.
-(@xref{Empty, , The Empty Pattern}.)@refill
+(@xref{Empty, ,The Empty Pattern}.)@refill
@end table
-@node Empty, Regexp, Kinds of Patterns, Patterns
-@section The Empty Pattern
-
-@cindex empty pattern
-@cindex pattern, empty
-An empty pattern is considered to match @emph{every} input record. For
-example, the program:@refill
-@example
-awk '@{ print $1 @}' BBS-list
-@end example
-
-@noindent
-prints the first field of every record.
-
-@node Regexp, Comparison Patterns, Empty, Patterns
+@node Regexp, Comparison Patterns, Kinds of Patterns, Patterns
@section Regular Expressions as Patterns
@cindex pattern, regular expressions
@cindex regexp
@@ -3569,8 +3618,8 @@ Therefore, the pattern @code{/foo/} matches any input record containing
classes of strings.
@menu
-* Usage: Regexp Usage. How regexps are used in patterns.
-* Operators: Regexp Operators. How to write a regexp.
+* Regexp Usage:: How to Use Regular Expressions
+* Regexp Operators:: Regular Expression Operators
* Case-sensitivity:: How to do case-insensitive matching.
@end menu
@@ -3590,7 +3639,7 @@ awk '/foo/ @{ print $2 @}' BBS-list
@cindex regular expression matching operators
@cindex string-matching operators
@cindex operators, string-matching
-@cindex operators, regular expression matching
+@cindex operators, regexp matching
@cindex regexp search operators
Regular expressions can also be used in comparison expressions. Then
you can specify the string to match against; it need not be the entire
@@ -3727,10 +3776,10 @@ character set, put a @samp{\} in front of it. For example:
matches either @samp{d} or @samp{]}.@refill
This treatment of @samp{\} is compatible with other @code{awk}
-implementations, and is also mandated by the @sc{POSIX} Command Language
+implementations, and is also mandated by the @sc{posix} Command Language
and Utilities standard. The regular expressions in @code{awk} are a superset
-of the @sc{POSIX} specification for Extended Regular Expressions (EREs).
-@sc{POSIX} EREs are based on the regular expressions accepted by the
+of the @sc{posix} specification for Extended Regular Expressions (EREs).
+@sc{posix} EREs are based on the regular expressions accepted by the
traditional @code{egrep} utility.
In @code{egrep} syntax, backslash is not syntactically special within
@@ -3739,7 +3788,7 @@ represent the characters @samp{]}, @samp{-} and @samp{^} as members of a
character set.
In @code{egrep} syntax, to match @samp{-}, write it as @samp{---},
-which is a range containing only @samp{-}. You may also give @samp{-}
+which is a range containing only @w{@samp{-}.} You may also give @samp{-}
as the first or last character in the set. To match @samp{^}, put it
anywhere except as the first character of a set. To match a @samp{]},
make it the first character in the set. For example:@refill
@@ -3845,16 +3894,17 @@ matching. For example:
@noindent
matches the character @samp{$}.
-The escape sequences used for string constants (@pxref{Constants}) are
+The escape sequences used for string constants
+(@pxref{Constants, ,Constant Expressions}) are
valid in regular expressions as well; they are also introduced by a
-@samp{\}.
+@samp{\}.@refill
@end table
In regular expressions, the @samp{*}, @samp{+}, and @samp{?} operators have
the highest precedence, followed by concatenation, and finally by @samp{|}.
As in arithmetic, parentheses can change how operators are grouped.@refill
-@node Case-sensitivity,, Regexp Operators, Regexp
+@node Case-sensitivity, , Regexp Operators, Regexp
@subsection Case-sensitivity in Matching
Case is normally significant in regular expressions, both when matching
@@ -3870,7 +3920,9 @@ read. There are two other alternatives that you might prefer.
One way to do a case-insensitive match at a particular point in the
program is to convert the data to a single case, using the
@code{tolower} or @code{toupper} built-in string functions (which we
-haven't discussed yet; @pxref{String Functions}). For example:
+haven't discussed yet;
+@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+For example:@refill
@example
tolower($1) ~ /foo/ @{ @dots{} @}
@@ -3894,7 +3946,7 @@ IGNORECASE = 1
if (x ~ /ab/) @dots{} # now it will succeed
@end example
-You cannot generally use @code{IGNORECASE} to make certain rules
+In general, you cannot use @code{IGNORECASE} to make certain rules
case-insensitive and other rules case-sensitive, because there is no way
to set @code{IGNORECASE} just for the pattern of a particular rule. To
do this, you must use character sets or @code{tolower}. However, one
@@ -3906,8 +3958,8 @@ rule. Setting @code{IGNORECASE} from the command line is a way to make
a program case-insensitive without having to edit it.
The value of @code{IGNORECASE} has no effect if @code{gawk} is in
-compatibility mode (@pxref{Command Line}). Case is always significant
-in compatibility mode.
+compatibility mode (@pxref{Command Line, ,Invoking @code{awk}}).
+Case is always significant in compatibility mode.@refill
@node Comparison Patterns, Boolean Patterns, Regexp, Patterns
@section Comparison Expressions as Patterns
@@ -3918,9 +3970,9 @@ in compatibility mode.
@dfn{Comparison patterns} test relationships such as equality between
two strings or numbers. They are a special case of expression patterns
-(@pxref{Expression Patterns}). They are written with @dfn{relational
-operators}, which are a superset of those in C. Here is a table of
-them:
+(@pxref{Expression Patterns, ,Expressions as Patterns}). They are written
+with @dfn{relational operators}, which are a superset of those in C.
+Here is a table of them:@refill
@table @code
@item @var{x} < @var{y}
@@ -3950,8 +4002,9 @@ True if @var{x} does not match the regular expression described by @var{y}.
The operands of a relational operator are compared as numbers if they
are both numbers. Otherwise they are converted to, and compared as,
-strings (@pxref{Conversion}, for the detailed rules). Strings are compared
-by comparing the first character of each, then the second character of each,
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers},
+for the detailed rules). Strings are compared by comparing the first
+character of each, then the second character of each,
and so on, until there is a difference. If the two strings are equal until
the shorter one runs out, the shorter one is considered to be less than the
longer one. Thus, @code{"10"} is less than @code{"9"}, and @code{"abc"}
@@ -3960,7 +4013,8 @@ is less than @code{"abcd"}.@refill
The left operand of the @samp{~} and @samp{!~} operators is a string.
The right operand is either a constant regular expression enclosed in
slashes (@code{/@var{regexp}/}), or any expression, whose string value
-is used as a dynamic regular expression (@pxref{Regexp Usage}).
+is used as a dynamic regular expression
+(@pxref{Regexp Usage, ,How to Use Regular Expressions}).@refill
The following example prints the second field of each input record
whose first field is precisely @samp{foo}.
@@ -4017,9 +4071,10 @@ awk '! /foo/' BBS-list
@end example
Note that boolean patterns are a special case of expression patterns
-(@pxref{Expression Patterns}); they are expressions that use the
-boolean operators. @xref{Boolean Ops}, for complete information on
-the boolean operators.
+(@pxref{Expression Patterns, ,Expressions as Patterns}); they are
+expressions that use the boolean operators.
+@xref{Boolean Ops, ,Boolean Expressions}, for complete information
+on the boolean operators.@refill
The subpatterns of a boolean pattern can be constant regular
expressions, comparisons, or any other @code{awk} expressions. Range
@@ -4053,13 +4108,13 @@ pattern. @code{/foo/} as an expression has the value 1 if @samp{foo}
appears in the current input record; thus, as a pattern, @code{/foo/}
matches any record containing @samp{foo}.
-Other implementations of @code{awk} that are not yet @sc{POSIX} compliant
+Other implementations of @code{awk} that are not yet @sc{posix} compliant
are less general than @code{gawk}: they allow comparison expressions, and
boolean combinations thereof (optionally with parentheses), but not
necessarily other kinds of expressions.
@node Ranges, BEGIN/END, Expression Patterns, Patterns
-@section Specifying Record Ranges With Patterns
+@section Specifying Record Ranges with Patterns
@cindex range pattern
@cindex patterns, range
@@ -4076,12 +4131,12 @@ awk '$1 == "on", $1 == "off"'
@noindent
prints every record between @samp{on}/@samp{off} pairs, inclusive.
-In more detail, a range pattern starts out by matching @var{begpat}
+A range pattern starts out by matching @var{begpat}
against every input record; when a record matches @var{begpat}, the
range pattern becomes @dfn{turned on}. The range pattern matches this
record. As long as it stays turned on, it automatically matches every
-input record read. But meanwhile, it also matches @var{endpat} against
-every input record, and when that succeeds, the range pattern is turned
+input record read. It also matches @var{endpat} against
+every input record; when that succeeds, the range pattern is turned
off again for the following record. Now it goes back to checking
@var{begpat} against each record.
@@ -4094,7 +4149,7 @@ It is possible for a pattern to be turned both on and off by the same
record, if both conditions are satisfied by that record. Then the action is
executed for just that record.
-@node BEGIN/END,, Ranges, Patterns
+@node BEGIN/END, Empty, Ranges, Patterns
@section @code{BEGIN} and @code{END} Special Patterns
@cindex @code{BEGIN} special pattern
@@ -4108,13 +4163,11 @@ executed, once, before the first input record has been read. An @code{END}
rule is executed, once, after all the input has been read. For
example:@refill
-@group
@example
awk 'BEGIN @{ print "Analysis of `foo'" @}
/foo/ @{ ++foobar @}
END @{ print "`foo' appears " foobar " times." @}' BBS-list
@end example
-@end group
This program finds the number of records in the input file @file{BBS-list}
that contain the string @samp{foo}. The @code{BEGIN} rule prints a title
@@ -4140,7 +4193,8 @@ the order in which library functions are named on the command line
controls the order in which their @code{BEGIN} and @code{END} rules are
executed. Therefore you have to be careful to write such rules in
library files so that the order in which they are executed doesn't matter.
-@xref{Command Line}, for more information on using library functions.
+@xref{Command Line, ,Invoking @code{awk}}, for more information on
+using library functions.
If an @code{awk} program only has a @code{BEGIN} rule, and no other
rules, then the program exits after the @code{BEGIN} rule has been run.
@@ -4153,6 +4207,22 @@ the program. This is necessary in case the @code{END} rule checks the
@code{BEGIN} and @code{END} rules must have actions; there is no default
action for these rules since there is no current record when they run.
+@node Empty, , BEGIN/END, Patterns
+@comment node-name, next, previous, up
+@section The Empty Pattern
+
+@cindex empty pattern
+@cindex pattern, empty
+An empty pattern is considered to match @emph{every} input record. For
+example, the program:@refill
+
+@example
+awk '@{ print $1 @}' BBS-list
+@end example
+
+@noindent
+prints the first field of every record.
+
@node Actions, Expressions, Patterns, Top
@chapter Overview of Actions
@cindex action, definition of
@@ -4160,12 +4230,12 @@ action for these rules since there is no current record when they run.
@cindex action, curly braces
@cindex action, separating statements
-An @code{awk} @dfn{program} or @dfn{script} consists of a series of
-@dfn{rules} and function definitions, interspersed. (Functions are
-described later. @xref{User-defined}.)
+An @code{awk} program or script consists of a series of
+rules and function definitions, interspersed. (Functions are
+described later. @xref{User-defined, ,User-defined Functions}.)
-A rule contains a pattern and an @dfn{action}, either of which may be
-omitted. The purpose of the action is to tell @code{awk} what to do
+A rule contains a pattern and an action, either of which may be
+omitted. The purpose of the @dfn{action} is to tell @code{awk} what to do
once a match for the pattern is found. Thus, the entire program
looks somewhat like this:
@@ -4192,15 +4262,16 @@ Here are the kinds of statements supported in @code{awk}:
@itemize @bullet
@item
Expressions, which can call functions or assign values to variables
-(@pxref{Expressions}). Executing this kind of statement simply computes
-the value of the expression and then ignores it. This is useful when
-the expression has side effects (@pxref{Assignment Ops}).
+(@pxref{Expressions, ,Expressions as Action Statements}). Executing
+this kind of statement simply computes the value of the expression and
+then ignores it. This is useful when the expression has side effects
+(@pxref{Assignment Ops, ,Assignment Expressions}).@refill
@item
Control statements, which specify the control flow of @code{awk}
programs. The @code{awk} language gives you C-like constructs
(@code{if}, @code{for}, @code{while}, and so on) as well as a few
-special ones (@pxref{Statements}).@refill
+special ones (@pxref{Statements, ,Control Statements in Actions}).@refill
@item
Compound statements, which consist of one or more statements enclosed in
@@ -4209,19 +4280,22 @@ statements together in the body of an @code{if}, @code{while}, @code{do}
or @code{for} statement.
@item
-Input control, using the @code{getline} command (@pxref{Getline}),
-and the @code{next} statement (@pxref{Next Statement}).
+Input control, using the @code{getline} command
+(@pxref{Getline, ,Explicit Input with @code{getline}}), and the @code{next}
+statement (@pxref{Next Statement, ,The @code{next} Statement}).
@item
-Output statements, @code{print} and @code{printf}. @xref{Printing}.
+Output statements, @code{print} and @code{printf}.
+@xref{Printing, ,Printing Output}.@refill
@item
-Deletion statements, for deleting array elements. @xref{Delete}.
+Deletion statements, for deleting array elements.
+@xref{Delete, ,The @code{delete} Statement}.@refill
@end itemize
@iftex
The next two chapters cover in detail expressions and control
-statements, respectively. We go on to treat arrays, and built-in
+statements, respectively. We go on to treat arrays and built-in
functions, both of which are used in expressions. Then we proceed
to discuss how to define your own functions.
@end iftex
@@ -4232,9 +4306,7 @@ to discuss how to define your own functions.
Expressions are the basic building block of @code{awk} actions. An
expression evaluates to a value, which you can print, test, store in a
-variable or pass to a function.
-
-But, beyond that, an expression can assign a new value to a variable
+variable or pass to a function. But beyond that, an expression can assign a new value to a variable
or a field, with an assignment operator.
An expression can serve as a statement on its own. Most other kinds of
@@ -4244,22 +4316,27 @@ variables, array references, constants, and function calls, as well as
combinations of these with various operators.
@menu
-* Constants:: String, numeric, and regexp constants.
-* Variables:: Variables give names to values for later use.
-* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, etc.)
-* Concatenation:: Concatenating strings.
-* Comparison Ops:: Comparison of numbers and strings with @samp{<}, etc.
-* Boolean Ops:: Combining comparison expressions using boolean operators
- @samp{||} (``or''), @samp{&&} (``and'') and @samp{!} (``not'').
-
-* Assignment Ops:: Changing the value of a variable or a field.
-* Increment Ops:: Incrementing the numeric value of a variable.
-
-* Conversion:: The conversion of strings to numbers and vice versa.
-* Conditional Exp:: Conditional expressions select between two subexpressions
- under control of a third subexpression.
-* Function Calls:: A function call is an expression.
-* Precedence:: How various operators nest.
+* Constants:: String, numeric, and regexp constants.
+* Variables:: Variables give names to values for later use.
+* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, etc.)
+* Concatenation:: Concatenating strings.
+* Comparison Ops:: Comparison of numbers and strings
+ with @samp{<}, etc.
+* Boolean Ops:: Combining comparison expressions
+ using boolean operators
+ @samp{||} (``or''), @samp{&&} (``and'') and @samp{!} (``not'').
+
+* Assignment Ops:: Changing the value of a variable or a field.
+* Increment Ops:: Incrementing the numeric value of a variable.
+
+* Conversion:: The conversion of strings to numbers
+ and vice versa.
+* Values:: The whole truth about numbers and strings.
+* Conditional Exp:: Conditional expressions select
+ between two subexpressions under control
+ of a third subexpression.
+* Function Calls:: A function call is an expression.
+* Precedence:: How various operators nest.
@end menu
@node Constants, Variables, Expressions, Expressions
@@ -4309,7 +4386,9 @@ sequences beginning with a backslash (@samp{\}).
One use of an escape sequence is to include a double-quote character in
a string constant. Since a plain double-quote would end the string, you
must use @samp{\"} to represent a single double-quote character as a
-part of the string. Backslash itself is another character that can't be
+part of the string.
+The
+backslash character itself is another character that cannot be
included normally; you write @samp{\\} to put one backslash in the
string. Thus, the string whose contents are the two characters
@samp{"\} must be written @code{"\"\\"}.
@@ -4350,25 +4429,27 @@ Represents the octal value @var{nnn}, where @var{nnn} are one to three
digits between 0 and 7. For example, the code for the ASCII ESC
(escape) character is @samp{\033}.@refill
-@item \x@var{hh@dots{}}
+@item \x@var{hh}@dots{}
Represents the hexadecimal value @var{hh}, where @var{hh} are hexadecimal
digits (@samp{0} through @samp{9} and either @samp{A} through @samp{F} or
-@samp{a} through @samp{f}). Like the same construct in @sc{ANSI} C, the escape
+@samp{a} through @samp{f}). Like the same construct in @sc{ansi} C, the escape
sequence continues until the first non-hexadecimal digit is seen. However,
using more than two hexadecimal digits produces undefined results. (The
-@samp{\x} escape sequence is not allowed in @sc{POSIX} @code{awk}.)@refill
+@samp{\x} escape sequence is not allowed in @sc{posix} @code{awk}.)@refill
@end table
-A constant regexp is a regular expression description enclosed in
+A @dfn{constant regexp} is a regular expression description enclosed in
slashes, such as @code{/^beginning and end$/}. Most regexps used in
@code{awk} programs are constant, but the @samp{~} and @samp{!~}
-operators can also match computed or ``dynamic'' regexps (@pxref{Regexp Usage}).
+operators can also match computed or ``dynamic'' regexps
+(@pxref{Regexp Usage, ,How to Use Regular Expressions}).@refill
Constant regexps may be used like simple expressions. When a
constant regexp is not on the right hand side of the @samp{~} or
@samp{!~} operators, it has the same meaning as if it appeared
-in a pattern, i.e. @samp{($0 ~ /foo/)} (@pxref{Expression Patterns}).
-This means that the following two code segments:@refill
+in a pattern, i.e. @samp{($0 ~ /foo/)}
+(@pxref{Expression Patterns, ,Expressions as Patterns}).
+This means that the two code segments,@refill
@example
if ($0 ~ /barfly/ || $0 ~ /camelot/)
@@ -4414,10 +4495,11 @@ will assign either 0 or 1 to the variable @code{matches}, depending
upon the contents of the current input record.
Constant regular expressions are also used as the first argument for
-the @code{sub} and @code{gsub} functions (@pxref{String Functions}).
+the @code{sub} and @code{gsub} functions
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
This feature of the language was never well documented until the
-@sc{POSIX} specification.
+@sc{posix} specification.
You may be wondering, when is
@@ -4437,13 +4519,17 @@ it is more efficient to use the @samp{/foo/} form: @code{awk} can note
that you have supplied a regexp and store it internally in a form that
makes pattern matching more efficient. In the second form, @code{awk}
must first convert the string into this internal form, and then perform
-the pattern matching. The first form is also better ``style;'' it is
-clear that you intend a regexp match.
+the pattern matching. The first form is also better style; it shows
+clearly that you intend a regexp match.
@node Variables, Arithmetic Ops, Constants, Expressions
@section Variables
@cindex variables, user-defined
@cindex user-defined variables
+@c there should be more than one subsection, ideally. Not a big deal.
+@c But usually there are supposed to be at least two. One way to get
+@c around this is to write the info in the subsection as the info in the
+@c section itself and not have any subsections. --mew
Variables let you give names to values and refer to them later. You have
already seen variables in many of the examples. The name of a variable
@@ -4454,7 +4540,7 @@ are distinct variables.
A variable name is a valid expression by itself; it represents the
variable's current value. Variables are given new values with
@dfn{assignment operators} and @dfn{increment operators}.
-@xref{Assignment Ops}.
+@xref{Assignment Ops, ,Assignment Expressions}.
A few variables have special built-in meanings, such as @code{FS}, the
field separator, and @code{NF}, the number of fields in the current
@@ -4464,24 +4550,24 @@ variables, but their values are also used or changed automatically by
@code{awk}. Each built-in variable's name is made entirely of upper case
letters.
-Variables in @code{awk} can be assigned either numeric values or string
+Variables in @code{awk} can be assigned either numeric or string
values. By default, variables are initialized to the null string, which
-is effectively zero if converted to a number. So there is no need to
-``initialize'' each variable explicitly in @code{awk}, the way you would
-need to do in C or most other traditional programming languages.
+is effectively zero if converted to a number. There is no need to
+``initialize'' each variable explicitly in @code{awk}, the way you would in C or most other traditional languages.
@menu
-* Assignment Options:: Setting variables on the command line and a summary
- of command line syntax. This is an advanced method
- of input.
+* Assignment Options:: Setting variables on the command line
+ and a summary of command line syntax.
+ This is an advanced method of input.
@end menu
-@node Assignment Options,, Variables, Variables
+@node Assignment Options, , Variables, Variables
@subsection Assigning Variables on the Command Line
You can set any @code{awk} variable by including a @dfn{variable assignment}
among the arguments on the command line when you invoke @code{awk}
-(@pxref{Command Line}). Such an assignment has this form:
+(@pxref{Command Line, ,Invoking @code{awk}}). Such an assignment has
+this form:@refill
@example
@var{variable}=@var{text}
@@ -4523,7 +4609,7 @@ the @code{awk} program in an array named @code{ARGV}
(@pxref{Built-in Variables}).@refill
@code{awk} processes the values of command line assignments for escape
-sequences (@pxref{Constants}).
+sequences (@pxref{Constants, ,Constant Expressions}).
@node Arithmetic Ops, Concatenation, Variables, Expressions
@section Arithmetic Operators
@@ -4597,7 +4683,7 @@ may be machine dependent.
@itemx @var{x} ** @var{y}
Exponentiation: @var{x} raised to the @var{y} power. @code{2 ^ 3} has
the value 8. The character sequence @samp{**} is equivalent to
-@samp{^}. (The @sc{POSIX} standard only specifies the use of @samp{^}
+@samp{^}. (The @sc{posix} standard only specifies the use of @samp{^}
for exponentiation.)
@end table
@@ -4709,18 +4795,28 @@ True if array @var{array} has an element with the subscript @var{subscript}.
Comparison expressions have the value 1 if true and 0 if false.
The rules @code{gawk} uses for performing comparisons are based on those
-in draft 11.1 of the @sc{POSIX} standard. The @sc{POSIX} standard introduced
+in draft 11.2 of the @sc{posix} standard. The @sc{posix} standard introduced
the concept of a @dfn{numeric string}, which is simply a string that looks
like a number, for example, @code{@w{" +2"}}.
@vindex CONVFMT
When performing a relational operation, @code{gawk} considers the type of an
operand to be the type it received on its last @emph{assignment}, rather
-than the type of its last @emph{use}. If one operand of a comparison is
-numeric, and the other operand is either numeric or a numeric string,
-then @code{gawk} does a numeric comparison. Otherwise, it does a string
-comparison. The numeric operand will be converted to a string using
-the value of @code{CONVFMT} (@pxref{Conversion}). Strings are compared
+than the type of its last @emph{use}
+(@pxref{Values, ,Numeric and String Values}).
+This type is @emph{unknown} when the operand is from an ``external'' source:
+field variables, command line arguments, array elements resulting from a
+@code{split} operation, and the value of an @code{ENVIRON} element.
+In this case only, if the operand is a numeric string, then it is
+considered to be of both string type and numeric type. If at least one
+operand of a comparison is of string type only, then a string
+comparison is performed. Any numeric operand will be converted to a
+string using the value of @code{CONVFMT}
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
+If one operand of a comparison is numeric, and the other operand is
+either numeric or both numeric and string, then @code{gawk} does a
+numeric comparison. If both operands have both types, then the
+comparison is numeric. Strings are compared
by comparing the first character of each, then the second character of each,
and so on. Thus @code{"10"} is less than @code{"9"}. If there are two
strings where one is a prefix of the other, the shorter string is less than
@@ -4737,29 +4833,24 @@ numeric comparison (true)
string comparison (false)
@item 1.5 != " +2"
-numeric comparison (true)
+string comparison (true)
@item "1e2" < "3"
string comparison (true)
@item a = 2; b = "2"
@itemx a == b
-numeric comparison (true)
+string comparison (true)
@end table
-It is important to note that the concept of ``numeric string'' applies
-only to constants in the @code{awk} program source. Input data is somewhat
-different. In reality, all input data to @code{awk} is character data
-(as opposed to binary data). However, @code{awk} interprets characters in
-the input data that look like numbers @emph{as numbers}, and not as numeric
-strings. Thus,
-
@example
echo 1e2 3 | awk '@{ print ($1 < $2) ? "true" : "false" @}'
@end example
@noindent
-prints @samp{false}.
+prints @samp{false} since both @code{$1} and @code{$2} are numeric
+strings and so have both string and numeric types, which dictates
+a numeric comparison.
The purpose of the comparison rules and the use of numeric strings is
to attempt to produce the behavior that is ``least surprising,'' while
@@ -4786,7 +4877,7 @@ has the value 1 if the first field contains @samp{foo}, such as @samp{foobar}.
The right hand operand of the @samp{~} and @samp{!~} operators may be
either a constant regexp (@code{/@dots{}/}), or it may be an ordinary
expression, in which case the value of the expression as a string is a
-dynamic regexp (@pxref{Regexp Usage}).
+dynamic regexp (@pxref{Regexp Usage, ,How to Use Regular Expressions}).
@cindex regexp as expression
In very recent implementations of @code{awk}, a constant regular
@@ -4804,7 +4895,8 @@ regexp to avoid confusing the @code{gawk} parser. For example,
One special place where @code{/foo/} is @emph{not} an abbreviation for
@code{$0 ~ /foo/} is when it is the right-hand operand of @samp{~} or
-@samp{!~}! @xref{Constants}, where this is discussed in more detail.
+@samp{!~}! @xref{Constants, ,Constant Expressions}, where this is
+discussed in more detail.
@node Boolean Ops, Assignment Ops, Comparison Ops, Expressions
@section Boolean Expressions
@@ -4817,8 +4909,8 @@ One special place where @code{/foo/} is @emph{not} an abbreviation for
@cindex or operator
@cindex not operator
-A @dfn{boolean expression} is combination of comparison expressions or
-matching expressions, using the @dfn{boolean operators} ``or''
+A @dfn{boolean expression} is a combination of comparison expressions or
+matching expressions, using the boolean operators ``or''
(@samp{||}), ``and'' (@samp{&&}), and ``not'' (@samp{!}), along with
parentheses to control nesting. The truth of the boolean expression is
computed by combining the truth values of the component expressions.
@@ -4834,8 +4926,9 @@ you can use it as a pattern to control the execution of rules.
Here are descriptions of the three boolean operators, with an example of
each. It may be instructive to compare these examples with the
-analogous examples of boolean patterns (@pxref{Boolean Patterns}), which
-use the same boolean operators in patterns instead of expressions.
+analogous examples of boolean patterns
+(@pxref{Boolean Patterns, ,Boolean Operators and Patterns}), which
+use the same boolean operators in patterns instead of expressions.@refill
@table @code
@item @var{boolean1} && @var{boolean2}
@@ -4843,9 +4936,9 @@ True if both @var{boolean1} and @var{boolean2} are true. For example,
the following statement prints the current input record if it contains
both @samp{2400} and @samp{foo}.@refill
-@example
+@smallexample
if ($0 ~ /2400/ && $0 ~ /foo/) print
-@end example
+@end smallexample
The subexpression @var{boolean2} is evaluated only if @var{boolean1}
is true. This can make a difference when @var{boolean2} contains
@@ -4854,14 +4947,14 @@ expressions that have side effects: in the case of @code{$0 ~ /foo/ &&
no @samp{foo} in the record.
@item @var{boolean1} || @var{boolean2}
-True if at least one of @var{boolean1} and @var{boolean2} is true.
+True if at least one of @var{boolean1} or @var{boolean2} is true.
For example, the following command prints all records in the input
file @file{BBS-list} that contain @emph{either} @samp{2400} or
@samp{foo}, or both.@refill
-@example
+@smallexample
awk '@{ if ($0 ~ /2400/ || $0 ~ /foo/) print @}' BBS-list
-@end example
+@end smallexample
The subexpression @var{boolean2} is evaluated only if @var{boolean1}
is false. This can make a difference when @var{boolean2} contains
@@ -4872,9 +4965,9 @@ True if @var{boolean} is false. For example, the following program prints
all records in the input file @file{BBS-list} that do @emph{not} contain the
string @samp{foo}.
-@example
+@smallexample
awk '@{ if (! ($0 ~ /foo/)) print @}' BBS-list
-@end example
+@end smallexample
@end table
@node Assignment Ops, Increment Ops, Boolean Ops, Expressions
@@ -4919,9 +5012,11 @@ makes itself felt through the alteration of the variable. We call this
a @dfn{side effect}.
@cindex lvalue
-The left-hand operand of an assignment need not be a variable (@pxref{Variables});
-it can also be a field (@pxref{Changing Fields}) or
-an array element (@pxref{Arrays}). These are all called @dfn{lvalues},
+The left-hand operand of an assignment need not be a variable
+(@pxref{Variables}); it can also be a field
+(@pxref{Changing Fields, ,Changing the Contents of a Field}) or
+an array element (@pxref{Arrays, ,Arrays in @code{awk}}).
+These are all called @dfn{lvalues},
which means they can appear on the left-hand side of an assignment operator.
The right-hand operand may be any expression; it produces the new value
which the assignment stores in the specified variable, field or array
@@ -5007,7 +5102,7 @@ Sets @var{lvalue} to its remainder by @var{modulus}.
@item @var{lvalue} ^= @var{power}
@itemx @var{lvalue} **= @var{power}
Raises @var{lvalue} to the power @var{power}.
-(Only the @code{^=} operator is specified by @sc{POSIX}.)
+(Only the @code{^=} operator is specified by @sc{posix}.)
@end table
@ignore
@@ -5087,7 +5182,7 @@ decrements @var{lvalue}. The value of the expression is the @emph{old}
value of @var{lvalue}.
@end table
-@node Conversion, Conditional Exp, Increment Ops, Expressions
+@node Conversion, Values, Increment Ops, Expressions
@section Conversion of Strings and Numbers
@cindex conversion of strings and numbers
@@ -5113,17 +5208,19 @@ If, for some reason, you need to force a number to be converted to a
string, concatenate the null string with that number. To force a string
to be converted to a number, add zero to that string.
-Strings are converted to numbers by interpreting them as numerals:
-@code{"2.5"} converts to 2.5, and @code{"1e3"} converts to 1000.
+A string is converted to a number by interpreting a numeric prefix
+of the string as numerals:
+@code{"2.5"} converts to 2.5, @code{"1e3"} converts to 1000, and @code{"25fix"}
+has a numeric value of 25.
Strings that can't be interpreted as valid numbers are converted to
zero.
@vindex CONVFMT
The exact manner in which numbers are converted into strings is controlled
by the @code{awk} built-in variable @code{CONVFMT} (@pxref{Built-in Variables}).
-Numbers are converted using a special
-version of the @code{sprintf} function (@pxref{Built-in}) with @code{CONVFMT}
-as the format specifier.@refill
+Numbers are converted using a special version of the @code{sprintf} function
+(@pxref{Built-in, ,Built-in Functions}) with @code{CONVFMT} as the format
+specifier.@refill
@code{CONVFMT}'s default value is @code{"%.6g"}, which prints a value with
at most six significant digits. For some applications you will want to
@@ -5154,10 +5251,15 @@ the manual assumes everywhere that variables are either numbers or strings;
in fact both kinds of values may be valid. If both happen to be valid, a
conversion isn't necessary and isn't done. Revising the manual to be
consistent with this, though, is too big a job to tackle at the moment.
+
+7/92: This has sort of been done, only the section isn't completely right!
+ What to do?
+7/92: Pretty much fixed, at least for the short term, thanks to text
+ from David.
@end ignore
@vindex OFMT
-Prior to the @sc{POSIX} standard, @code{awk} specified that the value
+Prior to the @sc{posix} standard, @code{awk} specified that the value
of @code{OFMT} was used for converting numbers to strings. @code{OFMT}
specifies the output format to use when printing numbers with @code{print}.
@code{CONVFMT} was introduced in order to separate the semantics of
@@ -5168,7 +5270,74 @@ However, this use of @code{OFMT} is something to keep in mind if you must
port your program to other implementations of @code{awk}; we recommend
that instead of changing your programs, you just port @code{gawk} itself!@refill
-@node Conditional Exp, Function Calls, Conversion, Expressions
+@node Values, Conditional Exp, Conversion, Expressions
+@section Numeric and String Values
+@cindex conversion of strings and numbers
+
+Through most of this manual, we present @code{awk} values (such as constants,
+fields, or variables) as @emph{either} numbers @emph{or} strings. This is
+a convenient way to think about them, since typically they are used in only
+one way, or the other.
+
+In truth though, @code{awk} values can be @emph{both} string and
+numeric, at the same time. Internally, @code{awk} represents values
+with a string, a (floating point) number, and an indication that one,
+the other, or both representations of the value are valid.
+
+Keeping track of both kinds of values is important for execution
+efficiency: a variable can acquire a string value the first time it
+is used as a string, and then that string value can be used until the
+variable is assigned a new value. Thus, if a variable with only a numeric
+value is used in several concatenations in a row, it only has to be given
+a string representation once. The numeric value remains valid, so that
+no conversion back to a number is necessary if the variable is later used
+in an arithmetic expression.
+
+Tracking both kinds of values is also important for precise numerical
+calculations. Consider the following:
+
+@smallexample
+a = 123.321
+CONVFMT = "%3.1f"
+b = a " is a number"
+c = a + 1.654
+@end smallexample
+
+@noindent
+The variable @code{a} receives a string value in the concatenation and
+assignment to @code{b}. The string value of @code{a} is @code{"123.3"}.
+If the numeric value were lost when it was converted to a string, then the
+numeric use of @code{a} in the last statement would lose information.
+@code{c} would be assigned the value 124.954 instead of 124.975.
+Such errors accumulate rapidly, and very adversely affect numeric
+computations.@refill
+
+Once a numeric value acquires a corresponding string value, it stays valid
+until a new assignment is made. If @code{CONVFMT}
+(@pxref{Conversion, ,Conversion of Strings and Numbers}) changes in the
+meantime, the old string value will still be used. For example:@refill
+
+@smallexample
+BEGIN @{
+ CONVFMT = "%2.2f"
+ a = 123.456
+ b = a "" # force `a' to have string value too
+ printf "a = %s\n", a
+ CONVFMT = "%.6g"
+ printf "a = %s\n", a
+ a += 0 # make `a' numeric only again
+ printf "a = %s\n", a # use `a' as string
+@}
+@end smallexample
+
+@noindent
+This program prints @samp{a = 123.46} twice, and then prints
+@samp{a = 123.456}.
+
+@xref{Conversion, ,Conversion of Strings and Numbers}, for the rules that
+specify how string values are made from numeric values.
+
+@node Conditional Exp, Function Calls, Values, Expressions
@section Conditional Expressions
@cindex conditional expression
@cindex expression, conditional
@@ -5222,10 +5391,10 @@ example, the function @code{sqrt} computes the square root of a number.
A fixed set of functions are @dfn{built-in}, which means they are
available in every @code{awk} program. The @code{sqrt} function is one
-of these. @xref{Built-in}, for a list of built-in functions and their
-descriptions. In addition, you can define your own functions in the
-program for use elsewhere in the same program. @xref{User-defined},
-for how to do this.
+of these. @xref{Built-in, ,Built-in Functions}, for a list of built-in
+functions and their descriptions. In addition, you can define your own
+functions in the program for use elsewhere in the same program.
+@xref{User-defined, ,User-defined Functions}, for how to do this.@refill
@cindex arguments in function call
The way to use a function is with a @dfn{function call} expression,
@@ -5247,8 +5416,8 @@ open-parenthesis!} A user-defined function name looks just like the name of
a variable, and space would make the expression look like concatenation
of a variable with an expression inside parentheses. Space before the
parenthesis is harmless with built-in functions, but it is best not to get
-into the habit of using space, lest you do likewise for a user-defined
-function one day by mistake.
+into the habit of using space, so as to avoid mistakes with user-defined
+functions.
Each function expects a particular number of arguments. For example, the
@code{sqrt} function must be called with a single argument, the number
@@ -5259,10 +5428,11 @@ sqrt(@var{argument})
@end example
Some of the built-in functions allow you to omit the final argument.
-If you do so, they use a reasonable default. @xref{Built-in},
-for full details. If arguments are omitted in calls to user-defined
-functions, then those arguments are treated as local variables,
-initialized to the null string (@pxref{User-defined}).
+If you do so, they use a reasonable default.
+@xref{Built-in, ,Built-in Functions}, for full details. If arguments
+are omitted in calls to user-defined functions, then those arguments are
+treated as local variables, initialized to the null string
+(@pxref{User-defined, ,User-defined Functions}).@refill
Like every other expression, the function call has a value, which is
computed by the function based on the arguments you give it. In this
@@ -5277,7 +5447,7 @@ square root of each one:
awk '@{ print "The square root of", $1, "is", sqrt($1) @}'
@end example
-@node Precedence,, Function Calls, Expressions
+@node Precedence, , Function Calls, Expressions
@section Operator Precedence (How Operators Nest)
@cindex precedence
@cindex operator precedence
@@ -5291,10 +5461,10 @@ product (i.e., @code{a + (b * c)}).
You can overrule the precedence of the operators by using parentheses.
You can think of the precedence rules as saying where the
parentheses are assumed if you do not write parentheses yourself. In
-fact, it is wise always to use parentheses whenever you have an unusual
+fact, it is wise to always use parentheses whenever you have an unusual
combination of operators, because other people who read the program may
not remember what the precedence is in this case. You might forget,
-too; then you could make a mistake. Explicit parentheses will prevent
+too; then you could make a mistake. Explicit parentheses will help prevent
any such mistake.
When operators of equal precedence are used together, the leftmost
@@ -5319,10 +5489,10 @@ precedence:
@item assignment
@samp{=}, @samp{+=}, @samp{-=}, @samp{*=}, @samp{/=}, @samp{%=},
@samp{^=}, @samp{**=}. These operators group right-to-left.
-(The @samp{**=} operator is not specified by @sc{POSIX}.)
+(The @samp{**=} operator is not specified by @sc{posix}.)
@item conditional
-@samp{?:}. These operators group right-to-left.
+@samp{?:}. This operator groups right-to-left.
@item logical ``or''.
@samp{||}.
@@ -5370,7 +5540,7 @@ The operands are simply written side by side.
@item exponentiation
@samp{^}, @samp{**}. These operators group right-to-left.
-(The @samp{**} operator is not specified by @sc{POSIX}.)
+(The @samp{**} operator is not specified by @sc{posix}.)
@item increment, decrement
@samp{++}, @samp{--}.
@@ -5399,23 +5569,19 @@ single compound statement with curly braces, separating them with
newlines or semicolons.
@menu
-* If Statement:: Conditionally execute some @code{awk} statements.
-
-* While Statement:: Loop until some condition is satisfied.
-
-* Do Statement:: Do specified action while looping until some
- condition is satisfied.
-
-* For Statement:: Another looping statement, that provides
- initialization and increment clauses.
-
-* Break Statement:: Immediately exit the innermost enclosing loop.
-
-* Continue Statement:: Skip to the end of the innermost enclosing loop.
-
-* Next Statement:: Stop processing the current input record.
-
-* Exit Statement:: Stop execution of @code{awk}.
+* If Statement:: Conditionally execute
+ some @code{awk} statements.
+* While Statement:: Loop until some condition is satisfied.
+* Do Statement:: Do specified action while looping until some
+ condition is satisfied.
+* For Statement:: Another looping statement, that provides
+ initialization and increment clauses.
+* Break Statement:: Immediately exit the innermost enclosing loop.
+* Continue Statement:: Skip to the end of the innermost
+ enclosing loop.
+* Next Statement:: Stop processing the current input record.
+* Next File Statement:: Stop processing the current file.
+* Exit Statement:: Stop execution of @code{awk}.
@end menu
@node If Statement, While Statement, Statements, Statements
@@ -5430,12 +5596,12 @@ if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]}
@end example
@noindent
-Here @var{condition} is an expression that controls what the rest of the
+@var{condition} is an expression that controls what the rest of the
statement will do. If @var{condition} is true, @var{then-body} is
executed; otherwise, @var{else-body} is executed (assuming that the
@code{else} clause is present). The @code{else} part of the statement is
optional. The condition is considered false if its value is zero or
-the null string, true otherwise.@refill
+the null string, and true otherwise.@refill
Here is an example:
@@ -5456,12 +5622,10 @@ If the @code{else} appears on the same line as @var{then-body}, and
curly braces), then a semicolon must separate @var{then-body} from
@code{else}. To illustrate this, let's rewrite the previous example:
-@group
@example
awk '@{ if (x % 2 == 0) print "x is even"; else
print "x is odd" @}'
@end example
-@end group
@noindent
If you forget the @samp{;}, @code{awk} won't be able to parse the
@@ -5496,7 +5660,7 @@ keeps running.
The first thing the @code{while} statement does is test @var{condition}.
If @var{condition} is true, it executes the statement @var{body}.
-(Truth, as usual in @code{awk}, means that the value of @var{condition}
+(@var{condition} is true when its value
is not zero and not a null string.) After @var{body} has been executed,
@var{condition} is tested again, and if it is still true, @var{body} is
executed again. This process repeats until @var{condition} is no longer
@@ -5537,13 +5701,11 @@ The @code{do} loop is a variation of the @code{while} looping statement.
The @code{do} loop executes the @var{body} once, then repeats @var{body}
as long as @var{condition} is true. It looks like this:
-@group
@example
do
@var{body}
while (@var{condition})
@end example
-@end group
Even if @var{condition} is false at the start, @var{body} is executed at
least once (and only once, unless executing @var{body} makes
@@ -5598,9 +5760,11 @@ compares it against the desired number of iterations.
Here is an example of a @code{for} statement:
@example
+@group
awk '@{ for (i = 1; i <= 3; i++)
print $i
@}'
+@end group
@end example
@noindent
@@ -5650,9 +5814,10 @@ while (@var{condition}) @{
@noindent
The only exception is when the @code{continue} statement
-(@pxref{Continue Statement}) is used inside the loop; changing a
-@code{for} statement to a @code{while} statement in this way can change
-the effect of the @code{continue} statement inside the loop.
+(@pxref{Continue Statement, ,The @code{continue} Statement}) is used
+inside the loop; changing a @code{for} statement to a @code{while}
+statement in this way can change the effect of the @code{continue}
+statement inside the loop.@refill
There is an alternate version of the @code{for} loop, for iterating over
all the indices of an array:
@@ -5663,7 +5828,8 @@ for (i in array)
@end example
@noindent
-@xref{Arrays}, for more information on this version of the @code{for} loop.
+@xref{Arrays, ,Arrays in @code{awk}}, for more information on this
+version of the @code{for} loop.
The @code{awk} language has a @code{for} statement in addition to a
@code{while} statement because often a @code{for} loop is both less work to
@@ -5699,14 +5865,15 @@ When the remainder is zero in the first @code{if} statement, @code{awk}
immediately @dfn{breaks out} of the containing @code{for} loop. This means
that @code{awk} proceeds immediately to the statement following the loop
and continues processing. (This is very different from the @code{exit}
-statement (@pxref{Exit Statement}) which stops the entire @code{awk}
-program.)@refill
+statement which stops the entire @code{awk} program.
+@xref{Exit Statement, ,The @code{exit} Statement}.)@refill
Here is another program equivalent to the previous one. It illustrates how
the @var{condition} of a @code{for} or @code{while} could just as well be
replaced with a @code{break} inside an @code{if}:
@example
+@group
awk '# find smallest divisor of num
@{ num = $1
for (div = 2; ; div++) @{
@@ -5720,6 +5887,7 @@ awk '# find smallest divisor of num
@}
@}
@}'
+@end group
@end example
@node Continue Statement, Next Statement, Break Statement, Statements
@@ -5752,7 +5920,7 @@ If one of the input records contains the string @samp{ignore}, this
example skips the print statement for that record, and continues back to
the first statement in the loop.
-This isn't a practical example of @code{continue}, since it would be
+This is not a practical example of @code{continue}, since it would be
just as easy to write the loop like this:
@example
@@ -5811,7 +5979,18 @@ awk 'BEGIN @{
@noindent
This program loops forever once @code{x} gets to 5.
-@node Next Statement, Exit Statement, Continue Statement, Statements
+As described above, the @code{continue} statement has no meaning when
+used outside the body of a loop. However, although it was never documented,
+historical implementations of @code{awk} have treated the @code{continue}
+statement outside of a loop as if it were a @code{next} statement
+(@pxref{Next Statement, ,The @code{next} Statement}).
+By default, @code{gawk} silently supports this usage. However, if
+@samp{-W posix} has been specified on the command line
+(@pxref{Command Line, ,Invoking @code{awk}}),
+it will be treated as an error, since the @sc{posix} standard specifies
+that @code{continue} should only be used inside the body of a loop.@refill
+
+@node Next Statement, Next File Statement, Continue Statement, Statements
@section The @code{next} Statement
@cindex @code{next} statement
@@ -5821,9 +6000,10 @@ further rules are executed for the current record. The rest of the
current rule's action is not executed either.
Contrast this with the effect of the @code{getline} function
-(@pxref{Getline}). That too causes @code{awk} to read the next record
-immediately, but it does not alter the flow of control in any way. So
-the rest of the current action executes with a new input record.
+(@pxref{Getline, ,Explicit Input with @code{getline}}). That too causes
+@code{awk} to read the next record immediately, but it does not alter the
+flow of control in any way. So the rest of the current action executes
+with a new input record.
At the highest level, @code{awk} program execution is a loop that reads
an input record and then tests each rule's pattern against it. If you
@@ -5836,27 +6016,88 @@ For example, if your @code{awk} program works only on records with four
fields, and you don't want it to fail when given bad input, you might
use this rule near the beginning of the program:
-@example
+@smallexample
NF != 4 @{
printf("line %d skipped: doesn't have 4 fields", FNR) > "/dev/stderr"
next
@}
-@end example
+@end smallexample
@noindent
so that the following rules will not see the bad record. The error
message is redirected to the standard error output stream, as error
-messages should be. @xref{Special Files}.
+messages should be. @xref{Special Files, ,Standard I/O Streams}.
-According to the @sc{POSIX} standard, the behavior is undefined if
+According to the @sc{posix} standard, the behavior is undefined if
the @code{next} statement is used in a @code{BEGIN} or @code{END} rule.
@code{gawk} will treat it as a syntax error.
If the @code{next} statement causes the end of the input to be reached,
then the code in the @code{END} rules, if any, will be executed.
-@ref{BEGIN/END}.
+@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.
+
+@node Next File Statement, Exit Statement, Next Statement, Statements
+@section The @code{next file} Statement
+
+@cindex @code{next file} statement
+The @code{next file} statement is similar to the @code{next} statement.
+However, instead of abandoning processing of the current record, the
+@code{next file} statement instructs @code{awk} to stop processing the
+current data file.
-@node Exit Statement, , Next Statement, Statements
+Upon execution of the @code{next file} statement, @code{FILENAME} is
+updated to the name of the next data file listed on the command line,
+@code{FNR} is reset to 1, and processing starts over with the first
+rule in the program.  @xref{Built-in Variables}.
+
+If the @code{next file} statement causes the end of the input to be reached,
+then the code in the @code{END} rules, if any, will be executed.
+@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.
+
+The @code{next file} statement is a @code{gawk} extension; it is not
+(currently) available in any other @code{awk} implementation. You can
+simulate its behavior by creating a library file named @file{nextfile.awk},
+with the following contents. (This sample program uses user-defined
+functions, a feature that has not been presented yet.
+@xref{User-defined, ,User-defined Functions},
+for more information.)@refill
+
+@example
+# nextfile --- function to skip remaining records in current file
+
+# this should be read in before the "main" awk program
+
+function nextfile() @{ _abandon_ = FILENAME; next @}
+
+_abandon_ == FILENAME && FNR > 1 @{ next @}
+_abandon_ == FILENAME && FNR == 1 @{ _abandon_ = "" @}
+@end example
+
+The @code{nextfile} function simply sets a ``private'' variable@footnote{Since
+all variables in @code{awk} are global, this program uses the common
+practice of prefixing the variable name with an underscore. In fact, it
+also suffixes the variable name with an underscore, as extra insurance
+against using a variable name that might be used in some other library
+file.} to the name of the current data file, and then retrieves the next
+record. Since this file is read before the main @code{awk} program,
+the rules that follow the function definition will be executed before the
+rules in the main program. The first rule continues to skip records as long as
+the name of the input file has not changed, and this is not the first
+record in the file. This rule is sufficient most of the time. But what if
+the @emph{same} data file is named twice in a row on the command line?
+This rule would not process the data file the second time. The second rule
+catches this case: If the data file name is what was being skipped, but
+@code{FNR} is 1, then this is the second time the file is being processed,
+and it should not be skipped.
+
+The @code{next file} statement would be useful if you have many data
+files to process, and due to the nature of the data, you expect that you
+would not want to process every record in the file. Without it, to move on to
+the next data file, you would have to continue scanning the unwanted
+records (as described above). The @code{next file} statement accomplishes
+this much more efficiently.
+
+@node Exit Statement, , Next File Statement, Statements
@section The @code{exit} Statement
@cindex @code{exit} statement
@@ -5867,7 +6108,7 @@ is ignored.@refill
If an @code{exit} statement is executed from a @code{BEGIN} rule the
program stops processing everything immediately. No input records are
read. However, if an @code{END} rule is present, it is executed
-(@pxref{BEGIN/END}).
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).
If @code{exit} is used as part of an @code{END} rule, it causes
the program to stop immediately.
@@ -5875,7 +6116,7 @@ the program to stop immediately.
An @code{exit} statement that is part of an ordinary rule (that is, not part
of a @code{BEGIN} or @code{END} rule) stops the execution of any further
automatic rules, but the @code{END} rule is executed if there is one.
-If you don't want the @code{END} rule to do its job in this case, you
+If you do not want the @code{END} rule to do its job in this case, you
can set a variable to nonzero before the @code{exit} statement, and check
that variable in the @code{END} rule.
@@ -5890,38 +6131,38 @@ using an @code{exit} statement with a nonzero argument. Here's an
example of this:@refill
@example
+@group
BEGIN @{
if (("date" | getline date_now) < 0) @{
print "Can't get system date" > "/dev/stderr"
exit 4
@}
@}
+@end group
@end example
@node Arrays, Built-in, Statements, Top
@chapter Arrays in @code{awk}
An @dfn{array} is a table of values, called @dfn{elements}. The
-elements of an array are distinguished by their @dfn{indices}. Indices
+elements of an array are distinguished by their indices. @dfn{Indices}
may be either numbers or strings. Each array has a name, which looks
like a variable name, but must not be in use as a variable name in the
same @code{awk} program.
@menu
-* Intro: Array Intro. Basic facts about arrays in @code{awk}.
-* Reference to Elements:: How to examine one element of an array.
-* Assigning Elements:: How to change an element of an array.
-* Example: Array Example. Sample program explained.
-
-* Scanning an Array:: A variation of the @code{for} statement. It loops
- through the indices of an array's existing elements.
-
-* Delete:: The @code{delete} statement removes an element from an array.
-
-* Numeric Array Subscripts:: How to use numbers as subscripts in @code{awk}.
-
-* Multi-dimensional:: Emulating multi-dimensional arrays in @code{awk}.
-* Multi-scanning:: Scanning multi-dimensional arrays.
+* Array Intro:: Introduction to Arrays
+* Reference to Elements:: How to examine one element of an array.
+* Assigning Elements:: How to change an element of an array.
+* Array Example:: Basic Example of an Array
+* Scanning an Array:: A variation of the @code{for} statement.
+ It loops through the indices of
+ an array's existing elements.
+* Delete:: The @code{delete} statement removes
+ an element from an array.
+* Numeric Array Subscripts:: How to use numbers as subscripts in @code{awk}.
+* Multi-dimensional:: Emulating multi-dimensional arrays in @code{awk}.
+* Multi-scanning:: Scanning multi-dimensional arrays.
@end menu
@node Array Intro, Reference to Elements, Arrays, Arrays
@@ -5939,7 +6180,7 @@ as a variable) in one @code{awk} program.
Arrays in @code{awk} superficially resemble arrays in other programming
languages; but there are fundamental differences. In @code{awk}, you
don't need to specify the size of an array before you start to use it.
-What's more, in @code{awk} any number or string may be used as an
+Additionally, any number or string in @code{awk} may be used as an
array index.
In most other languages, you have to @dfn{declare} an array and specify
@@ -5952,8 +6193,9 @@ specifies the second element, which is stored in memory right after the
first element, and so on. It is impossible to add more elements to the
array, because it has room for only as many elements as you declared.
-A contiguous array of four elements might look like this, conceptually,
-if the element values are 8, @code{"foo"}, @code{""} and 30:@refill
+A contiguous array of four elements might look like this,
+conceptually, if the element values are @code{8}, @code{"foo"},
+@code{""} and @code{30}:@refill
@example
+---------+---------+--------+---------+
@@ -5964,7 +6206,7 @@ if the element values are 8, @code{"foo"}, @code{""} and 30:@refill
@noindent
Only the values are stored; the indices are implicit from the order of
-the values. 8 is the value at index 0, because 8 appears in the
+the values. @code{8} is the value at index 0, because @code{8} appears in the
position with 0 elements before it.
@cindex arrays, definition of
@@ -6017,8 +6259,9 @@ numeric form---thus illustrating that a single array can have both
numbers and strings as indices.
When @code{awk} creates an array for you, e.g., with the @code{split}
-built-in function (@pxref{String Functions}), that array's indices
-are consecutive integers starting at 1.
+built-in function,
+that array's indices are consecutive integers starting at 1.
+(@xref{String Functions, ,Built-in Functions for String Manipulation}.)
@node Reference to Elements, Assigning Elements, Array Intro, Arrays
@section Referring to an Array Element
@@ -6034,7 +6277,7 @@ An array reference is an expression which looks like this:
@end example
@noindent
-Here @var{array} is the name of an array. The expression @var{index} is
+Here, @var{array} is the name of an array. The expression @var{index} is
the index of the element of the array that you want.
The value of the array reference is the current value of that array
@@ -6044,11 +6287,12 @@ of array @code{foo} at index 4.3.
If you refer to an array element that has no recorded value, the value
of the reference is @code{""}, the null string. This includes elements
to which you have not assigned any value, and elements that have been
-deleted (@pxref{Delete}). Such a reference automatically creates that
-array element, with the null string as its value. (In some cases,
-this is unfortunate, because it might waste memory inside @code{awk}).
+deleted (@pxref{Delete, ,The @code{delete} Statement}). Such a reference
+automatically creates that array element, with the null string as its value.
+(In some cases, this is unfortunate, because it might waste memory inside
+@code{awk}.)
-@cindex arrays, determining presence of elements
+@cindex arrays, presence of elements
You can find out if an element exists in an array at a certain index with
the expression:
@@ -6065,9 +6309,9 @@ exists, and 0 (false) if it does not exist.@refill
For example, to test whether the array @code{frequencies} contains the
index @code{"2"}, you could write this statement:@refill
-@example
+@smallexample
if ("2" in frequencies) print "Subscript \"2\" is present."
-@end example
+@end smallexample
Note that this is @emph{not} a test of whether or not the array
@code{frequencies} contains an element whose @emph{value} is @code{"2"}.
@@ -6075,9 +6319,9 @@ Note that this is @emph{not} a test of whether or not the array
@emph{does not} create @code{frequencies["2"]}, while the following
(incorrect) alternative would do so:@refill
-@example
+@smallexample
if (frequencies["2"] != "") print "Subscript \"2\" is present."
-@end example
+@end smallexample
@node Assigning Elements, Array Example, Reference to Elements, Arrays
@section Assigning Array Elements
@@ -6164,7 +6408,7 @@ END @{
@end example
@node Scanning an Array, Delete, Array Example, Arrays
-@section Scanning All Elements of an Array
+@section Scanning all Elements of an Array
@cindex @code{for (x in @dots{})}
@cindex arrays, special @code{for} statement
@cindex scanning an array
@@ -6194,10 +6438,10 @@ least once) in the input, by storing a 1 into the array @code{used} with
the word as index. The second rule scans the elements of @code{used} to
find all the distinct words that appear in the input. It prints each
word that is more than 10 characters long, and also prints the number of
-such words. @xref{Built-in}, for more information on the built-in
-function @code{length}.
+such words. @xref{Built-in, ,Built-in Functions}, for more information
+on the built-in function @code{length}.
-@example
+@smallexample
# Record a 1 for each word that is used at least once.
@{
for (i = 1; i <= NF; i++)
@@ -6213,7 +6457,7 @@ END @{
@}
print num_long_words, "words longer than 10 characters"
@}
-@end example
+@end smallexample
@noindent
@xref{Sample Program}, for a more detailed example of this type.
@@ -6240,9 +6484,10 @@ statement:
delete @var{array}[@var{index}]
@end example
-When an array element is deleted, it is as if you had never referred to it
-and had never given it any value. Any value the element formerly had
-can no longer be obtained.
+You cannot refer to an array element after it has been deleted;
+it is as if you had never referred
+to it and had never given it any value. You can no longer obtain any
+value the element once had.
Here is an example of deleting elements in an array:
@@ -6272,7 +6517,7 @@ It is not an error to delete an element which does not exist.
An important aspect of arrays to remember is that array subscripts
are @emph{always} strings. If you use a numeric value as a subscript,
it will be converted to a string value before it is used for subscripting
-(@pxref{Conversion}).
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
@cindex conversions, during subscripting
@cindex numbers, used as subscripts
@@ -6301,9 +6546,10 @@ to be converted to a string, this time @code{"12.15"}, since the value of
@code{CONVFMT} only allows two significant digits. This test fails,
since @code{"12.15"} is a different string from @code{"12.153"}.@refill
-Following the rules for conversions (@pxref{Conversion}), integer
+According to the rules for conversions
+(@pxref{Conversion, ,Conversion of Strings and Numbers}), integer
values are always converted to strings as integers, no matter what the
-value of @code{CONVFMT} may happen to be. So the usual case of
+value of @code{CONVFMT} may happen to be. So the usual case of@refill
@example
for (i = 1; i <= maxsub; i++)
@@ -6321,7 +6567,8 @@ effect on your programs.
@node Multi-dimensional, Multi-scanning, Numeric Array Subscripts, Arrays
@section Multi-dimensional Arrays
-@cindex subscripts, multi-dimensional in arrays
+@c the following index entry is an overfull hbox. --mew 30jan1992
+@cindex subscripts in arrays
@cindex arrays, multi-dimensional subscripts
@cindex multi-dimensional subscripts
A multi-dimensional array is an array in which an element is identified
@@ -6334,17 +6581,18 @@ two-dimensional array named @code{grid} is with
@vindex SUBSEP
Multi-dimensional arrays are supported in @code{awk} through
concatenation of indices into one string. What happens is that
-@code{awk} converts the indices into strings (@pxref{Conversion}) and
+@code{awk} converts the indices into strings
+(@pxref{Conversion, ,Conversion of Strings and Numbers}) and
concatenates them together, with a separator between them. This creates
a single string that describes the values of the separate indices. The
combined string is used as a single index into an ordinary,
one-dimensional array. The separator used is the value of the built-in
-variable @code{SUBSEP}.
+variable @code{SUBSEP}.@refill
For example, suppose we evaluate the expression @code{foo[5,12]="value"}
when the value of @code{SUBSEP} is @code{"@@"}. The numbers 5 and 12 are
converted to strings and
-concatenated with a comma between them, yielding @code{"5@@12"}; thus,
+concatenated with an @samp{@@} between them, yielding @code{"5@@12"}; thus,
the array element @code{foo["5@@12"]} is set to @code{"value"}.@refill
Once the element's value is stored, @code{awk} has no record of whether
@@ -6403,25 +6651,29 @@ END @{
When given the input:
@example
+@group
1 2 3 4 5 6
2 3 4 5 6 1
3 4 5 6 1 2
4 5 6 1 2 3
+@end group
@end example
@noindent
it produces:
@example
+@group
4 3 2 1
5 4 3 2
6 5 4 3
1 6 5 4
2 1 6 5
3 2 1 6
+@end group
@end example
-@node Multi-scanning, , Multi-dimensional, Arrays
+@node Multi-scanning, , Multi-dimensional, Arrays
@section Scanning Multi-dimensional Arrays
There is no special @code{for} statement for scanning a
@@ -6431,9 +6683,11 @@ multi-dimensional @emph{way of accessing} an array.
However, if your program has an array that is always accessed as
multi-dimensional, you can get the effect of scanning it by combining
-the scanning @code{for} statement (@pxref{Scanning an Array}) with the
-@code{split} built-in function (@pxref{String Functions}). It works
-like this:
+the scanning @code{for} statement
+(@pxref{Scanning an Array, ,Scanning all Elements of an Array}) with the
+@code{split} built-in function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+It works like this:@refill
@example
for (combined in @var{array}) @{
@@ -6473,20 +6727,16 @@ been recovered.
your @code{awk} program to call. This chapter defines all the built-in
functions in @code{awk}; some of them are mentioned in other sections,
but they are summarized here for your convenience. (You can also define
-new functions yourself. @xref{User-defined}.)
+new functions yourself. @xref{User-defined, ,User-defined Functions}.)
@menu
-* Calling Built-in:: How to call built-in functions.
-
-* Numeric Functions:: Functions that work with numbers,
- including @code{int}, @code{sin} and @code{rand}.
-
-* String Functions:: Functions for string manipulation,
- such as @code{split}, @code{match}, and @code{sprintf}.
-
-* I/O Functions:: Functions for files and shell commands.
-
-* Time Functions:: Functions for dealing with time stamps.
+* Calling Built-in:: How to call built-in functions.
+* Numeric Functions:: Functions that work with numbers,
+ including @code{int}, @code{sin} and @code{rand}.
+* String Functions:: Functions for string manipulation,
+ such as @code{split}, @code{match}, and @code{sprintf}.
+* I/O Functions:: Functions for files and shell commands.
+* Time Functions:: Functions for dealing with time stamps.
@end menu
@node Calling Built-in, Numeric Functions, Built-in, Built-in
@@ -6522,6 +6772,8 @@ with a value of 4 for its actual parameter.
@node Numeric Functions, String Functions, Calling Built-in, Built-in
@section Numeric Built-in Functions
+@c I didn't make all the examples small because a couple of them were
+@c short already. --mew 29jan1992
Here is a full list of built-in functions that work with numbers:
@@ -6578,7 +6830,7 @@ Here is an example where a similar function is used to produce
random integers between 1 and @var{n}. Note that this program will
print a new random number for each input record.
-@example
+@smallexample
awk '
# Function to roll a simulated die.
function roll(n) @{ return 1 + int(rand() * n) @}
@@ -6587,7 +6839,7 @@ function roll(n) @{ return 1 + int(rand() * n) @}
@{
printf("%d points\n", roll(6)+roll(6)+roll(6))
@}'
-@end example
+@end smallexample
@strong{Note:} @code{rand} starts generating numbers from the same
point, or @dfn{seed}, each time you run @code{awk}. This means that
@@ -6628,9 +6880,9 @@ This searches the string @var{in} for the first occurrence of the string
@var{find}, and returns the position in characters where that occurrence
begins in the string @var{in}. For example:@refill
-@example
+@smallexample
awk 'BEGIN @{ print index("peanut", "an") @}'
-@end example
+@end smallexample
@noindent
prints @samp{3}. If @var{find} is not found, @code{index} returns 0.
@@ -6648,9 +6900,11 @@ three characters.
If no argument is supplied, @code{length} returns the length of @code{$0}.
In older versions of @code{awk}, you could call the @code{length} function
-without any parentheses. However, this is not allowed by the @sc{POSIX}
-specification, and for maximal portability of your @code{awk} programs
-you should always supply the parentheses.
+without any parentheses. Doing so is marked as ``deprecated'' in the
+@sc{posix} standard. This means that while you can do this in your
+programs, it is a feature that can eventually be removed from a future
+version of the standard. Therefore, for maximal portability of your
+@code{awk} programs you should always supply the parentheses.
@item match(@var{string}, @var{regexp})
@findex match
@@ -6669,7 +6923,7 @@ length in characters of the matched substring. If no match is found,
For example:
-@example
+@smallexample
awk '@{
if ($1 == "FIND")
regex = $2
@@ -6679,7 +6933,7 @@ awk '@{
print "Match of", regex, "found at", where, "in", $0
@}
@}'
-@end example
+@end smallexample
@noindent
This program looks for lines that match the regular expression stored in
@@ -6687,7 +6941,7 @@ the variable @code{regex}. This regular expression can be changed. If the
first word on a line is @samp{FIND}, @code{regex} is changed to be the
second word on that line. Therefore, given:
-@example
+@smallexample
FIND fo*bar
My program was a foobar
But none of it would doobar
@@ -6695,19 +6949,19 @@ FIND Melvin
JF+KM
This line is property of The Reality Engineering Co.
This file created by Melvin.
-@end example
+@end smallexample
@noindent
@code{awk} prints:
-@example
+@smallexample
Match of fo*bar found at 18 in My program was a foobar
Match of Melvin found at 26 in This file created by Melvin.
-@end example
+@end smallexample
@item split(@var{string}, @var{array}, @var{fieldsep})
@findex split
-This divides @var{string} up into pieces separated by @var{fieldsep},
+This divides @var{string} into pieces separated by @var{fieldsep},
and stores the pieces in @var{array}. The first piece is stored in
@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
forth. The string value of the third argument, @var{fieldsep}, is
@@ -6719,19 +6973,19 @@ the @var{fieldsep} is omitted, the value of @code{FS} is used.
The @code{split} function, then, splits strings into pieces in a
manner similar to the way input lines are split into fields. For example:
-@example
+@smallexample
split("auto-da-fe", a, "-")
-@end example
+@end smallexample
@noindent
splits the string @samp{auto-da-fe} into three fields using @samp{-} as the
separator. It sets the contents of the array @code{a} as follows:
-@example
+@smallexample
a[1] = "auto"
a[2] = "da"
a[3] = "fe"
-@end example
+@end smallexample
@noindent
The value returned by this call to @code{split} is 3.
@@ -6743,12 +6997,13 @@ are separated by runs of whitespace.
@item sprintf(@var{format}, @var{expression1},@dots{})
@findex sprintf
This returns (without printing) the string that @code{printf} would
-have printed out with the same arguments (@pxref{Printf}). For
-example:
+have printed out with the same arguments
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).
+For example:@refill
-@example
+@smallexample
sprintf("pi = %.2f (approx.)", 22/7)
-@end example
+@end smallexample
@noindent
returns the string @w{@code{"pi = 3.14 (approx.)"}}.
@@ -6770,10 +7025,10 @@ default is to use and alter @code{$0}.
For example:@refill
-@example
+@smallexample
str = "water, water, everywhere"
sub(/at/, "ith", str)
-@end example
+@end smallexample
@noindent
sets @code{str} to @w{@code{"wither, water, everywhere"}}, by replacing the
@@ -6787,9 +7042,9 @@ stands for the precise substring that was matched by @var{regexp}. (If
the regexp can match more than one string, then this precise substring
may vary.) For example:@refill
-@example
+@smallexample
awk '@{ sub(/candidate/, "& and his wife"); print @}'
-@end example
+@end smallexample
@noindent
changes the first occurrence of @samp{candidate} to @samp{candidate
@@ -6797,13 +7052,13 @@ and his wife} on each input line.
Here is another example:
-@example
+@smallexample
awk 'BEGIN @{
str = "daabaaa"
sub(/a*/, "c&c", str)
print str
@}'
-@end example
+@end smallexample
@noindent
prints @samp{dcaacbaaa}. This show how @samp{&} can represent a non-constant
@@ -6816,9 +7071,9 @@ in a string constant to include a literal @samp{&} in the replacement.
For example, here is how to replace the first @samp{|} on each line with
an @samp{&}:@refill
-@example
+@smallexample
awk '@{ sub(/\|/, "\\&"); print @}'
-@end example
+@end smallexample
@strong{Note:} as mentioned above, the third argument to @code{sub} must
be an lvalue. Some versions of @code{awk} allow the third argument to
@@ -6828,9 +7083,9 @@ the substitution (if any) would be thrown away because there is no place
to put it. Such versions of @code{awk} accept expressions like
this:@refill
-@example
+@smallexample
sub(/USA/, "United States", "the USA and Canada")
-@end example
+@end smallexample
@noindent
But that is considered erroneous in @code{gawk}.
@@ -6842,9 +7097,9 @@ This is similar to the @code{sub} function, except @code{gsub} replaces
substrings it can find. The @samp{g} in @code{gsub} stands for
``global,'' which means replace everywhere. For example:@refill
-@example
+@smallexample
awk '@{ gsub(/Britain/, "United Kingdom"); print @}'
-@end example
+@end smallexample
@noindent
replaces all occurrences of the string @samp{Britain} with @samp{United
@@ -6886,7 +7141,7 @@ Nonalphabetic characters are left unchanged. For example,
@end table
@node I/O Functions, Time Functions, String Functions, Built-in
-@section Built-in Functions For Input/Output
+@section Built-in Functions for Input/Output
@table @code
@item close(@var{filename})
@@ -6894,12 +7149,14 @@ Close the file @var{filename}, for input or output. The argument may
alternatively be a shell command that was used for redirecting to or
from a pipe; then the pipe is closed.
-@xref{Close Input}, regarding closing input files and pipes.
-@xref{Close Output}, regarding closing output files and pipes.
+@xref{Close Input, ,Closing Input Files and Pipes}, regarding closing
+input files and pipes. @xref{Close Output, ,Closing Output Files and Pipes},
+regarding closing output files and pipes.@refill
@item system(@var{command})
@findex system
-@cindex interaction of @code{awk} with other programs
+@c the following index entry is an overfull hbox. --mew 30jan1992
+@cindex interaction, @code{awk} and other programs
The system function allows the user to execute operating system commands
and then return to the @code{awk} program. The @code{system} function
executes the command given by the string @var{command}. It returns, as
@@ -6908,11 +7165,11 @@ its value, the status returned by the command that was executed.
For example, if the following fragment of code is put in your @code{awk}
program:
-@example
+@smallexample
END @{
system("mail -s 'awk run done' operator < /dev/null")
@}
-@end example
+@end smallexample
@noindent
the system operator will be sent mail when the @code{awk} program
@@ -6927,8 +7184,58 @@ Some operating systems cannot implement the @code{system} function.
@code{system} causes a fatal error if it is not supported.
@end table
+@c fakenode --- for prepinfo
+@subheading Controlling Output Buffering with @code{system}
+@cindex flushing buffers
+@cindex buffers, flushing
+@cindex buffering output
+@cindex output, buffering
+
+Many utility programs will @dfn{buffer} their output; they save information
+to be written to a disk file or terminal in memory, until there is enough
+to be written in one operation. This is often more efficient than writing
+every little bit of information as soon as it is ready. However, sometimes
+it is necessary to force a program to @dfn{flush} its buffers; that is,
+write the information to its destination, even if a buffer is not full.
+You can do this from your @code{awk} program by calling @code{system}
+with a null string as its argument:
+
+@example
+system("") # flush output
+@end example
+
+@noindent
+@code{gawk} treats this use of the @code{system} function as a special
+case, and is smart enough not to run a shell (or other command
+interpreter) with the empty command. Therefore, with @code{gawk}, this
+idiom is not only useful, it is efficient. While this idiom should work
+with other @code{awk} implementations, it will not necessarily avoid
+starting an unnecessary shell.
+@ignore
+Need a better explanation, perhaps in a separate paragraph. Explain that
+for
+
+awk 'BEGIN { print "hi"
+ system("echo hello")
+ print "howdy" }'
+
+that the output had better be
+
+ hi
+ hello
+ howdy
+
+and not
+
+ hello
+ hi
+ howdy
+
+which it would be if awk did not flush its buffers before calling system.
+@end ignore
+
@node Time Functions, , I/O Functions, Built-in
-@section Functions For Dealing With Time Stamps
+@section Functions for Dealing with Time Stamps
@cindex time stamps
@cindex time of day
@@ -6936,27 +7243,27 @@ A common use for @code{awk} programs is the processing of log files.
Log files often contain time stamp information, indicating when a
particular log record was written. Many programs log their time stamp
in the form returned by the @code{time} system call, which is the
-number of seconds since a particular epoch. On @sc{POSIX} systems,
-it is the number of seconds since Midnight, January 1, 1970, UTC.
+number of seconds since a particular epoch. On @sc{posix} systems,
+it is the number of seconds since Midnight, January 1, 1970, @sc{utc}.
In order to make it easier to process such log files, and to easily produce
useful reports, @code{gawk} provides two functions for working with time
stamps. Both of these are @code{gawk} extensions; they are not specified
-in the @sc{POSIX} standard, nor are they in any other known version
+in the @sc{posix} standard, nor are they in any other known version
of @code{awk}.
@table @code
@item systime()
@findex systime
This function returns the current time as the number of seconds since
-the system epoch. On @sc{POSIX} systems, this is the number of seconds
-since Midnight, January 1, 1970, UTC. It may be a different number on
+the system epoch. On @sc{posix} systems, this is the number of seconds
+since Midnight, January 1, 1970, @sc{utc}. It may be a different number on
other systems.
@item strftime(@var{format}, @var{timestamp})
@findex strftime
This function returns a string. It is similar to the function of the
-same name in the @sc{ANSI} C standard library. The time specified by
+same name in the @sc{ansi} C standard library. The time specified by
@var{timestamp} is used to produce a string, based on the contents
of the @var{format} string.
@end table
@@ -6974,7 +7281,7 @@ specifications in the @var{format} string. If no @var{timestamp} argument
is supplied, @code{gawk} will use the current time of day as the
time stamp.@refill
-@code{strftime} is guaranteed by the @sc{ANSI} C standard to support
+@code{strftime} is guaranteed by the @sc{ansi} C standard to support
the following date format specifications:
@table @code
@@ -7052,7 +7359,7 @@ A literal @samp{%}.
@end table
If a conversion specifier is not one of the above, the behavior is undefined.
-@footnote{This is because the @sc{ANSI} standard for C leaves the behavior
+@footnote{This is because the @sc{ansi} standard for C leaves the behavior
of the C version of @code{strftime} undefined, and @code{gawk} will use the
system's version of @code{strftime} if it's there. Typically, the conversion
specifier will either not appear in the returned string, or it will appear
@@ -7060,18 +7367,18 @@ literally.}
Informally, a @dfn{locale} is the geographic place in which a program
is meant to run. For example, a common way to abbreviate the date
-September Fourth, 1991 in the United States would be ``9/4/91''.
+September 4, 1991 in the United States would be ``9/4/91''.
In many countries in Europe, however, it would be abbreviated ``4.9.91''.
Thus, the @samp{%x} specification in a @code{"US"} locale might produce
@samp{9/4/91}, while in a @code{"EUROPE"} locale, it might produce
-@samp{4.9.91}. The @sc{ANSI} C standard defines a default @code{"C"}
+@samp{4.9.91}. The @sc{ansi} C standard defines a default @code{"C"}
locale, which is an environment that is typical of what most C programmers
are used to.
A public-domain C version of @code{strftime} is shipped with @code{gawk}
-for systems that are not yet fully @sc{ANSI}-compliant. If that version is
-used to compile @code{gawk} (@pxref{Installation}), then the following
-additional format specifications are available:
+for systems that are not yet fully @sc{ansi}-compliant. If that version is
+used to compile @code{gawk} (@pxref{Installation, ,Installing @code{gawk}}),
+then the following additional format specifications are available:@refill
@table @code
@item %D
@@ -7101,35 +7408,50 @@ A TAB character.
@item %C
The century, as a number between 00 and 99.
-@item %Ec %EC %Ex %Ey %EY %Od %Oe %OH
-@itemx %OI %Om %OM %OS %OU %Ow %OW %Oy
+@item %u
+The weekday as a decimal number
+[1 (Monday)--7].
+
+@item %V
+The week number of the year (the first Monday as the first
+day of week 1) as a decimal number (01--53).
+The method for determining the week number is as specified by ISO 8601
+(to wit: if the week containing January 1 has four or more days in the
+new year, then it is week 1; otherwise it is week 53 of the previous year
+and the next week is week 1).@refill
+
+@item %Ec %EC %Ex %Ey %EY %Od %Oe %OH %OI
+@itemx %Om %OM %OS %Ou %OU %OV %Ow %OW %Oy
These are ``alternate representations'' for the specifications
that use only the second letter (@samp{%c}, @samp{%C}, and so on).
They are recognized, but their normal representations are used.
-(These facilitate compliance with the @sc{POSIX} @code{date}
+(These facilitate compliance with the @sc{posix} @code{date}
utility.)@refill
-@item %V
+@item %v
The date in VMS format (e.g. 20-JUN-1991).
@end table
Here are two examples that use @code{strftime}. The first is an
-@code{awk} version of the C @code{ctime} function.
+@code{awk} version of the C @code{ctime} function. (This is a
+user-defined function, which we have not discussed yet.
+@xref{User-defined, ,User-defined Functions}, for more information.)
@example
# ctime.awk
#
# awk version of C ctime(3) function
-function ctime( format)
+function ctime(ts, format)
@{
format = "%a %b %e %H:%M:%S %Z %Y"
-
- return strftime(format) # defaults to current time
+ if (ts == 0)
+ ts = systime() # use current time as default
+ return strftime(format, ts)
@}
@end example
-This next example is an @code{awk} implementation of the @sc{POSIX}
+This next example is an @code{awk} implementation of the @sc{posix}
@code{date} utility. Normally, the @code{date} utility prints the
current date and time of day in a well known format. However, if you
provide an argument to it that begins with a @samp{+}, @code{date}
@@ -7185,10 +7507,11 @@ built-in ones (@pxref{Function Calls}), but it is up to you to define
them---to tell @code{awk} what they should do.
@menu
-* Definition Syntax:: How to write definitions and what they mean.
-* Function Example:: An example function definition and what it does.
-* Function Caveats:: Things to watch out for.
-* Return Statement:: Specifying the value a function returns.
+* Definition Syntax:: How to write definitions and what they mean.
+* Function Example:: An example function definition and
+ what it does.
+* Function Caveats:: Things to watch out for.
+* Return Statement:: Specifying the value a function returns.
@end menu
@node Definition Syntax, Function Example, User-defined, User-defined
@@ -7241,7 +7564,7 @@ null string.
Usually when you write a function you know how many names you intend to
use for arguments and how many you intend to use as locals. By
convention, you should write an extra space between the arguments and
-the locals, so that other people can follow how your function is
+the locals, so other people can follow how your function is
supposed to be used.
During execution of the function body, the arguments and local variable
@@ -7264,10 +7587,11 @@ before all uses of the function. This is because @code{awk} reads the
entire program before starting to execute any of it.
In many @code{awk} implementations, the keyword @code{function} may be
-abbreviated @code{func}. However, @sc{POSIX} only specifies the use of
+abbreviated @code{func}. However, @sc{posix} only specifies the use of
the keyword @code{function}. This actually has some practical implications.
-If @code{gawk} is in @sc{POSIX}-compatibility mode (@pxref{Command Line}),
-then the following statement will @emph{not} define a function:@refill
+If @code{gawk} is in @sc{posix}-compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), then the following
+statement will @emph{not} define a function:@refill
@example
func foo() @{ a = sqrt($1) ; print a @}
@@ -7345,24 +7669,27 @@ A function call consists of the function name followed by the arguments
in parentheses. What you write in the call for the arguments are
@code{awk} expressions; each time the call is executed, these
expressions are evaluated, and the values are the actual arguments. For
-example, here is a call to @code{foo} with three arguments:
+example, here is a call to @code{foo} with three arguments (the first
+being a string concatenation):
@example
foo(x y, "lose", 4 * z)
@end example
-@strong{Note:} whitespace characters (spaces and tabs) are not allowed
+@quotation
+@strong{Caution:} whitespace characters (spaces and tabs) are not allowed
between the function name and the open-parenthesis of the argument list.
If you write whitespace by mistake, @code{awk} might think that you mean
to concatenate a variable with an expression in parentheses. However, it
notices that you used a function name and not a variable name, and reports
an error.
+@end quotation
@cindex call by value
When a function is called, it is given a @emph{copy} of the values of
its arguments. This is called @dfn{call by value}. The caller may use
a variable as the expression for the argument, but the called function
-does not know this: all it knows is what value the argument had. For
+does not know this: it only knows what value the argument had. For
example, if you write this code:
@example
@@ -7400,8 +7727,15 @@ However, when arrays are the parameters to functions, they are @emph{not}
copied. Instead, the array itself is made available for direct manipulation
by the function. This is usually called @dfn{call by reference}.
Changes made to an array parameter inside the body of a function @emph{are}
-visible outside that function. @emph{This can be very dangerous if you don't
-watch what you are doing.} For example:@refill
+visible outside that function.
+@ifinfo
+This can be @strong{very} dangerous if you do not watch what you are
+doing. For example:@refill
+@end ifinfo
+@iftex
+@emph{This can be very dangerous if you do not watch what you are
+doing.} For example:@refill
+@end iftex
@example
function changeit (array, ind, nvalue) @{
@@ -7419,7 +7753,7 @@ BEGIN @{
prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because calling
@code{changeit} stores @code{"two"} in the second element of @code{a}.
-@node Return Statement, , Function Caveats, User-defined
+@node Return Statement, , Function Caveats, User-defined
@section The @code{return} Statement
@cindex @code{return} statement
@@ -7445,6 +7779,7 @@ Here is an example of a user-defined function that returns a value
for the largest number among the elements of an array:@refill
@example
+@group
function maxelt (vec, i, ret) @{
for (i in vec) @{
if (ret == "" || vec[i] > ret)
@@ -7452,10 +7787,11 @@ function maxelt (vec, i, ret) @{
@}
return ret
@}
+@end group
@end example
@noindent
-You call @code{maxelt} with one argument, an array name. The local
+You call @code{maxelt} with one argument, which is an array name. The local
variables @code{i} and @code{ret} are not intended to be arguments;
while there is nothing to stop you from passing two or three arguments
to @code{maxelt}, the results would be strange. The extra space before
@@ -7468,6 +7804,7 @@ array, calls @code{maxelt}, and then reports the maximum number in that
array:@refill
@example
+@group
awk '
function maxelt (vec, i, ret) @{
for (i in vec) @{
@@ -7476,7 +7813,9 @@ function maxelt (vec, i, ret) @{
@}
return ret
@}
+@end group
+@group
# Load all fields of each record into nums.
@{
for(i = 1; i <= NF; i++)
@@ -7486,16 +7825,19 @@ function maxelt (vec, i, ret) @{
END @{
print maxelt(nums)
@}'
+@end group
@end example
Given the following input:
@example
+@group
1 5 23 8 16
44 3 5 2 8 26
256 291 1396 2962 100
-6 467 998 1101
99385 11 0 225
+@end group
@end example
@noindent
@@ -7527,13 +7869,14 @@ of them are also documented in the chapters where their areas of
activity are described.
@menu
-* User-modified:: Built-in variables that you change to control @code{awk}.
-
-* Auto-set:: Built-in variables where @code{awk} gives you information.
+* User-modified:: Built-in variables that you change
+ to control @code{awk}.
+* Auto-set:: Built-in variables where @code{awk}
+ gives you information.
@end menu
@node User-modified, Auto-set, Built-in Variables, Built-in Variables
-@section Built-in Variables That Control @code{awk}
+@section Built-in Variables that Control @code{awk}
@cindex built-in variables, user modifiable
This is a list of the variables which you can change to control how
@@ -7545,10 +7888,10 @@ This is a list of the variables which you can change to control how
@end iftex
@item CONVFMT
This string is used by @code{awk} to control conversion of numbers to
-strings (@pxref{Conversion}). It works by being passed, in effect, as
-the first argument to the @code{sprintf} function. Its default value
-is @code{"%.6g"}. @code{CONVFMT} was introduced by the @sc{POSIX}
-standard.@refill
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
+It works by being passed, in effect, as the first argument to the
+@code{sprintf} function. Its default value is @code{"%.6g"}.
+@code{CONVFMT} was introduced by the @sc{posix} standard.@refill
@iftex
@vindex FIELDWIDTHS
@@ -7558,20 +7901,22 @@ This is a space separated list of columns that tells @code{gawk}
how to manage input with fixed, columnar boundaries. It is an
experimental feature that is still evolving. Assigning to @code{FIELDWIDTHS}
overrides the use of @code{FS} for field splitting.
-@xref{Constant Size}, for more information.@refill
+@xref{Constant Size, ,Reading Fixed-width Data}, for more information.@refill
-If @code{gawk} is in compatibility mode (@pxref{Command Line}), then
-@code{FIELDWIDTHS} has no special meaning, and field splitting operations are
-done based exclusively on the value of @code{FS}.
+If @code{gawk} is in compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), then @code{FIELDWIDTHS}
+has no special meaning, and field splitting operations are done based
+exclusively on the value of @code{FS}.@refill
@iftex
@vindex FS
@end iftex
@item FS
-@code{FS} is the input field separator (@pxref{Field Separators}).
+@code{FS} is the input field separator
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).
The value is a single-character string or a multi-character regular
expression that matches the separations between fields in an input
-record.
+record.@refill
The default value is @w{@code{" "}}, a string consisting of a single
space. As a special exception, this value actually means that any
@@ -7600,18 +7945,19 @@ matching with @samp{~} and @samp{!~}, and the @code{gsub} @code{index},
doing their particular regexp operations. @strong{Note:} since field
splitting with the value of the @code{FS} variable is also a regular
expression operation, that too is done with case ignored.
-@xref{Case-sensitivity}.
+@xref{Case-sensitivity, ,Case-sensitivity in Matching}.
-If @code{gawk} is in compatibility mode (@pxref{Command Line}), then
-@code{IGNORECASE} has no special meaning, and regexp operations are
-always case-sensitive.@refill
+If @code{gawk} is in compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), then @code{IGNORECASE} has
+no special meaning, and regexp operations are always case-sensitive.@refill
@item OFMT
@iftex
@vindex OFMT
@end iftex
This string is used by @code{awk} to control conversion of numbers to
-strings (@pxref{Conversion}) for printing with the @code{print} statement.
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers}) for
+printing with the @code{print} statement.
It works by being passed, in effect, as the first argument to the
@code{sprintf} function. Its default value is @code{"%.6g"}.
Earlier versions of @code{awk} also used @code{OFMT} to specify the
@@ -7633,7 +7979,7 @@ default value is @w{@code{" "}}, a string consisting of a single space.
This is the output record separator. It is output at the end of every
@code{print} statement. Its default value is a string containing a
single newline character, which could be written as @code{"\n"}.
-(@xref{Output Separators}).@refill
+(@xref{Output Separators}.)@refill
@item RS
@iftex
@@ -7641,7 +7987,8 @@ single newline character, which could be written as @code{"\n"}.
@end iftex
This is @code{awk}'s input record separator. Its default value is a string
containing a single newline character, which means that an input record
-consists of a single line of text. (@xref{Records}.)@refill
+consists of a single line of text.
+(@xref{Records, ,How Input is Split into Records}.)@refill
@item SUBSEP
@iftex
@@ -7650,11 +7997,12 @@ consists of a single line of text. (@xref{Records}.)@refill
@code{SUBSEP} is the subscript separator. It has the default value of
@code{"\034"}, and is used to separate the parts of the name of a
multi-dimensional array. Thus, if you access @code{foo[12,3]}, it
-really accesses @code{foo["12\0343"]} (@pxref{Multi-dimensional}).@refill
+really accesses @code{foo["12\0343"]}
+(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).@refill
@end table
-@node Auto-set, , User-modified, Built-in Variables
-@section Built-in Variables That Convey Information to You
+@node Auto-set, , User-modified, Built-in Variables
+@section Built-in Variables that Convey Information
This is a list of the variables that are set automatically by @code{awk}
on certain occasions so as to provide information to your program.
@@ -7668,12 +8016,13 @@ on certain occasions so as to provide information to your program.
@end iftex
The command-line arguments available to @code{awk} programs are stored in
an array called @code{ARGV}. @code{ARGC} is the number of command-line
-arguments present. @xref{Command Line}. @code{ARGV} is indexed from zero
-to @w{@code{ARGC - 1}}. For example:
+arguments present. @xref{Command Line, ,Invoking @code{awk}}.
+@code{ARGV} is indexed from zero to @w{@code{ARGC - 1}}. For example:@refill
-@example
-awk '@{ print ARGV[$1] @}' inventory-shipped BBS-list
-@end example
+@smallexample
+awk 'BEGIN @{ for (i = 0; i < ARGC; i++)
+ print ARGV[i] @}' inventory-shipped BBS-list
+@end smallexample
@noindent
In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]}
@@ -7735,15 +8084,16 @@ If @code{awk} is reading from the standard input (in other words,
there are no files listed on the command line),
@code{FILENAME} is set to @code{"-"}.
@code{FILENAME} is changed each time a new file is read
-(@pxref{Reading Files}).@refill
+(@pxref{Reading Files, ,Reading Input Files}).@refill
@item FNR
@iftex
@vindex FNR
@end iftex
@code{FNR} is the current record number in the current file. @code{FNR} is
-incremented each time a new record is read (@pxref{Getline}).
-It is reinitialized to 0 each time a new input file is started.
+incremented each time a new record is read
+(@pxref{Getline, ,Explicit Input with @code{getline}}). It is reinitialized
+to 0 each time a new input file is started.@refill
@item NF
@iftex
@@ -7751,14 +8101,15 @@ It is reinitialized to 0 each time a new input file is started.
@end iftex
@code{NF} is the number of fields in the current input record.
@code{NF} is set each time a new record is read, when a new field is
-created, or when @code{$0} changes (@pxref{Fields}).@refill
+created, or when @code{$0} changes (@pxref{Fields, ,Examining Fields}).@refill
@item NR
@iftex
@vindex NR
@end iftex
This is the number of input records @code{awk} has processed since
-the beginning of the program's execution. (@pxref{Records}).
+the beginning of the program's execution.
+(@xref{Records, ,How Input is Split into Records}.)
@code{NR} is set each time a new record is read.@refill
@item RLENGTH
@@ -7766,24 +8117,26 @@ the beginning of the program's execution. (@pxref{Records}).
@vindex RLENGTH
@end iftex
@code{RLENGTH} is the length of the substring matched by the
-@code{match} function (@pxref{String Functions}). @code{RLENGTH} is set
-by invoking the @code{match} function. Its value is the length of the
-matched string, or @minus{}1 if no match was found.@refill
+@code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+@code{RLENGTH} is set by invoking the @code{match} function. Its value
+is the length of the matched string, or @minus{}1 if no match was found.@refill
@item RSTART
@iftex
@vindex RSTART
@end iftex
@code{RSTART} is the start-index in characters of the substring matched by the
-@code{match} function (@pxref{String Functions}). @code{RSTART} is set
-by invoking the @code{match} function. Its value is the position of the
-string where the matched substring starts, or 0 if no match was
-found.@refill
+@code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+@code{RSTART} is set by invoking the @code{match} function. Its value
+is the position in the string where the matched substring starts, or 0
+if no match was found.@refill
@end table
@node Command Line, Language History, Built-in Variables, Top
@c node-name, next, previous, up
-@chapter Invocation of @code{awk}
+@chapter Invoking @code{awk}
@cindex command line
@cindex invocation of @code{gawk}
@cindex arguments, command line
@@ -7795,13 +8148,16 @@ enclosed in @samp{@r{[}@dots{}@r{]}} in these templates are optional.
@example
awk @r{[@code{-F@var{fs}}] [@code{-W} @var{gawk-opts}] [@code{-v @var{var}=@var{val}}] [@code{--}]} '@var{program}' @var{file} @dots{}
-awk @r{[@code{-F@var{fs}}] [@code{-W} @var{gawk-opts}] [@code{-v @var{var}=@var{val}}] @code{-f @var{source-file}} [@code{-f @var{source-file} @dots{}}] [@code{--}]} @var{file} @dots{}
+awk @r{[@code{-F@var{fs}}] [@code{-W} @var{gawk-opts}] [@code{-v @var{var}=@var{val}}] @code{-f @var{source-file}}
+ [@code{-f @var{source-file} @dots{}}] [@code{--}]} @var{file} @dots{}
@end example
@menu
-* Options:: Command line options and their meanings.
-* Other Arguments:: Input file names and variable assignments.
-* AWKPATH Variable:: Searching directories for @code{awk} programs.
+* Options:: Command line options and their meanings.
+* Other Arguments:: Input file names and variable assignments.
+* AWKPATH Variable:: Searching directories for @code{awk} programs.
+* Obsolete:: Obsolete Options and/or Features.
+* Undocumented:: Undocumented Options and Features.
@end menu
@node Options, Other Arguments, Command Line, Command Line
@@ -7812,7 +8168,8 @@ The options and their meanings are as follows:
@table @code
@item -F@var{fs}
-Sets the @code{FS} variable to @var{fs} (@pxref{Field Separators}).
+Sets the @code{FS} variable to @var{fs}
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
@item -f @var{source-file}
Indicates that the @code{awk} program is to be found in @var{source-file}
@@ -7829,7 +8186,8 @@ it more than once, setting another variable each time, like this:
@samp{@w{-v foo=1} @w{-v bar=2}}.
@item -W @var{gawk-opt}
-Following the @sc{POSIX} standard, options that are specific to @code{gawk}
+@cindex @samp{-W} option
+Following the @sc{posix} standard, options that are specific to @code{gawk}
are supplied as arguments to the @samp{-W} option. These arguments
may be separated by commas, or quoted and separated by whitespace.
Case is ignored when processing these options. The following options
@@ -7839,8 +8197,10 @@ are available:
@item compat
Specifies @dfn{compatibility mode}, in which the GNU extensions in
@code{gawk} are disabled, so that @code{gawk} behaves just like Unix
-@code{awk}. @xref{POSIX/GNU}, which summarizes the extensions.
-Also see @ref{Compatibility Mode}.
+@code{awk}.
+@xref{POSIX/GNU, ,Extensions in @code{gawk} not in POSIX @code{awk}},
+which summarizes the extensions. Also see
+@ref{Compatibility Mode, ,Downward Compatibility and Debugging}.@refill
@item lint
Provide warnings about constructs that are dubious or non-portable to
@@ -7849,29 +8209,31 @@ other @code{awk} implementations.
@item copyleft
@itemx copyright
Print the short version of the General Public License.
-This option may disappear in a future version of @code{gawk}.
+This option may disappear in a future version of @code{gawk}.
@item posix
-Operate in strict @sc{POSIX} mode. This disables all @code{gawk}
+Operate in strict @sc{posix} mode. This disables all @code{gawk}
extensions (just like @code{compat}), and adds the following additional
restrictions:
@itemize @bullet{}
@item
-@code{\x} escape sequences are not recognized (@pxref{Constants}).
+@code{\x} escape sequences are not recognized
+(@pxref{Constants, ,Constant Expressions}).@refill
@item
The synonym @code{func} for the keyword @code{function} is not
-recognized (@pxref{Definition Syntax}).
+recognized (@pxref{Definition Syntax, ,Syntax of Function Definitions}).
@item
The operators @samp{**} and @samp{**=} cannot be used in
-place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops}, and also
-@pxref{Assignment Ops}).@refill
+place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators},
+and also @pxref{Assignment Ops, ,Assignment Expressions}).@refill
@item
Specifying @samp{-Ft} on the command line does not set the value
-of @code{FS} to be a single tab character (@pxref{Field Separators}).
+of @code{FS} to be a single tab character
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
@end itemize
Although you can supply both @samp{-W compat} and @samp{-W posix} on the
@@ -7887,7 +8249,7 @@ distributing. This option may disappear in a future version of @code{gawk}.
@item --
Signals the end of the command line options. The following arguments
are not treated as options even if they begin with @samp{-}. This
-interpretation of @samp{--} follows the @sc{POSIX} argument parsing
+interpretation of @samp{--} follows the @sc{posix} argument parsing
conventions.
This is useful if you have file names that start with @samp{-},
@@ -7905,7 +8267,8 @@ are otherwise ignored.
In compatibility mode, as a special case, if the value of @var{fs} supplied
to the @samp{-F} option is @samp{t}, then @code{FS} is set to the tab
character (@code{"\t"}). This is only true for @samp{-W compat}, and not
-for @samp{-W posix} (@pxref{Field Separators}).@refill
+for @samp{-W posix}
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
If the @samp{-f} option is @emph{not} used, then the first non-option
command line argument is expected to be the program text.
@@ -7921,7 +8284,7 @@ type in a program at the terminal and use library functions, by specifying
to use as part of the @code{awk} program. After typing your program,
type @kbd{Control-d} (the end-of-file character) to terminate it.
(You may also use @samp{-f -} to read program source from the standard
-input, but then you won't be able to also use the standard input as a
+input, but then you will not be able to also use the standard input as a
source of data.)
@node Other Arguments, AWKPATH Variable, Options, Command Line
@@ -7949,15 +8312,17 @@ instead of reading a file.
Therefore, the variables actually receive the specified values after all
previously specified files have been read. In particular, the values of
variables assigned in this fashion are @emph{not} available inside a
-@code{BEGIN} rule (@pxref{BEGIN/END}), since such rules are run before
-@code{awk} begins scanning the argument list. The values given on the
-command line are processed for escape sequences (@pxref{Constants}).@refill
+@code{BEGIN} rule
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}),
+since such rules are run before @code{awk} begins scanning the argument list.
+The values given on the command line are processed for escape sequences
+(@pxref{Constants, ,Constant Expressions}).@refill
In some earlier implementations of @code{awk}, when a variable assignment
occurred before any file names, the assignment would happen @emph{before}
the @code{BEGIN} rule was executed. Some applications came to depend
upon this ``feature.'' When @code{awk} was changed to be more consistent,
-the @samp{-v} option was added to accomodate applications that depended
+the @samp{-v} option was added to accommodate applications that depended
upon this old behavior.
The variable assignment feature is most useful for assigning to variables
@@ -7968,15 +8333,15 @@ example:@refill
@cindex multiple passes over data
@cindex passes, multiple
-@example
+@smallexample
awk 'pass == 1 @{ @var{pass 1 stuff} @}
pass == 2 @{ @var{pass 2 stuff} @}' pass=1 datafile pass=2 datafile
-@end example
+@end smallexample
Given the variable assignment feature, the @samp{-F} option is not
strictly necessary. It remains for historical compatibility.
-@node AWKPATH Variable,, Other Arguments, Command Line
+@node AWKPATH Variable, Obsolete, Other Arguments, Command Line
@section The @code{AWKPATH} Environment Variable
@cindex @code{AWKPATH} environment variable
@cindex search path
@@ -8012,7 +8377,7 @@ would have to be typed for each file.
Path searching is not done if @code{gawk} is in compatibility mode.
This is true for both @samp{-W compat} and @samp{-W posix}.
-@xref{Command Line}.
+@xref{Command Line, ,Invoking @code{awk}}.
@strong{Note:} if you want files in the current directory to be found,
you must include the current directory in the path, either by writing
@@ -8024,31 +8389,143 @@ found in the current directory. This path search mechanism is identical
to the shell's.
@c someday, @cite{The Bourne Again Shell}....
+@node Obsolete, Undocumented, AWKPATH Variable, Command Line
+@section Obsolete Options and/or Features
+
+@cindex deprecated options
+@cindex obsolete options
+@cindex deprecated features
+@cindex obsolete features
+This section describes features and/or command line options from the
+previous release of @code{gawk} that are either not available in the
+current version, or that are still supported but deprecated (meaning that
+they will @emph{not} be in the next release).
+
+@c update this section for each release!
+
+For version 2.14 of @code{gawk}, the following command line options
+are recognized, but produce a warning message
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@table @samp
+@ignore
+@item -nostalgia
+Use @samp{-W nostalgia} instead.
+@end ignore
+
+@item -c
+Use @samp{-W compat} instead.
+
+@item -V
+Use @samp{-W version} instead.
+
+@item -C
+Use @samp{-W copyright} instead.
+
+@item -a
+@itemx -e
+These options produce a warning message but have no effect on the
+execution of @code{gawk}. The @sc{posix} standard now specifies
+traditional @code{awk} regular expressions for the @code{awk} utility.
+@end table
+
+The public-domain version of @code{strftime} that is distributed with
+@code{gawk} changed for the 2.14 release. The @samp{%V} conversion specifier
+that used to generate the date in VMS format was changed to @samp{%v}.
+This is because the @sc{posix} standard for the @code{date} utility now
+specifies a @samp{%V} conversion specifier.
+@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for details.
+
+@node Undocumented, , Obsolete, Command Line
+@section Undocumented Options and Features
+
+This section intentionally left blank.
+
+@c Read The Source, Luke!
+
+@ignore
+@c If these came out in the Info file or TeX manual, then they wouldn't
+@c be undocumented, would they?
+
+@code{gawk} has one undocumented option:
+
+@table @samp
+@item -W nostalgia
+Print the message @code{"awk: bailing out near line 1"} and dump core.
+This option was inspired by the common behavior of very early versions of
+Unix @code{awk}, and by a t-shirt.
+@end table
+
+Early versions of @code{awk} used to not require any separator (either
+a newline or @samp{;}) between the rules in @code{awk} programs. Thus,
+it was common to see one-line programs like:
+
+@example
+awk '@{ sum += $1 @} END @{ print sum @}'
+@end example
+
+@code{gawk} actually supports this, but it is purposely undocumented
+since it is considered bad style. The correct way to write such a program
+is either
+
+@example
+awk '@{ sum += $1 @} ; END @{ print sum @}'
+@end example
+
+@noindent
+or
+
+@example
+awk '@{ sum += $1 @}
+ END @{ print sum @}' data
+@end example
+
+@noindent
+@xref{Statements/Lines, ,@code{awk} Statements versus Lines}, for a fuller
+explanation.@refill
+
+As an accident of the implementation of the original Unix @code{awk}, if
+a built-in function used @code{$0} as its default argument, it was possible
+to call that function without the parentheses. In particular, it was
+common practice to use the @code{length} function in this fashion.
+For example, the pipeline:
+
+@example
+echo abcdef | awk '@{ print length @}'
+@end example
+
+@noindent
+would print @samp{6}.
+
+For backwards compatibility with old programs, @code{gawk} supports
+this usage, but only for the @code{length} function. New programs should
+@emph{not} call the @code{length} function this way. In particular,
+this usage will not be portable to other @sc{posix}-compliant versions
+of @code{awk}. It is also poor style.
+
+@end ignore
+
@node Language History, Installation, Command Line, Top
@chapter The Evolution of the @code{awk} Language
This manual describes the GNU implementation of @code{awk}, which is patterned
-after the @sc{POSIX} specification. Many @code{awk} users are only familiar
+after the @sc{posix} specification. Many @code{awk} users are only familiar
with the original @code{awk} implementation in Version 7 Unix, which is also
the basis for the version in Berkeley Unix (through 4.3--Reno). This chapter
briefly describes the evolution of the @code{awk} language.
@menu
-* V7/S5R3.1:: The major changes between V7 and System V Release 3.1.
-
-* S5R4:: The minor changes between System V Releases 3.1 and 4.
-
-* POSIX:: New features from the @sc{POSIX} standard.
-
-* POSIX/GNU:: The extensions in @code{gawk} not in @sc{POSIX} @code{awk}.
-
-* Obsolete:: Obsolete Options and/or features.
-
-* Undocumented:: Undocumented Options and Features.
+* V7/S5R3.1:: The major changes between V7 and
+ System V Release 3.1.
+* S5R4:: Minor changes between System V
+ Releases 3.1 and 4.
+* POSIX:: New features from the @sc{posix} standard.
+* POSIX/GNU:: The extensions in @code{gawk}
+ not in @sc{posix} @code{awk}.
@end menu
@node V7/S5R3.1, S5R4, Language History, Language History
-@section Major Changes Between V7 and S5R3.1
+@section Major Changes between V7 and S5R3.1
The @code{awk} language evolved considerably between the release of
Version 7 Unix (1978) and the new version first made widely available in
@@ -8058,28 +8535,32 @@ cross-references to further details.
@itemize @bullet
@item
The requirement for @samp{;} to separate rules on a line
-(@pxref{Statements/Lines}).
+(@pxref{Statements/Lines, ,@code{awk} Statements versus Lines}).
@item
User-defined functions, and the @code{return} statement
-(@pxref{User-defined}).
+(@pxref{User-defined, ,User-defined Functions}).
@item
-The @code{delete} statement (@pxref{Delete}).
+The @code{delete} statement (@pxref{Delete, ,The @code{delete} Statement}).
@item
-The @code{do}-@code{while} statement (@pxref{Do Statement}).
+The @code{do}-@code{while} statement
+(@pxref{Do Statement, ,The @code{do}-@code{while} Statement}).@refill
@item
The built-in functions @code{atan2}, @code{cos}, @code{sin}, @code{rand} and
-@code{srand} (@pxref{Numeric Functions}).
+@code{srand} (@pxref{Numeric Functions, ,Numeric Built-in Functions}).
@item
The built-in functions @code{gsub}, @code{sub}, and @code{match}
-(@pxref{String Functions}).
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
@item
-The built-in functions @code{close} and @code{system} (@pxref{I/O Functions}).
+The built-in functions @code{close}, which closes an open file, and
+@code{system}, which allows the user to execute operating system
+commands (@pxref{I/O Functions, ,Built-in Functions for Input/Output}).@refill
+@c Does the above verbiage prevent an overfull hbox? --mew, rjc 24jan1992
@item
The @code{ARGC}, @code{ARGV}, @code{FNR}, @code{RLENGTH}, @code{RSTART},
@@ -8087,39 +8568,45 @@ and @code{SUBSEP} built-in variables (@pxref{Built-in Variables}).
@item
The conditional expression using the operators @samp{?} and @samp{:}
-(@pxref{Conditional Exp}).
+(@pxref{Conditional Exp, ,Conditional Expressions}).@refill
@item
-The exponentiation operator @samp{^} (@pxref{Arithmetic Ops}) and its
-assignment operator form @samp{^=} (@pxref{Assignment Ops}).@refill
+The exponentiation operator @samp{^}
+(@pxref{Arithmetic Ops, ,Arithmetic Operators}) and its assignment operator
+form @samp{^=} (@pxref{Assignment Ops, ,Assignment Expressions}).@refill
@item
C-compatible operator precedence, which breaks some old @code{awk}
-programs (@pxref{Precedence}).
+programs (@pxref{Precedence, ,Operator Precedence (How Operators Nest)}).
@item
-Regexps as the value of @code{FS} (@pxref{Field Separators}), and as the
-third argument to the @code{split} function (@pxref{String Functions}).@refill
+Regexps as the value of @code{FS}
+(@pxref{Field Separators, ,Specifying how Fields are Separated}), and as the
+third argument to the @code{split} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
@item
Dynamic regexps as operands of the @samp{~} and @samp{!~} operators
-(@pxref{Regexp Usage}).
+(@pxref{Regexp Usage, ,How to Use Regular Expressions}).
@item
-Escape sequences (@pxref{Constants}) in regexps.@refill
+Escape sequences (@pxref{Constants, ,Constant Expressions}) in regexps.@refill
@item
The escape sequences @samp{\b}, @samp{\f}, and @samp{\r}
-(@pxref{Constants}).
+(@pxref{Constants, ,Constant Expressions}).
@item
-Redirection of input for the @code{getline} function (@pxref{Getline}).
+Redirection of input for the @code{getline} function
+(@pxref{Getline, ,Explicit Input with @code{getline}}).@refill
@item
-Multiple @code{BEGIN} and @code{END} rules (@pxref{BEGIN/END}).
+Multiple @code{BEGIN} and @code{END} rules
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).@refill
@item
-Simulation of multidimensional arrays (@pxref{Multi-dimensional}).
+Simulated multi-dimensional arrays
+(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).@refill
@end itemize
@node S5R4, POSIX, V7/S5R3.1, Language History
@@ -8133,49 +8620,49 @@ The System V Release 4 version of Unix @code{awk} added these features
The @code{ENVIRON} variable (@pxref{Built-in Variables}).
@item
-Multiple @samp{-f} options on the command line (@pxref{Command Line}).
+Multiple @samp{-f} options on the command line
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
@item
The @samp{-v} option for assigning variables before program execution begins
-(@pxref{Command Line}).
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
@item
The @samp{--} option for terminating command line options.
@item
-The @samp{\a}, @samp{\v}, and @samp{\x} escape sequences (@pxref{Constants}).
+The @samp{\a}, @samp{\v}, and @samp{\x} escape sequences
+(@pxref{Constants, ,Constant Expressions}).@refill
@item
A defined return value for the @code{srand} built-in function
-(@pxref{Numeric Functions}).
+(@pxref{Numeric Functions, ,Numeric Built-in Functions}).
@item
The @code{toupper} and @code{tolower} built-in string functions
-for case translation (@pxref{String Functions}).
+for case translation
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
@item
A cleaner specification for the @samp{%c} format-control letter in the
-@code{printf} function (@pxref{Printf}).
+@code{printf} function
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
@item
The ability to dynamically pass the field width and precision (@code{"%*.*d"})
-in the argument list of the @code{printf} function (@pxref{Printf}).
+in the argument list of the @code{printf} function
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
@item
The use of constant regexps such as @code{/foo/} as expressions, where
they are equivalent to use of the matching operator, as in @code{$0 ~
-/foo/} (@pxref{Constants}).
+/foo/} (@pxref{Constants, ,Constant Expressions}).
@end itemize
@node POSIX, POSIX/GNU, S5R4, Language History
-@section Changes Between S5R4 and the POSIX Standard for @code{awk}
-@c
-@c @section Changes Between S5R4 and the @sc{POSIX} Standard for @code{awk}
-@c
-@c when the POSIX is inside a @sc{} in a section, it comes out in
-@c a typewriter font, which isn't what we want. texinfo bug.
-
-The @sc{POSIX} Command Language And Utilities Standard for @code{awk}
+@section Changes between S5R4 and POSIX @code{awk}
+
+The @sc{posix} Command Language and Utilities standard for @code{awk}
introduced the following changes into the language:
@itemize @bullet{}
@@ -8184,163 +8671,64 @@ The use of @samp{-W} for implementation-specific options.
@item
The use of @code{CONVFMT} for controlling the conversion of numbers
-to strings (@pxref{Conversion}).
+to strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
@item
The concept of a numeric string, and tighter comparison rules to go
-with it (@pxref{Comparison Ops}).
+with it (@pxref{Comparison Ops, ,Comparison Expressions}).
@item
More complete documentation of many of the previously undocumented
features of the language.
@end itemize
-@node POSIX/GNU, Obsolete, POSIX, Language History
-@section Extensions In @code{gawk} Not In POSIX @code{awk}
-@c
-@c @section Extensions In @code{gawk} Not In @sc{POSIX} @code{awk}
+@node POSIX/GNU, , POSIX, Language History
+@section Extensions in @code{gawk} not in POSIX @code{awk}
The GNU implementation, @code{gawk}, adds these features:
@itemize @bullet
@item
The @code{AWKPATH} environment variable for specifying a path search for
-the @samp{-f} command line option (@pxref{Command Line}).
+the @samp{-f} command line option
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
@item
The various @code{gawk} specific features available via the @samp{-W}
-command line option (@pxref{Command Line}).
+command line option (@pxref{Command Line, ,Invoking @code{awk}}).
+
+@item
+The @code{IGNORECASE} variable and its effects
+(@pxref{Case-sensitivity, ,Case-sensitivity in Matching}).@refill
@item
-The @code{IGNORECASE} variable and its effects (@pxref{Case-sensitivity}).
+The @code{FIELDWIDTHS} variable and its effects
+(@pxref{Constant Size, ,Reading Fixed-width Data}).@refill
@item
-The @code{FIELDWIDTHS} variable and its effects (@pxref{Constant Size}).
+The @code{next file} statement for skipping to the next data file
+(@pxref{Next File Statement, ,The @code{next file} Statement}).@refill
@item
The @code{systime} and @code{strftime} built-in functions for obtaining
-and printing time stamps (@pxref{Time Functions}).
+and printing time stamps
+(@pxref{Time Functions, ,Functions for Dealing with Time Stamps}).@refill
@item
The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr}, and
-@file{/dev/fd/@var{n}} file name interpretation (@pxref{Special Files}).
+@file{/dev/fd/@var{n}} file name interpretation
+(@pxref{Special Files, ,Standard I/O Streams}).@refill
@item
-The @samp{-W compat} option to turn off these extensions (@pxref{Command Line}).
+The @samp{-W compat} option to turn off these extensions
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
@item
-The @samp{-W posix} option for full @sc{POSIX} compliance
-(@pxref{Command Line}).@refill
+The @samp{-W posix} option for full @sc{posix} compliance
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
@end itemize
-@node Obsolete, Undocumented, POSIX/GNU, Language History
-@section Obsolete Options and/or Features
-
-@cindex deprecated options
-@cindex obsolete options
-@cindex deprecated features
-@cindex obsolete features
-This section describes features and/or command line options from the
-previous release of @code{gawk} that are either not available in the
-current version, or that are still supported but deprecated (meaning that
-they will @emph{not} be in the next release).
-
-@c update this section for each release!
-
-For version 2.13 of @code{gawk}, the following command line options
-are recognized, but produce a warning message (@pxref{Command Line}).
-
-@table @samp
-@ignore
-@item -nostalgia
-Use @samp{-W nostalgia} instead.
-@end ignore
-
-@item -c
-Use @samp{-W compat} instead.
-
-@item -V
-Use @samp{-W version} instead.
-
-@item -C
-Use @samp{-W copyright} instead.
-
-@item -a
-@itemx -e
-These options produce a warning message but have no effect on the
-execution of @code{gawk}. The @sc{POSIX} standard now specifies
-traditional @code{awk} regular expressions for the @code{awk} utility.
-@end table
-
-@node Undocumented, , Obsolete, Language History
-@section Undocumented Options and Features
-
-This section intentionally left blank.
-
-@c Read The Source, Luke!
-
-@ignore
-@c If these came out in the Info file or TeX manual, then they wouldn't
-@c be undocumented, would they?
-
-@code{gawk} has one undocumented option:
-
-@table @samp
-@item -W nostalgia
-Print the message @code{"awk: bailing out near line 1"} and dump core.
-This option was inspired by the common behavior of very early versions of
-Unix @code{awk}, and by a t--shirt.
-@end table
-
-Early versions of @code{awk} used to not require any separator (either
-a newline or @samp{;}) between the rules in @code{awk} programs. Thus,
-it was common to see one-line programs like:
-
-@example
-awk '@{ sum += $1 @} END @{ print sum @}'
-@end example
-
-@code{gawk} actually supports this, but it is purposely undocumented
-since it is considered bad style. The correct way to write such a program
-is either
-
-@example
-awk '@{ sum += $1 @} ; END @{ print sum @}'
-@end example
-
-@noindent
-or
-
-@example
-awk '@{ sum += $1 @}
- END @{ print sum @}' data
-@end example
-
-@noindent
-@xref{Statements/Lines}, for a fuller explanation.
-
-As an accident of the implementation of the original Unix @code{awk}, if
-a built-in function used @code{$0} as its default argument, it was possible
-to call that function without the parentheses. In particular, it was
-common practice to use the @code{length} function in this fashion.
-For example, the pipeline:
-
-@example
-echo abcdef | awk '@{ print length @}'
-@end example
-
-@noindent
-would print @samp{6}.
-
-For backwards compatibility with old programs, @code{gawk} supports
-this usage, but only for the @code{length} function. New programs should
-@emph{not} call the @code{length} function this way. In particular,
-this usage will not be portable to other @sc{POSIX} compliant versions
-of @code{awk}. It is also poor style.
-
-@end ignore
-
@node Installation, Gawk Summary, Language History, Top
@chapter Installing @code{gawk}
@@ -8352,35 +8740,28 @@ distribution lists the electronic mail addresses of the people who did
the respective ports.@refill
@menu
-* Gawk Distribution:: What is in the @code{gawk} distribution.
-
-* Unix Installation:: Installing @code{gawk} under various versions
- of Unix.
-
-* VMS Installation:: Installing @code{gawk} on VMS.
-
-* MS-DOS Installation:: Installing @code{gawk} on MS-DOS.
-
-* Atari Installation:: Installing @code{gawk} on the Atari ST.
+* Gawk Distribution:: What is in the @code{gawk} distribution.
+* Unix Installation:: Installing @code{gawk} under various versions
+ of Unix.
+* VMS Installation:: Installing @code{gawk} on VMS.
+* MS-DOS Installation:: Installing @code{gawk} on MS-DOS.
+* Atari Installation:: Installing @code{gawk} on the Atari ST.
@end menu
@node Gawk Distribution, Unix Installation, Installation, Installation
@section The @code{gawk} Distribution
-@ifinfo
This section first describes how to get and extract the @code{gawk}
distribution, and then discusses what is in the various files and
subdirectories.
-@end ifinfo
@menu
-* Extracting:: How to get and extract the distribution.
-
-* Distribution contents:: What is in the distribution.
+* Extracting:: How to get and extract the distribution.
+* Distribution contents:: What is in the distribution.
@end menu
@node Extracting, Distribution contents, Gawk Distribution, Gawk Distribution
-@subsection Getting The @code{gawk} Distribution
+@subsection Getting the @code{gawk} Distribution
@cindex getting gawk
@cindex anonymous ftp
@@ -8393,7 +8774,7 @@ Like all GNU software, it will be archived at other well known systems,
from which it will be possible to use some sort of anonymous @code{uucp} to
obtain the distribution as well.
-Once you have the distribution (for example, @file{gawk-2.13.3.tar.Z}), first
+Once you have the distribution (for example, @file{gawk-2.14.0.tar.Z}), first
use @code{uncompress} to expand the file, and then use @code{tar} to extract it.
@code{uncompress} usually has a link named @code{zcat}, which causes it
to decompress the file to the standard output. You can use the following
@@ -8401,16 +8782,16 @@ pipeline to produce the @code{gawk} distribution:
@example
# Under System V, add 'o' to the tar flags
-zcat gawk-2.13.3.tar.Z | tar -xvpf -
+zcat gawk-2.14.0.tar.Z | tar -xvpf -
@end example
@noindent
-This will create a directory named @file{gawk-2.13} in the current
+This will create a directory named @file{gawk-2.14} in the current
directory.
-The distribution file name is of the form @file{gawk-2.13.@var{n}.tar.Z}.
+The distribution file name is of the form @file{gawk-2.14.@var{n}.tar.Z}.
The @var{n} represents a @dfn{patchlevel}, meaning that minor bugs have
-been fixed in the major release. The current patchlevel is 3, but when
+been fixed in the major release. The current patchlevel is 0, but when
retrieving distributions, you should get the version with the highest
patchlevel.@refill
@@ -8419,17 +8800,18 @@ for getting and extracting the @code{gawk} distribution. You should consult
a local expert.
@node Distribution contents, , Extracting, Gawk Distribution
-@subsection Contents Of The @code{gawk} Distribution
+@subsection Contents of the @code{gawk} Distribution
@code{gawk} has a number of C source files, documentation files,
subdirectories and files related to the configuration process
-(@pxref{Unix Installation}), and several subdirectories related to
-different, non-Unix, operating systems.@refill
+(@pxref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}),
+and several subdirectories related to different, non-Unix,
+operating systems.@refill
@table @asis
-@item The C and YACC source files
-The various @samp{.c}, @samp{.y}, and @samp{.h} files are the actual
-@code{gawk} source code.
+@item various @samp{.c}, @samp{.y}, and @samp{.h} files
+
+The C and YACC source files are the actual @code{gawk} source code.
@end table
@table @file
@@ -8448,7 +8830,7 @@ have successfully run the test suite.
@item ACKNOWLEDGMENT
A list of the people who contributed major parts of the code or documentation.
-@item CHANGES
+@item NEWS
A list of changes to @code{gawk} since the last release or patch.
@item COPYING
@@ -8482,28 +8864,27 @@ It should be processed with @TeX{} to produce a printed manual, and
with @code{makeinfo} to produce the Info file.@refill
@end iftex
-@item Makefile-dist
+@item Makefile.in
@itemx config
-@itemx config.h-dist
+@itemx config.in
@itemx configure
@itemx missing
-@itemx mkconf
@itemx mungeconf
These files and subdirectories are used when configuring @code{gawk}
for various Unix systems. They are explained in detail in
-@ref{Unix Installation}.@refill
+@ref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}.@refill
@item atari
Files needed for building @code{gawk} on an Atari ST.
-@xref{Atari Installation}, for details.
+@xref{Atari Installation, ,Installing @code{gawk} on the Atari ST}, for details.
@item pc
Files needed for building @code{gawk} under MS-DOS.
-@xref{MS-DOS Installation}, for details.
+@xref{MS-DOS Installation, ,Installing @code{gawk} on MS-DOS}, for details.
@item vms
-Files needed for building @code{gawk} under VAX VMS.
-@xref{VMS Installation}, for details.
+Files needed for building @code{gawk} under VMS.
+@xref{VMS Installation, ,Compiling Installing and Running @code{gawk} on VMS}, for details.
@item test
Many interesting @code{awk} programs, provided as a test suite for
@@ -8515,25 +8896,28 @@ be confident of a successful port.@refill
@end table
@node Unix Installation, VMS Installation, Gawk Distribution, Installation
-@section Compiling And Installing @code{gawk} on Unix
+@section Compiling and Installing @code{gawk} on Unix
-@menu
-* Quick Installation:: Compiling @code{gawk} on a supported Unix version.
-
-* Configuration Philosophy:: How it's all supposed to work.
+Often, you can compile and install @code{gawk} by typing only two
+commands. However, if you do not use a supported system, you may need
+to configure @code{gawk} for your system yourself.
-* New Configurations:: What to do if there is no supplied configuration
- for your system.
+@menu
+* Quick Installation:: Compiling @code{gawk} on a
+ supported Unix version.
+* Configuration Philosophy:: How it's all supposed to work.
+* New Configurations:: What to do if there is no supplied
+ configuration for your system.
@end menu
@node Quick Installation, Configuration Philosophy, Unix Installation, Unix Installation
-@subsection Compiling @code{gawk} For A Supported Unix Version
+@subsection Compiling @code{gawk} for a Supported Unix Version
@cindex installation, unix
After you have extracted the @code{gawk} distribution, @code{cd}
-to @file{gawk-2.13}. Look in the @file{config} subdirectory for a
+to @file{gawk-2.14}. Look in the @file{config} subdirectory for a
file that matches your hardware/software combination. In general,
-only the software is relevant, for example @code{sunos41} is used
+only the software is relevant; for example @code{sunos41} is used
for SunOS 4.1, on both Sun 3 and Sun 4 hardware.@refill
If you find such a file, run the command:
@@ -8568,12 +8952,12 @@ C language and the Unix operating system.)
The source code for @code{gawk} generally attempts to adhere to industry
standards wherever possible. This means that @code{gawk} uses library
-routines that are specified by the @sc{ANSI} C standard and by the @sc{POSIX}
-operating system interface standard. When using an @sc{ANSI} C compiler,
+routines that are specified by the @sc{ansi} C standard and by the @sc{posix}
+operating system interface standard. When using an @sc{ansi} C compiler,
function prototypes are provided to help improve the compile-time checking.
-Many older Unix systems do not support all of either the @sc{ANSI} or the
-@sc{POSIX} standards. The @file{missing} subdirectory in the @code{gawk}
+Many older Unix systems do not support all of either the @sc{ansi} or the
+@sc{posix} standards. The @file{missing} subdirectory in the @code{gawk}
distribution contains replacement versions of those subroutines that are
most likely to be missing.
@@ -8597,7 +8981,7 @@ Conditionally compiled code in the other source files relies on the
other definitions in the @file{config.h} file.
Besides creating @file{config.h}, @code{configure} produces a @file{Makefile}
-from @file{Makefile-dist}. There are a number of lines in @file{Makefile-dist}
+from @file{Makefile.in}. There are a number of lines in @file{Makefile.in}
that are system or feature specific. For example, there is a line that begins
with @samp{##MAKE_ALLOCA_C##}. This is normally a comment line, since
it starts with @samp{#}. If a configuration file has @samp{MAKE_ALLOCA_C}
@@ -8606,14 +8990,8 @@ from the beginning of the line. This will enable the rules in the
@file{Makefile} that use a C version of @samp{alloca}. There are several
similar features that work in this fashion.@refill
-The file @file{mkconf} is a link to @file{configure}. This name for
-the program is left over from an earlier patchlevel of @code{gawk} 2.13.
-For the next release of @code{gawk}, the distribution will comply fully
-with the GNU standards for software distributions. @file{Makefile-dist}
-will be renamed @file{Makefile.in}, and @file{mkconf} will go away.@refill
-
@node New Configurations, , Configuration Philosophy, Unix Installation
-@subsection Configuring @code{gawk} For A New System
+@subsection Configuring @code{gawk} for a New System
(This section is of interest only if you know something about using the
C language and the Unix operating system, and if you have to install
@@ -8621,9 +8999,10 @@ C language and the Unix operating system, and if you have to install
If you are a C or Unix novice, get help from a local expert.)
If you need to configure @code{gawk} for a Unix system that is not
-supported in the distribution, first see @ref{Configuration Philosophy}.
-Then, copy @file{config.h-dist} to @file{config.h}, and copy
-@file{Makefile-dist} to @file{Makefile}.@refill
+supported in the distribution, first see
+@ref{Configuration Philosophy, ,The Configuration Process}.
+Then, copy @file{config.in} to @file{config.h}, and copy
+@file{Makefile.in} to @file{Makefile}.@refill
Next, edit both files. Both files are liberally commented, and the
necessary changes should be straightforward.
@@ -8648,25 +9027,24 @@ luck whatsoever, please report your system type, and the steps you took.
Once you do have a working configuration, please send it to the maintainers
so that support for your system can be added to the official release.
-@xref{Bugs}, for information on how to report problems in configuring
-@code{gawk}. You may also use the same mechanisms for sending in new
-configurations.@refill
+@xref{Bugs, ,Reporting Problems and Bugs}, for information on how to report
+problems in configuring @code{gawk}. You may also use the same mechanisms
+for sending in new configurations.@refill
@node VMS Installation, MS-DOS Installation, Unix Installation, Installation
@section Compiling, Installing, and Running @code{gawk} on VMS
@c based on material from
-@c Pat Rankin <gawk.rankin@eql.caltech.edu>
+@c Pat Rankin <rankin@eql.caltech.edu>
@cindex installation, vms
This section describes how to compile and install @code{gawk} under VMS.
@menu
-* VMS Compilation:: How to compile @code{gawk} under VMS.
-
-* VMS Installation Details:: How to install @code{gawk} under VMS.
-
-* VMS Running:: How to run @code{gawk} under VMS.
+* VMS Compilation:: How to compile @code{gawk} under VMS.
+* VMS Installation Details:: How to install @code{gawk} under VMS.
+* VMS Running:: How to run @code{gawk} under VMS.
+* VMS POSIX:: Alternate instructions for VMS POSIX.
@end menu
@node VMS Compilation, VMS Installation Details, VMS Installation, VMS Installation
@@ -8677,16 +9055,16 @@ will issue all the necessary @code{CC} and @code{LINK} commands, and there is
also a @file{Makefile} for use with the @code{MMS} utility. From the source
directory, use either
-@example
+@smallexample
$ @@[.VMS]VMSBUILD.COM
-@end example
+@end smallexample
@noindent
or
-@example
+@smallexample
$ MMS/DESCRIPTION=[.VMS]DESCRIP.MMS GAWK
-@end example
+@end smallexample
Depending upon which C compiler you are using, follow one of the sets
of instructions in this table:
@@ -8694,10 +9072,10 @@ of instructions in this table:
@table @asis
@item VAX C V3.x
Use either @file{vmsbuild.com} or @file{descrip.mms} as is. These use
-@code{CC/OPTIMIZE=NOLINE}, which is essential for version 3.0.
+@code{CC/OPTIMIZE=NOLINE}, which is essential for Version 3.0.
@item VAX C V2.x
-You must have version 2.3 or 2.4; older ones won't work. Edit either
+You must have Version 2.3 or 2.4; older ones won't work. Edit either
@file{vmsbuild.com} or @file{descrip.mms} according to the comments in them.
For @file{vmsbuild.com}, this just entails removing two @samp{!} delimiters.
Also edit @file{config.h} (which is a copy of file @file{[.config]vms-conf.h})
@@ -8708,11 +9086,14 @@ and comment out or delete the two lines @samp{#define __STDC__ 0} and
Edit @file{vmsbuild.com} or @file{descrip.mms}; the changes are different
from those for VAX C V2.x, but equally straightforward. No changes to
@file{config.h} should be needed.
+
+@item DEC C
+Edit @file{vmsbuild.com} or @file{descrip.mms} according to their comments.
+No changes to @file{config.h} should be needed.
@end table
-@code{gawk} 2.13 has been tested under VMS V5.3 and V5.4-2 using VAX C V3.2,
-V3.1, and V2.3 and also GNU C V1.39. It should work without modifications for
-VMS V4.6 and up.
+@code{gawk} 2.14 has been tested under VAX/VMS 5.5-1 using VAX C V3.2,
+GNU C 1.40 and 2.3. It should work without modifications for VMS V4.6 and up.
@node VMS Installation Details, VMS Running, VMS Compilation, VMS Installation
@subsection Installing @code{gawk} on VMS
@@ -8720,9 +9101,9 @@ VMS V4.6 and up.
To install @code{gawk}, all you need is a ``foreign'' command, which is
a @code{DCL} symbol whose value begins with a dollar sign.
-@example
+@smallexample
$ GAWK :== $device:[directory]GAWK
-@end example
+@end smallexample
@noindent
(Substitute the actual location of @code{gawk.exe} for
@@ -8735,14 +9116,16 @@ to run @code{gawk}.@refill
Optionally, the help entry can be loaded into a VMS help library:
-@example
+@smallexample
$ LIBRARY/HELP SYS$HELP:HELPLIB [.VMS]GAWK.HLP
-@end example
+@end smallexample
@noindent
(You may want to substitute a site-specific help library rather than
the standard VMS library @samp{HELPLIB}.) After loading the help text,
+@c this is so tiny, but `should' be smallexample for consistency's sake...
+@c I didn't because it was so short. --mew 29jan1992
@example
$ HELP GAWK
@end example
@@ -8761,7 +9144,7 @@ then @code{gawk} appends the suffix @samp{.awk} to the filename and the
file search will be re-tried. If @samp{AWK_LIBRARY} is not defined, that
portion of the file search will fail benignly.@refill
-@node VMS Running, , VMS Installation Details, VMS Installation
+@node VMS Running, VMS POSIX, VMS Installation Details, VMS Installation
@subsection Running @code{gawk} on VMS
Command line parsing and quoting conventions are significantly different
@@ -8771,10 +9154,10 @@ should run correctly.
Here are a couple of trivial tests:
-@example
+@smallexample
$ gawk -- "BEGIN @{print ""Hello, World!""@}"
$ gawk -"W" version ! could also be -"W version" or "-W version"
-@end example
+@end smallexample
@noindent
Note that upper-case and mixed-case text must be quoted.
@@ -8795,6 +9178,37 @@ of @samp{AWKPATH} is a comma-separated list of directory specifications.
When defining it, the value should be quoted so that it retains a single
translation, and not a multi-translation @code{RMS} searchlist.
+@node VMS POSIX, , VMS Running, VMS Installation
+@subsection Building and Using @code{gawk} under VMS POSIX
+
+Ignore the instructions above, although @file{vms/gawk.hlp} should still
+be made available in a help library. Make sure that the two scripts,
+@file{configure} and @file{mungeconf}, are executable; use @samp{chmod +x}
+on them if necessary. Then execute the following commands:
+
+@smallexample
+$ POSIX
+psx> configure vms-posix
+psx> make awktab.c gawk
+@end smallexample
+
+@noindent
+The first command will construct files @file{config.h} and @file{Makefile}
+out of templates. The second command will compile and link @code{gawk}.
+Due to a @code{make} bug in VMS POSIX V1.0 and V1.1,
+the file @file{awktab.c} must be given as an explicit target or it will
+not be built and the final link step will fail. Ignore the warning
+@samp{"Could not find lib m in lib list"}; it is harmless, caused by the
+explicit use of @samp{-lm} as a linker option which is not needed
+under VMS POSIX. Under V1.1 (but not V1.0) a problem with the @code{yacc}
+skeleton @file{/etc/yyparse.c} will cause a compiler warning for
+@file{awktab.c}, followed by a linker warning about compilation warnings
+in the resulting object module. These warnings can be ignored.@refill
+
+Once built, @code{gawk} will work like any other shell utility. Unlike
+the normal VMS port of @code{gawk}, no special command line manipulation is
+needed in the VMS POSIX environment.
+
@node MS-DOS Installation, Atari Installation, VMS Installation, Installation
@section Installing @code{gawk} on MS-DOS
@@ -8803,9 +9217,8 @@ The first step is to get all the files in the @code{gawk} distribution
onto your PC. Move all the files from the @file{pc} directory into
the main directory where the other files are. Edit the file
@file{make.bat} so that it will be an acceptable MS-DOS batch file.
-This means making sure that all lines are terminated with ASCII
-Carriage Return and Line Feed characters. Rename the Unix file
-@file{awk.tab.c} to @file{awktab.c}, since MS-DOS has file naming
+This means making sure that all lines are terminated with the ASCII
+carriage return and line feed characters.
restrictions.
@code{gawk} has only been compiled with version 5.1 of the Microsoft
@@ -8831,10 +9244,10 @@ models (STe, TT) as well.
In order to use @code{gawk}, you need to have a shell, either text or
graphics, that does not map all the characters of a command line to
upper case. Maintaining case distinction in option flags is very
-important (@pxref{Command Line}). Popular shells like @code{gulam}
-or @code{gemini} will work, as will newer versions of @code{desktop}.
-Support for I/O redirection is necessary to make it easy to import
-@code{awk} programs from other environments. Pipes are nice to have,
+important (@pxref{Command Line, ,Invoking @code{awk}}). Popular shells
+like @code{gulam} or @code{gemini} will work, as will newer versions of
+@code{desktop}. Support for I/O redirection is necessary to make it easy
+to import @code{awk} programs from other environments. Pipes are nice to have,
but not vital.
If you have received an executable version of @code{gawk}, place it,
@@ -8845,13 +9258,14 @@ While executing, @code{gawk} creates a number of temporary files.
or @code{TMPDIR}, in that order. If either one is found, its value
is assumed to be a directory for temporary files. This directory
must exist, and if you can spare the memory, it is a good idea to
-put it on a @sc{RAM} drive. If neither @code{TEMP} nor @code{TMPDIR}
+put it on a @sc{ram} drive. If neither @code{TEMP} nor @code{TMPDIR}
is found, then @code{gawk} uses the current directory for its
temporary files.
The ST version of @code{gawk} searches for its program files as
-described in @ref{AWKPATH Variable}. On the ST, the default value for
-@code{AWKPATH} is @code{@w{".,c:\lib\awk,c:\gnu\lib\awk"}}.
+described in @ref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}.
+On the ST, the default value for the @code{AWKPATH} variable is
+@code{@w{".,c:\lib\awk,c:\gnu\lib\awk"}}.
The search path can be modified by explicitly setting @code{AWKPATH} to
whatever you wish. Note that colons cannot be used on the ST to separate
elements in the @code{AWKPATH} variable, since they have another, reserved,
@@ -8892,7 +9306,7 @@ strings have to be doubled in order to get literal backslashes.
The initial port of @code{gawk} to the ST was done with @code{gcc}.
If you wish to recompile @code{gawk} from scratch, you will need to use
-a compiler that accepts @sc{ANSI} standard C (such as @code{gcc}, Turbo C,
+a compiler that accepts @sc{ansi} standard C (such as @code{gcc}, Turbo C,
or Prospero C). If @code{sizeof(int) != @w{sizeof(int *)}}, the correctness
of the generated code depends heavily on the fact that all function calls
have function prototypes in the current scope. If your compiler does
@@ -8941,11 +9355,13 @@ This appendix provides a brief summary of the @code{gawk} command line and the
therefore terse, but complete.
@menu
-* Command Line Summary:: Recapitulation of the command line.
-* Language Summary:: A terse review of the language.
-* Variables/Fields:: Variables, fields, and arrays.
-* Rules Summary:: Patterns and Actions, and their component parts.
-* Functions Summary:: Defining and calling functions.
+* Command Line Summary:: Recapitulation of the command line.
+* Language Summary:: A terse review of the language.
+* Variables/Fields:: Variables, fields, and arrays.
+* Rules Summary:: Patterns and Actions, and their
+ component parts.
+* Functions Summary:: Defining and calling functions.
+* Historical Features:: Some undocumented but supported ``features''.
@end menu
@node Command Line Summary, Language Summary, Gawk Summary, Gawk Summary
@@ -8981,8 +9397,8 @@ Specifies compatibility mode, in which @code{gawk} extensions are turned
off.
@item -W posix
-Specifies @sc{POSIX} compatibility mode, in which @code{gawk} extensions
-are turned off, and additional restrictions apply.
+Specifies @sc{posix} compatibility mode, in which @code{gawk} extensions
+are turned off and additional restrictions apply.
@item -W version
Print version information for this particular copy of @code{gawk} on the error
@@ -8999,11 +9415,11 @@ Give warnings about dubious or non-portable @code{awk} constructs.
@item --
Signal the end of options. This is useful to allow further arguments to the
@code{awk} program itself to start with a @samp{-}. This is mainly for
-consistency with the argument parsing conventions of @sc{POSIX}.
+consistency with the argument parsing conventions of @sc{posix}.
@end table
Any other options are flagged as invalid, but are otherwise ignored.
-@xref{Command Line}, for more details.
+@xref{Command Line, ,Invoking @code{awk}}, for more details.
@node Language Summary, Variables/Fields, Command Line Summary, Gawk Summary
@appendixsec Language Summary
@@ -9026,14 +9442,17 @@ order they are specified. This is useful for building libraries of
@code{awk} functions, without having to include them in each new
@code{awk} program that uses them. To use a library function in a file
from a program typed in on the command line, specify @samp{-f /dev/tty};
-then type your program, and end it with a @kbd{Control-d}. @xref{Command Line}.
+then type your program, and end it with a @kbd{Control-d}.
+@xref{Command Line, ,Invoking @code{awk}}.@refill
The environment variable @code{AWKPATH} specifies a search path to use
-when finding source files named with the @samp{-f} option. The default path
+when finding source files named with the @samp{-f} option. The default
+path, which is
@samp{.:/usr/lib/awk:/usr/local/lib/awk}, is used if @code{AWKPATH} is not set.
If a file name given to the @samp{-f} option contains a @samp{/} character,
-no path search is performed. @xref{AWKPATH Variable}, for a full description
-of the @code{AWKPATH} environment variable.@refill
+no path search is performed.
+@xref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable},
+for a full description of the @code{AWKPATH} environment variable.@refill
@code{gawk} compiles the program into an internal form, and then proceeds to
read each file named in the @code{ARGV} array. If there are no files named
@@ -9059,10 +9478,10 @@ may be simulated. There are several predefined variables that
@code{awk} sets as a program runs; these are summarized below.
@menu
-* Fields Summary:: Input field splitting.
-* Built-in Summary:: @code{awk}'s built-in variables.
-* Arrays Summary:: Using arrays.
-* Data Type Summary:: Values in @code{awk} are numbers or strings.
+* Fields Summary:: Input field splitting.
+* Built-in Summary:: @code{awk}'s built-in variables.
+* Arrays Summary:: Using arrays.
+* Data Type Summary:: Values in @code{awk} are numbers or strings.
@end menu
@node Fields Summary, Built-in Summary, Variables/Fields, Variables/Fields
@@ -9074,8 +9493,8 @@ separator. If @code{FS} is a single character, fields are separated by
that character. Otherwise, @code{FS} is expected to be a full regular
expression. In the special case that @code{FS} is a single blank,
fields are separated by runs of blanks and/or tabs. Note that the value
-of @code{IGNORECASE} (@pxref{Case-sensitivity}) also affects how fields
-are split when @code{FS} is a regular expression.
+of @code{IGNORECASE} (@pxref{Case-sensitivity, ,Case-sensitivity in Matching})
+also affects how fields are split when @code{FS} is a regular expression.@refill
Each field in the input line may be referenced by its position, @code{$1},
@code{$2}, and so on. @code{$0} is the whole line. The value of a field may
@@ -9097,8 +9516,8 @@ intervening fields with the null string as their value, and causes the
value of @code{$0} to be recomputed, with the fields being separated by
the value of @code{OFS}.@refill
-@xref{Reading Files}, for a full description of the way @code{awk} defines
-and uses fields.
+@xref{Reading Files, ,Reading Input Files}, for a full description of the
+way @code{awk} defines and uses fields.
@node Built-in Summary, Arrays Summary, Fields Summary, Variables/Fields
@appendixsubsec Built-in Variables
@@ -9187,7 +9606,7 @@ The string used to separate multiple subscripts in array elements, by
default @code{"\034"}.
@end table
-@xref{Built-in Variables}.
+@xref{Built-in Variables}, for more information.
@node Arrays Summary, Data Type Summary, Built-in Summary, Variables/Fields
@appendixsubsec Arrays
@@ -9195,7 +9614,8 @@ default @code{"\034"}.
Arrays are subscripted with an expression between square brackets
(@samp{[} and @samp{]}). Array subscripts are @emph{always} strings;
numbers are converted to strings as necessary, following the standard
-conversion rules (@pxref{Conversion}).@refill
+conversion rules
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).@refill
If you use multiple expressions separated by commas inside the square
brackets, then the array subscript is a string consisting of the
@@ -9206,24 +9626,23 @@ The special operator @code{in} may be used in an @code{if} or
@code{while} statement to see if an array has an index consisting of a
particular value.
-@group
@example
if (val in array)
print array[val]
@end example
-@end group
If the array has multiple subscripts, use @code{(i, j, @dots{}) in array}
to test for existence of an element.
The @code{in} construct may also be used in a @code{for} loop to iterate
-over all the elements of an array. @xref{Scanning an Array}.
+over all the elements of an array.
+@xref{Scanning an Array, ,Scanning all Elements of an Array}.@refill
An element may be deleted from an array using the @code{delete} statement.
-@xref{Arrays}, for more detailed information.
+@xref{Arrays, ,Arrays in @code{awk}}, for more detailed information.
-@node Data Type Summary, , Arrays Summary, Variables/Fields
+@node Data Type Summary, , Arrays Summary, Variables/Fields
@appendixsubsec Data Types
The value of an @code{awk} expression is always either a number
@@ -9236,7 +9655,8 @@ numeral, it converts to 0.
Certain contexts (such as concatenation) require string values.
They convert numbers to strings by effectively printing them
-with @code{sprintf}. @xref{Conversion}, for the details.
+with @code{sprintf}.
+@xref{Conversion, ,Conversion of Strings and Numbers}, for the details.@refill
To force conversion of a string value to a number, simply add 0
to it. If the value you start with is already a number, this
@@ -9255,16 +9675,16 @@ empty, string). In contexts where a number is required, this is
equivalent to 0.
@xref{Variables}, for more information on variable naming and initialization;
-@pxref{Conversion}, for more information on how variable values are
-interpreted.@refill
+@pxref{Conversion, ,Conversion of Strings and Numbers}, for more information
+on how variable values are interpreted.@refill
@node Rules Summary, Functions Summary, Variables/Fields, Gawk Summary
@appendixsec Patterns and Actions
@menu
-* Pattern Summary:: Quick overview of patterns.
-* Regexp Summary:: Quick overview of regular expressions.
-* Actions Summary:: Quick overview of actions.
+* Pattern Summary:: Quick overview of patterns.
+* Regexp Summary:: Quick overview of regular expressions.
+* Actions Summary:: Quick overview of actions.
@end menu
An @code{awk} program is mostly composed of rules, each consisting of a
@@ -9291,11 +9711,12 @@ ending it with a @samp{\}, in which case the newline is ignored.@refill
Multiple statements may be put on one line by separating them with a @samp{;}.
This applies to both the statements within the action part of a rule (the
-usual case), and to the rule statements themselves.
+usual case), and to the rule statements.
-@xref{Comments}, for information on @code{awk}'s commenting convention;
-@pxref{Statements/Lines}, for a description of the line continuation
-mechanism in @code{awk}.
+@xref{Comments, ,Comments in @code{awk} Programs}, for information on
+@code{awk}'s commenting convention;
+@pxref{Statements/Lines, ,@code{awk} Statements versus Lines}, for a
+description of the line continuation mechanism in @code{awk}.@refill
@node Pattern Summary, Regexp Summary, Rules Summary, Rules Summary
@appendixsubsec Patterns
@@ -9401,12 +9822,13 @@ matches zero or one @var{r}'s.
matches @var{r} (grouping).
@end table
-@xref{Regexp}, for a more detailed explanation of regular expressions.
+@xref{Regexp, ,Regular Expressions as Patterns}, for a more detailed
+explanation of regular expressions.
The escape sequences allowed in string constants are also valid in
-regular expressions (@pxref{Constants}).
+regular expressions (@pxref{Constants, ,Constant Expressions}).
-@node Actions Summary, , Regexp Summary, Rules Summary
+@node Actions Summary, , Regexp Summary, Rules Summary
@appendixsubsec Actions
Action statements are enclosed in braces, @samp{@{} and @samp{@}}.
@@ -9429,7 +9851,7 @@ and input/output statements available are patterned after those in C.
@node Operator Summary, Control Flow Summary, Actions Summary, Actions Summary
@appendixsubsubsec Operators
-The operators in @code{awk}, in order of increasing precedence, are
+The operators in @code{awk}, in order of increasing precedence, are:
@table @code
@item = += -= *= /= %= ^=
@@ -9468,7 +9890,7 @@ Unary plus, unary minus, and logical negation.
@item ^
Exponentiation (@samp{**} may also be used, and @samp{**=} for the assignment
-operator, but they are not specified in the @sc{POSIX} standard).
+operator, but they are not specified in the @sc{posix} standard).
@item ++ --
Increment and decrement, both prefix and postfix.
@@ -9477,8 +9899,10 @@ Increment and decrement, both prefix and postfix.
Field reference.
@end table
-@xref{Expressions}, for a full description of all the operators listed
-above. @xref{Fields}, for a description of the field reference operator.
+@xref{Expressions, ,Expressions as Action Statements}, for a full
+description of all the operators listed above.
+@xref{Fields, ,Examining Fields}, for a description of the field
+reference operator.@refill
@node Control Flow Summary, I/O Summary, Operator Summary, Actions Summary
@appendixsubsubsec Control Statements
@@ -9498,8 +9922,8 @@ exit @r{[} @var{expression} @r{]}
@{ @var{statements} @}
@end example
-@xref{Statements}, for a full description of all the control statements
-listed above.
+@xref{Statements, ,Control Statements in Actions}, for a full description
+of all the control statements listed above.
@node I/O Summary, Printf Summary, Control Flow Summary, Actions Summary
@appendixsubsubsec I/O Statements
@@ -9525,6 +9949,13 @@ processing starts over with the first pattern in the @code{awk} program.
If the end of the input data is reached, the @code{END} rule(s), if any,
are executed.
+@item next file
+Stop processing the current input file. The next input record read comes
+from the next input file. @code{FILENAME} is updated, @code{FNR} is set to 1,
+and processing starts over with the first pattern in the @code{awk} program.
+If the end of the input data is reached, the @code{END} rule(s), if any,
+are executed.
+
@item print
Prints the current record.
@@ -9547,10 +9978,11 @@ and @samp{| @var{command}} writes on a pipe. In a similar fashion,
@samp{@var{command} | getline} pipes input into @code{getline}.
@code{getline} returns 0 on end of file, and @minus{}1 on an error.@refill
-@xref{Getline}, for a full description of the @code{getline} statement.
-@xref{Printing}, for a full description of @code{print} and
-@code{printf}. Finally, @pxref{Next Statement}, for a description of
-how the @code{next} statement works.@refill
+@xref{Getline, ,Explicit Input with @code{getline}}, for a full description
+of the @code{getline} statement.
+@xref{Printing, ,Printing Output}, for a full description of @code{print} and
+@code{printf}. Finally, @pxref{Next Statement, ,The @code{next} Statement},
+for a description of how the @code{next} statement works.@refill
@node Printf Summary, Special File Summary, I/O Summary, Actions Summary
@appendixsubsubsec @code{printf} Summary
@@ -9617,7 +10049,8 @@ Either or both of the @var{width} and @var{prec} values may be specified
as @samp{*}. In that case, the particular value is taken from the argument
list.
-@xref{Printf}, for examples and for a more detailed description.
+@xref{Printf, ,Using @code{printf} Statements for Fancier Printing}, for
+examples and for a more detailed description.
@node Special File Summary, Numeric Functions Summary, Printf Summary, Actions Summary
@appendixsubsubsec Special File Names
@@ -9645,8 +10078,8 @@ The file denoted by the open file descriptor @var{n}.
@noindent
These file names may also be used on the command line to name data files.
-@xref{Special Files}, for a longer description that provides the motivation
-for this feature.
+@xref{Special Files, ,Standard I/O Streams}, for a longer description that
+provides the motivation for this feature.
@node Numeric Functions Summary, String Functions Summary, Special File Summary, Actions Summary
@appendixsubsubsec Numeric Functions
@@ -9745,19 +10178,20 @@ The following two functions are available for getting the current
time of day, and for formatting time stamps.
@table @code
-@item systime
+@item systime()
returns the current time of day as the number of seconds since a particular
-epoch (Midnight, January 1, 1970 UTC, on @sc{POSIX} systems).
+epoch (Midnight, January 1, 1970 @sc{utc}, on @sc{posix} systems).
@item strftime(@var{format}, @var{timestamp})
formats @var{timestamp} according to the specification in @var{format}.
The current time of day is used if no @var{timestamp} is supplied.
-@xref{Time Functions}, for the details on the conversion specifiers
-that @code{strftime} accepts.
+@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for the
+details on the conversion specifiers that @code{strftime} accepts.@refill
@end table
@iftex
-@xref{Built-in}, for a description of all of @code{awk}'s built-in functions.
+@xref{Built-in, ,Built-in Functions}, for a description of all of
+@code{awk}'s built-in functions.
@end iftex
@node String Constants Summary, , Time Functions Summary, Actions Summary
@@ -9794,11 +10228,11 @@ Vertical tab.
@item \x@var{hex digits}
The character represented by the string of hexadecimal digits following
-the @samp{\x}. As in @sc{ANSI} C, all following hexadecimal digits are
+the @samp{\x}. As in @sc{ansi} C, all following hexadecimal digits are
considered part of the escape sequence. (This feature should tell us
something about language design by committee.) E.g., @code{"\x1B"} is a
string containing the ASCII ESC (escape) character. (The @samp{\x}
-escape sequence is not in @sc{POSIX} @code{awk}.)
+escape sequence is not in @sc{posix} @code{awk}.)
@item \@var{ddd}
The character represented by the 1-, 2-, or 3-digit sequence of octal
@@ -9813,9 +10247,9 @@ The escape sequences may also be used inside constant regular expressions
(e.g., the regexp @code{@w{/[@ \t\f\n\r\v]/}} matches whitespace
characters).@refill
-@xref{Constants}.
+@xref{Constants, ,Constant Expressions}.
-@node Functions Summary, , Rules Summary, Gawk Summary
+@node Functions Summary, Historical Features, Rules Summary, Gawk Summary
@appendixsec Functions
Functions in @code{awk} are defined as follows:
@@ -9837,11 +10271,41 @@ immediately follow the function name, without any intervening white space.
This is to avoid a syntactic ambiguity with the concatenation operator.
The word @code{func} may be used in place of @code{function} (but not in
-@sc{POSIX} @code{awk}).
+@sc{posix} @code{awk}).
Use the @code{return} statement to return a value from a function.
-@xref{User-defined}, for a more complete description.
+@xref{User-defined, ,User-defined Functions}, for a more complete description.
+
+@node Historical Features, , Functions Summary, Gawk Summary
+@appendixsec Historical Features
+
+There are two features of historical @code{awk} implementations that
+@code{gawk} supports. First, it is possible to call the @code{length}
+built-in function not only with no arguments, but even without parentheses!
+
+@example
+a = length
+@end example
+
+@noindent
+is the same as either of
+
+@example
+a = length()
+a = length($0)
+@end example
+
+@noindent
+This feature is marked as ``deprecated'' in the @sc{posix} standard, and
+@code{gawk} will issue a warning about its use if @samp{-W lint} is
+specified on the command line.
+
+The other feature is the use of the @code{continue} statement outside the
+body of a @code{while}, @code{for}, or @code{do} loop. Traditional
+@code{awk} implementations have treated such usage as equivalent to the
+@code{next} statement. @code{gawk} will support this usage if @samp{-W posix}
+has not been specified.
@node Sample Program, Bugs, Gawk Summary, Top
@appendix Sample Program
@@ -9870,9 +10334,10 @@ END @{
The first thing to notice about this program is that it has two rules. The
first rule, because it has an empty pattern, is executed on every line of
-the input. It uses @code{awk}'s field-accessing mechanism (@pxref{Fields})
-to pick out the individual words from the line, and the built-in variable
-@code{NF} (@pxref{Built-in Variables}) to know how many fields are available.
+the input. It uses @code{awk}'s field-accessing mechanism
+(@pxref{Fields, ,Examining Fields}) to pick out the individual words from
+the line, and the built-in variable @code{NF} (@pxref{Built-in Variables})
+to know how many fields are available.@refill
For each input word, an element of the array @code{freq} is incremented to
reflect that the word has been seen an additional time.@refill
@@ -9907,7 +10372,7 @@ table of how frequently each word occurs.@refill
The way to solve these problems is to use some of the more advanced
features of the @code{awk} language. First, we use @code{tolower} to remove
case distinctions. Next, we use @code{gsub} to remove punctuation
-characters. Finally, we the system @code{sort} utility to process the
+characters. Finally, we use the system @code{sort} utility to process the
output of the @code{awk} script. First, here is the new version of
the program:@refill
@@ -9943,7 +10408,7 @@ frequency table, which is not ordered.
The @code{awk} script's output is then sorted by the @code{sort} command and
printed on the terminal. The options given to @code{sort} in this example
-specify to sort by the second field of each input line (skipping one field),
+specify to sort using the second field of each input line (skipping one field),
that the sort keys should be treated as numeric quantities (otherwise
@samp{15} would come before @samp{5}), and that the sorting should be done
in descending (reverse) order.@refill
@@ -9978,9 +10443,11 @@ function that I have written.}
@appendix Reporting Problems and Bugs
@c This chapter stolen shamelessly from the GNU m4 manual.
+@c This chapter has been unshamelessly altered to emulate changes made to
+@c make.texi from whence it was originally shamelessly stolen! :-} --mew
If you have problems with @code{gawk} or think that you have found a bug,
-please report it to the developers; we don't promise to do anything
+please report it to the developers; we cannot promise to do anything
but we might well want to fix it.
Before reporting a bug, make sure you have actually found a real bug.
@@ -10001,16 +10468,43 @@ Once you have a precise problem, send e-mail to (Internet)
version number of @code{gawk} you are using. You can get this information
with the command @samp{gawk -W version '@{@}' /dev/null}.
You should send carbon copies of your mail to David Trueman at
-@samp{david@@cs.dal.ca}, and to Arnold Robbins, at
+@samp{david@@cs.dal.ca}, and to Arnold Robbins, who can be reached at
@samp{arnold@@skeeve.atl.ga.us}. David is most likely to fix code
problems, while Arnold is most likely to fix documentation problems.@refill
Non-bug suggestions are always welcome as well. If you have questions
about things that are unclear in the documentation or are just obscure
-features, ask Arnold Robbins; he'll be happy to help you out (but no
-promises). You can send him electronic mail at the Internet address
+features, ask Arnold Robbins; he will try to help you out, although he
+may not have the time to fix the problem. You can send him electronic mail at the Internet address
above.
+If you find bugs in one of the non-Unix ports of @code{gawk}, please send
+an electronic mail message to the person who maintains that port. They
+are listed below, and also in the @file{README} file in the @code{gawk}
+distribution. Information in the @file{README} file should be considered
+authoritative if it conflicts with this manual.
+
+The people maintaining the non-Unix ports of @code{gawk} are:
+
+@table @asis
+@item MS-DOS
+The port to MS-DOS is maintained by Scott Deifik.
+His electronic mail address is @samp{scottd@@amgen.com}.
+
+@item VMS
+The port to VAX VMS is maintained by Pat Rankin.
+His electronic mail address is @samp{rankin@@eql.caltech.edu}.
+
+@item Atari ST
+The port to the Atari ST is maintained by Michal Jaegermann.
+His electronic mail address is @samp{ntomczak@@vm.ucs.ualberta.ca}.
+
+@end table
+
+If your bug is also reproducible under Unix, please send copies of your
+report to the general GNU bug list, as well as to Arnold Robbins and David
+Trueman, at the addresses listed above.
+
@node Notes, Glossary, Bugs, Top
@appendix Implementation Notes
@@ -10019,20 +10513,18 @@ maintainers of @code{gawk}. Everything in it applies specifically to
@code{gawk}, and not to other implementations.
@menu
-* Compatibility Mode:: How to disable certain @code{gawk} extensions.
-
-* Future Extensions:: New features we may implement soon.
-
-* Improvements:: Suggestions for improvements by volunteers.
+* Compatibility Mode:: How to disable certain @code{gawk} extensions.
+* Future Extensions:: New features we may implement soon.
+* Improvements:: Suggestions for improvements by volunteers.
@end menu
@node Compatibility Mode, Future Extensions, Notes, Notes
-@appendixsec Downwards Compatibility and Debugging
+@appendixsec Downward Compatibility and Debugging
-@xref{POSIX/GNU}, for a summary of the GNU extensions to the @code{awk}
-language and program. All of these features can be turned off by
-invoking @code{gawk} with the @samp{-W compat} option, or with the
-@samp{-W posix} option.@refill
+@xref{POSIX/GNU, ,Extensions in @code{gawk} not in POSIX @code{awk}},
+for a summary of the GNU extensions to the @code{awk} language and program.
+All of these features can be turned off by invoking @code{gawk} with the
+@samp{-W compat} option, or with the @samp{-W posix} option.@refill
If @code{gawk} is compiled for debugging with @samp{-DDEBUG}, then there
is one more option available on the command line:
@@ -10061,30 +10553,46 @@ The meaning of @code{RS} may be generalized along the lines of @code{FS}.
Changes made in @code{gawk} to the array @code{ENVIRON} may be
propagated to subprocesses run by @code{gawk}.
-@item Data bases
+@item Databases
It may be possible to map a GDBM/NDBM/SDBM file into an @code{awk} array.
@item Single-character fields
The null string, @code{""}, as a field separator, will cause field
-splitting and the split function to separate individual characters.
+splitting and the @code{split} function to separate individual characters.
Thus, @code{split(a, "abcd", "")} would yield @code{a[1] == "a"},
@code{a[2] == "b"}, and so on.
@item More @code{lint} warnings
There are more things that could be checked for portability.
+@item @code{ARGIND} variable to indicate the position in @code{ARGV}
+It would occasionally be useful to know which element in @code{ARGV}
+is the current file being processed. It is not sufficient to simply
+loop through @code{ARGV} comparing each element to @code{FILENAME},
+particularly if a program makes more than one pass through a single
+data file. Initially @code{ARGIND} would be a read-only variable.
+That is, @code{gawk} would set it for you as each file is processed, but
+would ignore any changes that your program made to it.@refill
+@ignore
+Would it make sense down the road to nuke `next file' in favor of
+semantics that would make this work?
+
+ function nextfile() { ARGIND++ ; next }
+@end ignore
+
@item @code{RECLEN} variable for fixed length records
Along with @code{FIELDWIDTHS}, this would speed up the processing of
fixed-length records.
+@item @code{RT} variable to hold the record terminator
+It is occasionally useful to have access to the actual string of
+characters that matched the @code{RS} variable. The @code{RT}
+variable would hold these characters.
+
@item A @code{restart} keyword
After modifying @code{$0}, @code{restart} would restart the pattern
matching loop, without reading a new record from the input.
-@item A @code{nextfile} keyword
-This would be like @code{next}, but instead of abandoning the current
-input record, it would abandon the entire current input file.
-
@item A @samp{|&} redirection
The @samp{|&} redirection, in place of @samp{|}, would open a two-way
pipeline for communication with a sub-process (via @code{getline} and
@@ -10094,6 +10602,17 @@ pipeline for communication with a sub-process (via @code{getline} and
The effects of the @code{IGNORECASE} variable may be generalized to
all string comparisons, and not just regular expression operations.
+@item A way to mix command line source code and library files
+There may be a new option that would make it possible to easily use library
+functions from a program entered on the command line.
+@c probably a @samp{-s} option...
+
+@item GNU-style long options
+We will add GNU-style long options
+to @code{gawk} for compatibility with other GNU programs.
+(For example, @samp{--field-separator=:} would be equivalent to
+@samp{-F:}.)@refill
+
@c this is @emph{very} long term --- not worth including right now.
@ignore
@item The C Comma Operator
@@ -10104,7 +10623,7 @@ result is thrown away. The value of the full expression is the value of
@end ignore
@end table
-@node Improvements,, Future Extensions, Notes
+@node Improvements, , Future Extensions, Notes
@appendixsec Suggestions for Improvements
Here are some projects that would-be @code{gawk} hackers might like to take
@@ -10117,9 +10636,9 @@ project.@refill
@item
Compilation of @code{awk} programs: @code{gawk} uses a Bison (YACC-like)
parser to convert the script given it into a syntax tree; the syntax
-tree is then executed by a simple recursive evaluator. Both of these
-steps incur a lot of overhead, since parsing can be slow and the recursive
-evaluator performs many procedure calls to do even the simplest things.@refill
+tree is then executed by a simple recursive evaluator. This method incurs
+a lot of overhead, since the recursive evaluator performs many procedure
+calls to do even the simplest things.@refill
It should be possible for @code{gawk} to convert the script's parse tree
into a C program which the user would then compile, using the normal
@@ -10153,7 +10672,7 @@ See the @file{FUTURES} file for more ideas. Contact us if you would
seriously like to tackle any of the items listed there.
@end enumerate
-@node Glossary, Index , Notes, Top
+@node Glossary, Index, Notes, Top
@appendix Glossary
@table @asis
@@ -10161,7 +10680,7 @@ seriously like to tackle any of the items listed there.
A series of @code{awk} statements attached to a rule. If the rule's
pattern matches an input record, the @code{awk} language executes the
rule's action. Actions are always enclosed in curly braces.
-@xref{Actions}.@refill
+@xref{Actions, ,Overview of Actions}.@refill
@item Amazing @code{awk} Assembler
Henry Spencer at the University of Toronto wrote a retargetable assembler
@@ -10170,14 +10689,14 @@ machine descriptions for several 8-bit microcomputers. It is distributed
with @code{gawk} (as part of the test suite) and is a good example of a
program that would have been better written in another language.@refill
-@item @sc{ANSI}
+@item @sc{ansi}
The American National Standards Institute. This organization produces
many standards, among them the standard for the C programming language.
@item Assignment
An @code{awk} expression that changes the value of some @code{awk}
variable or data object. An object that you can assign to is called an
-@dfn{lvalue}. @xref{Assignment Ops}.@refill
+@dfn{lvalue}. @xref{Assignment Ops, ,Assignment Expressions}.@refill
@item @code{awk} Language
The language in which @code{awk} programs are written.
@@ -10195,13 +10714,14 @@ Another name for an @code{awk} program.
The @code{awk} language provides built-in functions that perform various
numerical, time stamp related, and string computations. Examples are
@code{sqrt} (for the square root of a number) and @code{substr} (for a
-substring of a string). @xref{Built-in}.@refill
+substring of a string). @xref{Built-in, ,Built-in Functions}.@refill
@item Built-in Variable
-The variables @code{ARGC}, @code{ARGV}, @code{CONVFMT}, @code{FIELDWIDTHS},
+@code{ARGC}, @code{ARGV}, @code{CONVFMT}, @code{FIELDWIDTHS},
@code{ENVIRON}, @code{FILENAME}, @code{FNR}, @code{FS}, @code{IGNORECASE},
@code{NF}, @code{NR}, @code{OFMT}, @code{OFS}, @code{ORS},
-@code{RLENGTH}, @code{RSTART}, @code{RS}, and @code{SUBSEP}, have special
+@code{RLENGTH}, @code{RSTART}, @code{RS}, and @code{SUBSEP},
+are the variables that have special
meaning to @code{awk}. Changing some of them affects @code{awk}'s running
environment. @xref{Built-in Variables}.@refill
@@ -10216,17 +10736,18 @@ points out similarities between @code{awk} and C when appropriate.@refill
@item CHEM
A preprocessor for @code{pic} that reads descriptions of molecules
and produces @code{pic} input for drawing them. It was written by
-Brian Kernighan, and is distributed with the @code{gawk} test suite.@refill
+Brian Kernighan, and is available from @code{netlib@@research.att.com}.@refill
@item Compound Statement
A series of @code{awk} statements, enclosed in curly braces. Compound
-statements may be nested. @xref{Statements}.@refill
+statements may be nested.
+@xref{Statements, ,Control Statements in Actions}.@refill
@item Concatenation
Concatenating two strings means sticking them together, one after another,
giving a new string. For example, the string @samp{foo} concatenated with
the string @samp{bar} gives the string @samp{foobar}.
-@xref{Concatenation}.@refill
+@xref{Concatenation, ,String Concatenation}.@refill
@item Conditional Expression
An expression using the @samp{?:} ternary operator, such as
@@ -10234,19 +10755,19 @@ An expression using the @samp{?:} ternary operator, such as
@var{expr1} is evaluated; if the result is true, the value of the whole
expression is the value of @var{expr2} otherwise the value is
@var{expr3}. In either case, only one of @var{expr2} and @var{expr3}
-is evaluated. @xref{Conditional Exp}.@refill
+is evaluated. @xref{Conditional Exp, ,Conditional Expressions}.@refill
@item Constant Regular Expression
A constant regular expression is a regular expression written within
slashes, such as @samp{/foo/}. This regular expression is chosen
when you write the @code{awk} program, and cannot be changed during
-its execution. @xref{Regexp Usage}.
+its execution. @xref{Regexp Usage, ,How to Use Regular Expressions}.
@item Comparison Expression
A relation that is either true or false, such as @code{(a < b)}.
Comparison expressions are used in @code{if}, @code{while}, and @code{for}
statements, and in patterns to select which input records to process.
-@xref{Comparison Ops}.@refill
+@xref{Comparison Ops, ,Comparison Expressions}.@refill
@item Curly Braces
The characters @samp{@{} and @samp{@}}. Curly braces are used in
@@ -10255,44 +10776,53 @@ bodies.@refill
@item Data Objects
These are numbers and strings of characters. Numbers are converted into
-strings and vice versa, as needed. @xref{Conversion}.@refill
+strings and vice versa, as needed.
+@xref{Conversion, ,Conversion of Strings and Numbers}.@refill
@item Dynamic Regular Expression
A dynamic regular expression is a regular expression written as an
ordinary expression. It could be a string constant, such as
@code{"foo"}, but it may also be an expression whose value may vary.
-@xref{Regexp Usage}.
+@xref{Regexp Usage, ,How to Use Regular Expressions}.
@item Escape Sequences
A special sequence of characters used for describing nonprinting
characters, such as @samp{\n} for newline, or @samp{\033} for the ASCII
-ESC (escape) character. @xref{Constants}.
+ESC (escape) character. @xref{Constants, ,Constant Expressions}.
@item Field
When @code{awk} reads an input record, it splits the record into pieces
separated by whitespace (or by a separator regexp which you can
change by setting the built-in variable @code{FS}). Such pieces are
called fields. If the pieces are of fixed length, you can use the built-in
-variable @code{FIELDWIDTHS} to describe their lengths. @xref{Records}.@refill
+variable @code{FIELDWIDTHS} to describe their lengths.
+@xref{Records, ,How Input is Split into Records}.@refill
@item Format
Format strings are used to control the appearance of output in the
@code{printf} statement. Also, data conversions from numbers to strings
are controlled by the format string contained in the built-in variable
-@code{CONVFMT}. @xref{Control Letters}.@refill
+@code{CONVFMT}. @xref{Control Letters, ,Format-Control Letters}.@refill
@item Function
A specialized group of statements often used to encapsulate general
or program-specific tasks. @code{awk} has a number of built-in
-functions, and also allows you to define your own. @xref{Built-in};
-also @pxref{User-defined}.
+functions, and also allows you to define your own.
+@xref{Built-in, ,Built-in Functions}.
+Also, see @ref{User-defined, ,User-defined Functions}.@refill
@item @code{gawk}
The GNU implementation of @code{awk}.
+@item GNU
+``GNU's not Unix''. An on-going project of the Free Software Foundation
+to create a complete, freely distributable, @sc{posix}-compliant computing
+environment.
+
@item Input Record
A single chunk of data read in by @code{awk}. Usually, an @code{awk} input
-record consists of one line of text. @xref{Records}.@refill
+record consists of one line of text.
+@xref{Records, ,How Input is Split into Records}.@refill
@item Keyword
In the @code{awk} language, a keyword is a word that has special
@@ -10332,8 +10862,8 @@ tested. If the condition is satisfied, the pattern is said to @dfn{match}
the input record. A typical pattern might compare the input record against
a regular expression. @xref{Patterns}.@refill
-@item @sc{POSIX}
-The name for a series of standards being developed by the @sc{IEEE}
+@item @sc{posix}
+The name for a series of standards being developed by the @sc{ieee}
that specify a Portable Operating System interface. The ``IX'' denotes
the Unix heritage of these standards. The main standard of interest for
@code{awk} users is P1003.2, the Command Language and Utilities standard.
@@ -10354,7 +10884,8 @@ stream, or output to other than the standard output stream.
You can redirect the output of the @code{print} and @code{printf} statements
to a file or a system command, using the @samp{>}, @samp{>>}, and @samp{|}
operators. You can redirect input to the @code{getline} statement using
-the @samp{<} and @samp{|} operators. @xref{Redirection}.@refill
+the @samp{<} and @samp{|} operators.
+@xref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}.@refill
@item Regular Expression
See ``regexp.''
@@ -10365,7 +10896,7 @@ set of strings, possibly an infinite set. For example, the regexp
@samp{R.*xp} matches any string starting with the letter @samp{R}
and ending with the letters @samp{xp}. In @code{awk}, regexps are
used in patterns and in conditional expressions. Regexps may contain
-escape sequences. @xref{Regexp}.@refill
+escape sequences. @xref{Regexp, ,Regular Expressions as Patterns}.@refill
@item Rule
A segment of an @code{awk} program, that specifies how to process single
@@ -10377,12 +10908,12 @@ Otherwise, the rule does nothing for that input record.@refill
@item Side Effect
A side effect occurs when an expression has an effect aside from merely
producing a value. Assignment expressions, increment expressions and
-function calls have side effects. @xref{Assignment Ops}.
+function calls have side effects. @xref{Assignment Ops, ,Assignment Expressions}.
@item Special File
A file name interpreted internally by @code{gawk}, instead of being handed
directly to the underlying operating system. For example, @file{/dev/stdin}.
-@xref{Special Files}.
+@xref{Special Files, ,Standard I/O Streams}.
@item Stream Editor
A program that reads records from an input stream and processes them one
@@ -10395,14 +10926,14 @@ user.@refill
A datum consisting of a sequence of characters, such as @samp{I am a
string}. Constant strings are written with double-quotes in the
@code{awk} language, and may contain escape sequences.
-@xref{Constants}.
+@xref{Constants, ,Constant Expressions}.
@item Whitespace
A sequence of blank or tab characters occurring inside an input record or a
string.@refill
@end table
-@node Index, , Glossary, Top
+@node Index, , Glossary, Top
@unnumbered Index
@printindex cp
@@ -10412,7 +10943,7 @@ string.@refill
Unresolved Issues:
------------------
-1. From: ntomczak@vm.ucs.ualberta.ca
+1. From: ntomczak@vm.ucs.ualberta.ca (Michal Jaegermann)
Examples of usage tend to suggest that /../ and ".." delimiters
can be used for regular expressions, even if definition is consistently
using /../. I am not sure what the real rules are and in particular
@@ -10425,3 +10956,53 @@ Unresolved Issues:
'/\(/ { print }'
You may also try to use "\(" as an argument to match() to see what
will happen.
+
+2. From ADR.
+
+ The posix (and original Unix!) notion of awk values as both number
+ and string values needs to be put into the manual. This involves
+ major and minor rewrites of most of the manual, but should help in
+ clarifying many of the weirder points of the language.
+
+3. From ADR.
+
+ The manual should be reorganized. Expressions should be introduced
+ early, building up to regexps as expressions, and from there to their
+ use as patterns and then in actions. Built-in vars should come earlier
+ in the manual too. The 'expert info' sections marked with comments
+ should get their own sections or subsections with nodes and titles.
+ The manual should be gone over thoroughly for indexing.
+
+4. From ADR.
+
+ Robert J. Chassell points out that awk programs should have some indication
+ of how to use them. It would be useful to perhaps have a "programming
+ style" section of the manual that would include this and other tips.
+
+5. From ADR in response to moraes@uunet.ca
+ (This would make the beginnings of a good "puzzles" section...)
+
+ Date: Mon, 2 Dec 91 10:08:05 EST
+ From: gatech!cc!arnold (Arnold Robbins)
+ To: cs.dal.ca!david, uunet.ca!moraes
+ Subject: redirecting to /dev/stderr
+ Cc: skeeve!arnold, boeing.com!brennan, research.att.com!bwk
+
+ In 2.13.3 the following program no longer dumps core:
+
+ BEGIN { print "hello" > /dev/stderr ; exit(1) }
+
+ Instead, it creates a file named `0' with the word `hello' in it. AWK
+ semantics strikes again. The meaning of the statement is
+
+ print "hello" > (($0 ~ /dev/) stderr)
+
+ /dev/ tests $0 for the pattern `dev'. This yields a 0. The variable stderr,
+ having never been used, has a null string in it. The concatenation yields
+ a string value of "0" which is used as the file name. Sigh.
+
+ I think with some more time I can come up with a decent fix, but it will
+ probably only print a diagnostic with -Wlint.
+
+ Arnold
+
diff --git a/getopt.c b/getopt.c
new file mode 100644
index 00000000..bbf345c3
--- /dev/null
+++ b/getopt.c
@@ -0,0 +1,662 @@
+/* Getopt for GNU.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
+ before changing it!
+
+ Copyright (C) 1987, 88, 89, 90, 91, 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifdef GAWK
+#include "config.h"
+#endif
+
+#include <stdio.h>
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#include <string.h>
+#endif /* GNU C library. */
+
+
+#ifndef __STDC__
+#define const
+#endif
+
+/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a
+ long-named option. Because this is not POSIX.2 compliant, it is
+ being phased out. */
+#define GETOPT_COMPAT
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+ but it behaves differently for the user, since it allows the user
+ to intersperse the options with the other arguments.
+
+ As `getopt' works, it permutes the elements of ARGV so that,
+ when it is done, all the options precede everything else. Thus
+ all application programs are extended to handle flexible argument order.
+
+ Setting the environment variable POSIXLY_CORRECT disables permutation.
+ Then the behavior is completely standard.
+
+ GNU application programs can use a third alternative mode in which
+ they can distinguish the relative order of options and other arguments. */
+
+#include "getopt.h"
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+char *optarg = 0;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns EOF, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+int optind = 0;
+
+/* The next char to be scanned in the option-element
+ in which the last option character we returned was found.
+ This allows us to pick up the scan where we left off.
+
+ If this is zero, or a null string, it means resume the scan
+ by advancing to the next ARGV-element. */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+ for unrecognized options. */
+
+int opterr = 1;
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+ If the caller did not specify anything,
+ the default is REQUIRE_ORDER if the environment variable
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+ REQUIRE_ORDER means don't recognize them as options;
+ stop option processing when the first non-option is seen.
+ This is what Unix does.
+ This mode of operation is selected by either setting the environment
+ variable POSIXLY_CORRECT, or using `+' as the first character
+ of the list of option characters.
+
+ PERMUTE is the default. We permute the contents of ARGV as we scan,
+ so that eventually all the non-options are at the end. This allows options
+ to be given in any order, even with programs that were not written to
+ expect this.
+
+ RETURN_IN_ORDER is an option available to programs that were written
+ to expect options and other ARGV-elements in any order and that care about
+ the ordering of the two. We describe each non-option ARGV-element
+ as if it were the argument of an option with character code 1.
+ Using `-' as the first character of the list of option characters
+ selects this mode of operation.
+
+ The special argument `--' forces an end of option-scanning regardless
+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
+ `--' can cause `getopt' to return EOF with `optind' != ARGC. */
+
+static enum
+{
+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+#ifdef __GNU_LIBRARY__
+#include <string.h>
+#define my_index strchr
+#define my_bcopy(src, dst, n) memcpy ((dst), (src), (n))
+#else
+
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
+
+char *getenv ();
+
+/* Return a pointer to the first CHR in STRING, or 0 if there is none.
+   The scan stops at the terminating null, which is never matched.  */
+static char *
+my_index (string, chr)
+     char *string;
+     int chr;
+{
+  char *scan;
+  for (scan = string; *scan != '\0'; scan++)
+    if (*scan == chr)
+      return scan;
+  return 0;
+}
+
+/* Copy SIZE bytes from FROM to TO, lowest address first.  */
+static void
+my_bcopy (from, to, size)
+     char *from, *to;
+     int size;
+{
+  while (size-- > 0)
+    *to++ = *from++;
+}
+#endif /* GNU C library. */
+
+/* Handle permutation of arguments. */
+
+/* Describe the part of ARGV that contains non-options that have
+ been skipped. `first_nonopt' is the index in ARGV of the first of them;
+ `last_nonopt' is the index after the last of them. */
+
+static int first_nonopt;
+static int last_nonopt;
+
+/* Exchange two adjacent subsequences of ARGV.
+ One subsequence is elements [first_nonopt,last_nonopt)
+ which contains all the non-options that have been skipped so far.
+ The other is elements [last_nonopt,optind), which contains all
+ the options processed since those non-options were skipped.
+
+ `first_nonopt' and `last_nonopt' are relocated so that they describe
+ the new indices of the non-options in ARGV after they are moved. */
+
+static void
+exchange (argv)
+     char **argv;
+{
+  /* Rotate ARGV[first_nonopt..optind) in place using three reversals, so
+     that no scratch buffer has to be allocated (nothing here can fail):
+       pass 0 reverses the non-options  [first_nonopt, last_nonopt),
+       pass 1 reverses the options      [last_nonopt, optind),
+       pass 2 reverses the whole span   [first_nonopt, optind).
+     Afterwards the options come first and each group is back in its
+     original internal order.  */
+  int pass;
+
+  for (pass = 0; pass < 3; pass++)
+    {
+      int lo = (pass == 1) ? last_nonopt : first_nonopt;
+      int hi = ((pass == 0) ? last_nonopt : optind) - 1;
+
+      for (; lo < hi; lo++, hi--)
+        {
+          char *tem = argv[lo];
+          argv[lo] = argv[hi];
+          argv[hi] = tem;
+        }
+    }
+
+  /* Update records for the slots the non-options now occupy. */
+
+  first_nonopt += (optind - last_nonopt);
+  last_nonopt = optind;
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+ given in OPTSTRING.
+
+ If an element of ARGV starts with '-', and is not exactly "-" or "--",
+ then it is an option element. The characters of this element
+ (aside from the initial '-') are option characters. If `getopt'
+ is called repeatedly, it returns successively each of the option characters
+ from each of the option elements.
+
+ If `getopt' finds another option character, it returns that character,
+ updating `optind' and `nextchar' so that the next call to `getopt' can
+ resume the scan with the following option character or ARGV-element.
+
+ If there are no more option characters, `getopt' returns `EOF'.
+ Then `optind' is the index in ARGV of the first ARGV-element
+ that is not an option. (The ARGV-elements have been permuted
+ so that those that are not options now come last.)
+
+ OPTSTRING is a string containing the legitimate option characters.
+ If an option character is seen that is not listed in OPTSTRING,
+ return '?' after printing an error message. If you set `opterr' to
+ zero, the error message is suppressed but we still return '?'.
+
+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+ so the following text in the same ARGV-element, or the text of the following
+ ARGV-element, is returned in `optarg'. Two colons mean an option that
+ wants an optional arg; if there is text in the current ARGV-element,
+ it is returned in `optarg', otherwise `optarg' is set to zero.
+
+ If OPTSTRING starts with `-' or `+', it requests different methods of
+ handling the non-option ARGV-elements.
+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+ Long-named options begin with `--' instead of `-'.
+ Their names may be abbreviated as long as the abbreviation is unique
+ or is an exact match for some defined option. If they have an
+ argument, it follows the option name in the same ARGV-element, separated
+ from the option name by a `=', or else in the next ARGV-element.
+ When `getopt' finds a long-named option, it returns 0 if that option's
+ `flag' field is nonzero, the value of the option's `val' field
+ if the `flag' field is zero.
+
+ The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible
+ with other systems.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPTS of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options. */
+
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+ int argc;
+ char *const *argv;
+ const char *optstring;
+ const struct option *longopts;
+ int *longind;
+ int long_only;
+{
+ int option_index; /* index into LONGOPTS of the entry being examined */
+
+ optarg = 0;
+
+ /* Initialize the internal data when the first call is made.
+ Start processing options with ARGV-element 1 (since ARGV-element 0
+ is the program name); the sequence of previously skipped
+ non-option ARGV-elements is empty. */
+
+ if (optind == 0)
+ {
+ first_nonopt = last_nonopt = optind = 1;
+
+ nextchar = NULL;
+
+ /* Determine how to handle the ordering of options and nonoptions. */
+
+ if (optstring[0] == '-')
+ {
+ ordering = RETURN_IN_ORDER;
+ ++optstring;
+ }
+ else if (optstring[0] == '+')
+ {
+ ordering = REQUIRE_ORDER;
+ ++optstring;
+ }
+ else if (getenv ("POSIXLY_CORRECT") != NULL)
+ ordering = REQUIRE_ORDER;
+ else
+ ordering = PERMUTE;
+ }
+
+ if (nextchar == NULL || *nextchar == '\0')
+ {
+ if (ordering == PERMUTE)
+ {
+ /* If we have just processed some options following some non-options,
+ exchange them so that the options come first. */
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (last_nonopt != optind)
+ first_nonopt = optind;
+
+ /* Now skip any additional non-options
+ and extend the range of non-options previously skipped. */
+
+ while (optind < argc
+ && (argv[optind][0] != '-' || argv[optind][1] == '\0')
+#ifdef GETOPT_COMPAT
+ && (longopts == NULL
+ || argv[optind][0] != '+' || argv[optind][1] == '\0')
+#endif /* GETOPT_COMPAT */
+ )
+ optind++;
+ last_nonopt = optind;
+ }
+
+ /* Special ARGV-element `--' means premature end of options.
+ Skip it like a null option,
+ then exchange with previous non-options as if it were an option,
+ then skip everything else like a non-option. */
+
+ if (optind != argc && !strcmp (argv[optind], "--"))
+ {
+ optind++;
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (first_nonopt == last_nonopt)
+ first_nonopt = optind;
+ last_nonopt = argc;
+
+ optind = argc;
+ }
+
+ /* If we have done all the ARGV-elements, stop the scan
+ and back over any non-options that we skipped and permuted. */
+
+ if (optind == argc)
+ {
+ /* Set the next-arg-index to point at the non-options
+ that we previously skipped, so the caller will digest them. */
+ if (first_nonopt != last_nonopt)
+ optind = first_nonopt;
+ return EOF;
+ }
+
+ /* If we have come to a non-option and did not permute it,
+ either stop the scan or describe it to the caller and pass it by. */
+
+ if ((argv[optind][0] != '-' || argv[optind][1] == '\0')
+#ifdef GETOPT_COMPAT
+ && (longopts == NULL
+ || argv[optind][0] != '+' || argv[optind][1] == '\0')
+#endif /* GETOPT_COMPAT */
+ )
+ {
+ if (ordering == REQUIRE_ORDER)
+ return EOF;
+ optarg = argv[optind++];
+ return 1; /* code 1 = a non-option ARGV-element (RETURN_IN_ORDER) */
+ }
+
+ /* We have found another option-ARGV-element.
+ Start decoding its characters. */
+
+ nextchar = (argv[optind] + 1
+ + (longopts != NULL && argv[optind][1] == '-'));
+ }
+
+ if (longopts != NULL
+ && ((argv[optind][0] == '-'
+ && (argv[optind][1] == '-' || long_only))
+#ifdef GETOPT_COMPAT
+ || argv[optind][0] == '+'
+#endif /* GETOPT_COMPAT */
+ ))
+ {
+ const struct option *p;
+ char *s = nextchar; /* advanced below to the end of the option name */
+ int exact = 0;
+ int ambig = 0;
+ const struct option *pfound = NULL;
+ int indfound = 0;
+ extern int strncmp();
+
+ while (*s && *s != '=')
+ s++;
+
+ /* Test all options for either exact match or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name;
+ p++, option_index++)
+ if (!strncmp (p->name, nextchar, s - nextchar))
+ {
+ if (s - nextchar == strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else
+ /* Second nonexact match found. */
+ ambig = 1;
+ }
+
+ if (ambig && !exact)
+ {
+ if (opterr)
+ fprintf (stderr, "%s: option `%s' is ambiguous\n",
+ argv[0], argv[optind]);
+ nextchar += strlen (nextchar);
+ optind++;
+ return '?';
+ }
+
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ optind++; /* the long option uses up this whole ARGV-element */
+ if (*s)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ optarg = s + 1;
+ else
+ {
+ if (opterr)
+ {
+ if (argv[optind - 1][1] == '-')
+ /* --option */
+ fprintf (stderr,
+ "%s: option `--%s' doesn't allow an argument\n",
+ argv[0], pfound->name);
+ else
+ /* +option or -option */
+ fprintf (stderr,
+ "%s: option `%c%s' doesn't allow an argument\n",
+ argv[0], argv[optind - 1][0], pfound->name);
+ }
+ nextchar += strlen (nextchar);
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1) /* argument required and no `=': use the next ARGV-element */
+ {
+ if (optind < argc)
+ optarg = argv[optind++];
+ else
+ {
+ if (opterr)
+ fprintf (stderr, "%s: option `%s' requires an argument\n",
+ argv[0], argv[optind - 1]);
+ nextchar += strlen (nextchar);
+ return '?';
+ }
+ }
+ nextchar += strlen (nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+ /* Can't find it as a long option. If this is not getopt_long_only,
+ or the option starts with '--' or is not a valid short
+ option, then it's an error.
+ Otherwise interpret it as a short option. */
+ if (!long_only || argv[optind][1] == '-'
+#ifdef GETOPT_COMPAT
+ || argv[optind][0] == '+'
+#endif /* GETOPT_COMPAT */
+ || my_index (optstring, *nextchar) == NULL)
+ {
+ if (opterr)
+ {
+ if (argv[optind][1] == '-')
+ /* --option */
+ fprintf (stderr, "%s: unrecognized option `--%s'\n",
+ argv[0], nextchar);
+ else
+ /* +option or -option */
+ fprintf (stderr, "%s: unrecognized option `%c%s'\n",
+ argv[0], argv[optind][0], nextchar);
+ }
+ nextchar = (char *) "";
+ optind++;
+ return '?';
+ }
+ }
+
+ /* Look at and handle the next option-character. */
+
+ {
+ char c = *nextchar++; /* the option character being decoded */
+ char *temp = my_index (optstring, c);
+
+ /* Increment `optind' when we start to process its last character. */
+ if (*nextchar == '\0')
+ ++optind;
+
+ if (temp == NULL || c == ':')
+ {
+ if (opterr)
+ {
+ if (c < 040 || c >= 0177)
+ fprintf (stderr, "%s: unrecognized option, character code 0%o\n",
+ argv[0], c);
+ else
+ fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c);
+ }
+ return '?';
+ }
+ if (temp[1] == ':')
+ {
+ if (temp[2] == ':')
+ {
+ /* This is an option that accepts an argument optionally. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ optind++;
+ }
+ else
+ optarg = 0;
+ nextchar = NULL;
+ }
+ else
+ {
+ /* This is an option that requires an argument. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ optind++;
+ }
+ else if (optind == argc)
+ {
+ if (opterr)
+ fprintf (stderr, "%s: option `-%c' requires an argument\n",
+ argv[0], c);
+ c = '?';
+ }
+ else
+ /* We already incremented `optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ optarg = argv[optind++];
+ nextchar = NULL;
+ }
+ }
+ return c;
+ }
+}
+
+int
+getopt (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  /* Short options only: no long-option table, no LONGIND, LONG_ONLY off.  */
+  const struct option *no_longopts = 0;
+  int *no_longind = 0;
+  return _getopt_internal (argc, argv, optstring, no_longopts, no_longind, 0);
+}
+
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+ the above definition of `getopt'. */
+
+/* Test driver: parse ARGV with getopt, print every option it returns,
+   then print whatever non-option ARGV-elements are left.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  /* ARGV index of the element in which a digit option was last seen;
+     0 until the first digit option turns up.  */
+  int digit_optind = 0;
+
+  while (1)
+    {
+      /* `optind' is still 0 before the first call; scanning starts at 1.  */
+      int this_option_optind = optind ? optind : 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == EOF)
+        break;
+
+      switch (c)
+        {
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+          break;
+
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+        case 'd':
+          /* Both are declared with `:' in the option string, so each
+             arrives with its argument in `optarg'.  */
+          printf ("option %c with value `%s'\n", c, optarg);
+          break;
+
+        case '?':
+          /* getopt has already printed a diagnostic (opterr is nonzero).  */
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", c);
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+        printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  /* Return rather than call exit, which has no declaration in scope
+     unless <stdlib.h> was included.  */
+  return 0;
+}
+
+#endif /* TEST */
diff --git a/getopt.h b/getopt.h
new file mode 100644
index 00000000..5d4300fa
--- /dev/null
+++ b/getopt.h
@@ -0,0 +1,121 @@
+/* Declarations for getopt.
+ Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Library General Public License as published
+ by the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef _GETOPT_H
+#define _GETOPT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns EOF, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+ for unrecognized options. */
+
+extern int opterr;
+
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
+struct option
+{
+#if __STDC__
+ const char *name; /* option name; zero in the terminating element */
+#else
+ char *name; /* option name; zero in the terminating element */
+#endif
+ /* has_arg can't be an enum because some compilers complain about
+ type mismatches in all the code that assumes it is an int. */
+ int has_arg; /* no_argument, required_argument or optional_argument */
+ int *flag; /* if not NULL, *flag is set to `val' when the option is found */
+ int val; /* value returned for the option, or stored through `flag' */
+};
+
+/* Names for the values of the `has_arg' field of `struct option'. */
+
+enum _argtype
+{
+  no_argument = 0,        /* the option does not take an argument */
+  required_argument = 1,  /* the option requires an argument */
+  optional_argument = 2   /* the option takes an optional argument */
+};
+
+#if __STDC__
+extern int getopt (int argc, char *const *argv, const char *shortopts);
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
+ const struct option *longopts, int *longind);
+extern int getopt_long_only (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind);
+
+/* Internal only. Users should not call this directly. */
+extern int _getopt_internal (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind,
+ int long_only);
+#else /* not __STDC__ */
+extern int getopt ();
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+#endif /* not __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _GETOPT_H */
diff --git a/io.c b/io.c
index 163dc19b..478bd367 100644
--- a/io.c
+++ b/io.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@@ -35,36 +35,65 @@
#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1)
#endif
-static IOBUF *nextfile P((void));
+#if defined(MSDOS) || defined(atarist)
+#define PIPES_SIMULATED
+#endif
+
+static IOBUF *nextfile P((int skipping));
static int inrec P((IOBUF *iop));
static int iop_close P((IOBUF *iop));
struct redirect *redirect P((NODE *tree, int *errflg));
static void close_one P((void));
static int close_redir P((struct redirect *rp));
-#if (!defined(MSDOS)) && (!defined(atarist))
+#ifndef PIPES_SIMULATED
static int wait_any P((int interesting));
#endif
static IOBUF *gawk_popen P((char *cmd, struct redirect *rp));
static int gawk_pclose P((struct redirect *rp));
static int do_pathopen P((char *file));
+#ifndef MSDOS
+#ifndef _CRAY
+#ifndef VMS
+extern FILE *fdopen P((int, const char *));
+#else /* avoid conflicting prototype */
+extern FILE *fdopen();
+#endif /* VMS */
+#endif /* _CRAY */
+#endif /* MSDOS */
+
static struct redirect *red_head = NULL;
-static IOBUF *curfile = NULL;
extern int output_is_tty;
extern NODE *ARGC_node;
extern NODE *ARGV_node;
extern NODE **fields_arr;
+static jmp_buf filebuf; /* for nextfile() */
+
+void
+do_nextfile()
+{
+ (void) nextfile(1); /* skipping mode: close the current input file (if any) and open nothing */
+ longjmp(filebuf, 1); /* jump back to the setjmp(filebuf) in do_input(); never returns */
+}
+
static IOBUF *
-nextfile()
+nextfile(skipping)
+int skipping;
{
static int i = 1;
static int files = 0;
- char *arg;
+ NODE *arg;
int fd = INVALID_HANDLE;
static IOBUF *curfile = NULL;
+ if (skipping) {
+ if (curfile != NULL)
+ iop_close(curfile);
+ curfile = NULL;
+ return NULL;
+ }
if (curfile != NULL) {
if (curfile->cnt == EOF)
(void) iop_close(curfile);
@@ -72,20 +101,21 @@ nextfile()
return curfile;
}
for (; i < (int) (ARGC_node->lnode->numbr); i++) {
- arg = (*assoc_lookup(ARGV_node, tmp_number((AWKNUM) i)))->stptr;
- if (*arg == '\0')
+ arg = *assoc_lookup(ARGV_node, tmp_number((AWKNUM) i));
+ if (arg->stptr[0] == '\0')
continue;
- if (!arg_assign(arg)) {
+ arg->stptr[arg->stlen] = '\0';
+ if (!arg_assign(arg->stptr)) {
files++;
- fd = devopen(arg, "r");
+ fd = devopen(arg->stptr, "r");
if (fd == INVALID_HANDLE)
fatal("cannot open file `%s' for reading (%s)",
- arg, strerror(errno));
+ arg->stptr, strerror(errno));
/* NOTREACHED */
/* This is a kludge. */
unref(FILENAME_node->var_value);
FILENAME_node->var_value =
- make_string(arg, strlen(arg));
+ dupnode(arg);
FNR = 0;
i++;
break;
@@ -155,7 +185,13 @@ IOBUF *iop;
ret = 0;
else
#endif
- ret = close(iop->fd);
+ /* Don't close standard files or else crufty code elsewhere will lose */
+ if (iop->fd == fileno(stdin) ||
+ iop->fd == fileno(stdout) ||
+ iop->fd == fileno(stderr))
+ ret = 0;
+ else
+ ret = close(iop->fd);
if (ret == -1)
warning("close of fd %d failed (%s)", iop->fd, strerror(errno));
if (iop->buf)
@@ -170,7 +206,9 @@ do_input()
IOBUF *iop;
extern int exiting;
- while ((iop = nextfile()) != NULL) {
+ if (setjmp(filebuf) != 0) {
+ }
+ while ((iop = nextfile(0)) != NULL) {
if (inrec(iop) == 0)
while (interpret(expression_value) && inrec(iop) == 0)
;
@@ -193,33 +231,52 @@ int *errflg;
char *direction = "to";
char *mode;
int fd;
+ char *what = NULL;
switch (tree->type) {
case Node_redirect_append:
tflag = RED_APPEND;
+ /* FALL THROUGH */
case Node_redirect_output:
outflag = (RED_FILE|RED_WRITE);
tflag |= outflag;
+ if (tree->type == Node_redirect_output)
+ what = ">";
+ else
+ what = ">>";
break;
case Node_redirect_pipe:
tflag = (RED_PIPE|RED_WRITE);
+ what = "|";
break;
case Node_redirect_pipein:
tflag = (RED_PIPE|RED_READ);
+ what = "|";
break;
case Node_redirect_input:
tflag = (RED_FILE|RED_READ);
+ what = "<";
break;
default:
fatal ("invalid tree type %d in redirect()", tree->type);
break;
}
- tmp = force_string(tree_eval(tree->subnode));
+ tmp = tree_eval(tree->subnode);
+ if (do_lint && ! (tmp->flags & STR))
+ warning("expression in `%s' redirection only has numeric value",
+ what);
+ tmp = force_string(tmp);
str = tmp->stptr;
+ if (str == NULL || *str == '\0')
+ fatal("expression for `%s' redirection has null string value",
+ what);
+ if (do_lint
+ && (STREQN(str, "0", tmp->stlen) || STREQN(str, "1", tmp->stlen)))
+ warning("filename `%s' for `%s' redirection may be result of logical expression", str, what);
for (rp = red_head; rp != NULL; rp = rp->next)
if (strlen(rp->value) == tmp->stlen
&& STREQN(rp->value, str, tmp->stlen)
- && ((rp->flag & ~RED_NOBUF) == tflag
+ && ((rp->flag & ~(RED_NOBUF|RED_EOF)) == tflag
|| (outflag
&& (rp->flag & (RED_FILE|RED_WRITE)) == outflag)))
break;
@@ -243,6 +300,11 @@ int *errflg;
red_head = rp;
}
while (rp->fp == NULL && rp->iop == NULL) {
+ if (rp->flag & RED_EOF)
+ /* encountered EOF on file or pipe -- must be cleared
+ * by explicit close() before reading more
+ */
+ return rp;
mode = NULL;
errno = 0;
switch (tree->type) {
@@ -454,13 +516,11 @@ close_io ()
}
/* devopen --- handle /dev/std{in,out,err}, /dev/fd/N, regular files */
-
int
devopen (name, mode)
char *name, *mode;
{
int openfd = INVALID_HANDLE;
- FILE *fdopen ();
char *cp, *ptr;
int flag = 0;
struct stat buf;
@@ -482,10 +542,8 @@ char *name, *mode;
}
#ifdef VMS
- if ((openfd = vms_devopen(name)) >= 0)
+ if ((openfd = vms_devopen(name, flag)) >= 0)
return openfd;
-# define strcmp strcasecmp /* VMS filenames are not case sensitive; */
-# define strncmp strncasecmp /* strncmp() is used by STREQN() below. */
#endif /*VMS*/
if (STREQ(name, "-"))
@@ -493,7 +551,6 @@ char *name, *mode;
else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) {
cp = name + 5;
- /* XXX - first three tests ignore mode */
if (STREQ(cp, "stdin") && (flag & O_RDONLY) == O_RDONLY)
openfd = fileno(stdin);
else if (STREQ(cp, "stdout") && (flag & O_WRONLY) == O_WRONLY)
@@ -502,7 +559,7 @@ char *name, *mode;
openfd = fileno(stderr);
else if (STREQN(cp, "fd/", 3)) {
cp += 3;
- openfd = strtol(cp, &ptr, 10);
+ openfd = (int)strtod(cp, &ptr);
if (openfd <= INVALID_HANDLE || ptr == cp)
openfd = INVALID_HANDLE;
#ifdef VMS
@@ -510,22 +567,18 @@ char *name, *mode;
name = "NL:"; /* "/dev/null" => "NL:" */
} else if (STREQ(cp, "tty")) {
name = "TT:"; /* "/dev/tty" => "TT:" */
-# undef strcmp
-# undef strncmp
#endif /*VMS*/
}
}
- if (openfd != INVALID_HANDLE)
- return openfd;
- else
- return open(name, flag, 0666);
+ if (openfd == INVALID_HANDLE)
+ openfd = open(name, flag, 0666);
+ if (openfd != INVALID_HANDLE && fstat(openfd, &buf) == 0) /* fstat() returns 0 on success, -1 on error -- never > 0 */
+ if ((buf.st_mode & S_IFMT) == S_IFDIR)
+ fatal("file `%s' is a directory", name); /* refuse to treat a directory as a data file */
+ return openfd;
}
-#if defined(MSDOS) || defined(atarist)
-#define PIPES_SIMULATED
-#endif
-
#ifndef PIPES_SIMULATED
/* real pipes */
static int
@@ -578,7 +631,12 @@ struct redirect *rp;
int p[2];
register int pid;
- (void) wait_any(0); /* wait for outstanding processes */
+ /* used to wait for any children to synchronize input and output,
+ * but this could cause gawk to hang when it is started in a pipeline
+ * and thus has a child process feeding it input (shell dependent)
+ */
+ /*(void) wait_any(0);*/ /* wait for outstanding processes */
+
if (pipe(p) < 0)
fatal("cannot open pipe \"%s\" (%s)", cmd, strerror(errno));
if ((pid = fork()) == 0) {
@@ -589,6 +647,9 @@ struct redirect *rp;
fatal("dup of pipe failed (%s)", strerror(errno));
if (close(p[0]) == -1 || close(p[1]) == -1)
fatal("close of pipe failed (%s)", strerror(errno));
+ if (close(0) == -1)
+ fatal("close of stdin in child failed (%s)",
+ strerror(errno));
execl("/bin/sh", "sh", "-c", cmd, 0);
_exit(127);
}
@@ -615,7 +676,7 @@ struct redirect *rp;
return (rp->status >> 8) & 0xFF;
}
-#else /* PIPES_SUMULATED */
+#else /* PIPES_SIMULATED */
/* use temporary file rather than pipe */
#ifdef VMS
@@ -640,6 +701,7 @@ struct redirect *rp;
rp->iop->fd = dup(fd); /* kludge to allow close() + pclose() */
rval = iop_close(rp->iop);
+ rp->iop = NULL;
aval = pclose(kludge);
return (rval < 0 ? rval : aval);
}
@@ -670,6 +732,7 @@ struct redirect *rp;
return NULL;
pipes[current].name = name;
pipes[current].command = strdup(cmd);
+ /* rp->iop is allocated and stored by the return statement below; allocating here as well would drop the first result */
return (rp->iop = iop_alloc(current));
}
@@ -694,7 +757,7 @@ struct redirect *rp;
}
#endif /* VMS */
-#endif /* PIPES_SUMULATED */
+#endif /* PIPES_SIMULATED */
NODE *
do_getline(tree)
@@ -707,7 +770,7 @@ NODE *tree;
while (cnt == EOF) {
if (tree->rnode == NULL) { /* no redirection */
- iop = nextfile();
+ iop = nextfile(0);
if (iop == NULL) /* end of input */
return tmp_number((AWKNUM) 0.0);
} else {
@@ -717,12 +780,27 @@ NODE *tree;
if (rp == NULL && redir_error) /* failed redirect */
return tmp_number((AWKNUM) -1.0);
iop = rp->iop;
+ if (iop == NULL) /* end of input */
+ return tmp_number((AWKNUM) 0.0);
}
cnt = get_a_record(&s, iop, *RS);
if (cnt == EOF) {
if (rp) {
- (void) iop_close(iop);
- rp->iop = NULL;
+#ifdef PIPES_SIMULATED
+ /*
+ * Don't do iop_close() here if we are
+ * reading from a simulated pipe; otherwise
+ * gawk_close will not remove temporary
+ * files from where we were reading.
+ */
+ if ((rp->flag & (RED_PIPE|RED_READ)) !=
+ (RED_PIPE|RED_READ))
+#endif /* PIPES_SIMULATED */
+ {
+ (void) iop_close(iop);
+ rp->iop = NULL;
+ }
+ rp->flag |= RED_EOF; /* sticky EOF */
return tmp_number((AWKNUM) 0.0);
} else
continue; /* try another file */
@@ -794,7 +872,7 @@ char *file;
return (0);
if (strict)
- return (open (file, 0));
+ return (devopen(file, "r"));
if (first) {
first = 0;
@@ -815,7 +893,7 @@ char *file;
if (strchr(file, '/') != NULL)
#endif /*MSDOS*/
#endif /*VMS*/
- return (devopen (file, "r"));
+ return (devopen(file, "r"));
do {
trypath[0] = '\0';
@@ -839,7 +917,7 @@ char *file;
strcpy (cp, file);
} else
strcpy (trypath, file);
- if ((fd = devopen (trypath, "r")) >= 0)
+ if ((fd = devopen(trypath, "r")) >= 0)
return (fd);
/* no luck, keep going */
diff --git a/iop.c b/iop.c
index 38bd29aa..172e7e88 100644
--- a/iop.c
+++ b/iop.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -50,12 +50,21 @@ int
optimal_bufsize(fd)
int fd;
{
-#ifdef VMS
-/* don't even bother trying [fstat() fails across DECnet] */
- return BUFSIZ;
-#else
struct stat stb;
+#ifdef VMS
+ /*
+ * These values correspond with the RMS multi-block count used by
+ * vms_open() in vms/vms_misc.c.
+ */
+ if (isatty(fd) > 0)
+ return BUFSIZ;
+ else if (fstat(fd, &stb) < 0)
+ return 8*512; /* conservative in case of DECnet access */
+ else
+ return 24*512;
+
+#else
/*
* System V doesn't have the file system block size in the
* stat structure. So we have to make some sort of reasonable
@@ -81,11 +90,15 @@ int fd;
if (0 == fd || isatty(fd))
#endif /*atarist */
return BUFSIZ;
+#ifndef BLKSIZE_MISSING
+ /* VMS POSIX 1.0: st_blksize is never assigned a value, so zero it */
+ stb.st_blksize = 0;
+#endif
if (fstat(fd, &stb) == -1)
fatal("can't stat fd %d (%s)", fd, strerror(errno));
- if (lseek(fd, 0L, 0) == -1)
+ if (lseek(fd, (off_t)0, 0) == -1)
return DEFBLKSIZE;
- return (stb.st_size < DEFBLKSIZE ? stb.st_size : DEFBLKSIZE);
+ return ((int) (stb.st_size < DEFBLKSIZE ? stb.st_size : DEFBLKSIZE));
#endif /*! TEST */
#endif /*! VMS */
}
@@ -124,20 +137,14 @@ int fd;
* loop can run as a single test.
*/
int
-get_a_record(out, iop, RS)
+get_a_record(out, iop, grRS)
char **out;
IOBUF *iop;
-register int RS;
+register int grRS;
{
register char *bp = iop->off;
char *bufend;
char *start = iop->off; /* beginning of record */
-#ifdef atarist
-#define P_DIFF ptrdiff_t
-#else
-#define P_DIFF size_t
-#endif
- P_DIFF len;
int saw_newline;
char rs;
int eat_whitespace;
@@ -145,12 +152,12 @@ register int RS;
if (iop->cnt == EOF) /* previous read hit EOF */
return EOF;
- if (RS == 0) { /* special case: RS == "" */
+ if (grRS == 0) { /* special case: grRS == "" */
rs = '\n';
eat_whitespace = 0;
saw_newline = 0;
} else
- rs = RS;
+ rs = (char) grRS;
/* set up sentinel */
if (iop->buf) {
@@ -171,9 +178,11 @@ register int RS;
if (bp >= bufend) {
char *oldbuf = NULL;
char *oldsplit = iop->buf + iop->secsiz;
+ long len; /* record length so far */
len = bp - start;
if (len > iop->secsiz) {
+ /* expand secondary buffer */
if (iop->secsiz == -2)
iop->secsiz = 256;
while (len > iop->secsiz)
@@ -184,12 +193,14 @@ register int RS;
bufend = iop->buf + iop->size + iop->secsiz;
*bufend = rs;
}
- if (len) {
+ if (len > 0) {
char *newsplit = iop->buf + iop->secsiz;
if (start < oldsplit) {
- memcpy(newsplit - len, start, oldsplit - start);
- memcpy(newsplit - (bp - oldsplit), oldsplit, bp - oldsplit);
+ memcpy(newsplit - len, start,
+ oldsplit - start);
+ memcpy(newsplit - (bp - oldsplit),
+ oldsplit, bp - oldsplit);
} else
memcpy(newsplit - len, start, len);
}
@@ -208,7 +219,7 @@ register int RS;
if (bp >= iop->end) {
iop->cnt = read(iop->fd, iop->end, bufend - iop->end);
if (iop->cnt == -1)
- fatal("error reading input");
+ fatal("error reading input: %s", strerror(errno));
else if (iop->cnt == 0) {
iop->cnt = EOF;
break;
@@ -216,7 +227,7 @@ register int RS;
iop->end += iop->cnt;
*iop->end = rs;
}
- if (RS == 0) {
+ if (grRS == 0) {
extern int default_FS;
if (default_FS && (bp == start || eat_whitespace)) {
@@ -239,7 +250,7 @@ register int RS;
;
if (bp <= iop->end) {
- if (RS == 0)
+ if (grRS == 0)
saw_newline = 1;
else
break;
@@ -254,7 +265,7 @@ register int RS;
*bp = '\0';
else
bp++;
- if (RS == 0) {
+ if (grRS == 0) {
if (*--bp == rs)
*bp = '\0';
else
diff --git a/main.c b/main.c
index b0ca9e6d..04cb1e9d 100644
--- a/main.c
+++ b/main.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -35,6 +35,12 @@ static void pre_assign P((char *v));
SIGTYPE catchsig P((int sig, int code));
static void gawk_option P((char *optstr));
static void nostalgia P((void));
+static char *gawk_name P((char *filespec));
+
+#ifdef MSDOS
+extern int getopt P((int argc, char **argv, char *optstring));
+extern int isatty P((int));
+#endif
/* These nodes store all the special variables AWK uses */
NODE *FS_node, *NF_node, *RS_node, *NR_node;
@@ -88,6 +94,8 @@ char *cmdline_src = NULL; /* if prog is on command line */
int strict = 0; /* turn off gnu extensions */
int do_posix = 0; /* turn off gnu extensions and \x */
int do_lint = 0; /* provide warnings about questionable stuff */
+int in_begin_rule = 0; /* we're in a BEGIN rule */
+int in_end_rule = 0; /* we're in an END rule */
int output_is_tty = 0; /* control flushing of output */
@@ -102,18 +110,22 @@ NODE *expression_value;
*
* Note that after 2.13, c,a,e,C,D, and V go away.
*/
+/* the + on the front is for GNU getopt */
#ifdef DEBUG
-char awk_opts[] = "F:f:v:W:caeCVD";
+char awk_opts[] = "+F:f:v:W:caeCVD";
#else
-char awk_opts[] = "F:f:v:W:caeCV";
+char awk_opts[] = "+F:f:v:W:caeCV";
#endif
+extern void resetup P((void));
+
int
main(argc, argv)
int argc;
char **argv;
{
int c;
+ char *scan;
extern int optind;
extern char *optarg;
int i;
@@ -121,15 +133,13 @@ char **argv;
(void) signal(SIGFPE, (SIGTYPE (*) P((int))) catchsig);
(void) signal(SIGSEGV, (SIGTYPE (*) P((int))) catchsig);
-#ifdef VMS
+#ifndef MSDOS
(void) signal(SIGBUS, (SIGTYPE (*) P((int))) catchsig);
#endif
-#ifndef VMS
- myname = basename(argv[0]);
-#else /* VMS */
- myname = strdup(basename(argv[0]));
- argv[0] = (char *) myname; /* strip path [prior to getopt()] */
+ myname = gawk_name(argv[0]);
+ argv[0] = (char *)myname;
+#ifdef VMS
vms_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
#endif
if (argc < 2)
@@ -143,7 +153,7 @@ char **argv;
Nnull_string = make_string("", 0);
Nnull_string->numbr = 0.0;
Nnull_string->type = Node_val;
- Nnull_string->flags = (PERM|STR|STRING|NUM|NUMERIC|NUMBER);
+ Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER);
/* Set up the special variables */
@@ -171,29 +181,27 @@ char **argv;
}
if (do_nostalgia) {
fprintf(stderr, "%s, %s\n",
- "warning: option -nostalgia will go away in the next release",
+ "warning: option -nostalgia will go away in a future release",
"use -W nostalgia");
nostalgia();
/* NOTREACHED */
}
/* Tell the regex routines how they should work. . . */
- (void) re_set_syntax(RE_SYNTAX_AWK);
- regsyntax(RE_SYNTAX_AWK, 0);
-
+ resetup();
while ((c = getopt (argc, argv, awk_opts)) != EOF) {
switch (c) {
#ifdef DEBUG
case 'D':
fprintf(stderr,
-"warning: option -D will go away in the next release, use -W parsedebug\n");
+"warning: option -D will go away in a future release, use -W parsedebug\n");
gawk_option("parsedebug");
break;
#endif
case 'c':
fprintf(stderr,
- "warning: option -c will go away in the next release, use -W compat\n");
+ "warning: option -c will go away in a future release, use -W compat\n");
gawk_option("compat");
break;
@@ -207,7 +215,16 @@ char **argv;
* this makes function libraries real easy.
* most of the magic is in the scanner.
*/
- srcfiles[++numfiles] = optarg;
+ /* The following is to allow for whitespace at the end
+ * of a #! /bin/gawk line in an executable file
+ */
+ scan = optarg;
+ while (isspace((unsigned char) *scan)) /* cast: isspace() is undefined for negative char values */
+ scan++;
+ if (*scan == '\0' && optind < argc) /* all blank: the real file name is the next argument, if there is one */
+ srcfiles[++numfiles] = argv[optind++];
+ else
+ srcfiles[++numfiles] = optarg;
break;
case 'v':
@@ -216,22 +233,22 @@ char **argv;
case 'V':
warning(
- "option -V will go away in the next release, use -W version");
+ "option -V will go away in a future release, use -W version");
gawk_option("version");
break;
case 'C':
warning(
- "option -C will go away in the next release, use -W copyright");
+ "option -C will go away in a future release, use -W copyright");
gawk_option("copyright");
break;
case 'a': /* use old fashioned awk regexps */
- warning("option -a will go away in the next release");
+ warning("option -a will go away in a future release");
break;
case 'e': /* use Posix style regexps */
- warning("option -e will go away in the next release");
+ warning("option -e will go away in a future release");
break;
case 'W': /* gawk specific options */
@@ -270,12 +287,18 @@ char **argv;
/* Set up the field variables */
init_fields();
- if (begin_block)
+ if (begin_block) {
+ in_begin_rule = 1;
(void) interpret(begin_block);
+ }
+ in_begin_rule = 0;
if (!exiting && (expression_value || end_block))
do_input();
- if (end_block)
+ if (end_block) {
+ in_end_rule = 1;
(void) interpret(end_block);
+ }
+ in_end_rule = 0;
if (close_io() != 0 && exit_val == 0)
exit_val = 1;
exit(exit_val); /* more portable */
@@ -353,25 +376,29 @@ int ignorecase;
static void
copyleft ()
{
- static char blurb[] =
-"Copyright (C) 1989, 1991, Free Software Foundation.\n\
+ static char blurb_part1[] =
+"Copyright (C) 1989, 1991, 1992, Free Software Foundation.\n\
\n\
This program is free software; you can redistribute it and/or modify\n\
it under the terms of the GNU General Public License as published by\n\
the Free Software Foundation; either version 2 of the License, or\n\
(at your option) any later version.\n\
-\n\
-This program is distributed in the hope that it will be useful,\n\
+\n";
+ static char blurb_part2[] =
+"This program is distributed in the hope that it will be useful,\n\
but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
GNU General Public License for more details.\n\
-\n\
-You should have received a copy of the GNU General Public License\n\
+\n";
+ static char blurb_part3[] =
+"You should have received a copy of the GNU General Public License\n\
along with this program; if not, write to the Free Software\n\
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n";
fprintf(stderr, "%s, patchlevel %d\n", version_string, PATCHLEVEL);
- fputs(blurb, stderr);
+ fputs(blurb_part1, stderr);
+ fputs(blurb_part2, stderr);
+ fputs(blurb_part3, stderr);
fflush(stderr);
}
@@ -472,7 +499,9 @@ init_vars()
void
load_environ()
{
+#if !defined(MSDOS) && !(defined(VMS) && defined(__DECC))
extern char **environ;
+#endif
register char *var, *val;
NODE **aptr;
register int i;
@@ -550,13 +579,12 @@ int sig, code;
#endif
if (sig == SIGFPE) {
fatal("floating point exception");
-#ifndef VMS
- } else if (sig == SIGSEGV) {
- msg("fatal error: segmentation fault");
-#else
- } else if (sig == SIGSEGV || sig == SIGBUS) {
- msg("fatal error: access violation");
+ } else if (sig == SIGSEGV
+#ifndef MSDOS
+ || sig == SIGBUS
#endif
+ ) {
+ msg("fatal error: internal error");
/* fatal won't abort() if not compiled for debugging */
abort();
} else
@@ -648,35 +676,40 @@ nostalgia()
abort();
}
-const char *
-basename(filespec)
-const char *filespec;
+static char *
+gawk_name(filespec)
+char *filespec;
{
-#ifndef VMS /* "path/name" -> "name" */
- char *p = strrchr(filespec, '/');
+ char *p;
+
+#ifdef VMS /* "device:[root.][directory.subdir]GAWK.EXE;n" -> "GAWK" */
+ char *q;
-#if defined(MSDOS) || defined(atarist)
- char *q = strrchr(filespec, '\\');
-
- if (p == NULL || q > p)
- p = q;
-#endif
+ p = strrchr(filespec, ']'); /* directory punctuation */
+ q = strrchr(filespec, '>'); /* alternate <international> punct */
- return (p == NULL ? filespec : (const char *)(p + 1));
-
-#else /* "device:[root.][directory.subdir]GAWK.EXE;n" -> "GAWK" */
- static char buf[255+1];
- char *p = strrchr(filespec, ']'); /* directory punctuation */
- char *q = strrchr(filespec, '>'); /* alternate <international> punct */
-
- if (p == NULL || q > p)
- p = q;
- (void) strcpy(buf, p == NULL ? filespec : (p + 1));
- q = strrchr(buf, '.');
- if (q != NULL)
- *q = '\0'; /* strip .type;version */
-
- return (const char *) buf;
+ if (p == NULL || q > p) p = q;
+ p = strdup(p == NULL ? filespec : (p + 1));
+ if ((q = strrchr(p, '.')) != NULL) *q = '\0'; /* strip .typ;vers */
+ return p;
#endif /*VMS*/
+
+#if defined(MSDOS) || defined(atarist)
+ char *q;
+
+ p = filespec;
+
+ if (q = strrchr(p, '\\'))
+ p = q + 1;
+ if (q = strchr(p, '.'))
+ *q = '\0';
+ strlwr(p);
+
+ return (p == NULL ? filespec : p);
+#endif /* MSDOS || atarist */
+
+ /* "path/name" -> "name" */
+ p = strrchr(filespec, '/');
+ return (p == NULL ? filespec : p + 1);
}
diff --git a/missing.c b/missing.c
index daf16e60..66c79b41 100644
--- a/missing.c
+++ b/missing.c
@@ -5,10 +5,10 @@
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
-#ifndef VAXC
+#if !defined(VMS) || (!defined(VAXC) && !defined(__DECC))
#include <fcntl.h>
#include <sys/types.h>
-#else /*VAXC (VMS)*/
+#else /*VMS w/ VAXC or DECC*/
#include <file.h>
#include <types.h>
#endif
@@ -16,6 +16,10 @@
#include "config.h"
+#ifdef STDC_HEADERS
+#include <string.h>
+#endif
+
#ifdef TZSET_MISSING
#include <sys/time.h>
#else
@@ -29,7 +33,7 @@
*/
#include "atari/stack.c"
#include "atari/tmpnam.c"
-#include "atari/textrd.c" /* gnulib bug fix */
+/* #include "atari/textrd.c" */ /* no longer needed: current libraries are correct, so the gnulib bug fix is obsolete */
#endif /* atarist */
#ifdef SYSTEM_MISSING
@@ -40,10 +44,6 @@
#endif
#endif /* SYSTEM_MISSING */
-#ifdef GETOPT_MISSING
-#include "missing/getopt.c"
-#endif /* GETOPT_MISSING */
-
#ifdef MEMCMP_MISSING
#include "missing/memcmp.c"
#endif /* MEMCMP_MISSING */
@@ -80,14 +80,6 @@
#include "missing/strtod.c"
#endif /* STRTOD_MISSING */
-#ifdef STRTOL_MISSING
-#include "missing/strtol.c"
-#endif /* STRTOL_MISSING */
-
-#if defined(VPRINTF_MISSING) && defined(BSDSTDIO)
-#include "missing/vprintf.c"
-#endif /* VPRINTF_MISSING && BSDSTDIO */
-
#ifdef TZSET_MISSING
#include "missing/tzset.c"
#endif /* TZSET_MISSING */
diff --git a/missing/getopt.c b/missing/getopt.c
deleted file mode 100644
index 09a1b233..00000000
--- a/missing/getopt.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
-** @(#)getopt.c 2.5 (smail) 9/15/87
-*/
-
-/*
- * Here's something you've all been waiting for: the AT&T public domain
- * source for getopt(3). It is the code which was given out at the 1985
- * UNIFORUM conference in Dallas. I obtained it by electronic mail
- * directly from AT&T. The people there assure me that it is indeed
- * in the public domain.
- *
- * There is no manual page. That is because the one they gave out at
- * UNIFORUM was slightly different from the current System V Release 2
- * manual page. The difference apparently involved a note about the
- * famous rules 5 and 6, recommending using white space between an option
- * and its first argument, and not grouping options that have arguments.
- * Getopt itself is currently lenient about both of these things. White
- * space is allowed, but not mandatory, and the last option in a group can
- * have an argument. That particular version of the man page evidently
- * has no official existence, and my source at AT&T did not send a copy.
- * The current SVR2 man page reflects the actual behavor of this getopt.
- * However, I am not about to post a copy of anything licensed by AT&T.
- */
-
-#if defined(__STDC__) || defined(USG) || defined(MSDOS) || defined(VMS)
-#define index strchr
-#endif
-
-/*LINTLIBRARY*/
-#ifndef NULL
-#define NULL 0
-#endif
-#define EOF (-1)
-#define ERR(s, c) if(opterr){\
- extern int write();\
- char errbuf[2];\
- errbuf[0] = c; errbuf[1] = '\n';\
- (void) write(2, argv[0], (unsigned)strlen(argv[0]));\
- (void) write(2, s, (unsigned)strlen(s));\
- (void) write(2, errbuf, 2);}
-
-extern char *index();
-
-int opterr = 1;
-int optind = 1;
-int optopt;
-char *optarg;
-
-int
-getopt(argc, argv, opts)
-int argc;
-char **argv, *opts;
-{
- static int sp = 1;
- register int c;
- register char *cp;
-
- if(sp == 1)
- if(optind >= argc ||
- argv[optind][0] != '-' || argv[optind][1] == '\0')
- return(EOF);
- else if(strcmp(argv[optind], "--") == NULL) {
- optind++;
- return(EOF);
- }
- optopt = c = argv[optind][sp];
- if(c == ':' || (cp=index(opts, c)) == NULL) {
- ERR(": illegal option -- ", c);
- if(argv[optind][++sp] == '\0') {
- optind++;
- sp = 1;
- }
- return('?');
- }
- if(*++cp == ':') {
- if(argv[optind][sp+1] != '\0')
- optarg = &argv[optind++][sp+1];
- else if(++optind >= argc) {
- ERR(": option requires an argument -- ", c);
- sp = 1;
- return('?');
- } else
- optarg = argv[optind++];
- sp = 1;
- } else {
- if(argv[optind][++sp] == '\0') {
- sp = 1;
- optind++;
- }
- optarg = NULL;
- }
- return(c);
-}
diff --git a/missing/random.c b/missing/random.c
index 3cd675e4..16e598ae 100644
--- a/missing/random.c
+++ b/missing/random.c
@@ -196,6 +196,7 @@ static long *end_ptr = &randtbl[ DEG_3 + 1 ];
* values produced by this routine.
*/
+void
srandom( x )
unsigned x;
@@ -250,7 +251,7 @@ initstate( seed, arg_state, n )
if( n < BREAK_1 ) {
if( n < BREAK_0 ) {
fprintf( stderr, "initstate: not enough state (%d bytes) with which to do jack; ignored.\n", n );
- return;
+ return 0;
}
rand_type = TYPE_0;
rand_deg = DEG_0;
diff --git a/missing/strcase.c b/missing/strcase.c
index 6834f27d..5d93911f 100644
--- a/missing/strcase.c
+++ b/missing/strcase.c
@@ -61,8 +61,9 @@ static u_char charmap[] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
+int
strcasecmp(s1, s2)
- char *s1, *s2;
+ const char *s1, *s2;
{
register u_char *cm = charmap,
*us1 = (u_char *)s1,
@@ -74,16 +75,17 @@ strcasecmp(s1, s2)
return(cm[*us1] - cm[*--us2]);
}
+int
strncasecmp(s1, s2, n)
- char *s1, *s2;
- register int n;
+ const char *s1, *s2;
+ register size_t n;
{
register u_char *cm = charmap,
*us1 = (u_char *)s1,
*us2 = (u_char *)s2;
- while (--n >= 0 && cm[*us1] == cm[*us2++])
+ while ((long)(--n) >= 0 && cm[*us1] == cm[*us2++])
if (*us1++ == '\0')
return(0);
- return(n < 0 ? 0 : cm[*us1] - cm[*--us2]);
+ return((long)n < 0 ? 0 : cm[*us1] - cm[*--us2]);
}
diff --git a/missing/strftime.3 b/missing/strftime.3
index b61ed029..254db661 100644
--- a/missing/strftime.3
+++ b/missing/strftime.3
@@ -190,8 +190,47 @@ following additional conversions:
.TP
.B %C
The century, as a number between 00 and 99.
+.TP
+.B %u
+is replaced by the weekday as a decimal number
+.RB [ "1 " (Monday)- 7 ].
+.TP
+.B %V
+is replaced by the week number of the year (the first Monday as the first
+day of week 1) as a decimal number
+.RB ( 01 - 53 ).
+The method for determining the week number is as specified by ISO 8601
+(to wit: if the week containing January 1 has four or more days in the
+new year, then it is week 1, otherwise it is week 53 of the previous year
+and the next week is week 1).
+.LP
+The text of the POSIX standard for the
+.I date
+utility describes
+.B %U
+and
+.B %W
+this way:
+.TP
+.B %U
+is replaced by the week number of the year (the first Sunday as the first
+day of week 1) as a decimal number
+.RB ( 00 - 53 ).
+All days in a new year preceding the first Sunday are considered to be
+in week 0.
+.TP
+.B %W
+is replaced by the week number of the year (the first Monday as the first
+day of week 1) as a decimal number
+.RB ( 00 - 53 ).
+All days in a new year preceding the first Sunday are considered to be
+in week 0.
+(Note: this last statement is quoted verbatim from the POSIX standard.
+It probably means to say ``all days in a new year preceding the first
+.I Monday
+are considered to be in week 0.'')
.LP
-In additon, the alternate representations
+In addition, the alternate representations
.BR %Ec ,
.BR %EC ,
.BR %Ex ,
@@ -204,7 +243,9 @@ In additon, the alternate representations
.BR %Om ,
.BR %OM ,
.BR %OS ,
+.BR %Ou ,
.BR %OU ,
+.BR %OV ,
.BR %Ow ,
.BR %OW ,
and
@@ -215,7 +256,7 @@ If
.B VMS_EXT
is defined, then the following additional conversion is available:
.TP
-.B %V
+.B %v
The date in VMS format (e.g. 20-JUN-1991).
.SH SEE ALSO
time(2), ctime(3), localtime(3)
@@ -228,32 +269,27 @@ environment variable.
It is not clear what is ``appropriate'' for the C locale; the values
returned are a best guess on the author's part.
.SH CAVEATS
-This implementation calls
+The pre-processor symbol
+.B POSIX_SEMANTICS
+is automatically defined, which forces the code to call
.IR tzset (3)
-exactly once. If the
+whenever the
.B TZ
-environment variable is changed after
-.B strftime
-has been called, then
-.IR tzset (3)
-must be called again, explicitly, in order for the
-correct timezone information to be available.
+environment variable has changed.
+If this routine will be used in an application that will not be changing
+.BR TZ ,
+then there may be some performance improvements by not defining
+.BR POSIX_SEMANTICS .
.SH AUTHOR
.nf
Arnold Robbins
-AudioFAX, Inc.
-Suite 200
-2000 Powers Ferry Road
-Marietta, GA. 30067
-U.S.A.
-INTERNET: arnold@audiofax.com
-UUCP: emory!audfax!arnold
-Phone: +1 404 618 4281
-Fax-box: +1 404 618 4581
+.sp
+INTERNET: arnold@skeeve.atl.ga.us
+UUCP: emory!skeeve!arnold
+Phone: +1 404 248 9324
.fi
.SH ACKNOWLEDGEMENTS
Thanks to Geoff Clare <gwc@root.co.uk> for helping debug earlier
-versions of this routine.
+versions of this routine, and for advice about POSIX semantics.
Additional thanks to Arthur David Olsen <ado@elsie.nci.nih.gov>
for some code improvements.
-
diff --git a/missing/strftime.c b/missing/strftime.c
index 11f41ce9..da696a8d 100644
--- a/missing/strftime.c
+++ b/missing/strftime.c
@@ -9,53 +9,109 @@
*
* If you want stuff in the System V ascftime routine, add the SYSV_EXT define.
* For stuff needed to implement the P1003.2 date command, add POSIX2_DATE.
+ * For complete POSIX semantics, add POSIX_SEMANTICS.
*
* The code for %c, %x, and %X is my best guess as to what's "appropriate".
* This version ignores LOCALE information.
* It also doesn't worry about multi-byte characters.
* So there.
*
+ * This file is also shipped with GAWK (GNU Awk); gawk-specific bits of
+ * code are included if GAWK is defined.
+ *
* Arnold Robbins
* January, February, March, 1991
+ * Updated March, April 1992
*
* Fixes from ado@elsie.nci.nih.gov
- * February 1991
+ * February 1991, May 1992
*/
-#if 0
+#ifndef GAWK
#include <stdio.h>
-#include <string.h>
#include <ctype.h>
+#include <string.h>
#include <time.h>
#include <sys/types.h>
#endif
-#ifndef __STDC__
-#define const /**/
+/* defaults: season to taste */
+#define SYSV_EXT 1 /* stuff in System V ascftime routine */
+#define POSIX2_DATE 1 /* stuff in Posix 1003.2 date command */
+#define VMS_EXT 1 /* include %v for VMS date format */
+#ifndef GAWK
+#define POSIX_SEMANTICS 1 /* call tzset() if TZ changes */
+#endif
+
+#if defined(POSIX2_DATE) && ! defined(SYSV_EXT)
+#define SYSV_EXT 1
+#endif
+
+#if defined(POSIX2_DATE)
+#define adddecl(stuff) stuff
+#else
+#define adddecl(stuff)
#endif
#ifndef __STDC__
+#define const /**/
+extern void *malloc();
+extern void *realloc();
extern void tzset();
extern char *strchr();
+extern char *getenv();
static int weeknumber();
+adddecl(static int iso8601wknum();)
#else
+extern void *malloc(unsigned count);
+extern void *realloc(void *ptr, unsigned count);
extern void tzset(void);
extern char *strchr(const char *str, int ch);
+extern char *getenv(const char *v);
static int weeknumber(const struct tm *timeptr, int firstweekday);
+adddecl(static int iso8601wknum(const struct tm *timeptr);)
#endif
+#ifdef __GNUC__
+#define inline __inline__
+#else
+#define inline /**/
+#endif
+
+#define range(low, item, hi) max(low, min(item, hi))
+
#if !defined(MSDOS) && !defined(TZNAME_MISSING)
extern char *tzname[2];
extern int daylight;
#endif
-#define SYSV_EXT 1 /* stuff in System V ascftime routine */
-#define POSIX2_DATE 1 /* stuff in Posix 1003.2 date command */
-#define VMS_EXT 1 /* include %V for VMS date format */
+/* min --- return minimum of two numbers */
-#if defined(POSIX2_DATE) && ! defined(SYSV_EXT)
-#define SYSV_EXT 1
+#ifdef __STDC__
+static inline int
+min(int a, int b)
+#else
+static inline int
+min(a, b)
+int a, b;
+#endif
+{
+	/* a is chosen only when strictly smaller; on a tie b (the same value) is returned */
+	if (a < b)
+		return a;
+	return b;
+}
+
+/* max --- return maximum of two numbers */
+
+#ifdef __STDC__
+static inline int
+max(int a, int b)
+#else
+static inline int
+max(a, b)
+int a, b;
+#endif
+{
+	/* a is chosen only when strictly larger; on a tie b (the same value) is returned */
+	if (a > b)
+		return a;
+	return b;
+}
/* strftime --- produce formatted time */
@@ -76,6 +132,12 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
char tbuf[100];
int i;
static short first = 1;
+#ifdef POSIX_SEMANTICS
+ static char *savetz = NULL;
+ static int savetzlen = 0;
+ char *tz;
+ int tzlen;
+#endif
/* various tables, useful in North America */
static char *days_a[] = {
@@ -103,10 +165,39 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
if (strchr(format, '%') == NULL && strlen(format) + 1 >= maxsize)
return 0;
+#ifndef POSIX_SEMANTICS
if (first) {
tzset();
first = 0;
}
+#else /* POSIX_SEMANTICS */
+	tz = getenv("TZ");
+	/* TZ may be unset: getenv() then returns NULL, which must never reach strlen() */
+	tzlen = (tz != NULL) ? strlen(tz) : 0;
+	if (first) {
+		/* first call: remember the current TZ so that later changes can be detected */
+		if (tz != NULL) {
+			savetz = (char *) malloc(tzlen + 1);
+			if (savetz != NULL) {
+				savetzlen = tzlen + 1;
+				strcpy(savetz, tz);
+			}
+		}
+		tzset();
+		first = 0;
+	}
+	/* if we have a saved TZ, and it is different, recapture and reset */
+	if (tz && savetz && (tz[0] != savetz[0] || strcmp(tz, savetz) != 0)) {
+		i = tzlen + 1;
+		if (i > savetzlen) {
+			/*
+			 * Grow through a temporary: if realloc() fails the old
+			 * copy is kept (not leaked), so the mismatch is simply
+			 * noticed again, and tzset() called again, next time.
+			 */
+			char *newtz = (char *) realloc(savetz, i);
+
+			if (newtz != NULL) {
+				savetz = newtz;
+				savetzlen = i;
+				strcpy(savetz, tz);
+			}
+		} else
+			strcpy(savetz, tz);
+		tzset();
+	}
+#endif /* POSIX_SEMANTICS */
for (; *format && s < endp - 1; format++) {
tbuf[0] = '\0';
@@ -157,25 +248,27 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
case 'c': /* appropriate date and time representation */
sprintf(tbuf, "%s %s %2d %02d:%02d:%02d %d",
- days_a[timeptr->tm_wday],
- months_a[timeptr->tm_mon],
- timeptr->tm_mday,
- timeptr->tm_hour,
- timeptr->tm_min,
- timeptr->tm_sec,
+ days_a[range(0, timeptr->tm_wday, 6)],
+ months_a[range(0, timeptr->tm_mon, 11)],
+ range(1, timeptr->tm_mday, 31),
+ range(0, timeptr->tm_hour, 23),
+ range(0, timeptr->tm_min, 59),
+ range(0, timeptr->tm_sec, 61),
timeptr->tm_year + 1900);
break;
case 'd': /* day of the month, 01 - 31 */
- sprintf(tbuf, "%02d", timeptr->tm_mday);
+ i = range(1, timeptr->tm_mday, 31);
+ sprintf(tbuf, "%02d", i);
break;
case 'H': /* hour, 24-hour clock, 00 - 23 */
- sprintf(tbuf, "%02d", timeptr->tm_hour);
+ i = range(0, timeptr->tm_hour, 23);
+ sprintf(tbuf, "%02d", i);
break;
case 'I': /* hour, 12-hour clock, 01 - 12 */
- i = timeptr->tm_hour;
+ i = range(0, timeptr->tm_hour, 23);
if (i == 0)
i = 12;
else if (i > 12)
@@ -188,22 +281,26 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
break;
case 'm': /* month, 01 - 12 */
- sprintf(tbuf, "%02d", timeptr->tm_mon + 1);
+ i = range(0, timeptr->tm_mon, 11);
+ sprintf(tbuf, "%02d", i + 1);
break;
case 'M': /* minute, 00 - 59 */
- sprintf(tbuf, "%02d", timeptr->tm_min);
+ i = range(0, timeptr->tm_min, 59);
+ sprintf(tbuf, "%02d", i);
break;
case 'p': /* am or pm based on 12-hour clock */
- if (timeptr->tm_hour < 12)
+ i = range(0, timeptr->tm_hour, 23);
+ if (i < 12)
strcpy(tbuf, ampm[0]);
else
strcpy(tbuf, ampm[1]);
break;
case 'S': /* second, 00 - 61 */
- sprintf(tbuf, "%02d", timeptr->tm_sec);
+ i = range(0, timeptr->tm_sec, 61);
+ sprintf(tbuf, "%02d", i);
break;
case 'U': /* week of year, Sunday is first day of week */
@@ -211,7 +308,8 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
break;
case 'w': /* weekday, Sunday == 0, 0 - 6 */
- sprintf(tbuf, "%d", timeptr->tm_wday);
+ i = range(0, timeptr->tm_wday, 6);
+ sprintf(tbuf, "%d", i);
break;
case 'W': /* week of year, Monday is first day of week */
@@ -220,17 +318,17 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
case 'x': /* appropriate date representation */
sprintf(tbuf, "%s %s %2d %d",
- days_a[timeptr->tm_wday],
- months_a[timeptr->tm_mon],
- timeptr->tm_mday,
+ days_a[range(0, timeptr->tm_wday, 6)],
+ months_a[range(0, timeptr->tm_mon, 11)],
+ range(1, timeptr->tm_mday, 31),
timeptr->tm_year + 1900);
break;
case 'X': /* appropriate time representation */
sprintf(tbuf, "%02d:%02d:%02d",
- timeptr->tm_hour,
- timeptr->tm_min,
- timeptr->tm_sec);
+ range(0, timeptr->tm_hour, 23),
+ range(0, timeptr->tm_min, 59),
+ range(0, timeptr->tm_sec, 61));
break;
case 'y': /* year without a century, 00 - 99 */
@@ -273,7 +371,7 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
break;
case 'e': /* day of month, blank padded */
- sprintf(tbuf, "%2d", timeptr->tm_mday);
+ sprintf(tbuf, "%2d", range(1, timeptr->tm_mday, 31));
break;
case 'r': /* time as %I:%M:%S %p */
@@ -291,10 +389,10 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
#ifdef VMS_EXT
- case 'V': /* date as dd-bbb-YYYY */
+ case 'v': /* date as dd-bbb-YYYY */
sprintf(tbuf, "%2d-%3.3s-%4d",
- timeptr->tm_mday,
- months_a[timeptr->tm_mon],
+ range(1, timeptr->tm_mday, 31),
+ months_a[range(0, timeptr->tm_mon, 11)],
timeptr->tm_year + 1900);
for (i = 3; i < 6; i++)
if (islower(tbuf[i]))
@@ -313,7 +411,30 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
case 'O':
/* POSIX locale extensions, ignored for now */
goto again;
+
+ case 'V': /* week of year according ISO 8601 */
+#if defined(GAWK) && defined(VMS_EXT)
+ {
+ extern int do_lint;
+ extern void warning();
+ static int warned = 0;
+
+ if (! warned && do_lint) {
+ warned = 1;
+ warning(
+ "conversion %%V added in P1003.2/11.3; for VMS style date, use %%v");
+ }
+ }
#endif
+ sprintf(tbuf, "%d", iso8601wknum(timeptr));
+ break;
+
+	case 'u':
+		/* ISO 8601: Weekday as a decimal number [1 (Monday) - 7] */
+		/* clamp tm_wday like the other conversions do, then map Sunday (0) to 7 */
+		i = range(0, timeptr->tm_wday, 6);
+		sprintf(tbuf, "%d", i == 0 ? 7 : i);
+		break;
+#endif /* POSIX2_DATE */
default:
tbuf[0] = '%';
tbuf[1] = *format;
@@ -336,6 +457,67 @@ out:
return 0;
}
+#ifdef POSIX2_DATE
+/*
+ * iso8601wknum --- compute week number according to ISO 8601
+ *
+ * timeptr	broken-down time.  Only tm_year, tm_yday (January 1 == 0)
+ *		and tm_wday (Sunday == 0) are examined; they are assumed
+ *		to be mutually consistent and in range.
+ *
+ * Returns the ISO 8601 week number, 1 through 53.
+ */
+
+#ifndef __STDC__
+static int
+iso8601wknum(timeptr)
+const struct tm *timeptr;
+#else
+static int
+iso8601wknum(const struct tm *timeptr)
+#endif
+{
+	/*
+	 * From 1003.2 D11.3:
+	 *	If the week (Monday to Sunday) containing January 1
+	 *	has four or more days in the new year, then it is week 1;
+	 *	otherwise it is week 53 of the previous year, and the
+	 *	next week is week 1.
+	 *
+	 * "Four or more days in the new year" is the same as saying that
+	 * a week belongs to the year that contains its Thursday.  ISO 8601
+	 * numbers the last week of a year 52 or 53 depending on that year,
+	 * and the final days of December may already be in week 1 of the
+	 * following year; both cases are handled here.
+	 */
+
+	int wday;	/* weekday of *timeptr, Monday == 0 ... Sunday == 6 */
+	int thursday;	/* day-of-year index of the Thursday of this week */
+	int year;	/* the year that Thursday falls in */
+	int yeardays;	/* number of days in that year */
+
+	wday = (timeptr->tm_wday + 6) % 7;
+	thursday = timeptr->tm_yday - wday + 3;
+
+	year = timeptr->tm_year + 1900;
+	if (thursday < 0)
+		year--;		/* the week belongs to the previous year */
+
+	if ((year % 4 == 0 && year % 100 != 0) || year % 400 == 0)
+		yeardays = 366;
+	else
+		yeardays = 365;
+
+	if (thursday < 0)
+		thursday += yeardays;	/* re-index within the previous year */
+	else if (thursday >= yeardays)
+		thursday -= yeardays;	/* week 1 of the following year */
+
+	/* the n'th Thursday of a year (counting from 0) is in week n + 1 */
+	return thursday / 7 + 1;
+}
+#endif
+
/* weeknumber --- figure how many weeks into the year */
/* With thanks and tip of the hatlo to ado@elsie.nci.nih.gov */
diff --git a/missing/strtod.c b/missing/strtod.c
index 38c7ce50..746a5da9 100644
--- a/missing/strtod.c
+++ b/missing/strtod.c
@@ -29,7 +29,7 @@ extern double atof();
double
strtod (s, ptr)
-register const char *s;
+register char *s;
register char **ptr;
{
double ret = 0.0;
diff --git a/missing/strtol.c b/missing/strtol.c
deleted file mode 100644
index e102ae34..00000000
--- a/missing/strtol.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
-Article 4291 of comp.lang.c:
-From: chris@mimsy.umd.edu (Chris Torek)
-Newsgroups: comp.lang.c
-Subject: Re: error checking strtol
-Message-ID: <24445@mimsy.umd.edu>
-Date: 17 May 90 09:31:17 GMT
-Organization: U of Maryland, Dept. of Computer Science, Coll. Pk., MD 20742
-
-The following is a working strtol. It depends only on the existence of
-correct header files (including <limits.h>) and on ASCII (IBM programmers
-will have to use strchr()). It does not support locales other than `C'.
-System V programmers should be able to replace their current strtol with
-this one. (After writing this, I checked the SVR2 source; it did not
-handle several cases correctly.)
-*/
-
-#ifdef __STDC__
-#include <limits.h>
-#else
-#define LONG_MIN (-0x80000000) /* for 32-bit 2s-complement at least */
-#define LONG_MAX 0x7fffffff
-#endif
-
-#if 0
-#include <limits.h>
-#include <ctype.h>
-#include <errno.h>
-#endif
-
-#ifndef _MSC_VER
-int errno;
-#endif
-
-/*
- * Convert a string to a long integer.
- *
- * Ignores `locale' stuff. Assumes that the upper and lower case
- * alphabets and digits are each contiguous.
- */
-long
-strtol(nptr, endptr, base)
- const char *nptr;
- char **endptr;
- register int base;
-{
- register const char *s = nptr;
- register unsigned long acc;
- register int c;
- register unsigned long cutoff;
- register int neg = 0, any, cutlim;
-
- /*
- * Skip white space and pick up leading +/- sign if any.
- * If base is 0, allow 0x for hex and 0 for octal, else
- * assume decimal; if base is already 16, allow 0x.
- */
- do {
- c = *s++;
- } while (isspace(c));
- if (c == '-') {
- neg = 1;
- c = *s++;
- } else if (c == '+')
- c = *s++;
- if ((base == 0 || base == 16) &&
- c == '0' && (*s == 'x' || *s == 'X')) {
- c = s[1];
- s += 2;
- base = 16;
- }
- if (base == 0)
- base = c == '0' ? 8 : 10;
-
- /*
- * Compute the cutoff value between legal numbers and illegal
- * numbers. That is the largest legal value, divided by the
- * base. An input number that is greater than this value, if
- * followed by a legal input character, is too big. One that
- * is equal to this value may be valid or not; the limit
- * between valid and invalid numbers is then based on the last
- * digit. For instance, if the range for longs is
- * [-2147483648..2147483647] and the input base is 10,
- * cutoff will be set to 214748364 and cutlim to either
- * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
- * a value > 214748364, or equal but the next digit is > 7 (or 8),
- * the number is too big, and we will return a range error.
- *
- * Set any if any `digits' consumed; make it negative to indicate
- * overflow.
- */
- cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;
- cutlim = cutoff % (unsigned long)base;
- cutoff /= (unsigned long)base;
- for (acc = 0, any = 0;; c = *s++) {
- if (isdigit(c))
- c -= '0';
- else if (isalpha(c))
- c -= isupper(c) ? 'A' - 10 : 'a' - 10;
- else
- break;
- if (c >= base)
- break;
- if (any < 0 || acc > cutoff || acc == cutoff && c > cutlim)
- any = -1;
- else {
- any = 1;
- acc *= base;
- acc += c;
- }
- }
- if (any < 0) {
- acc = neg ? LONG_MIN : LONG_MAX;
- errno = ERANGE;
- } else if (neg)
- acc = -acc;
- if (endptr != 0)
- *endptr = (char *) (any ? s - 1 : nptr);
- return (acc);
-}
diff --git a/missing/system.c b/missing/system.c
index bceca9e9..a062917a 100644
--- a/missing/system.c
+++ b/missing/system.c
@@ -1,3 +1,28 @@
+/*
+ * system.c --- replacement system() for systems missing one
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
int
system(s)
char *s;
diff --git a/missing/vprintf.c b/missing/vprintf.c
deleted file mode 100644
index bfa529e8..00000000
--- a/missing/vprintf.c
+++ /dev/null
@@ -1,47 +0,0 @@
-#if 0
-#include <varargs.h>
-#endif
-
-int
-vsprintf(str, fmt, ap)
- char *str, *fmt;
- va_list ap;
-{
- FILE f;
- int len;
-
- f._flag = _IOWRT+_IOSTRG;
- f._ptr = (char *)str; /* My copy of BSD stdio.h has this as (char *)
- * with a comment that it should be
- * (unsigned char *). Since this code is
- * intended for use on a vanilla BSD system,
- * we'll stick with (char *) for now.
- */
- f._cnt = 32767;
- len = _doprnt(fmt, ap, &f);
- *f._ptr = 0;
- return (len);
-}
-
-int
-vfprintf(iop, fmt, ap)
- FILE *iop;
- char *fmt;
- va_list ap;
-{
- int len;
-
- len = _doprnt(fmt, ap, iop);
- return (ferror(iop) ? EOF : len);
-}
-
-int
-vprintf(fmt, ap)
- char *fmt;
- va_list ap;
-{
- int len;
-
- len = _doprnt(fmt, ap, stdout);
- return (ferror(stdout) ? EOF : len);
-}
diff --git a/mkconf b/mkconf
deleted file mode 100755
index 3a39811c..00000000
--- a/mkconf
+++ /dev/null
@@ -1,32 +0,0 @@
-#! /bin/sh
-#
-# configure -- produce a config.h from a known configuration
-
-case "$#" in
-1) ;;
-*) echo "Usage: $0 system_type" >&2
- echo "Known systems: `cd config; echo ;ls -C`" >&2
- exit 2
- ;;
-esac
-
-if [ -f config/$1 ]; then
- sh ./mungeconf config/$1 config.h.in >config.h
-
- # echo #echo lines to stdout
- sed -n '/^#echo /s///p' config/$1
-
- sed -n '/^MAKE_.*/s//s,^##&## ,,/p' config/$1 >sedscr
- if [ -s sedscr ]
- then
- sed -f sedscr Makefile.in >Makefile
- else
- cp Makefile.in Makefile
- fi
- rm -f sedscr
-else
- echo "\`$1' is not a known configuration."
- echo "Either construct one based on the examples in the config directory,"
- echo "or copy config.h.in to config.h and edit it."
- exit 1
-fi
diff --git a/msg.c b/msg.c
index 629470ee..b60fe9d1 100644
--- a/msg.c
+++ b/msg.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@@ -29,11 +29,11 @@ int sourceline = 0;
char *source = NULL;
/* VARARGS2 */
-static void
-err(s, msg, argp)
+void
+err(s, emsg, argp)
char *s;
-char *msg;
-va_list *argp;
+char *emsg;
+va_list argp;
{
char *file;
@@ -54,7 +54,7 @@ va_list *argp;
(void) fprintf(stderr, "FNR=%d) ", FNR);
}
(void) fprintf(stderr, s);
- vfprintf(stderr, msg, *argp);
+ vfprintf(stderr, emsg, argp);
(void) fprintf(stderr, "\n");
(void) fflush(stderr);
}
@@ -69,7 +69,7 @@ va_dcl
va_start(args);
mesg = va_arg(args, char *);
- err("", mesg, &args);
+ err("", mesg, args);
va_end(args);
}
@@ -83,7 +83,7 @@ va_dcl
va_start(args);
mesg = va_arg(args, char *);
- err("warning: ", mesg, &args);
+ err("warning: ", mesg, args);
va_end(args);
}
@@ -97,7 +97,7 @@ va_dcl
va_start(args);
mesg = va_arg(args, char *);
- err("fatal: ", mesg, &args);
+ err("fatal: ", mesg, args);
va_end(args);
#ifdef DEBUG
abort();
diff --git a/node.c b/node.c
index 2d16fc6b..925fe8bc 100644
--- a/node.c
+++ b/node.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@@ -33,7 +33,7 @@ register NODE *n;
register char *cpend;
char save;
char *ptr;
- unsigned int newflags = NUMERIC;
+ unsigned int newflags = 0;
#ifdef DEBUG
if (n == NULL)
@@ -65,12 +65,12 @@ register NODE *n;
return 0.0;
if (n->flags & MAYBE_NUM) {
- newflags |= NUMBER;
+ newflags = NUMBER;
n->flags &= ~MAYBE_NUM;
}
if (cpend - cp == 1) {
if (isdigit(*cp)) {
- n->numbr = *cp - '0';
+ n->numbr = (AWKNUM)(*cp - '0');
n->flags |= newflags;
}
return n->numbr;
@@ -81,7 +81,7 @@ register NODE *n;
*cpend = '\0';
n->numbr = (AWKNUM) strtod((const char *)cp, &ptr);
- /* POSIX says trailing space is OK for NUMERIC */
+ /* POSIX says trailing space is OK for NUMBER */
while (isspace(*ptr))
ptr++;
*cpend = save;
@@ -118,23 +118,21 @@ r_force_string(s)
register NODE *s;
{
char buf[128];
- register long num;
register char *sp = buf;
+ register long num = 0;
#ifdef DEBUG
- if (s == NULL)
- cant_happen();
- if (s->type != Node_val)
- cant_happen();
- if (s->flags & STR)
- return s;
- if (!(s->flags & NUM))
- cant_happen();
- if (s->stref != 0)
- ; /*cant_happen();*/
+ if (s == NULL) cant_happen();
+ if (s->type != Node_val) cant_happen();
+ if (s->flags & STR) return s;
+ if (!(s->flags & NUM)) cant_happen();
+ if (s->stref != 0) ; /*cant_happen();*/
#endif
- if ((num = s->numbr) == s->numbr) {
- /* integral value */
+
+ /* avoids floating point exception in DOS*/
+ if ( s->numbr <= LONG_MAX && s->numbr >= -LONG_MAX)
+ num = (long)s->numbr;
+ if ((AWKNUM) num == s->numbr) { /* integral value */
if (num < NVAL && num >= 0) {
sp = values[num];
s->stlen = 1;
@@ -146,7 +144,7 @@ register NODE *s;
} else {
(void) sprintf(sp, CONVFMT, s->numbr);
s->stlen = strlen(sp);
- s->stfmt = CONVFMTidx;
+ s->stfmt = (char)CONVFMTidx;
}
s->stref = 1;
emalloc(s->stptr, char *, s->stlen + 2, "force_string");
@@ -231,24 +229,25 @@ int flags;
if (flags & SCAN) { /* scan for escape sequences */
char *pf;
- register char *pt;
+ register char *ptm;
register int c;
register char *end;
end = &(r->stptr[len]);
- for (pf = pt = r->stptr; pf < end;) {
+ for (pf = ptm = r->stptr; pf < end;) {
c = *pf++;
if (c == '\\') {
c = parse_escape(&pf);
if (c < 0) {
- warning("backslash at end of string");
+ if (do_lint)
+ warning("backslash at end of string");
c = '\\';
}
- *pt++ = c;
+ *ptm++ = c;
} else
- *pt++ = c;
+ *ptm++ = c;
}
- len = pt - r->stptr;
+ len = ptm - r->stptr;
erealloc(r->stptr, char *, len + 1, "make_str_node");
r->stptr[len] = '\0';
r->flags |= PERM;
diff --git a/patchlevel.h b/patchlevel.h
index f3608240..2867bba9 100644
--- a/patchlevel.h
+++ b/patchlevel.h
@@ -1 +1 @@
-#define PATCHLEVEL 3
+#define PATCHLEVEL 0
diff --git a/pc/config.h b/pc/config.h
index 4718a9cf..9311ee61 100644
--- a/pc/config.h
+++ b/pc/config.h
@@ -5,14 +5,14 @@
*/
/*
- * Copyright (C) 1991, the Free Software Foundation, Inc.
+ * Copyright (C) 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 1, or (at your option)
+ * the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* GAWK is distributed in the hope that it will be useful,
@@ -22,7 +22,7 @@
*
* You should have received a copy of the GNU General Public License
* along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
@@ -73,7 +73,7 @@
*
* If your system has no typedef for size_t, define this to get a default
*/
-#define SIZE_T_MISSING 1
+/* #define SIZE_T_MISSING 1 */
/*
* CHAR_UNSIGNED
@@ -97,14 +97,6 @@
/***********************************************/
/*
- * GETOPT_MISSING
- *
- * Define this if your library does not have the getopt(3) library
- * routine for parsing command line arguments.
- */
-#define GETOPT_MISSING 1
-
-/*
* MEMCMP_MISSING
* MEMCPY_MISSING
* MEMSET_MISSING
@@ -158,14 +150,6 @@
/* #define STRTOD_MISSING 1 */
/*
- * STRTOL_MISSING
- *
- * Your system does not have the strtol() routine for converting
- * strings to long integers.
- */
-#define STRTOL_MISSING 1
-
-/*
* STRFTIME_MISSING
*
* Your system lacks the ANSI C strftime() routine for formatting
@@ -198,7 +182,7 @@
* If your system does have ANSI compliant header files that
* provide prototypes for library routines, then define this.
*/
-/* #define STDC_HEADERS 1 */
+#define STDC_HEADERS 1
/*
* NO_TOKEN_PASTING
@@ -228,29 +212,12 @@
* VPRINTF_MISSING
*
* Define this if your system lacks vprintf() and the other routines
- * that go with it.
+ * that go with it. This will trigger an attempt to use _doprnt().
+ * If you don't have that, this attempt will fail and you are on your own.
*/
/* #define VPRINTF_MISSING 1 */
/*
- * BSDSTDIO
- *
- * Define this if your standard i/o library is internally compatible
- * with the one shipped with Berkeley Unix systems (4.n, n <= 3-reno).
- * If you've defined VPRINTF_MISSING, you probably will need this too.
- */
-/* #define BSDSTDIO 1 */
-
-/*
- * DOPRNT_MISSING
- *
- * Define this if your standard i/o library does not have the _doprnt()
- * routine. This is used in an attempt to simulate the vfprintf()
- * routine.
- */
-/* #define DOPRNT_MISSING 1 */
-
-/*
* Casts from size_t to int and back. These will become unnecessary
* at some point in the future, but for now are required where the
* two types are a different representation.
@@ -267,6 +234,14 @@
*/
/* #define SYSTEM_MISSING 1 */
+/*
+ * FMOD_MISSING
+ *
+ * Define this if your system lacks the fmod() function and modf() will
+ * be used instead.
+ */
+/* #define FMOD_MISSING 1 */
+
/*******************************/
/* Gawk configuration options. */
@@ -284,4 +259,14 @@
/* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
/* #define ENVSEP ':' */
+/*
+ * alloca already has a prototype defined - don't redefine it
+ */
+/* #define ALLOCA_PROTO 1 */
+
+/*
+ * srandom already has a prototype defined - don't redefine it
+ */
+/* #define SRANDOM_PROTO 1 */
+
/* anything that follows is for system-specific short-term kludges */
diff --git a/pc/make.bat b/pc/make.bat
index 99d76512..a416cadc 100644
--- a/pc/make.bat
+++ b/pc/make.bat
@@ -3,6 +3,7 @@ REM
REM *** This has only been using MSC 5.1 ***
REM
REM Written by Arnold Robbins, May 1991
+REM Modified by Scott Deifik, July, 1992
REM Based on earlier makefile for dos
REM
REM Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
@@ -24,32 +25,27 @@ REM You should have received a copy of the GNU General Public License
REM along with GAWK; see the file COPYING. If not, write to
REM the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
REM
-REM debug flags: DEBUG=#-DDEBUG #-DFUNC_TRACE -DMEMDEBUG
-REM DEBUGGER= #-Zi
-REM
-cl -c -AL -Oalt array.c
-cl -c -AL -Oalt awktab.c
-cl -c -AL -Oalt builtin.c
-cl -c -AL -Oalt dfa.c
-cl -c -AL -Oalt eval.c
-cl -c -AL -Oalt field.c
-cl -c -AL -Oalt io.c
-cl -c -AL -Oalt iop.c
-cl -c -AL -Oalt main.c
-cl -c -AL -Oalt missing.c
-cl -c -AL -Oalt msg.c
-cl -c -AL -Oalt node.c
-cl -c -AL -Oalt popen.c
-cl -c -AL -Oalt re.c
-cl -c -AL -Oalt version.c
-REM
-REM this kludge necessary because MSC 5.1 compiler bombs with -Oail (where
-REM -Ox == "-Oailt -Gs")
-REM
+REM compile debug flags: -DDEBUG -DFUNC_TRACE -DMEMDEBUG -Zi -Od
+REM
+REM
+cl -c -AL array.c
+cl -c -AL awktab.c
+cl -c -AL builtin.c
+cl -c -AL dfa.c
+cl -c -AL eval.c
+cl -c -AL field.c
+cl -c -AL io.c
+cl -c -AL iop.c
+cl -c -AL main.c
+cl -c -AL missing.c
+cl -c -AL msg.c
+cl -c -AL node.c
+cl -c -AL popen.c
+cl -c -AL re.c
REM You can ignore the warnings you will get
-cl -c -AL -Ot regex.c
+cl -c -AL regex.c
+cl -c -AL version.c
REM
-REM I'm not sure what this is for. It was commented out
-REM LINKFLAGS= /CO /NOE /NOI /st:0x1800
+REM link debug flags: /CO /NOE /NOI /st:30000
REM
-link @names.lnk,gawk.exe /E /FAR /PAC /NOE /NOI /st:0x1800;
+link @names.lnk,gawk.exe /NOE /NOI /st:30000;
diff --git a/protos.h b/protos.h
index fd752dfd..4a49f477 100644
--- a/protos.h
+++ b/protos.h
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1991, the Free Software Foundation, Inc.
+ * Copyright (C) 1991, 1992, the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@@ -44,14 +44,14 @@ extern int strncmp P((const char *, const char *, int));
#ifndef VMS
extern char *strerror P((int));
#else
-extern char *strerror(); /* extern char *strerror(int,...); */
+extern char *strerror P((int,...));
#endif
extern char *strchr P((const char *, int));
extern char *strrchr P((const char *, int));
extern char *strstr P((const char *s1, const char *s2));
extern int strlen P((const char *));
extern long strtol P((const char *, char **, int));
-#ifndef _MSC_VER
+#if !defined(_MSC_VER) && !defined(__GNU_LIBRARY__)
extern int strftime P((char *, int, const char *, const struct tm *));
#endif
extern time_t time P((time_t *));
@@ -63,7 +63,7 @@ extern int memcmp P((const aptr_t, const aptr_t, size_t));
/* extern int fprintf P((FILE *, char *, ...)); */
extern int fprintf P(());
-#ifndef MSDOS
+#if !defined(MSDOS) && !defined(__GNU_LIBRARY__)
extern int fwrite P((const char *, int, int, FILE *));
extern int fputs P((const char *, FILE *));
extern int unlink P((const char *));
@@ -88,9 +88,11 @@ extern double pow P((double x, double y));
extern double atof P((char *));
extern double strtod P((const char *, char **));
extern int fstat P((int, struct stat *));
+extern int stat P((const char *, struct stat *));
extern off_t lseek P((int, off_t, int));
extern int fseek P((FILE *, long, int));
extern int close P((int));
+extern int creat P(());
extern int open P(());
extern int pipe P((int *));
extern int dup P((int));
diff --git a/re.c b/re.c
index 3909f0b1..d9d5fa09 100644
--- a/re.c
+++ b/re.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1991 the Free Software Foundation, Inc.
+ * Copyright (C) 1991, 1992 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
@@ -39,9 +39,6 @@ int dfa;
memset((char *) rp, 0, sizeof(*rp));
emalloc(rp->pat.buffer, char *, 16, "make_regexp");
rp->pat.allocated = 16;
- rp->regs.num_regs = 1;
- emalloc(rp->regs.start, int *, sizeof(int), "make_regexp");
- emalloc(rp->regs.end, int *, sizeof(int), "make_regexp");
emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
if (ignorecase)
@@ -60,30 +57,33 @@ int dfa;
}
int
-research(rp, str, len, need_start)
+research(rp, str, start, len, need_start)
Regexp *rp;
register char *str;
+int start;
register int len;
int need_start;
{
- int count;
- int try_backref;
- char save1;
- char save2;
- char *ret = &save2;
+ char *ret = str;
if (rp->dfa) {
- save1 = str[len];
- str[len] = '\n';
- save2 = str[len+1];
- ret = regexecute(&(rp->dfareg), str, str+len+1, 1, &count,
- &try_backref);
- str[len] = save1;
- str[len+1] = save2;
+ char save1;
+ char save2;
+ int count = 0;
+ int try_backref;
+
+ save1 = str[start+len];
+ str[start+len] = '\n';
+ save2 = str[start+len+1];
+ ret = regexecute(&(rp->dfareg), str+start, str+start+len+1, 1,
+ &count, &try_backref);
+ str[start+len] = save1;
+ str[start+len+1] = save2;
}
if (ret) {
if (need_start || rp->dfa == 0)
- return re_search(&(rp->pat), str, len, 0, len, &(rp->regs));
+ return re_search(&(rp->pat), str, start+len, start,
+ len, &(rp->regs));
else
return 1;
} else
@@ -97,12 +97,12 @@ Regexp *rp;
free(rp->pat.buffer);
free(rp->pat.fastmap);
if (rp->dfa)
- regfree(&(rp->dfareg));
+ reg_free(&(rp->dfareg));
free(rp);
}
void
-regerror(s)
+reg_error(s)
const char *s;
{
fatal(s);
@@ -145,3 +145,10 @@ NODE *t;
t->re_flags |= IGNORECASE;
return t->re_reg;
}
+
+/*
+ * resetup --- select the awk regular expression syntax
+ *
+ * Passes RE_SYNTAX_AWK to both re_set_syntax() and regsyntax(); the value
+ * returned by re_set_syntax() is deliberately discarded.
+ * NOTE(review): regsyntax() is presumably the dfa matcher's counterpart of
+ * re_set_syntax(), and the meaning of its second argument (0) is not
+ * visible from here --- confirm against dfa.h.
+ */
+void
+resetup()
+{
+ (void) re_set_syntax(RE_SYNTAX_AWK);
+ regsyntax(RE_SYNTAX_AWK, 0);
+}
diff --git a/regex.c b/regex.c
index 2cb3f2bd..f4dd4c2c 100644
--- a/regex.c
+++ b/regex.c
@@ -1,10 +1,9 @@
/* Extended regular expression matching and search library.
- Version 0.1.
- Copyright (C) 1985, 89, 90, 91 Free Software Foundation, Inc.
+ Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
+ the Free Software Foundation; either version 1, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
@@ -20,96 +19,69 @@
/* To test, compile with -Dtest. This Dtestable feature turns this into
a self-contained program which reads a pattern, describes how it
compiles, then reads a string and searches for it.
-
+
On the other hand, if you compile with both -Dtest and -Dcanned you
can run some tests we've already thought of. */
-#ifdef GAWK
-#include "config.h"
-#endif
-
-#ifdef REGEX_MALLOC
-
-#define REGEX_ALLOCATE malloc
-#define REGEX_REALLOCATE(source, size) (realloc (source, size))
-
-#else /* not REGEX_MALLOC */
-
-
-/* Make alloca work the best possible way. */
-#ifdef __GNUC__
-#define alloca __builtin_alloca
-#else
-#ifdef sparc
-#include <alloca.h>
-#else
-#ifdef _AIX
- #pragma alloca
-#else /* not __GNUC__ or sparc or _AIX */
-char *alloca ();
-#endif /* _AIX */
-#endif /* sparc */
-#endif /* not __GNUC__ */
-
-/* Still not defined (REGEX_MALLOC) */
-
-#define REGEX_ALLOCATE alloca
-
-/* Requires a `void *destination' declared. */
-#define REGEX_REALLOCATE(source, size) \
- (destination = alloca (size), \
- bcopy (source, destination, size), \
- destination)
-
-#endif /* not defined (REGEX_MALLOC) */
-
-
#ifdef emacs
/* The `emacs' switch turns on certain special matching commands
that make sense only in emacs. */
-#include "config.h"
#include "lisp.h"
#include "buffer.h"
#include "syntax.h"
-/* Emacs uses `NULL' as a predicate. */
-#undef NULL
-
-
-#else /* not emacs */
+/* We write fatal error messages on standard error. */
+#include <stdio.h>
+/* isalpha(3) etc. are used for the character classes. */
+#include <ctype.h>
-#include <sys/types.h> /* POSIX types. */
+#else /* not emacs */
-#if defined(GAWK) || defined (USG) || defined (POSIX) || defined (STDC_HEADERS)
-#ifndef BSTRING
-#include <string.h>
-#define bcopy(s,d,n) memcpy ((d), (s), (n))
-#define bcmp(s1,s2,n) memcmp ((s1), (s2), (n))
-#define bzero(s,n) memset ((s), 0, (n))
-#endif /* not BSTRING */
-#endif /* USG or POSIX or STDC_HEADERS */
+#include "awk.h"
-#if defined (STDC_HEADERS)
-#include <stdlib.h>
+#define NO_ALLOCA /* try it out for now */
+#ifndef NO_ALLOCA
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+#ifndef atarist
+#ifndef alloca
+#define alloca __builtin_alloca
+#endif
+#endif /* atarist */
#else
-#ifdef __STDC__
-void *malloc (size_t);
-void *realloc (void *, size_t);
-#else /* not __STDC__ */
-char *malloc ();
-char *realloc ();
-#endif /* not __STDC__ */
-#endif /* not (POSIX or STDC_HEADERS) */
-
-
-
-/* How many characters in the character set. */
-#define CHAR_SET_SIZE 256
-
+#if defined(sparc) && !defined(__GNUC__)
+#include <alloca.h>
+#else
+char *alloca ();
+#endif
+#endif /* __GNUC__ */
+
+#define FREE_AND_RETURN_VOID(stackb) return
+#define FREE_AND_RETURN(stackb,val) return(val)
+#define DOUBLE_STACK(stackx,stackb,len) \
+ (stackx = (unsigned char **) alloca (2 * len \
+ * sizeof (unsigned char *)),\
+ /* Only copy what is in use. */ \
+ (unsigned char **) memcpy (stackx, stackb, len * sizeof (char *)))
+#else /* NO_ALLOCA defined */
+#define FREE_AND_RETURN_VOID(stackb) free(stackb);return
+#define FREE_AND_RETURN(stackb,val) free(stackb);return(val)
+#define DOUBLE_STACK(stackx,stackb,len) \
+ (unsigned char **) realloc (stackb, 2 * len * sizeof (unsigned char *))
+#endif /* NO_ALLOCA */
+
+static void store_jump P((char *, int, char *));
+static void insert_jump P((int, char *, char *, char *));
+static void store_jump_n P((char *, int, char *, unsigned));
+static void insert_jump_n P((int, char *, char *, char *, unsigned));
+static void insert_op_2 P((int, char *, char *, int, int ));
+static int memcmp_translate P((unsigned char *, unsigned char *,
+ int, unsigned char *));
+long re_set_syntax P((long));
/* Define the syntax stuff, so we can do the \<, \>, etc. */
@@ -117,18 +89,20 @@ char *realloc ();
commands in re_match_2. */
#ifndef Sword
#define Sword 1
-#endif /* not Sword */
+#endif
#define SYNTAX(c) re_syntax_table[c]
#ifdef SYNTAX_TABLE
-extern char *re_syntax_table;
+char *re_syntax_table;
#else /* not SYNTAX_TABLE */
-static char re_syntax_table[CHAR_SET_SIZE];
+static char re_syntax_table[256];
+static void init_syntax_once P((void));
+
static void
init_syntax_once ()
@@ -139,7 +113,7 @@ init_syntax_once ()
if (done)
return;
- bzero (re_syntax_table, sizeof re_syntax_table);
+ memset (re_syntax_table, 0, sizeof re_syntax_table);
for (c = 'a'; c <= 'z'; c++)
re_syntax_table[c] = Sword;
@@ -149,252 +123,169 @@ init_syntax_once ()
for (c = '0'; c <= '9'; c++)
re_syntax_table[c] = Sword;
-
- re_syntax_table['_'] = Sword;
+
+ /* Add specific syntax for ISO Latin-1. */
+ for (c = 0300; c <= 0377; c++)
+ re_syntax_table[c] = Sword;
+ re_syntax_table[0327] = 0;
+ re_syntax_table[0367] = 0;
done = 1;
}
-#endif /* not SYNTAX_TABLE */
-#endif /* not emacs */
-
-/* We write fatal error messages on standard error. */
-#include <stdio.h>
+#endif /* SYNTAX_TABLE */
+#undef P
+#endif /* emacs */
-/* isalpha(3) etc. are used for the character classes. */
-#include <ctype.h>
/* Sequents are missing isgraph. */
-#ifdef sequent
-#define ISGRAPH_MISSING
+#ifndef isgraph
+#define isgraph(c) (isprint((c)) && !isspace((c)))
#endif
-#ifdef ISGRAPH_MISSING
-#define isgraph(c) (isprint (c) && !isspace (c))
-#endif
-
-
/* Get the interface, including the syntax bits. */
#include "regex.h"
-/* We will need this constant several times. */
-#define BYTEWIDTH 8
-
-
-
/* These are the command codes that appear in compiled regular
expressions, one per byte. Some command codes are followed by
argument bytes. A command code can specify any interpretation
whatsoever for its arguments. Zero-bytes may appear in the compiled
regular expression.
-
+
The value of `exactn' is needed in search.c (search_buffer) in emacs.
So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
`exactn' we use here must also be 1. */
enum regexpcode
{
- no_op=0,
- exactn=1, /* Followed by one byte giving n, then by n
- literal bytes. */
- begline, /* Fail unless at beginning of line. */
- endline, /* Fail unless at end of line. */
- endline_in_repeat, /* If in trailing position, turn into an endline,
- otherwise, turn into a no_op. This should
- never end up in the final compiled pattern! */
- endline_before_newline,/* If after an endline, don't that endline turn into
- an exactn for '$' when RE_CONTEXT_INDEP_ANCHORS
- is set. Should never end up in the compiled
- pattern! */
- repeated_endline_before_newline, /* A combination of above two. */
- no_pop_jump, /* Followed by two byte relative address to
- which to jump. */
- jump_past_next_alt, /* Same as no_pop_jump, but don't jump if the
- current group (the largest-numbered active
- one) hasn't matched anything. */
- on_failure_jump, /* Followed by two byte relative address of
- place to resume at in case of failure. */
- pop_failure_jump, /* Throw away latest failure point and then jump to
- address. */
- maybe_pop_jump,
- /* Like jump but change to pop_failure_jump
- only if know won't have to backtrack to
- match; otherwise change to no_pop_jump.
- This is used to jump back to the
- beginning of a repeat. If what follows
- this jump clearly won't match what the
- repeat does, such that we can be sure
- that there is no use backtracking out of
- repetitions already matched, then we
- change it to a pop_failure_jump. */
- dummy_failure_jump, /* Jump, and push a dummy failure point. This
- failure point will be thrown away if an
- attempt is made to use it for a failure. A
- `+' construct makes this before the first
- repeat. Also use it as an intermediary kind
- of jump when compiling an alternative. */
- succeed_n, /* Used like on_failure_jump except has to
- succeed n times; The two-byte relative
- address following it is useless until then.
- The address is followed by two bytes
- containing n. */
- no_pop_jump_n, /* Similar to no_pop_jump, but jump n times
- only; also the relative address following is
- in turn followed by yet two more bytes
- containing n. */
- set_number_at, /* Set the following relative location (two
- bytes) to the subsequent (two-byte) number. */
- anychar, /* Matches any (more or less) character. */
- charset, /* Matches any one char belonging to specified set.
- First following byte is number of bitmap
- bytes. Then come bytes for a bitmap saying
- which chars are in. Bits in each byte are
- ordered low-bit-first. A character is in the
- set if its bit is 1. A character too large
- to have a bit in the map is automatically not
- in the set. */
- charset_not, /* Same parameters as charset, but match any
- character that is not one of those specified. */
- start_memory, /* Start remembering the text that is matched, for
- storing in a memory register. Followed by
- one byte containing the register number.
- Register numbers will be in the range 0
- through one less than the pattern buffer's
- re_nsub field. */
- stop_memory, /* Stop remembering the text that is matched
- and store it in a memory register. Followed
- by one byte containing the register number.
- Register numbers will be in the range 0
- through one less than the pattern buffer's
- re_nsub field. */
- duplicate, /* Match a duplicate of something remembered.
- Followed by one byte containing the register
- number. */
- before_dot, /* Succeeds if before point. */
- at_dot, /* Succeeds if at point. */
- after_dot, /* Succeeds if after point. */
- begbuf, /* Succeeds if at beginning of buffer. */
- endbuf, /* Succeeds if at end of buffer. */
- wordchar, /* Matches any word-constituent character. */
- notwordchar, /* Matches any char that is not a word-constituent. */
- wordbeg, /* Succeeds if at word beginning. */
- wordend, /* Succeeds if at word end. */
- wordbound, /* Succeeds if at a word boundary. */
- notwordbound, /* Succeeds if not at a word boundary. */
- syntaxspec, /* Matches any character whose syntax is specified.
- followed by a byte which contains a syntax
- code, e.g., Sword. */
- notsyntaxspec /* Matches any character whose syntax differs from
- that specified. */
+ unused=0,
+ exactn=1, /* Followed by one byte giving n, then by n literal bytes. */
+ begline, /* Fail unless at beginning of line. */
+ endline, /* Fail unless at end of line. */
+ jump, /* Followed by two bytes giving relative address to jump to. */
+ on_failure_jump, /* Followed by two bytes giving relative address of
+ place to resume at in case of failure. */
+ finalize_jump, /* Throw away latest failure point and then jump to
+ address. */
+ maybe_finalize_jump, /* Like jump but finalize if safe to do so.
+ This is used to jump back to the beginning
+ of a repeat. If the command that follows
+ this jump is clearly incompatible with the
+ one at the beginning of the repeat, such that
+ we can be sure that there is no use backtracking
+ out of repetitions already completed,
+ then we finalize. */
+ dummy_failure_jump, /* Jump, and push a dummy failure point. This
+ failure point will be thrown away if an attempt
+ is made to use it for a failure. A + construct
+ makes this before the first repeat. Also
+ use it as an intermediary kind of jump when
+ compiling an or construct. */
+ succeed_n, /* Used like on_failure_jump except has to succeed n times;
+ then gets turned into an on_failure_jump. The relative
+ address following it is useless until then. The
+ address is followed by two bytes containing n. */
+ jump_n, /* Similar to jump, but jump n times only; also the relative
+ address following is in turn followed by yet two more bytes
+ containing n. */
+ set_number_at, /* Set the following relative location to the
+ subsequent number. */
+ anychar, /* Matches any (more or less) one character. */
+ charset, /* Matches any one char belonging to specified set.
+ First following byte is number of bitmap bytes.
+ Then come bytes for a bitmap saying which chars are in.
+ Bits in each byte are ordered low-bit-first.
+ A character is in the set if its bit is 1.
+ A character too large to have a bit in the map
+ is automatically not in the set. */
+ charset_not, /* Same parameters as charset, but match any character
+ that is not one of those specified. */
+ start_memory, /* Start remembering the text that is matched, for
+ storing in a memory register. Followed by one
+ byte containing the register number. Register numbers
+ must be in the range 0 through RE_NREGS. */
+ stop_memory, /* Stop remembering the text that is matched
+ and store it in a memory register. Followed by
+ one byte containing the register number. Register
+ numbers must be in the range 0 through RE_NREGS. */
+ duplicate, /* Match a duplicate of something remembered.
+ Followed by one byte containing the index of the memory
+ register. */
+ before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+ begbuf, /* Succeeds if at beginning of buffer. */
+ endbuf, /* Succeeds if at end of buffer. */
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound,/* Succeeds if not at a word boundary. */
+ syntaxspec, /* Matches any character whose syntax is specified.
+ Followed by a byte which contains a syntax code,
+ e.g., Sword. */
+ notsyntaxspec /* Matches any character whose syntax differs from
+ that specified. */
};
+
+/* Number of failure points to allocate space for initially,
+ when matching. If this number is exceeded, more space is allocated,
+ so it is not a hard limit. */
+#ifndef NFAILURES
+#define NFAILURES 80
+#endif
-#ifdef CHAR_UNSIGNED /* for, e.g., IBM RT */
-#define SIGN_EXTEND_CHAR(c) (((c)^128) - 128) /* As in Harbison and Steele. */
+#ifdef CHAR_UNSIGNED
+#define SIGN_EXTEND_CHAR(c) ((c)>(char)127?(c)-256:(c)) /* for IBM RT */
#endif
#ifndef SIGN_EXTEND_CHAR
-#define SIGN_EXTEND_CHAR /* As nothing. */
+#define SIGN_EXTEND_CHAR(x) (x)
#endif
-
-
+
/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
-
#define STORE_NUMBER(destination, number) \
- do {(destination)[0] = (number) & 0377; \
- (destination)[1] = (number) >> 8; \
- } while (0)
-
-
+ { (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; }
+
/* Same as STORE_NUMBER, except increment the destination pointer to
the byte after where the number is stored. Watch out that values for
DESTINATION such as p + 1 won't work, whereas p will. */
-
#define STORE_NUMBER_AND_INCR(destination, number) \
- do { STORE_NUMBER(destination, number); \
- (destination) += 2; \
- } while (0)
-
-
-
-
+ { STORE_NUMBER(destination, number); \
+ (destination) += 2; }
/* Put into DESTINATION a number stored in two contiguous bytes starting
at SOURCE. */
-
#define EXTRACT_NUMBER(destination, source) \
- do { (destination) = *(source) & 0377; \
- (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; \
- } while (0)
-
-int
-extract_number (source)
- unsigned char *source;
-{
- int answer;
- int i_temp = * (char *) (source + 1);
- char c_temp = * (char *) (source + 1);
-
- i_temp = SIGN_EXTEND_CHAR (i_temp);
- c_temp = SIGN_EXTEND_CHAR (c_temp);
-
- i_temp <<= 8;
- c_temp <<= 8;
-
- answer = *source & 0377;
- answer += (SIGN_EXTEND_CHAR (*(char *)((source) + 1))) << 8;
-
- return answer;
-}
-
+ { (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; }
/* Same as EXTRACT_NUMBER, except also advance the pointer SOURCE past
the two bytes that were read. Note that SOURCE has to be a value
such as p, not, e.g., p + 1. */
-
#define EXTRACT_NUMBER_AND_INCR(destination, source) \
- do { EXTRACT_NUMBER (destination, source); \
- (source) += 2; \
- } while (0)
-
-
-void
-extract_number_and_incr (destination, source)
- int *destination;
- unsigned char **source;
-{
- *destination = extract_number (*source);
- *source += 2;
-}
-
-
-
-typedef enum { false = 0, true = 1 } boolean;
-
-/* Number of failure points for which to initially allocate space
- when matching. If this number is exceeded, we allocate more space---
- so it is not a hard limit. */
-
-#ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
-#endif
+ { EXTRACT_NUMBER (destination, source); \
+ (source) += 2; }
/* Specify the precise syntax of regexps for compilation. This provides
for compatibility for various utilities which historically have
different, incompatible syntaxes.
-
+
The argument SYNTAX is a bit-mask comprised of the various bits
defined in regex.h. */
-int
+long
re_set_syntax (syntax)
- int syntax;
+ long syntax;
{
- int ret;
+ long ret;
ret = obscure_syntax;
obscure_syntax = syntax;
@@ -402,383 +293,137 @@ re_set_syntax (syntax)
}
/* Set by re_set_syntax to the current regexp syntax to recognize. */
-int obscure_syntax = 0;
-
-
-
-/* Routine used by re_compile_pattern, re_comp and regcomp. */
-
-#ifdef __STDC__
-static char *regex_compile (const char *pattern, const int size,
- const int syntax, struct re_pattern_buffer *bufp);
-#else
-static char *regex_compile ();
-#endif
+long obscure_syntax = 0;
-/* re_compile_pattern takes a regular-expression string and converts it
- into a buffer full of byte commands for matching.
-
- PATTERN is the address of the pattern string.
- SIZE is the length of it.
-
- BUFP is a struct re_pattern_buffer * whose pertinent fields are
- mentioned below:
-
- It has a char * field called BUFFER which points to the
- space where this routine will put the compiled pattern; the
- user can either allocate this using malloc (whereupon they
- should set the long field ALLOCATED to the number of bytes
- malloced) or set ALLOCATED to 0 and let the routine
- allocate it. The routine may use realloc to enlarge the
- buffer space.
-
- If the user wants to translate all ordinary elements in the
- compiled pattern, they should set the char * field
- TRANSLATE to a translate table, otherwise, they should set
- it to 0.
-
- The routine sets the int field SYNTAX to the value of the
- global variable `obscure_syntax'.
-
- It returns in the long field USED how many bytes long the
- compiled pattern is.
-
- It returns 0 in the char field FASTMAP_ACCURATE, on
- the assumption that the user usually doesn't compile the
- same pattern twice and that consequently any fastmap in the
- pattern buffer is inaccurate.
-
- In the size_t field RE_NSUB, it returns the number of
- subexpressions it found in PATTERN.
-
- Returns 0 if the pattern was valid and an error string if it wasn't. */
-
-
-char *
-re_compile_pattern (pattern, size, bufp)
- const char *pattern;
- const int size;
- struct re_pattern_buffer *bufp;
-{
- bufp->return_default_num_regs = (obscure_syntax & RE_ALLOCATE_REGISTERS) > 0;
-
- return regex_compile (pattern, size, obscure_syntax, bufp);
-}
-
-
-
-/* Macros for regex_compile. */
+/* Macros for re_compile_pattern, which is found below these definitions. */
#define CHAR_CLASS_MAX_LENGTH 6
-
-/* Fetch the next character in the uncompiled pattern---translating it
- if necessary. */
-
+/* Fetch the next character in the uncompiled pattern, translating it if
+ necessary. */
#define PATFETCH(c) \
- do {if (p == pend) goto end_of_pattern; \
- c = * (unsigned char *) p++; \
- if (translate) \
- c = translate[c]; \
- } while (0)
+ {if (p == pend) goto end_of_pattern; \
+ c = * (unsigned char *) p++; \
+ if (translate) c = translate[c]; }
/* Fetch the next character in the uncompiled pattern, with no
translation. */
-
#define PATFETCH_RAW(c) \
- do {if (p == pend) goto end_of_pattern; \
- c = * (unsigned char *) p++; \
- } while (0)
+ {if (p == pend) goto end_of_pattern; \
+ c = * (unsigned char *) p++; }
#define PATUNFETCH p--
-/* Pattern offset stuff. */
-
-#define INIT_PATTERN_OFFSETS_LIST_SIZE 32
-
-typedef short pattern_offset_type;
-
-typedef struct {
- pattern_offset_type *offsets;
- unsigned size;
- unsigned avail;
-} pattern_offsets_list_type;
-
-#define PATTERN_OFFSETS_LIST_PTR_FULL(pattern_offsets_list_ptr) \
- (pattern_offsets_list_ptr->avail == pattern_offsets_list_ptr->size)
-
-
-/* Anchor and op list stuff. */
-
-typedef pattern_offsets_list_type anchor_list_type;
-typedef pattern_offsets_list_type op_list_type;
-
-
-
-/* Bits list declaration. An arbitrarily long string of bits. */
-
-typedef struct {
- unsigned *bits;
- unsigned size;
-} bits_list_type;
-
-
-/* Bits list macros. See below for routines. */
-
-#define BITS_BLOCK_SIZE (sizeof (unsigned) * BYTEWIDTH)
-#define BITS_BLOCK(position) ((position) / BITS_BLOCK_SIZE)
-#define BITS_MASK(position) (1 << ((position) % BITS_BLOCK_SIZE))
-
-
-/* Initialize BITS_LIST (of type bits_list_type) to have one bits
- block. Mostly analogous to routine init_bits_list, but, if
- REGEX_MALLOC is not defined, uses `alloca' instead of `malloc'. This
- is because using malloc in re_search* or re_match* could cause core
- leaks when C-g is used in Emacs, plus malloc's slower and causes
- storage fragmentation. This has to be a macro because the results of
- `alloca' disappear at the end of the routine it's in. (If for some
- reason you delete this explanation, please put it in the comment for
- the failure stack.)
-
- Return 1 if there's enough memory to do so and 0 if there isn't. */
-
-#define INIT_BITS_LIST(bits_list) \
- (bits_list.bits = (unsigned *) REGEX_ALLOCATE (sizeof (unsigned)), \
- bits_list.bits == NULL \
- ? 0 \
- : (bits_list.size = BITS_BLOCK_SIZE, \
- bits_list.bits[0] = 0, \
- 1))
-
-
-/* Extend BITS_LIST_PTR (of type bits_list_type) by one bits block.
- Return 1 if there's enough memory to do so and 0 if there isn't.
- Analogous to routine extend_bits_list, but uses alloca instead of
- realloc, for reasons stated above in INIT_BITS_LIST's comment.
-
- Because REGEX_REALLOCATE requires a declaration of `void
- *destination', so does this. */
-
-
-#define EXTEND_BITS_LIST(bits_list) \
- (bits_list.bits \
- = (unsigned *) REGEX_REALLOCATE (bits_list.bits, \
- bits_list.size / BYTEWIDTH \
- + BITS_BLOCK_SIZE / BYTEWIDTH), \
- bits_list.bits == NULL \
- ? 0 \
- : (bits_list.size += BITS_BLOCK_SIZE, \
- bits_list.bits[(bits_list.size/BITS_BLOCK_SIZE) - 1] = 0, \
- 1))
-
-
-/* Set the bit for a positive POSITION in BITS_LIST_PTR to VALUE, which,
- in turn, can only be 0 or 1.
-
- Returns 1 if can set the bit.
- 0 if ran out of memory allocating (if necessary) room for it.
- value if the value is invalid (i.e., not 0 or 1).
-
- Because EXTENT_BITS_LIST requires a declaration of `void
- *destination', so does this. */
-
-#define SET_BIT_TO_VALUE(bits_list, position, value) \
- (position > bits_list.size - 1 \
- && !EXTEND_BITS_LIST (bits_list) \
- ? 0 \
- : (value == 1 \
- ? (bits_list.bits[BITS_BLOCK (position)] \
- |= BITS_MASK (position), 1) \
- : (value == 0 \
- ? (bits_list.bits[BITS_BLOCK (position)] \
- &= ~(BITS_MASK (position)), 1) \
- : value) \
- ))
-
-
-
-/* Compile stack stuff. */
-
-typedef struct {
- pattern_offset_type laststart_offset;
- pattern_offset_type fixup_alt_jump;
- pattern_offset_type regnum;
- pattern_offset_type begalt_offset;
-} compile_stack_element;
-
-
-typedef struct {
- compile_stack_element *stack;
- unsigned size;
- unsigned avail; /* Offset of next open position. */
- } compile_stack_type;
-
-
-#define INIT_COMPILE_STACK_SIZE 32
-
-#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
-#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
-
-
/* If the buffer isn't allocated when it comes in, use this. */
-#define INIT_BUF_SIZE 32
+#define INIT_BUF_SIZE 28
/* Make sure we have at least N more bytes of space in buffer. */
#define GET_BUFFER_SPACE(n) \
{ \
- while (b - bufp->buffer + (n) > bufp->allocated) \
- EXTEND_BUFFER \
+ while (b - bufp->buffer + (n) >= bufp->allocated) \
+ EXTEND_BUFFER; \
}
-/* Make sure we have one more byte of buffer space and then add C to it. */
-#define BUF_PUSH(c) \
- do { \
+/* Make sure we have one more byte of buffer space and then add CH to it. */
+#define BUFPUSH(ch) \
+ { \
GET_BUFFER_SPACE (1); \
- *b++ = (char) (c); \
- } while (0)
-
-/* Make sure we have two more bytes of buffer space and then add C1 and
- C2 to it. */
-#define BUF_PUSH_2(c1, c2) \
- do { \
- GET_BUFFER_SPACE (2); \
- *b++ = (char) (c1); \
- *b++ = (char) (c2); \
- } while (0)
-
-
-
-#define MAX_BUF_SIZE (1L << 16)
-
-/* Extend the buffer by twice its current size via realloc and
- reset the pointers that pointed into the old block to point to the
- correct places in the new one. If extending the buffer results in it
- being larger than MAX_BUF_SIZE, then flag memory exhausted. */
+ *b++ = (char) (ch); \
+ }
+
+/* Extend the buffer by twice its current size via reallocation and
+ reset the pointers that pointed into the old allocation to point to
+ the correct places in the new allocation. If extending the buffer
+ results in it being larger than 1 << 16, then flag memory exhausted. */
#define EXTEND_BUFFER \
- { \
- char *old_buffer = bufp->buffer; \
- if (bufp->allocated == MAX_BUF_SIZE) \
- goto too_big; \
- bufp->allocated <<= 1; \
- if (bufp->allocated > MAX_BUF_SIZE) \
- bufp->allocated = MAX_BUF_SIZE; \
+ { char *old_buffer = bufp->buffer; \
+ if (bufp->allocated == (1L<<16)) goto too_big; \
+ bufp->allocated *= 2; \
+ if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \
bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated); \
- if (bufp->buffer == NULL) \
+ if (bufp->buffer == 0) \
goto memory_exhausted; \
b = (b - old_buffer) + bufp->buffer; \
- begalt = (begalt - old_buffer) + bufp->buffer; \
- beg_interval = (beg_interval - old_buffer) + bufp->buffer; \
- if (fixup_alt_jump) \
- fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \
+ if (fixup_jump) \
+ fixup_jump = (fixup_jump - old_buffer) + bufp->buffer; \
if (laststart) \
laststart = (laststart - old_buffer) + bufp->buffer; \
+ begalt = (begalt - old_buffer) + bufp->buffer; \
if (pending_exact) \
pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
}
-/* Set the bit for character C in a list. */
+/* Set the bit for character C in a character set list. */
#define SET_LIST_BIT(c) (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
/* Get the next unsigned number in the uncompiled pattern. */
#define GET_UNSIGNED_NUMBER(num) \
- { if (p != pend) \
- { \
- PATFETCH (c); \
- while (isdigit (c)) \
- { \
- if (num < 0) \
- num = 0; \
- num = num * 10 + c - '0'; \
- if (p == pend) \
- break; \
- PATFETCH (c); \
- } \
- } \
- }
-
-
-#define DO_RANGE \
- { \
- /* Get untranslated range start and end characters. */ \
- char this_char = p[-2]; \
- char end; \
- \
- if (p == pend) \
- goto invalid_range_end; \
- PATFETCH_RAW (end); \
- if ((syntax & RE_NO_EMPTY_RANGES) && this_char > end) \
- goto invalid_range_end; \
- while (this_char <= end) \
- { \
- SET_LIST_BIT (translate ? translate[this_char] : this_char); \
- this_char++; \
- } \
- }
-
-
-#define IS_CHAR_CLASS(string) \
- (strcmp (string, "alpha") == 0 || strcmp (string, "upper") == 0 \
- || strcmp (string, "lower") == 0 || strcmp (string, "digit") == 0 \
- || strcmp (string, "alnum") == 0 || strcmp (string, "xdigit") == 0 \
- || strcmp (string, "space") == 0 || strcmp (string, "print") == 0 \
- || strcmp (string, "punct") == 0 || strcmp (string, "graph") == 0 \
- || strcmp (string, "cntrl") == 0) \
-
-
-
-/* Subroutines for regex_compile. */
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (isdigit (c)) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
-static void store_jump (), insert_jump (), store_jump_n (),
- insert_jump_n (), insert_op_2 (), remove_intervening_anchors (),
- clear_this_and_higher_levels (), increase_level (),
- decrease_level (), adjust_pattern_offsets_list ();
+/* Subroutines for re_compile_pattern. */
+/* static void store_jump (), insert_jump (), store_jump_n (),
+ insert_jump_n (), insert_op_2 (); */
-static unsigned record_anchor_position (), init_bits_list (),
- get_level_match_status (),
- set_this_level (), set_next_lower_level (),
- make_group_active (), make_group_inactive (),
- set_match_status_of_active_groups (),
- get_group_match_status (), add_op (),
- init_pattern_offsets_list ();
+/* re_compile_pattern takes a regular-expression string
+ and converts it into a buffer full of byte commands for matching.
-static boolean is_in_compile_stack (), lower_levels_match_nothing (),
- no_levels_match_anything (), verify_and_adjust_endlines ();
+ PATTERN is the address of the pattern string
+ SIZE is the length of it.
+ BUFP is a struct re_pattern_buffer * which points to the info
+ on where to store the byte commands.
+ This structure contains a char * which points to the
+ actual space, which should have been obtained with malloc.
+ re_compile_pattern may use realloc to grow the buffer space.
+ The number of bytes of commands can be found out by looking in
+ the `struct re_pattern_buffer' that bufp pointed to, after
+ re_compile_pattern returns. */
-static char *
-regex_compile (pattern, size, syntax, bufp)
- const char *pattern;
- const int size;
- const int syntax;
+char *
+re_compile_pattern (pattern, size, bufp)
+ char *pattern;
+ size_t size;
struct re_pattern_buffer *bufp;
{
register char *b = bufp->buffer;
- const char *p = pattern;
- const char *pend = pattern + size;
+ register char *p = pattern;
+ char *pend = pattern + size;
register unsigned c, c1;
- const char *p1;
+ char *p0;
unsigned char *translate = (unsigned char *) bufp->translate;
- boolean enough_memory;
/* Address of the count-byte of the most recently inserted `exactn'
command. This makes it possible to tell whether a new exact-match
character can be added to that command or requires a new `exactn'
command. */
-
+
char *pending_exact = 0;
- /* Address of the place where a forward jump should go to the end of
+ /* Address of the place where a forward-jump should go to the end of
the containing expression. Each alternative of an `or', except the
- last, ends with a forward jump of this sort. */
+ last, ends with a forward-jump of this sort. */
- char *fixup_alt_jump = 0;
+ char *fixup_jump = 0;
/* Address of start of the most recently finished expression.
- This tells, e. g., postfix * where to find the start of its operand. */
+ This tells postfix * where to find the start of its operand. */
char *laststart = 0;
@@ -790,10 +435,10 @@ regex_compile (pattern, size, syntax, bufp)
char many_times_ok;
- /* Address of beginning of regexp, or inside of last group. */
+ /* Address of beginning of regexp, or inside of last \(. */
char *begalt = b;
-
+
/* In processing an interval, at least this many matches must be made. */
int lower_bound;
@@ -802,8 +447,19 @@ regex_compile (pattern, size, syntax, bufp)
/* Place in pattern (i.e., the {) to which to go back if the interval
is invalid. */
- const char *beg_interval = 0;
- const char *following_left_brace = 0;
+ char *beg_interval = 0;
+
+ /* Stack of information saved by \( and restored by \).
+ Four stack elements are pushed by each \(:
+ First, the value of b.
+ Second, the value of fixup_jump.
+ Third, the value of regnum.
+ Fourth, the value of begalt. */
+
+ int stackb[40];
+ int *stackp = stackb;
+ int *stacke = stackb + 40;
+ int *stackt;
/* Counts \('s as they are encountered. Remembered for the matching \),
where it becomes the register number to put in the stop_memory
@@ -811,69 +467,7 @@ regex_compile (pattern, size, syntax, bufp)
int regnum = 1;
- compile_stack_type compile_stack;
- anchor_list_type anchor_list;
-
- /* Keeps track of whether or not the pattern at a given grouping level
- matches the empty string so far. Each bit in the `bits' field of
- this variable corresponds to a level, starting at level zero (i.e.,
- the whole pattern) at the rightmost bit of list[0]. Level 1 is the
- bit to the left of that, etc. Additional bits that won't fit in
- bits[0] are in bits[2], bits[3], etc. */
-
- bits_list_type level_match_status;
- unsigned current_level = 0;
-
- /* Does a similar thing for groups that the above variable does for
- levels. */
- bits_list_type group_match_status;
-
- /* Keeps track of whether or not a given group is active. Accessed as
- is group_match_status. */
- bits_list_type group_active_status;
-
- /* Keeps track of operations relevant to detecting valid position of '$'. */
- op_list_type op_list;
-
- /* Keeps track of whether or not hit a `$' since the the beginning of
- the pattern or the last (if any) alternative; if so, then `^' is an
- ordinary character. */
-
- boolean had_an_endline = false;
-
-
- compile_stack.stack
- = (compile_stack_element *) malloc (INIT_COMPILE_STACK_SIZE
- * sizeof (compile_stack_element));
-
- if (compile_stack.stack == NULL)
- goto memory_exhausted;
-
- compile_stack.size = INIT_COMPILE_STACK_SIZE;
- compile_stack.avail = 0;
-
-
- if (syntax & RE_REPEATED_ANCHORS_AWAY)
- if (!init_pattern_offsets_list (&anchor_list,
- INIT_COMPILE_STACK_SIZE << 1))
- goto memory_exhausted;
-
- if (!(init_bits_list (&level_match_status)
- && init_bits_list (&group_match_status)
- && init_bits_list (&group_active_status)))
- goto memory_exhausted;
-
-
- if (!init_pattern_offsets_list (&op_list, INIT_PATTERN_OFFSETS_LIST_SIZE))
- goto memory_exhausted;
-
-
- bufp->syntax = syntax;
bufp->fastmap_accurate = 0;
- bufp->not_bol = bufp->not_eol = 0;
-
- /* Always count groups, whether or not bufp->no_sub is set. */
- bufp->re_nsub = 0;
#ifndef emacs
#ifndef SYNTAX_TABLE
@@ -882,21 +476,15 @@ regex_compile (pattern, size, syntax, bufp)
#endif
#endif
-
if (bufp->allocated == 0)
{
bufp->allocated = INIT_BUF_SIZE;
if (bufp->buffer)
- {
- /* EXTEND_BUFFER loses when bufp->allocated is 0. This loses if
- buffer's address is bogus. */
- bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);
- }
+ /* EXTEND_BUFFER loses when bufp->allocated is 0. */
+ bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);
else
- {
- /* Caller did not allocate a buffer. Do it for them. */
- bufp->buffer = (char *) malloc (INIT_BUF_SIZE);
- }
+ /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = (char *) malloc (INIT_BUF_SIZE);
if (!bufp->buffer) goto memory_exhausted;
begalt = b = bufp->buffer;
}
@@ -906,669 +494,431 @@ regex_compile (pattern, size, syntax, bufp)
PATFETCH (c);
switch (c)
- {
- case '$':
- {
- if ((syntax & RE_ANCHORS_ONLY_AT_ENDS) && p != pend
- && (syntax & RE_CONTEXT_INVALID_ANCHORS))
- goto invalid_pattern;
-
- if (syntax & RE_TIGHT_ALT)
- {
- /* Make operand of last alternation jump to this endline. */
-
- if (fixup_alt_jump)
- store_jump (fixup_alt_jump, jump_past_next_alt, b);
-
- fixup_alt_jump = 0;
- }
-
- if (syntax & RE_REPEATED_ANCHORS_AWAY)
- if (!record_anchor_position (!COMPILE_STACK_EMPTY,
- b - bufp->buffer, &anchor_list))
- goto memory_exhausted;
-
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
- BUF_PUSH ((p != pend && *p == '\n')
- ? (int) endline_before_newline
- : (int) endline);
-
- /* If there's a chance this endline would have to turn into
- `exactn 1 '$',' have to push dummy ops to make room;
- can't insert later because would mess up any surrounding
- jumps. */
-
- if (!(syntax & RE_CONTEXT_INDEP_ANCHORS)
- && !((syntax & RE_ANCHORS_ONLY_AT_ENDS) && p == pend))
+ {
+ case '$':
+ {
+ char *p1 = p;
+ /* When testing what follows the $,
+ look past the \-constructs that don't consume anything. */
+ if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+ while (p1 != pend)
+ {
+ if (*p1 == '\\' && p1 + 1 != pend
+ && (p1[1] == '<' || p1[1] == '>'
+ || p1[1] == '`' || p1[1] == '\''
+#ifdef emacs
+ || p1[1] == '='
+#endif
+ || p1[1] == 'b' || p1[1] == 'B'))
+ p1 += 2;
+ else
+ break;
+ }
+ if (obscure_syntax & RE_TIGHT_VBAR)
{
- laststart = b - 1;
- BUF_PUSH_2 (no_op, no_op);
- }
-
- had_an_endline = true;
- break;
+ if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend)
+ goto normal_char;
+ /* Make operand of last vbar end before this `$'. */
+ if (fixup_jump)
+ store_jump (fixup_jump, jump, b);
+ fixup_jump = 0;
+ BUFPUSH (endline);
+ break;
+ }
+ /* $ means succeed if at end of line, but only in special contexts.
+ If validly in the middle of a pattern, it is a normal character. */
+
+ if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend)
+ goto invalid_pattern;
+ if (p1 == pend || *p1 == '\n'
+ || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
+ || (obscure_syntax & RE_NO_BK_PARENS
+ ? *p1 == ')'
+ : *p1 == '\\' && p1[1] == ')')
+ || (obscure_syntax & RE_NO_BK_VBAR
+ ? *p1 == '|'
+ : *p1 == '\\' && p1[1] == '|'))
+ {
+ BUFPUSH (endline);
+ break;
+ }
+ goto normal_char;
}
-
- case '^':
- /* If change anything in this case, have to change analogous
- code in *endline* (yes, endline---because the routine goes
- backwards through the pattern) case of the routine
- verify_and_adjust_endlines. */
+ case '^':
+ /* ^ means succeed if at beg of line, but only if no preceding
+ pattern. */
- /* ^ means match the beginning of a string. If
- RE_CONTEXT_INDEP_ANCHORS is set, then it represents the
- match-beginning-of-line operator anywhere in the regular
- expression.
-
- If that bit isn't set, then it represents the
- match-beginning-of-line operator in leading positions and
- matches itself in other positions (unless it's invalid
- there). */
-
- /* If the '^' must be at the pattern's beginning or else is
- in a leading position. */
-
- if (((syntax & RE_ANCHORS_ONLY_AT_ENDS)
- || (syntax & RE_TIGHT_ALT))
- ? p - 1 == pattern
-
- /* If just after a newline, or... */
- : ((p - 2 >= pattern && p[-2] == '\n')
-
- /* ...no levels match anything, then in a leading position. */
-
- || no_levels_match_anything (level_match_status)))
- {
- if (had_an_endline)
- goto normal_char;
-
- if (syntax & RE_REPEATED_ANCHORS_AWAY)
- if (!record_anchor_position (!COMPILE_STACK_EMPTY,
- b - bufp->buffer, &anchor_list))
- goto memory_exhausted;
-
- }
-
- else if (syntax & RE_CONTEXT_INVALID_ANCHORS)
- goto invalid_pattern;
-
- /* If not just after a newline and not always supposed to be
- an anchor, consider it a ordinary character. */
-
- else if (!(syntax & RE_CONTEXT_INDEP_ANCHORS)
- && ((syntax & RE_ANCHORS_ONLY_AT_ENDS)
- /* To make, e.g., `^(^a)' match `^a'. */
- ? p - 1 != pattern
- : (int)laststart))
- goto normal_char;
-
- if (syntax & RE_TIGHT_ALT)
- {
- if (p != pattern + 1 && !(syntax & RE_CONTEXT_INDEP_ANCHORS))
- goto normal_char;
+ if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart)
+ goto invalid_pattern;
+ if (laststart && p - 2 >= pattern && p[-2] != '\n'
+ && !(obscure_syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ if (obscure_syntax & RE_TIGHT_VBAR)
+ {
+ if (p != pattern + 1
+ && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ BUFPUSH (begline);
+ begalt = b;
+ }
+ else
+ BUFPUSH (begline);
+ break;
- BUF_PUSH (begline);
- begalt = b; /* Make alternative begin after the '^'. */
- }
- else
- BUF_PUSH (begline);
-
- break;
-
- case '+':
- case '?':
- if ((syntax & RE_BK_PLUS_QM)
- || (syntax & RE_LIMITED_OPS))
- goto normal_char;
- handle_plus:
- case '*':
- /* If there is no previous pattern... */
- if (!laststart)
+ case '+':
+ case '?':
+ if ((obscure_syntax & RE_BK_PLUS_QM)
+ || (obscure_syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern, char not special. */
+ if (!laststart)
{
- if (syntax & RE_CONTEXT_INVALID_OPS)
- goto missing_preceding_re;
- else if (!(syntax & RE_CONTEXT_INDEP_OPS))
- goto normal_char;
+ if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+ goto invalid_pattern;
+ else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
}
-
- if ((syntax & RE_REPEATED_ANCHORS_AWAY)
- && (enum regexpcode) *laststart == start_memory)
- remove_intervening_anchors (laststart, b, anchor_list, bufp);
-
- /* If there is a sequence of repetition chars, collapse it
- down to just one. We can't combine interval operators with
- these because we'd incorrect behavior for, e.g., `a{2}*',
- which should only match an even number of `a's. */
-
- zero_times_ok = 0;
- many_times_ok = 0;
-
- while (1)
- {
- zero_times_ok |= c != '+';
- many_times_ok |= c != '?';
-
- if (p == pend)
- break;
-
- PATFETCH (c);
-
- if (c == '*')
- {
- if (syntax & RE_NO_CONSECUTIVE_REPEATS)
- goto invalid_preceding_re;
- }
- else if (!(syntax & RE_BK_PLUS_QM)
+ /* If there is a sequence of repetition chars,
+ collapse it down to just one. */
+ zero_times_ok = 0;
+ many_times_ok = 0;
+ while (1)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+ if (p == pend)
+ break;
+ PATFETCH (c);
+ if (c == '*')
+ ;
+ else if (!(obscure_syntax & RE_BK_PLUS_QM)
&& (c == '+' || c == '?'))
- {
- if (syntax & RE_NO_CONSECUTIVE_REPEATS)
- goto invalid_preceding_re;
- }
- else if ((syntax & RE_BK_PLUS_QM)
+ ;
+ else if ((obscure_syntax & RE_BK_PLUS_QM)
&& c == '\\')
{
- if (p == pend)
- goto trailing_backslash;
-
- PATFETCH (c1);
-
- if (!(c1 == '+' || c1 == '?'))
+ /* int c1; */
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
{
PATUNFETCH;
PATUNFETCH;
break;
}
-
- if (syntax & RE_NO_CONSECUTIVE_REPEATS)
- goto invalid_preceding_re;
-
- c = c1;
+ c = c1;
}
else
{
PATUNFETCH;
break;
}
- }
+ }
/* Star, etc. applied to an empty pattern is equivalent
to an empty pattern. */
- if (!laststart)
+ if (!laststart)
break;
- /* Now we know whether or not zero matches is allowed
+ /* Now we know whether or not zero matches is allowed
and also whether or not two or more matches is allowed. */
-
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
- /* If more than one repetition is allowed, put in at the
- end a backward relative jump from b to before the next jump
- we're going to put in below (which jumps from laststart to
- after this jump). */
-
- if (many_times_ok)
- {
- GET_BUFFER_SPACE (3);
- store_jump (b, maybe_pop_jump, laststart - 3);
- b += 3; /* Because store_jump puts stuff here. */
+ if (many_times_ok)
+ {
+ /* If more than one repetition is allowed, put in at the
+ end a backward relative jump from b to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump). */
+ GET_BUFFER_SPACE (3);
+ store_jump (b, maybe_finalize_jump, laststart - 3);
+ b += 3; /* Because store_jump put stuff here. */
}
- /* Otherwise, put in a no_op so verify_and_adjust_endlines can
- detect that, e.g., a preceding `$' is not an anchor. */
- else
- BUF_PUSH (no_op);
-
-
/* On failure, jump from laststart to b + 3, which will be the
end of the buffer after this jump is inserted. */
-
- if (syntax & RE_REPEATED_ANCHORS_AWAY)
- adjust_pattern_offsets_list (3, laststart - bufp->buffer,
- &anchor_list);
-
- adjust_pattern_offsets_list (3, laststart - bufp->buffer, &op_list);
GET_BUFFER_SPACE (3);
- insert_jump (on_failure_jump, laststart, b + 3, b);
- pending_exact = 0;
+ insert_jump (on_failure_jump, laststart, b + 3, b);
+ pending_exact = 0;
b += 3;
-
if (!zero_times_ok)
{
/* At least one repetition is required, so insert a
- dummy_failure before the initial on_failure_jump
+ dummy-failure before the initial on-failure-jump
instruction of the loop. This effects a skip over that
instruction the first time we hit that loop. */
-
- if (syntax & RE_REPEATED_ANCHORS_AWAY)
- adjust_pattern_offsets_list (3, laststart - bufp->buffer,
- &anchor_list);
-
- adjust_pattern_offsets_list (3, laststart - bufp->buffer,
- &op_list);
- GET_BUFFER_SPACE (3);
+ GET_BUFFER_SPACE (6);
insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
b += 3;
- }
+ }
break;
case '.':
laststart = b;
-
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
- BUF_PUSH (anychar);
-
- if (!set_this_level (&level_match_status, current_level)
- || !set_match_status_of_active_groups (group_active_status,
- &group_match_status))
- goto memory_exhausted;
-
- break;
+ BUFPUSH (anychar);
+ break;
case '[':
- {
- unsigned just_had_a_char_class = 0;
-
- if (p == pend)
- goto unmatched_left_bracket;
-
- while (b - bufp->buffer
- > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
- EXTEND_BUFFER;
-
- laststart = b;
-
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
- if (*p == '^')
- {
- BUF_PUSH (charset_not);
- p++;
- }
- else
- BUF_PUSH (charset);
-
- /* Remember the first position in the bracket expression. */
- p1 = p;
+ if (p == pend)
+ goto invalid_pattern;
+ while (b - bufp->buffer
+ > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
+ EXTEND_BUFFER;
- BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
- /* Clear the whole map */
- bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
-
- if ((syntax & RE_HAT_LISTS_NOT_NEWLINE)
- && (enum regexpcode) b[-2] == charset_not)
- SET_LIST_BIT ('\n');
-
-
- /* Read in characters and ranges, setting map bits. */
- while (1)
- {
- if (p == pend)
- goto unmatched_left_bracket;
+ laststart = b;
+ if (*p == '^')
+ {
+ BUFPUSH (charset_not);
+ p++;
+ }
+ else
+ BUFPUSH (charset);
+ p0 = p;
- PATFETCH (c);
+ BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+ /* Clear the whole map */
+ memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)
+ SET_LIST_BIT ('\n');
- /* If set, \ escapes characters when inside [...]. */
- if ((syntax & RE_AWK_CLASS_HACK) && c == '\\')
- {
- if (p == pend)
- goto trailing_backslash;
-
- PATFETCH(c1);
- SET_LIST_BIT (c1);
- continue;
- }
- /* Could be the end of the bracket expression. If it's
- not (i.e., when the bracket expression is `[]' so
- far), the ']' character bit gets set way below. */
-
- if (c == ']' && p != p1 + 1)
- break;
-
+ /* Read in characters and ranges, setting map bits. */
+ while (1)
+ {
+ /* Don't translate while fetching, in case it's a range bound.
+ When we set the bit for the character, we translate it. */
+ PATFETCH_RAW (c);
- /* Look ahead to see if it's a range when the last thing
- was a character class. */
-
- if (just_had_a_char_class && c == '-' && *p != ']')
- goto invalid_range_end;
-
- /* Look ahead to see if it's a range when the last thing
- was a character: if this is a hyphen not at the
- beginning or the end of a list, then it's the range
- operator. */
-
- if (c == '-'
- && !(p - 2 >= pattern && p[-2] == '[')
- && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
- && *p != ']')
- {
- DO_RANGE;
- }
+ /* If set, \ escapes characters when inside [...]. */
+ if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\')
+ {
+ PATFETCH(c1);
+ SET_LIST_BIT (c1);
+ continue;
+ }
+ if (c == ']')
+ {
+ if (p == p0 + 1)
+ {
+ /* If this is an empty bracket expression. */
+ if ((obscure_syntax & RE_NO_EMPTY_BRACKETS)
+ && p == pend)
+ goto invalid_pattern;
+ }
+ else
+ /* Stop if this isn't merely a ] inside a bracket
+ expression, but rather the end of a bracket
+ expression. */
+ break;
+ }
+ /* Get a range. */
+ if (p[0] == '-' && p[1] != ']')
+ {
+ PATFETCH (c1);
+ /* Don't translate the range bounds while fetching them. */
+ PATFETCH_RAW (c1);
- else if (p[0] == '-' && p[1] != ']')
- {
- /* This handles ranges made up of characters only. */
- PATFETCH (c1); /* The `-'. */
- DO_RANGE;
- }
-
- /* See if we're at the beginning of a possible character
- class. */
-
- else if ((syntax & RE_CHAR_CLASSES)
- && c == '[' && p[0] == ':')
- {
- /* Longest valid character class word has six chars. */
- char str[CHAR_CLASS_MAX_LENGTH];
+ if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1)
+ goto invalid_pattern;
- PATFETCH (c);
- c1 = 0;
-
- /* If pattern is `[[:'. */
- if (p == pend)
- goto unmatched_left_bracket;
-
- while (1)
- {
- /* Don't translate the ``character class''
- characters. */
- PATFETCH_RAW (c);
- if (c == ':' || c == ']' || p == pend
- || c1 == CHAR_CLASS_MAX_LENGTH)
- break;
- str[c1++] = c;
- }
- str[c1] = '\0';
+ if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END)
+ && c1 == '-' && *p != ']')
+ goto invalid_pattern;
- /* If isn't a word bracketed by `[:' and:`]':
- undo the ending character, the letters, and leave
- the leading `:' and `[' (but set bits for them). */
-
- if (c == ':' && p[0] == ']')
- {
- if (!IS_CHAR_CLASS (str))
- goto invalid_char_class;
-
- /* The ] at the end of the character class. */
- PATFETCH (c);
-
- if (p == pend)
- goto unmatched_left_bracket;
-
- for (c = 0; c < (1 << BYTEWIDTH); c++)
- {
- if ((strcmp (str, "alpha") == 0 && isalpha (c))
- || (strcmp (str, "upper") == 0 && isupper (c))
- || (strcmp (str, "lower") == 0 && islower (c))
- || (strcmp (str, "digit") == 0 && isdigit (c))
- || (strcmp (str, "alnum") == 0 && isalnum (c))
- || (strcmp (str, "xdigit") == 0 && isxdigit (c))
- || (strcmp (str, "space") == 0 && isspace (c))
- || (strcmp (str, "print") == 0 && isprint (c))
- || (strcmp (str, "punct") == 0 && ispunct (c))
- || (strcmp (str, "graph") == 0 && isgraph (c))
- || (strcmp (str, "cntrl") == 0 && iscntrl (c)))
- SET_LIST_BIT (c);
- }
- just_had_a_char_class = 1;
- }
- else
- {
- c1++;
- while (c1--)
- PATUNFETCH;
- SET_LIST_BIT ('[');
- SET_LIST_BIT (':');
- just_had_a_char_class = 0;
- }
- }
- else
- {
- just_had_a_char_class = 0;
- SET_LIST_BIT (c);
- }
- }
-
- /* Discard any (non)matching list bytes that are all 0 at the
- end of the map. Decrement the map-length byte too. */
-
- while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
- b[-1]--;
- b += b[-1];
- }
-
- if (!set_this_level (&level_match_status, current_level)
- || !set_match_status_of_active_groups (group_active_status,
- &group_match_status))
- goto memory_exhausted;
+ while (c <= c1)
+ {
+ /* Translate each char that's in the range. */
+ if (translate)
+ SET_LIST_BIT (translate[c]);
+ else
+ SET_LIST_BIT (c);
+ c++;
+ }
+ }
+ else if ((obscure_syntax & RE_CHAR_CLASSES)
+ && c == '[' && p[0] == ':')
+ {
+ /* Longest valid character class word has six characters. */
+ char str[CHAR_CLASS_MAX_LENGTH];
+ PATFETCH (c);
+ c1 = 0;
+ /* If no ] at end. */
+ if (p == pend)
+ goto invalid_pattern;
+ while (1)
+ {
+ /* Don't translate the ``character class'' characters. */
+ PATFETCH_RAW (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+ if (p == pend
+ || c == ']' /* End of the bracket expression. */
+ || p[0] != ']'
+ || p + 1 == pend
+ || (strcmp (str, "alpha") != 0
+ && strcmp (str, "upper") != 0
+ && strcmp (str, "lower") != 0
+ && strcmp (str, "digit") != 0
+ && strcmp (str, "alnum") != 0
+ && strcmp (str, "xdigit") != 0
+ && strcmp (str, "space") != 0
+ && strcmp (str, "print") != 0
+ && strcmp (str, "punct") != 0
+ && strcmp (str, "graph") != 0
+ && strcmp (str, "cntrl") != 0))
+ {
+ /* Undo the ending character, the letters, and leave
+ the leading : and [ (but set bits for them). */
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ }
+ else
+ {
+ /* The ] at the end of the character class. */
+ PATFETCH (c);
+ if (c != ']')
+ goto invalid_pattern;
+ for (c = 0; c < (1 << BYTEWIDTH); c++)
+ {
+ if ((strcmp (str, "alpha") == 0 && isalpha (c))
+ || (strcmp (str, "upper") == 0 && isupper (c))
+ || (strcmp (str, "lower") == 0 && islower (c))
+ || (strcmp (str, "digit") == 0 && isdigit (c))
+ || (strcmp (str, "alnum") == 0 && isalnum (c))
+ || (strcmp (str, "xdigit") == 0 && isxdigit (c))
+ || (strcmp (str, "space") == 0 && isspace (c))
+ || (strcmp (str, "print") == 0 && isprint (c))
+ || (strcmp (str, "punct") == 0 && ispunct (c))
+ || (strcmp (str, "graph") == 0 && isgraph (c))
+ || (strcmp (str, "cntrl") == 0 && iscntrl (c)))
+ SET_LIST_BIT (c);
+ }
+ }
+ }
+ else if (translate)
+ SET_LIST_BIT (translate[c]);
+ else
+ SET_LIST_BIT (c);
+ }
+ /* Discard any character set/class bitmap bytes that are all
+ 0 at the end of the map. Decrement the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
break;
-
- case '(':
- if (!(syntax & RE_NO_BK_PARENS))
+ case '(':
+ if (! (obscure_syntax & RE_NO_BK_PARENS))
goto normal_char;
else
goto handle_open;
case ')':
- if (! (syntax & RE_NO_BK_PARENS))
+ if (! (obscure_syntax & RE_NO_BK_PARENS))
goto normal_char;
else
goto handle_close;
case '\n':
- if (! (syntax & RE_NEWLINE_ALT))
+ if (! (obscure_syntax & RE_NEWLINE_OR))
goto normal_char;
else
goto handle_bar;
case '|':
- if (!(syntax & RE_NO_BK_VBAR))
+ if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+ && (! laststart || p == pend))
+ goto invalid_pattern;
+ else if (! (obscure_syntax & RE_NO_BK_VBAR))
goto normal_char;
else
goto handle_bar;
case '{':
- if ((syntax & RE_NO_BK_BRACES)
- && (syntax & RE_INTERVALS))
- goto handle_interval;
- else
+ if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ && (obscure_syntax & RE_INTERVALS)))
goto normal_char;
+ else
+ goto handle_interval;
case '\\':
- if (p == pend)
- goto trailing_backslash;
-
- PATFETCH_RAW (c);
+ if (p == pend) goto invalid_pattern;
+ PATFETCH_RAW (c);
switch (c)
{
- case '(':
- if (syntax & RE_NO_BK_PARENS)
+ case '(':
+ if (obscure_syntax & RE_NO_BK_PARENS)
goto normal_backsl;
- handle_open:
- bufp->re_nsub++;
- increase_level (&current_level);
+ handle_open:
+ if (stackp == stacke) goto nesting_too_deep;
- if (!make_group_active (&group_active_status, regnum))
- goto memory_exhausted;
-
- if (syntax & RE_NO_EMPTY_GROUPS)
- {
- p1 = p;
- if (*p1 == '^') p1++;
- if (*p1 == '$') p1++;
- if (!(syntax & RE_NO_BK_PARENS) && *p1 == '\\') p1++;
-
- /* If found an empty group... */
- if (*p1 == ')')
- goto invalid_pattern;
- }
-
- /* Value to restore in laststart when hit end of this
- group; should point to the start_memory that we are
- about to push. */
-
- if (COMPILE_STACK_FULL)
- {
- compile_stack.stack = (compile_stack_element *)
- realloc (compile_stack.stack,
- (compile_stack.size << 1)
- * sizeof (compile_stack_element));
-
- if (compile_stack.stack == NULL)
- goto memory_exhausted;
-
- compile_stack.size <<= 1;
- }
-
- compile_stack.stack[compile_stack.avail].laststart_offset
- = b - bufp->buffer;
- compile_stack.stack[compile_stack.avail].fixup_alt_jump
- = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
- compile_stack.stack[compile_stack.avail].regnum = regnum;
- compile_stack.stack[compile_stack.avail].begalt_offset
- = begalt - bufp->buffer;
- compile_stack.avail++;
-
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
- BUF_PUSH_2 (start_memory, regnum);
- regnum++;
- fixup_alt_jump = 0;
+ /* Laststart should point to the start_memory that we are about
+ to push (unless the pattern has RE_NREGS or more ('s). */
+ *stackp++ = b - bufp->buffer;
+ if (regnum < RE_NREGS)
+ {
+ BUFPUSH (start_memory);
+ BUFPUSH (regnum);
+ }
+ *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
+ *stackp++ = regnum++;
+ *stackp++ = begalt - bufp->buffer;
+ fixup_jump = 0;
laststart = 0;
begalt = b;
- break;
+ break;
case ')':
- if (syntax & RE_NO_BK_PARENS)
+ if (obscure_syntax & RE_NO_BK_PARENS)
goto normal_backsl;
-
- if (COMPILE_STACK_EMPTY)
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_backsl;
- else
- goto unmatched_close;
-
- handle_close:
- if (fixup_alt_jump)
- store_jump (fixup_alt_jump, jump_past_next_alt, b);
-
- /* See similar code for backslashed parens above. */
-
- if (COMPILE_STACK_EMPTY)
- if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
- goto normal_char;
- else
- goto unmatched_close;
-
- if (get_level_match_status (level_match_status, current_level))
- if (!set_next_lower_level (&level_match_status, current_level))
- goto memory_exhausted;
-
- /* Only call these if know you have a matched close. */
- decrease_level (&current_level);
- make_group_inactive (&group_active_status, regnum);
-
- compile_stack.avail--;
- begalt
- = compile_stack.stack[compile_stack.avail].begalt_offset
- + bufp->buffer;
- laststart
- = (compile_stack.stack[compile_stack.avail].laststart_offset
- + bufp->buffer);
-
- fixup_alt_jump = compile_stack.stack[compile_stack.avail].fixup_alt_jump
- ? compile_stack.stack[compile_stack.avail]
- .fixup_alt_jump + bufp->buffer - 1
- : 0;
-
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
- BUF_PUSH_2 (stop_memory,
- compile_stack.stack[compile_stack.avail].regnum);
+ handle_close:
+ if (stackp == stackb) goto unmatched_close;
+ begalt = *--stackp + bufp->buffer;
+ if (fixup_jump)
+ store_jump (fixup_jump, jump, b);
+ if (stackp[-1] < RE_NREGS)
+ {
+ BUFPUSH (stop_memory);
+ BUFPUSH (stackp[-1]);
+ }
+ stackp -= 2;
+ fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
+ laststart = *--stackp + bufp->buffer;
break;
- case '|': /* `\|'. */
- if ((syntax & RE_LIMITED_OPS)
- || (syntax & RE_NO_BK_VBAR))
+ case '|':
+ if ((obscure_syntax & RE_LIMITED_OPS)
+ || (obscure_syntax & RE_NO_BK_VBAR))
goto normal_backsl;
handle_bar:
- if (syntax & RE_LIMITED_OPS)
+ if (obscure_syntax & RE_LIMITED_OPS)
goto normal_char;
-
- /* Disallow empty alternatives if RE_NO_EMPTY_ALTS is set.
- Caveat: can't detect if the vbar is followed by a
- trailing '$' yet, unless it's the last thing in a
- pattern; the routine for verifying endlines has to do
- the rest. */
-
- if ((syntax & RE_NO_EMPTY_ALTS)
- && (!laststart || p == pend
- || (*p == '$' && p + 1 == pend)
- || ((syntax & RE_NO_BK_PARENS)
- ? (p < pend && *p == ')')
- : (p + 1 < pend && p[0] == '\\' && p[1] == ')'))))
- goto invalid_pattern;
-
-
- /* Clear some variables. */
-
- if (lower_levels_match_nothing (level_match_status,
- current_level))
- clear_this_and_higher_levels (&level_match_status,
- current_level);
- had_an_endline = false;
-
-
- /* Insert before the previous alternative a jump which
+ /* Insert before the previous alternative a jump which
jumps to this alternative if the former fails. */
-
- if (syntax & RE_REPEATED_ANCHORS_AWAY)
- adjust_pattern_offsets_list (3, begalt - bufp->buffer,
- &anchor_list);
-
- adjust_pattern_offsets_list (3, begalt - bufp->buffer, &op_list);
- GET_BUFFER_SPACE (3);
+ GET_BUFFER_SPACE (6);
insert_jump (on_failure_jump, begalt, b + 6, b);
pending_exact = 0;
b += 3;
-
- /* The alternative before this one has a jump after it
- which gets executed if it gets matched. Adjust that
- jump so it will jump to this alternative's analogous
- jump (put in below, which in turn will jump to the next
- (if any) alternative's such jump, etc.). The last such
- jump jumps to the correct final destination. A picture:
- _____ _____
- | | | |
- | v | v
- a | b | c
-
- If we are at `b,' then fixup_alt_jump right now points to a
- three-byte space after `a.' We'll put in the jump, set
- fixup_alt_jump to right after `b,' and leave behind three
- bytes which we'll fill in when we get to after `c.' */
-
- if (fixup_alt_jump)
- store_jump (fixup_alt_jump, jump_past_next_alt, b);
+ /* The alternative before the previous alternative has a
+ jump after it which gets executed if it gets matched.
+ Adjust that jump so it will jump to the previous
+ alternative's analogous jump (put in below, which in
+ turn will jump to the next (if any) alternative's such
+ jump, etc.). The last such jump jumps to the correct
+ final destination. */
+ if (fixup_jump)
+ store_jump (fixup_jump, jump, b);
- /* Mark and leave space for a jump after this alternative
- ---to be filled in later either by next alternative or
- when know we're at the end of a series of alternatives. */
-
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
- fixup_alt_jump = b;
- GET_BUFFER_SPACE (3);
+ /* Leave space for a jump after previous alternative---to be
+ filled in later. */
+ fixup_jump = b;
b += 3;
laststart = 0;
@@ -1576,167 +926,93 @@ regex_compile (pattern, size, syntax, bufp)
break;
case '{':
- /* If \{ is a literal. */
- if (!(syntax & RE_INTERVALS)
- /* If we're at a "\{" and it's not the open-interval
- operator. */
- || ((syntax & RE_INTERVALS)
- && (syntax & RE_NO_BK_BRACES))
- || (p - 2 == pattern && p == pend))
+ if (! (obscure_syntax & RE_INTERVALS)
+ /* Let \{ be a literal. */
+ || ((obscure_syntax & RE_INTERVALS)
+ && (obscure_syntax & RE_NO_BK_CURLY_BRACES))
+ /* If it's the string "\{". */
+ || (p - 2 == pattern && p == pend))
goto normal_backsl;
-
handle_interval:
- /* If got here, then intervals must be allowed. */
-
- beg_interval = p - 1; /* The `{'. */
- following_left_brace = 0;
- lower_bound = -1; /* So can see if are set. */
- upper_bound = -1;
-
- if (p == pend)
- {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
+ beg_interval = p - 1; /* The {. */
+ /* If there is no previous pattern, this isn't an interval. */
+ if (!laststart)
+ {
+ if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+ goto invalid_pattern;
else
- goto unmatched_left_curly_brace;
+ goto normal_backsl;
}
-
+ /* It also isn't an interval if not preceded by an re
+ matching a single character or subexpression, or if
+ the current type of intervals can't handle back
+ references and the previous thing is a back reference. */
+ if (! (*laststart == anychar
+ || *laststart == charset
+ || *laststart == charset_not
+ || *laststart == start_memory
+ || (*laststart == exactn && laststart[1] == 1)
+ || (! (obscure_syntax & RE_NO_BK_REFS)
+ && *laststart == duplicate)))
+ {
+ if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ goto normal_char;
+
+ /* Posix extended syntax is handled in previous
+ statement; this is for Posix basic syntax. */
+ if (obscure_syntax & RE_INTERVALS)
+ goto invalid_pattern;
+
+ goto normal_backsl;
+ }
+ lower_bound = -1; /* So can see if are set. */
+ upper_bound = -1;
GET_UNSIGNED_NUMBER (lower_bound);
-
- if (c == ',')
+ if (c == ',')
{
- GET_UNSIGNED_NUMBER (upper_bound);
+ GET_UNSIGNED_NUMBER (upper_bound);
if (upper_bound < 0)
upper_bound = RE_DUP_MAX;
}
-
if (upper_bound < 0)
upper_bound = lower_bound;
-
- if (lower_bound < 0 || upper_bound > RE_DUP_MAX
- || lower_bound > upper_bound)
- {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- goto invalid_braces_content;
- }
-
- if (!(syntax & RE_NO_BK_BRACES))
+ if (! (obscure_syntax & RE_NO_BK_CURLY_BRACES))
{
if (c != '\\')
- goto unmatched_left_curly_brace;
-
+ goto invalid_pattern;
PATFETCH (c);
}
-
- if (c != '}')
- {
- if (syntax & RE_NO_BK_BRACES)
- goto unfetch_interval;
- else
- goto invalid_braces_content;
- }
-
-
- /* Parsed a valid interval, but if an interval can't
- operate on another repetition operator, check that what
- follows isn't one. */
-
- if ((syntax & RE_NO_CONSECUTIVE_REPEATS) && p != pend)
- {
- if (*p == '*' || *p == '+' || *p == '?')
- goto invalid_preceding_re;
-
- if (syntax & RE_NO_BK_BRACES)
- {
- if (*p == '{')
- {
- /* Close but not exactly as above. */
-
- int lower_bound = -1;
- int upper_bound = -1;
-
- following_left_brace = p++;
- GET_UNSIGNED_NUMBER (lower_bound);
-
- if (c == ',')
- {
- GET_UNSIGNED_NUMBER (upper_bound);
- if (upper_bound < 0)
- upper_bound = RE_DUP_MAX;
- }
-
- if (upper_bound < 0)
- upper_bound = lower_bound;
-
- /* If not a valid interval, then we don't have
- an interval operating on another one; what
- we have instead is a series match-self ops
- starting with a '{'. */
-
- if (lower_bound < 0 || upper_bound > RE_DUP_MAX
- || lower_bound > upper_bound || c != '}')
- {
- /* Back up to '{' so can use again
- put it in C, as the normal_char label
- code expects that; will go to that
- label after putting the preceding valid
- interval in the buffer. */
-
- p = following_left_brace;
- PATFETCH (c);
- }
- else
- goto invalid_preceding_re;
- }
- }
- else if (p[0] == '\\' && p[1] == '{')
- goto invalid_preceding_re;
- }
-
-
- /* We just parsed a valid interval. */
-
- /* If it's invalid to have no preceding re. */
- if (!laststart)
+ if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound
+ || ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ && p != pend && *p == '{'))
{
- if (syntax & RE_CONTEXT_INVALID_OPS)
- goto missing_preceding_re;
- else if (syntax & RE_CONTEXT_INDEP_OPS)
- laststart = b;
- else
+ if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
goto unfetch_interval;
- }
- else if ((syntax & RE_REPEATED_ANCHORS_AWAY)
- && (enum regexpcode) *laststart == start_memory)
- remove_intervening_anchors (laststart, b, anchor_list, bufp);
-
- /* If upper_bound is zero, don't want to succeed at all;
+ else
+ goto invalid_pattern;
+ }
+
+ /* If upper_bound is zero, don't want to succeed at all;
jump from laststart to b + 3, which will be the end of
the buffer after this jump is inserted. */
if (upper_bound == 0)
{
- if (syntax & RE_REPEATED_ANCHORS_AWAY)
- adjust_pattern_offsets_list (3, laststart - bufp->buffer,
- &anchor_list);
-
- adjust_pattern_offsets_list (3, laststart - bufp->buffer,
- &op_list);
GET_BUFFER_SPACE (3);
- insert_jump (no_pop_jump, laststart, b + 3, b);
+ insert_jump (jump, laststart, b + 3, b);
b += 3;
}
/* Otherwise, after lower_bound number of succeeds, jump
- to after the no_pop_jump_n which will be inserted at the end
- of the buffer, and insert that no_pop_jump_n. */
+ to after the jump_n which will be inserted at the end
+ of the buffer, and insert that jump_n. */
else
{ /* Set to 5 if only one repetition is allowed and
- hence no no_pop_jump_n is inserted at the current
- end of the buffer. Otherwise, need 10 bytes total
- for the succeed_n and the no_pop_jump_n. */
+ hence no jump_n is inserted at the current end of
+ the buffer; then only space for the succeed_n is
+ needed. Otherwise, need space for both the
+ succeed_n and the jump_n. */
unsigned slots_needed = upper_bound == 1 ? 5 : 10;
@@ -1745,70 +1021,38 @@ regex_compile (pattern, size, syntax, bufp)
be set by its attendant set_number_at, because
re_compile_fastmap will need to know it. Jump to
what the end of buffer will be after inserting
- this succeed_n and possibly appending a
- no_pop_jump_n. */
-
- if (syntax & RE_REPEATED_ANCHORS_AWAY)
- adjust_pattern_offsets_list (5, laststart - bufp->buffer,
- &anchor_list);
-
- adjust_pattern_offsets_list (5, laststart - bufp->buffer,
- &op_list);
+ this succeed_n and possibly appending a jump_n. */
insert_jump_n (succeed_n, laststart, b + slots_needed,
b, lower_bound);
b += 5; /* Just increment for the succeed_n here. */
-
- /* More than one repetition is allowed, so put in at
+ /* More than one repetition is allowed, so put in at
the end of the buffer a backward jump from b to the
succeed_n we put in above. By the time we've gotten
to this jump when matching, we'll have matched once
already, so jump back only upper_bound - 1 times. */
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
if (upper_bound > 1)
{
- store_jump_n (b, no_pop_jump_n, laststart,
- upper_bound - 1);
+ store_jump_n (b, jump_n, laststart, upper_bound - 1);
b += 5;
/* When hit this when matching, reset the
- preceding no_pop_jump_n's n to upper_bound - 1. */
-
- BUF_PUSH (set_number_at);
-
- /* Only need to get space for the numbers. */
- GET_BUFFER_SPACE (4);
+ preceding jump_n's n to upper_bound - 1. */
+ BUFPUSH (set_number_at);
+ GET_BUFFER_SPACE (2);
STORE_NUMBER_AND_INCR (b, -5);
STORE_NUMBER_AND_INCR (b, upper_bound - 1);
}
- /* Otherwise, put in a no_op, so verify_and_adjust_endlines
- can detect, e.g., a preceding `$' is not an anchor. */
- else
- BUF_PUSH (no_op);
-
-
- /* When hit this when matching, set the succeed_n's n. */
-
- if (syntax & RE_REPEATED_ANCHORS_AWAY)
- adjust_pattern_offsets_list (5, laststart - bufp->buffer,
- &anchor_list);
-
- adjust_pattern_offsets_list (5, laststart - bufp->buffer,
- &op_list);
+ /* When hit this when matching, set the succeed_n's n. */
GET_BUFFER_SPACE (5);
insert_op_2 (set_number_at, laststart, b, 5, lower_bound);
b += 5;
}
pending_exact = 0;
beg_interval = 0;
-
- if (following_left_brace)
- goto normal_char;
-
break;
+
unfetch_interval:
/* If an invalid interval, match the characters as literals. */
if (beg_interval)
@@ -1819,88 +1063,64 @@ regex_compile (pattern, size, syntax, bufp)
"regex: no interval beginning to which to backtrack.\n");
exit (1);
}
+
beg_interval = 0;
-
- /* normal_char and normal_backsl expect a character in `c'. */
- PATFETCH (c);
-
- if (!(syntax & RE_NO_BK_BRACES))
- {
- if (p > pattern && p[-1] == '\\')
- goto normal_backsl;
- }
- goto normal_char;
+ PATFETCH (c); /* normal_char expects char in `c'. */
+ goto normal_char;
+ break;
#ifdef emacs
case '=':
- BUF_PUSH (at_dot);
+ BUFPUSH (at_dot);
break;
case 's':
laststart = b;
+ BUFPUSH (syntaxspec);
PATFETCH (c);
- BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+ BUFPUSH (syntax_spec_code[c]);
break;
case 'S':
laststart = b;
+ BUFPUSH (notsyntaxspec);
PATFETCH (c);
- BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+ BUFPUSH (syntax_spec_code[c]);
break;
#endif /* emacs */
case 'w':
laststart = b;
-
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
- BUF_PUSH (wordchar);
-
- if (!set_this_level (&level_match_status, current_level)
- || !set_match_status_of_active_groups (group_active_status,
- &group_match_status))
- goto memory_exhausted;
-
- break;
+ BUFPUSH (wordchar);
+ break;
case 'W':
laststart = b;
-
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
- BUF_PUSH (notwordchar);
-
- if (!set_this_level (&level_match_status, current_level)
- || !set_match_status_of_active_groups (group_active_status,
- &group_match_status))
- goto memory_exhausted;
-
- break;
+ BUFPUSH (notwordchar);
+ break;
case '<':
- BUF_PUSH (wordbeg);
+ BUFPUSH (wordbeg);
break;
case '>':
- BUF_PUSH (wordend);
+ BUFPUSH (wordend);
break;
case 'b':
- BUF_PUSH (wordbound);
+ BUFPUSH (wordbound);
break;
case 'B':
- BUF_PUSH (notwordbound);
+ BUFPUSH (notwordbound);
break;
case '`':
- BUF_PUSH (begbuf);
+ BUFPUSH (begbuf);
break;
case '\'':
- BUF_PUSH (endbuf);
+ BUFPUSH (endbuf);
break;
case '1':
@@ -1912,39 +1132,28 @@ regex_compile (pattern, size, syntax, bufp)
case '7':
case '8':
case '9':
- if (syntax & RE_NO_BK_REFS)
+ if (obscure_syntax & RE_NO_BK_REFS)
goto normal_char;
-
c1 = c - '0';
-
- if (c1 >= regnum)
+ if (c1 >= regnum)
{
- if (syntax & RE_NO_MISSING_BK_REF)
- goto invalid_back_reference;
+ if (obscure_syntax & RE_NO_EMPTY_BK_REF)
+ goto invalid_pattern;
else
goto normal_char;
}
-
/* Can't back reference to a subexpression if inside of it. */
- if (is_in_compile_stack (compile_stack, c1))
- goto normal_char;
-
- laststart = b;
-
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
- BUF_PUSH_2 (duplicate, c1);
-
- if (get_group_match_status (group_match_status, c1))
- if (!set_this_level (&level_match_status, current_level))
- goto memory_exhausted;
-
- break;
+ for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
+ if (*stackt == c1)
+ goto normal_char;
+ laststart = b;
+ BUFPUSH (duplicate);
+ BUFPUSH (c1);
+ break;
case '+':
case '?':
- if (syntax & RE_BK_PLUS_QM)
+ if (obscure_syntax & RE_BK_PLUS_QM)
goto handle_plus;
else
goto normal_backsl;
@@ -1955,141 +1164,61 @@ regex_compile (pattern, size, syntax, bufp)
/* You might think it would be useful for \ to mean
not to translate; but if we don't translate it
it will never match anything. */
-
- if (translate)
- c = translate[c];
-
- goto normal_char;
+ if (translate) c = translate[c];
+ goto normal_char;
}
break;
- default:
-
- /* Expects the character in `c'! */
- normal_char:
- /* If no exactn currently being built. */
- if (!pending_exact
-
- /* If last exactn not at current position. */
- || pending_exact + *pending_exact + 1 != b
-
- || *pending_exact == 0177
-
- /* If followed by a repetition operator. */
- || *p == '*' || *p == '^'
- || ((syntax & RE_BK_PLUS_QM)
+ default:
+ normal_char: /* Expects the character in `c'. */
+ if (!pending_exact || pending_exact + *pending_exact + 1 != b
+ || *pending_exact == 0177 || *p == '*' || *p == '^'
+ || ((obscure_syntax & RE_BK_PLUS_QM)
? *p == '\\' && (p[1] == '+' || p[1] == '?')
: (*p == '+' || *p == '?'))
- || ((syntax & RE_INTERVALS)
- && ((syntax & RE_NO_BK_BRACES)
+ || ((obscure_syntax & RE_INTERVALS)
+ && ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
? *p == '{'
: (p[0] == '\\' && p[1] == '{'))))
{
- /* Start building a new exactn. */
-
- laststart = b;
-
- if (!add_op (&op_list, b - bufp->buffer))
- goto memory_exhausted;
-
- BUF_PUSH_2 (exactn, 0);
- pending_exact = b - 1;
-
- if (!set_this_level (&level_match_status, current_level))
- goto memory_exhausted;
- }
- BUF_PUSH (c);
- (*pending_exact)++;
- break;
-
- } /* end switch (c). */
- } /* end while p!= pend. */
+ laststart = b;
+ BUFPUSH (exactn);
+ pending_exact = b;
+ BUFPUSH (0);
+ }
+ BUFPUSH (c);
+ (*pending_exact)++;
+ }
+ }
-
- /* Through the pattern now. */
-
- if (fixup_alt_jump)
- store_jump (fixup_alt_jump, jump_past_next_alt, b);
+ if (fixup_jump)
+ store_jump (fixup_jump, jump, b);
- if (!COMPILE_STACK_EMPTY)
- goto unmatched_open;
+ if (stackp != stackb) goto unmatched_open;
- /* Have to set this before calling the next routine. */
bufp->used = b - bufp->buffer;
-
- if (!verify_and_adjust_endlines (op_list, group_match_status, bufp,
- &enough_memory))
- goto invalid_pattern;
-
- if (!enough_memory)
- goto memory_exhausted;
-
-
- /* Normal return. */
return 0;
-
- /* Abnormal return. */
-
invalid_pattern:
- bufp->used = b - bufp->buffer;
- return "Invalid regular expression";
+ return "Invalid regular expression";
unmatched_open:
- bufp->used = b - bufp->buffer;
- return "Unmatched ( or \\(";
+ return "Unmatched \\(";
unmatched_close:
- bufp->used = b - bufp->buffer;
- return "Unmatched ) or \\)";
+ return "Unmatched \\)";
end_of_pattern:
- bufp->used = b - bufp->buffer;
- return "Premature end of regular expression";
+ return "Premature end of regular expression";
+
+ nesting_too_deep:
+ return "Nesting too deep";
too_big:
- bufp->used = b - bufp->buffer;
- return "Regular expression too big";
+ return "Regular expression too big";
memory_exhausted:
- bufp->used = b - bufp->buffer;
- return "Memory exhausted";
-
- invalid_char_class:
- bufp->used = b - bufp->buffer;
- return "Invalid character class name";
-
- unmatched_left_bracket:
- bufp->used = b - bufp->buffer;
- return "Unmatched [ or [^";
-
- invalid_range_end:
- bufp->used = b - bufp->buffer;
- return "Invalid range end";
-
- trailing_backslash:
- bufp->used = b - bufp->buffer;
- return "Trailing backslash";
-
- unmatched_left_curly_brace:
- bufp->used = b - bufp->buffer;
- return "Unmatched \\{";
-
- invalid_braces_content:
- bufp->used = b - bufp->buffer;
- return "Invalid content of \\{\\}";
-
- missing_preceding_re:
- bufp->used = b - bufp->buffer;
- return "Missing preceding regular expression";
-
- invalid_preceding_re:
- bufp->used = b - bufp->buffer;
- return "Invalid preceding regular expression";
-
- invalid_back_reference:
- bufp->used = b - bufp->buffer;
- return "Invalid back reference";
+ return "Memory exhausted";
}
@@ -2100,9 +1229,9 @@ regex_compile (pattern, size, syntax, bufp)
static void
store_jump (from, opcode, to)
char *from, *to;
- char opcode;
+ int opcode;
{
- from[0] = opcode;
+ from[0] = (char)opcode;
STORE_NUMBER(from + 1, to - (from + 3));
}
@@ -2115,7 +1244,7 @@ store_jump (from, opcode, to)
static void
insert_jump (op, from, to, current_end)
- char op;
+ int op;
char *from, *to, *current_end;
{
register char *pfrom = current_end; /* Copy from here... */
@@ -2138,10 +1267,10 @@ insert_jump (op, from, to, current_end)
static void
store_jump_n (from, opcode, to, n)
char *from, *to;
- char opcode;
+ int opcode;
unsigned n;
{
- from[0] = opcode;
+ from[0] = (char)opcode;
STORE_NUMBER (from + 1, to - (from + 3));
STORE_NUMBER (from + 3, n);
}
@@ -2157,7 +1286,7 @@ store_jump_n (from, opcode, to, n)
static void
insert_jump_n (op, from, to, current_end, n)
- char op;
+ int op;
char *from, *to, *current_end;
unsigned n;
{
@@ -2178,7 +1307,7 @@ insert_jump_n (op, from, to, current_end, n)
static void
insert_op_2 (op, there, current_end, num_1, num_2)
- char op;
+ int op;
char *there, *current_end;
int num_1, num_2;
{
@@ -2188,855 +1317,58 @@ insert_op_2 (op, there, current_end, num_1, num_2)
while (pfrom != there)
*--pto = *--pfrom;
- there[0] = op;
+ there[0] = (char)op;
STORE_NUMBER (there + 1, num_1);
STORE_NUMBER (there + 3, num_2);
}
-/* Compile stack routine for regex_compile. */
-
-/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
- false if it's not. */
-
-static boolean
-is_in_compile_stack (compile_stack, regnum)
- compile_stack_type compile_stack;
- int regnum;
-{
- int this_element;
-
- if (COMPILE_STACK_EMPTY)
- return false;
-
- for (this_element = compile_stack.avail - 1;
- this_element >= 0;
- this_element--)
- if (compile_stack.stack[this_element].regnum == regnum)
- return true;
-
- return false;
-}
-
-
-/* Pattern offsets list stuff. */
-
-/* Initializes a pattern offsets list PATTERN_OFFSETS_LIST_PTR to be
- INIT_SIZE large.
-
- Returns 1 if it can allocate the space and 0 if it can't. */
-
-static unsigned
-init_pattern_offsets_list (pattern_offsets_list_ptr, init_size)
- pattern_offsets_list_type *pattern_offsets_list_ptr;
- int init_size;
-{
- if (init_size < 0)
- {
- printf ("Can't initialize a pattern offsets list with a negative \
-or zero init_size %d.\n", init_size);
- exit (1);
- }
- else
- {
- pattern_offsets_list_ptr->offsets
- = (pattern_offset_type *) malloc (init_size
- * sizeof (pattern_offset_type));
-
- if (pattern_offsets_list_ptr->offsets == NULL)
- return 0;
-
- pattern_offsets_list_ptr->size = init_size;
- pattern_offsets_list_ptr->avail = 0;
- }
- return 1;
-}
-
-
-/* Doubles the size of a pattern offsets list PATTERN_OFFSETS_LIST_PTR.
-
- Returns 1 if it can allocate the space and 0 if it can't. */
-
-static unsigned
-double_pattern_offsets_list (pattern_offsets_list_ptr)
- pattern_offsets_list_type *pattern_offsets_list_ptr;
-{
- pattern_offsets_list_ptr->offsets
- = (pattern_offset_type *) realloc (pattern_offsets_list_ptr->offsets,
- (pattern_offsets_list_ptr->size << 1) * sizeof (pattern_offset_type));
-
- if (pattern_offsets_list_ptr->offsets == NULL)
- return 0;
-
- pattern_offsets_list_ptr->size <<= 1;
- return 1;
-}
-
-
-/* Adds OFFSET to PATTERN_OFFSETS_LIST_PTR.
-
- Returns 1 if it can add the offset and 0 if it needs to allocate
- space for it and can't. */
-
-static unsigned
-add_pattern_offset (pattern_offsets_list_ptr, offset)
- pattern_offsets_list_type *pattern_offsets_list_ptr;
- pattern_offset_type offset;
-{
- if (PATTERN_OFFSETS_LIST_PTR_FULL (pattern_offsets_list_ptr))
- if (!double_pattern_offsets_list (pattern_offsets_list_ptr))
- return 0;
-
- pattern_offsets_list_ptr->offsets[pattern_offsets_list_ptr->avail] = offset;
- pattern_offsets_list_ptr->avail++;
-
- return 1;
-}
-
-
-/* Adjust each offset in PATTERN_OFFSETS_LIST_PTR by INCREMENT. */
-
-static void
-adjust_pattern_offsets_list (increment, start_position,
- pattern_offsets_list_ptr)
- unsigned increment;
- unsigned start_position;
- pattern_offsets_list_type *pattern_offsets_list_ptr;
-{
- unsigned this_pattern_offset = 0;
-
- while (this_pattern_offset < pattern_offsets_list_ptr->avail
- && pattern_offsets_list_ptr->offsets[this_pattern_offset]
- < start_position)
- this_pattern_offset++;
-
- for (; this_pattern_offset < pattern_offsets_list_ptr->avail;
- this_pattern_offset++)
- pattern_offsets_list_ptr->offsets[this_pattern_offset] += increment;
-}
-
-
-/* Anchor routines for regex_compile. */
-
-/* If it's in a group, record in ANCHOR_LIST_PTR an anchor offset that's
- at OFFSET.
-
- Returns 1 if can put the offset in ANCHOR_LIST_PTR.
- Returns 0 if runs out of memory allocating space for it. */
-
-static unsigned
-record_anchor_position (in_a_group, offset, anchor_list_ptr)
- unsigned in_a_group;
- pattern_offset_type offset;
- anchor_list_type *anchor_list_ptr;
-{
- if (in_a_group)
- if (!add_pattern_offset (anchor_list_ptr, offset))
- return 0;
-
- return 1;
-}
-
-
-/* Set all `begline's between START and END in BUFP to `no_op's.
- Set all such `endline's to either `endline_in_repeat's and all such
- `endline_before_newline's to `repeated_endline_before_repeat's. */
-
-static void
-remove_intervening_anchors (start, end, anchor_list, bufp)
- char *start, *end;
- anchor_list_type anchor_list;
- struct re_pattern_buffer *bufp;
-{
- unsigned this_anchor = 0;
-
- while (this_anchor < anchor_list.avail
- && start - bufp->buffer <= anchor_list.offsets[this_anchor]
- && anchor_list.offsets[this_anchor] <= end - bufp->buffer)
- {
- char *this_anchor_ptr
- = bufp->buffer + anchor_list.offsets[this_anchor++];
-
- *this_anchor_ptr = *this_anchor_ptr == endline
- ? (char)endline_in_repeat
- : *this_anchor_ptr == endline_before_newline
- ? (char)repeated_endline_before_newline
- : *this_anchor_ptr == begline
- ? (char)no_op
- : *this_anchor_ptr;
- }
-}
-
-
-/* Op list stuff. */
-
-/* Add OP_OFFSET to OP_LIST_PTR.
- Return 1 if can add it and 0 if can't allocate the space to do so. */
-
-static unsigned
-add_op (op_list_ptr, op_offset)
- op_list_type *op_list_ptr;
- pattern_offset_type op_offset;
-{
- return add_pattern_offset (op_list_ptr, op_offset);
-}
-
-
-/* Verify that all `$'s in an entire pattern buffer BUFP are valid
- anchors or ordinary characters. Either leave or change intermediate
- forms of `$' anchor ops into `endline' or `exactn ...' where
- appropriate.
-
- Return true in ENOUGH_MEMORY if don't run out of space allocating
- internal data structures.
-
- Return from the routine true if the pattern is valid and false
- if it isn't. */
-
-static boolean
-verify_and_adjust_endlines (op_list, group_forward_match_status,
- bufp, enough_memory)
- op_list_type op_list;
- /* `duplicate' case needs this: which groups matched something;
- set when went fowards through the pattern. */
- bits_list_type group_forward_match_status;
- struct re_pattern_buffer *bufp;
- boolean *enough_memory;
-{
- int this_op_offset; /* Has to be type int because decrementing it. */
- /* See comments for analogous variables used for '^' in regex_compile. */
-
- bits_list_type level_match_status;
- unsigned current_level = 0;
- bits_list_type group_match_status;
- bits_list_type group_active_status;
- char *bend = bufp->buffer + bufp->used;
- char *previous_p = NULL;
-
-
- if (!(init_bits_list (&level_match_status)
- && init_bits_list (&group_match_status)
- && init_bits_list (&group_active_status)))
- {
- *enough_memory = false;
- return true;
- }
- else
- *enough_memory = true;
-
- for (this_op_offset = op_list.avail - 1; this_op_offset >= 0;
- this_op_offset--)
- {
- char *p = bufp->buffer + op_list.offsets[this_op_offset];
-
- if (!enough_memory)
- break;
-
- switch ((enum regexpcode) *p)
- {
- case endline:
- case endline_in_repeat:
- case endline_before_newline:
- case repeated_endline_before_newline:
-
- /* If the '$' must be at the pattern's end or else is
- in a trailing position. */
-
- if ((bufp->syntax & RE_ANCHORS_ONLY_AT_ENDS)
- ? p + 1 == bend
- : ((bufp->syntax & RE_TIGHT_ALT)
- ? p + 3 == bend /* Would have two following no_ops. */
- : (*p == endline_before_newline
- || *p == repeated_endline_before_newline
- || no_levels_match_anything (level_match_status))))
- {
- if ((enum regexpcode) *p == endline_in_repeat
- || (enum regexpcode) *p == repeated_endline_before_newline)
- if (bufp->syntax & RE_REPEATED_ANCHORS_AWAY)
- *p = no_op;
- else
- *p = endline;
-
-
- /* If this is a trailing '$' in an empty alternative. */
-
- if ((bufp->syntax & RE_NO_EMPTY_ALTS)
-
- /* If there's an alternation op right before this `$'. */
- && ((this_op_offset > 0
- && *(bufp->buffer
- + op_list.offsets[this_op_offset - 1])
- == jump_past_next_alt)
-
- /* Or this `$' is the only thing in the first
- alternative of more than one of them. */
-
- || ((this_op_offset == 0 /* It's first. */
- /* Or it's right after an open-group op. */
- || (this_op_offset > 0
- && *(bufp->buffer
- + op_list.offsets[this_op_offset - 1])
- == start_memory))
-
- /* And it's right before an alternation op. */
- && previous_p != NULL
- && *previous_p == jump_past_next_alt)))
- return false;
- }
-
- else if (bufp->syntax & RE_CONTEXT_INVALID_ANCHORS)
- return false;
-
- else if (!(bufp->syntax & RE_CONTEXT_INDEP_ANCHORS))
- {
- p[0] = (char)exactn;
- p[1] = (char)1;
- p[2] = '$';
- }
-
- break;
-
-
- /* Yes, start and stop_memory are switched because we're going
- backwards through the pattern! */
-
- case stop_memory:
- increase_level (&current_level);
-
- if (!make_group_active (&group_active_status, p[1]))
- enough_memory = false;
-
- break;
-
- case start_memory:
- if (get_level_match_status (level_match_status, current_level))
- if (!set_next_lower_level (&level_match_status, current_level))
- enough_memory = false;
- else
- {
- decrease_level (&current_level);
- make_group_inactive (&group_active_status, p[1]);
- }
-
- break;
-
-
- /* Hit an alternative. */
-
- case jump_past_next_alt:
- if (lower_levels_match_nothing (level_match_status, current_level))
- clear_this_and_higher_levels (&level_match_status,current_level);
-
- break;
-
- /* These below mean was followed by a repetition operator. */
- case no_op:
- case maybe_pop_jump:
- case no_pop_jump_n:
- if (bufp->syntax & RE_REPEATED_ANCHORS_AWAY)
- break;
- case charset:
- case charset_not:
- case wordchar:
- case notwordchar:
- case exactn:
- case anychar:
- if (!set_this_level (&level_match_status, current_level)
- || !set_match_status_of_active_groups (group_active_status,
- &group_match_status))
- enough_memory = false;;
-
- break;
-
- case duplicate:
- /* Only set level_match_status if this back reference
- refers to a nonempty group. */
-
- if (get_group_match_status (group_forward_match_status, p[1]))
- if (!set_this_level (&level_match_status, current_level))
- enough_memory = false;
-
- break;
-
- default:
- printf ("Found an unknown operator %u in compiled pattern.\n", *p);
- }
- previous_p = p;
- }
- return true;
-}
-
-
-
-/* Bits list routines. (See above for macros.) */
-
-/* Initialize BITS_LIST_PTR to have one bits block.
- Return 1 if there's enough memory to do so and 0 if there isn't. */
-
-static unsigned
-init_bits_list (bits_list_ptr)
- bits_list_type *bits_list_ptr;
-{
- bits_list_ptr->bits = (unsigned *) malloc (sizeof (unsigned));
-
- if (bits_list_ptr->bits == NULL)
- return 0;
-
- bits_list_ptr->size = BITS_BLOCK_SIZE;
- bits_list_ptr->bits[0] = 0;
-
- return 1;
-}
-
-
-/* Extend BITS_LIST_PTR by one bits block.
- Return 1 if there's enough memory to do so and 0 if there isn't. */
-
-static unsigned
-extend_bits_list (bits_list_ptr)
- bits_list_type *bits_list_ptr;
-{
- bits_list_ptr->bits
- = (unsigned *) realloc (bits_list_ptr->bits,
- bits_list_ptr->size + sizeof (unsigned));
-
- if (bits_list_ptr->bits == NULL)
- return 0;
-
- bits_list_ptr->size += BITS_BLOCK_SIZE;
- bits_list_ptr->bits[(bits_list_ptr->size/BITS_BLOCK_SIZE) - 1] = 0;
-
- return 1;
-}
-
-
-/* Get the bit value at a positive POSITION in BITS_LIST. */
-
-static unsigned
-get_bit (bits_list, position)
- bits_list_type bits_list;
- unsigned position;
-{
- if (position < 0)
- {
- printf ("Tried to get a bit at position less than zero.\n");
- exit (1);
- }
-
- if (position > bits_list.size - 1)
- {
- printf ("Getting bit value: position %d exceeds bits list size %d.\n",
- position, bits_list.size);
- exit (1);
- }
-
- return bits_list.bits[BITS_BLOCK (position)] & BITS_MASK (position);
-}
-
-
-/* Set the bit for a positive POSITION in BITS_LIST_PTR to VALUE, which,
- in turn, can only be 0 or 1.
-
- Returns 1 if can set the bit and 0 if ran out of memory allocating
- (if necessary) room for it. */
-
-static unsigned
-set_bit_to_value (bits_list_ptr, position, value)
- bits_list_type *bits_list_ptr;
- unsigned position;
- unsigned value;
-{
- if (position < 0)
- {
- printf ("Tried to set a bit at position less than zero.\n");
- exit (1);
- }
-
- if (position > bits_list_ptr->size - 1
- && !extend_bits_list (bits_list_ptr))
- return 0;
-
- if (value == 1)
- bits_list_ptr->bits[BITS_BLOCK (position)] |= BITS_MASK (position);
- else if (value == 0)
- bits_list_ptr->bits[BITS_BLOCK (position)] &= ~(BITS_MASK (position));
- else
- {
- printf ("Invalid value %d to set a bit.\n");
- exit (1);
- }
- return 1;
-}
-
-
-/* Level stuff. */
-
-
-/* Return 1 if LEVEL in LEVEL_MATCH_STATUS matches something and
- 0 if it doesn't. Assumes LEVEL is positive. */
-
-static unsigned
-get_level_match_status (level_match_status, level)
- bits_list_type level_match_status;
- unsigned level;
-{
- return get_bit (level_match_status, level);
-}
-
-
-/* Mark as matching something the level LEVEL in LEVEL_MATCH_STATUS_PTR.
- Assumes LEVEL is positive.
-
- Return 1 if can mark the level and 0 if need to allocate space for it
- but can't. */
-
-static unsigned
-set_this_level (level_match_status_ptr, level)
- bits_list_type *level_match_status_ptr;
- unsigned level;
-{
- return set_bit_to_value (level_match_status_ptr, level, 1);
-}
-
-
-/* Mark as matching something the level below the LEVEL recorded in
- LEVEL_MATCH_STATUS_PTR. Assumes LEVEL is greater than zero.
-
- Return 1 if can mark the level and 0 ran out of memory trying to do so. */
-
-static unsigned
-set_next_lower_level (level_match_status_ptr, level)
- bits_list_type *level_match_status_ptr;
- unsigned level;
-{
- unsigned this_level;
-
- return set_bit_to_value (level_match_status_ptr, level - 1, 1);
-}
-
-
-/* Mark as matching something the level LEVEL and all levels higher than
- it currently in LEVEL_MATCH_STATUS_PTR. Assumes LEVEL is positive.
-
- Return 1 if can mark the levels and 0 ran out of memory trying to do so. */
-
-static void
-clear_this_and_higher_levels (level_match_status_ptr, level)
- bits_list_type *level_match_status_ptr;
- unsigned level;
-{
- unsigned this_level;
-
- for (this_level = level;
- this_level < level_match_status_ptr->size;
- this_level++)
- set_bit_to_value (level_match_status_ptr, this_level, 0);
-}
-
-
-/* Returns true if none of the levels in LEVEL_MATCH_STATUS less than a
- positive LEVEL match anything, and false otherwise. */
-
-static boolean
-lower_levels_match_nothing (level_match_status, level)
- bits_list_type level_match_status;
- unsigned level;
-{
- unsigned this_level;
-
- for (this_level = 0; this_level < level; this_level++)
- if (get_bit (level_match_status, this_level))
- return false;
-
- return true;
-}
-
-/* Returns true if none of the levels in LEVEL_MATCH_STATUS match
- anything, and false otherwise. */
-
-static boolean
-no_levels_match_anything (level_match_status)
- bits_list_type level_match_status;
-{
- unsigned this_bits_block;
-
- for (this_bits_block = 0;
- this_bits_block < level_match_status.size/BITS_BLOCK_SIZE;
- this_bits_block++)
- if (level_match_status.bits[this_bits_block] != 0)
- return false;
-
- return true;
-}
-
-
-/* Increase CURRENT_LEVEL_PTR. */
-
-static void
-increase_level (current_level_ptr)
- unsigned *current_level_ptr;
-{
- (*current_level_ptr)++;
-}
-
-
-/* Decrease CURRENT_LEVEL_PTR, but exit on error if try to decrease
- below zero. */
-
-static void
-decrease_level (current_level_ptr)
- unsigned *current_level_ptr;
-{
- if (*current_level_ptr == 0)
- {
- printf ("Tried to decrease current level below zero.\n");
- exit (1);
- }
- (*current_level_ptr)--;
-}
-
-
-/* Group stuff. */
-
-
-/* Mark a positive GROUP in GROUP_ACTIVE_STATUS_PTR as active.
- Return 1 if can mark the group and 0 ran out of memory trying to do so. */
-
-static unsigned
-make_group_active (group_active_status_ptr, group)
- bits_list_type *group_active_status_ptr;
- unsigned group;
-{
- return set_bit_to_value (group_active_status_ptr, group, 1);
-}
-
-
-/* Mark a positive GROUP in GROUP_ACTIVE_STATUS_PTR as inactive.
- Return 1 if can mark the group and 0 ran out of memory trying to do so. */
-
-static unsigned
-make_group_inactive (group_active_status_ptr, group)
- bits_list_type *group_active_status_ptr;
- unsigned group;
-{
- return set_bit_to_value (group_active_status_ptr, group, 0);
-}
-
-
-/* Mark as active in GROUP_MATCH_STATUS_PTR those active groups recorded
- in GROUP_ACTIVE_STATUS_PTR.
-
- Return 1 if can mark the groups and 0 ran out of memory trying to do so. */
-
-static unsigned
-set_match_status_of_active_groups (group_active_status, group_match_status_ptr)
- bits_list_type group_active_status;
- bits_list_type *group_match_status_ptr;
-{
- unsigned this_bit_block;
-
- if (group_active_status.size > group_match_status_ptr->size
- && !extend_bits_list (group_match_status_ptr))
- return 0;
-
- for (this_bit_block = 0;
- this_bit_block < group_active_status.size/BITS_BLOCK_SIZE;
- this_bit_block++)
- group_match_status_ptr->bits[this_bit_block]
- |= group_active_status.bits[this_bit_block];
-
- return 1;
-}
-
-
-/* Return 1 if GROUP in GROUP_MATCH_STATUS matches something and
- 0 if it doesn't. Assumes GROUP is positive. */
-
-static unsigned
-get_group_match_status (group_match_status, group)
- bits_list_type group_match_status;
- unsigned group;
-{
- return get_bit (group_match_status, group);
-}
-
-
-
-
-/* Failure stack declarations and macros for both re_compile_fastmap and
- re_match_2. Have to use `alloca' for reasons stated in INIT_BITS_LIST's
- comment. */
-
-
-/* Roughly the maximum number of failure points on the stack. Would be
- exactly that if always used MAX_FAILURE_SPACE each time we failed. */
-
-int re_max_failures = 2000;
-
-
-typedef unsigned char *failure_stack_element;
-
-typedef struct {
- failure_stack_element *stack;
- unsigned size;
- unsigned avail; /* Offset of next open position. */
- } failure_stack_type;
-
-
-#define FAILURE_STACK_EMPTY (failure_stack.avail == 0)
-#define FAILURE_STACK_PTR_EMPTY (failure_stack_ptr->avail == 0)
-#define FAILURE_STACK_FULL (failure_stack.avail == failure_stack.size)
-
-
-/* Initialize a failure stack.
-
- Return 1 if was able to allocate the space for (FAILURE_STACK) and
- 0 if not. */
-
-#define INIT_FAILURE_STACK(failure_stack) \
- ((failure_stack).stack = (failure_stack_element *) \
- REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (failure_stack_element)),\
- \
- (failure_stack).stack == NULL \
- ? 0 \
- : ((failure_stack).size = INIT_FAILURE_ALLOC, \
- (failure_stack).avail = 0, \
- 1))
-
-
-/* Double the size of FAILURE_STACK, up to MAX_SIZE.
-
- Return 1 if was able to double it, and 0 if either ran out of memory
- allocating space for it or it was already MAX_SIZE large.
-
- REGEX_REALLOCATE requires `void *destination' be declared. */
-
-#define DOUBLE_FAILURE_STACK(failure_stack, max_size) \
- ((failure_stack).size > max_size \
- ? 0 \
- : ((failure_stack).stack = (failure_stack_element *) \
- REGEX_REALLOCATE ((failure_stack).stack, \
- ((failure_stack).size << 1) * sizeof (failure_stack_element)),\
- \
- (failure_stack).stack == NULL \
- ? 0 \
- : ((failure_stack).size <<= 1, \
- 1)))
-
-
-/* Push PATTERN_OP on (FAILURE_STACK).
-
- Return 1 if was able to do so and 0 if ran out of memory allocating
- space to do so.
-
- DOUBLE_FAILURE_STACK requires `void *destination' be declared. */
-
-#define PUSH_PATTERN_OP(pattern_op, failure_stack) \
- ((FAILURE_STACK_FULL \
- && !DOUBLE_FAILURE_STACK (failure_stack, re_max_failures)) \
- ? 0 \
- : ((failure_stack).stack[(failure_stack).avail++] = pattern_op, \
- 1))
-
-
-/* Push most of the information about the state we will want
- if we ever fail back to it.
-
- Requires regstart, regend, reg_info, and num_internal_regs be declared.
- DOUBLE_FAILURE_STACK requires `void *destination' be declared.
-
- Does a `return FAILURE_CODE' if runs out of memory. */
-
-#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_stack, failure_code) \
- do { \
- long highest_used_reg, this_reg; \
- void *destination; \
- \
- /* Find out how many registers are active or have been matched. \
- (Aside from register zero, which is only set at the end.) */ \
- \
- for (highest_used_reg = num_internal_regs - 1; highest_used_reg > 0;\
- highest_used_reg--) \
- if (regstart[highest_used_reg] != (unsigned char *) -1) \
- break; \
- \
- while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
- if (!DOUBLE_FAILURE_STACK (failure_stack, \
- re_max_failures * MAX_FAILURE_ITEMS)) \
- return failure_code; \
- \
- /* Now push the info for each of those registers. */ \
- \
- for (this_reg = 1; this_reg <= highest_used_reg; this_reg++) \
- { \
- (failure_stack).stack[(failure_stack).avail++] \
- = regstart[this_reg]; \
- \
- (failure_stack).stack[(failure_stack).avail++] = regend[this_reg];\
- \
- (failure_stack).stack[(failure_stack).avail++] \
- = (unsigned char *) &reg_info[this_reg]; \
- } \
- \
- /* Push how many registers we saved. */ \
- (failure_stack).stack[(failure_stack).avail++] \
- = (unsigned char *) highest_used_reg; \
- \
- (failure_stack).stack[(failure_stack).avail++] = pattern_place; \
- (failure_stack).stack[(failure_stack).avail++] = string_place; \
- } while (0)
-
-
-
-
/* Given a pattern, compute a fastmap from it. The fastmap records
which of the (1 << BYTEWIDTH) possible characters can start a string
that matches the pattern. This fastmap is used by re_search to skip
- quickly over totally impossible text.
+ quickly over totally implausible text.
The caller must supply the address of a (1 << BYTEWIDTH)-byte data
area as bufp->fastmap.
- The other components of bufp describe the pattern to be used.
-
- Returns 0 if it can compile a fastmap.
- Returns -2 if there is an internal error. */
+ The other components of bufp describe the pattern to be used. */
-int
+void
re_compile_fastmap (bufp)
struct re_pattern_buffer *bufp;
{
unsigned char *pattern = (unsigned char *) bufp->buffer;
int size = bufp->used;
register char *fastmap = bufp->fastmap;
- unsigned char *p = pattern;
+ register unsigned char *p = pattern;
register unsigned char *pend = pattern + size;
- int j, k;
+ register int j, k;
unsigned char *translate = (unsigned char *) bufp->translate;
- failure_stack_type failure_stack;
- void *destination;
+ unsigned is_a_succeed_n;
+#ifndef NO_ALLOCA
+ unsigned char *stackb[NFAILURES];
+ unsigned char **stackp = stackb;
- INIT_FAILURE_STACK (failure_stack);
+#else
+ unsigned char **stackb;
+ unsigned char **stackp;
+ stackb = (unsigned char **) malloc (NFAILURES * sizeof (unsigned char *));
+ stackp = stackb;
- bzero (fastmap, (1 << BYTEWIDTH));
+#endif /* NO_ALLOCA */
+ memset (fastmap, 0, (1 << BYTEWIDTH));
bufp->fastmap_accurate = 1;
bufp->can_be_null = 0;
while (p)
{
- boolean is_a_succeed_n = false;
-
+ is_a_succeed_n = 0;
if (p == pend)
- if (FAILURE_STACK_EMPTY)
- {
- bufp->can_be_null = 1;
- break;
- }
- else
- p = failure_stack.stack[--failure_stack.avail];
-
-
+ {
+ bufp->can_be_null = 1;
+ break;
+ }
#ifdef SWITCH_ENUM_BUG
switch ((int) ((enum regexpcode) *p++))
#else
@@ -3044,7 +1376,10 @@ re_compile_fastmap (bufp)
#endif
{
case exactn:
- fastmap[translate ? translate[p[1]] : p[1]] = 1;
+ if (translate)
+ fastmap[translate[p[1]]] = 1;
+ else
+ fastmap[p[1]] = 1;
break;
case begline:
@@ -3060,63 +1395,55 @@ re_compile_fastmap (bufp)
continue;
case endline:
- fastmap[translate ? translate['\n'] : '\n'] = 1;
+ if (translate)
+ fastmap[translate['\n']] = 1;
+ else
+ fastmap['\n'] = 1;
- if (! bufp->can_be_null)
+ if (bufp->can_be_null != 1)
bufp->can_be_null = 2;
break;
- case no_pop_jump_n:
- case pop_failure_jump:
- case maybe_pop_jump:
- case no_pop_jump:
- case jump_past_next_alt:
+ case jump_n:
+ case finalize_jump:
+ case maybe_finalize_jump:
+ case jump:
case dummy_failure_jump:
- extract_number_and_incr (&j, &p);
+ EXTRACT_NUMBER_AND_INCR (j, p);
p += j;
if (j > 0)
continue;
-
/* Jump backward reached implies we just went through
- the body of a loop and matched nothing. Opcode jumped to
- should be an on_failure_jump or succeed_n. Just treat it
- like an ordinary jump. For a * loop, it has pushed its
- failure point already; If so, discard that as redundant. */
+ the body of a loop and matched nothing.
+ Opcode jumped to should be an on_failure_jump.
+ Just treat it like an ordinary jump.
+ For a * loop, it has pushed its failure point already;
+ If so, discard that as redundant. */
if ((enum regexpcode) *p != on_failure_jump
&& (enum regexpcode) *p != succeed_n)
continue;
-
p++;
- extract_number_and_incr (&j, &p);
+ EXTRACT_NUMBER_AND_INCR (j, p);
p += j;
-
- /* If what's on the stack is where we are now, pop it. */
-
- if (!FAILURE_STACK_EMPTY
- && failure_stack.stack[failure_stack.avail - 1] == p)
- failure_stack.avail--;
-
+ if (stackp != stackb && *stackp == p)
+ stackp--;
continue;
case on_failure_jump:
handle_on_failure_jump:
- extract_number_and_incr (&j, &p);
-
- if (!PUSH_PATTERN_OP (p + j, failure_stack))
- return -2;
-
- if (is_a_succeed_n)
- extract_number_and_incr (&k, &p); /* Skip the n. */
-
- continue;
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ *++stackp = p + j;
+ if (is_a_succeed_n)
+ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
+ continue;
case succeed_n:
- is_a_succeed_n = true;
+ is_a_succeed_n = 1;
/* Get to the number of times to succeed. */
p += 2;
/* Increment p past the n for when k != 0. */
- extract_number_and_incr (&k, &p);
+ EXTRACT_NUMBER_AND_INCR (k, p);
if (k == 0)
{
p -= 4;
@@ -3141,7 +1468,9 @@ re_compile_fastmap (bufp)
if (j != '\n')
fastmap[j] = 1;
if (bufp->can_be_null)
- return 0;
+ {
+ FREE_AND_RETURN_VOID(stackb);
+ }
/* Don't return; check the alternative paths
so we can set can_be_null if appropriate. */
break;
@@ -3172,39 +1501,57 @@ re_compile_fastmap (bufp)
if (SYNTAX (j) != (enum syntaxcode) k)
fastmap[j] = 1;
break;
+
+#else /* not emacs */
+ case syntaxspec:
+ case notsyntaxspec:
+ break;
#endif /* not emacs */
- case charset:
- for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+ case charset:
+ for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
- fastmap[translate ? translate[j] : j] = 1;
+ {
+ if (translate)
+ fastmap[translate[j]] = 1;
+ else
+ fastmap[j] = 1;
+ }
break;
case charset_not:
- /* Chars beyond end of map must be allowed. */
+ /* Chars beyond end of map must be allowed */
for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
- fastmap[translate ? translate[j] : j] = 1;
+ if (translate)
+ fastmap[translate[j]] = 1;
+ else
+ fastmap[j] = 1;
for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
- fastmap[translate ? translate[j] : j] = 1;
+ {
+ if (translate)
+ fastmap[translate[j]] = 1;
+ else
+ fastmap[j] = 1;
+ }
+ break;
- break;
- } /* End switch *p++. */
+ case unused: /* pacify gcc -Wall */
+ break;
+ }
- /* Getting here means we have successfully found the possible starting
+ /* Get here means we have successfully found the possible starting
characters of one path of the pattern. We need not follow this
path any farther. Instead, look at the next alternative
remembered in the stack. */
-
- if (!FAILURE_STACK_EMPTY)
- p = failure_stack.stack[--failure_stack.avail];
+ if (stackp != stackb)
+ p = *stackp--;
else
break;
}
- return 0;
-} /* re_compile_fastmap */
-
+ FREE_AND_RETURN_VOID(stackb);
+}
@@ -3212,123 +1559,110 @@ re_compile_fastmap (bufp)
doesn't let you say where to stop matching. */
int
-re_search (bufp, string, size, startpos, range, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- const int size, startpos, range;
+re_search (pbufp, string, size, startpos, range, regs)
+ struct re_pattern_buffer *pbufp;
+ char *string;
+ int size, startpos, range;
struct re_registers *regs;
{
- return re_search_2 (bufp, (char *) 0, 0, string, size, startpos, range,
+ return re_search_2 (pbufp, (char *) 0, 0, string, size, startpos, range,
regs, size);
}
-/* Using the compiled pattern in BUFP->buffer, first tries to match the
+/* Using the compiled pattern in PBUFP->buffer, first tries to match the
virtual concatenation of STRING1 and STRING2, starting first at index
STARTPOS, then at STARTPOS + 1, and so on. RANGE is the number of
places to try before giving up. If RANGE is negative, it searches
- backwards, i.e., the starting positions tried are STARTPOS, STARTPOS - 1,
- etc. STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+ backwards, i.e., the starting positions tried are STARTPOS, STARTPOS
+ - 1, etc. STRING1 and STRING2 are of SIZE1 and SIZE2, respectively.
In REGS, return the indices of the virtual concatenation of STRING1
- and STRING2 that matched the entire BUFP->buffer and its contained
+ and STRING2 that matched the entire PBUFP->buffer and its contained
subexpressions. Do not consider matching one past the index MSTOP in
the virtual concatenation of STRING1 and STRING2.
The value returned is the position in the strings at which the match
- was found, -1 if no match was found, or -2 if error (such as
+ was found, or -1 if no match was found, or -2 if error (such as
failure stack overflow). */
int
-re_search_2 (bufp, string1, size1, string2, size2, startpos, range,
- regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- const int size1, size2;
- const int startpos;
- const int range;
+re_search_2 (pbufp, string1, size1, string2, size2, startpos, range,
+ regs, mstop)
+ struct re_pattern_buffer *pbufp;
+ char *string1, *string2;
+ int size1, size2;
+ int startpos;
+ register int range;
struct re_registers *regs;
- const int stop;
+ int mstop;
{
- register char *fastmap = bufp->fastmap;
- register unsigned char *translate = (unsigned char *) bufp->translate;
+ register char *fastmap = pbufp->fastmap;
+ register unsigned char *translate = (unsigned char *) pbufp->translate;
int total_size = size1 + size2;
- int private_startpos = startpos;
- int private_endpos = startpos + range;
- int private_range = range;
+ int endpos = startpos + range;
int val;
- const struct re_pattern_buffer *private_bufp;
/* Check for out-of-range starting position. */
- if (private_startpos < 0 || private_startpos > total_size)
+ if (startpos < 0 || startpos > total_size)
return -1;
- /* Fix up range if it would eventually take private_startpos outside
- of the virtual concatenation of string1 and string2. */
-
- if (private_endpos < -1)
- private_range = -1 - private_startpos;
-
- else if (private_endpos > total_size)
- private_range = total_size - private_startpos;
-
-
-/* Update the fastmap now if not correct already. */
- if (fastmap && !bufp->fastmap_accurate)
- if (re_compile_fastmap (bufp) == -2)
- return -2;
+ /* Fix up range if it would eventually take startpos outside of the
+ virtual concatenation of string1 and string2. */
+ if (endpos < -1)
+ range = -1 - startpos;
+ else if (endpos > total_size)
+ range = total_size - startpos;
+
+ /* Update the fastmap now if not correct already. */
+ if (fastmap && !pbufp->fastmap_accurate)
+ re_compile_fastmap (pbufp);
/* If the search isn't to be a backwards one, don't waste time in a
long search for a pattern that says it is anchored. */
- if (bufp->used > 0 && (enum regexpcode) bufp->buffer[0] == begbuf
- && private_range > 0)
+ if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
+ && range > 0)
{
- if (private_startpos > 0)
+ if (startpos > 0)
return -1;
else
- private_range = 1;
+ range = 1;
}
- private_bufp = bufp;
-
while (1)
{
/* If a fastmap is supplied, skip quickly over characters that
cannot possibly be the start of a match. Note, however, that
- if the pattern can possibly match the null string, we don't
- want to skip over characters; we want the first null string we
- can match. */
+ if the pattern can possibly match the null string, we must
+ test it at each starting point so that we take the first null
+ string we get. */
- if (fastmap && private_startpos < total_size && !bufp->can_be_null)
+ if (fastmap && startpos < total_size && pbufp->can_be_null != 1)
{
- if (private_range > 0) /* Searching forwards. */
+ if (range > 0) /* Searching forwards. */
{
register int lim = 0;
register unsigned char *p;
- int irange = private_range;
-
- if (private_startpos < size1
- && private_startpos + private_range >= size1)
- lim = private_range - (size1 - private_startpos);
+ int irange = range;
+ if (startpos < size1 && startpos + range >= size1)
+ lim = range - (size1 - startpos);
p = ((unsigned char *)
- &(private_startpos >= size1
- ? string2 - size1
- : string1)[private_startpos]);
+ &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
- while (private_range > lim && !fastmap[translate
+ while (range > lim && !fastmap[translate
? translate[*p++]
: *p++])
- private_range--;
- private_startpos += irange - private_range;
+ range--;
+ startpos += irange - range;
}
else /* Searching backwards. */
{
register unsigned char c;
- if (size1 == 0 || private_startpos >= size1)
- c = string2[private_startpos - size1];
+ if (string1 == 0 || startpos >= size1)
+ c = string2[startpos - size1];
else
- c = string1[private_startpos];
+ c = string1[startpos];
c &= 0xff;
if (translate ? !fastmap[translate[c]] : !fastmap[c])
@@ -3336,30 +1670,35 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range,
}
}
- if (private_range >= 0 && private_startpos == total_size
- && fastmap && bufp->can_be_null == 0)
+ if (range >= 0 && startpos == total_size
+ && fastmap && pbufp->can_be_null == 0)
return -1;
- val = re_match_2 (private_bufp, string1, size1, string2, size2,
- private_startpos, regs, stop);
+ val = re_match_2 (pbufp, string1, size1, string2, size2, startpos,
+ regs, mstop);
if (val >= 0)
- return private_startpos;
-
+ return startpos;
if (val == -2)
return -2;
+#ifndef NO_ALLOCA
+#ifdef C_ALLOCA
+ alloca (0);
+#endif /* C_ALLOCA */
+
+#endif /* NO_ALLOCA */
advance:
- if (!private_range)
+ if (!range)
break;
- else if (private_range > 0)
+ else if (range > 0)
{
- private_range--;
- private_startpos++;
+ range--;
+ startpos++;
}
else
{
- private_range++;
- private_startpos--;
+ range++;
+ startpos--;
}
}
return -1;
@@ -3369,118 +1708,115 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range,
#ifndef emacs /* emacs never uses this. */
int
-re_match (bufp, string, size, pos, regs)
- const struct re_pattern_buffer *bufp;
- const char *string;
- const int size, pos;
+re_match (pbufp, string, size, pos, regs)
+ struct re_pattern_buffer *pbufp;
+ char *string;
+ int size, pos;
struct re_registers *regs;
{
- return re_match_2 (bufp, (char *) 0, 0, string, size, pos, regs, size);
+ return re_match_2 (pbufp, (char *) 0, 0, string, size, pos, regs, size);
}
#endif /* not emacs */
-
-/* Routines for re_match_2, defined below. */
+/* The following are used for re_match_2, defined below: */
-static boolean group_can_match_nothing ();
-static int bcmp_translate ();
+/* Roughly the maximum number of failure points on the stack. Would be
+ exactly that if always pushed MAX_NUM_FAILURE_ITEMS each time we failed. */
+
+int re_max_failures = 2000;
+/* Routine used by re_match_2. */
+/* static int memcmp_translate (); *//* already declared */
-/* Macros used by re_match_2, defined below: */
/* Structure and accessing macros used in re_match_2: */
-typedef struct register_info
+struct register_info
{
- bits_list_type inner_groups; /* Which groups are inside this one. */
- int can_match_nothing; /* Set if this group can match nothing;
- -1 if not ever set. */
unsigned is_active : 1;
unsigned matched_something : 1;
- unsigned ever_matched_something : 1;
-} reg_info_type;
-
-
-/* Macros used by re_match_2: */
-/* I.e., regstart, regend, and reg_info. */
+};
-#define INNER_GROUPS(R) ((R).inner_groups)
-#define CAN_MATCH_NOTHING(R) ((R).can_match_nothing)
#define IS_ACTIVE(R) ((R).is_active)
#define MATCHED_SOMETHING(R) ((R).matched_something)
-#define EVER_MATCHED_SOMETHING(R) ((R).ever_matched_something)
-/* Record that group INNER is inside of all currently active groups. */
-
-#define NOTE_INNER_GROUP(inner) \
- do { unsigned this_reg; \
- for (this_reg = 0; this_reg < num_internal_regs; this_reg++) \
- { \
- void *destination; /* For SET_BIT_TO_VALUE. */ \
- int ret = SET_BIT_TO_VALUE (INNER_GROUPS (reg_info[this_reg]), \
- inner, \
- IS_ACTIVE(reg_info[this_reg])); \
- if (ret == 0) \
- { \
- printf ("Ran out of memory in re_match_2 (NOTE_INNER_GROUP).\n");\
- exit (1); \
- } \
- if (ret != 1) \
- { \
- printf ("Invalid value %d to set a bit.\n", ret); \
- exit (1); \
- } \
- } \
- } while (0)
-
-
-/* Call this when have matched something; it sets `matched' flags for the
- registers corresponding to the group of which we currently are inside.
- Also records whether this group ever matched something. */
-
-#define SET_REGS_MATCHED \
- do { unsigned this_reg; \
- for (this_reg = 0; this_reg < num_internal_regs; this_reg++) \
- { \
- MATCHED_SOMETHING (reg_info[this_reg]) = \
- EVER_MATCHED_SOMETHING (reg_info[this_reg]) = \
- (IS_ACTIVE (reg_info[this_reg])) ? 1 : 0; \
- } \
- } while (0)
+/* Macros used by re_match_2: */
+/* I.e., regstart, regend, and reg_info. */
-/* Failure stack macros for re_match_2. */
+#define NUM_REG_ITEMS 3
-/* This is the number of items that are pushed and popped on the stack
- for each register, i.e., its REGSTART, REGEND and REG_INFO. */
+/* We push at most this many things on the stack whenever we
+ fail. The `+ 2' refers to PATTERN_PLACE and STRING_PLACE, which are
+ arguments to the PUSH_FAILURE_POINT macro. */
-#define NUM_REG_ITEMS 3
+#define MAX_NUM_FAILURE_ITEMS (RE_NREGS * NUM_REG_ITEMS + 2)
-/* Refers to highest_used_reg (which we calculate), PATTERN_PLACE and
- STRING_PLACE, which are arguments to the PUSH_FAILURE_POINT macro. */
-
-#define NUM_OTHER_ITEMS 3
-/* We put at most these many items on the stack whenever we push a
- failure point . */
+/* We push this many things on the stack whenever we fail. */
-#define MAX_FAILURE_ITEMS \
- (num_internal_regs * NUM_REG_ITEMS + NUM_OTHER_ITEMS)
+#define NUM_FAILURE_ITEMS (last_used_reg * NUM_REG_ITEMS + 2)
-/* We really push this many items when pushing a failure point. We
- calculate highest_used_reg each time. */
+/* This pushes most of the information about the current state we will want
+ if we ever fail back to it. */
-#define NUM_FAILURE_ITEMS \
- (highest_used_reg * NUM_REG_ITEMS + NUM_OTHER_ITEMS)
+#define PUSH_FAILURE_POINT(pattern_place, string_place) \
+ { \
+ long last_used_reg, this_reg; \
+ \
+ /* Find out how many registers are active or have been matched. \
+ (Aside from register zero, which is only set at the end.) */ \
+ for (last_used_reg = RE_NREGS - 1; last_used_reg > 0; last_used_reg--)\
+ if (regstart[last_used_reg] != (unsigned char *)(-1L)) \
+ break; \
+ \
+ if (stacke - stackp < NUM_FAILURE_ITEMS) \
+ { \
+ unsigned char **stackx; \
+ unsigned int len = stacke - stackb; \
+ if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS) \
+ { \
+ FREE_AND_RETURN(stackb,(-2)); \
+ } \
+ \
+ /* Roughly double the size of the stack. */ \
+ stackx = DOUBLE_STACK(stackx,stackb,len); \
+ /* Rearrange the pointers. */ \
+ stackp = stackx + (stackp - stackb); \
+ stackb = stackx; \
+ stacke = stackb + 2 * len; \
+ } \
+ \
+ /* Now push the info for each of those registers. */ \
+ for (this_reg = 1; this_reg <= last_used_reg; this_reg++) \
+ { \
+ *stackp++ = regstart[this_reg]; \
+ *stackp++ = regend[this_reg]; \
+ *stackp++ = (unsigned char *) &reg_info[this_reg]; \
+ } \
+ \
+ /* Push how many registers we saved. */ \
+ *stackp++ = (unsigned char *) last_used_reg; \
+ \
+ *stackp++ = pattern_place; \
+ *stackp++ = string_place; \
+ }
+
-/* How many items can still be added to the stack without overflowing it. */
-#define REMAINING_AVAIL_SLOTS \
- (failure_stack.size - failure_stack.avail)
+/* This pops what PUSH_FAILURE_POINT pushes. */
+#define POP_FAILURE_POINT() \
+ { \
+ int temp; \
+ stackp -= 2; /* Remove failure points. */ \
+ temp = (int) *--stackp; /* How many regs pushed. */ \
+ temp *= NUM_REG_ITEMS; /* How much to take off the stack. */ \
+ stackp -= temp; /* Remove the register info. */ \
+ }
#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
@@ -3506,6 +1842,19 @@ typedef struct register_info
}
+/* Call this when have matched something; it sets `matched' flags for the
+ registers corresponding to the subexpressions of which we currently
+ are inside. */
+#define SET_REGS_MATCHED \
+ { unsigned this_reg; \
+ for (this_reg = 0; this_reg < RE_NREGS; this_reg++) \
+ { \
+ if (IS_ACTIVE(reg_info[this_reg])) \
+ MATCHED_SOMETHING(reg_info[this_reg]) = 1; \
+ else \
+ MATCHED_SOMETHING(reg_info[this_reg]) = 0; \
+ } \
+ }
/* Test if at very beginning or at very end of the virtual concatenation
of string1 and string2. If there is only one string, we've put it in
@@ -3523,151 +1872,58 @@ typedef struct register_info
2) if we're before the beginning of string2, we have to look at the
last character in string1; we assume there is a string1, so use
this in conjunction with AT_STRINGS_BEG. */
-
#define IS_A_LETTER(d) \
(SYNTAX ((d) == end1 ? *string2 : (d) == string2 - 1 ? *(end1 - 1) : *(d))\
== Sword)
-#ifdef REGEX_MALLOC
-#define FREE_VARIABLES \
- do { \
- free (failure_stack.stack); \
- free (regstart); \
- free (regend); \
- free (old_regstart); \
- free (old_regend); \
- free (reg_info); \
- free (best_regstart); \
- free (best_regend); \
- reg_info = NULL; \
- failure_stack.stack = NULL; \
- regstart = regend = old_regstart = old_regend \
- = best_regstart = best_regend = NULL; \
- } while (0)
-#endif
+/* Match the pattern described by PBUFP against the virtual
+ concatenation of STRING1 and STRING2, which are of SIZE1 and SIZE2,
+ respectively. Start the match at index POS in the virtual
+ concatenation of STRING1 and STRING2. In REGS, return the indices of
+ the virtual concatenation of STRING1 and STRING2 that matched the
+ entire PBUFP->buffer and its contained subexpressions. Do not
+ consider matching one past the index MSTOP in the virtual
+ concatenation of STRING1 and STRING2.
-
-/* The main matching routine, re_match_2. */
-
-static void pop_failure_point();
-
-
-/* re_match_2 matches a buffer full of byte commands for matching (gotten
- from compiling a regular expression) and matches it against the
- the virtual concatenation of its two string arguments.
-
- BUFP is a struct re_pattern_buffer * whose pertinent fields are
- mentioned below:
-
- It has a char * field BUFFER which points to the byte
- commands which make up the compiled pattern.
-
- Its char * field TRANSLATE, if not 0, translates all
- ordinary elements in the compiled pattern.
-
- Its int field SYNTAX is the syntax with which the pattern
- was compiled and hence should be matched with.
-
- The long field USED is how many bytes long the compiled
- pattern is.
-
- Its size_t field RE_NSUB contains how many subexpressions
- the pattern has.
-
- It ignores its NO_SUB bit.
-
- If its RETURN_DEFAULT_NUM_REGS bit is set, then if REGS is
- nonzero, re_match_2 reports in REGS->start[i] and
- REGS->end[i], for i = 1 to BUFP->RE_NSUB + 1, which
- substring of the virtual concatenation of STRING1 and
- STRING2 matched the i-th subexpression of the regular
- expression compiled in BUFFER; it records in REGS->start[0]
- and REGS->end[0] information about all of that
- concatenation. If RETURN_DEFAULT_NUM_REGS isn't set,
- re_match_2 returns in REGS similar information about i
- things for i = 1 to REGS->num_regs. If REGS is zero,
- re_match_2 ignores it. See the comment for `struct
- re_registers' for more details.
-
- STRING1 and STRING2
- are the addresses of the strings of which re_match_2 tries
- to match the virtual concatenation. Because of this
- concatenation, this function can be used on an Emacs
- buffer's contents.
-
- SIZE1 is the size of STRING1.
+ If pbufp->fastmap is nonzero, then it had better be up to date.
- SIZE2 is the size of STRING2.
-
- POS is the index in the virtual concatenation of STRING1 and
- STRING2 at which re_match_2 tries to start the match.
-
- REGS is a struct re_registers *. If it's not zero, then
- re_match_2 will fill its fields START and END with
- information about what substrings of the virtual
- concatenation of STRING1 and STRING2 were matched by the
- groups represented in BUFP's BUFFER field. You must have
- allocated the correct amount of space in the `start' and
- `end' fields of REGS to accommodate `num_regs' (the other
- field) registers. See the comment for `struct re_registers'
- in regex.h for more details.
-
- STOP is the index in the virtual concatenation of STRING1 and
- STRING2 beyond which re_match_2 won't consider matching.
+ The reason that the data to match are specified as two components
+ which are to be regarded as concatenated is so this function can be
+ used directly on the contents of an Emacs buffer.
- It returns -1 if there is no match, -2 if there is an internal error
- (such as its stack overflowing). Otherwise, it returns the length of
- the substring it matched. */
+ -1 is returned if there is no match. -2 is returned if there is an
+ error (such as match stack overflow). Otherwise the value is the
+ length of the substring which was matched. */
int
-re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
- regs, stop)
- const struct re_pattern_buffer *bufp;
- const char *string1_arg;
- const int size1_arg;
- const char *string2_arg;
- const int size2_arg;
- const int pos;
+re_match_2 (pbufp, string1_arg, size1, string2_arg, size2, pos, regs, mstop)
+ struct re_pattern_buffer *pbufp;
+ char *string1_arg, *string2_arg;
+ int size1, size2;
+ int pos;
struct re_registers *regs;
- const int stop;
+ int mstop;
{
- unsigned char *p = (unsigned char *) bufp->buffer;
- unsigned char *p1;
+ register unsigned char *p = (unsigned char *) pbufp->buffer;
/* Pointer to beyond end of buffer. */
- register unsigned char *pend = p + bufp->used;
+ register unsigned char *pend = p + pbufp->used;
unsigned char *string1 = (unsigned char *) string1_arg;
unsigned char *string2 = (unsigned char *) string2_arg;
- int size1 = size1_arg;
- int size2 = size2_arg;
unsigned char *end1; /* Just past end of first string. */
unsigned char *end2; /* Just past end of second string. */
-
/* Pointers into string1 and string2, just past the last characters in
each to consider matching. */
unsigned char *end_match_1, *end_match_2;
register unsigned char *d, *dend;
- int mcnt, mcnt2; /* Multipurpose. */
- unsigned char *translate = (unsigned char *) bufp->translate;
+ register int mcnt; /* Multipurpose. */
+ unsigned char *translate = (unsigned char *) pbufp->translate;
unsigned is_a_jump_n = 0;
- /* This is how many registers the caller wants. */
- unsigned num_regs_wanted = regs
- ? bufp->return_default_num_regs
- ? bufp->re_nsub + 1
- : regs->num_regs
- : 0;
-
- /* Want to fill *all* the registers internally. */
- unsigned num_internal_regs = bufp->re_nsub + 1;
-
- void *destination; /* For REGEX_REALLOCATE. */
-
-
/* Failure point stack. Each place that can handle a failure further
down the line pushes a failure point on this stack. It consists of
restart, regend, and reg_info for all registers corresponding to the
@@ -3678,7 +1934,13 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
``dummy''; if a failure happens and the failure point is a dummy, it
gets discarded and the next next one is tried. */
- failure_stack_type failure_stack;
+#ifndef NO_ALLOCA
+ unsigned char *initial_stack[MAX_NUM_FAILURE_ITEMS * NFAILURES];
+#endif
+ unsigned char **stackb;
+ unsigned char **stackp;
+ unsigned char **stacke;
+
/* Information on the contents of registers. These are pointers into
the input strings; they record just what was matched (on this
@@ -3688,21 +1950,8 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
stopped matching the regnum-th subexpression. (The zeroth register
keeps track of what the whole pattern matches.) */
- unsigned char **regstart = (unsigned char **)
- REGEX_ALLOCATE (num_internal_regs * sizeof (unsigned char *));
- unsigned char **regend = (unsigned char **)
- REGEX_ALLOCATE (num_internal_regs * sizeof (unsigned char *));
-
- /* If a group that's operated upon by a repetition operator fails to
- match anything, then the register for its start will need to be
- restored because it will have been set to wherever in the string we
- are when we last see its open-group operator. The argument is
- similar for a register's end. */
-
- unsigned char **old_regstart
- = (unsigned char **) REGEX_ALLOCATE (num_internal_regs * sizeof (unsigned char *));
- unsigned char **old_regend
- = (unsigned char **) REGEX_ALLOCATE (num_internal_regs * sizeof (unsigned char *));
+ unsigned char *regstart[RE_NREGS];
+ unsigned char *regend[RE_NREGS];
/* The is_active field of reg_info helps us keep track of which (possibly
nested) subexpressions we are currently in. The matched_something
@@ -3711,8 +1960,7 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
subexpression. These two fields get reset each time through any
loop their register is in. */
- struct register_info *reg_info = (struct register_info *)
- REGEX_ALLOCATE (num_internal_regs * sizeof (struct register_info));
+ struct register_info reg_info[RE_NREGS];
/* The following record the register info as found in the above
@@ -3721,92 +1969,36 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
turn happens only if we have not yet matched the entire string. */
unsigned best_regs_set = 0;
+ unsigned char *best_regstart[RE_NREGS];
+ unsigned char *best_regend[RE_NREGS];
- unsigned char **best_regstart
- = (unsigned char **) REGEX_ALLOCATE (num_internal_regs * sizeof (unsigned char *));
-
- unsigned char **best_regend
- = (unsigned char **) REGEX_ALLOCATE (num_internal_regs * sizeof (unsigned char *));
-
- unsigned current_reg = 0;
-
- /* End of declarations. */
-
-
- if (!INIT_FAILURE_STACK (failure_stack))
- return -2;
-
- if (!(regstart && regend && old_regstart && old_regend && reg_info
- && best_regstart && best_regend))
- {
-#ifdef REGEX_MALLOC
- FREE_VARIABLES;
+ /* Initialize the stack. */
+#ifdef NO_ALLOCA
+ stackb = (unsigned char **) malloc (MAX_NUM_FAILURE_ITEMS * NFAILURES * sizeof (char *));
+#else
+ stackb = initial_stack;
#endif
- return -2;
- }
+ stackp = stackb;
+ stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES];
- /* The starting position is bogus. */
- if (pos < 0 || pos > size1 + size2)
- {
-#ifdef REGEX_MALLOC
- FREE_VARIABLES;
+#ifdef DEBUG_REGEX
+ fprintf (stderr, "Entering re_match_2(%s%s)\n", string1_arg, string2_arg);
#endif
- return -1;
- }
-
-
+
/* Initialize subexpression text positions to -1 to mark ones that no
\( or ( and \) or ) has been seen for. Also set all registers to
- inactive and mark them as not having any inner groups, able to
- match the empty string, matched anything so far, or ever failed. */
-
- for (mcnt = 0; mcnt < num_internal_regs; mcnt++)
+ inactive and mark them as not having matched anything or ever
+ failed. */
+ for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
{
- regstart[mcnt] = regend[mcnt]
- = old_regstart[mcnt] = old_regend[mcnt] = (unsigned char *) -1;
-
- if (!INIT_BITS_LIST (INNER_GROUPS (reg_info[mcnt])))
- {
-#ifdef REGEX_MALLOC
- FREE_VARIABLES;
-#endif
- return -2;
- }
-
- CAN_MATCH_NOTHING (reg_info[mcnt]) = -1; /* I.e., unset. */
- /* The bit fields. */
+ regstart[mcnt] = regend[mcnt] = (unsigned char *) (-1L);
IS_ACTIVE (reg_info[mcnt]) = 0;
MATCHED_SOMETHING (reg_info[mcnt]) = 0;
- EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
}
- IS_ACTIVE (reg_info[0]) = 1;
-
-
- if (regs && num_regs_wanted > 0)
- {
- if (bufp->syntax & RE_ALLOCATE_REGISTERS)
- {
- regs->num_regs = num_regs_wanted;
- regs->start = (int *) malloc (regs->num_regs * sizeof (int));
-
- if (regs->start == NULL)
- return -2;
-
- regs->end = (int *) malloc (regs->num_regs * sizeof (int));
-
- if (regs->end == NULL)
- return -2;
- }
-
- for (mcnt = 0; mcnt < regs->num_regs; mcnt++)
- {
- regs->start[mcnt] = -1;
- regs->end[mcnt] = -1;
- }
- }
-
-
+ if (regs)
+ for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
/* Set up pointers to ends of strings.
Don't allow the second string to be empty unless both are empty. */
@@ -3820,17 +2012,16 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
end1 = string1 + size1;
end2 = string2 + size2;
-
/* Compute where to stop matching, within the two strings. */
- if (stop <= size1)
+ if (mstop <= size1)
{
- end_match_1 = string1 + stop;
+ end_match_1 = string1 + mstop;
end_match_2 = string2;
}
else
{
end_match_1 = end1;
- end_match_2 = string2 + stop - size1;
+ end_match_2 = string2 + mstop - size1;
}
/* `p' scans through the pattern as `d' scans through the data. `dend'
@@ -3838,18 +2029,12 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
advanced into the following input string whenever necessary, but
this happens before fetching; therefore, at the beginning of the
loop, `d' can be pointing at the end of a string, but it cannot
- equal `string2'. */
+ equal string2. */
if (size1 != 0 && pos <= size1)
- {
- d = string1 + pos;
- dend = end_match_1;
- }
+ d = string1 + pos, dend = end_match_1;
else
- {
- d = string2 + pos - size1;
- dend = end_match_2;
- }
+ d = string2 + pos - size1, dend = end_match_2;
/* This loops over pattern commands. It exits by returning from the
@@ -3858,6 +2043,12 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
while (1)
{
+#ifdef DEBUG_REGEX
+ fprintf (stderr,
+ "regex loop(%d): matching 0x%02d\n",
+ p - (unsigned char *) pbufp->buffer,
+ *p);
+#endif
is_a_jump_n = 0;
/* End of pattern means we might have succeeded. */
if (p == pend)
@@ -3865,7 +2056,7 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
/* If not end of string, try backtracking. Otherwise done. */
if (d != end_match_2)
{
- if (!FAILURE_STACK_EMPTY)
+ if (stackp != stackb)
{
/* More failure points to try. */
@@ -3881,7 +2072,7 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
best_regs_set = 1;
best_regend[0] = d; /* Never use regstart[0]. */
- for (mcnt = 1; mcnt < num_internal_regs; mcnt++)
+ for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
{
best_regstart[mcnt] = regstart[mcnt];
best_regend[mcnt] = regend[mcnt];
@@ -3896,54 +2087,46 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
/* Restore best match. */
d = best_regend[0];
- if (d >= string1 && d <= end1)
- dend = end_match_1;
-
- for (mcnt = 0; mcnt < num_internal_regs; mcnt++)
+ for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
{
regstart[mcnt] = best_regstart[mcnt];
regend[mcnt] = best_regend[mcnt];
}
}
- } /* if (d != end_match_2) */
+ }
/* If caller wants register contents data back, convert it
to indices. */
- if (regs && regs->num_regs > 0)
+ if (regs)
{
regs->start[0] = pos;
-
- regs->end[0] = MATCHING_IN_FIRST_STRING
- ? d - string1
- : d - string2 + size1;
-
- for (mcnt = 1; mcnt < regs->num_regs; mcnt++)
+ if (MATCHING_IN_FIRST_STRING)
+ regs->end[0] = d - string1;
+ else
+ regs->end[0] = d - string2 + size1;
+ for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
{
- if (mcnt >= num_internal_regs
- || regstart[mcnt] == (unsigned char *) -1
- || regend[mcnt] == (unsigned char *) -1)
+ if (regend[mcnt] == (unsigned char *)(-1L))
{
regs->start[mcnt] = -1;
regs->end[mcnt] = -1;
continue;
}
-
- regs->start[mcnt] = IS_IN_FIRST_STRING (regstart[mcnt])
- ? regstart[mcnt] - string1
- : regstart[mcnt] - string2 + size1;
+ if (IS_IN_FIRST_STRING (regstart[mcnt]))
+ regs->start[mcnt] = regstart[mcnt] - string1;
+ else
+ regs->start[mcnt] = regstart[mcnt] - string2 + size1;
- regs->end[mcnt] = IS_IN_FIRST_STRING (regend[mcnt])
- ? regend[mcnt] - string1
- : regend[mcnt] - string2 + size1;
+ if (IS_IN_FIRST_STRING (regend[mcnt]))
+ regs->end[mcnt] = regend[mcnt] - string1;
+ else
+ regs->end[mcnt] = regend[mcnt] - string2 + size1;
}
}
-
-#ifdef REGEX_MALLOC
- FREE_VARIABLES;
-#endif
- return d - pos - (MATCHING_IN_FIRST_STRING
- ? string1
- : string2 - size1);
+ FREE_AND_RETURN(stackb,
+ (d - pos - (MATCHING_IN_FIRST_STRING ?
+ string1 :
+ string2 - size1)));
}
/* Otherwise match next pattern command. */
@@ -3955,135 +2138,51 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
{
/* \( [or `(', as appropriate] is represented by start_memory,
- \) by stop_memory. Both of those commands are followed by a
- register number in the next byte. The text matched within
- the \( and \) is recorded (in the internal registers data
- structure) under that number. */
-
- case start_memory:
- /* Find out if this group can match the empty string. */
- p1 = p; /* To send to group_can_match_nothing. */
-
- if (CAN_MATCH_NOTHING (reg_info[*p]) == -1)
- CAN_MATCH_NOTHING (reg_info[*p])
- = group_can_match_nothing (&p1, pend, reg_info);
-
- /* Save the position in the string where we were the last time
- we were at this open-group operator in case the group is
- operated upon by a repetition operator, e.g., with `(a*)*b'
- against `ab'; then we want to ignore where we are now in
- the string in case this attempt to match fails. */
-
- old_regstart[*p] = CAN_MATCH_NOTHING (reg_info[*p])
- ? ((regstart[*p] == (unsigned char *) -1)
- ? d : regstart[*p])
- : regstart[*p];
+ \) by stop_memory. Both of those commands are followed by
+ a register number in the next byte. The text matched
+ within the \( and \) is recorded under that number. */
+ case start_memory:
regstart[*p] = d;
-
IS_ACTIVE (reg_info[*p]) = 1;
MATCHED_SOMETHING (reg_info[*p]) = 0;
p++;
break;
case stop_memory:
- /* Save the position we were in the string the last time we
- were at this close-group operator in case the group is
- operated upon by a repetition operator, e.g., with
- `((a*)*(b*)*)*' against `aba'; then we want to ignore where
- we are now in the string in case this attempt to match
- fails. */
-
- old_regend[*p] = CAN_MATCH_NOTHING (reg_info[*p])
- ? ((regend[*p] == (unsigned char *) -1)
- ? d : regend[*p])
- : regend[*p];
regend[*p] = d;
IS_ACTIVE (reg_info[*p]) = 0;
-
- /* Record that this group is inside of all currently active
- groups; makes no sense for group 1. */
- if (*p != 1)
- NOTE_INNER_GROUP (*p);
-
-
- /* If just failed to match something this time around with a
- group that's operated on by a repetition operator, try to
- force exit from the ``loop,'' and restore the register
- information for this group that we had before trying this
- last match. */
-
- if ((!MATCHED_SOMETHING (reg_info[*p])
- || (enum regexpcode) p[-3] == start_memory)
+
+ /* If just failed to match something this time around with a sub-
+ expression that's in a loop, try to force exit from the loop. */
+ if ((! MATCHED_SOMETHING (reg_info[*p])
+ || (enum regexpcode) p[-3] == start_memory)
&& (p + 1) != pend)
{
- p1 = p + 1;
+ register unsigned char *p2 = p + 1;
mcnt = 0;
- switch ((enum regexcode) *p1++)
+ switch (*p2++)
{
- case no_pop_jump_n:
+ case jump_n:
is_a_jump_n = 1;
- case pop_failure_jump:
- case maybe_pop_jump:
- case no_pop_jump:
+ case finalize_jump:
+ case maybe_finalize_jump:
+ case jump:
case dummy_failure_jump:
- extract_number_and_incr (&mcnt, &p1);
+ EXTRACT_NUMBER_AND_INCR (mcnt, p2);
if (is_a_jump_n)
- p1 += 2;
+ p2 += 2;
break;
}
- p1 += mcnt;
+ p2 += mcnt;
/* If the next operation is a jump backwards in the pattern
- to an on_failure_jump right before the start_memory
- corresponding to this stop_memory, exit from the loop
- by forcing a failure after pushing on the stack the
- on_failure_jump's jump in the pattern, and d. */
-
- if (mcnt < 0 && (enum regexpcode) *p1 == on_failure_jump
- && (enum regexpcode) p1[3] == start_memory && p1[4] == *p)
+ to an on_failure_jump, exit from the loop by forcing a
+ failure after pushing on the stack the on_failure_jump's
+ jump in the pattern, and d. */
+ if (mcnt < 0 && (enum regexpcode) *p2++ == on_failure_jump)
{
- /* If this group ever matched anything, then
- restore what its registers were before trying
- this last failed match, e.g., with `(a*)*b' against
- `ab' for regstart[1], and, e.g., with `((a*)*(b*)*)*'
- against `aba' for regend[3].
-
- Restore the registers for inner groups, too, e.g.,
- for `((a*)(b*))*' against `aba' (register 2 gets
- trashed). */
-
- if (EVER_MATCHED_SOMETHING (reg_info[*p]))
- {
- unsigned this_reg;
- unsigned bits_mask;
-
- EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
-
- /* Restore this group's registers. */
-
- regstart[*p] = old_regstart[*p];
- regend[*p] = old_regend[*p];
-
- /* Restore the inner groups' (if any) registers. */
-
- for (this_reg = 0;
- this_reg < INNER_GROUPS (reg_info[*p]).size;
- this_reg++)
- {
- if (get_bit (INNER_GROUPS (reg_info[*p]), this_reg))
- {
- regstart[this_reg] = old_regstart[this_reg];
-
- if ((int)old_regend[this_reg]
- >= (int)regstart[this_reg])
- regend[this_reg] = old_regend[this_reg];
- }
- }
- }
- p1++;
- extract_number_and_incr (&mcnt, &p1);
- PUSH_FAILURE_POINT (p1 + mcnt, d, failure_stack, -2);
-
+ EXTRACT_NUMBER_AND_INCR (mcnt, p2);
+ PUSH_FAILURE_POINT (p2 + mcnt, d);
goto fail;
}
}
@@ -4094,16 +2193,10 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
followed by the numeric value of <digit> as the register number. */
case duplicate:
{
+ int regno = *p++; /* Get which register to match against */
register unsigned char *d2, *dend2;
- int regno = *p++; /* Get which register to match against. */
-
- /* Can't back reference a group which we've never matched. */
- if ((regstart[regno] == (unsigned char *) -1
- || regend[regno] == (unsigned char *) -1)
- && ! bufp->can_be_null)
- goto really_fail;
-
- /* Where in input to try to start matching. */
+
+ /* Where in input to try to start matching. */
d2 = regstart[regno];
/* Where to stop matching; if both the place to start and
@@ -4122,10 +2215,7 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
{
if (dend2 == end_match_2) break;
if (dend2 == regend[regno]) break;
-
- /* end of string1 => advance to string2. */
- d2 = string2;
- dend2 = regend[regno];
+ d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
}
/* At end of register contents => success */
if (d2 == dend2) break;
@@ -4144,8 +2234,8 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
/* Compare that many; failure if mismatch, else move
past them. */
if (translate
- ? bcmp_translate (d, d2, mcnt, translate)
- : bcmp (d, d2, mcnt))
+ ? memcmp_translate (d, d2, mcnt, translate)
+ : memcmp ((char *)d, (char *)d2, mcnt))
goto fail;
d += mcnt, d2 += mcnt;
}
@@ -4154,14 +2244,12 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
case anychar:
PREFETCH; /* Fetch a data character. */
- /* Match anything but possibly a newline or a null. */
- if ((!(bufp->syntax & RE_DOT_NEWLINE)
- && (translate ? translate[*d] : *d) == '\n')
- || ((bufp->syntax & RE_DOT_NOT_NULL)
+ /* Match anything but a newline, maybe even a null. */
+ if ((translate ? translate[*d] : *d) == '\n'
+ || ((obscure_syntax & RE_DOT_NOT_NULL)
&& (translate ? translate[*d] : *d) == '\000'))
goto fail;
-
- SET_REGS_MATCHED;
+ SET_REGS_MATCHED;
d++;
break;
@@ -4175,7 +2263,10 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
PREFETCH; /* Fetch a data character. */
- c = translate ? translate[*d] : *d;
+ if (translate)
+ c = translate[*d];
+ else
+ c = *d;
if (c < *p * BYTEWIDTH
&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
@@ -4190,105 +2281,66 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
}
case begline:
- if (bufp->not_bol == 1)
- goto fail;
-
- if (d && (*d == '\n' || d[-1] == '\n'))
- {
- if (*d == '\n')
- d++;
-
- if (bufp->syntax & RE_NO_ANCHOR_AT_NEWLINE)
- goto fail;
- else
- break;
- }
-
if ((size1 != 0 && d == string1)
|| (size1 == 0 && size2 != 0 && d == string2)
+ || (d && d[-1] == '\n')
|| (size1 == 0 && size2 == 0))
break;
else
goto fail;
case endline:
- if (bufp->not_eol == 1)
- goto fail;
-
if (d == end2
- || (d == end1 && size2 == 0))
+ || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
break;
-
- if (*d == '\n' || (d == end1 && *string2 == '\n'))
- {
- PREFETCH;
-
- if (*d == '\n')
- d++;
-
- if (bufp->syntax & RE_NO_ANCHOR_AT_NEWLINE)
- goto fail;
- else
- break;
- }
goto fail;
- /* Uses of on_failure_jump:
-
- Each alternative starts with an on_failure_jump that points
- to the beginning of the next alternative. Each alternative
- except the last ends with a jump that in effect jumps past
- the rest of the alternatives. (They really jump to the
- ending jump of the following alternative, because tensioning
- these jumps is a hassle.)
+ /* `or' constructs are handled by starting each alternative with
+ an on_failure_jump that points to the start of the next
+ alternative. Each alternative except the last ends with a
+ jump to the joining point. (Actually, each jump except for
+ the last one really jumps to the following jump, because
+ tensioning the jumps is a hassle.) */
+
+ /* The start of a stupid repeat has an on_failure_jump that points
+ past the end of the repeat text. This makes a failure point so
+ that, on failure to match a repetition, matching restarts past
+ as many repetitions as have been found, with no way to fail and
+ look for another one. */
- Repeats start with an on_failure_jump that points past both
- the repetition text and the following jump or
- pop_failure_jump back to this on_failure_jump. */
+ /* A smart repeat is similar but loops back to the on_failure_jump
+ so that each repetition makes another failure point. */
case on_failure_jump:
on_failure:
- extract_number_and_incr (&mcnt, &p);
- PUSH_FAILURE_POINT (p + mcnt, d, failure_stack, -2);
-
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ PUSH_FAILURE_POINT (p + mcnt, d);
break;
-
- /* A smart repeat ends with a maybe_pop_jump.
- We change it either to a pop_failure_jump or a no_pop_jump. */
-
- case maybe_pop_jump:
- extract_number_and_incr (&mcnt, &p);
+ /* The end of a smart repeat has a maybe_finalize_jump back.
+ Change it either to a finalize_jump or an ordinary jump. */
+ case maybe_finalize_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
{
register unsigned char *p2 = p;
-
- /* Compare the beginning of the repeat with what in the
- pattern follows its end. If we can establish that there
- is nothing that they would both match, i.e., that we
- would have to backtrack because of (as would in, e.g.,
- `a*a') then we can change to pop_failure_jump, because
- we'll never have to backtrack. */
-
- /* Skip over parentheses. */
+ /* Compare what follows with the beginning of the repeat.
+ If we can establish that there is nothing that they would
+ both match, we can change to finalize_jump. */
while (p2 + 1 != pend
&& (*p2 == (unsigned char) stop_memory
|| *p2 == (unsigned char) start_memory))
- p2 += 2; /* Skip over reg number, too. */
-
- if (p2 == pend)
- p[-3] = (unsigned char) pop_failure_jump;
- else if (*p2 == (unsigned char) exactn
+ p2 += 2; /* Skip over reg number. */
+ if (p2 == pend)
+ p[-3] = (unsigned char) finalize_jump;
+ else if (*p2 == (unsigned char) exactn
|| *p2 == (unsigned char) endline)
{
register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
register unsigned char *p1 = p + mcnt;
-
- /* p1[0] ... p1[2] are the on_failure_jump corresponding
- to the maybe_finalize_jump of this case. Examine what
- follows it. */
-
- if (p1[3] == (unsigned char) exactn && p1[5] != c)
- p[-3] = (unsigned char) pop_failure_jump;
+ /* p1[0] ... p1[2] are an on_failure_jump.
+ Examine what follows that. */
+ if (p1[3] == (unsigned char) exactn && p1[5] != c)
+ p[-3] = (unsigned char) finalize_jump;
else if (p1[3] == (unsigned char) charset
|| p1[3] == (unsigned char) charset_not)
{
@@ -4296,83 +2348,53 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
if (c < p1[4] * BYTEWIDTH
&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
not = !not;
- /* `not' is equal to 1 if c would match, which means
- that we can't change to pop_failure_jump. */
+ /* `not' is 1 if c would match. */
+ /* That means it is not safe to finalize. */
if (!not)
- p[-3] = (unsigned char) pop_failure_jump;
+ p[-3] = (unsigned char) finalize_jump;
}
}
}
p -= 2; /* Point at relative address again. */
- if (p[-1] != (unsigned char) pop_failure_jump)
+ if (p[-1] != (unsigned char) finalize_jump)
{
- p[-1] = (unsigned char) no_pop_jump;
- goto no_pop;
+ p[-1] = (unsigned char) jump;
+ goto nofinalize;
}
/* Note fall through. */
- /* The end of a simple repeat has a pop_failure_jump back to
- its matching on_failure_jump, where the latter will push a
- failure point point. The pop_failure_jump takes off failure
- points put on by this pop_failure_jump's matching
- on_failure_jump; we got through the pattern to here from the
- matching on_failure_jump, so didn't fail. Also remove the
- register information put on by the matching on_failure_jump. */
-
- case pop_failure_jump:
- pop:
- pop_failure_point (&failure_stack);
- /* Note fall through. */
-
- /* Jump without taking off any failure points. */
+ /* The end of a stupid repeat has a finalize_jump back to the
+ start, where another failure point will be made which will
+ point to after all the repetitions found so far. */
- case no_pop_jump:
- no_pop:
- extract_number_and_incr (&mcnt, &p); /* Get the amount to jump. */
- p += mcnt; /* Do the jump. */
+ /* Take off failure points put on by matching on_failure_jump
+ because didn't fail. Also remove the register information
+ put on by the on_failure_jump. */
+ case finalize_jump:
+ POP_FAILURE_POINT ();
+ /* Note fall through. */
+
+ /* Jump without taking off any failure points. */
+ case jump:
+ nofinalize:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p += mcnt;
break;
-
- /* If the last alternative didn't match anything and empty
- alternatives aren't allowed, then don't skip over the next
- one. */
-
- case jump_past_next_alt:
- {
- int this_reg; /* Counting down. */
-
- /* The current register is the innermost (the one with the
- highest number) active one. */
-
- for (this_reg = num_internal_regs - 1;
- this_reg >= 0; this_reg--)
- if (IS_ACTIVE (reg_info[this_reg]))
- break;
-
- if (!(bufp->syntax & RE_NO_EMPTY_ALTS)
- || MATCHED_SOMETHING (reg_info[this_reg]))
- goto no_pop;
-
- p += 2; /* Skip past the jump's number. */
- break;
- }
-
case dummy_failure_jump:
/* Normally, the on_failure_jump pushes a failure point, which
- then gets popped at pop_failure_jump. We will end up at
- pop_failure_jump, also, and with a pattern of, say, `a+', we
+ then gets popped at finalize_jump. We will end up at
+ finalize_jump, also, and with a pattern of, say, `a+', we
are skipping over the on_failure_jump, so we have to push
- something meaningless for pop_failure_jump to pop. */
-
- PUSH_FAILURE_POINT (0, 0, failure_stack, -2);
-
- goto no_pop;
+ something meaningless for finalize_jump to pop. */
+ PUSH_FAILURE_POINT (0, 0);
+ goto nofinalize;
/* Have to succeed matching what follows at least n times. Then
just handle like an on_failure_jump. */
case succeed_n:
- mcnt = extract_number (p + 2);
+ EXTRACT_NUMBER (mcnt, p + 2);
/* Originally, this is how many times we HAVE to succeed. */
if (mcnt)
{
@@ -4382,8 +2404,8 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
}
else if (mcnt == 0)
{
- p[2] = (char) no_op;
- p[3] = (char) no_op;
+ p[2] = unused;
+ p[3] = unused;
goto on_failure;
}
else
@@ -4393,14 +2415,15 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
}
break;
- case no_pop_jump_n:
- mcnt = extract_number (p + 2);
+ case jump_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
/* Originally, this is how many times we CAN jump. */
if (mcnt)
{
mcnt--;
STORE_NUMBER(p + 2, mcnt);
- goto no_pop;
+ goto nofinalize; /* Do the jump without taking off
+ any failure points. */
}
/* If don't have to jump any more, skip over the rest of command. */
else
@@ -4411,16 +2434,16 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
{
register unsigned char *p1;
- extract_number_and_incr (&mcnt, &p);
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
p1 = p + mcnt;
- extract_number_and_incr (&mcnt, &p);
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
STORE_NUMBER (p1, mcnt);
break;
}
/* Ignore these. Used to ignore the n of succeed_n's which
currently have n == 0. */
- case no_op:
+ case unused:
break;
case wordbound:
@@ -4434,56 +2457,32 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
break;
case wordbeg:
- /* Have to check if AT_STRINGS_BEG before looking at d - 1. */
- if (IS_A_LETTER (d) && (AT_STRINGS_BEG || !IS_A_LETTER (d - 1)))
+ if (IS_A_LETTER (d) && (!IS_A_LETTER (d - 1) || AT_STRINGS_BEG))
break;
goto fail;
case wordend:
/* Have to check if AT_STRINGS_BEG before looking at d - 1. */
- if (!AT_STRINGS_BEG && IS_A_LETTER (d - 1)
+ if (!AT_STRINGS_BEG && IS_A_LETTER (d - 1)
&& (!IS_A_LETTER (d) || AT_STRINGS_END))
break;
goto fail;
#ifdef emacs
-#ifdef emacs19
- case before_dot:
- if (PTR_CHAR_POS (d) >= point)
- goto fail;
- break;
-
- case at_dot:
- if (PTR_CHAR_POS (d) != point)
- goto fail;
- break;
-
- case after_dot:
- if (PTR_CHAR_POS (d) <= point)
- goto fail;
- break;
-#else /* not emacs19 */
- case before_dot:
- if (((d - string2 <= (unsigned) size2)
- ? d - bf_p2 : d - bf_p1)
- <= point)
+ case before_dot:
+ if (PTR_CHAR_POS (d) >= point)
goto fail;
break;
case at_dot:
- if (((d - string2 <= (unsigned) size2)
- ? d - bf_p2 : d - bf_p1)
- == point)
+ if (PTR_CHAR_POS (d) != point)
goto fail;
break;
case after_dot:
- if (((d - string2 <= (unsigned) size2)
- ? d - bf_p2 : d - bf_p1)
- >= point)
+ if (PTR_CHAR_POS (d) <= point)
goto fail;
break;
-#endif /* not emacs19 */
case wordchar:
mcnt = (int) Sword;
@@ -4525,6 +2524,13 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
SET_REGS_MATCHED;
break;
+ case before_dot:
+ case at_dot:
+ case after_dot:
+ case syntaxspec:
+ case notsyntaxspec:
+ break;
+
#endif /* not emacs */
case begbuf:
@@ -4568,67 +2574,31 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
/* Jump here if any matching operation fails. */
fail:
- if (!FAILURE_STACK_EMPTY)
+ if (stackp != stackb)
/* A restart point is known. Restart there and pop it. */
{
- short highest_used_reg, this_reg;
- boolean is_a_jump_n = false;
-
- /* If this failure point is from a dummy_failure_point,
- just skip it. */
-
- if (!failure_stack.stack[failure_stack.avail - 2])
+ short last_used_reg, this_reg;
+
+ /* If this failure point is from a dummy_failure_jump, just
+ skip it. */
+ if (!stackp[-2])
{
- pop_failure_point (&failure_stack);
+ POP_FAILURE_POINT ();
goto fail;
}
- /* Among other things, undo the last failure point push. */
-
- d = failure_stack.stack[--failure_stack.avail];
- p = failure_stack.stack[--failure_stack.avail];
-
-
- /* If failed to a backwards jump that's part of a repetition
- loop, need to pop this failure point and use the next one. */
-
- switch ((enum regexpcode) *p)
- {
- case no_pop_jump_n:
- is_a_jump_n = true;
- case maybe_pop_jump:
- case pop_failure_jump:
- case no_pop_jump:
- p1 = p + 1;
- extract_number_and_incr (&mcnt, &p1);
- p1 += mcnt;
-
- if ((is_a_jump_n && *p1 == succeed_n)
- || (!is_a_jump_n && *p1 == on_failure_jump))
- {
- /* Put p and d back on the stack again... */
- failure_stack.avail += 2;
-
- /* ...and pop the whole failure point. */
- pop_failure_point (&failure_stack);
- goto fail;
- }
- break;
- }
-
+ d = *--stackp;
+ p = *--stackp;
if (d >= string1 && d <= end1)
dend = end_match_1;
-
/* Restore register info. */
- highest_used_reg
- = (short) failure_stack.stack[--failure_stack.avail];
+ last_used_reg = (long) *--stackp;
/* Make the ones that weren't saved -1 or 0 again. */
- for (this_reg = num_internal_regs - 1; this_reg > highest_used_reg;
- this_reg--)
+ for (this_reg = RE_NREGS - 1; this_reg > last_used_reg; this_reg--)
{
- regend[this_reg] = (unsigned char *) -1;
- regstart[this_reg] = (unsigned char *) -1;
+ regend[this_reg] = (unsigned char *) (-1L);
+ regstart[this_reg] = (unsigned char *) (-1L);
IS_ACTIVE (reg_info[this_reg]) = 0;
MATCHED_SOMETHING (reg_info[this_reg]) = 0;
}
@@ -4636,342 +2606,24 @@ re_match_2 (bufp, string1_arg, size1_arg, string2_arg, size2_arg, pos,
/* And restore the rest from the stack. */
for ( ; this_reg > 0; this_reg--)
{
- reg_info[this_reg] = *(struct register_info *)
- failure_stack.stack[--failure_stack.avail];
-
- regend[this_reg]
- = failure_stack.stack[--failure_stack.avail];
-
- regstart[this_reg]
- = failure_stack.stack[--failure_stack.avail];
+ reg_info[this_reg] = *(struct register_info *) *--stackp;
+ regend[this_reg] = *--stackp;
+ regstart[this_reg] = *--stackp;
}
- }
+ }
else
break; /* Matching at this starting point really fails. */
- } /* while (1) */
+ }
- really_fail:
if (best_regs_set)
goto restore_best_regs;
-#ifdef REGEX_MALLOC
- FREE_VARIABLES;
-#endif
- return -1; /* Failure to match. */
-}
-
-
-
-
-/* Subroutine definitions for re_match_2. */
-
-
-
-/* Failure stack stuff. */
-
-/* Pops what PUSH_FAILURE_STACK pushes. */
-
-static void
-pop_failure_point(failure_stack_ptr)
- failure_stack_type *failure_stack_ptr;
-{
- int temp;
-
- if (FAILURE_STACK_PTR_EMPTY)
- {
- printf ("Tried to pop empty failure point in re_match_2.\n");
- exit (1);
- }
-
- /* Remove failure points and point to how many regs pushed. */
- else
- {
- if (failure_stack_ptr->avail < 3)
- {
- printf ("Aren't enough items to pop on re_match_2 failure stack: \
-there's only %d on it.\n", failure_stack_ptr->avail);
- exit (1);
- }
- failure_stack_ptr->avail -= 3;
- temp = (int) failure_stack_ptr->stack[failure_stack_ptr->avail];
- temp *= NUM_REG_ITEMS; /* How much to take off the stack. */
-
- if (failure_stack_ptr->avail < temp)
- {
- printf ("Can't pop %d items off re_match_2 failure stack: \
-there's only %d on it.\n", temp, failure_stack_ptr->avail);
- exit (1);
- }
- failure_stack_ptr->avail -= temp; /* Remove the register info. */
- }
+ FREE_AND_RETURN(stackb,(-1)); /* Failure to match. */
}
-/* Other things. */
-
-static boolean common_op_can_match_nothing ();
-static boolean alternative_can_match_nothing ();
-
-
-/* We are given P pointing to a register number after a start_memory.
-
- Return true if the pattern up to the corresponding stop_memory can
- match the empty string, and false otherwise.
-
- If we find the matching stop_memory, sets P to point to one past its number.
- Otherwise, sets P to an undefined byte less than or equal to END.
-
- We don't handle duplicates properly (yet). */
-
-static boolean
-group_can_match_nothing (p, end, reg_info)
- unsigned char **p, *end;
- struct register_info *reg_info;
-{
- int mcnt;
- unsigned char *p1 = *p + 1; /* Point to after this register number. */
-
- while (p1 < end)
- {
- /* Skip over opcodes that can match nothing, and return true or
- false, as appropriate, when we get to one that can't, or to the
- matching stop_memory. */
-
- switch ((enum regexpcode) *p1)
- {
- /* Could be either a loop or a series of alternatives. */
- case on_failure_jump:
- p1++;
- extract_number_and_incr (&mcnt, &p1);
-
- /* If the next operation is not a jump backwards in the
- pattern. */
-
- if (mcnt >= 0)
- {
- /* Go through the on_failure_jumps of the alternatives,
- seeing if any of the alternatives cannot match nothing.
- The last alternative starts with only a no_pop_jump,
- whereas the rest start with on_failure_jump and end
- with a no_pop_jump, e.g., here is the pattern for `a|b|c':
-
- /on_failure_jump/0/6/exactn/1/a/jump_past_next_alt/0/6
- /on_failure_jump/0/6/exactn/1/b/jump_past_next_alt/0/3
- /exactn/1/c
-
- So, we have to first go through the first (n-1)
- alternatives and then deal with the last one separately. */
-
-
- /* Deal with the first (n-1) alternatives, which start
- with an on_failure_jump (see above) that jumps to right
- past a jump_past_next_alt. */
-
- while ((enum regexpcode) p1[mcnt-3] == jump_past_next_alt)
- {
- /* MCNT holds how many bytes long the alternative
- is, including the ending `jump_past_next_alt' and its number. */
-
- if (!alternative_can_match_nothing (p1, p1 + mcnt - 3,
- reg_info))
- return false;
-
- /* Move to right after this alternative, including the
- jump_past_next_alt. */
-
- p1 += mcnt;
-
- /* Break if it's the beginning of an n-th alternative
- that doesn't begin with an on_failure_jump. */
-
- if ((enum regexpcode) *p1 != on_failure_jump)
- break;
-
- /* Still have to check that it's not an n-th
- alternative that starts with an on_failure_jump. */
- p1++;
- extract_number_and_incr (&mcnt, &p1);
- if ((enum regexpcode) p1[mcnt-3] != jump_past_next_alt)
- {
- /* Get to the beginning of the n-th alternative. */
- p1 -= 3;
- break;
- }
- }
-
- /* Deal with the last alternative: go back and get number
- of the jump_past_next_alt just before it. MCNT contains how
- many bytes long the alternative is. */
-
- mcnt = extract_number (p1 - 2);
-
- if (!alternative_can_match_nothing (p1, p1 + mcnt, reg_info))
- return false;
-
- p1 += mcnt; /* Get past the n-th alternative. */
-
- } /* if mcnt > 0 */
-
- break;
-
- case stop_memory:
- if (p1[1] == **p)
- {
- *p = p1 + 2;
- return true;
- }
- else
- {
- printf ("Error: encountered an unmatched (%d) stop_memory in \
-group_can_match_nothing.\n", **p);
- exit (1);
- }
- break;
-
- default:
- if (!common_op_can_match_nothing (&p1, end, reg_info))
- return false;
- }
- } /* While p1 < end. */
-
- return false;
-}
-
-
-/* Similar to group_can_match_nothing, but doesn't deal with alternatives:
- It expects P to be the first byte of a single alternative and END one
- byte past the last. The alternative can contain groups. */
-
-
-static boolean
-alternative_can_match_nothing (p, end, reg_info)
- unsigned char *p, *end;
- struct register_info *reg_info;
-{
- int mcnt;
- unsigned char *p1 = p;
-
- while (p1 < end)
- {
- /* Skip over opcodes that can match nothing, and break when we get
- to one that can't. */
-
- switch ((enum regexpcode) *p1)
- {
- /* It's a loop. */
- case on_failure_jump:
- p1++;
- extract_number_and_incr (&mcnt, &p1);
- p1 += mcnt;
- break;
-
- default:
- if (!common_op_can_match_nothing (&p1, end, reg_info))
- return false;
- }
- } /* While not at the end of the alternative. */
-
- return true;
-}
-
-
-/* Deals with the ops common to group_can_match_nothing and
- alternative_can_match_nothing.
-
- Sets P to one after the op and its arguments, if any. */
-
-static boolean
-common_op_can_match_nothing (p, end, reg_info)
- unsigned char **p, *end;
- struct register_info *reg_info;
-{
- int mcnt;
- unsigned char *p1 = *p;
- boolean ret;
- int reg_no;
-
- switch ((enum regexp1code) *p1++)
- {
- case no_op:
- case begline:
- case endline:
- case endline_in_repeat:
- case endline_before_newline:
- break;
-
- case start_memory:
- reg_no = *p1;
- ret = group_can_match_nothing (&p1, end, reg_info);
-
- /* Have to set this here in case we're checking a group which
- contains a group and a back reference to it. */
-
- if (CAN_MATCH_NOTHING (reg_info[reg_no]) == -1)
- CAN_MATCH_NOTHING (reg_info[reg_no]) = ret;
-
- if (!ret)
- return false;
- break;
-
- /* If this is an optimized succeed_n for zero times, make the jump. */
- case no_pop_jump:
- extract_number_and_incr (&mcnt, &p1);
-
- if (mcnt >= 0)
- p1 += mcnt;
- else
- return false;
- break;
-
- case succeed_n:
- /* Get to the number of times to succeed. */
- p1 += 2;
- extract_number_and_incr (&mcnt, &p1);
-
- if (mcnt == 0)
- {
- p1 -= 4;
- extract_number_and_incr (&mcnt, &p1);
- p1 += mcnt;
- }
- else
- return false;
- break;
-
- case duplicate:
- if (!CAN_MATCH_NOTHING (reg_info[*p1]))
- return false;
- break;
-
- case set_number_at:
- p1 += 4;
- case before_dot:
- case at_dot:
- case after_dot:
- case begbuf:
- case endbuf:
- case wordbeg:
- case wordend:
- case wordbound:
- case notwordbound:
- break;
-
- default:
- /* All other opcodes mean we cannot match the empty string. */
- return false;
- }
-
- *p = p1;
- return true;
-}
-
-
-
-/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
- bytes; nonzero otherwise. */
-
static int
-bcmp_translate (s1, s2, len, translate)
+memcmp_translate (s1, s2, len, translate)
unsigned char *s1, *s2;
register int len;
unsigned char *translate;
@@ -4986,21 +2638,17 @@ bcmp_translate (s1, s2, len, translate)
}
-
-/* Entry points compatible with 4.2 BSD regex library. We don't define
- them if this is an Emacs or POSIX compilation. */
+/* Entry points compatible with 4.2 BSD regex library. */
-#if !defined(GAWK) && !defined (emacs) && !defined (_POSIX_SOURCE)
+#if !defined(emacs) && !defined(GAWK)
static struct re_pattern_buffer re_comp_buf;
char *
re_comp (s)
- const char *s;
+ char *s;
{
- char *return_value;
-
if (!s)
{
if (!re_comp_buf.buffer)
@@ -5010,403 +2658,32 @@ re_comp (s)
if (!re_comp_buf.buffer)
{
- re_comp_buf.buffer = (char *) malloc (200);
-
- if (re_comp_buf.buffer == NULL)
- return "Memory exhausted";
-
+ if (!(re_comp_buf.buffer = (char *) malloc (200)))
+ return "Memory exhausted";
re_comp_buf.allocated = 200;
-
- re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
-
- if (re_comp_buf.fastmap == NULL)
+ if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
return "Memory exhausted";
}
- return regex_compile (s, strlen (s), obscure_syntax, &re_comp_buf);
+ return re_compile_pattern (s, strlen (s), &re_comp_buf);
}
int
re_exec (s)
- const char *s;
-{
- const int len = strlen (s);
- return 0 <= re_search (&re_comp_buf, s, len, 0, len,
- (struct re_registers *) 0);
-}
-
-#endif /* not emacs and not _POSIX_SOURCE */
-
-
-
-/* Entry points compatible with POSIX regex library. Only define these
- when this is a POSIX compilation (and it's not Emacs). */
-
-#if !defined(emacs) && !defined(GAWK)
-
-/* regcomp takes a regular-expression string and converts it into a
- buffer full of byte commands for matching.
-
- PREG is a regex_t * whose pertinent fields are mentioned in below:
-
- It has a char * field called BUFFER which points to the
- space where this routine will put the compiled pattern; the
- user can either allocate this using malloc (whereupon they
- should set the long field ALLOCATED to the number of bytes
- malloced) or set ALLOCATED to 0 and let the routine
- allocate it. The routine may use realloc to enlarge the
- buffer space.
-
- If the user wants to translate all ordinary elements in the
- compiled pattern, they should set the char * field
- TRANSLATE to a translate table (and not set the REG_ICASE
- bit of CFLAGS, which would override this translate table
- with one that ignores case); otherwise, they should set
- TRANSLATE to 0.
-
- The routine sets the int field SYNTAX to RE_SYNTAX_POSIX_EXTENDED
- if the REG_EXTENDED bit in CFLAGS is set; otherwise, it sets it
- to RE_SYNTAX_POSIX_BASIC.
-
- It returns in the long field USED how many bytes long the
- compiled pattern is.
-
- It returns 0 in the char field FASTMAP_ACCURATE, on
- the assumption that the user usually doesn't compile the
- same pattern twice and that consequently any fastmap in the
- pattern buffer is inaccurate.
-
- In the size_t field RE_NSUB, it returns the number of
- subexpressions it found in PATTERN.
-
- PATTERN is the address of the pattern string.
-
- CFLAGS is a series of bits ORed together which affect compilation.
- If the bit REG_EXTENDED is set, regcomp compiles the
- pattern as an extended regular expression, otherwise it
- compiles it as a basic one. If the bit REG_NEWLINE is set,
- then dot and nonmatching lists won't match a newline, but
- pattern anchors will match at them. If the bit REG_ICASE
- is set, then it considers upper- and lowercase versions of
- letters to be equal when matching. If the bit REG_NOSUB is
- set, then when PREG is passed to regexec, that routine will
- only report success or failure.
-
-
- It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
- POSIX return codes and their meanings.) */
-
-
-int
-regcomp (preg, pattern, cflags)
- regex_t *preg;
- const char *pattern;
- int cflags;
-{
- char *return_value;
-
- int syntax = cflags & REG_EXTENDED
- ? RE_SYNTAX_POSIX_EXTENDED
- : RE_SYNTAX_POSIX_BASIC;
-
- if (cflags & REG_NEWLINE)
- {
- syntax &= ~RE_DOT_NEWLINE;
- syntax |= RE_HAT_LISTS_NOT_NEWLINE;
- syntax &= ~RE_NO_ANCHOR_AT_NEWLINE;
- }
-
- if (cflags & REG_ICASE)
- {
- unsigned i;
-
- preg->translate = (char *) malloc (CHAR_SET_SIZE);
-
- if (preg->translate == NULL)
- return REG_ESPACE;
-
- /* Map any uppercase characters into corresponding lowercase ones. */
- for (i = 0; i < CHAR_SET_SIZE; i++)
- preg->translate[i] = isupper (i) ? tolower (i) : i;
- }
- else
- preg->translate = 0;
-
- preg->no_sub = cflags & REG_NOSUB;
-
- return_value = regex_compile (pattern, strlen (pattern), syntax, preg);
-
-
- if (return_value == 0)
- return 0;
- else if (strcmp (return_value, "Invalid regular expression") == 0)
- return REG_BADPAT;
- else if (strcmp (return_value, "Invalid character class name") == 0)
- return REG_ECTYPE;
- else if (strcmp (return_value, "Trailing backslash") == 0)
- return REG_EESCAPE;
- else if (strcmp (return_value, "Invalid back reference") == 0)
- return REG_ESUBREG;
- else if (strcmp (return_value, "Unmatched [ or [^") == 0)
- return REG_EBRACK;
- else if (strcmp (return_value, "Unmatched ( or \\(") == 0
- || strcmp (return_value, "Unmatched ) or \\)") == 0)
- return REG_EPAREN;
- else if (strcmp (return_value, "Unmatched \\{") == 0)
- return REG_EBRACE;
- else if (strcmp (return_value, "Invalid content of \\{\\}") == 0)
- return REG_BADBR;
- else if (strcmp (return_value, "Invalid range end") == 0)
- return REG_ERANGE;
- else if (strcmp (return_value, "Memory exhausted") == 0)
- return REG_ESPACE;
- else if (strcmp (return_value, "Invalid preceding regular expression") == 0
- || strcmp (return_value,
- "Missing preceding regular expression") == 0)
- return REG_BADRPT;
-
- /* Codes added by GNU. */
-
- else if (strcmp (return_value, "Premature end of regular expression") == 0)
- return REG_EEND;
- else if (strcmp (return_value, "Regular expression too big") == 0)
- return REG_ESIZE;
-else
- return REG_BADPAT;
-}
-
-
-/* regexex matches a buffer full of byte commands for matching (gotten
- from compiling a regular expression) and matches it against a string.
-
- PREG is a regex_t * whose pertinent fields are mentioned below:
-
- It has a char * field called BUFFER which points to
- the byte commands which make up the compiled pattern.
-
- Its char * field TRANSLATE, if not 0, translates all
- ordinary elements in the compiled pattern.
-
- Its int field SYNTAX is the syntax with which the pattern
- was compiled and hence should be matched with.
-
- The long field USED is how many bytes long the compiled
- pattern is.
-
- Its size_t field RE_NSUB contains how many subexpressions
- the pattern has. (This may be useful for choosing a value
- for NMATCH).
-
- If its unsigned NO_SUB bit is set, then regexec will not
- return anything in PMATCH, but only report whether or not
- BUFFER matched STRING.
-
- Regardless of how its unsigned RETURN_DEFAULT_NUM_REGS bit
- is set, regexec only returns in PMATCH information about
- the whole pattern and NMATCH - 1 of its subexpressions.
-
- STRING is the address of the string to be matched.
-
- NMATCH is how many elements of PMATCH regex should fill.
-
- PMATCH is an array of struct regex_t's. If PREG's NO_SUB field
- isn't set, then regexec records in PMATCH[i], for i = 1 to
- PMATCH - 1, which substring of STRING matched the i-th
- subexpression of the regular expression compiled in BUFFER;
- it records in PMATCH[0] that information about all of
- STRING. See the comment for `typedef struct...regmatch_t'
- in regex.h for more details.
-
- The caller must allocate PMATCH to have at least NMATCH
- elements.
-
- EFLAGS is two bits OR-ed together which affect execution. If the
- bit REG_NOTBOL is set, then STRING's first character is not
- the beginning of a line; that means any beginning-of-line
- byte command in BUFFER won't match that first character.
- If the bit REG_NOTEOL is set, then a similar things holds
- for STRING's last character: it isn't the end of a line and
- any end-of-line byte command in BUFFER won't match it.
-
-
- It returns 0 if it matches and REG_NOMATCH if it doesn't. */
-
-int
-regexec (preg, string, nmatch, pmatch, eflags)
- const regex_t *preg;
- const char *string;
- size_t nmatch;
- regmatch_t pmatch[];
- int eflags;
+ char *s;
{
- int return_value;
- unsigned this_op;
- struct re_registers regs;
- regex_t private_preg;
-
- private_preg = *preg;
- private_preg.not_bol = eflags & REG_NOTBOL;
- private_preg.not_eol = eflags & REG_NOTEOL;
-
- private_preg.return_default_num_regs = 0;
-
- if (!private_preg.no_sub && nmatch > 0)
- {
- regs.num_regs = nmatch;
- regs.start = malloc (nmatch * sizeof (int));
- regs.end = malloc (nmatch * sizeof (int));
- }
- else
- {
- regs.num_regs = 0;
- regs.start = NULL;
- regs.end = NULL;
- }
-
- return_value = re_match (&private_preg, string, strlen (string), 0,
- !private_preg.no_sub && nmatch > 0 ? &regs : 0);
-
- if (return_value == strlen (string))
- {
- if (!private_preg.no_sub && nmatch > 0)
- {
- unsigned this_reg;
-
- for (this_reg = 0; this_reg < nmatch; this_reg++)
- {
- pmatch[this_reg].rm_so = regs.start[this_reg];
- pmatch[this_reg].rm_eo = regs.end[this_reg];
- }
- }
- }
- if (regs.start != NULL)
- free (regs.start);
-
- if (regs.end != NULL)
- free (regs.end);
-
- return return_value == strlen (string) ? 0 : REG_NOMATCH;
+ int len = strlen (s);
+ return 0 <= re_search (&re_comp_buf, s, len, 0, len,
+ (struct re_registers *) 0);
}
-
-
-/* Puts the first BUFFER_SIZE - 1 characters in BUFFER (if BUFFER isn't null)
- and terminates it with a null.
-
- Returns one more than the size of MESSAGE. */
-
-static size_t
-put_in_buffer (message, buffer, buffer_size)
- char *message;
- char *buffer;
- size_t buffer_size;
-{
- unsigned this_char;
-
- if (buffer != NULL && buffer_size > 0)
- {
- strncpy (buffer, message, buffer_size - 1);
- buffer[buffer_size - 1] = 0;
- }
-
- return strlen (message) + 1;
-}
-
-
-/* Returns a message corresponding to an error code, ERRCODE, returned
- from either regcomp or regexec. */
-
-size_t
-re_gerror (errcode, preg, errbuf, errbuf_size)
- int errcode;
- const regex_t *preg;
- char *errbuf;
- size_t errbuf_size;
-{
- switch (errcode)
- {
- case REG_NOERROR:
- return put_in_buffer ("Regex message: no error.", errbuf, errbuf_size);
-
- case REG_NOMATCH:
- return put_in_buffer ("Regex error: regexec didn't find a match.",
- errbuf, errbuf_size);
- case REG_BADPAT:
- return put_in_buffer ("Regex error: Invalid regular expression.",
- errbuf, errbuf_size);
- case REG_ECOLLATE:
- return put_in_buffer ("Regex error: (not implemented) Invalid \
-collating character.", errbuf, errbuf_size);
-
- case REG_ECTYPE:
- return put_in_buffer ("Regex error: Invalid character class name.",
- errbuf, errbuf_size);
- case REG_EESCAPE:
- return put_in_buffer ("Regex error: Trailing backslash.",
- errbuf, errbuf_size);
- case REG_ESUBREG:
- return put_in_buffer("Regex error: Invalid back reference.",
- errbuf, errbuf_size);
- case REG_EBRACK:
- return put_in_buffer ("Regex error: Unmatched [ or [^.",
- errbuf, errbuf_size);
- case REG_EPAREN:
- return put_in_buffer ("Regex error: Unmatched parenthesis.",
- errbuf, errbuf_size);
- case REG_EBRACE:
- return put_in_buffer ("Regex error: Unmatched \\{.",
- errbuf, errbuf_size);
- case REG_BADBR:
- return put_in_buffer ("Regex error: Invalid content of \\{\\}.",
- errbuf, errbuf_size);
- case REG_ERANGE:
- return put_in_buffer ("Regex error: Invalid range end.",
- errbuf, errbuf_size);
- case REG_ESPACE:
- return put_in_buffer ("Regex error: Ran out of memory.",
- errbuf, errbuf_size);
- case REG_BADRPT:
- return put_in_buffer ("Regex error: Preceding regular expression \
-either missing or not simple.", errbuf, errbuf_size);
-
- case REG_EEND:
- return put_in_buffer ("Regex error: Regular expression ended \
-prematurely.", errbuf, errbuf_size);
-
- case REG_ESIZE:
- return put_in_buffer ("Regex error: Excessively large regular \
-expression.", errbuf, errbuf_size);
- }
-}
-
-
-void
-re_gfree (preg)
- regex_t *preg;
-{
- if (preg->buffer != NULL)
- free (preg->buffer);
- preg->buffer = NULL;
-
- preg->allocated = 0;
- preg->used = 0;
-
- if (preg->fastmap != NULL)
- free (preg->fastmap);
- preg->fastmap = NULL;
-
- preg->fastmap_accurate = 0;
-
- if (preg->translate != NULL)
- free (preg->translate);
- preg->translate = NULL;
-}
-
-#endif /* not emacs */
-
+#endif /* not emacs && not GAWK */
#ifdef test
+#ifdef atarist
+long _stksize = 2L; /* reserve memory for stack */
+#endif
#include <stdio.h>
/* Indexed by a character, gives the upper case equivalent of the
@@ -5451,25 +2728,25 @@ char upcase[0400] =
#include "tests.h"
-typedef enum {extended_test, basic_test, other_test, interface_test} test_type;
+typedef enum { extended_test, basic_test } test_type;
/* Use this to run the tests we've thought of. */
void
main ()
{
- test_type t = interface_test;
-
+ test_type t = extended_test;
+
if (t == basic_test)
- test_posix_basic ();
+ {
+ printf ("Running basic tests:\n\n");
+ test_posix_basic ();
+ }
else if (t == extended_test)
- test_posix_extended ();
- else if (t == other_test)
- test_others ();
- else if (t == interface_test)
- test_posix_c_interface ();
-
- exit (0);
+ {
+ printf ("Running extended tests:\n\n");
+ test_posix_extended ();
+ }
}
#else /* not canned */
@@ -5489,7 +2766,7 @@ main (argc, argv)
/* Allow a command argument to specify the style of syntax. */
if (argc > 1)
- re_set_syntax (atoi (argv[1]));
+ obscure_syntax = atol (argv[1]);
buf.allocated = 40;
buf.buffer = (char *) malloc (buf.allocated);
@@ -5528,7 +2805,7 @@ main (argc, argv)
#endif
-#if 0
+#ifdef NOTDEF
print_buf (bufp)
struct re_pattern_buffer *bufp;
{
@@ -5552,8 +2829,7 @@ print_buf (bufp)
printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
}
-#endif /* 0 */
-
+#endif /* NOTDEF */
printchar (c)
char c;
@@ -5576,13 +2852,3 @@ error (string)
exit (1);
}
#endif /* test */
-
-
-
-/*
-Local variables:
-make-backup-files: t
-version-control: t
-trim-versions-without-asking: nil
-End:
-*/
diff --git a/regex.h b/regex.h
index 6f735156..e0977bcb 100644
--- a/regex.h
+++ b/regex.h
@@ -1,11 +1,10 @@
/* Definitions for data structures callers pass the regex library.
- Requires sys/types.h for size_t.
- Version 0.1.
- Copyright (C) 1985, 89, 90, 91 Free Software Foundation, Inc.
+
+ Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
+ the Free Software Foundation; either version 1, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
@@ -21,554 +20,233 @@
#ifndef __REGEXP_LIBRARY
#define __REGEXP_LIBRARY
+/* Define number of parens for which we record the beginnings and ends.
+ This affects how much space the `struct re_registers' type takes up. */
+#ifndef RE_NREGS
+#define RE_NREGS 10
+#endif
+
+#define BYTEWIDTH 8
+
+
+/* Maximum number of duplicates an interval can allow. */
+#define RE_DUP_MAX ((1 << 15) - 1)
-/* This defines the particular regexp syntax to use. */
-extern int obscure_syntax;
+/* This defines the various regexp syntaxes. */
+extern long obscure_syntax;
/* The following bits are used in the obscure_syntax variable to choose among
alternative regexp syntaxes. */
-/* If this bit is set, (...) defines a group, and \( and \) are literals.
- If not set, \(...\) defines a group, and ( and ) are literals. */
-#define RE_NO_BK_PARENS 1
+/* If this bit is set, plain parentheses serve as grouping, and backslash
+ parentheses are needed for literal searching.
+ If not set, backslash-parentheses are grouping, and plain parentheses
+ are for literal searching. */
+#define RE_NO_BK_PARENS 1L
-/* If this bit is set, then | is an alternation operator, and \| is literal.
- If not set, then \| is an alternation operator, and | is literal. */
-#define RE_NO_BK_VBAR (1 << 1)
+/* If this bit is set, plain | serves as the `or'-operator, and \| is a
+ literal.
+ If not set, \| serves as the `or'-operator, and | is a literal. */
+#define RE_NO_BK_VBAR (1L << 1)
-/* If this bit is not set, then + and ? are operators, and \+ and \? are
- literals.
- If set, then \+ and \? are operators and + and ? are literals. */
-#define RE_BK_PLUS_QM (1 << 2)
-
-/* If this bit is set, then | binds tighter than ^ or $.
+/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are
+ literals.
+ If set, \+, \? are operators and plain +, ? are literals. */
+#define RE_BK_PLUS_QM (1L << 2)
+
+/* If this bit is set, | binds tighter than ^ or $.
If not set, the contrary. */
-#define RE_TIGHT_ALT (1 << 3)
-
-/* If this bit is set, newline is an alternation operator.
- If not set, then newline is literal. */
-#define RE_NEWLINE_ALT (1 << 4)
-
-/* If this bit is set, then special characters are always special
- regardless of where they are in the pattern.
- If this bit is not set, then special characters are special only in
- some contexts; otherwise they are ordinary. Specifically,
-
- * + ? and intervals are only special when not after the beginning,
- open-group, or alternation operator. */
-#define RE_CONTEXT_INDEP_OPS (1 << 5)
-
-/* If this bit is not set, then \ inside a bracket expression is literal.
- If set, then such a \ quotes the following character. */
-#define RE_AWK_CLASS_HACK (1 << 6)
-
-/* If this bit is set, then either \{...\} or {...} defines an
- interval, depending on RE_NO_BK_BRACES.
- If not set, then \{, \}, {, and } are literals. */
-#define RE_INTERVALS (1 << 7)
-
-/* If this bit is not set, then \{ and \} defines an interval,
- and { and } are literals.
- If set, then { and } defines an interval, and \{ and \} are literals. */
-#define RE_NO_BK_BRACES (1 << 8)
-
-/* If this bit is set, then character classes are supported. They are:
+#define RE_TIGHT_VBAR (1L << 3)
+
+/* If this bit is set, then treat newline as an OR operator.
+ If not set, treat it as a normal character. */
+#define RE_NEWLINE_OR (1L << 4)
+
+/* If this bit is not set, then special characters may act as normal
+ characters in some contexts. Specifically, this applies to:
+ ^ -- only special at the beginning, or after ( or |;
+ $ -- only special at the end, or before ) or |;
+ *, +, ? -- only special when not after the beginning, (, or |.
+ If this bit is set, special characters (such as *, ^, and $)
+ always have their special meaning regardless of the surrounding
+ context. */
+#define RE_CONTEXT_INDEP_OPS (1L << 5)
+
+/* If this bit is not set, then \ before anything inside [ and ] is taken as
+ a real \.
+ If set, then such a \ escapes the following character. This is a
+ special case for awk. */
+#define RE_AWK_CLASS_HACK (1L << 6)
+
+/* If this bit is set, then \{ and \} or { and } serve as interval operators.
+ If not set, then \{ and \} and { and } are treated as literals. */
+#define RE_INTERVALS (1L << 7)
+
+/* If this bit is not set, then \{ and \} serve as interval operators and
+ { and } are literals.
+ If set, then { and } serve as interval operators and \{ and \} are
+ literals. */
+#define RE_NO_BK_CURLY_BRACES (1L << 8)
+
+/* If this bit is set, then character classes are supported; they are:
[:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
[:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
If not set, then character classes are not supported. */
-#define RE_CHAR_CLASSES (1 << 9)
+#define RE_CHAR_CLASSES (1L << 9)
-/* If this bit is set, then period doesn't match a null.
- If not set, then it does. */
-#define RE_DOT_NOT_NULL (1 << 10)
+/* If this bit is set, then the dot re doesn't match a null byte.
+ If not set, it does. */
+#define RE_DOT_NOT_NULL (1L << 10)
/* If this bit is set, then [^...] doesn't match a newline.
- If not set, then it does. */
-#define RE_HAT_LISTS_NOT_NEWLINE (1 << 11)
-
-/* If this bit is set, then back references are not recognized.
- If not set, then they are. */
-#define RE_NO_BK_REFS (1 << 12)
-
-/* If this bit is set, then all back references must refer to a preceding
- subexpression.
- If not set, then a back reference to a nonexistent subexpression is
- treated as literal characters. */
-#define RE_NO_MISSING_BK_REF (1 << 13)
-
-/* If this bit is set, then *, +, ?, and { cannot be first in an re or
- immediately after OR or BEGINGROUP. Furthermore, OR cannot be
- first or last in an re, or immediately follow another OR or
- BEGINGROUP. Also, ^ cannot appear in a nonleading position and $
- cannot appear in a nontrailing position (outside of bracket
- expressions, that is). */
-#define RE_CONTEXT_INVALID_OPS (1 << 14)
-
-/* If this bit is set, then +, ? and | aren't recognized as operators.
- If not set, then they are. */
-#define RE_LIMITED_OPS (1 << 15)
-
-/* If this bit is set, then an ending range point has to collate higher
- than or equal to the starting range point.
- If not set, then when the ending range point collates higher than the
- starting range point, the range is considered to be empty. */
-#define RE_NO_EMPTY_RANGES (1 << 16)
-
-/* If this bit is set, then neither the match-beginning-of-line nor
- the match-end-of-line operator match a newline.
- If not set, then these operators can match a newline. */
-#define RE_NO_ANCHOR_AT_NEWLINE (1 << 17)
-
-/* If this bit is set, then you can't have empty groups.
- If not set, then you can. */
-#define RE_NO_EMPTY_GROUPS (1 << 18)
-
-/* If this bit is set, then you can't have empty alternatives.
- If not set, then you can. */
-#define RE_NO_EMPTY_ALTS (1 << 19)
-
-/* If this bit is set, then you can't have more than one non-interval
- repetition operators (i.e., `*', `+' and `?') in a row, e.g., as in
- `a*+?*'.
- If not set, then you can. */
-#define RE_NO_CONSECUTIVE_REPEATS (1 << 20)
-
-
-/* If this bit is set, then ignore anchors inside groups which in turn
- are operated on by repetion operators.
- If not set, then don't. */
-#define RE_REPEATED_ANCHORS_AWAY (1 << 21)
-
-/* If this bit is set, then the match-any-character operator (.) matches
- a newline.
- If not set, then it doesn't. */
-#define RE_DOT_NEWLINE (1 << 22)
-
-/* If this bit is set, then '^' and '$' can be anchors only at the
- beginning or the end of the pattern.
- If not set, then they don't have to be at the beginning or end of the
- pattern to be anchors. */
-#define RE_ANCHORS_ONLY_AT_ENDS (1 << 23)
-
-/* If this bit is set, then Regex considers an unmatched close-group
- operator to be the ordinary character parenthesis.
- If not set, then an unmatched close-group operator is invalid. */
-#define RE_UNMATCHED_RIGHT_PAREN_ORD (1 << 24)
-
-/* If this bit is set, then ^ cannot appear in a nonleading position and
- $ cannot appear in a nontrailing position (outside of bracket
- expressions, that is). */
-#define RE_CONTEXT_INVALID_ANCHORS (1 << 25)
-
-/* If this bit is set, then ^ and $ are always anchors, regardless of
- their positions in a regular expression.
- If this bit is not set, then ^ is an anchor only if in a leading
- position and $ is one only if in a trailing position. Specifically,
-
- ^ is in a leading position if at the beginning of a regular
- expression , or after an open-group or an alternation operator;
-
- $ is in a trailing position if at the end of a regular
- expression, or before close-group or an alternation operator.
-*/
-#define RE_CONTEXT_INDEP_ANCHORS (1 << 26)
-
-/* If this bit is set, then the searching and matching routines will
- allocate enough register space to accommodate the number of groups
- in the regular expression.
- If this bit is not set, then the user must allocate the space. */
-#define RE_ALLOCATE_REGISTERS (1 << 27)
-
-
-/* Define combinations of the above bits for the standard possibilities. */
-#define RE_SYNTAX_EMACS 0
-
-#define RE_SYNTAX_AWK \
- (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_ALLOCATE_REGISTERS \
- | RE_AWK_CLASS_HACK)
-
-#define RE_SYNTAX_POSIX_AWK RE_SYNTAX_AWK
-
-#define RE_SYNTAX_GREP \
- (RE_BK_PLUS_QM | RE_NEWLINE_ALT | RE_ALLOCATE_REGISTERS)
-
-#define RE_SYNTAX_EGREP \
- (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS \
- | RE_CONTEXT_INDEP_ANCHORS | RE_NEWLINE_ALT | RE_ALLOCATE_REGISTERS)
-
-#define RE_SYNTAX_POSIX_BASIC \
- (RE_INTERVALS | RE_CHAR_CLASSES | RE_DOT_NOT_NULL \
- | RE_NO_MISSING_BK_REF | RE_LIMITED_OPS | RE_NO_EMPTY_RANGES \
- | RE_NO_ANCHOR_AT_NEWLINE | RE_DOT_NEWLINE | RE_ALLOCATE_REGISTERS)
-
-#define RE_SYNTAX_POSIX_EXTENDED \
- (RE_INTERVALS | RE_NO_BK_BRACES | RE_NO_BK_VBAR \
- | RE_NO_BK_PARENS | RE_CHAR_CLASSES | RE_CONTEXT_INVALID_OPS \
- | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES | RE_UNMATCHED_RIGHT_PAREN_ORD \
- | RE_DOT_NOT_NULL | RE_NO_EMPTY_GROUPS | RE_NO_EMPTY_ALTS \
- | RE_NO_ANCHOR_AT_NEWLINE | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS\
- | RE_ALLOCATE_REGISTERS)
-
-
-
-/* Maximum number of duplicates an interval can allow. */
-#define RE_DUP_MAX ((1 << 15) - 1)
-
-
-/* POSIX cflags bits (i.e., information for regcomp). */
-
-/* If this bit is set, then use extended regular expression syntax.
- If not set, then use basic regular expression syntax. */
-#define REG_EXTENDED 1
-
-/* If this bit is set, then (line 526, p.687 of POSIX 1003.2/D10)
- newline loses its special significance; i.e., anchors do not match at
- newlines in the string.
- If not set, then anchors do match at newlines. */
-#define REG_NEWLINE (1 << 1)
+ If not set, it does. */
+#define RE_HAT_NOT_NEWLINE (1L << 11)
-/* If this bit is set, then ignore case when matching.
- If not set, then case is significant. */
-#define REG_ICASE (1 << 2)
-
-/* If this bit is set, then report only success or fail in regexec ().
- If not set, then return nonzero indicating either not match or an error. */
-#define REG_NOSUB (1 << 3)
+/* If this bit is set, back references are not recognized.
+ If not set, they are. */
+#define RE_NO_BK_REFS (1L << 12)
+/* If this bit is set, back references must refer to a preceding
+ subexpression. If not set, a back reference to a nonexistent
+ subexpression is treated as literal characters. */
+#define RE_NO_EMPTY_BK_REF (1L << 13)
-/* POSIX eflags bits (i.e., information for regexec). */
+/* If this bit is set, bracket expressions can't be empty.
+ If it is not set, they can be empty. */
+#define RE_NO_EMPTY_BRACKETS (1L << 14)
-/* If this bit is set, then the string's first character is not the
- beginning of a line, so the beginning-of-line anchor shouldn't
- match it.
- If not set, then the string's first character can match the
- beginning-of-line anchor. */
-#define REG_NOTBOL 1
+/* If this bit is set, then *, +, ? and { cannot be first in an re or
+ immediately after a |, or a (. Furthermore, a | cannot be first or
+ last in an re, or immediately follow another | or a (. Also, a ^
+ cannot appear in a nonleading position and a $ cannot appear in a
+ nontrailing position (outside of bracket expressions, that is). */
+#define RE_CONTEXTUAL_INVALID_OPS (1L << 15)
-/* If this bit is set, then the string's last character is not the
- end of a line, so the end-of-line anchor shouldn't match it.
- If not set, then the string's last character can match the
- end-of-line anchor. */
-#define REG_NOTEOL (1 << 1)
-
-
-/* POSIX regexec return error value. */
-
-#define REG_NOMATCH 1 /* Didn't find a match. */
-
-/* POSIX regcomp return error codes. */
-
-#define REG_BADPAT 2 /* Found an invalid pattern. */
-#define REG_ECOLLATE 3 /* Not implemented. */
-#define REG_ECTYPE 4 /* Found an invalid character class name. */
-#define REG_EESCAPE 5 /* Found a trailing backslash. */
-#define REG_ESUBREG 6 /* Found an invalid back reference. */
-#define REG_EBRACK 7 /* Found an unmatched left bracket. */
-#define REG_EPAREN 8 /* Found a parentheses imbalance. */
-#define REG_EBRACE 9 /* Found an unmatched \{. */
-#define REG_BADBR 10 /* Found invalid contents of \{\}. */
-#define REG_ERANGE 11 /* Found invalid range end. */
-#define REG_ESPACE 12 /* Ran out of memory. */
-#define REG_BADRPT 13 /* No preceding re for repetition op. */
-#define REG_ENEWLINE 14 /* Not implemented. */
-
-/* Some regcomp codes we've added. */
-#define REG_NOERROR 0 /* No error. */
-#define REG_EEND 15
-#define REG_ESIZE 16
-
-
-
-
-/* This data structure represents a compiled pattern. Before calling
- the pattern compiler, the fields `buffer', `allocated', `fastmap',
- `translate', and `no_sub' can be set. After the pattern has been
- compiled, the `re_nsub' field is available. All other fields are
- private to the regex routines. */
+/* If this bit is set, then +, ? and | aren't recognized as operators.
+ If it's not, they are. */
+#define RE_LIMITED_OPS (1L << 16)
-/* If this changes, change documentation in regex.texinfo. */
+/* If this bit is set, then an ending range point has to collate higher
+ than or equal to the starting range point.
+ If it's not set, then when the ending range point collates lower
+ than the starting range point, the range is just considered empty. */
+#define RE_NO_EMPTY_RANGES (1L << 17)
+
+/* If this bit is set, then a hyphen (-) can't be an ending range point.
+ If it isn't, then it can. */
+#define RE_NO_HYPHEN_RANGE_END (1L << 18)
+
+
+/* Define combinations of bits for the standard possibilities. */
+#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INDEP_OPS)
+#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_AWK_CLASS_HACK)
+#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR)
+#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
+#define RE_SYNTAX_EMACS 0
+#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM \
+ | RE_CHAR_CLASSES | RE_DOT_NOT_NULL \
+ | RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF \
+ | RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS \
+ | RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END)
+
+#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES \
+ | RE_NO_BK_VBAR | RE_NO_BK_PARENS \
+ | RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES \
+ | RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \
+ | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES \
+ | RE_NO_HYPHEN_RANGE_END)
+
+
+/* This data structure is used to represent a compiled pattern. */
struct re_pattern_buffer
-{
- /* Space that holds the compiled pattern. */
- char *buffer;
-
- /* Number of bytes to which `buffer' points. */
- long allocated;
-
- /* Number of bytes actually used in `buffer'. */
- long used;
-
- /* Syntax setting with which the pattern was compiled. */
- int syntax;
-
- /* Pointer to a fastmap, if any, otherwise zero. re_search uses
- the fastmap, if there is one, to skip over impossible
- starting points for matches. */
- char *fastmap;
-
- /* Either a translate table to apply to all characters before
- comparing them, or zero for no translation. The translation
- is applied to a pattern when it is compiled and to a string
- when it is matched. */
- char *translate;
-
- /* Number of subexpressions found by the compiler. */
- size_t re_nsub;
-
- /* Set to 1 by re_compile_fastmap if this pattern can match the
- null string; 0 prevents the searcher from matching it with
- the null string. Set to 2 if it might match the null string
- either at the end of a search range or just before a
- character listed in the fastmap. */
- char can_be_null;
-
-
- /* The remaining fields are all one-bit booleans. */
-
- /* Set to zero when regex_compile compiles a pattern; set to one
- by re_compile_fastmap when it updates the fastmap, if any. */
- unsigned fastmap_accurate : 1;
-
- /* If set, regexec reports only success or failure and does not
- return anything in pmatch[]. */
- unsigned no_sub : 1;
-
- /* If set, a beginning-of-line anchor never matches. */
- unsigned not_bol : 1;
-
- /* Similarly for an end-of-line anchor. */
- unsigned not_eol : 1;
-
- /* If set, and the regs argument is nonzero, the GNU
- matching and searching functions return information
- for as many registers as needed to report about the
- whole pattern and all its subexpressions. If not set,
- and the regs argument is nonzero, then the functions
- return information for regs->num_regs registers. */
- unsigned return_default_num_regs : 1;
-};
-
-typedef struct re_pattern_buffer regex_t;
-
-
-/* search.c (search_buffer) in Emacs needs this one value. It is
- defined both in `regex.c' and here. */
+ {
+ char *buffer; /* Space holding the compiled pattern commands. */
+ long allocated; /* Size of space that `buffer' points to. */
+ long used; /* Length of portion of buffer actually occupied */
+ char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
+ /* re_search uses the fastmap, if there is one,
+ to skip over totally implausible characters. */
+ char *translate; /* Translate table to apply to all characters before
+ comparing, or zero for no translation.
+ The translation is applied to a pattern when it is
+ compiled and to data when it is matched. */
+ char fastmap_accurate;
+ /* Set to zero when a new pattern is stored,
+ set to one when the fastmap is updated from it. */
+ char can_be_null; /* Set to one by compiling fastmap
+ if this pattern might match the null string.
+ It does not necessarily match the null string
+ in that case, but if this is zero, it cannot.
+ 2 as value means can match null string
+ but at end of range or before a character
+ listed in the fastmap. */
+ };
+
+
+/* search.c (search_buffer) needs this one value. It is defined both in
+ regex.c and here. */
#define RE_EXACTN_VALUE 1
-
-/* struct re_registers: Structure to store register contents data in.
-
- (If change comments here, change in regex.texinfo also.)
-
- Some groups in a regular expression match (possibly empty) substrings
- of the string that regular expression matched. The matcher remembers
- the beginning and ending point of the substring matched by each
- group. To get what they matched, pass the address of a structure of
- this type to a GNU matching or searching function.
-
- When you call a GNU matching and searching function, it stores
- information into this structure according to the following (in all
- examples below, `(' represents the open-group and `)' the
- close-group operator):
-
- If the regular expression has an i-th group that matches a substring
- of string, then the function sets REGS->start[i] to the index in
- string where the substring matched by the i-th group begins, and
- REGS->end[i] to the index just beyond that substring's end. The
- function sets REGS->start[0] and REGS->end[0] to analogous
- information about the entire pattern.
-
- For example, when you match the pattern `((a)(b))' with the string
- `ab', you get:
-
- 0 in REGS->start[0] and 2 in REGS->end[0]
- 0 in REGS->start[1] and 2 in REGS->end[1]
- 0 in REGS->start[2] and 1 in REGS->end[2]
- 1 in REGS->start[3] and 2 in REGS->end[3]
-
- If a group matches more than once (as it might if followed by, e.g.,
- a repetition operator), then the function reports the information
- about what the group @emph{last matched.
-
- For example, when you match the string `aa' with the pattern `(a)*',
- you get:
-
- 0 in REGS->start[0] and 2 in REGS->end[0]
- 1 in REGS->start[1] and 2 in REGS->end[1]
-
-
- If the i-th group does not participate in a successful match, e.g.,
- it is an alternative not taken or a repetition operator allows zero
- repetitions of it, then the function sets REGS->start[i] and
- REGS->end[i] to -1.
-
- For example, when you match the string `b' with the pattern `(a)*b',
- you get:
-
- 0 in REGS->start[0] and 1 in REGS->end[0]
- -1 in REGS->start[1] and -1 in REGS->end[1]
+/* Structure to store register contents data in.
+ Pass the address of such a structure as an argument to re_match, etc.,
+ if you want this information back.
- If the i-th group matches a zero-length string, then the function
- sets REGS->start[i] and REGS->end[i] to the index just beyond that
- zero-length string.
-
- For example, when you match the string `b' with the pattern `(a*)b',
- you get:
-
- 0 in REGS->start[0] and 1 in REGS->end[0]
- 0 in REGS->start[1] and 0 in REGS->end[1]
-
- The function sets REGS->start[0] and REGS->end[0] to analogous
- information about the entire pattern.
-
- For example, when you match the empty string with the pattern `(a*)',
- you get:
-
- 0 in REGS->start[0] and 0 in REGS->end[0]
- 0 in REGS->start[1] and 0 in REGS->end[1]
-
- If an i-th group contains a j-th group and the function reports a
- match of the i-th group, then it records in REGS->start[j] and
- REGS->end[j] the last match (if it matched) of the j-th group.
-
- For example, when you match the string `abb' with the pattern
- `((a*)b)*, group 2' last matches the empty string, so you get:
-
- 0 in REGS->start[0] and 3 in REGS->end[0]
- 2 in REGS->start[1] and 3 in REGS->end[1]
- 2 in REGS->start[2] and 2 in REGS->end[2]
-
- When you match the string `abb' with the pattern `((a)*b)*', group 2
- doesn't participate in the last match, so you get:
-
- 0 in REGS->start[0] and 3 in REGS->end[0]
- 2 in REGS->start[1] and 3 in REGS->end[1]
- 0 in REGS->start[2] and 1 in REGS->end[2]
-
- If an i-th group contains a j-th group and the function sets
- REGS->start[i] and REGS->end[i] to -1, then it also sets REGS->start[j]
- and REGS->end[j] to -1.
-
- For example, when you match the string `c' with the pattern
- `((a)*b)*c', you get:
-
- 0 in REGS->start[0] and 1 in REGS->end[0]
- -1 in REGS->start[1] and -1 in REGS->end[1]
- -1 in REGS->start[2] and -1 in REGS->end[2]
-*/
+ For i from 1 to RE_NREGS - 1, start[i] records the starting index in
+ the string of where the ith subexpression matched, and end[i] records
+ one after the ending index. start[0] and end[0] are analogous, for
+ the entire pattern. */
struct re_registers
-{
- unsigned num_regs;
- int *start;
- int *end;
-};
-
-
-/* POSIX specification for registers. See comments for struct
- re_registers for how this is used and read `POSIX' for `GNU',
- `PMATCH' for `REGS', `PMATCH[i].rm_so' for `REGS->start' and
- `PMATCH[i].rm_eo' for `REGS->end'. */
-
-typedef off_t regoff_t;
-
-typedef struct
-{
- regoff_t rm_so; /* Byte offset from string's start to substring' start. */
- regoff_t rm_eo; /* Byte offset from string's end to substring' end. */
-} regmatch_t;
+ {
+ int start[RE_NREGS];
+ int end[RE_NREGS];
+ };
#ifdef __STDC__
-/* Compile the regular expression PATTERN, with length LENGTH
- and syntax given by the global `obscure_syntax', into the buffer
- BUFFER. Return NULL if successful, and an error string if not. */
-
-extern char *re_compile_pattern (const char *pattern, const int length,
- struct re_pattern_buffer *buffer);
-
-
-/* Compile a fastmap for the compiled pattern in BUFFER; used to
- accelerate searches. Return 0 if successful and -2 if was an
- internal error. */
-
-extern int re_compile_fastmap (struct re_pattern_buffer *buffer);
-
-
-/* Search in the string STRING (with length LENGTH) for the pattern
- compiled into BUFFER. Start searching at position START, for RANGE
- characters. Return the starting position of the match or -1 for no
- match, or -2 for an internal error. Also return register
- information in REGS (if REGS is non-null). */
-
-extern int re_search (struct re_pattern_buffer *buffer,
- const char *string, const int length,
- const int start, const int range,
- struct re_registers *regs);
-
-/* Like `re_search', but search in the concatenation of STRING1 and
- STRING2. Also, stop searching at index START + STOP. */
-
-extern int re_search_2 (struct re_pattern_buffer *buffer,
- const char *string1, const int length1,
- const char *string2, const int length2,
- const int start, const int range,
- struct re_registers *regs,
- const int stop);
-
-/* Like `re_search', but return how many characters in STRING the regexp
- in BUFFER matched, starting at position START. */
-
-extern int re_match (const struct re_pattern_buffer *buffer,
- const char *string, const int length,
- const int start, struct re_registers *regs);
-
-
-/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
-
-extern int re_match_2 (const struct re_pattern_buffer *buffer,
- const char *string1, const int length1,
- const char *string2, const int length2,
- const int start,
- struct re_registers *regs,
- const int stop);
-
-
+extern char *re_compile_pattern (char *, size_t, struct re_pattern_buffer *);
+/* Is this really advertised? */
+extern void re_compile_fastmap (struct re_pattern_buffer *);
+extern int re_search (struct re_pattern_buffer *, char*, int, int, int,
+ struct re_registers *);
+extern int re_search_2 (struct re_pattern_buffer *, char *, int,
+ char *, int, int, int,
+ struct re_registers *, int);
+extern int re_match (struct re_pattern_buffer *, char *, int, int,
+ struct re_registers *);
+extern int re_match_2 (struct re_pattern_buffer *, char *, int,
+ char *, int, int, struct re_registers *, int);
+extern long re_set_syntax (long syntax);
+
+#ifndef GAWK
/* 4.2 bsd compatibility. */
-extern char *re_comp (const char *);
-extern int re_exec (const char *);
-
-extern int regcomp (regex_t *preg, const char *pattern, int cflags);
-extern int regexec (const regex_t *preg, const char *string, size_t nmatch,
- regmatch_t pmatch[], int eflags);
-extern size_t re_gerror (int errcode, const regex_t *preg, char* errbuf,
- size_t errbuf_size);
-extern void re_gfree (regex_t *preg);
-
-#else /* not __STDC__ */
+extern char *re_comp (char *);
+extern int re_exec (char *);
+#endif
-/* Support old C compilers. */
-#define const
+#else /* !__STDC__ */
extern char *re_compile_pattern ();
+/* Is this really advertised? */
+extern void re_compile_fastmap ();
extern int re_search (), re_search_2 ();
extern int re_match (), re_match_2 ();
+extern long re_set_syntax();
+#ifndef GAWK
/* 4.2 bsd compatibility. */
extern char *re_comp ();
extern int re_exec ();
-
-extern int regcomp ();
-extern int regexec ();
-extern size_t re_gerror ();
-extern void re_gfree ();
+#endif
#endif /* __STDC__ */
@@ -577,14 +255,4 @@ extern void re_gfree ();
extern char *re_syntax_table;
#endif
-#endif /* not __REGEXP_LIBRARY */
-
-
-
-/*
-Local variables:
-make-backup-files: t
-version-control: t
-trim-versions-without-asking: nil
-End:
-*/
+#endif /* !__REGEXP_LIBRARY */
diff --git a/support/makeinfo.patch b/support/makeinfo.patch
deleted file mode 100644
index 7d2d307f..00000000
--- a/support/makeinfo.patch
+++ /dev/null
@@ -1,233 +0,0 @@
-*** makeinfo.c.dist Thu Sep 26 21:45:04 1991
---- makeinfo.c Thu Oct 24 21:04:19 1991
-***************
-*** 73,78 ****
---- 73,79 ----
- /* Forward declarations. */
- unsigned char *xmalloc (), *xrealloc ();
- extern int in_fixed_width_font;
-+ extern int the_current_enumerate_type;
-
- /* Some systems don't declare this function in pwd.h. */
- struct passwd *getpwnam ();
-***************
-*** 925,931 ****
- extern int executing_string;
- FSTACK *temp = filestack;
-
-! if (!filestack)
- abort (); /* My fault. I wonder what I did? */
-
- /* Make sure that commands with braces have been satisfied. */
---- 926,932 ----
- extern int executing_string;
- FSTACK *temp = filestack;
-
-! if (!filestack && !executing_string)
- abort (); /* My fault. I wonder what I did? */
-
- /* Make sure that commands with braces have been satisfied. */
-***************
-*** 941,947 ****
-
- /* Pop the stack. */
- filestack = filestack->next;
-! free (temp);
- pop_node_filename ();
- }
-
---- 942,949 ----
-
- /* Pop the stack. */
- filestack = filestack->next;
-! if (temp)
-! free (temp);
- pop_node_filename ();
- }
-
-***************
-*** 2586,2593 ****
- case enumerate:
- inhibit_paragraph_indentation = 0;
- current_indent += default_indentation_increment;
-- start_numbering (1);
- filling_enabled = indented_fill = true;
- break;
-
- case alphaenumerate:
---- 2588,2610 ----
- case enumerate:
- inhibit_paragraph_indentation = 0;
- current_indent += default_indentation_increment;
- filling_enabled = indented_fill = true;
-+
-+ if (*(insertion_stack->item_function))
-+ {
-+ if (isalpha (*(insertion_stack->item_function)))
-+ {
-+ start_lettering (*(insertion_stack->item_function));
-+ }
-+ else
-+ {
-+ int n = atoi (insertion_stack->item_function);
-+
-+ start_numbering (n);
-+ }
-+ }
-+ else
-+ start_numbering (1);
- break;
-
- case alphaenumerate:
-***************
-*** 2687,2693 ****
- break;
-
- case enumerate:
-! stop_numbering ();
- current_indent -= default_indentation_increment;
- break;
-
---- 2704,2714 ----
- break;
-
- case enumerate:
-! if (isalpha (the_current_enumerate_type))
-! stop_lettering ();
-! else
-! stop_numbering ();
-!
- current_indent -= default_indentation_increment;
- break;
-
-***************
-*** 2770,2775 ****
---- 2791,2800 ----
- int letter_offset = 0;
- int the_current_letter = 0;
-
-+ int enumerate_type_stack[max_ns];
-+ int enumerate_type_offset = 0;
-+ int the_current_enumerate_type = '1';
-+
- start_numbering (at_number)
- int at_number;
- {
-***************
-*** 2778,2783 ****
---- 2803,2815 ----
- line_error ("Enumeration stack overflow");
- return;
- }
-+ if (enumerate_type_offset + 1 == max_ns)
-+ {
-+ line_error ("Enumeration stack overflow");
-+ return;
-+ }
-+ the_current_enumerate_type =
-+ enumerate_type_stack[enumerate_type_offset++] = '1';
- number_stack[number_offset++] = the_current_number;
- the_current_number = at_number;
- }
-***************
-*** 2787,2792 ****
---- 2819,2831 ----
- the_current_number = number_stack[--number_offset];
- if (number_offset < 0)
- number_offset = 0;
-+ --enumerate_type_offset;
-+ the_current_enumerate_type = enumerate_type_stack[enumerate_type_offset-1];
-+ if (enumerate_type_offset < 0)
-+ {
-+ enumerate_type_offset = 0;
-+ the_current_enumerate_type = '1';
-+ }
- }
-
- start_lettering (at_letter)
-***************
-*** 2797,2802 ****
---- 2836,2848 ----
- line_error ("Alpha-enumeration stack overflow");
- return;
- }
-+ if (enumerate_type_offset + 1 == max_ns)
-+ {
-+ line_error ("Enumeration stack overflow");
-+ return;
-+ }
-+ the_current_enumerate_type =
-+ enumerate_type_stack[enumerate_type_offset++] = 'a';
- letter_stack[letter_offset++] = the_current_letter;
- the_current_letter = at_letter;
- }
-***************
-*** 2806,2811 ****
---- 2852,2864 ----
- the_current_letter = letter_stack[--letter_offset];
- if (letter_offset < 0)
- letter_offset = 0;
-+ --enumerate_type_offset;
-+ the_current_enumerate_type = enumerate_type_stack[enumerate_type_offset-1];
-+ if (enumerate_type_offset < 0)
-+ {
-+ enumerate_type_offset = 0;
-+ the_current_enumerate_type = '1';
-+ }
- }
-
- /* Place a letter into the output stream. */
-***************
-*** 4307,4319 ****
- temp++;
- else
- {
-! if (input_text[temp] == '.' ||
-! input_text[temp] == ',' ||
- input_text[temp] == '\t')
- return;
- else
- {
-! line_error ("Cross-reference must be terminated with a period or a comma");
- return;
- }
- }
---- 4360,4372 ----
- temp++;
- else
- {
-! if (member (input_text[temp], ".,;:") ||
-! (px_ref_flag && input_text[temp] == ')' ) ||
- input_text[temp] == '\t')
- return;
- else
- {
-! line_error ("Cross-reference must be terminated with punctuation");
- return;
- }
- }
-***************
-*** 4729,4736 ****
- output_column++;
- }
- else if (current_insertion_type () == enumerate)
-! number_item ();
-! else
- letter_item ();
-
- /* Special hack. This makes close paragraph ignore you until
---- 4782,4794 ----
- output_column++;
- }
- else if (current_insertion_type () == enumerate)
-! {
-! if (isalpha (the_current_enumerate_type))
-! letter_item ();
-! else
-! number_item ();
-! }
-! else /* alphaenumerate or capsenumerate */
- letter_item ();
-
- /* Special hack. This makes close paragraph ignore you until
-
diff --git a/support/texindex.c b/support/texindex.c
index 0933aa62..02838f17 100644
--- a/support/texindex.c
+++ b/support/texindex.c
@@ -3,7 +3,7 @@
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 1, or (at your option)
+ the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
diff --git a/support/texinfo.tex b/support/texinfo.tex
index 31fc8151..73b5fd21 100644
--- a/support/texinfo.tex
+++ b/support/texinfo.tex
@@ -22,7 +22,7 @@
%You are forbidden to forbid anyone else to use, share and improve
%what you give them. Help stamp out software-hoarding!
-\def\texinfoversion{2.53}
+\def\texinfoversion{2.65}
\message{Loading texinfo package [Version \texinfoversion]:}
\message{}
@@ -62,6 +62,7 @@
%---------------------Begin change-----------------------
%
+%%%% For @cropmarks command.
% Dimensions to add cropmarks at corners Added by P. A. MacKay, 12 Nov. 1986
%
\newdimen\cornerlong \newdimen\cornerthick
@@ -69,7 +70,9 @@
\newdimen \outerhsize \newdimen \outervsize
\cornerlong=1pc\cornerthick=.3pt % These set size of cropmarks
\outerhsize=7in
-\outervsize=9.5in
+%\outervsize=9.5in
+% Alternative @smallbook page size is 9.25in
+\outervsize=9.25in
\topandbottommargin=.75in
%
%---------------------End change-----------------------
@@ -85,6 +88,7 @@
{\let\hsize=\pagewidth \makefootline}}}%
\advancepageno \ifnum\outputpenalty>-20000 \else\dosupereject\fi}
+%%%% For @cropmarks command %%%%
% Here is a modification of the main output routine for Near East Publications
% This provides right-angle cropmarks at all four corners.
@@ -355,7 +359,7 @@
% Conditionals to test whether a flag is set.
-\outer\def\ifset{\begingroup\ignoresections\parsearg\ifsetxxx}
+\def\ifset{\begingroup\ignoresections\parsearg\ifsetxxx}
\def\ifsetxxx #1{\endgroup
\expandafter\ifx\csname IF#1\endcsname\relax \let\temp=\ifsetfail
@@ -365,7 +369,7 @@
\def\ifsetfail{\begingroup\ignoresections\ifsetfailxxx}
\long\def\ifsetfailxxx #1\end ifset{\endgroup\ignorespaces}
-\outer\def\ifclear{\begingroup\ignoresections\parsearg\ifclearxxx}
+\def\ifclear{\begingroup\ignoresections\parsearg\ifclearxxx}
\def\ifclearxxx #1{\endgroup
\expandafter\ifx\csname IF#1\endcsname\relax \let\temp=\relax
@@ -457,18 +461,26 @@
\let\li = \sf % Sometimes we call it \li, not \sf.
%% Try out Computer Modern fonts at \magstephalf
-\font\textrm=cmr10 scaled \magstephalf
-\font\texttt=cmtt10 scaled \magstephalf
+\let\mainmagstep=\magstephalf
+
+\ifx\bigger\relax
+\let\mainmagstep=\magstep1
+\font\textrm=cmr12
+\font\texttt=cmtt12
+\else
+\font\textrm=cmr10 scaled \mainmagstep
+\font\texttt=cmtt10 scaled \mainmagstep
+\fi
% Instead of cmb10, you may want to use cmbx10.
% cmbx10 is a prettier font on its own, but cmb10
% looks better when embedded in a line with cmr10.
-\font\textbf=cmb10 scaled \magstephalf
-\font\textit=cmti10 scaled \magstephalf
-\font\textsl=cmsl10 scaled \magstephalf
-\font\textsf=cmss10 scaled \magstephalf
-\font\textsc=cmcsc10 scaled \magstephalf
-\font\texti=cmmi10 scaled \magstephalf
-\font\textsy=cmsy10 scaled \magstephalf
+\font\textbf=cmb10 scaled \mainmagstep
+\font\textit=cmti10 scaled \mainmagstep
+\font\textsl=cmsl10 scaled \mainmagstep
+\font\textsf=cmss10 scaled \mainmagstep
+\font\textsc=cmcsc10 scaled \mainmagstep
+\font\texti=cmmi10 scaled \mainmagstep
+\font\textsy=cmsy10 scaled \mainmagstep
% A few fonts for @defun, etc.
\font\defbf=cmbx10 scaled \magstep1 %was 1314
@@ -920,19 +932,33 @@ July\or August\or September\or October\or November\or December\fi
\itemzzz {#1}}
\def\itemzzz #1{\begingroup %
-\advance \hsize by -\rightskip %
-\advance \hsize by -\leftskip %
-\setbox0=\hbox{\itemfont{#1}}%
-\itemindex{#1}%
-\parskip=0in %
-\noindent %
-\ifdim \wd0>\itemmax %
-\vadjust{\penalty 10000}%
-\hbox to \hsize{\hskip -\tableindent\box0\hss}\ %
-\else %
-\hbox to 0pt{\hskip -\tableindent\box0\hss}%
-\fi %
-\endgroup %
+ \advance\hsize by -\rightskip
+ \advance\hsize by -\tableindent
+ \setbox0=\hbox{\itemfont{#1}}%
+ \itemindex{#1}%
+ \nobreak % This prevents a break before @itemx.
+ %
+ % Be sure we are not still in the middle of a paragraph.
+ \parskip=0in
+ \par
+ %
+ % If the item text does not fit in the space we have, put it on a line
+ % by itself, and do not allow a page break either before or after that
+ % line. We do not start a paragraph here because then if the next
+ % command is, e.g., @kindex, the whatsit would get put into the
+ % horizontal list on a line by itself, resulting in extra blank space.
+ \ifdim \wd0>\itemmax
+ \setbox0=\hbox{\hskip \leftskip \hskip -\tableindent \unhbox0}\box0
+ \nobreak
+ \else
+ % The item text fits into the space. Start a paragraph, so that the
+ % following text (if any) will end up on the same line. Since that
+ % text will be indented by \tableindent, we make the item text be in
+ % a zero-width box.
+ \noindent
+ \rlap{\hskip -\tableindent\box0}%
+ \fi
+ \endgroup
}
\def\item{\errmessage{@item while not in a table}}
@@ -994,17 +1020,18 @@ July\or August\or September\or October\or November\or December\fi
\def\itemize{\parsearg\itemizezzz}
-\def\itemizezzz #1{\itemizey {#1}{\Eitemize}}
+\def\itemizezzz #1{%
+ \begingroup % ended by the @end itemize
+ \itemizey {#1}{\Eitemize}
+}
\def\itemizey #1#2{%
\aboveenvbreak %
-\begingroup %
-\itemno = 0 %
\itemmax=\itemindent %
\advance \itemmax by -\itemmargin %
\advance \leftskip by \itemindent %
-\parindent = 0pt
-\parskip = \smallskipamount
+\parindent = 0pt %
+\parskip = \smallskipamount %
\ifdim \parskip=0pt \parskip=2pt \fi%
\def#2{\endgraf\endgroup\afterenvbreak}%
\def\itemcontents{#1}%
@@ -1018,25 +1045,106 @@ July\or August\or September\or October\or November\or December\fi
\def\frenchspacing{\sfcode46=1000 \sfcode63=1000 \sfcode33=1000
\sfcode58=1000 \sfcode59=1000 \sfcode44=1000 }
-% Allow argument of `a', `A' or `1' to specify type of enumeration.
+% \splitoff TOKENS\endmark defines \first to be the first token in
+% TOKENS, and \rest to be the remainder.
+%
+\def\splitoff#1#2\endmark{\def\first{#1}\def\rest{#2}}%
+
+% Allow an optional argument of an uppercase letter, lowercase letter,
+% or number, to specify the first label in the enumerated list. No
+% argument is the same as `1'.
+%
\def\enumerate{\parsearg\enumeratezzz}
\def\enumeratezzz #1{\enumeratey #1 \endenumeratey}
-\def\enumeratey #1 #2\endenumeratey{
-\if#1a \alphaenumerate\else\if#1A \capsenumerate\else
-\itemizey{\the\itemno.}\Eenumerate\flushcr
-\fi\fi}
-
-\def\alphaenumerate{\itemizey{\ifcase\itemno\or
-a\or b\or c\or d\or e\or f\or g\or h\or i\or j\or k\or l\or m\or n\or o\or
-p\or q\or r\or s\or t\or u\or v\or w\or x\or y\or z\else
-\errmessage{More than 26 items in @alphaenumerate; get a bigger alphabet.}\fi.}%
-\Ealphaenumerate\flushcr}
-
-\def\capsenumerate{\itemizey{\ifcase\itemno\or
-A\or B\or C\or D\or E\or F\or G\or H\or I\or J\or K\or L\or M\or N\or O\or
-P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
-\errmessage{More than 26 items in @capsenumerate; get a bigger alphabet.}\fi.}%
-\Ecapsenumerate\flushcr}
+\def\enumeratey #1 #2\endenumeratey{%
+ \begingroup % ended by the @end enumerate
+ %
+ % If we were given no argument, pretend we were given `1'.
+ \def\thearg{#1}%
+ \ifx\thearg\empty \def\thearg{1}\fi
+ %
+ % Detect if the argument is a single token. If so, it might be a
+ % letter. Otherwise, the only valid thing it can be is a number.
+ % (We will always have one token, because of the test we just made.
+ % This is a good thing, since \splitoff doesn't work given nothing at
+ % all -- the first parameter is undelimited.)
+ \expandafter\splitoff\thearg\endmark
+ \ifx\rest\empty
+ % Only one token in the argument. It could still be anything.
+ % A ``lowercase letter'' is one whose \lccode is nonzero.
+ % An ``uppercase letter'' is one whose \lccode is both nonzero, and
+ % not equal to itself.
+ % Otherwise, we assume it's a number.
+ %
+ % We need the \relax at the end of the \ifnum lines to stop TeX from
+ % continuing to look for a <number>.
+ %
+ \ifnum\lccode\expandafter`\thearg=0\relax
+ \numericenumerate % a number (we hope)
+ \else
+ % It's a letter.
+ \ifnum\lccode\expandafter`\thearg=\expandafter`\thearg\relax
+ \lowercaseenumerate % lowercase letter
+ \else
+ \uppercaseenumerate % uppercase letter
+ \fi
+ \fi
+ \else
+ % Multiple tokens in the argument. We hope it's a number.
+ \numericenumerate
+ \fi
+}
+
+% An @enumerate whose labels are integers. The starting integer is
+% given in \thearg.
+%
+\def\numericenumerate{%
+ \itemno = \thearg
+ \startenumeration{\the\itemno}%
+}
+
+% The starting (lowercase) letter is in \thearg.
+\def\lowercaseenumerate{%
+ \itemno = \expandafter`\thearg
+ \startenumeration{%
+ % Be sure we're not beyond the end of the alphabet.
+ \ifnum\itemno=0
+ \errmessage{No more lowercase letters in @enumerate; get a bigger
+ alphabet}%
+ \fi
+ \char\lccode\itemno
+ }%
+}
+
+% The starting (uppercase) letter is in \thearg.
+\def\uppercaseenumerate{%
+ \itemno = \expandafter`\thearg
+ \startenumeration{%
+ % Be sure we're not beyond the end of the alphabet.
+ \ifnum\itemno=0
+ \errmessage{No more uppercase letters in @enumerate; get a bigger
+ alphabet}
+ \fi
+ \char\uccode\itemno
+ }%
+}
+
+% Call itemizey, adding a period to the first argument and supplying the
+% common last two arguments. Also subtract one from the initial value in
+% \itemno, since @item increments \itemno.
+%
+\def\startenumeration#1{%
+ \advance\itemno by -1
+ \itemizey{#1.}\Eenumerate\flushcr
+}
+
+% @alphaenumerate and @capsenumerate are abbreviations for giving an arg
+% to @enumerate.
+%
+\def\alphaenumerate{\enumerate{a}}
+\def\capsenumerate{\enumerate{A}}
+\def\Ealphaenumerate{\Eenumerate}
+\def\Ecapsenumerate{\Eenumerate}
% Definition of @item while inside @itemize.
@@ -1046,7 +1154,7 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
\ifhmode \errmessage{\in hmode at itemizeitem}\fi
{\parskip=0in \hskip 0pt
\hbox to 0pt{\hss \itemcontents\hskip \itemmargin}%
-\vadjust{\penalty 300}}%
+\vadjust{\penalty 1200}}%
\flushcr}
\message{indexing,}
@@ -1270,33 +1378,51 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
\def\printindex{\parsearg\doprintindex}
-\def\doprintindex#1{\tex %
-\dobreak \chapheadingskip {10000}
-\catcode`\%=\other\catcode`\&=\other\catcode`\#=\other
-\catcode`\$=\other\catcode`\_=\other
-\catcode`\~=\other
-% The following don't help, since the chars were translated
-% when the raw index was written, and their fonts were discarded
-% due to \indexnofonts.
-%\catcode`\"=\active
-%\catcode`\^=\active
-%\catcode`\_=\active
-%\catcode`\|=\active
-%\catcode`\<=\active
-%\catcode`\>=\active
-\def\indexbackslash{\rawbackslashxx}
-\indexfonts\rm \tolerance=9500 \advance\baselineskip -1pt
-\begindoublecolumns
-\openin 1 \jobname.#1s
-\ifeof 1
-% \enddoublecolumns gets confused if there is no text in the index,
-% and it loses the chapter title and the aux file entries for the index.
-% The easiest way to prevent this problem is to make sure there is some text.
-(Index is empty)
-\else \closein 1 \input \jobname.#1s
-\fi
-\enddoublecolumns
-\Etex}
+\def\doprintindex#1{%
+ \tex
+ \dobreak \chapheadingskip {10000}
+ \catcode`\%=\other\catcode`\&=\other\catcode`\#=\other
+ \catcode`\$=\other\catcode`\_=\other
+ \catcode`\~=\other
+ %
+ % The following don't help, since the chars were translated
+ % when the raw index was written, and their fonts were discarded
+ % due to \indexnofonts.
+ %\catcode`\"=\active
+ %\catcode`\^=\active
+ %\catcode`\_=\active
+ %\catcode`\|=\active
+ %\catcode`\<=\active
+ %\catcode`\>=\active
+ % %
+ \def\indexbackslash{\rawbackslashxx}
+ \indexfonts\rm \tolerance=9500 \advance\baselineskip -1pt
+ \begindoublecolumns
+ %
+ % See if the index file exists and is nonempty.
+ \openin 1 \jobname.#1s
+ \ifeof 1
+ % \enddoublecolumns gets confused if there is no text in the index,
+ % and it loses the chapter title and the aux file entries for the
+ % index. The easiest way to prevent this problem is to make sure
+ % there is some text.
+ (Index is nonexistent)
+ \else
+ %
+ % If the index file exists but is empty, then \openin leaves \ifeof
+ % false. We have to make TeX try to read something from the file, so
+ % it can discover if there is anything in it.
+ \read 1 to \temp
+ \ifeof 1
+ (Index is empty)
+ \else
+ \input \jobname.#1s
+ \fi
+ \fi
+ \closein 1
+ \enddoublecolumns
+ \Etex
+}
% These macros are used by the sorted index file itself.
% Change them to control the appearance of the index.
@@ -1305,13 +1431,13 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
% \balancecolumns gets confused if there is any shrink.
\newskip\initialskipamount \initialskipamount 12pt plus4pt
-\outer\def\initial #1{%
+\def\initial #1{%
{\let\tentt=\sectt \let\tt=\sectt \let\sf=\sectt
\ifdim\lastskip<\initialskipamount
\removelastskip \penalty-200 \vskip \initialskipamount\fi
\line{\secbf#1\hfill}\kern 2pt\penalty10000}}
-\outer\def\entry #1#2{
+\def\entry #1#2{
{\parfillskip=0in \parskip=0in \parindent=0in
\hangindent=1in \hangafter=1%
\noindent\hbox{#1}\indexdotfill #2\par
@@ -1441,6 +1567,7 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
\def\dfn##1{\realbackslash dfn {##1}}
}
+\def\thischaptername{No Chapter Title}
\outer\def\chapter{\parsearg\chapterzzz}
\def\chapterzzz #1{\seccheck{chapter}%
\secno=0 \subsecno=0 \subsubsecno=0
@@ -1751,6 +1878,8 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
\newskip \secheadingskip \secheadingskip = 21pt plus 8pt minus 4pt
\def\secheadingbreak{\dobreak \secheadingskip {-1000}}
+% @paragraphindent is defined for the Info formatting commands only.
+\let\paragraphindent=\comment
% Section fonts are the base font at magstep2, which produces
% a size a bit more than 14 points in the default situation.
@@ -1944,26 +2073,23 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
\let\ptexequiv = \equiv
-{\tentt
-\global\setbox\dblarrowbox = \hbox to 1em{\hfil$\Rightarrow$\hfil}
-\global\setbox\longdblarrowbox = \hbox to 1em{\hfil$\mapsto$\hfil}
-\global\setbox\pushcharbox = \hbox to 1em{\hfil$\dashv$\hfil}
-\global\setbox\equivbox = \hbox to 1em{\hfil$\ptexequiv$\hfil}
+%{\tentt
+%\global\setbox\dblarrowbox = \hbox to 1em{\hfil$\Rightarrow$\hfil}
+%\global\setbox\longdblarrowbox = \hbox to 1em{\hfil$\mapsto$\hfil}
+%\global\setbox\pushcharbox = \hbox to 1em{\hfil$\dashv$\hfil}
+%\global\setbox\equivbox = \hbox to 1em{\hfil$\ptexequiv$\hfil}
% Adapted from the manmac format (p.420 of TeXbook)
-\global\setbox\bullbox = \hbox to 1em{\kern.15em\vrule height .75ex width .85ex
- depth .1ex\hfil}
-}
+%\global\setbox\bullbox = \hbox to 1em{\kern.15em\vrule height .75ex width .85ex
+% depth .1ex\hfil}
+%}
\def\point{$\star$}
-\def\result{\leavevmode\raise.15ex\copy\dblarrowbox}
-\def\expansion{\leavevmode\raise.1ex\copy\longdblarrowbox}
-\def\print{\leavevmode\lower.1ex\copy\pushcharbox}
-
-\def\equiv{\leavevmode\lower.1ex\copy\equivbox}
+\def\result{\leavevmode\raise.15ex\hbox to 1em{\hfil$\Rightarrow$\hfil}}
+\def\expansion{\leavevmode\raise.1ex\hbox to 1em{\hfil$\mapsto$\hfil}}
+\def\print{\leavevmode\lower.1ex\hbox to 1em{\hfil$\dashv$\hfil}}
-% Does anyone really want this?
-% \def\bull{\leavevmode\copy\bullbox}
+\def\equiv{\leavevmode\lower.1ex\hbox to 1em{\hfil$\ptexequiv$\hfil}}
% Adapted from the TeXbook's \boxit.
{\tentt \global\dimen0 = 3em}% Width of the box.
@@ -2040,6 +2166,9 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
\def\afterenvbreak{\endgraf \ifdim\lastskip<\aboveenvskipamount
\removelastskip \penalty-50 \vskip\aboveenvskipamount \fi}
+% \nonarrowing is a flag. If "set", @lisp etc don't narrow margins.
+\let\nonarrowing=\relax
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% \cartouche: draw rectangle w/rounded corners around argument
\font\circle=lcircle10
@@ -2072,6 +2201,8 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
% side, and for 6pt waste from
% each corner char
\normbskip=\baselineskip \normpskip=\parskip \normlskip=\lineskip
+ % Flag to tell @lisp, etc., not to narrow margin.
+ \let\nonarrowing=\comment
\vbox\bgroup
\baselineskip=0pt\parskip=0pt\lineskip=0pt
\carttop
@@ -2110,7 +2241,12 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
\let\par=\lisppar
\def\Elisp{\endgroup\afterenvbreak}%
\parskip=0pt
+% @cartouche defines \nonarrowing to inhibit narrowing
+% at next level down.
+\ifx\nonarrowing\relax
\advance \leftskip by \lispnarrowing
+\let\nonarrowing=\relax
+\fi
\parindent=0pt
\let\exdent=\internalexdent
\obeyspaces \obeylines \tt \rawbackslash
@@ -2139,7 +2275,12 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
\let\par=\lisppar
\def\Esmalllisp{\endgroup\afterenvbreak}%
\parskip=0pt
+% @cartouche defines \nonarrowing to inhibit narrowing
+% at next level down.
+\ifx\nonarrowing\relax
\advance \leftskip by \lispnarrowing
+\let\nonarrowing=\relax
+\fi
\parindent=0pt
\let\exdent=\internalexdent
\obeyspaces \obeylines \ninett \indexfonts \rawbackslash
@@ -2158,7 +2299,12 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
\let\par=\lisppar
\def\Edisplay{\endgroup\afterenvbreak}%
\parskip=0pt
+% @cartouche defines \nonarrowing to inhibit narrowing
+% at next level down.
+\ifx\nonarrowing\relax
\advance \leftskip by \lispnarrowing
+\let\nonarrowing=\relax
+\fi
\parindent=0pt
\let\exdent=\internalexdent
\obeyspaces \obeylines
@@ -2221,8 +2367,13 @@ P\or Q\or R\or S\or T\or U\or V\or W\or X\or Y\or Z\else
\singlespace
\parindent=0pt
\def\Equotation{\par\endgroup\afterenvbreak}%
-\advance \rightskip by \lispnarrowing
-\advance \leftskip by \lispnarrowing}
+% @cartouche defines \nonarrowing to inhibit narrowing
+% at next level down.
+\ifx\nonarrowing\relax
+\advance \leftskip by \lispnarrowing
+\advance \rightskip by \lispnarrowing
+\let\nonarrowing=\relax
+\fi}
\message{defuns,}
% Define formatter for defuns
@@ -2792,7 +2943,7 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
\catcode `\=\other
\catcode `\=\other
\catcode `\=\other
-\catcode `\=\other
+\catcode 26=\other
\catcode `\^^[=\other
\catcode `\^^\=\other
\catcode `\^^]=\other
@@ -2883,6 +3034,10 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
% Prevent underfull vbox error messages.
\vbadness=10000
+% Following George Bush, just get rid of widows and orphans.
+\widowpenalty=10000
+\clubpenalty=10000
+
% Use TeX 3.0's \emergencystretch to help line breaking, but if we're
% using an old version of TeX, don't do anything. We want the amount of
% stretch added to depend on the line length, hence the dependence on
@@ -2893,7 +3048,7 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
\divide\emergencystretch by 45
\fi
-% Use @smallbook to reset parameters for 7x9.5 format
+% Use @smallbook to reset parameters for 7x9.5 format (or else 7x9.25)
\def\smallbook{
\global\lispnarrowing = 0.3in
\global\baselineskip 12pt
@@ -2917,10 +3072,23 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
\def\afourpaper{
\global\tolerance=700
\global\hfuzz=1pt
+\global\baselineskip=12pt
+\global\parskip 15pt plus 1pt
\global\vsize= 53\baselineskip
\advance\vsize by \topskip
-\global\hsize= 5.85in % A4 wide 10pt
+%\global\hsize= 5.85in % A4 wide 10pt
+\global\hsize= 6.5in
+\global\outerhsize=\hsize
+\global\advance\outerhsize by 0.5in
+\global\outervsize=\vsize
+\global\advance\outervsize by 0.6in
+\global\doublecolumnhsize=\hsize
+\global\divide\doublecolumnhsize by 2
+\global\advance\doublecolumnhsize by -0.1in
+\global\doublecolumnvsize=\vsize
+\global\multiply\doublecolumnvsize by 2
+\global\advance\doublecolumnvsize by 0.1in
\global\pagewidth=\hsize
\global\pageheight=\vsize
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 00000000..dcf4f15f
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,133 @@
+SHELL = /bin/sh
+
+all: msg swaplns messages argarray longwrds \
+ getline inftest fstabplus compare arrayref rs fsrs rand \
+ fsbs negexp asgext anchgsub splitargv awkpath nfset reparse
+
+gawk.extensions: fieldwidths ignorecase posix manyfiles igncfs
+
+bigtest: all pound-bang gawk.extensions
+
+extra: regtest
+
+pound-bang::
+# Always remove the /tmp copy of gawk, even when the run fails, then report the run's own exit status.
+	cp ../gawk /tmp && chmod +x pound-bang && ./pound-bang pound-bang >tmp; s=$$?; rm -f /tmp/gawk; exit $$s
+	cmp pound-bang.good tmp && rm -f tmp
+
+msg::
+ @echo 'Any output from "cmp" is bad news, although some differences'
+ @echo 'in floating point values are probably benign -- in particular,'
+ @echo 'some systems may omit a leading zero and the floating point'
+ @echo 'precision may lead to slightly different output in a few cases.'
+
+swaplns::
+ @../gawk -f swaplns.awk data >tmp
+ cmp swaplns.good tmp && rm -f tmp
+
+messages::
+ @../gawk -f messages.awk >out2 2>out3
+ cmp out1.good out1 && cmp out2.good out2 && cmp out3.good out3 && \
+ rm -f out1 out2 out3
+
+argarray::
+ @TEST=test ../gawk -f argarray.awk >tmp
+ cmp argarray.good tmp && rm -f tmp
+
+fstabplus::
+ @echo '1 2' | ../gawk -f fstabplus >tmp
+ cmp fstabplus.good tmp && rm -f tmp
+
+fsrs::
+ @../gawk -f fsrs.awk fsrs.in >tmp
+ cmp fsrs.good tmp && rm -f tmp
+
+igncfs::
+ @../gawk -f igncfs.awk igncfs.in >tmp
+ cmp igncfs.good tmp && rm -f tmp
+
+longwrds::
+ @../gawk -f longwrds.awk manpage | sort >tmp
+ cmp longwrds.good tmp && rm -f tmp
+
+fieldwidths::
+ @echo '123456789' | ../gawk -v FIELDWIDTHS="2 3 4" '{print $$2}' >tmp
+ cmp fieldwidths.good tmp && rm -f tmp
+
+ignorecase::
+ @echo xYz | ../gawk -v IGNORECASE=1 '{sub(/y/, ""); print}' >tmp
+ cmp ignorecase.good tmp && rm -f tmp
+
+regtest::
+ @echo 'Some of the output from regtest is very system specific, do not'
+ @echo 'be distressed if your output differs from that distributed.'
+ @echo 'Manual inspection is called for.'
+ AWK=`pwd`/../gawk ./regtest
+
+posix::
+ @echo '1:2,3 4' | ../gawk -f posix >tmp
+ cmp posix.good tmp && rm -f tmp
+
+# Start from a clean junk/ (a leftover one would fail mkdir and skew the count); echo -n is not portable, so tr strips the newline.
+manyfiles::
+	@rm -rf junk && mkdir junk
+	@../gawk 'BEGIN { for (i = 1; i <= 100; i++) print i, i}' >tmp && ../gawk -f manyfiles.awk tmp tmp
+	@echo "This number better be 1 ->" | tr -d '\012'
+	@wc -l junk/* | ../gawk '$$1 != 2' | wc -l
+	@rm -rf junk tmp
+
+compare::
+ @../gawk -f compare.awk 0 1 compare.in >tmp
+ cmp compare.good tmp && rm -f tmp
+
+arrayref::
+ @../gawk -f arrayref >tmp
+ cmp arrayref.good tmp && rm -f tmp
+
+rs::
+ @../gawk -v RS="" '{print $$1, $$2}' rs.data >tmp
+ cmp rs.good tmp && rm -f tmp
+
+fsbs::
+ @../gawk -v FS='\' '{ print $$1, $$2 }' fsbs.in >tmp
+ cmp fsbs.good tmp && rm -f tmp
+
+inftest::
+ @../gawk -f inftest.awk >tmp
+ cmp inftest.good tmp && rm -f tmp
+
+getline::
+ @../gawk -f getline.awk getline.awk getline.awk >tmp
+ cmp getline.good tmp && rm -f tmp
+
+rand::
+ @echo The following line should just be 19 random numbers between 1 and 100
+ @../gawk -f rand.awk
+
+negexp::
+ @../gawk 'BEGIN {a = -2; print 10^a }' >tmp
+ cmp negexp.good tmp && rm -f tmp
+
+asgext::
+ @../gawk -f asgext.awk asgext.in >tmp
+ cmp asgext.good tmp && rm -f tmp
+
+anchgsub::
+ @../gawk -f anchgsub.awk anchgsub.in >tmp
+ cmp anchgsub.good tmp && rm -f tmp
+
+splitargv::
+ @../gawk -f splitargv.awk splitargv.in >tmp
+ cmp splitargv.good tmp && rm -f tmp
+
+awkpath::
+ @AWKPATH=".:lib" ../gawk -f awkpath.awk >tmp
+ cmp awkpath.good tmp && rm -f tmp
+
+nfset::
+ @../gawk -f nfset.awk nfset.in >tmp
+ cmp nfset.good tmp && rm -f tmp
+
+reparse::
+ @../gawk -f reparse.awk reparse.in >tmp
+ cmp reparse.good tmp && rm -f tmp
diff --git a/test/anchgsub.awk b/test/anchgsub.awk
new file mode 100644
index 00000000..52e8aa4d
--- /dev/null
+++ b/test/anchgsub.awk
@@ -0,0 +1 @@
+{ gsub(/^[ ]*/, "", $0) ; print }
diff --git a/test/anchgsub.good b/test/anchgsub.good
new file mode 100644
index 00000000..c33dfb95
--- /dev/null
+++ b/test/anchgsub.good
@@ -0,0 +1 @@
+This is a test, this is only a test.
diff --git a/test/anchgsub.in b/test/anchgsub.in
new file mode 100644
index 00000000..b829d84e
--- /dev/null
+++ b/test/anchgsub.in
@@ -0,0 +1 @@
+ This is a test, this is only a test.
diff --git a/test/argarray.awk b/test/argarray.awk
new file mode 100644
index 00000000..3911b6e9
--- /dev/null
+++ b/test/argarray.awk
@@ -0,0 +1,11 @@
+BEGIN {
+ argn = " argument" (ARGC > 1 ? "s" : "")
+ are = ARGC > 1 ? "are" : "is"
+ print "here we have " ARGC argn
+ print "which " are
+ for (x = 0; x < ARGC; x++)
+ print "\t", ARGV[x]
+ print "Environment variable TEST=" ENVIRON["TEST"]
+ print "and the current input file is called", FILENAME
+ print "but this would change if we would have something to process"
+}
diff --git a/test/argarray.good b/test/argarray.good
new file mode 100644
index 00000000..fe6d0bf8
--- /dev/null
+++ b/test/argarray.good
@@ -0,0 +1,6 @@
+here we have 1 argument
+which is
+ gawk
+Environment variable TEST=test
+and the current input file is called -
+but this would change if we would have something to process
diff --git a/test/arrayref b/test/arrayref
new file mode 100644
index 00000000..144d41a0
--- /dev/null
+++ b/test/arrayref
@@ -0,0 +1,13 @@
+ BEGIN { # foo[10] = 0 # put this line in and it will work
+ test(foo); print foo[1]
+ test2(foo2); print foo2[1]
+ }
+
+ function test(foo)
+ {
+ test2(foo)
+ }
+ function test2(bar)
+ {
+ bar[1] = 1
+ }
diff --git a/test/arrayref.good b/test/arrayref.good
new file mode 100644
index 00000000..6ed281c7
--- /dev/null
+++ b/test/arrayref.good
@@ -0,0 +1,2 @@
+1
+1
diff --git a/test/asgext.awk b/test/asgext.awk
new file mode 100644
index 00000000..c7f17754
--- /dev/null
+++ b/test/asgext.awk
@@ -0,0 +1 @@
+{ print $3; $4 = "a"; print }
diff --git a/test/asgext.good b/test/asgext.good
new file mode 100644
index 00000000..2c0df70f
--- /dev/null
+++ b/test/asgext.good
@@ -0,0 +1,6 @@
+3
+1 2 3 a
+
+1 a
+3
+1 2 3 a
diff --git a/test/asgext.in b/test/asgext.in
new file mode 100644
index 00000000..3743b5b4
--- /dev/null
+++ b/test/asgext.in
@@ -0,0 +1,3 @@
+1 2 3
+1
+1 2 3 4
diff --git a/test/awkpath.good b/test/awkpath.good
new file mode 100644
index 00000000..6cffe1b7
--- /dev/null
+++ b/test/awkpath.good
@@ -0,0 +1 @@
+Found it.
diff --git a/test/compare.awk b/test/compare.awk
new file mode 100644
index 00000000..39a88f31
--- /dev/null
+++ b/test/compare.awk
@@ -0,0 +1,13 @@
+BEGIN {
+ if (ARGV[1]) print 1
+ ARGV[1] = ""
+ if (ARGV[2]) print 2
+ ARGV[2] = ""
+ if ("0") print "zero"
+ if ("") print "null"
+ if (0) print 0
+}
+{
+ if ($0) print $0
+ if ($1) print $1
+}
diff --git a/test/compare.good b/test/compare.good
new file mode 100644
index 00000000..8241359b
--- /dev/null
+++ b/test/compare.good
@@ -0,0 +1,5 @@
+2
+zero
+1
+1
+0 1
diff --git a/test/compare.in b/test/compare.in
new file mode 100644
index 00000000..1ab098bc
--- /dev/null
+++ b/test/compare.in
@@ -0,0 +1,4 @@
+0
+1
+0 1
+
diff --git a/test/csi1.out b/test/csi1.out
new file mode 100644
index 00000000..f93c2cc2
--- /dev/null
+++ b/test/csi1.out
@@ -0,0 +1,574 @@
+Title: Ideal Charge Sensitive Amp
+Date: today
+Plotname: Transient analysis.
+Flags: real
+No. Variables: 7
+No. Points: 70
+Variables: 0 time time
+ 1 v(1) voltage
+ 2 v(3) voltage
+ 3 v(5) voltage
+ 4 v(9) voltage
+ 5 v(11) voltage
+ 6 v(13) voltage
+Values:
+ 0 0.000000000000000e+00
+ 0.000000000000000e+00
+ 0.000000000000000e+00
+ 0.000000000000000e+00
+ 0.000000000000000e+00
+ 0.000000000000000e+00
+ 0.000000000000000e+00
+
+ 1 1.000000000000000e-09
+ -1.264149466030735e-09
+ 1.264149466030735e-04
+ 2.526984953580682e-04
+ 2.521735549927725e-16
+ 5.033500623385340e-16
+ 1.004709971525236e-15
+
+ 2 1.180906969374945e-09
+ -1.514801380340722e-09
+ 1.514801380340722e-04
+ 3.028004880113196e-04
+ 3.078631347571166e-16
+ 6.145090159683228e-16
+ 1.226588337655132e-15
+
+ 3 1.542720908124834e-09
+ -2.049689597483709e-09
+ 2.049689597483709e-04
+ 4.097115469383853e-04
+ 4.437763676567463e-16
+ 8.857978571714022e-16
+ 1.768092896751413e-15
+
+ 4 2.266348785624612e-09
+ -3.265760932995932e-09
+ 3.265760932995932e-04
+ 6.527287612098135e-04
+ 9.397999789660777e-16
+ 1.875866006928358e-15
+ 3.744278902267733e-15
+
+ 5 3.713604540624168e-09
+ -5.968710391500898e-09
+ 5.968710391500899e-04
+ 1.192640228419305e-03
+ 4.345207513494314e-15
+ 8.671579322257517e-15
+ 1.730556654319970e-14
+
+ 6 6.608116050623280e-09
+ -1.165754959289845e-08
+ 1.165754959289845e-03
+ 2.327838222854165e-03
+ 5.642967340561880e-14
+ 1.124945877057287e-13
+ 2.242619207939039e-13
+
+ 7 1.179564303717826e-08
+ -2.193530367475176e-08
+ 2.193530367475176e-03
+ 4.374658167824437e-03
+ 9.992331799374361e-13
+ 1.987384302704409e-12
+ 3.952723001792399e-12
+
+ 8 2.217069701028822e-08
+ -4.233554991551749e-08
+ 4.233554991551749e-03
+ 8.421454467083355e-03
+ 2.567075209989195e-11
+ 5.079257750506960e-11
+ 1.004987167234087e-10
+
+ 9 4.292080495650814e-08
+ -8.246787850423504e-08
+ 8.246787850423504e-03
+ 1.631955711542618e-02
+ 7.429352627112179e-10
+ 1.454666979009004e-09
+ 2.848205166007601e-09
+
+ 10 6.747377244914711e-08
+ -1.288241599365203e-07
+ 1.288241599365203e-02
+ 2.533540213098864e-02
+ 6.016036906368960e-09
+ 1.168438224023758e-08
+ 2.269242430687209e-08
+
+ 11 1.027485608531171e-07
+ -1.933367451151489e-07
+ 1.933367451151489e-02
+ 3.768347326653115e-02
+ 4.547604757457383e-08
+ 8.718365928170803e-08
+ 1.671235294549662e-07
+
+ 12 1.550811172440954e-07
+ -2.846833995746566e-07
+ 2.846833995746566e-02
+ 5.474832541420137e-02
+ 3.375250800116866e-07
+ 6.342555725437884e-07
+ 1.191537432922743e-06
+
+ 13 2.124749343141491e-07
+ -3.791439105081753e-07
+ 3.791439105081753e-02
+ 7.183486021773615e-02
+ 1.485074359346451e-06
+ 2.739473718610748e-06
+ 5.050651248135159e-06
+
+ 14 2.934050742073026e-07
+ -5.028277031417205e-07
+ 5.028277031417205e-02
+ 9.325941790944214e-02
+ 6.951921938590966e-06
+ 1.246646151568946e-05
+ 2.233163733376972e-05
+
+ 15 3.899183279910254e-07
+ -6.368485744692849e-07
+ 6.368485744692850e-02
+ 1.150948619702300e-01
+ 2.649747455292758e-05
+ 4.602250953893021e-05
+ 7.978045912595374e-05
+
+ 16 5.093968731264188e-07
+ -7.843833530131612e-07
+ 7.843833530131612e-02
+ 1.371850504655500e-01
+ 9.207624151396878e-05
+ 1.537740811662592e-04
+ 2.559528885476014e-04
+
+ 17 6.489752711203596e-07
+ -9.338518261925084e-07
+ 9.338518261925084e-02
+ 1.570273197910303e-01
+ 2.792909973592655e-04
+ 4.461073848024906e-04
+ 7.086148431639843e-04
+
+ 18 8.223850555731352e-07
+ -1.089864299179190e-06
+ 1.089864299179190e-01
+ 1.742648556363447e-01
+ 8.126979446078827e-04
+ 1.228244125338940e-03
+ 1.839630830557003e-03
+
+ 19 1.039936620365588e-06
+ -1.246638298441507e-06
+ 1.246638298441507e-01
+ 1.867092058644591e-01
+ 2.285828604654097e-03
+ 3.224697163278019e-03
+ 4.483765996458879e-03
+
+ 20 1.319715872115905e-06
+ -1.397349357516515e-06
+ 1.397349357516515e-01
+ 1.916979805788835e-01
+ 6.317689492712388e-03
+ 8.164324401851115e-03
+ 1.030606900115141e-02
+
+ 21 1.686517899697324e-06
+ -1.529891860605749e-06
+ 1.529891860605749e-01
+ 1.859565532106186e-01
+ 1.718247093837089e-02
+ 1.982736574760900e-02
+ 2.201037442321020e-02
+
+ 22 2.186647083767628e-06
+ -1.629428398181613e-06
+ 1.629428398181613e-01
+ 1.662034028698354e-01
+ 4.642596629125201e-02
+ 4.602451354215933e-02
+ 4.268299343823448e-02
+
+ 23 2.786647083767628e-06
+ -1.671982381159767e-06
+ 1.671982381159767e-01
+ 1.360832067083017e-01
+ 1.086486529677349e-01
+ 9.028520856351136e-02
+ 6.680053659714552e-02
+
+ 24 3.386647083767628e-06
+ -1.666708289319382e-06
+ 1.666708289319382e-01
+ 1.065961137297528e-01
+ 2.019933895320813e-01
+ 1.409501492585539e-01
+ 8.111767734595639e-02
+
+ 25 3.986647083767628e-06
+ -1.636239392733020e-06
+ 1.636239392733020e-01
+ 8.119800486443494e-02
+ 3.221985296170131e-01
+ 1.879090396235947e-01
+ 8.016274379356519e-02
+
+ 26 4.586647083767628e-06
+ -1.592958303223056e-06
+ 1.592958303223056e-01
+ 6.070506604262340e-02
+ 4.601454482849191e-01
+ 2.224246188905672e-01
+ 6.424612709287120e-02
+
+ 27 5.186647083767628e-06
+ -1.543623379945047e-06
+ 1.543623379945047e-01
+ 4.479678708109880e-02
+ 6.043239691667113e-01
+ 2.392201482511665e-01
+ 3.780590619181199e-02
+
+ 28 5.786647083767628e-06
+ -1.491907702045976e-06
+ 1.491907702045976e-01
+ 3.275072893014933e-02
+ 7.431846280885604e-01
+ 2.368781117682013e-01
+ 6.852359091660139e-03
+
+ 29 6.386647083767628e-06
+ -1.439792435778303e-06
+ 1.439792435778303e-01
+ 2.378169349328440e-02
+ 8.668590173262367e-01
+ 2.171478488724319e-01
+ -2.306848179644787e-02
+
+ 30 6.986647083767627e-06
+ -1.388331481824472e-06
+ 1.388331481824472e-01
+ 1.718228907053183e-02
+ 9.681191419274604e-01
+ 1.838077897422596e-01
+ -4.796365600793339e-02
+
+ 31 7.586647083767627e-06
+ -1.338071075679957e-06
+ 1.338071075679957e-01
+ 1.236766007539650e-02
+ 1.042671016972224e+00
+ 1.415321327946899e-01
+ -6.566996968152291e-02
+
+ 32 8.186647083767628e-06
+ -1.289280023988654e-06
+ 1.289280023988654e-01
+ 8.876981250123693e-03
+ 1.088969089643156e+00
+ 9.499362660424981e-02
+ -7.562842047016014e-02
+
+ 33 8.786647083767628e-06
+ -1.242076017350638e-06
+ 1.242076017350638e-01
+ 6.357881031810623e-03
+ 1.107746658963011e+00
+ 4.827333239307662e-02
+ -7.843915471504923e-02
+
+ 34 9.386647083767627e-06
+ -1.196494910292573e-06
+ 1.196494910292573e-01
+ 4.546231107047477e-03
+ 1.101424879765328e+00
+ 4.554708620525528e-03
+ -7.538805789638806e-02
+
+ 35 9.986647083767627e-06
+ -1.152528702498193e-06
+ 1.152528702498193e-01
+ 3.246760457659020e-03
+ 1.073516666992439e+00
+ -3.396212210108883e-02
+ -6.804675773661231e-02
+
+ 36 1.058664708376763e-05
+ -1.110146344414089e-06
+ 1.110146344414089e-01
+ 2.316517021036618e-03
+ 1.028097189336683e+00
+ -6.599874853245306e-02
+ -5.798336364268095e-02
+
+ 37 1.118664708376763e-05
+ -1.069305118106022e-06
+ 1.069305118106022e-01
+ 1.651595638606269e-03
+ 9.693765313679967e-01
+ -9.105384005235785e-02
+ -4.658294889089031e-02
+
+ 38 1.178664708376763e-05
+ -1.029956847083499e-06
+ 1.029956847083499e-01
+ 1.176870150147819e-03
+ 9.013842949659325e-01
+ -1.092246920089114e-01
+ -3.495841677136451e-02
+
+ 39 1.238664708376763e-05
+ -9.920512695316036e-07
+ 9.920512695316036e-02
+ 8.382359024086052e-04
+ 8.277595463975323e-01
+ -1.210283048726013e-01
+ -2.392680475810324e-02
+
+ 40 1.298664708376763e-05
+ -9.555378560684139e-07
+ 9.555378560684139e-02
+ 5.968428720504958e-04
+ 7.516306562496196e-01
+ -1.272418012047138e-01
+ -1.402749198168676e-02
+
+ 41 1.358664708376763e-05
+ -9.203667750735073e-07
+ 9.203667750735073e-02
+ 4.248571522307446e-04
+ 6.755660933441945e-01
+ -1.287717046055234e-01
+ -5.563263673549205e-03
+
+ 42 1.418664708376763e-05
+ -8.864893913817122e-07
+ 8.864893913817122e-02
+ 3.023712418928277e-04
+ 6.015772305806626e-01
+ -1.265545538806776e-01
+ 1.349584122132374e-03
+
+ 43 1.478664708376763e-05
+ -8.538585100266795e-07
+ 8.538585100266795e-02
+ 2.151653061229580e-04
+ 5.311562150980697e-01
+ -1.214869821570401e-01
+ 6.731151169583247e-03
+
+ 44 1.538664708376763e-05
+ -8.224284811671143e-07
+ 8.224284811671143e-02
+ 1.530922818134957e-04
+ 4.653348807170765e-01
+ -1.143810920904009e-01
+ 1.069309216830403e-02
+
+ 45 1.598664708376763e-05
+ -7.921552298901439e-07
+ 7.921552298901439e-02
+ 1.089168937368159e-04
+ 4.047538335014330e-01
+ -1.059400761361653e-01
+ 1.340149806837349e-02
+
+ 46 1.658664708376763e-05
+ -7.629962458090119e-07
+ 7.629962458090119e-02
+ 7.748311028005782e-05
+ 3.497338006619688e-01
+ -9.674904580077289e-02
+ 1.504812104097493e-02
+
+ 47 1.718664708376763e-05
+ -7.349105515804137e-07
+ 7.349105515804137e-02
+ 5.511827138234112e-05
+ 3.003438820737499e-01
+ -8.727655437525563e-02
+ 1.582944868848389e-02
+
+ 48 1.778664708376763e-05
+ -7.078586608014144e-07
+ 7.078586608014144e-02
+ 3.920723301882219e-05
+ 2.564634021780585e-01
+ -7.788305033150851e-02
+ 1.593255151302921e-02
+
+ 49 1.838664708376763e-05
+ -6.818025309915621e-07
+ 6.818025309915621e-02
+ 2.788835904166550e-05
+ 2.178356320976079e-01
+ -6.883330624805746e-02
+ 1.552640761946806e-02
+
+ 50 1.898664708376763e-05
+ -6.567055147578671e-07
+ 6.567055147578671e-02
+ 1.983668298288980e-05
+ 1.841127879600602e-01
+ -6.031062641991226e-02
+ 1.475741378469763e-02
+
+ 51 1.958664708376763e-05
+ -6.325323108102860e-07
+ 6.325323108102860e-02
+ 1.410934680022455e-05
+ 1.548924830204021e-01
+ -5.243129198065891e-02
+ 1.374792257730501e-02
+
+ 52 2.018664708376763e-05
+ -6.092489157117236e-07
+ 6.092489157117236e-02
+ 1.003548599883787e-05
+ 1.297462940285003e-01
+ -4.525823594528676e-02
+ 1.259683280234207e-02
+
+ 53 2.078664708376763e-05
+ -5.868225768176519e-07
+ 5.868225768176519e-02
+ 7.137810396578103e-06
+ 1.082413673531994e-01
+ -3.881335316739105e-02
+ 1.138146154193364e-02
+
+ 54 2.138664708376763e-05
+ -5.652217466261597e-07
+ 5.652217466261597e-02
+ 5.076773907606561e-06
+ 8.995609902455238e-02
+ -3.308815413377584e-02
+ 1.016011499551744e-02
+
+ 55 2.198664708376763e-05
+ -5.444160386317303e-07
+ 5.444160386317303e-02
+ 3.610835627798982e-06
+ 7.449092524094347e-02
+ -2.805268412856604e-02
+ 8.974939299801216e-03
+
+ 56 2.258664708376763e-05
+ -5.243761847070938e-07
+ 5.243761847070938e-02
+ 2.568179429164067e-06
+ 6.147519509948855e-02
+ -2.366276900703495e-02
+ 7.854766820299426e-03
+
+ 57 2.318664708376763e-05
+ -5.050739940006428e-07
+ 5.050739940006428e-02
+ 1.826590813882191e-06
+ 5.057099452538205e-02
+ -1.986573288373220e-02
+ 6.817778296350529e-03
+
+ 58 2.378664708376763e-05
+ -4.864823133176291e-07
+ 4.864823133176291e-02
+ 1.299139640224724e-06
+ 4.147467043621487e-02
+ -1.660477643856061e-02
+ 5.873879622379742e-03
+
+ 59 2.438664708376763e-05
+ -4.685749889436973e-07
+ 4.685749889436973e-02
+ 9.239944550735617e-07
+ 3.391668122982992e-02
+ -1.382221958359773e-02
+ 5.026748086095437e-03
+
+ 60 2.498664708376763e-05
+ -4.513268298648920e-07
+ 4.513268298648920e-02
+ 6.571766090561239e-07
+ 2.766028273321528e-02
+ -1.146180858163665e-02
+ 4.275540948961647e-03
+
+ 61 2.558664708376763e-05
+ -4.347135723367043e-07
+ 4.347135723367043e-02
+ 4.674059248624993e-07
+ 2.249945299326918e-02
+ -9.470272664280982e-03
+ 3.616283467240179e-03
+
+ 62 2.618664708376763e-05
+ -4.187118457546762e-07
+ 4.187118457546762e-02
+ 3.324343204742104e-07
+ 1.825636722182948e-02
+ -7.798294060677664e-03
+ 3.042967402610447e-03
+
+ 63 2.678664708376763e-05
+ -4.032991397798472e-07
+ 4.032991397798472e-02
+ 2.364379187533634e-07
+ 1.477865648663428e-02
+ -6.401031800939865e-03
+ 2.548397723334224e-03
+
+ 64 2.738664708376763e-05
+ -3.884537726735109e-07
+ 3.884537726735109e-02
+ 1.681620986362542e-07
+ 1.193661985373132e-02
+ -5.238316132556311e-03
+ 2.124826894723649e-03
+
+ 65 2.798664708376763e-05
+ -3.741548607971242e-07
+ 3.741548607971242e-02
+ 1.196021236620889e-07
+ 9.620508435261841e-03
+ -4.274608378947787e-03
+ 1.764414688058854e-03
+
+ 66 2.858664708376763e-05
+ -3.603822892346797e-07
+ 3.603822892346797e-02
+ 8.506472355827554e-08
+ 7.737959636137384e-03
+ -3.478801396862400e-03
+ 1.459548113681114e-03
+
+ 67 2.918664708376763e-05
+ -3.471166834963309e-07
+ 3.471166834963309e-02
+ 6.050063999733899e-08
+ 6.211629169891409e-03
+ -2.823918805423725e-03
+ 1.203051836793585e-03
+
+ 68 2.978664708376763e-05
+ -3.343393822635298e-07
+ 3.343393822635298e-02
+ 4.302990054439619e-08
+ 4.977045512498984e-03
+ -2.286756907007834e-03
+ 9.883148941077680e-04
+
+ 69 3.000000000000000e-05
+ -3.299106422685328e-07
+ 3.299106422685328e-02
+ 3.815817763877542e-08
+ 4.599101705030142e-03
+ -2.120719628045261e-03
+ 9.210101886687262e-04
+
diff --git a/test/data b/test/data
new file mode 100644
index 00000000..71fb1627
--- /dev/null
+++ b/test/data
@@ -0,0 +1,9 @@
+This directory contains some examples/test-cases for different
+features of gawk - mostly not present in an old awk. Some are from
+"The GAWK Manual", some are original, and some are mixture of the two.
+Read header comments before attempting to use. Have fun and remember
+that program which consists only of BEGIN block does not need an input
+file.
+
+ --mj
+
diff --git a/test/fieldwidths.good b/test/fieldwidths.good
new file mode 100644
index 00000000..51b40081
--- /dev/null
+++ b/test/fieldwidths.good
@@ -0,0 +1 @@
+345
diff --git a/test/fontdata.txt b/test/fontdata.txt
new file mode 100644
index 00000000..b2601237
--- /dev/null
+++ b/test/fontdata.txt
@@ -0,0 +1,120 @@
+@
+@ Data file for awk program genscrpt.awk which generates gulam
+@ script for creation of bitmap TeX fonts.
+@ Edit this file to your needs - each line starting with @ is ignored
+@ unless it is in a form '@ fonts' or '@ magstep'. The rest should be quite
+@ obvious.
+@
+@ basic fonts - all magsteps
+@ fonts
+cmb10
+cmbx10
+cmbx5
+cmbx7
+cmcsc10
+cmex10
+cmmi10
+cmmi5
+cmmi7
+cmr10
+cmr5
+cmr7
+cmsl10
+cmss10
+cmssbx10
+cmsy10
+cmsy5
+cmsy7
+cmti10
+cmtt10
+@ magstep
+0 0.5 1 2 3 4 5
+@ other fonts only in magsteps 0, 0.5 and 1
+@ fonts
+cmbsy10
+cmbx12
+cmbx6
+cmbx7
+cmbx8
+cmbx9
+cmbxsl10
+cmbxti10
+cmdunh10
+cmff10
+cmfi10
+cmfib8
+cmitt10
+cmmi12
+cmmi6
+cmmi8
+cmmi9
+cmmib10
+cmr12
+cmr17
+cmr6
+cmr8
+cmr9
+cmsl12
+cmsl8
+cmsl9
+cmsltt10
+cmss12
+cmss17
+cmss8
+cmss9
+cmssdc10
+cmssi10
+cmssi12
+cmssi17
+cmssi8
+cmssi9
+cmssq8
+cmssqi8
+cmsy6
+cmsy8
+cmsy9
+cmtcsc10
+cmtex10
+cmtex8
+cmtex9
+cmti12
+cmti7
+cmti8
+cmti9
+cmtt12
+cmtt8
+cmtt9
+cmu10
+cmvtt10
+@ magstep
+0 0.5 1
+@ specials
+@ fonts
+logo10
+@ magstep
+0 0.5 1
+@ fonts
+cminch
+@ magstep
+0
+@ LaTeX fonts
+lasy10
+lasy5
+lasy7
+lasyb10
+@ magstep
+0 0.5 1 2 3 4 5
+@ These fonts should not use cmbase
+@ fonts
+circle10
+circlew10
+line10
+linew10
+@ magstep
+0 0.5 1 2 3 4 5
+@ fonts
+lasy6
+lasy8
+lasy9
+@ magstep
+0 0.5 1
diff --git a/test/fsbs.good b/test/fsbs.good
new file mode 100644
index 00000000..8d04f961
--- /dev/null
+++ b/test/fsbs.good
@@ -0,0 +1 @@
+1 2
diff --git a/test/fsbs.in b/test/fsbs.in
new file mode 100644
index 00000000..0a102c32
--- /dev/null
+++ b/test/fsbs.in
@@ -0,0 +1 @@
+1\2
diff --git a/test/fsrs.awk b/test/fsrs.awk
new file mode 100644
index 00000000..a0014891
--- /dev/null
+++ b/test/fsrs.awk
@@ -0,0 +1,8 @@
+BEGIN {
+ RS=""; FS="\n";
+ ORS=""; OFS="\n";
+ }
+{
+ split ($2,f," ")
+ print $0;
+}
diff --git a/test/fsrs.good b/test/fsrs.good
new file mode 100644
index 00000000..7dafd658
--- /dev/null
+++ b/test/fsrs.good
@@ -0,0 +1,5 @@
+a b
+c d
+e f1 2
+3 4
+5 6 \ No newline at end of file
diff --git a/test/fsrs.in b/test/fsrs.in
new file mode 100644
index 00000000..4b49d81c
--- /dev/null
+++ b/test/fsrs.in
@@ -0,0 +1,7 @@
+a b
+c d
+e f
+
+1 2
+3 4
+5 6
diff --git a/test/fstabplus b/test/fstabplus
new file mode 100644
index 00000000..748a44f4
--- /dev/null
+++ b/test/fstabplus
@@ -0,0 +1,2 @@
+BEGIN { FS = "\t+" }
+ { print $1, $2 }
diff --git a/test/fstabplus.good b/test/fstabplus.good
new file mode 100644
index 00000000..8d04f961
--- /dev/null
+++ b/test/fstabplus.good
@@ -0,0 +1 @@
+1 2
diff --git a/test/getline.awk b/test/getline.awk
new file mode 100644
index 00000000..f4e413f9
--- /dev/null
+++ b/test/getline.awk
@@ -0,0 +1 @@
+BEGIN { while( getline > 0) { print } }
diff --git a/test/getline.good b/test/getline.good
new file mode 100644
index 00000000..9b7f2b90
--- /dev/null
+++ b/test/getline.good
@@ -0,0 +1,2 @@
+BEGIN { while( getline > 0) { print } }
+BEGIN { while( getline > 0) { print } }
diff --git a/test/header.awk b/test/header.awk
new file mode 100644
index 00000000..2066c829
--- /dev/null
+++ b/test/header.awk
@@ -0,0 +1,5 @@
+BEGIN{
+ "date" | getline cur_time
+ close ("date")
+ print "This line printed on", cur_time
+}
diff --git a/test/igncfs.awk b/test/igncfs.awk
new file mode 100644
index 00000000..ebb58b24
--- /dev/null
+++ b/test/igncfs.awk
@@ -0,0 +1,8 @@
+BEGIN {
+ IGNORECASE=1
+ FS="[^a-z]+"
+}
+{
+ for (i=1; i<NF; i++) printf "%s, ", $i
+ printf "%s\n", $NF
+}
diff --git a/test/igncfs.good b/test/igncfs.good
new file mode 100644
index 00000000..41df9a4c
--- /dev/null
+++ b/test/igncfs.good
@@ -0,0 +1,2 @@
+this, is, handled, ok
+This, is, Not, hanDLed, Well
diff --git a/test/igncfs.in b/test/igncfs.in
new file mode 100644
index 00000000..55980172
--- /dev/null
+++ b/test/igncfs.in
@@ -0,0 +1,2 @@
+this is handled ok
+This is Not hanDLed Well
diff --git a/test/ignorecase.good b/test/ignorecase.good
new file mode 100644
index 00000000..d66e95ca
--- /dev/null
+++ b/test/ignorecase.good
@@ -0,0 +1 @@
+xz
diff --git a/test/include.awk b/test/include.awk
new file mode 100644
index 00000000..a506a813
--- /dev/null
+++ b/test/include.awk
@@ -0,0 +1,13 @@
+# input file should have lines which start with "@incl" followed by
+# a name of a file to include
+{
+ if ((NF == 2) && ($1 == "@incl")) {
+ print " -- included file -- ", $2
+ while ((getline line < $2) > 0)
+ print line
+ close ($2)
+ printf "\t***\n"
+ } else {
+ print
+ }
+}
diff --git a/test/inftest.awk b/test/inftest.awk
new file mode 100644
index 00000000..ec0eda13
--- /dev/null
+++ b/test/inftest.awk
@@ -0,0 +1,5 @@
+BEGIN {
+ x = 100
+ do { y = x ; x *= 1000; print x,y } while ( y != x )
+ print "loop terminated"
+}
diff --git a/test/inftest.good b/test/inftest.good
new file mode 100644
index 00000000..83a93d01
--- /dev/null
+++ b/test/inftest.good
@@ -0,0 +1,105 @@
+100000 100
+100000000 100000
+1e+11 100000000
+1e+14 1e+11
+1e+17 1e+14
+1e+20 1e+17
+1e+23 1e+20
+1e+26 1e+23
+1e+29 1e+26
+1e+32 1e+29
+1e+35 1e+32
+1e+38 1e+35
+1e+41 1e+38
+1e+44 1e+41
+1e+47 1e+44
+1e+50 1e+47
+1e+53 1e+50
+1e+56 1e+53
+1e+59 1e+56
+1e+62 1e+59
+1e+65 1e+62
+1e+68 1e+65
+1e+71 1e+68
+1e+74 1e+71
+1e+77 1e+74
+1e+80 1e+77
+1e+83 1e+80
+1e+86 1e+83
+1e+89 1e+86
+1e+92 1e+89
+1e+95 1e+92
+1e+98 1e+95
+1e+101 1e+98
+1e+104 1e+101
+1e+107 1e+104
+1e+110 1e+107
+1e+113 1e+110
+1e+116 1e+113
+1e+119 1e+116
+1e+122 1e+119
+1e+125 1e+122
+1e+128 1e+125
+1e+131 1e+128
+1e+134 1e+131
+1e+137 1e+134
+1e+140 1e+137
+1e+143 1e+140
+1e+146 1e+143
+1e+149 1e+146
+1e+152 1e+149
+1e+155 1e+152
+1e+158 1e+155
+1e+161 1e+158
+1e+164 1e+161
+1e+167 1e+164
+1e+170 1e+167
+1e+173 1e+170
+1e+176 1e+173
+1e+179 1e+176
+1e+182 1e+179
+1e+185 1e+182
+1e+188 1e+185
+1e+191 1e+188
+1e+194 1e+191
+1e+197 1e+194
+1e+200 1e+197
+1e+203 1e+200
+1e+206 1e+203
+1e+209 1e+206
+1e+212 1e+209
+1e+215 1e+212
+1e+218 1e+215
+1e+221 1e+218
+1e+224 1e+221
+1e+227 1e+224
+1e+230 1e+227
+1e+233 1e+230
+1e+236 1e+233
+1e+239 1e+236
+1e+242 1e+239
+1e+245 1e+242
+1e+248 1e+245
+1e+251 1e+248
+1e+254 1e+251
+1e+257 1e+254
+1e+260 1e+257
+1e+263 1e+260
+1e+266 1e+263
+1e+269 1e+266
+1e+272 1e+269
+1e+275 1e+272
+1e+278 1e+275
+1e+281 1e+278
+1e+284 1e+281
+1e+287 1e+284
+1e+290 1e+287
+1e+293 1e+290
+1e+296 1e+293
+1e+299 1e+296
+1e+302 1e+299
+1e+305 1e+302
+1e+308 1e+305
+Inf 1e+308
+Inf Inf
+loop terminated
diff --git a/test/lastnpages b/test/lastnpages
new file mode 100644
index 00000000..0acb7738
--- /dev/null
+++ b/test/lastnpages
@@ -0,0 +1,47 @@
+From nstn.ns.ca!news.cs.indiana.edu!news.nd.edu!spool.mu.edu!uunet!elroy.jpl.nasa.gov!swrinde!zaphod.mps.ohio-state.edu!uakari.primate.wisc.edu!dali.cs.montana.edu!milton!uw-beaver!fluke!ssc-vax!brennan Mon May 6 23:41:40 ADT 1991
+Article: 26492 of comp.unix.questions
+Path: cs.dal.ca!nstn.ns.ca!news.cs.indiana.edu!news.nd.edu!spool.mu.edu!uunet!elroy.jpl.nasa.gov!swrinde!zaphod.mps.ohio-state.edu!uakari.primate.wisc.edu!dali.cs.montana.edu!milton!uw-beaver!fluke!ssc-vax!brennan
+From: brennan@ssc-vax.UUCP (Michael D Brennan)
+Newsgroups: comp.unix.questions
+Subject: Re: How to print last <n> pages of a file
+Message-ID: <3948@ssc-bee.ssc-vax.UUCP>
+Date: 6 May 91 15:42:00 GMT
+Article-I.D.: ssc-bee.3948
+Organization: Boeing Aerospace & Electronics, Seattle WA
+Lines: 33
+
+
+The following shell & (new) awk program prints the last n pages.
+
+If you get more than 65 lines to a page, the program that inserts
+the ^L's should be fixed.
+
+-------------------------------------------------------------
+#!/bin/sh
+# usage: lastpages -- prints 1 page reads stdin
+# lastpages n -- prints n pages reads stdin
+# lastpages n files -- prints n pages, reads file list
+
+program='BEGIN{RS = ORS = "\f" }
+
+
+{ page[NR] = $0
+ if ( NR > numpages ) delete page[NR-numpages]
+}
+
+END {
+ i = NR - numpages + 1
+ if ( i <= 0 ) i = 1
+
+ while( i <= NR ) print page[i++]
+}'
+
+
+case $# in
+0) awk "$program" numpages=1 - ;;
+1) awk "$program" numpages=$1 - ;;
+*) pages=$1 ; shift
+ awk "$program" numpages=$pages $* ;;
+esac
+
+
diff --git a/test/lib/awkpath.awk b/test/lib/awkpath.awk
new file mode 100644
index 00000000..6663ca4b
--- /dev/null
+++ b/test/lib/awkpath.awk
@@ -0,0 +1 @@
+BEGIN { print "Found it." }
diff --git a/test/longwrds.awk b/test/longwrds.awk
new file mode 100644
index 00000000..f6a7816d
--- /dev/null
+++ b/test/longwrds.awk
@@ -0,0 +1,20 @@
+# From Gawk Manual modified by bug fix and removal of punctuation
+# Record every word which is used at least once
+{
+ for (i = 1; i <= NF; i++) {
+ tmp = tolower($i)
+ if (0 != (pos = match(tmp, /([a-z]|-)+/)))
+ used[substr(tmp, pos, RLENGTH)] = 1
+ }
+}
+
+#Find a number of distinct words longer than 10 characters
+END {
+ num_long_words = 0
+ for (x in used)
+ if (length(x) > 10) {
+ ++num_long_words
+ print x
+ }
+ print num_long_words, "long words"
+}
diff --git a/test/longwrds.good b/test/longwrds.good
new file mode 100644
index 00000000..01faa847
--- /dev/null
+++ b/test/longwrds.good
@@ -0,0 +1,21 @@
+20 long words
+compatibility
+concatenated
+consistency
+definitions
+description
+distributing
+fistatements
+gawk-options
+gnu-specific
+identically
+implementation
+implementations
+information
+non-portable
+pattern-action
+pre-defined
+program-file
+program-text
+programming
+restrictions
diff --git a/test/manpage b/test/manpage
new file mode 100644
index 00000000..09c39485
--- /dev/null
+++ b/test/manpage
@@ -0,0 +1,200 @@
+.ds PX \s-1POSIX\s+1
+.ds UX \s-1UNIX\s+1
+.ds AN \s-1ANSI\s+1
+.TH GAWK 1 "May 28 1991" "Free Software Foundation" "Utility Commands"
+.SH NAME
+gawk \- pattern scanning and processing language
+.SH SYNOPSIS
+.B gawk
+[
+.B \-W
+.I gawk-options
+] [
+.BI \-F\^ fs
+] [
+.B \-v
+.IR var = val
+]
+.B \-f
+.I program-file
+[
+.B \-\^\-
+] file .\^.\^.
+.br
+.B gawk
+[
+.B \-W
+.I gawk-options
+] [
+.BI \-F\^ fs
+] [
+.B \-v
+.IR var = val
+] [
+.B \-\^\-
+]
+.I program-text
+file .\^.\^.
+.SH DESCRIPTION
+.I Gawk
+is the GNU Project's implementation of the AWK programming language.
+It conforms to the definition of the language in
+the \*(PX 1003.2 Command Language And Utilities Standard
+(draft 11).
+This version in turn is based on the description in
+.IR "The AWK Programming Language" ,
+by Aho, Kernighan, and Weinberger,
+with the additional features defined in the System V Release 4 version
+of \*(UX
+.IR awk .
+.I Gawk
+also provides some GNU-specific extensions.
+.PP
+The command line consists of options to
+.I gawk
+itself, the AWK program text (if not supplied via the
+.B \-f
+option), and values to be made
+available in the
+.B ARGC
+and
+.B ARGV
+pre-defined AWK variables.
+.SH OPTIONS
+.PP
+.I Gawk
+accepts the following options, which should be available on any implementation
+of the AWK language.
+.TP
+.BI \-F fs
+Use
+.I fs
+for the input field separator (the value of the
+.B FS
+predefined
+variable).
+.TP
+\fB\-v\fI var\fR\^=\^\fIval\fR
+Assign the value
+.IR val ,
+to the variable
+.IR var ,
+before execution of the program begins.
+Such variable values are available to the
+.B BEGIN
+block of an AWK program.
+.TP
+.BI \-f " program-file"
+Read the AWK program source from the file
+.IR program-file ,
+instead of from the first command line argument.
+Multiple
+.B \-f
+options may be used.
+.TP
+.B \-\^\-
+Signal the end of options. This is useful to allow further arguments to the
+AWK program itself to start with a ``\-''.
+This is mainly for consistency with the argument parsing convention used
+by most other \*(PX programs.
+.PP
+Following the \*(PX standard,
+.IR gawk -specific
+options are supplied via arguments to the
+.B \-W
+option. Multiple
+.B \-W
+options may be supplied, or multiple arguments may be supplied together
+if they are separated by commas, or enclosed in quotes and separated
+by white space.
+Case is ignored in arguments to the
+.B \-W
+option.
+.PP
+The
+.B \-W
+option accepts the following arguments:
+.TP \w'\fBcopyright\fR'u+1n
+.B compat
+Run in
+.I compatibility
+mode. In compatibility mode,
+.I gawk
+behaves identically to \*(UX
+.IR awk ;
+none of the GNU-specific extensions are recognized.
+.TP
+.PD 0
+.B copyleft
+.TP
+.PD
+.B copyright
+Print the short version of the GNU copyright information message on
+the error output.
+.TP
+.B lint
+Provide warnings about constructs that are
+dubious or non-portable to other AWK implementations.
+.TP
+.B posix
+This turns on
+.I compatibility
+mode, with the following additional restrictions:
+.RS
+.TP \w'\(bu'u+1n
+\(bu
+.B \ex
+escape sequences are not recognized.
+.TP
+\(bu
+The synonym
+.B func
+for the keyword
+.B function
+is not recognized.
+.TP
+\(bu
+The operators
+.B **
+and
+.B **=
+cannot be used in place of
+.B ^
+and
+.BR ^= .
+.RE
+.TP
+.B version
+Print version information for this particular copy of
+.I gawk
+on the error output.
+This is useful mainly for knowing if the current copy of
+.I gawk
+on your system
+is up to date with respect to whatever the Free Software Foundation
+is distributing.
+.PP
+Any other options are flagged as illegal, but are otherwise ignored.
+.SH AWK PROGRAM EXECUTION
+.PP
+An AWK program consists of a sequence of pattern-action statements
+and optional function definitions.
+.RS
+.PP
+\fIpattern\fB { \fIaction statements\fB }\fR
+.br
+\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements\fB }\fR
+.RE
+.PP
+.I Gawk
+first reads the program source from the
+.IR program-file (s)
+if specified, or from the first non-option argument on the command line.
+The
+.B \-f
+option may be used multiple times on the command line.
+.I Gawk
+will read the program text as if all the
+.IR program-file s
+had been concatenated together. This is useful for building libraries
+of AWK functions, without having to include them in each new AWK
diff --git a/test/manyfiles.awk b/test/manyfiles.awk
new file mode 100644
index 00000000..212e88d7
--- /dev/null
+++ b/test/manyfiles.awk
@@ -0,0 +1 @@
+{ print $2 > "junk/" $1 }
diff --git a/test/messages.awk b/test/messages.awk
new file mode 100644
index 00000000..555f6e38
--- /dev/null
+++ b/test/messages.awk
@@ -0,0 +1,9 @@
+# This is a demo of different ways of printing with gawk. Try it
+# with and without -c (compatibility) flag, redirecting output
+# from gawk to a file or not. Some results can be quite unexpected.
+BEGIN {
+ print "Goes to a file out1" > "out1"
+ print "Normal print statement"
+ print "This printed on stdout" > "/dev/stdout"
+ print "You blew it!" > "/dev/stderr"
+}
diff --git a/test/negexp.good b/test/negexp.good
new file mode 100644
index 00000000..6e6566ce
--- /dev/null
+++ b/test/negexp.good
@@ -0,0 +1 @@
+0.01
diff --git a/test/nfset.awk b/test/nfset.awk
new file mode 100644
index 00000000..09ebd083
--- /dev/null
+++ b/test/nfset.awk
@@ -0,0 +1 @@
+{ NF = 5 ; print }
diff --git a/test/nfset.good b/test/nfset.good
new file mode 100644
index 00000000..3ba48aec
--- /dev/null
+++ b/test/nfset.good
@@ -0,0 +1,5 @@
+1 2
+1 2 3 4
+1 2 3 4 5
+1 2 3 4 5
+1
diff --git a/test/nfset.in b/test/nfset.in
new file mode 100644
index 00000000..43329b51
--- /dev/null
+++ b/test/nfset.in
@@ -0,0 +1,5 @@
+1 2
+1 2 3 4
+1 2 3 4 5
+1 2 3 4 5 6 7 8
+1
diff --git a/test/numfunc.awk b/test/numfunc.awk
new file mode 100644
index 00000000..de1d7a4d
--- /dev/null
+++ b/test/numfunc.awk
@@ -0,0 +1,19 @@
+BEGIN {
+ y = 8
+ x = 1
+ while (x < 256) {  # x takes the values 1, 2, 4, ..., 128
+ print "arctan", y/x, atan2(y , x)
+ x += x  # double x
+ }
+ print ""
+ pi8 = atan2(1, 1) / 2  # atan2(1, 1) is pi/4, so pi8 is pi/8
+ arg = 0
+ for (i = 0; i <= 8; i++) {  # nine samples: arg goes from 0 to pi in steps of pi/8
+ print "cos sin", arg, cos(arg), sin(arg)
+ arg += pi8
+ }
+ print ""
+ for (i = -5; i<= 5; i++) {
+ print "exp log", i, exp(i), log(exp(i))  # log(exp(i)) should round-trip back to i
+ }
+}
diff --git a/test/out1.good b/test/out1.good
new file mode 100644
index 00000000..f54b2b4d
--- /dev/null
+++ b/test/out1.good
@@ -0,0 +1 @@
+Goes to a file out1
diff --git a/test/out2.good b/test/out2.good
new file mode 100644
index 00000000..66b7d2f7
--- /dev/null
+++ b/test/out2.good
@@ -0,0 +1,2 @@
+Normal print statement
+This printed on stdout
diff --git a/test/out3.good b/test/out3.good
new file mode 100644
index 00000000..7eb822ff
--- /dev/null
+++ b/test/out3.good
@@ -0,0 +1 @@
+You blew it!
diff --git a/test/plus-minus b/test/plus-minus
new file mode 100644
index 00000000..9fec4bff
--- /dev/null
+++ b/test/plus-minus
@@ -0,0 +1,8 @@
+{
+ if ($1 == "-")
+ print "minus"
+ if ($1 == "+")
+ print "plus"
+ if (($1 != "-") && ($1 != "+"))
+ print "something else"
+}
diff --git a/test/posix b/test/posix
new file mode 100755
index 00000000..79474f30
--- /dev/null
+++ b/test/posix
@@ -0,0 +1,69 @@
+BEGIN {
+ a = "+2"; b = 2; c = "+2a"; d = "+2 "; e = " 2"  # b is a number; a, c, d, e are string constants
+
+ printf "Test #1: "
+ if (b == a) print "\"" a "\"" " compares as a number"
+ else print "\"" a "\"" " compares as a string"
+
+ printf "Test #2: "
+ if (b == c) print "\"" c "\"" " compares as a number"
+ else print "\"" c "\"" " compares as a string"
+
+ printf "Test #3: "
+ if (b == d) print "\"" d "\"" " compares as a number"
+ else print "\"" d "\"" " compares as a string"
+
+ printf "Test #4: "
+ if (b == e) print "\"" e "\"" " compares as a number"
+ else print "\"" e "\"" " compares as a string"
+
+ f = a + b + c + d + e  # use every variable in a numeric context, then repeat the comparisons
+ print "after addition"
+
+ printf "Test #5: "
+ if (b == a) print "\"" a "\"" " compares as a number"
+ else print "\"" a "\"" " compares as a string"
+
+ printf "Test #6: "
+ if (b == c) print "\"" c "\"" " compares as a number"
+ else print "\"" c "\"" " compares as a string"
+
+ printf "Test #7: "
+ if (b == d) print "\"" d "\"" " compares as a number"
+ else print "\"" d "\"" " compares as a string"
+
+ printf "Test #8: "
+ if (b == e) print "\"" e "\"" " compares as a number"
+ else print "\"" e "\"" " compares as a string"
+
+ printf "Test #9: "
+ if ("3e5" > "5") print "\"3e5\" > \"5\""
+ else print "\"3e5\" <= \"5\""
+
+ printf "Test #10: "
+ x = 32.14
+ y[x] = "test"  # element stored under the subscript derived from x
+ OFMT = "%e"  # changing OFMT must not change which element y[x] names
+ print y[x]
+
+ printf "Test #11: "
+ x = x + 0
+ print y[x]
+
+ printf "Test #12: "
+ OFMT="%f"
+ CONVFMT="%e"
+ print 1.5, 1.5 ""  # first operand is printed as a number, second is concatenated into a string first
+
+ printf "Test #13: "
+ if ( 1000000 "" == 1000001 "") print "match"
+ else print "nomatch"
+}
+{
+ printf "Test #14: "
+ FS = ":"  # FS is changed after the current record has been read
+ print $1
+ FS = ","
+ printf "Test #15: "
+ print $2
+}
diff --git a/test/posix.good b/test/posix.good
new file mode 100644
index 00000000..100b1505
--- /dev/null
+++ b/test/posix.good
@@ -0,0 +1,16 @@
+Test #1: "+2" compares as a string
+Test #2: "+2a" compares as a string
+Test #3: "+2 " compares as a string
+Test #4: " 2" compares as a string
+after addition
+Test #5: "+2" compares as a string
+Test #6: "+2a" compares as a string
+Test #7: "+2 " compares as a string
+Test #8: " 2" compares as a string
+Test #9: "3e5" <= "5"
+Test #10: test
+Test #11: test
+Test #12: 1.500000 1.500000e+00
+Test #13: nomatch
+Test #14: 1:2,3
+Test #15: 4
diff --git a/test/pound-bang b/test/pound-bang
new file mode 100755
index 00000000..a1c24d2b
--- /dev/null
+++ b/test/pound-bang
@@ -0,0 +1,3 @@
+#! /tmp/gawk -f
+ { ccount += length($0) }
+END { print "average line length is", ccount/NR}
diff --git a/test/pound-bang.good b/test/pound-bang.good
new file mode 100644
index 00000000..a94f6a99
--- /dev/null
+++ b/test/pound-bang.good
@@ -0,0 +1 @@
+average line length is 29.6667
diff --git a/test/rand.awk b/test/rand.awk
new file mode 100644
index 00000000..08f9894e
--- /dev/null
+++ b/test/rand.awk
@@ -0,0 +1,6 @@
+BEGIN {
+ srand()
+ for (i = 0; i < 19; i++)
+ printf "%3d ", (1 + int(100 * rand()))
+ print ""
+}
diff --git a/test/reg/exp-eq.awk b/test/reg/exp-eq.awk
new file mode 100644
index 00000000..fed6a694
--- /dev/null
+++ b/test/reg/exp-eq.awk
@@ -0,0 +1 @@
+{ $0 ^= 3 ; print $1}
diff --git a/test/reg/exp-eq.good b/test/reg/exp-eq.good
new file mode 100644
index 00000000..d8d59aa0
--- /dev/null
+++ b/test/reg/exp-eq.good
@@ -0,0 +1,3 @@
+1
+8
+27
diff --git a/test/reg/exp-eq.in b/test/reg/exp-eq.in
new file mode 100644
index 00000000..01e79c32
--- /dev/null
+++ b/test/reg/exp-eq.in
@@ -0,0 +1,3 @@
+1
+2
+3
diff --git a/test/reg/exp.awk b/test/reg/exp.awk
new file mode 100644
index 00000000..4e707f89
--- /dev/null
+++ b/test/reg/exp.awk
@@ -0,0 +1 @@
+BEGIN { print exp(0), exp(1000000), exp(0.5) }
diff --git a/test/reg/exp.good b/test/reg/exp.good
new file mode 100644
index 00000000..07b88537
--- /dev/null
+++ b/test/reg/exp.good
@@ -0,0 +1,2 @@
+1 gawk: reg/exp.awk:1: warning: exp argument 1e+06 is out of range
+Inf 1.64872
diff --git a/test/reg/exp.in b/test/reg/exp.in
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/test/reg/exp.in
diff --git a/test/reg/func.awk b/test/reg/func.awk
new file mode 100644
index 00000000..e32cd4e6
--- /dev/null
+++ b/test/reg/func.awk
@@ -0,0 +1 @@
+BEGIN { print dummy(1) }
diff --git a/test/reg/func.good b/test/reg/func.good
new file mode 100644
index 00000000..d3c7c715
--- /dev/null
+++ b/test/reg/func.good
@@ -0,0 +1 @@
+gawk: reg/func.awk:1: fatal: function `dummy' not defined
diff --git a/test/reg/func.in b/test/reg/func.in
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/test/reg/func.in
diff --git a/test/reg/func2.awk b/test/reg/func2.awk
new file mode 100644
index 00000000..2abf2c10
--- /dev/null
+++ b/test/reg/func2.awk
@@ -0,0 +1,2 @@
+function dummy() { ; }
+BEGIN { print dummy (1) }
diff --git a/test/reg/func2.good b/test/reg/func2.good
new file mode 100644
index 00000000..ae87bc3d
--- /dev/null
+++ b/test/reg/func2.good
@@ -0,0 +1,2 @@
+gawk: reg/func2.awk:2: fatal: function `dummy' called with space between name and (,
+or used in other expression context
diff --git a/test/reg/func2.in b/test/reg/func2.in
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/test/reg/func2.in
diff --git a/test/reg/log.awk b/test/reg/log.awk
new file mode 100644
index 00000000..bcae90b8
--- /dev/null
+++ b/test/reg/log.awk
@@ -0,0 +1 @@
+BEGIN { print log(0), log(-1), log(100) }
diff --git a/test/reg/log.good b/test/reg/log.good
new file mode 100644
index 00000000..857ab770
--- /dev/null
+++ b/test/reg/log.good
@@ -0,0 +1,4 @@
+log: SING error
+-Inf gawk: reg/log.awk:1: warning: log called with negative argument -1
+log: DOMAIN error
+NaN 4.60517
diff --git a/test/reg/log.in b/test/reg/log.in
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/test/reg/log.in
diff --git a/test/regtest b/test/regtest
new file mode 100755
index 00000000..72b0dbf9
--- /dev/null
+++ b/test/regtest
@@ -0,0 +1,18 @@
+#! /bin/sh
+# Run every reg/*.awk program and compare its output with the matching .good file.
+case "$AWK" in
+"") AWK=../gawk ;;  # default interpreter when $AWK is unset or empty
+esac
+#AWK=${AWK:-../gawk}   (same effect as the case statement above; left disabled)
+
+for i in reg/*.awk
+do
+ it=`basename $i .awk`  # test name: directory and .awk suffix removed
+ $AWK -f $i <reg/$it.in >reg/$it.out 2>&1  # stdout and stderr are both captured for the comparison
+ if cmp -s reg/$it.out reg/$it.good
+ then
+ rm -f reg/$it.out  # passed: discard the output
+ else
+ echo "regtest: $it fails"  # failed: reg/$it.out is left in place
+ fi
+done
diff --git a/test/reparse.awk b/test/reparse.awk
new file mode 100644
index 00000000..433ecbb2
--- /dev/null
+++ b/test/reparse.awk
@@ -0,0 +1,7 @@
+{
+ gsub(/x/, " ")  # replace every x in $0 with a blank
+ $0 = $0  # self-assignment makes awk split the modified record into fields again
+ print $1
+ print $0
+ print $1, $2, $3  # $2 and $3 come from the re-split record
+}
diff --git a/test/reparse.good b/test/reparse.good
new file mode 100644
index 00000000..6bdfacfa
--- /dev/null
+++ b/test/reparse.good
@@ -0,0 +1,3 @@
+1
+1 a b c 2
+1 a b
diff --git a/test/reparse.in b/test/reparse.in
new file mode 100644
index 00000000..6f31cde8
--- /dev/null
+++ b/test/reparse.in
@@ -0,0 +1 @@
+1 axbxc 2
diff --git a/test/reverse.awk b/test/reverse.awk
new file mode 100644
index 00000000..c6b2e299
--- /dev/null
+++ b/test/reverse.awk
@@ -0,0 +1,13 @@
+#this program creates palindromic output - slightly modified from Gawk Manual
+{
+ rev($0, length)  # bare length means length($0)
+}
+
+function rev(str, len) {  # print characters len down to 1 of str, then the current record
+ if (len == 0) {  # nothing left to reverse
+ print " ", $0  # finish the line with the original record
+ return
+ }
+ printf "%c", substr(str, len, 1)  # emit the character at position len
+ rev(str, len - 1)  # recurse on the characters before it
+}
diff --git a/test/rs.data b/test/rs.data
new file mode 100644
index 00000000..edef835e
--- /dev/null
+++ b/test/rs.data
@@ -0,0 +1,15 @@
+
+
+a
+b
+
+
+c d
+
+
+
+e
+
+
+
+
diff --git a/test/rs.good b/test/rs.good
new file mode 100644
index 00000000..7e14cc14
--- /dev/null
+++ b/test/rs.good
@@ -0,0 +1,4 @@
+a b
+c d
+e
+
diff --git a/test/splitargv.awk b/test/splitargv.awk
new file mode 100644
index 00000000..10886ef2
--- /dev/null
+++ b/test/splitargv.awk
@@ -0,0 +1,7 @@
+BEGIN {
+ for (idx = 1; idx < ARGC; idx++)
+ split(ARGV[idx], temp, ".");
+ }
+ {
+ print $0;
+ }
diff --git a/test/splitargv.good b/test/splitargv.good
new file mode 100644
index 00000000..10886ef2
--- /dev/null
+++ b/test/splitargv.good
@@ -0,0 +1,7 @@
+BEGIN {
+ for (idx = 1; idx < ARGC; idx++)
+ split(ARGV[idx], temp, ".");
+ }
+ {
+ print $0;
+ }
diff --git a/test/splitargv.in b/test/splitargv.in
new file mode 100644
index 00000000..10886ef2
--- /dev/null
+++ b/test/splitargv.in
@@ -0,0 +1,7 @@
+BEGIN {
+ for (idx = 1; idx < ARGC; idx++)
+ split(ARGV[idx], temp, ".");
+ }
+ {
+ print $0;
+ }
diff --git a/test/sqrt.awk b/test/sqrt.awk
new file mode 100644
index 00000000..c9d8f512
--- /dev/null
+++ b/test/sqrt.awk
@@ -0,0 +1,4 @@
+BEGIN {
+ for (i = 0; i <= 25; i++)
+ printf "gawk sez -- square root of %2d is %15.12f\n", i, sqrt(i)
+}
diff --git a/test/swaplns.awk b/test/swaplns.awk
new file mode 100644
index 00000000..6bf2240a
--- /dev/null
+++ b/test/swaplns.awk
@@ -0,0 +1,7 @@
+{
+ if ((getline tmp) > 0) {  # read the following line into tmp; $0 still holds the current line
+ print tmp  # later line of the pair goes out first
+ print
+ } else
+ print  # no following line: print the unpaired last line as is
+}
diff --git a/test/swaplns.good b/test/swaplns.good
new file mode 100644
index 00000000..d38b7caa
--- /dev/null
+++ b/test/swaplns.good
@@ -0,0 +1,9 @@
+features of gawk - mostly not present in an old awk. Some are from
+This directory contains some examples/test-cases for different
+Read header comments before attempting to use. Have fun and remember
+"The GAWK Manual", some are original, and some are mixture of the two.
+file.
+that program which consists only of BEGIN block does not need an input
+ --mj
+
+
diff --git a/test/up_down.awk b/test/up_down.awk
new file mode 100644
index 00000000..32ab847c
--- /dev/null
+++ b/test/up_down.awk
@@ -0,0 +1,15 @@
+{
+ lim = split ($0, line)  # lim is the number of words placed in line[]
+ out = ""
+ if (lim > 0) {
+ i = 0
+ while (i < lim) {
+ i++
+ if (i % 2)  # odd-numbered words go to upper case
+ out = out sprintf("%s ", toupper(line[i]))
+ else  # even-numbered words go to lower case
+ out = out sprintf("%s ", tolower(line[i]))
+ }
+ }
+ print out  # every word, including the last, is followed by one blank
+}
diff --git a/test/zap_cpp.awk b/test/zap_cpp.awk
new file mode 100644
index 00000000..99a5a1f4
--- /dev/null
+++ b/test/zap_cpp.awk
@@ -0,0 +1,13 @@
+# this will remove (comment out) all preprocessor traces from
+# cpp produced files:
+# run this awk program as follows
+# awk -f zap_cpp.awk <file>
+# and redirect output where you want it to
+NF > 0 {  # lines with no fields match no rule and are not printed
+ if ($1 ~ /^#/)  # first field starts with '#'
+ print "/*", $0, "*/"  # wrap the whole line in a C comment
+ else
+ print
+}
+
+
diff --git a/version.c b/version.c
index 046fbf22..0c365f85 100644
--- a/version.c
+++ b/version.c
@@ -1,4 +1,4 @@
-char *version_string = "@(#)Gnu Awk (gawk) 2.13";
+char *version_string = "@(#)Gnu Awk (gawk) 2.14";
/* 1.02 fixed /= += *= etc to return the new Left Hand Side instead
of the Right Hand Side */
@@ -39,3 +39,5 @@ char *version_string = "@(#)Gnu Awk (gawk) 2.13";
/* 2.13 Public release of 2.12 */
+/* 2.14 Mostly bug fixes. */
+
diff --git a/vms/descrip.mms b/vms/descrip.mms
index 040d458a..e22df05e 100644
--- a/vms/descrip.mms
+++ b/vms/descrip.mms
@@ -10,14 +10,14 @@
# enable the following ".first" rule and its associated action. For
# GNU C, change the LIBS macro definition.
#
-# awk_tab.c :
-# If you have DEC/Shell, change the PARSER and PASERINIT macros to use
-# yacc rather than bison. If you have neither yacc nor bison, you'll
-# have to make sure that the distributed version of "awk.tab.c" is
-# named "awk_tab.c" and that its modification date is later than the
-# date of "awk.y", so that MMS won't try to build that target. If you
-# use bison and it is already defined system-wide, comment out the
-# PARSERINIT definition.
+# awktab.c :
+# If you don't have bison but do have VMS POSIX or DEC/Shell,
+# change the PARSER and PARSERINIT macros to use yacc. If you don't
+# have either yacc or bison, you'll have to make sure that the
+# distributed version of "awktab.c" has its modification date later
+# than the date of "awk.y", so that MMS won't try to build that
+# target. If you use bison and it is already defined system-wide,
+# comment out the PARSERINIT definition.
#
# install.help :
# You can make the target 'install.help' to load the VMS help text
@@ -27,7 +27,7 @@
#
# gawk.dvi :
# If you have TeX, you can make the target 'gawk.dvi' to process
-# _The_GAWK_Manual_ from gawk.texinfo. You'll need to use a device
+# _The_GAWK_Manual_ from gawk.texi. You'll need to use a device
# specific post-processor on gawk.dvi in order to get printable data.
#
@@ -40,11 +40,15 @@ MAKEFILE = $(VMSDIR)Descrip.MMS
# work within the main directory, even when handling files in [.vms]
# note: use 2nd variant for either VAX C V2.x or for GNU C
-CFLAGS = /Include=[]/Object=[]/Opt=noInline $(CCFLAGS)
-#CFLAGS = /Include=([],$(VMSDIR))/Object=[] $(CCFLAGS)
+CFLAGS = /Include=[]/Object=[]/Opt=noInline/Define="GAWK" $(CCFLAGS)
+#CFLAGS = /Include=([],$(VMSDIR))/Object=[]/Define="GAWK" $(CCFLAGS)
# uncomment this for GNU C
#CC = gcc
+# beta VAX/VMS -> Alpha/VMS cross-compiler
+#CC = gemcc/Standard=VAXC/G_Float
+# Alpha/VMS
+#CC = cc/Standard=VAXC/G_Float
# uncomment these two lines for GNU C _if_ it's not installed system-wide
#.first !compiler init, needed if there's no system-wide setup
@@ -60,10 +64,12 @@ CFLAGS = /Include=[]/Object=[]/Opt=noInline $(CCFLAGS)
# run-time libraries; use the 2nd one for GNU C
LIBS = sys$share:vaxcrtl.exe/Shareable
#LIBS = gnu_cc:[000000]gcclib.olb/Library,sys$library:vaxcrtl.olb/Library
+#LIBS = # DECC$SHR instead of VAXCRTL; for Alpha/VMS (or VMS V6.x?)
PARSER = bison
PARSERINIT = set command gnu_bison:[000000]bison
#PARSER = yacc
+#PARSERINIT = yacc := posix/run/path=posix """/bin/yacc"
#PARSERINIT = yacc := $shell$exe:yacc
# this is used for optional target 'install.help'
@@ -79,9 +85,9 @@ ALLOCA = alloca.obj
# object files
AWKOBJS = main.obj,eval.obj,builtin.obj,msg.obj,iop.obj,io.obj,\
- field.obj,array.obj,node.obj,version.obj,missing.obj,re.obj
+ field.obj,array.obj,node.obj,version.obj,missing.obj,re.obj,getopt.obj
-ALLOBJS = $(AWKOBJS),awk_tab.obj
+ALLOBJS = $(AWKOBJS),awktab.obj
# GNUOBJS
# GNU stuff that gawk uses as library routines.
@@ -101,8 +107,8 @@ VMSOTHR = $(VMSDIR)Descrip.MMS,$(VMSDIR)vmsbuild.com,$(VMSDIR)version.com,\
$(VMSDIR)gawk.hlp
# Release of gawk
-REL=2.13
-PATCHLVL=2
+REL=2.14
+PATCHLVL=0
# dummy target to allow building "gawk" in addition to explicit "gawk.exe"
gawk : gawk.exe
@@ -120,7 +126,8 @@ gawk.opt : $(MAKEFILE) # create linker options file
@ write opt "$(VMSOBJS)"
@ write opt "$(LIBS)"
@ write opt "psect_attr=environ,noshr !extern [noshare] char **"
- @ write opt "stack=50 !preallocate more pages (default is 20)"
+ @ write opt "stack=48 !preallocate more pages (default is 20)"
+ @ write opt "iosegment=128 !ditto (default is 32)"
write opt "identification=""V$(REL).$(PATCHLVL)"""
close opt
@@ -134,18 +141,20 @@ vms_gawk.obj : $(VMSDIR)vms_gawk.c
vms_cli.obj : $(VMSDIR)vms_cli.c
dfa.obj : awk.h config.h dfa.h
regex.obj : awk.h config.h regex.h
+getopt.obj : getopt.h
main.obj : patchlevel.h
-awk_tab.obj : awk.h awk_tab.c
+awktab.obj : awk.h awktab.c
# bison or yacc required
-awk_tab.c : awk.y # foo.y :: yacc => y_tab.c, bison => foo_tab.c
- @- if f$search("y_tab.c").nes."" then delete y_tab.c;*
+awktab.c : awk.y # foo.y :: yacc => y[_]tab.c, bison => foo_tab.c
+ @- if f$search("ytab.c") .nes."" then delete ytab.c;* !POSIX yacc
+ @- if f$search("y_tab.c") .nes."" then delete y_tab.c;* !DEC/Shell yacc
+ @- if f$search("awk_tab.c").nes."" then delete awk_tab.c;* !bison
- $(PARSERINIT)
$(PARSER) $(YFLAGS) $<
- @- if f$search("y_tab.c").nes."" then rename/new_vers y_tab.c $@ !yacc
-
-##version.c : version.sh $(MAKEFILE)
-## @$(VMSDIR)version.com "$(REL)"
+ @- if f$search("ytab.c") .nes."" then rename/new_vers ytab.c $@
+ @- if f$search("y_tab.c") .nes."" then rename/new_vers y_tab.c $@
+ @- if f$search("awk_tab.c").nes."" then rename/new_vers awk_tab.c $@
config.h : [.config]vms-conf.h
copy $< $@
@@ -155,7 +164,7 @@ alloca.obj : alloca.c
$(CC) $(CFLAGS) /define=("STACK_DIRECTION=(-1)","exit=vms_exit") $<
$(VMSCMD) : $(VMSDIR)gawk.cld
- set command/object=$@ $(CLDFLAGS) $<
+ set command $(CLDFLAGS)/object=$@ $<
# special target for loading the help text into a VMS help library
install.help : $(VMS)gawk.hlp
@@ -175,16 +184,16 @@ spotless : clean tidy
#
# build gawk.dvi from within the 'support' subdirectory
#
-gawk.dvi : [.support]texindex.exe gawk.texinfo
+gawk.dvi : [.support]texindex.exe gawk.texi
@ set default [.support]
@ write sys$output " Warnings from TeX are expected during the first pass"
- TeX [-]gawk.texinfo
+ TeX [-]gawk.texi
mcr []texindex gawk.cp gawk.fn gawk.ky gawk.pg gawk.tp gawk.vr
@ write sys$output " Second pass"
- TeX [-]gawk.texinfo
+ TeX [-]gawk.texi
mcr []texindex gawk.cp gawk.fn gawk.ky gawk.pg gawk.tp gawk.vr
@ write sys$output " Third (final) pass"
- TeX [-]gawk.texinfo
+ TeX [-]gawk.texi
-@ purge
-@ delete gawk.lis;,.aux;,gawk.%%;,.cps;,.fns;,.kys;,.pgs;,.toc;,.tps;,.vrs;
@ rename/new_vers gawk.dvi [-]*.*
diff --git a/vms/gawk.hlp b/vms/gawk.hlp
index 660e0353..c8f4b453 100644
--- a/vms/gawk.hlp
+++ b/vms/gawk.hlp
@@ -1,6 +1,7 @@
! Gawk.Hlp
! Pat Rankin, Jun'90
! revised, Jun'91
+! revised, Jul'92
! Online help for GAWK.
!
1 GAWK
@@ -30,6 +31,8 @@
program. However, GAWK is written in 'C' and the C Run-Time Library
(VAXCRTL) converts unquoted text into *lowercase*. Therefore, the
-Fval and -W options must be enclosed in quotes.
+
+ Note: under VMS POSIX, the usual shell command line processing occurs.
3 options
-f file use the specified file as the awk program source; if more
than one instance of -f is used, each file will be read
@@ -57,7 +60,7 @@
(since awk's 'print' statement includes the trailing 'newline').
On VMS, to include a quote character inside of a quoted string, two
- successive quotes ("") must be used.
+ successive quotes ("") must be used. (Not necessary for VMS POSIX.)
3 data_files
After all dash-options are examined, and after the program text if
there were no occurrences of the -f option, remaining (space separated)
@@ -80,7 +83,7 @@
perform some file manipulation from the command line:
<ifile open file 'ifile' (readonly) as 'stdin' [SYS$INPUT]
- >nfile create 'nfile' at 'stdout' [SYS$OUTPUT], in stream-lf format
+ >nfile create 'nfile' as 'stdout' [SYS$OUTPUT], in stream-lf format
>>ofile append to 'ofile' for 'stdout'; create it if necessary
>&efile point 'stderr' [SYS$ERROR] at 'efile', but don't open it yet
>$vfile create 'vfile' as 'stdout', using RMS attributes appropriate
@@ -93,6 +96,10 @@
>>$vfile incorrect; would be interpreted as file "$vfile" in stream-lf
format rather than as file "vfile" in RMS 'text' format
| error; command line pipes not supported
+
+ Note: under VMS POSIX these features are implemented by the shell
+ rather than inside GAWK, so consult the shell documentation for
+ specific details.
3 wildcard_expansion
The command parsing in the VMS implementation of GAWK does some
emulation of a UN*X-style shell, where certain characters on the
@@ -109,6 +116,11 @@
found, those filenames are put into the command line in place of the
original pattern. If no matching files are found, the original
pattern is left in place.
+
+ Note: under VMS POSIX wildcard expansion, or "file globbing", is
+ performed by the shell rather than inside GAWK, so consult the shell
+ documentation for details. In particular, the last sentence of the
+ previous paragraph does not apply.
2 DCL_syntax
GAWK's DCL-style interface is more or less a standard DCL command, with
one required parameter. Multiple values--when present--are separated
@@ -120,6 +132,8 @@
Usage: GAWK /COMMANDS="awk program text" data_file[,data_file,...]
or GAWK /INPUT=awk_file data_file[,"Var=value",data_file,...]
( or GAWK /INPUT=(awk_file1,awk_file2,...) data_file[,...] )
+
+ Not applicable under VMS POSIX.
3 Parameter
data_file[,datafile,...] (data_file data_file ...)
data_file[,"Var=value",...,data_file,...] (data_file Var=value &c)
@@ -536,9 +550,10 @@
false (instead of actually re-evaluating it). In this case, the
increment-expression of a for-loop is also skipped.
- Both 'break' and 'continue' are only allowed within a loop ('for',
- 'while', or 'do-while'), and in nested loops they only apply to the
- innermost loop.
+ 'break' is only allowed within a loop ('for', 'while', or
+ 'do-while'). If 'continue' is used outside of a loop, it is
+ treated like 'next' (see action-controls). Inside nested loops,
+ both 'break' and 'continue' only apply to the innermost loop.
4 action-controls
There are two special statements for controlling statement execution.
The 'next' statement, when executed, causes the rest of the current
@@ -546,6 +561,10 @@
the next input record will be immediately processed. This is useful
if any early action knows that the current record will fail all the
remaining patterns; skipping those rules will reduce processing time.
+ An extended form, 'next file', is also available. It causes the
+ remainder of the current file to be skipped, and then either the
+ next input file will be processed, if any, or the END action will be
+ performed. 'next file' is not available in traditional awk.
The 'exit' statement causes GAWK execution to terminate. All open
files are closed, and no further processing is done. The END rule,
@@ -1031,10 +1050,10 @@
incorporated into the official GNU distribution of version 2.13 in
Spring 1991. (Version 2.12 was never publically released.)
2 release_notes
- GAWK 2.13 tested under VMS V5.3 and V5.4-2, May, 1991; compatible with
- VMS versions V4.6 and later. Current source code compatible with DEC's
- VAXC v3.x and v2.4 or v2.3; also compiles successfully with GNUC (GNU's
- gcc).
+ GAWK 2.14 tested under VMS V5.5, July, 1992; compatible with VMS
+ versions V4.6 and later. Current source code compatible with DEC's
+ VAXC v3.x and v2.4 or v2.3; also compiles successfully with GNUC
+ (GNU's gcc). VMS POSIX uses c89 and requires VAXC V3.x.
3 AWK_LIBRARY
GAWK uses a built in search path when looking for a program file
specified by the -f option (or the /input qualifier) when that file
@@ -1042,9 +1061,12 @@
look in the current default directory, then if the file wasn't found
it will look in the directory specified by the translation of logical
name "AWK_LIBRARY".
+
+ Not applicable under VMS POSIX.
3 known_problems
There are several known problems with GAWK running on VMS. Some can
- be ignored, others require work-arounds.
+ be ignored, others require work-arounds. Note: GAWK in the VMS POSIX
+ environment does not have these problems.
4 command_line_parsing
The command
gawk "program text"
@@ -1117,6 +1139,21 @@
failure. The final exit status will be 1 (VMS success) if 0 is
used, or even (VMS non-success) if non-zero is used.
3 changes
+ Changes between version 2.14 and 2.13.2:
+
+ General
+ 'next file' construct added
+ 'continue' outside of any loop is treated as 'next'
+ Assorted bug fixes and efficiency improvements
+ _The_GAWK_Manual_ updated
+ Test suite expanded
+
+ VMS-specific
+ VMS POSIX support added
+ Disk I/O throughput enhanced
+ Pipe emulation improved and incorrect interaction with user-mode
+ redefinition of SYS$OUTPUT eliminated
+3 prior_changes
Changes between version 2.13 and 2.11.1: (2.12 was not released)
General
diff --git a/vms/unixlib.h b/vms/unixlib.h
index 17d99706..24fadce1 100644
--- a/vms/unixlib.h
+++ b/vms/unixlib.h
@@ -20,5 +20,8 @@ char *ecvt(), *fcvt(), *gcvt();
int getpid(), getppid();
-unsigned getgid(), getuid(), getegid(), geteuid();
+unsigned getuid();
+#ifndef _stdlib_h /* gcc's stdlib.h has these with conflicting types */
+unsigned getgid(), getegid(), geteuid();
+#endif
int setgid(), setuid(); /* no-ops */
diff --git a/vms/vms.h b/vms/vms.h
index 378adba2..f2a180a2 100644
--- a/vms/vms.h
+++ b/vms/vms.h
@@ -59,10 +59,10 @@ extern u_long LIB$FIND_FILE_END P((void *));
extern u_long LIB$GET_EF P((long *));
extern u_long SYS$ASSIGN P((const Dsc *, short *, long, const Dsc *));
extern u_long SYS$DASSGN P((short));
-extern u_long SYS$QIO P((long, short, long, void *, const void *, long,
+extern u_long SYS$QIO P((u_long, u_long, u_long, void *, void (*)(), u_long,
const char *, int, int, u_long, int, int));
extern u_long SYS$SYNCH P((long, void *));
-#endif !NO_TTY_FWRITE
+#endif /*!NO_TTY_FWRITE*/
/* system services for logical name manipulation */
extern u_long SYS$TRNLNM P((const u_long *,const Dsc *,const Dsc *,
const unsigned char *,Itm *));
@@ -75,7 +75,7 @@ extern void v_add_arg P((int, const char *));
extern void vms_exit P((int));
extern char *vms_strerror P((int));
extern char *vms_strdup P((const char *));
-extern int vms_devopen P((const char *));
+extern int vms_devopen P((const char *,int));
extern int vms_execute P((const char *, const char *, const char *));
extern int vms_gawk P((void));
extern u_long Cli_Present P((const char *));
diff --git a/vms/vms_args.c b/vms/vms_args.c
index b317d8d0..1ba179a3 100644
--- a/vms/vms_args.c
+++ b/vms/vms_args.c
@@ -69,6 +69,13 @@
* file-spec is left in the argument list rather than having it expand
* into thin air. No attempt is made to identify and make $(var)
* environment substitutions--must draw the line somewhere!
+ *
+ * Oct'91, gawk 2.13.3
+ * Open '<' with full sharing allowed, so that we can read batch logs
+ * and other open files. Create record-format output ('>$') with read
+ * sharing permitted, so that others can read our output file to check
+ * progress. For stream output ('>' or '>>'), sharing is disallowed
+ * (for performance reasons).
*/
#include "awk.h" /* really "../awk.h" */
@@ -91,14 +98,14 @@ void
vms_arg_fixup( int *pargc, char ***pargv )
{
char *f_in, *f_out, *f_err,
- *out_mode, *rms_opt1, *rms_opt2;
+ *out_mode, *rms_opt1, *rms_opt2, *rms_opt3, *rms_opt4;
char **argv = *pargv;
int i, argc = *pargc;
int err_to_out_redirect = 0, out_to_err_redirect = 0;
-#ifndef NO_CHECK_SHELL
+#ifdef CHECK_DECSHELL /* don't define this if linking with DECC$SHR */
if (shell$is_shell())
- return; /* don't do anything if we're running DECshell */
+ return; /* don't do anything if we're running DEC/Shell */
#endif
#ifndef NO_DCL_CMD
for (i = 1; i < argc ; i++) /* check for dash or other non-VMS args */
@@ -109,11 +116,13 @@ vms_arg_fixup( int *pargc, char ***pargv )
v_argz = v_argc = 0, v_argv = NULL;
}
#endif
- v_add_arg(v_argc = 0, basename(argv[0])); /* store arg #0 (image name) */
+ v_add_arg(v_argc = 0, argv[0]); /* store arg #0 (image name) */
f_in = f_out = f_err = NULL; /* stdio setup (no filenames yet) */
out_mode = "w"; /* default access for stdout */
rms_opt1 = rms_opt2 = "ctx=stm"; /* ("context = stream") == no-opt */
+ rms_opt3 = "shr=nil"; /* no sharing (for '>' output file) */
+ rms_opt4 = "mrs=0"; /* maximum record size */
for (i = 1; i < argc; i++) {
char *p, *fn;
@@ -147,9 +156,10 @@ vms_arg_fixup( int *pargc, char ***pargv )
else if (*p == '&') /* '>&' => stderr */
is_out = 0, p++;
else if (*p == '$') /* '>$' => kludge for record format */
- rms_opt1 = "rfm=var", rms_opt2 = "rat=cr", p++;
+ rms_opt1 = "rfm=var", rms_opt2 = "rat=cr",
+ rms_opt3 = "shr=get", rms_opt4 = "mrs=32767", p++;
else /* '>' => create */
- ; /* use default values initialized prior to loop */
+ {} /* use default values initialized prior to loop */
p = skipblanks(p);
fn = (*p ? p : argv[++i]); /* use next arg if necessary */
if (i >= argc || *fn == '-') {
@@ -197,8 +207,9 @@ vms_arg_fixup( int *pargc, char ***pargv )
if (*(p+1) == '\0' && i == argc - 1) {
fatal("background tasks not available ('&' encountered)");
break;
- } else /* fall through */
+ } else { /* fall through */
; /*NOBREAK*/
+ }
case '-': /* argument */
is_arg = 1; /*(=> skip wildcard check)*/
default: /* other (filespec assumed) */
@@ -239,24 +250,23 @@ ordinary_arg:
else
(void) vms_define("SYS$ERROR", f_err);
}
- /* do stdin before stdout, so we bomb we won't create empty output file */
+ /* do stdin before stdout, so if we bomb we won't make empty output file */
if (f_in) { /* [re]open file and define logical name */
- stdin = freopen(f_in, "r", stdin, "mbf=2");
+ stdin = freopen(f_in, "r", stdin,
+ "ctx=rec", "shr=get,put,del,upd",
+ "mrs=32767", "mbc=24", "mbf=2");
if (stdin != NULL)
(void) vms_define("SYS$INPUT", f_in);
else
fatal("<%s (%s)", f_in, strerror(errno));
}
- if (f_out) { /* disallow file sharing to reduce overhead */
+ if (f_out) {
stdout = freopen(f_out, out_mode, stdout,
- rms_opt1, rms_opt2, "shr=nil", "mbf=2"); /*VAXCRTL*/
- if (stdout != NULL) {
-#ifdef crtl_bug /* eof sometimes doesn't get set properly for stm_lf file */
-# define BIGBUF 8*BUFSIZ /* maximum record size: 4096 instead of 512 */
- setvbuf(stdout, malloc(BIGBUF), _IOFBF, BIGBUF);
-#endif
+ rms_opt1, rms_opt2, rms_opt3, rms_opt4,
+ "mbc=24", "mbf=2");
+ if (stdout != NULL)
(void) vms_define("SYS$OUTPUT", f_out);
- } else
+ else
fatal(">%s%s (%s)", (*out_mode == 'a' ? ">" : ""),
f_out, strerror(errno));
}
@@ -342,8 +352,6 @@ v_add_arg( int idx, const char *val )
fatal("%s: %s: can't allocate memory (%s)", "vms_args",
"v_argv", strerror(errno));
} else {
- memmsg((oldsize == 0 ? "v_argv" : "re: v_argv"), v_argz,
- "vms_args", v_argv);
while (old_size < v_argz) v_argv[old_size++] = NULL;
}
}
diff --git a/vms/vms_cli.c b/vms/vms_cli.c
index e4e33404..2f475b11 100644
--- a/vms/vms_cli.c
+++ b/vms/vms_cli.c
@@ -7,6 +7,9 @@
#define P(foo) ()
#include "config.h" /* in case we want to suppress 'const' &c */
#include "vms.h"
+#ifndef _STRING_H
+#include <string.h>
+#endif
extern u_long CLI$PRESENT(const Dsc *);
extern u_long CLI$GET_VALUE(const Dsc *, Dsc *, short *);
diff --git a/vms/vms_fwrite.c b/vms/vms_fwrite.c
index 94c345a5..6f2637b3 100644
--- a/vms/vms_fwrite.c
+++ b/vms/vms_fwrite.c
@@ -42,8 +42,8 @@ static int find_c( const char *s, int n, char c ) {
#define is_stdout(file_no) ((file_no) == 1) /* fileno(stdout) */
#define is_stderr(file_no) ((file_no) == 2) /* fileno(stderr) */
-#define PREFIX_CR (0x8D << 16) /* leading carriage return */
-#define POSTFIX_CR (0x8D << 24) /* trailing carriage return (=> lf/cr) */
+#define PREFIX_CR 0x008D0000 /* leading carriage return */
+#define POSTFIX_CR 0x8D000000 /* trailing carriage return (=> lf/cr) */
static short channel[_NFILE] = {0};
static FILE *prev_file = 0;
@@ -121,7 +121,7 @@ tty_fwrite( const void *buf, size_t size, size_t number, FILE *file )
if (vmsfail(sts)) break;
}
/* queue an asynchronous write */
- sts = SYS$QIO(evfn, chan, io_func, &iosb, (u_long (*)())0, 0,
+ sts = SYS$QIO(evfn, chan, io_func, &iosb, (void (*)())0, 0L,
pt, pos, 0, cc_fmt, 0, 0);
if (vmsfail(sts)) break; /*(should never happen)*/
pt += pos, count -= pos;
diff --git a/vms/vms_gawk.c b/vms/vms_gawk.c
index ec4747d3..552139bd 100644
--- a/vms/vms_gawk.c
+++ b/vms/vms_gawk.c
@@ -43,6 +43,8 @@
#define Get_Value(arg,buf,siz) vmswork(Cli_Get_Value(arg,buf,siz))
extern void gawk_cmd(); /* created with $ SET COMMAND/OBJECT */
+#define GAWK_CMD ((const void *)gawk_cmd)
+extern void _exit(int);
static int vms_usage(int);
#define ARG_SIZ 250
@@ -77,7 +79,7 @@ vms_gawk()
command, so we'll now attempt to generate a command from the
foreign command string and parse that.
*/
- sts = Cli_Parse_Command(gawk_cmd, COMMAND_NAME);
+ sts = Cli_Parse_Command(GAWK_CMD, COMMAND_NAME);
if (vmswork(sts))
sts = Cli_Present("GAWK_P1");
}
diff --git a/vms/vms_misc.c b/vms/vms_misc.c
index c7044348..58231ff4 100644
--- a/vms/vms_misc.c
+++ b/vms/vms_misc.c
@@ -23,7 +23,13 @@
* the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include "awk.h" /* really "../awk.h" */
+#define creat creat_dummy /* one of gcc-vms's headers has bad prototype */
+#include "awk.h"
+#undef creat
+#include <fab.h>
+#ifndef O_RDONLY
+#include <fcntl.h>
+#endif
#include <ssdef.h>
#include <stsdef.h>
@@ -36,9 +42,9 @@
# undef exit
#endif
void
-vms_exit( int errno ) /* note: local override of global 'errno' */
+vms_exit( int final_status )
{
- exit(errno == 0 ? SS$_NORMAL : (SS$_ABORT | STS$M_INHIB_MSG));
+ exit(final_status == 0 ? SS$_NORMAL : (SS$_ABORT | STS$M_INHIB_MSG));
}
#define exit(v) vms_exit(v)
@@ -55,7 +61,7 @@ vms_exit( int errno ) /* note: local override of global 'errno' */
char *
vms_strerror( int errnum )
{
- extern char *strerror( /* int, ... */ );
+ extern char *strerror P((int,...));
return ( errnum != EVMSERR ? strerror(errnum)
: strerror(EVMSERR, vaxc$errno) );
}
@@ -93,21 +99,53 @@ unlink( const char *file_spec ) {
}
/*
+ * Work-around an open(O_CREAT+O_TRUNC) bug (screwed up modification
+ * and creation dates when new version is created), and also use some
+ * VMS-specific file options. Note: optional 'prot' arg is completely
+ * ignored; gawk doesn't need it.
+ */
+#ifdef open
+# undef open
+#endif
+/* vms_open() - open a file, possibly creating it */
+int
+vms_open( const char *name, int mode, ... )
+{
+ extern int creat P((const char *,int,...));
+ extern int open P((const char *,int,unsigned,...));
+
+ if (mode == (O_WRONLY|O_CREAT|O_TRUNC)) /* exactly "create/truncate for writing" */
+ return creat(name, 0, "shr=nil", "mbc=24"); /* creat() sidesteps the open() date bug; no sharing */
+ else {
+ struct stat stb;
+ const char *mbc, *shr = "shr=get"; /* default sharing unless upgraded below */
+
+ if (stat(name, &stb) < 0) { /* assume DECnet */
+ mbc = "mbc=8";
+ } else { /* ordinary file; allow full sharing iff record format */
+ mbc = "mbc=12";
+ if (stb.st_fab_rfm < FAB$C_STM) shr = "shr=get,put,upd"; /* presumably record formats order below FAB$C_STM -- confirm in <fab.h> */
+ }
+ return open(name, mode, 0, shr, mbc, "mbf=2"); /* third arg (protection) is always passed as 0 */
+ }
+}
+
+ /*
* Check for attempt to (re-)open known file.
*/
/* vms_devopen() - check for "SYS$INPUT" or "SYS$OUTPUT" or "SYS$ERROR" */
int
-vms_devopen( const char *name )
+vms_devopen( const char *name, int mode )
{
FILE *file = NULL;
if (strncasecmp(name, "SYS$", 4) == 0) {
name += 4; /* skip "SYS$" */
- if (strncasecmp(name, "INPUT", 5) == 0)
+ if (strncasecmp(name, "INPUT", 5) == 0 && (mode & O_WRONLY) == 0)
file = stdin, name += 5;
- else if (strncasecmp(name, "OUTPUT", 6) == 0)
+ else if (strncasecmp(name, "OUTPUT", 6) == 0 && (mode & O_WRONLY) != 0)
file = stdout, name += 6;
- else if (strncasecmp(name, "ERROR", 5) == 0)
+ else if (strncasecmp(name, "ERROR", 5) == 0 && (mode & O_WRONLY) != 0)
file = stderr, name += 5;
if (*name == ':') name++; /* treat trailing colon as optional */
}
@@ -132,9 +170,9 @@ void tzset()
# ifdef bcopy
# undef bcopy
# endif
-void bcopy( char *src, char *dst, int len )
+void bcopy( const char *src, char *dst, int len )
{
- (void) OTS$MOVE3(len, src, dst);
+ (void) memcpy(dst, src, len);
}
#endif /*!__GNUC__*/
diff --git a/vms/vms_popen.c b/vms/vms_popen.c
index 654364c4..b962d01d 100644
--- a/vms/vms_popen.c
+++ b/vms/vms_popen.c
@@ -54,7 +54,7 @@ fork()
return -1;
}
-#else PIPES_SIMULATED
+#else /*PIPES_SIMULATED*/
/*
* Simulate pipes using temporary files; hope that the user
* doesn't expect pipe i/o to be interleaved with other i/o ;-}.
@@ -116,7 +116,7 @@ popen( const char *command, const char *mode )
/* an input pipe reads a temporary file created by the command */
vms_execute(command, (char *)0, name); /* 'command >tempfile' */
}
- if ((current = fopen(name, mode, "mbf=2")) == NULL) {
+ if ((current = fopen(name, mode, "mbc=24", "mbf=2")) == NULL) {
free(name);
return NULL;
}
@@ -344,6 +344,6 @@ restore_translation( const Dsc *logname, const Itm *itemlist )
}
}
-#endif /* PIPES_SIMULATED */
+#endif /*PIPES_SIMULATED*/
#endif /*!NO_VMS_PIPES*/
diff --git a/vms/vmsbuild.com b/vms/vmsbuild.com
index e823eb1b..069a4ca8 100644
--- a/vms/vmsbuild.com
+++ b/vms/vmsbuild.com
@@ -1,34 +1,40 @@
$! vmsbuild.com -- Commands to build GAWK Pat Rankin, Dec'89
$! revised, Mar'90
$! gawk 2.13 revised, Jun'91
+$! gawk 2.14 revised, Sep'92
$!
-$ REL = "2.13" !release version number
-$ PATCHLVL = "2"
+$ REL = "2.14" !release version number
+$ PATCHLVL = "0"
$!
-$! [ remove "/optimize=noinline" for VAX C V2.x ]
+$! [ remove "/optimize=noinline" for VAX C V2.x or DEC C ]
+$! [ add "/standard=VAXC" for DEC C and "/g_float" for Alpha ]
$ if f$type(cc) .nes."STRING" then cc := cc/nolist/optimize=noinline
$ if f$type(link).nes."STRING" then link := link/nomap
+$ if f$type(set_command).nes."STRING" then set_command := set command
$!
-$ cc := 'cc'/include=[]
+$ cc := 'cc'/Include=[]/Define="""GAWK"""
$ libs = "sys$share:vaxcrtl.exe/Shareable"
$
+$! uncomment for DEC C
+$ ! libs = ""
+$
$! uncomment the next two lines for VAX C V2.x
$ ! define vaxc$library sys$library:,sys$disk:[.vms]
$ ! define c$library [],[.vms]
$!
$! uncomment next two lines for GNU C
-$ ! cc := gcc/include=([],[.vms]) !use GNU C rather than VAX C
+$ ! cc := gcc/Include=([],[.vms])/Define="""GAWK""" !use GNU C rather than VAX C
$ ! libs = "gnu_cc:[000000]gcclib.olb/Library,sys$library:vaxcrtl.olb/Library"
$!
-$ if f$search("config.h") .eqs."" then copy [.config]vms-conf.h []config.h
-$ if f$search("awk_tab.c").nes."" then goto awk_tab_ok
+$ if f$search("config.h").eqs."" then copy [.config]vms-conf.h []config.h
+$ if f$search("awktab.c").nes."" then goto awktab_ok
$ write sys$output " You must process `awk.y' with ""yacc"" or ""bison"""
-$ if f$search("awk.tab_c").nes."" then - !unpacked with poor 'tar' reader
- write sys$output " or else rename `awk.tab_c' to `awk_tab.c'."
-$ if f$search("y_tab.c").nes."" then - !yacc was run manually
- write sys$output " or else rename `y_tab.c' to `awk_tab.c'."
+$ if f$search("awk_tab.c").nes."" then - !bison was run manually
+ write sys$output " or else rename `awk_tab.c' to `awktab.c'."
+$ if f$search("ytab.c").nes."" .or. f$search("y_tab.c").nes."" then - !yacc
+ write sys$output " or else rename `ytab.c' or `y_tab.c' to `awktab.c'."
$ exit
-$awk_tab_ok:
+$awktab_ok:
$ cc main.c
$ cc eval.c
$ cc builtin.c
@@ -40,9 +46,10 @@ $ cc array.c
$ cc node.c
$ cc version.c
$ cc missing.c
-$ cc awk_tab.c
-$ cc regex.c
$ cc re.c
+$ cc getopt.c
+$ cc awktab.c
+$ cc regex.c
$ cc dfa.c
$ cc/define=("STACK_DIRECTION=(-1)","exit=vms_exit") alloca
$ cc [.vms]vms_misc.c
@@ -51,17 +58,18 @@ $ cc [.vms]vms_fwrite.c
$ cc [.vms]vms_args.c
$ cc [.vms]vms_gawk.c
$ cc [.vms]vms_cli.c
-$ set command/object=[]gawk_cmd.obj [.vms]gawk.cld
+$ set_command/object=[]gawk_cmd.obj [.vms]gawk.cld
$!
$ create gawk.opt
! GAWK -- Gnu AWK
main.obj,eval.obj,builtin.obj,msg.obj,iop.obj,io.obj
-field.obj,array.obj,node.obj,version.obj,missing.obj,awk_tab.obj
-regex.obj,re.obj,dfa.obj,[]alloca.obj
+field.obj,array.obj,node.obj,version.obj,missing.obj
+re.obj,getopt.obj,awktab.obj,regex.obj,dfa.obj,[]alloca.obj
[]vms_misc.obj,vms_popen.obj,vms_fwrite.obj
[]vms_args.obj,vms_gawk.obj,vms_cli.obj,gawk_cmd.obj
psect_attr=environ,noshr !extern [noshare] char **
-stack=50 !preallocate more pages (default is 20)
+stack=48 !preallocate more pages (default is 20)
+iosegment=128 !ditto (default is 32)
$ open/append Fopt gawk.opt
$ write Fopt libs
$ write Fopt "identification=""V''REL'.''PATCHLVL'"""