author     Arnold D. Robbins <arnold@skeeve.com>   2010-07-16 12:41:09 +0300
committer  Arnold D. Robbins <arnold@skeeve.com>   2010-07-16 12:41:09 +0300
commit     8c042f99cc7465c86351d21331a129111b75345d (patch)
tree       9656e653be0e42e5469cec77635c20356de152c2
parent     8ceb5f934787eb7be5fb452fb39179df66119954 (diff)
download   gawk-8c042f99cc7465c86351d21331a129111b75345d.tar.gz
Move to gawk-3.0.0. (gawk-3.0.0)
-rw-r--r--ACKNOWLEDGMENT10
-rw-r--r--COPYING5
-rw-r--r--ChangeLog23
-rw-r--r--FUTURES133
-rw-r--r--INSTALL213
-rw-r--r--LIMITATIONS2
-rw-r--r--Makefile.bsd4414
-rw-r--r--Makefile.cline65
-rw-r--r--Makefile.dec269
-rw-r--r--Makefile.in320
-rw-r--r--NEWS109
-rw-r--r--POSIX.STD (renamed from POSIX)14
-rw-r--r--PROBLEMS4
-rw-r--r--README119
-rw-r--r--README.amiga83
-rw-r--r--README.atari37
-rw-r--r--README.hpux8x18
-rw-r--r--README.pc119
-rw-r--r--README.rs600021
-rw-r--r--README.rt-aos20
-rw-r--r--README.sgi5
-rw-r--r--README.sun386i45
-rw-r--r--README_d/README.FIRST (renamed from README.FIRST)2
-rw-r--r--README_d/README.VMS (renamed from README.VMS)0
-rw-r--r--README_d/README.atari21
-rw-r--r--README_d/README.pc178
-rw-r--r--README_d/README.sgi17
-rw-r--r--README_d/README.sunos48
-rw-r--r--README_d/README.ultrix (renamed from README.ultrix)3
-rw-r--r--README_d/README.yacc (renamed from README.yacc)0
-rw-r--r--acconfig.h34
-rw-r--r--aclocal.m4134
-rw-r--r--alloca.c516
-rw-r--r--alloca.s352
-rw-r--r--amiga/ChangeLog3
-rw-r--r--amiga/gawkmisc.ami124
-rw-r--r--array.c172
-rw-r--r--atari/ChangeLog3
-rw-r--r--atari/Makefile.awklib104
-rw-r--r--atari/Makefile.st240
-rw-r--r--atari/config.h186
-rw-r--r--atari/gawkmisc.atr124
-rw-r--r--atari/mkconf.g18
-rw-r--r--atari/mkscrpt.sed15
-rw-r--r--atari/redirect.h32
-rw-r--r--atari/tmpnam.c4
-rw-r--r--awk.h511
-rw-r--r--awk.y1016
-rw-r--r--awklib/Makefile.in96
-rw-r--r--awklib/eg/data/BBS-list11
-rw-r--r--awklib/eg/data/inventory-shipped17
-rw-r--r--awklib/eg/lib/assert.awk18
-rw-r--r--awklib/eg/lib/ctime.awk11
-rw-r--r--awklib/eg/lib/ftrans.awk15
-rw-r--r--awklib/eg/lib/getopt.awk82
-rw-r--r--awklib/eg/lib/gettime.awk61
-rw-r--r--awklib/eg/lib/grcat.c34
-rw-r--r--awklib/eg/lib/groupawk.in80
-rw-r--r--awklib/eg/lib/join.awk15
-rw-r--r--awklib/eg/lib/mktime.awk106
-rw-r--r--awklib/eg/lib/nextfile.awk15
-rw-r--r--awklib/eg/lib/ord.awk54
-rw-r--r--awklib/eg/lib/passwdawk.in56
-rw-r--r--awklib/eg/lib/pwcat.c29
-rw-r--r--awklib/eg/misc/arraymax.awk10
-rw-r--r--awklib/eg/misc/arraymax.data5
-rw-r--r--awklib/eg/misc/findpat.data7
-rw-r--r--awklib/eg/misc/findpat.sh10
-rw-r--r--awklib/eg/prog/alarm.awk81
-rw-r--r--awklib/eg/prog/awksed.awk31
-rw-r--r--awklib/eg/prog/cut.awk136
-rw-r--r--awklib/eg/prog/dupword.awk16
-rw-r--r--awklib/eg/prog/egrep.awk96
-rw-r--r--awklib/eg/prog/extract.awk72
-rw-r--r--awklib/eg/prog/histsort.awk14
-rw-r--r--awklib/eg/prog/id.awk69
-rw-r--r--awklib/eg/prog/igawk.sh130
-rw-r--r--awklib/eg/prog/labels.awk53
-rw-r--r--awklib/eg/prog/split.awk54
-rw-r--r--awklib/eg/prog/tee.awk38
-rw-r--r--awklib/eg/prog/translate.awk46
-rw-r--r--awklib/eg/prog/uniq.awk116
-rw-r--r--awklib/eg/prog/wc.awk68
-rw-r--r--awklib/eg/prog/wordfreq.awk13
-rw-r--r--awklib/extract.awk87
-rw-r--r--awklib/group.awk80
-rwxr-xr-xawklib/igawk.save120
-rw-r--r--awklib/passwd.awk56
-rw-r--r--awklib/stamp-eg2
-rw-r--r--awktab.c2541
-rw-r--r--builtin.c768
-rw-r--r--config.in304
-rw-r--r--config/apollo6
-rw-r--r--config/atari11
-rw-r--r--config/bsd4216
-rw-r--r--config/bsd4316
-rw-r--r--config/bsd43r3
-rw-r--r--config/bsd43t14
-rw-r--r--config/bsd445
-rw-r--r--config/convex7
-rw-r--r--config/cray9
-rw-r--r--config/cray2-507
-rw-r--r--config/cray2-606
-rw-r--r--config/cray609
-rw-r--r--config/gnu6
-rw-r--r--config/hiosf15
-rw-r--r--config/hiuxwe25
-rw-r--r--config/hpux7.09
-rw-r--r--config/hpux8x5
-rw-r--r--config/ibmrt-aos19
-rw-r--r--config/interactive2.29
-rw-r--r--config/linux3
-rw-r--r--config/lynxos10
-rw-r--r--config/mach9
-rw-r--r--config/msc609
-rw-r--r--config/news6
-rw-r--r--config/next208
-rw-r--r--config/next217
-rw-r--r--config/next307
-rw-r--r--config/osf13
-rw-r--r--config/osf1.dec4
-rw-r--r--config/riscos4527
-rw-r--r--config/rs60006
-rw-r--r--config/sco6
-rw-r--r--config/sequent17
-rw-r--r--config/sgi5
-rw-r--r--config/sgi334
-rw-r--r--config/sgi33.cc5
-rw-r--r--config/sgi4055
-rw-r--r--config/sgi405.cc8
-rw-r--r--config/solaris2.cc7
-rw-r--r--config/sunos38
-rw-r--r--config/sunos407
-rw-r--r--config/sunos414
-rw-r--r--config/sunos41.cc6
-rw-r--r--config/sysv26
-rw-r--r--config/sysv37
-rw-r--r--config/sysv45
-rw-r--r--config/ultrix317
-rw-r--r--config/ultrix402
-rw-r--r--config/ultrix414
-rw-r--r--config/utek18
-rw-r--r--config/v10config.h277
-rw-r--r--config/vms-conf.h323
-rw-r--r--config/vms-posix16
-rw-r--r--configh.in189
-rwxr-xr-xconfigure2483
-rw-r--r--configure.in104
-rw-r--r--custom.h43
-rw-r--r--dfa.c45
-rw-r--r--dfa.h6
-rw-r--r--doc/Makefile.in99
-rw-r--r--doc/gawk.1 (renamed from gawk.1)1006
-rw-r--r--doc/gawk.texi20460
-rw-r--r--doc/igawk.173
-rw-r--r--doc/texinfo.tex (renamed from support/texinfo.tex)752
-rw-r--r--eval.c369
-rw-r--r--field.c390
-rw-r--r--gawk.texi11270
-rw-r--r--gawkmisc.c66
-rw-r--r--getopt.c275
-rw-r--r--getopt.h14
-rw-r--r--getopt1.c2
-rwxr-xr-xinstall-sh238
-rw-r--r--io.c831
-rw-r--r--iop.c324
-rw-r--r--main.c424
-rw-r--r--missing.c72
-rw-r--r--missing/memset.c14
-rw-r--r--missing/strchr.c22
-rw-r--r--missing/strerror.c14
-rw-r--r--missing/strftime.347
-rw-r--r--missing/strftime.c197
-rw-r--r--missing/system.c6
-rw-r--r--missing/tzset.c3
-rwxr-xr-xmkinstalldirs32
-rw-r--r--msg.c60
-rwxr-xr-xmungeconf20
-rw-r--r--node.c82
-rw-r--r--patchlevel.h2
-rw-r--r--pc/ChangeLog3
-rw-r--r--pc/Makefile299
-rw-r--r--pc/Makefile.emx53
-rw-r--r--pc/Makefile.msc68
-rw-r--r--pc/Makefile.os2125
-rw-r--r--pc/Makefile.tst376
-rw-r--r--pc/awklib/igawk85
-rw-r--r--pc/awklib/igawk.awk51
-rw-r--r--pc/awklib/igawk.bat1
-rw-r--r--pc/config.h389
-rw-r--r--pc/gawk-32.def3
-rw-r--r--pc/gawkmisc.pc134
-rw-r--r--pc/getid.c131
-rw-r--r--pc/include/fcntl.h3
-rw-r--r--pc/include/stdio.h3
-rw-r--r--pc/include/stdlib.h3
-rw-r--r--pc/include/string.h3
-rw-r--r--pc/include/sys/stat.h3
-rw-r--r--pc/include/sys/types.h3
-rw-r--r--pc/include/time.h3
-rw-r--r--pc/install.awk61
-rw-r--r--pc/makegawk.bat65
-rwxr-xr-xpc/mkconf.cmd31
-rw-r--r--pc/mkinstal.sh33
-rwxr-xr-xpc/mungeconf.cmd15
-rw-r--r--pc/names.lnk20
-rw-r--r--pc/names2.lnk19
-rw-r--r--pc/popen.c236
-rw-r--r--posix/ChangeLog3
-rw-r--r--posix/gawkmisc.c115
-rw-r--r--protos.h60
-rw-r--r--re.c153
-rw-r--r--regex.c2188
-rw-r--r--regex.h32
-rw-r--r--stamp-h.in1
-rw-r--r--support/texindex.c1605
-rw-r--r--test/ChangeLog3
-rw-r--r--test/Makefile239
-rw-r--r--test/Makefile.in279
-rw-r--r--test/README15
-rw-r--r--test/anchgsub.ok (renamed from test/anchgsub.good)0
-rw-r--r--test/argarray.in1
-rw-r--r--test/argarray.ok (renamed from test/argarray.good)4
-rw-r--r--test/argtest.ok (renamed from test/argtest.good)0
-rw-r--r--test/arrayparm.ok (renamed from test/arrayparm.good)0
-rw-r--r--test/arrayref.awk (renamed from test/arrayref)0
-rw-r--r--test/arrayref.ok (renamed from test/arrayref.good)0
-rw-r--r--test/asgext.ok (renamed from test/asgext.good)0
-rw-r--r--test/awkpath.ok (renamed from test/awkpath.good)0
-rw-r--r--test/badargs.ok (renamed from test/badargs.good)5
-rw-r--r--test/childin.ok1
-rw-r--r--test/compare.ok (renamed from test/compare.good)0
-rw-r--r--test/convfmt.ok (renamed from test/convfmt.good)0
-rw-r--r--test/csi1.out574
-rw-r--r--test/defref.awk1
-rw-r--r--test/defref.ok2
-rw-r--r--test/fflush.ok16
-rwxr-xr-xtest/fflush.sh16
-rw-r--r--test/fieldwdth.ok (renamed from test/fieldwdth.good)0
-rw-r--r--test/fldchg.awk8
-rw-r--r--test/fldchg.in1
-rw-r--r--test/fldchg.ok3
-rw-r--r--test/fontdata.txt120
-rw-r--r--test/fsbs.ok (renamed from test/fsbs.good)0
-rw-r--r--test/fsrs.ok (renamed from test/fsrs.good)0
-rw-r--r--test/fstabplus.awk (renamed from test/fstabplus)0
-rw-r--r--test/fstabplus.ok (renamed from test/fstabplus.good)0
-rw-r--r--test/gensub.awk6
-rw-r--r--test/gensub.in2
-rw-r--r--test/gensub.ok3
-rw-r--r--test/getline.ok (renamed from test/getline.good)0
-rw-r--r--test/gnureops.awk45
-rw-r--r--test/gnureops.ok17
-rw-r--r--test/header.awk5
-rw-r--r--test/igncfs.ok (renamed from test/igncfs.good)0
-rw-r--r--test/ignrcase.ok (renamed from test/ignrcase.good)0
-rw-r--r--test/include.awk13
-rw-r--r--test/inftest.ok (renamed from test/inftest.good)0
-rw-r--r--test/intprec.awk1
-rw-r--r--test/intprec.ok1
-rw-r--r--test/lastnpages47
-rw-r--r--test/litoct.awk1
-rw-r--r--test/litoct.ok1
-rw-r--r--test/longwrds.ok (renamed from test/longwrds.good)0
-rw-r--r--test/math.awk10
-rw-r--r--test/math.ok6
-rw-r--r--test/negexp.ok (renamed from test/negexp.good)0
-rw-r--r--test/nfset.ok (renamed from test/nfset.good)0
-rw-r--r--test/noeffect.awk4
-rw-r--r--test/noeffect.ok2
-rw-r--r--test/nofmtch.awk1
-rw-r--r--test/nofmtch.ok2
-rw-r--r--test/nonl.ok (renamed from test/nonl.good)0
-rw-r--r--test/numfunc.awk19
-rw-r--r--test/numsubstr.awk1
-rw-r--r--test/numsubstr.in3
-rw-r--r--test/numsubstr.ok3
-rw-r--r--test/out1.ok (renamed from test/out1.good)0
-rw-r--r--test/out2.ok (renamed from test/out2.good)0
-rw-r--r--test/out3.ok (renamed from test/out3.good)0
-rw-r--r--test/paramdup.ok (renamed from test/paramdup.good)0
-rw-r--r--test/pcntplus.awk1
-rw-r--r--test/pcntplus.ok1
-rw-r--r--test/plus-minus8
-rw-r--r--[-rwxr-xr-x]test/posix.awk (renamed from test/posix)0
-rw-r--r--test/posix.ok (renamed from test/posix.good)0
-rw-r--r--test/poundbang.ok (renamed from test/poundbang.good)0
-rw-r--r--test/prmarscl.awk6
-rw-r--r--test/prmarscl.ok1
-rw-r--r--test/prmreuse.awk14
-rw-r--r--test/prmreuse.ok (renamed from config/sunos41-glibc)0
-rw-r--r--test/reparse.ok (renamed from test/reparse.good)0
-rw-r--r--test/resplit.ok1
-rw-r--r--test/reverse.awk13
-rw-r--r--test/rs.in (renamed from test/rs.data)0
-rw-r--r--test/rs.ok (renamed from test/rs.good)0
-rw-r--r--test/rswhite.awk2
-rw-r--r--test/rswhite.in2
-rw-r--r--test/rswhite.ok2
-rw-r--r--test/sclforin.awk1
-rw-r--r--test/sclforin.ok1
-rw-r--r--test/sclifin.awk7
-rw-r--r--test/sclifin.ok1
-rw-r--r--test/splitargv.ok (renamed from test/splitargv.good)0
-rw-r--r--test/sqrt.awk4
-rw-r--r--test/strftime.ok1
-rw-r--r--test/swaplns.in (renamed from test/data)0
-rw-r--r--test/swaplns.ok (renamed from test/swaplns.good)0
-rw-r--r--test/up_down.awk15
-rw-r--r--test/zap_cpp.awk13
-rw-r--r--version.c5
-rw-r--r--vms/ChangeLog3
-rw-r--r--vms/descrip.mms54
-rw-r--r--vms/gawkmisc.vms121
-rwxr-xr-xvms/posix-cc.sh16
-rw-r--r--vms/redirect.h78
-rw-r--r--vms/vms-conf.h168
-rw-r--r--vms/vms_args.c6
-rw-r--r--vms/vms_fwrite.c6
-rw-r--r--vms/vms_gawk.c6
-rw-r--r--vms/vms_misc.c18
-rw-r--r--vms/vms_popen.c6
-rw-r--r--vms/vmsbuild.com35
323 files changed, 38439 insertions, 22249 deletions
diff --git a/ACKNOWLEDGMENT b/ACKNOWLEDGMENT
index a17a2eba..0851ecf9 100644
--- a/ACKNOWLEDGMENT
+++ b/ACKNOWLEDGMENT
@@ -9,17 +9,17 @@ The following people were involved in porting gawk to different platforms.
Kent Williams (MSDOS 2.11)
Conrad Kwok (MSDOS earlier versions)
Scott Garfinkle (MSDOS earlier versions)
+ Hal Peterson <hrp@pecan.cray.com> (Cray)
This group of people comprise the "GAWK crack portability team", who
test the pre-releases and ensure portability of gawk.
- Hal Peterson <hrp@pecan.cray.com> (Cray)
Pat Rankin <gawk.rankin@EQL.Caltech.Edu> (VMS)
Michal Jaegermann <michal@gortel.phys.UAlberta.CA>
(Atari, NeXT, DEC 3100)
- Scott Deifik <scottd@amgen.com> (MSDOS 2.14 and 2.15)
- Kai Uwe Rommel <rommel@ars.muc.de> (OS/2)
- Darrel Hankerson <hankedr@mail.auburn.edu> (OS/2)
+ Scott Deifik <scottd@amgen.com> (MSDOS 2.14, 2.15, 3.0)
+ Kai Uwe Rommel <rommel@ars.de> (OS/2)
+ Darrel Hankerson <hankedr@mail.auburn.edu> (DOS and formerly OS/2)
Mark Moraes <Mark-Moraes@deshaw.com> (Code Center, Purify)
Kaveh Ghazi <ghazi@noc.rutgers.edu> (Lots of Unix variants)
@@ -27,7 +27,7 @@ Michal, Scott and Darrel go out of their way to make sure that gawk
works on non-32 bit systems, and keep me on track where portability is
concerned. Indeed, all of these folks are incredibly helpful; gawk would
not be the fine program it is now without them.
-
+
Last, but far from least, we would like to thank Brian Kernighan who
has helped to clear up many dark corners of the language and provided a
restraining touch when we have been overly tempted by "feeping
diff --git a/COPYING b/COPYING
index 3358a7be..916d1f0f 100644
--- a/COPYING
+++ b/COPYING
@@ -2,7 +2,7 @@
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
- 675 Mass Ave, Cambridge, MA 02139, USA
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
@@ -305,7 +305,7 @@ the "copyright" line and a pointer to where the full notice is found.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
Also add information on how to contact you by electronic and paper mail.
@@ -337,4 +337,3 @@ proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Library General
Public License instead of this License.
-
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 00000000..40821925
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,23 @@
+Wed Jan 10 23:19:36 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * 3.0.0 polished up and release tar file made.
+
+Wed Dec 27 11:46:16 1995 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * 2.94.0 released to porting group (no, I haven't been good
+ about this file; I'll do better once 3.0 is released).
+
+Mon Aug 28 23:04:30 1995 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * awk.h updated for NeXT - bracket TRUE/FALSE
+ * removed shadowing of 'start' in io.c:get_a_record
+ * Makefile.in and Doc/Makefile.in: fixed to use gawk.1 and gawk.texi,
+ instead of gawk.1.in and gawk.texi.in.
+
+Mon Aug 25 11:04:30 1995 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * 2.90.0 released to porting group.
+
+Fri Aug 18 12:43:31 1995 Arnold D. Robbins <arnold@puny.ssc.com>
+
+ * ChangeLog created.
diff --git a/FUTURES b/FUTURES
index 87fda5df..431ecd1b 100644
--- a/FUTURES
+++ b/FUTURES
@@ -1,108 +1,100 @@
This file lists future projects and enhancements for gawk. Items are listed
in roughly the order they will be done for a given release. This file is
-mainly for use by the developer(s) to help keep themselves on track, please
+mainly for use by the developers to help keep themselves on track, please
don't bug us too much about schedules or what all this really means.
-(An `x' indicates that some progress has been made, but that the feature is
-not complete yet.)
+With the 3.0 release, we are acknowledging that awk is not PERL, nor should
+it become PERL. (To paraphrase Dennis Ritchie, "If you want PERL, you
+know where to get it.")
-For 2.16
-========
-x Move to autoconf-based configure system.
-
-x Research awk `fflush' function.
-
-x Generalize IGNORECASE
- any value makes it work, not just numeric non-zero
- make it apply to *all* string comparisons
+The focus on the future is thus narrowed to performance and functional
+enhancements, with only minor plans for significant new features.
-In 2.17
+For 3.0
=======
-x Allow RS to be a regexp.
+ DONE: Move to autoconf-based configure system.
- RT variable to hold text of record terminator
+ DONE: Allow RS to be a regexp.
- RECLEN variable for fixed length records
+ DONE: RT variable to hold text of record terminator
- Feedback alloca.s changes to FSF
+ DONE: split() with null string as third arg to split up strings
-x Split() with null string as third arg to split up strings
+ DONE: Analogously, setting FS="" would split the input record into
+ individual characters.
-x Analogously, setting FS="" would split the input record into individual
- characters.
+ DONE: Generalize IGNORECASE
+ - any value makes it work, not just numeric non-zero
+ - make it apply to *all* string comparisons
-x Clean up code by isolating system-specific functions in separate files.
+ DONE: Incorporate newer dfa.c and regex.c
- Undertake significant directory reorganization.
+ DONE: Go to POSIX regexps
-x Extensive manual cleanup:
- Use of texinfo 2.0 features
- Lots more examples
- Document all of the above.
+ DONE: Make regex + dfa less dependant on gawk header file includes
-x Go to POSIX regexps
+ DONE: Source code formatting cleaned up and regularized
- Make regex + dfa less dependant on gawk header file includes
+ DONE: Clean up code by isolating system-specific functions in
+ separate files.
- Additional manual features:
- Document posix regexps
- Document use of dbm arrays
- ? Add an error messages section to the manual
- ? A section on where gawk is bounded
- regex
- i/o
- sun fp conversions
+ DONE: General sub function:
+ gensub(pat, sub, global_flag[, line])
+ that return the substituted strings and allow \1 etc.
+ in the sub string.
-For 2.18
-========
- DBM storage of awk arrays. Try to allow multiple dbm packages
+ DONE: Add AWKPATH to ENVIRON if it's not there
- General sub functions:
- edit(line, pat, sub) and gedit(line, pat, sub)
- that return the substituted strings and allow \1 etc. in the sub
- string.
-
- ? Have strftime() pay attention to the value of ENVIRON["TZ"]
-
-For 2.19
-========
- Add chdir and stat built-in functions.
+ DONE: Undertake significant directory reorganization.
- Add function pointers as valid variable types.
+ DONE: Extensive manual cleanup:
+ Use of texinfo 2.0 features
+ Lots more examples
+ Document posix regexps
+ Document all of the above.
- Add an `ftw' built-in function that takes a function pointer.
+In 3.1
+======
+ A PROCINFO array to replace /dev/pid, /dev/user, et al.
- Do an optimization pass over parse tree?
+ Use mmap to read input files on systems that support it.
-For 2.20 or later:
-==================
-Add variables similar to C's __FILE__ and __LINE__ for better diagnostics
-from within awk programs.
+ Use a new or improved dfa.
-Add an explicit concatenation operator and assignment version.
+ Integrate GNU NLS support.
-? Add a switch statement
+ Bring out hooks for NLS support into gawk itself.
-Add the ability to seek on an open file and retrieve the current file position.
+ DBM storage of awk arrays. Try to allow multiple dbm packages.
-Add lint checking everywhere, including check for use of builtin vars.
-only in new awk.
+ Use GNU malloc.
-"restart" keyword
+ Use rx instead of regex.
-Add |&
+ Do a reference card.
-Make awk '/foo/' files... run at egrep speeds
+ ? Have strftime() pay attention to the value of ENVIRON["TZ"]
-Do a reference card
+ Additional manual features:
+ Document use of dbm arrays
+ Document NLS support
+ ? Add exercises
+ ? Add an error messages section to the manual
+ ? A section on where gawk is bounded
+ regex
+ i/o
+ sun fp conversions
-Allow OFMT and CONVFMT to be other than a floating point format.
+For 3.2
+=======
+ Add a lint check if the return value of a function is used but
+ the function did not supply a value.
-Allow redefining of builtin functions?
+ Do an optimization pass over parse tree?
-Make it faster and smaller.
+ Make awk '/foo/' files... run at egrep speeds
-For 3.x:
+For 4.x:
========
Create a gawk compiler?
@@ -110,6 +102,3 @@ Create a gawk compiler?
Create a gawk-to-C translator? (or C++??)
Provide awk profiling and debugging.
-
-
-
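The gensub() item in the FUTURES list above gives only the call shape. As a rough sketch of how the new function is used (the sample text and the exact gawk invocation are illustrative, not part of the commit), a shell one-liner might look like this:

    # Swap each adjacent pair of words; \2 and \1 refer back to the
    # parenthesized groups, and gensub() returns the rewritten string.
    echo "one two three four" | \
        gawk '{ print gensub(/([a-z]+) ([a-z]+)/, "\\2 \\1", "g") }'
    # prints: two one four three

Unlike sub() and gsub(), gensub() leaves $0 untouched and hands back the substituted result, which is why it can be printed directly.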
diff --git a/INSTALL b/INSTALL
index 4d8026f1..a2c8722c 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,32 +1,181 @@
-December, 1993 - arnold@skeeve.atl.ga.us
-
-These are minimal instructions for installing gawk on a Unix system.
-Full instructions for installing and porting gawk are given in Chapter 16
-of the manual.
-
-1. Look in the `config' directory for a file that is appropriate to your
- Unix system. For example, use `ultrix41' for Ultrix 4.1 or later, and
- `sunos41' for SunOS 4.1.x.
-
-2. Type
- ./configure FILE
- where FILE is the name you chose earlier.
-
-3. Type
- make
- This should build gawk.
-
-4. Type
- make -n install
- to see where things will be installed by default. Edit the Makefile to
- change these defaults if they are not appropriate to your system. Then
- type
- make install
- to install gawk.
-
-If you don't have GCC, or if your Unix version is not close enough to one
-of the ones in the `config' directory, then you will need to do more work;
-see the manual.
-
-The next major release of gawk will use GNU Autoconf; the installation
-instructions will change then.
+Basic Installation
+==================
+
+ These are generic installation instructions.
+
+ The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation. It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions. Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, a file
+`config.cache' that saves the results of its tests to speed up
+reconfiguring, and a file `config.log' containing compiler output
+(useful mainly for debugging `configure').
+
+ If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release. If at some point `config.cache'
+contains results you don't want to keep, you may remove or edit it.
+
+ The file `configure.in' is used to create `configure' by a program
+called `autoconf'. You only need `configure.in' if you want to change
+it or regenerate `configure' using a newer version of `autoconf'.
+
+The simplest way to compile this package is:
+
+ 1. `cd' to the directory containing the package's source code and type
+ `./configure' to configure the package for your system. If you're
+ using `csh' on an old version of System V, you might need to type
+ `sh ./configure' instead to prevent `csh' from trying to execute
+ `configure' itself.
+
+ Running `configure' takes awhile. While running, it prints some
+ messages telling which features it is checking for.
+
+ 2. Type `make' to compile the package.
+
+ 3. Optionally, type `make check' to run any self-tests that come with
+ the package.
+
+ 4. Type `make install' to install the programs and any data files and
+ documentation.
+
+ 5. You can remove the program binaries and object files from the
+ source code directory by typing `make clean'. To also remove the
+ files that `configure' created (so you can compile the package for
+ a different kind of computer), type `make distclean'. There is
+ also a `make maintainer-clean' target, but that is intended mainly
+ for the package's developers. If you use it, you may have to get
+ all sorts of other programs in order to regenerate files that came
+ with the distribution.
+
+Compilers and Options
+=====================
+
+ Some systems require unusual options for compilation or linking that
+the `configure' script does not know about. You can give `configure'
+initial values for variables by setting them in the environment. Using
+a Bourne-compatible shell, you can do that on the command line like
+this:
+ CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure
+
+Or on systems that have the `env' program, you can do it like this:
+ env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure
+
+Compiling For Multiple Architectures
+====================================
+
+ You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory. To do this, you must use a version of `make' that
+supports the `VPATH' variable, such as GNU `make'. `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script. `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.
+
+ If you have to use a `make' that does not supports the `VPATH'
+variable, you have to compile the package for one architecture at a time
+in the source code directory. After you have installed the package for
+one architecture, use `make distclean' before reconfiguring for another
+architecture.
+
+Installation Names
+==================
+
+ By default, `make install' will install the package's files in
+`/usr/local/bin', `/usr/local/man', etc. You can specify an
+installation prefix other than `/usr/local' by giving `configure' the
+option `--prefix=PATH'.
+
+ You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files. If you
+give `configure' the option `--exec-prefix=PATH', the package will use
+PATH as the prefix for installing programs and libraries.
+Documentation and other data files will still use the regular prefix.
+
+ In addition, if you use an unusual directory layout you can give
+options like `--bindir=PATH' to specify different values for particular
+kinds of files. Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them.
+
+ If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+Optional Features
+=================
+
+ Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System). The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+ For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+Specifying the System Type
+==========================
+
+ There may be some features `configure' can not figure out
+automatically, but needs to determine by the type of host the package
+will run on. Usually `configure' can figure that out, but if it prints
+a message saying it can not guess the host type, give it the
+`--host=TYPE' option. TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name with three fields:
+ CPU-COMPANY-SYSTEM
+
+See the file `config.sub' for the possible values of each field. If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the host type.
+
+ If you are building compiler tools for cross-compiling, you can also
+use the `--target=TYPE' option to select the type of system they will
+produce code for and the `--build=TYPE' option to select the type of
+system on which you are compiling the package.
+
+Sharing Defaults
+================
+
+ If you want to set default values for `configure' scripts to share,
+you can create a site shell script called `config.site' that gives
+default values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists. Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Operation Controls
+==================
+
+ `configure' recognizes the following options to control how it
+operates.
+
+`--cache-file=FILE'
+ Use and save the results of the tests in FILE instead of
+ `./config.cache'. Set FILE to `/dev/null' to disable caching, for
+ debugging `configure'.
+
+`--help'
+ Print a summary of the options to `configure', and exit.
+
+`--quiet'
+`--silent'
+`-q'
+ Do not print messages saying which checks are being made.
+
+`--srcdir=DIR'
+ Look for the package's source code in directory DIR. Usually
+ `configure' can determine that directory automatically.
+
+`--version'
+ Print the version of Autoconf used to generate the `configure'
+ script, and exit.
+
+`configure' also accepts some other, not widely useful, options.
+
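The generic Autoconf instructions above spell the procedure out in prose; condensed into commands (the --prefix value is only an example), the usual sequence for building this release is:

    ./configure --prefix=/usr/local   # guess system settings, write Makefile
    make                              # build gawk
    make check                        # optionally run the self-tests
    make install                      # install under the chosen prefix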
diff --git a/LIMITATIONS b/LIMITATIONS
index 64eab85f..05e8bc41 100644
--- a/LIMITATIONS
+++ b/LIMITATIONS
@@ -1,7 +1,7 @@
This file describes limits of gawk on a Unix system (although it
is variable even then). Non-Unix systems may have other limits.
-# of fields in a record: MAX_INT
+# of fields in a record: MAX_LONG
Length of input record: MAX_INT
Length of output record: unlimited
Size of a field: MAX_INT
diff --git a/Makefile.bsd44 b/Makefile.bsd44
deleted file mode 100644
index 1daca532..00000000
--- a/Makefile.bsd44
+++ /dev/null
@@ -1,14 +0,0 @@
-PROG= awk
-SRCS= main.c eval.c builtin.c msg.c iop.c io.c field.c getopt1.c \
- getopt.c array.c \
- node.c version.c missing.c re.c awk.c regex.c dfa.c
-DPADD= ${LIBM}
-LDADD= -lm
-CFLAGS+=-I${.CURDIR} -DGAWK
-BINDIR= /usr/bin
-MANDIR= /usr/share/man/cat
-CLEANFILES+=awk.c y.tab.h awk.0
-
-awk.0: gawk.1
- nroff -man gawk.1 > awk.0
-.include <bsd.prog.mk>
diff --git a/Makefile.cline b/Makefile.cline
deleted file mode 100644
index cccfb728..00000000
--- a/Makefile.cline
+++ /dev/null
@@ -1,65 +0,0 @@
-# >>> CenterLine Make Targets <<<
-#
-gawk_obj:
- #cd /net/ecum/src/system/david/awk/gawk-2.15.3
- #load -DGAWK -DHAVE_CONFIG_H main.o
- #instrument main.o
- #load -DGAWK -DHAVE_CONFIG_H eval.o
- #instrument eval.o
- #load -DGAWK -DHAVE_CONFIG_H builtin.o
- #instrument builtin.o
- #load -DGAWK -DHAVE_CONFIG_H msg.o
- #instrument msg.o
- #load -DGAWK -DHAVE_CONFIG_H iop.o
- #instrument iop.o
- #load -DGAWK -DHAVE_CONFIG_H io.o
- #instrument io.o
- #load -DGAWK -DHAVE_CONFIG_H field.o
- #instrument field.o
- #load -DGAWK -DHAVE_CONFIG_H array.o
- #instrument array.o
- #load -DGAWK -DHAVE_CONFIG_H node.o
- #instrument node.o
- #load -DGAWK -DHAVE_CONFIG_H version.o
- #instrument version.o
- #load -DGAWK -DHAVE_CONFIG_H missing.o
- #instrument missing.o
- #load -DGAWK -DHAVE_CONFIG_H re.o
- #instrument re.o
- #load -DGAWK -DHAVE_CONFIG_H getopt.o
- #instrument getopt.o
- #load -DGAWK -DHAVE_CONFIG_H getopt1.o
- #instrument getopt1.o
- #load -DGAWK -DHAVE_CONFIG_H awktab.o
- #instrument awktab.o
- #load -DGAWK -DHAVE_CONFIG_H regex.o
- #instrument regex.o
- #load -DGAWK -DHAVE_CONFIG_H dfa.o
- #instrument dfa.o
- #load -lm
- #setopt program_name gawk
-
-gawk_src:
- #cd /net/ecum/src/system/david/awk/gawk-2.15.3
- #load -C -DGAWK -DHAVE_CONFIG_H main.c
- #load -C -DGAWK -DHAVE_CONFIG_H eval.c
- #load -C -DGAWK -DHAVE_CONFIG_H builtin.c
- #load -C -DGAWK -DHAVE_CONFIG_H msg.c
- #load -C -DGAWK -DHAVE_CONFIG_H iop.c
- #load -C -DGAWK -DHAVE_CONFIG_H io.c
- #load -C -DGAWK -DHAVE_CONFIG_H field.c
- #load -C -DGAWK -DHAVE_CONFIG_H array.c
- #load -C -DGAWK -DHAVE_CONFIG_H node.c
- #load -C -DGAWK -DHAVE_CONFIG_H version.c
- #load -C -DGAWK -DHAVE_CONFIG_H missing.c
- #load -C -DGAWK -DHAVE_CONFIG_H re.c
- #load -C -DGAWK -DHAVE_CONFIG_H getopt.c
- #load -C -DGAWK -DHAVE_CONFIG_H getopt1.c
- #load -C -DGAWK -DHAVE_CONFIG_H awktab.c
- #load -C -DGAWK -DHAVE_CONFIG_H regex.c
- #load -C -DGAWK -DHAVE_CONFIG_H dfa.c
- #load -lm
- #setopt program_name gawk
-
-#
-# >>> End of CenterLine Make Targets <<<
diff --git a/Makefile.dec b/Makefile.dec
deleted file mode 100644
index df0e2291..00000000
--- a/Makefile.dec
+++ /dev/null
@@ -1,269 +0,0 @@
-# Makefile for GNU Awk.
-#
-# Copyright (C) 1986, 1988-1993 the Free Software Foundation, Inc.
-#
-# This file is part of GAWK, the GNU implementation of the
-# AWK Progamming Language.
-#
-# GAWK is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# GAWK is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GAWK; see the file COPYING. If not, write to
-# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-# User tunable macros -- CHANGE THESE IN Makefile.in RATHER THAN IN
-# Makefile, OR configure WILL OVERWRITE YOUR CHANGES
-
-prefix = /usr/local
-exec_prefix = $(prefix)
-binprefix =
-manprefix =
-
-bindir = $(exec_prefix)/bin
-libdir = $(exec_prefix)/lib
-mandir = $(prefix)/man/man1
-manext = .1
-infodir = $(prefix)/info
-
-# The provided "configure" is used to turn a config file (samples in
-# the "config" directory into commands to edit config.in into
-# a suitable config.h and to edit Makefile.in into Makefile.
-# To port GAWK, create an appropriate config file using the ones in
-# the config directory as examples and using the comments in config.in
-# as a guide.
-
-CC= cc
-##MAKE_CC## CC = cc
-
-PROFILE= #-pg
-DEBUG= #-DMALLOCDEBUG #-DDEBUG #-DFUNC_TRACE #-DMPROF
-LINKSTATIC= #-Bstatic
-WARN= #-W -Wunused -Wimplicit -Wreturn-type -Wcomment # for gcc only
-
-# Parser to use on grammar - any one of the following will work
-PARSER = bison -y
-#PARSER = yacc
-#PARSER = byacc
-
-# Set LIBS to any libraries that are machine specific
-LIBS =
-
-# Cray 2 running Unicos 5.0.7
-##MAKE_LIBNET## LIBS = -lnet
-
-
-# Systems with alloca in /lib/libPW.a
-##MAKE_ALLOCA_PW## LIBS = -lPW
-
-# ALLOCA - only needed if you use bison
-# Set equal to alloca.o if your system is S5 and you don't have
-# alloca. Uncomment one of the rules below to make alloca.o from
-# either alloca.s or alloca.c.
-# This should have already been done automatically by configure.
-#
-# Some systems have alloca in libPW.a, so LIBS=-lPW may work, too.
-##MAKE_ALLOCA_C## ALLOCA= alloca.o
-##MAKE_ALLOCA_S## ALLOCA= alloca.o
-
-VFLAGS=
-
-# VMS POSIX, VAXC V3.2
-##MAKE_VMS-Posix## VFLAGS = -UVMS -D__STDC__=0
-
-# HP/Apollo running cc version 6.7 or earlier
-##MAKE_Apollo## VFLAGS = -U__STDC__ -A run,sys5.3
-##MAKE_Apollo## LIBS = -A sys,any
-
-# SGI IRIX 4.0.5 cc flags
-##MAKE_SGI## VFLAGS = -cckr
-
-##MAKE_NeXT## VFLAGS = -DGFMT_WORKAROUND
-
-CFLAGS = -Olimit 1500
-FLAGS = -DGAWK -DHAVE_CONFIG_H $(VFLAGS) $(DEBUG) $(PROFILE) $(WARN) -Dconst=""
-LDFLAGS = $(LINKSTATIC) $(PROFILE)
-
-.c.o:
- $(CC) $(CFLAGS) $(FLAGS) -c $<
-
-# object files
-AWKOBJS = main.o eval.o builtin.o msg.o iop.o io.o field.o array.o \
- node.o version.o missing.o re.o getopt.o getopt1.o
-
-ALLOBJS = $(AWKOBJS) awktab.o
-
-# GNUOBJS
-# GNU stuff that gawk uses as library routines.
-GNUOBJS= regex.o dfa.o $(ALLOCA)
-
-# source and documentation files
-SRC = main.c eval.c builtin.c msg.c version.c \
- iop.c io.c field.c array.c node.c missing.c re.c getopt.c getopt1.c
-
-ALLSRC= $(SRC) awktab.c
-
-AWKSRC= awk.h awk.y $(ALLSRC) patchlevel.h protos.h config.in getopt.h
-
-GNUSRC = alloca.c alloca.s dfa.c dfa.h regex.c regex.h
-
-COPIES = missing/system.c missing/tzset.c \
- missing/memcmp.c missing/memcpy.c missing/memset.c \
- missing/random.c missing/strncasecmp.c missing/strchr.c \
- missing/strerror.c missing/strtod.c \
- missing/strftime.c missing/strftime.3
-
-SUPPORT = support/texindex.c support/texinfo.tex
-
-DOCS= gawk.1 gawk.texi
-
-TEXFILES= gawk.aux gawk.cp gawk.cps gawk.fn gawk.fns gawk.ky gawk.kys \
- gawk.pg gawk.pgs gawk.toc gawk.tp gawk.tps gawk.vr gawk.vrs
-
-MISC = NEWS COPYING FUTURES Makefile.* PROBLEMS README* PORTS POSIX \
- mungeconf configure ACKNOWLEDGMENT LIMITATIONS
-
-OTHERS= pc/* atari/* vms/*
-
-ALLDOC= gawk.dvi $(TEXFILES) gawk.info*
-
-# Release of gawk. There can be no leading or trailing white space here!
-REL=2.15
-
-# rules to build gawk
-gawk: $(ALLOBJS) $(GNUOBJS) $(REOBJS)
- $(CC) -o gawk $(LDFLAGS) $(ALLOBJS) $(GNUOBJS) $(REOBJS) -lm $(LIBS)
-
-regex.o: regex.h awk.h
- $(CC) $(FLAGS) -c regex.c
-
-$(AWKOBJS) regex.o dfa.o: awk.h dfa.h regex.h
-
-getopt.o: getopt.h
-
-getopt1.o: getopt.h
-
-main.o: patchlevel.h
-
-awktab.c: awk.y
- $(PARSER) -v awk.y
-##MAKE_VMS-Posix## mv ytab.c awktab.c
-##MAKE_VMS-Posix## dummy.awk_tab.target:
- sed '/^extern char .malloc(), .realloc();$$/d' y.tab.c >awktab.c
- rm y.tab.c
-
-awktab.o: awk.h
-
-config.h: config.in
- @echo You must provide a config.h!
- @echo Run \"./configure\" to build it for known systems
- @echo or copy config.in to config.h and edit it.; exit 1
-
-install: gawk gawk.info
- cp gawk $(bindir) && chmod 755 $(bindir)/gawk
- cp gawk.1 $(mandir)/gawk$(manext) && chmod 644 $(mandir)/gawk$(manext)
- cp gawk.info* $(infodir) && chmod 644 $(infodir)/gawk.info*
-
-uninstall:
- rm -f $(bindir)/gawk $(mandir)/gawk$(manext) $(infodir)/gawk.info*
-
-# ALLOCA: uncomment this if your system (notably System V boxen)
-# does not have alloca in /lib/libc.a or /lib/libPW.a
-#
-# If your machine is not supported by the assembly version of alloca.s,
-# use the C version which follows instead. It uses the default rules to
-# make alloca.o.
-#
-# One of these rules should have already been selected by running configure.
-
-
-##MAKE_ALLOCA_S## alloca.o: alloca.s
-##MAKE_ALLOCA_S## /lib/cpp < alloca.s | sed '/^#/d' > t.s
-##MAKE_ALLOCA_S## as t.s -o alloca.o
-##MAKE_ALLOCA_S## rm t.s
-
-##MAKE_ALLOCA_C## alloca.o: alloca.c
-
-# auxiliary rules for release maintenance
-lint: $(ALLSRC)
- lint -hcbax $(FLAGS) $(ALLSRC)
-
-xref:
- cxref -c $(FLAGS) $(ALLSRC) | grep -v ' /' >xref
-
-clean:
- rm -rf gawk *.o core
- cd test && make clean
-
-distclean: clean
- rm -f Makefile *.orig *.rej */*.orig */*.rej awk.output gmon.out \
- make.out y.output config.h
-
-mostlyclean: clean
-
-realclean: distclean
- rm -f awktab.c $(ALLDOC)
-
-cleaner: clean
- rm -f gawk awktab.c Makefile config.h
-
-clobber: clean
- rm -f $(ALLDOC) gawk.log config.h
-
-gawk.dvi: gawk.texi
- cp support/texinfo.tex .
- tex gawk.texi; texindex gawk.??
- tex gawk.texi; texindex gawk.??
- tex gawk.texi
- rm -f texinfo.tex
-
-gawk.info: gawk.texi
- makeinfo gawk.texi
-
-dist: $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $(COPIES) $(SUPPORT) distclean
- -rm -rf gawk-$(REL)*
- dir=gawk-$(REL).`gawk '{print $$3}' patchlevel.h` && \
- mkdir $$dir && \
- cp -p $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $$dir && \
- mkdir $$dir/missing && cp -p $(COPIES) $$dir/missing && \
- mkdir $$dir/atari && cp -p atari/* $$dir/atari && \
- mkdir $$dir/pc && cp -p pc/* $$dir/pc && \
- mkdir $$dir/vms && cp -p vms/* $$dir/vms && \
- mkdir $$dir/config && cp -p config/* $$dir/config && \
- mkdir $$dir/support && cp -p support/* $$dir/support && \
- cp -pr test $$dir && \
- chmod -R a+r $$dir && \
- chmod -R a-w $$dir && \
- find $$dir -type d -exec chmod 755 {} ';' && \
- find $$dir -print | doschk && \
- tar -cf - $$dir | gzip > $$dir.tar.gz && \
- rm -fr $$dir
-
-gawk-doc-$(REL).tar.gz: gawk.info gawk.dvi gawk.1
- -rm -rf gawk-doc-$(REL) gawk-doc-$(REL).tar.gz
- -mkdir gawk-doc-$(REL)
- cp -p $(ALLDOC) gawk-doc-$(REL)
- groff -Tascii -man gawk.1 > gawk-doc-$(REL)/gawk.1.pr
- tar -cf - gawk-doc-$(REL) | gzip > gawk-doc-$(REL).tar.gz
-
-gawk-ps-$(REL).tar.gz: gawk.dvi gawk.1
- -rm -rf gawk-ps-$(REL) gawk-ps-$(REL).tar.gz
- -mkdir gawk-ps-$(REL)
- dvips -o gawk-ps-$(REL)/gawk.postscript gawk.dvi
- groff -man gawk.1 > gawk-ps-$(REL)/gawk.1.ps
- tar -cf - gawk-ps-$(REL) | gzip > gawk-ps-$(REL).tar.gz
-
-release: dist gawk-doc-$(REL).tar.gz gawk-ps-$(REL).tar.gz
-
-test: gawk
- cd test; make -k
-
-check: test
-
diff --git a/Makefile.in b/Makefile.in
index a676ebe9..3897e39d 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -3,7 +3,7 @@
# Copyright (C) 1986, 1988-1995 the Free Software Foundation, Inc.
#
# This file is part of GAWK, the GNU implementation of the
-# AWK Progamming Language.
+# AWK Programming Language.
#
# GAWK is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -16,104 +16,65 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with GAWK; see the file COPYING. If not, write to
-# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-# User tunable macros -- CHANGE THESE IN Makefile.in RATHER THAN IN
-# Makefile, OR configure WILL OVERWRITE YOUR CHANGES
+@SET_MAKE@
-prefix = /usr/local
-exec_prefix = $(prefix)
-binprefix =
-manprefix =
-
-bindir = $(exec_prefix)/bin
-libdir = $(exec_prefix)/lib
-mandir = $(prefix)/man/man1
-manext = .1
-infodir = $(prefix)/info
-
-# The provided "configure" is used to turn a config file (samples in
-# the "config" directory into commands to edit config.in into
-# a suitable config.h and to edit Makefile.in into Makefile.
-# To port GAWK, create an appropriate config file using the ones in
-# the config directory as examples and using the comments in config.in
-# as a guide.
-
-CC= gcc -g
-##MAKE_CC## CC = cc
+MAKEINFO = makeinfo --no-split
-PROFILE= #-pg
-DEBUG= #-DMALLOCDEBUG #-DDEBUG #-DFUNC_TRACE #-DMPROF
-LINKSTATIC= #-Bstatic
-WARN= #-W -Wunused -Wimplicit -Wreturn-type -Wcomment # for gcc only
+srcdir = @srcdir@
+VPATH = @srcdir@
-# Parser to use on grammar - any one of the following will work
-PARSER = bison -y
-#PARSER = yacc
-#PARSER = byacc
+CC = @CC@
+YACC = @YACC@
-# Set LIBS to any libraries that are machine specific
-LIBS =
-
-# Cray 2 running Unicos 5.0.7
-##MAKE_LIBNET## LIBS = -lnet
-
-
-# Systems with alloca in /lib/libPW.a
-##MAKE_ALLOCA_PW## LIBS = -lPW
-
-# ALLOCA - only needed if you use bison
-# Set equal to alloca.o if your system is S5 and you don't have
-# alloca. Uncomment one of the rules below to make alloca.o from
-# either alloca.s or alloca.c.
-# This should have already been done automatically by configure.
-#
-# Some systems have alloca in libPW.a, so LIBS=-lPW may work, too.
-##MAKE_ALLOCA_C## ALLOCA= alloca.o
-##MAKE_ALLOCA_S## ALLOCA= alloca.o
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
-VFLAGS=
+LIBS = @LIBS@
-# VMS POSIX, VAXC V3.2
-##MAKE_VMS-Posix## VFLAGS = -UVMS -D__STDC__=0
+ALLOCA = @ALLOCA@
-# HP/Apollo running cc version 6.7 or earlier
-##MAKE_Apollo## VFLAGS = -U__STDC__ -A run,sys5.3
-##MAKE_Apollo## LIBS = -A sys,any
+exec_prefix = @exec_prefix@
+prefix = @prefix@
+binprefix =
+manprefix =
-# SGI IRIX 4.0.5 cc flags
-##MAKE_SGI## VFLAGS = -cckr -signed
-##MAKE_SGI_GCC## VFLAGS = -fsigned-char
+bindir = @bindir@
+libdir = @libdir@
+manexta = 1
+mandir = @mandir@/man$(manexta)
+manext = .$(manexta)
+infodir = @infodir@
+datadir = @datadir@/awk
+libexecdir = @libexecdir@/awk
-##MAKE_NeXT## VFLAGS = -DGFMT_WORKAROUND
+DEFPATH = ".:$(datadir)"
-CFLAGS = -O
-FLAGS = -DGAWK -DHAVE_CONFIG_H $(VFLAGS) $(DEBUG) $(PROFILE) $(WARN)
-LDFLAGS = $(LINKSTATIC) $(PROFILE)
-
-.c.o:
- $(CC) $(CFLAGS) $(FLAGS) -c $<
+SHELL = /bin/sh
+CFLAGS = @CFLAGS@ -DGAWK -I. -I$(srcdir) @DEFS@
# object files
-AWKOBJS = main.o eval.o builtin.o msg.o iop.o io.o field.o array.o \
- node.o version.o missing.o re.o getopt.o getopt1.o
+AWKOBJS = array.o builtin.o eval.o field.o gawkmisc.o io.o main.o \
+ missing.o msg.o node.o re.o version.o
ALLOBJS = $(AWKOBJS) awktab.o
# GNUOBJS
# GNU stuff that gawk uses as library routines.
-GNUOBJS= regex.o dfa.o $(ALLOCA)
+GNUOBJS= getopt.o getopt1.o regex.o dfa.o $(ALLOCA)
# source and documentation files
-SRC = main.c eval.c builtin.c msg.c version.c \
- iop.c io.c field.c array.c node.c missing.c re.c getopt.c getopt1.c
+SRC = array.c builtin.c eval.c field.c gawkmisc.c io.c main.c \
+ missing.c msg.c node.c re.c version.c
ALLSRC= $(SRC) awktab.c
-AWKSRC= awk.h awk.y $(ALLSRC) patchlevel.h protos.h config.in getopt.h
+AWKSRC= awk.h awk.y custom.h $(ALLSRC) patchlevel.h protos.h
-GNUSRC = alloca.c alloca.s dfa.c dfa.h regex.c regex.h
+GNUSRC = alloca.c dfa.c dfa.h regex.c regex.h getopt.h getopt.c getopt1.c
COPIES = missing/system.c missing/tzset.c \
missing/memcmp.c missing/memcpy.c missing/memset.c \
@@ -121,28 +82,67 @@ COPIES = missing/system.c missing/tzset.c \
missing/strerror.c missing/strtod.c \
missing/strftime.c missing/strftime.3
-SUPPORT = support/texindex.c support/texinfo.tex
-
-DOCS= gawk.1 gawk.texi
+DOCS= doc/gawk.1 doc/gawk.texi doc/texinfo.tex
-TEXFILES= gawk.aux gawk.cp gawk.cps gawk.fn gawk.fns gawk.ky gawk.kys \
- gawk.pg gawk.pgs gawk.toc gawk.tp gawk.tps gawk.vr gawk.vrs
+TEXFILES= doc/gawk.aux doc/gawk.cp doc/gawk.cps doc/gawk.fn doc/gawk.fns \
+ doc/gawk.ky doc/gawk.kys doc/gawk.pg doc/gawk.pgs doc/gawk.toc \
+ doc/gawk.tp doc/gawk.tps doc/gawk.vr doc/gawk.vrs
-MISC = NEWS COPYING FUTURES Makefile.* PROBLEMS README* PORTS POSIX \
- mungeconf configure ACKNOWLEDGMENT LIMITATIONS INSTALL
+MISC = NEWS COPYING FUTURES Makefile.in PROBLEMS README PORTS POSIX.STD \
+ configure configure.in acconfig.h configh.in ACKNOWLEDGMENT \
+ ChangeLog INSTALL LIMITATIONS install-sh mkinstalldirs aclocal.m4 \
+ stamp-h.in
-OTHERS= pc/* atari/* vms/*
+OTHERS= amiga doc pc atari vms README_d posix awklib
-ALLDOC= gawk.dvi $(TEXFILES) gawk.info*
+ALLDOC= doc/gawk.dvi $(TEXFILES) doc/gawk.info*
# Release of gawk. There can be no leading or trailing white space here!
-REL=2.15
+REL=3.0
+
+# clear out suffixes list
+.SUFFIXES:
+.SUFFIXES: .c .o
+
+.c.o:
+ $(CC) -c $(CFLAGS) $<
# rules to build gawk
+all: gawk doc/all awklib/all
+
+# stuff to make sure that configure has been run.
+$(srcdir)/configure: configure.in aclocal.m4
+ cd $(srcdir) && autoconf
+
+# autoheader might not change confh.in, so touch a stamp file
+$(srcdir)/configh.in: stamp-h.in
+$(srcdir)/stamp-h.in: configure.in aclocal.m4 acconfig.h
+ cd $(srcdir) && autoheader
+ date > $(srcdir)/stamp-h.in
+
+config.h: stamp-h
+stamp-h: configh.in config.status
+ $(SHELL) ./config.status
+
+Makefile: Makefile.in config.status
+ $(SHELL) ./config.status
+
+config.status: configure
+ $(SHELL) ./config.status --recheck
+
gawk: $(ALLOBJS) $(GNUOBJS) $(REOBJS)
$(CC) -o gawk $(LDFLAGS) $(ALLOBJS) $(GNUOBJS) $(REOBJS) -lm $(LIBS)
-$(AWKOBJS) regex.o dfa.o: awk.h dfa.h regex.h
+$(ALLOBJS): awk.h dfa.h regex.h config.h custom.h
+
+$(GNUOBJS): config.h custom.h
+
+# SunOS make's (at least) VPATH doesn't do subdirectories...
+# Solaris make doesn't allow $< in the actual rule
+gawkmisc.o: $(srcdir)/amiga/gawkmisc.ami $(srcdir)/atari/gawkmisc.atr \
+ $(srcdir)/pc/gawkmisc.pc $(srcdir)/posix/gawkmisc.c \
+ $(srcdir)/vms/gawkmisc.vms
+ $(CC) -c $(CFLAGS) -DDEFPATH='$(DEFPATH)' $(srcdir)/gawkmisc.c
getopt.o: getopt.h
@@ -151,117 +151,103 @@ getopt1.o: getopt.h
main.o: patchlevel.h
awktab.c: awk.y
- $(PARSER) -v awk.y
-##MAKE_VMS-Posix## sed '/^\#module/d' ytab.c >awktab.c && rm ytab.c
-##MAKE_VMS-Posix## dummy.awk_tab.target:
- sed '/^extern char .malloc(), .realloc();$$/d' y.tab.c >awktab.c
- rm y.tab.c
-
-awktab.o: awk.h
-
-config.h: config.in
- @echo You must provide a config.h!
- @echo Run \"./configure\" to build it for known systems
- @echo or copy config.in to config.h and edit it.; exit 1
+ $(YACC) -v $(srcdir)/awk.y && \
+ if test -f y.tab.c ; then mv y.tab.c ytab.c ; else true ; fi && \
+ sed '/^extern char .malloc(), .realloc();$$/d' ytab.c >awktab.c && \
+ rm ytab.c
-install: gawk gawk.info
- cp gawk $(bindir)/gawk && chmod 755 $(bindir)/gawk
- cp gawk.1 $(mandir)/gawk$(manext) && chmod 644 $(mandir)/gawk$(manext)
- cp gawk.info* $(infodir) && chmod 644 $(infodir)/gawk.info*
+# VMS POSIX make won't apply the default .c.o rule to awktab.o for some reason
+awktab.o: awktab.c awk.h
+ $(CC) -c $(CFLAGS) $(srcdir)/awktab.c
-uninstall:
- rm -f $(bindir)/gawk $(mandir)/gawk$(manext) $(infodir)/gawk.info*
+alloca.o: alloca.c
-# ALLOCA: uncomment this if your system (notably System V boxen)
-# does not have alloca in /lib/libc.a or /lib/libPW.a
-#
-# If your machine is not supported by the assembly version of alloca.s,
-# use the C version which follows instead. It uses the default rules to
-# make alloca.o.
-#
-# One of these rules should have already been selected by running configure.
+install: gawk info installdirs
+ $(INSTALL) gawk $(bindir) && chmod 755 $(bindir)/gawk
+ cd doc && $(MAKE) install
+ cd awklib && $(MAKE) install
+installdirs: mkinstalldirs
+ $(srcdir)/mkinstalldirs $(bindir) $(datadir) \
+ $(libdir) $(infodir) $(mandir) $(libexecdir)
-##MAKE_ALLOCA_S## alloca.o: alloca.s
-##MAKE_ALLOCA_S## /lib/cpp < alloca.s | sed '/^#/d' > t.s
-##MAKE_ALLOCA_S## as t.s -o alloca.o
-##MAKE_ALLOCA_S## rm t.s
-
-##MAKE_ALLOCA_C## alloca.o: alloca.c
+uninstall:
+ rm -f $(bindir)/gawk
+ cd doc && $(MAKE) uninstall
+ cd awklib && $(MAKE) uninstall
+ -rmdir $(datadir) $(libexecdir)
# auxiliary rules for release maintenance
lint: $(ALLSRC)
- lint -hcbax $(FLAGS) $(ALLSRC)
+ lint -hcbax $(CFLAGS) $(ALLSRC)
xref:
- cxref -c $(FLAGS) $(ALLSRC) | grep -v ' /' >xref
+ cxref -c $(CFLAGS) $(ALLSRC) | grep -v ' /' >xref
clean:
- rm -rf gawk *.o core
- cd test && make clean
+ rm -rf gawk *.o core y.output
+ cd doc && $(MAKE) clean
+ cd test && $(MAKE) clean
+ cd awklib && $(MAKE) clean
distclean: clean
- rm -f Makefile *.orig *.rej */*.orig */*.rej awk.output gmon.out \
- make.out y.output config.h
+ rm -f Makefile TAGS tags *.orig *.rej */*.orig */*.rej awk.output \
+ gmon.out make.out config.h config.status config.cache \
+ config.log stamp-h
+ cd doc && $(MAKE) distclean
+ cd awklib && $(MAKE) distclean
mostlyclean: clean
-realclean: distclean
- rm -f awktab.c $(ALLDOC)
+maintainer-clean: distclean
+ @echo "This command is intended for maintainers to use; it"
+ @echo "deletes files that may require special tools to rebuild."
+ rm -f awktab.c
+ cd doc && $(MAKE) maintainer-clean
+ cd test && $(MAKE) maintainer-clean
+ cd awklib && $(MAKE) maintainer-clean
-cleaner: clean
- rm -f gawk awktab.c Makefile config.h
+clobber: maintainer-clean
-clobber: clean
- rm -f $(ALLDOC) gawk.log config.h
+TAGS:
+ etags $(AWKSRC)
+ ctags $(AWKSRC)
-gawk.dvi: gawk.texi
- cp support/texinfo.tex .
- tex gawk.texi; texindex gawk.??
- tex gawk.texi; texindex gawk.??
- tex gawk.texi
- rm -f texinfo.tex
+dvi: $(srcdir)/doc/gawk.texi
+ cd doc && $(MAKE) dvi
-gawk.info: gawk.texi
- makeinfo gawk.texi
+info: $(srcdir)/doc/gawk.texi
+ cd doc && $(MAKE) info
-dist: $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $(COPIES) $(SUPPORT) distclean
+doc/all:
+ cd doc && $(MAKE) all
+
+awklib/all:
+ cd awklib && $(MAKE) all
+
+dist: $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $(COPIES) awklib/stamp-eg distclean
-rm -rf gawk-$(REL)*
dir=gawk-$(REL).`gawk '{print $$3}' patchlevel.h` && \
mkdir $$dir && \
- cp -p $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $$dir && \
- mkdir $$dir/missing && cp -p $(COPIES) $$dir/missing && \
- mkdir $$dir/atari && cp -p atari/* $$dir/atari && \
- mkdir $$dir/pc && cp -p pc/* $$dir/pc && \
- mkdir $$dir/vms && cp -p vms/* $$dir/vms && \
- mkdir $$dir/config && cp -p config/* $$dir/config && \
- mkdir $$dir/support && cp -p support/* $$dir/support && \
+ cp -pr $(AWKSRC) $(GNUSRC) $(MISC) $$dir && \
+ mkdir $$dir/missing && \
+ cp -p $(COPIES) $$dir/missing && \
+ for i in $(OTHERS); \
+ do \
+ cp -pr $$i $$dir ; \
+ done && \
cp -pr test $$dir && \
- chmod -R a+r $$dir && \
- chmod -R a-w $$dir && \
- find $$dir -type d -exec chmod 755 {} ';' && \
+ (cd $$dir/doc; $(MAKEINFO) gawk.texi) && \
+ find $$dir -type d -exec chmod 777 {} ';' && \
find $$dir -print | doschk && \
tar -cf - $$dir | gzip > $$dir.tar.gz && \
rm -fr $$dir
-gawk-doc-$(REL).tar.gz: gawk.info gawk.dvi gawk.1
- -rm -rf gawk-doc-$(REL) gawk-doc-$(REL).tar.gz
- -mkdir gawk-doc-$(REL)
- cp -p $(ALLDOC) gawk-doc-$(REL)
- groff -Tascii -man gawk.1 > gawk-doc-$(REL)/gawk.1.pr
- tar -cf - gawk-doc-$(REL) | gzip > gawk-doc-$(REL).tar.gz
-
-gawk-ps-$(REL).tar.gz: gawk.dvi gawk.1
- -rm -rf gawk-ps-$(REL) gawk-ps-$(REL).tar.gz
- -mkdir gawk-ps-$(REL)
- dvips -o gawk-ps-$(REL)/gawk.postscript gawk.dvi
- groff -man gawk.1 > gawk-ps-$(REL)/gawk.1.ps
- tar -cf - gawk-ps-$(REL) | gzip > gawk-ps-$(REL).tar.gz
-
-release: dist gawk-doc-$(REL).tar.gz gawk-ps-$(REL).tar.gz
+awklib/stamp-eg:
+ cd awklib ; $(MAKE) stamp-eg
-test: gawk
- cd test; make -k
+check: gawk
+ cd test; $(MAKE) -k
-check: test
+test: check
diff --git a/NEWS b/NEWS
index 85f7d155..3ac650cf 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,110 @@
+Changes from 2.15.6 to 3.0
+--------------------------
+
+Fixed spelling of `Programming' in the copyright notice in all the files.
+
+New --re-interval option to turn on interval expressions. They're off
+by default, except for --posix, to avoid breaking old programs.
+
+Passing regexp constants as parameters to user defined functions now
+generates a lint warning.
+
+Several obscure regexp bugs fixed; alas, a small number remain.
+
+The manual has been thoroughly revised. It's now almost 50% bigger than
+it used to be.
+
+The `+' modifier in printf is now reset correctly for each item.
+
+The do_unix variable is now named do_traditional.
+
+Handling of \ in sub and gsub rationalized (somewhat, see the manual for
+the gory [and I do mean gory] details).
+
+IGNORECASE now uses ISO 8859-1 Latin-1 instead of straight ASCII. See the
+source for how to revert to pure ASCII.
+
+--lint will now warn if an assignment occurs in a conditional context.
+This may become obnoxious enough to need turning off in the future, but
+"it seemed like a good idea at the time."
+
+%hf and %Lf are now diagnosed as invalid in printf, just like %lf.
+
+Gawk no longer incorrectly closes stdin in child processes used in
+input pipelines.
+
+For integer formats, gawk now correctly treats the precision as the
+number of digits to print, not the number of characters.
+
+gawk is now much better at catching the use of scalar values when
+arrays are needed, both in function calls and the `x in y' constructs.
+
+New gensub function added. See the manual.
+
+If do_tradtional is true, octal and hex escapes in regexp constants are
+treated literally. This matches historical behavior.
+
+yylex/nextc fixed so that even null characters can be included
+in the source code.
+
+do_format now handles cases where a format specifier doesn't end in
+a control letter. --lint reports an error.
+
+strftime() now uses a default time format equivalent to that of the
+Unix date command, thus it can be called with no arguments.
+
+Gawk now catches functions that are used but not defined at parse time
+instead of at run time. (This is a lint error, making it fatal could break
+old code.)
+
+Arrays that max out are now handled correctly.
+
+Integer formats outside the range of an unsigned long are now detected
+correctly using the SunOS 4.x cc compiler.
+
+--traditional option added as new preferred name for --compat, in keeping
+with GCC.
+
+--lint-old option added, so that warnings about things not in old awk
+are only given if explicitly asked for.
+
+`next file' has changed to one word, `nextfile'. `next file' is still
+accepted but generates a lint warning. `next file' will go away eventually.
+
+Gawk with --lint will now notice empty source files and empty data files.
+
+Amiga support using the Unix emulation added. Thanks to fnf@amigalib.com.
+
+test/Makefile is now "parallel-make safe".
+
+Gawk now uses POSIX regexps + GNU regex ops by default. --posix goes to
+pure posix regexps, and --compat goes to traditional Unix regexps. However,
+interval expressions, even though specified by POSIX, are turned off by
+default, to avoid breaking old code.
+
+IGNORECASE now applies to *everything*, string comparison as well
+as regexp operations.
+
+The AT&T Bell Labs Research awk fflush builtin function is now supported.
+fflush is extended to flush stdout if no arg and everything if given
+the null string as an argument.
+
+If RS is more than one character, it is treated as a regular expression
+and records are delimited accordingly. The variable RT is set to the record
+terminator string. This is disabled in compatibility mode.
+
+If FS is set to the null string (or the third arg. of split() is the null
+string), splitting is done at every single character. This is disabled in
+compatibility mode.
+
+Gawk now uses the Autoconf generated configure script, doing away with all
+the config/* files and the machinery that went with them. The Makefile.in
+has also changed accordingly, complete with all the standard GNU Makefile
+targets. (Non-unix systems may still have their own config.h and Makefile;
+see the appropriate README_d/README.* and/or subdirectory.)
+
+The source code has been cleaned up somewhat and the formatting improved.
+
Changes from 2.15.5 to 2.15.6
-----------------------------
@@ -511,7 +618,7 @@ Fixed several bugs in [g]sub() for no match found or the match is 0-length.
Fixed bug where in gsub() a pattern anchored at the beginning would still
substitute throughout the string.
-make test does not assume the . is in PATH.
+make test does not assume that . is in PATH.
Fixed bug when a field beyond the end of the record was requested after
$0 was altered (directly or indirectly).
diff --git a/POSIX b/POSIX.STD
index f2405420..ac8e1abf 100644
--- a/POSIX
+++ b/POSIX.STD
@@ -1,3 +1,17 @@
+August 1995:
+
+Although the published 1003.2 standard contained the incorrect
+comparison rules of 11.2 draft as described below, no actual implementation
+of awk (that I know of) actually used those rules.
+
+A revision of the 1003.2 standard is in progress, and in the May 1995
+draft, the rules were fixed (based on my submissions for interpretation
+requests) to match the description given below. Thus, the next version
+of the standard will have a correct description of the comparison
+rules.
+
+June 1992:
+
Right now, the numeric vs. string comparisons are screwed up in draft
11.2. What prompted me to check it out was the note in gnu.bug.utils
which observed that gawk was doing the comparison $1 == "000"
diff --git a/PROBLEMS b/PROBLEMS
index a4361800..8f7d954a 100644
--- a/PROBLEMS
+++ b/PROBLEMS
@@ -1,10 +1,10 @@
-This is a list of known problems in gawk 2.15.
+This is a list of known problems in gawk 3.0.
Hopefully they will all be fixed in the next major release of gawk.
Please keep in mind that the code is still undergoing significant evolution.
1. The interactions with the lexer and yyerror need reworking. It is possible
to get line numbers that are one line off if --compat or --posix is
- true and either `next file' or `delete array' are used.
+ true and either `nextfile' or `delete array' are used.
Really the whole lexical analysis stuff needs reworking.
diff --git a/README b/README
index 90ed9c29..982f37c2 100644
--- a/README
+++ b/README
@@ -1,11 +1,12 @@
README:
-This is GNU Awk 2.15. It should be upwardly compatible with the System
-V Release 4 awk. It is almost completely compliant with POSIX 1003.2.
+This is GNU Awk 3.0. It should be upwardly compatible with the Bell
+Labs research version of awk. It is almost completely compliant with
+the 1993 POSIX 1003.2 standard for awk.
This release adds new features -- see NEWS for details.
-See the installation instructions, below.
+See the file INSTALL for installation instructions.
Known problems are given in the PROBLEMS file. Work to be done is
described briefly in the FUTURES file. Verified ports are listed in
@@ -15,111 +16,77 @@ Please read the LIMITATIONS and ACKNOWLEDGMENT files.
Read the file POSIX for a discussion of how the standard says comparisons
should be done vs. how they really should be done and how gawk does them.
-To format the documentation with TeX, you must use texinfo.tex 2.53
-or later. Otherwise footnotes look unacceptable.
+To format the documentation with TeX, use at least version 2.151 of
+texinfo.tex. There is a usable copy of texinfo.tex in the doc directory.
-If you wish to remake the Info files, you should use makeinfo. The 2.15
-version of makeinfo works with no errors.
+The Info file now comes with the distribution.
The man page is up to date.
INSTALLATION:
-Check whether there is a system-specific README file for your system.
+Check whether there is a system-specific README file for your system under
+the `README_d' directory.
-A quick overview of the installation process is in the file INSTALL.
-
-Makefile.in may need some tailoring. The only changes necessary should
-be to change installation targets or to change compiler flags.
-The changes to make in Makefile.in are commented and should be obvious.
-
-All other changes should be made in a config file. Samples for
-various systems are included in the config directory. Starting with
-2.11, our intent has been to make the code conform to standards (ANSI,
-POSIX, SVID, in that order) whenever possible, and to not penalize
-standard conforming systems. We have included substitute versions of
-routines not universally available. Simply add the appropriate define
-for the missing feature(s) on your system.
+See the file INSTALL.
If you have neither bison nor yacc, use the awktab.c file here. It was
-generated with bison, and should have no AT&T code in it. (Note that
-modifying awk.y without bison or yacc will be difficult, at best. You might
-want to get a copy of bison from the FSF too.)
-
-If no config file is included for your system, start by copying one
-for a similar system. One way of determining the defines needed is to
-try to load gawk with nothing defined and see what routines are
-unresolved by the loader. This should give you a good idea of how to
-proceed.
-
-The next release will use the FSF autoconfig program, so we are no longer
-soliciting new config files.
-
-If you have an MS-DOS or OS/2 system, use the stuff in the pc directory.
-For an Atari there is an atari directory and similarly one for VMS.
+generated with bison, and has no AT&T code in it. (Note that modifying
+awk.y without bison or yacc will be difficult, at best. You might want
+to get a copy of bison from the FSF too.)
+
+If you have an MS-DOS or OS/2 system, use the stuff in the `pc' directory.
+Similarly, there are separate directories for Ataris, Amigas, and VMS.
-Chapter 16 of The GAWK Manual discusses configuration in detail.
-(However, it does not discuss OS/2 configuration, see README.pc for
-the details. The manual is being massively revised for 2.16.)
+Appendix B of The GAWK Manual discusses configuration in detail. The
+configuration process is now based on Autoconf, so the whole business
+should be considerably simpler than it was in gawk 2.X.Y.
-After successful compilation, do 'make test' to run a small test
+After successful compilation, do 'make check' to run a small test
suite. There should be no output from the 'cmp' invocations except in
-the cases where there are small differences in floating point values.
-If there are other differences, please investigate and report the
-problem.
+the cases where there are small differences in floating point values,
+and possibly in the case of strftime. Several of the tests ignore
+errors on purpose; those are not a problem. If there are other
+differences, please investigate and report the problem.
PRINTING THE MANUAL
-The 'support' directory contains texinfo.tex 2.115, which will be necessary
-for printing the manual, and the texindex.c program from the texinfo
-distribution which is also necessary. See the makefile for the steps needed
-to get a DVI file from the manual.
-
-CAVEATS
-
-The existence of a patchlevel.h file does *N*O*T* imply a commitment on
-our part to issue bug fixes or patches. It is there in case we should
-decide to do so.
+The 'doc' directory contains a recent version of texinfo.tex, which will be
+necessary for printing the manual. Use `make dvi' to get a DVI file from
+the manual. In the `doc' directory, use `make postscript' to get a PostScript
+version of the manual.
BUG REPORTS AND FIXES (Un*x systems):
-Please coordinate changes through David Trueman and/or Arnold Robbins.
-
-David Trueman
-Department of Mathematics, Statistics and Computing Science,
-Dalhousie University, Halifax, Nova Scotia, Canada
-
-UUCP: {uunet utai watmath}!dalcs!david
-INTERNET: david@cs.dal.ca
+Please coordinate changes through Arnold Robbins. In particular, see
+the section in the manual on reporting bugs. Note that comp.lang.awk is
+about the worst place to post a gawk bug report. Please use the mechanisms
+outlined in the manual.
Arnold Robbins
-1736 Reindeer Drive
-Atlanta, GA, 30329-3528, USA
-
-INTERNET: arnold@skeeve.atl.ga.us
-UUCP: { gatech, emory, emoryu1 }!skeeve!arnold
+INTERNET: arnold@gnu.ai.mit.edu
BUG REPORTS AND FIXES (non-Unix ports):
MS-DOS:
Scott Deifik
- AMGEN Inc.
- Amgen Center, Bldg.17-Dept.393
- Thousand Oaks, CA 91320-1789
- Tel-805-499-5725 ext.4677
- Fax-805-498-0358
scottd@amgen.com
+ Darrel Hankerson
+ hankedr@mail.auburn.edu
+
VMS:
Pat Rankin
- rankin@eql.caltech.edu (e-mail only)
+ rankin@eql.caltech.edu
Atari ST:
Michal Jaegermann
- michal@gortel.phys.ualberta.ca (e-mail only)
+ michal@gortel.phys.ualberta.ca
OS/2:
Kai Uwe Rommel
- rommel@ars.muc.de (e-mail only)
- Darrel Hankerson
- hankedr@mail.auburn.edu (e-mail only)
+ rommel@ars.de
+
+Amiga:
+ Fred Fish
+ fnf@amigalib.com
diff --git a/README.amiga b/README.amiga
deleted file mode 100644
index 46acfb4c..00000000
--- a/README.amiga
+++ /dev/null
@@ -1,83 +0,0 @@
-From: "Lars Hecking" <ST000002@hrz1.hrz.th-darmstadt.de>
-Date: Thu, 3 Dec 1992 17:56:24 -0400
-Subject: Gawk-2.14 -- Amiga-Port, the 2nd
-
-Gawk on the Amiga has been compiled and tested using the Amiga port
-of gcc by Markus Wild (wild@nessie.cs.id.ethz.ch). I recommend the use
-of this compiler (at least v2.2.2) because the provided environment
-(includes, libs, binutils) allows straight-forward porting of BSD/UNIX
-programs. Gcc compiled programs access/emulate UNIX syscalls via a
-shared library (ixemul.library); thus, only minimal changes to the
-source are necessary. That release also provides a `sh'- command. If
-no `sh'-command is available or another compiler is being used,
-`config.h' and `Makefile' are to be created by hand. Since I'm afraid
-that none of the Amiga-specific make-commands handles UNIX-style
-makefiles correctly I suggest the use of Dennis Vadura's`dmake',
-which is available on the Amiga (v3.6). I'm currently using dmake v3.8
-(a hack) or GNU make v3.62 (another hack :), which even allows
-to start multiple make jobs! Thanx to Markus!).
-
-Machine Configuration (obsoleted by gawk 2.16)
----------------------
-Only copy the following into config/amiga:
-
-Amiga under AmigaOS with gcc
-DOPRNT_MISSING 1
-ENVSEP ','
-DEFPATH ".,/usr/local/lib"
-HAVE_UNDERSCORE_SETJMP 1
-SRANDOM_PROTO 1
-STDC_HEADERS 1
-
-The gcc compiler and the compiled executables are working under
-AmigaOS 1.2/1.3 as well as AmigaOS 2.x. However, some very special
-functions of ixemul.library currently require AmigaOS 2.x to work
-properly.
-
-
-How to compile (obsoleted by gawk 2.16)
---------------
-Compiling and installing on the Amiga rather clones the UNIX-
-installation:
-
-After you have extracted the `gawk' distribution, `cd' to `gawk-
-2.14'. Look in the `config' subdirectory for a file that matches your
-machine (in this case: `amiga', if the gawk maintainers put it there
-:).
-
- If you find such a file, run the command:
-
- sh configure amiga
-
- This produces a `Makefile' and `config.h' tailored to your system.
- You may wish to edit the `Makefile' to use a different C compiler,
- such as `gcc', the GNU C compiler (recommended), if you have it.
- You may also wish to change the `CFLAGS' variable, which controls
- the command line options that are passed to the C compiler (such as
- optimization levels, or producing `resident'-able code)
-
- After you have configured `Makefile' and `config.h', type:
-
- make
-
-and shortly thereafter (a little bit longer on a 7MHz machine >:->,
-you should have an executable version of `gawk'. That's all there is
-to it!
-
-Testing
--------
-Gawk succeeds in all tests, but not with 'make test'. When executing
-the tests by hand (e.g. 'make swaplns' and so on) gawk runs all tests
-ok except for the following:
-- argarray, awkpath: these do not work due to insufficient environment
- variable handling by pdksh ('bin:sh')
-- poundbang, regtest: pdksh refuses to handle the #! statement
- properly
-Modifying these tests in a way that only *gawk-features, not *sh-
-features are tested, it is found that gawk succeeds here too.
-- manyfiles: (error: too many open files) ixemul.library v39.45 uses
- OPEN_MAX=64; manyfiles required at least OPEN_MAX=106
-This may only be overcome when mwild releases a new ixemul.library.
-Changing the number '100' in the 'manyfiles::'-target in
-'test/Makefile' to some value <= 58 (depends on the number of
-currently open files in ixemul.library) gawk tests successfully, too.
diff --git a/README.atari b/README.atari
deleted file mode 100644
index 60bf9210..00000000
--- a/README.atari
+++ /dev/null
@@ -1,37 +0,0 @@
-Gawk on Atari has been compiled and tested using gcc compiler
-(versions 1.4 and 2.4.4) both with and without -mshort flag. Other
-compilers can be used but if sizeof(pointer) != sizeof(int) this
-code will not compile correctly with non-ANSI compiler (prototypes
-and library).
-
-Compiled executables were tested and with minor modifications, due
-to differences in environment and/or shell, succesfully completed
-at least the following tests:
-awf, pearls, resub, chem, swaplns, delete, messages, argarray,
-longwrds, getline, inftest, spiece2top, fstabplus, compare, arrayref,
-rs, fsrs, rand, fsbs, negexp, ugh2, asgext, anchgsub, splitargv,
-fieldwidths, ignorecase, posix, manyfiles, igncfs, lisp, regtest,
-awkpath, reparse, nfset.
-
-Nearly all of these test do not require any adjustments to run, but a
-modified test suite with a driving Makefile (for gulam) is available
-on a request from Michal Jaegermann, michal@gortel.phys.ualberta.ca,
-via e-mail.
-
-TOS and MiNT
------------
-Setup for Atari assumes that gawk will be used under TOS. In
-particular atari/system.c source is for a system function which nicely
-cooperates with gulam shell and pipes are simulated with temporary
-files. If you plan to run gawk under MiNT then you likely want a
-different system function and real pipes. For that purpose do not
-define SYSTEM_MISSING in a configuration file and do not define
-PIPES_SIMULATED in io.c file. Please note that in the later case gawk
-calls, in gawk_popen(), a function execl() with a hard-coded name of
-"/bin/sh". You want likely to change that to get a name and some
-arguments from an environment variable. This was not done here in
-order to avoid changes which may prove troublesome in a general
-distribution. Subdirectory ./atari contains also a file textrd.c with
-a bug fix for old versions of gcc libraries. This bug is currently
-fixed and the file is not used, but it is left as a convenience for
-those who may not updated yet their libraries.
diff --git a/README.hpux8x b/README.hpux8x
deleted file mode 100644
index 6209c0d7..00000000
--- a/README.hpux8x
+++ /dev/null
@@ -1,18 +0,0 @@
-When compiling on HPPA machines running HP-UX 8.0x you should add the
-option: "+Obb1000". The file awktab.c fails to compile without this
-option. The option sets the maximum number of basic blocks allowed
-in a function to 1000.
-------------------------------------
-
-December 1993 - arnold@skeeve.atl.ga.us
-
-I continue to get reports that gawk compiled with gcc (through 2.5.x) on the
-hp9000 series 700 systems does not pass its test suite. When compiled with
-hp's C compiler, it does fine. If you compile with cc and it fails, then
-let me hear about it please. If you compile with gcc and then complain to
-me, I'm going to fuss at you for not reading the documenation. (:-)
-------------------------------------
-
-January 1994 - arnold@skeeve.atl.ga.us
-
-I'm told that with gcc 2.5.8, gawk now compiles.
diff --git a/README.pc b/README.pc
deleted file mode 100644
index 74b452da..00000000
--- a/README.pc
+++ /dev/null
@@ -1,119 +0,0 @@
-This is the README for GNU awk 2.15 under OS/2 and MS-DOS.
-
- Gawk has been compiled and tested under OS/2 1.x--2.x and MS-DOS
-using MSC 5.1, MSC 6.00A, and EMX/gcc 0.8[gh].
-
-
-Compiling for DOS from a DOS (or OS/2) host
--------------------------------------------
-
- Move the stuff in the pc directory to the directory with the rest
-of the gawk sources. The files Makefile.emx (for EMX/gcc) and
-Makefile.msc (for MSC 5.1 or 6.00A) should work with any Unix-like
-make, including Ndmake 4.5 and dmake.
-
- An alternative makegawk.bat file exists for compiling with MSC 5.1
-or 6.00A. You may need to put CRs at the end of each line in the
-makegawk.bat, Makefile.emx, and Makefile.msc files.
-
- If using MSC 5.1, you will also have to copy the Microsoft library
-routine setargv.obj to the gawk directory for the link to work.
-
- Please note: Gawk is very sensitive to compiler optimization;
-using -Oa or -Ox with MSC 5.1 WILL cause errors. It is recommended
-to only use the default optimization. Also, you should get warning
-messages with MSC, but you can ignore them.
-
- Additional information can be found in the notes at the end of
-this file.
-
-
-Compiling for OS/2 or DOS from an OS/2 host
--------------------------------------------
-
- Move the files in the pc directory to the directory with the rest
-of the gawk sources. Makefile.os2 has been tested with dmake 3.8
-and GNU make 3.68. Enter "make -f Makefile.os2" to see a list of
-targets. As an example, an OS/2 and DOS 32-bit executable using
-EMX/gcc can be created with the command "make -f Makefile.os2 emxbnd".
-
- If using MSC 5.1, you will also have to copy the Microsoft library
-routine setargv.obj to the gawk directory for the link to work.
-
-Notes
------
-
-1. The usual shells for OS/2 and DOS (command.com for DOS and cmd.exe
-for OS/2) do not handle command-lines very well. Stewartson's sh (OS/2
-and DOS) is a good choice, and can be found at:
-
- oak.oakland.edu:SimTel/msdos/sysutil/ms_sh23[bs].zip
- ftp-os2.cdrom.com:pub/os2/16bit/unix/ms_sh23[bs].zip
- ftp.leo.org:pub/comp/os/os2/shells/ms_sh23b.zip
-
-An earlier version can be found at
-
- ftp.leo.org:pub/comp/os/os2/shells/sh164-2.zip
-
-The Korn shell (ksh) may be another possibility:
-
- ftp-os2.cdrom.com:pub/os2/32bit/unix/ksh49.zip
- ftp.leo.org:pub/comp/os/os2/shells/ksh513rt.zip
-
-Bash (OS/2 2.x) should be a good choice; however, there has been some
-trouble getting a solid version for OS/2. As of Feb-95, there are two
-bash ports, available in:
-
- ftp.leo.org:pub/comp/os/os2/shells/gnu/gnubash.zip
- ftp.leo.org:pub/comp/os/os2/shells/gnu/bash-112.zip
- ftp-os2.cdrom.com:pub/os2/32bit/unix/bash_112.zip
-
-Hamilton's C Shell is another possibility, available for a number of
-platforms. A demo is available at ftp.leo.org.
-
-The site ftp.leo.org (ftp.informatik.tu-muenchen.de) is maintained
-by Kai Uwe Rommel (rommel@leo.org), and is also accessible at
-http://www.leo.org/archiv/os2/ via WWW.
-
-
-2. Stewartson's shell contains sources for a setargv-replacement
-for MSC, which can add enhanced command-line processing capabilities
-to gawk. See Makefile.os2.
-
-
-3. dmake is by Dennis Vadura (dvadura@watdragon.uwaterloo.ca), CS Dept.,
-University of Waterloo. OS/2 and DOS versions can be found at
-
- ftp.leo.org:pub/comp/os/os2/devtools/utils/dmake38.zip
- ftp.leo.org:pub/comp/os/os2/devtools/utils/dmake40os2.zip
- ftp-os2.cdrom.com:pub/os2/16bit/program/dmake38x.zip
-
-Ndmake is by D.G. Kneller. This ShareWare program was later released
-as Opus Make (which is available for OS/2 and DOS). Ndmake 4.5 is
-available at
-
- oak.oakland.edu:SimTel/msdos/c/ndmake45.zip
-
-GNU make is from the FSF. An OS/2 2.x version can be found at
-
- ftp.leo.org:pub/comp/os/os2/devtools/gnu/gnumake.zip
-
-
-4. Known bugs. The MSC 5.1 bound version has not received extensive
-testing. When running under OS/2 2.1, there is a bug which may be
-related to the 20-file limit. It can be observed in the "manyfiles"
-test of test/Makefile. This does not occur when running under DOS.
-
-The 16-bit DOS version can exhaust memory on scripts such as Henry
-Spencer's "awf". Use the 32-bit version if possible.
-
-
-----
-
-If you have any problems with the DOS or OS/2 versions of Gawk,
-please send them to
-
- Scott Deifik, scottd@amgen.com (DOS versions)
-or
- Kai Uwe Rommel, rommel@ars.muc.de (OS/2 or bound versions)
- Darrel Hankerson, hankedr@mail.auburn.edu
diff --git a/README.rs6000 b/README.rs6000
deleted file mode 100644
index 3ec26c81..00000000
--- a/README.rs6000
+++ /dev/null
@@ -1,21 +0,0 @@
-Date: Fri, 26 Apr 1991 18:01:04 -0300
-From: mjlx@eagle.cnsf.cornell.edu (Mike Lijewski)
-To: arnold@audiofax.com
-Cc: david@cs.dal.ca
-Subject: testing 2.12 on a machine with unsigned chars
-
-I chose to use the alloca which you supply. The RS/6000 has a builtin
-alloca which is accessible using a `#pragma alloca', but I chose not
-to use it. Initially, I tried to use it by conditionally compiling it,
-similar to the way alloca.h is included on sparcs. But this has
-some problems. Firstly, the RS/6000 compiler complains about the
-placement of the #pragma, something to the intent that the pragma must
-precede all C code. This would be easy enough to fix by conditionally
-including the #pragma elsewhere in the relevant files. A more
-difficult problem is that the awk.tab.c generated by bison uses
-alloca. To fix this the right way, bison would have to be modified to
-output the appropriate conditionally compilable code as it does now
-for sparcs. If you think it is worth while to use the builtin alloca,
-I would be happy to get it working, except for the bison problem, and
-send you the diffs. The FSF might also be interested in "fixing"
-bison to use the builtin alloca on the RS/6000.
diff --git a/README.rt-aos b/README.rt-aos
deleted file mode 100644
index 25518e65..00000000
--- a/README.rt-aos
+++ /dev/null
@@ -1,20 +0,0 @@
-From: karl@cs.umb.edu (Karl Berry)
-Newsgroups: gnu.utils.bug
-Subject: gawk 2.15/ibm rt/compile errors
-Date: 17 May 1993 23:16:11 -0400
-Organization: GNUs Not Usenet
-
-gawk 2.15 on an IBM RT running AOS, configured as bsd43, needs the
-following additional definitions (although I suppose this will be
-irrelevant when you switch to Autoconf).
-
--DTZSET_MISSING -DFMOD_MISSING -DCHAR_UNSIGNED
-
-[ Fixed in 2.15.1 -- configure as ibmrt-aos ]
-
-Also, missing/strftime.c did not compile, because it does not handle the
-case where the system provides neither tzname nor the tm_zone member,
-but instead has the timezone function. I used the strftime.c from the
-find 3.8 distribution.
-
-[ Not fixed in 2.15.1 ]
diff --git a/README.sgi b/README.sgi
deleted file mode 100644
index ae5dbf42..00000000
--- a/README.sgi
+++ /dev/null
@@ -1,5 +0,0 @@
-April 1994
-
-I'm told that gawk now compiles on the SGI with gcc without problems.
-The main issue was apparently the use of CHAR_UNSIGNED in regex.c, and
-the current regex.c now manages to figure it out on its own.
diff --git a/README.sun386i b/README.sun386i
deleted file mode 100644
index e9daf26c..00000000
--- a/README.sun386i
+++ /dev/null
@@ -1,45 +0,0 @@
-Date: Mon, 16 Mar 1992 14:49:10 -0400
-From: <beebe@math.utah.edu>
-To: david@cs.dal.ca, arnold@skeeve.atl.ga.us
-
-...
-
-On the Sun 386i, floating-point numbers are printed without a leading
-zero digit. This causes the cmp step to fail for the chem target. I
-revised the Makefile, and at the same time, inserted an RM macro in
-place of the many rm calls. The complete updated Makefile is appended
-below. This change should be applied to all systems to avoid the
-leading-zero problem in the future.
-
-On the Sun 386i, my initial "make sunos40" resulted in a load failure
-with
-
->> Undefined:
->> strncasecmp
->> strftime
-
-I therefore modified the Makefile to read
-
-OPTIMIZE= -g -O -fstrength-reduce -DSTRFTIME_MISSING -DSTRCASE_MISSING
-
-and loading got further, but still failed:
-
->> tzset: ld: /lib/libc.a(localtime.o): multiply defined
->> *** Error code 1
->> make: Fatal error: Command failed for target `gawk'
->> Current working directory /home/share/gnu/src/gawk-2.13
->> *** Error code 1
->> make: Fatal error: Command failed for target `test'
-
-Investigation with nm showed that localtime() is only referenced by
-builtin.o, so I simply did
-
- chmod +x gawk
- make test bigtest
-
-and was able to complete the test after the leading-zero digit fix
-was applied to the Makefile.
-
-The Sun 386i is not receiving further development by Sun, and SunOS
-4.0.3 is the last O/S release for it, so perhaps you don't want to do
-anything other than note the problem in the README file.
diff --git a/README.FIRST b/README_d/README.FIRST
index 25c987ba..2ebd5b7e 100644
--- a/README.FIRST
+++ b/README_d/README.FIRST
@@ -18,4 +18,4 @@ If you send me email about this, without having read this
file, I will yell at you.
Arnold Robbins
-arnold@skeeve.atl.ga.us
+arnold@gnu.ai.mit.edu
diff --git a/README.VMS b/README_d/README.VMS
index 057a359f..057a359f 100644
--- a/README.VMS
+++ b/README_d/README.VMS
diff --git a/README_d/README.atari b/README_d/README.atari
new file mode 100644
index 00000000..ffbb12ac
--- /dev/null
+++ b/README_d/README.atari
@@ -0,0 +1,21 @@
+Gawk on the Atari has been compiled and tested using gcc, both
+with and without -mshort flag. Other compilers can be used but if
+sizeof(pointer) != sizeof(int) this code will not compile correctly
+with a non-ANSI compiler (prototypes and library).
+
+Compiled executables were tested and successfully passed a test suite
+similar to 'make test'. The required changes are minor, and the minor
+modifications are due to differences in environment and/or shell. If
+the need arises, a modified test suite with a driving Makefile (for
+gulam) is available on request from Michal Jaegermann,
+michal@gortel.phys.ualberta.ca or michal@ellpspace.math.ualberta.ca,
+via e-mail.
+
+Sample files atari/Makefile.st, atari/Makefile.awklib and
+atari/config.h assume gcc compilation and execution under TOS; it is
+likely that one would want to change them for another setup. If they
+are OK, then copy atari/Makefile.st to Makefile, atari/config.h to
+config.h and atari/Makefile.awklib to awklib/Makefile. Pay attention
+to code fragments bracketed by '#ifdef atarist ... #endif'. These
+modifications may not be required or desired with a different OS
+and/or libraries.
diff --git a/README_d/README.pc b/README_d/README.pc
new file mode 100644
index 00000000..875cd53c
--- /dev/null
+++ b/README_d/README.pc
@@ -0,0 +1,178 @@
+This is the README for GNU awk 3.0 under OS/2 and DOS.
+
+ Gawk has been compiled and tested under OS/2 and DOS using the GNU
+development tools from DJ Delorie (DJGPP, DOS-only) and Eberhard Mattes
+(EMX, DOS and OS/2). Microsoft C can be used to build 16-bit versions
+for DOS and OS/2.
+
+
+Building gawk
+-------------
+
+Copy the files in the `pc' directory to the directory with the rest of
+the gawk sources. The makefile contains a configuration section with
+comments, and may need to be edited in order to work with your make
+utility.
+
+The "prefix" line in the Makefile is used during the install of gawk
+(and in building igawk.bat and igawk.cmd). Since the libraries for
+gawk will be installed under $(prefix)/lib/awk (e.g., /gnu/lib/awk),
+it is convenient to have this directory in DEFPATH of config.h.
+
+The makefile contains a number of targets for building various DOS and
+OS/2 versions. A list of targets will be printed if the make command is
+given without a target. As an example, to build gawk using the djgpp
+tools, enter "make djgpp".
+
+
+Testing and installing gawk
+---------------------------
+
+The command "make test" (and possibly "make install") requires several
+Unix-like tools, including an sh-like shell, sed, cp, and cmp. Only dmake
+and OS/2 GNU make are known to work on "make test"; in particular, the
+make delivered as part of the DJGPP tools and Ndmake will not work.
+
+There are two methods for the install: Method 1 uses a typical Unix-like
+approach and requires cat, cp, mkdir, sed, and sh; method 2 uses gawk
+and batch files. See the configuration section of the makefile.
+
+The file test/Makefile will need some editing (especially for DOS). A
+sample makefile with comments appears in pc/Makefile.tst, and can be
+used to modify test/Makefile for your platform. In addition, the files
+in the test directory ending with ".ok" may need to have their
+end-of-line markers converted, as described in Makefile.tst.
+
+It is routine to install by hand, but note that the install target also
+builds igawk.bat and igawk.cmd, which are used to add an include
+facility to gawk (and which require sh).
+
+
+Notes
+-----
+
+1. An sh-like shell may be useful for awk programming (and is essential
+for running "make test"). Stewartson's sh (OS/2 and DOS) is a good choice:
+
+ oak.oakland.edu:SimTel/msdos/sysutil/ms_sh23[bs].zip
+ ftp-os2.cdrom.com:pub/os2/unix/ms_sh23[bs].zip
+ ftp.leo.org:pub/comp/os/os2/shells/ms_sh23b.zip
+
+Stewartson's shell uses a configuration file (see "Command Line Building"
+in the sh manual page), and it may be necessary to edit the entry for
+gawk. The following entries are suggested:
+
+ -- $(EXTENDED_LINE) -- -- Comment only, not part of file --
+ gawk = unix ignoretype # emxbnd
+ gawk = unix # djgpp; msc* with Stewartson's stdargv
+ # No entry for emx or for msc* without stdargv
+	gawk = ignoretype	# if you want something which always works
+ # --but without the use of @-include files.
+
+However, users of djgpp versions of gawk may prefer "dos" over "unix"
+in the above, due to the broken way djgpp handles @-include files.
+Entries for other utilities (such as sed and wc) may need to be
+edited in order to match your specific collection of programs.
+
+The Korn shell (ksh) may be another possibility:
+
+ ftp-os2.cdrom.com:pub/os2/unix/ksh522rt.zip
+ ftp.leo.org:pub/comp/os/os2/shells/ksh513rt.zip
+
+Bash (OS/2) should be a good choice; however, there has been some
+trouble getting a solid version for OS/2. As of Feb-95, there are
+two bash ports, available in:
+
+ ftp.leo.org:pub/comp/os/os2/shells/gnu/gnubash.zip
+ ftp.leo.org:pub/comp/os/os2/shells/gnu/bash-112.zip
+ ftp-os2.cdrom.com:pub/os2/unix/bash_112.zip
+
+Hamilton's C Shell is another possibility, available for a number of
+platforms. A demo is available at ftp.leo.org.
+
+Users of the emx versions of gawk may wish to set EMXSHELL, which
+overrides COMSPEC when running shells from emx programs.
+
+The site ftp.leo.org (ftp.informatik.tu-muenchen.de) is maintained
+by Kai Uwe Rommel (rommel@ars.de), and is also accessible at
+http://www.leo.org/archiv/os2/ via WWW.
+
+
+2. Stewartson's shell contains sources for a setargv-replacement
+for MSC, which can add enhanced command-line processing capabilities
+to gawk. Strongly recommended. See the makefile.
+
+
+3. dmake is by Dennis Vadura (dvadura@watdragon.uwaterloo.ca), CS Dept.,
+University of Waterloo. OS/2 and DOS versions can be found at
+
+ ftp.leo.org:pub/comp/os/os2/devtools/utils/dmake38.zip
+ ftp.leo.org:pub/comp/os/os2/devtools/utils/dmake40os2.zip
+ ftp-os2.cdrom.com:pub/os2/dev16/dmake38x.zip
+
+DOS users will need the DOS-only version (due to the swap requirement):
+
+ oak.oakland.edu:SimTel/msdos/c/dmake38[es].zip
+
+Ndmake is by D.G. Kneller. This ShareWare program was later released
+as Opus Make (which is available for OS/2 and DOS). Ndmake 4.5 is
+available at
+
+ oak.oakland.edu:SimTel/msdos/c/ndmake45.zip
+
+GNU make is from the FSF. An OS/2 version can be found at
+
+ ftp.leo.org:pub/comp/os/os2/devtools/gnu/gnumake.zip
+
+For DOS, dmake-3.8 is recommended. The make delivered with djgpp can
+be used on the djgpp target, but will fail on targets with more
+complicated quoting. Makefile compatibility among all the versions
+of OS/2 and DOS gawk has been an ugly problem.
+
+
+Known bugs
+----------
+
+1. DJGPP version 1 does not properly support signals. At the time of
+this writing, Version 2 of djgpp was in beta, and promises better
+signal support. However, as of 2.00.beta2, known bugs remain.
+
+2. DJGPP version 1 fails the fsbs test due to its broken handling of
+the line in test/Makefile. Fixed in the version 2 betas.
+
+3. DJGPP 2.00.beta2 popen() fails on commands with pipes; edit the
+makefile and use the popen in the pc directory. Fixed in beta3.
+
+4. emx does not support DST. On 2-Jan-96, Mattes writes:
+
+ Quotation from ISO 9899-1990:
+
+ 7.12.3.5 The strftime function
+ [...]
+ %Z is replaced by the time zone name or abbreviation, or by no
+ characters if no time zone is determinable.
+
+ As emx does not yet support DST, it does not know which one of the two
+ time zones (with DST vs. without DST) applies. In consequence, `no
+ time zone is determinable'.
+
+As a workaround, it may be possible to edit do_strftime() of builtin.c
+according to Mattes' recommendation:
+
+ If you happen to know whether DST applies or not for a given struct
+ tm, just set its tm_isdst to a positive value or to zero, respectively.
+ Then, strftime() will replace %Z with the name of the time zone.
+
+5. The 16-bit DOS version can exhaust memory on scripts such as Henry
+Spencer's "awf". Use GNU C versions if possible.
+
+
+----
+
+If you have any problems with the DOS or OS/2 versions of Gawk,
+please send bug reports (along with the version and compiler used) to
+
+ Scott Deifik, scottd@amgen.com (DOS versions)
+or
+ Kai Uwe Rommel, rommel@ars.de (OS/2 or bound versions)
+ Darrel Hankerson, hankedr@mail.auburn.edu
diff --git a/README_d/README.sgi b/README_d/README.sgi
new file mode 100644
index 00000000..0d89ad3e
--- /dev/null
+++ b/README_d/README.sgi
@@ -0,0 +1,17 @@
+Sun Dec 31 15:07:11 EST 1995
+
+Gawk 3.0 is known to be broken on 64-bit SGI machines running IRIX 6.x.
+
+1) It needs to be compiled with the native cc, not gcc.
+
+2) Even if compiled with the native cc, the gensub and gnu regex tests fail.
+
+I don't have access to an IRIX 6.x machine, so I am not able to track
+down the problem. If any kind soul is able to run gawk from a debugger
+and figure out what the problem(s) are, and would let me know (and supply
+patches!), I'd greatly appreciate it.
+
+Thanks!
+
+Arnold Robbins
+arnold@gnu.ai.mit.edu
diff --git a/README_d/README.sunos4 b/README_d/README.sunos4
new file mode 100644
index 00000000..e1ae900d
--- /dev/null
+++ b/README_d/README.sunos4
@@ -0,0 +1,8 @@
+Sun Jan 7 23:49:46 EST 1996
+
+GCC and Autoconf disagree about the type of the array argument passed
+to getgroups(2). You can thus ignore the warning that gcc will
+generate under SunOS 4.1.x for io.c.
+
+If you send me email about this without having read this file, I will
+fuss at you!
diff --git a/README.ultrix b/README_d/README.ultrix
index b2e5d840..9c56c250 100644
--- a/README.ultrix
+++ b/README_d/README.ultrix
@@ -18,6 +18,3 @@ these paragraphs to it:
does the job. Without the switch gawk will compile and run correctly,
but you will get complaints about lost optimisations in builtin.c,
awk.tab.c and regex.c.
-
- The configure for ultrix4.1 works just fine for ultrix4.2
-
diff --git a/README.yacc b/README_d/README.yacc
index 9c5de13a..9c5de13a 100644
--- a/README.yacc
+++ b/README_d/README.yacc
diff --git a/acconfig.h b/acconfig.h
new file mode 100644
index 00000000..325fa9f2
--- /dev/null
+++ b/acconfig.h
@@ -0,0 +1,34 @@
+/*
+ * acconfig.h -- configuration definitions for gawk.
+ */
+
+/*
+ * Copyright (C) 1995 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+@TOP@
+
+#undef HAVE_STRINGIZE /* can use ANSI # operator in cpp */
+#undef REGEX_MALLOC /* use malloc instead of alloca in regex.c */
+#undef SPRINTF_RET /* return type of sprintf */
+
+@BOTTOM@
+
+#include <custom.h> /* overrides for stuff autoconf can't deal with */
diff --git a/aclocal.m4 b/aclocal.m4
new file mode 100644
index 00000000..9086bd81
--- /dev/null
+++ b/aclocal.m4
@@ -0,0 +1,134 @@
+dnl
+dnl aclocal.m4 --- autoconf input file for gawk
+dnl
+dnl Copyright (C) 1995 the Free Software Foundation, Inc.
+dnl
+dnl This file is part of GAWK, the GNU implementation of the
+dnl AWK Programming Language.
+dnl
+dnl GAWK is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation; either version 2 of the License, or
+dnl (at your option) any later version.
+dnl
+dnl GAWK is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+dnl GNU General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+dnl
+
+dnl gawk-specific macros for autoconf. one day hopefully part of autoconf
+
+AC_DEFUN(GAWK_AC_C_STRINGIZE, [
+AC_REQUIRE([AC_PROG_CPP])
+AC_MSG_CHECKING([for ANSI stringizing capability])
+AC_CACHE_VAL(gawk_cv_c_stringize,
+AC_EGREP_CPP([#teststring],[
+#define x(y) #y
+
+char *s = x(teststring);
+
+
+dnl To: Keith Bostic <bostic@bsdi.com>
+dnl Cc: bug-gnu-utils@prep.ai.mit.edu
+dnl Subject: Re: autoconf-2.7
+dnl From: Jim Meyering <meyering@asic.sc.ti.com>
+dnl Date: 15 Oct 1996 11:57:12 -0500
+dnl
+dnl | The assumption that, if a simple program can't be compiled and
+dnl | run, that the user is doing cross-compilation, is causing me
+dnl | serious grief. The problem is that Solaris ships a cc command
+dnl | that just fails, if you haven't bought their compiler.
+dnl |
+dnl | What the user is eventually told is that it's not possible to
+dnl | run test programs when cross-compiling, which doesn't point them
+dnl | at the right problem.
+dnl |
+dnl | Maybe it's just me, but I don't know too many normal users that
+dnl | do cross-compilation. I'd like to see a more stringent test to
+dnl | decide if we're doing cross-compilation. (Maybe with a message
+dnl | to use gcc!?!? ;-})
+dnl
+dnl Now I put this line in configure.in files:
+dnl
+dnl AM_SANITY_CHECK_CC
+dnl
+dnl Here's the macro that goes in aclocal.m4 -- it should be in the
+dnl next official release of automake.
+
+AC_DEFUN(AM_SANITY_CHECK_CC,
+[dnl Derived from macros from Bruno Haible and from Cygnus.
+AC_MSG_CHECKING([whether the compiler ($CC $CFLAGS $LDFLAGS) actually works])
+AC_LANG_SAVE
+ AC_LANG_C
+ AC_TRY_RUN([main() { exit(0); }],
+ am_cv_prog_cc_works=yes, am_cv_prog_cc_works=no,
+ dnl When crosscompiling, just try linking.
+ AC_TRY_LINK([], [], am_cv_prog_cc_works=yes,
+ am_cv_prog_cc_works=no))
+AC_LANG_RESTORE
+case "$am_cv_prog_cc_works" in
+ *no) AC_MSG_ERROR([Installation or configuration problem: C compiler cannot cr
+eate executables.]) ;;
+ *yes) ;;
+esac
+AC_MSG_RESULT(yes)
+])dnl
+], gawk_cv_c_stringize=no, gawk_cv_c_stringize=yes))
+if test "${gawk_cv_c_stringize}" = yes
+then
+ AC_DEFINE(HAVE_STRINGIZE)
+fi
+AC_MSG_RESULT([${gawk_cv_c_stringize}])
+])dnl
+
+
+dnl To: Keith Bostic <bostic@bsdi.com>
+dnl Cc: bug-gnu-utils@prep.ai.mit.edu
+dnl Subject: Re: autoconf-2.7
+dnl From: Jim Meyering <meyering@asic.sc.ti.com>
+dnl Date: 15 Oct 1996 11:57:12 -0500
+dnl
+dnl | The assumption that, if a simple program can't be compiled and
+dnl | run, that the user is doing cross-compilation, is causing me
+dnl | serious grief. The problem is that Solaris ships a cc command
+dnl | that just fails, if you haven't bought their compiler.
+dnl |
+dnl | What the user is eventually told is that it's not possible to
+dnl | run test programs when cross-compiling, which doesn't point them
+dnl | at the right problem.
+dnl |
+dnl | Maybe it's just me, but I don't know too many normal users that
+dnl | do cross-compilation. I'd like to see a more stringent test to
+dnl | decide if we're doing cross-compilation. (Maybe with a message
+dnl | to use gcc!?!? ;-})
+dnl
+dnl Now I put this line in configure.in files:
+dnl
+dnl AM_SANITY_CHECK_CC
+dnl
+dnl Here's the macro that goes in aclocal.m4 -- it should be in the
+dnl next official release of automake.
+
+AC_DEFUN(AM_SANITY_CHECK_CC,
+[dnl Derived from macros from Bruno Haible and from Cygnus.
+AC_MSG_CHECKING([whether the compiler ($CC $CFLAGS $LDFLAGS) actually works])
+AC_LANG_SAVE
+ AC_LANG_C
+ AC_TRY_RUN([main() { exit(0); }],
+ am_cv_prog_cc_works=yes, am_cv_prog_cc_works=no,
+ dnl When crosscompiling, just try linking.
+ AC_TRY_LINK([], [], am_cv_prog_cc_works=yes,
+ am_cv_prog_cc_works=no))
+AC_LANG_RESTORE
+case "$am_cv_prog_cc_works" in
+ *no) AC_MSG_ERROR([Installation or configuration problem: C compiler cannot cr
+eate executables.]) ;;
+ *yes) ;;
+esac
+AC_MSG_RESULT(yes)
+])dnl
diff --git a/alloca.c b/alloca.c
index 866f3d5d..76b4ae00 100644
--- a/alloca.c
+++ b/alloca.c
@@ -1,37 +1,42 @@
-/*
- alloca -- (mostly) portable public-domain implementation -- D A Gwyn
-
- last edit: 86/05/30 rms
- include config.h, since on VMS it renames some symbols.
- Use xmalloc instead of malloc.
-
- This implementation of the PWB library alloca() function,
- which is used to allocate space off the run-time stack so
- that it is automatically reclaimed upon procedure exit,
- was inspired by discussions with J. Q. Johnson of Cornell.
-
- It should work under any C implementation that uses an
- actual procedure stack (as opposed to a linked list of
- frames). There are some preprocessor constants that can
- be defined when compiling for your specific system, for
- improved efficiency; however, the defaults should be okay.
-
- The general concept of this implementation is to keep
- track of all alloca()-allocated blocks, and reclaim any
- that are found to be deeper in the stack than the current
- invocation. This heuristic does not reclaim storage as
- soon as it becomes invalid, but it will do so eventually.
-
- As a special case, alloca(0) reclaims storage without
- allocating any. It is a good idea to use alloca(0) in
- your main control loop, etc. to force garbage collection.
-*/
-#ifndef lint
-static char SCCSid[] = "@(#)alloca.c 1.1"; /* for the "what" utility */
+/* alloca.c -- allocate automatically reclaimed memory
+ (Mostly) portable public-domain implementation -- D A Gwyn
+
+ This implementation of the PWB library alloca function,
+ which is used to allocate space off the run-time stack so
+ that it is automatically reclaimed upon procedure exit,
+ was inspired by discussions with J. Q. Johnson of Cornell.
+ J.Otto Tennant <jot@cray.com> contributed the Cray support.
+
+ There are some preprocessor constants that can
+ be defined when compiling for your specific system, for
+ improved efficiency; however, the defaults should be okay.
+
+ The general concept of this implementation is to keep
+ track of all alloca-allocated blocks, and reclaim any
+ that are found to be deeper in the stack than the current
+ invocation. This heuristic does not reclaim storage as
+ soon as it becomes invalid, but it will do so eventually.
+
+ As a special case, alloca(0) reclaims storage without
+ allocating any. It is a good idea to use alloca(0) in
+ your main control loop, etc. to force garbage collection. */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
#endif
#ifdef emacs
-#include "config.h"
+#include "blockinput.h"
+#endif
+
+/* If compiling with GCC 2, this file's not needed. */
+#if !defined (__GNUC__) || __GNUC__ < 2
+
+/* If someone has defined alloca as a macro,
+ there must be some other way alloca is supposed to work. */
+#ifndef alloca
+
+#ifdef emacs
#ifdef static
/* actually, only want this if static is defined as ""
-- this is for usg, in which emacs must undefine static
@@ -45,72 +50,90 @@ lose
#endif /* static */
#endif /* emacs */
-#ifdef __STDC__
-typedef void *pointer; /* generic pointer type */
+/* If your stack is a linked list of frames, you have to
+ provide an "address metric" ADDRESS_FUNCTION macro. */
+
+#if defined (CRAY) && defined (CRAY_STACKSEG_END)
+long i00afunc ();
+#define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg))
#else
-typedef char *pointer; /* generic pointer type */
+#define ADDRESS_FUNCTION(arg) &(arg)
#endif
-#define NULL 0 /* null pointer constant */
+#if __STDC__
+typedef void *pointer;
+#else
+typedef char *pointer;
+#endif
-extern void free();
-extern pointer xmalloc();
-extern int write();
-extern void exit();
+#define NULL 0
-/*
- Define STACK_DIRECTION if you know the direction of stack
- growth for your system; otherwise it will be automatically
- deduced at run-time.
+/* Different portions of Emacs need to call different versions of
+ malloc. The Emacs executable needs alloca to call xmalloc, because
+ ordinary malloc isn't protected from input signals. On the other
+ hand, the utilities in lib-src need alloca to call malloc; some of
+ them are very simple, and don't have an xmalloc routine.
- STACK_DIRECTION > 0 => grows toward higher addresses
- STACK_DIRECTION < 0 => grows toward lower addresses
- STACK_DIRECTION = 0 => direction of growth unknown
-*/
+ Non-Emacs programs expect this to call xmalloc.
+
+ Callers below should use malloc. */
+
+#ifndef emacs
+#define malloc xmalloc
+#endif
+extern pointer malloc ();
+
+/* Define STACK_DIRECTION if you know the direction of stack
+ growth for your system; otherwise it will be automatically
+ deduced at run-time.
+
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown */
#ifndef STACK_DIRECTION
-#define STACK_DIRECTION 0 /* direction unknown */
+#define STACK_DIRECTION 0 /* Direction unknown. */
#endif
#if STACK_DIRECTION != 0
-#define STACK_DIR STACK_DIRECTION /* known at compile-time */
+#define STACK_DIR STACK_DIRECTION /* Known at compile-time. */
-#else /* STACK_DIRECTION == 0; need run-time code */
+#else /* STACK_DIRECTION == 0; need run-time code. */
-static int stack_dir; /* 1 or -1 once known */
+static int stack_dir; /* 1 or -1 once known. */
#define STACK_DIR stack_dir
static void
-find_stack_direction (/* void */)
+find_stack_direction ()
{
- static char *addr = NULL; /* address of first
- `dummy', once known */
- auto char dummy; /* to get stack address */
+ static char *addr = NULL; /* Address of first `dummy', once known. */
+ auto char dummy; /* To get stack address. */
if (addr == NULL)
- { /* initial entry */
- addr = &dummy;
+ { /* Initial entry. */
+ addr = ADDRESS_FUNCTION (dummy);
- find_stack_direction (); /* recurse once */
+ find_stack_direction (); /* Recurse once. */
+ }
+ else
+ {
+ /* Second entry. */
+ if (ADDRESS_FUNCTION (dummy) > addr)
+ stack_dir = 1; /* Stack grew upward. */
+ else
+ stack_dir = -1; /* Stack grew downward. */
}
- else /* second entry */
- if (&dummy > addr)
- stack_dir = 1; /* stack grew upward */
- else
- stack_dir = -1; /* stack grew downward */
}
-#endif /* STACK_DIRECTION == 0 */
+#endif /* STACK_DIRECTION == 0 */
-/*
- An "alloca header" is used to:
- (a) chain together all alloca()ed blocks;
- (b) keep track of stack depth.
+/* An "alloca header" is used to:
+ (a) chain together all alloca'ed blocks;
+ (b) keep track of stack depth.
- It is very important that sizeof(header) agree with malloc()
- alignment chunk size. The following default should work okay.
-*/
+ It is very important that sizeof(header) agree with malloc
+ alignment chunk size. The following default should work okay. */
#ifndef ALIGN_SIZE
#define ALIGN_SIZE sizeof(double)
@@ -118,89 +141,352 @@ find_stack_direction (/* void */)
typedef union hdr
{
- char align[ALIGN_SIZE]; /* to force sizeof(header) */
+ char align[ALIGN_SIZE]; /* To force sizeof(header). */
struct
{
- union hdr *next; /* for chaining headers */
- char *deep; /* for stack depth measure */
+ union hdr *next; /* For chaining headers. */
+ char *deep; /* For stack depth measure. */
} h;
} header;
-/*
- alloca( size ) returns a pointer to at least `size' bytes of
- storage which will be automatically reclaimed upon exit from
- the procedure that called alloca(). Originally, this space
- was supposed to be taken from the current stack frame of the
- caller, but that method cannot be made to work for some
- implementations of C, for example under Gould's UTX/32.
-*/
+static header *last_alloca_header = NULL; /* -> last alloca header. */
-static header *last_alloca_header = NULL; /* -> last alloca header */
+/* Return a pointer to at least SIZE bytes of storage,
+ which will be automatically reclaimed upon exit from
+ the procedure that called alloca. Originally, this space
+ was supposed to be taken from the current stack frame of the
+ caller, but that method cannot be made to work for some
+ implementations of C, for example under Gould's UTX/32. */
pointer
-alloca (size) /* returns pointer to storage */
- unsigned size; /* # bytes to allocate */
+alloca (size)
+ unsigned size;
{
- auto char probe; /* probes stack depth: */
- register char *depth = &probe;
+ auto char probe; /* Probes stack depth: */
+ register char *depth = ADDRESS_FUNCTION (probe);
#if STACK_DIRECTION == 0
- if (STACK_DIR == 0) /* unknown growth direction */
+ if (STACK_DIR == 0) /* Unknown growth direction. */
find_stack_direction ();
#endif
- /* Reclaim garbage, defined as all alloca()ed storage that
- was allocated from deeper in the stack than currently. */
+ /* Reclaim garbage, defined as all alloca'd storage that
+ was allocated from deeper in the stack than currently. */
{
- register header *hp; /* traverses linked list */
+ register header *hp; /* Traverses linked list. */
+
+#ifdef emacs
+ BLOCK_INPUT;
+#endif
for (hp = last_alloca_header; hp != NULL;)
if ((STACK_DIR > 0 && hp->h.deep > depth)
|| (STACK_DIR < 0 && hp->h.deep < depth))
{
- register header *np = hp->h.next;
+ register header *np = hp->h.next;
- free ((pointer) hp); /* collect garbage */
+ free ((pointer) hp); /* Collect garbage. */
- hp = np; /* -> next header */
+ hp = np; /* -> next header. */
}
else
- break; /* rest are not deeper */
+ break; /* Rest are not deeper. */
- last_alloca_header = hp; /* -> last valid storage */
+ last_alloca_header = hp; /* -> last valid storage. */
+
+#ifdef emacs
+ UNBLOCK_INPUT;
+#endif
}
if (size == 0)
- return NULL; /* no allocation required */
+ return NULL; /* No allocation required. */
- /* Allocate combined header + user data storage. */
+ /* Allocate combined header + user data storage. */
{
- register pointer new = xmalloc (sizeof (header) + size);
- /* address of header */
+ register pointer new = malloc (sizeof (header) + size);
+ /* Address of header. */
- ((header *)new)->h.next = last_alloca_header;
- ((header *)new)->h.deep = depth;
+ ((header *) new)->h.next = last_alloca_header;
+ ((header *) new)->h.deep = depth;
- last_alloca_header = (header *)new;
+ last_alloca_header = (header *) new;
- /* User storage begins just after header. */
+ /* User storage begins just after header. */
- return (pointer)((char *)new + sizeof(header));
+ return (pointer) ((char *) new + sizeof (header));
}
}
-pointer
-xmalloc(n)
-int n;
-{
- char *malloc();
- char *p = malloc(n);
+#if defined (CRAY) && defined (CRAY_STACKSEG_END)
- if (p)
- return (p);
+#ifdef DEBUG_I00AFUNC
+#include <stdio.h>
+#endif
- write(2, "fatal: out of memory!\n", 22);
- exit(1);
+#ifndef CRAY_STACK
+#define CRAY_STACK
+#ifndef CRAY2
+/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */
+struct stack_control_header
+ {
+ long shgrow:32; /* Number of times stack has grown. */
+ long shaseg:32; /* Size of increments to stack. */
+ long shhwm:32; /* High water mark of stack. */
+ long shsize:32; /* Current size of stack (all segments). */
+ };
+
+/* The stack segment linkage control information occurs at
+ the high-address end of a stack segment. (The stack
+ grows from low addresses to high addresses.) The initial
+ part of the stack segment linkage control information is
+ 0200 (octal) words. This provides for register storage
+ for the routine which overflows the stack. */
+
+struct stack_segment_linkage
+ {
+ long ss[0200]; /* 0200 overflow words. */
+ long sssize:32; /* Number of words in this segment. */
+ long ssbase:32; /* Offset to stack base. */
+ long:32;
+ long sspseg:32; /* Offset to linkage control of previous
+ segment of stack. */
+ long:32;
+ long sstcpt:32; /* Pointer to task common address block. */
+ long sscsnm; /* Private control structure number for
+ microtasking. */
+ long ssusr1; /* Reserved for user. */
+ long ssusr2; /* Reserved for user. */
+ long sstpid; /* Process ID for pid based multi-tasking. */
+ long ssgvup; /* Pointer to multitasking thread giveup. */
+ long sscray[7]; /* Reserved for Cray Research. */
+ long ssa0;
+ long ssa1;
+ long ssa2;
+ long ssa3;
+ long ssa4;
+ long ssa5;
+ long ssa6;
+ long ssa7;
+ long sss0;
+ long sss1;
+ long sss2;
+ long sss3;
+ long sss4;
+ long sss5;
+ long sss6;
+ long sss7;
+ };
+
+#else /* CRAY2 */
+/* The following structure defines the vector of words
+ returned by the STKSTAT library routine. */
+struct stk_stat
+ {
+ long now; /* Current total stack size. */
+ long maxc; /* Amount of contiguous space which would
+ be required to satisfy the maximum
+ stack demand to date. */
+ long high_water; /* Stack high-water mark. */
+ long overflows; /* Number of stack overflow ($STKOFEN) calls. */
+ long hits; /* Number of internal buffer hits. */
+ long extends; /* Number of block extensions. */
+ long stko_mallocs; /* Block allocations by $STKOFEN. */
+ long underflows; /* Number of stack underflow calls ($STKRETN). */
+ long stko_free; /* Number of deallocations by $STKRETN. */
+ long stkm_free; /* Number of deallocations by $STKMRET. */
+ long segments; /* Current number of stack segments. */
+ long maxs; /* Maximum number of stack segments so far. */
+ long pad_size; /* Stack pad size. */
+ long current_address; /* Current stack segment address. */
+ long current_size; /* Current stack segment size. This
+ number is actually corrupted by STKSTAT to
+ include the fifteen word trailer area. */
+ long initial_address; /* Address of initial segment. */
+ long initial_size; /* Size of initial segment. */
+ };
+
+/* The following structure describes the data structure which trails
+ any stack segment. I think that the description in 'asdef' is
+ out of date. I only describe the parts that I am sure about. */
+
+struct stk_trailer
+ {
+ long this_address; /* Address of this block. */
+ long this_size; /* Size of this block (does not include
+ this trailer). */
+ long unknown2;
+ long unknown3;
+ long link; /* Address of trailer block of previous
+ segment. */
+ long unknown5;
+ long unknown6;
+ long unknown7;
+ long unknown8;
+ long unknown9;
+ long unknown10;
+ long unknown11;
+ long unknown12;
+ long unknown13;
+ long unknown14;
+ };
+
+#endif /* CRAY2 */
+#endif /* not CRAY_STACK */
+
+#ifdef CRAY2
+/* Determine a "stack measure" for an arbitrary ADDRESS.
+ I doubt that "lint" will like this much. */
+
+static long
+i00afunc (long *address)
+{
+ struct stk_stat status;
+ struct stk_trailer *trailer;
+ long *block, size;
+ long result = 0;
+
+ /* We want to iterate through all of the segments. The first
+ step is to get the stack status structure. We could do this
+ more quickly and more directly, perhaps, by referencing the
+ $LM00 common block, but I know that this works. */
+
+ STKSTAT (&status);
+
+ /* Set up the iteration. */
+
+ trailer = (struct stk_trailer *) (status.current_address
+ + status.current_size
+ - 15);
+
+ /* There must be at least one stack segment. Therefore it is
+ a fatal error if "trailer" is null. */
+
+ if (trailer == 0)
+ abort ();
+
+ /* Discard segments that do not contain our argument address. */
+
+ while (trailer != 0)
+ {
+ block = (long *) trailer->this_address;
+ size = trailer->this_size;
+ if (block == 0 || size == 0)
+ abort ();
+ trailer = (struct stk_trailer *) trailer->link;
+ if ((block <= address) && (address < (block + size)))
+ break;
+ }
+
+ /* Set the result to the offset in this segment and add the sizes
+ of all predecessor segments. */
+
+ result = address - block;
+
+ if (trailer == 0)
+ {
+ return result;
+ }
+
+ do
+ {
+ if (trailer->this_size <= 0)
+ abort ();
+ result += trailer->this_size;
+ trailer = (struct stk_trailer *) trailer->link;
+ }
+ while (trailer != 0);
+
+ /* We are done. Note that if you present a bogus address (one
+ not in any segment), you will get a different number back, formed
+ from subtracting the address of the first block. This is probably
+ not what you want. */
+
+ return (result);
}
+
+#else /* not CRAY2 */
+/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP.
+ Determine the number of the cell within the stack,
+ given the address of the cell. The purpose of this
+ routine is to linearize, in some sense, stack addresses
+ for alloca. */
+
+static long
+i00afunc (long address)
+{
+ long stkl = 0;
+
+ long size, pseg, this_segment, stack;
+ long result = 0;
+
+ struct stack_segment_linkage *ssptr;
+
+ /* Register B67 contains the address of the end of the
+ current stack segment. If you (as a subprogram) store
+ your registers on the stack and find that you are past
+ the contents of B67, you have overflowed the segment.
+
+ B67 also points to the stack segment linkage control
+ area, which is what we are really interested in. */
+
+ stkl = CRAY_STACKSEG_END ();
+ ssptr = (struct stack_segment_linkage *) stkl;
+
+ /* If one subtracts 'size' from the end of the segment,
+ one has the address of the first word of the segment.
+
+ If this is not the first segment, 'pseg' will be
+ nonzero. */
+
+ pseg = ssptr->sspseg;
+ size = ssptr->sssize;
+
+ this_segment = stkl - size;
+
+ /* It is possible that calling this routine itself caused
+ a stack overflow. Discard stack segments which do not
+ contain the target address. */
+
+ while (!(this_segment <= address && address <= stkl))
+ {
+#ifdef DEBUG_I00AFUNC
+ fprintf (stderr, "%011o %011o %011o\n", this_segment, address, stkl);
+#endif
+ if (pseg == 0)
+ break;
+ stkl = stkl - pseg;
+ ssptr = (struct stack_segment_linkage *) stkl;
+ size = ssptr->sssize;
+ pseg = ssptr->sspseg;
+ this_segment = stkl - size;
+ }
+
+ result = address - this_segment;
+
+ /* If you subtract pseg from the current end of the stack,
+ you get the address of the previous stack segment's end.
+ This seems a little convoluted to me, but I'll bet you save
+ a cycle somewhere. */
+
+ while (pseg != 0)
+ {
+#ifdef DEBUG_I00AFUNC
+ fprintf (stderr, "%011o %011o\n", pseg, size);
+#endif
+ stkl = stkl - pseg;
+ ssptr = (struct stack_segment_linkage *) stkl;
+ size = ssptr->sssize;
+ pseg = ssptr->sspseg;
+ result += size;
+ }
+ return (result);
+}
+
+#endif /* not CRAY2 */
+#endif /* CRAY */
+
+#endif /* no alloca */
+#endif /* not GCC version 2 */
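
The C emulation above keeps every alloca'd block on a malloc'd chain tagged with a "stack depth" measure (the address of a local variable, linearized on Crays by i00afunc), and each new call frees the blocks whose owning frames have since returned. The following is a minimal, hypothetical sketch of that reclamation idea, assuming a conventional downward-growing stack; it is illustrative only and is not the alloca.c shipped in this patch.

    #include <stdio.h>
    #include <stdlib.h>

    /* Each emulated allocation remembers how deep the stack was when it
     * was made; a later call frees every block whose owning frame has
     * already returned.  The depth measure here is just the address of a
     * local variable, assuming a downward-growing stack; the real
     * alloca.c generalizes this with STACK_DIRECTION and, on Crays, with
     * the i00afunc() linearization shown above. */

    struct hdr {
        struct hdr *next;   /* chain of live emulated allocations */
        char *deep;         /* stack depth recorded at allocation time */
    };

    static struct hdr *last_alloca_header = NULL;

    static void *
    my_alloca(size_t size, char *depth)
    {
        struct hdr *p;

        /* Reclaim garbage: blocks allocated deeper than where we are now
         * belong to frames that have returned. */
        while (last_alloca_header != NULL && last_alloca_header->deep < depth) {
            p = last_alloca_header;
            last_alloca_header = p->next;
            free(p);
        }

        p = (struct hdr *) malloc(sizeof(struct hdr) + size);
        if (p == NULL)
            abort();
        p->next = last_alloca_header;
        p->deep = depth;
        last_alloca_header = p;
        return (void *) (p + 1);    /* caller's space follows the header */
    }

    static void
    demo(int n)
    {
        char probe;                     /* its address serves as the depth probe */
        char *buf = (char *) my_alloca(64, &probe);

        sprintf(buf, "frame %d", n);
        puts(buf);
        if (n > 0)
            demo(n - 1);
    }

    int
    main(void)
    {
        demo(2);    /* allocates at increasing depth */
        demo(0);    /* this call's my_alloca() reclaims the deeper blocks */
        return 0;
    }
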
diff --git a/alloca.s b/alloca.s
deleted file mode 100644
index 08affc58..00000000
--- a/alloca.s
+++ /dev/null
@@ -1,352 +0,0 @@
-/* `alloca' standard 4.2 subroutine for 68000's and 16000's and others.
- Also has _setjmp and _longjmp for pyramids.
- Copyright (C) 1985, 1986, 1988 Free Software Foundation, Inc.
-
-This file is part of GNU Emacs.
-
-GNU Emacs is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY. No author or distributor
-accepts responsibility to anyone for the consequences of using it
-or for whether it serves any particular purpose or works at all,
-unless he says so in writing. Refer to the GNU Emacs General Public
-License for full details.
-
-Everyone is granted permission to copy, modify and redistribute
-GNU Emacs, but only under the conditions described in the
-GNU Emacs General Public License. A copy of this license is
-supposed to have been given to you along with GNU Emacs so you
-can know your rights and responsibilities. It should be in a
-file named COPYING. Among other things, the copyright notice
-and this notice must be preserved on all copies. */
-
-
-/* Both 68000 systems I have run this on have had broken versions of alloca.
- Also, I am told that non-berkeley systems do not have it at all.
- So replace whatever system-provided alloca there may be
- on all 68000 systems. */
-
-#define NOT_C_CODE
-/* #include "config.h" */
-
-#ifndef HAVE_ALLOCA /* define this to use system's alloca */
-
-#ifndef hp9000s300
-#ifndef m68k
-#ifndef mc68k
-#ifndef m68000
-#ifndef WICAT
-#ifndef ns32000
-#ifndef ns16000
-#ifndef sequent
-#ifndef pyramid
-#ifndef ATT3B5
-#ifndef XENIX
-you
-lose!!
-#endif /* XENIX */
-#endif /* ATT3B5 */
-#endif /* pyramid */
-#endif /* sequent */
-#endif /* ns16000 */
-#endif /* ns32000 */
-#endif /* WICAT */
-#endif /* m68000 */
-#endif /* mc68k */
-#endif /* m68k */
-#endif /* hp9000s300 */
-
-
-#ifdef hp9000s300
-#ifdef OLD_HP_ASSEMBLER
- data
- text
- globl _alloca
-_alloca
- move.l (sp)+,a0 ; pop return addr from top of stack
- move.l (sp)+,d0 ; pop size in bytes from top of stack
- add.l #ROUND,d0 ; round size up to long word
- and.l #MASK,d0 ; mask out lower two bits of size
- sub.l d0,sp ; allocate by moving stack pointer
- tst.b PROBE(sp) ; stack probe to allocate pages
- move.l sp,d0 ; return pointer
- add.l #-4,sp ; new top of stack
- jmp (a0) ; not a normal return
-MASK equ -4 ; Longword alignment
-ROUND equ 3 ; ditto
-PROBE equ -128 ; safety buffer for C compiler scratch
- data
-#else /* new hp assembler syntax */
-/*
- The new compiler does "move.m <registers> (%sp)" to save registers,
- so we must copy the saved registers when we mung the sp.
- The old compiler did "move.m <register> <offset>(%a6)", which
- gave us no trouble
- */
- text
- set PROBE,-128 # safety for C frame temporaries
- set MAXREG,22 # d2-d7, a2-a5, fp2-fp7 may have been saved
- global _alloca
-_alloca:
- mov.l (%sp)+,%a0 # return addess
- mov.l (%sp)+,%d0 # number of bytes to allocate
- mov.l %sp,%a1 # save old sp for register copy
- mov.l %sp,%d1 # compute new sp
- sub.l %d0,%d1 # space requested
- and.l &-4,%d1 # round down to longword
- sub.l &MAXREG*4,%d1 # space for saving registers
- mov.l %d1,%sp # save new value of sp
- tst.b PROBE(%sp) # create pages (sigh)
- mov.l %a2,%d1 # save reg a2
- mov.l %sp,%a2
- move.w &MAXREG-1,%d0
-copy_regs_loop: /* save caller's saved registers */
- mov.l (%a1)+,(%a2)+
- dbra %d0,copy_regs_loop
- mov.l %a2,%d0 # return value
- mov.l %d1,%a2 # restore a2
- add.l &-4,%sp # adjust tos
- jmp (%a0) # rts
-#endif /* new hp assembler */
-#else
-#if defined(m68k) || defined(mc68k) /* SGS assembler totally different */
- file "alloca.s"
- global alloca
-alloca:
-#ifdef MOTOROLA_DELTA
-/* slightly modified version of alloca to motorola sysV/68 pcc - based
- compiler.
- this compiler saves used regfisters relative to %sp instead of %fp.
- alright, just make new copy of saved register set whenever we allocate
- new space from stack..
- this is true at last until SVR3V5.1 . bug has reported to Motorola. */
- set MAXREG,10 # max no of registers to save (d2-d7, a2-a5)
- mov.l (%sp)+,%a1 # pop return addr from top of stack
- mov.l (%sp)+,%d0 # pop size in bytes from top of stack
- mov.l %sp,%a0 # save stack pointer for register copy
- addq.l &3,%d0 # round size up to long word
- andi.l &-4,%d0 # mask out lower two bits of size
- mov.l %sp,%d1 # compute new value of sp to d1
- sub.l %d0,%d1 # pseudo-allocate by moving stack pointer
- sub.l &MAXREG*4,%d1 # allocate more space for saved regs.
- mov.l %d1,%sp # actual alloaction.
- move.w &MAXREG-1,%d0 # d0 counts saved regs.
- mov.l %a2,%d1 # preserve a2.
- mov.l %sp,%a2 # make pointer to new reg save area.
-copy_regs_loop: # copy stuff from old save area.
- mov.l (%a0)+,(%a2)+ # save saved register
- dbra %d0,copy_regs_loop
- mov.l %a2,%a0 # now a2 is start of allocated space.
- mov.l %a2,%d0 # return it in both a0 and d0 to play safe.
- mov.l %d1,%a2 # restore a2.
- subq.l &4,%sp # new top of stack
- jmp (%a1) # far below normal return
-#else /* not MOTOROLA_DELTA */
- mov.l (%sp)+,%a1 # pop return addr from top of stack
- mov.l (%sp)+,%d0 # pop size in bytes from top of stack
- add.l &R%1,%d0 # round size up to long word
- and.l &-4,%d0 # mask out lower two bits of size
- sub.l %d0,%sp # allocate by moving stack pointer
- tst.b P%1(%sp) # stack probe to allocate pages
- mov.l %sp,%a0 # return pointer as pointer
- mov.l %sp,%d0 # return pointer as int to avoid disaster
- add.l &-4,%sp # new top of stack
- jmp (%a1) # not a normal return
- set S%1,64 # safety factor for C compiler scratch
- set R%1,3+S%1 # add to size for rounding
- set P%1,-132 # probe this far below current top of stack
-#endif /* not MOTOROLA_DELTA */
-
-#else /* not m68k && not mc68k */
-
-#ifdef m68000
-
-#ifdef WICAT
-/*
- * Registers are saved after the corresponding link so we have to explicitly
- * move them to the top of the stack where they are expected to be.
- * Since we do not know how many registers were saved in the calling function
- * we must assume the maximum possible (d2-d7,a2-a5). Hence, we end up
- * wasting some space on the stack.
- *
- * The large probe (tst.b) attempts to make up for the fact that we have
- * potentially used up the space that the caller probed for its own needs.
- */
- .procss m0
- .config "68000 1"
- .module _alloca
-MAXREG: .const 10
- .sect text
- .global _alloca
-_alloca:
- move.l (sp)+,a1 ; pop return address
- move.l (sp)+,d0 ; pop allocation size
- move.l sp,d1 ; get current SP value
- sub.l d0,d1 ; adjust to reflect required size...
- sub.l #MAXREG*4,d1 ; ...and space needed for registers
- and.l #-4,d1 ; backup to longword boundry
- move.l sp,a0 ; save old SP value for register copy
- move.l d1,sp ; set the new SP value
- tst.b -4096(sp) ; grab an extra page (to cover caller)
- move.l a2,d1 ; save callers register
- move.l sp,a2
- move.w #MAXREG-1,d0 ; # of longwords to copy
-loop: move.l (a0)+,(a2)+ ; copy registers...
- dbra d0,loop ; ...til there are no more
- move.l a2,d0 ; end of register area is addr for new space
- move.l d1,a2 ; restore saved a2.
- addq.l #4,sp ; caller will increment sp by 4 after return.
- move.l d0,a0 ; return value in both a0 and d0.
- jmp (a1)
- .end _alloca
-#else
-
-/* Some systems want the _, some do not. Win with both kinds. */
-.globl _alloca
-_alloca:
-.globl alloca
-alloca:
- movl sp@+,a0
- movl a7,d0
- subl sp@,d0
- andl #~3,d0
- movl d0,sp
- tstb sp@(0) /* Make stack pages exist */
- /* Needed on certain systems
- that lack true demand paging */
- addql #4,d0
- jmp a0@
-
-#endif /* not WICAT */
-#endif /* m68000 */
-#endif /* not m68k */
-#endif /* not hp9000s300 */
-
-#if defined (ns16000) || defined (ns32000)
-
- .text
- .align 2
-/* Some systems want the _, some do not. Win with both kinds. */
-.globl _alloca
-_alloca:
-.globl alloca
-alloca:
-
-/* Two different assembler syntaxes are used for the same code
- on different systems. */
-
-#ifdef sequent
-#define IM
-#define REGISTER(x) x
-#else
-#ifdef NS5 /* ns SysV assembler */
-#define IM $
-#define REGISTER(x) x
-#else
-#define IM $
-#define REGISTER(x) 0(x)
-#endif
-#endif
-
-/*
- * The ns16000 is a little more difficult, need to copy regs.
- * Also the code assumes direct linkage call sequence (no mod table crap).
- * We have to copy registers, and therefore waste 32 bytes.
- *
- * Stack layout:
- * new sp -> junk
- * registers (copy)
- * r0 -> new data
- * | (orig retval)
- * | (orig arg)
- * old sp -> regs (orig)
- * local data
- * fp -> old fp
- */
-
- movd tos,r1 /* pop return addr */
- negd tos,r0 /* pop amount to allocate */
- sprd sp,r2
- addd r2,r0
- bicb IM/**/3,r0 /* 4-byte align */
- lprd sp,r0
- adjspb IM/**/36 /* space for regs, +4 for caller to pop */
- movmd 0(r2),4(sp),IM/**/4 /* copy regs */
- movmd 0x10(r2),0x14(sp),IM/**/4
- jump REGISTER(r1) /* funky return */
-#endif /* ns16000 or ns32000 */
-
-#ifdef pyramid
-
-.globl _alloca
-
-_alloca: addw $3,pr0 # add 3 (dec) to first argument
- bicw $3,pr0 # then clear its last 2 bits
- subw pr0,sp # subtract from SP the val in PR0
- andw $-32,sp # keep sp aligned on multiple of 32.
- movw sp,pr0 # ret. current SP
- ret
-
-#ifdef PYRAMID_OLD /* This isn't needed in system version 4. */
-.globl __longjmp
-.globl _longjmp
-.globl __setjmp
-.globl _setjmp
-
-__longjmp: jump _longjmp
-__setjmp: jump _setjmp
-#endif
-
-#endif /* pyramid */
-
-#ifdef ATT3B5
-
- .align 4
- .globl alloca
-
-alloca:
- movw %ap, %r8
- subw2 $9*4, %r8
- movw 0(%r8), %r1 /* pc */
- movw 4(%r8), %fp
- movw 8(%r8), %sp
- addw2 %r0, %sp /* make room */
- movw %sp, %r0 /* return value */
- jmp (%r1) /* continue... */
-
-#endif /* ATT3B5 */
-
-#ifdef XENIX
-
-.386
-
-_TEXT segment dword use32 public 'CODE'
-assume cs:_TEXT
-
-;-------------------------------------------------------------------------
-
-public _alloca
-_alloca proc near
-
- pop ecx ; return address
- pop eax ; amount to alloc
- add eax,3 ; round it to 32-bit boundary
- and al,11111100B ;
- mov edx,esp ; current sp in edx
- sub edx,eax ; lower the stack
- xchg esp,edx ; start of allocation in esp, old sp in edx
- mov eax,esp ; return ptr to base in eax
- push [edx+8] ; save poss. stored reg. values (esi,edi,ebx)
- push [edx+4] ; on lowered stack
- push [edx] ;
- sub esp,4 ; allow for 'add esp, 4'
- jmp ecx ; jump to return address
-
-_alloca endp
-
-_TEXT ends
-
-end
-
-#endif /* XENIX */
-
-#endif /* not HAVE_ALLOCA */
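
Every assembly variant deleted above does the same job: pop the byte count, round it up to a word boundary, drop the stack pointer, touch the new page so it really exists, and hand back the new top of stack. From C, the only contract a caller can rely on is that the memory lives exactly as long as the calling frame. A small usage sketch, assuming the host provides alloca() via <alloca.h> or a compiler builtin (this example is not part of the patch):

    #include <stdio.h>
    #include <string.h>
    #include <ctype.h>
    #include <alloca.h>     /* assumption: the host declares alloca() here */

    /* Make a throwaway upper-case copy of s on the current stack frame.
     * The buffer is released automatically when this function returns,
     * so it must never be returned or stored for later use. */
    static void
    shout(const char *s)
    {
        size_t n = strlen(s) + 1;
        char *tmp = (char *) alloca(n);
        char *p;

        memcpy(tmp, s, n);
        for (p = tmp; *p != '\0'; p++)
            *p = toupper((unsigned char) *p);
        puts(tmp);
    }

    int
    main(void)
    {
        shout("hello, alloca");
        return 0;
    }
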
diff --git a/amiga/ChangeLog b/amiga/ChangeLog
new file mode 100644
index 00000000..570c031b
--- /dev/null
+++ b/amiga/ChangeLog
@@ -0,0 +1,3 @@
+Wed Jan 10 22:58:55 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * ChangeLog created.
diff --git a/amiga/gawkmisc.ami b/amiga/gawkmisc.ami
new file mode 100644
index 00000000..863a5c8f
--- /dev/null
+++ b/amiga/gawkmisc.ami
@@ -0,0 +1,124 @@
+/*
+ * gawkmisc.ami --- miscellaneous gawk routines that are OS specific.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991 - 95 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+char quote = '\'';
+char *defpath = ".,/gnu/lib";
+char envsep = ',';
+
+/* gawk_name --- pull out the "gawk" part from how the OS called us */
+
+char *
+gawk_name(filespec)
+const char *filespec;
+{
+ char *p;
+
+ /* "path/name" -> "name" */
+ p = strrchr(filespec, '/');
+ return (p == NULL ? (char *) filespec : p + 1);
+}
+
+/* os_arg_fixup --- fixup the command line */
+
+void
+os_arg_fixup(argcp, argvp)
+int *argcp;
+char ***argvp;
+{
+ /* no-op */
+ return;
+}
+
+/* os_devopen --- open special per-OS devices */
+
+int
+os_devopen(name, flag)
+const char *name;
+int flag;
+{
+ /* no-op */
+ return INVALID_HANDLE;
+}
+
+/* optimal_bufsize --- determine optimal buffer size */
+
+int
+optimal_bufsize(fd, stb)
+int fd;
+struct stat *stb;
+{
+ /* force all members to zero in case OS doesn't use all of them. */
+ memset(stb, '\0', sizeof(struct stat));
+
+ /*
+ * System V.n, n < 4, doesn't have the file system block size in the
+ * stat structure. So we have to make some sort of reasonable
+ * guess. We use stdio's BUFSIZ, since that is what it was
+ * meant for in the first place.
+ */
+#ifdef HAVE_ST_BLKSIZE
+#define DEFBLKSIZE (stb->st_blksize ? stb->st_blksize : BUFSIZ)
+#else
+#define DEFBLKSIZE BUFSIZ
+#endif
+
+ if (isatty(fd))
+ return BUFSIZ;
+ if (fstat(fd, stb) == -1)
+ fatal("can't stat fd %d (%s)", fd, strerror(errno));
+ if (lseek(fd, (off_t)0, 0) == -1) /* not a regular file */
+ return DEFBLKSIZE;
+ if (stb->st_size > 0 && stb->st_size < DEFBLKSIZE) /* small file */
+ return stb->st_size;
+ return DEFBLKSIZE;
+}
+
+/* ispath --- return true if path has directory components */
+
+int
+ispath(file)
+const char *file;
+{
+ return (strchr(file, '/') != NULL);
+}
+
+/* isdirpunct --- return true if char is a directory separator */
+
+int
+isdirpunct(c)
+int c;
+{
+ return (c == '/');
+}
+
+/* fork --- fake fork(2) using vfork */
+
+int
+fork()
+{
+ extern int vfork();
+
+ return vfork();
+}
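
optimal_bufsize() above encodes a simple policy: BUFSIZ for a terminal, the file system's st_blksize (or BUFSIZ when that is zero) for seekable files, and the file's own size when it is smaller than one block. Below is a standalone sketch of the same policy, wired into setvbuf(), assuming a host whose struct stat has st_blksize; the names are hypothetical, not gawk's.

    #include <stdio.h>
    #include <string.h>
    #include <errno.h>
    #include <unistd.h>
    #include <sys/stat.h>

    /* Pick an I/O buffer size the way optimal_bufsize() does. */
    static long
    pick_bufsize(int fd, struct stat *stb)
    {
        memset(stb, '\0', sizeof(struct stat));

    #define DEFBLKSIZE (stb->st_blksize ? (long) stb->st_blksize : (long) BUFSIZ)

        if (isatty(fd))
            return BUFSIZ;                          /* interactive input */
        if (fstat(fd, stb) == -1) {
            fprintf(stderr, "can't stat fd %d (%s)\n", fd, strerror(errno));
            return BUFSIZ;
        }
        if (lseek(fd, (off_t) 0, SEEK_SET) == -1)   /* pipe: not a regular file */
            return DEFBLKSIZE;
        if (stb->st_size > 0 && stb->st_size < DEFBLKSIZE)  /* small file */
            return (long) stb->st_size;
        return DEFBLKSIZE;
    #undef DEFBLKSIZE
    }

    int
    main(void)
    {
        struct stat sbuf;
        static char buf[BUFSIZ * 8];
        long size = pick_bufsize(fileno(stdin), &sbuf);

        if (size > 0 && (size_t) size <= sizeof(buf))
            setvbuf(stdin, buf, _IOFBF, (size_t) size);
        printf("buffering stdin with %ld bytes\n", size);
        return 0;
    }
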
diff --git a/array.c b/array.c
index d42f9a6c..348b3433 100644
--- a/array.c
+++ b/array.c
@@ -3,10 +3,10 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991 - 95 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,8 +19,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*
@@ -42,6 +42,8 @@
static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1));
static void grow_table P((NODE *symbol));
+/* concat_exp --- concatenate expression list into a single string */
+
NODE *
concat_exp(tree)
register NODE *tree;
@@ -66,8 +68,7 @@ register NODE *tree;
memcpy(str, r->stptr, r->stlen+1);
s = str + r->stlen;
free_temp(r);
- tree = tree->rnode;
- while (tree) {
+ for (tree = tree->rnode; tree != NULL; tree = tree->rnode) {
if (subseplen == 1)
*s++ = *subsep;
else {
@@ -82,14 +83,14 @@ register NODE *tree;
memcpy(s, r->stptr, r->stlen+1);
s += r->stlen;
free_temp(r);
- tree = tree->rnode;
}
r = make_str_node(str, s - str, ALREADY_MALLOCED);
r->flags |= TEMP;
return r;
}
-/* Flush all the values in symbol[] before doing a split() */
+/* assoc_clear --- flush all the values in symbol[] before doing a split() */
+
void
assoc_clear(symbol)
NODE *symbol;
@@ -97,16 +98,16 @@ NODE *symbol;
int i;
NODE *bucket, *next;
- if (symbol->var_array == 0)
+ if (symbol->var_array == NULL)
return;
for (i = 0; i < symbol->array_size; i++) {
- for (bucket = symbol->var_array[i]; bucket; bucket = next) {
+ for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
next = bucket->ahnext;
unref(bucket->ahname);
unref(bucket->ahvalue);
freenode(bucket);
}
- symbol->var_array[i] = 0;
+ symbol->var_array[i] = NULL;
}
free(symbol->var_array);
symbol->var_array = NULL;
@@ -114,9 +115,8 @@ NODE *symbol;
symbol->flags &= ~ARRAYMAXED;
}
-/*
- * calculate the hash function of the string in subs
- */
+/* hash --- calculate the hash function of the string in subs */
+
unsigned int
hash(s, len, hsize)
register const char *s;
@@ -125,34 +125,22 @@ unsigned long hsize;
{
register unsigned long h = 0;
-#ifdef this_is_really_slow
-
- register unsigned long g;
-
- while (len--) {
- h = (h << 4) + *s++;
- g = (h & 0xf0000000);
- if (g) {
- h = h ^ (g >> 24);
- h = h ^ g;
- }
- }
-
-#else /* this_is_really_slow */
-/*
- * This is INCREDIBLY ugly, but fast. We break the string up into 8 byte
- * units. On the first time through the loop we get the "leftover bytes"
- * (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle
- * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If
- * this routine is heavily used enough, it's worth the ugly coding.
- *
- * OZ's original sdbm hash, copied from Margo Seltzers db package.
- *
- */
+ /*
+ * This is INCREDIBLY ugly, but fast. We break the string up into
+ * 8 byte units. On the first time through the loop we get the
+ * "leftover bytes" (strlen % 8). On every other iteration, we
+ * perform 8 HASHC's so we handle all 8 bytes. Essentially, this
+ * saves us 7 cmp & branch instructions. If this routine is
+ * heavily used enough, it's worth the ugly coding.
+ *
+ * OZ's original sdbm hash, copied from Margo Seltzers db package.
+ */
-/* Even more speed: */
-/* #define HASHC h = *s++ + 65599 * h */
-/* Because 65599 = pow(2,6) + pow(2,16) - 1 we multiply by shifts */
+ /*
+ * Even more speed:
+ * #define HASHC h = *s++ + 65599 * h
+ * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts
+ */
#define HASHC htmp = (h << 6); \
h = *s++ + htmp + (htmp << 10) - h
@@ -161,11 +149,12 @@ unsigned long hsize;
h = 0;
#if defined(VAXC)
-/*
- * [This was an implementation of "Duff's Device", but it has been
- * redone, separating the switch for extra iterations from the loop.
- * This is necessary because the DEC VAX-C compiler is STOOPID.]
- */
+ /*
+ * This was an implementation of "Duff's Device", but it has been
+ * redone, separating the switch for extra iterations from the
+ * loop. This is necessary because the DEC VAX-C compiler is
+ * STOOPID.
+ */
switch (len & (8 - 1)) {
case 7: HASHC;
case 6: HASHC;
@@ -190,7 +179,7 @@ unsigned long hsize;
HASHC;
} while (--loop);
}
-#else /* !VAXC */
+#else /* ! VAXC */
/* "Duff's Device" for those who can handle it */
if (len > 0) {
register size_t loop = (len + 8 - 1) >> 3;
@@ -209,17 +198,15 @@ unsigned long hsize;
} while (--loop);
}
}
-#endif /* !VAXC */
-#endif /* this_is_really_slow - not */
+#endif /* ! VAXC */
if (h >= hsize)
h %= hsize;
return h;
}
-/*
- * locate symbol[subs]
- */
+/* assoc_find --- locate symbol[subs] */
+
static NODE * /* NULL if not found */
assoc_find(symbol, subs, hash1)
NODE *symbol;
@@ -228,59 +215,47 @@ int hash1;
{
register NODE *bucket, *prev = 0;
- for (bucket = symbol->var_array[hash1]; bucket; bucket = bucket->ahnext) {
- if (cmp_nodes(bucket->ahname, subs) == 0) {
-#if 0
- /*
- * Disable this code for now. It screws things up if we have
- * a ``for (iggy in foo)'' in progress. Interestingly enough,
- * this was not a problem in 2.15.3, only in 2.15.4. I'm not
- * sure why it works in 2.15.3.
- */
- if (prev) { /* move found to front of chain */
- prev->ahnext = bucket->ahnext;
- bucket->ahnext = symbol->var_array[hash1];
- symbol->var_array[hash1] = bucket;
- }
-#endif
+ for (bucket = symbol->var_array[hash1]; bucket != NULL;
+ bucket = bucket->ahnext) {
+ if (cmp_nodes(bucket->ahname, subs) == 0)
return bucket;
- } else
+ else
prev = bucket; /* save previous list entry */
}
return NULL;
}
-/*
- * test whether the array element symbol[subs] exists or not
- */
+/* in_array --- test whether the array element symbol[subs] exists or not */
+
int
in_array(symbol, subs)
NODE *symbol, *subs;
{
register int hash1;
+ int ret;
if (symbol->type == Node_param_list)
symbol = stack_ptr[symbol->param_cnt];
- if (symbol->var_array == 0)
+ if ((symbol->flags & SCALAR) != 0)
+ fatal("attempt to use scalar as array");
+ if (symbol->var_array == NULL)
return 0;
subs = concat_exp(subs); /* concat_exp returns a string node */
hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
- if (assoc_find(symbol, subs, hash1) == NULL) {
- free_temp(subs);
- return 0;
- } else {
- free_temp(subs);
- return 1;
- }
+ ret = (assoc_find(symbol, subs, hash1) != NULL);
+ free_temp(subs);
+ return ret;
}
/*
+ * assoc_lookup:
+ * Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it
+ * isn't there. Returns a pointer ala get_lhs to where its value is stored.
+ *
* SYMBOL is the address of the node (or other pointer) being dereferenced.
* SUBS is a number or string used as the subscript.
- *
- * Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it
- * isn't there. Returns a pointer ala get_lhs to where its value is stored
*/
+
NODE **
assoc_lookup(symbol, subs)
NODE *symbol, *subs;
@@ -290,7 +265,10 @@ NODE *symbol, *subs;
(void) force_string(subs);
- if (symbol->var_array == 0) {
+ if ((symbol->flags & SCALAR) != 0)
+ fatal("attempt to use scalar as array");
+
+ if (symbol->var_array == NULL) {
symbol->type = Node_var_array;
symbol->array_size = symbol->table_size = 0; /* sanity */
symbol->flags &= ~ARRAYMAXED;
@@ -344,6 +322,8 @@ NODE *symbol, *subs;
return &(bucket->ahvalue);
}
+/* do_delete --- perform `delete array[s]' */
+
void
do_delete(symbol, tree)
NODE *symbol, *tree;
@@ -354,19 +334,28 @@ NODE *symbol, *tree;
if (symbol->type == Node_param_list)
symbol = stack_ptr[symbol->param_cnt];
- if (symbol->var_array == 0)
- return;
+ if (symbol->type == Node_var_array) {
+ if (symbol->var_array == NULL)
+ return;
+ } else
+ fatal("delete: illegal use of variable `%s' as array",
+ symbol->vname);
subs = concat_exp(tree); /* concat_exp returns string node */
hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
last = NULL;
- for (bucket = symbol->var_array[hash1]; bucket; last = bucket, bucket = bucket->ahnext)
+ for (bucket = symbol->var_array[hash1]; bucket != NULL;
+ last = bucket, bucket = bucket->ahnext)
if (cmp_nodes(bucket->ahname, subs) == 0)
break;
free_temp(subs);
- if (bucket == NULL)
+ if (bucket == NULL) {
+ if (do_lint)
+ warning("delete: index `%s' not in array `%s'",
+ subs->stptr, symbol->vname);
return;
- if (last)
+ }
+ if (last != NULL)
last->ahnext = bucket->ahnext;
else
symbol->var_array[hash1] = bucket->ahnext;
@@ -384,6 +373,8 @@ NODE *symbol, *tree;
}
}
+/* assoc_scan --- start a ``for (iggy in foo)'' loop */
+
void
assoc_scan(symbol, lookat)
NODE *symbol;
@@ -397,6 +388,8 @@ struct search *lookat;
assoc_next(lookat);
}
+/* assoc_next --- actually find the next element in array */
+
void
assoc_next(lookat)
struct search *lookat;
@@ -462,8 +455,7 @@ NODE *symbol;
static long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497 };
/* find next biggest hash size */
- oldsize = symbol->array_size;
- newsize = 0;
+ newsize = oldsize = symbol->array_size;
for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
if (oldsize < sizes[i]) {
newsize = sizes[i];
diff --git a/atari/ChangeLog b/atari/ChangeLog
new file mode 100644
index 00000000..570c031b
--- /dev/null
+++ b/atari/ChangeLog
@@ -0,0 +1,3 @@
+Wed Jan 10 22:58:55 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * ChangeLog created.
diff --git a/atari/Makefile.awklib b/atari/Makefile.awklib
new file mode 100644
index 00000000..30be5226
--- /dev/null
+++ b/atari/Makefile.awklib
@@ -0,0 +1,104 @@
+# Makefile for GNU Awk support library
+# Copy this file to 'awklib' subdirectory of main directory
+# and execute via relevant targets in your top Makefile
+#
+# This Makefile actually will work for awklib even when NOT
+# compiling with Atari Makefile!!!
+#
+# Copyright (C) 1995 the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+SHELL = /bin/sh
+
+srcdir = .
+
+INSTALL = /usr/bin/install -c
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_DATA = ${INSTALL} -m 644
+
+CC = gcc
+CFLAGS = -g -O
+
+prefix = /usr/local
+exec_prefix = ${prefix}
+binprefix =
+manprefix =
+
+bindir = ${exec_prefix}/bin
+libdir = ${exec_prefix}/lib
+mandir = ${prefix}/man/man1
+manext = .1
+infodir = ${prefix}/info
+datadir = ${prefix}/share/awk
+libexecdir = ${exec_prefix}/libexec/awk
+
+PWCAT = pwcat
+GRCAT = grcat
+AUXPROGS = $(PWCAT) $(GRCAT)
+GCOM = '{print}'
+GAWK = ../gawk
+AUXAWK = passwd.awk group.awk
+
+all: stamp-eg $(AUXPROGS) igawk $(AUXAWK)
+
+stamp-eg: $(srcdir)/../doc/gawk.texi
+ rm -fr eg stamp-eg
+ $(GAWK) -f $(srcdir)/extract.awk $(srcdir)/../doc/gawk.texi
+ @echo 'some makes are stupid and will not check a directory' > stamp-eg
+ @echo 'against a file, so this file is a place holder. gack.' >> stamp-eg
+
+$(PWCAT): $(srcdir)/eg/lib/pwcat.c
+ $(CC) $(CFLAGS) $(srcdir)/eg/lib/pwcat.c -o $@
+
+$(GRCAT): $(srcdir)/eg/lib/grcat.c
+ $(CC) $(CFLAGS) $(srcdir)/eg/lib/grcat.c -o $@
+
+igawk: $(srcdir)/eg/prog/igawk.sh
+ $(GAWK) $(GCOM) $(srcdir)/eg/prog/igawk.sh > $@ ; chmod 755 $@
+
+passwd.awk: $(srcdir)/eg/lib/passwdawk.in
+ (cd $(srcdir)/eg/lib ; \
+ sed 's;/usr/local/libexec/awk;$(libexecdir);' < passwdawk.in) > passwd.awk
+
+group.awk: $(srcdir)/eg/lib/groupawk.in
+ (cd $(srcdir)/eg/lib ; \
+ sed 's;/usr/local/libexec/awk;$(libexecdir);' < groupawk.in) > group.awk
+
+install: igawk $(AUXPROGS) $(AUXAWK)
+ $(INSTALL_PROGRAM) igawk $(bindir)
+ for i in $(AUXPROGS) ; do \
+ $(INSTALL_PROGRAM) $$i $(libexecdir) ; \
+ done
+ for i in $(AUXAWK) $(srcdir)/eg/lib/*.awk ; do \
+ $(INSTALL_DATA) $$i $(datadir) ; \
+ done
+
+# libexecdir and bindir are removed in the top level Makefile's uninstall
+uninstall:
+ rm -fr $(libexecdir)/* $(datadir)/*
+ rm -f $(bindir)/igawk
+
+clean:
+ rm -f $(AUXPROGS) igawk
+
+maintainer-clean: clean
+ rm -fr eg stamp-eg
+
+distclean: clean
+ rm -f Makefile
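
This Makefile compiles the pwcat and grcat helpers extracted into awklib/eg/lib; they exist to dump the password and group databases as ordinary colon-separated text so that passwd.awk and group.awk can read them with FS set to ":". A minimal pwcat-style sketch for a POSIX host with getpwent() follows; the shipped pwcat.c remains the authoritative version.

    #include <stdio.h>
    #include <pwd.h>

    /* Dump the password database as colon-separated text, one user per
     * line, in the order name:passwd:uid:gid:gecos:dir:shell -- exactly
     * the layout passwd.awk expects to split on ':'. */
    int
    main(void)
    {
        struct passwd *p;

        while ((p = getpwent()) != NULL)
            printf("%s:%s:%ld:%ld:%s:%s:%s\n",
                   p->pw_name, p->pw_passwd,
                   (long) p->pw_uid, (long) p->pw_gid,
                   p->pw_gecos, p->pw_dir, p->pw_shell);
        endpwent();
        return 0;
    }
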
diff --git a/atari/Makefile.st b/atari/Makefile.st
index 33e9e3c2..ca00b466 100644
--- a/atari/Makefile.st
+++ b/atari/Makefile.st
@@ -5,10 +5,14 @@
# and modified system().
# Check comments in this Makefile and adjust to your needs!!
#
-# Copyright (C) 1986, 1988-1993 the Free Software Foundation, Inc.
+# This Makefile assumes that you are using a Bourne-compatible shell
+# (like bash). If this is not the case you will have to edit various
+# targets or perform some actions by hand.
+#
+# Copyright (C) 1986, 1988-1995 the Free Software Foundation, Inc.
#
# This file is part of GAWK, the GNU implementation of the
-# AWK Progamming Language.
+# AWK Programming Language.
#
# GAWK is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -21,107 +25,219 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with GAWK; see the file COPYING. If not, write to
-# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-srcdir = .
-VPATH = .
-# CC = gcc
-CC = cgcc # when cross-compiling
-#YACC = byacc
-YACC = bison -y
-# basename of parser output - adjust to your parser requirements
-YOUTPUT = awk_tab
+srcdir = .
+# native compiler with freshly compiled gawk.ttp to fix awklib
+CC = gcc
+GAWK=../gawk.ttp
+# cross-compiler and gawk already installed on the system (any awk will do)
+# CC = cgcc
+# GAWK = gawk
# WIDTH and EXT have to be both defined or both undefined
-# WIDTH = -mshort
+# WIDTH = -mshort -DINT_IS_16BIT
# EXT = 16
+OFLAGS = -O2 -Wall -fomit-frame-pointer $(WIDTH)
+LDFLAGS = $(WIDTH)
+YACC = bison -y
+
+INSTALL = cp -p
+INSTALL_PROGRAM = ${INSTALL}
+#INSTALL_DATA = ${INSTALL} -m 644
+INSTALL_DATA = ${INSTALL}
+
LIBS = -lpml$(EXT)
-LIBOBJS = strncasecmp.o
-ST_LIBOBJS = stack.o tmpnam.o system.o
-# CFLAGS= -g -DGAWK -DHAVE_CONFIG_H $(WIDTH) -I. -I..
-CFLAGS= -DGAWK -DHAVE_CONFIG_H $(WIDTH) -O2 -I. -I.. -Wall \
- -fomit-frame-pointer # -pipe #
-# keep only global symbols and use long symbol names
-# xstrip -k in target gawk.ttp removes all symbols but _stksize
-# allowing for stack size manipulations without recompiling (with fixstk)
-LDFLAGS= -Xlinker -x -G $(WIDTH)
+ALLOCA =
+
+#all these definitions likely require changes
+exec_prefix = ${prefix}
+prefix = /usr/local
+binprefix =
+manprefix =
+
+bindir = $(exec_prefix)/bin
+libdir = $(exec_prefix)/lib
+manexta = l
+mandir = $(prefix)/man$(manexta)
+manext = .$(manexta)
+infodir = $(prefix)/info
+datadir = $(prefix)/lib/awk
+libexecdir = $(prefix)/libexec/awk
+
+#DEFPATH = ".:$(datadir)"
+# datadir is passed to the next Makefile level and through sed
+# you may need many more backslashes than that if you have to use
+# them at all - sigh...
+DEFPATH = ".,c:\\lib\\awk,c:\\gnu\\lib\\awk"
+
+SHELL = /bin/sh
+CFLAGS = $(OFLAGS) -DGAWK -I. -I$(srcdir) -DHAVE_CONFIG_H
+
+MFLAGS = "CC=$(CC)" "CFLAGS=$(CFLAGS) $(LDFLAGS)" GAWK=$(GAWK) \
+ PWCAT=pwcat.ttp GRCAT=grcat.ttp GCOM='{sub(/\":\"/, "\",\""); print}' \
+ "INSTALL_PROGRAM=$(INSTALL)" "INSTALL_DATA=$(INSTALL)" \
+ "bindir=$(bindir)" \
+ "libdir=$(libdir)" \
+ "mandir=$(mandir)" \
+ "infodir=$(infodir)" \
+ "datadir=$(datadir)" \
+ "libexecdir=$(libexecdir)"
# object files
-AWKOBJS = main.o eval.o builtin.o msg.o iop.o io.o field.o array.o \
- node.o version.o re.o getopt.o getopt1.o
+AWKOBJS = array.o builtin.o eval.o field.o gawkmisc.o io.o main.o \
+ missing.o msg.o node.o re.o version.o
ALLOBJS = $(AWKOBJS) awktab.o
# GNUOBJS
# GNU stuff that gawk uses as library routines.
-REGEX = regex
-GNUOBJS= $(REGEX).o dfa.o
+GNUOBJS= getopt.o getopt1.o regex.o dfa.o $(ALLOCA)
+
+# source and documentation files
+SRC = array.c builtin.c eval.c field.c gawkmisc.c io.c main.c \
+ missing.c msg.c node.c re.c version.c
+
+ALLSRC= $(SRC) awktab.c
+
+AWKSRC= awk.h awk.y $(ALLSRC) patchlevel.h protos.h
+
+GNUSRC = alloca.c dfa.c dfa.h regex.c regex.h getopt.h getopt.c getopt1.c
+
+DOCS= doc/gawk.1.in doc/gawk.texi.in doc/texinfo.tex
+
+TEXFILES= doc/gawk.aux doc/gawk.cp doc/gawk.cps doc/gawk.fn doc/gawk.fns \
+ doc/gawk.ky doc/gawk.kys doc/gawk.pg doc/gawk.pgs doc/gawk.toc \
+ doc/gawk.tp doc/gawk.tps doc/gawk.vr doc/gawk.vrs
+
+ALLDOC= doc/gawk.dvi $(TEXFILES) doc/gawk.info*
+
+# Release of gawk. There can be no leading or trailing white space here!
+REL=3.0
+PROG=gawk.ttp
+
+# clear out suffixes list
+.SUFFIXES:
+.SUFFIXES: .c .o
+
+.c.o:
+ $(CC) -c $(CFLAGS) $<
-all: gawk.ttp
+# rules to build $(PROG)
+all: $(PROG) awklib/all
-gawk.ttp: $(ALLOBJS) $(GNUOBJS) $(REOBJS) $(LIBOBJS) $(ST_LIBOBJS)
- $(CC) -o $@ $(LDFLAGS) \
- $(ALLOBJS) $(GNUOBJS) $(REOBJS) $(LIBOBJS) $(ST_LIBOBJS) $(LIBS)
- toglclr -fload $@
+alldoc: all doc/all
+
+$(PROG): $(ALLOBJS) $(GNUOBJS) $(REOBJS)
+ $(CC) -o $@ $(LDFLAGS) $(ALLOBJS) $(GNUOBJS) $(REOBJS) $(LIBS)
+# toglclr -fload $@
# xstrip -k $@
-$(AWKOBJS) $(GNUOBJS): awk.h dfa.h $(REGEX).h config.h
+
+$(ALLOBJS): awk.h dfa.h regex.h config.h custom.h
+
+$(GNUOBJS): config.h custom.h
+
+gawkmisc.o: $(srcdir)/atari/gawkmisc.atr
+ $(CC) -c -DDEFPATH='$(DEFPATH)' $(CFLAGS) $(srcdir)/$<
+
+# this rule may or may not be needed, depending on your library
+missing.o io.o:
+ $(CC) -c $(CFLAGS) -DPIPES_SIMULATED $(srcdir)/$<
# cheat with defines to force an inclusion of a proper code
getopt.o: getopt.h
$(CC) $(CFLAGS) -D_LIBC -D__alloca=__builtin_alloca -c getopt.c
+
+getopt.o: getopt.h
getopt1.o: getopt.h
-main.o: patchlevel.h
+main.o: patchlevel.h
+
+awktab.c: awk.y
+ $(YACC) -v $(srcdir)/awk.y && \
+ if test -f y.tab.c ; then mv y.tab.c ytab.c ; else true ; fi && \
+ sed '/^extern char .malloc(), .realloc();$$/d' ytab.c >awktab.c && \
+ rm ytab.c
+
+# VMS POSIX make won't apply the default .c.o rule to awktab.o for some reason
+awktab.o: awktab.c awk.h
+ $(CC) -c $(CFLAGS) $(srcdir)/awktab.c
+
+alloca.o: alloca.c
-awktab.c: awk.y
- $(YACC) -v awk.y
- sed '/^extern char .malloc(), .realloc();$$/d' $(YOUTPUT).c >awktab.c
- rm $(YOUTPUT).c
+install: $(PROG)
+ $(INSTALL) $(PROG) $(bindir) && chmod 755 $(bindir)/$(PROG)
+ cd awklib && $(MAKE) $(MFLAGS) install
+# cd doc && $(MAKE) $(MFLAGS) install
-# rules for $(LIBOBJS) and $(ST_LIBOBJS)
+installdoc: info
+ cd doc && $(MAKE) $(MFLAGS) install
-strncasecmp.o: missing/strncasecmp.c config.h
- $(CC) $(CFLAGS) -c missing/strncasecmp.c
+#
+#installtotal: installdirs install installdoc
-stack.o: atari/stack.c
- $(CC) $(CFLAGS) -c atari/stack.c
-
-tmpnam.o: atari/tmpnam.c
- $(CC) $(CFLAGS) -c atari/tmpnam.c
-# this is an optional replacement for a library module.
-system.o: atari/system.c
- $(CC) $(CFLAGS) -c atari/system.c
+installdirs: mkinstalldirs
+ $(srcdir)/mkinstalldirs $(bindir) $(datadir) \
+ $(libdir) $(infodir) $(mandir)
+
+uninstall:
+ rm -f $(bindir)/$(PROG)
+ cd awklib && $(MAKE) $(MFLAGS) uninstall
+# cd doc && $(MAKE) $(MFLAGS) uninstall
clean:
- rm -rf gawk.ttp *.o core
+ rm -rf $(PROG) *.o core y.output
+ cd awklib && $(MAKE) $(MFLAGS) clean
+# the following does not always make sense (when cross-compiling)
+# cd test && $(MAKE) $(MFLAGS) clean
+# cd doc && $(MAKE) $(MFLAGS) clean
+
distclean: clean
rm -f Makefile *.orig *.rej */*.orig */*.rej awk.output gmon.out \
- make.out y.output config.h config.status
+ make.out config.h config.status config.cache config.log stamp-h stamp-h.in
+ cd doc && $(MAKE) $(MFLAGS) distclean
mostlyclean: clean
-realclean: distclean
- rm -f awktab.c $(ALLDOC)
+maintainer-clean: distclean
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+ rm -f awktab.c
+ cd doc && $(MAKE) $(MFLAGS) maintainer-clean
+ cd test && $(MAKE) $(MFLAGS) maintainer-clean
+# cd awklib && $(MAKE) $(MFLAGS) maintainer-clean
+
+clobber: maintainer-clean
+
+TAGS:
+ etags $(AWKSRC)
+ ctags $(AWKSRC)
-gawk.dvi: gawk.texi
- tex gawk.texi; texindex gawk.??
- tex gawk.texi; texindex gawk.??
- tex gawk.texi
- rm -f gawk.?? gawk.???
+dvi: $(srcdir)/doc/gawk.texi.in
+ cd doc && $(MAKE) $(MFLAGS) dvi
-gawk.info: gawk.texi
- makeinfo gawk.texi
+info: $(srcdir)/doc/gawk.texi.in
+ cd doc && $(MAKE) $(MFLAGS) info
-# not really (or not with every shell) - but you have an idea
-test: gawk
+doc/all:
+ cd doc && $(MAKE) $(MFLAGS) all
+
+awklib/all:
+ cd awklib && $(MAKE) $(MFLAGS) all
+
+# to run this target you have to adjust test/Makefile quite a bit
+# in order to make it palatable to your shell
+#
+check: $(PROG)
cd test; $(MAKE) -k
-check: test
+test: check
+
diff --git a/atari/config.h b/atari/config.h
index 48eaf0f1..8793d698 100644
--- a/atari/config.h
+++ b/atari/config.h
@@ -1,33 +1,171 @@
+/*
+ * Sample configuration file for ST - works with gcc and TOS libraries;
+ * revise for your configuration if the configure script does not work
+ */
+/*
+ * acconfig.h -- configuration definitions for gawk.
+ */
+
/*
- * config.h for Atari ST.
- * Assumes gcc compiler and TOS libraries.
- * Edited by hand from a config.h generated automatically by configure.
+ * Copyright (C) 1995 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
-/* Default path for Awk library */
-#define DEFPATH ".,c:\\lib\\awk,c:\\gnu\\lib\\awk"
-/* Path separator in use */
-#define ENVSEP ','
-#define SZTC (size_t)
-#define INTC (int)
+/* Define if using alloca.c. */
+/* #undef C_ALLOCA */
+
+/* Define if type char is unsigned and you are not using gcc. */
+#ifndef __CHAR_UNSIGNED__
+/* #undef __CHAR_UNSIGNED__ */
+#endif
-#define STDC_HEADERS 1 /* have the usual ANSI header files */
-#undef REGEX_MALLOC /* use alloca in regex.c */
+/* Define to empty if the keyword does not work. */
+/* #undef const */
-#undef __CHAR_UNSIGNED__ /* default char is signed */
+/* Define to the type of elements in the array set by `getgroups'.
+ Usually this is either `int' or `gid_t'. */
+#define GETGROUPS_T gid_t
-/*
- * srandom already has a prototype defined - don't redefine it
- */
-#define SRANDOM_PROTO 1
+/* Define if the `getpgrp' function takes no argument. */
+#define GETPGRP_VOID 1
-#undef _POSIX_SOURCE /* on Minix, used to get Posix functions */
-#undef _MINIX /* on Minix, used to get Posix functions */
-#undef _POSIX_1_SOURCE /* on Minix, define to 2 */
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef gid_t */
-/*
- * define const to nothing if not __STDC__
+/* Define if you have alloca, as a function or macro. */
+#define HAVE_ALLOCA 1
+
+/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
+/* #undef HAVE_ALLOCA_H */
+
+/* Define if you don't have vprintf but do have _doprnt. */
+/* #undef HAVE_DOPRNT */
+
+/* Define if your struct stat has st_blksize. */
+#define HAVE_ST_BLKSIZE 1
+
+/* Define if your struct tm has tm_zone. */
+/* #undef HAVE_TM_ZONE */
+
+/* Define if you don't have tm_zone but do have the external array
+ tzname. */
+/* #undef HAVE_TZNAME */
+
+/* Define if you have the vprintf function. */
+#define HAVE_VPRINTF 1
+
+/* Define if on MINIX. */
+/* #undef _MINIX */
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef pid_t */
+
+/* Define if the system does not provide POSIX.1 features except
+ with this defined. */
+/* #undef _POSIX_1_SOURCE */
+
+/* Define if you need to in order for stat and other things to work. */
+/* #undef _POSIX_SOURCE */
+
+/* Define as the return type of signal handlers (int or void). */
+#define RETSIGTYPE void
+
+/* Define to `unsigned' if <sys/types.h> doesn't define. */
+/* #undef size_t */
+
+/* If using the C implementation of alloca, define if you know the
+ direction of stack growth for your system; otherwise it will be
+ automatically deduced at run-time.
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown
*/
-#ifndef __STDC__
-#define const
-#endif
+/* #undef STACK_DIRECTION */
+
+/* Define if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define if you can safely include both <sys/time.h> and <time.h>. */
+#define TIME_WITH_SYS_TIME 1
+
+/* Define if your <sys/time.h> declares struct tm. */
+#define TM_IN_SYS_TIME 1
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef uid_t */
+
+/* #undef GETPGRP_IS_STANDARD */ /* getpgrp does/does not take an argument */
+/* #define HAVE_BCOPY 1 *//* we have the bcopy function */
+#define HAVE_MEMCPY 1 /* we have the memcpy function */
+#define HAVE_STRINGIZE 1 /* can use ANSI # operator in cpp */
+#define HAVE_STRING_H 1 /* the <string.h> header file */
+/* #undef REGEX_MALLOC */ /* use malloc instead of alloca in regex.c */
+#define SPRINTF_RET int /* return type of sprintf */
+
+/* Define if you have the fmod function. */
+#define HAVE_FMOD 1
+
+/* Define if you have the memcmp function. */
+#define HAVE_MEMCMP 1
+
+/* Define if you have the memcpy function. */
+#define HAVE_MEMCPY 1
+
+/* Define if you have the memset function. */
+#define HAVE_MEMSET 1
+
+/* Define if you have the random function. */
+#define HAVE_RANDOM 1
+
+/* Define if you have the strchr function. */
+#define HAVE_STRCHR 1
+
+/* Define if you have the strerror function. */
+#define HAVE_STRERROR 1
+
+/* Define if you have the strftime function. */
+#define HAVE_STRFTIME 1
+
+/* Define if you have the strncasecmp function. */
+/* #undef HAVE_STRNCASECMP */
+
+/* Define if you have the strtod function. */
+#define HAVE_STRTOD 1
+
+/* Define if you have the system function. */
+/* This is a white lie - but you may or may not prefer this way */
+/* #define HAVE_SYSTEM 1 */
+
+/* Define if you have the tzset function. */
+#define HAVE_TZSET 1
+
+/* Define if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define if you have the <signum.h> header file. */
+/* #undef HAVE_SIGNUM_H */
+
+/* Define if you have the <strings.h> header file. */
+/* #undef HAVE_STRINGS_H */
+
+/* Define if you have the <sys/param.h> header file. */
+#define HAVE_SYS_PARAM_H 1
+
+/* Define if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
diff --git a/atari/gawkmisc.atr b/atari/gawkmisc.atr
new file mode 100644
index 00000000..ef09b813
--- /dev/null
+++ b/atari/gawkmisc.atr
@@ -0,0 +1,124 @@
+/*
+ * gawkmisc.atr --- miscellaneous gawk routines that are OS specific.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1993 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <string.h>
+
+char quote = '\'';
+#ifndef DEFPATH
+char *defpath = ".,c:\\lib\\awk,c:\\gnu\\lib\\awk";
+#else
+char *defpath = DEFPATH;
+#endif
+char envsep = ',';
+
+
+/* gawk_name --- pull out the "gawk" part from how the OS called us */
+
+char *
+gawk_name(filespec)
+const char *filespec;
+{
+ char *p, *q;
+
+ p = (char *)filespec;
+
+ if ((q = strrchr(p, '\\')) != NULL)
+ p = q + 1;
+ if ((q = strrchr(p, '/')) != NULL)
+ p = q + 1;
+ if ((q = strchr(p, '.')) != NULL)
+ *q = '\0';
+ strlwr(p);
+
+ return (p == NULL ? (char *)filespec : (char *)p);
+}
+
+/* os_arg_fixup --- fixup the command line */
+
+void
+os_arg_fixup(argcp, argvp)
+int *argcp;
+char ***argvp;
+{
+ /* no-op */
+ return;
+}
+
+/* os_devopen --- open special per-OS devices */
+
+int
+os_devopen(name, flag)
+const char *name;
+int flag;
+{
+ /* no-op */
+ return INVALID_HANDLE;
+}
+
+/* optimal_bufsize --- determine optimal buffer size */
+
+int
+optimal_bufsize(fd, stb)
+int fd;
+struct stat *stb;
+{
+ /* force all members to zero in case OS doesn't use all of them. */
+ memset(stb, '\0', sizeof(struct stat));
+
+ /* The atari has the st_blksize structure, so we just use it. */
+#define DEFBLKSIZE (stb->st_blksize ? stb->st_blksize : BUFSIZ)
+
+ /*
+ * On ST redirected stdin does not have a name attached
+ * (this could be hard to do) and fstat would fail
+ */
+ if (fd == 0 || isatty(fd))
+ return BUFSIZ;
+ if (fstat(fd, stb) == -1)
+ fatal("can't stat fd %d (%s)", fd, strerror(errno));
+ if (lseek(fd, (off_t)0, 0) == -1) /* not a regular file */
+ return DEFBLKSIZE;
+ if (stb->st_size > 0 && stb->st_size < DEFBLKSIZE) /* small file */
+ return stb->st_size;
+ return DEFBLKSIZE;
+}
+
+/* ispath --- return true if path has directory components */
+
+int
+ispath(file)
+const char *file;
+{
+ return (strchr(file, '/') != NULL || strchr(file, '\\') != NULL);
+}
+
+/* isdirpunct --- return true if char is a directory separator */
+
+int
+isdirpunct(c)
+int c;
+{
+ return (c == '/' || c == '\\');
+}
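
gawk_name() in this file has to cope with TOS handing the program an upper-case path, either slash as a separator, and a .TTP extension, so it trims the directory part, chops the extension, and lowercases what remains with the non-standard strlwr(). A portable sketch of the same idea using only standard C is shown below; the helper name is made up and the code is not part of the patch.

    #include <stdio.h>
    #include <string.h>
    #include <ctype.h>

    /* Strip the directory part (whichever separator was used), drop a
     * trailing extension such as ".ttp", and fold the result to lower
     * case without relying on strlwr(). */
    static char *
    program_name(char *filespec)
    {
        char *p = filespec, *q;

        if ((q = strrchr(p, '\\')) != NULL)
            p = q + 1;
        if ((q = strrchr(p, '/')) != NULL)
            p = q + 1;
        if ((q = strchr(p, '.')) != NULL)
            *q = '\0';
        for (q = p; *q != '\0'; q++)
            *q = tolower((unsigned char) *q);
        return p;
    }

    int
    main(void)
    {
        char spec[] = "C:\\GNU\\BIN\\GAWK.TTP";
        printf("%s\n", program_name(spec));     /* prints "gawk" */
        return 0;
    }
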
diff --git a/atari/mkconf.g b/atari/mkconf.g
deleted file mode 100644
index 385559b3..00000000
--- a/atari/mkconf.g
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-# gulam script to produce configuration file for Atari ST;
-# performs the same job as configure, but only for this specific configuration;
-# it is assumed that it is located in a subdirectory .\atari
-#
-if { -e ..\config\atari }
- sed -n -f mkscrpt.sed ..\config\atari > sedscr
- sed -f sedscr ..\config.in > config.h
- sed -n '/^#echo./s///p' ..\config\atari
- rm sedscr
- mv config.h ..
-ef
- echo "'..\config\atari' was lost somewhere"
- echo "Either construct one based on the examples in the config directory,"
- echo "or, in source directory, copy config.in to config.h and edit it."
- exit 1
-endif
-exit 0
diff --git a/atari/mkscrpt.sed b/atari/mkscrpt.sed
deleted file mode 100644
index 949d9c59..00000000
--- a/atari/mkscrpt.sed
+++ /dev/null
@@ -1,15 +0,0 @@
-# there is no automatic editing of Makefile for Atari right now
-# but lines starting with "MAKE_" string are processed for consistency
-# with other configuration files and in a case they would be needed
-# in a future
-:start
- /^MAKE_/d
- /^[^#]/s/.*/s~__SYSTEM__~&~/p
- t cont
- n
- b start
-:cont
- n
- /^MAKE_/d
- /^[^#]/s:^\([^ ]*\)[ ].*:s~^/\\* #define[ ]*\1.*~#define &~:p
-b cont
diff --git a/atari/redirect.h b/atari/redirect.h
new file mode 100644
index 00000000..6452778f
--- /dev/null
+++ b/atari/redirect.h
@@ -0,0 +1,32 @@
+/*
+ * redirect.h --- definitions for functions that are OS specific.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1993 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* This file is already conditioned on atarist in awk.h */
+
+#define read _text_read /* we do not want all these CR's to mess up our input */
+extern int _text_read(int, char *, int);
+#ifndef __MINT__
+#undef NGROUPS_MAX
+#endif /* __MINT__ */
diff --git a/atari/tmpnam.c b/atari/tmpnam.c
index b5ab45bd..92bf751b 100644
--- a/atari/tmpnam.c
+++ b/atari/tmpnam.c
@@ -1,10 +1,11 @@
+#ifdef PIPES_SIMULATED
/* tmpnam.c : return a temporary file name */
/* written by Eric R. Smith and placed in the public domain */
/**
* - modified for gawk needs - pattern /$$XXXXXX from the original
* code creates names which are hard to remove when somethig
* goes wrong
- * - retuned name can be passed outside via system(); other programs
+ * - returned name can be passed outside via system(); other programs
* may not dig '/' as a path separator
* - somehow more frugal in a memory use
* (mj - October 1990)
@@ -43,3 +44,4 @@ const char *path, *base; /* ignored */
{
return tmpnam(NULL);
}
+#endif /* PIPES_SIMULATED */
diff --git a/awk.h b/awk.h
index 9dd97caa..7e16a132 100644
--- a/awk.h
+++ b/awk.h
@@ -6,7 +6,7 @@
* Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,158 +19,116 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/* ------------------------------ Includes ------------------------------ */
-#include "config.h"
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
#include <stdio.h>
-#ifndef LIMITS_H_MISSING
+#ifdef HAVE_LIMITS_H
#include <limits.h>
-#endif
+#endif /* HAVE_LIMITS_H */
#include <ctype.h>
#include <setjmp.h>
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+#include <stdarg.h>
+#else
#include <varargs.h>
+#endif
+#include <signal.h>
#include <time.h>
#include <errno.h>
-#if !defined(errno) && !defined(MSDOS) && !defined(OS2)
+#if ! defined(errno) && ! defined(MSDOS) && ! defined(OS2)
extern int errno;
#endif
-#ifdef __GNU_LIBRARY__
-#ifndef linux
+#ifdef HAVE_SIGNUM_H
#include <signum.h>
#endif
-#endif
/* ----------------- System dependencies (with more includes) -----------*/
-#if !defined(VMS) || (!defined(VAXC) && !defined(__DECC))
-#include <sys/types.h>
-#include <sys/stat.h>
-#else /* VMS w/ VAXC or DECC */
-#include <types.h>
-#include <stat.h>
-#include <file.h> /* avoid <fcntl.h> in io.c */
-#endif
-
-#include <signal.h>
+/* This section is the messiest one in the file, not a lot that can be done */
#ifdef __STDC__
#define P(s) s
#define MALLOC_ARG_T size_t
-#else
+#else /* not __STDC__ */
#define P(s) ()
#define MALLOC_ARG_T unsigned
#define volatile
#define const
-#endif
-
-#ifndef SIGTYPE
-#define SIGTYPE void
-#endif
+#endif /* not __STDC__ */
-#ifdef SIZE_T_MISSING
-typedef unsigned int size_t;
-#endif
-
-#ifndef SZTC
-#define SZTC
-#define INTC
+#if ! defined(VMS) || (! defined(VAXC) && ! defined(__DECC))
+#include <sys/types.h>
+#include <sys/stat.h>
+#else /* VMS w/ VAXC or DECC */
+#include <types.h>
+#include <stat.h>
+#include <file.h> /* avoid <fcntl.h> in io.c */
+#ifdef __DECC
+/* DEC C implies DECC$SHR, which doesn't have the %g problem of VAXCRTL */
+#undef GFMT_WORKAROUND
#endif
+#endif /* VMS w/ VAXC or DECC */
#ifdef STDC_HEADERS
#include <stdlib.h>
+#else /* not STDC_HEADERS */
+#include "protos.h"
+#endif /* not STDC_HEADERS */
+
+#ifdef HAVE_STRING_H
#include <string.h>
+#ifdef NEED_MEMORY_H
+#include <memory.h>
+#endif /* NEED_MEMORY_H */
+#else /* not HAVE_STRING_H */
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif /* HAVE_STRINGS_H */
+#endif /* not HAVE_STRING_H */
+
#ifdef NeXT
+#if __GNUC__ < 2 || __GNUC_MINOR__ < 7
#include <libc.h>
+#endif
#undef atof
-#else
+#define getopt GNU_getopt
+#define GFMT_WORKAROUND
+#endif /* NeXT */
+
#if defined(atarist) || defined(VMS)
#include <unixlib.h>
-#else /* atarist || VMS */
-#if !defined(MSDOS) && !defined(_MSC_VER)
-#include <unistd.h>
-#endif /* MSDOS */
#endif /* atarist || VMS */
-#endif /* Next */
-#else /* STDC_HEADERS */
-#include "protos.h"
-#endif /* STDC_HEADERS */
-
-#if defined(ultrix) && !defined(Ultrix41)
-extern char * getenv P((char *name));
-extern double atof P((char *s));
-#endif
-
-#ifndef __GNUC__
-#ifdef sparc
-/* nasty nasty SunOS-ism */
-#include <alloca.h>
-#ifdef lint
-extern char *alloca();
-#endif
-#else /* not sparc */
-#if !defined(alloca) && !defined(ALLOCA_PROTO)
-#if defined(_MSC_VER)
-#include <malloc.h>
-#else
-#ifdef __hpux
-void *alloca ();
-#else
-extern char *alloca();
-#endif /* __hpux */
-#endif /* _MSC_VER */
-#endif
-#endif /* sparc */
-#endif /* __GNUC__ */
-#ifdef HAVE_UNDERSCORE_SETJMP
-/* nasty nasty berkelixm */
-#define setjmp _setjmp
-#define longjmp _longjmp
-#endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H */
-/*
- * if you don't have vprintf, try this and cross your fingers.
- */
-#if defined(VPRINTF_MISSING)
+#ifndef HAVE_VPRINTF
+/* if you don't have vprintf, try this and cross your fingers. */
+#ifdef HAVE_DOPRNT
#define vfprintf(fp,fmt,arg) _doprnt((fmt), (arg), (fp))
-#endif
+#else /* not HAVE_DOPRNT */
+you
+lose
+#endif /* not HAVE_DOPRNT */
+#endif /* HAVE_VPRINTF */
#ifdef VMS
-/* some macros to redirect to code in vms/vms_misc.c */
-#define exit vms_exit
-#define open vms_open
-#define strerror vms_strerror
-#define strdup vms_strdup
-extern void exit P((int));
-extern int open P((const char *,int,...));
-extern char *strerror P((int));
-extern char *strdup P((const char *str));
-extern int vms_devopen P((const char *,int));
-# ifndef NO_TTY_FWRITE
-#define fwrite tty_fwrite
-#define fclose tty_fclose
-extern size_t fwrite P((const void *,size_t,size_t,FILE *));
-extern int fclose P((FILE *));
-# endif
-extern FILE *popen P((const char *,const char *));
-extern int pclose P((FILE *));
-extern void vms_arg_fixup P((int *,char ***));
-/* some things not in STDC_HEADERS */
-extern size_t gnu_strftime P((char *,size_t,const char *,const struct tm *));
-extern int unlink P((const char *));
-extern int getopt P((int,char **,char *));
-extern int isatty P((int));
-#ifndef fileno
-extern int fileno P((FILE *));
-#endif
-extern int close(), dup(), dup2(), fstat(), read(), stat();
-extern int getpgrp P((void));
+#include "vms/redirect.h"
#endif /*VMS*/
+#ifdef atarist
+#include "atari/redirect.h"
+#endif
+
#define GNU_REGEX
#ifdef GNU_REGEX
#include "regex.h"
@@ -183,31 +141,38 @@ typedef struct Regexp {
} Regexp;
#define RESTART(rp,s) (rp)->regs.start[0]
#define REEND(rp,s) (rp)->regs.end[0]
-#else /* GNU_REGEX */
+#define SUBPATSTART(rp,s,n) (rp)->regs.start[n]
+#define SUBPATEND(rp,s,n) (rp)->regs.end[n]
#endif /* GNU_REGEX */
-#ifdef atarist
-#define read _text_read /* we do not want all these CR's to mess our input */
-extern int _text_read (int, char *, int);
-#ifndef __MINT__
-#undef NGROUPS_MAX
-#endif /* __MINT__ */
-#endif
+/* ------------------ Constants, Structures, Typedefs ------------------ */
-#ifndef DEFPATH
-#define DEFPATH ".:/usr/local/lib/awk:/usr/lib/awk"
+#ifndef AWKNUM
+#define AWKNUM double
#endif
-#ifndef ENVSEP
-#define ENVSEP ':'
+#ifndef TRUE
+/* a bit hackneyed, but what the heck */
+#define TRUE 1
+#define FALSE 0
#endif
-extern double double_to_int P((double d));
-
-/* ------------------ Constants, Structures, Typedefs ------------------ */
-#define AWKNUM double
+/* Figure out what '\a' really is. */
+#ifdef __STDC__
+#define BELL '\a' /* sure makes life easy, don't it? */
+#else
+# if 'z' - 'a' == 25 /* ascii */
+# if 'a' != 97 /* machine is dumb enough to use mark parity */
+# define BELL '\207'
+# else
+# define BELL '\07'
+# endif
+# else
+# define BELL '\057'
+# endif
+#endif
-typedef enum {
+typedef enum nodevals {
/* illegal entry == 0 */
Node_illegal,
@@ -223,7 +188,7 @@ typedef enum {
Node_exp,
/* unary operators subnode is the expression to work on */
-/*10*/ Node_preincrement,
+ Node_preincrement,
Node_predecrement,
Node_postincrement,
Node_postdecrement,
@@ -235,7 +200,7 @@ typedef enum {
Node_assign_times,
Node_assign_quotient,
Node_assign_mod,
-/*20*/ Node_assign_plus,
+ Node_assign_plus,
Node_assign_minus,
Node_assign_exp,
@@ -249,7 +214,7 @@ typedef enum {
Node_less,
Node_greater,
Node_leq,
-/*30*/ Node_geq,
+ Node_geq,
Node_match,
Node_nomatch,
@@ -265,21 +230,22 @@ typedef enum {
Node_param_list, /* lnode is a variable, rnode is more list */
/* keywords */
-/*40*/ Node_K_if, /* lnode is conditonal, rnode is if_branches */
+ Node_K_if, /* lnode is conditional, rnode is if_branches */
 Node_K_while, /* lnode is conditional, rnode is stuff to run */
Node_K_for, /* lnode is for_struct, rnode is stuff to run */
Node_K_arrayfor, /* lnode is for_struct, rnode is stuff to run */
Node_K_break, /* no subs */
- Node_K_continue, /* no stuff */
+ Node_K_continue, /* no subs */
Node_K_print, /* lnode is exp_list, rnode is redirect */
Node_K_printf, /* lnode is exp_list, rnode is redirect */
Node_K_next, /* no subs */
Node_K_exit, /* subnode is return value, or NULL */
-/*50*/ Node_K_do, /* lnode is conditional, rnode stuff to run */
- Node_K_return,
- Node_K_delete,
- Node_K_getline,
+ Node_K_do, /* lnode is conditional, rnode stuff to run */
+ Node_K_return, /* lnode is return value */
+ Node_K_delete, /* lnode is array, rnode is subscript */
+ Node_K_getline, /* lnode is opt var, rnode is redirection */
Node_K_function, /* lnode is statement list, rnode is params */
+ Node_K_nextfile, /* no subs */
/* I/O redirection for print statements */
Node_redirect_output, /* subnode is where to redirect */
@@ -289,9 +255,8 @@ typedef enum {
Node_redirect_input, /* subnode is where to redirect */
/* Variables */
-/*60*/ Node_var, /* rnode is value, lnode is array stuff */
- Node_var_array, /* array is ptr to elements, asize num of
- * eles */
+ Node_var, /* rnode is value, lnode is array stuff */
+ Node_var_array, /* array is ptr to elements, asize num of eles */
Node_val, /* node is a value - type in flags */
/* Builtins subnode is explist to work on, proc is func to call */
@@ -314,10 +279,10 @@ typedef enum {
Node_func_call, /* lnode is name, rnode is argument list */
 Node_cond_exp, /* lnode is conditional, rnode is if_branches */
- Node_regex,
-/*70*/ Node_hashnode,
- Node_ahash,
- Node_NF,
+ Node_regex, /* a regexp, text, compiled, flags, etc */
+ Node_hashnode, /* an identifier in the symbol table */
+ Node_ahash, /* an array element */
+ Node_NF, /* variables recognized in the grammar */
Node_NR,
Node_FNR,
Node_FS,
@@ -327,8 +292,7 @@ typedef enum {
Node_OFS,
Node_ORS,
Node_OFMT,
- Node_CONVFMT,
- Node_K_nextfile
+ Node_CONVFMT
} NODETYPE;
/*
@@ -345,7 +309,7 @@ typedef struct exp_node {
} l;
union {
struct exp_node *rptr;
- struct exp_node *(*pptr) ();
+ struct exp_node *(*pptr)();
Regexp *preg;
struct for_loop_header *hd;
struct exp_node **av;
@@ -358,9 +322,9 @@ typedef struct exp_node {
} x;
short number;
unsigned char reflags;
-# define CASE 1
-# define CONST 2
-# define FS_DFLT 4
+# define CASE 1
+# define CONST 2
+# define FS_DFLT 4
} nodep;
struct {
AWKNUM fltnum; /* this is here for optimal packing of
@@ -368,7 +332,7 @@ typedef struct exp_node {
*/
char *sp;
size_t slen;
- unsigned char sref;
+ long sref;
int idx;
} val;
struct {
@@ -392,17 +356,18 @@ typedef struct exp_node {
} sub;
NODETYPE type;
unsigned short flags;
-# define MALLOC 1 /* can be free'd */
-# define TEMP 2 /* should be free'd */
-# define PERM 4 /* can't be free'd */
-# define STRING 8 /* assigned as string */
-# define STR 16 /* string value is current */
-# define NUM 32 /* numeric value is current */
-# define NUMBER 64 /* assigned as number */
-# define MAYBE_NUM 128 /* user input: if NUMERIC then
+# define MALLOC 1 /* can be free'd */
+# define TEMP 2 /* should be free'd */
+# define PERM 4 /* can't be free'd */
+# define STRING 8 /* assigned as string */
+# define STR 16 /* string value is current */
+# define NUM 32 /* numeric value is current */
+# define NUMBER 64 /* assigned as number */
+# define MAYBE_NUM 128 /* user input: if NUMERIC then
* a NUMBER */
-# define ARRAYMAXED 256 /* array is at max size */
- char *vname; /* variable's name */
+# define ARRAYMAXED 256 /* array is at max size */
+# define SCALAR 512 /* used as scalar, can't be array */
+ char *vname; /* variable's name */
} NODE;
#define lnode sub.nodep.l.lptr
@@ -440,10 +405,7 @@ typedef struct exp_node {
#define condpair lnode
#define triggered sub.nodep.r.r_ent
-#ifdef DONTDEF
-int primes[] = {31, 61, 127, 257, 509, 1021, 2053, 4099, 8191, 16381};
-#endif
-
+/* a regular for loop */
typedef struct for_loop_header {
NODE *init;
NODE *cond;
@@ -460,6 +422,7 @@ struct search {
/* for faster input, bypass stdio */
typedef struct iobuf {
+ const char *name;
int fd;
char *buf;
char *off;
@@ -468,28 +431,27 @@ typedef struct iobuf {
int cnt;
long secsiz;
int flag;
-# define IOP_IS_TTY 1
-# define IOP_IS_INTERNAL 2
-# define IOP_NO_FREE 4
+# define IOP_IS_TTY 1
+# define IOP_IS_INTERNAL 2
+# define IOP_NO_FREE 4
} IOBUF;
typedef void (*Func_ptr)();
-/*
- * structure used to dynamically maintain a linked-list of open files/pipes
- */
+/* structure used to dynamically maintain a linked-list of open files/pipes */
struct redirect {
unsigned int flag;
-# define RED_FILE 1
-# define RED_PIPE 2
-# define RED_READ 4
-# define RED_WRITE 8
-# define RED_APPEND 16
-# define RED_NOBUF 32
-# define RED_USED 64
-# define RED_EOF 128
+# define RED_FILE 1
+# define RED_PIPE 2
+# define RED_READ 4
+# define RED_WRITE 8
+# define RED_APPEND 16
+# define RED_NOBUF 32
+# define RED_USED 64 /* closed temporarily to reuse fd */
+# define RED_EOF 128
char *value;
FILE *fp;
+ FILE *ifp; /* input fp, needed for PIPES_SIMULATED */
IOBUF *iop;
int pid;
int status;
@@ -499,8 +461,8 @@ struct redirect {
/* structure for our source, either a command line string or a source file */
struct src {
- enum srctype { CMDLINE = 1, SOURCEFILE } stype;
- char *val;
+ enum srctype { CMDLINE = 1, SOURCEFILE } stype;
+ char *val;
};
/* longjmp return codes, must be nonzero */
@@ -512,18 +474,18 @@ struct src {
#define TAG_RETURN 3
#ifndef INT_MAX
-#define INT_MAX (~(1 << (sizeof (int) * 8 - 1)))
+#define INT_MAX ((int)(~(1 << (sizeof (int) * 8 - 1))))
#endif
#ifndef LONG_MAX
-#define LONG_MAX (~(1 << (sizeof (long) * 8 - 1)))
+#define LONG_MAX ((long)(~(1L << (sizeof (long) * 8 - 1))))
#endif
#ifndef ULONG_MAX
#define ULONG_MAX (~(unsigned long)0)
#endif
#ifndef LONG_MIN
-#define LONG_MIN (-LONG_MAX - 1)
+#define LONG_MIN ((long)(-LONG_MAX - 1L))
#endif
-#define HUGE INT_MAX
+#define HUGE LONG_MAX
/* -------------------------- External variables -------------------------- */
/* gawk builtin variables */
@@ -531,7 +493,7 @@ extern long NF;
extern long NR;
extern long FNR;
extern int IGNORECASE;
-extern char *RS;
+extern int RS_is_null;
extern char *OFS;
extern int OFSlen;
extern char *ORS;
@@ -540,13 +502,10 @@ extern char *OFMT;
extern char *CONVFMT;
extern int CONVFMTidx;
extern int OFMTidx;
-extern NODE *FS_node, *NF_node, *RS_node, *NR_node;
-extern NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node;
-extern NODE *CONVFMT_node;
-extern NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node;
-extern NODE *IGNORECASE_node;
-extern NODE *FIELDWIDTHS_node;
-
+extern NODE *CONVFMT_node, *FIELDWIDTHS_node, *FILENAME_node;
+extern NODE *FNR_node, *FS_node, *IGNORECASE_node, *NF_node;
+extern NODE *NR_node, *OFMT_node, *OFS_node, *ORS_node, *RLENGTH_node;
+extern NODE *RSTART_node, *RS_node, *RT_node, *SUBSEP_node;
extern NODE **stack_ptr;
extern NODE *Nnull_string;
extern NODE **fields_arr;
@@ -554,58 +513,94 @@ extern int sourceline;
extern char *source;
extern NODE *expression_value;
+#if __GNUC__ < 2
extern NODE *_t; /* used as temporary in tree_eval */
-
-extern const char *myname;
+#endif
extern NODE *nextfree;
extern int field0_valid;
-extern int do_unix;
+extern int do_traditional;
extern int do_posix;
extern int do_lint;
+extern int do_lint_old;
+extern int do_intervals;
extern int in_begin_rule;
extern int in_end_rule;
+extern const char *myname;
+
+extern char quote;
+extern char *defpath;
+extern char envsep;
+
+extern char casetable[]; /* for case-independent regexp matching */
+
/* ------------------------- Pseudo-functions ------------------------- */
#define is_identchar(c) (isalnum(c) || (c) == '_')
-
-#ifndef MPROF
-#define getnode(n) if (nextfree) n = nextfree, nextfree = nextfree->nextp;\
- else n = more_nodes()
-#define freenode(n) ((n)->nextp = nextfree, nextfree = (n))
-#else
+#ifdef MPROF
#define getnode(n) emalloc(n, NODE *, sizeof(NODE), "getnode")
+#ifndef DEBUG
#define freenode(n) free(n)
#endif
+#else /* not MPROF */
+#define getnode(n) if (nextfree) n = nextfree, nextfree = nextfree->nextp;\
+ else n = more_nodes()
+#ifndef DEBUG
+#define freenode(n) ((n)->flags &= ~SCALAR, (n)->nextp = nextfree, nextfree = (n))
+#endif
+#endif /* not MPROF */
#ifdef DEBUG
-#define tree_eval(t) r_tree_eval(t)
+#define tree_eval(t) r_tree_eval(t, FALSE)
+#define m_tree_eval(t, iscond) r_tree_eval(t, iscond)
#define get_lhs(p, a) r_get_lhs((p), (a))
#undef freenode
#else
#define get_lhs(p, a) ((p)->type == Node_var ? (&(p)->var_value) : \
r_get_lhs((p), (a)))
-#define tree_eval(t) (_t = (t),_t == NULL ? Nnull_string : \
- (_t->type == Node_param_list ? r_tree_eval(_t) : \
+#define tree_eval(t) m_tree_eval(t, FALSE)
+#if __GNUC__ >= 2
+#define m_tree_eval(t, iscond) \
+ ({NODE * _t = (t); \
+ if (_t == NULL) \
+ _t = Nnull_string; \
+ else { \
+ switch(_t->type) { \
+ case Node_val: \
+ break; \
+ case Node_var: \
+ _t = _t->var_value; \
+ break; \
+ default: \
+ _t = r_tree_eval(_t, iscond);\
+ break; \
+ } \
+ } \
+ _t;})
+#else
+#define m_tree_eval(t, iscond) (_t = (t), _t == NULL ? Nnull_string : \
+ (_t->type == Node_param_list ? \
+ r_tree_eval(_t, iscond) : \
(_t->type == Node_val ? _t : \
(_t->type == Node_var ? _t->var_value : \
- r_tree_eval(_t)))))
-#endif
+ r_tree_eval(_t, iscond)))))
+#endif /* __GNUC__ */
+#endif /* not DEBUG */
#define make_number(x) mk_number((x), (unsigned int)(MALLOC|NUM|NUMBER))
#define tmp_number(x) mk_number((x), (unsigned int)(MALLOC|TEMP|NUM|NUMBER))
-#define free_temp(n) do {if ((n)->flags&TEMP) { unref(n); }} while (0)
-#define make_string(s,l) make_str_node((s), SZTC (l),0)
+#define free_temp(n) do { if ((n)->flags&TEMP) { unref(n); }} while (FALSE)
+#define make_string(s, l) make_str_node((s), (size_t) (l), FALSE)
#define SCAN 1
#define ALREADY_MALLOCED 2
#define cant_happen() fatal("internal error line %d, file: %s", \
__LINE__, __FILE__);
-#if defined(__STDC__) && !defined(NO_TOKEN_PASTING)
+#ifdef HAVE_STRINGIZE
#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\
(fatal("%s: %s: can't allocate memory (%s)",\
(str), #var, strerror(errno)),0))
@@ -613,7 +608,7 @@ extern int in_end_rule;
(MALLOC_ARG_T)(x))) ||\
(fatal("%s: %s: can't allocate memory (%s)",\
(str), #var, strerror(errno)),0))
-#else /* __STDC__ */
+#else /* HAVE_STRINGIZE */
#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\
(fatal("%s: %s: can't allocate memory (%s)",\
(str), "var", strerror(errno)),0))
@@ -621,7 +616,7 @@ extern int in_end_rule;
(MALLOC_ARG_T)(x))) ||\
(fatal("%s: %s: can't allocate memory (%s)",\
(str), "var", strerror(errno)),0))
-#endif /* __STDC__ */
+#endif /* HAVE_STRINGIZE */
#ifdef DEBUG
#define force_number r_force_number
@@ -630,17 +625,32 @@ extern int in_end_rule;
#ifdef lint
extern AWKNUM force_number();
#endif
+#if __GNUC__ >= 2
+#define force_number(n) ({NODE *_tn = (n);\
+ (_tn->flags & NUM) ?_tn->numbr : r_force_number(_tn);})
+#define force_string(s) ({NODE *_ts = (s);\
+ ((_ts->flags & STR) && \
+ (_ts->stfmt == -1 || _ts->stfmt == CONVFMTidx)) ?\
+ _ts : r_force_string(_ts);})
+#else
#ifdef MSDOS
extern double _msc51bug;
-#define force_number(n) (_msc51bug=(_t = (n),(_t->flags & NUM) ? _t->numbr : r_force_number(_t)))
+#define force_number(n) (_msc51bug=(_t = (n),\
+ (_t->flags & NUM) ? _t->numbr : r_force_number(_t)))
#else /* not MSDOS */
-#define force_number(n) (_t = (n),(_t->flags & NUM) ? _t->numbr : r_force_number(_t))
-#endif /* MSDOS */
-#define force_string(s) (_t = (s),((_t->flags & STR) && (_t->stfmt == -1 || _t->stfmt == CONVFMTidx))? _t : r_force_string(_t))
+#define force_number(n) (_t = (n),\
+ (_t->flags & NUM) ? _t->numbr : r_force_number(_t))
+#endif /* not MSDOS */
+#define force_string(s) (_t = (s),((_t->flags & STR) && \
+ (_t->stfmt == -1 || \
+ _t->stfmt == CONVFMTidx))? \
+ _t : r_force_string(_t))
+#endif /* not __GNUC__ */
#endif /* not DEBUG */
#define STREQ(a,b) (*(a) == *(b) && strcmp((a), (b)) == 0)
-#define STREQN(a,b,n) ((n)&& *(a)== *(b) && strncmp((a), (b), SZTC (n)) == 0)
+#define STREQN(a,b,n) ((n) && *(a)== *(b) && \
+ strncmp((a), (b), (size_t) (n)) == 0)
/* ------------- Function prototypes or defs (as appropriate) ------------- */
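The GCC-only force_number() and force_string() above, like the m_tree_eval()
macro earlier in this header, are built on ({ ... }), a GNU C statement
expression: each expansion gets its own temporary instead of the shared
global _t, so the macro argument is evaluated exactly once and the macros
nest safely.  A minimal stand-alone sketch of the idiom follows; the names
are illustrative only, not from gawk.

#include <stdio.h>

#if defined(__GNUC__)
/* statement expression: the block's value is that of its last
 * statement, and the private temporary means (x) is evaluated once */
#define SQUARE(x)	({ double _tmp = (x); _tmp * _tmp; })
#else
/* portable fallback: (x) is evaluated twice, which is why the
 * non-GNU variants above fall back on a shared temporary instead */
#define SQUARE(x)	((x) * (x))
#endif

int main(void)
{
	double d = 3.0;

	printf("%g\n", SQUARE(d + 1.0));	/* prints 16 */
	return 0;
}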
@@ -653,16 +663,18 @@ extern NODE **assoc_lookup P((NODE *symbol, NODE *subs));
extern void do_delete P((NODE *symbol, NODE *tree));
extern void assoc_scan P((NODE *symbol, struct search *lookat));
extern void assoc_next P((struct search *lookat));
-/* awk.tab.c */
+/* awktab.c */
extern char *tokexpand P((void));
extern char nextc P((void));
extern NODE *node P((NODE *left, NODETYPE op, NODE *right));
extern NODE *install P((char *name, NODE *value));
extern NODE *lookup P((const char *name));
-extern NODE *variable P((char *name, int can_free));
+extern NODE *variable P((char *name, int can_free, NODETYPE type));
extern int yyparse P((void));
/* builtin.c */
+extern double double_to_int P((double d));
extern NODE *do_exp P((NODE *tree));
+extern NODE *do_fflush P((NODE *tree));
extern NODE *do_index P((NODE *tree));
extern NODE *do_int P((NODE *tree));
extern NODE *do_length P((NODE *tree));
@@ -686,9 +698,10 @@ extern NODE *do_srand P((NODE *tree));
extern NODE *do_match P((NODE *tree));
extern NODE *do_gsub P((NODE *tree));
extern NODE *do_sub P((NODE *tree));
+extern NODE *do_gensub P((NODE *tree));
/* eval.c */
extern int interpret P((NODE *volatile tree));
-extern NODE *r_tree_eval P((NODE *tree));
+extern NODE *r_tree_eval P((NODE *tree, int iscond));
extern int cmp_nodes P((NODE *t1, NODE *t2));
extern NODE **r_get_lhs P((NODE *ptr, Func_ptr *assign));
extern void set_IGNORECASE P((void));
@@ -701,12 +714,20 @@ extern void init_fields P((void));
extern void set_record P((char *buf, int cnt, int freeold));
extern void reset_record P((void));
extern void set_NF P((void));
-extern NODE **get_field P((int num, Func_ptr *assign));
+extern NODE **get_field P((long num, Func_ptr *assign));
extern NODE *do_split P((NODE *tree));
extern void set_FS P((void));
extern void set_FS_if_not_FIELDWIDTHS P((void));
extern void set_RS P((void));
extern void set_FIELDWIDTHS P((void));
+extern int using_fieldwidths P((void));
+/* gawkmisc.c */
+extern char *gawk_name P((const char *filespec));
+extern void os_arg_fixup P((int *argcp, char ***argvp));
+extern int os_devopen P((const char *name, int flag));
+extern int optimal_bufsize P((int fd, struct stat *sbuf));
+extern int ispath P((const char *file));
+extern int isdirpunct P((int c));
/* io.c */
extern void set_FNR P((void));
extern void set_NR P((void));
@@ -719,16 +740,12 @@ extern int devopen P((const char *name, const char *mode));
extern int pathopen P((const char *file));
extern NODE *do_getline P((NODE *tree));
extern void do_nextfile P((void));
-/* iop.c */
-extern int optimal_bufsize P((int fd));
-extern IOBUF *iop_alloc P((int fd));
-extern int get_a_record P((char **out, IOBUF *iop, int rs, int *errcode));
+extern IOBUF *iop_alloc P((int fd, const char *name));
/* main.c */
extern int main P((int argc, char **argv));
-extern Regexp *mk_re_parse P((char *s, int ignorecase));
extern void load_environ P((void));
extern char *arg_assign P((char *arg));
-extern SIGTYPE catchsig P((int sig, int code));
+extern RETSIGTYPE catchsig P((int sig, int code));
/* msg.c */
extern void err P((const char *s, const char *emsg, va_list argp));
#if _MSC_VER == 510
@@ -737,11 +754,18 @@ extern void error P((va_list va_alist, ...));
extern void warning P((va_list va_alist, ...));
extern void fatal P((va_list va_alist, ...));
#else
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+extern void msg (char *mesg, ...);
+extern void error (char *mesg, ...);
+extern void warning (char *mesg, ...);
+extern void fatal (char *mesg, ...);
+#else
extern void msg ();
extern void error ();
extern void warning ();
extern void fatal ();
#endif
+#endif
/* node.c */
extern AWKNUM r_force_number P((NODE *n));
extern NODE *r_force_string P((NODE *s));
@@ -764,30 +788,35 @@ extern void reg_error P((const char *s));
extern Regexp *re_update P((NODE *t));
extern void resyntax P((int syntax));
extern void resetup P((void));
+extern int avoid_dfa P((NODE *re, char *str, size_t len)); /* temporary */
-/* strcase.c */
-extern int strcasecmp P((const char *s1, const char *s2));
+/* strncasecmp.c */
extern int strncasecmp P((const char *s1, const char *s2, register size_t n));
-#ifdef atarist
+#if defined(atarist)
+#if defined(PIPES_SIMULATED)
/* atari/tmpnam.c */
extern char *tmpnam P((char *buf));
extern char *tempnam P((const char *path, const char *base));
+#else
+#include <wait.h>
#endif
-
-/* Figure out what '\a' really is. */
-#ifdef __STDC__
-#define BELL '\a' /* sure makes life easy, don't it? */
+#include <fcntl.h>
+#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1)
#else
-# if 'z' - 'a' == 25 /* ascii */
-# if 'a' != 97 /* machine is dumb enough to use mark parity */
-# define BELL '\207'
-# else
-# define BELL '\07'
-# endif
-# else
-# define BELL '\057'
-# endif
+#define INVALID_HANDLE (-1)
+#endif /* atarist */
+
+#ifndef STATIC
+#define STATIC static
#endif
-extern char casetable[]; /* for case-independent regexp matching */
+#ifdef C_ALLOCA
+/* The __hpux check is to avoid conflicts with bison's definition of
+ alloca() in awktab.c.*/
+#if (defined(__STDC__) && __STDC__) || defined (__hpux)
+extern void *alloca P((unsigned));
+#else
+extern char *alloca P((unsigned));
+#endif
+#endif
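A note on the emalloc()/erealloc() macros a few hunks up: the whole
allocate-or-die action is one expression, glued together with || and the
comma operator so the macro can sit anywhere a statement can, and under
HAVE_STRINGIZE the #var operator turns the variable's name into text for
the error message.  A self-contained sketch of the idiom; die() here is a
stand-in for gawk's fatal(), and the names are illustrative only.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void die(const char *what, const char *var)
{
	fprintf(stderr, "%s: %s: can't allocate memory (%s)\n",
		what, var, strerror(errno));
	exit(2);
}

/* allocation succeeds, or the right-hand side of || reports and exits */
#define emalloc(var, ty, x, str) \
	(void) ((var = (ty) malloc(x)) != NULL || (die(str, #var), 0))

int main(void)
{
	char *buf;

	emalloc(buf, char *, 128, "main");
	strcpy(buf, "allocated");
	puts(buf);
	free(buf);
	return 0;
}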
diff --git a/awk.y b/awk.y
index 006b3df1..d4b443fa 100644
--- a/awk.y
+++ b/awk.y
@@ -6,7 +6,7 @@
* Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,8 +19,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
%{
@@ -30,7 +30,14 @@
#include "awk.h"
-static void yyerror (); /* va_alist */
+#define CAN_FREE TRUE
+#define DONT_FREE FALSE
+
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+static void yyerror(const char *m, ...) ;
+#else
+static void yyerror(); /* va_alist */
+#endif
static char *get_src_buf P((void));
static int yylex P((void));
static NODE *node_common P((NODETYPE op));
@@ -44,11 +51,17 @@ static void pop_params P((NODE *params));
static NODE *make_param P((char *name));
static NODE *mk_rexp P((NODE *exp));
static int dup_parms P((NODE *func));
+static void param_sanity P((NODE *arglist));
+static int isnoeffect P((NODETYPE));
+
+enum defref { FUNC_DEFINE, FUNC_USE };
+static void func_use P((char *name, enum defref how));
+static void check_funcs P((void));
static int want_assign; /* lexical scanning kludge */
static int want_regexp; /* lexical scanning kludge */
static int can_return; /* lexical scanning kludge */
-static int io_allowed = 1; /* lexical scanning kludge */
+static int io_allowed = TRUE; /* lexical scanning kludge */
static char *lexptr; /* pointer to next char during parsing */
static char *lexend;
static char *lexptr_begin; /* keep track of where we were for error msgs */
@@ -103,7 +116,7 @@ extern NODE *end_block;
%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
%token <nodetypeval> LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
-%token <nodetypeval> LEX_GETLINE
+%token <nodetypeval> LEX_GETLINE LEX_NEXTFILE
%token <nodetypeval> LEX_IN
%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
%token <lval> LEX_BUILTIN LEX_LENGTH
@@ -134,7 +147,10 @@ extern NODE *end_block;
start
: opt_nls program opt_nls
- { expression_value = $2; }
+ {
+ expression_value = $2;
+ check_funcs();
+ }
;
program
@@ -156,9 +172,9 @@ program
else {
if ($1->type != Node_rule_list)
$1 = node($1, Node_rule_list,
- (NODE*)NULL);
- $$ = append_right ($1,
- node($2, Node_rule_list,(NODE *) NULL));
+ (NODE*) NULL);
+ $$ = append_right($1,
+ node($2, Node_rule_list, (NODE *) NULL));
}
yyerrok;
}
@@ -168,36 +184,36 @@ program
;
rule
- : LEX_BEGIN { io_allowed = 0; }
+ : LEX_BEGIN { io_allowed = FALSE; }
action
{
- if (begin_block) {
+ if (begin_block != NULL) {
if (begin_block->type != Node_rule_list)
begin_block = node(begin_block, Node_rule_list,
- (NODE *)NULL);
- (void) append_right (begin_block, node(
- node((NODE *)NULL, Node_rule_node, $3),
- Node_rule_list, (NODE *)NULL) );
+ (NODE *) NULL);
+ (void) append_right(begin_block, node(
+ node((NODE *) NULL, Node_rule_node, $3),
+ Node_rule_list, (NODE *) NULL) );
} else
- begin_block = node((NODE *)NULL, Node_rule_node, $3);
+ begin_block = node((NODE *) NULL, Node_rule_node, $3);
$$ = NULL;
- io_allowed = 1;
+ io_allowed = TRUE;
yyerrok;
}
- | LEX_END { io_allowed = 0; }
+ | LEX_END { io_allowed = FALSE; }
action
{
- if (end_block) {
+ if (end_block != NULL) {
if (end_block->type != Node_rule_list)
end_block = node(end_block, Node_rule_list,
- (NODE *)NULL);
+ (NODE *) NULL);
(void) append_right (end_block, node(
- node((NODE *)NULL, Node_rule_node, $3),
- Node_rule_list, (NODE *)NULL));
+ node((NODE *) NULL, Node_rule_node, $3),
+ Node_rule_list, (NODE *) NULL));
} else
- end_block = node((NODE *)NULL, Node_rule_node, $3);
+ end_block = node((NODE *) NULL, Node_rule_node, $3);
$$ = NULL;
- io_allowed = 1;
+ io_allowed = TRUE;
yyerrok;
}
| LEX_BEGIN statement_term
@@ -213,12 +229,12 @@ rule
yyerrok;
}
| pattern action
- { $$ = node ($1, Node_rule_node, $2); yyerrok; }
+ { $$ = node($1, Node_rule_node, $2); yyerrok; }
| action
- { $$ = node ((NODE *)NULL, Node_rule_node, $1); yyerrok; }
+ { $$ = node((NODE *) NULL, Node_rule_node, $1); yyerrok; }
| pattern statement_term
{
- $$ = node ($1,
+ $$ = node($1,
Node_rule_node,
node(node(node(make_number(0.0),
Node_field_spec,
@@ -264,7 +280,7 @@ function_prologue
func_name '(' opt_param_list r_paren opt_nls
{
$$ = append_right(make_param($3), $5);
- can_return = 1;
+ can_return = TRUE;
/* check for duplicate parameter names */
if (dup_parms($$))
errcount++;
@@ -275,12 +291,12 @@ function_body
: l_brace statements r_brace opt_semi
{
$$ = $2;
- can_return = 0;
+ can_return = FALSE;
}
| l_brace r_brace opt_semi opt_nls
{
$$ = node((NODE *) NULL, Node_K_return, (NODE *) NULL);
- can_return = 0;
+ can_return = FALSE;
}
;
@@ -289,7 +305,7 @@ pattern
: exp
{ $$ = $1; }
| exp ',' exp
- { $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); }
+ { $$ = mkrangenode(node($1, Node_cond_pair, $3)); }
;
regexp
@@ -308,7 +324,7 @@ regexp
n->type = Node_regex;
len = strlen($3);
n->re_exp = make_string($3, len);
- n->re_reg = make_regexp($3, len, 0, 1);
+ n->re_reg = make_regexp($3, len, FALSE, TRUE);
n->re_text = NULL;
n->re_flags = CONST;
n->re_cnt = 1;
@@ -318,20 +334,24 @@ regexp
action
: l_brace statements r_brace opt_semi opt_nls
- { $$ = $2 ; }
+ { $$ = $2; }
| l_brace r_brace opt_semi opt_nls
{ $$ = NULL; }
;
statements
: statement
- { $$ = $1; }
+ {
+ $$ = $1;
+ if (do_lint && isnoeffect($$->type))
+ warning("statement may have no effect");
+ }
| statements statement
{
if ($1 == NULL || $1->type != Node_statement_list)
- $1 = node($1, Node_statement_list,(NODE *)NULL);
+ $1 = node($1, Node_statement_list, (NODE *) NULL);
$$ = append_right($1,
- node( $2, Node_statement_list, (NODE *)NULL));
+ node($2, Node_statement_list, (NODE *) NULL));
yyerrok;
}
| error
@@ -355,35 +375,36 @@ statement
| if_statement
{ $$ = $1; }
| LEX_WHILE '(' exp r_paren opt_nls statement
- { $$ = node ($3, Node_K_while, $6); }
+ { $$ = node($3, Node_K_while, $6); }
| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
- { $$ = node ($6, Node_K_do, $3); }
+ { $$ = node($6, Node_K_do, $3); }
| LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
{
- $$ = node ($8, Node_K_arrayfor, make_for_loop(variable($3,1),
- (NODE *)NULL, variable($5,1)));
+ $$ = node($8, Node_K_arrayfor,
+ make_for_loop(variable($3, CAN_FREE, Node_var),
+ (NODE *) NULL, variable($5, CAN_FREE, Node_var_array)));
}
| LEX_FOR '(' opt_exp semi exp semi opt_exp r_paren opt_nls statement
{
- $$ = node($10, Node_K_for, (NODE *)make_for_loop($3, $5, $7));
+ $$ = node($10, Node_K_for, (NODE *) make_for_loop($3, $5, $7));
}
| LEX_FOR '(' opt_exp semi semi opt_exp r_paren opt_nls statement
{
- $$ = node ($9, Node_K_for,
- (NODE *)make_for_loop($3, (NODE *)NULL, $6));
+ $$ = node($9, Node_K_for,
+ (NODE *) make_for_loop($3, (NODE *) NULL, $6));
}
| LEX_BREAK statement_term
/* for break, maybe we'll have to remember where to break to */
- { $$ = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); }
+ { $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); }
| LEX_CONTINUE statement_term
/* similarly */
- { $$ = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); }
+ { $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); }
| print '(' expression_list r_paren output_redir statement_term
- { $$ = node ($3, $1, $5); }
+ { $$ = node($3, $1, $5); }
| print opt_rexpression_list output_redir statement_term
{
if ($1 == Node_K_print && $2 == NULL) {
- static int warned = 0;
+ static int warned = FALSE;
$2 = node(node(make_number(0.0),
Node_field_spec,
@@ -392,54 +413,88 @@ statement
(NODE *) NULL);
if (do_lint && ! io_allowed && ! warned) {
- warned = 1;
+ warned = TRUE;
warning(
"plain `print' in BEGIN or END rule should probably be `print \"\"'");
}
}
- $$ = node ($2, $1, $3);
+ $$ = node($2, $1, $3);
}
| LEX_NEXT opt_exp statement_term
{ NODETYPE type;
- if ($2 && $2 == lookup("file")) {
- if (do_lint)
- warning("`next file' is a gawk extension");
- if (do_unix || do_posix) {
- /*
- * can't use yyerror, since may have overshot
- * the source line
- */
- errcount++;
- error("`next file' is a gawk extension");
- }
- if (! io_allowed) {
- /* same thing */
+ if ($2) {
+ if ($2 == lookup("file")) {
+ static int warned = FALSE;
+
+ if (! warned) {
+ warned = TRUE;
+ warning("`next file' is obsolete; use `nextfile'");
+ }
+ if (do_lint)
+ warning("`next file' is a gawk extension");
+ if (do_traditional) {
+ /*
+ * can't use yyerror, since may have overshot
+ * the source line
+ */
+ errcount++;
+ error("`next file' is a gawk extension");
+ }
+ if (! io_allowed) {
+ /* same thing */
+ errcount++;
+ error("`next file' used in BEGIN or END action");
+ }
+ type = Node_K_nextfile;
+ } else {
errcount++;
- error("`next file' used in BEGIN or END action");
+ error("illegal expression after `next'");
+ type = Node_K_next; /* sanity */
}
- type = Node_K_nextfile;
} else {
if (! io_allowed)
- yyerror("next used in BEGIN or END action");
+ yyerror("`next' used in BEGIN or END action");
type = Node_K_next;
+ }
+ $$ = node((NODE *) NULL, type, (NODE *) NULL);
}
- $$ = node ((NODE *)NULL, type, (NODE *)NULL);
+ | LEX_NEXTFILE statement_term
+ {
+ if (do_lint)
+ warning("`nextfile' is a gawk extension");
+ if (do_traditional) {
+ /*
+ * can't use yyerror, since may have overshot
+ * the source line
+ */
+ errcount++;
+ error("`nextfile' is a gawk extension");
+ }
+ if (! io_allowed) {
+ /* same thing */
+ errcount++;
+ error("`nextfile' used in BEGIN or END action");
+ }
+ $$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL);
}
| LEX_EXIT opt_exp statement_term
- { $$ = node ($2, Node_K_exit, (NODE *)NULL); }
+ { $$ = node($2, Node_K_exit, (NODE *) NULL); }
| LEX_RETURN
- { if (! can_return) yyerror("return used outside function context"); }
+ {
+ if (! can_return)
+ yyerror("`return' used outside function context");
+ }
opt_exp statement_term
- { $$ = node ($3, Node_K_return, (NODE *)NULL); }
+ { $$ = node($3, Node_K_return, (NODE *) NULL); }
| LEX_DELETE NAME '[' expression_list ']' statement_term
- { $$ = node (variable($2,1), Node_K_delete, $4); }
+ { $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); }
| LEX_DELETE NAME statement_term
{
if (do_lint)
warning("`delete array' is a gawk extension");
- if (do_unix || do_posix) {
+ if (do_traditional) {
/*
* can't use yyerror, since may have overshot
* the source line
@@ -447,7 +502,7 @@ statement
errcount++;
error("`delete array' is a gawk extension");
}
- $$ = node (variable($2,1), Node_K_delete, (NODE *) NULL);
+ $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
}
| exp statement_term
{ $$ = $1; }
@@ -464,17 +519,17 @@ if_statement
: LEX_IF '(' exp r_paren opt_nls statement
{
$$ = node($3, Node_K_if,
- node($6, Node_if_branches, (NODE *)NULL));
+ node($6, Node_if_branches, (NODE *) NULL));
}
| LEX_IF '(' exp r_paren opt_nls statement
LEX_ELSE opt_nls statement
- { $$ = node ($3, Node_K_if,
- node ($6, Node_if_branches, $9)); }
+ { $$ = node($3, Node_K_if,
+ node($6, Node_if_branches, $9)); }
;
nls
: NEWLINE
- { want_assign = 0; }
+ { want_assign = FALSE; }
| nls NEWLINE
;
@@ -487,18 +542,18 @@ input_redir
: /* empty */
{ $$ = NULL; }
| '<' simp_exp
- { $$ = node ($2, Node_redirect_input, (NODE *)NULL); }
+ { $$ = node($2, Node_redirect_input, (NODE *) NULL); }
;
output_redir
: /* empty */
{ $$ = NULL; }
| '>' exp
- { $$ = node ($2, Node_redirect_output, (NODE *)NULL); }
+ { $$ = node($2, Node_redirect_output, (NODE *) NULL); }
| APPEND_OP exp
- { $$ = node ($2, Node_redirect_append, (NODE *)NULL); }
+ { $$ = node($2, Node_redirect_append, (NODE *) NULL); }
| '|' exp
- { $$ = node ($2, Node_redirect_pipe, (NODE *)NULL); }
+ { $$ = node($2, Node_redirect_pipe, (NODE *) NULL); }
;
opt_param_list
@@ -538,11 +593,11 @@ opt_rexpression_list
rexpression_list
: rexp
- { $$ = node ($1, Node_expression_list, (NODE *)NULL); }
+ { $$ = node($1, Node_expression_list, (NODE *) NULL); }
| rexpression_list comma rexp
{
$$ = append_right($1,
- node( $3, Node_expression_list, (NODE *)NULL));
+ node($3, Node_expression_list, (NODE *) NULL));
yyerrok;
}
| error
@@ -564,11 +619,11 @@ opt_expression_list
expression_list
: exp
- { $$ = node ($1, Node_expression_list, (NODE *)NULL); }
+ { $$ = node($1, Node_expression_list, (NODE *) NULL); }
| expression_list comma exp
{
$$ = append_right($1,
- node( $3, Node_expression_list, (NODE *)NULL));
+ node($3, Node_expression_list, (NODE *) NULL));
yyerrok;
}
| error
@@ -583,35 +638,35 @@ expression_list
/* Expressions, not including the comma operator. */
exp : variable ASSIGNOP
- { want_assign = 0; }
+ { want_assign = FALSE; }
exp
{
if (do_lint && $4->type == Node_regex)
warning("Regular expression on left of assignment.");
- $$ = node ($1, $2, $4);
+ $$ = node($1, $2, $4);
}
| '(' expression_list r_paren LEX_IN NAME
- { $$ = node (variable($5,1), Node_in_array, $2); }
+ { $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); }
| exp '|' LEX_GETLINE opt_variable
{
- $$ = node ($4, Node_K_getline,
- node ($1, Node_redirect_pipein, (NODE *)NULL));
+ $$ = node($4, Node_K_getline,
+ node($1, Node_redirect_pipein, (NODE *) NULL));
}
| LEX_GETLINE opt_variable input_redir
{
if (do_lint && ! io_allowed && $3 == NULL)
warning("non-redirected getline undefined inside BEGIN or END action");
- $$ = node ($2, Node_K_getline, $3);
+ $$ = node($2, Node_K_getline, $3);
}
| exp LEX_AND exp
- { $$ = node ($1, Node_and, $3); }
+ { $$ = node($1, Node_and, $3); }
| exp LEX_OR exp
- { $$ = node ($1, Node_or, $3); }
+ { $$ = node($1, Node_or, $3); }
| exp MATCHOP exp
{
if ($1->type == Node_regex)
warning("Regular expression on left of MATCH operator.");
- $$ = node ($1, $2, mk_rexp($3));
+ $$ = node($1, $2, mk_rexp($3));
}
| regexp
{
@@ -632,123 +687,128 @@ exp : variable ASSIGNOP
$2);
}
| exp LEX_IN NAME
- { $$ = node (variable($3,1), Node_in_array, $1); }
+ { $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); }
| exp RELOP exp
{
if (do_lint && $3->type == Node_regex)
warning("Regular expression on left of comparison.");
- $$ = node ($1, $2, $3);
+ $$ = node($1, $2, $3);
}
| exp '<' exp
- { $$ = node ($1, Node_less, $3); }
+ { $$ = node($1, Node_less, $3); }
| exp '>' exp
- { $$ = node ($1, Node_greater, $3); }
+ { $$ = node($1, Node_greater, $3); }
| exp '?' exp ':' exp
{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
| simp_exp
{ $$ = $1; }
| exp simp_exp %prec CONCAT_OP
- { $$ = node ($1, Node_concat, $2); }
+ { $$ = node($1, Node_concat, $2); }
;
rexp
: variable ASSIGNOP
- { want_assign = 0; }
+ { want_assign = FALSE; }
rexp
- { $$ = node ($1, $2, $4); }
+ { $$ = node($1, $2, $4); }
| rexp LEX_AND rexp
- { $$ = node ($1, Node_and, $3); }
+ { $$ = node($1, Node_and, $3); }
| rexp LEX_OR rexp
- { $$ = node ($1, Node_or, $3); }
+ { $$ = node($1, Node_or, $3); }
| LEX_GETLINE opt_variable input_redir
{
if (do_lint && ! io_allowed && $3 == NULL)
warning("non-redirected getline undefined inside BEGIN or END action");
- $$ = node ($2, Node_K_getline, $3);
+ $$ = node($2, Node_K_getline, $3);
}
| regexp
{ $$ = $1; }
| '!' regexp %prec UNARY
{ $$ = node((NODE *) NULL, Node_nomatch, $2); }
| rexp MATCHOP rexp
- { $$ = node ($1, $2, mk_rexp($3)); }
+ { $$ = node($1, $2, mk_rexp($3)); }
| rexp LEX_IN NAME
- { $$ = node (variable($3,1), Node_in_array, $1); }
+ { $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); }
| rexp RELOP rexp
- { $$ = node ($1, $2, $3); }
+ { $$ = node($1, $2, $3); }
| rexp '?' rexp ':' rexp
{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
| simp_exp
{ $$ = $1; }
| rexp simp_exp %prec CONCAT_OP
- { $$ = node ($1, Node_concat, $2); }
+ { $$ = node($1, Node_concat, $2); }
;
simp_exp
: non_post_simp_exp
/* Binary operators in order of decreasing precedence. */
| simp_exp '^' simp_exp
- { $$ = node ($1, Node_exp, $3); }
+ { $$ = node($1, Node_exp, $3); }
| simp_exp '*' simp_exp
- { $$ = node ($1, Node_times, $3); }
+ { $$ = node($1, Node_times, $3); }
| simp_exp '/' simp_exp
- { $$ = node ($1, Node_quotient, $3); }
+ { $$ = node($1, Node_quotient, $3); }
| simp_exp '%' simp_exp
- { $$ = node ($1, Node_mod, $3); }
+ { $$ = node($1, Node_mod, $3); }
| simp_exp '+' simp_exp
- { $$ = node ($1, Node_plus, $3); }
+ { $$ = node($1, Node_plus, $3); }
| simp_exp '-' simp_exp
- { $$ = node ($1, Node_minus, $3); }
+ { $$ = node($1, Node_minus, $3); }
| variable INCREMENT
- { $$ = node ($1, Node_postincrement, (NODE *)NULL); }
+ { $$ = node($1, Node_postincrement, (NODE *) NULL); }
| variable DECREMENT
- { $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
+ { $$ = node($1, Node_postdecrement, (NODE *) NULL); }
;
non_post_simp_exp
: '!' simp_exp %prec UNARY
- { $$ = node ($2, Node_not,(NODE *) NULL); }
+ { $$ = node($2, Node_not, (NODE *) NULL); }
| '(' exp r_paren
{ $$ = $2; }
| LEX_BUILTIN
'(' opt_expression_list r_paren
- { $$ = snode ($3, Node_builtin, (int) $1); }
+ { $$ = snode($3, Node_builtin, (int) $1); }
| LEX_LENGTH '(' opt_expression_list r_paren
- { $$ = snode ($3, Node_builtin, (int) $1); }
+ { $$ = snode($3, Node_builtin, (int) $1); }
| LEX_LENGTH
{
if (do_lint)
warning("call of `length' without parentheses is not portable");
- $$ = snode ((NODE *)NULL, Node_builtin, (int) $1);
+ $$ = snode((NODE *) NULL, Node_builtin, (int) $1);
if (do_posix)
- warning( "call of `length' without parentheses is deprecated by POSIX");
+ warning("call of `length' without parentheses is deprecated by POSIX");
}
| FUNC_CALL '(' opt_expression_list r_paren
{
- $$ = node ($3, Node_func_call, make_string($1, strlen($1)));
+ $$ = node($3, Node_func_call, make_string($1, strlen($1)));
+ func_use($1, FUNC_USE);
+ param_sanity($3);
free($1);
}
| variable
| INCREMENT variable
- { $$ = node ($2, Node_preincrement, (NODE *)NULL); }
+ { $$ = node($2, Node_preincrement, (NODE *) NULL); }
| DECREMENT variable
- { $$ = node ($2, Node_predecrement, (NODE *)NULL); }
+ { $$ = node($2, Node_predecrement, (NODE *) NULL); }
| YNUMBER
{ $$ = $1; }
| YSTRING
{ $$ = $1; }
| '-' simp_exp %prec UNARY
- { if ($2->type == Node_val) {
+ {
+ if ($2->type == Node_val) {
$2->numbr = -(force_number($2));
$$ = $2;
} else
- $$ = node ($2, Node_unary_minus, (NODE *)NULL);
+ $$ = node($2, Node_unary_minus, (NODE *) NULL);
}
| '+' simp_exp %prec UNARY
{
- /* was: $$ = $2 */
- /* POSIX semantics: force a conversion to numeric type */
+ /*
+ * was: $$ = $2
+ * POSIX semantics: force a conversion to numeric type
+ */
$$ = node (make_number(0.0), Node_plus, $2);
}
;
@@ -762,17 +822,17 @@ opt_variable
variable
: NAME
- { $$ = variable($1,1); }
+ { $$ = variable($1, CAN_FREE, Node_var); }
| NAME '[' expression_list ']'
{
if ($3->rnode == NULL) {
- $$ = node (variable($1,1), Node_subscript, $3->lnode);
+ $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode);
freenode($3);
} else
- $$ = node (variable($1,1), Node_subscript, $3);
+ $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
}
| '$' non_post_simp_exp
- { $$ = node ($2, Node_field_spec, (NODE *)NULL); }
+ { $$ = node($2, Node_field_spec, (NODE *) NULL); }
;
l_brace
@@ -793,7 +853,7 @@ opt_semi
;
semi
- : ';' { yyerrok; want_assign = 0; }
+ : ';' { yyerrok; want_assign = FALSE; }
;
comma : ',' opt_nls { yyerrok; }
@@ -812,7 +872,8 @@ struct token {
# define NOT_OLD 0x0100 /* feature not in old awk */
# define NOT_POSIX 0x0200 /* feature not in POSIX */
# define GAWKX 0x0400 /* gawk extension */
- NODE *(*ptr) (); /* function that implements this keyword */
+# define RESX 0x0800 /* Bell Labs Research extension */
+ NODE *(*ptr)(); /* function that implements this keyword */
};
extern NODE
@@ -821,7 +882,8 @@ extern NODE
*do_split(), *do_system(), *do_int(), *do_close(),
*do_atan2(), *do_sin(), *do_cos(), *do_rand(),
*do_srand(), *do_match(), *do_tolower(), *do_toupper(),
- *do_sub(), *do_gsub(), *do_strftime(), *do_systime();
+ *do_sub(), *do_gsub(), *do_strftime(), *do_systime(),
+ *do_fflush();
/* Tokentab is sorted ascii ascending order, so it can be binary searched. */
@@ -838,9 +900,11 @@ static struct token tokentab[] = {
{"else", Node_illegal, LEX_ELSE, 0, 0},
{"exit", Node_K_exit, LEX_EXIT, 0, 0},
{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
+{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush},
{"for", Node_K_for, LEX_FOR, 0, 0},
{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
+{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
{"if", Node_K_if, LEX_IF, 0, 0},
@@ -851,6 +915,7 @@ static struct token tokentab[] = {
{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match},
{"next", Node_K_next, LEX_NEXT, 0, 0},
+{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0},
{"print", Node_K_print, LEX_PRINT, 0, 0},
{"printf", Node_K_printf, LEX_PRINTF, 0, 0},
{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
@@ -860,7 +925,7 @@ static struct token tokentab[] = {
{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
-{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_strftime},
+{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime},
{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
@@ -870,10 +935,17 @@ static struct token tokentab[] = {
{"while", Node_K_while, LEX_WHILE, 0, 0},
};
+/* yyerror --- print a syntax error message, show where */
+
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+static void
+yyerror(const char *m, ...)
+#else
/* VARARGS0 */
static void
yyerror(va_alist)
va_dcl
+#endif
{
va_list args;
const char *mesg = NULL;
@@ -885,13 +957,13 @@ va_dcl
errcount++;
/* Find the current line in the input file */
if (lexptr && lexeme) {
- if (!thisline) {
+ if (thisline == NULL) {
cp = lexeme;
if (*cp == '\n') {
cp--;
mesg = "unexpected newline";
}
- for ( ; cp != lexptr_begin && *cp != '\n'; --cp)
+ for (; cp != lexptr_begin && *cp != '\n'; --cp)
continue;
if (*cp == '\n')
cp++;
@@ -908,7 +980,7 @@ va_dcl
msg("%.*s", (int) (bp - thisline), thisline);
bp = buf;
cp = buf + sizeof(buf) - 24; /* 24 more than longest msg. input */
- if (lexptr) {
+ if (lexptr != NULL) {
scan = thisline;
while (bp < cp && scan < lexeme)
if (*scan++ == '\t')
@@ -918,26 +990,36 @@ va_dcl
*bp++ = '^';
*bp++ = ' ';
}
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ va_start(args, m);
+ if (mesg == NULL)
+ mesg = m;
+#else
va_start(args);
if (mesg == NULL)
mesg = va_arg(args, char *);
+#endif
strcpy(bp, mesg);
err("", buf, args);
va_end(args);
exit(2);
}
+/* get_src_buf --- read the next buffer of source program */
+
static char *
get_src_buf()
{
- static int samefile = 0;
+ static int samefile = FALSE;
static int nextfile = 0;
static char *buf = NULL;
static int fd;
int n;
register char *scan;
static int len = 0;
- static int did_newline = 0;
+ static int did_newline = FALSE;
+ struct stat sbuf;
+
# define SLOP 128 /* enough space to hold most source lines */
again:
@@ -953,13 +1035,19 @@ again:
* gawk '' /path/name
* Sigh.
*/
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warned = TRUE;
+ warning("empty program text on command line");
+ }
++nextfile;
goto again;
}
sourceline = 1;
lexptr = lexptr_begin = srcfiles[nextfile].val;
lexend = lexptr + len;
- } else if (!did_newline && *(lexptr-1) != '\n') {
+ } else if (! did_newline && *(lexptr-1) != '\n') {
/*
* The following goop is to ensure that the source
* ends with a newline and that the entire current
@@ -967,7 +1055,7 @@ again:
*/
int offset;
- did_newline = 1;
+ did_newline = TRUE;
offset = lexptr - lexeme;
for (scan = lexeme; scan > lexptr_begin; scan--)
if (*scan == '\n') {
@@ -988,13 +1076,13 @@ again:
lexeme = lexptr = lexptr_begin = NULL;
}
if (lexptr == NULL && ++nextfile <= numfiles)
- return get_src_buf();
+ goto again;
return lexptr;
}
- if (!samefile) {
+ if (! samefile) {
source = srcfiles[nextfile].val;
if (source == NULL) {
- if (buf) {
+ if (buf != NULL) {
free(buf);
buf = NULL;
}
@@ -1002,15 +1090,32 @@ again:
return lexeme = lexptr = lexptr_begin = NULL;
}
fd = pathopen(source);
- if (fd == -1)
+ if (fd <= INVALID_HANDLE) {
+ char *in;
+
+ /* suppress file name and line no. in error mesg */
+ in = source;
+ source = NULL;
fatal("can't open source file \"%s\" for reading (%s)",
- source, strerror(errno));
- len = optimal_bufsize(fd);
- if (buf)
+ in, strerror(errno));
+ }
+ len = optimal_bufsize(fd, & sbuf);
+ if (sbuf.st_size == 0) {
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warned = TRUE;
+ warning("source file `%s' is empty", source);
+ }
+ close(fd);
+ ++nextfile;
+ goto again;
+ }
+ if (buf != NULL)
free(buf);
emalloc(buf, char *, len + SLOP, "get_src_buf");
lexptr_begin = buf + SLOP;
- samefile = 1;
+ samefile = TRUE;
sourceline = 1;
} else {
/*
@@ -1039,7 +1144,8 @@ again:
fatal("can't read sourcefile \"%s\" (%s)",
source, strerror(errno));
if (n == 0) {
- samefile = 0;
+ close(fd);
+ samefile = FALSE;
nextfile++;
if (lexeme)
*lexeme = '\0';
@@ -1051,8 +1157,12 @@ again:
return buf;
}
+/* tokadd --- add a character to the token buffer */
+
#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
+/* tokexpand --- grow the token buffer */
+
char *
tokexpand()
{
@@ -1061,7 +1171,7 @@ tokexpand()
tokoffset = tok - tokstart;
toksize *= 2;
- if (tokstart)
+ if (tokstart != NULL)
erealloc(tokstart, char *, toksize, "tokexpand");
else
emalloc(tokstart, char *, toksize, "tokexpand");
@@ -1070,9 +1180,12 @@ tokexpand()
return tok;
}
+/* nextc --- get the next input character */
+
#if DEBUG
-char
-nextc() {
+int
+nextc()
+{
int c;
if (lexptr && lexptr < lexend)
@@ -1080,40 +1193,67 @@ nextc() {
else if (get_src_buf())
c = *lexptr++;
else
- c = '\0';
+ c = EOF;
return c;
}
#else
#define nextc() ((lexptr && lexptr < lexend) ? \
*lexptr++ : \
- (get_src_buf() ? *lexptr++ : '\0') \
+ (get_src_buf() ? *lexptr++ : EOF) \
)
#endif
+
+/* pushback --- push a character back on the input */
+
#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr)
-/*
- * Read the input and turn it into tokens.
- */
+/* allow_newline --- allow newline after &&, ||, ? and : */
+
+static void
+allow_newline()
+{
+ int c;
+
+ for (;;) {
+ c = nextc();
+ if (c == EOF)
+ break;
+ if (c == '#') {
+ while ((c = nextc()) != '\n' && c != EOF)
+ continue;
+ if (c == EOF)
+ break;
+ }
+ if (c == '\n')
+ sourceline++;
+ if (! isspace(c)) {
+ pushback();
+ break;
+ }
+ }
+}
+
+/* yylex --- Read the input and turn it into tokens. */
static int
yylex()
{
- register int c;
- int seen_e = 0; /* These are for numbers */
- int seen_point = 0;
+ register int c, c1;
+ int seen_e = FALSE; /* These are for numbers */
+ int seen_point = FALSE;
int esc_seen; /* for literal strings */
int low, mid, high;
- static int did_newline = 0;
+ static int did_newline = FALSE;
char *tokkey;
- static int lasttok = 0, eof_warned = 0;
+ static int lasttok = 0, eof_warned = FALSE;
- if (!nextc()) {
+ if (nextc() == EOF) {
if (lasttok != NEWLINE) {
lasttok = NEWLINE;
if (do_lint && ! eof_warned) {
warning("source file does not end in newline");
- eof_warned = 1;
+ eof_warned = TRUE;
}
return NEWLINE; /* fake it */
}
@@ -1133,29 +1273,59 @@ yylex()
lexeme = lexptr;
thisline = NULL;
if (want_regexp) {
- int in_brack = 0;
+ int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
+ /*
+ * Counting brackets is non-trivial. [[] is ok,
+ * and so is [\]], with a point being that /[/]/ as a regexp
+ * constant has to work.
+ *
+ * Do not count [ or ] if either one is preceded by a \.
+ * A `[' should be counted if
+ * a) it is the first one so far (in_brack == 0)
+ * b) it is the `[' in `[:'
+ * A ']' should be counted if not preceded by a \, since
+ * it is either closing `:]' or just a plain list.
+ * According to POSIX, []] is how you put a ] into a set.
+ * Try to handle that too.
+ *
+ * The code for \ handles \[ and \].
+ */
- want_regexp = 0;
+ want_regexp = FALSE;
tok = tokstart;
- while ((c = nextc()) != 0) {
+ for (;;) {
+ c = nextc();
switch (c) {
case '[':
- in_brack = 1;
+ /* one day check for `.' and `=' too */
+ if ((c1 = nextc()) == ':' || in_brack == 0)
+ in_brack++;
+ pushback();
break;
case ']':
- in_brack = 0;
+ if (tokstart[0] == '['
+ && (tok == tokstart + 1
+ || (tok == tokstart + 2
+ && tokstart[1] == '^')))
+ /* do nothing */;
+ else
+ in_brack--;
break;
case '\\':
- if ((c = nextc()) == '\0') {
+ if ((c = nextc()) == EOF) {
yyerror("unterminated regexp ends with \\ at end of file");
+ return lasttok = REGEXP; /* kludge */
} else if (c == '\n') {
sourceline++;
continue;
- } else
+ } else {
tokadd('\\');
+ tokadd(c);
+ continue;
+ }
break;
case '/': /* end of the regexp */
- if (in_brack)
+ if (in_brack > 0)
break;
pushback();
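The long comment above lays out the bracket-counting rules the regexp
scanner needs so that constants such as /[/]/ and classes such as
[[:alnum:]] are not cut short at the wrong character.  Below is a
simplified, self-contained approximation of those rules: it works on a
complete string rather than the lexer's input buffer, and it looks at the
preceding characters instead of tokstart[] to recognize []] and [^]].

#include <stdio.h>

/* return the offset of the '/' that ends the regexp body, or -1 */
static int end_of_regexp(const char *s)
{
	int in_brack = 0;
	int i;

	for (i = 0; s[i] != '\0'; i++) {
		switch (s[i]) {
		case '\\':	/* \[ and \] are never counted */
			if (s[i + 1] != '\0')
				i++;
			break;
		case '[':	/* first '[' so far, or the '[' of "[:" */
			if (in_brack == 0 || s[i + 1] == ':')
				in_brack++;
			break;
		case ']':	/* "[]" and "[^]" keep this ']' literal */
			if (!(i >= 1 && s[i - 1] == '[')
			    && !(i >= 2 && s[i - 2] == '[' && s[i - 1] == '^'))
				in_brack--;
			break;
		case '/':	/* ends the regexp only outside brackets */
			if (in_brack <= 0)
				return i;
			break;
		}
	}
	return -1;
}

int main(void)
{
	/* the '/' inside the bracket list does not end the regexp */
	printf("%d\n", end_of_regexp("[/]/ rest"));	/* prints 3 */
	return 0;
}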
@@ -1165,8 +1335,10 @@ yylex()
case '\n':
pushback();
yyerror("unterminated regexp");
- case '\0':
+ return lasttok = REGEXP; /* kludge */
+ case EOF:
yyerror("unterminated regexp at end of file");
+ return lasttok = REGEXP; /* kludge */
}
tokadd(c);
}
@@ -1181,12 +1353,12 @@ retry:
yylval.nodetypeval = Node_illegal;
switch (c) {
- case 0:
+ case EOF:
if (lasttok != NEWLINE) {
lasttok = NEWLINE;
if (do_lint && ! eof_warned) {
warning("source file does not end in newline");
- eof_warned = 1;
+ eof_warned = TRUE;
}
return NEWLINE; /* fake it */
}
@@ -1198,13 +1370,13 @@ retry:
case '#': /* it's a comment */
while ((c = nextc()) != '\n') {
- if (c == '\0') {
+ if (c == EOF) {
if (lasttok != NEWLINE) {
lasttok = NEWLINE;
if (do_lint && ! eof_warned) {
warning(
"source file does not end in newline");
- eof_warned = 1;
+ eof_warned = TRUE;
}
return NEWLINE; /* fake it */
}
@@ -1222,14 +1394,18 @@ retry:
* Use it at your own risk. We think it's a bad idea, which
* is why it's not on by default.
*/
- if (!do_unix) {
+ if (! do_traditional) {
/* strip trailing white-space and/or comment */
while ((c = nextc()) == ' ' || c == '\t')
continue;
- if (c == '#')
+ if (c == '#') {
+ if (do_lint)
+ warning(
+ "use of `\\ #...' line continuation is not portable");
while ((c = nextc()) != '\n')
- if (c == '\0')
+ if (c == EOF)
break;
+ }
pushback();
}
#endif /* RELAXED_CONTINUATION */
@@ -1241,16 +1417,18 @@ retry:
break;
case '$':
- want_assign = 1;
+ want_assign = TRUE;
return lasttok = '$';
+ case ':':
+ case '?':
+ allow_newline();
+ /* fall through */
case ')':
case ']':
case '(':
case '[':
case ';':
- case ':':
- case '?':
case '{':
case ',':
return lasttok = c;
@@ -1264,20 +1442,22 @@ retry:
return lasttok = '*';
} else if (c == '*') {
/* make ** and **= aliases for ^ and ^= */
- static int did_warn_op = 0, did_warn_assgn = 0;
+ static int did_warn_op = FALSE, did_warn_assgn = FALSE;
if (nextc() == '=') {
if (do_lint && ! did_warn_assgn) {
- did_warn_assgn = 1;
+ did_warn_assgn = TRUE;
warning("**= is not allowed by POSIX");
+ warning("operator `**=' is not supported in old awk");
}
yylval.nodetypeval = Node_assign_exp;
- return lasttok = ASSIGNOP;
+ return ASSIGNOP;
} else {
pushback();
if (do_lint && ! did_warn_op) {
- did_warn_op = 1;
+ did_warn_op = TRUE;
warning("** is not allowed by POSIX");
+ warning("operator `**' is not supported in old awk");
}
return lasttok = '^';
}
@@ -1305,12 +1485,11 @@ retry:
case '^':
{
- static int did_warn_op = 0, did_warn_assgn = 0;
+ static int did_warn_op = FALSE, did_warn_assgn = FALSE;
if (nextc() == '=') {
-
if (do_lint && ! did_warn_assgn) {
- did_warn_assgn = 1;
+ did_warn_assgn = TRUE;
warning("operator `^=' is not supported in old awk");
}
yylval.nodetypeval = Node_assign_exp;
@@ -1318,7 +1497,7 @@ retry:
}
pushback();
if (do_lint && ! did_warn_op) {
- did_warn_op = 1;
+ did_warn_op = TRUE;
warning("operator `^' is not supported in old awk");
}
return lasttok = '^';
@@ -1341,7 +1520,7 @@ retry:
}
if (c == '~') {
yylval.nodetypeval = Node_nomatch;
- want_assign = 0;
+ want_assign = FALSE;
return lasttok = MATCHOP;
}
pushback();
@@ -1379,16 +1558,16 @@ retry:
case '~':
yylval.nodetypeval = Node_match;
- want_assign = 0;
+ want_assign = FALSE;
return lasttok = MATCHOP;
case '}':
/*
* Added did newline stuff. Easier than
- * hacking the grammar
+ * hacking the grammar.
*/
if (did_newline) {
- did_newline = 0;
+ did_newline = FALSE;
return lasttok = c;
}
did_newline++;
@@ -1396,7 +1575,7 @@ retry:
return lasttok = NEWLINE;
case '"':
- esc_seen = 0;
+ esc_seen = FALSE;
while ((c = nextc()) != '"') {
if (c == '\n') {
pushback();
@@ -1408,10 +1587,10 @@ retry:
sourceline++;
continue;
}
- esc_seen = 1;
+ esc_seen = TRUE;
tokadd('\\');
}
- if (c == '\0') {
+ if (c == EOF) {
pushback();
yyerror("unterminated string");
}
@@ -1435,10 +1614,11 @@ retry:
case '.':
c = nextc();
pushback();
- if (!isdigit(c))
+ if (! isdigit(c))
return lasttok = '.';
else
- c = '.'; /* FALL THROUGH */
+ c = '.';
+ /* FALL THROUGH */
case '0':
case '1':
case '2':
@@ -1451,24 +1631,24 @@ retry:
case '9':
/* It's a number */
for (;;) {
- int gotnumber = 0;
+ int gotnumber = FALSE;
tokadd(c);
switch (c) {
case '.':
if (seen_point) {
- gotnumber++;
+ gotnumber = TRUE;
break;
}
- ++seen_point;
+ seen_point = TRUE;
break;
case 'e':
case 'E':
if (seen_e) {
- gotnumber++;
+ gotnumber = TRUE;
break;
}
- ++seen_e;
+ seen_e = TRUE;
if ((c = nextc()) == '-' || c == '+')
tokadd(c);
else
@@ -1486,17 +1666,17 @@ retry:
case '9':
break;
default:
- gotnumber++;
+ gotnumber = TRUE;
}
if (gotnumber)
break;
c = nextc();
}
- if (c != 0)
+ if (c != EOF)
pushback();
else if (do_lint && ! eof_warned) {
warning("source file does not end in newline");
- eof_warned = 1;
+ eof_warned = TRUE;
}
tokadd('\0');
yylval.nodeval = make_number(atof(tokstart));
@@ -1506,24 +1686,8 @@ retry:
case '&':
if ((c = nextc()) == '&') {
yylval.nodetypeval = Node_and;
- for (;;) {
- c = nextc();
- if (c == '\0')
- break;
- if (c == '#') {
- while ((c = nextc()) != '\n' && c != '\0')
- continue;
- if (c == '\0')
- break;
- }
- if (c == '\n')
- sourceline++;
- if (! isspace(c)) {
- pushback();
- break;
- }
- }
- want_assign = 0;
+ allow_newline();
+ want_assign = FALSE;
return lasttok = LEX_AND;
}
pushback();
@@ -1532,24 +1696,8 @@ retry:
case '|':
if ((c = nextc()) == '|') {
yylval.nodetypeval = Node_or;
- for (;;) {
- c = nextc();
- if (c == '\0')
- break;
- if (c == '#') {
- while ((c = nextc()) != '\n' && c != '\0')
- continue;
- if (c == '\0')
- break;
- }
- if (c == '\n')
- sourceline++;
- if (! isspace(c)) {
- pushback();
- break;
- }
- }
- want_assign = 0;
+ allow_newline();
+ want_assign = FALSE;
return lasttok = LEX_OR;
}
pushback();
@@ -1559,7 +1707,7 @@ retry:
if (c != '_' && ! isalpha(c))
yyerror("Invalid char '%c' in expression\n", c);
- /* it's some type of name-type-thing. Find its length */
+ /* it's some type of name-type-thing. Find its length. */
tok = tokstart;
while (is_identchar(c)) {
tokadd(c);
@@ -1568,40 +1716,43 @@ retry:
tokadd('\0');
emalloc(tokkey, char *, tok - tokstart, "yylex");
memcpy(tokkey, tokstart, tok - tokstart);
- if (c != 0)
+ if (c != EOF)
pushback();
else if (do_lint && ! eof_warned) {
warning("source file does not end in newline");
- eof_warned = 1;
+ eof_warned = TRUE;
}
- /* See if it is a special token. */
+ /* See if it is a special token. */
low = 0;
- high = (sizeof (tokentab) / sizeof (tokentab[0])) - 1;
+ high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
while (low <= high) {
- int i/* , c */;
+ int i;
mid = (low + high) / 2;
c = *tokstart - tokentab[mid].operator[0];
- i = c ? c : strcmp (tokstart, tokentab[mid].operator);
+ i = c ? c : strcmp(tokstart, tokentab[mid].operator);
- if (i < 0) { /* token < mid */
+ if (i < 0) /* token < mid */
high = mid - 1;
- } else if (i > 0) { /* token > mid */
+ else if (i > 0) /* token > mid */
low = mid + 1;
- } else {
+ else {
if (do_lint) {
if (tokentab[mid].flags & GAWKX)
warning("%s() is a gawk extension",
tokentab[mid].operator);
+ if (tokentab[mid].flags & RESX)
+ warning("%s() is a Bell Labs extension",
+ tokentab[mid].operator);
if (tokentab[mid].flags & NOT_POSIX)
warning("POSIX does not allow %s",
tokentab[mid].operator);
- if (tokentab[mid].flags & NOT_OLD)
- warning("%s is not supported in old awk",
- tokentab[mid].operator);
}
- if ((do_unix && (tokentab[mid].flags & GAWKX))
+ if (do_lint_old && (tokentab[mid].flags & NOT_OLD))
+ warning("%s is not supported in old awk",
+ tokentab[mid].operator);
+ if ((do_traditional && (tokentab[mid].flags & GAWKX))
|| (do_posix && (tokentab[mid].flags & NOT_POSIX)))
break;
if (tokentab[mid].class == LEX_BUILTIN
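The keyword lookup above is a binary search over tokentab, which is why the
table must stay sorted in ascending ASCII order; comparing the first
character is only a cheap shortcut taken before the full strcmp().  A
stand-alone sketch of the same search over an illustrative table (not
gawk's):

#include <stdio.h>
#include <string.h>

static const char *const keywords[] = {
	"BEGIN", "END", "delete", "function", "getline", "nextfile", "while",
};

/* return the table index of name, or -1 if it is not a keyword */
static int find_keyword(const char *name)
{
	int low = 0;
	int high = (int) (sizeof(keywords) / sizeof(keywords[0])) - 1;

	while (low <= high) {
		int mid = (low + high) / 2;
		int c = name[0] - keywords[mid][0];
		int i = (c != 0) ? c : strcmp(name, keywords[mid]);

		if (i < 0)		/* name < keywords[mid] */
			high = mid - 1;
		else if (i > 0)		/* name > keywords[mid] */
			low = mid + 1;
		else
			return mid;
	}
	return -1;
}

int main(void)
{
	printf("%d %d\n", find_keyword("nextfile"), find_keyword("foo"));
	return 0;
}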
@@ -1620,11 +1771,13 @@ retry:
if (*lexptr == '(')
return lasttok = FUNC_CALL;
else {
- want_assign = 1;
+ want_assign = TRUE;
return lasttok = NAME;
}
}
+/* node_common --- common code for allocating a new node */
+
static NODE *
node_common(op)
NODETYPE op;
@@ -1643,9 +1796,8 @@ NODETYPE op;
return r;
}
-/*
- * This allocates a node with defined lnode and rnode.
- */
+/* node --- allocates a node with defined lnode and rnode. */
+
NODE *
node(left, op, right)
NODE *left, *right;
@@ -1659,10 +1811,10 @@ NODETYPE op;
return r;
}
-/*
- * This allocates a node with defined subnode and proc for builtin functions
- * Checks for arg. count and supplies defaults where possible.
- */
+/* snode --- allocate a node with defined subnode and proc for builtin
+ functions. Checks for arg. count and supplies defaults where
+ possible. */
+
static NODE *
snode(subn, op, idx)
NODETYPE op;
@@ -1677,7 +1829,7 @@ NODE *subn;
r = node_common(op);
/* traverse expression list to see how many args. given */
- for (n= subn; n; n= n->rnode) {
+ for (n = subn; n != NULL; n = n->rnode) {
nexp++;
if (nexp > 3)
break;
@@ -1685,7 +1837,7 @@ NODE *subn;
/* check against how many args. are allowed for this builtin */
args_allowed = tokentab[idx].flags & ARGS;
- if (args_allowed && !(args_allowed & A(nexp)))
+ if (args_allowed && (args_allowed & A(nexp)) == 0)
fatal("%s() cannot have %d argument%c",
tokentab[idx].operator, nexp, nexp == 1 ? ' ' : 's');
@@ -1693,7 +1845,7 @@ NODE *subn;
/* special case processing for a few builtins */
if (nexp == 0 && r->proc == do_length) {
- subn = node(node(make_number(0.0),Node_field_spec,(NODE *)NULL),
+ subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL),
Node_expression_list,
(NODE *) NULL);
} else if (r->proc == do_match) {
@@ -1710,6 +1862,15 @@ NODE *subn;
(NODE *) NULL));
else if (do_lint && subn->rnode->rnode->lnode->type == Node_val)
warning("string literal as last arg of substitute");
+ } else if (r->proc == do_gensub) {
+ if (subn->lnode->type != Node_regex)
+ subn->lnode = mk_rexp(subn->lnode);
+ if (nexp == 3)
+ append_right(subn, node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL));
} else if (r->proc == do_split) {
if (nexp == 2)
append_right(subn,
@@ -1726,11 +1887,13 @@ NODE *subn;
}
/*
+ * mkrangenode:
* This allocates a Node_line_range node with defined condpair and
* zeroes the trigger word to avoid the temptation of assuming that calling
* 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'.
+ * Otherwise like node().
*/
-/* Otherwise like node() */
+
static NODE *
mkrangenode(cpair)
NODE *cpair;
@@ -1740,11 +1903,12 @@ NODE *cpair;
getnode(r);
r->type = Node_line_range;
r->condpair = cpair;
- r->triggered = 0;
+ r->triggered = FALSE;
return r;
}
-/* Build a for loop */
+/* make_for_loop --- build a for loop */
+
static NODE *
make_for_loop(init, cond, incr)
NODE *init, *cond, *incr;
@@ -1762,10 +1926,52 @@ NODE *init, *cond, *incr;
return n;
}
+/* dup_parms --- return TRUE if there are duplicate parameters */
+
+static int
+dup_parms(func)
+NODE *func;
+{
+ register NODE *np;
+ char *fname, **names;
+ int count, i, j, dups;
+ NODE *params;
+
+ fname = func->param;
+ count = func->param_cnt;
+ params = func->rnode;
+
+ if (count == 0) /* no args, no problem */
+ return FALSE;
+
+ emalloc(names, char **, count * sizeof(char *), "dup_parms");
+
+ i = 0;
+ for (np = params; np != NULL; np = np->rnode)
+ names[i++] = np->param;
+
+ dups = 0;
+ for (i = 1; i < count; i++) {
+ for (j = 0; j < i; j++) {
+ if (strcmp(names[i], names[j]) == 0) {
+ dups++;
+ error(
+ "function `%s': parameter #%d, `%s', duplicates parameter #%d",
+ fname, i+1, names[j], j+1);
+ }
+ }
+ }
+
+ free(names);
+ return (dups > 0 ? TRUE : FALSE);
+}
+
/*
+ * install:
* Install a name in the symbol table, even if it is already there.
* Caller must check against redefinition if that is desired.
*/
+
NODE *
install(name, value)
char *name;
@@ -1788,7 +1994,8 @@ NODE *value;
return hp->hvalue;
}
-/* find the most recent hash node for name installed by install */
+/* lookup --- find the most recent hash node for name installed by install */
+
NODE *
lookup(name)
const char *name;
@@ -1797,19 +2004,20 @@ const char *name;
register size_t len;
len = strlen(name);
- bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
- while (bucket) {
+ for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
+ bucket != NULL; bucket = bucket->hnext)
if (bucket->hlength == len && STREQN(bucket->hname, name, len))
return bucket->hvalue;
- bucket = bucket->hnext;
- }
+
return NULL;
}
/*
+ * append_right:
* Add new to the rightmost branch of LIST. This uses n^2 time, so we make
* a simple attempt at optimizing it.
*/
+
static NODE *
append_right(list, new)
NODE *list, *new;
@@ -1829,50 +2037,13 @@ NODE *list, *new;
return oldlist;
}
-/* return 1 if there are duplicate parameters, 0 means all ok */
-static int
-dup_parms(func)
-NODE *func;
-{
- register NODE *np;
- char *fname, **names;
- int count, i, j, dups;
- NODE *params;
-
- fname = func->param;
- count = func->param_cnt;
- params = func->rnode;
-
- if (count == 0) /* no args, no problem */
- return 0;
-
- emalloc(names, char **, count * sizeof(char *), "dup_parms");
-
- i = 0;
- for (np = params; np != NULL; np = np->rnode)
- names[i++] = np->param;
-
- dups = 0;
- for (i = 1; i < count; i++) {
- for (j = 0; j < i; j++) {
- if (strcmp(names[i], names[j]) == 0) {
- dups++;
- error(
- "function `%s': parameter #%d, `%s', duplicates parameter #%d",
- fname, i+1, names[j], j+1);
- }
- }
- }
-
- free(names);
- return (dups > 0);
-}
-
/*
+ * func_install:
* check if name is already installed; if so, it had better have Null value,
* in which case def is added as the value. Otherwise, install name with def
* as value.
*/
+
static void
func_install(params, def)
NODE *params;
@@ -1881,14 +2052,18 @@ NODE *def;
NODE *r;
pop_params(params->rnode);
- pop_var(params, 0);
+ pop_var(params, FALSE);
r = lookup(params->param);
if (r != NULL) {
fatal("function name `%s' previously defined", params->param);
} else
(void) install(params->param, node(params, Node_func, def));
+
+ func_use(params->param, FUNC_DEFINE);
}
+/* pop_var --- remove a variable from the symbol table */
+
static void
pop_var(np, freeit)
NODE *np;
@@ -1901,7 +2076,7 @@ int freeit;
name = np->param;
len = strlen(name);
save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]);
- for (bucket = *save; bucket; bucket = bucket->hnext) {
+ for (bucket = *save; bucket != NULL; bucket = bucket->hnext) {
if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
*save = bucket->hnext;
freenode(bucket);
@@ -1913,6 +2088,8 @@ int freeit;
}
}
+/* pop_params --- remove list of function parameters from symbol table */
+
/*
* pop parameters out of the symbol table. do this in reverse order to
* avoid reading freed memory if there were duplicated parameters.
@@ -1924,9 +2101,11 @@ NODE *params;
if (params == NULL)
return;
pop_params(params->rnode);
- pop_var(params, 1);
+ pop_var(params, TRUE);
}
+/* make_param --- make NAME into a function parameter */
+
static NODE *
make_param(name)
char *name;
@@ -1941,42 +2120,201 @@ char *name;
return (install(name, r));
}
-/* Name points to a variable name. Make sure it's in the symbol table */
+static struct fdesc {
+ char *name;
+ short used;
+ short defined;
+ struct fdesc *next;
+} *ftable[HASHSIZE];
+
+/* func_use --- track uses and definitions of functions */
+
+static void
+func_use(name, how)
+char *name;
+enum defref how;
+{
+ struct fdesc *fp;
+ int len;
+ int ind;
+
+ len = strlen(name);
+ ind = hash(name, len, HASHSIZE);
+
+ for (fp = ftable[ind]; fp != NULL; fp = fp->next) {
+ if (strcmp(fp->name, name) == 0) {
+ if (how == FUNC_DEFINE)
+ fp->defined++;
+ else
+ fp->used++;
+ return;
+ }
+ }
+
+ /* not in the table, fall through to allocate a new one */
+
+ emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
+ memset(fp, '\0', sizeof(struct fdesc));
+ emalloc(fp->name, char *, len + 1, "func_use");
+ strcpy(fp->name, name);
+ if (how == FUNC_DEFINE)
+ fp->defined++;
+ else
+ fp->used++;
+ fp->next = ftable[ind];
+ ftable[ind] = fp;
+}
+
+/* check_funcs --- verify functions that are called but not defined */
+
+static void
+check_funcs()
+{
+ struct fdesc *fp, *next;
+ int i;
+
+ for (i = 0; i < HASHSIZE; i++) {
+ for (fp = ftable[i]; fp != NULL; fp = fp->next) {
+#ifdef REALLYMEAN
+ /* making this the default breaks old code. sigh. */
+ if (fp->defined == 0) {
+ error(
+ "function `%s' called but never defined", fp->name);
+ errcount++;
+ }
+#else
+ if (do_lint && fp->defined == 0)
+ warning(
+ "function `%s' called but never defined", fp->name);
+#endif
+ if (do_lint && fp->used == 0) {
+ warning("function `%s' defined but never called",
+ fp->name);
+ }
+ }
+ }
+
+ /* now let's free all the memory */
+ for (i = 0; i < HASHSIZE; i++) {
+ for (fp = ftable[i]; fp != NULL; fp = next) {
+ next = fp->next;
+ free(fp->name);
+ free(fp);
+ }
+ }
+}
+
+/* param_sanity --- look for parameters that are regexp constants */
+
+static void
+param_sanity(arglist)
+NODE *arglist;
+{
+ NODE *argp, *arg;
+ int i;
+
+ for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) {
+ arg = argp->lnode;
+ if (arg->type == Node_regex)
+ warning("regexp constant for parameter #%d yields boolean value", i);
+ }
+}
+
+/* variable --- make sure NAME is in the symbol table */
+
NODE *
-variable(name, can_free)
+variable(name, can_free, type)
char *name;
int can_free;
+NODETYPE type;
{
register NODE *r;
- static int env_loaded = 0;
+ static int env_loaded = FALSE;
- if (!env_loaded && STREQ(name, "ENVIRON")) {
+ if (! env_loaded && STREQ(name, "ENVIRON")) {
load_environ();
- env_loaded = 1;
+ env_loaded = TRUE;
}
if ((r = lookup(name)) == NULL)
- r = install(name, node(Nnull_string, Node_var, (NODE *) NULL));
+ r = install(name, node(Nnull_string, type, (NODE *) NULL));
else if (can_free)
free(name);
return r;
}
+/* mk_rexp --- make a regular expression constant */
+
static NODE *
mk_rexp(exp)
NODE *exp;
{
+ NODE *n;
+
if (exp->type == Node_regex)
return exp;
- else {
- NODE *n;
-
- getnode(n);
- n->type = Node_regex;
- n->re_exp = exp;
- n->re_text = NULL;
- n->re_reg = NULL;
- n->re_flags = 0;
- n->re_cnt = 1;
- return n;
+
+ getnode(n);
+ n->type = Node_regex;
+ n->re_exp = exp;
+ n->re_text = NULL;
+ n->re_reg = NULL;
+ n->re_flags = 0;
+ n->re_cnt = 1;
+ return n;
+}
+
+/* isnoeffect --- when used as a statement, has no side effects */
+
+/*
+ * To be completely general, we should recursively walk the parse
+ * tree, to make sure that all the subexpressions also have no effect.
+ * Instead, we just weaken the actual warning that's printed, up above
+ * in the grammar.
+ */
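+/*
+ * As an illustration only (this is not part of the parser's checks),
+ * such a recursive walk might look roughly like:
+ *
+ *	static int tree_has_no_effect(tree)
+ *	NODE *tree;
+ *	{
+ *		if (tree == NULL)
+ *			return TRUE;
+ *		return isnoeffect(tree->type)
+ *			&& tree_has_no_effect(tree->lnode)
+ *			&& tree_has_no_effect(tree->rnode);
+ *	}
+ */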
+
+static int
+isnoeffect(type)
+NODETYPE type;
+{
+ switch (type) {
+ case Node_times:
+ case Node_quotient:
+ case Node_mod:
+ case Node_plus:
+ case Node_minus:
+ case Node_subscript:
+ case Node_concat:
+ case Node_exp:
+ case Node_unary_minus:
+ case Node_field_spec:
+ case Node_and:
+ case Node_or:
+ case Node_equal:
+ case Node_notequal:
+ case Node_less:
+ case Node_greater:
+ case Node_leq:
+ case Node_geq:
+ case Node_match:
+ case Node_nomatch:
+ case Node_not:
+ case Node_val:
+ case Node_in_array:
+ case Node_NF:
+ case Node_NR:
+ case Node_FNR:
+ case Node_FS:
+ case Node_RS:
+ case Node_FIELDWIDTHS:
+ case Node_IGNORECASE:
+ case Node_OFS:
+ case Node_ORS:
+ case Node_OFMT:
+ case Node_CONVFMT:
+ return TRUE;
+ default:
+ break; /* keeps gcc -Wall happy */
}
+
+ return FALSE;
}
diff --git a/awklib/Makefile.in b/awklib/Makefile.in
new file mode 100644
index 00000000..f5d1b95c
--- /dev/null
+++ b/awklib/Makefile.in
@@ -0,0 +1,96 @@
+# Makefile for GNU Awk support library.
+#
+# Copyright (C) 1995 the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+SHELL = /bin/sh
+
+srcdir = @srcdir@
+VPATH = @srcdir@
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+
+CC = @CC@
+CFLAGS = @CFLAGS@
+
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+binprefix =
+manprefix =
+
+bindir = @bindir@
+libdir = @libdir@
+mandir = @mandir@/man1
+manext = .1
+infodir = @infodir@
+datadir = @datadir@/awk
+libexecdir = @libexecdir@/awk
+
+AUXPROGS = pwcat grcat
+AUXAWK = passwd.awk group.awk
+
+all: stamp-eg $(AUXPROGS) igawk $(AUXAWK)
+
+stamp-eg: $(srcdir)/../doc/gawk.texi
+ rm -fr eg stamp-eg
+ ../gawk -f $(srcdir)/extract.awk $(srcdir)/../doc/gawk.texi
+ @echo 'some makes are stupid and will not check a directory' > stamp-eg
+ @echo 'against a file, so this file is a place holder. gack.' >> stamp-eg
+
+pwcat: $(srcdir)/eg/lib/pwcat.c
+ $(CC) $(CFLAGS) $(srcdir)/eg/lib/pwcat.c -o $@
+
+grcat: $(srcdir)/eg/lib/grcat.c
+ $(CC) $(CFLAGS) $(srcdir)/eg/lib/grcat.c -o $@
+
+igawk: $(srcdir)/eg/prog/igawk.sh
+ cp $(srcdir)/eg/prog/igawk.sh $@ ; chmod 755 $@
+
+passwd.awk: $(srcdir)/eg/lib/passwdawk.in
+ (cd $(srcdir)/eg/lib ; \
+ sed 's;/usr/local/libexec/awk;$(libexecdir);' < passwdawk.in) > passwd.awk
+
+group.awk: $(srcdir)/eg/lib/groupawk.in
+ (cd $(srcdir)/eg/lib ; \
+ sed 's;/usr/local/libexec/awk;$(libexecdir);' < groupawk.in) > group.awk
+
+install: igawk $(AUXPROGS) $(AUXAWK)
+ $(INSTALL_PROGRAM) igawk $(bindir)
+ for i in $(AUXPROGS) ; do \
+ $(INSTALL_PROGRAM) $$i $(libexecdir) ; \
+ done
+ for i in $(AUXAWK) $(srcdir)/eg/lib/*.awk ; do \
+ $(INSTALL_DATA) $$i $(datadir) ; \
+ done
+
+# libexecdir and bindir are removed in the top level Makefile's uninstall
+uninstall:
+ rm -fr $(libexecdir)/* $(datadir)/*
+ rm -f $(bindir)/igawk
+
+clean:
+ rm -f $(AUXPROGS) igawk
+
+maintainer-clean: clean
+ rm -fr eg stamp-eg
+
+distclean: clean
+ rm -f Makefile
diff --git a/awklib/eg/data/BBS-list b/awklib/eg/data/BBS-list
new file mode 100644
index 00000000..1007417f
--- /dev/null
+++ b/awklib/eg/data/BBS-list
@@ -0,0 +1,11 @@
+aardvark 555-5553 1200/300 B
+alpo-net 555-3412 2400/1200/300 A
+barfly 555-7685 1200/300 A
+bites 555-1675 2400/1200/300 A
+camelot 555-0542 300 C
+core 555-2912 1200/300 C
+fooey 555-1234 2400/1200/300 B
+foot 555-6699 1200/300 B
+macfoo 555-6480 1200/300 A
+sdace 555-3430 2400/1200/300 A
+sabafoo 555-2127 1200/300 C
diff --git a/awklib/eg/data/inventory-shipped b/awklib/eg/data/inventory-shipped
new file mode 100644
index 00000000..6788a0ef
--- /dev/null
+++ b/awklib/eg/data/inventory-shipped
@@ -0,0 +1,17 @@
+Jan 13 25 15 115
+Feb 15 32 24 226
+Mar 15 24 34 228
+Apr 31 52 63 420
+May 16 34 29 208
+Jun 31 42 75 492
+Jul 24 34 67 436
+Aug 15 34 47 316
+Sep 13 55 37 277
+Oct 29 54 68 525
+Nov 20 87 82 577
+Dec 17 35 61 401
+
+Jan 21 36 64 620
+Feb 26 58 80 652
+Mar 24 75 70 495
+Apr 21 70 74 514
diff --git a/awklib/eg/lib/assert.awk b/awklib/eg/lib/assert.awk
new file mode 100644
index 00000000..914aa632
--- /dev/null
+++ b/awklib/eg/lib/assert.awk
@@ -0,0 +1,18 @@
+# assert --- assert that a condition is true. Otherwise exit.
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May, 1993
+
+function assert(condition, string)
+{
+ if (! condition) {
+ printf("%s:%d: assertion failed: %s\n",
+ FILENAME, FNR, string) > "/dev/stderr"
+ _assert_exit = 1
+ exit 1
+ }
+}
+
+END {
+ if (_assert_exit)
+ exit 1
+}
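+# For example, a program that reads this library first can write:
+#   { assert(NF > 2, "NF > 2") }
+# A record with two or fewer fields then produces a message of the form
+#   file:line: assertion failed: NF > 2
+# on /dev/stderr, and the program exits with status 1.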
diff --git a/awklib/eg/lib/ctime.awk b/awklib/eg/lib/ctime.awk
new file mode 100644
index 00000000..0a50d262
--- /dev/null
+++ b/awklib/eg/lib/ctime.awk
@@ -0,0 +1,11 @@
+# ctime.awk
+#
+# awk version of C ctime(3) function
+
+function ctime(ts, format)
+{
+ format = "%a %b %d %H:%M:%S %Z %Y"
+ if (ts == 0)
+ ts = systime() # use current time as default
+ return strftime(format, ts)
+}
diff --git a/awklib/eg/lib/ftrans.awk b/awklib/eg/lib/ftrans.awk
new file mode 100644
index 00000000..0d6e8108
--- /dev/null
+++ b/awklib/eg/lib/ftrans.awk
@@ -0,0 +1,15 @@
+# ftrans.awk --- handle data file transitions
+#
+# user supplies beginfile() and endfile() functions
+#
+# Arnold Robbins, arnold@gnu.ai.mit.edu. November 1992
+# Public Domain
+
+FNR == 1 {
+ if (_filename_ != "")
+ endfile(_filename_)
+ _filename_ = FILENAME
+ beginfile(FILENAME)
+}
+
+END { endfile(_filename_) }
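+# For example (myprog.awk here stands for any program that supplies
+# the two hooks):
+#   gawk -f ftrans.awk -f myprog.awk file1 file2
+# calls beginfile() at the start of each data file and endfile() when
+# the file is finished (the last one from the END rule above).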
diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk
new file mode 100644
index 00000000..70a1ec0f
--- /dev/null
+++ b/awklib/eg/lib/getopt.awk
@@ -0,0 +1,82 @@
+# getopt --- do C library getopt(3) function in awk
+#
+# arnold@gnu.ai.mit.edu
+# Public domain
+#
+# Initial version: March, 1991
+# Revised: May, 1993
+
+# External variables:
+# Optind -- index of ARGV for first non-option argument
+# Optarg -- string value of argument to current option
+# Opterr -- if non-zero, print our own diagnostic
+# Optopt -- current option letter
+
+# Returns
+# -1 at end of options
+# ? for unrecognized option
+# <c> a character representing the current option
+
+# Private Data
+# _opti index in multi-flag option, e.g., -abc
+function getopt(argc, argv, options, optl, thisopt, i)
+{
+ optl = length(options)
+ if (optl == 0) # no options given
+ return -1
+
+ if (argv[Optind] == "--") { # all done
+ Optind++
+ _opti = 0
+ return -1
+ } else if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) {
+ _opti = 0
+ return -1
+ }
+ if (_opti == 0)
+ _opti = 2
+ thisopt = substr(argv[Optind], _opti, 1)
+ Optopt = thisopt
+ i = index(options, thisopt)
+ if (i == 0) {
+ if (Opterr)
+ printf("%c -- invalid option\n",
+ thisopt) > "/dev/stderr"
+ if (_opti >= length(argv[Optind])) {
+ Optind++
+ _opti = 0
+ } else
+ _opti++
+ return "?"
+ }
+ if (substr(options, i + 1, 1) == ":") {
+ # get option argument
+ if (length(substr(argv[Optind], _opti + 1)) > 0)
+ Optarg = substr(argv[Optind], _opti + 1)
+ else
+ Optarg = argv[++Optind]
+ _opti = 0
+ } else
+ Optarg = ""
+ if (_opti == 0 || _opti >= length(argv[Optind])) {
+ Optind++
+ _opti = 0
+ } else
+ _opti++
+ return thisopt
+}
+BEGIN {
+ Opterr = 1 # default is to diagnose
+ Optind = 1 # skip ARGV[0]
+
+ # test program
+ if (_getopt_test) {
+ while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
+ printf("c = <%c>, optarg = <%s>\n",
+ _go_c, Optarg)
+ printf("non-option arguments:\n")
+ for (; Optind < ARGC; Optind++)
+ printf("\tARGV[%d] = <%s>\n",
+ Optind, ARGV[Optind])
+ }
+}
diff --git a/awklib/eg/lib/gettime.awk b/awklib/eg/lib/gettime.awk
new file mode 100644
index 00000000..500dfcef
--- /dev/null
+++ b/awklib/eg/lib/gettime.awk
@@ -0,0 +1,61 @@
+# gettimeofday --- get the time of day in a usable format
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain, May 1993
+#
+# Returns a string in the format of output of date(1)
+# Populates the array argument time with individual values:
+# time["second"] -- seconds (0 - 59)
+# time["minute"] -- minutes (0 - 59)
+# time["hour"] -- hours (0 - 23)
+# time["althour"] -- hours (0 - 12)
+# time["monthday"] -- day of month (1 - 31)
+# time["month"] -- month of year (1 - 12)
+# time["monthname"] -- name of the month
+# time["shortmonth"] -- short name of the month
+# time["year"] -- year within century (0 - 99)
+# time["fullyear"] -- year with century (19xx or 20xx)
+# time["weekday"] -- day of week (Sunday = 0)
+# time["altweekday"] -- day of week (Monday = 0)
+# time["weeknum"] -- week number, Sunday first day
+# time["altweeknum"] -- week number, Monday first day
+# time["dayname"] -- name of weekday
+# time["shortdayname"] -- short name of weekday
+# time["yearday"] -- day of year (0 - 365)
+# time["timezone"] -- abbreviation of timezone name
+# time["ampm"] -- AM or PM designation
+
+function gettimeofday(time, ret, now, i)
+{
+ # get time once, avoids unnecessary system calls
+ now = systime()
+
+ # return date(1)-style output
+ ret = strftime("%a %b %d %H:%M:%S %Z %Y", now)
+
+ # clear out target array
+ for (i in time)
+ delete time[i]
+
+ # fill in values, force numeric values to be
+ # numeric by adding 0
+ time["second"] = strftime("%S", now) + 0
+ time["minute"] = strftime("%M", now) + 0
+ time["hour"] = strftime("%H", now) + 0
+ time["althour"] = strftime("%I", now) + 0
+ time["monthday"] = strftime("%d", now) + 0
+ time["month"] = strftime("%m", now) + 0
+ time["monthname"] = strftime("%B", now)
+ time["shortmonth"] = strftime("%b", now)
+ time["year"] = strftime("%y", now) + 0
+ time["fullyear"] = strftime("%Y", now) + 0
+ time["weekday"] = strftime("%w", now) + 0
+ time["altweekday"] = strftime("%u", now) + 0
+ time["dayname"] = strftime("%A", now)
+ time["shortdayname"] = strftime("%a", now)
+ time["yearday"] = strftime("%j", now) + 0
+ time["timezone"] = strftime("%Z", now)
+ time["ampm"] = strftime("%p", now)
+ time["weeknum"] = strftime("%U", now) + 0
+ time["altweeknum"] = strftime("%W", now) + 0
+
+ return ret
+}
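+# For example:
+#   ret = gettimeofday(now)
+#   printf("it is %02d:%02d on %s\n",
+#          now["hour"], now["minute"], now["dayname"])
+# where ret holds the full date(1)-style string.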
diff --git a/awklib/eg/lib/grcat.c b/awklib/eg/lib/grcat.c
new file mode 100644
index 00000000..9742c592
--- /dev/null
+++ b/awklib/eg/lib/grcat.c
@@ -0,0 +1,34 @@
+/*
+ * grcat.c
+ *
+ * Generate a printable version of the group database
+ *
+ * Arnold Robbins, arnold@gnu.ai.mit.edu
+ * May 1993
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include <grp.h>
+
+int
+main(argc, argv)
+int argc;
+char **argv;
+{
+ struct group *g;
+ int i;
+
+ while ((g = getgrent()) != NULL) {
+ printf("%s:%s:%d:", g->gr_name, g->gr_passwd,
+ g->gr_gid);
+ for (i = 0; g->gr_mem[i] != NULL; i++) {
+ printf("%s", g->gr_mem[i]);
+ if (g->gr_mem[i+1] != NULL)
+ putchar(',');
+ }
+ putchar('\n');
+ }
+ endgrent();
+ exit(0);
+}
diff --git a/awklib/eg/lib/groupawk.in b/awklib/eg/lib/groupawk.in
new file mode 100644
index 00000000..a8103a04
--- /dev/null
+++ b/awklib/eg/lib/groupawk.in
@@ -0,0 +1,80 @@
+# group.awk --- functions for dealing with the group file
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN \
+{
+ # Change to suit your system
+ _gr_awklib = "/usr/local/libexec/awk/"
+}
+function _gr_init( oldfs, oldrs, olddol0, grcat, n, a, i)
+{
+ if (_gr_inited)
+ return
+
+ oldfs = FS
+ oldrs = RS
+ olddol0 = $0
+ FS = ":"
+ RS = "\n"
+
+ grcat = _gr_awklib "grcat"
+ while ((grcat | getline) > 0) {
+ if ($1 in _gr_byname)
+ _gr_byname[$1] = _gr_byname[$1] "," $4
+ else
+ _gr_byname[$1] = $0
+ if ($3 in _gr_bygid)
+ _gr_bygid[$3] = _gr_bygid[$3] "," $4
+ else
+ _gr_bygid[$3] = $0
+
+ n = split($4, a, "[ \t]*,[ \t]*")
+ for (i = 1; i <= n; i++)
+ if (a[i] in _gr_groupsbyuser)
+ _gr_groupsbyuser[a[i]] = \
+ _gr_groupsbyuser[a[i]] " " $1
+ else
+ _gr_groupsbyuser[a[i]] = $1
+
+ _gr_bycount[++_gr_count] = $0
+ }
+ close(grcat)
+ _gr_count = 0
+ _gr_inited++
+ FS = oldfs
+ RS = oldrs
+ $0 = olddol0
+}
+function getgrnam(group)
+{
+ _gr_init()
+ if (group in _gr_byname)
+ return _gr_byname[group]
+ return ""
+}
+function getgrgid(gid)
+{
+ _gr_init()
+ if (gid in _gr_bygid)
+ return _gr_bygid[gid]
+ return ""
+}
+function getgruser(user)
+{
+ _gr_init()
+ if (user in _gr_groupsbyuser)
+ return _gr_groupsbyuser[user]
+ return ""
+}
+function getgrent()
+{
+ _gr_init()
+ if (++_gr_count in _gr_bycount)
+ return _gr_bycount[_gr_count]
+ return ""
+}
+function endgrent()
+{
+ _gr_count = 0
+}
diff --git a/awklib/eg/lib/join.awk b/awklib/eg/lib/join.awk
new file mode 100644
index 00000000..e6b81656
--- /dev/null
+++ b/awklib/eg/lib/join.awk
@@ -0,0 +1,15 @@
+# join.awk --- join an array into a string
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+function join(array, start, end, sep, result, i)
+{
+ if (sep == "")
+ sep = " "
+ else if (sep == SUBSEP) # magic value
+ sep = ""
+ result = array[start]
+ for (i = start + 1; i <= end; i++)
+ result = result sep array[i]
+ return result
+}
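+# For example, with a[1] = "x", a[2] = "y", a[3] = "z":
+#   join(a, 1, 3, ":")      returns "x:y:z"
+#   join(a, 1, 3)           returns "x y z"   (default separator is a space)
+#   join(a, 1, 3, SUBSEP)   returns "xyz"     (SUBSEP means no separator)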
diff --git a/awklib/eg/lib/mktime.awk b/awklib/eg/lib/mktime.awk
new file mode 100644
index 00000000..60c5b60d
--- /dev/null
+++ b/awklib/eg/lib/mktime.awk
@@ -0,0 +1,106 @@
+# mktime.awk --- convert a canonical date representation
+# into a timestamp
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN \
+{
+ # Initialize table of month lengths
+ _tm_months[0,1] = _tm_months[1,1] = 31
+ _tm_months[0,2] = 28; _tm_months[1,2] = 29
+ _tm_months[0,3] = _tm_months[1,3] = 31
+ _tm_months[0,4] = _tm_months[1,4] = 30
+ _tm_months[0,5] = _tm_months[1,5] = 31
+ _tm_months[0,6] = _tm_months[1,6] = 30
+ _tm_months[0,7] = _tm_months[1,7] = 31
+ _tm_months[0,8] = _tm_months[1,8] = 31
+ _tm_months[0,9] = _tm_months[1,9] = 30
+ _tm_months[0,10] = _tm_months[1,10] = 31
+ _tm_months[0,11] = _tm_months[1,11] = 30
+ _tm_months[0,12] = _tm_months[1,12] = 31
+}
+# decide if a year is a leap year
+function _tm_isleap(year, ret)
+{
+ ret = (year % 4 == 0 && year % 100 != 0) ||
+ (year % 400 == 0)
+
+ return ret
+}
+# convert a date into seconds
+function _tm_addup(a, total, yearsecs, daysecs,
+ hoursecs, i, j)
+{
+ hoursecs = 60 * 60
+ daysecs = 24 * hoursecs
+ yearsecs = 365 * daysecs
+
+ total = (a[1] - 1970) * yearsecs
+
+ # extra day for leap years
+ for (i = 1970; i < a[1]; i++)
+ if (_tm_isleap(i))
+ total += daysecs
+
+ j = _tm_isleap(a[1])
+ for (i = 1; i < a[2]; i++)
+ total += _tm_months[j, i] * daysecs
+
+ total += (a[3] - 1) * daysecs
+ total += a[4] * hoursecs
+ total += a[5] * 60
+ total += a[6]
+
+ return total
+}
+# mktime --- convert a date into seconds,
+# compensate for time zone
+
+function mktime(str, res1, res2, a, b, i, j, t, diff)
+{
+ i = split(str, a, " ") # don't rely on FS
+
+ if (i != 6)
+ return -1
+
+ # force numeric
+ for (j in a)
+ a[j] += 0
+
+ # validate
+ if (a[1] < 1970 ||
+ a[2] < 1 || a[2] > 12 ||
+ a[3] < 1 || a[3] > 31 ||
+ a[4] < 0 || a[4] > 23 ||
+ a[5] < 0 || a[5] > 59 ||
+ a[6] < 0 || a[6] > 61 )
+ return -1
+
+ res1 = _tm_addup(a)
+ t = strftime("%Y %m %d %H %M %S", res1)
+
+ if (_tm_debug)
+ printf("(%s) -> (%s)\n", str, t) > "/dev/stderr"
+
+ split(t, b, " ")
+ res2 = _tm_addup(b)
+
+ diff = res1 - res2
+
+ if (_tm_debug)
+ printf("diff = %d seconds\n", diff) > "/dev/stderr"
+
+ res1 += diff
+
+ return res1
+}
+BEGIN {
+ if (_tm_test) {
+ printf "Enter date as yyyy mm dd hh mm ss: "
+ getline _tm_test_date
+
+ t = mktime(_tm_test_date)
+ r = strftime("%Y %m %d %H %M %S", t)
+ printf "Got back (%s)\n", r
+ }
+}
diff --git a/awklib/eg/lib/nextfile.awk b/awklib/eg/lib/nextfile.awk
new file mode 100644
index 00000000..0f729a87
--- /dev/null
+++ b/awklib/eg/lib/nextfile.awk
@@ -0,0 +1,15 @@
+# nextfile --- skip remaining records in current file
+# correctly handle successive occurrences of the same file
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May, 1993
+
+# this should be read in before the "main" awk program
+
+function nextfile() { _abandon_ = FILENAME; next }
+
+_abandon_ == FILENAME {
+ if (FNR == 1)
+ _abandon_ = ""
+ else
+ next
+}
diff --git a/awklib/eg/lib/ord.awk b/awklib/eg/lib/ord.awk
new file mode 100644
index 00000000..7e62cb88
--- /dev/null
+++ b/awklib/eg/lib/ord.awk
@@ -0,0 +1,54 @@
+# ord.awk --- do ord and chr
+#
+# Global identifiers:
+# _ord_: numerical values indexed by characters
+# _ord_init: function to initialize _ord_
+#
+# Arnold Robbins
+# arnold@gnu.ai.mit.edu
+# Public Domain
+# 16 January, 1992
+# 20 July, 1992, revised
+
+BEGIN { _ord_init() }
+function _ord_init( low, high, i, t)
+{
+ low = sprintf("%c", 7) # BEL is ascii 7
+ if (low == "\a") { # regular ascii
+ low = 0
+ high = 127
+ } else if (sprintf("%c", 128 + 7) == "\a") {
+ # ascii, mark parity
+ low = 128
+ high = 255
+ } else { # ebcdic(!)
+ low = 0
+ high = 255
+ }
+
+ for (i = low; i <= high; i++) {
+ t = sprintf("%c", i)
+ _ord_[t] = i
+ }
+}
+function ord(str, c)
+{
+ # only first character is of interest
+ c = substr(str, 1, 1)
+ return _ord_[c]
+}
+function chr(c)
+{
+ # force c to be numeric by adding 0
+ return sprintf("%c", c + 0)
+}
+#### test code ####
+# BEGIN \
+# {
+# for (;;) {
+# printf("enter a character: ")
+# if (getline var <= 0)
+# break
+# printf("ord(%s) = %d\n", var, ord(var))
+# }
+# }
diff --git a/awklib/eg/lib/passwdawk.in b/awklib/eg/lib/passwdawk.in
new file mode 100644
index 00000000..7b64f60d
--- /dev/null
+++ b/awklib/eg/lib/passwdawk.in
@@ -0,0 +1,56 @@
+# passwd.awk --- access password file information
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN {
+ # tailor this to suit your system
+ _pw_awklib = "/usr/local/libexec/awk/"
+}
+
+function _pw_init( oldfs, oldrs, olddol0, pwcat)
+{
+ if (_pw_inited)
+ return
+ oldfs = FS
+ oldrs = RS
+ olddol0 = $0
+ FS = ":"
+ RS = "\n"
+ pwcat = _pw_awklib "pwcat"
+ while ((pwcat | getline) > 0) {
+ _pw_byname[$1] = $0
+ _pw_byuid[$3] = $0
+ _pw_bycount[++_pw_total] = $0
+ }
+ close(pwcat)
+ _pw_count = 0
+ _pw_inited = 1
+ FS = oldfs
+ RS = oldrs
+ $0 = olddol0
+}
+function getpwnam(name)
+{
+ _pw_init()
+ if (name in _pw_byname)
+ return _pw_byname[name]
+ return ""
+}
+function getpwuid(uid)
+{
+ _pw_init()
+ if (uid in _pw_byuid)
+ return _pw_byuid[uid]
+ return ""
+}
+function getpwent()
+{
+ _pw_init()
+ if (_pw_count < _pw_total)
+ return _pw_bycount[++_pw_count]
+ return ""
+}
+function endpwent()
+{
+ _pw_count = 0
+}
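+# For example:
+#   print getpwnam("root")
+# prints root's entry as a single ":"-separated line (as output by pwcat),
+# or the null string if there is no such user.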
diff --git a/awklib/eg/lib/pwcat.c b/awklib/eg/lib/pwcat.c
new file mode 100644
index 00000000..ecd25861
--- /dev/null
+++ b/awklib/eg/lib/pwcat.c
@@ -0,0 +1,29 @@
+/*
+ * pwcat.c
+ *
+ * Generate a printable version of the password database
+ *
+ * Arnold Robbins
+ * arnold@gnu.ai.mit.edu
+ * May 1993
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include <pwd.h>
+
+int
+main(argc, argv)
+int argc;
+char **argv;
+{
+ struct passwd *p;
+
+ while ((p = getpwent()) != NULL)
+ printf("%s:%s:%d:%d:%s:%s:%s\n",
+ p->pw_name, p->pw_passwd, p->pw_uid,
+ p->pw_gid, p->pw_gecos, p->pw_dir, p->pw_shell);
+
+ endpwent();
+ exit(0);
+}
diff --git a/awklib/eg/misc/arraymax.awk b/awklib/eg/misc/arraymax.awk
new file mode 100644
index 00000000..20dd1768
--- /dev/null
+++ b/awklib/eg/misc/arraymax.awk
@@ -0,0 +1,10 @@
+{
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
+}
+
+END {
+ for (x = 1; x <= max; x++)
+ print arr[x]
+}
diff --git a/awklib/eg/misc/arraymax.data b/awklib/eg/misc/arraymax.data
new file mode 100644
index 00000000..dbee328c
--- /dev/null
+++ b/awklib/eg/misc/arraymax.data
@@ -0,0 +1,5 @@
+5 I am the Five man
+2 Who are you? The new number two!
+4 . . . And four on the floor
+1 Who is number one?
+3 I three you.
diff --git a/awklib/eg/misc/findpat.data b/awklib/eg/misc/findpat.data
new file mode 100644
index 00000000..9f72969e
--- /dev/null
+++ b/awklib/eg/misc/findpat.data
@@ -0,0 +1,7 @@
+FIND ru+n
+My program runs
+but not very quickly
+FIND Melvin
+JF+KM
+This line is property of Reality Engineering Co.
+Melvin was here.
diff --git a/awklib/eg/misc/findpat.sh b/awklib/eg/misc/findpat.sh
new file mode 100644
index 00000000..39710324
--- /dev/null
+++ b/awklib/eg/misc/findpat.sh
@@ -0,0 +1,10 @@
+awk '{
+ if ($1 == "FIND")
+ regex = $2
+ else {
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", \
+ where, "in", $0
+ }
+}'
diff --git a/awklib/eg/prog/alarm.awk b/awklib/eg/prog/alarm.awk
new file mode 100644
index 00000000..fa42dce0
--- /dev/null
+++ b/awklib/eg/prog/alarm.awk
@@ -0,0 +1,81 @@
+# alarm --- set an alarm
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# usage: alarm time [ "message" [ count [ delay ] ] ]
+
+BEGIN \
+{
+ # Initial argument sanity checking
+ usage1 = "usage: alarm time ['message' [count [delay]]]"
+ usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
+
+ if (ARGC < 2) {
+ print usage1 > "/dev/stderr"
+ print usage2 > "/dev/stderr"
+ exit 1
+ } else if (ARGC == 5) {
+ delay = ARGV[4] + 0
+ count = ARGV[3] + 0
+ message = ARGV[2]
+ } else if (ARGC == 4) {
+ count = ARGV[3] + 0
+ message = ARGV[2]
+ } else if (ARGC == 3) {
+ message = ARGV[2]
+ } else if (ARGV[1] !~ /[0-9]?[0-9]:[0-9][0-9]/) {
+ print usage1 > "/dev/stderr"
+ print usage2 > "/dev/stderr"
+ exit 1
+ }
+
+ # set defaults for once we reach the desired time
+ if (delay == 0)
+ delay = 180 # 3 minutes
+ if (count == 0)
+ count = 5
+ if (message == "")
+ message = sprintf("\aIt is now %s!\a", ARGV[1])
+ else if (index(message, "\a") == 0)
+ message = "\a" message "\a"
+ # split up dest time
+ split(ARGV[1], atime, ":")
+ hour = atime[1] + 0 # force numeric
+ minute = atime[2] + 0 # force numeric
+
+ # get current broken down time
+ gettimeofday(now)
+
+ # if time given is 12-hour hours and it's after that
+ # hour, e.g., `alarm 5:30' at 9 a.m. means 5:30 p.m.,
+ # then add 12 to real hour
+ if (hour < 12 && now["hour"] > hour)
+ hour += 12
+
+ # set target time in seconds since midnight
+ target = (hour * 60 * 60) + (minute * 60)
+
+ # get current time in seconds since midnight
+ current = (now["hour"] * 60 * 60) + \
+ (now["minute"] * 60) + now["second"]
+
+ # how long to sleep for
+ naptime = target - current
+ if (naptime <= 0) {
+ print "time is in the past!" > "/dev/stderr"
+ exit 1
+ }
+ # zzzzzz..... go away if interrupted
+ if (system(sprintf("sleep %d", naptime)) != 0)
+ exit 1
+
+ # time to notify!
+ command = sprintf("sleep %d", delay)
+ for (i = 1; i <= count; i++) {
+ print message
+ # if sleep command interrupted, go away
+ if (system(command) != 0)
+ break
+ }
+
+ exit 0
+}
diff --git a/awklib/eg/prog/awksed.awk b/awklib/eg/prog/awksed.awk
new file mode 100644
index 00000000..cd96ddeb
--- /dev/null
+++ b/awklib/eg/prog/awksed.awk
@@ -0,0 +1,31 @@
+# awksed.awk --- do s/foo/bar/g using just print
+# Thanks to Michael Brennan for the idea
+
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# August 1995
+
+function usage()
+{
+ print "usage: awksed pat repl [files...]" > "/dev/stderr"
+ exit 1
+}
+
+BEGIN {
+ # validate arguments
+ if (ARGC < 3)
+ usage()
+
+ RS = ARGV[1]
+ ORS = ARGV[2]
+
+ # don't use arguments as files
+ ARGV[1] = ARGV[2] = ""
+}
+
+# look ma, no hands!
+{
+ if (RT == "")
+ printf "%s", $0
+ else
+ print
+}
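+# For example:
+#   gawk -f awksed.awk 'foo' 'bar' file1
+# copies file1 to standard output, replacing every occurrence of "foo"
+# with "bar" (the pattern is really a record separator regexp).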
diff --git a/awklib/eg/prog/cut.awk b/awklib/eg/prog/cut.awk
new file mode 100644
index 00000000..c69e6492
--- /dev/null
+++ b/awklib/eg/prog/cut.awk
@@ -0,0 +1,136 @@
+# cut.awk --- implement cut in awk
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Options:
+# -f list Cut fields
+# -d c Field delimiter character
+# -c list Cut characters
+#
+# -s Suppress lines without the delimiter character
+
+function usage( e1, e2)
+{
+ e1 = "usage: cut [-f list] [-d c] [-s] [files...]"
+ e2 = "usage: cut [-c list] [files...]"
+ print e1 > "/dev/stderr"
+ print e2 > "/dev/stderr"
+ exit 1
+}
+BEGIN \
+{
+ FS = "\t" # default
+ OFS = FS
+ while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) {
+ if (c == "f") {
+ by_fields = 1
+ fieldlist = Optarg
+ } else if (c == "c") {
+ by_chars = 1
+ fieldlist = Optarg
+ OFS = ""
+ } else if (c == "d") {
+ if (length(Optarg) > 1) {
+ printf("Using first character of %s" \
+ " for delimiter\n", Optarg) > "/dev/stderr"
+ Optarg = substr(Optarg, 1, 1)
+ }
+ FS = Optarg
+ OFS = FS
+ if (FS == " ") # defeat awk semantics
+ FS = "[ ]"
+ } else if (c == "s")
+ suppress++
+ else
+ usage()
+ }
+
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+ if (by_fields && by_chars)
+ usage()
+
+ if (by_fields == 0 && by_chars == 0)
+ by_fields = 1 # default
+
+ if (fieldlist == "") {
+ print "cut: needs list for -c or -f" > "/dev/stderr"
+ exit 1
+ }
+
+ if (by_fields)
+ set_fieldlist()
+ else
+ set_charlist()
+}
+function set_fieldlist( n, m, i, j, k, f, g)
+{
+ n = split(fieldlist, f, ",")
+ j = 1 # index in flist
+ for (i = 1; i <= n; i++) {
+ if (index(f[i], "-") != 0) { # a range
+ m = split(f[i], g, "-")
+ if (m != 2 || g[1] >= g[2]) {
+ printf("bad field list: %s\n",
+ f[i]) > "/dev/stderr"
+ exit 1
+ }
+ for (k = g[1]; k <= g[2]; k++)
+ flist[j++] = k
+ } else
+ flist[j++] = f[i]
+ }
+ nfields = j - 1
+}
+function set_charlist( field, i, j, f, g, t,
+ filler, last, len, n, m)
+{
+ field = 1 # count total fields
+ n = split(fieldlist, f, ",")
+ j = 1 # index in flist
+ for (i = 1; i <= n; i++) {
+ if (index(f[i], "-") != 0) { # range
+ m = split(f[i], g, "-")
+ if (m != 2 || g[1] >= g[2]) {
+ printf("bad character list: %s\n",
+ f[i]) > "/dev/stderr"
+ exit 1
+ }
+ len = g[2] - g[1] + 1
+ if (g[1] > 1) # compute length of filler
+ filler = g[1] - last - 1
+ else
+ filler = 0
+ if (filler)
+ t[field++] = filler
+ t[field++] = len # length of field
+ last = g[2]
+ flist[j++] = field - 1
+ } else {
+ if (f[i] > 1)
+ filler = f[i] - last - 1
+ else
+ filler = 0
+ if (filler)
+ t[field++] = filler
+ t[field++] = 1
+ last = f[i]
+ flist[j++] = field - 1
+ }
+ }
+ FIELDWIDTHS = join(t, 1, field - 1)
+ nfields = j - 1
+}
+{
+ if (by_fields && suppress && $0 !~ FS)
+ next
+
+ for (i = 1; i <= nfields; i++) {
+ if ($flist[i] != "") {
+ printf "%s", $flist[i]
+ if (i < nfields && $flist[i+1] != "")
+ printf "%s", OFS
+ }
+ }
+ print ""
+}
diff --git a/awklib/eg/prog/dupword.awk b/awklib/eg/prog/dupword.awk
new file mode 100644
index 00000000..8ae0fdc7
--- /dev/null
+++ b/awklib/eg/prog/dupword.awk
@@ -0,0 +1,16 @@
+# dupword --- find duplicate words in text
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# December 1991
+
+{
+ $0 = tolower($0)
+ gsub(/[^A-Za-z0-9 \t]/, "");
+ if ($1 == prev)
+ printf("%s:%d: duplicate %s\n",
+ FILENAME, FNR, $1)
+ for (i = 2; i <= NF; i++)
+ if ($i == $(i-1))
+ printf("%s:%d: duplicate %s\n",
+ FILENAME, FNR, $i)
+ prev = $NF
+}
diff --git a/awklib/eg/prog/egrep.awk b/awklib/eg/prog/egrep.awk
new file mode 100644
index 00000000..5a5ec988
--- /dev/null
+++ b/awklib/eg/prog/egrep.awk
@@ -0,0 +1,96 @@
+# egrep.awk --- simulate egrep in awk
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Options:
+# -c count of lines
+# -s silent - use exit value
+# -v invert test, success if no match
+# -i ignore case
+# -l print filenames only
+# -e argument is pattern
+
+BEGIN {
+ while ((c = getopt(ARGC, ARGV, "ce:svil")) != -1) {
+ if (c == "c")
+ count_only++
+ else if (c == "s")
+ no_print++
+ else if (c == "v")
+ invert++
+ else if (c == "i")
+ IGNORECASE = 1
+ else if (c == "l")
+ filenames_only++
+ else if (c == "e")
+ pattern = Optarg
+ else
+ usage()
+ }
+ if (pattern == "")
+ pattern = ARGV[Optind++]
+
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+ if (Optind >= ARGC) {
+ ARGV[1] = "-"
+ ARGC = 2
+ } else if (ARGC - Optind > 1)
+ do_filenames++
+
+# if (IGNORECASE)
+# pattern = tolower(pattern)
+}
+#{
+# if (IGNORECASE)
+# $0 = tolower($0)
+#}
+function beginfile(junk)
+{
+ fcount = 0
+}
+function endfile(file)
+{
+ if (! no_print && count_only)
+ if (do_filenames)
+ print file ":" fcount
+ else
+ print fcount
+
+ total += fcount
+}
+{
+ matches = ($0 ~ pattern)
+ if (invert)
+ matches = ! matches
+
+ fcount += matches # 1 or 0
+
+ if (! matches)
+ next
+
+ if (no_print && ! count_only)
+ nextfile
+
+ if (filenames_only && ! count_only) {
+ print FILENAME
+ nextfile
+ }
+
+ if (do_filenames && ! count_only)
+ print FILENAME ":" $0
+ else if (! count_only)
+ print
+}
+END \
+{
+ if (total == 0)
+ exit 1
+ exit 0
+}
+function usage( e)
+{
+ e = "Usage: egrep [-csvil] [-e pat] [files ...]"
+ print e > "/dev/stderr"
+ exit 1
+}
diff --git a/awklib/eg/prog/extract.awk b/awklib/eg/prog/extract.awk
new file mode 100644
index 00000000..a9f5b80f
--- /dev/null
+++ b/awklib/eg/prog/extract.awk
@@ -0,0 +1,72 @@
+# extract.awk --- extract files and run programs
+# from texinfo files
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN { IGNORECASE = 1 }
+
+/^@c(omment)?[ \t]+system/ \
+{
+ if (NF < 3) {
+ e = (FILENAME ":" FNR)
+ e = (e ": badly formed `system' line")
+ print e > "/dev/stderr"
+ next
+ }
+ $1 = ""
+ $2 = ""
+ stat = system($0)
+ if (stat != 0) {
+ e = (FILENAME ":" FNR)
+ e = (e ": warning: system returned " stat)
+ print e > "/dev/stderr"
+ }
+}
+/^@c(omment)?[ \t]+file/ \
+{
+ if (NF != 3) {
+ e = (FILENAME ":" FNR ": badly formed `file' line")
+ print e > "/dev/stderr"
+ next
+ }
+ if ($3 != curfile) {
+ if (curfile != "")
+ close(curfile)
+ curfile = $3
+ }
+
+ for (;;) {
+ if ((getline line) <= 0)
+ unexpected_eof()
+ if (line ~ /^@c(omment)?[ \t]+endfile/)
+ break
+ else if (line ~ /^@(end[ \t]+)?group/)
+ continue
+ if (index(line, "@") == 0) {
+ print line > curfile
+ continue
+ }
+ n = split(line, a, "@")
+ # if a[1] == "", means leading @,
+ # don't add one back in.
+ for (i = 2; i <= n; i++) {
+ if (a[i] == "") { # was an @@
+ a[i] = "@"
+ if (a[i+1] == "")
+ i++
+ }
+ }
+ print join(a, 1, n, SUBSEP) > curfile
+ }
+}
+function unexpected_eof()
+{
+ printf("%s:%d: unexpected EOF or error\n", \
+ FILENAME, FNR) > "/dev/stderr"
+ exit 1
+}
+
+END {
+ if (curfile)
+ close(curfile)
+}
diff --git a/awklib/eg/prog/histsort.awk b/awklib/eg/prog/histsort.awk
new file mode 100644
index 00000000..c2c9d1a7
--- /dev/null
+++ b/awklib/eg/prog/histsort.awk
@@ -0,0 +1,14 @@
+# histsort.awk --- compact a shell history file
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Thanks to Byron Rakitzis for the general idea
+{
+ if (data[$0]++ == 0)
+ lines[++count] = $0
+}
+
+END {
+ for (i = 1; i <= count; i++)
+ print lines[i]
+}
diff --git a/awklib/eg/prog/id.awk b/awklib/eg/prog/id.awk
new file mode 100644
index 00000000..b29ef61a
--- /dev/null
+++ b/awklib/eg/prog/id.awk
@@ -0,0 +1,69 @@
+# id.awk --- implement id in awk
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# output is:
+# uid=12(foo) euid=34(bar) gid=3(baz) \
+# egid=5(blat) groups=9(nine),2(two),1(one)
+
+BEGIN \
+{
+ if ((getline < "/dev/user") < 0) {
+ err = "id: no /dev/user support - cannot run"
+ print err > "/dev/stderr"
+ exit 1
+ }
+ close("/dev/user")
+
+ uid = $1
+ euid = $2
+ gid = $3
+ egid = $4
+
+ printf("uid=%d", uid)
+ pw = getpwuid(uid)
+ if (pw != "") {
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ }
+
+ if (euid != uid) {
+ printf(" euid=%d", euid)
+ pw = getpwuid(euid)
+ if (pw != "") {
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ }
+ }
+
+ printf(" gid=%d", gid)
+ pw = getgrgid(gid)
+ if (pw != "") {
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ }
+
+ if (egid != gid) {
+ printf(" egid=%d", egid)
+ pw = getgrgid(egid)
+ if (pw != "") {
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ }
+ }
+
+ if (NF > 4) {
+ printf(" groups=");
+ for (i = 5; i <= NF; i++) {
+ printf("%d", $i)
+ pw = getgrgid($i)
+ if (pw != "") {
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ }
+ if (i < NF)
+ printf(",")
+ }
+ }
+ print ""
+}
diff --git a/awklib/eg/prog/igawk.sh b/awklib/eg/prog/igawk.sh
new file mode 100644
index 00000000..a9fff180
--- /dev/null
+++ b/awklib/eg/prog/igawk.sh
@@ -0,0 +1,130 @@
+#! /bin/sh
+
+# igawk --- like gawk but do @include processing
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# July 1993
+
+if [ "$1" = debug ]
+then
+ set -x
+ shift
+else
+ # cleanup on exit, hangup, interrupt, quit, termination
+ trap 'rm -f /tmp/ig.[se].$$' 0 1 2 3 15
+fi
+
+while [ $# -ne 0 ] # loop over arguments
+do
+ case $1 in
+ --) shift; break;;
+
+ -W) shift
+ set -- -W"$@"
+ continue;;
+
+ -[vF]) opts="$opts $1 '$2'"
+ shift;;
+
+ -[vF]*) opts="$opts '$1'" ;;
+
+ -f) echo @include "$2" >> /tmp/ig.s.$$
+ shift;;
+
+ -f*) f=`echo "$1" | sed 's/-f//'`
+ echo @include "$f" >> /tmp/ig.s.$$ ;;
+
+ -?file=*) # -Wfile or --file
+ f=`echo "$1" | sed 's/-.file=//'`
+ echo @include "$f" >> /tmp/ig.s.$$ ;;
+
+ -?file) # get arg, $2
+ echo @include "$2" >> /tmp/ig.s.$$
+ shift;;
+
+ -?source=*) # -Wsource or --source
+ t=`echo "$1" | sed 's/-.source=//'`
+ echo "$t" >> /tmp/ig.s.$$ ;;
+
+ -?source) # get arg, $2
+ echo "$2" >> /tmp/ig.s.$$
+ shift;;
+
+ -?version)
+ echo igawk: version 1.0 1>&2
+ gawk --version
+ exit 0 ;;
+
+ -[W-]*) opts="$opts '$1'" ;;
+
+ *) break;;
+ esac
+ shift
+done
+
+if [ ! -s /tmp/ig.s.$$ ]
+then
+ if [ -z "$1" ]
+ then
+ echo igawk: no program! 1>&2
+ exit 1
+ else
+ echo "$1" > /tmp/ig.s.$$
+ shift
+ fi
+fi
+
+# at this point, /tmp/ig.s.$$ has the program
+gawk -- '
+# process @include directives
+
+function pathto(file, i, t, junk)
+{
+ if (index(file, "/") != 0)
+ return file
+
+ for (i = 1; i <= ndirs; i++) {
+ t = (pathlist[i] "/" file)
+ if ((getline junk < t) > 0) {
+ # found it
+ close(t)
+ return t
+ }
+ }
+ return ""
+}
+BEGIN {
+ path = ENVIRON["AWKPATH"]
+ ndirs = split(path, pathlist, ":")
+ for (i = 1; i <= ndirs; i++) {
+ if (pathlist[i] == "")
+ pathlist[i] = "."
+ }
+ stackptr = 0
+ input[stackptr] = ARGV[1] # ARGV[1] is first file
+
+ for (; stackptr >= 0; stackptr--) {
+ while ((getline < input[stackptr]) > 0) {
+ if (tolower($1) != "@include") {
+ print
+ continue
+ }
+ fpath = pathto($2)
+ if (fpath == "") {
+ printf("igawk:%s:%d: cannot find %s\n", \
+ input[stackptr], FNR, $2) > "/dev/stderr"
+ continue
+ }
+ if (! (fpath in processed)) {
+ processed[fpath] = input[stackptr]
+ input[++stackptr] = fpath
+ } else
+ print $2, "included in", input[stackptr], \
+ "already included in", \
+ processed[fpath] > "/dev/stderr"
+ }
+ close(input[stackptr])
+ }
+}' /tmp/ig.s.$$ > /tmp/ig.e.$$
+eval gawk -f /tmp/ig.e.$$ $opts -- "$@"
+
+exit $?
diff --git a/awklib/eg/prog/labels.awk b/awklib/eg/prog/labels.awk
new file mode 100644
index 00000000..55815d20
--- /dev/null
+++ b/awklib/eg/prog/labels.awk
@@ -0,0 +1,53 @@
+# labels.awk
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# June 1992
+
+# Program to print labels. Each label is 5 lines of data
+# that may have blank lines. The label sheets have 2
+# blank lines at the top and 2 at the bottom.
+
+BEGIN { RS = "" ; MAXLINES = 100 }
+
+function printpage( i, j)
+{
+ if (Nlines <= 0)
+ return
+
+ printf "\n\n" # header
+
+ for (i = 1; i <= Nlines; i += 10) {
+ if (i == 21 || i == 61)
+ print ""
+ for (j = 0; j < 5; j++) {
+ if (i + j > MAXLINES)
+ break
+ printf " %-41s %s\n", line[i+j], line[i+j+5]
+ }
+ print ""
+ }
+
+ printf "\n\n" # footer
+
+ for (i in line)
+ line[i] = ""
+}
+
+# main rule
+{
+ if (Count >= 20) {
+ printpage()
+ Count = 0
+ Nlines = 0
+ }
+ n = split($0, a, "\n")
+ for (i = 1; i <= n; i++)
+ line[++Nlines] = a[i]
+ for (; i <= 5; i++)
+ line[++Nlines] = ""
+ Count++
+}
+
+END \
+{
+ printpage()
+}
diff --git a/awklib/eg/prog/split.awk b/awklib/eg/prog/split.awk
new file mode 100644
index 00000000..e48653b4
--- /dev/null
+++ b/awklib/eg/prog/split.awk
@@ -0,0 +1,54 @@
+# split.awk --- do split in awk
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# usage: split [-num] [file] [outname]
+
+BEGIN \
+{
+ outfile = "x" # default
+ count = 1000
+ if (ARGC > 4)
+ usage()
+
+ i = 1
+ if (ARGV[i] ~ /^-[0-9]+$/) {
+ count = -ARGV[i]
+ ARGV[i] = ""
+ i++
+ }
+ # test argv in case reading from stdin instead of file
+ if (i in ARGV)
+ i++ # skip data file name
+ if (i in ARGV) {
+ outfile = ARGV[i]
+ ARGV[i] = ""
+ }
+
+ s1 = s2 = "a"
+ out = (outfile s1 s2)
+}
+{
+ if (++tcount > count) {
+ close(out)
+ if (s2 == "z") {
+ if (s1 == "z") {
+ printf("split: %s is too large to split\n", \
+ FILENAME) > "/dev/stderr"
+ exit 1
+ }
+ s1 = chr(ord(s1) + 1)
+ s2 = "a"
+ } else
+ s2 = chr(ord(s2) + 1)
+ out = (outfile s1 s2)
+ tcount = 1
+ }
+ print > out
+}
+function usage( e)
+{
+ e = "usage: split [-num] [file] [outname]"
+ print e > "/dev/stderr"
+ exit 1
+}
diff --git a/awklib/eg/prog/tee.awk b/awklib/eg/prog/tee.awk
new file mode 100644
index 00000000..895e4398
--- /dev/null
+++ b/awklib/eg/prog/tee.awk
@@ -0,0 +1,38 @@
+# tee.awk --- tee in awk
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+# Revised December 1995
+
+BEGIN \
+{
+ for (i = 1; i < ARGC; i++)
+ copy[i] = ARGV[i]
+
+ if (ARGV[1] == "-a") {
+ append = 1
+ delete ARGV[1]
+ delete copy[1]
+ ARGC--
+ }
+ if (ARGC < 2) {
+ print "usage: tee [-a] file ..." > "/dev/stderr"
+ exit 1
+ }
+ ARGV[1] = "-"
+ ARGC = 2
+}
+{
+ # moving the if outside the loop makes it run faster
+ if (append)
+ for (i in copy)
+ print >> copy[i]
+ else
+ for (i in copy)
+ print > copy[i]
+ print
+}
+END \
+{
+ for (i in copy)
+ close(copy[i])
+}
diff --git a/awklib/eg/prog/translate.awk b/awklib/eg/prog/translate.awk
new file mode 100644
index 00000000..6e9aa5a5
--- /dev/null
+++ b/awklib/eg/prog/translate.awk
@@ -0,0 +1,46 @@
+# translate --- do tr like stuff
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# August 1989
+
+# bugs: does not handle things like: tr A-Z a-z, it has
+# to be spelled out. However, if `to' is shorter than `from',
+# the last character in `to' is used for the rest of `from'.
+
+function stranslate(from, to, target, lf, lt, t_ar, i, c)
+{
+ lf = length(from)
+ lt = length(to)
+ for (i = 1; i <= lt; i++)
+ t_ar[substr(from, i, 1)] = substr(to, i, 1)
+ if (lt < lf)
+ for (; i <= lf; i++)
+ t_ar[substr(from, i, 1)] = substr(to, lt, 1)
+ for (i = 1; i <= lf; i++) {
+ c = substr(from, i, 1)
+ if (index(target, c) > 0)
+ gsub(c, t_ar[c], target)
+ }
+ return target
+}
+
+function translate(from, to)
+{
+ return $0 = stranslate(from, to, $0)
+}
+
+# main program
+BEGIN {
+ if (ARGC < 3) {
+ print "usage: translate from to" > "/dev/stderr"
+ exit
+ }
+ FROM = ARGV[1]
+ TO = ARGV[2]
+ ARGC = 2
+ ARGV[1] = "-"
+}
+
+{
+ translate(FROM, TO)
+ print
+}
diff --git a/awklib/eg/prog/uniq.awk b/awklib/eg/prog/uniq.awk
new file mode 100644
index 00000000..5f63ef0f
--- /dev/null
+++ b/awklib/eg/prog/uniq.awk
@@ -0,0 +1,116 @@
+# uniq.awk --- do uniq in awk
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+function usage( e)
+{
+ e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]"
+ print e > "/dev/stderr"
+ exit 1
+}
+
+# -c count lines. overrides -d and -u
+# -d only repeated lines
+# -u only non-repeated lines
+# -n skip n fields
+# +n skip n characters, skip fields first
+
+BEGIN \
+{
+ count = 1
+ outputfile = "/dev/stdout"
+ opts = "udc0:1:2:3:4:5:6:7:8:9:"
+ while ((c = getopt(ARGC, ARGV, opts)) != -1) {
+ if (c == "u")
+ non_repeated_only++
+ else if (c == "d")
+ repeated_only++
+ else if (c == "c")
+ do_count++
+ else if (index("0123456789", c) != 0) {
+ # getopt requires args to options
+ # this messes us up for things like -5
+ if (Optarg ~ /^[0-9]+$/)
+ fcount = (c Optarg) + 0
+ else {
+ fcount = c + 0
+ Optind--
+ }
+ } else
+ usage()
+ }
+
+ if (ARGV[Optind] ~ /^\+[0-9]+$/) {
+ charcount = substr(ARGV[Optind], 2) + 0
+ Optind++
+ }
+
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+
+ if (repeated_only == 0 && non_repeated_only == 0)
+ repeated_only = non_repeated_only = 1
+
+ if (ARGC - Optind == 2) {
+ outputfile = ARGV[ARGC - 1]
+ ARGV[ARGC - 1] = ""
+ }
+}
+function are_equal( n, m, clast, cline, alast, aline)
+{
+ if (fcount == 0 && charcount == 0)
+ return (last == $0)
+
+ if (fcount > 0) {
+ n = split(last, alast)
+ m = split($0, aline)
+ clast = join(alast, fcount+1, n)
+ cline = join(aline, fcount+1, m)
+ } else {
+ clast = last
+ cline = $0
+ }
+ if (charcount) {
+ clast = substr(clast, charcount + 1)
+ cline = substr(cline, charcount + 1)
+ }
+
+ return (clast == cline)
+}
+NR == 1 {
+ last = $0
+ next
+}
+
+{
+ equal = are_equal()
+
+ if (do_count) { # overrides -d and -u
+ if (equal)
+ count++
+ else {
+ printf("%4d %s\n", count, last) > outputfile
+ last = $0
+ count = 1 # reset
+ }
+ next
+ }
+
+ if (equal)
+ count++
+ else {
+ if ((repeated_only && count > 1) ||
+ (non_repeated_only && count == 1))
+ print last > outputfile
+ last = $0
+ count = 1
+ }
+}
+
+END {
+ if (do_count)
+ printf("%4d %s\n", count, last) > outputfile
+ else if ((repeated_only && count > 1) ||
+ (non_repeated_only && count == 1))
+ print last > outputfile
+}
diff --git a/awklib/eg/prog/wc.awk b/awklib/eg/prog/wc.awk
new file mode 100644
index 00000000..e9898159
--- /dev/null
+++ b/awklib/eg/prog/wc.awk
@@ -0,0 +1,68 @@
+# wc.awk --- count lines, words, characters
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Options:
+# -l only count lines
+# -w only count words
+# -c only count characters
+#
+# Default is to count lines, words, characters
+
+BEGIN {
+ # let getopt print a message about
+ # invalid options. we ignore them
+ while ((c = getopt(ARGC, ARGV, "lwc")) != -1) {
+ if (c == "l")
+ do_lines = 1
+ else if (c == "w")
+ do_words = 1
+ else if (c == "c")
+ do_chars = 1
+ }
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+
+ # if no options, do all
+ if (! do_lines && ! do_words && ! do_chars)
+ do_lines = do_words = do_chars = 1
+
+ print_total = (ARGC - i > 2)
+}
+function beginfile(file)
+{
+ chars = lines = words = 0
+ fname = FILENAME
+}
+
+function endfile(file)
+{
+ tchars += chars
+ tlines += lines
+ twords += words
+ if (do_lines)
+ printf "\t%d", lines
+ if (do_words)
+ printf "\t%d", words
+ if (do_chars)
+ printf "\t%d", chars
+ printf "\t%s\n", fname
+}
+# do per line
+{
+ chars += length($0) + 1 # get newline
+ lines++
+ words += NF
+}
+
+END {
+ if (print_total) {
+ if (do_lines)
+ printf "\t%d", tlines
+ if (do_words)
+ printf "\t%d", twords
+ if (do_chars)
+ printf "\t%d", tchars
+ print "\ttotal"
+ }
+}
diff --git a/awklib/eg/prog/wordfreq.awk b/awklib/eg/prog/wordfreq.awk
new file mode 100644
index 00000000..b67fed47
--- /dev/null
+++ b/awklib/eg/prog/wordfreq.awk
@@ -0,0 +1,13 @@
+# Print list of word frequencies
+{
+ $0 = tolower($0) # remove case distinctions
+ gsub(/[^a-z0-9_ \t]/, "", $0) # remove punctuation
+ for (i = 1; i <= NF; i++)
+ freq[$i]++
+}
+END {
+ sort = "sort +1 -nr"
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word] | sort
+ close(sort)
+}
diff --git a/awklib/extract.awk b/awklib/extract.awk
new file mode 100644
index 00000000..4f4648c4
--- /dev/null
+++ b/awklib/extract.awk
@@ -0,0 +1,87 @@
+# extract.awk --- extract files and run programs
+# from texinfo files
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN { IGNORECASE = 1 }
+
+/^@c(omment)?[ \t]+system/ \
+{
+ if (NF < 3) {
+ e = (FILENAME ":" FNR)
+ e = (e ": badly formed `system' line")
+ print e > "/dev/stderr"
+ next
+ }
+ $1 = ""
+ $2 = ""
+ stat = system($0)
+ if (stat != 0) {
+ e = (FILENAME ":" FNR)
+ e = (e ": warning: system returned " stat)
+ print e > "/dev/stderr"
+ }
+}
+/^@c(omment)?[ \t]+file/ \
+{
+ if (NF != 3) {
+ e = (FILENAME ":" FNR ": badly formed `file' line")
+ print e > "/dev/stderr"
+ next
+ }
+ if ($3 != curfile) {
+ if (curfile != "")
+ close(curfile)
+ curfile = $3
+ }
+
+ for (;;) {
+ if ((getline line) <= 0)
+ unexpected_eof()
+ if (line ~ /^@c(omment)?[ \t]+endfile/)
+ break
+ else if (line ~ /^@(end[ \t]+)?group/)
+ continue
+ if (index(line, "@") == 0) {
+ print line > curfile
+ continue
+ }
+ n = split(line, a, "@")
+ # if a[1] == "", means leading @,
+ # don't add one back in.
+ for (i = 2; i <= n; i++) {
+ if (a[i] == "") { # was an @@
+ a[i] = "@"
+ if (a[i+1] == "")
+ i++
+ }
+ }
+ print join(a, 1, n, SUBSEP) > curfile
+ }
+}
+function unexpected_eof()
+{
+ printf("%s:%d: unexpected EOF or error\n", \
+ FILENAME, FNR) > "/dev/stderr"
+ exit 1
+}
+
+END {
+ if (curfile)
+ close(curfile)
+}
+# join.awk --- join an array into a string
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+function join(array, start, end, sep, result, i)
+{
+ if (sep == "")
+ sep = " "
+ else if (sep == SUBSEP) # magic value
+ sep = ""
+ result = array[start]
+ for (i = start + 1; i <= end; i++)
+ result = result sep array[i]
+ return result
+}
diff --git a/awklib/group.awk b/awklib/group.awk
new file mode 100644
index 00000000..a8103a04
--- /dev/null
+++ b/awklib/group.awk
@@ -0,0 +1,80 @@
+# group.awk --- functions for dealing with the group file
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN \
+{
+ # Change to suit your system
+ _gr_awklib = "/usr/local/libexec/awk/"
+}
+function _gr_init( oldfs, oldrs, olddol0, grcat, n, a, i)
+{
+ if (_gr_inited)
+ return
+
+ oldfs = FS
+ oldrs = RS
+ olddol0 = $0
+ FS = ":"
+ RS = "\n"
+
+ grcat = _gr_awklib "grcat"
+ while ((grcat | getline) > 0) {
+ if ($1 in _gr_byname)
+ _gr_byname[$1] = _gr_byname[$1] "," $4
+ else
+ _gr_byname[$1] = $0
+ if ($3 in _gr_bygid)
+ _gr_bygid[$3] = _gr_bygid[$3] "," $4
+ else
+ _gr_bygid[$3] = $0
+
+ n = split($4, a, "[ \t]*,[ \t]*")
+ for (i = 1; i <= n; i++)
+ if (a[i] in _gr_groupsbyuser)
+ _gr_groupsbyuser[a[i]] = \
+ _gr_groupsbyuser[a[i]] " " $1
+ else
+ _gr_groupsbyuser[a[i]] = $1
+
+ _gr_bycount[++_gr_count] = $0
+ }
+ close(grcat)
+ _gr_count = 0
+ _gr_inited++
+ FS = oldfs
+ RS = oldrs
+ $0 = olddol0
+}
+function getgrnam(group)
+{
+ _gr_init()
+ if (group in _gr_byname)
+ return _gr_byname[group]
+ return ""
+}
+function getgrgid(gid)
+{
+ _gr_init()
+ if (gid in _gr_bygid)
+ return _gr_bygid[gid]
+ return ""
+}
+function getgruser(user)
+{
+ _gr_init()
+ if (user in _gr_groupsbyuser)
+ return _gr_groupsbyuser[user]
+ return ""
+}
+function getgrent()
+{
+ _gr_init()
+ if (++_gr_count in _gr_bycount)
+ return _gr_bycount[_gr_count]
+ return ""
+}
+function endgrent()
+{
+ _gr_count = 0
+}
diff --git a/awklib/igawk.save b/awklib/igawk.save
new file mode 100755
index 00000000..87412aa8
--- /dev/null
+++ b/awklib/igawk.save
@@ -0,0 +1,120 @@
+#! /bin/sh
+
+# igawk --- like gawk but do @include processing
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# July 1993
+
+if [ "$1" = debug ]
+then
+ set -x
+ shift
+else
+ # cleanup on exit, hangup, interrupt, quit, termination
+ trap 'rm -f /tmp/ig.[se].$$' 0 1 2 3 15
+fi
+
+while [ $# -ne 0 ] # loop over arguments
+do
+ case $1 in
+ --) shift; break;;
+
+ -W) shift
+ set -- -W"$@"
+ continue;;
+
+ -[vF]) opts="$opts $1 '$2'"
+ shift;;
+
+ -[vF]*) opts="$opts '$1'" ;;
+
+ -f) echo @include "$2" >> /tmp/ig.s.$$
+ shift;;
+
+ -f*) f=`echo "$1" | sed 's/-f//'`
+ echo @include "$f" >> /tmp/ig.s.$$ ;;
+
+ -?file=*) # -Wfile or --file
+ f=`echo "$1" | sed 's/-.file=//'`
+ echo @include "$f" >> /tmp/ig.s.$$ ;;
+
+ -?file) # get arg, $2
+ echo @include "$2" >> /tmp/ig.s.$$
+ shift;;
+
+ -?source=*) # -Wsource or --source
+ t=`echo "$1" | sed 's/-.source=//'`
+ echo "$t" >> /tmp/ig.s.$$ ;;
+
+ -?source) # get arg, $2
+ echo "$2" >> /tmp/ig.s.$$
+ shift;;
+
+ --*) opts="$opts '$1'" ;;
+
+ *) break;;
+ esac
+
+ shift
+done
+
+if [ ! -s /tmp/ig.s.$$ ]
+then
+ echo "$1" > /tmp/ig.s.$$
+ shift
+fi
+
+# at this point, /tmp/ig.s.$$ has the program
+gawk -- '
+# process @include directives
+
+function pathto(file, i, t, junk)
+{
+ if (index(file, "/") != 0)
+ return file
+
+ for (i = 1; i <= ndirs; i++) {
+ t = (pathlist[i] "/" file)
+ if ((getline junk < t) > 0) {
+ # found it
+ close(t)
+ return t
+ }
+ }
+ return ""
+}
+BEGIN {
+ path = ENVIRON["AWKPATH"]
+ ndirs = split(path, pathlist, ":")
+ for (i = 1; i <= ndirs; i++) {
+ if (pathlist[i] == "")
+ pathlist[i] = "."
+ }
+ stackptr = 0
+ input[stackptr] = ARGV[1] # ARGV[1] is first file
+
+ for (; stackptr >= 0; stackptr--) {
+ while ((getline < input[stackptr]) > 0) {
+ if (tolower($1) != "@include") {
+ print
+ continue
+ }
+ fpath = pathto($2)
+ if (fpath == "") {
+ printf("igawk:%s:%d: cannot find %s\n", \
+ input[stackptr], FNR, $2) > "/dev/stderr"
+ continue
+ }
+ if (! (fpath in processed)) {
+ processed[fpath] = input[stackptr]
+ input[++stackptr] = fpath
+ } else
+ print $2, "included in", input[stackptr], \
+ "already included in", \
+ processed[fpath] > "/dev/stderr"
+ }
+ close(input[stackptr])
+ }
+}' /tmp/ig.s.$$ > /tmp/ig.e.$$
+eval gawk -f /tmp/ig.e.$$ $opts -- "$@"
+
+exit $?
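For context, a hypothetical program that could be fed to the script above as igawk -f prog.awk /etc/passwd (illustrative only, not part of the gawk sources): igawk expands the @include lines and passes everything else to gawk untouched. The example assumes join.awk, shown earlier in this commit, can be found on AWKPATH:

    @include join.awk

    # re-emit each /etc/passwd line with ", " between the fields
    {
        n = split($0, parts, ":")
        print join(parts, 1, n, ", ")
    }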
diff --git a/awklib/passwd.awk b/awklib/passwd.awk
new file mode 100644
index 00000000..7b64f60d
--- /dev/null
+++ b/awklib/passwd.awk
@@ -0,0 +1,56 @@
+# passwd.awk --- access password file information
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN {
+ # tailor this to suit your system
+ _pw_awklib = "/usr/local/libexec/awk/"
+}
+
+function _pw_init( oldfs, oldrs, olddol0, pwcat)
+{
+ if (_pw_inited)
+ return
+ oldfs = FS
+ oldrs = RS
+ olddol0 = $0
+ FS = ":"
+ RS = "\n"
+ pwcat = _pw_awklib "pwcat"
+ while ((pwcat | getline) > 0) {
+ _pw_byname[$1] = $0
+ _pw_byuid[$3] = $0
+ _pw_bycount[++_pw_total] = $0
+ }
+ close(pwcat)
+ _pw_count = 0
+ _pw_inited = 1
+ FS = oldfs
+ RS = oldrs
+ $0 = olddol0
+}
+function getpwnam(name)
+{
+ _pw_init()
+ if (name in _pw_byname)
+ return _pw_byname[name]
+ return ""
+}
+function getpwuid(uid)
+{
+ _pw_init()
+ if (uid in _pw_byuid)
+ return _pw_byuid[uid]
+ return ""
+}
+function getpwent()
+{
+ _pw_init()
+ if (_pw_count < _pw_total)
+ return _pw_bycount[++_pw_count]
+ return ""
+}
+function endpwent()
+{
+ _pw_count = 0
+}
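As with the group library, a hypothetical usage sketch (not part of the gawk sources): each function above returns a whole pwcat record in name:passwd:uid:gid:gecos:dir:shell form. The sketch assumes passwd.awk is loaded first and pwcat is installed under _pw_awklib, e.g. gawk -f passwd.awk -f demo.awk:

    BEGIN {
        # walk the whole database, printing login name and shell
        while ((ent = getpwent()) != "") {
            split(ent, f, ":")
            print f[1], f[7]
        }
        endpwent()
    }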
diff --git a/awklib/stamp-eg b/awklib/stamp-eg
new file mode 100644
index 00000000..241abd9e
--- /dev/null
+++ b/awklib/stamp-eg
@@ -0,0 +1,2 @@
+some makes are stupid and will not check a directory
+against a file, so this file is a place holder. gack.
diff --git a/awktab.c b/awktab.c
index 6198e850..9bda6804 100644
--- a/awktab.c
+++ b/awktab.c
@@ -1,5 +1,5 @@
-/* A Bison parser, made from awk.y with Bison version GNU Bison version 1.22
+/* A Bison parser, made from ./awk.y with Bison version GNU Bison version 1.22
*/
#define YYBISON 1 /* Identify Bison output. */
@@ -33,16 +33,17 @@
#define LEX_EXIT 284
#define LEX_FUNCTION 285
#define LEX_GETLINE 286
-#define LEX_IN 287
-#define LEX_AND 288
-#define LEX_OR 289
-#define INCREMENT 290
-#define DECREMENT 291
-#define LEX_BUILTIN 292
-#define LEX_LENGTH 293
-#define UNARY 294
-
-#line 26 "awk.y"
+#define LEX_NEXTFILE 287
+#define LEX_IN 288
+#define LEX_AND 289
+#define LEX_OR 290
+#define INCREMENT 291
+#define DECREMENT 292
+#define LEX_BUILTIN 293
+#define LEX_LENGTH 294
+#define UNARY 295
+
+#line 26 "./awk.y"
#ifdef DEBUG
#define YYDEBUG 12
@@ -50,7 +51,14 @@
#include "awk.h"
-static void yyerror (); /* va_alist */
+#define CAN_FREE TRUE
+#define DONT_FREE FALSE
+
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+static void yyerror(const char *m, ...) ;
+#else
+static void yyerror(); /* va_alist */
+#endif
static char *get_src_buf P((void));
static int yylex P((void));
static NODE *node_common P((NODETYPE op));
@@ -64,11 +72,17 @@ static void pop_params P((NODE *params));
static NODE *make_param P((char *name));
static NODE *mk_rexp P((NODE *exp));
static int dup_parms P((NODE *func));
+static void param_sanity P((NODE *arglist));
+static int isnoeffect P((NODETYPE));
+
+enum defref { FUNC_DEFINE, FUNC_USE };
+static void func_use P((char *name, enum defref how));
+static void check_funcs P((void));
static int want_assign; /* lexical scanning kludge */
static int want_regexp; /* lexical scanning kludge */
static int can_return; /* lexical scanning kludge */
-static int io_allowed = 1; /* lexical scanning kludge */
+static int io_allowed = TRUE; /* lexical scanning kludge */
static char *lexptr; /* pointer to next char during parsing */
static char *lexend;
static char *lexptr_begin; /* keep track of where we were for error msgs */
@@ -91,7 +105,7 @@ extern int errcount;
extern NODE *begin_block;
extern NODE *end_block;
-#line 75 "awk.y"
+#line 88 "./awk.y"
typedef union {
long lval;
AWKNUM fval;
@@ -127,26 +141,26 @@ typedef
-#define YYFINAL 310
+#define YYFINAL 312
#define YYFLAG -32768
-#define YYNTBASE 61
+#define YYNTBASE 62
-#define YYTRANSLATE(x) ((unsigned)(x) <= 294 ? yytranslate[x] : 106)
+#define YYTRANSLATE(x) ((unsigned)(x) <= 295 ? yytranslate[x] : 107)
static const char yytranslate[] = { 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 50, 2, 2, 53, 49, 2, 2, 54,
- 55, 47, 45, 41, 46, 2, 48, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 40, 60, 42,
- 2, 43, 39, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 51, 2, 2, 54, 50, 2, 2, 55,
+ 56, 48, 46, 42, 47, 2, 49, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 41, 61, 43,
+ 2, 44, 40, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 56, 2, 57, 52, 2, 2, 2, 2, 2, 2,
+ 57, 2, 58, 53, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 58, 44, 59, 2, 2, 2, 2, 2,
+ 2, 2, 59, 45, 60, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -163,7 +177,7 @@ static const char yytranslate[] = { 0,
6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
- 36, 37, 38, 51
+ 36, 37, 38, 39, 52
};
#if YYDEBUG != 0
@@ -173,132 +187,135 @@ static const short yyprhs[] = { 0,
50, 52, 53, 61, 66, 71, 73, 77, 78, 83,
89, 94, 96, 99, 101, 104, 106, 109, 112, 115,
119, 121, 128, 137, 146, 157, 167, 170, 173, 180,
- 185, 189, 193, 194, 199, 206, 210, 213, 215, 217,
- 224, 234, 236, 239, 240, 242, 243, 246, 247, 250,
- 253, 256, 257, 259, 261, 265, 267, 270, 274, 275,
- 277, 278, 280, 282, 286, 288, 291, 295, 299, 300,
- 302, 304, 308, 310, 313, 317, 321, 322, 327, 333,
- 338, 342, 346, 350, 354, 356, 359, 363, 367, 371,
- 375, 381, 383, 386, 387, 392, 396, 400, 404, 406,
- 409, 413, 417, 421, 427, 429, 432, 434, 438, 442,
- 446, 450, 454, 458, 461, 464, 467, 471, 476, 481,
- 483, 488, 490, 493, 496, 498, 500, 503, 506, 507,
- 509, 511, 516, 519, 522, 525, 527, 528, 530, 532
+ 185, 189, 192, 196, 197, 202, 209, 213, 216, 218,
+ 220, 227, 237, 239, 242, 243, 245, 246, 249, 250,
+ 253, 256, 259, 260, 262, 264, 268, 270, 273, 277,
+ 278, 280, 281, 283, 285, 289, 291, 294, 298, 302,
+ 303, 305, 307, 311, 313, 316, 320, 324, 325, 330,
+ 336, 341, 345, 349, 353, 357, 359, 362, 366, 370,
+ 374, 378, 384, 386, 389, 390, 395, 399, 403, 407,
+ 409, 412, 416, 420, 424, 430, 432, 435, 437, 441,
+ 445, 449, 453, 457, 461, 464, 467, 470, 474, 479,
+ 484, 486, 491, 493, 496, 499, 501, 503, 506, 509,
+ 510, 512, 514, 519, 522, 525, 528, 530, 531, 533,
+ 535
};
-static const short yyrhs[] = { 82,
- 62, 82, 0, 63, 0, 62, 63, 0, 1, 0,
- 62, 1, 0, 0, 0, 15, 64, 74, 0, 0,
- 16, 65, 74, 0, 15, 76, 0, 16, 76, 0,
- 71, 74, 0, 74, 0, 71, 76, 0, 68, 70,
- 0, 4, 0, 3, 0, 67, 0, 37, 0, 38,
- 0, 0, 30, 69, 66, 54, 85, 102, 82, 0,
- 100, 75, 101, 103, 0, 100, 101, 103, 82, 0,
- 92, 0, 92, 41, 92, 0, 0, 48, 73, 5,
- 48, 0, 100, 75, 101, 103, 82, 0, 100, 101,
- 103, 82, 0, 77, 0, 75, 77, 0, 1, 0,
- 75, 1, 0, 81, 0, 104, 82, 0, 104, 82,
- 0, 100, 101, 0, 100, 75, 101, 0, 80, 0,
- 21, 54, 92, 102, 82, 77, 0, 22, 82, 77,
- 21, 54, 92, 102, 82, 0, 23, 54, 4, 32,
- 4, 102, 82, 77, 0, 23, 54, 87, 104, 92,
- 104, 87, 102, 82, 77, 0, 23, 54, 87, 104,
- 104, 87, 102, 82, 77, 0, 24, 76, 0, 25,
- 76, 0, 79, 54, 91, 102, 84, 76, 0, 79,
- 88, 84, 76, 0, 28, 87, 76, 0, 29, 87,
- 76, 0, 0, 19, 78, 87, 76, 0, 20, 4,
- 56, 91, 57, 76, 0, 20, 4, 76, 0, 92,
- 76, 0, 26, 0, 27, 0, 17, 54, 92, 102,
- 82, 77, 0, 17, 54, 92, 102, 82, 77, 18,
- 82, 77, 0, 13, 0, 81, 13, 0, 0, 81,
- 0, 0, 42, 96, 0, 0, 43, 92, 0, 10,
- 92, 0, 44, 92, 0, 0, 86, 0, 4, 0,
- 86, 105, 4, 0, 1, 0, 86, 1, 0, 86,
- 105, 1, 0, 0, 92, 0, 0, 89, 0, 94,
- 0, 89, 105, 94, 0, 1, 0, 89, 1, 0,
- 89, 1, 94, 0, 89, 105, 1, 0, 0, 91,
- 0, 92, 0, 91, 105, 92, 0, 1, 0, 91,
- 1, 0, 91, 1, 92, 0, 91, 105, 1, 0,
- 0, 99, 11, 93, 92, 0, 54, 91, 102, 32,
- 4, 0, 92, 44, 31, 98, 0, 31, 98, 83,
- 0, 92, 33, 92, 0, 92, 34, 92, 0, 92,
- 12, 92, 0, 72, 0, 50, 72, 0, 92, 32,
- 4, 0, 92, 9, 92, 0, 92, 42, 92, 0,
- 92, 43, 92, 0, 92, 39, 92, 40, 92, 0,
- 96, 0, 92, 96, 0, 0, 99, 11, 95, 94,
- 0, 94, 33, 94, 0, 94, 34, 94, 0, 31,
- 98, 83, 0, 72, 0, 50, 72, 0, 94, 12,
- 94, 0, 94, 32, 4, 0, 94, 9, 94, 0,
- 94, 39, 94, 40, 94, 0, 96, 0, 94, 96,
- 0, 97, 0, 96, 52, 96, 0, 96, 47, 96,
- 0, 96, 48, 96, 0, 96, 49, 96, 0, 96,
- 45, 96, 0, 96, 46, 96, 0, 99, 35, 0,
- 99, 36, 0, 50, 96, 0, 54, 92, 102, 0,
- 37, 54, 90, 102, 0, 38, 54, 90, 102, 0,
- 38, 0, 3, 54, 90, 102, 0, 99, 0, 35,
- 99, 0, 36, 99, 0, 7, 0, 8, 0, 46,
- 96, 0, 45, 96, 0, 0, 99, 0, 4, 0,
- 4, 56, 91, 57, 0, 53, 97, 0, 58, 82,
- 0, 59, 82, 0, 55, 0, 0, 104, 0, 60,
- 0, 41, 82, 0
+static const short yyrhs[] = { 83,
+ 63, 83, 0, 64, 0, 63, 64, 0, 1, 0,
+ 63, 1, 0, 0, 0, 15, 65, 75, 0, 0,
+ 16, 66, 75, 0, 15, 77, 0, 16, 77, 0,
+ 72, 75, 0, 75, 0, 72, 77, 0, 69, 71,
+ 0, 4, 0, 3, 0, 68, 0, 38, 0, 39,
+ 0, 0, 30, 70, 67, 55, 86, 103, 83, 0,
+ 101, 76, 102, 104, 0, 101, 102, 104, 83, 0,
+ 93, 0, 93, 42, 93, 0, 0, 49, 74, 5,
+ 49, 0, 101, 76, 102, 104, 83, 0, 101, 102,
+ 104, 83, 0, 78, 0, 76, 78, 0, 1, 0,
+ 76, 1, 0, 82, 0, 105, 83, 0, 105, 83,
+ 0, 101, 102, 0, 101, 76, 102, 0, 81, 0,
+ 21, 55, 93, 103, 83, 78, 0, 22, 83, 78,
+ 21, 55, 93, 103, 83, 0, 23, 55, 4, 33,
+ 4, 103, 83, 78, 0, 23, 55, 88, 105, 93,
+ 105, 88, 103, 83, 78, 0, 23, 55, 88, 105,
+ 105, 88, 103, 83, 78, 0, 24, 77, 0, 25,
+ 77, 0, 80, 55, 92, 103, 85, 77, 0, 80,
+ 89, 85, 77, 0, 28, 88, 77, 0, 32, 77,
+ 0, 29, 88, 77, 0, 0, 19, 79, 88, 77,
+ 0, 20, 4, 57, 92, 58, 77, 0, 20, 4,
+ 77, 0, 93, 77, 0, 26, 0, 27, 0, 17,
+ 55, 93, 103, 83, 78, 0, 17, 55, 93, 103,
+ 83, 78, 18, 83, 78, 0, 13, 0, 82, 13,
+ 0, 0, 82, 0, 0, 43, 97, 0, 0, 44,
+ 93, 0, 10, 93, 0, 45, 93, 0, 0, 87,
+ 0, 4, 0, 87, 106, 4, 0, 1, 0, 87,
+ 1, 0, 87, 106, 1, 0, 0, 93, 0, 0,
+ 90, 0, 95, 0, 90, 106, 95, 0, 1, 0,
+ 90, 1, 0, 90, 1, 95, 0, 90, 106, 1,
+ 0, 0, 92, 0, 93, 0, 92, 106, 93, 0,
+ 1, 0, 92, 1, 0, 92, 1, 93, 0, 92,
+ 106, 1, 0, 0, 100, 11, 94, 93, 0, 55,
+ 92, 103, 33, 4, 0, 93, 45, 31, 99, 0,
+ 31, 99, 84, 0, 93, 34, 93, 0, 93, 35,
+ 93, 0, 93, 12, 93, 0, 73, 0, 51, 73,
+ 0, 93, 33, 4, 0, 93, 9, 93, 0, 93,
+ 43, 93, 0, 93, 44, 93, 0, 93, 40, 93,
+ 41, 93, 0, 97, 0, 93, 97, 0, 0, 100,
+ 11, 96, 95, 0, 95, 34, 95, 0, 95, 35,
+ 95, 0, 31, 99, 84, 0, 73, 0, 51, 73,
+ 0, 95, 12, 95, 0, 95, 33, 4, 0, 95,
+ 9, 95, 0, 95, 40, 95, 41, 95, 0, 97,
+ 0, 95, 97, 0, 98, 0, 97, 53, 97, 0,
+ 97, 48, 97, 0, 97, 49, 97, 0, 97, 50,
+ 97, 0, 97, 46, 97, 0, 97, 47, 97, 0,
+ 100, 36, 0, 100, 37, 0, 51, 97, 0, 55,
+ 93, 103, 0, 38, 55, 91, 103, 0, 39, 55,
+ 91, 103, 0, 39, 0, 3, 55, 91, 103, 0,
+ 100, 0, 36, 100, 0, 37, 100, 0, 7, 0,
+ 8, 0, 47, 97, 0, 46, 97, 0, 0, 100,
+ 0, 4, 0, 4, 57, 92, 58, 0, 54, 98,
+ 0, 59, 83, 0, 60, 83, 0, 56, 0, 0,
+ 105, 0, 61, 0, 42, 83, 0
};
#endif
#if YYDEBUG != 0
static const short yyrline[] = { 0,
- 136, 141, 149, 165, 166, 167, 171, 173, 187, 189,
- 203, 209, 215, 217, 219, 232, 241, 243, 245, 255,
- 256, 260, 264, 275, 280, 289, 291, 300, 302, 320,
- 322, 327, 329, 337, 339, 344, 345, 349, 351, 353,
- 355, 357, 359, 361, 366, 370, 375, 378, 381, 383,
- 403, 430, 432, 434, 436, 438, 452, 457, 459, 464,
- 469, 476, 478, 482, 483, 487, 489, 494, 496, 498,
- 500, 505, 507, 512, 514, 516, 518, 520, 526, 528,
- 533, 535, 540, 542, 548, 550, 552, 554, 559, 561,
- 566, 568, 574, 576, 578, 580, 585, 588, 593, 595,
- 600, 606, 608, 610, 616, 626, 634, 636, 642, 644,
- 646, 648, 650, 655, 658, 659, 661, 663, 669, 671,
- 673, 675, 677, 679, 681, 683, 688, 690, 692, 694,
- 696, 698, 700, 702, 704, 709, 711, 713, 716, 718,
- 726, 731, 732, 734, 736, 738, 741, 748, 757, 759,
- 764, 766, 774, 779, 783, 787, 791, 792, 796, 799
+ 149, 157, 165, 181, 182, 183, 187, 189, 203, 205,
+ 219, 225, 231, 233, 235, 248, 257, 259, 261, 271,
+ 272, 276, 280, 291, 296, 305, 307, 316, 318, 336,
+ 338, 343, 349, 357, 359, 364, 365, 369, 371, 373,
+ 375, 377, 379, 381, 387, 391, 396, 399, 402, 404,
+ 424, 463, 482, 484, 489, 491, 493, 507, 512, 514,
+ 519, 524, 531, 533, 537, 538, 542, 544, 549, 551,
+ 553, 555, 560, 562, 567, 569, 571, 573, 575, 581,
+ 583, 588, 590, 595, 597, 603, 605, 607, 609, 614,
+ 616, 621, 623, 629, 631, 633, 635, 640, 643, 648,
+ 650, 655, 661, 663, 665, 671, 681, 689, 691, 697,
+ 699, 701, 703, 705, 710, 713, 714, 716, 718, 724,
+ 726, 728, 730, 732, 734, 736, 738, 743, 745, 747,
+ 749, 751, 753, 755, 757, 759, 764, 766, 768, 771,
+ 773, 781, 788, 789, 791, 793, 795, 798, 806, 817,
+ 819, 824, 826, 834, 839, 843, 847, 851, 852, 856,
+ 859
};
static const char * const yytname[] = { "$","error","$illegal.","FUNC_CALL",
"NAME","REGEXP","ERROR","YNUMBER","YSTRING","RELOP","APPEND_OP","ASSIGNOP","MATCHOP",
"NEWLINE","CONCAT_OP","LEX_BEGIN","LEX_END","LEX_IF","LEX_ELSE","LEX_RETURN",
"LEX_DELETE","LEX_WHILE","LEX_DO","LEX_FOR","LEX_BREAK","LEX_CONTINUE","LEX_PRINT",
-"LEX_PRINTF","LEX_NEXT","LEX_EXIT","LEX_FUNCTION","LEX_GETLINE","LEX_IN","LEX_AND",
-"LEX_OR","INCREMENT","DECREMENT","LEX_BUILTIN","LEX_LENGTH","'?'","':'","','",
-"'<'","'>'","'|'","'+'","'-'","'*'","'/'","'%'","'!'","UNARY","'^'","'$'","'('",
-"')'","'['","']'","'{'","'}'","';'","start","program","rule","@1","@2","func_name",
-"lex_builtin","function_prologue","@3","function_body","pattern","regexp","@4",
-"action","statements","statement_term","statement","@5","print","if_statement",
-"nls","opt_nls","input_redir","output_redir","opt_param_list","param_list","opt_exp",
-"opt_rexpression_list","rexpression_list","opt_expression_list","expression_list",
-"exp","@6","rexp","@7","simp_exp","non_post_simp_exp","opt_variable","variable",
-"l_brace","r_brace","r_paren","opt_semi","semi","comma",""
+"LEX_PRINTF","LEX_NEXT","LEX_EXIT","LEX_FUNCTION","LEX_GETLINE","LEX_NEXTFILE",
+"LEX_IN","LEX_AND","LEX_OR","INCREMENT","DECREMENT","LEX_BUILTIN","LEX_LENGTH",
+"'?'","':'","','","'<'","'>'","'|'","'+'","'-'","'*'","'/'","'%'","'!'","UNARY",
+"'^'","'$'","'('","')'","'['","']'","'{'","'}'","';'","start","program","rule",
+"@1","@2","func_name","lex_builtin","function_prologue","@3","function_body",
+"pattern","regexp","@4","action","statements","statement_term","statement","@5",
+"print","if_statement","nls","opt_nls","input_redir","output_redir","opt_param_list",
+"param_list","opt_exp","opt_rexpression_list","rexpression_list","opt_expression_list",
+"expression_list","exp","@6","rexp","@7","simp_exp","non_post_simp_exp","opt_variable",
+"variable","l_brace","r_brace","r_paren","opt_semi","semi","comma",""
};
#endif
static const short yyr1[] = { 0,
- 61, 62, 62, 62, 62, 62, 64, 63, 65, 63,
- 63, 63, 63, 63, 63, 63, 66, 66, 66, 67,
- 67, 69, 68, 70, 70, 71, 71, 73, 72, 74,
- 74, 75, 75, 75, 75, 76, 76, 77, 77, 77,
- 77, 77, 77, 77, 77, 77, 77, 77, 77, 77,
- 77, 77, 78, 77, 77, 77, 77, 79, 79, 80,
- 80, 81, 81, 82, 82, 83, 83, 84, 84, 84,
- 84, 85, 85, 86, 86, 86, 86, 86, 87, 87,
- 88, 88, 89, 89, 89, 89, 89, 89, 90, 90,
- 91, 91, 91, 91, 91, 91, 93, 92, 92, 92,
- 92, 92, 92, 92, 92, 92, 92, 92, 92, 92,
- 92, 92, 92, 95, 94, 94, 94, 94, 94, 94,
- 94, 94, 94, 94, 94, 94, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 97, 97, 97, 97, 97,
- 97, 97, 97, 97, 97, 97, 97, 97, 98, 98,
- 99, 99, 99, 100, 101, 102, 103, 103, 104, 105
+ 62, 63, 63, 63, 63, 63, 65, 64, 66, 64,
+ 64, 64, 64, 64, 64, 64, 67, 67, 67, 68,
+ 68, 70, 69, 71, 71, 72, 72, 74, 73, 75,
+ 75, 76, 76, 76, 76, 77, 77, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 79, 78, 78, 78, 78, 80, 80,
+ 81, 81, 82, 82, 83, 83, 84, 84, 85, 85,
+ 85, 85, 86, 86, 87, 87, 87, 87, 87, 88,
+ 88, 89, 89, 90, 90, 90, 90, 90, 90, 91,
+ 91, 92, 92, 92, 92, 92, 92, 94, 93, 93,
+ 93, 93, 93, 93, 93, 93, 93, 93, 93, 93,
+ 93, 93, 93, 93, 96, 95, 95, 95, 95, 95,
+ 95, 95, 95, 95, 95, 95, 95, 97, 97, 97,
+ 97, 97, 97, 97, 97, 97, 98, 98, 98, 98,
+ 98, 98, 98, 98, 98, 98, 98, 98, 98, 99,
+ 99, 100, 100, 100, 101, 102, 103, 104, 104, 105,
+ 106
};
static const short yyr2[] = { 0,
@@ -307,471 +324,472 @@ static const short yyr2[] = { 0,
1, 0, 7, 4, 4, 1, 3, 0, 4, 5,
4, 1, 2, 1, 2, 1, 2, 2, 2, 3,
1, 6, 8, 8, 10, 9, 2, 2, 6, 4,
- 3, 3, 0, 4, 6, 3, 2, 1, 1, 6,
- 9, 1, 2, 0, 1, 0, 2, 0, 2, 2,
- 2, 0, 1, 1, 3, 1, 2, 3, 0, 1,
- 0, 1, 1, 3, 1, 2, 3, 3, 0, 1,
- 1, 3, 1, 2, 3, 3, 0, 4, 5, 4,
- 3, 3, 3, 3, 1, 2, 3, 3, 3, 3,
- 5, 1, 2, 0, 4, 3, 3, 3, 1, 2,
- 3, 3, 3, 5, 1, 2, 1, 3, 3, 3,
- 3, 3, 3, 2, 2, 2, 3, 4, 4, 1,
- 4, 1, 2, 2, 1, 1, 2, 2, 0, 1,
- 1, 4, 2, 2, 2, 1, 0, 1, 1, 2
+ 3, 2, 3, 0, 4, 6, 3, 2, 1, 1,
+ 6, 9, 1, 2, 0, 1, 0, 2, 0, 2,
+ 2, 2, 0, 1, 1, 3, 1, 2, 3, 0,
+ 1, 0, 1, 1, 3, 1, 2, 3, 3, 0,
+ 1, 1, 3, 1, 2, 3, 3, 0, 4, 5,
+ 4, 3, 3, 3, 3, 1, 2, 3, 3, 3,
+ 3, 5, 1, 2, 0, 4, 3, 3, 3, 1,
+ 2, 3, 3, 3, 5, 1, 2, 1, 3, 3,
+ 3, 3, 3, 3, 2, 2, 2, 3, 4, 4,
+ 1, 4, 1, 2, 2, 1, 1, 2, 2, 0,
+ 1, 1, 4, 2, 2, 2, 1, 0, 1, 1,
+ 2
};
-static const short yydefact[] = { 64,
- 62, 65, 0, 63, 4, 0, 151, 145, 146, 7,
- 9, 22, 149, 0, 0, 0, 140, 0, 0, 28,
- 0, 0, 0, 64, 0, 2, 0, 0, 105, 14,
- 26, 112, 127, 142, 0, 0, 0, 159, 0, 11,
- 36, 64, 0, 12, 0, 66, 150, 143, 144, 0,
- 0, 0, 0, 148, 142, 147, 0, 106, 136, 153,
- 142, 93, 0, 91, 154, 5, 3, 1, 16, 0,
+static const short yydefact[] = { 65,
+ 63, 66, 0, 64, 4, 0, 152, 146, 147, 7,
+ 9, 22, 150, 0, 0, 0, 141, 0, 0, 28,
+ 0, 0, 0, 65, 0, 2, 0, 0, 106, 14,
+ 26, 113, 128, 143, 0, 0, 0, 160, 0, 11,
+ 36, 65, 0, 12, 0, 67, 151, 144, 145, 0,
+ 0, 0, 0, 149, 143, 148, 0, 107, 137, 154,
+ 143, 94, 0, 92, 155, 5, 3, 1, 16, 0,
13, 15, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 113, 0, 0, 0, 0, 0, 0, 97,
- 134, 135, 34, 0, 53, 0, 0, 64, 0, 0,
- 0, 58, 59, 79, 79, 64, 0, 32, 0, 41,
- 0, 0, 157, 64, 0, 0, 91, 0, 8, 37,
- 10, 18, 17, 20, 21, 0, 19, 0, 101, 0,
- 0, 0, 0, 94, 64, 156, 0, 0, 137, 0,
- 157, 108, 104, 107, 102, 103, 0, 27, 109, 110,
- 149, 132, 133, 129, 130, 131, 128, 0, 0, 79,
- 0, 0, 0, 79, 47, 48, 0, 80, 0, 155,
- 35, 33, 157, 85, 149, 0, 0, 119, 68, 0,
- 83, 125, 142, 57, 0, 39, 64, 158, 38, 141,
- 152, 0, 67, 138, 139, 29, 95, 160, 0, 96,
- 92, 157, 64, 0, 100, 98, 0, 0, 0, 56,
- 0, 0, 151, 0, 51, 52, 64, 66, 120, 0,
- 0, 0, 0, 0, 86, 0, 0, 0, 0, 0,
- 0, 0, 126, 114, 40, 31, 76, 74, 0, 0,
- 99, 24, 25, 111, 64, 54, 0, 64, 0, 0,
- 0, 30, 118, 68, 70, 69, 71, 50, 87, 88,
- 84, 123, 121, 122, 116, 117, 0, 0, 64, 77,
- 0, 0, 0, 0, 0, 0, 0, 79, 0, 0,
- 115, 23, 78, 75, 60, 55, 42, 0, 64, 79,
- 0, 49, 124, 64, 64, 0, 0, 64, 0, 43,
- 44, 64, 0, 61, 0, 46, 45, 0, 0, 0
+ 0, 0, 114, 0, 0, 0, 0, 0, 0, 98,
+ 135, 136, 34, 0, 54, 0, 0, 65, 0, 0,
+ 0, 59, 60, 80, 80, 0, 65, 0, 32, 0,
+ 41, 0, 0, 158, 65, 0, 0, 92, 0, 8,
+ 37, 10, 18, 17, 20, 21, 0, 19, 0, 102,
+ 0, 0, 0, 0, 95, 65, 157, 0, 0, 138,
+ 0, 158, 109, 105, 108, 103, 104, 0, 27, 110,
+ 111, 150, 133, 134, 130, 131, 132, 129, 0, 0,
+ 80, 0, 0, 0, 80, 47, 48, 0, 81, 0,
+ 52, 156, 35, 33, 158, 86, 150, 0, 0, 120,
+ 69, 0, 84, 126, 143, 58, 0, 39, 65, 159,
+ 38, 142, 153, 0, 68, 139, 140, 29, 96, 161,
+ 0, 97, 93, 158, 65, 0, 101, 99, 0, 0,
+ 0, 57, 0, 0, 152, 0, 51, 53, 65, 67,
+ 121, 0, 0, 0, 0, 0, 87, 0, 0, 0,
+ 0, 0, 0, 0, 127, 115, 40, 31, 77, 75,
+ 0, 0, 100, 24, 25, 112, 65, 55, 0, 65,
+ 0, 0, 0, 30, 119, 69, 71, 70, 72, 50,
+ 88, 89, 85, 124, 122, 123, 117, 118, 0, 0,
+ 65, 78, 0, 0, 0, 0, 0, 0, 0, 80,
+ 0, 0, 116, 23, 79, 76, 61, 56, 42, 0,
+ 65, 80, 0, 49, 125, 65, 65, 0, 0, 65,
+ 0, 43, 44, 65, 0, 62, 0, 46, 45, 0,
+ 0, 0
};
-static const short yydefgoto[] = { 308,
- 25, 26, 39, 43, 126, 127, 27, 45, 69, 28,
- 29, 57, 30, 107, 40, 108, 160, 109, 110, 2,
- 3, 129, 224, 239, 240, 167, 179, 180, 115, 116,
- 111, 158, 181, 268, 32, 33, 46, 34, 112, 113,
- 139, 187, 42, 138
+static const short yydefgoto[] = { 310,
+ 25, 26, 39, 43, 127, 128, 27, 45, 69, 28,
+ 29, 57, 30, 108, 40, 109, 161, 110, 111, 2,
+ 3, 130, 226, 241, 242, 168, 181, 182, 116, 117,
+ 112, 159, 183, 270, 32, 33, 46, 34, 113, 114,
+ 140, 189, 42, 139
};
-static const short yypact[] = { 2,
--32768, 15, 842,-32768,-32768, -44, -27,-32768,-32768, 6,
- 6,-32768, 3, 3, 3, -15, -9, 1684, 1684,-32768,
- 1678, 1684, 1093, 2, 898,-32768, 32, 46,-32768,-32768,
- 1262, 197,-32768, 49, 665, 1072, 1093,-32768, 32,-32768,
- 15, 2, 32,-32768, 96, 7,-32768,-32768,-32768, 1072,
- 1072, 1684, 1474, -21, -11, -21, 86,-32768, -21,-32768,
--32768,-32768, 0, 1209,-32768,-32768,-32768,-32768,-32768, 665,
--32768,-32768, 1474, 1474, 89, 1474, 1474, 1474, 1474, 1474,
- 1474, 63, 197, 1684, 1684, 1684, 1684, 1684, 1684,-32768,
--32768,-32768,-32768, 55,-32768, 99, 56, 2, 66, 6,
- 6,-32768,-32768, 1474, 1474, 2, 756,-32768, 816,-32768,
- 518, 665, 61, 2, 68, 41, 1358, 11,-32768,-32768,
--32768,-32768,-32768,-32768,-32768, 71,-32768, 1684,-32768, 68,
- 68, 1209, 79, 1474, 2,-32768, 98, 1141,-32768, 756,
- 61, 1745, 1732,-32768, 1494, 1406, 1310, 1358, 1745, 1745,
- 3, 90, 90, -21, -21, -21, -21, 1474, 1474, 1474,
- 59, 1474, 946, 1602,-32768,-32768, 6, 1358, 6,-32768,
--32768,-32768, 61,-32768, 3, 1678, 1093,-32768, 44, 85,
- 1542, 197, 111,-32768, 756,-32768, 2,-32768,-32768,-32768,
--32768, 26, 197,-32768,-32768,-32768, 1358,-32768, 128,-32768,
- 1358, 61, 2, 1474,-32768, 1358, 1209, 6, 1093,-32768,
- 1209, 119, -24, 61,-32768,-32768, 2, 7,-32768, 0,
- 1474, 1474, 1474, 6, 1658, 1161, 1658, 1658, 137, 1658,
- 1658, 1658, 197,-32768,-32768,-32768,-32768,-32768, 68, 42,
--32768,-32768,-32768, 1358, 2,-32768, 12, 2, 94, 139,
- 1024,-32768,-32768, 44, 1358, 1358, 1358,-32768, 1542,-32768,
- 1542, 415, 1222,-32768, 1622, 1582, 1454, 1658, 2,-32768,
- 22, 946, 6, 946, 1474, 68, 1004, 1474, 6, 1658,
- 1542,-32768,-32768,-32768, 131,-32768,-32768, 1209, 2, 1474,
- 68,-32768, 1542, 2, 2, 946, 68, 2, 946,-32768,
--32768, 2, 946,-32768, 946,-32768,-32768, 151, 153,-32768
+static const short yypact[] = { -6,
+-32768, 0, 875,-32768,-32768, -40, -38,-32768,-32768, -7,
+ -7,-32768, 10, 10, 10, -31, -26, 1735, 1735,-32768,
+ 1715, 1735, 1131, -6, 932,-32768, -24, 72,-32768,-32768,
+ 1304, 205,-32768, 5, 709, 1110, 1131,-32768, -24,-32768,
+ 0, -6, -24,-32768, 85, 3,-32768,-32768,-32768, 1110,
+ 1110, 1735, 1620, 8, 106, 8, 81,-32768, 8,-32768,
+-32768,-32768, 37, 1250,-32768,-32768,-32768,-32768,-32768, 709,
+-32768,-32768, 1620, 1620, 90, 1620, 1620, 1620, 1620, 1620,
+ 1620, 65, 205, 1735, 1735, 1735, 1735, 1735, 1735,-32768,
+-32768,-32768,-32768, 50,-32768, 111, 70, -6, 93, -7,
+ -7,-32768,-32768, 1620, 1620, -7, -6, 758,-32768, 819,
+-32768, 1040, 709, 100, -6, 99, 55, 1402, 9,-32768,
+-32768,-32768,-32768,-32768,-32768,-32768, 109,-32768, 1735,-32768,
+ 99, 99, 1250, 119, 1620, -6,-32768, 133, 1180,-32768,
+ 758, 100, 1327, 794,-32768, 1515, 1451, 1353, 1402, 1327,
+ 1327, 10, 125, 125, 8, 8, 8, 8, 1620, 1620,
+ 1620, 42, 1620, 981, 1657,-32768,-32768, -7, 1402, -7,
+-32768,-32768,-32768,-32768, 100,-32768, 10, 1715, 1131,-32768,
+ 96, 39, 1538, 205, 117,-32768, 758,-32768, -6,-32768,
+-32768,-32768,-32768, 7, 205,-32768,-32768,-32768, 1402,-32768,
+ 166,-32768, 1402, 100, -6, 1620,-32768, 1402, 1250, -7,
+ 1131,-32768, 1250, 151, -12, 100,-32768,-32768, -6, 3,
+-32768, 37, 1620, 1620, 1620, -7, 1678, 1201, 1678, 1678,
+ 181, 1678, 1678, 1678, 205,-32768,-32768,-32768,-32768,-32768,
+ 99, 56,-32768,-32768,-32768, 1402, -6,-32768, 11, -6,
+ 131, 183, 1061,-32768,-32768, 96, 1402, 1402, 1402,-32768,
+ 1538,-32768, 1538, 635, 83,-32768, 1599, 1579, 1474, 1678,
+ -6,-32768, 103, 981, -7, 981, 1620, 99, 623, 1620,
+ -7, 1678, 1538,-32768,-32768,-32768, 170,-32768,-32768, 1250,
+ -6, 1620, 99,-32768, 1538, -6, -6, 981, 99, -6,
+ 981,-32768,-32768, -6, 981,-32768, 981,-32768,-32768, 190,
+ 191,-32768
};
static const short yypgoto[] = {-32768,
--32768, 125,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,
- 84,-32768, -22, -54, 334, -96,-32768,-32768,-32768, 202,
- 38, -58, -92,-32768,-32768, -103,-32768,-32768, -14, -19,
- -3,-32768, -114,-32768, 368, 145, -137, 150, 183, -61,
- -23, -138, 469, -175
+-32768, 167,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768,
+ 211,-32768, 107, -53, 315, -105,-32768,-32768,-32768, 199,
+ 97, -22, -62,-32768,-32768, -103,-32768,-32768, 94, -14,
+ -3,-32768, -202,-32768, 318, 177, -134, 95, 124, -69,
+ 407, -138, 420, -177
};
-#define YYLAST 1799
+#define YYLAST 1790
static const short yytable[] = { 31,
- 134, 169, 203, 63, 226, 71, 7, 250, 141, 36,
- 172, 134, 134, 205, 1, 140, 119, 118, 1, 64,
- 121, 31, 283, 91, 92, 284, 237, 4, 37, 238,
- 89, 37, 117, 117, 217, 130, 131, 218, 50, 137,
- 135, 134, 270, 172, 51, 173, 117, 117, 128, 132,
- 186, 135, 135, 221, 136, 22, 208, 185, 1, 90,
- 214, 65, 68, 242, 271, 38, 212, 191, 273, 142,
- 143, 1, 145, 146, 147, 148, 149, 150, 202, 120,
- -72, 135, 135, 91, 92, 225, 222, 223, 172, 24,
- 133, 190, 144, 151, -82, -90, -73, -82, 122, 123,
- 168, 168, 161, 24, 58, 38, 194, 195, 159, 162,
- 259, 261, 262, 263, 209, 265, 266, 267, 38, 164,
- 38, 234, 136, 235, 192, 135, 196, -82, -82, 199,
- 197, 241, 124, 125, 201, 163, 86, 87, 88, 249,
- 264, 89, 276, 170, -82, 91, 92, 275, 294, 67,
- 309, 189, 310, 281, 206, 207, 168, 220, 211, 253,
- 168, 279, 47, 48, 49, 293, 60, 55, 55, 0,
- 55, 61, 198, 64, 291, 285, 0, 287, 0, 0,
- 55, 0, 0, 245, 0, 35, 297, 248, 0, 247,
- 0, 0, 178, 0, 0, 0, 254, 0, 0, 301,
- 244, 55, 304, 0, 0, 117, 306, 35, 307, 70,
- 35, 41, 41, 55, 0, 269, 0, 255, 256, 257,
- 0, 35, 0, 0, 236, 35, 0, 0, 0, 41,
- 0, 0, 0, 55, 55, 55, 55, 55, 55, 0,
- 243, 84, 85, 86, 87, 88, 0, 277, 89, 0,
- 0, 0, 289, 0, 252, 0, 0, 0, 183, 219,
- 55, 0, 0, 0, 295, 0, 55, 298, 0, 0,
- 0, 288, 0, 302, 168, 0, 0, 55, 0, 0,
- 0, 55, 272, 0, 0, 274, 168, 0, 0, 0,
- 0, 55, 55, 0, 55, 55, 55, 55, 55, 55,
- 47, 41, 41, 0, 0, 0, 282, 0, 178, 178,
- 178, 178, 41, 178, 178, 178, 0, 55, 0, 0,
- 0, 0, 0, 0, 47, 55, 296, 0, 0, 0,
- 55, 299, 300, 0, 0, 303, 0, 0, 0, 305,
- 0, 0, 0, 0, 44, 0, 55, 0, 0, 0,
- 55, 178, 0, 0, 0, 55, 55, 0, 0, 0,
- 55, 72, 41, 178, 0, 0, 0, 0, 41, 0,
- 41, 0, 0, 0, 183, 183, 183, 183, 0, 183,
- 183, 183, 0, 0, 0, 54, 56, 0, 59, 0,
- 0, 0, 0, 55, 0, 0, 0, 0, 83, 0,
- 0, 0, 0, 0, 55, 55, 55, 0, 55, 41,
- 55, 55, 55, 0, 55, 55, 55, 183, 7, 59,
- 0, 8, 9,-32768, 0, 41, 55, 0, 0, 183,
- 55, 83, 0, 165, 166, 0, 0, 55, 0, 0,
- 0, 0, 55, 0, 184, 0, 0, 0, 0, 14,
- 15, 152, 153, 154, 155, 156, 157, 0, 0, 18,
- 19, 0, 0, 0, 52, 0, 0, 22, 53, 0,
- 0, 0, 0, 0, 41, 0, 182, 0, 83, 0,
- 41, 0, 0, 0, 83, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 210, 193, 0, 0, 0, 83,
- 215, 0, 216, 114, 0, 0, 0, 0, 0, 83,
- 83, 0, 83, 83, 83, 83, 83, 83, 0, 0,
- 6, 7, 0, 0, 8, 9, 73, 0, 0, 74,
- 1, 0, 0, 0, 0, 83, 0, 0, 114, 0,
- 0, 246, 0, 59, 0, 0, 0, 0, 233, 75,
- 76, 77, 14, 15, 16, 17, 78, 258, 0, 80,
- 81, 82, 18, 19, 83, 0, 0, 52, 83, 0,
- 22, 53, 0, 83, 83, 114, 0, 38, 83, 0,
- 114, 188, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 182, 182, 182, 182, 0, 182, 182, 182,
- 0, 0, 0, 0, 0, 0, 286, 0, 114, 188,
- 0, 83, 292, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 83, 83, 83, 0, 233, 0, 233, 233,
- 233, 114, 233, 233, 233, 182, 0, 0, 0, 0,
- 0, 188, 0, 0, 83, 0, 0, 182, 233, 0,
- 0, 0, 0, 114, 0, 83, 0, 0, 0, 0,
- 233, 0, 0, 0, 0, 93, 0, 6, 7, 0,
- 188, 8, 9, 0, 0, 0, 0, 0, 0, 0,
- 0, 94, 251, 95, 96, 97, 98, 99, 100, 101,
- 102, 103, 104, 105, 0, 13, 0, 0, 0, 14,
- 15, 16, 17, 0, 0, 0, 0, 0, 0, 18,
- 19, 0, 20, 0, 21, 0, 0, 22, 23, 278,
- 0, 0, 24, 106, 38, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 114, 0, 114, 0, 0, 290, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 171, 0, 6, 7,
- 0, 0, 8, 9, 114, 0, 0, 114, 0, 0,
- 0, 114, 94, 114, 95, 96, 97, 98, 99, 100,
- 101, 102, 103, 104, 105, 0, 13, 0, 0, 0,
+ 142, 170, 174, 205, 228, 1, 1, 239, 63, 135,
+ 240, 135, 4, 7, 36, 90, 141, 207, 37, 64,
+ 252, 31, 119, 50, 261, 263, 264, 265, 51, 267,
+ 268, 269, 118, 118, 24, 174, 219, 135, 175, 227,
+ 91, 92, 220, 188, 37, 129, 118, 118, -83, 133,
+ 136, -83, 136, 38, 1, 135, 272, 210, 214, 187,
+ 89, 216, -73, 22, 273, 244, 193, 283, 275, 143,
+ 144, 204, 146, 147, 148, 149, 150, 151, 136, 295,
+ 136, 174, -83, -83, 1, 134, 7, 123, 124, 8,
+ 9, 229, 137, 145,-32768, 152, 136, 136, 211, -83,
+ 169, 169, 38, 285, 160, 223, 286, 47, 48, 49,
+ -91, -74, 55, 55, 162, 55, 61, 237, 14, 15,
+ 65, 68, 125, 126, 163, 55, 35, 236, 18, 19,
+ 24, 199, 38, 52, 71, 203, 22, 53, 121, 224,
+ 225, 91, 92, 131, 132, 120, 55, 165, 35, 122,
+ 70, 35, 91, 92, 137, 208, 209, 169, 55, 213,
+ 38, 169, 35, 194, 222, 201, 35, 198, 287, 243,
+ 289, 251, 86, 87, 88, 64, 293, 89, 55, 55,
+ 55, 55, 55, 55, 266, 277, 278, 296, 299, 311,
+ 312, 67, 303, 281, 164, 306, 249, 255, 60, 308,
+ 0, 309, 246, 172, 185, 0, 55, 118, 41, 41,
+ 0, 191, 55, 0, 0, 0, 0, 0, 0, 257,
+ 258, 259, 0, 55, 0, 0, 41, 55, 0, 0,
+ 0, 58, 200, 0, 0, 0, 0, 55, 55, 0,
+ 55, 55, 55, 55, 55, 55, 47, 0, 0, 279,
+ 84, 85, 86, 87, 88, 0, 0, 89, 0, 0,
+ 0, 0, 0, 55, 0, 0, 0, 0, 0, 0,
+ 0, 47, 55, 290, 0, 0, 169, 55, 0, 0,
+ 0, 0, 0, 0, 0, 238, 0, 0, 169, 0,
+ 0, 0, 0, 55, 0, 0, 0, 55, 41, 41,
+ 0, 245, 55, 55, 41, 0, 0, 55, 0, 0,
+ 41, 0, 0, 0, 0, 254, 0, 0, 0, 0,
+ 180, 185, 185, 185, 185, 44, 185, 185, 185, 0,
+ 0, 0, 0, 0, 0, 54, 56, 0, 59, 0,
+ 55, 0, 72, 274, 0, 0, 276, 0, 83, 0,
+ 0, 55, 55, 55, 0, 55, 0, 55, 55, 55,
+ 41, 55, 55, 55, 185, 0, 41, 284, 41, 59,
+ 0, 0, 0, 55, 0, 0, 185, 55, 0, 0,
+ 0, 83, 0, 0, 55, 0, 0, 298, 221, 55,
+ 0, 0, 301, 302, 0, 0, 305, 0, 0, 0,
+ 307, 153, 154, 155, 156, 157, 158, 0, 41, 0,
+ 0, 0, 0, 0, 166, 167, 0, 0, 0, 0,
+ 171, 0, 0, 0, 41, 0, 186, 184, 0, 83,
+ 0, 0, 0, 0, 0, 83, 0, 180, 180, 180,
+ 180, 0, 180, 180, 180, 0, 195, 0, 0, 0,
+ 83, 0, 0, 0, 115, 0, 0, 0, 0, 0,
+ 83, 83, 0, 83, 83, 83, 83, 83, 83, 138,
+ 0, 0, 0, 41, 0, 0, 212, 0, 0, 41,
+ 180, 0, 217, 0, 218, 0, 83, 0, 0, 115,
+ 0, 0, 180, 0, 0, 59, 0, 0, 0, 0,
+ 235, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 83, 0, 0, 0,
+ 83, 0, 192, 0, 248, 83, 83, 115, 0, 0,
+ 83, 0, 115, 190, 0, 0, 0, 196, 197, 0,
+ 260, 0, 0, 0, 184, 184, 184, 184, 0, 184,
+ 184, 184, 0, 0, 0, 0, 0, 0, 0, 0,
+ 115, 190, 0, 83, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 83, 83, 83, 0, 235, 0,
+ 235, 235, 235, 115, 235, 235, 235, 184, 0, 288,
+ 0, 0, 0, 0, 190, 294, 83, 0, 0, 184,
+ 235, 0, 0, 0, 0, 0, 115, 83, 0, 0,
+ 0, 0, 235, 0, 0, 247, 0, 0, 0, 250,
+ 0, 0, 0, 190, 0, 6, 7, 0, 256, 8,
+ 9, 73, 0, 0, 74, 253, 0, 0, 7, 0,
+ 0, 8, 9,-32768, 0, 0, 0, 271, 0, 0,
+ 0, 0, 0, 0, 0, 75, 76, 77, 14, 15,
+ 16, 17, 78, 0, 0, 80, 81, 82, 18, 19,
+ 14, 15, 280, 52, 0, 0, 22, 53, 0, 0,
+ 18, 19, 0, 38, 291, 52, 0, 0, 22, 53,
+ 0, 0, 0, 115, 0, 115, 297, 0, 292, 300,
+ 0, 0, 0, 0, 0, 304, 0, 0, 0, 93,
+ 0, 6, 7, 0, 0, 8, 9, 115, 0, 0,
+ 115, 0, 0, 0, 115, 94, 115, 95, 96, 97,
+ 98, 99, 100, 101, 102, 103, 104, 105, 0, 13,
+ 106, 0, 0, 0, 14, 15, 16, 17, 0, 0,
+ 0, 0, 0, 0, 18, 19, 0, 20, 173, 21,
+ 6, 7, 22, 23, 8, 9, 0, 24, 107, 38,
+ 0, 0, 0, 0, 94, 0, 95, 96, 97, 98,
+ 99, 100, 101, 102, 103, 104, 105, 0, 13, 106,
+ 0, 0, 0, 14, 15, 16, 17, 7, 0, 0,
+ 8, 9, 73, 18, 19,-32768, 20, 0, 21, 0,
+ 0, 22, 23, 0, 0, 0, 24, 107, 38, 176,
+ 0, 6, 7, 0, 0, 8, 9, 0, -82, 14,
+ 15, -82, 0, 0, 0, 0, 80, 81, 82, 18,
+ 19, 0, 0, 0, 52, 0, 0, 22, 53, 177,
+ 0, 0, 0, 0, 14, 15, 16, 17, 0, 0,
+ 0, 0, -82, -82, 18, 19, 0, 20, 0, 178,
+ 0, 0, 22, 179, -6, 5, 0, 6, 7, -82,
+ 0, 8, 9, 0, 0, 0, 0, -6, 0, 10,
+ 11, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 12, 13, 0, 0, 0, 0,
14, 15, 16, 17, 0, 0, 0, 0, 0, 0,
18, 19, 0, 20, 0, 21, 0, 0, 22, 23,
- 0, 0, 0, 24, 106, 38, 174, 0, 6, 7,
- 0, 0, 8, 9, 0, -81, 0, 0, -81, 0,
+ 0, -65, 66, 24, 6, 7, 0, 0, 8, 9,
+ 0, 0, 0, 0, 1, 0, 10, 11, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, -6, 5, 0, 6, 7, 175, 0, 8, 9,
- 14, 15, 16, 17, -6, 0, 10, 11, -81, -81,
- 18, 19, 0, 20, 0, 176, 0, 0, 22, 177,
- 0, 12, 13, 0, 0, -81, 14, 15, 16, 17,
+ 0, 12, 13, 0, 0, 0, 0, 14, 15, 16,
+ 17, 0, 0, 0, 0, 0, 0, 18, 19, 0,
+ 20, 0, 21, 6, 7, 22, 23, 8, 9, 0,
+ 24, 0, 0, 0, 0, 0, 0, 94, 0, 95,
+ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
+ 0, 13, 106, 0, 0, 0, 14, 15, 16, 17,
0, 0, 0, 0, 0, 0, 18, 19, 0, 20,
- 0, 21, 0, 0, 22, 23, 0, -64, 66, 24,
- 6, 7, 0, 0, 8, 9, 0, 0, 0, 0,
- 1, 0, 10, 11, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 12, 13, 0,
- 0, 0, 14, 15, 16, 17, 0, 0, 0, 0,
- 0, 0, 18, 19, 0, 20, 0, 21, 6, 7,
- 22, 23, 8, 9, 0, 24, 0, 0, 0, 0,
- 0, 0, 94, 0, 95, 96, 97, 98, 99, 100,
- 101, 102, 103, 104, 105, 0, 13, 0, 0, 0,
- 14, 15, 16, 17, 0, 0, 0, 0, 0, 0,
- 18, 19, 0, 20, 0, 21, 0, 0, 22, 23,
- 0, 0, 0, 24, 0, 38, 6, 7, 0, 0,
- 8, 9, 73, 0, 0, 74, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 6, 7, 0, 0,
- 8, 9, 0, 0, 0, 75, 76, 77, 14, 15,
- 16, 17, 78, 0, 0, 80, 81, 82, 18, 19,
- 0, 0, 0, 52, 13, 0, 22, 53, 14, 15,
- 16, 17, 0, 38, 0, 0, 0, 0, 18, 19,
- 0, 20, 62, 21, 6, 7, 22, 23, 8, 9,
- 0, 0, 0, 38, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 62, 0, 6, 7, 0, 0, 8,
- 9, 0, 13, 0, 0, 0, 14, 15, 16, 17,
+ 0, 21, 0, 0, 22, 23, 0, 0, 0, 24,
+ 0, 38, 6, 7, 0, 0, 8, 9, 73, 0,
+ 0, 74, 1, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 6, 7, 0, 0, 8, 9, 0,
+ 0, 0, 75, 76, 77, 14, 15, 16, 17, 78,
+ 0, 0, 80, 81, 82, 18, 19, 0, 0, 0,
+ 52, 13, 0, 22, 53, 0, 14, 15, 16, 17,
+ 38, 0, 0, 0, 0, 0, 18, 19, 0, 20,
+ 62, 21, 6, 7, 22, 23, 8, 9, 0, 0,
+ 0, 38, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 62, 0, 6, 7, 0, 0, 8, 9, 0,
+ 13, 0, 0, 0, 0, 14, 15, 16, 17, 0,
+ 0, 0, 0, 0, 0, 18, 19, 0, 20, 0,
+ 21, 13, 0, 22, 23, -90, 14, 15, 16, 17,
0, 0, 0, 0, 0, 0, 18, 19, 0, 20,
- 0, 21, 0, 13, 22, 23, -89, 14, 15, 16,
- 17, 0, 0, 0, 0, 0, 0, 18, 19, 0,
- 20, 200, 21, 6, 7, 22, 23, 8, 9, 0,
+ 202, 21, 6, 7, 22, 23, 8, 9, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 260, 0, 6, 7, 0, 0, 8, 9, 0,
- 0, 13, 0, 0, 0, 14, 15, 16, 17, 0,
+ 0, 262, 0, 6, 7, 0, 0, 8, 9, 0,
+ 13, 0, 0, 0, 0, 14, 15, 16, 17, 0,
0, 0, 0, 0, 0, 18, 19, 0, 20, 0,
- 21, 175, 0, 22, 23, 14, 15, 16, 17, 0,
- 0, 0, 0, 0, 0, 18, 19, 0, 20, 0,
- 176, 6, 7, 22, 53, 8, 9, 73, 0, 0,
- 74, 0, 0, 0, 0, 7, 0, 0, 8, 9,
- 227, 0, 0,-32768, 0, 0, 0, 0, 0, 0,
- 75, 76, 77, 14, 15, 16, 17, 78, 0, 0,
- 80, 81, 82, 18, 19, 0, 14, 15, 52, 0,
- 0, 22, 53, 136, 6, 7, 18, 19, 8, 9,
- 73, 52, 0, 74, 22, 53, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 75, 76, 77, 14, 15, 16, 17,
- 78, 0, 79, 80, 81, 82, 18, 19, 0, 0,
- 0, 52, 6, 7, 22, 53, 8, 9, 73, 0,
+ 21, 177, 0, 22, 23, 0, 14, 15, 16, 17,
+ 0, 0, 0, 0, 0, 0, 18, 19, 0, 20,
+ 0, 178, 6, 7, 22, 53, 8, 9, 73, 0,
0, 74, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 75, 76, 77, 14, 15, 16, 17, 78, 204,
- 0, 80, 81, 82, 18, 19, 0, 0, 0, 52,
- 6, 7, 22, 53, 8, 9, 73, 0, 0, 74,
+ 0, 0, 75, 76, 77, 14, 15, 16, 17, 78,
+ 0, 0, 80, 81, 82, 18, 19, 0, 0, 0,
+ 52, 0, 0, 22, 53, 137, 6, 7, 0, 0,
+ 8, 9, 73, 0, 0, 74, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 75,
- 76, 77, 14, 15, 16, 17, 78, 0, 0, 80,
- 81, 82, 18, 19, 0, 0, 0, 52, 6, 7,
- 22, 53, 8, 9, 73, 0, 0, 74, 0, 0,
+ 7, 0, 0, 8, 9,-32768, 75, 76, 77, 14,
+ 15, 16, 17, 78, 0, 79, 80, 81, 82, 18,
+ 19, 0, 0, 0, 52, 6, 7, 22, 53, 8,
+ 9, 73, 14, 15, 74, 0, 0, 0, 0,-32768,
+-32768,-32768, 18, 19, 0, 0, 0, 52, 0, 0,
+ 22, 53, 0, 0, 0, 75, 76, 77, 14, 15,
+ 16, 17, 78, 206, 0, 80, 81, 82, 18, 19,
+ 0, 0, 0, 52, 6, 7, 22, 53, 8, 9,
+ 73, 0, 0, 74, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 75, 76, 0,
- 14, 15, 16, 17, 0, 0, 0, 80, 81, 82,
- 18, 19, 0, 0, 0, 52, 6, 7, 22, 53,
- 8, 9, 227, 0, 0, 228, 0, 0, 0, 0,
+ 0, 0, 0, 0, 75, 76, 77, 14, 15, 16,
+ 17, 78, 0, 0, 80, 81, 82, 18, 19, 0,
+ 0, 0, 52, 6, 7, 22, 53, 8, 9, 73,
+ 0, 0, 74, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 6, 7, 0, 0,
- 8, 9, 0, 0, 0, 229, 230, 231, 14, 15,
- 16, 17, 232, 280, 0, 0, 6, 7, 18, 19,
- 8, 9, 73, 52, 13, 74, 22, 53, 14, 15,
- 16, 17, 0, 0, 0, 0, 0, 0, 18, 19,
- 0, 20, 0, 21, 0, 75, 22, 23, 14, 15,
- 16, 17, 0, 0, 0, 80, 81, 82, 18, 19,
- 0, 0, 0, 52, 6, 7, 22, 53, 8, 9,
- 227, 0, 0, 228, 0, 0, 0, 0, 0, 0,
+ 8, 9, 229, 75, 76, 230, 14, 15, 16, 17,
+ 0, 0, 0, 80, 81, 82, 18, 19, 0, 0,
+ 0, 52, 0, 0, 22, 53, 231, 232, 233, 14,
+ 15, 16, 17, 234, 282, 0, 0, 6, 7, 18,
+ 19, 8, 9, 73, 52, 0, 74, 22, 53, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 229, 230, 231, 14, 15, 16, 17,
- 232, 0, 0, 0, 6, 7, 18, 19, 8, 9,
- 227, 52, 0, 228, 22, 53, 0, 0, 0, 0,
- 0, 0, 0, 0, 6, 213, 0, 0, 8, 9,
- 0, 0, 0, 229, 230, 0, 14, 15, 16, 17,
- 0, 0, 0, 0, 6, 7, 18, 19, 8, 9,
- 227, 52, 13, 228, 22, 53, 14, 15, 16, 17,
- 0, 0, 0, 0, 0, 0, 18, 19, 0, 20,
- 0, 21, 0, 229, 22, 23, 14, 15, 16, 17,
- 6, 7, 0, 0, 8, 9, 18, 19, 0, 0,
- 0, 52, 0, 0, 22, 53, 0, 0, 0, 0,
- 6, 7, 0, 0, 8, 9, 6, 7, 175, 0,
- 8, 9, 14, 15, 16, 17, 0, 0, 0, 0,
- 0, 0, 18, 19, 0, 20, 0, 176, 0, 0,
- 22, 53, 14, 15, 16, 17, 0, 0, 14, 15,
- 16, 17, 18, 19, 0, 20, 0, 52, 18, 19,
- 22, 53, 0, 52, 0, 7, 22, 53, 8, 9,
- 73, 0, 0,-32768, 0, 0, 0, 0, 7, 0,
- 0, 8, 9,-32768, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 14, 15, 0, 0,
- 0, 0, 0, 80, 81, 82, 18, 19, 0, 14,
- 15, 52, 0, 0, 22, 53,-32768,-32768,-32768, 18,
- 19, 0, 0, 0, 52, 0, 0, 22, 53
+ 6, 7, 0, 0, 8, 9, 229, 75, 0, 230,
+ 14, 15, 16, 17, 0, 0, 0, 80, 81, 82,
+ 18, 19, 0, 0, 0, 52, 0, 0, 22, 53,
+ 231, 232, 233, 14, 15, 16, 17, 234, 0, 0,
+ 0, 6, 7, 18, 19, 8, 9, 229, 52, 0,
+ 230, 22, 53, 0, 0, 0, 0, 0, 0, 0,
+ 0, 6, 7, 0, 0, 8, 9, 229, 0, 0,
+ 230, 231, 232, 0, 14, 15, 16, 17, 0, 0,
+ 0, 0, 6, 7, 18, 19, 8, 9, 0, 52,
+ 0, 231, 22, 53, 14, 15, 16, 17, 0, 0,
+ 0, 0, 0, 0, 18, 19, 0, 0, 0, 52,
+ 13, 0, 22, 53, 0, 14, 15, 16, 17, 6,
+ 215, 0, 0, 8, 9, 18, 19, 0, 20, 0,
+ 21, 0, 0, 22, 23, 0, 0, 0, 0, 0,
+ 6, 7, 0, 0, 8, 9, 0, 13, 0, 0,
+ 0, 0, 14, 15, 16, 17, 0, 0, 0, 0,
+ 0, 0, 18, 19, 0, 20, 0, 21, 177, 0,
+ 22, 23, 0, 14, 15, 16, 17, 6, 7, 0,
+ 0, 8, 9, 18, 19, 0, 20, 0, 178, 0,
+ 0, 22, 53, 0, 0, 0, 0, 6, 7, 0,
+ 0, 8, 9, 0, 0, 0, 0, 0, 0, 0,
+ 14, 15, 16, 17, 0, 0, 0, 0, 0, 0,
+ 18, 19, 0, 20, 0, 52, 0, 0, 22, 53,
+ 14, 15, 16, 17, 0, 0, 0, 0, 0, 0,
+ 18, 19, 0, 0, 0, 52, 0, 0, 22, 53
};
static const short yycheck[] = { 3,
- 1, 105, 141, 23, 180, 28, 4, 32, 70, 54,
- 107, 1, 1, 151, 13, 70, 39, 37, 13, 23,
- 43, 25, 1, 35, 36, 4, 1, 13, 56, 4,
- 52, 56, 36, 37, 173, 50, 51, 175, 54, 63,
- 41, 1, 1, 140, 54, 107, 50, 51, 42, 53,
- 112, 41, 41, 10, 55, 53, 160, 112, 13, 11,
- 164, 24, 25, 202, 240, 60, 163, 57, 57, 73,
- 74, 13, 76, 77, 78, 79, 80, 81, 140, 42,
- 55, 41, 41, 35, 36, 1, 43, 44, 185, 58,
- 5, 115, 4, 31, 10, 55, 55, 13, 3, 4,
- 104, 105, 4, 58, 21, 60, 130, 131, 54, 54,
- 225, 226, 227, 228, 56, 230, 231, 232, 60, 54,
- 60, 11, 55, 185, 54, 41, 48, 43, 44, 32,
- 134, 4, 37, 38, 138, 98, 47, 48, 49, 21,
- 4, 52, 4, 106, 60, 35, 36, 54, 18, 25,
- 0, 114, 0, 268, 158, 159, 160, 177, 162, 218,
- 164, 254, 13, 14, 15, 280, 22, 18, 19, -1,
- 21, 22, 135, 177, 278, 272, -1, 274, -1, -1,
- 31, -1, -1, 207, -1, 3, 290, 211, -1, 209,
- -1, -1, 109, -1, -1, -1, 220, -1, -1, 296,
- 204, 52, 299, -1, -1, 209, 303, 25, 305, 27,
- 28, 10, 11, 64, -1, 239, -1, 221, 222, 223,
- -1, 39, -1, -1, 187, 43, -1, -1, -1, 28,
- -1, -1, -1, 84, 85, 86, 87, 88, 89, -1,
- 203, 45, 46, 47, 48, 49, -1, 251, 52, -1,
- -1, -1, 276, -1, 217, -1, -1, -1, 109, 176,
- 111, -1, -1, -1, 288, -1, 117, 291, -1, -1,
- -1, 275, -1, 297, 278, -1, -1, 128, -1, -1,
- -1, 132, 245, -1, -1, 248, 290, -1, -1, -1,
- -1, 142, 143, -1, 145, 146, 147, 148, 149, 150,
- 151, 100, 101, -1, -1, -1, 269, -1, 225, 226,
- 227, 228, 111, 230, 231, 232, -1, 168, -1, -1,
- -1, -1, -1, -1, 175, 176, 289, -1, -1, -1,
- 181, 294, 295, -1, -1, 298, -1, -1, -1, 302,
- -1, -1, -1, -1, 11, -1, 197, -1, -1, -1,
- 201, 268, -1, -1, -1, 206, 207, -1, -1, -1,
- 211, 28, 161, 280, -1, -1, -1, -1, 167, -1,
- 169, -1, -1, -1, 225, 226, 227, 228, -1, 230,
- 231, 232, -1, -1, -1, 18, 19, -1, 21, -1,
- -1, -1, -1, 244, -1, -1, -1, -1, 31, -1,
- -1, -1, -1, -1, 255, 256, 257, -1, 259, 208,
- 261, 262, 263, -1, 265, 266, 267, 268, 4, 52,
- -1, 7, 8, 9, -1, 224, 277, -1, -1, 280,
- 281, 64, -1, 100, 101, -1, -1, 288, -1, -1,
- -1, -1, 293, -1, 111, -1, -1, -1, -1, 35,
- 36, 84, 85, 86, 87, 88, 89, -1, -1, 45,
- 46, -1, -1, -1, 50, -1, -1, 53, 54, -1,
- -1, -1, -1, -1, 273, -1, 109, -1, 111, -1,
- 279, -1, -1, -1, 117, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, 161, 128, -1, -1, -1, 132,
- 167, -1, 169, 35, -1, -1, -1, -1, -1, 142,
- 143, -1, 145, 146, 147, 148, 149, 150, -1, -1,
- 3, 4, -1, -1, 7, 8, 9, -1, -1, 12,
- 13, -1, -1, -1, -1, 168, -1, -1, 70, -1,
- -1, 208, -1, 176, -1, -1, -1, -1, 181, 32,
- 33, 34, 35, 36, 37, 38, 39, 224, -1, 42,
- 43, 44, 45, 46, 197, -1, -1, 50, 201, -1,
- 53, 54, -1, 206, 207, 107, -1, 60, 211, -1,
- 112, 113, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, 225, 226, 227, 228, -1, 230, 231, 232,
- -1, -1, -1, -1, -1, -1, 273, -1, 140, 141,
- -1, 244, 279, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, 255, 256, 257, -1, 259, -1, 261, 262,
- 263, 163, 265, 266, 267, 268, -1, -1, -1, -1,
- -1, 173, -1, -1, 277, -1, -1, 280, 281, -1,
- -1, -1, -1, 185, -1, 288, -1, -1, -1, -1,
- 293, -1, -1, -1, -1, 1, -1, 3, 4, -1,
- 202, 7, 8, -1, -1, -1, -1, -1, -1, -1,
- -1, 17, 214, 19, 20, 21, 22, 23, 24, 25,
- 26, 27, 28, 29, -1, 31, -1, -1, -1, 35,
- 36, 37, 38, -1, -1, -1, -1, -1, -1, 45,
- 46, -1, 48, -1, 50, -1, -1, 53, 54, 251,
- -1, -1, 58, 59, 60, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- 272, -1, 274, -1, -1, 277, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, 1, -1, 3, 4,
- -1, -1, 7, 8, 296, -1, -1, 299, -1, -1,
- -1, 303, 17, 305, 19, 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29, -1, 31, -1, -1, -1,
- 35, 36, 37, 38, -1, -1, -1, -1, -1, -1,
- 45, 46, -1, 48, -1, 50, -1, -1, 53, 54,
- -1, -1, -1, 58, 59, 60, 1, -1, 3, 4,
- -1, -1, 7, 8, -1, 10, -1, -1, 13, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, 0, 1, -1, 3, 4, 31, -1, 7, 8,
- 35, 36, 37, 38, 13, -1, 15, 16, 43, 44,
- 45, 46, -1, 48, -1, 50, -1, -1, 53, 54,
- -1, 30, 31, -1, -1, 60, 35, 36, 37, 38,
- -1, -1, -1, -1, -1, -1, 45, 46, -1, 48,
- -1, 50, -1, -1, 53, 54, -1, 0, 1, 58,
- 3, 4, -1, -1, 7, 8, -1, -1, -1, -1,
- 13, -1, 15, 16, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, 30, 31, -1,
- -1, -1, 35, 36, 37, 38, -1, -1, -1, -1,
- -1, -1, 45, 46, -1, 48, -1, 50, 3, 4,
- 53, 54, 7, 8, -1, 58, -1, -1, -1, -1,
- -1, -1, 17, -1, 19, 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29, -1, 31, -1, -1, -1,
- 35, 36, 37, 38, -1, -1, -1, -1, -1, -1,
- 45, 46, -1, 48, -1, 50, -1, -1, 53, 54,
- -1, -1, -1, 58, -1, 60, 3, 4, -1, -1,
- 7, 8, 9, -1, -1, 12, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, 3, 4, -1, -1,
- 7, 8, -1, -1, -1, 32, 33, 34, 35, 36,
- 37, 38, 39, -1, -1, 42, 43, 44, 45, 46,
- -1, -1, -1, 50, 31, -1, 53, 54, 35, 36,
- 37, 38, -1, 60, -1, -1, -1, -1, 45, 46,
- -1, 48, 1, 50, 3, 4, 53, 54, 7, 8,
- -1, -1, -1, 60, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, 1, -1, 3, 4, -1, -1, 7,
- 8, -1, 31, -1, -1, -1, 35, 36, 37, 38,
- -1, -1, -1, -1, -1, -1, 45, 46, -1, 48,
- -1, 50, -1, 31, 53, 54, 55, 35, 36, 37,
- 38, -1, -1, -1, -1, -1, -1, 45, 46, -1,
- 48, 1, 50, 3, 4, 53, 54, 7, 8, -1,
+ 70, 105, 108, 142, 182, 13, 13, 1, 23, 1,
+ 4, 1, 13, 4, 55, 11, 70, 152, 57, 23,
+ 33, 25, 37, 55, 227, 228, 229, 230, 55, 232,
+ 233, 234, 36, 37, 59, 141, 175, 1, 108, 1,
+ 36, 37, 177, 113, 57, 43, 50, 51, 10, 53,
+ 42, 13, 42, 61, 13, 1, 1, 161, 164, 113,
+ 53, 165, 56, 54, 242, 204, 58, 270, 58, 73,
+ 74, 141, 76, 77, 78, 79, 80, 81, 42, 282,
+ 42, 187, 44, 45, 13, 5, 4, 3, 4, 7,
+ 8, 9, 56, 4, 12, 31, 42, 42, 57, 61,
+ 104, 105, 61, 1, 55, 10, 4, 13, 14, 15,
+ 56, 56, 18, 19, 4, 21, 22, 187, 36, 37,
+ 24, 25, 38, 39, 55, 31, 3, 11, 46, 47,
+ 59, 135, 61, 51, 28, 139, 54, 55, 42, 44,
+ 45, 36, 37, 50, 51, 39, 52, 55, 25, 43,
+ 27, 28, 36, 37, 56, 159, 160, 161, 64, 163,
+ 61, 165, 39, 55, 179, 33, 43, 49, 274, 4,
+ 276, 21, 48, 49, 50, 179, 280, 53, 84, 85,
+ 86, 87, 88, 89, 4, 55, 4, 18, 292, 0,
+ 0, 25, 298, 256, 98, 301, 211, 220, 22, 305,
+ -1, 307, 206, 107, 110, -1, 112, 211, 10, 11,
+ -1, 115, 118, -1, -1, -1, -1, -1, -1, 223,
+ 224, 225, -1, 129, -1, -1, 28, 133, -1, -1,
+ -1, 21, 136, -1, -1, -1, -1, 143, 144, -1,
+ 146, 147, 148, 149, 150, 151, 152, -1, -1, 253,
+ 46, 47, 48, 49, 50, -1, -1, 53, -1, -1,
+ -1, -1, -1, 169, -1, -1, -1, -1, -1, -1,
+ -1, 177, 178, 277, -1, -1, 280, 183, -1, -1,
+ -1, -1, -1, -1, -1, 189, -1, -1, 292, -1,
+ -1, -1, -1, 199, -1, -1, -1, 203, 100, 101,
+ -1, 205, 208, 209, 106, -1, -1, 213, -1, -1,
+ 112, -1, -1, -1, -1, 219, -1, -1, -1, -1,
+ 110, 227, 228, 229, 230, 11, 232, 233, 234, -1,
+ -1, -1, -1, -1, -1, 18, 19, -1, 21, -1,
+ 246, -1, 28, 247, -1, -1, 250, -1, 31, -1,
+ -1, 257, 258, 259, -1, 261, -1, 263, 264, 265,
+ 162, 267, 268, 269, 270, -1, 168, 271, 170, 52,
+ -1, -1, -1, 279, -1, -1, 282, 283, -1, -1,
+ -1, 64, -1, -1, 290, -1, -1, 291, 178, 295,
+ -1, -1, 296, 297, -1, -1, 300, -1, -1, -1,
+ 304, 84, 85, 86, 87, 88, 89, -1, 210, -1,
+ -1, -1, -1, -1, 100, 101, -1, -1, -1, -1,
+ 106, -1, -1, -1, 226, -1, 112, 110, -1, 112,
+ -1, -1, -1, -1, -1, 118, -1, 227, 228, 229,
+ 230, -1, 232, 233, 234, -1, 129, -1, -1, -1,
+ 133, -1, -1, -1, 35, -1, -1, -1, -1, -1,
+ 143, 144, -1, 146, 147, 148, 149, 150, 151, 63,
+ -1, -1, -1, 275, -1, -1, 162, -1, -1, 281,
+ 270, -1, 168, -1, 170, -1, 169, -1, -1, 70,
+ -1, -1, 282, -1, -1, 178, -1, -1, -1, -1,
+ 183, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 199, -1, -1, -1,
+ 203, -1, 116, -1, 210, 208, 209, 108, -1, -1,
+ 213, -1, 113, 114, -1, -1, -1, 131, 132, -1,
+ 226, -1, -1, -1, 227, 228, 229, 230, -1, 232,
+ 233, 234, -1, -1, -1, -1, -1, -1, -1, -1,
+ 141, 142, -1, 246, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 257, 258, 259, -1, 261, -1,
+ 263, 264, 265, 164, 267, 268, 269, 270, -1, 275,
+ -1, -1, -1, -1, 175, 281, 279, -1, -1, 282,
+ 283, -1, -1, -1, -1, -1, 187, 290, -1, -1,
+ -1, -1, 295, -1, -1, 209, -1, -1, -1, 213,
+ -1, -1, -1, 204, -1, 3, 4, -1, 222, 7,
+ 8, 9, -1, -1, 12, 216, -1, -1, 4, -1,
+ -1, 7, 8, 9, -1, -1, -1, 241, -1, -1,
+ -1, -1, -1, -1, -1, 33, 34, 35, 36, 37,
+ 38, 39, 40, -1, -1, 43, 44, 45, 46, 47,
+ 36, 37, 253, 51, -1, -1, 54, 55, -1, -1,
+ 46, 47, -1, 61, 278, 51, -1, -1, 54, 55,
+ -1, -1, -1, 274, -1, 276, 290, -1, 279, 293,
+ -1, -1, -1, -1, -1, 299, -1, -1, -1, 1,
+ -1, 3, 4, -1, -1, 7, 8, 298, -1, -1,
+ 301, -1, -1, -1, 305, 17, 307, 19, 20, 21,
+ 22, 23, 24, 25, 26, 27, 28, 29, -1, 31,
+ 32, -1, -1, -1, 36, 37, 38, 39, -1, -1,
+ -1, -1, -1, -1, 46, 47, -1, 49, 1, 51,
+ 3, 4, 54, 55, 7, 8, -1, 59, 60, 61,
+ -1, -1, -1, -1, 17, -1, 19, 20, 21, 22,
+ 23, 24, 25, 26, 27, 28, 29, -1, 31, 32,
+ -1, -1, -1, 36, 37, 38, 39, 4, -1, -1,
+ 7, 8, 9, 46, 47, 12, 49, -1, 51, -1,
+ -1, 54, 55, -1, -1, -1, 59, 60, 61, 1,
+ -1, 3, 4, -1, -1, 7, 8, -1, 10, 36,
+ 37, 13, -1, -1, -1, -1, 43, 44, 45, 46,
+ 47, -1, -1, -1, 51, -1, -1, 54, 55, 31,
+ -1, -1, -1, -1, 36, 37, 38, 39, -1, -1,
+ -1, -1, 44, 45, 46, 47, -1, 49, -1, 51,
+ -1, -1, 54, 55, 0, 1, -1, 3, 4, 61,
+ -1, 7, 8, -1, -1, -1, -1, 13, -1, 15,
+ 16, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 30, 31, -1, -1, -1, -1,
+ 36, 37, 38, 39, -1, -1, -1, -1, -1, -1,
+ 46, 47, -1, 49, -1, 51, -1, -1, 54, 55,
+ -1, 0, 1, 59, 3, 4, -1, -1, 7, 8,
+ -1, -1, -1, -1, 13, -1, 15, 16, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 30, 31, -1, -1, -1, -1, 36, 37, 38,
+ 39, -1, -1, -1, -1, -1, -1, 46, 47, -1,
+ 49, -1, 51, 3, 4, 54, 55, 7, 8, -1,
+ 59, -1, -1, -1, -1, -1, -1, 17, -1, 19,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ -1, 31, 32, -1, -1, -1, 36, 37, 38, 39,
+ -1, -1, -1, -1, -1, -1, 46, 47, -1, 49,
+ -1, 51, -1, -1, 54, 55, -1, -1, -1, 59,
+ -1, 61, 3, 4, -1, -1, 7, 8, 9, -1,
+ -1, 12, 13, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 3, 4, -1, -1, 7, 8, -1,
+ -1, -1, 33, 34, 35, 36, 37, 38, 39, 40,
+ -1, -1, 43, 44, 45, 46, 47, -1, -1, -1,
+ 51, 31, -1, 54, 55, -1, 36, 37, 38, 39,
+ 61, -1, -1, -1, -1, -1, 46, 47, -1, 49,
+ 1, 51, 3, 4, 54, 55, 7, 8, -1, -1,
+ -1, 61, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 1, -1, 3, 4, -1, -1, 7, 8, -1,
- -1, 31, -1, -1, -1, 35, 36, 37, 38, -1,
- -1, -1, -1, -1, -1, 45, 46, -1, 48, -1,
- 50, 31, -1, 53, 54, 35, 36, 37, 38, -1,
- -1, -1, -1, -1, -1, 45, 46, -1, 48, -1,
- 50, 3, 4, 53, 54, 7, 8, 9, -1, -1,
- 12, -1, -1, -1, -1, 4, -1, -1, 7, 8,
- 9, -1, -1, 12, -1, -1, -1, -1, -1, -1,
- 32, 33, 34, 35, 36, 37, 38, 39, -1, -1,
- 42, 43, 44, 45, 46, -1, 35, 36, 50, -1,
- -1, 53, 54, 55, 3, 4, 45, 46, 7, 8,
- 9, 50, -1, 12, 53, 54, -1, -1, -1, -1,
+ 31, -1, -1, -1, -1, 36, 37, 38, 39, -1,
+ -1, -1, -1, -1, -1, 46, 47, -1, 49, -1,
+ 51, 31, -1, 54, 55, 56, 36, 37, 38, 39,
+ -1, -1, -1, -1, -1, -1, 46, 47, -1, 49,
+ 1, 51, 3, 4, 54, 55, 7, 8, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, 32, 33, 34, 35, 36, 37, 38,
- 39, -1, 41, 42, 43, 44, 45, 46, -1, -1,
- -1, 50, 3, 4, 53, 54, 7, 8, 9, -1,
+ -1, 1, -1, 3, 4, -1, -1, 7, 8, -1,
+ 31, -1, -1, -1, -1, 36, 37, 38, 39, -1,
+ -1, -1, -1, -1, -1, 46, 47, -1, 49, -1,
+ 51, 31, -1, 54, 55, -1, 36, 37, 38, 39,
+ -1, -1, -1, -1, -1, -1, 46, 47, -1, 49,
+ -1, 51, 3, 4, 54, 55, 7, 8, 9, -1,
-1, 12, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, 32, 33, 34, 35, 36, 37, 38, 39, 40,
- -1, 42, 43, 44, 45, 46, -1, -1, -1, 50,
- 3, 4, 53, 54, 7, 8, 9, -1, -1, 12,
+ -1, -1, 33, 34, 35, 36, 37, 38, 39, 40,
+ -1, -1, 43, 44, 45, 46, 47, -1, -1, -1,
+ 51, -1, -1, 54, 55, 56, 3, 4, -1, -1,
+ 7, 8, 9, -1, -1, 12, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, 32,
- 33, 34, 35, 36, 37, 38, 39, -1, -1, 42,
- 43, 44, 45, 46, -1, -1, -1, 50, 3, 4,
- 53, 54, 7, 8, 9, -1, -1, 12, -1, -1,
+ 4, -1, -1, 7, 8, 9, 33, 34, 35, 36,
+ 37, 38, 39, 40, -1, 42, 43, 44, 45, 46,
+ 47, -1, -1, -1, 51, 3, 4, 54, 55, 7,
+ 8, 9, 36, 37, 12, -1, -1, -1, -1, 43,
+ 44, 45, 46, 47, -1, -1, -1, 51, -1, -1,
+ 54, 55, -1, -1, -1, 33, 34, 35, 36, 37,
+ 38, 39, 40, 41, -1, 43, 44, 45, 46, 47,
+ -1, -1, -1, 51, 3, 4, 54, 55, 7, 8,
+ 9, -1, -1, 12, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, 32, 33, -1,
- 35, 36, 37, 38, -1, -1, -1, 42, 43, 44,
- 45, 46, -1, -1, -1, 50, 3, 4, 53, 54,
- 7, 8, 9, -1, -1, 12, -1, -1, -1, -1,
+ -1, -1, -1, -1, 33, 34, 35, 36, 37, 38,
+ 39, 40, -1, -1, 43, 44, 45, 46, 47, -1,
+ -1, -1, 51, 3, 4, 54, 55, 7, 8, 9,
+ -1, -1, 12, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, 3, 4, -1, -1,
- 7, 8, -1, -1, -1, 32, 33, 34, 35, 36,
- 37, 38, 39, 40, -1, -1, 3, 4, 45, 46,
- 7, 8, 9, 50, 31, 12, 53, 54, 35, 36,
- 37, 38, -1, -1, -1, -1, -1, -1, 45, 46,
- -1, 48, -1, 50, -1, 32, 53, 54, 35, 36,
- 37, 38, -1, -1, -1, 42, 43, 44, 45, 46,
- -1, -1, -1, 50, 3, 4, 53, 54, 7, 8,
- 9, -1, -1, 12, -1, -1, -1, -1, -1, -1,
+ 7, 8, 9, 33, 34, 12, 36, 37, 38, 39,
+ -1, -1, -1, 43, 44, 45, 46, 47, -1, -1,
+ -1, 51, -1, -1, 54, 55, 33, 34, 35, 36,
+ 37, 38, 39, 40, 41, -1, -1, 3, 4, 46,
+ 47, 7, 8, 9, 51, -1, 12, 54, 55, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, 32, 33, 34, 35, 36, 37, 38,
- 39, -1, -1, -1, 3, 4, 45, 46, 7, 8,
- 9, 50, -1, 12, 53, 54, -1, -1, -1, -1,
- -1, -1, -1, -1, 3, 4, -1, -1, 7, 8,
- -1, -1, -1, 32, 33, -1, 35, 36, 37, 38,
- -1, -1, -1, -1, 3, 4, 45, 46, 7, 8,
- 9, 50, 31, 12, 53, 54, 35, 36, 37, 38,
- -1, -1, -1, -1, -1, -1, 45, 46, -1, 48,
- -1, 50, -1, 32, 53, 54, 35, 36, 37, 38,
- 3, 4, -1, -1, 7, 8, 45, 46, -1, -1,
- -1, 50, -1, -1, 53, 54, -1, -1, -1, -1,
- 3, 4, -1, -1, 7, 8, 3, 4, 31, -1,
- 7, 8, 35, 36, 37, 38, -1, -1, -1, -1,
- -1, -1, 45, 46, -1, 48, -1, 50, -1, -1,
- 53, 54, 35, 36, 37, 38, -1, -1, 35, 36,
- 37, 38, 45, 46, -1, 48, -1, 50, 45, 46,
- 53, 54, -1, 50, -1, 4, 53, 54, 7, 8,
- 9, -1, -1, 12, -1, -1, -1, -1, 4, -1,
- -1, 7, 8, 9, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, 35, 36, -1, -1,
- -1, -1, -1, 42, 43, 44, 45, 46, -1, 35,
- 36, 50, -1, -1, 53, 54, 42, 43, 44, 45,
- 46, -1, -1, -1, 50, -1, -1, 53, 54
+ 3, 4, -1, -1, 7, 8, 9, 33, -1, 12,
+ 36, 37, 38, 39, -1, -1, -1, 43, 44, 45,
+ 46, 47, -1, -1, -1, 51, -1, -1, 54, 55,
+ 33, 34, 35, 36, 37, 38, 39, 40, -1, -1,
+ -1, 3, 4, 46, 47, 7, 8, 9, 51, -1,
+ 12, 54, 55, -1, -1, -1, -1, -1, -1, -1,
+ -1, 3, 4, -1, -1, 7, 8, 9, -1, -1,
+ 12, 33, 34, -1, 36, 37, 38, 39, -1, -1,
+ -1, -1, 3, 4, 46, 47, 7, 8, -1, 51,
+ -1, 33, 54, 55, 36, 37, 38, 39, -1, -1,
+ -1, -1, -1, -1, 46, 47, -1, -1, -1, 51,
+ 31, -1, 54, 55, -1, 36, 37, 38, 39, 3,
+ 4, -1, -1, 7, 8, 46, 47, -1, 49, -1,
+ 51, -1, -1, 54, 55, -1, -1, -1, -1, -1,
+ 3, 4, -1, -1, 7, 8, -1, 31, -1, -1,
+ -1, -1, 36, 37, 38, 39, -1, -1, -1, -1,
+ -1, -1, 46, 47, -1, 49, -1, 51, 31, -1,
+ 54, 55, -1, 36, 37, 38, 39, 3, 4, -1,
+ -1, 7, 8, 46, 47, -1, 49, -1, 51, -1,
+ -1, 54, 55, -1, -1, -1, -1, 3, 4, -1,
+ -1, 7, 8, -1, -1, -1, -1, -1, -1, -1,
+ 36, 37, 38, 39, -1, -1, -1, -1, -1, -1,
+ 46, 47, -1, 49, -1, 51, -1, -1, 54, 55,
+ 36, 37, 38, 39, -1, -1, -1, -1, -1, -1,
+ 46, 47, -1, -1, -1, 51, -1, -1, 54, 55
};
/* -*-C-*- Note some compilers choke on comments on `#line' lines. */
#line 3 "/usr/local/lib/bison.simple"
@@ -1239,11 +1257,14 @@ yyreduce:
switch (yyn) {
case 1:
-#line 137 "awk.y"
-{ expression_value = yyvsp[-1].nodeval; ;
+#line 150 "./awk.y"
+{
+ expression_value = yyvsp[-1].nodeval;
+ check_funcs();
+ ;
break;}
case 2:
-#line 142 "awk.y"
+#line 158 "./awk.y"
{
if (yyvsp[0].nodeval != NULL)
yyval.nodeval = yyvsp[0].nodeval;
@@ -1253,7 +1274,7 @@ case 2:
;
break;}
case 3:
-#line 151 "awk.y"
+#line 167 "./awk.y"
{
if (yyvsp[0].nodeval == NULL)
yyval.nodeval = yyvsp[-1].nodeval;
@@ -1262,69 +1283,69 @@ case 3:
else {
if (yyvsp[-1].nodeval->type != Node_rule_list)
yyvsp[-1].nodeval = node(yyvsp[-1].nodeval, Node_rule_list,
- (NODE*)NULL);
- yyval.nodeval = append_right (yyvsp[-1].nodeval,
- node(yyvsp[0].nodeval, Node_rule_list,(NODE *) NULL));
+ (NODE*) NULL);
+ yyval.nodeval = append_right(yyvsp[-1].nodeval,
+ node(yyvsp[0].nodeval, Node_rule_list, (NODE *) NULL));
}
yyerrok;
;
break;}
case 4:
-#line 165 "awk.y"
+#line 181 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 5:
-#line 166 "awk.y"
+#line 182 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 6:
-#line 167 "awk.y"
+#line 183 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 7:
-#line 171 "awk.y"
-{ io_allowed = 0; ;
+#line 187 "./awk.y"
+{ io_allowed = FALSE; ;
break;}
case 8:
-#line 173 "awk.y"
+#line 189 "./awk.y"
{
- if (begin_block) {
+ if (begin_block != NULL) {
if (begin_block->type != Node_rule_list)
begin_block = node(begin_block, Node_rule_list,
- (NODE *)NULL);
- (void) append_right (begin_block, node(
- node((NODE *)NULL, Node_rule_node, yyvsp[0].nodeval),
- Node_rule_list, (NODE *)NULL) );
+ (NODE *) NULL);
+ (void) append_right(begin_block, node(
+ node((NODE *) NULL, Node_rule_node, yyvsp[0].nodeval),
+ Node_rule_list, (NODE *) NULL) );
} else
- begin_block = node((NODE *)NULL, Node_rule_node, yyvsp[0].nodeval);
+ begin_block = node((NODE *) NULL, Node_rule_node, yyvsp[0].nodeval);
yyval.nodeval = NULL;
- io_allowed = 1;
+ io_allowed = TRUE;
yyerrok;
;
break;}
case 9:
-#line 187 "awk.y"
-{ io_allowed = 0; ;
+#line 203 "./awk.y"
+{ io_allowed = FALSE; ;
break;}
case 10:
-#line 189 "awk.y"
+#line 205 "./awk.y"
{
- if (end_block) {
+ if (end_block != NULL) {
if (end_block->type != Node_rule_list)
end_block = node(end_block, Node_rule_list,
- (NODE *)NULL);
+ (NODE *) NULL);
(void) append_right (end_block, node(
- node((NODE *)NULL, Node_rule_node, yyvsp[0].nodeval),
- Node_rule_list, (NODE *)NULL));
+ node((NODE *) NULL, Node_rule_node, yyvsp[0].nodeval),
+ Node_rule_list, (NODE *) NULL));
} else
- end_block = node((NODE *)NULL, Node_rule_node, yyvsp[0].nodeval);
+ end_block = node((NODE *) NULL, Node_rule_node, yyvsp[0].nodeval);
yyval.nodeval = NULL;
- io_allowed = 1;
+ io_allowed = TRUE;
yyerrok;
;
break;}
case 11:
-#line 204 "awk.y"
+#line 220 "./awk.y"
{
warning("BEGIN blocks must have an action part");
errcount++;
@@ -1332,7 +1353,7 @@ case 11:
;
break;}
case 12:
-#line 210 "awk.y"
+#line 226 "./awk.y"
{
warning("END blocks must have an action part");
errcount++;
@@ -1340,17 +1361,17 @@ case 12:
;
break;}
case 13:
-#line 216 "awk.y"
-{ yyval.nodeval = node (yyvsp[-1].nodeval, Node_rule_node, yyvsp[0].nodeval); yyerrok; ;
+#line 232 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_rule_node, yyvsp[0].nodeval); yyerrok; ;
break;}
case 14:
-#line 218 "awk.y"
-{ yyval.nodeval = node ((NODE *)NULL, Node_rule_node, yyvsp[0].nodeval); yyerrok; ;
+#line 234 "./awk.y"
+{ yyval.nodeval = node((NODE *) NULL, Node_rule_node, yyvsp[0].nodeval); yyerrok; ;
break;}
case 15:
-#line 220 "awk.y"
+#line 236 "./awk.y"
{
- yyval.nodeval = node (yyvsp[-1].nodeval,
+ yyval.nodeval = node(yyvsp[-1].nodeval,
Node_rule_node,
node(node(node(make_number(0.0),
Node_field_spec,
@@ -1363,7 +1384,7 @@ case 15:
;
break;}
case 16:
-#line 233 "awk.y"
+#line 249 "./awk.y"
{
func_install(yyvsp[-1].nodeval, yyvsp[0].nodeval);
yyval.nodeval = NULL;
@@ -1371,15 +1392,15 @@ case 16:
;
break;}
case 17:
-#line 242 "awk.y"
+#line 258 "./awk.y"
{ yyval.sval = yyvsp[0].sval; ;
break;}
case 18:
-#line 244 "awk.y"
+#line 260 "./awk.y"
{ yyval.sval = yyvsp[0].sval; ;
break;}
case 19:
-#line 246 "awk.y"
+#line 262 "./awk.y"
{
yyerror("%s() is a built-in function, it cannot be redefined",
tokstart);
@@ -1388,49 +1409,49 @@ case 19:
;
break;}
case 22:
-#line 261 "awk.y"
+#line 277 "./awk.y"
{
param_counter = 0;
;
break;}
case 23:
-#line 265 "awk.y"
+#line 281 "./awk.y"
{
yyval.nodeval = append_right(make_param(yyvsp[-4].sval), yyvsp[-2].nodeval);
- can_return = 1;
+ can_return = TRUE;
/* check for duplicate parameter names */
if (dup_parms(yyval.nodeval))
errcount++;
;
break;}
case 24:
-#line 276 "awk.y"
+#line 292 "./awk.y"
{
yyval.nodeval = yyvsp[-2].nodeval;
- can_return = 0;
+ can_return = FALSE;
;
break;}
case 25:
-#line 281 "awk.y"
+#line 297 "./awk.y"
{
yyval.nodeval = node((NODE *) NULL, Node_K_return, (NODE *) NULL);
- can_return = 0;
+ can_return = FALSE;
;
break;}
case 26:
-#line 290 "awk.y"
+#line 306 "./awk.y"
{ yyval.nodeval = yyvsp[0].nodeval; ;
break;}
case 27:
-#line 292 "awk.y"
-{ yyval.nodeval = mkrangenode ( node(yyvsp[-2].nodeval, Node_cond_pair, yyvsp[0].nodeval) ); ;
+#line 308 "./awk.y"
+{ yyval.nodeval = mkrangenode(node(yyvsp[-2].nodeval, Node_cond_pair, yyvsp[0].nodeval)); ;
break;}
case 28:
-#line 301 "awk.y"
+#line 317 "./awk.y"
{ ++want_regexp; ;
break;}
case 29:
-#line 303 "awk.y"
+#line 319 "./awk.y"
{
NODE *n;
size_t len;
@@ -1439,7 +1460,7 @@ case 29:
n->type = Node_regex;
len = strlen(yyvsp[-1].sval);
n->re_exp = make_string(yyvsp[-1].sval, len);
- n->re_reg = make_regexp(yyvsp[-1].sval, len, 0, 1);
+ n->re_reg = make_regexp(yyvsp[-1].sval, len, FALSE, TRUE);
n->re_text = NULL;
n->re_flags = CONST;
n->re_cnt = 1;
@@ -1447,96 +1468,101 @@ case 29:
;
break;}
case 30:
-#line 321 "awk.y"
-{ yyval.nodeval = yyvsp[-3].nodeval ; ;
+#line 337 "./awk.y"
+{ yyval.nodeval = yyvsp[-3].nodeval; ;
break;}
case 31:
-#line 323 "awk.y"
+#line 339 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 32:
-#line 328 "awk.y"
-{ yyval.nodeval = yyvsp[0].nodeval; ;
+#line 344 "./awk.y"
+{
+ yyval.nodeval = yyvsp[0].nodeval;
+ if (do_lint && isnoeffect(yyval.nodeval->type))
+ warning("statement may have no effect");
+ ;
break;}
case 33:
-#line 330 "awk.y"
+#line 350 "./awk.y"
{
if (yyvsp[-1].nodeval == NULL || yyvsp[-1].nodeval->type != Node_statement_list)
- yyvsp[-1].nodeval = node(yyvsp[-1].nodeval, Node_statement_list,(NODE *)NULL);
+ yyvsp[-1].nodeval = node(yyvsp[-1].nodeval, Node_statement_list, (NODE *) NULL);
yyval.nodeval = append_right(yyvsp[-1].nodeval,
- node( yyvsp[0].nodeval, Node_statement_list, (NODE *)NULL));
+ node(yyvsp[0].nodeval, Node_statement_list, (NODE *) NULL));
yyerrok;
;
break;}
case 34:
-#line 338 "awk.y"
+#line 358 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 35:
-#line 340 "awk.y"
+#line 360 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 38:
-#line 350 "awk.y"
+#line 370 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 39:
-#line 352 "awk.y"
+#line 372 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 40:
-#line 354 "awk.y"
+#line 374 "./awk.y"
{ yyval.nodeval = yyvsp[-1].nodeval; ;
break;}
case 41:
-#line 356 "awk.y"
+#line 376 "./awk.y"
{ yyval.nodeval = yyvsp[0].nodeval; ;
break;}
case 42:
-#line 358 "awk.y"
-{ yyval.nodeval = node (yyvsp[-3].nodeval, Node_K_while, yyvsp[0].nodeval); ;
+#line 378 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-3].nodeval, Node_K_while, yyvsp[0].nodeval); ;
break;}
case 43:
-#line 360 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_K_do, yyvsp[-5].nodeval); ;
+#line 380 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_K_do, yyvsp[-5].nodeval); ;
break;}
case 44:
-#line 362 "awk.y"
+#line 382 "./awk.y"
{
- yyval.nodeval = node (yyvsp[0].nodeval, Node_K_arrayfor, make_for_loop(variable(yyvsp[-5].sval,1),
- (NODE *)NULL, variable(yyvsp[-3].sval,1)));
+ yyval.nodeval = node(yyvsp[0].nodeval, Node_K_arrayfor,
+ make_for_loop(variable(yyvsp[-5].sval, CAN_FREE, Node_var),
+ (NODE *) NULL, variable(yyvsp[-3].sval, CAN_FREE, Node_var_array)));
;
break;}
case 45:
-#line 367 "awk.y"
+#line 388 "./awk.y"
{
- yyval.nodeval = node(yyvsp[0].nodeval, Node_K_for, (NODE *)make_for_loop(yyvsp[-7].nodeval, yyvsp[-5].nodeval, yyvsp[-3].nodeval));
+ yyval.nodeval = node(yyvsp[0].nodeval, Node_K_for, (NODE *) make_for_loop(yyvsp[-7].nodeval, yyvsp[-5].nodeval, yyvsp[-3].nodeval));
;
break;}
case 46:
-#line 371 "awk.y"
+#line 392 "./awk.y"
{
- yyval.nodeval = node (yyvsp[0].nodeval, Node_K_for,
- (NODE *)make_for_loop(yyvsp[-6].nodeval, (NODE *)NULL, yyvsp[-3].nodeval));
+ yyval.nodeval = node(yyvsp[0].nodeval, Node_K_for,
+ (NODE *) make_for_loop(yyvsp[-6].nodeval, (NODE *) NULL, yyvsp[-3].nodeval));
;
break;}
case 47:
-#line 377 "awk.y"
-{ yyval.nodeval = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); ;
+#line 398 "./awk.y"
+{ yyval.nodeval = node((NODE *) NULL, Node_K_break, (NODE *) NULL); ;
break;}
case 48:
-#line 380 "awk.y"
-{ yyval.nodeval = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); ;
+#line 401 "./awk.y"
+{ yyval.nodeval = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); ;
break;}
case 49:
-#line 382 "awk.y"
-{ yyval.nodeval = node (yyvsp[-3].nodeval, yyvsp[-5].nodetypeval, yyvsp[-1].nodeval); ;
+#line 403 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-3].nodeval, yyvsp[-5].nodetypeval, yyvsp[-1].nodeval); ;
break;}
case 50:
-#line 384 "awk.y"
+#line 405 "./awk.y"
{
if (yyvsp[-3].nodetypeval == Node_K_print && yyvsp[-2].nodeval == NULL) {
- static int warned = 0;
+ static int warned = FALSE;
yyvsp[-2].nodeval = node(node(make_number(0.0),
Node_field_spec,
@@ -1545,66 +1571,102 @@ case 50:
(NODE *) NULL);
if (do_lint && ! io_allowed && ! warned) {
- warned = 1;
+ warned = TRUE;
warning(
"plain `print' in BEGIN or END rule should probably be `print \"\"'");
}
}
- yyval.nodeval = node (yyvsp[-2].nodeval, yyvsp[-3].nodetypeval, yyvsp[-1].nodeval);
+ yyval.nodeval = node(yyvsp[-2].nodeval, yyvsp[-3].nodetypeval, yyvsp[-1].nodeval);
;
break;}
case 51:
-#line 404 "awk.y"
+#line 425 "./awk.y"
{ NODETYPE type;
- if (yyvsp[-1].nodeval && yyvsp[-1].nodeval == lookup("file")) {
- if (do_lint)
- warning("`next file' is a gawk extension");
- if (do_unix || do_posix) {
- /*
- * can't use yyerror, since may have overshot
- * the source line
- */
- errcount++;
- error("`next file' is a gawk extension");
- }
- if (! io_allowed) {
- /* same thing */
+ if (yyvsp[-1].nodeval) {
+ if (yyvsp[-1].nodeval == lookup("file")) {
+ static int warned = FALSE;
+
+ if (! warned) {
+ warned = TRUE;
+ warning("`next file' is obsolete; use `nextfile'");
+ }
+ if (do_lint)
+ warning("`next file' is a gawk extension");
+ if (do_traditional) {
+ /*
+ * can't use yyerror, since may have overshot
+ * the source line
+ */
+ errcount++;
+ error("`next file' is a gawk extension");
+ }
+ if (! io_allowed) {
+ /* same thing */
+ errcount++;
+ error("`next file' used in BEGIN or END action");
+ }
+ type = Node_K_nextfile;
+ } else {
errcount++;
- error("`next file' used in BEGIN or END action");
+ error("illegal expression after `next'");
+ type = Node_K_next; /* sanity */
}
- type = Node_K_nextfile;
} else {
if (! io_allowed)
- yyerror("next used in BEGIN or END action");
+ yyerror("`next' used in BEGIN or END action");
type = Node_K_next;
- }
- yyval.nodeval = node ((NODE *)NULL, type, (NODE *)NULL);
+ }
+ yyval.nodeval = node((NODE *) NULL, type, (NODE *) NULL);
;
break;}
case 52:
-#line 431 "awk.y"
-{ yyval.nodeval = node (yyvsp[-1].nodeval, Node_K_exit, (NODE *)NULL); ;
+#line 464 "./awk.y"
+{
+ if (do_lint)
+ warning("`nextfile' is a gawk extension");
+ if (do_traditional) {
+ /*
+ * can't use yyerror, since may have overshot
+ * the source line
+ */
+ errcount++;
+ error("`nextfile' is a gawk extension");
+ }
+ if (! io_allowed) {
+ /* same thing */
+ errcount++;
+ error("`nextfile' used in BEGIN or END action");
+ }
+ yyval.nodeval = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL);
+ ;
break;}
case 53:
-#line 433 "awk.y"
-{ if (! can_return) yyerror("return used outside function context"); ;
+#line 483 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_K_exit, (NODE *) NULL); ;
break;}
case 54:
-#line 435 "awk.y"
-{ yyval.nodeval = node (yyvsp[-1].nodeval, Node_K_return, (NODE *)NULL); ;
+#line 485 "./awk.y"
+{
+ if (! can_return)
+ yyerror("`return' used outside function context");
+ ;
break;}
case 55:
-#line 437 "awk.y"
-{ yyval.nodeval = node (variable(yyvsp[-4].sval,1), Node_K_delete, yyvsp[-2].nodeval); ;
+#line 490 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_K_return, (NODE *) NULL); ;
break;}
case 56:
-#line 439 "awk.y"
+#line 492 "./awk.y"
+{ yyval.nodeval = node(variable(yyvsp[-4].sval, CAN_FREE, Node_var_array), Node_K_delete, yyvsp[-2].nodeval); ;
+ break;}
+case 57:
+#line 494 "./awk.y"
{
if (do_lint)
warning("`delete array' is a gawk extension");
- if (do_unix || do_posix) {
+ if (do_traditional) {
/*
* can't use yyerror, since may have overshot
* the source line
@@ -1612,218 +1674,218 @@ case 56:
errcount++;
error("`delete array' is a gawk extension");
}
- yyval.nodeval = node (variable(yyvsp[-1].sval,1), Node_K_delete, (NODE *) NULL);
+ yyval.nodeval = node(variable(yyvsp[-1].sval, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
;
break;}
-case 57:
-#line 453 "awk.y"
-{ yyval.nodeval = yyvsp[-1].nodeval; ;
- break;}
case 58:
-#line 458 "awk.y"
-{ yyval.nodetypeval = yyvsp[0].nodetypeval; ;
+#line 508 "./awk.y"
+{ yyval.nodeval = yyvsp[-1].nodeval; ;
break;}
case 59:
-#line 460 "awk.y"
+#line 513 "./awk.y"
{ yyval.nodetypeval = yyvsp[0].nodetypeval; ;
break;}
case 60:
-#line 465 "awk.y"
+#line 515 "./awk.y"
+{ yyval.nodetypeval = yyvsp[0].nodetypeval; ;
+ break;}
+case 61:
+#line 520 "./awk.y"
{
yyval.nodeval = node(yyvsp[-3].nodeval, Node_K_if,
- node(yyvsp[0].nodeval, Node_if_branches, (NODE *)NULL));
+ node(yyvsp[0].nodeval, Node_if_branches, (NODE *) NULL));
;
break;}
-case 61:
-#line 471 "awk.y"
-{ yyval.nodeval = node (yyvsp[-6].nodeval, Node_K_if,
- node (yyvsp[-3].nodeval, Node_if_branches, yyvsp[0].nodeval)); ;
- break;}
case 62:
-#line 477 "awk.y"
-{ want_assign = 0; ;
+#line 526 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-6].nodeval, Node_K_if,
+ node(yyvsp[-3].nodeval, Node_if_branches, yyvsp[0].nodeval)); ;
break;}
-case 66:
-#line 488 "awk.y"
-{ yyval.nodeval = NULL; ;
+case 63:
+#line 532 "./awk.y"
+{ want_assign = FALSE; ;
break;}
case 67:
-#line 490 "awk.y"
-{ yyval.nodeval = node (yyvsp[0].nodeval, Node_redirect_input, (NODE *)NULL); ;
+#line 543 "./awk.y"
+{ yyval.nodeval = NULL; ;
break;}
case 68:
-#line 495 "awk.y"
-{ yyval.nodeval = NULL; ;
+#line 545 "./awk.y"
+{ yyval.nodeval = node(yyvsp[0].nodeval, Node_redirect_input, (NODE *) NULL); ;
break;}
case 69:
-#line 497 "awk.y"
-{ yyval.nodeval = node (yyvsp[0].nodeval, Node_redirect_output, (NODE *)NULL); ;
+#line 550 "./awk.y"
+{ yyval.nodeval = NULL; ;
break;}
case 70:
-#line 499 "awk.y"
-{ yyval.nodeval = node (yyvsp[0].nodeval, Node_redirect_append, (NODE *)NULL); ;
+#line 552 "./awk.y"
+{ yyval.nodeval = node(yyvsp[0].nodeval, Node_redirect_output, (NODE *) NULL); ;
break;}
case 71:
-#line 501 "awk.y"
-{ yyval.nodeval = node (yyvsp[0].nodeval, Node_redirect_pipe, (NODE *)NULL); ;
+#line 554 "./awk.y"
+{ yyval.nodeval = node(yyvsp[0].nodeval, Node_redirect_append, (NODE *) NULL); ;
break;}
case 72:
-#line 506 "awk.y"
-{ yyval.nodeval = NULL; ;
+#line 556 "./awk.y"
+{ yyval.nodeval = node(yyvsp[0].nodeval, Node_redirect_pipe, (NODE *) NULL); ;
break;}
case 73:
-#line 508 "awk.y"
-{ yyval.nodeval = yyvsp[0].nodeval; ;
+#line 561 "./awk.y"
+{ yyval.nodeval = NULL; ;
break;}
case 74:
-#line 513 "awk.y"
-{ yyval.nodeval = make_param(yyvsp[0].sval); ;
+#line 563 "./awk.y"
+{ yyval.nodeval = yyvsp[0].nodeval; ;
break;}
case 75:
-#line 515 "awk.y"
-{ yyval.nodeval = append_right(yyvsp[-2].nodeval, make_param(yyvsp[0].sval)); yyerrok; ;
+#line 568 "./awk.y"
+{ yyval.nodeval = make_param(yyvsp[0].sval); ;
break;}
case 76:
-#line 517 "awk.y"
-{ yyval.nodeval = NULL; ;
+#line 570 "./awk.y"
+{ yyval.nodeval = append_right(yyvsp[-2].nodeval, make_param(yyvsp[0].sval)); yyerrok; ;
break;}
case 77:
-#line 519 "awk.y"
+#line 572 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 78:
-#line 521 "awk.y"
+#line 574 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 79:
-#line 527 "awk.y"
+#line 576 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 80:
-#line 529 "awk.y"
-{ yyval.nodeval = yyvsp[0].nodeval; ;
+#line 582 "./awk.y"
+{ yyval.nodeval = NULL; ;
break;}
case 81:
-#line 534 "awk.y"
-{ yyval.nodeval = NULL; ;
+#line 584 "./awk.y"
+{ yyval.nodeval = yyvsp[0].nodeval; ;
break;}
case 82:
-#line 536 "awk.y"
-{ yyval.nodeval = yyvsp[0].nodeval; ;
+#line 589 "./awk.y"
+{ yyval.nodeval = NULL; ;
break;}
case 83:
-#line 541 "awk.y"
-{ yyval.nodeval = node (yyvsp[0].nodeval, Node_expression_list, (NODE *)NULL); ;
+#line 591 "./awk.y"
+{ yyval.nodeval = yyvsp[0].nodeval; ;
break;}
case 84:
-#line 543 "awk.y"
+#line 596 "./awk.y"
+{ yyval.nodeval = node(yyvsp[0].nodeval, Node_expression_list, (NODE *) NULL); ;
+ break;}
+case 85:
+#line 598 "./awk.y"
{
yyval.nodeval = append_right(yyvsp[-2].nodeval,
- node( yyvsp[0].nodeval, Node_expression_list, (NODE *)NULL));
+ node(yyvsp[0].nodeval, Node_expression_list, (NODE *) NULL));
yyerrok;
;
break;}
-case 85:
-#line 549 "awk.y"
-{ yyval.nodeval = NULL; ;
- break;}
case 86:
-#line 551 "awk.y"
+#line 604 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 87:
-#line 553 "awk.y"
+#line 606 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 88:
-#line 555 "awk.y"
+#line 608 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 89:
-#line 560 "awk.y"
+#line 610 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 90:
-#line 562 "awk.y"
-{ yyval.nodeval = yyvsp[0].nodeval; ;
+#line 615 "./awk.y"
+{ yyval.nodeval = NULL; ;
break;}
case 91:
-#line 567 "awk.y"
-{ yyval.nodeval = node (yyvsp[0].nodeval, Node_expression_list, (NODE *)NULL); ;
+#line 617 "./awk.y"
+{ yyval.nodeval = yyvsp[0].nodeval; ;
break;}
case 92:
-#line 569 "awk.y"
+#line 622 "./awk.y"
+{ yyval.nodeval = node(yyvsp[0].nodeval, Node_expression_list, (NODE *) NULL); ;
+ break;}
+case 93:
+#line 624 "./awk.y"
{
yyval.nodeval = append_right(yyvsp[-2].nodeval,
- node( yyvsp[0].nodeval, Node_expression_list, (NODE *)NULL));
+ node(yyvsp[0].nodeval, Node_expression_list, (NODE *) NULL));
yyerrok;
;
break;}
-case 93:
-#line 575 "awk.y"
-{ yyval.nodeval = NULL; ;
- break;}
case 94:
-#line 577 "awk.y"
+#line 630 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 95:
-#line 579 "awk.y"
+#line 632 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 96:
-#line 581 "awk.y"
+#line 634 "./awk.y"
{ yyval.nodeval = NULL; ;
break;}
case 97:
-#line 586 "awk.y"
-{ want_assign = 0; ;
+#line 636 "./awk.y"
+{ yyval.nodeval = NULL; ;
break;}
case 98:
-#line 588 "awk.y"
+#line 641 "./awk.y"
+{ want_assign = FALSE; ;
+ break;}
+case 99:
+#line 643 "./awk.y"
{
if (do_lint && yyvsp[0].nodeval->type == Node_regex)
warning("Regular expression on left of assignment.");
- yyval.nodeval = node (yyvsp[-3].nodeval, yyvsp[-2].nodetypeval, yyvsp[0].nodeval);
+ yyval.nodeval = node(yyvsp[-3].nodeval, yyvsp[-2].nodetypeval, yyvsp[0].nodeval);
;
break;}
-case 99:
-#line 594 "awk.y"
-{ yyval.nodeval = node (variable(yyvsp[0].sval,1), Node_in_array, yyvsp[-3].nodeval); ;
- break;}
case 100:
-#line 596 "awk.y"
+#line 649 "./awk.y"
+{ yyval.nodeval = node(variable(yyvsp[0].sval, CAN_FREE, Node_var_array), Node_in_array, yyvsp[-3].nodeval); ;
+ break;}
+case 101:
+#line 651 "./awk.y"
{
- yyval.nodeval = node (yyvsp[0].nodeval, Node_K_getline,
- node (yyvsp[-3].nodeval, Node_redirect_pipein, (NODE *)NULL));
+ yyval.nodeval = node(yyvsp[0].nodeval, Node_K_getline,
+ node(yyvsp[-3].nodeval, Node_redirect_pipein, (NODE *) NULL));
;
break;}
-case 101:
-#line 601 "awk.y"
+case 102:
+#line 656 "./awk.y"
{
if (do_lint && ! io_allowed && yyvsp[0].nodeval == NULL)
warning("non-redirected getline undefined inside BEGIN or END action");
- yyval.nodeval = node (yyvsp[-1].nodeval, Node_K_getline, yyvsp[0].nodeval);
+ yyval.nodeval = node(yyvsp[-1].nodeval, Node_K_getline, yyvsp[0].nodeval);
;
break;}
-case 102:
-#line 607 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_and, yyvsp[0].nodeval); ;
- break;}
case 103:
-#line 609 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_or, yyvsp[0].nodeval); ;
+#line 662 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_and, yyvsp[0].nodeval); ;
break;}
case 104:
-#line 611 "awk.y"
+#line 664 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_or, yyvsp[0].nodeval); ;
+ break;}
+case 105:
+#line 666 "./awk.y"
{
if (yyvsp[-2].nodeval->type == Node_regex)
warning("Regular expression on left of MATCH operator.");
- yyval.nodeval = node (yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, mk_rexp(yyvsp[0].nodeval));
+ yyval.nodeval = node(yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, mk_rexp(yyvsp[0].nodeval));
;
break;}
-case 105:
-#line 617 "awk.y"
+case 106:
+#line 672 "./awk.y"
{
yyval.nodeval = yyvsp[0].nodeval;
if (do_lint && tokstart[0] == '*') {
@@ -1834,8 +1896,8 @@ case 105:
}
;
break;}
-case 106:
-#line 627 "awk.y"
+case 107:
+#line 682 "./awk.y"
{
yyval.nodeval = node(node(make_number(0.0),
Node_field_spec,
@@ -1844,232 +1906,237 @@ case 106:
yyvsp[0].nodeval);
;
break;}
-case 107:
-#line 635 "awk.y"
-{ yyval.nodeval = node (variable(yyvsp[0].sval,1), Node_in_array, yyvsp[-2].nodeval); ;
- break;}
case 108:
-#line 637 "awk.y"
+#line 690 "./awk.y"
+{ yyval.nodeval = node(variable(yyvsp[0].sval, CAN_FREE, Node_var_array), Node_in_array, yyvsp[-2].nodeval); ;
+ break;}
+case 109:
+#line 692 "./awk.y"
{
if (do_lint && yyvsp[0].nodeval->type == Node_regex)
warning("Regular expression on left of comparison.");
- yyval.nodeval = node (yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, yyvsp[0].nodeval);
+ yyval.nodeval = node(yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, yyvsp[0].nodeval);
;
break;}
-case 109:
-#line 643 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_less, yyvsp[0].nodeval); ;
- break;}
case 110:
-#line 645 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_greater, yyvsp[0].nodeval); ;
+#line 698 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_less, yyvsp[0].nodeval); ;
break;}
case 111:
-#line 647 "awk.y"
-{ yyval.nodeval = node(yyvsp[-4].nodeval, Node_cond_exp, node(yyvsp[-2].nodeval, Node_if_branches, yyvsp[0].nodeval));;
+#line 700 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_greater, yyvsp[0].nodeval); ;
break;}
case 112:
-#line 649 "awk.y"
-{ yyval.nodeval = yyvsp[0].nodeval; ;
+#line 702 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-4].nodeval, Node_cond_exp, node(yyvsp[-2].nodeval, Node_if_branches, yyvsp[0].nodeval));;
break;}
case 113:
-#line 651 "awk.y"
-{ yyval.nodeval = node (yyvsp[-1].nodeval, Node_concat, yyvsp[0].nodeval); ;
+#line 704 "./awk.y"
+{ yyval.nodeval = yyvsp[0].nodeval; ;
break;}
case 114:
-#line 656 "awk.y"
-{ want_assign = 0; ;
+#line 706 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_concat, yyvsp[0].nodeval); ;
break;}
case 115:
-#line 658 "awk.y"
-{ yyval.nodeval = node (yyvsp[-3].nodeval, yyvsp[-2].nodetypeval, yyvsp[0].nodeval); ;
+#line 711 "./awk.y"
+{ want_assign = FALSE; ;
break;}
case 116:
-#line 660 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_and, yyvsp[0].nodeval); ;
+#line 713 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-3].nodeval, yyvsp[-2].nodetypeval, yyvsp[0].nodeval); ;
break;}
case 117:
-#line 662 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_or, yyvsp[0].nodeval); ;
+#line 715 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_and, yyvsp[0].nodeval); ;
break;}
case 118:
-#line 664 "awk.y"
+#line 717 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_or, yyvsp[0].nodeval); ;
+ break;}
+case 119:
+#line 719 "./awk.y"
{
if (do_lint && ! io_allowed && yyvsp[0].nodeval == NULL)
warning("non-redirected getline undefined inside BEGIN or END action");
- yyval.nodeval = node (yyvsp[-1].nodeval, Node_K_getline, yyvsp[0].nodeval);
+ yyval.nodeval = node(yyvsp[-1].nodeval, Node_K_getline, yyvsp[0].nodeval);
;
break;}
-case 119:
-#line 670 "awk.y"
-{ yyval.nodeval = yyvsp[0].nodeval; ;
- break;}
case 120:
-#line 672 "awk.y"
-{ yyval.nodeval = node((NODE *) NULL, Node_nomatch, yyvsp[0].nodeval); ;
+#line 725 "./awk.y"
+{ yyval.nodeval = yyvsp[0].nodeval; ;
break;}
case 121:
-#line 674 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, mk_rexp(yyvsp[0].nodeval)); ;
+#line 727 "./awk.y"
+{ yyval.nodeval = node((NODE *) NULL, Node_nomatch, yyvsp[0].nodeval); ;
break;}
case 122:
-#line 676 "awk.y"
-{ yyval.nodeval = node (variable(yyvsp[0].sval,1), Node_in_array, yyvsp[-2].nodeval); ;
+#line 729 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, mk_rexp(yyvsp[0].nodeval)); ;
break;}
case 123:
-#line 678 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, yyvsp[0].nodeval); ;
+#line 731 "./awk.y"
+{ yyval.nodeval = node(variable(yyvsp[0].sval, CAN_FREE, Node_var_array), Node_in_array, yyvsp[-2].nodeval); ;
break;}
case 124:
-#line 680 "awk.y"
-{ yyval.nodeval = node(yyvsp[-4].nodeval, Node_cond_exp, node(yyvsp[-2].nodeval, Node_if_branches, yyvsp[0].nodeval));;
+#line 733 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, yyvsp[0].nodeval); ;
break;}
case 125:
-#line 682 "awk.y"
-{ yyval.nodeval = yyvsp[0].nodeval; ;
+#line 735 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-4].nodeval, Node_cond_exp, node(yyvsp[-2].nodeval, Node_if_branches, yyvsp[0].nodeval));;
break;}
case 126:
-#line 684 "awk.y"
-{ yyval.nodeval = node (yyvsp[-1].nodeval, Node_concat, yyvsp[0].nodeval); ;
+#line 737 "./awk.y"
+{ yyval.nodeval = yyvsp[0].nodeval; ;
break;}
-case 128:
-#line 691 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_exp, yyvsp[0].nodeval); ;
+case 127:
+#line 739 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_concat, yyvsp[0].nodeval); ;
break;}
case 129:
-#line 693 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_times, yyvsp[0].nodeval); ;
+#line 746 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_exp, yyvsp[0].nodeval); ;
break;}
case 130:
-#line 695 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_quotient, yyvsp[0].nodeval); ;
+#line 748 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_times, yyvsp[0].nodeval); ;
break;}
case 131:
-#line 697 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_mod, yyvsp[0].nodeval); ;
+#line 750 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_quotient, yyvsp[0].nodeval); ;
break;}
case 132:
-#line 699 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_plus, yyvsp[0].nodeval); ;
+#line 752 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_mod, yyvsp[0].nodeval); ;
break;}
case 133:
-#line 701 "awk.y"
-{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_minus, yyvsp[0].nodeval); ;
+#line 754 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_plus, yyvsp[0].nodeval); ;
break;}
case 134:
-#line 703 "awk.y"
-{ yyval.nodeval = node (yyvsp[-1].nodeval, Node_postincrement, (NODE *)NULL); ;
+#line 756 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_minus, yyvsp[0].nodeval); ;
break;}
case 135:
-#line 705 "awk.y"
-{ yyval.nodeval = node (yyvsp[-1].nodeval, Node_postdecrement, (NODE *)NULL); ;
+#line 758 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_postincrement, (NODE *) NULL); ;
break;}
case 136:
-#line 710 "awk.y"
-{ yyval.nodeval = node (yyvsp[0].nodeval, Node_not,(NODE *) NULL); ;
+#line 760 "./awk.y"
+{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_postdecrement, (NODE *) NULL); ;
break;}
case 137:
-#line 712 "awk.y"
-{ yyval.nodeval = yyvsp[-1].nodeval; ;
+#line 765 "./awk.y"
+{ yyval.nodeval = node(yyvsp[0].nodeval, Node_not, (NODE *) NULL); ;
break;}
case 138:
-#line 715 "awk.y"
-{ yyval.nodeval = snode (yyvsp[-1].nodeval, Node_builtin, (int) yyvsp[-3].lval); ;
+#line 767 "./awk.y"
+{ yyval.nodeval = yyvsp[-1].nodeval; ;
break;}
case 139:
-#line 717 "awk.y"
-{ yyval.nodeval = snode (yyvsp[-1].nodeval, Node_builtin, (int) yyvsp[-3].lval); ;
+#line 770 "./awk.y"
+{ yyval.nodeval = snode(yyvsp[-1].nodeval, Node_builtin, (int) yyvsp[-3].lval); ;
break;}
case 140:
-#line 719 "awk.y"
+#line 772 "./awk.y"
+{ yyval.nodeval = snode(yyvsp[-1].nodeval, Node_builtin, (int) yyvsp[-3].lval); ;
+ break;}
+case 141:
+#line 774 "./awk.y"
{
if (do_lint)
warning("call of `length' without parentheses is not portable");
- yyval.nodeval = snode ((NODE *)NULL, Node_builtin, (int) yyvsp[0].lval);
+ yyval.nodeval = snode((NODE *) NULL, Node_builtin, (int) yyvsp[0].lval);
if (do_posix)
- warning( "call of `length' without parentheses is deprecated by POSIX");
+ warning("call of `length' without parentheses is deprecated by POSIX");
;
break;}
-case 141:
-#line 727 "awk.y"
+case 142:
+#line 782 "./awk.y"
{
- yyval.nodeval = node (yyvsp[-1].nodeval, Node_func_call, make_string(yyvsp[-3].sval, strlen(yyvsp[-3].sval)));
+ yyval.nodeval = node(yyvsp[-1].nodeval, Node_func_call, make_string(yyvsp[-3].sval, strlen(yyvsp[-3].sval)));
+ func_use(yyvsp[-3].sval, FUNC_USE);
+ param_sanity(yyvsp[-1].nodeval);
free(yyvsp[-3].sval);
;
break;}
-case 143:
-#line 733 "awk.y"
-{ yyval.nodeval = node (yyvsp[0].nodeval, Node_preincrement, (NODE *)NULL); ;
- break;}
case 144:
-#line 735 "awk.y"
-{ yyval.nodeval = node (yyvsp[0].nodeval, Node_predecrement, (NODE *)NULL); ;
+#line 790 "./awk.y"
+{ yyval.nodeval = node(yyvsp[0].nodeval, Node_preincrement, (NODE *) NULL); ;
break;}
case 145:
-#line 737 "awk.y"
-{ yyval.nodeval = yyvsp[0].nodeval; ;
+#line 792 "./awk.y"
+{ yyval.nodeval = node(yyvsp[0].nodeval, Node_predecrement, (NODE *) NULL); ;
break;}
case 146:
-#line 739 "awk.y"
+#line 794 "./awk.y"
{ yyval.nodeval = yyvsp[0].nodeval; ;
break;}
case 147:
-#line 742 "awk.y"
-{ if (yyvsp[0].nodeval->type == Node_val) {
+#line 796 "./awk.y"
+{ yyval.nodeval = yyvsp[0].nodeval; ;
+ break;}
+case 148:
+#line 799 "./awk.y"
+{
+ if (yyvsp[0].nodeval->type == Node_val) {
yyvsp[0].nodeval->numbr = -(force_number(yyvsp[0].nodeval));
yyval.nodeval = yyvsp[0].nodeval;
} else
- yyval.nodeval = node (yyvsp[0].nodeval, Node_unary_minus, (NODE *)NULL);
+ yyval.nodeval = node(yyvsp[0].nodeval, Node_unary_minus, (NODE *) NULL);
;
break;}
-case 148:
-#line 749 "awk.y"
+case 149:
+#line 807 "./awk.y"
{
- /* was: $$ = $2 */
- /* POSIX semantics: force a conversion to numeric type */
+ /*
+ * was: $$ = $2
+ * POSIX semantics: force a conversion to numeric type
+ */
yyval.nodeval = node (make_number(0.0), Node_plus, yyvsp[0].nodeval);
;
break;}
-case 149:
-#line 758 "awk.y"
-{ yyval.nodeval = NULL; ;
- break;}
case 150:
-#line 760 "awk.y"
-{ yyval.nodeval = yyvsp[0].nodeval; ;
+#line 818 "./awk.y"
+{ yyval.nodeval = NULL; ;
break;}
case 151:
-#line 765 "awk.y"
-{ yyval.nodeval = variable(yyvsp[0].sval,1); ;
+#line 820 "./awk.y"
+{ yyval.nodeval = yyvsp[0].nodeval; ;
break;}
case 152:
-#line 767 "awk.y"
+#line 825 "./awk.y"
+{ yyval.nodeval = variable(yyvsp[0].sval, CAN_FREE, Node_var); ;
+ break;}
+case 153:
+#line 827 "./awk.y"
{
if (yyvsp[-1].nodeval->rnode == NULL) {
- yyval.nodeval = node (variable(yyvsp[-3].sval,1), Node_subscript, yyvsp[-1].nodeval->lnode);
+ yyval.nodeval = node(variable(yyvsp[-3].sval, CAN_FREE, Node_var_array), Node_subscript, yyvsp[-1].nodeval->lnode);
freenode(yyvsp[-1].nodeval);
} else
- yyval.nodeval = node (variable(yyvsp[-3].sval,1), Node_subscript, yyvsp[-1].nodeval);
+ yyval.nodeval = node(variable(yyvsp[-3].sval, CAN_FREE, Node_var_array), Node_subscript, yyvsp[-1].nodeval);
;
break;}
-case 153:
-#line 775 "awk.y"
-{ yyval.nodeval = node (yyvsp[0].nodeval, Node_field_spec, (NODE *)NULL); ;
- break;}
-case 155:
-#line 783 "awk.y"
-{ yyerrok; ;
+case 154:
+#line 835 "./awk.y"
+{ yyval.nodeval = node(yyvsp[0].nodeval, Node_field_spec, (NODE *) NULL); ;
break;}
case 156:
-#line 787 "awk.y"
+#line 843 "./awk.y"
{ yyerrok; ;
break;}
-case 159:
-#line 796 "awk.y"
-{ yyerrok; want_assign = 0; ;
+case 157:
+#line 847 "./awk.y"
+{ yyerrok; ;
break;}
case 160:
-#line 799 "awk.y"
+#line 856 "./awk.y"
+{ yyerrok; want_assign = FALSE; ;
+ break;}
+case 161:
+#line 859 "./awk.y"
{ yyerrok; ;
break;}
}
@@ -2270,7 +2337,7 @@ yyerrhandle:
yystate = yyn;
goto yynewstate;
}
-#line 802 "awk.y"
+#line 862 "./awk.y"
struct token {
@@ -2284,7 +2351,8 @@ struct token {
# define NOT_OLD 0x0100 /* feature not in old awk */
# define NOT_POSIX 0x0200 /* feature not in POSIX */
# define GAWKX 0x0400 /* gawk extension */
- NODE *(*ptr) (); /* function that implements this keyword */
+# define RESX 0x0800 /* Bell Labs Research extension */
+ NODE *(*ptr)(); /* function that implements this keyword */
};
extern NODE
@@ -2293,7 +2361,8 @@ extern NODE
*do_split(), *do_system(), *do_int(), *do_close(),
*do_atan2(), *do_sin(), *do_cos(), *do_rand(),
*do_srand(), *do_match(), *do_tolower(), *do_toupper(),
- *do_sub(), *do_gsub(), *do_strftime(), *do_systime();
+ *do_sub(), *do_gsub(), *do_strftime(), *do_systime(),
+ *do_fflush();
/* Tokentab is sorted ascii ascending order, so it can be binary searched. */
@@ -2310,9 +2379,11 @@ static struct token tokentab[] = {
{"else", Node_illegal, LEX_ELSE, 0, 0},
{"exit", Node_K_exit, LEX_EXIT, 0, 0},
{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
+{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush},
{"for", Node_K_for, LEX_FOR, 0, 0},
{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
+{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
{"if", Node_K_if, LEX_IF, 0, 0},
@@ -2323,6 +2394,7 @@ static struct token tokentab[] = {
{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match},
{"next", Node_K_next, LEX_NEXT, 0, 0},
+{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0},
{"print", Node_K_print, LEX_PRINT, 0, 0},
{"printf", Node_K_printf, LEX_PRINTF, 0, 0},
{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
@@ -2332,7 +2404,7 @@ static struct token tokentab[] = {
{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
-{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_strftime},
+{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime},
{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
@@ -2342,10 +2414,17 @@ static struct token tokentab[] = {
{"while", Node_K_while, LEX_WHILE, 0, 0},
};
+/* yyerror --- print a syntax error message, show where */
+
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+static void
+yyerror(const char *m, ...)
+#else
/* VARARGS0 */
static void
yyerror(va_alist)
va_dcl
+#endif
{
va_list args;
const char *mesg = NULL;
@@ -2357,13 +2436,13 @@ va_dcl
errcount++;
/* Find the current line in the input file */
if (lexptr && lexeme) {
- if (!thisline) {
+ if (thisline == NULL) {
cp = lexeme;
if (*cp == '\n') {
cp--;
mesg = "unexpected newline";
}
- for ( ; cp != lexptr_begin && *cp != '\n'; --cp)
+ for (; cp != lexptr_begin && *cp != '\n'; --cp)
continue;
if (*cp == '\n')
cp++;
@@ -2380,7 +2459,7 @@ va_dcl
msg("%.*s", (int) (bp - thisline), thisline);
bp = buf;
cp = buf + sizeof(buf) - 24; /* 24 more than longest msg. input */
- if (lexptr) {
+ if (lexptr != NULL) {
scan = thisline;
while (bp < cp && scan < lexeme)
if (*scan++ == '\t')
@@ -2390,26 +2469,36 @@ va_dcl
*bp++ = '^';
*bp++ = ' ';
}
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ va_start(args, m);
+ if (mesg == NULL)
+ mesg = m;
+#else
va_start(args);
if (mesg == NULL)
mesg = va_arg(args, char *);
+#endif
strcpy(bp, mesg);
err("", buf, args);
va_end(args);
exit(2);
}
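
The hunks above switch yyerror() from the old varargs.h convention to <stdarg.h> whenever HAVE_STDARG_H and __STDC__ are set, keeping the va_alist form only as a fallback. A minimal stdarg-based error reporter in the same style, for illustration only (err_fatal() is a hypothetical name, not gawk's err()/fatal()):

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

/* Report a formatted error and exit -- the stdarg shape the patch
 * adopts for yyerror() when <stdarg.h> is available. */
static void err_fatal(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	fprintf(stderr, "awk: ");
	vfprintf(stderr, fmt, args);
	fputc('\n', stderr);
	va_end(args);
	exit(2);
}

int main(void)
{
	err_fatal("can't open source file \"%s\" for reading", "prog.awk");
	return 0;	/* not reached */
}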
+/* get_src_buf --- read the next buffer of source program */
+
static char *
get_src_buf()
{
- static int samefile = 0;
+ static int samefile = FALSE;
static int nextfile = 0;
static char *buf = NULL;
static int fd;
int n;
register char *scan;
static int len = 0;
- static int did_newline = 0;
+ static int did_newline = FALSE;
+ struct stat sbuf;
+
# define SLOP 128 /* enough space to hold most source lines */
again:
@@ -2425,13 +2514,19 @@ again:
* gawk '' /path/name
* Sigh.
*/
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warned = TRUE;
+ warning("empty program text on command line");
+ }
++nextfile;
goto again;
}
sourceline = 1;
lexptr = lexptr_begin = srcfiles[nextfile].val;
lexend = lexptr + len;
- } else if (!did_newline && *(lexptr-1) != '\n') {
+ } else if (! did_newline && *(lexptr-1) != '\n') {
/*
* The following goop is to ensure that the source
* ends with a newline and that the entire current
@@ -2439,7 +2534,7 @@ again:
*/
int offset;
- did_newline = 1;
+ did_newline = TRUE;
offset = lexptr - lexeme;
for (scan = lexeme; scan > lexptr_begin; scan--)
if (*scan == '\n') {
@@ -2460,13 +2555,13 @@ again:
lexeme = lexptr = lexptr_begin = NULL;
}
if (lexptr == NULL && ++nextfile <= numfiles)
- return get_src_buf();
+ goto again;
return lexptr;
}
- if (!samefile) {
+ if (! samefile) {
source = srcfiles[nextfile].val;
if (source == NULL) {
- if (buf) {
+ if (buf != NULL) {
free(buf);
buf = NULL;
}
@@ -2474,15 +2569,32 @@ again:
return lexeme = lexptr = lexptr_begin = NULL;
}
fd = pathopen(source);
- if (fd == -1)
+ if (fd <= INVALID_HANDLE) {
+ char *in;
+
+ /* suppress file name and line no. in error mesg */
+ in = source;
+ source = NULL;
fatal("can't open source file \"%s\" for reading (%s)",
- source, strerror(errno));
- len = optimal_bufsize(fd);
- if (buf)
+ in, strerror(errno));
+ }
+ len = optimal_bufsize(fd, & sbuf);
+ if (sbuf.st_size == 0) {
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warned = TRUE;
+ warning("source file `%s' is empty", source);
+ }
+ close(fd);
+ ++nextfile;
+ goto again;
+ }
+ if (buf != NULL)
free(buf);
emalloc(buf, char *, len + SLOP, "get_src_buf");
lexptr_begin = buf + SLOP;
- samefile = 1;
+ samefile = TRUE;
sourceline = 1;
} else {
/*
@@ -2511,7 +2623,8 @@ again:
fatal("can't read sourcefile \"%s\" (%s)",
source, strerror(errno));
if (n == 0) {
- samefile = 0;
+ close(fd);
+ samefile = FALSE;
nextfile++;
if (lexeme)
*lexeme = '\0';
@@ -2523,8 +2636,12 @@ again:
return buf;
}
+/* tokadd --- add a character to the token buffer */
+
#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
+/* tokexpand --- grow the token buffer */
+
char *
tokexpand()
{
@@ -2533,7 +2650,7 @@ tokexpand()
tokoffset = tok - tokstart;
toksize *= 2;
- if (tokstart)
+ if (tokstart != NULL)
erealloc(tokstart, char *, toksize, "tokexpand");
else
emalloc(tokstart, char *, toksize, "tokexpand");
@@ -2542,9 +2659,12 @@ tokexpand()
return tok;
}
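
The new comments above document the tokadd()/tokexpand() pair: tokadd() appends a character and calls tokexpand() when the write pointer reaches the end of the buffer, and tokexpand() doubles the buffer with erealloc(). A standalone sketch of the same doubling-buffer idea, with made-up names (buf_add/buf_expand) rather than gawk's tokstart/tok/tokend globals:

#include <stdio.h>
#include <stdlib.h>

static char *buf_start, *buf_ptr, *buf_end;
static size_t buf_size = 8;

/* Double the buffer, preserving contents and the write offset. */
static void buf_expand(void)
{
	size_t off = buf_ptr - buf_start;

	buf_size *= 2;
	buf_start = realloc(buf_start, buf_size);
	if (buf_start == NULL) {
		perror("realloc");
		exit(1);
	}
	buf_ptr = buf_start + off;
	buf_end = buf_start + buf_size;
}

/* Write first, then grow if the pointer just hit the end, so the
 * next write is always safe -- the same order tokadd() uses. */
#define buf_add(c) (*buf_ptr++ = (c), buf_ptr == buf_end ? buf_expand() : (void) 0)

int main(void)
{
	const char *s = "a fairly long token that forces several expansions";
	const char *p;

	buf_start = buf_ptr = malloc(buf_size);
	buf_end = buf_start + buf_size;
	for (p = s; *p != '\0'; p++)
		buf_add(*p);
	buf_add('\0');
	printf("%s\n", buf_start);
	free(buf_start);
	return 0;
}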
+/* nextc --- get the next input character */
+
#if DEBUG
-char
-nextc() {
+int
+nextc()
+{
int c;
if (lexptr && lexptr < lexend)
@@ -2552,40 +2672,67 @@ nextc() {
else if (get_src_buf())
c = *lexptr++;
else
- c = '\0';
+ c = EOF;
return c;
}
#else
#define nextc() ((lexptr && lexptr < lexend) ? \
*lexptr++ : \
- (get_src_buf() ? *lexptr++ : '\0') \
+ (get_src_buf() ? *lexptr++ : EOF) \
)
#endif
+
+/* pushback --- push a character back on the input */
+
#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr)
-/*
- * Read the input and turn it into tokens.
- */
+/* allow_newline --- allow newline after &&, ||, ? and : */
+
+static void
+allow_newline()
+{
+ int c;
+
+ for (;;) {
+ c = nextc();
+ if (c == EOF)
+ break;
+ if (c == '#') {
+ while ((c = nextc()) != '\n' && c != EOF)
+ continue;
+ if (c == EOF)
+ break;
+ }
+ if (c == '\n')
+ sourceline++;
+ if (! isspace(c)) {
+ pushback();
+ break;
+ }
+ }
+}
+
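
The new allow_newline() helper factors out the read-ahead loop that the '&&' and '||' cases used to duplicate (and that '?' and ':' now share): skip whitespace, let '#' comments run to end of line, count newlines, and push back the first significant character. A standalone sketch of the same scan over an in-memory string instead of gawk's nextc()/pushback() (skip_newlines() and its arguments are hypothetical):

#include <ctype.h>
#include <stdio.h>

/* Advance *pp past whitespace, newlines and '#' comments, bumping
 * *lineno for each newline, and stop at the first significant
 * character -- the same shape as allow_newline(). */
static void skip_newlines(const char **pp, int *lineno)
{
	const char *p = *pp;

	for (;;) {
		if (*p == '\0')
			break;
		if (*p == '#') {		/* comment runs to end of line */
			while (*p != '\n' && *p != '\0')
				p++;
			if (*p == '\0')
				break;
		}
		if (*p == '\n')
			(*lineno)++;
		if (! isspace((unsigned char) *p))
			break;			/* significant char: leave it for the caller */
		p++;
	}
	*pp = p;
}

int main(void)
{
	const char *src = "&&   # trailing comment\n\n    x > 0";
	int line = 1;

	src += 2;				/* past the "&&" */
	skip_newlines(&src, &line);
	printf("line %d, next token starts at: \"%s\"\n", line, src);
	return 0;
}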
+/* yylex --- Read the input and turn it into tokens. */
static int
yylex()
{
- register int c;
- int seen_e = 0; /* These are for numbers */
- int seen_point = 0;
+ register int c, c1;
+ int seen_e = FALSE; /* These are for numbers */
+ int seen_point = FALSE;
int esc_seen; /* for literal strings */
int low, mid, high;
- static int did_newline = 0;
+ static int did_newline = FALSE;
char *tokkey;
- static int lasttok = 0, eof_warned = 0;
+ static int lasttok = 0, eof_warned = FALSE;
- if (!nextc()) {
+ if (nextc() == EOF) {
if (lasttok != NEWLINE) {
lasttok = NEWLINE;
if (do_lint && ! eof_warned) {
warning("source file does not end in newline");
- eof_warned = 1;
+ eof_warned = TRUE;
}
return NEWLINE; /* fake it */
}
@@ -2605,29 +2752,59 @@ yylex()
lexeme = lexptr;
thisline = NULL;
if (want_regexp) {
- int in_brack = 0;
+ int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
+ /*
+ * Counting brackets is non-trivial. [[] is ok,
+ * and so is [\]], with a point being that /[/]/ as a regexp
+ * constant has to work.
+ *
+ * Do not count [ or ] if either one is preceded by a \.
+ * A `[' should be counted if
+ * a) it is the first one so far (in_brack == 0)
+ * b) it is the `[' in `[:'
+ * A ']' should be counted if not preceded by a \, since
+ * it is either closing `:]' or just a plain list.
+ * According to POSIX, []] is how you put a ] into a set.
+ * Try to handle that too.
+ *
+ * The code for \ handles \[ and \].
+ */
- want_regexp = 0;
+ want_regexp = FALSE;
tok = tokstart;
- while ((c = nextc()) != 0) {
+ for (;;) {
+ c = nextc();
switch (c) {
case '[':
- in_brack = 1;
+ /* one day check for `.' and `=' too */
+ if ((c1 = nextc()) == ':' || in_brack == 0)
+ in_brack++;
+ pushback();
break;
case ']':
- in_brack = 0;
+ if (tokstart[0] == '['
+ && (tok == tokstart + 1
+ || (tok == tokstart + 2
+ && tokstart[1] == '^')))
+ /* do nothing */;
+ else
+ in_brack--;
break;
case '\\':
- if ((c = nextc()) == '\0') {
+ if ((c = nextc()) == EOF) {
yyerror("unterminated regexp ends with \\ at end of file");
+ return lasttok = REGEXP; /* kludge */
} else if (c == '\n') {
sourceline++;
continue;
- } else
+ } else {
tokadd('\\');
+ tokadd(c);
+ continue;
+ }
break;
case '/': /* end of the regexp */
- if (in_brack)
+ if (in_brack > 0)
break;
pushback();
@@ -2637,8 +2814,10 @@ yylex()
case '\n':
pushback();
yyerror("unterminated regexp");
- case '\0':
+ return lasttok = REGEXP; /* kludge */
+ case EOF:
yyerror("unterminated regexp at end of file");
+ return lasttok = REGEXP; /* kludge */
}
tokadd(c);
}
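
The rewritten regexp scanner above replaces the old boolean in_brack with a counter and spells out the rules in its comment: '[' counts when it opens the first bracket expression or begins "[:", ']' decrements unless it is the literal ']' allowed right after '[' or '[^', and backslash-escaped characters are copied through untouched. A compact, deliberately simplified checker built on those rules, scanning a plain string rather than the token stream (regexp_ends_at() is hypothetical and is not gawk's actual scanner):

#include <stdio.h>

/* Return the index of the unbracketed, unescaped '/' that would end
 * a regexp constant starting at s[0], or -1 if none is found. */
static int regexp_ends_at(const char *s)
{
	int in_brack = 0;
	int i;

	for (i = 0; s[i] != '\0'; i++) {
		switch (s[i]) {
		case '\\':
			if (s[i+1] != '\0')
				i++;		/* skip the escaped character */
			break;
		case '[':
			/* first '[' so far, or the '[' of "[:" */
			if (in_brack == 0 || s[i+1] == ':')
				in_brack++;
			break;
		case ']':
			/* "[]" and "[^]" make ']' a literal member of the set */
			if (i >= 1 && s[i-1] == '[')
				;		/* do nothing */
			else if (i >= 2 && s[i-1] == '^' && s[i-2] == '[')
				;		/* do nothing */
			else
				in_brack--;
			break;
		case '/':
			if (in_brack <= 0)
				return i;	/* end of the regexp constant */
			break;
		}
	}
	return -1;
}

int main(void)
{
	printf("%d\n", regexp_ends_at("[/]x/"));	/* 4: '/' inside [...] does not end it */
	printf("%d\n", regexp_ends_at("[[:alnum:]]+/"));
	printf("%d\n", regexp_ends_at("a\\/b/"));	/* escaped slash is skipped */
	return 0;
}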
@@ -2653,12 +2832,12 @@ retry:
yylval.nodetypeval = Node_illegal;
switch (c) {
- case 0:
+ case EOF:
if (lasttok != NEWLINE) {
lasttok = NEWLINE;
if (do_lint && ! eof_warned) {
warning("source file does not end in newline");
- eof_warned = 1;
+ eof_warned = TRUE;
}
return NEWLINE; /* fake it */
}
@@ -2670,13 +2849,13 @@ retry:
case '#': /* it's a comment */
while ((c = nextc()) != '\n') {
- if (c == '\0') {
+ if (c == EOF) {
if (lasttok != NEWLINE) {
lasttok = NEWLINE;
if (do_lint && ! eof_warned) {
warning(
"source file does not end in newline");
- eof_warned = 1;
+ eof_warned = TRUE;
}
return NEWLINE; /* fake it */
}
@@ -2694,14 +2873,18 @@ retry:
* Use it at your own risk. We think it's a bad idea, which
* is why it's not on by default.
*/
- if (!do_unix) {
+ if (! do_traditional) {
/* strip trailing white-space and/or comment */
while ((c = nextc()) == ' ' || c == '\t')
continue;
- if (c == '#')
+ if (c == '#') {
+ if (do_lint)
+ warning(
+ "use of `\\ #...' line continuation is not portable");
while ((c = nextc()) != '\n')
- if (c == '\0')
+ if (c == EOF)
break;
+ }
pushback();
}
#endif /* RELAXED_CONTINUATION */
@@ -2713,16 +2896,18 @@ retry:
break;
case '$':
- want_assign = 1;
+ want_assign = TRUE;
return lasttok = '$';
+ case ':':
+ case '?':
+ allow_newline();
+ /* fall through */
case ')':
case ']':
case '(':
case '[':
case ';':
- case ':':
- case '?':
case '{':
case ',':
return lasttok = c;
@@ -2736,20 +2921,22 @@ retry:
return lasttok = '*';
} else if (c == '*') {
/* make ** and **= aliases for ^ and ^= */
- static int did_warn_op = 0, did_warn_assgn = 0;
+ static int did_warn_op = FALSE, did_warn_assgn = FALSE;
if (nextc() == '=') {
if (do_lint && ! did_warn_assgn) {
- did_warn_assgn = 1;
+ did_warn_assgn = TRUE;
warning("**= is not allowed by POSIX");
+ warning("operator `**=' is not supported in old awk");
}
yylval.nodetypeval = Node_assign_exp;
- return lasttok = ASSIGNOP;
+ return ASSIGNOP;
} else {
pushback();
if (do_lint && ! did_warn_op) {
- did_warn_op = 1;
+ did_warn_op = TRUE;
warning("** is not allowed by POSIX");
+ warning("operator `**' is not supported in old awk");
}
return lasttok = '^';
}
@@ -2777,12 +2964,11 @@ retry:
case '^':
{
- static int did_warn_op = 0, did_warn_assgn = 0;
+ static int did_warn_op = FALSE, did_warn_assgn = FALSE;
if (nextc() == '=') {
-
if (do_lint && ! did_warn_assgn) {
- did_warn_assgn = 1;
+ did_warn_assgn = TRUE;
warning("operator `^=' is not supported in old awk");
}
yylval.nodetypeval = Node_assign_exp;
@@ -2790,7 +2976,7 @@ retry:
}
pushback();
if (do_lint && ! did_warn_op) {
- did_warn_op = 1;
+ did_warn_op = TRUE;
warning("operator `^' is not supported in old awk");
}
return lasttok = '^';
@@ -2813,7 +2999,7 @@ retry:
}
if (c == '~') {
yylval.nodetypeval = Node_nomatch;
- want_assign = 0;
+ want_assign = FALSE;
return lasttok = MATCHOP;
}
pushback();
@@ -2851,16 +3037,16 @@ retry:
case '~':
yylval.nodetypeval = Node_match;
- want_assign = 0;
+ want_assign = FALSE;
return lasttok = MATCHOP;
case '}':
/*
* Added did newline stuff. Easier than
- * hacking the grammar
+ * hacking the grammar.
*/
if (did_newline) {
- did_newline = 0;
+ did_newline = FALSE;
return lasttok = c;
}
did_newline++;
@@ -2868,7 +3054,7 @@ retry:
return lasttok = NEWLINE;
case '"':
- esc_seen = 0;
+ esc_seen = FALSE;
while ((c = nextc()) != '"') {
if (c == '\n') {
pushback();
@@ -2880,10 +3066,10 @@ retry:
sourceline++;
continue;
}
- esc_seen = 1;
+ esc_seen = TRUE;
tokadd('\\');
}
- if (c == '\0') {
+ if (c == EOF) {
pushback();
yyerror("unterminated string");
}
@@ -2907,10 +3093,11 @@ retry:
case '.':
c = nextc();
pushback();
- if (!isdigit(c))
+ if (! isdigit(c))
return lasttok = '.';
else
- c = '.'; /* FALL THROUGH */
+ c = '.';
+ /* FALL THROUGH */
case '0':
case '1':
case '2':
@@ -2923,24 +3110,24 @@ retry:
case '9':
/* It's a number */
for (;;) {
- int gotnumber = 0;
+ int gotnumber = FALSE;
tokadd(c);
switch (c) {
case '.':
if (seen_point) {
- gotnumber++;
+ gotnumber = TRUE;
break;
}
- ++seen_point;
+ seen_point = TRUE;
break;
case 'e':
case 'E':
if (seen_e) {
- gotnumber++;
+ gotnumber = TRUE;
break;
}
- ++seen_e;
+ seen_e = TRUE;
if ((c = nextc()) == '-' || c == '+')
tokadd(c);
else
@@ -2958,17 +3145,17 @@ retry:
case '9':
break;
default:
- gotnumber++;
+ gotnumber = TRUE;
}
if (gotnumber)
break;
c = nextc();
}
- if (c != 0)
+ if (c != EOF)
pushback();
else if (do_lint && ! eof_warned) {
warning("source file does not end in newline");
- eof_warned = 1;
+ eof_warned = TRUE;
}
tokadd('\0');
yylval.nodeval = make_number(atof(tokstart));
@@ -2978,24 +3165,8 @@ retry:
case '&':
if ((c = nextc()) == '&') {
yylval.nodetypeval = Node_and;
- for (;;) {
- c = nextc();
- if (c == '\0')
- break;
- if (c == '#') {
- while ((c = nextc()) != '\n' && c != '\0')
- continue;
- if (c == '\0')
- break;
- }
- if (c == '\n')
- sourceline++;
- if (! isspace(c)) {
- pushback();
- break;
- }
- }
- want_assign = 0;
+ allow_newline();
+ want_assign = FALSE;
return lasttok = LEX_AND;
}
pushback();
@@ -3004,24 +3175,8 @@ retry:
case '|':
if ((c = nextc()) == '|') {
yylval.nodetypeval = Node_or;
- for (;;) {
- c = nextc();
- if (c == '\0')
- break;
- if (c == '#') {
- while ((c = nextc()) != '\n' && c != '\0')
- continue;
- if (c == '\0')
- break;
- }
- if (c == '\n')
- sourceline++;
- if (! isspace(c)) {
- pushback();
- break;
- }
- }
- want_assign = 0;
+ allow_newline();
+ want_assign = FALSE;
return lasttok = LEX_OR;
}
pushback();
@@ -3031,7 +3186,7 @@ retry:
if (c != '_' && ! isalpha(c))
yyerror("Invalid char '%c' in expression\n", c);
- /* it's some type of name-type-thing. Find its length */
+ /* it's some type of name-type-thing. Find its length. */
tok = tokstart;
while (is_identchar(c)) {
tokadd(c);
@@ -3040,40 +3195,43 @@ retry:
tokadd('\0');
emalloc(tokkey, char *, tok - tokstart, "yylex");
memcpy(tokkey, tokstart, tok - tokstart);
- if (c != 0)
+ if (c != EOF)
pushback();
else if (do_lint && ! eof_warned) {
warning("source file does not end in newline");
- eof_warned = 1;
+ eof_warned = TRUE;
}
- /* See if it is a special token. */
+ /* See if it is a special token. */
low = 0;
- high = (sizeof (tokentab) / sizeof (tokentab[0])) - 1;
+ high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
while (low <= high) {
- int i/* , c */;
+ int i;
mid = (low + high) / 2;
c = *tokstart - tokentab[mid].operator[0];
- i = c ? c : strcmp (tokstart, tokentab[mid].operator);
+ i = c ? c : strcmp(tokstart, tokentab[mid].operator);
- if (i < 0) { /* token < mid */
+ if (i < 0) /* token < mid */
high = mid - 1;
- } else if (i > 0) { /* token > mid */
+ else if (i > 0) /* token > mid */
low = mid + 1;
- } else {
+ else {
if (do_lint) {
if (tokentab[mid].flags & GAWKX)
warning("%s() is a gawk extension",
tokentab[mid].operator);
+ if (tokentab[mid].flags & RESX)
+ warning("%s() is a Bell Labs extension",
+ tokentab[mid].operator);
if (tokentab[mid].flags & NOT_POSIX)
warning("POSIX does not allow %s",
tokentab[mid].operator);
- if (tokentab[mid].flags & NOT_OLD)
- warning("%s is not supported in old awk",
- tokentab[mid].operator);
}
- if ((do_unix && (tokentab[mid].flags & GAWKX))
+ if (do_lint_old && (tokentab[mid].flags & NOT_OLD))
+ warning("%s is not supported in old awk",
+ tokentab[mid].operator);
+ if ((do_traditional && (tokentab[mid].flags & GAWKX))
|| (do_posix && (tokentab[mid].flags & NOT_POSIX)))
break;
if (tokentab[mid].class == LEX_BUILTIN
@@ -3092,11 +3250,13 @@ retry:
if (*lexptr == '(')
return lasttok = FUNC_CALL;
else {
- want_assign = 1;
+ want_assign = TRUE;
return lasttok = NAME;
}
}
+/* node_common --- common code for allocating a new node */
+
static NODE *
node_common(op)
NODETYPE op;
@@ -3115,9 +3275,8 @@ NODETYPE op;
return r;
}
-/*
- * This allocates a node with defined lnode and rnode.
- */
+/* node --- allocates a node with defined lnode and rnode. */
+
NODE *
node(left, op, right)
NODE *left, *right;
@@ -3131,10 +3290,10 @@ NODETYPE op;
return r;
}
-/*
- * This allocates a node with defined subnode and proc for builtin functions
- * Checks for arg. count and supplies defaults where possible.
- */
+/* snode --- allocate a node with defined subnode and proc for builtin
+ functions. Checks for arg. count and supplies defaults where
+ possible. */
+
static NODE *
snode(subn, op, idx)
NODETYPE op;
@@ -3149,7 +3308,7 @@ NODE *subn;
r = node_common(op);
/* traverse expression list to see how many args. given */
- for (n= subn; n; n= n->rnode) {
+ for (n = subn; n != NULL; n = n->rnode) {
nexp++;
if (nexp > 3)
break;
@@ -3157,7 +3316,7 @@ NODE *subn;
/* check against how many args. are allowed for this builtin */
args_allowed = tokentab[idx].flags & ARGS;
- if (args_allowed && !(args_allowed & A(nexp)))
+ if (args_allowed && (args_allowed & A(nexp)) == 0)
fatal("%s() cannot have %d argument%c",
tokentab[idx].operator, nexp, nexp == 1 ? ' ' : 's');
@@ -3165,7 +3324,7 @@ NODE *subn;
/* special case processing for a few builtins */
if (nexp == 0 && r->proc == do_length) {
- subn = node(node(make_number(0.0),Node_field_spec,(NODE *)NULL),
+ subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL),
Node_expression_list,
(NODE *) NULL);
} else if (r->proc == do_match) {
@@ -3182,6 +3341,15 @@ NODE *subn;
(NODE *) NULL));
else if (do_lint && subn->rnode->rnode->lnode->type == Node_val)
warning("string literal as last arg of substitute");
+ } else if (r->proc == do_gensub) {
+ if (subn->lnode->type != Node_regex)
+ subn->lnode = mk_rexp(subn->lnode);
+ if (nexp == 3)
+ append_right(subn, node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL));
} else if (r->proc == do_split) {
if (nexp == 2)
append_right(subn,
@@ -3198,11 +3366,13 @@ NODE *subn;
}
/*
+ * mkrangenode:
* This allocates a Node_line_range node with defined condpair and
* zeroes the trigger word to avoid the temptation of assuming that calling
* 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'.
+ * Otherwise like node().
*/
-/* Otherwise like node() */
+
static NODE *
mkrangenode(cpair)
NODE *cpair;
@@ -3212,11 +3382,12 @@ NODE *cpair;
getnode(r);
r->type = Node_line_range;
r->condpair = cpair;
- r->triggered = 0;
+ r->triggered = FALSE;
return r;
}
-/* Build a for loop */
+/* make_for_loop --- build a for loop */
+
static NODE *
make_for_loop(init, cond, incr)
NODE *init, *cond, *incr;
@@ -3234,10 +3405,52 @@ NODE *init, *cond, *incr;
return n;
}
+/* dup_parms --- return TRUE if there are duplicate parameters */
+
+static int
+dup_parms(func)
+NODE *func;
+{
+ register NODE *np;
+ char *fname, **names;
+ int count, i, j, dups;
+ NODE *params;
+
+ fname = func->param;
+ count = func->param_cnt;
+ params = func->rnode;
+
+ if (count == 0) /* no args, no problem */
+ return FALSE;
+
+ emalloc(names, char **, count * sizeof(char *), "dup_parms");
+
+ i = 0;
+ for (np = params; np != NULL; np = np->rnode)
+ names[i++] = np->param;
+
+ dups = 0;
+ for (i = 1; i < count; i++) {
+ for (j = 0; j < i; j++) {
+ if (strcmp(names[i], names[j]) == 0) {
+ dups++;
+ error(
+ "function `%s': parameter #%d, `%s', duplicates parameter #%d",
+ fname, i+1, names[j], j+1);
+ }
+ }
+ }
+
+ free(names);
+ return (dups > 0 ? TRUE : FALSE);
+}
+
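
dup_parms(), moved up and converted to TRUE/FALSE above, collects the parameter names into a temporary array and does a straightforward O(n^2) pairwise comparison, reporting each duplicate with the positions involved; the grammar then bumps errcount so the program is rejected. A standalone sketch of the same scan over a plain array of strings (report_dup_params() is a hypothetical name, with no NODE lists):

#include <stdio.h>
#include <string.h>

/* Report duplicated names in params[0..count-1]; returns the number
 * of duplicate pairs found, mirroring dup_parms()'s pairwise scan. */
static int report_dup_params(const char *fname, const char **params, int count)
{
	int i, j, dups = 0;

	for (i = 1; i < count; i++)
		for (j = 0; j < i; j++)
			if (strcmp(params[i], params[j]) == 0) {
				dups++;
				fprintf(stderr,
				    "function `%s': parameter #%d, `%s', duplicates parameter #%d\n",
				    fname, i + 1, params[j], j + 1);
			}
	return dups;
}

int main(void)
{
	const char *params[] = { "a", "b", "a", "c", "b" };

	if (report_dup_params("f", params, 5) > 0)
		fprintf(stderr, "f: duplicate parameters, program rejected\n");
	return 0;
}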
/*
+ * install:
* Install a name in the symbol table, even if it is already there.
* Caller must check against redefinition if that is desired.
*/
+
NODE *
install(name, value)
char *name;
@@ -3260,7 +3473,8 @@ NODE *value;
return hp->hvalue;
}
-/* find the most recent hash node for name installed by install */
+/* lookup --- find the most recent hash node for name installed by install */
+
NODE *
lookup(name)
const char *name;
@@ -3269,19 +3483,20 @@ const char *name;
register size_t len;
len = strlen(name);
- bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
- while (bucket) {
+ for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
+ bucket != NULL; bucket = bucket->hnext)
if (bucket->hlength == len && STREQN(bucket->hname, name, len))
return bucket->hvalue;
- bucket = bucket->hnext;
- }
+
return NULL;
}
/*
+ * append_right:
* Add new to the rightmost branch of LIST. This uses n^2 time, so we make
* a simple attempt at optimizing it.
*/
+
static NODE *
append_right(list, new)
NODE *list, *new;
@@ -3301,50 +3516,13 @@ NODE *list, *new;
return oldlist;
}
-/* return 1 if there are duplicate parameters, 0 means all ok */
-static int
-dup_parms(func)
-NODE *func;
-{
- register NODE *np;
- char *fname, **names;
- int count, i, j, dups;
- NODE *params;
-
- fname = func->param;
- count = func->param_cnt;
- params = func->rnode;
-
- if (count == 0) /* no args, no problem */
- return 0;
-
- emalloc(names, char **, count * sizeof(char *), "dup_parms");
-
- i = 0;
- for (np = params; np != NULL; np = np->rnode)
- names[i++] = np->param;
-
- dups = 0;
- for (i = 1; i < count; i++) {
- for (j = 0; j < i; j++) {
- if (strcmp(names[i], names[j]) == 0) {
- dups++;
- error(
- "function `%s': parameter #%d, `%s', duplicates parameter #%d",
- fname, i+1, names[j], j+1);
- }
- }
- }
-
- free(names);
- return (dups > 0);
-}
-
/*
+ * func_install:
* check if name is already installed; if so, it had better have Null value,
* in which case def is added as the value. Otherwise, install name with def
* as value.
*/
+
static void
func_install(params, def)
NODE *params;
@@ -3353,14 +3531,18 @@ NODE *def;
NODE *r;
pop_params(params->rnode);
- pop_var(params, 0);
+ pop_var(params, FALSE);
r = lookup(params->param);
if (r != NULL) {
fatal("function name `%s' previously defined", params->param);
} else
(void) install(params->param, node(params, Node_func, def));
+
+ func_use(params->param, FUNC_DEFINE);
}
+/* pop_var --- remove a variable from the symbol table */
+
static void
pop_var(np, freeit)
NODE *np;
@@ -3373,7 +3555,7 @@ int freeit;
name = np->param;
len = strlen(name);
save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]);
- for (bucket = *save; bucket; bucket = bucket->hnext) {
+ for (bucket = *save; bucket != NULL; bucket = bucket->hnext) {
if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
*save = bucket->hnext;
freenode(bucket);
@@ -3385,6 +3567,8 @@ int freeit;
}
}
+/* pop_params --- remove list of function parameters from symbol table */
+
/*
* pop parameters out of the symbol table. do this in reverse order to
* avoid reading freed memory if there were duplicated parameters.
@@ -3396,9 +3580,11 @@ NODE *params;
if (params == NULL)
return;
pop_params(params->rnode);
- pop_var(params, 1);
+ pop_var(params, TRUE);
}
+/* make_param --- make NAME into a function parameter */
+
static NODE *
make_param(name)
char *name;
@@ -3413,42 +3599,201 @@ char *name;
return (install(name, r));
}
-/* Name points to a variable name. Make sure it's in the symbol table */
+static struct fdesc {
+ char *name;
+ short used;
+ short defined;
+ struct fdesc *next;
+} *ftable[HASHSIZE];
+
+/* func_use --- track uses and definitions of functions */
+
+static void
+func_use(name, how)
+char *name;
+enum defref how;
+{
+ struct fdesc *fp;
+ int len;
+ int ind;
+
+ len = strlen(name);
+ ind = hash(name, len, HASHSIZE);
+
+ for (fp = ftable[ind]; fp != NULL; fp = fp->next) {
+ if (strcmp(fp->name, name) == 0) {
+ if (how == FUNC_DEFINE)
+ fp->defined++;
+ else
+ fp->used++;
+ return;
+ }
+ }
+
+ /* not in the table, fall through to allocate a new one */
+
+ emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
+ memset(fp, '\0', sizeof(struct fdesc));
+ emalloc(fp->name, char *, len + 1, "func_use");
+ strcpy(fp->name, name);
+ if (how == FUNC_DEFINE)
+ fp->defined++;
+ else
+ fp->used++;
+ fp->next = ftable[ind];
+ ftable[ind] = fp;
+}
+
+/* check_funcs --- verify functions that are called but not defined */
+
+static void
+check_funcs()
+{
+ struct fdesc *fp, *next;
+ int i;
+
+ for (i = 0; i < HASHSIZE; i++) {
+ for (fp = ftable[i]; fp != NULL; fp = fp->next) {
+#ifdef REALLYMEAN
+ /* making this the default breaks old code. sigh. */
+ if (fp->defined == 0) {
+ error(
+ "function `%s' called but never defined", fp->name);
+ errcount++;
+ }
+#else
+ if (do_lint && fp->defined == 0)
+ warning(
+ "function `%s' called but never defined", fp->name);
+#endif
+ if (do_lint && fp->used == 0) {
+ warning("function `%s' defined but never called",
+ fp->name);
+ }
+ }
+ }
+
+ /* now let's free all the memory */
+ for (i = 0; i < HASHSIZE; i++) {
+ for (fp = ftable[i]; fp != NULL; fp = next) {
+ next = fp->next;
+ free(fp->name);
+ free(fp);
+ }
+ }
+}
+
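
The new func_use()/check_funcs() pair keeps a small chained hash table of function names: every call site bumps `used`, every definition bumps `defined`, and once the whole program has been parsed (the new check_funcs() call in the start rule) gawk can warn under --lint about functions called but never defined, or defined but never called. A minimal sketch with a fixed-size chained table (use_table, track() and report() are made-up names, not the real ftable):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TBLSIZE 101

/* One entry per function name, with use/definition counters. */
struct fentry {
	char *name;
	int used, defined;
	struct fentry *next;
};

static struct fentry *use_table[TBLSIZE];

static unsigned hashstr(const char *s)
{
	unsigned h = 0;

	while (*s != '\0')
		h = h * 31 + (unsigned char) *s++;
	return h % TBLSIZE;
}

/* Record one use or one definition of NAME, creating its entry on
 * first sight -- the same bookkeeping func_use() does. */
static void track(const char *name, int is_definition)
{
	unsigned ind = hashstr(name);
	struct fentry *fp;

	for (fp = use_table[ind]; fp != NULL; fp = fp->next)
		if (strcmp(fp->name, name) == 0)
			break;
	if (fp == NULL) {
		fp = calloc(1, sizeof(*fp));
		fp->name = strdup(name);
		fp->next = use_table[ind];
		use_table[ind] = fp;
	}
	if (is_definition)
		fp->defined++;
	else
		fp->used++;
}

/* After parsing, walk every chain and warn -- check_funcs()'s job. */
static void report(void)
{
	int i;
	struct fentry *fp;

	for (i = 0; i < TBLSIZE; i++)
		for (fp = use_table[i]; fp != NULL; fp = fp->next) {
			if (fp->defined == 0)
				fprintf(stderr, "function `%s' called but never defined\n", fp->name);
			if (fp->used == 0)
				fprintf(stderr, "function `%s' defined but never called\n", fp->name);
		}
}

int main(void)
{
	track("f", 1);		/* definition of f */
	track("g", 0);		/* call of g, never defined */
	track("f", 0);		/* call of f */
	report();
	return 0;
}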
+/* param_sanity --- look for parameters that are regexp constants */
+
+static void
+param_sanity(arglist)
+NODE *arglist;
+{
+ NODE *argp, *arg;
+ int i;
+
+ for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) {
+ arg = argp->lnode;
+ if (arg->type == Node_regex)
+ warning("regexp constant for parameter #%d yields boolean value", i);
+ }
+}
+
+/* variable --- make sure NAME is in the symbol table */
+
NODE *
-variable(name, can_free)
+variable(name, can_free, type)
char *name;
int can_free;
+NODETYPE type;
{
register NODE *r;
- static int env_loaded = 0;
+ static int env_loaded = FALSE;
- if (!env_loaded && STREQ(name, "ENVIRON")) {
+ if (! env_loaded && STREQ(name, "ENVIRON")) {
load_environ();
- env_loaded = 1;
+ env_loaded = TRUE;
}
if ((r = lookup(name)) == NULL)
- r = install(name, node(Nnull_string, Node_var, (NODE *) NULL));
+ r = install(name, node(Nnull_string, type, (NODE *) NULL));
else if (can_free)
free(name);
return r;
}
+/* mk_rexp --- make a regular expression constant */
+
static NODE *
mk_rexp(exp)
NODE *exp;
{
+ NODE *n;
+
if (exp->type == Node_regex)
return exp;
- else {
- NODE *n;
-
- getnode(n);
- n->type = Node_regex;
- n->re_exp = exp;
- n->re_text = NULL;
- n->re_reg = NULL;
- n->re_flags = 0;
- n->re_cnt = 1;
- return n;
+
+ getnode(n);
+ n->type = Node_regex;
+ n->re_exp = exp;
+ n->re_text = NULL;
+ n->re_reg = NULL;
+ n->re_flags = 0;
+ n->re_cnt = 1;
+ return n;
+}
+
+/* isnoeffect --- when used as a statement, has no side effects */
+
+/*
+ * To be completely general, we should recursively walk the parse
+ * tree, to make sure that all the subexpressions also have no effect.
+ * Instead, we just weaken the actual warning that's printed, up above
+ * in the grammar.
+ */
+
+static int
+isnoeffect(type)
+NODETYPE type;
+{
+ switch (type) {
+ case Node_times:
+ case Node_quotient:
+ case Node_mod:
+ case Node_plus:
+ case Node_minus:
+ case Node_subscript:
+ case Node_concat:
+ case Node_exp:
+ case Node_unary_minus:
+ case Node_field_spec:
+ case Node_and:
+ case Node_or:
+ case Node_equal:
+ case Node_notequal:
+ case Node_less:
+ case Node_greater:
+ case Node_leq:
+ case Node_geq:
+ case Node_match:
+ case Node_nomatch:
+ case Node_not:
+ case Node_val:
+ case Node_in_array:
+ case Node_NF:
+ case Node_NR:
+ case Node_FNR:
+ case Node_FS:
+ case Node_RS:
+ case Node_FIELDWIDTHS:
+ case Node_IGNORECASE:
+ case Node_OFS:
+ case Node_ORS:
+ case Node_OFMT:
+ case Node_CONVFMT:
+ return TRUE;
+ default:
+ break; /* keeps gcc -Wall happy */
}
+
+ return FALSE;
}
diff --git a/builtin.c b/builtin.c
index f925ac09..d6efd78d 100644
--- a/builtin.c
+++ b/builtin.c
@@ -6,7 +6,7 @@
* Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,26 +19,31 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "awk.h"
+#undef HUGE
+#undef CHARBITS
+#undef INTBITS
+#include <math.h>
-#ifndef SRANDOM_PROTO
-extern void srandom P((unsigned int seed));
-#endif
-#if defined(RANDOM_MISSING)
+#ifndef HAVE_RANDOM
extern char *initstate P((unsigned seed, char *state, int n));
extern char *setstate P((char *state));
extern long random P((void));
+#define SRANDOM_PROTO
+#endif
+#ifdef SRANDOM_PROTO
+extern void srandom P((unsigned int seed));
#endif
extern NODE **fields_arr;
extern int output_is_tty;
-static NODE *sub_common P((NODE *tree, int global));
+static NODE *sub_common P((NODE *tree, int how_many, int backdigs));
NODE *format_tree P((const char *, int, NODE *));
#ifdef _CRAY
@@ -72,15 +77,19 @@ static void sgfmt P((char *buf, const char *format, int alt,
* On the Cray (Y-MP, anyway), ints and longs are 64 bits, but
* random() does things in terms of 32 bits. So we have to chop
* LONG_MAX down.
+ * On SGI with 64 bit support (IRIX 6.*), check the size of _MIPS_SZLONG
+ * and chop.... Per limits.h.
*/
-#if (defined(__alpha) && defined(__osf__)) || defined(_CRAY)
+#if (defined(__alpha) && defined(__osf__)) || defined(_CRAY) || (_MIPS_SZLONG == 64)
#define GAWK_RANDOM_MAX (LONG_MAX & 0x7fffffff)
#else
#define GAWK_RANDOM_MAX LONG_MAX
#endif
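Editorial note: the chopped maximum matters because random() always returns a value in the range 0 to 2^31 - 1, no matter how wide a long is; on a platform with a 64-bit long, dividing by LONG_MAX would make rand() return numbers vanishingly close to zero. A small illustration in ordinary C, not gawk code:

#include <stdio.h>
#include <limits.h>

int main(void)
{
	long r = 0x7fffffffL;	/* the largest value random() can return */

	/* with a 64-bit long, LONG_MAX dwarfs the 31-bit range of random() */
	printf("scaled by LONG_MAX:              %g\n",
	       (double) r / LONG_MAX);
	/* the chopped maximum keeps the result in the intended [0, 1] range */
	printf("scaled by LONG_MAX & 0x7fffffff: %g\n",
	       (double) r / (LONG_MAX & 0x7fffffff));
	return 0;
}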
static void efwrite P((const void *ptr, size_t size, size_t count, FILE *fp,
- const char *from, struct redirect *rp,int flush));
+ const char *from, struct redirect *rp, int flush));
+
+/* efwrite --- like fwrite, but with error checking */
static void
efwrite(ptr, size, count, fp, from, rp, flush)
@@ -103,24 +112,22 @@ int flush;
}
return;
- wrerror:
+wrerror:
fatal("%s to \"%s\" failed (%s)", from,
rp ? rp->value : "standard output",
errno ? strerror(errno) : "reason unknown");
}
-/* Builtin functions */
+/* do_exp --- exponential function */
+
NODE *
do_exp(tree)
NODE *tree;
{
NODE *tmp;
double d, res;
-#ifndef exp
- double exp P((double));
-#endif
- tmp= tree_eval(tree->lnode);
+ tmp = tree_eval(tree->lnode);
d = force_number(tmp);
free_temp(tmp);
errno = 0;
@@ -130,6 +137,84 @@ NODE *tree;
return tmp_number((AWKNUM) res);
}
+/* stdfile --- return fp for a standard file */
+
+/*
+ * This function allows `fflush("/dev/stdout")' to work.
+ * The other files will be available via getredirect().
+ * /dev/stdin is not included, since fflush is only for output.
+ */
+
+static FILE *
+stdfile(name, len)
+char *name;
+size_t len;
+{
+ if (len == 11) {
+ if (STREQN(name, "/dev/stderr", 11))
+ return stderr;
+ else if (STREQN(name, "/dev/stdout", 11))
+ return stdout;
+ }
+
+ return NULL;
+}
+
+/* do_fflush --- flush output, either named file or pipe or everything */
+
+NODE *
+do_fflush(tree)
+NODE *tree;
+{
+ extern struct redirect *getredirect();
+ struct redirect *rp;
+ NODE *tmp;
+ FILE *fp;
+ int status = 0;
+ char *file;
+
+ /* fflush() --- flush stdout */
+ if (tree == NULL) {
+ status = fflush(stdout);
+ return tmp_number((AWKNUM) status);
+ }
+
+ tmp = tree_eval(tree->lnode);
+ tmp = force_string(tmp);
+ file = tmp->stptr;
+
+ /* fflush("") --- flush all */
+ if (tmp->stlen == 0) {
+ status = flush_io();
+ free_temp(tmp);
+ return tmp_number((AWKNUM) status);
+ }
+
+ rp = getredirect(tmp->stptr, tmp->stlen);
+ status = 1;
+ if (rp != NULL) {
+ if ((rp->flag & (RED_WRITE|RED_APPEND)) == 0) {
+ /* if (do_lint) */
+ warning(
+ "fflush: cannot flush: %s `%s' opened for reading, not writing",
+ (rp->flag & RED_PIPE) ? "pipe" : "file",
+ file);
+ free_temp(tmp);
+ return tmp_number((AWKNUM) status);
+ }
+ fp = rp->fp;
+ if (fp != NULL)
+ status = fflush(fp);
+ } else if ((fp = stdfile(tmp->stptr, tmp->stlen)) != NULL) {
+ status = fflush(fp);
+ } else
+ warning("fflush: `%s' is not an open file or pipe", file);
+ free_temp(tmp);
+ return tmp_number((AWKNUM) status);
+}
+
+/* do_index --- find index of a string */
+
NODE *
do_index(tree)
NODE *tree;
@@ -149,8 +234,10 @@ NODE *tree;
l1 = s1->stlen;
l2 = s2->stlen;
ret = 0;
+
+ /* IGNORECASE will already be false if posix */
if (IGNORECASE) {
- while (l1) {
+ while (l1 > 0) {
if (l2 > l1)
break;
if (casetable[(int)*p1] == casetable[(int)*p2]
@@ -162,7 +249,7 @@ NODE *tree;
p1++;
}
} else {
- while (l1) {
+ while (l1 > 0) {
if (l2 > l1)
break;
if (*p1 == *p2
@@ -179,13 +266,12 @@ NODE *tree;
return tmp_number((AWKNUM) ret);
}
+/* double_to_int --- convert double to int, used several places */
+
double
double_to_int(d)
double d;
{
- double floor P((double));
- double ceil P((double));
-
if (d >= 0)
d = Floor(d);
else
@@ -193,6 +279,8 @@ double d;
return d;
}
+/* do_int --- convert double to int for awk */
+
NODE *
do_int(tree)
NODE *tree;
@@ -207,6 +295,8 @@ NODE *tree;
return tmp_number((AWKNUM) d);
}
+/* do_length --- length of a string or $0 */
+
NODE *
do_length(tree)
NODE *tree;
@@ -220,14 +310,13 @@ NODE *tree;
return tmp_number((AWKNUM) len);
}
+/* do_log --- the log function */
+
NODE *
do_log(tree)
NODE *tree;
{
NODE *tmp;
-#ifndef log
- double log P((double));
-#endif
double d, arg;
tmp = tree_eval(tree->lnode);
@@ -257,54 +346,57 @@ register NODE *carg;
{
/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
/* difference of pointers should be of ptrdiff_t type, but let us be kind */
-#define bchunk(s,l) if(l) {\
- while((l)>ofre) {\
- long olen = obufout - obuf;\
- erealloc(obuf, char *, osiz*2, "format_tree");\
- ofre+=osiz;\
- osiz*=2;\
- obufout = obuf + olen;\
- }\
- memcpy(obufout,s,(size_t)(l));\
- obufout+=(l);\
- ofre-=(l);\
- }
+#define bchunk(s, l) if (l) { \
+ while ((l) > ofre) { \
+ long olen = obufout - obuf; \
+ erealloc(obuf, char *, osiz * 2, "format_tree"); \
+ ofre += osiz; \
+ osiz *= 2; \
+ obufout = obuf + olen; \
+ } \
+ memcpy(obufout, s, (size_t) (l)); \
+ obufout += (l); \
+ ofre -= (l); \
+}
+
/* copy one byte from 's' to 'obufout' checking for space in the process */
-#define bchunk_one(s) {\
- if(ofre <= 0) {\
- long olen = obufout - obuf;\
- erealloc(obuf, char *, osiz*2, "format_tree");\
- ofre+=osiz;\
- osiz*=2;\
- obufout = obuf + olen;\
- }\
- *obufout++ = *s;\
- --ofre;\
- }
-
- /* Is there space for something L big in the buffer? */
-#define chksize(l) if((l)>ofre) {\
- long olen = obufout - obuf;\
- erealloc(obuf, char *, osiz*2, "format_tree");\
- obufout = obuf + olen;\
- ofre+=osiz;\
- osiz*=2;\
- }
+#define bchunk_one(s) { \
+ if (ofre <= 0) { \
+ long olen = obufout - obuf; \
+ erealloc(obuf, char *, osiz * 2, "format_tree"); \
+ ofre += osiz; \
+ osiz *= 2; \
+ obufout = obuf + olen; \
+ } \
+ *obufout++ = *s; \
+ --ofre; \
+}
- /*
- * Get the next arg to be formatted. If we've run out of args,
- * return "" (Null string)
- */
-#define parse_next_arg() {\
- if(!carg) { toofew = 1; break; }\
- else {\
- arg=tree_eval(carg->lnode);\
- carg=carg->rnode;\
- }\
- }
+/* Is there space for something L big in the buffer? */
+#define chksize(l) if ((l) > ofre) { \
+ long olen = obufout - obuf; \
+ erealloc(obuf, char *, osiz * 2, "format_tree"); \
+ obufout = obuf + olen; \
+ ofre += osiz; \
+ osiz *= 2; \
+}
+
+/*
+ * Get the next arg to be formatted. If we've run out of args,
+ * return "" (Null string)
+ */
+#define parse_next_arg() { \
+ if (carg == NULL) { \
+ toofew = TRUE; \
+ break; \
+ } else { \
+ arg = tree_eval(carg->lnode); \
+ carg = carg->rnode; \
+ } \
+}
NODE *r;
- int toofew = 0;
+ int toofew = FALSE;
char *obuf, *obufout;
size_t osiz, ofre;
char *chbuf;
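Editorial note: the three buffer macros above share one idiom. When a pending copy would overflow the output buffer, they double the allocation, bump the free count by the old size, and recompute obufout from its saved offset, since pointers into the old block are stale after erealloc(). Here is a minimal standalone version of the same grow-by-doubling technique, using plain realloc() and a helper function instead of macros; the names and sample data are made up.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* append len bytes from src to a growable buffer, doubling as needed */
static char *append(char *buf, size_t *used, size_t *size,
		    const char *src, size_t len)
{
	while (*size - *used < len) {
		*size *= 2;
		buf = realloc(buf, *size);	/* old pointers are now stale;
						 * a real program checks for NULL */
	}
	memcpy(buf + *used, src, len);
	*used += len;
	return buf;
}

int main(void)
{
	size_t used = 0, size = 8;
	char *buf = malloc(size);
	const char *words[] = { "format", "_", "tree", "_", "output" };
	size_t i;

	for (i = 0; i < sizeof(words) / sizeof(words[0]); i++)
		buf = append(buf, &used, &size, words[i], strlen(words[i]));
	printf("%.*s (%lu of %lu bytes used)\n",
	       (int) used, buf, (unsigned long) used, (unsigned long) size);
	free(buf);
	return 0;
}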
@@ -312,8 +404,8 @@ register NODE *carg;
int cs1;
NODE *arg;
long fw, prec;
- int lj, alt, big, have_prec;
- long *cur;
+ int lj, alt, big, bigbig, small, have_prec, need_format;
+ long *cur = NULL;
long val;
#ifdef sun386 /* Can't cast unsigned (int/long) from ptr->value */
long tmp_uval; /* on 386i 4.0.1 C compiler -- it just hangs */
@@ -326,7 +418,7 @@ register NODE *carg;
char *cp;
char *fill;
double tmpval;
- char signchar = 0;
+ char signchar = FALSE;
size_t len;
static char sp[] = " ";
static char zero_string[] = "0";
@@ -338,26 +430,32 @@ register NODE *carg;
osiz = 120;
ofre = osiz - 1;
+ need_format = FALSE;
+
s0 = s1 = fmt_string;
while (n0-- > 0) {
if (*s1 != '%') {
s1++;
continue;
}
+ need_format = TRUE;
bchunk(s0, s1 - s0);
s0 = s1;
cur = &fw;
fw = 0;
prec = 0;
- have_prec = 0;
- lj = alt = big = 0;
+ have_prec = FALSE;
+ signchar = FALSE;
+ lj = alt = big = bigbig = small = FALSE;
fill = sp;
cp = cend;
chbuf = lchbuf;
s1++;
retry:
- --n0;
+ if (n0-- <= 0) /* ran out early! */
+ break;
+
switch (cs1 = *s1++) {
case (-1): /* dummy case to allow for checking */
check_pos:
@@ -365,6 +463,7 @@ check_pos:
break; /* reject as a valid format */
goto retry;
case '%':
+ need_format = FALSE;
bchunk_one("%");
s0 = s1;
break;
@@ -373,7 +472,8 @@ check_pos:
if (lj)
goto retry;
if (cur == &fw)
- fill = zero_string; /* FALL through */
+ fill = zero_string;
+ /* FALL through */
case '1':
case '2':
case '3':
@@ -383,37 +483,39 @@ check_pos:
case '7':
case '8':
case '9':
- if (cur == 0)
- /* goto lose; */
+ if (cur == NULL)
break;
if (prec >= 0)
*cur = cs1 - '0';
- /* with a negative precision *cur is already set */
- /* to -1, so it will remain negative, but we have */
- /* to "eat" precision digits in any case */
+ /*
+ * with a negative precision *cur is already set
+ * to -1, so it will remain negative, but we have
+ * to "eat" precision digits in any case
+ */
while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
--n0;
*cur = *cur * 10 + *s1++ - '0';
}
if (prec < 0) /* negative precision is discarded */
- have_prec = 0;
+ have_prec = FALSE;
if (cur == &prec)
- cur = 0;
+ cur = NULL;
+ if (n0 == 0) /* badly formatted control string */
+ continue;
goto retry;
case '*':
- if (cur == 0)
- /* goto lose; */
+ if (cur == NULL)
break;
parse_next_arg();
*cur = force_number(arg);
free_temp(arg);
if (cur == &prec)
- cur = 0;
+ cur = NULL;
goto retry;
case ' ': /* print ' ' or '-' */
/* 'space' flag is ignored */
/* if '+' already present */
- if (signchar != 0)
+ if (signchar != FALSE)
goto check_pos;
/* FALL THROUGH */
case '+': /* print '+' or '-' */
@@ -433,32 +535,63 @@ check_pos:
if (cur != &fw)
break;
cur = &prec;
- have_prec++;
+ have_prec = TRUE;
goto retry;
case '#':
- alt++;
+ alt = TRUE;
goto check_pos;
case 'l':
if (big)
break;
else {
- static int warned = 0;
+ static int warned = FALSE;
if (do_lint && ! warned) {
warning("`l' is meaningless in awk formats; ignored");
- warned++;
+ warned = TRUE;
}
if (do_posix)
fatal("'l' is not permitted in POSIX awk formats");
}
- big++;
+ big = TRUE;
+ goto retry;
+ case 'L':
+ if (bigbig)
+ break;
+ else {
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warning("`L' is meaningless in awk formats; ignored");
+ warned = TRUE;
+ }
+ if (do_posix)
+ fatal("'L' is not permitted in POSIX awk formats");
+ }
+ bigbig = TRUE;
+ goto retry;
+ case 'h':
+ if (small)
+ break;
+ else {
+ static int warned = FALSE;
+
+ if (do_lint && ! warned) {
+ warning("`h' is meaningless in awk formats; ignored");
+ warned = TRUE;
+ }
+ if (do_posix)
+ fatal("'h' is not permitted in POSIX awk formats");
+ }
+ small = TRUE;
goto retry;
case 'c':
+ need_format = FALSE;
parse_next_arg();
if (arg->flags & NUMBER) {
#ifdef sun386
tmp_uval = arg->numbr;
- uval= (unsigned long) tmp_uval;
+ uval = (unsigned long) tmp_uval;
#else
uval = (unsigned long) arg->numbr;
#endif
@@ -467,24 +600,28 @@ check_pos:
cp = cpbuf;
goto pr_tail;
}
- if (have_prec == 0)
+ if (have_prec == FALSE)
prec = 1;
else if (prec > arg->stlen)
prec = arg->stlen;
cp = arg->stptr;
goto pr_tail;
case 's':
+ need_format = FALSE;
parse_next_arg();
arg = force_string(arg);
- if (have_prec == 0 || prec > arg->stlen)
+ if (! have_prec || prec > arg->stlen)
prec = arg->stlen;
cp = arg->stptr;
goto pr_tail;
case 'd':
case 'i':
+ need_format = FALSE;
parse_next_arg();
tmpval = force_number(arg);
- if (tmpval > LONG_MAX || tmpval < LONG_MIN) {
+ /* this ugly cast fixes a (sunos) pcc problem. sigh. */
+ if (tmpval > (double) ((unsigned long) ULONG_MAX)
+ || tmpval < LONG_MIN) {
/* out of range - emergency use of %g format */
cs1 = 'g';
goto format_float;
@@ -492,26 +629,32 @@ check_pos:
val = (long) tmpval;
if (val < 0) {
- sgn = 1;
+ sgn = TRUE;
if (val > LONG_MIN)
uval = (unsigned long) -val;
else
- uval = (unsigned long)(-(LONG_MIN + 1))
- + (unsigned long)1;
+ uval = (unsigned long) (-(LONG_MIN + 1))
+ + (unsigned long) 1;
} else {
- sgn = 0;
+ sgn = FALSE;
uval = (unsigned long) val;
}
do {
*--cp = (char) ('0' + uval % 10);
uval /= 10;
- } while (uval);
+ } while (uval > 0);
if (sgn)
*--cp = '-';
else if (signchar)
*--cp = signchar;
- if (have_prec != 0) /* ignore '0' flag if */
- fill = sp; /* precision given */
+ /*
+ * precision overrides '0' flags. however, for
+	 * integer formats, precision is minimum number of
+ * *digits*, not characters, thus we want to fill
+ * with zeroes.
+ */
+ if (have_prec)
+ fill = zero_string;
if (prec > fw)
fw = prec;
prec = cend - cp;
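Editorial note: the rule in the comment above is the one the C library's own printf follows: for the integer conversions a precision gives the minimum number of digits and is zero filled, independently of the '0' flag, whereas for %s a precision truncates. A quick check against the host printf (not gawk code):

#include <stdio.h>

int main(void)
{
	printf("[%.5d]\n", 42);		/* precision 5: 00042, zero-filled digits */
	printf("[%8.5d]\n", 42);	/* width 8, precision 5: "   00042" */
	printf("[%08d]\n", 42);		/* '0' flag alone pads the whole field */
	printf("[%8.5s]\n", "abcdefgh");	/* for %s, precision truncates: "   abcde" */
	return 0;
}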
@@ -531,20 +674,29 @@ check_pos:
base += 2; /* FALL THROUGH */
case 'o':
base += 8;
+ need_format = FALSE;
parse_next_arg();
tmpval = force_number(arg);
- if (tmpval > ULONG_MAX || tmpval < LONG_MIN) {
+ /* this ugly cast fixes a (sunos) pcc problem. sigh. */
+ if (tmpval > (double) ((unsigned long) ULONG_MAX)
+ || tmpval < LONG_MIN) {
/* out of range - emergency use of %g format */
cs1 = 'g';
goto format_float;
}
- uval = (unsigned long)tmpval;
- if (have_prec != 0) /* ignore '0' flag if */
- fill = sp; /* precision given */
+ uval = (unsigned long) tmpval;
+ /*
+ * precision overrides '0' flags. however, for
+	 * integer formats, precision is minimum number of
+ * *digits*, not characters, thus we want to fill
+ * with zeroes.
+ */
+ if (have_prec)
+ fill = zero_string;
do {
*--cp = chbuf[uval % base];
uval /= base;
- } while (uval);
+ } while (uval > 0);
if (alt) {
if (base == 16) {
*--cp = cs1;
@@ -558,6 +710,8 @@ check_pos:
*--cp = '0';
}
base = 0;
+ if (prec > fw)
+ fw = prec;
prec = cend - cp;
pr_tail:
if (! lj) {
@@ -579,13 +733,14 @@ check_pos:
case 'e':
case 'f':
case 'E':
+ need_format = FALSE;
parse_next_arg();
tmpval = force_number(arg);
format_float:
free_temp(arg);
- if (have_prec == 0)
+ if (! have_prec)
prec = DEFAULT_G_PRECISION;
- chksize(fw + prec + 9); /* 9==slop */
+ chksize(fw + prec + 9); /* 9 == slop */
cp = cpbuf;
*cp++ = '%';
@@ -599,7 +754,7 @@ check_pos:
*cp++ = '0';
cp = strcpy(cp, "*.*") + 3;
*cp++ = cs1;
- *cp = '\0';
+ *cp = '\0';
#ifndef GFMT_WORKAROUND
(void) sprintf(obufout, cpbuf,
(int) fw, (int) prec, (double) tmpval);
@@ -620,20 +775,28 @@ check_pos:
break;
}
if (toofew)
- fatal("%s\n\t%s\n\t%*s%s",
+ fatal("%s\n\t`%s'\n\t%*s%s",
"not enough arguments to satisfy format string",
fmt_string, s1 - fmt_string - 2, "",
"^ ran out for this one"
);
}
- if (do_lint && carg != NULL)
- warning("too many arguments supplied for format string");
+ if (do_lint) {
+ if (need_format)
+ warning(
+ "printf format specifier does not have control letter");
+ if (carg != NULL)
+ warning(
+ "too many arguments supplied for format string");
+ }
bchunk(s0, s1 - s0);
r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED);
r->flags |= TEMP;
return r;
}
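Editorial note: the `toofew' fatal() above lines a caret up under the conversion that ran out of arguments by printing an empty string in a computed field width with %*s. The same trick in miniature, with a made-up format string and offset:

#include <stdio.h>

int main(void)
{
	const char *fmt_string = "count is %d, name is %s";
	int offset = 21;	/* column of the %s conversion in fmt_string */

	fprintf(stderr, "not enough arguments to satisfy format string\n");
	fprintf(stderr, "\t%s\n", fmt_string);
	fprintf(stderr, "\t%*s%s\n", offset, "", "^ ran out for this one");
	return 0;
}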
+/* do_sprintf --- perform sprintf */
+
NODE *
do_sprintf(tree)
NODE *tree;
@@ -646,6 +809,7 @@ NODE *tree;
return r;
}
+/* do_printf --- perform printf, including redirection */
void
do_printf(tree)
@@ -658,26 +822,27 @@ register NODE *tree;
int errflg; /* not used, sigh */
rp = redirect(tree->rnode, &errflg);
- if (rp) {
+ if (rp != NULL) {
fp = rp->fp;
- if (!fp)
+ if (fp == NULL)
return;
} else
return;
} else
fp = stdout;
tree = do_sprintf(tree->lnode);
- efwrite(tree->stptr, sizeof(char), tree->stlen, fp, "printf", rp , 1);
+ efwrite(tree->stptr, sizeof(char), tree->stlen, fp, "printf", rp, TRUE);
free_temp(tree);
}
+/* do_sqrt --- do the sqrt function */
+
NODE *
do_sqrt(tree)
NODE *tree;
{
NODE *tmp;
double arg;
- extern double sqrt P((double));
tmp = tree_eval(tree->lnode);
arg = (double) force_number(tmp);
@@ -687,6 +852,8 @@ NODE *tree;
return tmp_number((AWKNUM) sqrt(arg));
}
+/* do_substr --- do the substr function */
+
NODE *
do_substr(tree)
NODE *tree;
@@ -697,7 +864,7 @@ NODE *tree;
size_t length;
int is_long;
- t1 = tree_eval(tree->lnode);
+ t1 = force_string(tree_eval(tree->lnode));
t2 = tree_eval(tree->rnode->lnode);
if (tree->rnode->rnode == NULL) /* third arg. missing */
length = t1->stlen;
@@ -708,10 +875,14 @@ NODE *tree;
}
indx = (int) force_number(t2) - 1;
free_temp(t2);
- t1 = force_string(t1);
if (indx < 0)
indx = 0;
if (indx >= t1->stlen || (long) length <= 0) {
+ if (do_lint && indx >= t1->stlen)
+ warning("substr: position %d is past end of string",
+ indx);
+ if (do_lint && (long) length <= 0)
+ warning("substr: length %d <= 0", (long) length);
free_temp(t1);
return Nnull_string;
}
@@ -721,34 +892,54 @@ NODE *tree;
warning("substr: length %d at position %d exceeds length of first argument",
length, indx+1);
}
- r = tmp_string(t1->stptr + indx, length);
+ r = tmp_string(t1->stptr + indx, length);
free_temp(t1);
return r;
}
+/* do_strftime --- format a time stamp */
+
NODE *
do_strftime(tree)
NODE *tree;
{
- NODE *t1, *t2;
+ NODE *t1, *t2, *ret;
struct tm *tm;
time_t fclock;
- char buf[100];
-
- t1 = force_string(tree_eval(tree->lnode));
-
- if (tree->rnode == NULL) /* second arg. missing, default */
- (void) time(&fclock);
- else {
- t2 = tree_eval(tree->rnode->lnode);
- fclock = (time_t) force_number(t2);
- free_temp(t2);
+ char buf[BUFSIZ]; /* XXX - fixed length */
+ static char def_format[] = "%a %b %d %H:%M:%S %Z %Y";
+ char *format;
+
+ /* set defaults first */
+ format = def_format; /* traditional date format */
+ (void) time(&fclock); /* current time of day */
+
+ t1 = t2 = NULL;
+ if (tree != NULL) { /* have args */
+ if (tree->lnode != NULL) {
+ t1 = force_string(tree_eval(tree->lnode));
+ format = t1->stptr;
+ if (do_lint && t1->stlen == 0)
+ warning("strftime called with empty format string");
+ }
+
+ if (tree->rnode != NULL) {
+ t2 = tree_eval(tree->rnode->lnode);
+ fclock = (time_t) force_number(t2);
+ free_temp(t2);
+ }
}
+
tm = localtime(&fclock);
- return tmp_string(buf, strftime(buf, 100, t1->stptr, tm));
+	ret = tmp_string(buf, strftime(buf, sizeof(buf), format, tm));
+ if (t1)
+ free_temp(t1);
+ return ret;
}
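Editorial note: with no arguments, the new do_strftime() falls back to the current time of day and to the traditional date(1)-style layout. Reproducing those defaults directly with the C library looks like this (a standalone sketch, not gawk code):

#include <stdio.h>
#include <time.h>

int main(void)
{
	char buf[BUFSIZ];
	time_t fclock;
	struct tm *tm;
	static char def_format[] = "%a %b %d %H:%M:%S %Z %Y";

	(void) time(&fclock);		/* current time of day */
	tm = localtime(&fclock);
	if (strftime(buf, sizeof buf, def_format, tm) > 0)
		printf("%s\n", buf);	/* e.g. "Fri Jul 16 12:41:09 IDT 2010" */
	return 0;
}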
+/* do_systime --- get the time of day */
+
NODE *
do_systime(tree)
NODE *tree;
@@ -759,6 +950,8 @@ NODE *tree;
return tmp_number((AWKNUM) lclock);
}
+/* do_system --- run an external command */
+
NODE *
do_system(tree)
NODE *tree;
@@ -768,7 +961,7 @@ NODE *tree;
char *cmd;
char save;
- (void) flush_io (); /* so output is synchronous with gawk's */
+ (void) flush_io(); /* so output is synchronous with gawk's */
tmp = tree_eval(tree->lnode);
cmd = force_string(tmp)->stptr;
@@ -776,9 +969,9 @@ NODE *tree;
/* insure arg to system is zero-terminated */
/*
- * From: David Trueman <emory!cs.dal.ca!david>
+ * From: David Trueman <david@cs.dal.ca>
* To: arnold@cc.gatech.edu (Arnold Robbins)
- * Date: Wed, 3 Nov 1993 12:49:41 -0400
+ * Date: Wed, 3 Nov 1993 12:49:41 -0400
*
* It may not be necessary to save the character, but
* I'm not sure. It would normally be the field
@@ -803,6 +996,8 @@ NODE *tree;
extern NODE **fmt_list; /* declared in eval.c */
+/* do_print --- print items, separated by OFS, terminated with ORS */
+
void
do_print(tree)
register NODE *tree;
@@ -816,50 +1011,44 @@ register NODE *tree;
int errflg; /* not used, sigh */
rp = redirect(tree->rnode, &errflg);
- if (rp) {
+ if (rp != NULL) {
fp = rp->fp;
- if (!fp)
+ if (fp == NULL)
return;
} else
return;
} else
fp = stdout;
tree = tree->lnode;
- while (tree) {
+ while (tree != NULL) {
t1 = tree_eval(tree->lnode);
if (t1->flags & NUMBER) {
if (OFMTidx == CONVFMTidx)
(void) force_string(t1);
else {
-#ifndef GFMT_WORKAROUND
- char buf[100];
-
- (void) sprintf(buf, OFMT, t1->numbr);
- free_temp(t1);
- t1 = tmp_string(buf, strlen(buf));
-#else /* GFMT_WORKAROUND */
free_temp(t1);
t1 = format_tree(OFMT,
fmt_list[OFMTidx]->stlen,
tree);
-#endif /* GFMT_WORKAROUND */
}
}
- efwrite(t1->stptr, sizeof(char), t1->stlen, fp, "print", rp, 0);
+ efwrite(t1->stptr, sizeof(char), t1->stlen, fp, "print", rp, FALSE);
free_temp(t1);
tree = tree->rnode;
- if (tree) {
+ if (tree != NULL) {
s = OFS;
- if (OFSlen)
- efwrite(s, sizeof(char), (size_t)OFSlen,
- fp, "print", rp, 0);
+ if (OFSlen > 0)
+ efwrite(s, sizeof(char), (size_t) OFSlen,
+ fp, "print", rp, FALSE);
}
}
s = ORS;
- if (ORSlen)
- efwrite(s, sizeof(char), (size_t)ORSlen, fp, "print", rp, 1);
+ if (ORSlen > 0)
+ efwrite(s, sizeof(char), (size_t) ORSlen, fp, "print", rp, TRUE);
}
+/* do_tolower --- lower case a string */
+
NODE *
do_tolower(tree)
NODE *tree;
@@ -877,6 +1066,8 @@ NODE *tree;
return t2;
}
+/* do_toupper --- upper case a string */
+
NODE *
do_toupper(tree)
NODE *tree;
@@ -894,12 +1085,13 @@ NODE *tree;
return t2;
}
+/* do_atan2 --- do the atan2 function */
+
NODE *
do_atan2(tree)
NODE *tree;
{
NODE *t1, *t2;
- extern double atan2 P((double, double));
double d1, d2;
t1 = tree_eval(tree->lnode);
@@ -911,35 +1103,39 @@ NODE *tree;
return tmp_number((AWKNUM) atan2(d1, d2));
}
+/* do_sin --- do the sin function */
+
NODE *
do_sin(tree)
NODE *tree;
{
NODE *tmp;
- extern double sin P((double));
double d;
tmp = tree_eval(tree->lnode);
- d = sin((double)force_number(tmp));
+ d = sin((double) force_number(tmp));
free_temp(tmp);
return tmp_number((AWKNUM) d);
}
+/* do_cos --- do the cos function */
+
NODE *
do_cos(tree)
NODE *tree;
{
NODE *tmp;
- extern double cos P((double));
double d;
tmp = tree_eval(tree->lnode);
- d = cos((double)force_number(tmp));
+ d = cos((double) force_number(tmp));
free_temp(tmp);
return tmp_number((AWKNUM) d);
}
-static int firstrand = 1;
+/* do_rand --- do the rand function */
+
+static int firstrand = TRUE;
static char state[512];
/* ARGSUSED */
@@ -950,35 +1146,40 @@ NODE *tree;
if (firstrand) {
(void) initstate((unsigned) 1, state, sizeof state);
srandom(1);
- firstrand = 0;
+ firstrand = FALSE;
}
return tmp_number((AWKNUM) random() / GAWK_RANDOM_MAX);
}
+/* do_srand --- seed the random number generator */
+
NODE *
do_srand(tree)
NODE *tree;
{
NODE *tmp;
- static long save_seed = 0;
+ static long save_seed = 1;
long ret = save_seed; /* SVR4 awk srand returns previous seed */
- if (firstrand)
+ if (firstrand) {
(void) initstate((unsigned) 1, state, sizeof state);
- else
+ /* don't need to srandom(1), we're changing the seed below */
+ firstrand = FALSE;
+ } else
(void) setstate(state);
- if (!tree)
+ if (tree == NULL)
srandom((unsigned int) (save_seed = (long) time((time_t *) 0)));
else {
tmp = tree_eval(tree->lnode);
srandom((unsigned int) (save_seed = (long) force_number(tmp)));
free_temp(tmp);
}
- firstrand = 0;
return tmp_number((AWKNUM) ret);
}
+/* do_match --- match a regexp, set RSTART and RLENGTH */
+
NODE *
do_match(tree)
NODE *tree;
@@ -991,7 +1192,7 @@ NODE *tree;
t1 = force_string(tree_eval(tree->lnode));
tree = tree->rnode->lnode;
rp = re_update(tree);
- rstart = research(rp, t1->stptr, 0, t1->stlen, 1);
+ rstart = research(rp, t1->stptr, 0, t1->stlen, TRUE);
if (rstart >= 0) { /* match succeded */
rstart++; /* 1-based indexing */
rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr);
@@ -1007,10 +1208,16 @@ NODE *tree;
return tmp_number((AWKNUM) rstart);
}
+/* sub_common --- the common code (does the work) for sub, gsub, and gensub */
+
+/*
+ * NB: `howmany' conflicts with a SunOS macro in <sys/param.h>.
+ */
+
static NODE *
-sub_common(tree, global)
+sub_common(tree, how_many, backdigs)
NODE *tree;
-int global;
+int how_many, backdigs;
{
register char *scan;
register char *bp, *cp;
@@ -1032,9 +1239,12 @@ int global;
NODE *t; /* string to make sub. in; $0 if none given */
NODE *tmp;
NODE **lhs = &tree; /* value not used -- just different from NULL */
- int priv = 0;
+ int priv = FALSE;
Func_ptr after_assign = NULL;
+ int global = (how_many == -1);
+ long current;
+
tmp = tree->lnode;
rp = re_update(tmp);
@@ -1046,7 +1256,7 @@ int global;
t = force_string(tree_eval(tmp));
/* do the search early to avoid work on non-match */
- if (research(rp, t->stptr, 0, t->stlen, 1) == -1 ||
+ if (research(rp, t->stptr, 0, t->stlen, TRUE) == -1 ||
RESTART(rp, t->stptr) > t->stlen) {
free_temp(t);
return tmp_number((AWKNUM) 0.0);
@@ -1068,7 +1278,7 @@ int global;
tmp = dupnode(t);
t->flags = saveflags;
t = tmp;
- priv = 1;
+ priv = TRUE;
}
text = t->stptr;
textlen = t->stlen;
@@ -1078,7 +1288,7 @@ int global;
repl = s->stptr;
replend = repl + s->stlen;
repllen = replend - repl;
- emalloc(buf, char *, buflen + 2, "do_sub");
+ emalloc(buf, char *, buflen + 2, "sub_common");
buf[buflen] = '\0';
buf[buflen + 1] = '\0';
ampersands = 0;
@@ -1086,15 +1296,37 @@ int global;
if (*scan == '&') {
repllen--;
ampersands++;
- } else if (*scan == '\\'
- && (*(scan+1) == '&' || *(scan+1) == '\\')) {
- repllen--;
- scan++;
+ } else if (*scan == '\\') {
+ if (backdigs) { /* gensub, behave sanely */
+ if (isdigit(scan[1])) {
+ ampersands++;
+ scan++;
+ } else { /* \q for any q --> q */
+ repllen--;
+ scan++;
+ }
+ } else { /* (proposed) posix '96 mode */
+ if (strncmp(scan, "\\\\\\&", 4) == 0) {
+ /* \\\& --> \& */
+ repllen -= 2;
+ scan += 3;
+ } else if (strncmp(scan, "\\\\&", 3) == 0) {
+ /* \\& --> \<string> */
+ ampersands++;
+ repllen--;
+ scan += 2;
+ } else if (scan[1] == '&') {
+ /* \& --> & */
+ repllen--;
+ scan++;
+ } /* else
+ leave alone, it goes into the output */
+ }
}
}
bp = buf;
- for (;;) {
+ for (current = 1;; current++) {
matches++;
matchstart = t->stptr + RESTART(rp, t->stptr);
matchend = t->stptr + REEND(rp, t->stptr);
@@ -1108,37 +1340,81 @@ int global;
sofar = bp - buf;
while (buflen < (sofar + len + 1)) {
buflen *= 2;
- erealloc(buf, char *, buflen, "do_sub");
+ erealloc(buf, char *, buflen, "sub_common");
bp = buf + sofar;
}
for (scan = text; scan < matchstart; scan++)
*bp++ = *scan;
- for (scan = repl; scan < replend; scan++)
- if (*scan == '&')
- for (cp = matchstart; cp < matchend; cp++)
- *bp++ = *cp;
- else if (*scan == '\\'
- && (*(scan+1) == '&' || *(scan+1) == '\\')) {
- scan++;
- *bp++ = *scan;
- } else
- *bp++ = *scan;
-
+ if (global || current == how_many) {
+ /*
+ * If replacing all occurrences, or this is the
+ * match we want, copy in the replacement text,
+ * making substitutions as we go.
+ */
+ for (scan = repl; scan < replend; scan++)
+ if (*scan == '&')
+ for (cp = matchstart; cp < matchend; cp++)
+ *bp++ = *cp;
+ else if (*scan == '\\') {
+ if (backdigs) { /* gensub, behave sanely */
+ if (isdigit(scan[1])) {
+ int dig = scan[1] - '0';
+ char *start, *end;
+
+ start = t->stptr
+ + SUBPATSTART(rp, t->stptr, dig);
+ end = t->stptr
+ + SUBPATEND(rp, t->stptr, dig);
+
+ for (cp = start; cp < end; cp++)
+ *bp++ = *cp;
+ scan++;
+ } else /* \q for any q --> q */
+ *bp++ = *++scan;
+ } else { /* posix '96 mode, bleah */
+ if (strncmp(scan, "\\\\\\&", 4) == 0) {
+ /* \\\& --> \& */
+ *bp++ = '\\';
+ *bp++ = '&';
+ scan += 3;
+ } else if (strncmp(scan, "\\\\&", 3) == 0) {
+ /* \\& --> \<string> */
+ *bp++ = '\\';
+ for (cp = matchstart; cp < matchend; cp++)
+ *bp++ = *cp;
+ scan += 2;
+ } else if (scan[1] == '&') {
+ /* \& --> & */
+ *bp++ = '&';
+ scan++;
+ } else
+ *bp++ = *scan;
+ }
+ } else
+ *bp++ = *scan;
+ } else {
+ /*
+ * don't want this match, skip over it by copying
+ * in current text.
+ */
+ for (cp = matchstart; cp < matchend; cp++)
+ *bp++ = *cp;
+ }
/* catch the case of gsub(//, "blah", whatever), i.e. empty regexp */
- if (global && matchstart == matchend && matchend < text + textlen) {
+ if (matchstart == matchend && matchend < text + textlen) {
*bp++ = *matchend;
matchend++;
}
textlen = text + textlen - matchend;
text = matchend;
- if (!global || (long)textlen <= 0 ||
- research(rp, t->stptr, text-t->stptr, textlen, 1) == -1)
+ if ((current >= how_many && !global) || (long) textlen <= 0
+ || research(rp, t->stptr, text - t->stptr, textlen, TRUE) == -1)
break;
}
sofar = bp - buf;
if (buflen - sofar - textlen - 1) {
buflen = sofar + textlen + 2;
- erealloc(buf, char *, buflen, "do_sub");
+ erealloc(buf, char *, buflen, "sub_common");
bp = buf + sofar;
}
for (scan = matchend; scan < text + textlen; scan++)
@@ -1155,25 +1431,92 @@ int global;
unref(*lhs);
*lhs = t;
}
- if (after_assign)
+ if (after_assign != NULL)
(*after_assign)();
t->flags &= ~(NUM|NUMBER);
}
return tmp_number((AWKNUM) matches);
}
+/* do_gsub --- global substitution */
+
NODE *
do_gsub(tree)
NODE *tree;
{
- return sub_common(tree, 1);
+ return sub_common(tree, -1, FALSE);
}
+/* do_sub --- single substitution */
+
NODE *
do_sub(tree)
NODE *tree;
{
- return sub_common(tree, 0);
+ return sub_common(tree, 1, FALSE);
+}
+
+/* do_gensub --- fix up the tree for sub_common for the gensub function */
+
+NODE *
+do_gensub(tree)
+NODE *tree;
+{
+ NODE n1, n2, n3, *t, *tmp, *target, *ret;
+ long how_many = 1; /* default is one substitution */
+ double d;
+
+ /*
+ * We have to pull out the value of the global flag, and
+ * build up a tree without the flag in it, turning it into the
+ * kind of tree that sub_common() expects. It helps to draw
+ * a picture of this ...
+ */
+ n1 = *tree;
+ n2 = *(tree->rnode);
+ n1.rnode = & n2;
+
+ t = tree_eval(n2.rnode->lnode); /* value of global flag */
+
+ tmp = force_string(tree_eval(n2.rnode->rnode->lnode)); /* target */
+
+ /*
+ * We make copy of the original target string, and pass that
+ * in to sub_common() as the target to make the substitution in.
+ * We will then return the result string as the return value of
+ * this function.
+ */
+ target = tmp_string(tmp->stptr, tmp->stlen);
+ free_temp(tmp);
+
+ n3 = *(n2.rnode->rnode);
+ n3.lnode = target;
+ n2.rnode = & n3;
+
+ if ((t->flags & (STR|STRING)) != 0) {
+ if (t->stlen > 0 && (t->stptr[0] == 'g' || t->stptr[0] == 'G'))
+ how_many = -1;
+ else
+ how_many = 1;
+ } else {
+ d = force_number(t);
+ if (d > 0)
+ how_many = d;
+ else
+ how_many = 1;
+ }
+
+ free_temp(t);
+
+ ret = sub_common(&n1, how_many, TRUE);
+ free_temp(ret);
+
+ /*
+ * Note that we don't care what sub_common() returns, since the
+ * easiest thing for the programmer is to return the string, even
+ * if no substitutions were done.
+ */
+ return target;
}
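Editorial note: do_gensub() cannot hand its argument list straight to sub_common(): the list carries an extra `how many' flag, and the substitution must happen in a private copy of the target rather than in the caller's variable. The copies n1, n2 and n3 are stack copies of nodes, relinked so that the flag disappears and the copied target is spliced in, while the original tree is never touched. The toy sketch below, with a made-up two-field node type, shows the same relink-stack-copies trick for dropping one element from a list:

#include <stdio.h>

/* a made-up singly linked argument list, standing in for gawk's NODE chains */
struct node {
	const char *val;
	struct node *rnode;
};

static void walk(const char *tag, struct node *list)
{
	printf("%s:", tag);
	for (; list != NULL; list = list->rnode)
		printf(" %s", list->val);
	printf("\n");
}

int main(void)
{
	/* original list: regexp, replacement, flag, target */
	struct node target = { "target", NULL };
	struct node flag = { "flag", &target };
	struct node repl = { "replacement", &flag };
	struct node re = { "regexp", &repl };

	/* stack copies let us splice the flag out without touching the original */
	struct node n2 = repl;	/* copy of the node just before the one to drop */
	struct node n1 = re;

	n2.rnode = flag.rnode;	/* skip over the flag node */
	n1.rnode = &n2;

	walk("original", &re);	/* regexp replacement flag target */
	walk("spliced ", &n1);	/* regexp replacement target */
	return 0;
}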
#ifdef GFMT_WORKAROUND
@@ -1194,57 +1537,60 @@ double g; /* value to format */
char dform[40];
register char *gpos;
register char *d, *e, *p;
- int again = 0;
+ int again = FALSE;
strncpy(dform, format, sizeof dform - 1);
dform[sizeof dform - 1] = '\0';
gpos = strrchr(dform, '.');
- if (g == 0.0 && alt == 0) { /* easy special case */
+ if (g == 0.0 && ! alt) { /* easy special case */
*gpos++ = 'd';
*gpos = '\0';
(void) sprintf(buf, dform, fwidth, 0);
return;
}
- gpos += 2; /* advance to location of 'g' in the format */
+
+ /* advance to location of 'g' in the format */
+ while (*gpos && *gpos != 'g' && *gpos != 'G')
+ gpos++;
if (prec <= 0) /* negative precision is ignored */
prec = (prec < 0 ? DEFAULT_G_PRECISION : 1);
if (*gpos == 'G')
- again = 1;
+ again = TRUE;
/* start with 'e' format (it'll provide nice exponent) */
*gpos = 'e';
- prec -= 1;
+ prec--;
(void) sprintf(buf, dform, fwidth, prec, g);
if ((e = strrchr(buf, 'e')) != NULL) { /* find exponent */
- int exp = atoi(e+1); /* fetch exponent */
- if (exp >= -4 && exp <= prec) { /* per K&R2, B1.2 */
+ int expn = atoi(e+1); /* fetch exponent */
+ if (expn >= -4 && expn <= prec) { /* per K&R2, B1.2 */
/* switch to 'f' format and re-do */
*gpos = 'f';
- prec -= exp; /* decimal precision */
+ prec -= expn; /* decimal precision */
(void) sprintf(buf, dform, fwidth, prec, g);
e = buf + strlen(buf);
while (*--e == ' ')
continue;
- e += 1;
+ e++;
}
- else if (again != 0)
+ else if (again)
*gpos = 'E';
/* if 'alt' in force, then trailing zeros are not removed */
- if (alt == 0 && (d = strrchr(buf, '.')) != NULL) {
+ if (! alt && (d = strrchr(buf, '.')) != NULL) {
/* throw away an excess of precision */
for (p = e; p > d && *--p == '0'; )
- prec -= 1;
+ prec--;
if (d == p)
- prec -= 1;
+ prec--;
if (prec < 0)
prec = 0;
/* and do that once again */
- again = 1;
+ again = TRUE;
}
- if (again != 0)
+ if (again)
(void) sprintf(buf, dform, fwidth, prec, g);
}
}
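Editorial note: sgfmt() emulates %g by formatting with %e first, reading the exponent back, and switching to %f when the exponent is at least -4 and less than the precision, which is the selection rule the C standard (and K&R2 section B1.2) gives for %g. The rule is easy to see with any conforming printf:

#include <stdio.h>

int main(void)
{
	/* %g uses 'f' style while the exponent is >= -4 and < the precision (6) */
	printf("%g\n", 0.0001);		/* 0.0001      (exponent -4, 'f' style) */
	printf("%g\n", 0.00001);	/* 1e-05       (exponent -5, 'e' style) */
	printf("%g\n", 123456.0);	/* 123456      (exponent 5 < 6, 'f' style) */
	printf("%g\n", 1234567.0);	/* 1.23457e+06 (exponent 6, 'e' style) */
	return 0;
}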
diff --git a/config.in b/config.in
deleted file mode 100644
index b04f0e1b..00000000
--- a/config.in
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * config.h -- configuration definitions for gawk.
- *
- * __SYSTEM__
- */
-
-/*
- * Copyright (C) 1991, 1992, 1993 the Free Software Foundation, Inc.
- *
- * This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
- *
- * GAWK is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * GAWK is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- * This file isolates configuration dependencies for gnu awk.
- * You should know something about your system, perhaps by having
- * a manual handy, when you edit this file. You should copy config.h-dist
- * to config.h, and edit config.h. Do not modify config.h-dist, so that
- * it will be easy to apply any patches that may be distributed.
- *
- * The general idea is that systems conforming to the various standards
- * should need to do the least amount of changing. Definining the various
- * items in ths file usually means that your system is missing that
- * particular feature.
- *
- * The order of preference in standard conformance is ANSI C, POSIX,
- * and the SVID.
- *
- * If you have no clue as to what's going on with your system, try
- * compiling gawk without editing this file and see what shows up
- * missing in the link stage. From there, you can probably figure out
- * which defines to turn on.
- */
-
-/**************************/
-/* Miscellanious features */
-/**************************/
-
-/*
- * BLKSIZE_MISSING
- *
- * Check your /usr/include/sys/stat.h file. If the stat structure
- * does not have a member named st_blksize, define this. (This will
- * most likely be the case on most System V systems prior to V.4.)
- */
-/* #define BLKSIZE_MISSING 1 */
-
-/*
- * SIGTYPE
- *
- * The return type of the routines passed to the signal function.
- * Modern systems use `void', older systems use `int'.
- * If left undefined, it will default to void.
- */
-/* #define SIGTYPE int */
-
-/*
- * SIZE_T_MISSING
- *
- * If your system has no typedef for size_t, define this to get a default
- */
-/* #define SIZE_T_MISSING 1 */
-
-/*
- * CHAR_UNSIGNED
- *
- * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
- * then define this for use in regex.c
- */
-/* #define CHAR_UNSIGNED 1 */
-
-/*
- * HAVE_UNDERSCORE_SETJMP
- *
- * Check in your /usr/include/setjmp.h file. If there are routines
- * there named _setjmp and _longjmp, then you should define this.
- * Typically only systems derived from Berkeley Unix have this.
- */
-/* #define HAVE_UNDERSCORE_SETJMP 1 */
-
-/*
- * LIMITS_H_MISSING
- *
- * You don't have a <limits.h> include file.
- */
-/* #define LIMITS_H_MISSING 1 */
-
-/***********************************************/
-/* Missing library subroutines or system calls */
-/***********************************************/
-
-/*
- * MEMCMP_MISSING
- * MEMCPY_MISSING
- * MEMSET_MISSING
- *
- * These three routines are for manipulating blocks of memory. Most
- * likely they will either all three be present or all three be missing,
- * so they're grouped together.
- */
-/* #define MEMCMP_MISSING 1 */
-/* #define MEMCPY_MISSING 1 */
-/* #define MEMSET_MISSING 1 */
-
-/*
- * RANDOM_MISSING
- *
- * Your system does not have the random(3) suite of random number
- * generating routines. These are different than the old rand(3)
- * routines!
- */
-/* #define RANDOM_MISSING 1 */
-
-/*
- * STRCASE_MISSING
- *
- * Your system does not have the strcasemp() and strncasecmp()
- * routines that originated in Berkeley Unix.
- */
-/* #define STRCASE_MISSING 1 */
-
-/*
- * STRCHR_MISSING
- *
- * Your system does not have the strchr() and strrchr() functions.
- */
-/* #define STRCHR_MISSING 1 */
-
-/*
- * STRERROR_MISSING
- *
- * Your system lacks the ANSI C strerror() routine for returning the
- * strings associated with errno values.
- */
-/* #define STRERROR_MISSING 1 */
-
-/*
- * STRTOD_MISSING
- *
- * Your system does not have the strtod() routine for converting
- * strings to double precision floating point values.
- */
-/* #define STRTOD_MISSING 1 */
-
-/*
- * STRFTIME_MISSING
- *
- * Your system lacks the ANSI C strftime() routine for formatting
- * broken down time values.
- */
-/* #define STRFTIME_MISSING 1 */
-
-/*
- * TZSET_MISSING
- *
- * If you have a 4.2 BSD vintage system, then the strftime() routine
- * supplied in the missing directory won't be enough, because it relies on the
- * tzset() routine from System V / Posix. Fortunately, there is an
- * emulation for tzset() too that should do the trick. If you don't
- * have tzset(), define this.
- */
-/* #define TZSET_MISSING 1 */
-
-/*
- * TZNAME_MISSING
- *
- * Some systems do not support the external variables tzname and daylight.
- * If this is the case *and* strftime() is missing, define this.
- */
-/* #define TZNAME_MISSING 1 */
-
-/*
- * TM_ZONE_MISSING
- *
- * Your "struct tm" is missing the tm_zone field.
- * If this is the case *and* strftime() is missing *and* tzname is missing,
- * define this.
- */
-/* #define TM_ZONE_MISSING 1 */
-
-/*
- * STDC_HEADERS
- *
- * If your system does have ANSI compliant header files that
- * provide prototypes for library routines, then define this.
- */
-/* #define STDC_HEADERS 1 */
-
-/*
- * NO_TOKEN_PASTING
- *
- * If your compiler define's __STDC__ but does not support token
- * pasting (tok##tok), then define this.
- */
-/* #define NO_TOKEN_PASTING 1 */
-
-/*****************************************************************/
-/* Stuff related to the Standard I/O Library. */
-/*****************************************************************/
-/* Much of this is (still, unfortunately) black magic in nature. */
-/* You may have to use some or all of these together to get gawk */
-/* to work correctly. */
-/*****************************************************************/
-
-/*
- * NON_STD_SPRINTF
- *
- * Look in your /usr/include/stdio.h file. If the return type of the
- * sprintf() function is NOT `int', define this.
- */
-/* #define NON_STD_SPRINTF 1 */
-
-/*
- * VPRINTF_MISSING
- *
- * Define this if your system lacks vprintf() and the other routines
- * that go with it. This will trigger an attempt to use _doprnt().
- * If you don't have that, this attempt will fail and you are on your own.
- */
-/* #define VPRINTF_MISSING 1 */
-
-/*
- * Casts from size_t to int and back. These will become unnecessary
- * at some point in the future, but for now are required where the
- * two types are a different representation.
- */
-/* #define SZTC */
-/* #define INTC */
-
-/*
- * SYSTEM_MISSING
- *
- * Define this if your library does not provide a system function
- * or you are not entirely happy with it and would rather use
- * a provided replacement (atari only).
- */
-/* #define SYSTEM_MISSING 1 */
-
-/*
- * FMOD_MISSING
- *
- * Define this if your system lacks the fmod() function and modf() will
- * be used instead.
- */
-/* #define FMOD_MISSING 1 */
-
-
-/*******************************/
-/* Gawk configuration options. */
-/*******************************/
-
-/*
- * DEFPATH
- *
- * The default search path for the -f option of gawk. It is used
- * if the AWKPATH environment variable is undefined. The default
- * definition is provided here. Most likely you should not change
- * this.
- */
-
-/* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
-/* #define ENVSEP ':' */
-
-/*
- * alloca already has a prototype defined - don't redefine it
- */
-/* #define ALLOCA_PROTO 1 */
-
-/*
- * srandom already has a prototype defined - don't redefine it
- */
-/* #define SRANDOM_PROTO 1 */
-
-/*
- * getpgrp() in sysvr4 and POSIX takes no argument
- */
-/* #define GETPGRP_NOARG 0 */
-
-/*
- * define const to nothing if not __STDC__
- */
-#ifndef __STDC__
-#define const
-#endif
-
-/* If svr4 and not gcc */
-/* #define SVR4 0 */
-#ifdef SVR4
-#define __svr4__ 1
-#endif
diff --git a/config/apollo b/config/apollo
deleted file mode 100644
index c1660e02..00000000
--- a/config/apollo
+++ /dev/null
@@ -1,6 +0,0 @@
-HP/Apollo workstations, running Domain/OS 10.x, with cc 6.7
-STRCASE_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-NO_TOKEN_PASTING 1
-MAKE_Apollo
diff --git a/config/atari b/config/atari
deleted file mode 100644
index 29a80bb3..00000000
--- a/config/atari
+++ /dev/null
@@ -1,11 +0,0 @@
-Atari ST under TOS with gcc compiler
-BLKSIZE_MISSING 1 /* Not really - but it may work better that way */
-STRCASE_MISSING 1
-STDC_HEADERS 1
-SYSTEM_MISSING 1
-SRANDOM_PROTO 1
-ALLOCA_PROTO 1
-DEFPATH ".,c:\\\\lib\\\\awk,c:\\\\gnu\\\\lib\\\\awk"
-ENVSEP ','
-SZTC (size_t)
-INTC (int)
diff --git a/config/bsd42 b/config/bsd42
deleted file mode 100644
index 720cd0f1..00000000
--- a/config/bsd42
+++ /dev/null
@@ -1,16 +0,0 @@
-For generic 4.2 BSD machine.
-SIGTYPE int
-HAVE_UNDERSCORE_SETJMP 1
-GETOPT_MISSING 1
-MEMCMP_MISSING 1
-MEMCPY_MISSING 1
-MEMSET_MISSING 1
-STRCASE_MISSING 1
-STRCHR_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-STRTOD_MISSING 1
-STRTOL_MISSING 1
-NON_STD_SPRINTF 1
-VPRINTF_MISSING 1
-BSDSTDIO 1
diff --git a/config/bsd43 b/config/bsd43
deleted file mode 100644
index c48601f7..00000000
--- a/config/bsd43
+++ /dev/null
@@ -1,16 +0,0 @@
-For generic 4.3 BSD machine.
-SIGTYPE int
-HAVE_UNDERSCORE_SETJMP 1
-MEMCMP_MISSING 1
-MEMCPY_MISSING 1
-MEMSET_MISSING 1
-STRCASE_MISSING 1
-STRCHR_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-STRTOD_MISSING 1
-STRTOL_MISSING 1
-NON_STD_SPRINTF 1
-VPRINTF_MISSING 1
-BSDSTDIO 1
-TZNAME_MISSING 1
diff --git a/config/bsd43r b/config/bsd43r
deleted file mode 100644
index e1ea95a2..00000000
--- a/config/bsd43r
+++ /dev/null
@@ -1,3 +0,0 @@
-For generic 4.3-Reno BSD machine.
-HAVE_UNDERSCORE_SETJMP 1
-STRTOD_MISSING 1
diff --git a/config/bsd43t b/config/bsd43t
deleted file mode 100644
index d0bdcf6b..00000000
--- a/config/bsd43t
+++ /dev/null
@@ -1,14 +0,0 @@
-For generic 4.3-Tahoe BSD machine.
-SIGTYPE int
-HAVE_UNDERSCORE_SETJMP 1
-MEMCMP_MISSING 1
-MEMCPY_MISSING 1
-MEMSET_MISSING 1
-STRCHR_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-STRTOD_MISSING 1
-STRTOL_MISSING 1
-NON_STD_SPRINTF 1
-VPRINTF_MISSING 1
-BSDSTDIO 1
diff --git a/config/bsd44 b/config/bsd44
deleted file mode 100644
index 6c908859..00000000
--- a/config/bsd44
+++ /dev/null
@@ -1,5 +0,0 @@
-For generic 4.4 alpha
-HAVE_UNDERSCORE_SETJMP 1
-STDC_HEADERS 1
-ALLOCA_PROTO 1
-SRANDOM_PROTO 1
diff --git a/config/convex b/config/convex
deleted file mode 100644
index 4e8c2d8e..00000000
--- a/config/convex
+++ /dev/null
@@ -1,7 +0,0 @@
-ConvexOS 9.1, Convex C 4.1. I used cc -O1
-HAVE_UNDERSCORE_SETJMP 1
-STRERROR_MISSING 1
-STRCASE_MISSING 1
-STRTOD_MISSING 1
-STDC_HEADERS 1
-CHAR_UNSIGNED 1
diff --git a/config/cray b/config/cray
deleted file mode 100644
index fab18998..00000000
--- a/config/cray
+++ /dev/null
@@ -1,9 +0,0 @@
-Cray 2 running Unicos 5.0.7
-BLKSIZE_MISSING 1
-SIGTYPE void
-HAVE_UNDERSCORE_SETJMP 1
-RANDOM_MISSING 1
-STRCASE_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-STDC_HEADERS 1
diff --git a/config/cray2-50 b/config/cray2-50
deleted file mode 100644
index 744a97eb..00000000
--- a/config/cray2-50
+++ /dev/null
@@ -1,7 +0,0 @@
-Cray CRAY-2 system running Unicos 5.0 or 5.x?
-BLKSIZE_MISSING 1
-SIGTYPE void
-RANDOM_MISSING 1
-STDC_HEADERS 1
-CHAR_UNSIGNED 1
-STRCASE_MISSING 1
diff --git a/config/cray2-60 b/config/cray2-60
deleted file mode 100644
index 6330ba7f..00000000
--- a/config/cray2-60
+++ /dev/null
@@ -1,6 +0,0 @@
-Cray Research CRAY-2 system running Unicos 6.0 or 6.1
-BLKSIZE_MISSING 1
-SIGTYPE void
-RANDOM_MISSING 1
-STDC_HEADERS 1
-CHAR_UNSIGNED 1
diff --git a/config/cray60 b/config/cray60
deleted file mode 100644
index c9aa5729..00000000
--- a/config/cray60
+++ /dev/null
@@ -1,9 +0,0 @@
-Cray Research system running Unicos 6.0 or later
-SIGTYPE void
-RANDOM_MISSING 1
-STDC_HEADERS 1
-SRANDOM_PROTO 1
-CHAR_UNSIGNED 1
-GETPGRP_NOARG 1
-MAKE_ALLOCA_C
-MAKE_CC
diff --git a/config/gnu b/config/gnu
deleted file mode 100644
index 93f0465d..00000000
--- a/config/gnu
+++ /dev/null
@@ -1,6 +0,0 @@
-For the GNU operating system.
-
-SIGTYPE void
-HAVE_UNDERSCORE_SETJMP 1
-STDC_HEADERS 1
-GETPGRP_NOARG 1
diff --git a/config/hiosf1 b/config/hiosf1
deleted file mode 100644
index 6345da6d..00000000
--- a/config/hiosf1
+++ /dev/null
@@ -1,5 +0,0 @@
-For HITACHI S systems hitm-hitachi-osf1.
-RANDOM_MISSING 1
-STDC_HEADERS 1
-CHAR_UNSIGNED 1
-MAKE_CC
diff --git a/config/hiuxwe2 b/config/hiuxwe2
deleted file mode 100644
index aaaa7334..00000000
--- a/config/hiuxwe2
+++ /dev/null
@@ -1,5 +0,0 @@
-For HITACHI PA-RISC hppa1.0-hitachi-hiuxwe2 systems.
-STDC_HEADERS 1
-RANDOM_MISSING 1
-MAKE_ALLOCA_PW
-MAKE_CC
diff --git a/config/hpux7.0 b/config/hpux7.0
deleted file mode 100644
index f0fe7902..00000000
--- a/config/hpux7.0
+++ /dev/null
@@ -1,9 +0,0 @@
-For HPUX 7.0
-STDC_HEADERS 1
-_POSIX_SOURCE 1
-POSIX 1
-HAVE_UNDERSCORE_SETJMP 1
-STRCASE_MISSING 1
-BSDSTDIO 1
-RANDOM_MISSING 1
-GETPGRP_NOARG 1
diff --git a/config/hpux8x b/config/hpux8x
deleted file mode 100644
index 31fda382..00000000
--- a/config/hpux8x
+++ /dev/null
@@ -1,5 +0,0 @@
-HPPA running HP-UX 8.x
-STDC_HEADERS 1
-RANDOM_MISSING 1
-MAKE_ALLOCA_PW
-GETPGRP_NOARG 1
diff --git a/config/ibmrt-aos b/config/ibmrt-aos
deleted file mode 100644
index afbf1aa4..00000000
--- a/config/ibmrt-aos
+++ /dev/null
@@ -1,19 +0,0 @@
-For IBM RT running AOS
-SIGTYPE int
-HAVE_UNDERSCORE_SETJMP 1
-MEMCMP_MISSING 1
-MEMCPY_MISSING 1
-MEMSET_MISSING 1
-STRCASE_MISSING 1
-STRCHR_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-STRTOD_MISSING 1
-STRTOL_MISSING 1
-NON_STD_SPRINTF 1
-VPRINTF_MISSING 1
-BSDSTDIO 1
-TZNAME_MISSING 1
-TZSET_MISSING 1
-FMOD_MISSING 1
-CHAR_UNSIGNED 1
diff --git a/config/interactive2.2 b/config/interactive2.2
deleted file mode 100644
index 24d5ec18..00000000
--- a/config/interactive2.2
+++ /dev/null
@@ -1,9 +0,0 @@
-Interactive Unix 2.2
-BLKSIZE_MISSING 1
-RANDOM_MISSING 1
-STRCASE_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-STDC_HEADERS 1
-_POSIX_SOURCE 1
-POSIX 1
diff --git a/config/linux b/config/linux
deleted file mode 100644
index ff825f59..00000000
--- a/config/linux
+++ /dev/null
@@ -1,3 +0,0 @@
-Configure file for Linux 0.98.pl4 (with gcc 2.2.2d7)
-DOPRNT_MISSING 1
-SRANDOM_PROTO 1
diff --git a/config/lynxos b/config/lynxos
deleted file mode 100644
index 550b2134..00000000
--- a/config/lynxos
+++ /dev/null
@@ -1,10 +0,0 @@
-For Lynx version 2.1
-SIGTYPE int
-STDC_HEADERS 1
-HAVE_STRING_H 1
-STRCASE_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-TZNAME_MISSING 1
-TZSET_MISSING 1
-TM_ZONE_MISSING 1
diff --git a/config/mach b/config/mach
deleted file mode 100644
index fc630e37..00000000
--- a/config/mach
+++ /dev/null
@@ -1,9 +0,0 @@
-For Mach-386 2.6 system. Should work on other Mach 2.5 or 2.6 systems.
-SIGTYPE int
-HAVE_UNDERSCORE_SETJMP 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-STRTOD_MISSING 1
-STRTOL_MISSING 1
-TZNAME_MISSING 1
-BSDSTDIO 1
diff --git a/config/msc60 b/config/msc60
deleted file mode 100644
index d9909659..00000000
--- a/config/msc60
+++ /dev/null
@@ -1,9 +0,0 @@
-MS-DOS systems using MSC 6.0
-BLKSIZE_MISSING 1
-SIZE_T_MISSING 1
-GCVT_MISSING 1
-GETOPT_MISSING 1
-RANDOM_MISSING 1
-STRCASE_MISSING 1
-#STRFTIME_MISSING 1
-STRTOL_MISSING 1
diff --git a/config/news b/config/news
deleted file mode 100644
index dbcc354e..00000000
--- a/config/news
+++ /dev/null
@@ -1,6 +0,0 @@
-Sony News
-HAVE_UNDERSCORE_SETJMP 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-STRTOD_MISSING 1
-NON_STD_SPRINTF 1
diff --git a/config/next20 b/config/next20
deleted file mode 100644
index 29528a94..00000000
--- a/config/next20
+++ /dev/null
@@ -1,8 +0,0 @@
-NeXT running 2.0
-STRTOD_MISSING 1 /* NeXT strtod() is buggy */
-STDC_HEADERS 1
-ALLOCA_PROTO 1
-SRANDOM_PROTO 1
-SZTC (size_t)
-INTC (int)
-MAKE_NeXT
diff --git a/config/next21 b/config/next21
deleted file mode 100644
index f29aa951..00000000
--- a/config/next21
+++ /dev/null
@@ -1,7 +0,0 @@
-NeXT running 2.1 or higher
-STDC_HEADERS 1
-ALLOCA_PROTO 1
-SRANDOM_PROTO 1
-SZTC (size_t)
-INTC (int)
-MAKE_NeXT
diff --git a/config/next30 b/config/next30
deleted file mode 100644
index f29aa951..00000000
--- a/config/next30
+++ /dev/null
@@ -1,7 +0,0 @@
-NeXT running 2.1 or higher
-STDC_HEADERS 1
-ALLOCA_PROTO 1
-SRANDOM_PROTO 1
-SZTC (size_t)
-INTC (int)
-MAKE_NeXT
diff --git a/config/osf1 b/config/osf1
deleted file mode 100644
index de9fb527..00000000
--- a/config/osf1
+++ /dev/null
@@ -1,3 +0,0 @@
-For generic OSF/1
-STDC_HEADERS 1
-HAVE_UNDERSCORE_SETJMP 1
diff --git a/config/osf1.dec b/config/osf1.dec
deleted file mode 100644
index c7aaec67..00000000
--- a/config/osf1.dec
+++ /dev/null
@@ -1,4 +0,0 @@
-For DEC OSF/1 2.0
-STDC_HEADERS 1
-HAVE_UNDERSCORE_SETJMP 1
-SRANDOM_PROTO 1
diff --git a/config/riscos452 b/config/riscos452
deleted file mode 100644
index be4ad54e..00000000
--- a/config/riscos452
+++ /dev/null
@@ -1,7 +0,0 @@
-MIPS RISC/os 4.52
-BLKSIZE_MISSING 1
-RANDOM_MISSING 1
-STRCASE_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-CHAR_UNSIGNED 1
diff --git a/config/rs6000 b/config/rs6000
deleted file mode 100644
index ec389a3b..00000000
--- a/config/rs6000
+++ /dev/null
@@ -1,6 +0,0 @@
-For IBM RS/6000 systems.
-RANDOM_MISSING 1
-STDC_HEADERS 1
-CHAR_UNSIGNED 1
-MAKE_ALLOCA_C
-MAKE_RS6000
diff --git a/config/sco b/config/sco
deleted file mode 100644
index 5123b0e7..00000000
--- a/config/sco
+++ /dev/null
@@ -1,6 +0,0 @@
-SCO UNIX
-RANDOM_MISSING 1
-STRCASE_MISSING 1
-STDC_HEADERS 1
-MAKE_ALLOCA_C
-HAVE_STRING_H 1
diff --git a/config/sequent b/config/sequent
deleted file mode 100644
index 0b340110..00000000
--- a/config/sequent
+++ /dev/null
@@ -1,17 +0,0 @@
-For generic 4.3 BSD machine.
-SIGTYPE int
-HAVE_UNDERSCORE_SETJMP 1
-MEMCMP_MISSING 1
-MEMCPY_MISSING 1
-MEMSET_MISSING 1
-STRCASE_MISSING 1
-STRCHR_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-STRTOD_MISSING 1
-STRTOL_MISSING 1
-NON_STD_SPRINTF 1
-VPRINTF_MISSING 1
-BSDSTDIO 1
-TZSET_MISSING 1
-FMOD_MISSING 1
diff --git a/config/sgi b/config/sgi
deleted file mode 100644
index 7886bb5f..00000000
--- a/config/sgi
+++ /dev/null
@@ -1,5 +0,0 @@
-SGI Personal Iris (Sys V derived)
-BLKSIZE_MISSING 1
-RANDOM_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
diff --git a/config/sgi33 b/config/sgi33
deleted file mode 100644
index 633f8e53..00000000
--- a/config/sgi33
+++ /dev/null
@@ -1,4 +0,0 @@
-SGI Personal Iris (Sys V derived) (this works with gcc)
-BLKSIZE_MISSING 1
-STDC_HEADERS 1
-MAKE_ALLOCA_C
diff --git a/config/sgi33.cc b/config/sgi33.cc
deleted file mode 100644
index 2798db8a..00000000
--- a/config/sgi33.cc
+++ /dev/null
@@ -1,5 +0,0 @@
-SGI Personal Iris (Sys V derived) (this works with cc)
-BLKSIZE_MISSING 1
-STDC_HEADERS 1
-CHAR_UNSIGNED 1
-MAKE_ALLOCA_C
diff --git a/config/sgi405 b/config/sgi405
deleted file mode 100644
index 49b2be49..00000000
--- a/config/sgi405
+++ /dev/null
@@ -1,5 +0,0 @@
-SGI Personal Iris (Sys V derived) (this works with gcc)
-BLKSIZE_MISSING 1
-STDC_HEADERS 1
-GETPGRP_NOARG 1
-MAKE_SGI_GCC
diff --git a/config/sgi405.cc b/config/sgi405.cc
deleted file mode 100644
index 391b4c95..00000000
--- a/config/sgi405.cc
+++ /dev/null
@@ -1,8 +0,0 @@
-SGI Personal Iris (Sys V derived) (this works with cc)
-BLKSIZE_MISSING 1
-STDC_HEADERS 1
-CHAR_UNSIGNED 1
-GETPGRP_NOARG 1
-MAKE_CC
-MAKE_SGI
-MAKE_ALLOCA_C
diff --git a/config/solaris2.cc b/config/solaris2.cc
deleted file mode 100644
index be8f2799..00000000
--- a/config/solaris2.cc
+++ /dev/null
@@ -1,7 +0,0 @@
-Solaris 2.x Systems with cc
-RANDOM_MISSING 1
-STRCASE_MISSING 1
-STDC_HEADERS 1
-SVR4 1
-MAKE_ALLOCA_C
-MAKE_CC
diff --git a/config/sunos3 b/config/sunos3
deleted file mode 100644
index be09e0d8..00000000
--- a/config/sunos3
+++ /dev/null
@@ -1,8 +0,0 @@
-Sun running SunOS 3.x
-SIGTYPE int
-HAVE_UNDERSCORE_SETJMP 1
-STRCASE_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-NON_STD_SPRINTF 1
-TZSET_MISSING 1
diff --git a/config/sunos40 b/config/sunos40
deleted file mode 100644
index c3e8bdc2..00000000
--- a/config/sunos40
+++ /dev/null
@@ -1,7 +0,0 @@
-Sun running SunOS 4.0.x
-HAVE_UNDERSCORE_SETJMP 1
-STRCASE_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-TZNAME_MISSING 1
-NON_STD_SPRINTF 1
diff --git a/config/sunos41 b/config/sunos41
deleted file mode 100644
index c26040dc..00000000
--- a/config/sunos41
+++ /dev/null
@@ -1,4 +0,0 @@
-Sun running SunOS 4.1
-HAVE_UNDERSCORE_SETJMP 1
-STRERROR_MISSING 1
-NON_STD_SPRINTF 1
diff --git a/config/sunos41.cc b/config/sunos41.cc
deleted file mode 100644
index f13e8659..00000000
--- a/config/sunos41.cc
+++ /dev/null
@@ -1,6 +0,0 @@
-Sun running SunOS 4.1
-MAKE_CC
-MAKE_ALLOCA_C
-HAVE_UNDERSCORE_SETJMP 1
-STRERROR_MISSING 1
-NON_STD_SPRINTF 1
diff --git a/config/sysv2 b/config/sysv2
deleted file mode 100644
index 0239639c..00000000
--- a/config/sysv2
+++ /dev/null
@@ -1,6 +0,0 @@
-System V.2 Systems, Amdahl UTS, ATT UnixPCs
-BLKSIZE_MISSING 1
-RANDOM_MISSING 1
-STRCASE_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
diff --git a/config/sysv3 b/config/sysv3
deleted file mode 100644
index 944db233..00000000
--- a/config/sysv3
+++ /dev/null
@@ -1,7 +0,0 @@
-System V.3 Systems (generic)
-BLKSIZE_MISSING 1
-RANDOM_MISSING 1
-STRCASE_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-MAKE_ALLOCA_C
diff --git a/config/sysv4 b/config/sysv4
deleted file mode 100644
index 0ea9c4ff..00000000
--- a/config/sysv4
+++ /dev/null
@@ -1,5 +0,0 @@
-System V.4 Systems (generic)
-RANDOM_MISSING 1
-STRCASE_MISSING 1
-STDC_HEADERS 1
-MAKE_ALLOCA_C
diff --git a/config/ultrix31 b/config/ultrix31
deleted file mode 100644
index 4359bdca..00000000
--- a/config/ultrix31
+++ /dev/null
@@ -1,7 +0,0 @@
-DECstation or VAX running Ultrix 3.x
-HAVE_UNDERSCORE_SETJMP 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-NON_STD_SPRINTF 1
-TZSET_MISSING 1
-FMOD_MISSING 1
diff --git a/config/ultrix40 b/config/ultrix40
deleted file mode 100644
index 2c0b70db..00000000
--- a/config/ultrix40
+++ /dev/null
@@ -1,2 +0,0 @@
-DECstation running Ultrix 4.0 (4.x?)
-STDC_HEADERS 1
diff --git a/config/ultrix41 b/config/ultrix41
deleted file mode 100644
index 8e82ec40..00000000
--- a/config/ultrix41
+++ /dev/null
@@ -1,4 +0,0 @@
-DECstation running Ultrix 4.1 (and 4.2??)
-STDC_HEADERS 1
-#define Ultrix41 1
-MAKE_ALLOCA_C
diff --git a/config/utek b/config/utek
deleted file mode 100644
index 101daef7..00000000
--- a/config/utek
+++ /dev/null
@@ -1,18 +0,0 @@
-For a Tektronix 4300 running UTek 4.0 (BSD based.)
-SIGTYPE int
-MEMCMP_MISSING 1
-MEMCPY_MISSING 1
-MEMSET_MISSING 1
-STRCASE_MISSING 1
-STRERROR_MISSING 1
-STRFTIME_MISSING 1
-STRTOD_MISSING 1
-STRTOL_MISSING 1
-NON_STD_SPRINTF 1
-VPRINTF_MISSING 1
-BSDSTDIO 1
-TZNAME_MISSING 1
-TZSET_MISSING 1
-LIMITS_H_MISSING 1
-TM_ZONE_MISSING 1
-MAKE_CC
diff --git a/config/v10config.h b/config/v10config.h
deleted file mode 100644
index 5c6ddb15..00000000
--- a/config/v10config.h
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * config.h -- configuration definitions for gawk.
- *
- * Vax Running 10th Edition Unix
- */
-
-/*
- * Copyright (C) 1991, the Free Software Foundation, Inc.
- *
- * This file is part of GAWK, the GNU implementation of the
- * AWK Programming Language.
- *
- * GAWK is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 1, or (at your option)
- * any later version.
- *
- * GAWK is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- * This file isolates configuration dependencies for gnu awk.
- * You should know something about your system, perhaps by having
- * a manual handy, when you edit this file. You should copy config.h-dist
- * to config.h, and edit config.h. Do not modify config.h-dist, so that
- * it will be easy to apply any patches that may be distributed.
- *
- * The general idea is that systems conforming to the various standards
- * should need to do the least amount of changing. Defining the various
- * items in this file usually means that your system is missing that
- * particular feature.
- *
- * The order of preference in standard conformance is ANSI C, POSIX,
- * and the SVID.
- *
- * If you have no clue as to what's going on with your system, try
- * compiling gawk without editing this file and see what shows up
- * missing in the link stage. From there, you can probably figure out
- * which defines to turn on.
- */
-
-/**************************/
-/* Miscellaneous features */
-/**************************/
-
-/*
- * BLKSIZE_MISSING
- *
- * Check your /usr/include/sys/stat.h file. If the stat structure
- * does not have a member named st_blksize, define this. (This will
- * most likely be the case on most System V systems prior to V.4.)
- */
-#define BLKSIZE_MISSING 1
-
-/*
- * SIGTYPE
- *
- * The return type of the routines passed to the signal function.
- * Modern systems use `void', older systems use `int'.
- * If left undefined, it will default to void.
- */
-#define SIGTYPE SIG_TYP /* defined in <signal.h> */
-
-/*
- * SIZE_T_MISSING
- *
- * If your system has no typedef for size_t, define this to get a default
- */
-/* #define SIZE_T_MISSING 1 */
-
-/*
- * CHAR_UNSIGNED
- *
- * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
- * then define this for use in regex.c
- */
-/* #define CHAR_UNSIGNED 1 */
-
-/*
- * HAVE_UNDERSCORE_SETJMP
- *
- * Check in your /usr/include/setjmp.h file. If there are routines
- * there named _setjmp and _longjmp, then you should define this.
- * Typically only systems derived from Berkeley Unix have this.
- */
-/* #define HAVE_UNDERSCORE_SETJMP 1 */
-
-/***********************************************/
-/* Missing library subroutines or system calls */
-/***********************************************/
-
-/*
- * GETOPT_MISSING
- *
- * Define this if your library does not have the getopt(3) library
- * routine for parsing command line arguments.
- */
-/* #define GETOPT_MISSING 1 */
-
-/*
- * MEMCMP_MISSING
- * MEMCPY_MISSING
- * MEMSET_MISSING
- *
- * These three routines are for manipulating blocks of memory. Most
- * likely they will either all three be present or all three be missing,
- * so they're grouped together.
- */
-/* #define MEMCMP_MISSING 1 */
-/* #define MEMCPY_MISSING 1 */
-/* #define MEMSET_MISSING 1 */
-
-/*
- * RANDOM_MISSING
- *
- * Your system does not have the random(3) suite of random number
- * generating routines. These are different from the old rand(3)
- * routines!
- */
-#define RANDOM_MISSING 1
-
-/*
- * STRCASE_MISSING
- *
- * Your system does not have the strcasecmp() and strncasecmp()
- * routines that originated in Berkeley Unix.
- */
-#define STRCASE_MISSING 1
-
-/*
- * STRCHR_MISSING
- *
- * Your system does not have the strchr() and strrchr() functions.
- */
-/* #define STRCHR_MISSING 1 */
-
-/*
- * STRERROR_MISSING
- *
- * Your system lacks the ANSI C strerror() routine for returning the
- * strings associated with errno values.
- */
-#define STRERROR_MISSING 1
-
-/*
- * STRFTIME_MISSING
- *
- * Your system lacks the ANSI C strftime() routine for formatting
- * broken down time values.
- */
-#define STRFTIME_MISSING 1
-
-/*
- * STRTOD_MISSING
- *
- * Your system does not have the strtod() routine for converting
- * strings to double precision floating point values.
- */
-/* #define STRTOD_MISSING 1 */
-
-/*
- * STRTOL_MISSING
- *
- * Your system does not have the strtol() routine for converting
- * strings to long integers.
- */
-/* #define STRTOL_MISSING 1 */
-
-/*
- * TZSET_MISSING
- *
- * If you have a 4.2 BSD vintage system, then the strftime() routine
- * supplied in the missing directory won't be enough, because it relies on the
- * tzset() routine from System V / Posix. Fortunately, there is an
- * emulation for tzset() too that should do the trick. If you don't
- * have tzset(), define this.
- */
-#define TZSET_MISSING 1
-
-/*
- * STDC_HEADERS
- *
- * If your system does have ANSI compliant header files that
- * provide prototypes for library routines, then define this.
- */
-/* #define STDC_HEADERS 1 */
-
-/*
- * NO_TOKEN_PASTING
- *
- * If your compiler defines __STDC__ but does not support token
- * pasting (tok##tok), then define this.
- */
-/* #define NO_TOKEN_PASTING 1 */
-
-/*****************************************************************/
-/* Stuff related to the Standard I/O Library. */
-/*****************************************************************/
-/* Much of this is (still, unfortunately) black magic in nature. */
-/* You may have to use some or all of these together to get gawk */
-/* to work correctly. */
-/*****************************************************************/
-
-/*
- * NON_STD_SPRINTF
- *
- * Look in your /usr/include/stdio.h file. If the return type of the
- * sprintf() function is NOT `int', define this.
- */
-/* #define NON_STD_SPRINTF 1 */
-
-/*
- * VPRINTF_MISSING
- *
- * Define this if your system lacks vprintf() and the other routines
- * that go with it.
- */
-/* #define VPRINTF_MISSING 1 */
-
-/*
- * BSDSTDIO
- *
- * Define this if your standard i/o library is internally compatible
- * with the one shipped with Berkeley Unix systems (4.n, n <= 3-reno).
- * If you've defined VPRINTF_MISSING, you probably will need this too.
- */
-/* #define BSDSTDIO 1 */
-
-/*
- * DOPRNT_MISSING
- *
- * Define this if your standard i/o library does not have the _doprnt()
- * routine. This is used in an attempt to simulate the vfprintf()
- * routine.
- */
-/* #define DOPRNT_MISSING 1 */
-
-/*
- * Casts from size_t to int and back. These will become unnecessary
- * at some point in the future, but for now are required where the
- * two types are a different representation.
- */
-/* #define SZTC */
-/* #define INTC */
-
-/*
- * SYSTEM_MISSING
- *
- * Define this if your library does not provide a system function
- * or you are not entirely happy with it and would rather use
- * a provided replacement (atari only).
- */
-/* #define SYSTEM_MISSING 1 */
-
-
-/*******************************/
-/* Gawk configuration options. */
-/*******************************/
-
-/*
- * DEFPATH
- *
- * The default search path for the -f option of gawk. It is used
- * if the AWKPATH environment variable is undefined. The default
- * definition is provided here. Most likely you should not change
- * this.
- */
-
-/* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
-/* #define ENVSEP ':' */
diff --git a/config/vms-conf.h b/config/vms-conf.h
deleted file mode 100644
index 944dc4af..00000000
--- a/config/vms-conf.h
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
- * config.h -- configuration definitions for gawk.
- *
- * For VMS (assumes V4.6 or later; tested on V5.3 and V5.4)
- */
-
-/*
- * Copyright (C) 1991, 1992 the Free Software Foundation, Inc.
- *
- * This file is part of GAWK, the GNU implementation of the
- * AWK Programming Language.
- *
- * GAWK is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * GAWK is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/**************************/
-/* Miscellaneous features */
-/**************************/
-
-/*
- * BLKSIZE_MISSING
- * VMS: missing--not applicable
- * Check your /usr/include/sys/stat.h file. If the stat structure
- * does not have a member named st_blksize, define this. (This will
- * most likely be the case on most System V systems prior to V.4.)
- */
-#define BLKSIZE_MISSING 1
-
-/*
- * SIGTYPE
- * VMS: either should work; void is 'correct'
- * The return type of the routines passed to the signal function.
- * Modern systems use `void', older systems use `int'.
- * If left undefined, it will default to void.
- */
-#define SIGTYPE void
-
-/*
- * SIZE_T_MISSING
- * VMS: <stddef.h> via <stdlib.h> (VAX C V2.3 & up); <sys/types.h> (GNU C)
- * If your system has no typedef for size_t, define this to get a default
- */
-/* #define SIZE_T_MISSING 1 */
-
-/*
- * CHAR_UNSIGNED
- * VMS: well behaved, either signed or unsigned (signed by default)
- * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
- * then define this for use in regex.c
- */
-/* #define CHAR_UNSIGNED 1 */
-
-/*
- * HAVE_UNDERSCORE_SETJMP
- * VMS: not present
- * Check in your /usr/include/setjmp.h file. If there are routines
- * there named _setjmp and _longjmp, then you should define this.
- * Typically only systems derived from Berkeley Unix have this.
- */
-/* #define HAVE_UNDERSCORE_SETJMP 1 */
-
-/***********************************************/
-/* Missing library subroutines or system calls */
-/***********************************************/
-
-/*
- * MEMCMP_MISSING
- * MEMCPY_MISSING
- * MEMSET_MISSING
- * VMS: <string.h> (introduced V4.6)
- * These three routines are for manipulating blocks of memory. Most
- * likely they will either all three be present or all three be missing,
- * so they're grouped together.
- */
-/* #define MEMCMP_MISSING 1 */
-/* #define MEMCPY_MISSING 1 */
-/* #define MEMSET_MISSING 1 */
-
-/*
- * RANDOM_MISSING
- * VMS: missing (as of V5.4)
- * Your system does not have the random(3) suite of random number
- * generating routines. These are different from the old rand(3)
- * routines!
- */
-#define RANDOM_MISSING 1
-
-/*
- * STRCASE_MISSING
- * VMS: missing
- * Your system does not have the strcasecmp() and strncasecmp()
- * routines that originated in Berkeley Unix.
- */
-#define STRCASE_MISSING 1
-
-/*
- * STRCHR_MISSING
- * VMS: <string.h>
- * Your system does not have the strchr() and strrchr() functions.
- */
-/* #define STRCHR_MISSING 1 */
-
-/*
- * STRERROR_MISSING
- * VMS: <stdlib.h> (introduced V4.6)
- * Your system lacks the ANSI C strerror() routine for returning the
- * strings associated with errno values.
- */
-/* #define STRERROR_MISSING 1 */
-
-/*
- * STRTOD_MISSING
- * VMS: <stdlib.h> (introduced V4.6)
- * Your system does not have the strtod() routine for converting
- * strings to double precision floating point values.
- */
-/* #define STRTOD_MISSING 1 */
-
-/*
- * STRFTIME_MISSING
- * VMS: missing (as of V5.4) [see below; do not change STRFTIME_MISSING]
- * Your system lacks the ANSI C strftime() routine for formatting
- * broken down time values.
- */
-#define STRFTIME_MISSING 1
-
-/*
- * TZSET_MISSING
- * VMS: missing, but can't use missing/tzset.c [no timezone support]
- * If you have a 4.2 BSD vintage system, then the strftime() routine
- * supplied in the missing directory won't be enough, because it relies on the
- * tzset() routine from System V / Posix. Fortunately, there is an
- * emulation for tzset() too that should do the trick. If you don't
- * have tzset(), define this.
- */
-/* #define TZSET_MISSING 1 */
-
-/*
- * TZNAME_MISSING
- *
- * Some systems do not support the external variables tzname and daylight.
- * If this is the case *and* strftime() is missing, define this.
- */
-/* #define TZNAME_MISSING 1 */
-
-/*
- * STDC_HEADERS
- * VMS: close enough (as of V4.6, VAX C V2.3) [GCC, see below]
- * If your system does have ANSI compliant header files that
- * provide prototypes for library routines, then define this.
- */
-#define STDC_HEADERS 1
-
-/*
- * NO_TOKEN_PASTING
- * VMS: compiler specific--see below
- * If your compiler defines __STDC__ but does not support token
- * pasting (tok##tok), then define this.
- */
-/* #define NO_TOKEN_PASTING 1 */
-
-/*****************************************************************/
-/* Stuff related to the Standard I/O Library. */
-/*****************************************************************/
-/* Much of this is (still, unfortunately) black magic in nature. */
-/* You may have to use some or all of these together to get gawk */
-/* to work correctly. */
-/*****************************************************************/
-
-/*
- * NON_STD_SPRINTF
- * VMS: ok
- * Look in your /usr/include/stdio.h file. If the return type of the
- * sprintf() function is NOT `int', define this.
- */
-/* #define NON_STD_SPRINTF 1 */
-
-/*
- * VPRINTF_MISSING
- * VMS: ok (introduced V4.6)
- * Define this if your system lacks vprintf() and the other routines
- * that go with it. This will trigger an attempt to use _doprnt().
- * If you don't have that, this attempt will fail and you are on your own.
- */
-/* #define VPRINTF_MISSING 1 */
-
-/*
- * Casts from size_t to int and back. These will become unnecessary
- * at some point in the future, but for now are required where the
- * two types are a different representation.
- */
-/* #define SZTC */
-/* #define INTC */
-
-/*
- * SYSTEM_MISSING
- * VMS: ok (introduced V4.6)
- * Define this if your library does not provide a system function
- * or you are not entirely happy with it and would rather use
- * a provided replacement (atari only).
- */
-/* #define SYSTEM_MISSING 1 */
-
-/*
- * FMOD_MISSING
- * VMS: ok (introduced V4.6)
- * Define this if your system lacks the fmod() function and modf() will
- * be used instead.
- */
-/* #define FMOD_MISSING 1 */
-
-
-/*******************************/
-/* Gawk configuration options. */
-/*******************************/
-
-/*
- * DEFPATH
- * VMS: "/AWK_LIBRARY" => "AWK_LIBRARY:"
- * The default search path for the -f option of gawk. It is used
- * if the AWKPATH environment variable is undefined.
- *
- * Note: OK even if no AWK_LIBRARY logical name has been defined.
- */
-
-#define DEFPATH ".,/AWK_LIBRARY"
-#define ENVSEP ','
-
-/*
- * alloca already has a prototype defined - don't redefine it
- */
-/* #define ALLOCA_PROTO 1 */
-
-/*
- * srandom already has a prototype defined - don't redefine it
- */
-/* #define SRANDOM_PROTO 1 */
-
-/*
- * Extended source file access.
- */
-#define DEFAULT_FILETYPE ".awk"
-
-/*
- * Pipe handling.
- */
-#define PIPES_SIMULATED 1
-
-/*
- * %g format in VAXCRTL is broken (chooses %e format when should use %f).
- */
-#define GFMT_WORKAROUND 1
-
-/*
- * VAX C
- *
- * As of V3.2, VAX C is not yet ANSI-compliant. But it's close enough
- * for GAWK's purposes. Comment this out for VAX C V2.4 and earlier.
- * Value of 0 should mean "not ANSI-C", but GAWK uses def/not-def tests.
- * YYDEBUG definition is needed for combination of VAX C V2.x and Bison.
- */
-#if defined(VAXC) && !defined(__STDC__)
-#define __STDC__ 0
-#define NO_TOKEN_PASTING
-#ifndef __DECC /* DEC C does not support #pragma builtins even in VAXC mode */
-#define VAXC_BUILTINS
-#endif
-/* #define YYDEBUG 0 */
-#endif
-
-/*
- * DEC C
- *
- * Digital's ANSI compiler.
- */
-#ifdef __DECC
- /* DEC C implies DECC$SHR, which doesn't have the %g problem of VAXCRTL */
-#undef GFMT_WORKAROUND
-#endif
-
-/*
- * GNU C
- *
- * Versions of GCC (actually GAS) earlier than 1.38 don't produce the
- * right code for ``extern const'' constructs, and other usages of
- * const might not be right either. The old set of include files from
- * the gcc-vms distribution did not contain prototypes, and this could
- * provoke some const-related compiler warnings. If you've got an old
- * version of gcc for VMS, define 'const' out of existence, and by all
- * means obtain the most recent version!
- *
- * Note: old versions of GCC should also avoid defining STDC_HEADERS,
- * because most of the ANSI-C required header files are missing.
- */
-#ifdef __GNUC__
-/* #define const */
-/* #undef STDC_HEADERS */
-#ifndef STDC_HEADERS
-#define alloca __builtin_alloca
-#define environ $$PsectAttributes_NOSHR$$environ /* awful GAS kludge */
-#endif
-#endif
-
-#ifdef STRFTIME_MISSING
-/*
- * Always use the version of strftime() in missing/strftime.c instead of
- * the [as yet undocumented/unsupported] one in VAXCRTL. Renaming it here
- * guarantees that it won't clash with the library routine.
- */
-#define strftime gnu_strftime
-#endif
diff --git a/config/vms-posix b/config/vms-posix
deleted file mode 100644
index f1e0f373..00000000
--- a/config/vms-posix
+++ /dev/null
@@ -1,16 +0,0 @@
-VMS POSIX (not to be confused with native VMS...)
-STDC_HEADERS 1
-RANDOM_MISSING 1
-STRCASE_MISSING 1
-NO_TOKEN_PASTING 1
-MAKE_ALLOCA_C
-MAKE_VMS-Posix
-MAKE_CC
-#define DEFAULT_FILETYPE ".awk"
-#define getopt gnu_getopt
-#define optopt gnu_optopt
-#define opterr gnu_opterr
-#define regcomp gnu_regcomp
-#define regexec gnu_regexec
-#define regfree gnu_regfree
-#define regerror gnu_regerror
diff --git a/configh.in b/configh.in
new file mode 100644
index 00000000..02b92087
--- /dev/null
+++ b/configh.in
@@ -0,0 +1,189 @@
+/* configh.in. Generated automatically from configure.in by autoheader. */
+/*
+ * acconfig.h -- configuration definitions for gawk.
+ */
+
+/*
+ * Copyright (C) 1995 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/* Define if on AIX 3.
+ System headers sometimes define this.
+ We just want to avoid a redefinition error message. */
+#ifndef _ALL_SOURCE
+#undef _ALL_SOURCE
+#endif
+
+/* Define if using alloca.c. */
+#undef C_ALLOCA
+
+/* Define if type char is unsigned and you are not using gcc. */
+#ifndef __CHAR_UNSIGNED__
+#undef __CHAR_UNSIGNED__
+#endif
+
+/* Define to empty if the keyword does not work. */
+#undef const
+
+/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems.
+ This function is required for alloca.c support on those systems. */
+#undef CRAY_STACKSEG_END
+
+/* Define to the type of elements in the array set by `getgroups'.
+ Usually this is either `int' or `gid_t'. */
+#undef GETGROUPS_T
+
+/* Define if the `getpgrp' function takes no argument. */
+#undef GETPGRP_VOID
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+#undef gid_t
+
+/* Define if you have alloca, as a function or macro. */
+#undef HAVE_ALLOCA
+
+/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
+#undef HAVE_ALLOCA_H
+
+/* Define if you don't have vprintf but do have _doprnt. */
+#undef HAVE_DOPRNT
+
+/* Define if your struct stat has st_blksize. */
+#undef HAVE_ST_BLKSIZE
+
+/* Define if you have <sys/wait.h> that is POSIX.1 compatible. */
+#undef HAVE_SYS_WAIT_H
+
+/* Define if your struct tm has tm_zone. */
+#undef HAVE_TM_ZONE
+
+/* Define if you don't have tm_zone but do have the external array
+ tzname. */
+#undef HAVE_TZNAME
+
+/* Define if you have the vprintf function. */
+#undef HAVE_VPRINTF
+
+/* Define if on MINIX. */
+#undef _MINIX
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+#undef pid_t
+
+/* Define if the system does not provide POSIX.1 features except
+ with this defined. */
+#undef _POSIX_1_SOURCE
+
+/* Define if you need to in order for stat and other things to work. */
+#undef _POSIX_SOURCE
+
+/* Define as the return type of signal handlers (int or void). */
+#undef RETSIGTYPE
+
+/* Define to `unsigned' if <sys/types.h> doesn't define. */
+#undef size_t
+
+/* If using the C implementation of alloca, define if you know the
+ direction of stack growth for your system; otherwise it will be
+ automatically deduced at run-time.
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown
+ */
+#undef STACK_DIRECTION
+
+/* Define if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Define if your <sys/time.h> declares struct tm. */
+#undef TM_IN_SYS_TIME
+
+/* Define to `int' if <sys/types.h> doesn't define. */
+#undef uid_t
+
+#undef HAVE_STRINGIZE /* can use ANSI # operator in cpp */
+#undef REGEX_MALLOC /* use malloc instead of alloca in regex.c */
+#undef SPRINTF_RET /* return type of sprintf */
+
+/* Define if you have the fmod function. */
+#undef HAVE_FMOD
+
+/* Define if you have the memcmp function. */
+#undef HAVE_MEMCMP
+
+/* Define if you have the memcpy function. */
+#undef HAVE_MEMCPY
+
+/* Define if you have the memset function. */
+#undef HAVE_MEMSET
+
+/* Define if you have the random function. */
+#undef HAVE_RANDOM
+
+/* Define if you have the strchr function. */
+#undef HAVE_STRCHR
+
+/* Define if you have the strerror function. */
+#undef HAVE_STRERROR
+
+/* Define if you have the strftime function. */
+#undef HAVE_STRFTIME
+
+/* Define if you have the strncasecmp function. */
+#undef HAVE_STRNCASECMP
+
+/* Define if you have the strtod function. */
+#undef HAVE_STRTOD
+
+/* Define if you have the system function. */
+#undef HAVE_SYSTEM
+
+/* Define if you have the tzset function. */
+#undef HAVE_TZSET
+
+/* Define if you have the <limits.h> header file. */
+#undef HAVE_LIMITS_H
+
+/* Define if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define if you have the <signum.h> header file. */
+#undef HAVE_SIGNUM_H
+
+/* Define if you have the <stdarg.h> header file. */
+#undef HAVE_STDARG_H
+
+/* Define if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define if you have the <sys/param.h> header file. */
+#undef HAVE_SYS_PARAM_H
+
+/* Define if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+#include <custom.h> /* overrides for stuff autoconf can't deal with */
diff --git a/configure b/configure
index 9594844e..54eab6db 100755
--- a/configure
+++ b/configure
@@ -1,36 +1,2471 @@
#! /bin/sh
+
+# Guess values for system-dependent variables and create Makefiles.
+# Generated automatically using autoconf version 2.7
+# Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
#
-# configure -- produce a config.h from a known configuration
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+
+# Defaults:
+ac_help=
+ac_default_prefix=/usr/local
+# Any additions from configure.in:
+
+# Initialize some variables set by options.
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+build=NONE
+cache_file=./config.cache
+exec_prefix=NONE
+host=NONE
+no_create=
+nonopt=NONE
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+target=NONE
+verbose=
+x_includes=NONE
+x_libraries=NONE
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datadir='${prefix}/share'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+libdir='${exec_prefix}/lib'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+infodir='${prefix}/info'
+mandir='${prefix}/man'
+
+# Initialize some other variables.
+subdirs=
+MFLAGS= MAKEFLAGS=
+
+ac_prev=
+for ac_option
+do
+
+ # If the previous option needs an argument, assign it.
+ if test -n "$ac_prev"; then
+ eval "$ac_prev=\$ac_option"
+ ac_prev=
+ continue
+ fi
+
+ case "$ac_option" in
+ -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
+ *) ac_optarg= ;;
+ esac
+
+ # Accept the important Cygnus configure options, so we can diagnose typos.
+
+ case "$ac_option" in
+
+ -bindir | --bindir | --bindi | --bind | --bin | --bi)
+ ac_prev=bindir ;;
+ -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+ bindir="$ac_optarg" ;;
+
+ -build | --build | --buil | --bui | --bu)
+ ac_prev=build ;;
+ -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+ build="$ac_optarg" ;;
+
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+ cache_file="$ac_optarg" ;;
+
+ -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
+ ac_prev=datadir ;;
+ -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
+ | --da=*)
+ datadir="$ac_optarg" ;;
+
+ -disable-* | --disable-*)
+ ac_feature=`echo $ac_option|sed -e 's/-*disable-//'`
+ # Reject names that are not valid shell variable names.
+ if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then
+ { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; }
+ fi
+ ac_feature=`echo $ac_feature| sed 's/-/_/g'`
+ eval "enable_${ac_feature}=no" ;;
+
+ -enable-* | --enable-*)
+ ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'`
+ # Reject names that are not valid shell variable names.
+ if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then
+ { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; }
+ fi
+ ac_feature=`echo $ac_feature| sed 's/-/_/g'`
+ case "$ac_option" in
+ *=*) ;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "enable_${ac_feature}='$ac_optarg'" ;;
+
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+ | --exec | --exe | --ex)
+ ac_prev=exec_prefix ;;
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+ | --exec=* | --exe=* | --ex=*)
+ exec_prefix="$ac_optarg" ;;
+
+ -gas | --gas | --ga | --g)
+ # Obsolete; use --with-gas.
+ with_gas=yes ;;
+
+ -help | --help | --hel | --he)
+ # Omit some internal or obsolete options to make the list less imposing.
+ # This message is too long to be a string in the A/UX 3.1 sh.
+ cat << EOF
+Usage: configure [options] [host]
+Options: [defaults in brackets after descriptions]
+Configuration:
+ --cache-file=FILE cache test results in FILE
+ --help print this message
+ --no-create do not create output files
+ --quiet, --silent do not print \`checking...' messages
+ --version print the version of autoconf that created configure
+Directory and file names:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [same as prefix]
+ --bindir=DIR user executables in DIR [EPREFIX/bin]
+ --sbindir=DIR system admin executables in DIR [EPREFIX/sbin]
+ --libexecdir=DIR program executables in DIR [EPREFIX/libexec]
+ --datadir=DIR read-only architecture-independent data in DIR
+ [PREFIX/share]
+ --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data in DIR
+ [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var]
+ --libdir=DIR object code libraries in DIR [EPREFIX/lib]
+ --includedir=DIR C header files in DIR [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include]
+ --infodir=DIR info documentation in DIR [PREFIX/info]
+ --mandir=DIR man documentation in DIR [PREFIX/man]
+ --srcdir=DIR find the sources in DIR [configure dir or ..]
+ --program-prefix=PREFIX prepend PREFIX to installed program names
+ --program-suffix=SUFFIX append SUFFIX to installed program names
+ --program-transform-name=PROGRAM
+ run sed PROGRAM on installed program names
+EOF
+ cat << EOF
+Host type:
+ --build=BUILD configure for building on BUILD [BUILD=HOST]
+ --host=HOST configure for HOST [guessed]
+ --target=TARGET configure for TARGET [TARGET=HOST]
+Features and packages:
+ --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
+ --enable-FEATURE[=ARG] include FEATURE [ARG=yes]
+ --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
+ --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
+ --x-includes=DIR X include files are in DIR
+ --x-libraries=DIR X library files are in DIR
+EOF
+ if test -n "$ac_help"; then
+ echo "--enable and --with options recognized:$ac_help"
+ fi
+ exit 0 ;;
+
+ -host | --host | --hos | --ho)
+ ac_prev=host ;;
+ -host=* | --host=* | --hos=* | --ho=*)
+ host="$ac_optarg" ;;
+
+ -includedir | --includedir | --includedi | --included | --include \
+ | --includ | --inclu | --incl | --inc)
+ ac_prev=includedir ;;
+ -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+ | --includ=* | --inclu=* | --incl=* | --inc=*)
+ includedir="$ac_optarg" ;;
+
+ -infodir | --infodir | --infodi | --infod | --info | --inf)
+ ac_prev=infodir ;;
+ -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+ infodir="$ac_optarg" ;;
+
+ -libdir | --libdir | --libdi | --libd)
+ ac_prev=libdir ;;
+ -libdir=* | --libdir=* | --libdi=* | --libd=*)
+ libdir="$ac_optarg" ;;
+
+ -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+ | --libexe | --libex | --libe)
+ ac_prev=libexecdir ;;
+ -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+ | --libexe=* | --libex=* | --libe=*)
+ libexecdir="$ac_optarg" ;;
+
+ -localstatedir | --localstatedir | --localstatedi | --localstated \
+ | --localstate | --localstat | --localsta | --localst \
+ | --locals | --local | --loca | --loc | --lo)
+ ac_prev=localstatedir ;;
+ -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+ | --localstate=* | --localstat=* | --localsta=* | --localst=* \
+ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
+ localstatedir="$ac_optarg" ;;
+
+ -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+ ac_prev=mandir ;;
+ -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+ mandir="$ac_optarg" ;;
+
+ -nfp | --nfp | --nf)
+ # Obsolete; use --without-fp.
+ with_fp=no ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c)
+ no_create=yes ;;
+
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+ no_recursion=yes ;;
+
+ -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+ | --oldin | --oldi | --old | --ol | --o)
+ ac_prev=oldincludedir ;;
+ -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+ oldincludedir="$ac_optarg" ;;
+
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix="$ac_optarg" ;;
+
+ -program-prefix | --program-prefix | --program-prefi | --program-pref \
+ | --program-pre | --program-pr | --program-p)
+ ac_prev=program_prefix ;;
+ -program-prefix=* | --program-prefix=* | --program-prefi=* \
+ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+ program_prefix="$ac_optarg" ;;
+
+ -program-suffix | --program-suffix | --program-suffi | --program-suff \
+ | --program-suf | --program-su | --program-s)
+ ac_prev=program_suffix ;;
+ -program-suffix=* | --program-suffix=* | --program-suffi=* \
+ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+ program_suffix="$ac_optarg" ;;
+
+ -program-transform-name | --program-transform-name \
+ | --program-transform-nam | --program-transform-na \
+ | --program-transform-n | --program-transform- \
+ | --program-transform | --program-transfor \
+ | --program-transfo | --program-transf \
+ | --program-trans | --program-tran \
+ | --progr-tra | --program-tr | --program-t)
+ ac_prev=program_transform_name ;;
+ -program-transform-name=* | --program-transform-name=* \
+ | --program-transform-nam=* | --program-transform-na=* \
+ | --program-transform-n=* | --program-transform-=* \
+ | --program-transform=* | --program-transfor=* \
+ | --program-transfo=* | --program-transf=* \
+ | --program-trans=* | --program-tran=* \
+ | --progr-tra=* | --program-tr=* | --program-t=*)
+ program_transform_name="$ac_optarg" ;;
+
+ -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+ | -silent | --silent | --silen | --sile | --sil)
+ silent=yes ;;
+
+ -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+ ac_prev=sbindir ;;
+ -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+ | --sbi=* | --sb=*)
+ sbindir="$ac_optarg" ;;
+
+ -sharedstatedir | --sharedstatedir | --sharedstatedi \
+ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+ | --sharedst | --shareds | --shared | --share | --shar \
+ | --sha | --sh)
+ ac_prev=sharedstatedir ;;
+ -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+ | --sha=* | --sh=*)
+ sharedstatedir="$ac_optarg" ;;
+
+ -site | --site | --sit)
+ ac_prev=site ;;
+ -site=* | --site=* | --sit=*)
+ site="$ac_optarg" ;;
+
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ srcdir="$ac_optarg" ;;
+
+ -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+ | --syscon | --sysco | --sysc | --sys | --sy)
+ ac_prev=sysconfdir ;;
+ -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+ sysconfdir="$ac_optarg" ;;
+
+ -target | --target | --targe | --targ | --tar | --ta | --t)
+ ac_prev=target ;;
+ -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+ target="$ac_optarg" ;;
+
+ -v | -verbose | --verbose | --verbos | --verbo | --verb)
+ verbose=yes ;;
+
+ -version | --version | --versio | --versi | --vers)
+ echo "configure generated by autoconf version 2.7"
+ exit 0 ;;
+
+ -with-* | --with-*)
+ ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'`
+ # Reject names that are not valid shell variable names.
+ if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then
+ { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; }
+ fi
+ ac_package=`echo $ac_package| sed 's/-/_/g'`
+ case "$ac_option" in
+ *=*) ;;
+ *) ac_optarg=yes ;;
+ esac
+ eval "with_${ac_package}='$ac_optarg'" ;;
+
+ -without-* | --without-*)
+ ac_package=`echo $ac_option|sed -e 's/-*without-//'`
+ # Reject names that are not valid shell variable names.
+ if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then
+ { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; }
+ fi
+ ac_package=`echo $ac_package| sed 's/-/_/g'`
+ eval "with_${ac_package}=no" ;;
+
+ --x)
+ # Obsolete; use --with-x.
+ with_x=yes ;;
+
+ -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+ | --x-incl | --x-inc | --x-in | --x-i)
+ ac_prev=x_includes ;;
+ -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+ x_includes="$ac_optarg" ;;
+
+ -x-libraries | --x-libraries | --x-librarie | --x-librari \
+ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+ ac_prev=x_libraries ;;
+ -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+ x_libraries="$ac_optarg" ;;
+
+ -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; }
+ ;;
+
+ *)
+ if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then
+ echo "configure: warning: $ac_option: invalid host type" 1>&2
+ fi
+ if test "x$nonopt" != xNONE; then
+ { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; }
+ fi
+ nonopt="$ac_option"
+ ;;
+
+ esac
+done
+
+if test -n "$ac_prev"; then
+ { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; }
+fi
+
+trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15
+
+# File descriptor usage:
+# 0 standard input
+# 1 file creation
+# 2 errors and warnings
+# 3 some systems may open it to /dev/tty
+# 4 used on the Kubota Titan
+# 6 checking for... messages and results
+# 5 compiler messages saved in config.log
+if test "$silent" = yes; then
+ exec 6>/dev/null
+else
+ exec 6>&1
+fi
+exec 5>./config.log
+
+echo "\
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+" 1>&5
+
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Also quote any args containing shell metacharacters.
+ac_configure_args=
+for ac_arg
+do
+ case "$ac_arg" in
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+ | --no-cr | --no-c) ;;
+ -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;;
+ *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*)
+ ac_configure_args="$ac_configure_args '$ac_arg'" ;;
+ *) ac_configure_args="$ac_configure_args $ac_arg" ;;
+ esac
+done
+
+# NLS nuisances.
+# Only set LANG and LC_ALL to C if already set.
+# These must not be set unconditionally because not all systems understand
+# e.g. LANG=C (notably SCO).
+if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi
+if test "${LANG+set}" = set; then LANG=C; export LANG; fi
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -rf conftest* confdefs.h
+# AIX cpp loses on an empty file, so make sure it contains at least a newline.
+echo > confdefs.h
+
+# A filename unique to this package, relative to the directory that
+# configure is in, which we can look for to find out if srcdir is correct.
+ac_unique_file=awk.h
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ ac_srcdir_defaulted=yes
+ # Try the directory containing this script, then its parent.
+ ac_prog=$0
+ ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'`
+ test "x$ac_confdir" = "x$ac_prog" && ac_confdir=.
+ srcdir=$ac_confdir
+ if test ! -r $srcdir/$ac_unique_file; then
+ srcdir=..
+ fi
+else
+ ac_srcdir_defaulted=no
+fi
+if test ! -r $srcdir/$ac_unique_file; then
+ if test "$ac_srcdir_defaulted" = yes; then
+ { echo "configure: error: can not find sources in $ac_confdir or .." 1>&2; exit 1; }
+ else
+ { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; }
+ fi
+fi
+srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'`
+
+# Prefer explicitly selected file to automatically selected ones.
+if test -z "$CONFIG_SITE"; then
+ if test "x$prefix" != xNONE; then
+ CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
+ else
+ CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
+ fi
+fi
+for ac_site_file in $CONFIG_SITE; do
+ if test -r "$ac_site_file"; then
+ echo "loading site script $ac_site_file"
+ . "$ac_site_file"
+ fi
+done
+
+if test -r "$cache_file"; then
+ echo "loading cache $cache_file"
+ . $cache_file
+else
+ echo "creating cache $cache_file"
+ > $cache_file
+fi
+
+ac_ext=c
+# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options.
+ac_cpp='echo $CPP $CPPFLAGS 1>&5;
+$CPP $CPPFLAGS'
+ac_compile='echo ${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5;
+${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5 2>&5'
+ac_link='echo ${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5;
+${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5 2>&5'
+
+if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then
+ # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu.
+ if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then
+ ac_n= ac_c='
+' ac_t=' '
+ else
+ ac_n=-n ac_c= ac_t=
+ fi
+else
+ ac_n= ac_c='\c' ac_t=
+fi
-case "$#" in
-1) ;;
-*) echo "Usage: $0 system_type" >&2
- echo "Known systems: `cd config; echo ;ls -C`" >&2
- exit 2
- ;;
-esac
-if [ -f config/$1 ]; then
- sh ./mungeconf config/$1 config.in >config.h
- # echo #echo lines to stdout
- sed -n '/^#echo /s///p' config/$1
- case "$1" in
- bsd44) ln -s Makefile.bsd44 Makefile ; exit 0 ;;
- esac
- sed -n '/^MAKE_.*/s//s,^##&## ,,/p' config/$1 >sedscr
- if [ -s sedscr ]
+for ac_prog in 'bison -y' byacc
+do
+# Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_prog_YACC'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ if test -n "$YACC"; then
+ ac_cv_prog_YACC="$YACC" # Let the user override the test.
+else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:"
+ for ac_dir in $PATH; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ ac_cv_prog_YACC="$ac_prog"
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+fi
+fi
+YACC="$ac_cv_prog_YACC"
+if test -n "$YACC"; then
+ echo "$ac_t""$YACC" 1>&6
+else
+ echo "$ac_t""no" 1>&6
+fi
+
+test -n "$YACC" && break
+done
+test -n "$YACC" || YACC="yacc"
+
+# Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:"
+ for ac_dir in $PATH; do
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$ac_word; then
+ ac_cv_prog_CC="gcc"
+ break
+ fi
+ done
+ IFS="$ac_save_ifs"
+ test -z "$ac_cv_prog_CC" && ac_cv_prog_CC="cc"
+fi
+fi
+CC="$ac_cv_prog_CC"
+if test -n "$CC"; then
+ echo "$ac_t""$CC" 1>&6
+else
+ echo "$ac_t""no" 1>&6
+fi
+
+
+echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.c <<EOF
+#ifdef __GNUC__
+ yes;
+#endif
+EOF
+if ${CC-cc} -E conftest.c 2>&5 | egrep yes >/dev/null 2>&1; then
+ ac_cv_prog_gcc=yes
+else
+ ac_cv_prog_gcc=no
+fi
+fi
+
+echo "$ac_t""$ac_cv_prog_gcc" 1>&6
+if test $ac_cv_prog_gcc = yes; then
+ GCC=yes
+ if test "${CFLAGS+set}" != set; then
+ echo $ac_n "checking whether ${CC-cc} accepts -g""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_prog_gcc_g'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ echo 'void f(){}' > conftest.c
+if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then
+ ac_cv_prog_gcc_g=yes
+else
+ ac_cv_prog_gcc_g=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_prog_gcc_g" 1>&6
+ if test $ac_cv_prog_gcc_g = yes; then
+ CFLAGS="-g -O"
+ else
+ CFLAGS="-O"
+ fi
+ fi
+else
+ GCC=
+ test "${CFLAGS+set}" = set || CFLAGS="-g"
+fi
+
+echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+ CPP=
+fi
+if test -z "$CPP"; then
+if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ # This must be in double quotes, not single quotes, because CPP may get
+ # substituted into the Makefile and "${CC-cc}" will confuse make.
+ CPP="${CC-cc} -E"
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp.
+ cat > conftest.$ac_ext <<EOF
+#line 645 "configure"
+#include "confdefs.h"
+#include <assert.h>
+Syntax Error
+EOF
+eval "$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+ac_err=`grep -v '^ *+' conftest.out`
+if test -z "$ac_err"; then
+ :
+else
+ echo "$ac_err" >&5
+ rm -rf conftest*
+ CPP="${CC-cc} -E -traditional-cpp"
+ cat > conftest.$ac_ext <<EOF
+#line 659 "configure"
+#include "confdefs.h"
+#include <assert.h>
+Syntax Error
+EOF
+eval "$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+ac_err=`grep -v '^ *+' conftest.out`
+if test -z "$ac_err"; then
+ :
+else
+ echo "$ac_err" >&5
+ rm -rf conftest*
+ CPP=/lib/cpp
+fi
+rm -f conftest*
+fi
+rm -f conftest*
+ ac_cv_prog_CPP="$CPP"
+fi
+ CPP="$ac_cv_prog_CPP"
+else
+ ac_cv_prog_CPP="$CPP"
+fi
+echo "$ac_t""$CPP" 1>&6
+
+ac_aux_dir=
+for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do
+ if test -f $ac_dir/install-sh; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install-sh -c"
+ break
+ elif test -f $ac_dir/install.sh; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install.sh -c"
+ break
+ fi
+done
+if test -z "$ac_aux_dir"; then
+ { echo "configure: error: can not find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." 1>&2; exit 1; }
+fi
+ac_config_guess=$ac_aux_dir/config.guess
+ac_config_sub=$ac_aux_dir/config.sub
+ac_configure=$ac_aux_dir/configure # This should be Cygnus configure.
+
+# Find a good install program. We prefer a C program (faster),
+# so one script is as good as another. But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# ./install, which can be erroneously created by make from ./install.sh.
+echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6
+if test -z "$INSTALL"; then
+if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:"
+ for ac_dir in $PATH; do
+ # Account for people who put trailing slashes in PATH elements.
+ case "$ac_dir/" in
+ /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;;
+ *)
+ # OSF1 and SCO ODT 3.0 have their own names for install.
+ for ac_prog in ginstall installbsd scoinst install; do
+ if test -f $ac_dir/$ac_prog; then
+ if test $ac_prog = install &&
+ grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then
+ # AIX install. It has an incompatible calling convention.
+ # OSF/1 installbsd also uses dspmsg, but is usable.
+ :
+ else
+ ac_cv_path_install="$ac_dir/$ac_prog -c"
+ break 2
+ fi
+ fi
+ done
+ ;;
+ esac
+ done
+ IFS="$ac_save_ifs"
+
+fi
+ if test "${ac_cv_path_install+set}" = set; then
+ INSTALL="$ac_cv_path_install"
+ else
+ # As a last resort, use the slow shell script. We don't cache a
+ # path for INSTALL within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the path is relative.
+ INSTALL="$ac_install_sh"
+ fi
+fi
+echo "$ac_t""$INSTALL" 1>&6
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+echo $ac_n "checking whether ${MAKE-make} sets \${MAKE}""... $ac_c" 1>&6
+set dummy ${MAKE-make}; ac_make=$2
+if eval "test \"`echo '$''{'ac_cv_prog_make_${ac_make}_set'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftestmake <<\EOF
+all:
+ @echo 'ac_maketemp="${MAKE}"'
+EOF
+# GNU make sometimes prints "make[1]: Entering...", which would confuse us.
+eval `${MAKE-make} -f conftestmake 2>/dev/null | grep temp=`
+if test -n "$ac_maketemp"; then
+ eval ac_cv_prog_make_${ac_make}_set=yes
+else
+ eval ac_cv_prog_make_${ac_make}_set=no
+fi
+rm -f conftestmake
+fi
+if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then
+ echo "$ac_t""yes" 1>&6
+ SET_MAKE=
+else
+ echo "$ac_t""no" 1>&6
+ SET_MAKE="MAKE=${MAKE-make}"
+fi
+
+
+if test "$CFLAGS" = ""
+then
+ if test "$GCC" = yes
then
- sed -f sedscr Makefile.in >Makefile
+ CFLAGS="-g -O"
else
- cp Makefile.in Makefile
+ CFLAGS="-O"
fi
- rm -f sedscr
+fi
+
+
+echo $ac_n "checking for AIX""... $ac_c" 1>&6
+cat > conftest.$ac_ext <<EOF
+#line 802 "configure"
+#include "confdefs.h"
+#ifdef _AIX
+ yes
+#endif
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "yes" >/dev/null 2>&1; then
+ rm -rf conftest*
+ echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF
+#define _ALL_SOURCE 1
+EOF
+
+else
+ rm -rf conftest*
+ echo "$ac_t""no" 1>&6
+fi
+rm -f conftest*
+
+
+echo $ac_n "checking for POSIXized ISC""... $ac_c" 1>&6
+if test -d /etc/conf/kconfig.d &&
+ grep _POSIX_VERSION /usr/include/sys/unistd.h >/dev/null 2>&1
+then
+ echo "$ac_t""yes" 1>&6
+ ISC=yes # If later tests want to check for ISC.
+ cat >> confdefs.h <<\EOF
+#define _POSIX_SOURCE 1
+EOF
+
+ if test "$GCC" = yes; then
+ CC="$CC -posix"
+ else
+ CC="$CC -Xp"
+ fi
+else
+ echo "$ac_t""no" 1>&6
+ ISC=
+fi
+
+ac_safe=`echo "minix/config.h" | tr './\055' '___'`
+echo $ac_n "checking for minix/config.h""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 849 "configure"
+#include "confdefs.h"
+#include <minix/config.h>
+EOF
+eval "$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+ac_err=`grep -v '^ *+' conftest.out`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=yes"
+else
+ echo "$ac_err" >&5
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=no"
+fi
+rm -f conftest*
+fi
+if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
+ echo "$ac_t""yes" 1>&6
+ MINIX=yes
+else
+ echo "$ac_t""no" 1>&6
+MINIX=
+fi
+
+if test "$MINIX" = yes; then
+ cat >> confdefs.h <<\EOF
+#define _POSIX_SOURCE 1
+EOF
+
+ cat >> confdefs.h <<\EOF
+#define _POSIX_1_SOURCE 2
+EOF
+
+ cat >> confdefs.h <<\EOF
+#define _MINIX 1
+EOF
+
+fi
+
+
+# If we cannot run a trivial program, we must be cross compiling.
+echo $ac_n "checking whether cross-compiling""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_c_cross'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ if test "$cross_compiling" = yes; then
+ ac_cv_c_cross=yes
+else
+cat > conftest.$ac_ext <<EOF
+#line 898 "configure"
+#include "confdefs.h"
+main(){return(0);}
+EOF
+eval $ac_link
+if test -s conftest && (./conftest; exit) 2>/dev/null; then
+ ac_cv_c_cross=no
+else
+ ac_cv_c_cross=yes
+fi
+fi
+rm -fr conftest*
+fi
+
+echo "$ac_t""$ac_cv_c_cross" 1>&6
+cross_compiling=$ac_cv_c_cross
+
+echo $ac_n "checking for ANSI C header files""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 920 "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+EOF
+eval "$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+ac_err=`grep -v '^ *+' conftest.out`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ ac_cv_header_stdc=yes
+else
+ echo "$ac_err" >&5
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+cat > conftest.$ac_ext <<EOF
+#line 942 "configure"
+#include "confdefs.h"
+#include <string.h>
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "memchr" >/dev/null 2>&1; then
+ :
+else
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+cat > conftest.$ac_ext <<EOF
+#line 960 "configure"
+#include "confdefs.h"
+#include <stdlib.h>
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "free" >/dev/null 2>&1; then
+ :
+else
+ rm -rf conftest*
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+if test "$cross_compiling" = yes; then
+ :
+else
+cat > conftest.$ac_ext <<EOF
+#line 981 "configure"
+#include "confdefs.h"
+#include <ctype.h>
+#define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+#define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int main () { int i; for (i = 0; i < 256; i++)
+if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2);
+exit (0); }
+
+EOF
+eval $ac_link
+if test -s conftest && (./conftest; exit) 2>/dev/null; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+fi
+rm -fr conftest*
+fi
+fi
+
+echo "$ac_t""$ac_cv_header_stdc" 1>&6
+if test $ac_cv_header_stdc = yes; then
+ cat >> confdefs.h <<\EOF
+#define STDC_HEADERS 1
+EOF
+
+fi
+
+echo $ac_n "checking for sys/wait.h that is POSIX.1 compatible""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_header_sys_wait_h'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1016 "configure"
+#include "confdefs.h"
+#include <sys/types.h>
+#include <sys/wait.h>
+#ifndef WEXITSTATUS
+#define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
+#endif
+#ifndef WIFEXITED
+#define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+int main() { return 0; }
+int t() {
+int s;
+wait (&s);
+s = WIFEXITED (s) ? WEXITSTATUS (s) : 1;
+; return 0; }
+EOF
+if eval $ac_compile; then
+ rm -rf conftest*
+ ac_cv_header_sys_wait_h=yes
+else
+ rm -rf conftest*
+ ac_cv_header_sys_wait_h=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_header_sys_wait_h" 1>&6
+if test $ac_cv_header_sys_wait_h = yes; then
+ cat >> confdefs.h <<\EOF
+#define HAVE_SYS_WAIT_H 1
+EOF
+
+fi
+
+for ac_hdr in limits.h stdarg.h unistd.h signum.h sys/param.h string.h
+do
+ac_safe=`echo "$ac_hdr" | tr './\055' '___'`
+echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1060 "configure"
+#include "confdefs.h"
+#include <$ac_hdr>
+EOF
+eval "$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+ac_err=`grep -v '^ *+' conftest.out`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=yes"
+else
+ echo "$ac_err" >&5
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=no"
+fi
+rm -f conftest*
+fi
+if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
+ echo "$ac_t""yes" 1>&6
+ ac_tr_hdr=HAVE_`echo $ac_hdr | tr 'abcdefghijklmnopqrstuvwxyz./\055' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ___'`
+ cat >> confdefs.h <<EOF
+#define $ac_tr_hdr 1
+EOF
+
else
- echo "\`$1' is not a known configuration."
- echo "Either construct one based on the examples in the config directory,"
- echo "or copy config.in to config.h and edit it."
- exit 1
+ echo "$ac_t""no" 1>&6
fi
+done
+
+if test "$ac_cv_header_string_h" = yes
+then
+ for ac_hdr in memory.h
+do
+ac_safe=`echo "$ac_hdr" | tr './\055' '___'`
+echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1098 "configure"
+#include "confdefs.h"
+#include <$ac_hdr>
+EOF
+eval "$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+ac_err=`grep -v '^ *+' conftest.out`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=yes"
+else
+ echo "$ac_err" >&5
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=no"
+fi
+rm -f conftest*
+fi
+if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
+ echo "$ac_t""yes" 1>&6
+ ac_tr_hdr=HAVE_`echo $ac_hdr | tr 'abcdefghijklmnopqrstuvwxyz./\055' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ___'`
+ cat >> confdefs.h <<EOF
+#define $ac_tr_hdr 1
+EOF
+
+else
+ echo "$ac_t""no" 1>&6
+fi
+done
+
+else
+ for ac_hdr in strings.h
+do
+ac_safe=`echo "$ac_hdr" | tr './\055' '___'`
+echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1135 "configure"
+#include "confdefs.h"
+#include <$ac_hdr>
+EOF
+eval "$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out"
+ac_err=`grep -v '^ *+' conftest.out`
+if test -z "$ac_err"; then
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=yes"
+else
+ echo "$ac_err" >&5
+ rm -rf conftest*
+ eval "ac_cv_header_$ac_safe=no"
+fi
+rm -f conftest*
+fi
+if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then
+ echo "$ac_t""yes" 1>&6
+ ac_tr_hdr=HAVE_`echo $ac_hdr | tr 'abcdefghijklmnopqrstuvwxyz./\055' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ___'`
+ cat >> confdefs.h <<EOF
+#define $ac_tr_hdr 1
+EOF
+
+else
+ echo "$ac_t""no" 1>&6
+fi
+done
+
+fi
+
+echo $ac_n "checking for pid_t""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_type_pid_t'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1170 "configure"
+#include "confdefs.h"
+#include <sys/types.h>
+#if STDC_HEADERS
+#include <stdlib.h>
+#endif
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "pid_t" >/dev/null 2>&1; then
+ rm -rf conftest*
+ ac_cv_type_pid_t=yes
+else
+ rm -rf conftest*
+ ac_cv_type_pid_t=no
+fi
+rm -f conftest*
+
+fi
+echo "$ac_t""$ac_cv_type_pid_t" 1>&6
+if test $ac_cv_type_pid_t = no; then
+ cat >> confdefs.h <<\EOF
+#define pid_t int
+EOF
+
+fi
+
+echo $ac_n "checking return type of signal handlers""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_type_signal'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1201 "configure"
+#include "confdefs.h"
+#include <sys/types.h>
+#include <signal.h>
+#ifdef signal
+#undef signal
+#endif
+#ifdef __cplusplus
+extern "C" void (*signal (int, void (*)(int)))(int);
+#else
+void (*signal ()) ();
+#endif
+
+int main() { return 0; }
+int t() {
+int i;
+; return 0; }
+EOF
+if eval $ac_compile; then
+ rm -rf conftest*
+ ac_cv_type_signal=void
+else
+ rm -rf conftest*
+ ac_cv_type_signal=int
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_type_signal" 1>&6
+cat >> confdefs.h <<EOF
+#define RETSIGTYPE $ac_cv_type_signal
+EOF
+
+
+echo $ac_n "checking for size_t""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_type_size_t'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1241 "configure"
+#include "confdefs.h"
+#include <sys/types.h>
+#if STDC_HEADERS
+#include <stdlib.h>
+#endif
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "size_t" >/dev/null 2>&1; then
+ rm -rf conftest*
+ ac_cv_type_size_t=yes
+else
+ rm -rf conftest*
+ ac_cv_type_size_t=no
+fi
+rm -f conftest*
+
+fi
+echo "$ac_t""$ac_cv_type_size_t" 1>&6
+if test $ac_cv_type_size_t = no; then
+ cat >> confdefs.h <<\EOF
+#define size_t unsigned
+EOF
+
+fi
+
+echo $ac_n "checking for uid_t in sys/types.h""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_type_uid_t'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1272 "configure"
+#include "confdefs.h"
+#include <sys/types.h>
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "uid_t" >/dev/null 2>&1; then
+ rm -rf conftest*
+ ac_cv_type_uid_t=yes
+else
+ rm -rf conftest*
+ ac_cv_type_uid_t=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_type_uid_t" 1>&6
+if test $ac_cv_type_uid_t = no; then
+ cat >> confdefs.h <<\EOF
+#define uid_t int
+EOF
+
+ cat >> confdefs.h <<\EOF
+#define gid_t int
+EOF
+
+fi
+
+echo $ac_n "checking type of array argument to getgroups""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_type_getgroups'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ if test "$cross_compiling" = yes; then
+ ac_cv_type_getgroups=cross
+else
+cat > conftest.$ac_ext <<EOF
+#line 1308 "configure"
+#include "confdefs.h"
+
+/* Thanks to Mike Rendell for this test. */
+#include <sys/types.h>
+#define NGID 256
+#undef MAX
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+main()
+{
+ gid_t gidset[NGID];
+ int i, n;
+ union { gid_t gval; long lval; } val;
+
+ val.lval = -1;
+ for (i = 0; i < NGID; i++)
+ gidset[i] = val.gval;
+ n = getgroups (sizeof (gidset) / MAX (sizeof (int), sizeof (gid_t)) - 1,
+ gidset);
+ /* Exit non-zero if getgroups seems to require an array of ints. This
+ happens when gid_t is short but getgroups modifies an array of ints. */
+ exit ((n > 0 && gidset[n] != val.gval) ? 1 : 0);
+}
+
+EOF
+eval $ac_link
+if test -s conftest && (./conftest; exit) 2>/dev/null; then
+ ac_cv_type_getgroups=gid_t
+else
+ ac_cv_type_getgroups=int
+fi
+fi
+rm -fr conftest*
+if test $ac_cv_type_getgroups = cross; then
+ cat > conftest.$ac_ext <<EOF
+#line 1343 "configure"
+#include "confdefs.h"
+#include <unistd.h>
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "getgroups.*int.*gid_t" >/dev/null 2>&1; then
+ rm -rf conftest*
+ ac_cv_type_getgroups=gid_t
+else
+ rm -rf conftest*
+ ac_cv_type_getgroups=int
+fi
+rm -f conftest*
+
+fi
+fi
+
+echo "$ac_t""$ac_cv_type_getgroups" 1>&6
+cat >> confdefs.h <<EOF
+#define GETGROUPS_T $ac_cv_type_getgroups
+EOF
+
+
+cat > conftest.$ac_ext <<EOF
+#line 1367 "configure"
+#include "confdefs.h"
+#include <stdio.h>
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "int.*sprintf" >/dev/null 2>&1; then
+ rm -rf conftest*
+ cat >> confdefs.h <<\EOF
+#define SPRINTF_RET int
+EOF
+
+else
+ rm -rf conftest*
+ cat >> confdefs.h <<\EOF
+#define SPRINTF_RET char *
+EOF
+
+fi
+rm -f conftest*
+
+
+if test "$YACC" = "bison -y" ||
+ { test -f $srcdir/awktab.c && grep 'alloca *(' $srcdir/awktab.c > /dev/null; }
+then
+ # The Ultrix 4.2 mips builtin alloca declared by alloca.h only works
+# for constant arguments. Useless!
+echo $ac_n "checking for working alloca.h""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_header_alloca_h'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1398 "configure"
+#include "confdefs.h"
+#include <alloca.h>
+int main() { return 0; }
+int t() {
+char *p = alloca(2 * sizeof(int));
+; return 0; }
+EOF
+if eval $ac_link; then
+ rm -rf conftest*
+ ac_cv_header_alloca_h=yes
+else
+ rm -rf conftest*
+ ac_cv_header_alloca_h=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_header_alloca_h" 1>&6
+if test $ac_cv_header_alloca_h = yes; then
+ cat >> confdefs.h <<\EOF
+#define HAVE_ALLOCA_H 1
+EOF
+
+fi
+
+echo $ac_n "checking for alloca""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_func_alloca'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1430 "configure"
+#include "confdefs.h"
+
+#ifdef __GNUC__
+# define alloca __builtin_alloca
+#else
+# if HAVE_ALLOCA_H
+# include <alloca.h>
+# else
+# ifdef _AIX
+ #pragma alloca
+# else
+# ifndef alloca /* predefined by HP cc +Olibcalls */
+char *alloca ();
+# endif
+# endif
+# endif
+#endif
+
+int main() { return 0; }
+int t() {
+char *p = (char *) alloca(1);
+; return 0; }
+EOF
+if eval $ac_link; then
+ rm -rf conftest*
+ ac_cv_func_alloca=yes
+else
+ rm -rf conftest*
+ ac_cv_func_alloca=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_func_alloca" 1>&6
+if test $ac_cv_func_alloca = yes; then
+ cat >> confdefs.h <<\EOF
+#define HAVE_ALLOCA 1
+EOF
+
+fi
+
+if test $ac_cv_func_alloca = no; then
+ # The SVR3 libPW and SVR4 libucb both contain incompatible functions
+ # that cause trouble. Some versions do not even contain alloca or
+ # contain a buggy version. If you still want to use their alloca,
+ # use ar to extract alloca.o from them instead of compiling alloca.c.
+ ALLOCA=alloca.o
+ cat >> confdefs.h <<\EOF
+#define C_ALLOCA 1
+EOF
+
+
+echo $ac_n "checking whether alloca needs Cray hooks""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_os_cray'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1489 "configure"
+#include "confdefs.h"
+#if defined(CRAY) && ! defined(CRAY2)
+webecray
+#else
+wenotbecray
+#endif
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "webecray" >/dev/null 2>&1; then
+ rm -rf conftest*
+ ac_cv_os_cray=yes
+else
+ rm -rf conftest*
+ ac_cv_os_cray=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_os_cray" 1>&6
+if test $ac_cv_os_cray = yes; then
+for ac_func in _getb67 GETB67 getb67; do
+ echo $ac_n "checking for $ac_func""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1518 "configure"
+#include "confdefs.h"
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func(); below. */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error. */
+char $ac_func();
+
+int main() { return 0; }
+int t() {
+
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
+choke me
+#else
+$ac_func();
+#endif
+
+; return 0; }
+EOF
+if eval $ac_link; then
+ rm -rf conftest*
+ eval "ac_cv_func_$ac_func=yes"
+else
+ rm -rf conftest*
+ eval "ac_cv_func_$ac_func=no"
+fi
+rm -f conftest*
+
+fi
+if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then
+ echo "$ac_t""yes" 1>&6
+ cat >> confdefs.h <<EOF
+#define CRAY_STACKSEG_END $ac_func
+EOF
+
+ break
+else
+ echo "$ac_t""no" 1>&6
+fi
+
+done
+fi
+
+echo $ac_n "checking stack direction for C alloca""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_c_stack_direction'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ if test "$cross_compiling" = yes; then
+ ac_cv_c_stack_direction=0
+else
+cat > conftest.$ac_ext <<EOF
+#line 1572 "configure"
+#include "confdefs.h"
+find_stack_direction ()
+{
+ static char *addr = 0;
+ auto char dummy;
+ if (addr == 0)
+ {
+ addr = &dummy;
+ return find_stack_direction ();
+ }
+ else
+ return (&dummy > addr) ? 1 : -1;
+}
+main ()
+{
+ exit (find_stack_direction() < 0);
+}
+EOF
+eval $ac_link
+if test -s conftest && (./conftest; exit) 2>/dev/null; then
+ ac_cv_c_stack_direction=1
+else
+ ac_cv_c_stack_direction=-1
+fi
+fi
+rm -fr conftest*
+fi
+
+echo "$ac_t""$ac_cv_c_stack_direction" 1>&6
+cat >> confdefs.h <<EOF
+#define STACK_DIRECTION $ac_cv_c_stack_direction
+EOF
+
+fi
+
+fi
+cat >> confdefs.h <<\EOF
+#define REGEX_MALLOC 1
+EOF
+
+echo $ac_n "checking for vprintf""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_func_vprintf'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1618 "configure"
+#include "confdefs.h"
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char vprintf(); below. */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error. */
+char vprintf();
+
+int main() { return 0; }
+int t() {
+
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub_vprintf) || defined (__stub___vprintf)
+choke me
+#else
+vprintf();
+#endif
+
+; return 0; }
+EOF
+if eval $ac_link; then
+ rm -rf conftest*
+ eval "ac_cv_func_vprintf=yes"
+else
+ rm -rf conftest*
+ eval "ac_cv_func_vprintf=no"
+fi
+rm -f conftest*
+
+fi
+if eval "test \"`echo '$ac_cv_func_'vprintf`\" = yes"; then
+ echo "$ac_t""yes" 1>&6
+ cat >> confdefs.h <<\EOF
+#define HAVE_VPRINTF 1
+EOF
+
+else
+ echo "$ac_t""no" 1>&6
+fi
+
+if test "$ac_cv_func_vprintf" != yes; then
+echo $ac_n "checking for _doprnt""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_func__doprnt'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1666 "configure"
+#include "confdefs.h"
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char _doprnt(); below. */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error. */
+char _doprnt();
+
+int main() { return 0; }
+int t() {
+
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub__doprnt) || defined (__stub____doprnt)
+choke me
+#else
+_doprnt();
+#endif
+
+; return 0; }
+EOF
+if eval $ac_link; then
+ rm -rf conftest*
+ eval "ac_cv_func__doprnt=yes"
+else
+ rm -rf conftest*
+ eval "ac_cv_func__doprnt=no"
+fi
+rm -f conftest*
+
+fi
+if eval "test \"`echo '$ac_cv_func_'_doprnt`\" = yes"; then
+ echo "$ac_t""yes" 1>&6
+ cat >> confdefs.h <<\EOF
+#define HAVE_DOPRNT 1
+EOF
+
+else
+ echo "$ac_t""no" 1>&6
+fi
+
+fi
+
+
+for ac_func in memset memcpy memcmp fmod random strchr strerror strftime strncasecmp strtod system tzset
+do
+echo $ac_n "checking for $ac_func""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1718 "configure"
+#include "confdefs.h"
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func(); below. */
+#include <assert.h>
+/* Override any gcc2 internal prototype to avoid an error. */
+char $ac_func();
+
+int main() { return 0; }
+int t() {
+
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
+choke me
+#else
+$ac_func();
+#endif
+
+; return 0; }
+EOF
+if eval $ac_link; then
+ rm -rf conftest*
+ eval "ac_cv_func_$ac_func=yes"
+else
+ rm -rf conftest*
+ eval "ac_cv_func_$ac_func=no"
+fi
+rm -f conftest*
+
+fi
+if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then
+ echo "$ac_t""yes" 1>&6
+ ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'`
+ cat >> confdefs.h <<EOF
+#define $ac_tr_func 1
+EOF
+
+else
+ echo "$ac_t""no" 1>&6
+fi
+done
+
+
+echo $ac_n "checking whether getpgrp takes no argument""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_func_getpgrp_void'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ if test "$cross_compiling" = yes; then
+ { echo "configure: error: cannot check getpgrp if cross compiling" 1>&2; exit 1; }
+else
+cat > conftest.$ac_ext <<EOF
+#line 1771 "configure"
+#include "confdefs.h"
+
+/*
+ * If this system has a BSD-style getpgrp(),
+ * which takes a pid argument, exit unsuccessfully.
+ *
+ * Snarfed from Chet Ramey's bash pgrp.c test program
+ */
+#include <stdio.h>
+#include <sys/types.h>
+
+int pid;
+int pg1, pg2, pg3, pg4;
+int ng, np, s, child;
+
+main()
+{
+ pid = getpid();
+ pg1 = getpgrp(0);
+ pg2 = getpgrp();
+ pg3 = getpgrp(pid);
+ pg4 = getpgrp(1);
+
+ /*
+ * If all of these values are the same, it's pretty sure that
+ * we're on a system that ignores getpgrp's first argument.
+ */
+ if (pg2 == pg4 && pg1 == pg3 && pg2 == pg3)
+ exit(0);
+
+ child = fork();
+ if (child < 0)
+ exit(1);
+ else if (child == 0) {
+ np = getpid();
+ /*
+ * If this is Sys V, this will not work; pgrp will be
+ * set to np because setpgrp just changes a pgrp to be
+ * the same as the pid.
+ */
+ setpgrp(np, pg1);
+ ng = getpgrp(0); /* Same result for Sys V and BSD */
+ if (ng == pg1) {
+ exit(1);
+ } else {
+ exit(0);
+ }
+ } else {
+ wait(&s);
+ exit(s>>8);
+ }
+}
+
+EOF
+eval $ac_link
+if test -s conftest && (./conftest; exit) 2>/dev/null; then
+ ac_cv_func_getpgrp_void=yes
+else
+ ac_cv_func_getpgrp_void=no
+fi
+fi
+rm -fr conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_func_getpgrp_void" 1>&6
+if test $ac_cv_func_getpgrp_void = yes; then
+ cat >> confdefs.h <<\EOF
+#define GETPGRP_VOID 1
+EOF
+
+fi
+
+
+echo $ac_n "checking for st_blksize in struct stat""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_struct_st_blksize'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1851 "configure"
+#include "confdefs.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+int main() { return 0; }
+int t() {
+struct stat s; s.st_blksize;
+; return 0; }
+EOF
+if eval $ac_compile; then
+ rm -rf conftest*
+ ac_cv_struct_st_blksize=yes
+else
+ rm -rf conftest*
+ ac_cv_struct_st_blksize=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_struct_st_blksize" 1>&6
+if test $ac_cv_struct_st_blksize = yes; then
+ cat >> confdefs.h <<\EOF
+#define HAVE_ST_BLKSIZE 1
+EOF
+
+fi
+
+echo $ac_n "checking whether time.h and sys/time.h may both be included""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_header_time'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1884 "configure"
+#include "confdefs.h"
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+int main() { return 0; }
+int t() {
+struct tm *tp;
+; return 0; }
+EOF
+if eval $ac_compile; then
+ rm -rf conftest*
+ ac_cv_header_time=yes
+else
+ rm -rf conftest*
+ ac_cv_header_time=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_header_time" 1>&6
+if test $ac_cv_header_time = yes; then
+ cat >> confdefs.h <<\EOF
+#define TIME_WITH_SYS_TIME 1
+EOF
+
+fi
+
+echo $ac_n "checking whether struct tm is in sys/time.h or time.h""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_struct_tm'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1918 "configure"
+#include "confdefs.h"
+#include <sys/types.h>
+#include <time.h>
+int main() { return 0; }
+int t() {
+struct tm *tp; tp->tm_sec;
+; return 0; }
+EOF
+if eval $ac_compile; then
+ rm -rf conftest*
+ ac_cv_struct_tm=time.h
+else
+ rm -rf conftest*
+ ac_cv_struct_tm=sys/time.h
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_struct_tm" 1>&6
+if test $ac_cv_struct_tm = sys/time.h; then
+ cat >> confdefs.h <<\EOF
+#define TM_IN_SYS_TIME 1
+EOF
+
+fi
+
+echo $ac_n "checking for tm_zone in struct tm""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_struct_tm_zone'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1951 "configure"
+#include "confdefs.h"
+#include <sys/types.h>
+#include <$ac_cv_struct_tm>
+int main() { return 0; }
+int t() {
+struct tm tm; tm.tm_zone;
+; return 0; }
+EOF
+if eval $ac_compile; then
+ rm -rf conftest*
+ ac_cv_struct_tm_zone=yes
+else
+ rm -rf conftest*
+ ac_cv_struct_tm_zone=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_struct_tm_zone" 1>&6
+if test "$ac_cv_struct_tm_zone" = yes; then
+ cat >> confdefs.h <<\EOF
+#define HAVE_TM_ZONE 1
+EOF
+
+else
+ echo $ac_n "checking for tzname""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_var_tzname'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 1983 "configure"
+#include "confdefs.h"
+#include <time.h>
+#ifndef tzname /* For SGI. */
+extern char *tzname[]; /* RS6000 and others reject char **tzname. */
+#endif
+int main() { return 0; }
+int t() {
+atoi(*tzname);
+; return 0; }
+EOF
+if eval $ac_link; then
+ rm -rf conftest*
+ ac_cv_var_tzname=yes
+else
+ rm -rf conftest*
+ ac_cv_var_tzname=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_var_tzname" 1>&6
+ if test $ac_cv_var_tzname = yes; then
+ cat >> confdefs.h <<\EOF
+#define HAVE_TZNAME 1
+EOF
+
+ fi
+fi
+
+
+echo $ac_n "checking whether char is unsigned""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_c_char_unsigned'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ if test "$GCC" = yes; then
+ # GCC predefines this symbol on systems where it applies.
+cat > conftest.$ac_ext <<EOF
+#line 2022 "configure"
+#include "confdefs.h"
+#ifdef __CHAR_UNSIGNED__
+ yes
+#endif
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "yes" >/dev/null 2>&1; then
+ rm -rf conftest*
+ ac_cv_c_char_unsigned=yes
+else
+ rm -rf conftest*
+ ac_cv_c_char_unsigned=no
+fi
+rm -f conftest*
+
+else
+if test "$cross_compiling" = yes; then
+ { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; }
+else
+cat > conftest.$ac_ext <<EOF
+#line 2044 "configure"
+#include "confdefs.h"
+/* volatile prevents gcc2 from optimizing the test away on sparcs. */
+#if !defined(__STDC__) || __STDC__ != 1
+#define volatile
+#endif
+main() {
+ volatile char c = 255; exit(c < 0);
+}
+EOF
+eval $ac_link
+if test -s conftest && (./conftest; exit) 2>/dev/null; then
+ ac_cv_c_char_unsigned=yes
+else
+ ac_cv_c_char_unsigned=no
+fi
+fi
+rm -fr conftest*
+fi
+fi
+
+echo "$ac_t""$ac_cv_c_char_unsigned" 1>&6
+if test $ac_cv_c_char_unsigned = yes && test "$GCC" != yes; then
+ cat >> confdefs.h <<\EOF
+#define __CHAR_UNSIGNED__ 1
+EOF
+
+fi
+
+echo $ac_n "checking for working const""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 2078 "configure"
+#include "confdefs.h"
+
+int main() { return 0; }
+int t() {
+
+/* Ultrix mips cc rejects this. */
+typedef int charset[2]; const charset x;
+/* SunOS 4.1.1 cc rejects this. */
+char const *const *ccp;
+char **p;
+/* NEC SVR4.0.2 mips cc rejects this. */
+struct point {int x, y;};
+static struct point const zero = {0,0};
+/* AIX XL C 1.02.0.0 rejects this.
+ It does not let you subtract one const X* pointer from another in an arm
+ of an if-expression whose if-part is not a constant expression */
+const char *g = "string";
+ccp = &g + (g ? g-g : 0);
+/* HPUX 7.0 cc rejects these. */
+++ccp;
+p = (char**) ccp;
+ccp = (char const *const *) p;
+{ /* SCO 3.2v4 cc rejects this. */
+ char *t;
+ char const *s = 0 ? (char *) 0 : (char const *) 0;
+
+ *t++ = 0;
+}
+{ /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */
+ int x[] = {25, 17};
+ const int *foo = &x[0];
+ ++foo;
+}
+{ /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */
+ typedef const int *iptr;
+ iptr p = 0;
+ ++p;
+}
+{ /* AIX XL C 1.02.0.0 rejects this saying
+ "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */
+ struct s { int j; const int *ap[3]; };
+ struct s *b; b->j = 5;
+}
+{ /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */
+ const int foo = 10;
+}
+
+; return 0; }
+EOF
+if eval $ac_compile; then
+ rm -rf conftest*
+ ac_cv_c_const=yes
+else
+ rm -rf conftest*
+ ac_cv_c_const=no
+fi
+rm -f conftest*
+
+fi
+
+echo "$ac_t""$ac_cv_c_const" 1>&6
+if test $ac_cv_c_const = no; then
+ cat >> confdefs.h <<\EOF
+#define const
+EOF
+
+fi
+
+
+
+echo $ac_n "checking for ANSI stringizing capability""... $ac_c" 1>&6
+if eval "test \"`echo '$''{'gawk_cv_c_stringize'+set}'`\" = set"; then
+ echo $ac_n "(cached) $ac_c" 1>&6
+else
+ cat > conftest.$ac_ext <<EOF
+#line 2154 "configure"
+#include "confdefs.h"
+
+#define x(y) #y
+
+char *s = x(teststring);
+
+EOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ egrep "#teststring" >/dev/null 2>&1; then
+ rm -rf conftest*
+ gawk_cv_c_stringize=no
+else
+ rm -rf conftest*
+ gawk_cv_c_stringize=yes
+fi
+rm -f conftest*
+
+fi
+
+if test "${gawk_cv_c_stringize}" = yes
+then
+ cat >> confdefs.h <<\EOF
+#define HAVE_STRINGIZE 1
+EOF
+
+fi
+echo "$ac_t""${gawk_cv_c_stringize}" 1>&6
+
+
+trap '' 1 2 15
+cat > confcache <<\EOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs. It is not useful on other systems.
+# If it contains results you don't want to keep, you may remove or edit it.
+#
+# By default, configure uses ./config.cache as the cache file,
+# creating it if it does not exist already. You can give configure
+# the --cache-file=FILE option to use a different cache file; that is
+# what configure does when it calls configure scripts in
+# subdirectories, so they share the cache.
+# Giving --cache-file=/dev/null disables caching, for debugging configure.
+# config.status only pays attention to the cache file if you give it the
+# --recheck option to rerun configure.
+#
+EOF
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(set) 2>&1 |
+ sed -n "s/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=\${\1='\2'}/p" \
+ >> confcache
+if cmp -s $cache_file confcache; then
+ :
+else
+ if test -w $cache_file; then
+ echo "updating cache $cache_file"
+ cat confcache > $cache_file
+ else
+ echo "not updating unwritable cache $cache_file"
+ fi
+fi
+rm -f confcache
+
+trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+# Any assignment to VPATH causes Sun make to only execute
+# the first set of double-colon rules, so remove it if not needed.
+# If there is a colon in the path, we need to keep it.
+if test "x$srcdir" = x.; then
+ ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d'
+fi
+
+trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15
+
+DEFS=-DHAVE_CONFIG_H
+
+# Without the "./", some shells look in PATH for config.status.
+: ${CONFIG_STATUS=./config.status}
+
+echo creating $CONFIG_STATUS
+rm -f $CONFIG_STATUS
+cat > $CONFIG_STATUS <<EOF
+#! /bin/sh
+# Generated automatically by configure.
+# Run this file to recreate the current configuration.
+# This directory was configured as follows,
+# on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+#
+# $0 $ac_configure_args
+#
+# Compiler output produced by configure, useful for debugging
+# configure, is in ./config.log if it exists.
+
+ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]"
+for ac_option
+do
+ case "\$ac_option" in
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion"
+ exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;;
+ -version | --version | --versio | --versi | --vers | --ver | --ve | --v)
+ echo "$CONFIG_STATUS generated by autoconf version 2.7"
+ exit 0 ;;
+ -help | --help | --hel | --he | --h)
+ echo "\$ac_cs_usage"; exit 0 ;;
+ *) echo "\$ac_cs_usage"; exit 1 ;;
+ esac
+done
+
+ac_given_srcdir=$srcdir
+ac_given_INSTALL="$INSTALL"
+
+trap 'rm -fr `echo "Makefile doc/Makefile test/Makefile awklib/Makefile config.h:configh.in" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15
+EOF
+cat >> $CONFIG_STATUS <<EOF
+
+# Protect against being on the right side of a sed subst in config.status.
+sed 's/%@/@@/; s/@%/@@/; s/%g\$/@g/; /@g\$/s/[\\\\&%]/\\\\&/g;
+ s/@@/%@/; s/@@/@%/; s/@g\$/%g/' > conftest.subs <<\\CEOF
+$ac_vpsub
+$extrasub
+s%@CFLAGS@%$CFLAGS%g
+s%@CPPFLAGS@%$CPPFLAGS%g
+s%@CXXFLAGS@%$CXXFLAGS%g
+s%@DEFS@%$DEFS%g
+s%@LDFLAGS@%$LDFLAGS%g
+s%@LIBS@%$LIBS%g
+s%@exec_prefix@%$exec_prefix%g
+s%@prefix@%$prefix%g
+s%@program_transform_name@%$program_transform_name%g
+s%@bindir@%$bindir%g
+s%@sbindir@%$sbindir%g
+s%@libexecdir@%$libexecdir%g
+s%@datadir@%$datadir%g
+s%@sysconfdir@%$sysconfdir%g
+s%@sharedstatedir@%$sharedstatedir%g
+s%@localstatedir@%$localstatedir%g
+s%@libdir@%$libdir%g
+s%@includedir@%$includedir%g
+s%@oldincludedir@%$oldincludedir%g
+s%@infodir@%$infodir%g
+s%@mandir@%$mandir%g
+s%@YACC@%$YACC%g
+s%@CC@%$CC%g
+s%@CPP@%$CPP%g
+s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g
+s%@INSTALL_DATA@%$INSTALL_DATA%g
+s%@SET_MAKE@%$SET_MAKE%g
+s%@ALLOCA@%$ALLOCA%g
+
+CEOF
+EOF
+cat >> $CONFIG_STATUS <<EOF
+
+CONFIG_FILES=\${CONFIG_FILES-"Makefile doc/Makefile test/Makefile awklib/Makefile"}
+EOF
+cat >> $CONFIG_STATUS <<\EOF
+for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then
+ # Support "outfile[:infile]", defaulting infile="outfile.in".
+ case "$ac_file" in
+ *:*) ac_file_in=`echo "$ac_file"|sed 's%.*:%%'`
+ ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;;
+ *) ac_file_in="${ac_file}.in" ;;
+ esac
+
+ # Adjust relative srcdir, etc. for subdirectories.
+
+ # Remove last slash and all that follows it. Not all systems have dirname.
+ ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'`
+ if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then
+ # The file is in a subdirectory.
+ test ! -d "$ac_dir" && mkdir "$ac_dir"
+ ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`"
+ # A "../" for each directory in $ac_dir_suffix.
+ ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'`
+ else
+ ac_dir_suffix= ac_dots=
+ fi
+
+ case "$ac_given_srcdir" in
+ .) srcdir=.
+ if test -z "$ac_dots"; then top_srcdir=.
+ else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;;
+ /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;;
+ *) # Relative path.
+ srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix"
+ top_srcdir="$ac_dots$ac_given_srcdir" ;;
+ esac
+
+ case "$ac_given_INSTALL" in
+ [/$]*) INSTALL="$ac_given_INSTALL" ;;
+ *) INSTALL="$ac_dots$ac_given_INSTALL" ;;
+ esac
+ echo creating "$ac_file"
+ rm -f "$ac_file"
+ configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure."
+ case "$ac_file" in
+ *Makefile*) ac_comsub="1i\\
+# $configure_input" ;;
+ *) ac_comsub= ;;
+ esac
+ sed -e "$ac_comsub
+s%@configure_input@%$configure_input%g
+s%@srcdir@%$srcdir%g
+s%@top_srcdir@%$top_srcdir%g
+s%@INSTALL@%$INSTALL%g
+" -f conftest.subs $ac_given_srcdir/$ac_file_in > $ac_file
+fi; done
+rm -f conftest.subs
+
+# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
+# NAME is the cpp macro being defined and VALUE is the value it is being given.
+#
+# ac_d sets the value in "#define NAME VALUE" lines.
+ac_dA='s%^\([ ]*\)#\([ ]*define[ ][ ]*\)'
+ac_dB='\([ ][ ]*\)[^ ]*%\1#\2'
+ac_dC='\3'
+ac_dD='%g'
+# ac_u turns "#undef NAME" with trailing blanks into "#define NAME VALUE".
+ac_uA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
+ac_uB='\([ ]\)%\1#\2define\3'
+ac_uC=' '
+ac_uD='\4%g'
+# ac_e turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
+ac_eA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
+ac_eB='$%\1#\2define\3'
+ac_eC=' '
+ac_eD='%g'
+
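(As a rough illustration of what one generated ac_u/ac_e-style command does; the macro name and value below are invented for the example, while the real commands are built from confdefs.h as described above:)

    # Simplified stand-in for a single generated substitution: an "#undef NAME"
    # line in the config.h template becomes "#define NAME VALUE" in config.h.
    echo '#undef HAVE_STRING_H' |
        sed 's%^#undef \(HAVE_STRING_H\)$%#define \1 1%'
    # prints: #define HAVE_STRING_H 1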
+CONFIG_HEADERS=${CONFIG_HEADERS-"config.h:configh.in"}
+for ac_file in .. $CONFIG_HEADERS; do if test "x$ac_file" != x..; then
+ # Support "outfile[:infile]", defaulting infile="outfile.in".
+ case "$ac_file" in
+ *:*) ac_file_in=`echo "$ac_file"|sed 's%.*:%%'`
+ ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;;
+ *) ac_file_in="${ac_file}.in" ;;
+ esac
+
+ echo creating $ac_file
+
+ rm -f conftest.frag conftest.in conftest.out
+ cp $ac_given_srcdir/$ac_file_in conftest.in
+
+EOF
+
+# Transform confdefs.h into a sed script conftest.vals that substitutes
+# the proper values into config.h.in to produce config.h. And first:
+# Protect against being on the right side of a sed subst in config.status.
+# Protect against being in an unquoted here document in config.status.
+rm -f conftest.vals
+cat > conftest.hdr <<\EOF
+s/[\\&%]/\\&/g
+s%[\\$`]%\\&%g
+s%#define \([A-Za-z_][A-Za-z0-9_]*\) \(.*\)%${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD}%gp
+s%ac_d%ac_u%gp
+s%ac_u%ac_e%gp
+EOF
+sed -n -f conftest.hdr confdefs.h > conftest.vals
+rm -f conftest.hdr
+
+# This sed command replaces #undef with comments. This is necessary, for
+# example, in the case of _POSIX_SOURCE, which is predefined and required
+# on some systems where configure will not decide to define it.
+cat >> conftest.vals <<\EOF
+s%^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */%
+EOF
+
+# Break up conftest.vals because some shells have a limit on
+# the size of here documents, and old seds have small limits too.
+# Maximum number of lines to put in a single here document.
+ac_max_here_lines=12
+
+rm -f conftest.tail
+while :
+do
+ ac_lines=`grep -c . conftest.vals`
+ # grep -c gives empty output for an empty file on some AIX systems.
+ if test -z "$ac_lines" || test "$ac_lines" -eq 0; then break; fi
+ # Write a limited-size here document to conftest.frag.
+ echo ' cat > conftest.frag <<CEOF' >> $CONFIG_STATUS
+ sed ${ac_max_here_lines}q conftest.vals >> $CONFIG_STATUS
+ echo 'CEOF
+ sed -f conftest.frag conftest.in > conftest.out
+ rm -f conftest.in
+ mv conftest.out conftest.in
+' >> $CONFIG_STATUS
+ sed 1,${ac_max_here_lines}d conftest.vals > conftest.tail
+ rm -f conftest.vals
+ mv conftest.tail conftest.vals
+done
+rm -f conftest.vals
+
+cat >> $CONFIG_STATUS <<\EOF
+ rm -f conftest.frag conftest.h
+ echo "/* $ac_file. Generated automatically by configure. */" > conftest.h
+ cat conftest.in >> conftest.h
+ rm -f conftest.in
+ if cmp -s $ac_file conftest.h 2>/dev/null; then
+ echo "$ac_file is unchanged"
+ rm -f conftest.h
+ else
+ rm -f $ac_file
+ mv conftest.h $ac_file
+ fi
+fi; done
+
+
+date > stamp-h
+exit 0
+EOF
+chmod +x $CONFIG_STATUS
+rm -fr confdefs* $ac_clean_files
+test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1
+
diff --git a/configure.in b/configure.in
new file mode 100644
index 00000000..735bc575
--- /dev/null
+++ b/configure.in
@@ -0,0 +1,104 @@
+dnl
+dnl configure.in --- autoconf input file for gawk
+dnl
+dnl Copyright (C) 1995 the Free Software Foundation, Inc.
+dnl
+dnl This file is part of GAWK, the GNU implementation of the
+dnl AWK Programming Language.
+dnl
+dnl GAWK is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation; either version 2 of the License, or
+dnl (at your option) any later version.
+dnl
+dnl GAWK is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+dnl GNU General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+dnl
+
+dnl Process this file with autoconf to produce a configure script.
+
+dnl this makes sure that the local install-sh gets found
+dnl define(AC_CONFIG_AUX_DIR,)
+
+dnl This is the configure.in script proper
+AC_INIT(awk.h)
+AC_PREREQ(2.7)
+AC_CONFIG_HEADER(config.h:configh.in)
+
+dnl checks for programs
+AC_PROG_YACC
+AC_PROG_CC
+AC_PROG_CPP
+AC_PROG_INSTALL
+AC_PROG_MAKE_SET
+
+if test "$CFLAGS" = ""
+then
+dnl no user provided CFLAGS, feel free to do this our way
+ if test "$GCC" = yes
+ then
+ CFLAGS="-g -O"
+ else
+dnl go for speed, not debugging. :-)
+ CFLAGS="-O"
+ fi
+fi
+AC_SUBST(CFLAGS)
+
+dnl checks for systems
+AC_AIX
+AC_ISC_POSIX
+AC_MINIX
+
+dnl checks for header files
+AC_HEADER_STDC
+AC_HEADER_SYS_WAIT
+AC_CHECK_HEADERS(limits.h stdarg.h unistd.h signum.h sys/param.h string.h)
+if test "$ac_cv_header_string_h" = yes
+then
+ AC_CHECK_HEADERS(memory.h)
+else
+ AC_CHECK_HEADERS(strings.h)
+fi
+
+dnl checks for typedefs
+AC_TYPE_PID_T
+AC_TYPE_SIGNAL
+AC_SIZE_T
+AC_TYPE_GETGROUPS
+AC_EGREP_HEADER([int.*sprintf], stdio.h,
+ AC_DEFINE(SPRINTF_RET, int),
+ AC_DEFINE(SPRINTF_RET, char *))
+
+dnl checks for functions
+if test "$YACC" = "bison -y" ||
+ { test -f $srcdir/awktab.c && grep 'alloca *(' $srcdir/awktab.c > /dev/null; }
+then
+ AC_FUNC_ALLOCA
+fi
+AC_DEFINE(REGEX_MALLOC)
+AC_FUNC_VPRINTF
+
+AC_CHECK_FUNCS(memset memcpy memcmp fmod random strchr strerror strftime strncasecmp strtod system tzset)
+
+dnl check for how to use getpgrp
+AC_FUNC_GETPGRP
+
+dnl checks for structure members
+AC_STRUCT_ST_BLKSIZE
+AC_HEADER_TIME
+AC_STRUCT_TM
+AC_STRUCT_TIMEZONE
+
+dnl checks for compiler characteristics
+AC_C_CHAR_UNSIGNED
+AC_C_CONST
+GAWK_AC_C_STRINGIZE
+
+AC_OUTPUT(Makefile doc/Makefile test/Makefile awklib/Makefile, [date > stamp-h])
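(The configure script shown earlier identifies itself as autoconf 2.7 output generated from this file; with a 2.x-era autoconf on PATH, regenerating it would look roughly like this sketch:)

    # Run from the top of the source tree; reads configure.in and
    # writes a fresh ./configure (old-style autoconf usage).
    autoconf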
diff --git a/custom.h b/custom.h
new file mode 100644
index 00000000..051ab206
--- /dev/null
+++ b/custom.h
@@ -0,0 +1,43 @@
+/*
+ * custom.h
+ *
+ * This file is for use on systems where Autoconf isn't quite able to
+ * get things right. It is included after config.h in awk.h, to override
+ * definitions from Autoconf that are erroneous. See the manual for more
+ * information.
+ *
+ * If you make additions to this file for your system, please send me
+ * the information, to arnold@gnu.ai.mit.edu.
+ */
+
+/*
+ * Copyright (C) 1995 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* for MIPS RiscOS, from Nelson H. F. Beebe, beebe@math.utah.edu */
+#if defined(__host_mips) && defined(SYSTYPE_BSD43)
+#undef HAVE_STRTOD
+#undef HAVE_STRERROR
+#endif
+
+#ifdef VMS_POSIX
+#undef VMS
+#include "vms/redirect.h"
+#endif
diff --git a/dfa.c b/dfa.c
index 11a3fed0..958d9aa8 100644
--- a/dfa.c
+++ b/dfa.c
@@ -13,7 +13,7 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
/* Written June, 1988 by Mike Haertel
Modified July, 1988 by Arthur David Olson to assist BMG speedups */
@@ -23,7 +23,7 @@
#include <stdio.h>
#ifdef HAVE_CONFIG_H
-#include "config.h"
+#include <config.h>
#endif
#ifdef STDC_HEADERS
@@ -371,6 +371,12 @@ FUNC(is_print, ISPRINT)
FUNC(is_graph, ISGRAPH)
FUNC(is_cntrl, ISCNTRL)
+static int is_blank(c)
+int c;
+{
+ return (c == ' ' || c == '\t');
+}
+
/* The following list maps the names of the Posix named character classes
to predicate functions that determine whether a given character is in
the class. The leading [ has already been eaten by the lexical analyzer. */
@@ -389,6 +395,7 @@ static struct {
{ ":print:]", is_print },
{ ":graph:]", is_graph },
{ ":cntrl:]", is_cntrl },
+ { ":blank:]", is_blank },
{ 0 }
};
@@ -473,31 +480,33 @@ lex()
}
goto normal_char;
+ case '`':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = BEGLINE; /* XXX should be beginning of string */
+ goto normal_char;
+
+ case '\'':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = ENDLINE; /* XXX should be end of string */
+ goto normal_char;
+
case '<':
- if (syntax_bits & RE_NO_GNU_OPS)
- goto normal_char;
- if (backslash)
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
return lasttok = BEGWORD;
goto normal_char;
case '>':
- if (syntax_bits & RE_NO_GNU_OPS)
- goto normal_char;
- if (backslash)
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
return lasttok = ENDWORD;
goto normal_char;
case 'b':
- if (syntax_bits & RE_NO_GNU_OPS)
- goto normal_char;
- if (backslash)
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
return lasttok = LIMWORD;
goto normal_char;
case 'B':
- if (syntax_bits & RE_NO_GNU_OPS)
- goto normal_char;
- if (backslash)
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
return lasttok = NOTLIMWORD;
goto normal_char;
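(A small, hypothetical demonstration of the \< and \> word-boundary operators handled above, in gawk's default mode with the GNU operators enabled:)

    # \<foo\> matches "foo" only as a complete word
    echo 'foo foobar' | gawk '{ gsub(/\<foo\>/, "X"); print }'
    # prints: X foobar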
@@ -624,6 +633,7 @@ lex()
for (c2 = 0; c2 < NOTCHAR; ++c2)
if (ISALNUM(c2))
setbit(c2, ccl);
+ setbit('_', ccl);
if (c == 'W')
notset(ccl);
laststart = 0;
@@ -653,8 +663,13 @@ lex()
for (c1 = 0; prednames[c1].name; ++c1)
if (looking_at(prednames[c1].name))
{
+ int (*pred)() = prednames[c1].pred;
+ if (case_fold
+ && (pred == is_upper || pred == is_lower))
+ pred = is_alpha;
+
for (c2 = 0; c2 < NOTCHAR; ++c2)
- if ((*prednames[c1].pred)(c2))
+ if ((*pred)(c2))
setbit(c2, ccl);
lexptr += strlen(prednames[c1].name);
lexleft -= strlen(prednames[c1].name);
diff --git a/dfa.h b/dfa.h
index cc27d7a6..dda51816 100644
--- a/dfa.h
+++ b/dfa.h
@@ -13,7 +13,7 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
/* Written June, 1988 by Mike Haertel */
@@ -23,13 +23,17 @@
name space. */
/* Number of bits in an unsigned char. */
+#ifndef CHARBITS
#define CHARBITS 8
+#endif
/* First integer value that is greater than any character code. */
#define NOTCHAR (1 << CHARBITS)
/* INTBITS need not be exact, just a lower bound. */
+#ifndef INTBITS
#define INTBITS (CHARBITS * sizeof (int))
+#endif
/* Number of ints required to hold a bit for every character. */
#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
diff --git a/doc/Makefile.in b/doc/Makefile.in
new file mode 100644
index 00000000..93278178
--- /dev/null
+++ b/doc/Makefile.in
@@ -0,0 +1,99 @@
+# Makefile for GNU Awk documentation.
+#
+# Copyright (C) 1993-1995 the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+SHELL = /bin/sh
+
+srcdir = @srcdir@
+VPATH = @srcdir@
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+binprefix =
+manprefix =
+
+bindir = @bindir@
+libdir = @libdir@
+mandir = @mandir@/man1
+manext = .1
+infodir = @infodir@
+datadir = @datadir@/awk
+
+TEXI2DVI = texi2dvi
+MAKEINFO = makeinfo --no-split
+
+DOCS= gawk.1 igawk.1 gawk.texi
+
+TEXFILES= gawk.aux gawk.cp gawk.cps gawk.fn gawk.fns gawk.ky gawk.kys \
+ gawk.pg gawk.pgs gawk.toc gawk.tp gawk.tps gawk.vr gawk.vrs
+
+ALLDOC= gawk.dvi $(TEXFILES)
+
+all: $(DOCS) info
+
+install: $(mandir)/gawk$(manext) $(mandir)/igawk$(manext) $(infodir)/gawk.info
+
+$(infodir)/gawk.info: gawk.info
+ -if test -f gawk.info; then d=.; \
+ else d=$(srcdir); fi; \
+ for i in $$d/gawk.info*; do \
+ $(INSTALL_DATA) $$i $(infodir) ; \
+ done; \
+ chmod 644 $(infodir)/gawk.info* ; \
+ if $(SHELL) -c 'install-info --version' > /dev/null 2>&1 ; \
+ then install-info --infodir=$(infodir) gawk.info ; \
+ else true ; fi
+
+$(mandir)/gawk$(manext): gawk.1
+ $(INSTALL_DATA) gawk.1 $(mandir)/gawk$(manext) && chmod 644 $(mandir)/gawk$(manext)
+
+$(mandir)/igawk$(manext): igawk.1
+ $(INSTALL_DATA) igawk.1 $(mandir)/igawk$(manext) && chmod 644 $(mandir)/igawk$(manext)
+
+uninstall:
+ rm -f $(mandir)/gawk$(manext) $(mandir)/igawk$(manext) $(infodir)/gawk.info*
+
+dvi: gawk.dvi
+
+gawk.dvi: gawk.texi
+ TEXINPUTS=$$TEXINPUTS:$(srcdir) $(TEXI2DVI) gawk.texi
+
+info: gawk.info
+
+gawk.info: gawk.texi
+ $(MAKEINFO) gawk.texi
+
+postscript: dvi gawk.1 igawk.1
+ -groff -man gawk.1 > gawk.1.ps
+ -groff -man igawk.1 > igawk.1.ps
+ dvips -o gawk.ps gawk.dvi
+
+clean:
+ rm -f *.ps $(ALLDOC) gawk.log
+
+maintainer-clean: clean
+ rm -f gawk.info
+
+distclean: maintainer-clean
+ rm -f Makefile
diff --git a/gawk.1 b/doc/gawk.1
index 0a6e9241..89150bab 100644
--- a/gawk.1
+++ b/doc/gawk.1
@@ -1,7 +1,7 @@
.ds PX \s-1POSIX\s+1
.ds UX \s-1UNIX\s+1
.ds AN \s-1ANSI\s+1
-.TH GAWK 1 "Nov 24 1994" "Free Software Foundation" "Utility Commands"
+.TH GAWK 1 "Dec 28 1995" "Free Software Foundation" "Utility Commands"
.SH NAME
gawk \- pattern scanning and processing language
.SH SYNOPSIS
@@ -28,11 +28,13 @@ the \*(PX 1003.2 Command Language And Utilities Standard.
This version in turn is based on the description in
.IR "The AWK Programming Language" ,
by Aho, Kernighan, and Weinberger,
-with the additional features defined in the System V Release 4 version
+with the additional features found in the System V Release 4 version
of \*(UX
.IR awk .
.I Gawk
-also provides some GNU-specific extensions.
+also provides more recent Bell Labs
+.I awk
+extensions, and some GNU-specific extensions.
.PP
The command line consists of options to
.I gawk
@@ -46,15 +48,14 @@ available in the
and
.B ARGV
pre-defined AWK variables.
-.SH OPTIONS
+.SH OPTION FORMAT
.PP
.I Gawk
options may be either the traditional \*(PX one letter options,
-or the GNU style long options. \*(PX style options start with a single ``\-'',
-while GNU long options start with ``\-\^\-''.
-GNU style long options are provided for both GNU-specific features and
-for \*(PX mandated features. Other implementations of the AWK language
-are likely to only accept the traditional one letter options.
+or the GNU style long options. \*(PX options start with a single ``\-'',
+while long options start with ``\-\^\-''.
+Long options are provided for both GNU-specific features and
+for \*(PX mandated features.
.PP
Following the \*(PX standard,
.IR gawk -specific
@@ -70,12 +71,15 @@ Case is ignored in arguments to the
option.
Each
.B \-W
-option has a corresponding GNU style long option, as detailed below.
-Arguments to GNU style long options are either joined with the option
+option has a corresponding long option, as detailed below.
+Arguments to long options are either joined with the option
by an
.B =
sign, with no intervening spaces, or they may be provided in the
next command line argument.
+Long options may be abbreviated, as long as the abbreviation
+remains unique.
+.SH OPTIONS
.PP
.I Gawk
accepts the following options.
@@ -84,7 +88,7 @@ accepts the following options.
.BI \-F " fs"
.TP
.PD
-.BI \-\^\-field-separator= fs
+.BI \-\^\-field-separator " fs"
Use
.I fs
for the input field separator (the value of the
@@ -96,7 +100,7 @@ variable).
\fB\-v\fI var\fB\^=\^\fIval\fR
.TP
.PD
-\fB\-\^\-assign=\fIvar\fB\^=\^\fIval\fR
+\fB\-\^\-assign \fIvar\fB\^=\^\fIval\fR
Assign the value
.IR val ,
to the variable
@@ -110,7 +114,7 @@ block of an AWK program.
.BI \-f " program-file"
.TP
.PD
-.BI \-\^\-file= program-file
+.BI \-\^\-file " program-file"
Read the AWK program source from the file
.IR program-file ,
instead of from the first command line argument.
@@ -123,6 +127,7 @@ options may be used.
.PD 0
.BI \-mf= NNN
.TP
+.PD
.BI \-mr= NNN
Set various memory limits to the value
.IR NNN .
@@ -132,17 +137,23 @@ flag sets the maximum number of fields, and the
.B r
flag sets the maximum record size. These two flags and the
.B \-m
-option are from the AT&T Bell Labs research version of \*(UX
+option are from the Bell Labs research version of \*(UX
.IR awk .
They are ignored by
.IR gawk ,
since
.I gawk
has no pre-defined limits.
-.TP \w'\fB\-\^\-copyright\fR'u+1n
+.TP
+.PD 0
+.B "\-W traditional"
+.TP
.PD 0
.B "\-W compat"
.TP
+.PD 0
+.B \-\^\-traditional
+.TP
.PD
.B \-\^\-compat
Run in
@@ -152,6 +163,9 @@ mode. In compatibility mode,
behaves identically to \*(UX
.IR awk ;
none of the GNU-specific extensions are recognized.
+The use of
+.B \-\^\-traditional
+is preferred over the other forms of this option.
See
.BR "GNU EXTENSIONS" ,
below, for more information.
@@ -183,16 +197,26 @@ the error output.
.B \-\^\-usage
Print a relatively short summary of the available options on
the error output.
-Per the GNU Coding Standards, these options cause an immediate,
-successful exit.
+(Per the
+.IR "GNU Coding Standards" ,
+these options cause an immediate, successful exit.)
.TP
.PD 0
.B "\-W lint"
.TP
-.PD 0
+.PD
.B \-\^\-lint
Provide warnings about constructs that are
dubious or non-portable to other AWK implementations.
+.TP
+.PD 0
+.B "\-W lint\-old"
+.TP
+.PD
+.B \-\^\-lint\-old
+Provide warnings about constructs that are
+not portable to the original version of Unix
+.IR awk .
.ig
.\" This option is left undocumented, on purpose.
.TP
@@ -236,13 +260,42 @@ cannot be used in place of
.B ^
and
.BR ^= .
+.TP
+\(bu
+The
+.B fflush()
+function is not available.
.RE
.TP
.PD 0
-.BI "\-W source=" program-text
+.B "\-W re\-interval"
.TP
.PD
-.BI \-\^\-source= program-text
+.B \-\^\-re\-interval
+Enable the use of
+.I "interval expressions"
+in regular expression matching
+(see
+.BR "Regular Expressions" ,
+below).
+Interval expressions were not traditionally available in the
+AWK language. The POSIX standard added them, to make
+.I awk
+and
+.I egrep
+consistent with each other.
+However, their use is likely
+to break old AWK programs, so
+.I gawk
+only provides them if they are requested with this option, or when
+.B \-\^\-posix
+is specified.
+.TP
+.PD 0
+.BI "\-W source " program-text
+.TP
+.PD
+.BI \-\^\-source " program-text"
Use
.I program-text
as AWK program source code.
@@ -251,7 +304,7 @@ This option allows the easy intermixing of library functions (used via the
and
.B \-\^\-file
options) with source code entered on the command line.
-It is intended primarily for medium to large size AWK programs used
+It is intended primarily for medium to large AWK programs used
in shell scripts.
.sp .5
The
@@ -275,8 +328,10 @@ This is useful mainly for knowing if the current copy of
on your system
is up to date with respect to whatever the Free Software Foundation
is distributing.
-Per the GNU Coding Standards, these options cause an immediate,
-successful exit.
+This is also useful when reporting bugs.
+(Per the
+.IR "GNU Coding Standards" ,
+these options cause an immediate, successful exit.)
.TP
.B \-\^\-
Signal the end of options. This is useful to allow further arguments to the
@@ -307,12 +362,12 @@ first reads the program source from the
.IR program-file (s)
if specified,
from arguments to
-.BR "\-W source=" ,
+.BR \-\^\-source ,
or from the first non-option argument on the command line.
The
.B \-f
and
-.B "\-W source="
+.B \-\^\-source
options may be used multiple times on the command line.
.I Gawk
will read the program text as if all the
@@ -329,7 +384,10 @@ specifies a search path to use when finding source files named with
the
.B \-f
option. If this variable does not exist, the default path is
-\fB".:/usr/lib/awk:/usr/local/lib/awk"\fR.
+\fB".:/usr/local/share/awk"\fR.
+(The actual directory may vary, depending upon how
+.I gawk
+was built and installed.)
If a file name given to the
.B \-f
option contains a ``/'' character, no path search is performed.
@@ -377,12 +435,12 @@ is empty (\fB""\fR),
.I gawk
skips over it.
.PP
-For each line in the input,
+For each record in the input,
.I gawk
tests to see if it matches any
.I pattern
in the AWK program.
-For each pattern that the line matches, the associated
+For each pattern that the record matches, the associated
.I action
is executed.
The patterns are tested in the order they occur in the program.
@@ -392,7 +450,7 @@ Finally, after all the input is exhausted,
executes the code in the
.B END
block(s) (if any).
-.SH VARIABLES AND FIELDS
+.SH VARIABLES, RECORDS AND FIELDS
AWK variables are dynamic; they come into existence when they are
first used. Their values are either floating-point numbers or strings,
or both,
@@ -400,11 +458,35 @@ depending upon how they are used. AWK also has one dimensional
arrays; arrays with multiple dimensions may be simulated.
Several pre-defined variables are set as a program
runs; these will be described as needed and summarized below.
+.SS Records
+Normally, records are separated by newline characters. You can control how
+records are separated by assigning values to the built-in variable
+.BR RS .
+If
+.B RS
+is any single character, that character separates records.
+Otherwise,
+.B RS
+is a regular expression. Text in the input that matches this
+regular expression will separate the record.
+However, in compatibility mode,
+only the first character of its string
+value is used for separating records.
+If
+.B RS
+is set to the null string, then records are separated by
+blank lines.
+When
+.B RS
+is set to the null string, the newline character always acts as
+a field separator, in addition to whatever value
+.B FS
+may have.
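(A few hypothetical one-liners showing the record-separator behavior just described; the data is invented:)

    # RS as a single character
    printf 'one;two;three' | gawk 'BEGIN { RS = ";" } { print NR, $0 }'
    # RS as a regular expression: any run of digits separates records
    printf 'one1two22three' | gawk 'BEGIN { RS = "[0-9]+" } { print NR, $0 }'
    # RS as the null string: blank lines separate records ("paragraph mode")
    printf 'a\nb\n\nc\n' | gawk 'BEGIN { RS = "" } { print NR, NF }'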
.SS Fields
.PP
-As each input line is read,
+As each input record is read,
.I gawk
-splits the line into
+splits the record into
.IR fields ,
using the value of the
.B FS
@@ -412,17 +494,23 @@ variable as the field separator.
If
.B FS
is a single character, fields are separated by that character.
+If
+.B FS
+is the null string, then each individual character becomes a
+separate field.
Otherwise,
.B FS
is expected to be a full regular expression.
In the special case that
.B FS
-is a single blank, fields are separated
-by runs of blanks and/or tabs.
+is a single space, fields are separated
+by runs of spaces and/or tabs.
Note that the value of
.B IGNORECASE
(see below) will also affect how fields are split when
.B FS
+is a regular expression, and how records are separated when
+.B RS
is a regular expression.
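(Two hypothetical examples of the splitting rules above:)

    # FS set to the null string: every character is its own field
    echo 'abc' | gawk 'BEGIN { FS = "" } { print NF, $1, $2, $3 }'
    # prints: 3 a b c
    # Default single-space FS: fields separated by runs of spaces and/or tabs
    printf 'a  \tb\n' | gawk '{ print NF }'
    # prints: 2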
.PP
If the
@@ -439,12 +527,12 @@ overrides the use of
.BR FIELDWIDTHS ,
and restores the default behavior.
.PP
-Each field in the input line may be referenced by its position,
+Each field in the input record may be referenced by its position,
.BR $1 ,
.BR $2 ,
and so on.
.B $0
-is the whole line. The value of a field may be assigned to as well.
+is the whole record. The value of a field may be assigned to as well.
Fields need not be referenced by constants:
.RS
.PP
@@ -455,10 +543,10 @@ print $n
.ft R
.RE
.PP
-prints the fifth field in the input line.
+prints the fifth field in the input record.
The variable
.B NF
-is set to the total number of fields in the input line.
+is set to the total number of fields in the input record.
.PP
References to non-existent fields (i.e. fields after
.BR $NF )
@@ -475,7 +563,8 @@ to be recomputed, with the fields being separated by the value of
References to negative numbered fields cause a fatal error.
.SS Built-in Variables
.PP
-AWK's built-in variables are:
+.IR Gawk 's
+built-in variables are:
.PP
.TP \w'\fBFIELDWIDTHS\fR'u+1n
.B ARGC
@@ -504,7 +593,7 @@ The conversion format for numbers, \fB"%.6g"\fR, by default.
An array containing the values of the current environment.
The array is indexed by the environment variables, each element being
the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be
-.BR /u/arnold ).
+.BR /home/arnold ).
Changing this array does not affect the environment seen by programs which
.I gawk
spawns via redirection or the
@@ -533,8 +622,8 @@ parses the input into fields of fixed width, instead of using the
value of the
.B FS
variable as the field separator.
-The fixed field width facility is still experimental; expect the
-semantics to change as
+The fixed field width facility is still experimental; the
+semantics may change as
.I gawk
evolves over time.
.TP
@@ -553,20 +642,27 @@ block.
The input record number in the current input file.
.TP
.B FS
-The input field separator, a blank by default.
+The input field separator, a space by default. See
+.BR Fields ,
+above.
.TP
.B IGNORECASE
-Controls the case-sensitivity of all regular expression operations. If
+Controls the case-sensitivity of all regular expression
+and string operations. If
.B IGNORECASE
-has a non-zero value, then pattern matching in rules,
+has a non-zero value, then string comparisons and
+pattern matching in rules,
field splitting with
.BR FS ,
+record separating with
+.BR RS ,
regular expression
matching with
.B ~
and
.BR !~ ,
and the
+.BR gensub() ,
.BR gsub() ,
.BR index() ,
.BR match() ,
@@ -582,7 +678,17 @@ matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP,
and \fB"AB"\fP.
As with all AWK variables, the initial value of
.B IGNORECASE
-is zero, so all regular expression operations are normally case-sensitive.
+is zero, so all regular expression and string
+operations are normally case-sensitive.
+Under Unix, the full ISO 8859-1 Latin-1 character set is used
+when ignoring case.
+.B NOTE:
+In versions of
+.I gawk
+prior to 3.0,
+.B IGNORECASE
+only affected regular expression operations. It now affects string
+comparisons as well.
.TP
.B NF
The number of fields in the current input record.
@@ -594,28 +700,22 @@ The total number of input records seen so far.
The output format for numbers, \fB"%.6g"\fR, by default.
.TP
.B OFS
-The output field separator, a blank by default.
+The output field separator, a space by default.
.TP
.B ORS
The output record separator, by default a newline.
.TP
.B RS
The input record separator, by default a newline.
-.B RS
-is exceptional in that only the first character of its string
-value is used for separating records.
-(This will probably change in a future release of
-.IR gawk .)
-If
-.B RS
-is set to the null string, then records are separated by
-blank lines.
-When
-.B RS
-is set to the null string, then the newline character always acts as
-a field separator, in addition to whatever value
-.B FS
-may have.
+.TP
+.B RT
+The record terminator.
+.I Gawk
+sets
+.B RT
+to the input text that matched the character or regular expression
+specified by
+.BR RS .
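+For example, if
+.B RS
+were set to the (illustrative) regular expression \fB"\en|;"\fP,
+then for a record ended by a semicolon,
+.B RT
+would be set to \fB";"\fP, and for a record ended by a newline,
+to \fB"\en"\fP.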
.TP
.B RSTART
The index of the first character matched by
@@ -646,7 +746,7 @@ arrays. For example:
.PP
.RS
.ft B
-i = "A" ;\^ j = "B" ;\^ k = "C"
+i = "A";\^ j = "B";\^ k = "C"
.br
x[i, j, k] = "hello, world\en"
.ft R
@@ -689,7 +789,8 @@ An element may be deleted from an array using the
statement.
The
.B delete
-statement may also be used to delete the entire contents of an array.
+statement may also be used to delete the entire contents of an array,
+just by specifying the array name without a subscript.
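+.PP
+For example, these (illustrative) statements remove first a single
+element, and then every element, of the array
+.BR x :
+.RS
+.PP
+.ft B
+.nf
+delete x["first"]
+delete x
+.fi
+.ft R
+.RE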
.SS Variable Typing And Conversion
.PP
Variables and fields
@@ -742,6 +843,24 @@ clearly incorrect, and
.I gawk
does not do this.
.PP
+Note that string constants, such as \fB"57"\fP, are
+.I not
+numeric strings; they are string constants. The idea of ``numeric string''
+only applies to fields,
+.B getline
+input,
+.BR FILENAME ,
+.B ARGV
+elements,
+.B ENVIRON
+elements and the elements of an array created by
+.B split()
+that are numeric strings.
+The basic idea is that
+.IR "user input" ,
+and only user input, that looks numeric,
+should be treated that way.
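+.PP
+For example, these two (illustrative) commands print 1 and 0
+respectively; the field read from the input is a numeric string and is
+compared numerically, while the string constant \fB"3.0"\fP is compared
+as a string:
+.RS
+.PP
+.ft B
+.nf
+echo 3.0 | gawk '{ print ($1 == 3) }'
+gawk 'BEGIN { print ("3.0" == 3) }'
+.fi
+.ft R
+.RE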
+.PP
Uninitialized variables have the numeric value 0 and the string value ""
(the null, or empty, string).
.SH PATTERNS AND ACTIONS
@@ -752,21 +871,27 @@ and
.BR } .
Either the pattern may be missing, or the action may be missing, but,
of course, not both. If the pattern is missing, the action will be
-executed for every single line of input.
+executed for every single record of input.
A missing action is equivalent to
.RS
.PP
.B "{ print }"
.RE
.PP
-which prints the entire line.
+which prints the entire record.
.PP
Comments begin with the ``#'' character, and continue until the
end of the line.
Blank lines may be used to separate statements.
Normally, a statement ends with a newline, however, this is not the
case for lines ending in
-a ``,'', ``{'', ``?'', ``:'', ``&&'', or ``||''.
+a ``,'',
+.BR { ,
+.BR ? ,
+.BR : ,
+.BR && ,
+or
+.BR || .
Lines ending in
.B do
or
@@ -826,7 +951,7 @@ patterns cannot have missing action parts.
.PP
For
.BI / "regular expression" /
-patterns, the associated statement is executed for each input line that matches
+patterns, the associated statement is executed for each input record that matches
the regular expression.
Regular expressions are the same as those in
.IR egrep (1),
@@ -857,7 +982,7 @@ The
.IB pattern1 ", " pattern2
form of an expression is called a
.IR "range pattern" .
-It matches all input records starting with a line that matches
+It matches all input records starting with a record that matches
.IR pattern1 ,
and continuing until a record that matches
.IR pattern2 ,
@@ -876,20 +1001,22 @@ matches the literal character
.IR c .
.TP
.B .
-matches any character except newline.
+matches any character
+.I including
+newline.
.TP
.B ^
-matches the beginning of a line or a string.
+matches the beginning of a string.
.TP
.B $
-matches the end of a line or a string.
+matches the end of a string.
.TP
.BI [ abc... ]
-character class, matches any of the characters
+character list, matches any of the characters
.IR abc... .
.TP
.BI [^ abc... ]
-negated character class, matches any character except
+negated character list, matches any character except
.I abc...
and newline.
.TP
@@ -920,9 +1047,229 @@ matches zero or one
.BI ( r )
grouping: matches
.IR r .
+.TP
+.PD 0
+.IB r { n }
+.TP
+.PD 0
+.IB r { n ,}
+.TP
+.PD
+.IB r { n , m }
+One or two numbers inside braces denote an
+.IR "interval expression" .
+If there is one number in the braces, the preceding regexp
+.I r
+is repeated
+.I n
+times. If there are two numbers separated by a comma,
+.I r
+is repeated
+.I n
+to
+.I m
+times.
+If there is one number followed by a comma, then
+.I r
+is repeated at least
+.I n
+times.
+.sp .5
+Interval expressions are only available if either
+.B \-\^\-posix
+or
+.B \-\^\-re\-interval
+is specified on the command line.
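+.sp .5
+For example, assuming
+.B \-\^\-re\-interval
+is in effect, this (illustrative) command prints only those input
+records that consist of exactly three digits:
+.sp .5
+.RS
+.ft B
+.nf
+gawk \-\^\-re\-interval '/^[0-9]{3}$/ { print }'
+.fi
+.ft R
+.RE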
+.TP
+.B \ey
+matches the empty string at either the beginning or the
+end of a word.
+.TP
+.B \eB
+matches the empty string within a word.
+.TP
+.B \e<
+matches the empty string at the beginning of a word.
+.TP
+.B \e>
+matches the empty string at the end of a word.
+.TP
+.B \ew
+matches any word-constituent character (letter, digit, or underscore).
+.TP
+.B \eW
+matches any character that is not word-constituent.
+.TP
+.B \e`
+matches the empty string at the beginning of a buffer (string).
+.TP
+.B \e'
+matches the empty string at the end of a buffer.
.PP
The escape sequences that are valid in string constants (see below)
are also legal in regular expressions.
+.PP
+.I "Character classes"
+are a new feature introduced in the POSIX standard.
+A character class is a special notation for describing
+lists of characters that have a specific attribute, but where the
+actual characters themselves can vary from country to country and/or
+from character set to character set. For example, the notion of what
+is an alphabetic character differs in the USA and in France.
+.PP
+A character class is only valid in a regexp
+.I inside
+the brackets of a character list. Character classes consist of
+.BR [: ,
+a keyword denoting the class, and
+.BR :] .
+Here are the character
+classes defined by the POSIX standard.
+.TP
+.B [:alnum:]
+Alphanumeric characters.
+.TP
+.B [:alpha:]
+Alphabetic characters.
+.TP
+.B [:blank:]
+Space or tab characters.
+.TP
+.B [:cntrl:]
+Control characters.
+.TP
+.B [:digit:]
+Numeric characters.
+.TP
+.B [:graph:]
+Characters that are both printable and visible.
+(A space is printable, but not visible, while an
+.B a
+is both.)
+.TP
+.B [:lower:]
+Lower-case alphabetic characters.
+.TP
+.B [:print:]
+Printable characters (characters that are not control characters).
+.TP
+.B [:punct:]
+Punctuation characters (characters that are not letters, digits,
+control characters, or space characters).
+.TP
+.B [:space:]
+Space characters (such as space, tab, and formfeed, to name a few).
+.TP
+.B [:upper:]
+Upper-case alphabetic characters.
+.TP
+.B [:xdigit:]
+Characters that are hexadecimal digits.
+.PP
+For example, before the POSIX standard, to match alphanumeric
+characters, you would have had to write
+.BR /[A\-Za\-z0\-9]/ .
+If your character set had other alphabetic characters in it, this would not
+match them. With the POSIX character classes, you can write
+.BR /[[:alnum:]]/ ,
+and this will match
+.I all
+the alphabetic and numeric characters in your character set.
+.PP
+Two additional special sequences can appear in character lists.
+These apply to non-ASCII character sets, which can have single symbols
+(called
+.IR "collating elements" )
+that are represented with more than one
+character, as well as several characters that are equivalent for
+.IR collating ,
+or sorting, purposes. (E.g., in French, a plain ``e''
+and a grave-accented e\` are equivalent.)
+.TP
+Collating Symbols
+A collating symbol is a multi-character collating element enclosed in
+.B [.
+and
+.BR .] .
+For example, if
+.B ch
+is a collating element, then
+.B [[.ch.]]
+is a regexp that matches this collating element, while
+.B [ch]
+is a regexp that matches either
+.B c
+or
+.BR h .
+.TP
+Equivalence Classes
+An equivalence class is a list of equivalent characters enclosed in
+.B [=
+and
+.BR =] .
+Thus,
+.B [[=ee\`=]]
+is a regexp that matches either
+.B e
+or
+.B e\` .
+.PP
+These features are very valuable in non-English speaking locales.
+The library functions that
+.I gawk
+uses for regular expression matching
+currently only recognize POSIX character classes; they do not recognize
+collating symbols or equivalence classes.
+.PP
+The
+.BR \ey ,
+.BR \eB ,
+.BR \e< ,
+.BR \e> ,
+.BR \ew ,
+.BR \eW ,
+.BR \e` ,
+and
+.B \e'
+operators are specific to
+.IR gawk ;
+they are extensions based on facilities in the GNU regexp libraries.
+.PP
+The various command line options
+control how
+.I gawk
+interprets characters in regexps.
+.TP
+No options
+In the default case,
+.I gawk
+provides all the facilities of
+POSIX regexps and the GNU regexp operators described above.
+However, interval expressions are not supported.
+.TP
+.B \-\^\-posix
+Only POSIX regexps are supported; the GNU operators are not special.
+(E.g.,
+.B \ew
+matches a literal
+.BR w ).
+Interval expressions are allowed.
+.TP
+.B \-\^\-traditional
+Traditional Unix
+.I awk
+regexps are matched. The GNU operators
+are not special, interval expressions are not available, and neither
+are the POSIX character classes
+.RB ( [[:alnum:]]
+and so on).
+Characters described by octal and hexadecimal escape sequences are
+treated literally, even if they represent regexp metacharacters.
+.TP
+.B \-\^\-re\-interval
+Allow interval expressions in regexps, even if
+.B \-\^\-traditional
+has been provided.
.SS Actions
Action statements are enclosed in braces,
.B {
@@ -934,38 +1281,43 @@ and input/output statements
available are patterned after those in C.
.SS Operators
.PP
-The operators in AWK, in order of increasing precedence, are
+The operators in AWK, in order of decreasing precedence, are
.PP
.TP "\w'\fB*= /= %= ^=\fR'u+1n"
-.PD 0
-.B "= += \-="
+.BR ( \&... )
+Grouping
.TP
-.PD
-.B "*= /= %= ^="
-Assignment. Both absolute assignment
-.BI ( var " = " value )
-and operator-assignment (the other forms) are supported.
+.B $
+Field reference.
.TP
-.B ?:
-The C conditional expression. This has the form
-.IB expr1 " ? " expr2 " : " expr3\c
-\&. If
-.I expr1
-is true, the value of the expression is
-.IR expr2 ,
-otherwise it is
-.IR expr3 .
-Only one of
-.I expr2
-and
-.I expr3
-is evaluated.
+.B "++ \-\^\-"
+Increment and decrement, both prefix and postfix.
.TP
-.B ||
-Logical OR.
+.B ^
+Exponentiation (\fB**\fR may also be used, and \fB**=\fR for
+the assignment operator).
.TP
-.B &&
-Logical AND.
+.B "+ \- !"
+Unary plus, unary minus, and logical negation.
+.TP
+.B "* / %"
+Multiplication, division, and modulus.
+.TP
+.B "+ \-"
+Addition and subtraction.
+.TP
+.I space
+String concatenation.
+.TP
+.PD 0
+.B "< >"
+.TP
+.PD 0
+.B "<= >="
+.TP
+.PD
+.B "!= =="
+The regular relational operators.
.TP
.B "~ !~"
Regular expression match, negated match.
@@ -983,37 +1335,38 @@ This is usually
.I not
what was intended.
.TP
-.PD 0
-.B "< >"
-.TP
-.PD 0
-.B "<= >="
-.TP
-.PD
-.B "!= =="
-The regular relational operators.
-.TP
-.I blank
-String concatenation.
-.TP
-.B "+ \-"
-Addition and subtraction.
+.B in
+Array membership.
.TP
-.B "* / %"
-Multiplication, division, and modulus.
+.B &&
+Logical AND.
.TP
-.B "+ \- !"
-Unary plus, unary minus, and logical negation.
+.B ||
+Logical OR.
.TP
-.B ^
-Exponentiation (\fB**\fR may also be used, and \fB**=\fR for
-the assignment operator).
+.B ?:
+The C conditional expression. This has the form
+.IB expr1 " ? " expr2 " : " expr3\c
+\&. If
+.I expr1
+is true, the value of the expression is
+.IR expr2 ,
+otherwise it is
+.IR expr3 .
+Only one of
+.I expr2
+and
+.I expr3
+is evaluated.
.TP
-.B "++ \-\^\-"
-Increment and decrement, both prefix and postfix.
+.PD 0
+.B "= += \-="
.TP
-.B $
-Field reference.
+.PD
+.B "*= /= %= ^="
+Assignment. Both absolute assignment
+.BI ( var " = " value )
+and operator-assignment (the other forms) are supported.
.SS Control Statements
.PP
The control statements are
@@ -1039,7 +1392,7 @@ as follows:
The input/output statements are as follows:
.PP
.TP "\w'\fBprintf \fIfmt, expr-list\fR'u+1n"
-.BI close( filename )
+.BI close( file )
Close file (or pipe, see below).
.TP
.B getline
@@ -1078,25 +1431,36 @@ AWK program. If the end of the input data is reached, the
.B END
block(s), if any, are executed.
.TP
-.B "next file"
+.B "nextfile"
Stop processing the current input file. The next input record read
comes from the next input file.
.B FILENAME
-is updated,
+and
+.B ARGIND
+are updated,
.B FNR
is reset to 1, and processing starts over with the first pattern in the
AWK program. If the end of the input data is reached, the
.B END
block(s), if any, are executed.
+.B NOTE:
+Earlier versions of gawk used
+.BR "next file" ,
+as two words. While this usage is still recognized, it generates a
+warning message and will eventually be removed.
.TP
.B print
Prints the current record.
+The output record is terminated with the value of the
+.B ORS
+variable.
.TP
.BI print " expr-list"
Prints expressions.
Each expression is separated by the value of the
.B OFS
-variable. The output record is terminated with the value of the
+variable.
+The output record is terminated with the value of the
.B ORS
variable.
.TP
@@ -1121,6 +1485,18 @@ Execute the command
.IR cmd-line ,
and return the exit status.
(This may not be available on non-\*(PX systems.)
+.TP
+\&\fBfflush(\fR[\fIfile\^\fR]\fB)\fR
+Flush any buffers associated with the open output file or pipe
+.IR file .
+If
+.I file
+is missing, then standard output is flushed.
+If
+.I file
+is the null string,
+then all open output files and pipes
+have their buffers flushed.
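+.sp .5
+For example, this (illustrative) rule flushes the standard output so
+that a prompt without a trailing newline appears before
+.I gawk
+waits for the reply:
+.sp .5
+.RS
+.ft B
+.nf
+BEGIN { printf "Enter a name: "; fflush(); getline name }
+.fi
+.ft R
+.RE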
.PP
Other input/output redirections are also allowed. For
.B print
@@ -1157,27 +1533,47 @@ is numeric, it is treated as a character and printed.
Otherwise, the argument is assumed to be a string, and only the first
character of that string is printed.
.TP
+.PD 0
.B %d
-A decimal number (the integer part).
.TP
+.PD
.B %i
-Just like
-.BR %d .
+A decimal number (the integer part).
.TP
+.PD 0
.B %e
+.TP
+.PD
+.B %E
A floating point number of the form
-.BR [\-]d.ddddddE[+\^\-]dd .
+.BR [\-]d.dddddde[+\^\-]dd .
+The
+.B %E
+format uses
+.B E
+instead of
+.BR e .
.TP
.B %f
A floating point number of the form
.BR [\-]ddd.dddddd .
.TP
+.PD 0
.B %g
+.TP
+.PD
+.B %G
Use
-.B e
+.B %e
or
-.B f
+.B %f
conversion, whichever is shorter, with nonsignificant zeros suppressed.
+The
+.B %G
+format uses
+.B %E
+instead of
+.BR %e .
.TP
.B %o
An unsigned octal number (again, an integer).
@@ -1185,13 +1581,15 @@ An unsigned octal number (again, an integer).
.B %s
A character string.
.TP
+.PD 0
.B %x
-An unsigned hexadecimal number (an integer).
.TP
+.PD
.B %X
-Like
-.BR %x ,
-but using
+An unsigned hexadecimal number (an integer).
+The
+.B %X
+format uses
.B ABCDEF
instead of
.BR abcdef .
@@ -1208,15 +1606,85 @@ and the control letter:
.B \-
The expression should be left-justified within its field.
.TP
-.I width
-The field should be padded to this width. If the number has a leading
-zero, then the field will be padded with zeros.
-Otherwise it is padded with blanks.
-This applies even to the non-numeric output formats.
+.I space
+For numeric conversions, prefix positive values with a space, and
+negative values with a minus sign.
+.TP
+.B +
+The plus sign, used before the width modifier (see below),
+says to always supply a sign for numeric conversions, even if the data
+to be formatted is positive. The
+.B +
+overrides the space modifier.
.TP
-.BI . prec
-A number indicating the maximum width of strings or digits to the right
-of the decimal point.
+.B #
+Use an ``alternate form'' for certain control letters.
+For
+.BR %o ,
+supply a leading zero.
+For
+.BR %x ,
+and
+.BR %X ,
+supply a leading
+.BR 0x
+or
+.BR 0X
+for
+a nonzero result.
+For
+.BR %e ,
+.BR %E ,
+and
+.BR %f ,
+the result will always contain a
+decimal point.
+For
+.BR %g ,
+and
+.BR %G ,
+trailing zeros are not removed from the result.
+.TP
+.B 0
+A leading
+.B 0
+(zero) acts as a flag that indicates output should be
+padded with zeroes instead of spaces.
+This applies even to non-numeric output formats.
+This flag only has an effect when the field width is wider than the
+value to be printed.
+.TP
+.I width
+The field should be padded to this width. The field is normally padded
+with spaces. If the
+.B 0
+flag has been used, it is padded with zeroes.
+.TP
+.BI \&. prec
+A number that specifies the precision to use when printing.
+For the
+.BR %e ,
+.BR %E ,
+and
+.BR %f
+formats, this specifies the
+number of digits you want printed to the right of the decimal point.
+For the
+.BR %g ,
+and
+.B %G
+formats, it specifies the maximum number
+of significant digits. For the
+.BR %d ,
+.BR %o ,
+.BR %i ,
+.BR %u ,
+.BR %x ,
+and
+.B %X
+formats, it specifies the minimum number of
+digits to print. For a string, it specifies the maximum number of
+characters from the string that should be printed.
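+.PP
+For example, this (illustrative) statement prints the string
+.B total
+left-justified in a ten character field, followed by the value
+3.14159 rounded to two digits after the decimal point in a six
+character field:
+.RS
+.PP
+.ft B
+.nf
+printf "%-10s|%6.2f|\en", "total", 3.14159
+.fi
+.ft R
+.RE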
.PP
The dynamic
.I width
@@ -1251,7 +1719,7 @@ recognizes certain special filenames internally. These filenames
allow access to open file descriptors inherited from
.IR gawk 's
parent process (usually the shell).
-Other special filenames provide access information about the running
+Other special filenames provide access to information about the running
.B gawk
process.
The filenames are:
@@ -1270,7 +1738,7 @@ in decimal, terminated with a newline.
.TP
.B /dev/user
Reading this file returns a single record terminated with a newline.
-The fields are separated with blanks.
+The fields are separated with spaces.
.B $1
is the value of the
.IR getuid (2)
@@ -1325,7 +1793,7 @@ These file names may also be used on the command line to name data files.
.PP
AWK has the following pre-defined arithmetic functions:
.PP
-.TP \w'\fBsrand(\^\fIexpr\^\fB)\fR'u+1n
+.TP \w'\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR'u+1n
.BI atan2( y , " x" )
returns the arctangent of
.I y/x
@@ -1352,8 +1820,8 @@ returns the sine in radians.
.BI sqrt( expr )
the square root function.
.TP
-.BI srand( expr )
-use
+\&\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR
+uses
.I expr
as a new seed for the random number generator. If no
.I expr
@@ -1362,10 +1830,58 @@ The return value is the previous seed for the random
number generator.
.SS String Functions
.PP
-AWK has the following pre-defined string functions:
+.I Gawk
+has the following pre-defined string functions:
.PP
.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n"
-\fBgsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR
+\fBgensub(\fIr\fB, \fIs\fB, \fIh \fR[\fB, \fIt\fR]\fB)\fR
+search the target string
+.I t
+for matches of the regular expression
+.IR r .
+If
+.I h
+is a string beginning with
+.B g
+or
+.BR G ,
+then replace all matches of
+.I r
+with
+.IR s .
+Otherwise,
+.I h
+is a number indicating which match of
+.I r
+to replace.
+If no
+.I t
+is supplied,
+.B $0
+is used instead.
+Within the replacement text
+.IR s ,
+the sequence
+.BI \e n\fR,
+where
+.I n
+is a digit from 1 to 9, may be used to indicate just the text that
+matched the
+.IR n 'th
+parenthesized subexpression. The sequence
+.B \e0
+represents the entire matched text, as does the character
+.BR & .
+Unlike
+.B sub()
+and
+.BR gsub() ,
+the modified string is returned as the result of the function,
+and the original target string is
+.I not
+changed.
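+.sp .5
+For example, the (illustrative) call
+.sp .5
+.RS
+.ft B
+.nf
+gensub(/a/, "A", 2, "banana")
+.fi
+.ft R
+.RE
+returns the string ``banAna'', replacing only the second match of the
+regular expression; the original string is left unchanged.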
+.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n"
+\fBgsub(\fIr\fB, \fIs \fR[\fB, \fIt\fR]\fB)\fR
for each substring matching the regular expression
.I r
in the string
@@ -1377,6 +1893,22 @@ If
.I t
is not supplied, use
.BR $0 .
+An
+.B &
+in the replacement text is replaced with the text that was actually matched.
+Use
+.B \e&
+to get a literal
+.BR & .
+See
+.I "AWK Language Programming"
+for a fuller discussion of the rules for
+.BR &'s
+and backslashes in the replacement text of
+.BR sub() ,
+.BR gsub() ,
+and
+.BR gensub() .
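+.sp .5
+For example, if the (illustrative) variable
+.B str
+contains ``water'', then
+.sp .5
+.RS
+.ft B
+.nf
+gsub(/a/, "[&]", str)
+.fi
+.ft R
+.RE
+returns 1 and changes
+.B str
+to ``w[a]ter''.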
.TP
.BI index( s , " t" )
returns the index of the string
@@ -1387,7 +1919,7 @@ or 0 if
.I t
is not present.
.TP
-.BI length( s )
+\fBlength(\fR[\fIs\fR]\fB)\fR
returns the length of the string
.IR s ,
or the length of
@@ -1408,7 +1940,7 @@ is not present, and sets the values of
and
.BR RLENGTH .
.TP
-\fBsplit(\fIs\fB, \fIa\fB, \fIr\fB)\fR
+\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIr\fR]\fB)\fR
splits the string
.I s
into the array
@@ -1423,6 +1955,7 @@ is used instead.
The array
.I a
is cleared first.
+Splitting behaves identically to field splitting, described above.
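+.sp .5
+For example, the (illustrative) call
+.sp .5
+.RS
+.ft B
+.nf
+n = split("aaa:bbb:ccc", parts, ":")
+.fi
+.ft R
+.RE
+sets
+.B n
+to 3 and assigns ``aaa'', ``bbb'', and ``ccc'' to
+.BR parts[1] ,
+.BR parts[2] ,
+and
+.BR parts[3] ,
+respectively (the array name is arbitrary).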
.TP
.BI sprintf( fmt , " expr-list" )
prints
@@ -1431,13 +1964,13 @@ according to
.IR fmt ,
and returns the resulting string.
.TP
-\fBsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR
+\fBsub(\fIr\fB, \fIs \fR[\fB, \fIt\fR]\fB)\fR
just like
.BR gsub() ,
but only the first matching substring is replaced.
.TP
-\fBsubstr(\fIs\fB, \fIi\fB, \fIn\fB)\fR
-returns the
+\fBsubstr(\fIs\fB, \fIi \fR[\fB, \fIn\fR]\fB)\fR
+returns the at most
.IR n -character
substring of
.I s
@@ -1477,7 +2010,7 @@ formatting them.
returns the current time of day as the number of seconds since the Epoch
(Midnight UTC, January 1, 1970 on \*(PX systems).
.TP
-\fBstrftime(\fIformat\fR, \fItimestamp\fB)\fR
+\fBstrftime(\fR[\fIformat \fR[\fB, \fItimestamp\fR]]\fB)\fR
formats
.I timestamp
according to the specification in
@@ -1489,13 +2022,18 @@ should be of the same form as returned by
If
.I timestamp
is missing, the current time of day is used.
+If
+.I format
+is missing, a default format equivalent to the output of
+.IR date (1)
+will be used.
See the specification for the
.B strftime()
function in \*(AN C for the format conversions that are
guaranteed to be available.
A public-domain version of
.IR strftime (3)
-and a man page for it are shipped with
+and a man page for it come with
.IR gawk ;
if that version was used to build
.IR gawk ,
@@ -1522,7 +2060,7 @@ backspace.
form-feed.
.TP
.B \en
-new line.
+newline.
.TP
.B \er
carriage return.
@@ -1554,6 +2092,13 @@ The escape sequences may also be used inside constant regular expressions
(e.g.,
.B "/[\ \et\ef\en\er\ev]/"
matches whitespace characters).
+.PP
+In compatibility mode, the characters represented by octal and
+hexadecimal escape sequences are treated literally when used in
+regexp constants. Thus,
+.B /a\e52b/
+is equivalent to
+.BR /a\e*b/ .
.SH FUNCTIONS
Functions in AWK are defined as follows:
.PP
@@ -1561,8 +2106,8 @@ Functions in AWK are defined as follows:
\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements \fB}\fR
.RE
.PP
-Functions are executed when called from within the action parts of regular
-pattern-action statements. Actual parameters supplied in the function
+Functions are executed when they are called from within expressions
+in either patterns or actions. Actual parameters supplied in the function
call are used to instantiate the formal parameters declared in the function.
Arrays are passed by reference, other variables are passed by value.
.PP
@@ -1574,8 +2119,10 @@ real parameters by extra spaces in the parameter list. For example:
.RS
.ft B
.nf
-function f(p, q, a, b) { # a & b are local
- ..... }
+function f(p, q, a, b) # a & b are local
+{
+ \&.....
+}
/abc/ { ... ; f(1, 2) ; ... }
.fi
@@ -1592,6 +2139,14 @@ Functions may call each other and may be recursive.
Function parameters used as local variables are initialized
to the null string and the number zero upon function invocation.
.PP
+If
+.B \-\^\-lint
+has been provided,
+.I gawk
+will warn about calls to undefined functions at parse time,
+instead of at run time.
+Calling an undefined function at run time is a fatal error.
+.PP
The word
.B func
may be used in place of
@@ -1639,8 +2194,8 @@ Concatenate and line number (a variation on a theme):
Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger,
Addison-Wesley, 1988. ISBN 0-201-07981-X.
.PP
-.IR "The GAWK Manual" ,
-Edition 0.15, published by the Free Software Foundation, 1993.
+.IR "AWK Language Programming" ,
+Edition 1.0, published by the Free Software Foundation, 1995.
.SH POSIX COMPATIBILITY
A primary goal for
.I gawk
@@ -1651,9 +2206,9 @@ To this end,
.I gawk
incorporates the following user visible
features which are not described in the AWK book,
-but are part of
-.I awk
-in System V Release 4, and are in the \*(PX standard.
+but are part of the Bell Labs version of
+.IR awk ,
+and are in the \*(PX standard.
.PP
The
.B \-v
@@ -1689,9 +2244,7 @@ it to process.
.PP
The AWK book does not define the return value of
.BR srand() .
-The System V Release 4 version of \*(UX
-.I awk
-(and the \*(PX standard)
+The \*(PX standard
has it return the seed it was using, to allow keeping track
of random number sequences. Therefore
.B srand()
@@ -1721,14 +2274,14 @@ built-in functions (from AT&T); and the \*(AN C conversion specifications in
(done first in AT&T's version).
.SH GNU EXTENSIONS
.I Gawk
-has some extensions to \*(PX
+has a number of extensions to \*(PX
.IR awk .
They are described in this section. All the extensions described here
can be disabled by
invoking
.I gawk
with the
-.B "\-W compat"
+.B \-\^\-traditional
option.
.PP
The following features of
@@ -1742,12 +2295,22 @@ are not available in
The
.B \ex
escape sequence.
+(Disabled with
+.BR \-\^\-posix .)
+.TP \w'\(bu'u+1n
+\(bu
+The
+.B fflush()
+function.
+(Disabled with
+.BR \-\^\-posix .)
.TP
\(bu
The
-.B systime()
+.BR systime(),
+.BR strftime(),
and
-.B strftime()
+.B gensub()
functions.
.TP
\(bu
@@ -1755,9 +2318,10 @@ The special file names available for I/O redirection are not recognized.
.TP
\(bu
The
-.B ARGIND
+.BR ARGIND ,
+.BR ERRNO ,
and
-.B ERRNO
+.B RT
variables are not special.
.TP
\(bu
@@ -1768,7 +2332,19 @@ variable and its side-effects are not available.
\(bu
The
.B FIELDWIDTHS
-variable and fixed width field splitting.
+variable and fixed-width field splitting.
+.TP
+\(bu
+The use of
+.B RS
+as a regular expression.
+.TP
+\(bu
+The ability to split out individual characters using the null string
+as the value of
+.BR FS ,
+and as the third argument to
+.BR split() .
.TP
\(bu
No path search is performed for files named via the
@@ -1779,7 +2355,7 @@ environment variable is not special.
.TP
\(bu
The use of
-.B "next file"
+.B "nextfile"
to abandon processing of the current input file.
.TP
\(bu
@@ -1802,7 +2378,7 @@ when closing a file or pipe, respectively.
When
.I gawk
is invoked with the
-.B "\-W compat"
+.B \-\^\-traditional
option,
if the
.I fs
@@ -1813,7 +2389,7 @@ option is ``t'', then
will be set to the tab character.
Since this is a rather ugly special case, it is not the default behavior.
This behavior also does not occur if
-.B "\-W posix"
+.B \-\^\-posix
has been specified.
.ig
.PP
@@ -1848,7 +2424,7 @@ Thus,
.RS
.PP
.ft B
-a = length
+a = length # Holy Algol 60, Batman!
.ft R
.RE
.PP
@@ -1865,7 +2441,7 @@ a = length($0)
This feature is marked as ``deprecated'' in the \*(PX standard, and
.I gawk
will issue a warning about its use if
-.B "\-W lint"
+.B \-\^\-lint
is specified on the command line.
.PP
The other feature is the use of either the
@@ -1883,7 +2459,7 @@ equivalent to the
statement.
.I Gawk
will support this usage if
-.B "\-W compat"
+.B \-\^\-traditional
has been specified.
.SH ENVIRONMENT VARIABLES
If
@@ -1891,13 +2467,23 @@ If
exists in the environment, then
.I gawk
behaves exactly as if
-.B \-\-posix
+.B \-\^\-posix
had been specified on the command line.
If
-.B \-\-lint
+.B \-\^\-lint
has been specified,
.I gawk
will issue a warning message to this effect.
+.PP
+The
+.B AWKPATH
+environment variable can be used to provide a list of directories that
+.I gawk
+will search when looking for files named via the
+.B \-f
+and
+.B \-\^\-file
+options.
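+.PP
+For example, with a (hypothetical) setting such as
+.RS
+.PP
+.ft B
+.nf
+AWKPATH=".:/usr/local/share/awk"
+.fi
+.ft R
+.RE
+.PP
+a program file named with
+.B \-f
+that is not found in the current directory is next looked for in
+.BR /usr/local/share/awk .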
.SH BUGS
The
.B \-F
@@ -1921,26 +2507,18 @@ output with output to
while on a system with those files, the output is actually to different
open files.
Caveat Emptor.
+.PP
+Syntactically invalid single character programs tend to overflow
+the parse stack, generating a rather unhelpful message. Such programs
+are surprisingly difficult to diagnose in the completely general case,
+and the effort to do so really is not worth it.
+.PP
+The word ``GNU'' is incorrectly capitalized in at least one file
+in the source code.
.SH VERSION INFORMATION
This man page documents
.IR gawk ,
-version 2.15.
-.PP
-Starting with the 2.15 version of
-.IR gawk ,
-the
-.BR \-c ,
-.BR \-V ,
-.BR \-C ,
-.ig
-.BR \-D ,
-..
-.BR \-a ,
-and
-.B \-e
-options of the 2.11 version are no longer recognized.
-This fact will not even be documented in the manual page for the next
-major version.
+version 3.0.
.SH AUTHORS
The original version of \*(UX
.I awk
@@ -1966,7 +2544,7 @@ The initial DOS port was done by Conrad Kwok and Scott Garfinkle.
Scott Deifik is the current DOS maintainer. Pat Rankin did the
port to VMS, and Michal Jaegermann did the port to the Atari ST.
The port to OS/2 was done by Kai Uwe Rommel, with contributions and
-help from Darrel Hankerson.
+help from Darrel Hankerson. Fred Fish supplied support for the Amiga.
.SH BUG REPORTS
If you find a bug in
.IR gawk ,
@@ -1984,10 +2562,20 @@ Before sending a bug report, please do two things. First, verify that
you have the latest version of
.IR gawk .
Many bugs (usually subtle ones) are fixed at each release, and if
-your's is out of date, the problem may already have been solved.
+yours is out of date, the problem may already have been solved.
Second, please read this man page and the reference manual carefully to
be sure that what you think is a bug really is, instead of just a quirk
in the language.
+.PP
+Whatever you do, do
+.B NOT
+post a bug report in
+.BR comp.lang.awk .
+While the
+.I gawk
+developers occasionally read this newsgroup, posting bug reports there
+is an unreliable way to report bugs. Instead, please use the electronic mail
+addresses given above.
.SH ACKNOWLEDGEMENTS
Brian Kernighan of Bell Labs
provided valuable assistance during testing and debugging.
diff --git a/doc/gawk.texi b/doc/gawk.texi
new file mode 100644
index 00000000..6227ac32
--- /dev/null
+++ b/doc/gawk.texi
@@ -0,0 +1,20460 @@
+\input texinfo @c -*-texinfo-*-
+@c %**start of header (This is for running Texinfo on a region.)
+@setfilename gawk.info
+@settitle AWK Language Programming
+@c %**end of header (This is for running Texinfo on a region.)
+
+@ignore
+@ifinfo
+@format
+START-INFO-DIR-ENTRY
+* Gawk: (gawk.info). A Text Scanning and Processing Language.
+END-INFO-DIR-ENTRY
+@end format
+@end ifinfo
+@end ignore
+
+@c @set xref-automatic-section-title
+@c @set DRAFT
+
+@c The following information should be updated here only!
+@c This sets the edition of the document, the version of gawk it
+@c applies to, and when the document was updated.
+@set TITLE AWK Language Programming
+@set EDITION 1.0
+@set VERSION 3.0
+@set UPDATE-MONTH January 1996
+@iftex
+@set DOCUMENT book
+@end iftex
+@ifinfo
+@set DOCUMENT Info file
+@end ifinfo
+
+@ignore
+Some comments on the layout for TeX.
+1. Use the texinfo.tex from the gawk distribution. It contains fixes that
+ are needed to get the footings for draft mode to not appear.
+2. I have done A LOT of work to make this look good. There are `@page' commands
+ and use of `@group ... @end group' in a number of places. If you muck
+ with anything, it's your responsibility not to break the layout.
+@end ignore
+
+@c merge the function and variable indexes into the concept index
+@ifinfo
+@synindex fn cp
+@synindex vr cp
+@end ifinfo
+@iftex
+@syncodeindex fn cp
+@syncodeindex vr cp
+@end iftex
+
+@c If "finalout" is commented out, the printed output will show
+@c black boxes that mark lines that are too long. Thus, it is
+@c unwise to comment it out when running a master in case there are
+@c overfulls which are deemed okay.
+
+@ifclear DRAFT
+@iftex
+@finalout
+@end iftex
+@end ifclear
+
+@smallbook
+@iftex
+@cropmarks
+@end iftex
+
+@ifinfo
+This file documents @code{awk}, a program that you can use to select
+particular records in a file and perform operations upon them.
+
+This is Edition @value{EDITION} of @cite{@value{TITLE}},
+for the @value{VERSION} version of the GNU implementation of AWK.
+
+Copyright (C) 1989, 1991 - 1996 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+@ignore
+Permission is granted to process this file through TeX and print the
+results, provided the printed document carries copying permission
+notice identical to this one except for the removal of this paragraph
+(this paragraph not being relevant to the printed manual).
+
+@end ignore
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@end ifinfo
+
+@setchapternewpage odd
+
+@titlepage
+@title @value{TITLE}
+@subtitle A User's Guide for GNU AWK
+@subtitle Edition @value{EDITION}
+@subtitle @value{UPDATE-MONTH}
+@author Arnold D. Robbins
+@sp
+@author Based on @cite{The GAWK Manual},
+@author by Robbins, Close, Rubin, and Stallman
+
+@c Include the Distribution inside the titlepage environment so
+@c that headings are turned off. Headings on and off do not work.
+
+@page
+@vskip 0pt plus 1filll
+@ifset LEGALJUNK
+The programs and applications presented in this book have been
+included for their instructional value. They have been tested with care,
+but are not guaranteed for any particular purpose. The publisher does not
+offer any warranties or representations, nor does it accept any
+liabilities with respect to the programs or applications.
+So there.
+@sp 2
+UNIX is a registered trademark of X/Open, Ltd. @*
+Microsoft, MS, and MS-DOS are registered trademarks, and Windows is a
+trademark of Microsoft Corporation in the United States and other
+countries. @*
+Atari, 520ST, 1040ST, TT, STE, Mega, and Falcon are registered trademarks
+or trademarks of Atari Corporation. @*
+DEC, Digital, OpenVMS, ULTRIX, and VMS, are trademarks of Digital Equipment
+Corporation. @*
+@end ifset
+``To boldly go where no man has gone before'' is a
+Registered Trademark of Paramount Pictures Corporation. @*
+@c sorry, i couldn't resist
+@sp 3
+Copyright @copyright{} 1989, 1991 - 1996 Free Software Foundation, Inc.
+@sp 2
+
+This is Edition @value{EDITION} of @cite{@value{TITLE}}, @*
+for the @value{VERSION} (or later) version of the GNU implementation of AWK.
+
+@sp 2
+Published by the Free Software Foundation @*
+59 Temple Place --- Suite 330 @*
+Boston, MA 02111-1307 USA @*
+Phone: +1-617-542-5942 @*
+Fax (including Japan): +1-617-542-2652 @*
+Printed copies are available for $25 each. @*
+@c this ISBN can change! Check with the FSF office...
+@c This one is correct for gawk 3.0 and edition 1.0
+ISBN 1-882114-26-4 @*
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@sp 2
+Cover art by Etienne Suvasa.
+@end titlepage
+
+@c Thanks to Bob Chassell for directions on doing dedications.
+@iftex
+@headings off
+@page
+@w{ }
+@sp 9
+@center @i{To Miriam, for making me complete.}
+@sp
+@center @i{To Chana, for the joy you bring us.}
+@sp
+@center @i{To Rivka, for the exponential increase.}
+@page
+@w{ }
+@page
+@headings on
+@end iftex
+
+@iftex
+@headings off
+@evenheading @thispage@ @ @ @b{@thistitle} @| @|
+@oddheading @| @| @b{@thischapter}@ @ @ @thispage
+@ifset DRAFT
+@evenfooting @today{} @| @emph{DRAFT!} @| Please Do Not Redistribute
+@oddfooting Please Do Not Redistribute @| @emph{DRAFT!} @| @today{}
+@end ifset
+@end iftex
+
+@ifinfo
+@node Top, Preface, (dir), (dir)
+@top General Introduction
+@c Preface or Licensing nodes should come right after the Top
+@c node, in `unnumbered' sections, then the chapter, `What is gawk'.
+
+This file documents @code{awk}, a program that you can use to select
+particular records in a file and perform operations upon them.
+
+This is Edition @value{EDITION} of @cite{@value{TITLE}}, @*
+for the @value{VERSION} version of the GNU implementation @*
+of AWK.
+
+@end ifinfo
+
+@menu
+* Preface:: What this @value{DOCUMENT} is about; brief
+ history and acknowledgements.
+* What Is Awk:: What is the @code{awk} language; using this
+ @value{DOCUMENT}.
+* Getting Started:: A basic introduction to using @code{awk}. How
+ to run an @code{awk} program. Command line
+ syntax.
+* One-liners:: Short, sample @code{awk} programs.
+* Regexp:: All about matching things using regular
+ expressions.
+* Reading Files:: How to read files and manipulate fields.
+* Printing:: How to print using @code{awk}. Describes the
+ @code{print} and @code{printf} statements.
+ Also describes redirection of output.
+* Expressions:: Expressions are the basic building blocks of
+ statements.
+* Patterns and Actions:: Overviews of patterns and actions.
+* Statements:: The various control statements are described
+ in detail.
+* Built-in Variables:: Built-in Variables
+* Arrays:: The description and use of arrays. Also
+ includes array-oriented control statements.
+* Built-in:: The built-in functions are summarized here.
+* User-defined:: User-defined functions are described in
+ detail.
+* Invoking Gawk:: How to run @code{gawk}.
+* Library Functions:: A Library of @code{awk} Functions.
+* Sample Programs:: Many @code{awk} programs with complete
+ explanations.
+* Language History:: The evolution of the @code{awk} language.
+* Gawk Summary:: @code{gawk} Options and Language Summary.
+* Installation:: Installing @code{gawk} under various operating
+ systems.
+* Notes:: Something about the implementation of
+ @code{gawk}.
+* Glossary:: An explanation of some unfamiliar terms.
+* Copying:: Your right to copy and distribute @code{gawk}.
+* Index:: Concept and Variable Index.
+
+* History:: The history of @code{gawk} and @code{awk}.
+* Manual History:: Brief history of the GNU project and this
+ @value{DOCUMENT}.
+* Acknowledgements:: Acknowledgements.
+* This Manual:: Using this @value{DOCUMENT}. Includes sample
+ input files that you can use.
+* Conventions:: Typographical Conventions.
+* Sample Data Files:: Sample data files for use in the @code{awk}
+ programs illustrated in this @value{DOCUMENT}.
+* Names:: What name to use to find @code{awk}.
+* Running gawk:: How to run @code{gawk} programs; includes
+ command line syntax.
+* One-shot:: Running a short throw-away @code{awk} program.
+* Read Terminal:: Using no input files (input from terminal
+ instead).
+* Long:: Putting permanent @code{awk} programs in
+ files.
+* Executable Scripts:: Making self-contained @code{awk} programs.
+* Comments:: Adding documentation to @code{gawk} programs.
+* Very Simple:: A very simple example.
+* Two Rules:: A less simple one-line example with two rules.
+* More Complex:: A more complex example.
+* Statements/Lines:: Subdividing or combining statements into
+ lines.
+* Other Features:: Other Features of @code{awk}.
+* When:: When to use @code{gawk} and when to use other
+ things.
+* Regexp Usage:: How to Use Regular Expressions.
+* Escape Sequences:: How to write non-printing characters.
+* Regexp Operators:: Regular Expression Operators.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Leftmost Longest:: How much text matches.
+* Computed Regexps:: Using Dynamic Regexps.
+* Records:: Controlling how data is split into records.
+* Fields:: An introduction to fields.
+* Non-Constant Fields:: Non-constant Field Numbers.
+* Changing Fields:: Changing the Contents of a Field.
+* Field Separators:: The field separator and how to change it.
+* Basic Field Splitting:: How fields are split with single characters or
+ simple strings.
+* Regexp Field Splitting:: Using regexps as the field separator.
+* Single Character Fields:: Making each character a separate field.
+* Command Line Field Separator:: Setting @code{FS} from the command line.
+* Field Splitting Summary:: Some final points and a summary table.
+* Constant Size:: Reading constant width data.
+* Multiple Line:: Reading multi-line records.
+* Getline:: Reading files under explicit program control
+ using the @code{getline} function.
+* Getline Intro:: Introduction to the @code{getline} function.
+* Plain Getline:: Using @code{getline} with no arguments.
+* Getline/Variable:: Using @code{getline} into a variable.
+* Getline/File:: Using @code{getline} from a file.
+* Getline/Variable/File:: Using @code{getline} into a variable from a
+ file.
+* Getline/Pipe:: Using @code{getline} from a pipe.
+* Getline/Variable/Pipe:: Using @code{getline} into a variable from a
+ pipe.
+* Getline Summary:: Summary Of @code{getline} Variants.
+* Print:: The @code{print} statement.
+* Print Examples:: Simple examples of @code{print} statements.
+* Output Separators:: The output separators and how to change them.
+* OFMT:: Controlling Numeric Output With @code{print}.
+* Printf:: The @code{printf} statement.
+* Basic Printf:: Syntax of the @code{printf} statement.
+* Control Letters:: Format-control letters.
+* Format Modifiers:: Format-specification modifiers.
+* Printf Examples:: Several examples.
+* Redirection:: How to redirect output to multiple files and
+ pipes.
+* Special Files:: File name interpretation in @code{gawk}.
+ @code{gawk} allows access to inherited file
+ descriptors.
+* Close Files And Pipes:: Closing Input and Output Files and Pipes.
+* Constants:: String, numeric, and regexp constants.
+* Scalar Constants:: Numeric and string constants.
+* Regexp Constants:: Regular Expression constants.
+* Using Constant Regexps:: When and how to use a regexp constant.
+* Variables:: Variables give names to values for later use.
+* Using Variables:: Using variables in your programs.
+* Assignment Options:: Setting variables on the command line and a
+ summary of command line syntax. This is an
+ advanced method of input.
+* Conversion:: The conversion of strings to numbers and vice
+ versa.
+* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-},
+ etc.)
+* Concatenation:: Concatenating strings.
+* Assignment Ops:: Changing the value of a variable or a field.
+* Increment Ops:: Incrementing the numeric value of a variable.
+* Truth Values:: What is ``true'' and what is ``false''.
+* Typing and Comparison:: How variables acquire types, and how this
+ affects comparison of numbers and strings with
+ @samp{<}, etc.
+* Boolean Ops:: Combining comparison expressions using boolean
+ operators @samp{||} (``or''), @samp{&&}
+ (``and'') and @samp{!} (``not'').
+* Conditional Exp:: Conditional expressions select between two
+ subexpressions under control of a third
+ subexpression.
+* Function Calls:: A function call is an expression.
+* Precedence:: How various operators nest.
+* Pattern Overview:: What goes into a pattern.
+* Kinds of Patterns:: A list of all kinds of patterns.
+* Regexp Patterns:: Using regexps as patterns.
+* Expression Patterns:: Any expression can be used as a pattern.
+* Ranges:: Pairs of patterns specify record ranges.
+* BEGIN/END:: Specifying initialization and cleanup rules.
+* Using BEGIN/END:: How and why to use BEGIN/END rules.
+* I/O And BEGIN/END:: I/O issues in BEGIN/END rules.
+* Empty:: The empty pattern, which matches every record.
+* Action Overview:: What goes into an action.
+* If Statement:: Conditionally execute some @code{awk}
+ statements.
+* While Statement:: Loop until some condition is satisfied.
+* Do Statement:: Do specified action while looping until some
+ condition is satisfied.
+* For Statement:: Another looping statement, that provides
+ initialization and increment clauses.
+* Break Statement:: Immediately exit the innermost enclosing loop.
+* Continue Statement:: Skip to the end of the innermost enclosing
+ loop.
+* Next Statement:: Stop processing the current input record.
+* Nextfile Statement:: Stop processing the current file.
+* Exit Statement:: Stop execution of @code{awk}.
+* User-modified:: Built-in variables that you change to control
+ @code{awk}.
+* Auto-set:: Built-in variables where @code{awk} gives you
+ information.
+* ARGC and ARGV:: Ways to use @code{ARGC} and @code{ARGV}.
+* Array Intro:: Introduction to Arrays
+* Reference to Elements:: How to examine one element of an array.
+* Assigning Elements:: How to change an element of an array.
+* Array Example:: Basic Example of an Array
+* Scanning an Array:: A variation of the @code{for} statement. It
+ loops through the indices of an array's
+ existing elements.
+* Delete:: The @code{delete} statement removes an element
+ from an array.
+* Numeric Array Subscripts:: How to use numbers as subscripts in
+ @code{awk}.
+* Uninitialized Subscripts:: Using Uninitialized variables as subscripts.
+* Multi-dimensional:: Emulating multi-dimensional arrays in
+ @code{awk}.
+* Multi-scanning:: Scanning multi-dimensional arrays.
+* Calling Built-in:: How to call built-in functions.
+* Numeric Functions:: Functions that work with numbers, including
+ @code{int}, @code{sin} and @code{rand}.
+* String Functions:: Functions for string manipulation, such as
+ @code{split}, @code{match}, and
+ @code{sprintf}.
+* I/O Functions:: Functions for files and shell commands.
+* Time Functions:: Functions for dealing with time stamps.
+* Definition Syntax:: How to write definitions and what they mean.
+* Function Example:: An example function definition and what it
+ does.
+* Function Caveats:: Things to watch out for.
+* Return Statement:: Specifying the value a function returns.
+* Options:: Command line options and their meanings.
+* Other Arguments:: Input file names and variable assignments.
+* AWKPATH Variable:: Searching directories for @code{awk} programs.
+* Obsolete:: Obsolete Options and/or features.
+* Undocumented:: Undocumented Options and Features.
+* Known Bugs:: Known Bugs in @code{gawk}.
+* Portability Notes:: What to do if you don't have @code{gawk}.
+* Nextfile Function:: Two implementations of a @code{nextfile}
+ function.
+* Assert Function:: A function for assertions in @code{awk}
+ programs.
+* Ordinal Functions:: Functions for using characters as numbers and
+ vice versa.
+* Join Function:: A function to join an array into a string.
+* Mktime Function:: A function to turn a date into a timestamp.
+* Gettimeofday Function:: A function to get formatted times.
+* Filetrans Function:: A function for handling data file transitions.
+* Getopt Function:: A function for processing command line
+ arguments.
+* Passwd Functions:: Functions for getting user information.
+* Group Functions:: Functions for getting group information.
+* Library Names:: How to best name private global variables in
+ library functions.
+* Clones:: Clones of common utilities.
+* Cut Program:: The @code{cut} utility.
+* Egrep Program:: The @code{egrep} utility.
+* Id Program:: The @code{id} utility.
+* Split Program:: The @code{split} utility.
+* Tee Program:: The @code{tee} utility.
+* Uniq Program:: The @code{uniq} utility.
+* Wc Program:: The @code{wc} utility.
+* Miscellaneous Programs:: Some interesting @code{awk} programs.
+* Dupword Program:: Finding duplicated words in a document.
+* Alarm Program:: An alarm clock.
+* Translate Program:: A program similar to the @code{tr} utility.
+* Labels Program:: Printing mailing labels.
+* Word Sorting:: A program to produce a word usage count.
+* History Sorting:: Eliminating duplicate entries from a history
+ file.
+* Extract Program:: Pulling out programs from Texinfo source
+ files.
+* Simple Sed:: A Simple Stream Editor.
+* Igawk Program:: A wrapper for @code{awk} that includes files.
+* V7/SVR3.1:: The major changes between V7 and System V
+ Release 3.1.
+* SVR4:: Minor changes between System V Releases 3.1
+ and 4.
+* POSIX:: New features from the POSIX standard.
+* BTL:: New features from the AT&T Bell Laboratories
+ version of @code{awk}.
+* POSIX/GNU:: The extensions in @code{gawk} not in POSIX
+ @code{awk}.
+* Command Line Summary:: Recapitulation of the command line.
+* Language Summary:: A terse review of the language.
+* Variables/Fields:: Variables, fields, and arrays.
+* Fields Summary:: Input field splitting.
+* Built-in Summary:: @code{awk}'s built-in variables.
+* Arrays Summary:: Using arrays.
+* Data Type Summary:: Values in @code{awk} are numbers or strings.
+* Rules Summary:: Patterns and Actions, and their component
+ parts.
+* Pattern Summary:: Quick overview of patterns.
+* Regexp Summary:: Quick overview of regular expressions.
+* Actions Summary:: Quick overview of actions.
+* Operator Summary:: @code{awk} operators.
+* Control Flow Summary:: The control statements.
+* I/O Summary:: The I/O statements.
+* Printf Summary:: A summary of @code{printf}.
+* Special File Summary:: Special file names interpreted internally.
+* Built-in Functions Summary:: Built-in numeric and string functions.
+* Time Functions Summary:: Built-in time functions.
+* String Constants Summary:: Escape sequences in strings.
+* Functions Summary:: Defining and calling functions.
+* Historical Features:: Some undocumented but supported ``features''.
+* Gawk Distribution:: What is in the @code{gawk} distribution.
+* Getting:: How to get the distribution.
+* Extracting:: How to extract the distribution.
+* Distribution contents:: What is in the distribution.
+* Unix Installation:: Installing @code{gawk} under various versions
+ of Unix.
+* Quick Installation:: Compiling @code{gawk} under Unix.
+* Configuration Philosophy:: How it's all supposed to work.
+* VMS Installation:: Installing @code{gawk} on VMS.
+* VMS Compilation:: How to compile @code{gawk} under VMS.
+* VMS Installation Details:: How to install @code{gawk} under VMS.
+* VMS Running:: How to run @code{gawk} under VMS.
+* VMS POSIX:: Alternate instructions for VMS POSIX.
+* PC Installation:: Installing and Compiling @code{gawk} on MS-DOS
+ and OS/2
+* Atari Installation:: Installing @code{gawk} on the Atari ST.
+* Atari Compiling:: Compiling @code{gawk} on Atari
+* Atari Using:: Running @code{gawk} on Atari
+* Amiga Installation:: Installing @code{gawk} on an Amiga.
+* Bugs:: Reporting Problems and Bugs.
+* Other Versions:: Other freely available @code{awk}
+ implementations.
+* Compatibility Mode:: How to disable certain @code{gawk} extensions.
+* Additions:: Making Additions To @code{gawk}.
+* Adding Code:: Adding code to the main body of @code{gawk}.
+* New Ports:: Porting @code{gawk} to a new operating system.
+* Future Extensions:: New features that may be implemented one day.
+* Improvements:: Suggestions for improvements by volunteers.
+
+@end menu
+
+@c dedication for Info file
+@ifinfo
+@center To Miriam, for making me complete.
+@sp 1
+@center To Chana, for the joy you bring us.
+@sp 1
+@center To Rivka, for the exponential increase.
+@end ifinfo
+
+@node Preface, What Is Awk, Top, Top
+@unnumbered Preface
+
+@c I saw a comment somewhere that the preface should describe the book itself,
+@c and the introduction should describe what the book covers.
+
+This @value{DOCUMENT} teaches you about the @code{awk} language and
+how you can use it effectively. You should already be familiar with basic
+system commands, such as @code{cat} and @code{ls},@footnote{These commands
+are available on POSIX compliant systems, as well as on traditional Unix
+based systems. If you are using some other operating system, you still need to
+be familiar with the ideas of I/O redirection and pipes.} and basic shell
+facilities, such as Input/Output (I/O) redirection and pipes.
+
+Implementations of the @code{awk} language are available for many different
+computing environments. This @value{DOCUMENT}, while describing the @code{awk} language
+in general, also describes a particular implementation of @code{awk} called
+@code{gawk} (which stands for ``GNU Awk''). @code{gawk} runs on a broad range
+of Unix systems, ranging from 80386 PC-based computers, up through large scale
+systems, such as Crays. @code{gawk} has also been ported to MS-DOS and
+OS/2 PC's, Atari and Amiga micro-computers, and VMS.
+
+@menu
+* History:: The history of @code{gawk} and @code{awk}.
+* Manual History:: Brief history of the GNU project and this
+ @value{DOCUMENT}.
+* Acknowledgements:: Acknowledgements.
+@end menu
+
+@node History, Manual History, Preface, Preface
+@unnumberedsec History of @code{awk} and @code{gawk}
+
+@cindex acronym
+@cindex history of @code{awk}
+@cindex Aho, Alfred
+@cindex Weinberger, Peter
+@cindex Kernighan, Brian
+@cindex old @code{awk}
+@cindex new @code{awk}
+The name @code{awk} comes from the initials of its designers: Alfred V.@:
+Aho, Peter J.@: Weinberger, and Brian W.@: Kernighan. The original version of
+@code{awk} was written in 1977 at AT&T Bell Laboratories.
+In 1985 a new version made the programming
+language more powerful, introducing user-defined functions, multiple input
+streams, and computed regular expressions.
+This new version became generally available with Unix System V Release 3.1.
+The version in System V Release 4 added some new features and also cleaned
+up the behavior in some of the ``dark corners'' of the language.
+The specification for @code{awk} in the POSIX Command Language
+and Utilities standard further clarified the language based on feedback
+from both the @code{gawk} designers, and the original Bell Labs @code{awk}
+designers.
+
+The GNU implementation, @code{gawk}, was written in 1986 by Paul Rubin
+and Jay Fenlason, with advice from Richard Stallman. John Woods
+contributed parts of the code as well. In 1988 and 1989, David Trueman, with
+help from Arnold Robbins, thoroughly reworked @code{gawk} for compatibility
+with the newer @code{awk}. Current development focuses on bug fixes,
+performance improvements, standards compliance, and occasionally, new features.
+
+@node Manual History, Acknowledgements, History, Preface
+@unnumberedsec The GNU Project and This Book
+
+@cindex Free Software Foundation
+The Free Software Foundation (FSF) is a non-profit organization dedicated
+to the production and distribution of freely distributable software.
+It was founded by Richard M.@: Stallman, the author of the original
+Emacs editor. GNU Emacs is the most widely used version of Emacs today.
+
+@cindex GNU Project
+The GNU project is an on-going effort on the part of the Free Software
+Foundation to create a complete, freely distributable, POSIX compliant
+computing environment. (GNU stands for ``GNU's not Unix''.)
+The FSF uses the ``GNU General Public License'' (or GPL) to ensure that
+source code for their software is always available to the end user. A
+copy of the GPL is included for your reference
+(@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}).
+The GPL applies to the C language source code for @code{gawk}.
+
+As of this writing (1995), the only major component of the
+GNU environment still uncompleted is the operating system kernel, and
+work proceeds apace on that. A shell, an editor (Emacs), highly portable
+optimizing C, C++, and Objective-C compilers, a symbolic debugger, and dozens
+of large and small utilities (such as @code{gawk}),
+have all been completed and are freely available.
+
+@cindex Linux
+@cindex NetBSD
+@cindex FreeBSD
+Until the GNU operating system is released, the FSF recommends the use
+of Linux, a freely distributable, Unix-like operating system for 80386
+and other systems. There are many books on Linux. One freely available one
+is @cite{Linux Installation and Getting Started}, by Matt Welsh.
+Many Linux distributions are available, often in computer stores or
+bundled on CD-ROM with books about Linux. Also, the FSF provides a Linux
+distribution (``Debian''); contact them for more information.
+@xref{Getting, ,Getting the @code{gawk} Distribution}, for the FSF's contact
+information.
+(There are two other freely available, Unix-like operating systems for
+80386 and other systems, NetBSD and FreeBSD. Both are based on the
+4.4-Lite Berkeley Software Distribution, and both use recent versions
+of @code{gawk} for their versions of @code{awk}.)
+
+@iftex
+This @value{DOCUMENT} you are reading now is actually free. The
+information in it is freely available to anyone, the machine readable
+source code for the @value{DOCUMENT} comes with @code{gawk}, and anyone
+may take this @value{DOCUMENT} to a copying machine and make as many
+copies of it as they like. (Take a moment to check the copying
+permissions on the Copyright page.)
+
+If you paid money for this @value{DOCUMENT}, what you actually paid for
+was the @value{DOCUMENT}'s nice printing and binding, and the
+publisher's associated costs to produce it. We have made an effort to
+keep these costs reasonable; most people would prefer a bound book to
+over 300 pages of photo-copied text that would then have to be held in
+a loose-leaf binder (not to mention the time and labor involved in
+doing the copying). The same is true of producing this
+@value{DOCUMENT} from the machine readable source; the retail price is
+only slightly more than the cost per page of printing it
+on a laser printer.
+@end iftex
+
+This @value{DOCUMENT} itself has gone through several previous,
+preliminary editions. I started working on a preliminary draft of
+@cite{The GAWK Manual}, by Diane Close, Paul Rubin, and Richard
+Stallman in the fall of 1988.
+It was around 90 pages long, and barely described the original, ``old''
+version of @code{awk}. After substantial revision, the first version of
+the @cite{The GAWK Manual} to be released was Edition 0.11 Beta in
+October of 1989. The manual then underwent more substantial revision
+for Edition 0.13 of December 1991.
+David Trueman, Pat Rankin, and Michal Jaegermann contributed sections
+of the manual for Edition 0.13.
+That edition was published by the
+FSF as a bound book early in 1992. Since then there have been several
+minor revisions, notably Edition 0.14 of November 1992 that was published
+by the FSF in January of 1993, and Edition 0.16 of August 1993.
+
+Edition 1.0 of @cite{@value{TITLE}} represents a significant re-working
+of @cite{The GAWK Manual}, with much additional material.
+The FSF and I agree that I am now the primary author.
+I also felt that it needed a more descriptive title.
+
+@cite{@value{TITLE}} will undoubtedly continue to evolve.
+An electronic version
+comes with the @code{gawk} distribution from the FSF.
+If you find an error in this @value{DOCUMENT}, please report it!
+@xref{Bugs, ,Reporting Problems and Bugs}, for information on submitting
+problem reports electronically, or write to me in care of the FSF.
+
+@node Acknowledgements, , Manual History, Preface
+@unnumberedsec Acknowledgements
+
+I would like to acknowledge Richard M.@: Stallman, for his vision of a
+better world, and for his courage in founding the FSF and starting the
+GNU project.
+
+The initial draft of @cite{The GAWK Manual} had the following acknowledgements:
+
+@quotation
+Many people need to be thanked for their assistance in producing this
+manual. Jay Fenlason contributed many ideas and sample programs. Richard
+Mlynarik and Robert Chassell gave helpful comments on drafts of this
+manual. The paper @cite{A Supplemental Document for @code{awk}} by John W.@:
+Pierce of the Chemistry Department at UC San Diego, pinpointed several
+issues relevant both to @code{awk} implementation and to this manual, that
+would otherwise have escaped us.
+@end quotation
+
+The following people provided many helpful comments on Edition 0.13 of
+@cite{The GAWK Manual}: Rick Adams, Michael Brennan, Rich Burridge, Diane Close,
+Christopher (``Topher'') Eliot, Michael Lijewski, Pat Rankin, Miriam Robbins,
+and Michal Jaegermann.
+
+The following people provided many helpful comments for Edition 1.0 of
+@cite{@value{TITLE}}: Karl Berry, Michael Brennan, Darrel
+Hankerson, Michal Jaegermann, Michael Lijewski, and Miriam Robbins.
+Pat Rankin, Michal Jaegermann, Darrel Hankerson and Scott Deifik
+updated their respective sections for Edition 1.0.
+
+Robert J.@: Chassell provided much valuable advice on
+the use of Texinfo. He also deserves special thanks for
+convincing me @emph{not} to title this @value{DOCUMENT}
+@cite{How To Gawk Politely}.
+Karl Berry helped significantly with the @TeX{} part of Texinfo.
+
+@cindex Trueman, David
+David Trueman deserves special credit; he has done a yeoman job
+of evolving @code{gawk} so that it performs well, and without bugs.
+Although he is no longer involved with @code{gawk},
+working with him on this project was a significant pleasure.
+
+@cindex Deifik, Scott
+@cindex Hankerson, Darrel
+@cindex Rommel, Kai Uwe
+@cindex Rankin, Pat
+@cindex Jaegermann, Michal
+Scott Deifik, Darrel Hankerson, Kai Uwe Rommel, Pat Rankin, and Michal
+Jaegermann (in no particular order) are long time members of the
+@code{gawk} ``crack portability team.'' Without their hard work and
+help, @code{gawk} would not be nearly the fine program it is today. It
+has been and continues to be a pleasure working with this team of fine
+people.
+
+@cindex Friedl, Jeffrey
+Jeffrey Friedl provided invaluable help in tracking down a number
+of last minute problems with regular expressions in @code{gawk} 3.0.
+
+@cindex Kernighan, Brian
+David and I would like to thank Brian Kernighan of Bell Labs for
+invaluable assistance during the testing and debugging of @code{gawk}, and for
+help in clarifying numerous points about the language. We could not have
+done nearly as good a job on either @code{gawk} or its documentation without
+his help.
+
+@cindex Hughes, Phil
+I would like to thank Marshall and Elaine Hartholz of Seattle, and Dr.@:
+Bert and Rita Schreiber of Detroit for large amounts of quiet vacation
+time in their homes, which allowed me to make significant progress on
+this @value{DOCUMENT} and on @code{gawk} itself. Phil Hughes of SSC
+contributed in a very important way by loaning me his laptop Linux
+system, not once, but twice, allowing me to do a lot of work while
+away from home.
+
+@cindex Robbins, Miriam
+Finally, I must thank my wonderful wife, Miriam, for her patience through
+the many versions of this project, for her proof-reading,
+and for sharing me with the computer.
+I would like to thank my parents for their love, and for the grace with
+which they raised and educated me.
+I also must acknowledge my gratitude to G-d, for the many opportunities
+He has sent my way, as well as for the gifts He has given me with which to
+take advantage of those opportunities.
+@sp 2
+@noindent
+Arnold Robbins @*
+Atlanta, Georgia @*
+January, 1996
+
+@ignore
+Stuff still not covered anywhere:
+BASICS:
+ Integer vs. floating point
+ Hex vs. octal vs. decimal
+ Interpreter vs compiler
+ input/output
+@end ignore
+
+@node What Is Awk, Getting Started, Preface, Top
+@chapter Introduction
+
+If you are like many computer users, you would frequently like to make
+changes in various text files wherever certain patterns appear, or
+extract data from parts of certain lines while discarding the rest. To
+write a program to do this in a language such as C or Pascal is a
+time-consuming inconvenience that may take many lines of code. The job
+may be easier with @code{awk}.
+
+The @code{awk} utility interprets a special-purpose programming language
+that makes it possible to handle simple data-reformatting jobs
+with just a few lines of code.
+
+The GNU implementation of @code{awk} is called @code{gawk}; it is fully
+upward compatible with the System V Release 4 version of
+@code{awk}. @code{gawk} is also upward compatible with the POSIX
+specification of the @code{awk} language. This means that all
+properly written @code{awk} programs should work with @code{gawk}.
+Thus, we usually don't distinguish between @code{gawk} and other @code{awk}
+implementations.
+
+@cindex uses of @code{awk}
+Using @code{awk} you can:
+
+@itemize @bullet
+@item
+manage small, personal databases
+
+@item
+generate reports
+
+@item
+validate data
+
+@item
+produce indexes, and perform other document preparation tasks
+
+@item
+even experiment with algorithms that can be adapted later to other computer
+languages
+@end itemize
+
+@menu
+* This Manual:: Using this @value{DOCUMENT}. Includes sample
+ input files that you can use.
+* Conventions:: Typographical Conventions.
+* Sample Data Files:: Sample data files for use in the @code{awk}
+ programs illustrated in this @value{DOCUMENT}.
+@end menu
+
+@node This Manual, Conventions, What Is Awk, What Is Awk
+@section Using This Book
+@cindex book, using this
+@cindex using this book
+@cindex language, @code{awk}
+@cindex program, @code{awk}
+@ignore
+@cindex @code{awk} language
+@cindex @code{awk} program
+@end ignore
+
+The term @code{awk} refers to a particular program, and to the language you
+use to tell this program what to do. When we need to be careful, we call
+the program ``the @code{awk} utility'' and the language ``the @code{awk}
+language.'' The term @code{gawk} refers to a version of @code{awk} developed
+as part of the GNU project. The purpose of this @value{DOCUMENT} is to explain
+both the @code{awk} language and how to run the @code{awk} utility.
+
+The main purpose of the @value{DOCUMENT} is to explain the features
+of @code{awk}, as defined in the POSIX standard. It does so in the context
+of one particular implementation, @code{gawk}. While doing so, it will also
+attempt to describe important differences between @code{gawk} and other
+@code{awk} implementations. Finally, any @code{gawk} features that
+are not in the POSIX standard for @code{awk} will be noted.
+
+@iftex
+This @value{DOCUMENT} has the difficult task of being both tutorial and reference.
+If you are a novice, feel free to skip over details that seem too complex.
+You should also ignore the many cross references; they are for the
+expert user, and for the on-line Info version of the document.
+@end iftex
+
+The term @dfn{@code{awk} program} refers to a program written by you in
+the @code{awk} programming language.
+
+@xref{Getting Started, ,Getting Started with @code{awk}}, for the bare
+essentials you need to know to start using @code{awk}.
+
+Some useful ``one-liners'' are included to give you a feel for the
+@code{awk} language (@pxref{One-liners, ,Useful One Line Programs}).
+
+Many sample @code{awk} programs have been provided for you
+(@pxref{Library Functions, ,A Library of @code{awk} Functions}; also
+@pxref{Sample Programs, ,Practical @code{awk} Programs}).
+
+The entire @code{awk} language is summarized for quick reference in
+@ref{Gawk Summary, ,@code{gawk} Summary}. Look there if you just need
+to refresh your memory about a particular feature.
+
+If you find terms that you aren't familiar with, try looking them
+up in the glossary (@pxref{Glossary}).
+
+Most of the time, complete @code{awk} programs are used as examples, but in
+some of the more advanced sections, only the part of the @code{awk} program
+that illustrates the concept being described is shown.
+
+While this @value{DOCUMENT} is aimed principally at people who have not been
+exposed
+to @code{awk}, there is a lot of information here that even the @code{awk}
+expert should find useful. In particular, the description of POSIX
+@code{awk}, and the example programs in
+@ref{Library Functions, ,A Library of @code{awk} Functions}, and
+@ref{Sample Programs, ,Practical @code{awk} Programs},
+should be of interest.
+
+@c fakenode --- for prepinfo
+@unnumberedsubsec Dark Corners
+
+@cindex d.c., see ``dark corner''
+@cindex dark corner
+Until the POSIX standard (and @cite{The Gawk Manual}),
+many features of @code{awk} were either poorly documented, or not
+documented at all. Descriptions of such features
+(often called ``dark corners'') are noted in this @value{DOCUMENT} with
+``(d.c.)''.
+They also appear in the index under the heading ``dark corner.''
+
+@node Conventions, Sample Data Files, This Manual, What Is Awk
+@section Typographical Conventions
+
+This @value{DOCUMENT} is written using Texinfo, the GNU documentation formatting language.
+A single Texinfo source file is used to produce both the printed and on-line
+versions of the documentation.
+@iftex
+Because of this, the typographical conventions
+are slightly different than in other books you may have read.
+@end iftex
+@ifinfo
+This section briefly documents the typographical conventions used in Texinfo.
+@end ifinfo
+
+Examples you would type at the command line are preceded by the common
+shell primary and secondary prompts, @samp{$} and @samp{>}.
+Output from the command is preceded by the glyph ``@print{}''.
+This typically represents the command's standard output.
+Error messages, and other output on the command's standard error, are preceded
+by the glyph ``@error{}''. For example:
+
+@example
+$ echo hi on stdout
+@print{} hi on stdout
+$ echo hello on stderr 1>&2
+@error{} hello on stderr
+@end example
+
+@iftex
+In the text, command names appear in @code{this font}, while code segments
+appear in the same font and quoted, @samp{like this}. Some things will
+be emphasized @emph{like this}, and if a point needs to be made
+strongly, it will be done @strong{like this}. The first occurrence of
+a new term is usually its @dfn{definition}, and appears in the same
+font as the previous occurrence of ``definition'' in this sentence.
+File names are indicated like this: @file{/path/to/ourfile}.
+@end iftex
+
+Characters that you type at the keyboard look @kbd{like this}. In particular,
+there are special characters called ``control characters.'' These are
+characters that you type by holding down both the @kbd{CONTROL} key and
+another key, at the same time. For example, a @kbd{Control-d} is typed
+by first pressing and holding the @kbd{CONTROL} key, next
+pressing the @kbd{d} key, and finally releasing both keys.
+
+@node Sample Data Files, , Conventions, What Is Awk
+@section Data Files for the Examples
+
+@cindex input file, sample
+@cindex sample input file
+@cindex @file{BBS-list} file
+Many of the examples in this @value{DOCUMENT} take their input from two sample
+data files. The first, called @file{BBS-list}, represents a list of
+computer bulletin board systems together with information about those systems.
+The second data file, called @file{inventory-shipped}, contains
+information about shipments on a monthly basis. In both files,
+each line is considered to be one @dfn{record}.
+
+In the file @file{BBS-list}, each record contains the name of a computer
+bulletin board, its phone number, the board's baud rate(s), and a code for
+the number of hours it is operational. An @samp{A} in the last column
+means the board operates 24 hours a day. A @samp{B} in the last
+column means the board operates evening and weekend hours only. A
+@samp{C} means the board operates only on weekends.
+
+@c 2e: Update the baud rates to reflect today's faster modems
+@example
+@c system mkdir eg
+@c system mkdir eg/lib
+@c system mkdir eg/data
+@c system mkdir eg/prog
+@c system mkdir eg/misc
+@c file eg/data/BBS-list
+aardvark 555-5553 1200/300 B
+alpo-net 555-3412 2400/1200/300 A
+barfly 555-7685 1200/300 A
+bites 555-1675 2400/1200/300 A
+camelot 555-0542 300 C
+core 555-2912 1200/300 C
+fooey 555-1234 2400/1200/300 B
+foot 555-6699 1200/300 B
+macfoo 555-6480 1200/300 A
+sdace 555-3430 2400/1200/300 A
+sabafoo 555-2127 1200/300 C
+@c endfile
+@end example
+
+@cindex @file{inventory-shipped} file
+The second data file, called @file{inventory-shipped}, represents
+information about shipments during the year.
+Each record contains the month of the year, the number
+of green crates shipped, the number of red boxes shipped, the number of
+orange bags shipped, and the number of blue packages shipped,
+respectively. There are 16 entries, covering the 12 months of one year
+and four months of the next year.
+
+@example
+@c file eg/data/inventory-shipped
+Jan 13 25 15 115
+Feb 15 32 24 226
+Mar 15 24 34 228
+Apr 31 52 63 420
+May 16 34 29 208
+Jun 31 42 75 492
+Jul 24 34 67 436
+Aug 15 34 47 316
+Sep 13 55 37 277
+Oct 29 54 68 525
+Nov 20 87 82 577
+Dec 17 35 61 401
+
+Jan 21 36 64 620
+Feb 26 58 80 652
+Mar 24 75 70 495
+Apr 21 70 74 514
+@c endfile
+@end example
+
+@ifinfo
+If you are reading this in GNU Emacs using Info, you can copy the regions
+of text showing these sample files into your own test files. This way you
+can try out the examples shown in the remainder of this document. You do
+this by using the command @kbd{M-x write-region} to copy text from the Info
+file into a file for use with @code{awk}
+(@pxref{Misc File Ops, , Miscellaneous File Operations, emacs, GNU Emacs Manual},
+for more information). Using this information, create your own
+@file{BBS-list} and @file{inventory-shipped} files, and practice what you
+learn in this @value{DOCUMENT}.
+
+If you are using the stand-alone version of Info,
+see @ref{Extract Program, ,Extracting Programs from Texinfo Source Files},
+for an @code{awk} program that will extract these data files from
+@file{gawk.texi}, the Texinfo source file for this Info file.
+@end ifinfo
+
+@node Getting Started, One-liners, What Is Awk, Top
+@chapter Getting Started with @code{awk}
+@cindex script, definition of
+@cindex rule, definition of
+@cindex program, definition of
+@cindex basic function of @code{awk}
+
+The basic function of @code{awk} is to search files for lines (or other
+units of text) that contain certain patterns. When a line matches one
+of the patterns, @code{awk} performs specified actions on that line.
+@code{awk} keeps processing input lines in this way until the end of the
+input files are reached.
+
+@cindex data-driven languages
+@cindex procedural languages
+@cindex language, data-driven
+@cindex language, procedural
+Programs in @code{awk} are different from programs in most other languages,
+because @code{awk} programs are @dfn{data-driven}; that is, you describe
+the data you wish to work with, and then what to do when you find it.
+Most other languages are @dfn{procedural}; you have to describe, in great
+detail, every step the program is to take. When working with procedural
+languages, it is usually much
+harder to clearly describe the data your program will process.
+For this reason, @code{awk} programs are often refreshingly easy to both
+write and read.
+
+@cindex program, definition of
+@cindex rule, definition of
+When you run @code{awk}, you specify an @code{awk} @dfn{program} that
+tells @code{awk} what to do. The program consists of a series of
+@dfn{rules}. (It may also contain @dfn{function definitions},
+an advanced feature which we will ignore for now.
+@xref{User-defined, ,User-defined Functions}.) Each rule specifies one
+pattern to search for, and one action to perform when that pattern is found.
+
+Syntactically, a rule consists of a pattern followed by an action. The
+action is enclosed in curly braces to separate it from the pattern.
+Rules are usually separated by newlines. Therefore, an @code{awk}
+program looks like this:
+
+@example
+@var{pattern} @{ @var{action} @}
+@var{pattern} @{ @var{action} @}
+@dots{}
+@end example
+
+@menu
+* Names:: What name to use to find @code{awk}.
+* Running gawk:: How to run @code{gawk} programs; includes
+ command line syntax.
+* Very Simple:: A very simple example.
+* Two Rules:: A less simple one-line example with two rules.
+* More Complex:: A more complex example.
+* Statements/Lines:: Subdividing or combining statements into
+ lines.
+* Other Features:: Other Features of @code{awk}.
+* When:: When to use @code{gawk} and when to use other
+ things.
+@end menu
+
+@node Names, Running gawk , Getting Started, Getting Started
+@section A Rose By Any Other Name
+
+@cindex old @code{awk} vs. new @code{awk}
+@cindex new @code{awk} vs. old @code{awk}
+The @code{awk} language has evolved over the years. Full details are
+provided in @ref{Language History, ,The Evolution of the @code{awk} Language}.
+The language described in this @value{DOCUMENT}
+is often referred to as ``new @code{awk}.''
+
+Because of this, many systems have multiple
+versions of @code{awk}.
+Some systems have an @code{awk} utility that implements the
+original version of the @code{awk} language, and a @code{nawk} utility
+for the new version. Others have an @code{oawk} for the ``old @code{awk}''
+language, and plain @code{awk} for the new one. Still others only
+have one version, usually the new one.@footnote{Often, these systems
+use @code{gawk} for their @code{awk} implementation!}
+
+All in all, this makes it difficult for you to know which version of
+@code{awk} you should run when writing your programs. The best advice
+we can give here is to check your local documentation. Look for @code{awk},
+@code{oawk}, and @code{nawk}, as well as for @code{gawk}. Chances are, you
+will have some version of new @code{awk} on your system, and that is what
+you should use when running your programs. (Of course, if you're reading
+this @value{DOCUMENT}, chances are good that you have @code{gawk}!)
+
+Throughout this @value{DOCUMENT}, whenever we refer to a language feature
+that should be available in any complete implementation of POSIX @code{awk},
+we simply use the term @code{awk}. When referring to a feature that is
+specific to the GNU implementation, we use the term @code{gawk}.
+
+@node Running gawk, Very Simple, Names, Getting Started
+@section How to Run @code{awk} Programs
+
+@cindex command line formats
+@cindex running @code{awk} programs
+There are several ways to run an @code{awk} program. If the program is
+short, it is easiest to include it in the command that runs @code{awk},
+like this:
+
+@example
+awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@noindent
+where @var{program} consists of a series of patterns and actions, as
+described earlier.
+(The reason for the single quotes is described below, in
+@ref{One-shot, ,One-shot Throw-away @code{awk} Programs}.)
+
+When the program is long, it is usually more convenient to put it in a file
+and run it with a command like this:
+
+@example
+awk -f @var{program-file} @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@menu
+* One-shot:: Running a short throw-away @code{awk} program.
+* Read Terminal:: Using no input files (input from terminal
+ instead).
+* Long:: Putting permanent @code{awk} programs in
+ files.
+* Executable Scripts:: Making self-contained @code{awk} programs.
+* Comments:: Adding documentation to @code{gawk} programs.
+@end menu
+
+@node One-shot, Read Terminal, Running gawk, Running gawk
+@subsection One-shot Throw-away @code{awk} Programs
+
+Once you are familiar with @code{awk}, you will often type in simple
+programs the moment you want to use them. Then you can write the
+program as the first argument of the @code{awk} command, like this:
+
+@example
+awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@noindent
+where @var{program} consists of a series of @var{patterns} and
+@var{actions}, as described earlier.
+
+@cindex single quotes, why needed
+This command format instructs the @dfn{shell}, or command interpreter,
+to start @code{awk} and use the @var{program} to process records in the
+input file(s). There are single quotes around @var{program} so that
+the shell doesn't interpret any @code{awk} characters as special shell
+characters. They also cause the shell to treat all of @var{program} as
+a single argument for @code{awk} and allow @var{program} to be more
+than one line long.
+
+This format is also useful for running short or medium-sized @code{awk}
+programs from shell scripts, because it avoids the need for a separate
+file for the @code{awk} program. A self-contained shell script is more
+reliable since there are no other files to misplace.
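+
+Here is a minimal sketch of such a script (its contents and the @samp{foo}
+pattern are purely illustrative). The @samp{"$@@"} passes the script's own
+command line arguments on to @code{awk} as input file names, a technique
+discussed further below:
+
+@example
+#! /bin/sh
+# Print all lines containing `foo' in the files named on the command line.
+awk '/foo/ @{ print @}' "$@@"
+@end example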
+
+@ref{One-liners, , Useful One Line Programs}, presents several short,
+self-contained programs.
+
+@iftex
+@page
+@end iftex
+As an interesting side point, the command
+
+@example
+awk '/foo/' @var{files} @dots{}
+@end example
+
+@noindent
+is essentially the same as
+
+@cindex @code{egrep}
+@example
+egrep foo @var{files} @dots{}
+@end example
+
+@node Read Terminal, Long, One-shot, Running gawk
+@subsection Running @code{awk} without Input Files
+
+@cindex standard input
+@cindex input, standard
+You can also run @code{awk} without any input files. If you type the
+command line:
+
+@example
+awk '@var{program}'
+@end example
+
+@noindent
+then @code{awk} applies the @var{program} to the @dfn{standard input},
+which usually means whatever you type on the terminal. This continues
+until you indicate end-of-file by typing @kbd{Control-d}.
+(On other operating systems, the end-of-file character may be different.
+For example, on OS/2 and MS-DOS, it is @kbd{Control-z}.)
+
+For example, the following program prints a friendly piece of advice
+(from Douglas Adams' @cite{The Hitchhiker's Guide to the Galaxy}),
+to keep you from worrying about the complexities of computer programming
+(@samp{BEGIN} is a feature we haven't discussed yet).
+
+@example
+$ awk "BEGIN @{ print \"Don't Panic!\" @}"
+@print{} Don't Panic!
+@end example
+
+@cindex quoting, shell
+@cindex shell quoting
+This program does not read any input. The @samp{\} before each of the
+inner double quotes is necessary because of the shell's quoting rules,
+in particular because it mixes both single quotes and double quotes.
+
+This next simple @code{awk} program
+emulates the @code{cat} utility; it copies whatever you type at the
+keyboard to its standard output. (Why this works is explained shortly.)
+
+@example
+$ awk '@{ print @}'
+Now is the time for all good men
+@print{} Now is the time for all good men
+to come to the aid of their country.
+@print{} to come to the aid of their country.
+Four score and seven years ago, ...
+@print{} Four score and seven years ago, ...
+What, me worry?
+@print{} What, me worry?
+@kbd{Control-d}
+@end example
+
+@node Long, Executable Scripts, Read Terminal, Running gawk
+@subsection Running Long Programs
+
+@cindex running long programs
+@cindex @code{-f} option
+@cindex program file
+@cindex file, @code{awk} program
+Sometimes your @code{awk} programs can be very long. In this case it is
+more convenient to put the program into a separate file. To tell
+@code{awk} to use that file for its program, you type:
+
+@example
+awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+The @samp{-f} instructs the @code{awk} utility to get the @code{awk} program
+from the file @var{source-file}. Any file name can be used for
+@var{source-file}. For example, you could put the program:
+
+@example
+BEGIN @{ print "Don't Panic!" @}
+@end example
+
+@noindent
+into the file @file{advice}. Then this command:
+
+@example
+awk -f advice
+@end example
+
+@noindent
+does the same thing as this one:
+
+@example
+awk "BEGIN @{ print \"Don't Panic!\" @}"
+@end example
+
+@cindex quoting, shell
+@cindex shell quoting
+@noindent
+which was explained earlier (@pxref{Read Terminal, ,Running @code{awk} without Input Files}).
+Note that you don't usually need single quotes around the file name that you
+specify with @samp{-f}, because most file names don't contain any of the shell's
+special characters. Notice that in @file{advice}, the @code{awk}
+program did not have single quotes around it. The quotes are only needed
+for programs that are provided on the @code{awk} command line.
+
+If you want to identify your @code{awk} program files clearly as such,
+you can add the extension @file{.awk} to the file name. This doesn't
+affect the execution of the @code{awk} program, but it does make
+``housekeeping'' easier.
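+
+For example, here is a hypothetical session that renames the @file{advice}
+file shown above and then runs it, using the system's @code{mv} command:
+
+@example
+$ mv advice advice.awk
+$ awk -f advice.awk
+@print{} Don't Panic!
+@end example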
+
+@node Executable Scripts, Comments, Long, Running gawk
+@subsection Executable @code{awk} Programs
+@cindex executable scripts
+@cindex scripts, executable
+@cindex self contained programs
+@cindex program, self contained
+@cindex @code{#!} (executable scripts)
+
+Once you have learned @code{awk}, you may want to write self-contained
+@code{awk} scripts, using the @samp{#!} script mechanism. You can do
+this on many Unix systems@footnote{The @samp{#!} mechanism works on
+Linux systems,
+Unix systems derived from Berkeley Unix, System V Release 4, and some System
+V Release 3 systems.} (and someday on the GNU system).
+
+For example, you could update the file @file{advice} to look like this:
+
+@example
+#! /bin/awk -f
+
+BEGIN @{ print "Don't Panic!" @}
+@end example
+
+@noindent
+After making this file executable (with the @code{chmod} utility), you
+can simply type @samp{advice}
+at the shell, and the system will arrange to run @code{awk}@footnote{The
+line beginning with @samp{#!} lists the full file name of an interpreter
+to be run, and an optional initial command line argument to pass to that
+interpreter. The operating system then runs the interpreter with the given
+argument and the full argument list of the executed program. The first argument
+in the list is the full file name of the @code{awk} program. The rest of the
+argument list will either be options to @code{awk}, or data files,
+or both.} as if you had typed @samp{awk -f advice}.
+
+@example
+$ advice
+@print{} Don't Panic!
+@end example
+
+@noindent
+Self-contained @code{awk} scripts are useful when you want to write a
+program that users can invoke without having to know that the program is
+written in @code{awk}.
+
+@cindex shell scripts
+@cindex scripts, shell
+Some older systems do not support the @samp{#!} mechanism. You can get a
+similar effect using a regular shell script. It would look something
+like this:
+
+@example
+: The colon ensures execution by the standard shell.
+awk '@var{program}' "$@@"
+@end example
+
+Using this technique, it is @emph{vital} to enclose the @var{program} in
+single quotes to protect it from interpretation by the shell. If you
+omit the quotes, only a shell wizard can predict the results.
+
+The @code{"$@@"} causes the shell to forward all the command line
+arguments to the @code{awk} program, without interpretation. The first
+line, which starts with a colon, is used so that this shell script will
+work even if invoked by a user who uses the C shell. (Not all older systems
+obey this convention, but many do.)
+@c 2e:
+@c Someday: (See @cite{The Bourne Again Shell}, by ??.)
+
+@node Comments, , Executable Scripts, Running gawk
+@subsection Comments in @code{awk} Programs
+@cindex @code{#} (comment)
+@cindex comments
+@cindex use of comments
+@cindex documenting @code{awk} programs
+@cindex programs, documenting
+
+A @dfn{comment} is some text that is included in a program for the sake
+of human readers; it is not really part of the program. Comments
+can explain what the program does, and how it works. Nearly all
+programming languages have provisions for comments, because programs are
+typically hard to understand without their extra help.
+
+In the @code{awk} language, a comment starts with the sharp sign
+character, @samp{#}, and continues to the end of the line.
+The @samp{#} does not have to be the first character on the line. The
+@code{awk} language ignores the rest of a line following a sharp sign.
+For example, we could have put the following into @file{advice}:
+
+@example
+# This program prints a nice friendly message. It helps
+# keep novice users from being afraid of the computer.
+BEGIN @{ print "Don't Panic!" @}
+@end example
+
+You can put comment lines into keyboard-composed throw-away @code{awk}
+programs also, but this usually isn't very useful; the purpose of a
+comment is to help you or another person understand the program at
+a later time.
+
+@node Very Simple, Two Rules, Running gawk, Getting Started
+@section A Very Simple Example
+
+The following command runs a simple @code{awk} program that searches the
+input file @file{BBS-list} for the string of characters: @samp{foo}. (A
+string of characters is usually called a @dfn{string}.
+The term @dfn{string} is perhaps based on similar usage in English, such
+as ``a string of pearls,'' or, ``a string of cars in a train.'')
+
+@example
+awk '/foo/ @{ print $0 @}' BBS-list
+@end example
+
+@noindent
+When lines containing @samp{foo} are found, they are printed, because
+@w{@samp{print $0}} means print the current line. (Just @samp{print} by
+itself means the same thing, so we could have written that
+instead.)
+
+You will notice that slashes, @samp{/}, surround the string @samp{foo}
+in the @code{awk} program. The slashes indicate that @samp{foo}
+is a pattern to search for. This type of pattern is called a
+@dfn{regular expression}, and is covered in more detail later
+(@pxref{Regexp, ,Regular Expressions}).
+The pattern is allowed to match parts of words.
+There are
+single-quotes around the @code{awk} program so that the shell won't
+interpret any of it as special shell characters.
+
+Here is what this program prints:
+
+@example
+@group
+$ awk '/foo/ @{ print $0 @}' BBS-list
+@print{} fooey 555-1234 2400/1200/300 B
+@print{} foot 555-6699 1200/300 B
+@print{} macfoo 555-6480 1200/300 A
+@print{} sabafoo 555-2127 1200/300 C
+@end group
+@end example
+
+@cindex action, default
+@cindex pattern, default
+@cindex default action
+@cindex default pattern
+In an @code{awk} rule, either the pattern or the action can be omitted,
+but not both. If the pattern is omitted, then the action is performed
+for @emph{every} input line. If the action is omitted, the default
+action is to print all lines that match the pattern.
+
+@cindex empty action
+@cindex action, empty
+Thus, we could leave out the action (the @code{print} statement and the curly
+braces) in the above example, and the result would be the same: all
+lines matching the pattern @samp{foo} would be printed. By comparison,
+omitting the @code{print} statement but retaining the curly braces makes an
+empty action that does nothing; then no lines would be printed.
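+
+As an illustrative sketch, each of the following lines is a separate,
+complete one-rule program; the @code{awk} comments describe what each
+one does:
+
+@example
+/foo/           # pattern only: prints every line matching `foo'
+@{ print $0 @}    # action only: prints every input line
+/foo/ @{ @}       # empty action: prints nothing at all
+@end example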
+
+@node Two Rules, More Complex, Very Simple, Getting Started
+@section An Example with Two Rules
+@cindex how @code{awk} works
+
+The @code{awk} utility reads the input files one line at a
+time. For each line, @code{awk} tries the patterns of each of the rules.
+If several patterns match then several actions are run, in the order in
+which they appear in the @code{awk} program. If no patterns match, then
+no actions are run.
+
+After processing all the rules (perhaps none) that match the line,
+@code{awk} reads the next line (however,
+@pxref{Next Statement, ,The @code{next} Statement},
+and also @pxref{Nextfile Statement, ,The @code{nextfile} Statement}).
+This continues until the end of the file is reached.
+
+For example, the @code{awk} program:
+
+@example
+/12/ @{ print $0 @}
+/21/ @{ print $0 @}
+@end example
+
+@noindent
+contains two rules. The first rule has the string @samp{12} as the
+pattern and @samp{print $0} as the action. The second rule has the
+string @samp{21} as the pattern and also has @samp{print $0} as the
+action. Each rule's action is enclosed in its own pair of braces.
+
+This @code{awk} program prints every line that contains the string
+@samp{12} @emph{or} the string @samp{21}. If a line contains both
+strings, it is printed twice, once by each rule.
+
+This is what happens if we run this program on our two sample data files,
+@file{BBS-list} and @file{inventory-shipped}, as shown here:
+
+@example
+$ awk '/12/ @{ print $0 @}
+> /21/ @{ print $0 @}' BBS-list inventory-shipped
+@print{} aardvark 555-5553 1200/300 B
+@print{} alpo-net 555-3412 2400/1200/300 A
+@print{} barfly 555-7685 1200/300 A
+@print{} bites 555-1675 2400/1200/300 A
+@print{} core 555-2912 1200/300 C
+@print{} fooey 555-1234 2400/1200/300 B
+@print{} foot 555-6699 1200/300 B
+@print{} macfoo 555-6480 1200/300 A
+@print{} sdace 555-3430 2400/1200/300 A
+@print{} sabafoo 555-2127 1200/300 C
+@print{} sabafoo 555-2127 1200/300 C
+@print{} Jan 21 36 64 620
+@print{} Apr 21 70 74 514
+@end example
+
+@noindent
+Note how the line in @file{BBS-list} beginning with @samp{sabafoo}
+was printed twice, once for each rule.
+
+@node More Complex, Statements/Lines, Two Rules, Getting Started
+@section A More Complex Example
+
+@ignore
+We have to use ls -lg here to get portable output across Unix systems.
+The POSIX ls matches this behavior too. Sigh.
+@end ignore
+Here is an example to give you an idea of what typical @code{awk}
+programs do. This example shows how @code{awk} can be used to
+summarize, select, and rearrange the output of another utility. It uses
+features that haven't been covered yet, so don't worry if you don't
+understand all the details.
+
+@example
+ls -lg | awk '$6 == "Nov" @{ sum += $5 @}
+ END @{ print sum @}'
+@end example
+
+@cindex @code{csh}, backslash continuation
+@cindex backslash continuation in @code{csh}
+This command prints the total number of bytes in all the files in the
+current directory that were last modified in November (of any year).
+(In the C shell you would need to type a semicolon and then a backslash
+at the end of the first line; in a POSIX-compliant shell, such as the
+Bourne shell or Bash (the GNU Bourne-Again shell), you can type the example
+as shown.)
+@ignore
+FIXME: how can users tell what shell they are running? Need a footnote
+or something, but getting into this is a distraction.
+@end ignore
+
+The @w{@samp{ls -lg}} part of this example is a system command that gives
+you a listing of the files in a directory, including file size and the date
+the file was last modified. Its output looks like this:
+
+@example
+-rw-r--r-- 1 arnold user 1933 Nov 7 13:05 Makefile
+-rw-r--r-- 1 arnold user 10809 Nov 7 13:03 gawk.h
+-rw-r--r-- 1 arnold user 983 Apr 13 12:14 gawk.tab.h
+-rw-r--r-- 1 arnold user 31869 Jun 15 12:20 gawk.y
+-rw-r--r-- 1 arnold user 22414 Nov 7 13:03 gawk1.c
+-rw-r--r-- 1 arnold user 37455 Nov 7 13:03 gawk2.c
+-rw-r--r-- 1 arnold user 27511 Dec 9 13:07 gawk3.c
+-rw-r--r-- 1 arnold user 7989 Nov 7 13:03 gawk4.c
+@end example
+
+@noindent
+The first field contains read-write permissions, the second field contains
+the number of links to the file, and the third field identifies the owner of
+the file. The fourth field identifies the group of the file.
+The fifth field contains the size of the file in bytes. The
+sixth, seventh and eighth fields contain the month, day, and time,
+respectively, that the file was last modified. Finally, the ninth field
+contains the name of the file.
+
+@cindex automatic initialization
+@cindex initialization, automatic
+The @samp{$6 == "Nov"} in our @code{awk} program is an expression that
+tests whether the sixth field of the output from @w{@samp{ls -lg}}
+matches the string @samp{Nov}. Each time a line has the string
+@samp{Nov} for its sixth field, the action @samp{sum += $5} is
+performed. This adds the fifth field (the file size) to the variable
+@code{sum}. As a result, when @code{awk} has finished reading all the
+input lines, @code{sum} is the sum of the sizes of files whose
+lines matched the pattern. (This works because @code{awk} variables
+are automatically initialized to zero.)
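+
+You can see this automatic initialization with a small sketch of your own
+(the variable names here are arbitrary):
+
+@example
+$ awk 'BEGIN @{ print sum + 0, "<" text ">" @}'
+@print{} 0 <>
+@end example
+
+@noindent
+The unset variable @code{sum} acts as zero in a numeric context, and
+@code{text} acts as the empty string in a string context.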
+
+After the last line of output from @code{ls} has been processed, the
+@code{END} rule is executed, and the value of @code{sum} is
+printed. In this example, the value of @code{sum} would be 80600.
+
+These more advanced @code{awk} techniques are covered in later sections
+(@pxref{Action Overview, ,Overview of Actions}). Before you can move on to more
+advanced @code{awk} programming, you have to know how @code{awk} interprets
+your input and displays your output. By manipulating fields and using
+@code{print} statements, you can produce some very useful and impressive
+looking reports.
+
+@node Statements/Lines, Other Features, More Complex, Getting Started
+@section @code{awk} Statements Versus Lines
+@cindex line break
+@cindex newline
+
+Most often, each line in an @code{awk} program is a separate statement or
+separate rule, like this:
+
+@example
+awk '/12/ @{ print $0 @}
+ /21/ @{ print $0 @}' BBS-list inventory-shipped
+@end example
+
+However, @code{gawk} will ignore newlines after any of the following:
+
+@example
+, @{ ? : || && do else
+@end example
+
+@noindent
+A newline at any other point is considered the end of the statement.
+(Splitting lines after @samp{?} and @samp{:} is a minor @code{gawk}
+extension. The @samp{?} and @samp{:} referred to here are part of the
+three-operand conditional expression described in
+@ref{Conditional Exp, ,Conditional Expressions}.)
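+
+For example, in the following (purely illustrative) one-liner, the newline
+after the @samp{&&} operator does not end the statement:
+
+@example
+awk '$1 == "Feb" &&
+     $4 > 30 @{ print $0 @}' inventory-shipped
+@end example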
+
+@cindex backslash continuation
+@cindex continuation of lines
+@cindex line continuation
+If you would like to split a single statement into two lines at a point
+where a newline would terminate it, you can @dfn{continue} it by ending the
+first line with a backslash character, @samp{\}. The backslash must be
+the final character on the line to be recognized as a continuation
+character. This is allowed absolutely anywhere in the statement, even
+in the middle of a string or regular expression. For example:
+
+@example
+awk '/This regular expression is too long, so continue it\
+ on the next line/ @{ print $1 @}'
+@end example
+
+@noindent
+@cindex portability issues
+We have generally not used backslash continuation in the sample programs
+in this @value{DOCUMENT}. Since in @code{gawk} there is no limit on the
+length of a line, it is never strictly necessary; it just makes programs
+more readable. For this same reason, as well as for clarity, we have
+kept most statements short in the sample programs presented throughout
+the @value{DOCUMENT}. Backslash continuation is most useful when your
+@code{awk} program is in a separate source file, instead of typed in on
+the command line. You should also note that many @code{awk}
+implementations are more particular about where you may use backslash
+continuation. For example, they may not allow you to split a string
+constant using backslash continuation. Thus, for maximal portability of
+your @code{awk} programs, it is best not to split your lines in the
+middle of a regular expression or a string.
+
+@cindex @code{csh}, backslash continuation
+@cindex backslash continuation in @code{csh}
+@strong{Caution: backslash continuation does not work as described above
+with the C shell.} Continuation with backslash works for @code{awk}
+programs in files, and also for one-shot programs @emph{provided} you
+are using a POSIX-compliant shell, such as the Bourne shell or Bash (the
+GNU Bourne-Again shell). But the C shell (@code{csh}) behaves
+differently! There, you must use two backslashes in a row, followed by
+a newline. Note also that when using the C shell, @emph{every} newline
+in your @code{awk} program must be escaped with a backslash. To illustrate:
+
+@example
+% awk 'BEGIN @{ \
+? print \\
+? "hello, world" \
+? @}'
+@print{} hello, world
+@end example
+
+@noindent
+Here, the @samp{%} and @samp{?} are the C shell's primary and secondary
+prompts, analogous to the standard shell's @samp{$} and @samp{>}.
+
+@code{awk} is a line-oriented language. Each rule's action has to
+begin on the same line as the pattern. To have the pattern and action
+on separate lines, you @emph{must} use backslash continuation---there
+is no other way.
+
+@cindex multiple statements on one line
+When @code{awk} statements within one rule are short, you might want to put
+more than one of them on a line. You do this by separating the statements
+with a semicolon, @samp{;}.
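+
+For example, this rule (an illustrative sketch) uses a semicolon to put
+two statements into one action:
+
+@example
+/12/ @{ print "twelve"; print $0 @}
+@end example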
+
+This also applies to the rules themselves.
+Thus, the previous program could have been written:
+
+@example
+/12/ @{ print $0 @} ; /21/ @{ print $0 @}
+@end example
+
+@noindent
+@strong{Note:} the requirement that rules on the same line must be
+separated with a semicolon was not in the original @code{awk}
+language; it was added for consistency with the treatment of statements
+within an action.
+
+@node Other Features, When, Statements/Lines, Getting Started
+@section Other Features of @code{awk}
+
+The @code{awk} language provides a number of predefined, or built-in, variables, which
+your programs can use to get information from @code{awk}. There are other
+variables your program can set to control how @code{awk} processes your
+data.
+
+In addition, @code{awk} provides a number of built-in functions for doing
+common computational and string-related operations.
+
+As we develop our presentation of the @code{awk} language, we introduce
+most of the variables and many of the functions. They are defined
+systematically in @ref{Built-in Variables}, and
+@ref{Built-in, ,Built-in Functions}.
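+
+As a small taste of what is to come, this sketch uses the built-in
+variable @code{NR} (the number of the current record) and the built-in
+function @code{length()}:
+
+@example
+awk '@{ print NR, length($0) @}' data
+@end example
+
+@noindent
+For each input line, it prints the line number and the number of
+characters in the line.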
+
+@node When, , Other Features, Getting Started
+@section When to Use @code{awk}
+
+@cindex when to use @code{awk}
+@cindex applications of @code{awk}
+You might wonder how @code{awk} might be useful for you. Using
+utility programs, advanced patterns, field separators, arithmetic
+statements, and other selection criteria, you can produce much more
+complex output. The @code{awk} language is very useful for producing
+reports from large amounts of raw data, such as summarizing information
+from the output of other utility programs like @code{ls}.
+(@xref{More Complex, ,A More Complex Example}.)
+
+Programs written with @code{awk} are usually much smaller than they would
+be in other languages. This makes @code{awk} programs easy to compose and
+use. Often, @code{awk} programs can be quickly composed at your terminal,
+used once, and thrown away. Since @code{awk} programs are interpreted, you
+can avoid the (usually lengthy) compilation part of the typical
+edit-compile-test-debug cycle of software development.
+
+Complex programs have been written in @code{awk}, including a complete
+retargetable assembler for eight-bit microprocessors (@pxref{Glossary}, for
+more information) and a microcode assembler for a special purpose Prolog
+computer. However, @code{awk}'s capabilities are strained by tasks of
+such complexity.
+
+If you find yourself writing @code{awk} scripts of more than, say, a few
+hundred lines, you might consider using a different programming
+language. Emacs Lisp is a good choice if you need sophisticated string
+or pattern matching capabilities. The shell is also good at string and
+pattern matching; in addition, it allows powerful use of the system
+utilities. More conventional languages, such as C, C++, and Lisp, offer
+better facilities for system programming and for managing the complexity
+of large programs. Programs in these languages may require more lines
+of source code than the equivalent @code{awk} programs, but they are
+easier to maintain and usually run more efficiently.
+
+@node One-liners, Regexp, Getting Started, Top
+@chapter Useful One Line Programs
+
+@cindex one-liners
+Many useful @code{awk} programs are short, just a line or two. Here is a
+collection of useful, short programs to get you started. Some of these
+programs contain constructs that haven't been covered yet. The description
+of the program will give you a good idea of what is going on, but please
+read the rest of the @value{DOCUMENT} to become an @code{awk} expert!
+
+Most of the examples use a data file named @file{data}. This is just a
+placeholder; if you were to use these programs yourself, you would substitute
+your own file names for @file{data}.
+
+@ifinfo
+Since you are reading this in Info, each line of the example code is
+enclosed in quotes, to represent text that you would type literally.
+The examples themselves represent shell commands that use single quotes
+to keep the shell from interpreting the contents of the program.
+When reading the examples, focus on the text between the open and close
+quotes.
+@end ifinfo
+
+@table @code
+@item awk '@{ if (length($0) > max) max = length($0) @}
+@itemx @ @ @ @ @ END @{ print max @}' data
+This program prints the length of the longest input line.
+
+@item awk 'length($0) > 80' data
+This program prints every line that is longer than 80 characters. The sole
+rule has a relational expression as its pattern, and has no action (so the
+default action, printing the record, is used).
+
+@item expand@ data@ |@ awk@ '@{ if (x < length()) x = length() @}
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ END @{ print "maximum line length is " x @}'
+This program prints the length of the longest line in @file{data}. The input
+is processed by the @code{expand} program to change tabs into spaces,
+so the widths compared are actually the right-margin columns.
+
+@item awk 'NF > 0' data
+This program prints every line that has at least one field. This is an
+easy way to delete blank lines from a file (or rather, to create a new
+file similar to the old file but from which the blank lines have been
+deleted).
+
+@c Karl Berry points out that new users probably don't want to see
+@c multiple ways to do things, just the `best' way. He's probably
+@c right. At some point it might be worth adding something about there
+@c often being multiple ways to do things in awk, but for now we'll
+@c just take this one out.
+@ignore
+@item awk '@{ if (NF > 0) print @}' data
+This program also prints every line that has at least one field. Here we
+allow the rule to match every line, and then decide in the action whether
+to print.
+@end ignore
+
+@item awk@ 'BEGIN@ @{@ for (i = 1; i <= 7; i++)
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ print int(101 * rand()) @}'
+This program prints seven random numbers from zero to 100, inclusive.
+
+@item ls -lg @var{files} | awk '@{ x += $5 @} ; END @{ print "total bytes: " x @}'
+This program prints the total number of bytes used by @var{files}.
+
+@item ls -lg @var{files} | awk '@{ x += $5 @}
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ END @{ print "total K-bytes: " (x + 1023)/1024 @}'
+This program prints the total number of kilobytes used by @var{files}.
+
+@item awk -F: '@{ print $1 @}' /etc/passwd | sort
+This program prints a sorted list of the login names of all users.
+
+@item awk 'END @{ print NR @}' data
+This program counts lines in a file.
+
+@item awk 'NR % 2 == 0' data
+This program prints the even-numbered lines in the data file.
+If you were to use the expression @samp{NR % 2 == 1} instead,
+it would print the odd-numbered lines.
+@end table
+
+@node Regexp, Reading Files, One-liners, Top
+@chapter Regular Expressions
+@cindex pattern, regular expressions
+@cindex regexp
+@cindex regular expression
+@cindex regular expressions as patterns
+
+A @dfn{regular expression}, or @dfn{regexp}, is a way of describing a
+set of strings.
+Because regular expressions are such a fundamental part of @code{awk}
+programming, their format and use deserve a separate chapter.
+
+A regular expression enclosed in slashes (@samp{/})
+is an @code{awk} pattern that matches every input record whose text
+belongs to that set.
+
+The simplest regular expression is a sequence of letters, numbers, or
+both. Such a regexp matches any string that contains that sequence.
+Thus, the regexp @samp{foo} matches any string containing @samp{foo}.
+Therefore, the pattern @code{/foo/} matches any input record containing
+the three characters @samp{foo}, @emph{anywhere} in the record. Other
+kinds of regexps let you specify more complicated classes of strings.
+
+@iftex
+Initially, the examples will be simple. As we explain more about how
+regular expressions work, we will present more complicated examples.
+@end iftex
+
+@menu
+* Regexp Usage:: How to Use Regular Expressions.
+* Escape Sequences:: How to write non-printing characters.
+* Regexp Operators:: Regular Expression Operators.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Leftmost Longest:: How much text matches.
+* Computed Regexps:: Using Dynamic Regexps.
+@end menu
+
+@node Regexp Usage, Escape Sequences, Regexp, Regexp
+@section How to Use Regular Expressions
+
+A regular expression can be used as a pattern by enclosing it in
+slashes. Then the regular expression is tested against the
+entire text of each record. (Normally, it only needs
+to match some part of the text in order to succeed.) For example, this
+prints the second field of each record that contains the three
+characters @samp{foo} anywhere in it:
+
+@example
+@group
+$ awk '/foo/ @{ print $2 @}' BBS-list
+@print{} 555-1234
+@print{} 555-6699
+@print{} 555-6480
+@print{} 555-2127
+@end group
+@end example
+
+@cindex regexp matching operators
+@cindex string-matching operators
+@cindex operators, string-matching
+@cindex operators, regexp matching
+@cindex regexp match/non-match operators
+@cindex @code{~} operator
+@cindex @code{!~} operator
+Regular expressions can also be used in matching expressions. These
+expressions allow you to specify the string to match against; it need
+not be the entire current input record. The two operators, @samp{~}
+and @samp{!~}, perform regular expression comparisons. Expressions
+using these operators can be used as patterns or in @code{if},
+@code{while}, @code{for}, and @code{do} statements.
+@ifinfo
+@c adding this xref in TeX screws up the formatting too much
+(@xref{Statements, ,Control Statements in Actions}.)
+@end ifinfo
+
+@table @code
+@item @var{exp} ~ /@var{regexp}/
+This is true if the expression @var{exp} (taken as a string)
+is matched by @var{regexp}. The following example matches, or selects,
+all input records with the upper-case letter @samp{J} somewhere in the
+first field:
+
+@example
+@group
+$ awk '$1 ~ /J/' inventory-shipped
+@print{} Jan 13 25 15 115
+@print{} Jun 31 42 75 492
+@print{} Jul 24 34 67 436
+@print{} Jan 21 36 64 620
+@end group
+@end example
+
+So does this:
+
+@example
+awk '@{ if ($1 ~ /J/) print @}' inventory-shipped
+@end example
+
+@item @var{exp} !~ /@var{regexp}/
+This is true if the expression @var{exp} (taken as a character string)
+is @emph{not} matched by @var{regexp}. The following example matches,
+or selects, all input records whose first field @emph{does not} contain
+the upper-case letter @samp{J}:
+
+@example
+@group
+$ awk '$1 !~ /J/' inventory-shipped
+@print{} Feb 15 32 24 226
+@print{} Mar 15 24 34 228
+@print{} Apr 31 52 63 420
+@print{} May 16 34 29 208
+@dots{}
+@end group
+@end example
+@end table
+
+@cindex regexp constant
+When a regexp is written enclosed in slashes, like @code{/foo/}, we call it
+a @dfn{regexp constant}, much like @code{5.27} is a numeric constant, and
+@code{"foo"} is a string constant.
+
+@node Escape Sequences, Regexp Operators, Regexp Usage, Regexp
+@section Escape Sequences
+
+@cindex escape sequence notation
+Some characters cannot be included literally in string constants
+(@code{"foo"}) or regexp constants (@code{/foo/}). You represent them
+instead with @dfn{escape sequences}, which are character sequences
+beginning with a backslash (@samp{\}).
+
+One use of an escape sequence is to include a double-quote character in
+a string constant. Since a plain double-quote would end the string, you
+must use @samp{\"} to represent an actual double-quote character as a
+part of the string. For example:
+
+@example
+$ awk 'BEGIN @{ print "He said \"hi!\" to her." @}'
+@print{} He said "hi!" to her.
+@end example
+
+The backslash character itself is another character that cannot be
+included normally; you write @samp{\\} to put one backslash in the
+string or regexp. Thus, the string whose contents are the two characters
+@samp{"} and @samp{\} must be written @code{"\"\\"}.
+
+Another use of backslash is to represent unprintable characters
+such as tab or newline. While there is nothing to stop you from entering most
+unprintable characters directly in a string constant or regexp constant,
+they may look ugly.
+
+Here is a table of all the escape sequences used in @code{awk}, and
+what they represent. Unless noted otherwise, all of these escape
+sequences apply to both string constants and regexp constants.
+
+@iftex
+@page
+@end iftex
+@c @cartouche
+@table @code
+@item \\
+A literal backslash, @samp{\}.
+
+@cindex @code{awk} language, V.4 version
+@item \a
+The ``alert'' character, @kbd{Control-g}, ASCII code 7 (BEL).
+
+@item \b
+Backspace, @kbd{Control-h}, ASCII code 8 (BS).
+
+@item \f
+Formfeed, @kbd{Control-l}, ASCII code 12 (FF).
+
+@item \n
+Newline, @kbd{Control-j}, ASCII code 10 (LF).
+
+@item \r
+Carriage return, @kbd{Control-m}, ASCII code 13 (CR).
+
+@item \t
+Horizontal tab, @kbd{Control-i}, ASCII code 9 (HT).
+
+@cindex @code{awk} language, V.4 version
+@item \v
+Vertical tab, @kbd{Control-k}, ASCII code 11 (VT).
+
+@item \@var{nnn}
+The octal value @var{nnn}, where @var{nnn} stands for one to three digits
+between @samp{0} and @samp{7}. For example, the code for the ASCII ESC
+(escape) character is @samp{\033}.
+
+@cindex @code{awk} language, V.4 version
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item \x@var{hh}@dots{}
+The hexadecimal value @var{hh}, where @var{hh} are hexadecimal
+digits (@samp{0} through @samp{9} and either @samp{A} through @samp{F} or
+@samp{a} through @samp{f}). Like the same construct in ANSI C, the escape
+sequence continues until the first non-hexadecimal digit is seen. However,
+using more than two hexadecimal digits produces undefined results. (The
+@samp{\x} escape sequence is not allowed in POSIX @code{awk}.)
+
+@item \/
+A literal slash (necessary for regexp constants only).
+You use this when you wish to write a regexp
+constant that contains a slash. Since the regexp is delimited by
+slashes, you need to escape the slash that is part of the pattern,
+in order to tell @code{awk} to keep processing the rest of the regexp.
+
+@item \"
+A literal double-quote (necessary for string constants only).
+You use this when you wish to write a string
+constant that contains a double-quote. Since the string is delimited by
+double-quotes, you need to escape the quote that is part of the string,
+in order to tell @code{awk} to keep processing the rest of the string.
+@end table
+@c @end cartouche
+
+In @code{gawk}, there are additional two-character sequences beginning
+with a backslash that have special meaning in regexps.
+@xref{GNU Regexp Operators, ,Additional Regexp Operators Only in @code{gawk}}.
+
+In a string constant,
+what happens if you place a backslash before something that is not one of
+the characters listed above? POSIX @code{awk} purposely leaves this case
+undefined. There are two choices.
+
+@itemize @bullet
+@item
+Strip the backslash out. This is what Unix @code{awk} and @code{gawk} both do.
+For example, @code{"a\qc"} is the same as @code{"aqc"}.
+
+@item
+Leave the backslash alone. Some other @code{awk} implementations do this.
+In such implementations, @code{"a\qc"} is the same as if you had typed
+@code{"a\\qc"}.
+@end itemize
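+
+For example, here is a brief sketch of the first choice (@code{gawk}'s
+behavior as just described; depending on the version, @code{gawk} may
+also warn about the unrecognized escape):
+
+@example
+$ gawk 'BEGIN @{ print "a\qc" @}'
+@print{} aqc
+@end example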
+
+In a regexp, a backslash before any character that is not in the above table,
+and not listed in
+@ref{GNU Regexp Operators, ,Additional Regexp Operators Only in @code{gawk}},
+means that the next character should be taken literally, even if it would
+normally be a regexp operator. E.g., @code{/a\+b/} matches the three
+characters @samp{a+b}.
+
+@cindex portability issues
+For complete portability, do not use a backslash before any character not
+listed in the table above.
+
+Another interesting question arises. Suppose you use an octal or hexadecimal
+escape to represent a regexp metacharacter
+(@pxref{Regexp Operators, , Regular Expression Operators}).
+Does @code{awk} treat the character as a literal character, or as a regexp
+operator?
+
+@cindex dark corner
+It turns out that historically, such characters were taken literally (d.c.).
+However, the POSIX standard indicates that they should be treated
+as real metacharacters, and this is what @code{gawk} does.
+However, in compatibility mode (@pxref{Options, ,Command Line Options}),
+@code{gawk} treats the characters represented by octal and hexadecimal
+escape sequences literally when used in regexp constants. Thus,
+@code{/a\52b/} is equivalent to @code{/a\*b/}.
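+
+As a sketch of the difference (the first result assumes the default,
+POSIX-style treatment described above; in compatibility mode the
+@samp{*} is literal and the test fails):
+
+@example
+$ echo ab | gawk '@{ print ($0 ~ /a\52b/) @}'
+@print{} 1
+$ echo ab | gawk --traditional '@{ print ($0 ~ /a\52b/) @}'
+@print{} 0
+@end example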
+
+To summarize:
+
+@enumerate 1
+@item
+The escape sequences in the table above are always processed first,
+for both string constants and regexp constants. This happens very early,
+as soon as @code{awk} reads your program.
+
+@item
+@code{gawk} processes both regexp constants and dynamic regexps
+(@pxref{Computed Regexps, ,Using Dynamic Regexps}),
+for the special operators listed in
+@ref{GNU Regexp Operators, ,Additional Regexp Operators Only in @code{gawk}}.
+
+@item
+A backslash before any other character means to treat that character
+literally.
+@end enumerate
+
+@node Regexp Operators, GNU Regexp Operators, Escape Sequences, Regexp
+@section Regular Expression Operators
+@cindex metacharacters
+@cindex regular expression metacharacters
+@cindex regexp operators
+
+You can combine regular expressions with the following characters,
+called @dfn{regular expression operators}, or @dfn{metacharacters}, to
+increase the power and versatility of regular expressions.
+
+The escape sequences described
+@iftex
+above
+@end iftex
+in @ref{Escape Sequences},
+are valid inside a regexp. They are introduced by a @samp{\}. They
+are recognized and converted into the corresponding real characters as
+the very first step in processing regexps.
+
+Here is a table of metacharacters. All characters that are not escape
+sequences and that are not listed in the table stand for themselves.
+
+@iftex
+@page
+@end iftex
+@table @code
+@item \
+This is used to suppress the special meaning of a character when
+matching. For example:
+
+@example
+\$
+@end example
+
+@noindent
+matches the character @samp{$}.
+
+@cindex anchors in regexps
+@cindex regexp, anchors
+@item ^
+This matches the beginning of a string. For example:
+
+@example
+^@@chapter
+@end example
+
+@noindent
+matches the @samp{@@chapter} at the beginning of a string, and can be used
+to identify chapter beginnings in Texinfo source files.
+The @samp{^} is known as an @dfn{anchor}, since it anchors the pattern to
+matching only at the beginning of the string.
+
+It is important to realize that @samp{^} does not match the beginning of
+a line embedded in a string. In this example the condition is not true:
+
+@example
+if ("line1\nLINE 2" ~ /^L/) @dots{}
+@end example
+
+@item $
+This is similar to @samp{^}, but it matches only at the end of a string.
+For example:
+
+@example
+p$
+@end example
+
+@noindent
+matches a record that ends with a @samp{p}. The @samp{$} is also an anchor,
+and also does not match the end of a line embedded in a string. In this
+example the condition is not true:
+
+@example
+if ("line1\nLINE 2" ~ /1$/) @dots{}
+@end example
+
+@item .
+The period, or dot, matches any single character,
+@emph{including} the newline character. For example:
+
+@example
+.P
+@end example
+
+@noindent
+matches any single character followed by a @samp{P} in a string. Using
+concatenation we can make a regular expression like @samp{U.A}, which
+matches any three-character sequence that begins with @samp{U} and ends
+with @samp{A}.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+In strict POSIX mode (@pxref{Options, ,Command Line Options}),
+@samp{.} does not match the @sc{nul}
+character, which is a character with all bits equal to zero.
+Otherwise, @sc{nul} is just another character. Other versions of @code{awk}
+may not be able to match the @sc{nul} character.
+
+@ignore
+2e: Add stuff that character list is the POSIX terminology. In other
+ literature known as character set or character class.
+@end ignore
+
+@cindex character list
+@item [@dots{}]
+This is called a @dfn{character list}. It matches any @emph{one} of the
+characters that are enclosed in the square brackets. For example:
+
+@example
+[MVX]
+@end example
+
+@noindent
+matches any one of the characters @samp{M}, @samp{V}, or @samp{X} in a
+string.
+
+Ranges of characters are indicated by using a hyphen between the beginning
+and ending characters, and enclosing the whole thing in brackets. For
+example:
+
+@example
+[0-9]
+@end example
+
+@noindent
+matches any digit.
+Multiple ranges are allowed. E.g., the list @code{@w{[A-Za-z0-9]}} is a
+common way to express the idea of ``all alphanumeric characters.''
+
+To include one of the characters @samp{\}, @samp{]}, @samp{-} or @samp{^} in a
+character list, put a @samp{\} in front of it. For example:
+
+@example
+[d\]]
+@end example
+
+@noindent
+matches either @samp{d}, or @samp{]}.
+
+@cindex @code{egrep}
+This treatment of @samp{\} in character lists
+is compatible with other @code{awk}
+implementations, and is also mandated by POSIX.
+The regular expressions in @code{awk} are a superset
+of the POSIX specification for Extended Regular Expressions (EREs).
+POSIX EREs are based on the regular expressions accepted by the
+traditional @code{egrep} utility.
+
+@cindex character classes
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@dfn{Character classes} are a new feature introduced in the POSIX standard.
+A character class is a special notation for describing
+lists of characters that have a specific attribute, but where the
+actual characters themselves can vary from country to country and/or
+from character set to character set. For example, the notion of what
+is an alphabetic character differs in the USA and in France.
+
+A character class is only valid in a regexp @emph{inside} the
+brackets of a character list. Character classes consist of @samp{[:},
+a keyword denoting the class, and @samp{:]}. Here are the character
+classes defined by the POSIX standard.
+
+@table @code
+@item [:alnum:]
+Alphanumeric characters.
+
+@item [:alpha:]
+Alphabetic characters.
+
+@item [:blank:]
+Space and tab characters.
+
+@item [:cntrl:]
+Control characters.
+
+@item [:digit:]
+Numeric characters.
+
+@item [:graph:]
+Characters that are printable and are also visible.
+(A space is printable, but not visible, while an @samp{a} is both.)
+
+@item [:lower:]
+Lower-case alphabetic characters.
+
+@item [:print:]
+Printable characters (characters that are not control characters).
+
+@item [:punct:]
+Punctuation characters (characters that are not letters, digits,
+control characters, or space characters).
+
+@item [:space:]
+Space characters (such as space, tab, and formfeed, to name a few).
+
+@item [:upper:]
+Upper-case alphabetic characters.
+
+@item [:xdigit:]
+Characters that are hexadecimal digits.
+@end table
+
+For example, before the POSIX standard, to match alphanumeric
+characters, you had to write @code{/[A-Za-z0-9]/}. If your
+character set had other alphabetic characters in it, this would not
+match them. With the POSIX character classes, you can write
+@code{/[[:alnum:]]/}, and this will match @emph{all} the alphabetic
+and numeric characters in your character set.
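+
+For example, here is a small sketch (assuming the default mode, in which
+@code{gawk} recognizes character classes):
+
+@example
+$ echo R2D2 | gawk '/^[[:alnum:]]+$/ @{ print "all alphanumeric" @}'
+@print{} all alphanumeric
+@end example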
+
+@cindex collating elements
+Two additional special sequences can appear in character lists.
+These apply to non-ASCII character sets, which can have single symbols
+(called @dfn{collating elements}) that are represented with more than one
+character, as well as several characters that are equivalent for
+@dfn{collating}, or sorting, purposes. (E.g., in French, a plain ``e''
+and a grave-accented
+@iftex
+``@`e''
+@end iftex
+@ifinfo
+``e''
+@end ifinfo
+are equivalent.)
+
+@table @asis
+@cindex collating symbols
+@item Collating Symbols
+A @dfn{collating symbol} is a multi-character collating element enclosed in
+@samp{[.} and @samp{.]}. For example, if @samp{ch} is a collating element,
+then @code{[[.ch.]]} is a regexp that matches this collating element, while
+@code{[ch]} is a regexp that matches either @samp{c} or @samp{h}.
+
+@cindex equivalence classes
+@item Equivalence Classes
+An @dfn{equivalence class} is a list of equivalent characters enclosed in
+@samp{[=} and @samp{=]}.
+@iftex
+Thus, @code{[[=e@`e=]]} is a regexp that matches either @samp{e} or @samp{@`e}.
+@end iftex
+@ifinfo
+Because Info files use plain ASCII characters, it is not possible to present
+a realistic equivalence class example here.
+@end ifinfo
+@end table
+
+These features are very valuable in non-English speaking locales.
+
+@strong{Caution:} The library functions that @code{gawk} uses for regular
+expression matching currently only recognize POSIX character classes;
+they do not recognize collating symbols or equivalence classes.
+@c maybe one day ...
+
+@cindex complemented character list
+@cindex character list, complemented
+@item [^ @dots{}]
+This is a @dfn{complemented character list}. The first character after
+the @samp{[} @emph{must} be a @samp{^}. It matches any characters
+@emph{except} those in the square brackets, or newline. For example:
+
+@example
+[^0-9]
+@end example
+
+@noindent
+matches any character that is not a digit.
+
+@item |
+This is the @dfn{alternation operator}, and it is used to specify
+alternatives. For example:
+
+@example
+^P|[0-9]
+@end example
+
+@noindent
+matches any string that matches either @samp{^P} or @samp{[0-9]}. This
+means it matches any string that starts with @samp{P} or contains a digit.
+
+The alternation applies to the largest possible regexps on either side.
+In other words, @samp{|} has the lowest precedence of all the regular
+expression operators.
+
+@item (@dots{})
+Parentheses are used for grouping in regular expressions as in
+arithmetic. They can be used to concatenate regular expressions
+containing the alternation operator, @samp{|}. For example,
+@samp{@@(samp|code)\@{[^@}]+\@}} matches both @samp{@@code@{foo@}} and
+@samp{@@samp@{bar@}}. (These are Texinfo formatting control sequences.)
+
+@item *
+This symbol means that the preceding regular expression is to be
+repeated as many times as necessary to find a match. For example:
+
+@example
+ph*
+@end example
+
+@noindent
+applies the @samp{*} symbol to the preceding @samp{h} and looks for matches
+of one @samp{p} followed by any number of @samp{h}s. This will also match
+just @samp{p} if no @samp{h}s are present.
+
+The @samp{*} repeats the @emph{smallest} possible preceding expression.
+(Use parentheses if you wish to repeat a larger expression.) It finds
+as many repetitions as possible. For example:
+
+@example
+awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample
+@end example
+
+@noindent
+prints every record in @file{sample} containing a string of the form
+@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on.
+Notice the escaping of the parentheses by preceding them
+with backslashes.
+
+@item +
+This symbol is similar to @samp{*}, but the preceding expression must be
+matched at least once. This means that:
+
+@example
+wh+y
+@end example
+
+@noindent
+would match @samp{why} and @samp{whhy} but not @samp{wy}, whereas
+@samp{wh*y} would match all three of these strings. This is a simpler
+way of writing the last @samp{*} example:
+
+@example
+awk '/\(c[ad]+r x\)/ @{ print @}' sample
+@end example
+
+@item ?
+This symbol is similar to @samp{*}, but the preceding expression can be
+matched either once or not at all. For example:
+
+@example
+fe?d
+@end example
+
+@noindent
+will match @samp{fed} and @samp{fd}, but nothing else.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@cindex interval expressions
+@item @{@var{n}@}
+@itemx @{@var{n},@}
+@itemx @{@var{n},@var{m}@}
+One or two numbers inside braces denote an @dfn{interval expression}.
+If there is one number in the braces, the preceding regexp is repeated
+@var{n} times.
+If there are two numbers separated by a comma, the preceding regexp is
+repeated @var{n} to @var{m} times.
+If there is one number followed by a comma, then the preceding regexp
+is repeated at least @var{n} times.
+
+@table @code
+@item wh@{3@}y
+matches @samp{whhhy} but not @samp{why} or @samp{whhhhy}.
+
+@item wh@{3,5@}y
+matches @samp{whhhy} or @samp{whhhhy} or @samp{whhhhhy}, only.
+
+@item wh@{2,@}y
+matches @samp{whhy} or @samp{whhhy}, and so on.
+@end table
+
+Interval expressions were not traditionally available in @code{awk}.
+As part of the POSIX standard they were added, to make @code{awk}
+and @code{egrep} consistent with each other.
+
+However, since old programs may use @samp{@{} and @samp{@}} in regexp
+constants, by default @code{gawk} does @emph{not} match interval expressions
+in regexps. If either @samp{--posix} or @samp{--re-interval} is specified
+(@pxref{Options, , Command Line Options}), then interval expressions
+are allowed in regexps, as shown in the example following this table.
+@end table
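+
+For example, here is a sketch of an interval expression in action
+(it assumes @samp{--re-interval}, as described in the last entry above):
+
+@example
+$ echo whhhy | gawk --re-interval '/wh@{3@}y/ @{ print "matched" @}'
+@print{} matched
+@end example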
+
+@cindex precedence, regexp operators
+@cindex regexp operators, precedence of
+In regular expressions, the @samp{*}, @samp{+}, and @samp{?} operators,
+as well as the braces @samp{@{} and @samp{@}},
+have
+the highest precedence, followed by concatenation, and finally by @samp{|}.
+As in arithmetic, parentheses can change how operators are grouped.
+
+If @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}),
+character classes and interval expressions are not available in
+regular expressions.
+
+The next
+@ifinfo
+node
+@end ifinfo
+@iftex
+section
+@end iftex
+discusses the GNU-specific regexp operators, and provides
+more detail concerning how command line options affect the way @code{gawk}
+interprets the characters in regular expressions.
+
+@node GNU Regexp Operators, Case-sensitivity, Regexp Operators, Regexp
+@section Additional Regexp Operators Only in @code{gawk}
+
+@c This section adapted from the regex-0.12 manual
+
+@cindex regexp operators, GNU specific
+GNU software that deals with regular expressions provides a number of
+additional regexp operators. These operators are described in this
+section, and are specific to @code{gawk}; they are not available in other
+@code{awk} implementations.
+
+@cindex word, regexp definition of
+Most of the additional operators are for dealing with word matching.
+For our purposes, a @dfn{word} is a sequence of one or more letters, digits,
+or underscores (@samp{_}).
+
+@table @code
+@cindex @code{\w} regexp operator
+@item \w
+This operator matches any word-constituent character, i.e.@: any
+letter, digit, or underscore. Think of it as a short-hand for
+@c @w{@code{[A-Za-z0-9_]}} or
+@w{@code{[[:alnum:]_]}}.
+
+@cindex @code{\W} regexp operator
+@item \W
+This operator matches any character that is not word-constituent.
+Think of it as a short-hand for
+@c @w{@code{[^A-Za-z0-9_]}} or
+@w{@code{[^[:alnum:]_]}}.
+
+@cindex @code{\<} regexp operator
+@item \<
+This operator matches the empty string at the beginning of a word.
+For example, @code{/\<away/} matches @samp{away}, but not
+@samp{stowaway}.
+
+@cindex @code{\>} regexp operator
+@item \>
+This operator matches the empty string at the end of a word.
+For example, @code{/stow\>/} matches @samp{stow}, but not @samp{stowaway}.
+
+@cindex @code{\y} regexp operator
+@cindex word boundaries, matching
+@item \y
+This operator matches the empty string at either the beginning or the
+end of a word (the word boundar@strong{y}). For example, @samp{\yballs?\y}
+matches either @samp{ball} or @samp{balls} as a separate word.
+
+@cindex @code{\B} regexp operator
+@item \B
+This operator matches the empty string within a word. In other words,
+@samp{\B} matches the empty string that occurs between two
+word-constituent characters. For example,
+@code{/\Brat\B/} matches @samp{crate}, but it does not match @samp{dirty rat}.
+@samp{\B} is essentially the opposite of @samp{\y}.
+@end table
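+
+Here is a brief sketch of the word-boundary operators (they require
+@code{gawk}; other @code{awk} implementations do not treat these
+sequences specially):
+
+@example
+$ echo "away stowaway" |
+> gawk '@{ print ($1 ~ /\<away\>/), ($2 ~ /\<away\>/) @}'
+@print{} 1 0
+@end example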
+
+There are two other operators that work on buffers. In Emacs, a
+@dfn{buffer} is, naturally, an Emacs buffer. For other programs, the
+regexp library routines that @code{gawk} uses consider the entire
+string to be matched as the buffer.
+
+For @code{awk}, since @samp{^} and @samp{$} always work in terms
+of the beginning and end of strings, these operators don't add any
+new capabilities. They are provided for compatibility with other GNU
+software.
+
+@cindex buffer matching operators
+@table @code
+@cindex @code{\`} regexp operator
+@item \`
+This operator matches the empty string at the
+beginning of the buffer.
+
+@cindex @code{\'} regexp operator
+@item \'
+This operator matches the empty string at the
+end of the buffer.
+@end table
+
+In other GNU software, the word boundary operator is @samp{\b}. However,
+that conflicts with the @code{awk} language's definition of @samp{\b}
+as backspace, so @code{gawk} uses a different letter.
+
+An alternative method would have been to require two backslashes in the
+GNU operators, but this was deemed to be too confusing, and the current
+method of using @samp{\y} for the GNU @samp{\b} appears to be the
+lesser of two evils.
+
+@c NOTE!!! Keep this in sync with the same table in the summary appendix!
+@cindex regexp, effect of command line options
+The various command line options
+(@pxref{Options, ,Command Line Options})
+control how @code{gawk} interprets characters in regexps.
+
+@table @asis
+@item No options
+In the default case, @code{gawk} provides all the facilities of
+POSIX regexps and the GNU regexp operators described
+@iftex
+above.
+@end iftex
+@ifinfo
+in @ref{Regexp Operators, ,Regular Expression Operators}.
+@end ifinfo
+However, interval expressions are not supported.
+
+@item @code{--posix}
+Only POSIX regexps are supported; the GNU operators are not special
+(e.g., @samp{\w} matches a literal @samp{w}). Interval expressions
+are allowed.
+
+@item @code{--traditional}
+Traditional Unix @code{awk} regexps are matched. The GNU operators
+are not special, interval expressions are not available, and neither
+are the POSIX character classes (@code{[[:alnum:]]} and so on).
+Characters described by octal and hexadecimal escape sequences are
+treated literally, even if they represent regexp metacharacters.
+
+@item @code{--re-interval}
+Allow interval expressions in regexps, even if @samp{--traditional}
+has been provided.
+@end table
+
+@node Case-sensitivity, Leftmost Longest, GNU Regexp Operators, Regexp
+@section Case-sensitivity in Matching
+
+@cindex case sensitivity
+@cindex ignoring case
+Case is normally significant in regular expressions, both when matching
+ordinary characters (i.e.@: not metacharacters), and inside character
+sets. Thus a @samp{w} in a regular expression matches only a lower-case
+@samp{w} and not an upper-case @samp{W}.
+
+The simplest way to do a case-independent match is to use a character
+list: @samp{[Ww]}. However, this can be cumbersome if you need to use it
+often; and it can make the regular expressions harder to
+read. There are two alternatives that you might prefer.
+
+One way to do a case-insensitive match at a particular point in the
+program is to convert the data to a single case, using the
+@code{tolower} or @code{toupper} built-in string functions (which we
+haven't discussed yet;
+@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+For example:
+
+@example
+tolower($1) ~ /foo/ @{ @dots{} @}
+@end example
+
+@noindent
+converts the first field to lower-case before matching against it.
+This will work in any POSIX-compliant implementation of @code{awk}.
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex @code{~} operator
+@cindex @code{!~} operator
+@vindex IGNORECASE
+Another method, specific to @code{gawk}, is to set the variable
+@code{IGNORECASE} to a non-zero value (@pxref{Built-in Variables}).
+When @code{IGNORECASE} is not zero, @emph{all} regexp and string
+operations ignore case. Changing the value of
+@code{IGNORECASE} dynamically controls the case sensitivity of your
+program as it runs. Case is significant by default because
+@code{IGNORECASE} (like most variables) is initialized to zero.
+
+@example
+x = "aB"
+if (x ~ /ab/) @dots{} # this test will fail
+
+IGNORECASE = 1
+if (x ~ /ab/) @dots{} # now it will succeed
+@end example
+
+In general, you cannot use @code{IGNORECASE} to make certain rules
+case-insensitive and other rules case-sensitive, because there is no way
+to set @code{IGNORECASE} just for the pattern of a particular rule.
+@ignore
+This isn't quite true. Consider:
+
+ IGNORECASE=1 && /foObAr/ { .... }
+ IGNORECASE=0 || /foobar/ { .... }
+
+But that's pretty bad style and I don't want to get into it at this
+late date.
+@end ignore
+To do this, you must use character lists or @code{tolower}. However, one
+thing you can do only with @code{IGNORECASE} is turn case-sensitivity on
+or off dynamically for all the rules at once.
+
+@code{IGNORECASE} can be set on the command line, or in a @code{BEGIN} rule
+(@pxref{Other Arguments, ,Other Command Line Arguments}; also
+@pxref{Using BEGIN/END, ,Startup and Cleanup Actions}).
+Setting @code{IGNORECASE} from the command line is a way to make
+a program case-insensitive without having to edit it.
+
+Prior to version 3.0 of @code{gawk}, the value of @code{IGNORECASE}
+only affected regexp operations. It did not affect string comparison
+with @samp{==}, @samp{!=}, and so on.
+Beginning with version 3.0, both regexp and string comparison
+operations are affected by @code{IGNORECASE}.
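+
+For example, a short sketch of the version 3.0 behavior just described:
+
+@example
+$ gawk 'BEGIN @{ IGNORECASE = 1; print ("ABC" == "abc") @}'
+@print{} 1
+@end example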
+
+@cindex ISO 8859-1
+@cindex ISO Latin-1
+Beginning with version 3.0 of @code{gawk}, the equivalences between upper-case
+and lower-case characters are based on the ISO-8859-1 (ISO Latin-1)
+character set. This character set is a superset of the traditional 128
+ASCII characters, that also provides a number of characters suitable
+for use with European languages.
+@ignore
+A pure ASCII character set can be used instead if @code{gawk} is compiled
+with @samp{-DUSE_PURE_ASCII}.
+@end ignore
+
+The value of @code{IGNORECASE} has no effect if @code{gawk} is in
+compatibility mode (@pxref{Options, ,Command Line Options}).
+Case is always significant in compatibility mode.
+
+@node Leftmost Longest, Computed Regexps, Case-sensitivity, Regexp
+@section How Much Text Matches?
+
+@cindex leftmost longest match
+@cindex matching, leftmost longest
+Consider the following example:
+
+@example
+echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@end example
+
+This example uses the @code{sub} function (which we haven't discussed yet,
+@pxref{String Functions, ,Built-in Functions for String Manipulation})
+to make a change to the input record. Here, the regexp @code{/a+/}
+indicates ``one or more @samp{a} characters,'' and the replacement
+text is @samp{<A>}.
+
+The input contains four @samp{a} characters. What will the output be?
+In other words, how many is ``one or more''---will @code{awk} match two,
+three, or all four @samp{a} characters?
+
+The answer is, @code{awk} (and POSIX) regular expressions always match
+the leftmost, @emph{longest} sequence of input characters that can
+match. Thus, in this example, all four @samp{a} characters are
+replaced with @samp{<A>}.
+
+@example
+$ echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@print{} <A>bcd
+@end example
+
+For simple match/no-match tests, this is not so important. But when doing
+regexp-based field and record splitting, and
+text matching and substitutions with the @code{match}, @code{sub}, @code{gsub},
+and @code{gensub} functions, it is very important.
+@ifinfo
+@xref{String Functions, ,Built-in Functions for String Manipulation},
+for more information on these functions.
+@end ifinfo
+Understanding this principle is also important for regexp-based record
+and field splitting (@pxref{Records, ,How Input is Split into Records},
+and also @pxref{Field Separators, ,Specifying How Fields are Separated}).
+
+@node Computed Regexps, , Leftmost Longest, Regexp
+@section Using Dynamic Regexps
+
+@cindex computed regular expressions
+@cindex regular expressions, computed
+@cindex dynamic regular expressions
+@cindex regexp, dynamic
+@cindex @code{~} operator
+@cindex @code{!~} operator
+The right hand side of a @samp{~} or @samp{!~} operator need not be a
+regexp constant (i.e.@: a string of characters between slashes). It may
+be any expression. The expression is evaluated, and converted if
+necessary to a string; the contents of the string are used as the
+regexp. A regexp that is computed in this way is called a @dfn{dynamic
+regexp}. For example:
+
+@example
+BEGIN @{ identifier_regexp = "[A-Za-z_][A-Za-z_0-9]+" @}
+$0 ~ identifier_regexp @{ print @}
+@end example
+
+@noindent
+sets @code{identifier_regexp} to a regexp that describes @code{awk}
+variable names, and tests if the input record matches this regexp.
+
+@strong{Caution:} When using the @samp{~} and @samp{!~}
+operators, there is a difference between a regexp constant
+enclosed in slashes, and a string constant enclosed in double quotes.
+If you are going to use a string constant, you have to understand that
+the string is in essence scanned @emph{twice}; the first time when
+@code{awk} reads your program, and the second time when it goes to
+match the string on the left-hand side of the operator with the pattern
+on the right. This is true of any string valued expression (such as
+@code{identifier_regexp} above), not just string constants.
+
+@cindex regexp constants, difference between slashes and quotes
+What difference does it make if the string is
+scanned twice? The answer has to do with escape sequences, and particularly
+with backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
+Only one backslash is needed. To do the same thing with a string,
+you would have to type @code{"\\*"}. The first backslash escapes the
+second one, so that the string actually contains the
+two characters @samp{\} and @samp{*}.
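+
+Here is a sketch contrasting the two forms; both tests look for a
+literal @samp{*}:
+
+@example
+$ echo 'a*b' | awk '$0 ~ /\*/   @{ print "regexp constant matched" @}'
+@print{} regexp constant matched
+$ echo 'a*b' | awk '$0 ~ "\\*"  @{ print "string constant matched" @}'
+@print{} string constant matched
+@end example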
+
+@cindex common mistakes
+@cindex mistakes, common
+@cindex errors, common
+Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is ``regexp
+constants,'' for several reasons.
+
+@enumerate 1
+@item
+String constants are more complicated to write, and
+more difficult to read. Using regexp constants makes your programs
+less error-prone. Not understanding the difference between the two
+kinds of constants is a common source of errors.
+
+@item
+It is also more efficient to use regexp constants: @code{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. When using a string constant,
+@code{awk} must first convert the string into this internal form, and
+then perform the pattern matching.
+
+@item
+Using regexp constants is better style; it shows clearly that you
+intend a regexp match.
+@end enumerate
+
+@node Reading Files, Printing, Regexp, Top
+@chapter Reading Input Files
+
+@cindex reading files
+@cindex input
+@cindex standard input
+@vindex FILENAME
+In the typical @code{awk} program, all input is read either from the
+standard input (by default the keyboard, but often a pipe from another
+command) or from files whose names you specify on the @code{awk} command
+line. If you specify input files, @code{awk} reads them in order, reading
+all the data from one before going on to the next. The name of the current
+input file can be found in the built-in variable @code{FILENAME}
+(@pxref{Built-in Variables}).
+
+The input is read in units called @dfn{records}, and processed by the
+rules of your program one record at a time.
+By default, each record is one line. Each
+record is automatically split into chunks called @dfn{fields}.
+This makes it more convenient for programs to work on the parts of a record.
+
+On rare occasions you will need to use the @code{getline} command.
+The @code{getline} command is valuable, both because it
+can do explicit input from any number of files, and because the files
+used with it do not have to be named on the @code{awk} command line
+(@pxref{Getline, ,Explicit Input with @code{getline}}).
+
+@menu
+* Records:: Controlling how data is split into records.
+* Fields:: An introduction to fields.
+* Non-Constant Fields:: Non-constant Field Numbers.
+* Changing Fields:: Changing the Contents of a Field.
+* Field Separators:: The field separator and how to change it.
+* Constant Size:: Reading constant width data.
+* Multiple Line:: Reading multi-line records.
+* Getline:: Reading files under explicit program control
+ using the @code{getline} function.
+@end menu
+
+@node Records, Fields, Reading Files, Reading Files
+@section How Input is Split into Records
+
+@cindex record separator, @code{RS}
+@cindex changing the record separator
+@cindex record, definition of
+@vindex RS
+The @code{awk} utility divides the input for your @code{awk}
+program into records and fields.
+Records are separated by a character called the @dfn{record separator}.
+By default, the record separator is the newline character.
+This is why records are, by default, single lines.
+You can use a different character for the record separator by
+assigning the character to the built-in variable @code{RS}.
+
+You can change the value of @code{RS} in the @code{awk} program,
+like any other variable, with the
+assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+The new record-separator character should be enclosed in quotation marks,
+which indicate
+a string constant. Often the right time to do this is at the beginning
+of execution, before any input has been processed, so that the very
+first record will be read with the proper separator. To do this, use
+the special @code{BEGIN} pattern
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). For
+example:
+
+@example
+awk 'BEGIN @{ RS = "/" @} ; @{ print $0 @}' BBS-list
+@end example
+
+@noindent
+changes the value of @code{RS} to @code{"/"}, before reading any input.
+This is a string whose first character is a slash; as a result, records
+are separated by slashes. Then the input file is read, and the second
+rule in the @code{awk} program (the action with no pattern) prints each
+record. Since each @code{print} statement adds a newline at the end of
+its output, the effect of this @code{awk} program is to copy the input
+with each slash changed to a newline. Here are the results of running
+the program on @file{BBS-list}:
+
+@example
+@group
+$ awk 'BEGIN @{ RS = "/" @} ; @{ print $0 @}' BBS-list
+@print{} aardvark 555-5553 1200
+@print{} 300 B
+@print{} alpo-net 555-3412 2400
+@print{} 1200
+@print{} 300 A
+@print{} barfly 555-7685 1200
+@print{} 300 A
+@print{} bites 555-1675 2400
+@print{} 1200
+@print{} 300 A
+@print{} camelot 555-0542 300 C
+@print{} core 555-2912 1200
+@print{} 300 C
+@print{} fooey 555-1234 2400
+@print{} 1200
+@print{} 300 B
+@print{} foot 555-6699 1200
+@print{} 300 B
+@print{} macfoo 555-6480 1200
+@print{} 300 A
+@print{} sdace 555-3430 2400
+@print{} 1200
+@print{} 300 A
+@print{} sabafoo 555-2127 1200
+@print{} 300 C
+@print{}
+@end group
+@end example
+
+@noindent
+Note that the entry for the @samp{camelot} BBS is not split.
+In the original data file
+(@pxref{Sample Data Files, , Data Files for the Examples}),
+the line looks like this:
+
+@example
+camelot 555-0542 300 C
+@end example
+
+@noindent
+It only has one baud rate; there are no slashes in the record.
+
+Another way to change the record separator is on the command line,
+using the variable-assignment feature
+(@pxref{Other Arguments, ,Other Command Line Arguments}).
+
+@example
+awk '@{ print $0 @}' RS="/" BBS-list
+@end example
+
+@noindent
+This sets @code{RS} to @samp{/} before processing @file{BBS-list}.
+
+Using an unusual character such as @samp{/} for the record separator
+produces correct behavior in the vast majority of cases. However,
+the following (extreme) pipeline prints a surprising @samp{1}. There
+is one field, consisting of a newline. The value of the built-in
+variable @code{NF} is the number of fields in the current record.
+
+@example
+$ echo | awk 'BEGIN @{ RS = "a" @} ; @{ print NF @}'
+@print{} 1
+@end example
+
+@cindex dark corner
+@noindent
+Reaching the end of an input file terminates the current input record,
+even if the last character in the file is not the character in @code{RS}
+(d.c.).
+
+@cindex empty string
+The empty string, @code{""} (a string of no characters), has a special meaning
+as the value of @code{RS}: it means that records are separated
+by one or more blank lines, and nothing else.
+@xref{Multiple Line, ,Multiple-Line Records}, for more details.
+
+If you change the value of @code{RS} in the middle of an @code{awk} run,
+the new value is used to delimit subsequent records, but the record
+currently being processed (and records already processed) are not
+affected.
+
+@vindex RT
+@cindex record terminator, @code{RT}
+@cindex terminator, record
+@cindex differences between @code{gawk} and @code{awk}
+After the end of the record has been determined, @code{gawk}
+sets the variable @code{RT} to the text in the input that matched
+@code{RS}.
+
+@cindex regular expressions as record separators
+The value of @code{RS} is in fact not limited to a one-character
+string. It can be any regular expression
+(@pxref{Regexp, ,Regular Expressions}).
+In general, each record
+ends at the next string that matches the regular expression; the next
+record starts at the end of the matching string. This general rule is
+actually at work in the usual case, where @code{RS} contains just a
+newline: a record ends at the beginning of the next matching string (the
+next newline in the input) and the following record starts just after
+the end of this string (at the first character of the following line).
+The newline, since it matches @code{RS}, is not part of either record.
+
+When @code{RS} is a single character, @code{RT} will
+contain the same single character. However, when @code{RS} is a
+regular expression, then @code{RT} becomes more useful; it contains
+the actual input text that matched the regular expression.
+
+The following example illustrates both of these features.
+It sets @code{RS} equal to a regular expression that
+matches either a newline, or a series of one or more upper-case letters
+with optional leading and/or trailing white space
+(@pxref{Regexp, , Regular Expressions}).
+
+@example
+$ echo record 1 AAAA record 2 BBBB record 3 |
+> gawk 'BEGIN @{ RS = "\n|( *[[:upper:]]+ *)" @}
+> @{ print "Record =", $0, "and RT =", RT @}'
+@print{} Record = record 1 and RT = AAAA
+@print{} Record = record 2 and RT = BBBB
+@print{} Record = record 3 and RT =
+@print{}
+@end example
+
+@noindent
+The final line of output has an extra blank line. This is because the
+value of @code{RT} is a newline, and then the @code{print} statement
+supplies its own terminating newline.
+
+@xref{Simple Sed, ,A Simple Stream Editor}, for a more useful example
+of @code{RS} as a regexp and @code{RT}.
+
+@cindex differences between @code{gawk} and @code{awk}
+The use of @code{RS} as a regular expression and the @code{RT}
+variable are @code{gawk} extensions; they are not available in
+compatibility mode
+(@pxref{Options, ,Command Line Options}).
+In compatibility mode, only the first character of the value of
+@code{RS} is used to determine the end of the record.
+
+@cindex number of records, @code{NR}, @code{FNR}
+@vindex NR
+@vindex FNR
+The @code{awk} utility keeps track of the number of records that have
+been read so far from the current input file. This value is stored in a
+built-in variable called @code{FNR}. It is reset to zero when a new
+file is started. Another built-in variable, @code{NR}, is the total
+number of input records read so far from all data files. It starts at zero
+but is never automatically reset to zero.
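+
+For example, here is a sketch using two small scratch files (the file
+names and contents are made up for the example):
+
+@example
+$ printf 'a\nb\n' > file1
+$ printf 'c\nd\n' > file2
+$ awk '@{ print FILENAME, FNR, NR @}' file1 file2
+@print{} file1 1 1
+@print{} file1 2 2
+@print{} file2 1 3
+@print{} file2 2 4
+@end example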
+
+@node Fields, Non-Constant Fields, Records, Reading Files
+@section Examining Fields
+
+@cindex examining fields
+@cindex fields
+@cindex accessing fields
+When @code{awk} reads an input record, the record is
+automatically separated or @dfn{parsed} by the interpreter into chunks
+called @dfn{fields}. By default, fields are separated by whitespace,
+like words in a line.
+Whitespace in @code{awk} means any string of one or more spaces and/or
+tabs; other characters such as newline, formfeed, and so on, that are
+considered whitespace by other languages are @emph{not} considered
+whitespace by @code{awk}.
+
+The purpose of fields is to make it more convenient for you to refer to
+these pieces of the record. You don't have to use them---you can
+operate on the whole record if you wish---but fields are what make
+simple @code{awk} programs so powerful.
+
+@cindex @code{$} (field operator)
+@cindex field operator @code{$}
+To refer to a field in an @code{awk} program, you use a dollar-sign,
+@samp{$}, followed by the number of the field you want. Thus, @code{$1}
+refers to the first field, @code{$2} to the second, and so on. For
+example, suppose the following is a line of input:
+
+@example
+This seems like a pretty nice example.
+@end example
+
+@noindent
+Here the first field, or @code{$1}, is @samp{This}; the second field, or
+@code{$2}, is @samp{seems}; and so on. Note that the last field,
+@code{$7}, is @samp{example.}. Because there is no space between the
+@samp{e} and the @samp{.}, the period is considered part of the seventh
+field.
+
+@vindex NF
+@cindex number of fields, @code{NF}
+@code{NF} is a built-in variable whose value
+is the number of fields in the current record.
+@code{awk} updates the value of @code{NF} automatically, each time
+a record is read.
+
+No matter how many fields there are, the last field in a record can be
+represented by @code{$NF}. So, in the example above, @code{$NF} would
+be the same as @code{$7}, which is @samp{example.}. Why this works is
+explained below (@pxref{Non-Constant Fields, ,Non-constant Field Numbers}).
+If you try to reference a field beyond the last one, such as @code{$8}
+when the record has only seven fields, you get the empty string.
+@c the empty string acts like 0 in some contexts, but I don't want to
+@c get into that here....
+
+@code{$0}, which looks like a reference to the ``zeroth'' field, is
+a special case: it represents the whole input record. @code{$0} is
+used when you are not interested in fields.
+
+Here are some more examples:
+
+@example
+@group
+$ awk '$1 ~ /foo/ @{ print $0 @}' BBS-list
+@print{} fooey 555-1234 2400/1200/300 B
+@print{} foot 555-6699 1200/300 B
+@print{} macfoo 555-6480 1200/300 A
+@print{} sabafoo 555-2127 1200/300 C
+@end group
+@end example
+
+@noindent
+This example prints each record in the file @file{BBS-list} whose first
+field contains the string @samp{foo}. The operator @samp{~} is called a
+@dfn{matching operator}
+(@pxref{Regexp Usage, , How to Use Regular Expressions});
+it tests whether a string (here, the field @code{$1}) matches a given regular
+expression.
+
+By contrast, the following example
+looks for @samp{foo} in @emph{the entire record} and prints the first
+field and the last field for each input record containing a
+match.
+
+@example
+@group
+$ awk '/foo/ @{ print $1, $NF @}' BBS-list
+@print{} fooey B
+@print{} foot B
+@print{} macfoo A
+@print{} sabafoo C
+@end group
+@end example
+
+@node Non-Constant Fields, Changing Fields, Fields, Reading Files
+@section Non-constant Field Numbers
+
+The number of a field does not need to be a constant. Any expression in
+the @code{awk} language can be used after a @samp{$} to refer to a
+field. The value of the expression specifies the field number. If the
+value is a string, rather than a number, it is converted to a number.
+Consider this example:
+
+@example
+awk '@{ print $NR @}'
+@end example
+
+@noindent
+Recall that @code{NR} is the number of records read so far: one in the
+first record, two in the second, etc. So this example prints the first
+field of the first record, the second field of the second record, and so
+on. For the twentieth record, field number 20 is printed; most likely,
+the record has fewer than 20 fields, so this prints a blank line.
+
+Here is another example of using expressions as field numbers:
+
+@example
+awk '@{ print $(2*2) @}' BBS-list
+@end example
+
+@code{awk} must evaluate the expression @samp{(2*2)} and use
+its value as the number of the field to print. The @samp{*} sign
+represents multiplication, so the expression @samp{2*2} evaluates to four.
+The parentheses are used so that the multiplication is done before the
+@samp{$} operation; they are necessary whenever there is a binary
+operator in the field-number expression. This example, then, prints the
+hours of operation (the fourth field) for every line of the file
+@file{BBS-list}. (All of the @code{awk} operators are listed, in
+order of decreasing precedence, in
+@ref{Precedence, , Operator Precedence (How Operators Nest)}.)
+
+If the field number you compute is zero, you get the entire record.
+Thus, @code{$(2-2)} has the same value as @code{$0}. Negative field
+numbers are not allowed; trying to reference one will usually terminate
+your running @code{awk} program. (The POSIX standard does not define
+what happens when you reference a negative field number. @code{gawk}
+will notice this and terminate your program. Other @code{awk}
+implementations may behave differently.)
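+
+For example, a quick sketch of the zero case:
+
+@example
+$ echo a b c | awk '@{ print $(2-2) @}'
+@print{} a b c
+@end example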
+
+As mentioned in @ref{Fields, ,Examining Fields},
+the number of fields in the current record is stored in the built-in
+variable @code{NF} (also @pxref{Built-in Variables}). The expression
+@code{$NF} is not a special feature: it is the direct consequence of
+evaluating @code{NF} and using its value as a field number.
+
+@node Changing Fields, Field Separators, Non-Constant Fields, Reading Files
+@section Changing the Contents of a Field
+
+@cindex field, changing contents of
+@cindex changing contents of a field
+@cindex assignment to fields
+You can change the contents of a field as seen by @code{awk} within an
+@code{awk} program; this changes what @code{awk} perceives as the
+current input record. (The actual input is untouched; @code{awk} @emph{never}
+modifies the input file.)
+
+Consider this example and its output:
+
+@example
+@group
+$ awk '@{ $3 = $2 - 10; print $2, $3 @}' inventory-shipped
+@print{} 13 3
+@print{} 15 5
+@print{} 15 5
+@dots{}
+@end group
+@end example
+
+@noindent
+The @samp{-} sign represents subtraction, so this program reassigns
+field three, @code{$3}, to be the value of field two minus ten,
+@samp{$2 - 10}. (@xref{Arithmetic Ops, ,Arithmetic Operators}.)
+Then field two, and the new value for field three, are printed.
+
+In order for this to work, the text in field @code{$2} must make sense
+as a number; the string of characters must be converted to a number in
+order for the computer to do arithmetic on it. The number resulting
+from the subtraction is converted back to a string of characters which
+then becomes field three.
+@xref{Conversion, ,Conversion of Strings and Numbers}.
+
+When you change the value of a field (as perceived by @code{awk}), the
+text of the input record is recalculated to contain the new field where
+the old one was. Therefore, @code{$0} changes to reflect the altered
+field. Thus, this program
+prints a copy of the input file, with 10 subtracted from the second
+field of each line.
+
+@example
+@group
+$ awk '@{ $2 = $2 - 10; print $0 @}' inventory-shipped
+@print{} Jan 3 25 15 115
+@print{} Feb 5 32 24 226
+@print{} Mar 5 24 34 228
+@dots{}
+@end group
+@end example
+
+You can also assign contents to fields that are out of range. For
+example:
+
+@example
+$ awk '@{ $6 = ($5 + $4 + $3 + $2)
+> print $6 @}' inventory-shipped
+@print{} 168
+@print{} 297
+@print{} 301
+@dots{}
+@end example
+
+@noindent
+We've just created @code{$6}, whose value is the sum of fields
+@code{$2}, @code{$3}, @code{$4}, and @code{$5}. The @samp{+} sign
+represents addition. For the file @file{inventory-shipped}, @code{$6}
+represents the total number of parcels shipped for a particular month.
+
+Creating a new field changes @code{awk}'s internal copy of the current
+input record---the value of @code{$0}. Thus, if you do @samp{print $0}
+after adding a field, the record printed includes the new field, with
+the appropriate number of field separators between it and the previously
+existing fields.
+
+This recomputation affects and is affected by
+@code{NF} (the number of fields; @pxref{Fields, ,Examining Fields}),
+and by a feature that has not been discussed yet,
+the @dfn{output field separator}, @code{OFS},
+which is used to separate the fields (@pxref{Output Separators}).
+For example, the value of @code{NF} is set to the number of the highest
+field you create.
+
+Note, however, that merely @emph{referencing} an out-of-range field
+does @emph{not} change the value of either @code{$0} or @code{NF}.
+Referencing an out-of-range field only produces an empty string. For
+example:
+
+@example
+if ($(NF+1) != "")
+ print "can't happen"
+else
+ print "everything is normal"
+@end example
+
+@noindent
+should print @samp{everything is normal}, because @code{NF+1} is certain
+to be out of range. (@xref{If Statement, ,The @code{if}-@code{else} Statement},
+for more information about @code{awk}'s @code{if-else} statements.
+@xref{Typing and Comparison, ,Variable Typing and Comparison Expressions}, for more information
+about the @samp{!=} operator.)
+
+It is important to note that making an assignment to an existing field
+will change the
+value of @code{$0}, but will not change the value of @code{NF},
+even when you assign the empty string to a field. For example:
+
+@example
+@group
+$ echo a b c d | awk '@{ OFS = ":"; $2 = ""
+> print $0; print NF @}'
+@print{} a::c:d
+@print{} 4
+@end group
+@end example
+
+@noindent
+The field is still there; it just has an empty value. You can tell
+because there are two colons in a row.
+
+This example shows what happens if you create a new field.
+
+@example
+$ echo a b c d | awk '@{ OFS = ":"; $2 = ""; $6 = "new"
+> print $0; print NF @}'
+@print{} a::c:d::new
+@print{} 6
+@end example
+
+@noindent
+The intervening field, @code{$5}, is created with an empty value
+(indicated by the second pair of adjacent colons),
+and @code{NF} is updated with the value six.
+
+@node Field Separators, Constant Size, Changing Fields, Reading Files
+@section Specifying How Fields are Separated
+
+This section is rather long; it describes one of the most fundamental
+operations in @code{awk}.
+
+@menu
+* Basic Field Splitting:: How fields are split with single characters
+ or simple strings.
+* Regexp Field Splitting:: Using regexps as the field separator.
+* Single Character Fields:: Making each character a separate field.
+* Command Line Field Separator:: Setting @code{FS} from the command line.
+* Field Splitting Summary:: Some final points and a summary table.
+@end menu
+
+@node Basic Field Splitting, Regexp Field Splitting, Field Separators, Field Separators
+@subsection The Basics of Field Separating
+@vindex FS
+@cindex fields, separating
+@cindex field separator, @code{FS}
+
+The @dfn{field separator}, which is either a single character or a regular
+expression, controls the way @code{awk} splits an input record into fields.
+@code{awk} scans the input record for character sequences that
+match the separator; the fields themselves are the text between the matches.
+
+In the examples below, we use the bullet symbol ``@bullet{}'' to represent
+spaces in the output.
+
+If the field separator is @samp{oo}, then the following line:
+
+@example
+moo goo gai pan
+@end example
+
+@noindent
+would be split into three fields: @samp{m}, @samp{@bullet{}g} and
+@samp{@bullet{}gai@bullet{}pan}.
+Note the leading spaces in the values of the second and third fields.
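+
+Here is a sketch of that split; it prints the number of fields and
+brackets the second field so its leading space is visible:
+
+@example
+$ echo 'moo goo gai pan' |
+> awk 'BEGIN @{ FS = "oo" @} @{ print NF, "[" $2 "]" @}'
+@print{} 3 [ g]
+@end example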
+
+@cindex common mistakes
+@cindex mistakes, common
+@cindex errors, common
+The field separator is represented by the built-in variable @code{FS}.
+Shell programmers take note! @code{awk} does @emph{not} use the name @code{IFS}
+which is used by the POSIX compatible shells (such as the Bourne shell,
+@code{sh}, or the GNU Bourne-Again Shell, Bash).
+
+You can change the value of @code{FS} in the @code{awk} program with the
+assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+Often the right time to do this is at the beginning of execution,
+before any input has been processed, so that the very first record
+will be read with the proper separator. To do this, use the special
+@code{BEGIN} pattern
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}).
+For example, here we set the value of @code{FS} to the string
+@code{","}:
+
+@example
+awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}'
+@end example
+
+@noindent
+Given the input line,
+
+@example
+John Q. Smith, 29 Oak St., Walamazoo, MI 42139
+@end example
+
+@noindent
+this @code{awk} program extracts and prints the string
+@samp{@bullet{}29@bullet{}Oak@bullet{}St.}.
+
+@cindex field separator, choice of
+@cindex regular expressions as field separators
+Sometimes your input data will contain separator characters that don't
+separate fields the way you thought they would. For instance, the
+person's name in the example we just used might have a title or
+suffix attached, such as @samp{John Q. Smith, LXIX}. From input
+containing such a name:
+
+@example
+John Q. Smith, LXIX, 29 Oak St., Walamazoo, MI 42139
+@end example
+
+@noindent
+@c careful of an overfull hbox here!
+the above program would extract @samp{@bullet{}LXIX}, instead of
+@samp{@bullet{}29@bullet{}Oak@bullet{}St.}.
+If you were expecting the program to print the
+address, you would be surprised. The moral is: choose your data layout and
+separator characters carefully to prevent such problems.
+
+@iftex
+As you know, normally,
+@end iftex
+@ifinfo
+Normally,
+@end ifinfo
+fields are separated by whitespace sequences
+(spaces and tabs), not by single spaces: two spaces in a row do not
+delimit an empty field. The default value of the field separator @code{FS}
+is a string containing a single space, @w{@code{" "}}. If this value were
+interpreted in the usual way, each space character would separate
+fields, so two spaces in a row would make an empty field between them.
+The reason this does not happen is that a single space as the value of
+@code{FS} is a special case: it is taken to specify the default manner
+of delimiting fields.
+
+If @code{FS} is any other single character, such as @code{","}, then
+each occurrence of that character separates two fields. Two consecutive
+occurrences delimit an empty field. If the character occurs at the
+beginning or the end of the line, that too delimits an empty field. The
+space character is the only single character which does not follow these
+rules.
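+
+For example, here is a sketch of the comma rule; the leading comma, the
+two commas in a row, and the trailing comma each delimit an empty field:
+
+@example
+$ echo ',a,,b,' | awk 'BEGIN @{ FS = "," @} @{ print NF @}'
+@print{} 5
+@end example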
+
+@node Regexp Field Splitting, Single Character Fields, Basic Field Splitting, Field Separators
+@subsection Using Regular Expressions to Separate Fields
+
+The previous
+@iftex
+subsection
+@end iftex
+@ifinfo
+node
+@end ifinfo
+discussed the use of single characters or simple strings as the
+value of @code{FS}.
+More generally, the value of @code{FS} may be a string containing any
+regular expression. In this case, each match in the record for the regular
+expression separates fields. For example, the assignment:
+
+@example
+FS = ", \t"
+@end example
+
+@noindent
+makes every occurrence of a comma followed by a space and a tab
+in an input line into a field separator. (@samp{\t}
+is an @dfn{escape sequence} that stands for a tab;
+@pxref{Escape Sequences},
+for the complete list of similar escape sequences.)
+
+For a less trivial example of a regular expression, suppose you want
+single spaces to separate fields the way single commas were used above.
+You can set @code{FS} to @w{@code{"[@ ]"}} (left bracket, space, right
+bracket). This regular expression matches a single space and nothing else
+(@pxref{Regexp, ,Regular Expressions}).
+
+There is an important difference between the two cases of @samp{FS = @w{" "}}
+(a single space) and @samp{FS = @w{"[ \t]+"}} (left bracket, space, backslash,
+``t'', right bracket, which is a regular expression
+matching one or more spaces or tabs). For both values of @code{FS}, fields
+are separated by runs of spaces and/or tabs. However, when the value of
+@code{FS} is @w{@code{" "}}, @code{awk} will first strip leading and trailing
+whitespace from the record, and then decide where the fields are.
+
+For example, the following pipeline prints @samp{b}:
+
+@example
+$ echo ' a b c d ' | awk '@{ print $2 @}'
+@print{} b
+@end example
+
+@noindent
+However, this pipeline prints @samp{a} (note the extra spaces around
+each letter):
+
+@example
+$ echo ' a b c d ' | awk 'BEGIN @{ FS = "[ \t]+" @}
+> @{ print $2 @}'
+@print{} a
+@end example
+
+@noindent
+@cindex null string
+@cindex empty string
+In this case, the first field is @dfn{null}, or empty.
+
+The stripping of leading and trailing whitespace also comes into
+play whenever @code{$0} is recomputed. For instance, study this pipeline:
+
+@example
+$ echo ' a b c d' | awk '@{ print; $2 = $2; print @}'
+@print{} a b c d
+@print{} a b c d
+@end example
+
+@noindent
+The first @code{print} statement prints the record as it was read,
+with leading whitespace intact. The assignment to @code{$2} rebuilds
+@code{$0} by concatenating @code{$1} through @code{$NF} together,
+separated by the value of @code{OFS}. Since the leading whitespace
+was ignored when finding @code{$1}, it is not part of the new @code{$0}.
+Finally, the last @code{print} statement prints the new @code{$0}.
+
+@node Single Character Fields, Command Line Field Separator, Regexp Field Splitting, Field Separators
+@subsection Making Each Character a Separate Field
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex single character fields
+There are times when you may want to examine each character
+of a record separately. In @code{gawk}, this is easy to do: you
+simply assign the null string (@code{""}) to @code{FS}. In this case,
+each individual character in the record will become a separate field.
+Here is an example:
+@c extra verbiage due to page boundaries
+
+@example
+echo a b | gawk 'BEGIN @{ FS = "" @}
+ @{
+ for (i = 1; i <= NF; i = i + 1)
+ print "Field", i, "is", $i
+ @}'
+@end example
+
+@noindent
+The output from this is:
+
+@example
+Field 1 is a
+Field 2 is
+Field 3 is b
+@end example
+
+@cindex dark corner
+Traditionally, the behavior for @code{FS} equal to @code{""} was not defined.
+In this case, Unix @code{awk} would simply treat the entire record
+as only having one field (d.c.). In compatibility mode
+(@pxref{Options, ,Command Line Options}),
+if @code{FS} is the null string, then @code{gawk} will also
+behave this way.
+
+@node Command Line Field Separator, Field Splitting Summary, Single Character Fields, Field Separators
+@subsection Setting @code{FS} from the Command Line
+@cindex @code{-F} option
+@cindex field separator, on command line
+@cindex command line, setting @code{FS} on
+
+@code{FS} can be set on the command line. You use the @samp{-F} option to
+do so. For example:
+
+@example
+awk -F, '@var{program}' @var{input-files}
+@end example
+
+@noindent
+sets @code{FS} to be the @samp{,} character. Notice that the option uses
+a capital @samp{F}. Contrast this with @samp{-f}, which specifies a file
+containing an @code{awk} program. Case is significant in command line options:
+the @samp{-F} and @samp{-f} options have nothing to do with each other.
+You can use both options at the same time to set the @code{FS} variable
+@emph{and} get an @code{awk} program from a file.
+
+The value used for the argument to @samp{-F} is processed in exactly the
+same way as assignments to the built-in variable @code{FS}. This means that
+if the field separator contains special characters, they must be escaped
+appropriately. For example, to use a @samp{\} as the field separator, you
+would have to type:
+
+@example
+# same as FS = "\\"
+awk -F\\\\ '@dots{}' files @dots{}
+@end example
+
+@noindent
+Since @samp{\} is used for quoting in the shell, @code{awk} will see
+@samp{-F\\}. Then @code{awk} processes the @samp{\\} for escape
+characters (@pxref{Escape Sequences}), finally yielding
+a single @samp{\} to be used for the field separator.
+
+@cindex historical features
+As a special case, in compatibility mode
+(@pxref{Options, ,Command Line Options}), if the
+argument to @samp{-F} is @samp{t}, then @code{FS} is set to the tab
+character. This is because if you type @samp{-F\t} at the shell,
+without any quotes, the @samp{\} gets deleted, so @code{awk} figures that you
+really want your fields to be separated with tabs, and not @samp{t}s.
+Use @samp{-v FS="t"} on the command line if you really do want to separate
+your fields with @samp{t}s
+(@pxref{Options, ,Command Line Options}).
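+
+That is, to really split fields on the letter @samp{t}, write something
+like the following (the file name @file{data} here is just a placeholder):
+
+@example
+awk -v FS="t" '@{ print $2 @}' data
+@end example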
+
+For example, let's use an @code{awk} program file called @file{baud.awk}
+that contains the pattern @code{/300/}, and the action @samp{print $1}.
+Here is the program:
+
+@example
+/300/ @{ print $1 @}
+@end example
+
+Let's also set @code{FS} to be the @samp{-} character, and run the
+program on the file @file{BBS-list}. The following command prints a
+list of the names of the bulletin boards that operate at 300 baud and
+the first three digits of their phone numbers:
+
+@c tweaked to make the tex output look better in @smallbook
+@example
+@group
+$ awk -F- -f baud.awk BBS-list
+@print{} aardvark 555
+@print{} alpo
+@print{} barfly 555
+@dots{}
+@end group
+@ignore
+@print{} bites 555
+@print{} camelot 555
+@print{} core 555
+@print{} fooey 555
+@print{} foot 555
+@print{} macfoo 555
+@print{} sdace 555
+@print{} sabafoo 555
+@end ignore
+@end example
+
+@noindent
+Note the second line of output. In the original file
+(@pxref{Sample Data Files, ,Data Files for the Examples}),
+the second line looked like this:
+
+@example
+alpo-net 555-3412 2400/1200/300 A
+@end example
+
+The @samp{-} as part of the system's name was used as the field
+separator, instead of the @samp{-} in the phone number that was
+originally intended. This demonstrates why you have to be careful in
+choosing your field and record separators.
+
+On many Unix systems, each user has a separate entry in the system password
+file, one line per user. The information in these lines is separated
+by colons. The first field is the user's logon name, and the second is
+the user's encrypted password. A password file entry might look like this:
+
+@example
+arnold:xyzzy:2076:10:Arnold Robbins:/home/arnold:/bin/sh
+@end example
+
+The following program searches the system password file, and prints
+the entries for users who have no password:
+
+@example
+awk -F: '$2 == ""' /etc/passwd
+@end example
+
+@node Field Splitting Summary, , Command Line Field Separator, Field Separators
+@subsection Field Splitting Summary
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+According to the POSIX standard, @code{awk} is supposed to behave
+as if each record is split into fields at the time that it is read.
+In particular, this means that you can change the value of @code{FS}
+after a record is read, and the value of the fields (i.e.@: how they were split)
+should reflect the old value of @code{FS}, not the new one.
+
+@cindex dark corner
+@cindex @code{sed} utility
+@cindex stream editor
+However, many implementations of @code{awk} do not work this way. Instead,
+they defer splitting the fields until a field is actually
+referenced. The fields will be split
+using the @emph{current} value of @code{FS}! (d.c.)
+This behavior can be difficult
+to diagnose. The following example illustrates the difference
+between the two methods.
+(The @code{sed}@footnote{The @code{sed} utility is a ``stream editor.''
+Its behavior is also defined by the POSIX standard.}
+command prints just the first line of @file{/etc/passwd}.)
+
+@example
+sed 1q /etc/passwd | awk '@{ FS = ":" ; print $1 @}'
+@end example
+
+@noindent
+will usually print
+
+@example
+root
+@end example
+
+@noindent
+on an incorrect implementation of @code{awk}, while @code{gawk}
+will print something like
+
+@example
+root:nSijPlPhZZwgE:0:0:Root:/:
+@end example
+
+The following table summarizes how fields are split, based on the
+value of @code{FS}. (@samp{==} means ``is equal to.'')
+
+@c @cartouche
+@table @code
+@item FS == " "
+Fields are separated by runs of whitespace. Leading and trailing
+whitespace are ignored. This is the default.
+
+@item FS == @var{any other single character}
+Fields are separated by each occurrence of the character. Multiple
+successive occurrences delimit empty fields, as do leading and
+trailing occurrences.
+The character can even be a regexp metacharacter; it does not need
+to be escaped.
+
+@item FS == @var{regexp}
+Fields are separated by occurrences of characters that match @var{regexp}.
+Leading and trailing matches of @var{regexp} delimit empty fields.
+
+@item FS == ""
+Each individual character in the record becomes a separate field.
+@end table
+@c @end cartouche
+
+@node Constant Size, Multiple Line, Field Separators, Reading Files
+@section Reading Fixed-width Data
+
+(This section discusses an advanced, experimental feature. If you are
+a novice @code{awk} user, you may wish to skip it on the first reading.)
+
+@code{gawk} version 2.13 introduced a new facility for dealing with
+fixed-width fields with no distinctive field separator. Data of this
+nature arises, for example, in the input for old FORTRAN programs where
+numbers are run together; or in the output of programs that did not
+anticipate the use of their output as input for other programs.
+
+An example of the latter is a table where all the columns are lined up by
+the use of a variable number of spaces and @emph{empty fields are just
+spaces}. Clearly, @code{awk}'s normal field splitting based on @code{FS}
+will not work well in this case. Although a portable @code{awk} program
+can use a series of @code{substr} calls on @code{$0}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}),
+this is awkward and inefficient for a large number of fields.
+
+The splitting of an input record into fixed-width fields is specified by
+assigning a string containing space-separated numbers to the built-in
+variable @code{FIELDWIDTHS}. Each number specifies the width of the field
+@emph{including} columns between fields. If you want to ignore the columns
+between fields, you can specify the width as a separate field that is
+subsequently ignored.
+
+The following data is the output of the Unix @code{w} utility. It is useful
+to illustrate the use of @code{FIELDWIDTHS}.
+
+@example
+@group
+ 10:06pm up 21 days, 14:04, 23 users
+User tty login@ idle JCPU PCPU what
+hzuo ttyV0 8:58pm 9 5 vi p24.tex
+hzang ttyV3 6:37pm 50 -csh
+eklye ttyV5 9:53pm 7 1 em thes.tex
+dportein ttyV6 8:17pm 1:47 -csh
+gierd ttyD3 10:00pm 1 elm
+dave ttyD4 9:47pm 4 4 w
+brent ttyp0 26Jun91 4:46 26:46 4:41 bash
+dave ttyq4 26Jun9115days 46 46 wnewmail
+@end group
+@end example
+
+The following program takes the above input, converts the idle time to
+number of seconds and prints out the first two fields and the calculated
+idle time. (This program uses a number of @code{awk} features that
+haven't been introduced yet.)
+
+@example
+@group
+BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @}
+NR > 2 @{
+ idle = $4
+ sub(/^ */, "", idle) # strip leading spaces
+ if (idle == "")
+ idle = 0
+ if (idle ~ /:/) @{
+ split(idle, t, ":")
+ idle = t[1] * 60 + t[2]
+ @}
+ if (idle ~ /days/)
+ idle *= 24 * 60 * 60
+
+ print $1, $2, idle
+@}
+@end group
+@end example
+
+Here is the result of running the program on the data:
+
+@example
+hzuo ttyV0 0
+hzang ttyV3 50
+eklye ttyV5 0
+dportein ttyV6 107
+gierd ttyD3 1
+dave ttyD4 0
+brent ttyp0 286
+dave ttyq4 1296000
+@end example
+
+Another (possibly more practical) example of fixed-width input data
+would be the input from a deck of balloting cards. In some parts of
+the United States, voters mark their choices by punching holes in computer
+cards. These cards are then processed to count the votes for any particular
+candidate or on any particular issue. Since a voter may choose not to
+vote on some issue, any column on the card may be empty. An @code{awk}
+program for processing such data could use the @code{FIELDWIDTHS} feature
+to simplify reading the data. (Of course, getting @code{gawk} to run on
+a system with card readers is another story!)
+
+@ignore
+Exercise: Write a ballot card reading program
+@end ignore
+
+Assigning a value to @code{FS} causes @code{gawk} to return to using
+@code{FS} for field splitting. Use @samp{FS = FS} to make this happen,
+without having to know the current value of @code{FS}.
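+
+Here is a small sketch of this idea (the file name and field widths are
+hypothetical): the first record is split using @code{FIELDWIDTHS}, and
+all following records are split in the normal way.
+
+@example
+@group
+gawk 'BEGIN @{ FIELDWIDTHS = "5 5" @}
+      NR == 1 @{ print $1, $2; FS = FS; next @}
+      @{ print $1, $2 @}' data
+@end group
+@end example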
+
+This feature is still experimental, and may evolve over time.
+Note in particular that @code{gawk} does not attempt to verify
+that the values assigned to @code{FIELDWIDTHS} are sensible.
+
+@node Multiple Line, Getline, Constant Size, Reading Files
+@section Multiple-Line Records
+
+@cindex multiple line records
+@cindex input, multiple line records
+@cindex reading files, multiple line records
+@cindex records, multiple line
+In some data bases, a single line cannot conveniently hold all the
+information in one entry. In such cases, you can use multi-line
+records.
+
+The first step in doing this is to choose your data format: when records
+are not defined as single lines, how do you want to define them?
+What should separate records?
+
+One technique is to use an unusual character or string to separate
+records. For example, you could use the formfeed character (written
+@samp{\f} in @code{awk}, as in C) to separate them, making each record
+a page of the file. To do this, just set the variable @code{RS} to
+@code{"\f"} (a string containing the formfeed character). Any
+other character could equally well be used, as long as it won't be part
+of the data in a record.
+
+Another technique is to have blank lines separate records. By a special
+dispensation, an empty string as the value of @code{RS} indicates that
+records are separated by one or more blank lines. If you set @code{RS}
+to the empty string, a record always ends at the first blank line
+encountered. And the next record doesn't start until the first non-blank
+line that follows---no matter how many blank lines appear in a row, they
+are considered one record-separator.
+
+@cindex leftmost longest match
+@cindex matching, leftmost longest
+You can achieve the same effect as @samp{RS = ""} by assigning the
+string @code{"\n\n+"} to @code{RS}. This regexp matches the newline
+at the end of the record, and one or more blank lines after the record.
+In addition, a regular expression always matches the longest possible
+sequence when there is a choice
+(@pxref{Leftmost Longest, ,How Much Text Matches?}).
+So the next record doesn't start until
+the first non-blank line that follows---no matter how many blank lines
+appear in a row, they are considered one record-separator.
+
+@cindex dark corner
+There is an important difference between @samp{RS = ""} and
+@samp{RS = "\n\n+"}. In the first case, leading newlines in the input
+data file are ignored, and if a file ends without extra blank lines
+after the last record, the final newline is removed from the record.
+In the second case, this special processing is not done (d.c.).
+
+Now that the input is separated into records, the second step is to
+separate the fields in the record. One way to do this is to divide each
+of the lines into fields in the normal manner. This happens by default
+as the result of a special feature: when @code{RS} is set to the empty
+string, the newline character @emph{always} acts as a field separator.
+This is in addition to whatever field separations result from @code{FS}.
+
+The original motivation for this special exception was probably to provide
+useful behavior in the default case (i.e.@: @code{FS} is equal
+to @w{@code{" "}}). This feature can be a problem if you really don't
+want the newline character to separate fields, since there is no way to
+prevent it. However, you can work around this by using the @code{split}
+function to break up the record manually
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
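+
+Here is a minimal sketch of that workaround; it assumes the fields within
+each record are really separated by commas:
+
+@example
+@group
+BEGIN @{ RS = "" @}
+@{
+    # ignore the automatic splitting; break the record on commas instead
+    n = split($0, parts, ",")
+    for (i = 1; i <= n; i++)
+        print parts[i]
+@}
+@end group
+@end example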
+
+Another way to separate fields is to
+put each field on a separate line: to do this, just set the
+variable @code{FS} to the string @code{"\n"}. (This simple regular
+expression matches a single newline.)
+
+A practical example of a data file organized this way might be a mailing
+list, where each entry is separated by blank lines. Suppose we have a
+mailing list in a file named @file{addresses} that looks like this:
+
+@example
+Jane Doe
+123 Main Street
+Anywhere, SE 12345-6789
+
+John Smith
+456 Tree-lined Avenue
+Smallville, MW 98765-4321
+
+@dots{}
+@end example
+
+@noindent
+A simple program to process this file would look like this:
+
+@example
+@group
+# addrs.awk --- simple mailing list program
+
+# Records are separated by blank lines.
+# Each line is one field.
+BEGIN @{ RS = "" ; FS = "\n" @}
+
+@{
+ print "Name is:", $1
+ print "Address is:", $2
+ print "City and State are:", $3
+ print ""
+@}
+@end group
+@end example
+
+Running the program produces the following output:
+
+@example
+@group
+$ awk -f addrs.awk addresses
+@print{} Name is: Jane Doe
+@print{} Address is: 123 Main Street
+@print{} City and State are: Anywhere, SE 12345-6789
+@print{}
+@end group
+@group
+@print{} Name is: John Smith
+@print{} Address is: 456 Tree-lined Avenue
+@print{} City and State are: Smallville, MW 98765-4321
+@print{}
+@dots{}
+@end group
+@end example
+
+@xref{Labels Program, ,Printing Mailing Labels}, for a more realistic
+program that deals with address lists.
+
+The following table summarizes how records are split, based on the
+value of @code{RS}. (@samp{==} means ``is equal to.'')
+
+@c @cartouche
+@table @code
+@item RS == "\n"
+Records are separated by the newline character (@samp{\n}). In effect,
+every line in the data file is a separate record, including blank lines.
+This is the default.
+
+@item RS == @var{any single character}
+Records are separated by each occurrence of the character. Multiple
+successive occurrences delimit empty records.
+
+@item RS == ""
+Records are separated by runs of blank lines. The newline character
+always serves as a field separator, in addition to whatever value
+@code{FS} may have. Leading and trailing newlines in a file are ignored.
+
+@item RS == @var{regexp}
+Records are separated by occurrences of characters that match @var{regexp}.
+Leading and trailing matches of @var{regexp} delimit empty records.
+@end table
+@c @end cartouche
+
+@vindex RT
+In all cases, @code{gawk} sets @code{RT} to the input text that matched the
+value specified by @code{RS}.
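+
+For example (a small sketch; @code{printf} is used here so that the input
+does not end with a newline, and @code{RT} is empty for the final record):
+
+@example
+@group
+$ printf 'aXXbXXXc' | gawk 'BEGIN @{ RS = "X+" @}
+> @{ print NR, $0, RT @}'
+@print{} 1 a XX
+@print{} 2 b XXX
+@print{} 3 c
+@end group
+@end example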
+
+@node Getline, , Multiple Line, Reading Files
+@section Explicit Input with @code{getline}
+
+@findex getline
+@cindex input, explicit
+@cindex explicit input
+@cindex input, @code{getline} command
+@cindex reading files, @code{getline} command
+So far we have been getting our input data from @code{awk}'s main
+input stream---either the standard input (usually your terminal, sometimes
+the output from another program) or from the
+files specified on the command line. The @code{awk} language has a
+special built-in command called @code{getline} that
+can be used to read input under your explicit control.
+
+@menu
+* Getline Intro:: Introduction to the @code{getline} function.
+* Plain Getline:: Using @code{getline} with no arguments.
+* Getline/Variable:: Using @code{getline} into a variable.
+* Getline/File:: Using @code{getline} from a file.
+* Getline/Variable/File:: Using @code{getline} into a variable from a
+ file.
+* Getline/Pipe:: Using @code{getline} from a pipe.
+* Getline/Variable/Pipe:: Using @code{getline} into a variable from a
+ pipe.
+* Getline Summary:: Summary Of @code{getline} Variants.
+@end menu
+
+@node Getline Intro, Plain Getline, Getline, Getline
+@subsection Introduction to @code{getline}
+
+This command is used in several different ways, and should @emph{not} be
+used by beginners. It is covered here because this is the chapter on input.
+The examples that follow the explanation of the @code{getline} command
+include material that has not been covered yet. Therefore, come back
+and study the @code{getline} command @emph{after} you have reviewed the
+rest of this @value{DOCUMENT} and have a good knowledge of how @code{awk} works.
+
+@vindex ERRNO
+@cindex differences between @code{gawk} and @code{awk}
+@cindex @code{getline}, return values
+@code{getline} returns one if it finds a record, and zero if the end of the
+file is encountered. If there is some error in getting a record, such
+as a file that cannot be opened, then @code{getline} returns @minus{}1.
+In this case, @code{gawk} sets the variable @code{ERRNO} to a string
+describing the error that occurred.
+
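+Because an error returns @minus{}1 rather than zero, a loop that reads with
+@code{getline} should test for a return value greater than zero, not merely
+for a non-zero value. Here is a minimal sketch (the file name @file{data}
+is hypothetical):
+
+@example
+@group
+while ((getline line < "data") > 0)
+    print line
+close("data")
+@end group
+@end example
+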
+In the following examples, @var{command} stands for a string value that
+represents a shell command.
+
+@node Plain Getline, Getline/Variable, Getline Intro, Getline
+@subsection Using @code{getline} with No Arguments
+
+The @code{getline} command can be used without arguments to read input
+from the current input file. All it does in this case is read the next
+input record and split it up into fields. This is useful if you've
+finished processing the current record, but you want to do some special
+processing @emph{right now} on the next record. Here's an
+example:
+
+@example
+@group
+awk '@{
+ if ((t = index($0, "/*")) != 0) @{
+ # value will be "" if t is 1
+ tmp = substr($0, 1, t - 1)
+ u = index(substr($0, t + 2), "*/")
+ while (u == 0) @{
+ if (getline <= 0) @{
+ m = "unexpected EOF or error"
+ m = (m ": " ERRNO)
+ print m > "/dev/stderr"
+ exit
+ @}
+ t = -1
+ u = index($0, "*/")
+ @}
+@end group
+@group
+ # substr expression will be "" if */
+ # occurred at end of line
+ $0 = tmp substr($0, t + u + 3)
+ @}
+ print $0
+@}'
+@end group
+@end example
+
+This @code{awk} program deletes all C-style comments, @samp{/* @dots{}
+*/}, from the input. By replacing the @samp{print $0} with other
+statements, you could perform more complicated processing on the
+decommented input, like searching for matches of a regular
+expression. This program has a subtle problem---it does not work if one
+comment ends and another begins on the same line.
+
+@ignore
+Exercise,
+write a program that does handle multiple comments on the line.
+@end ignore
+
+This form of the @code{getline} command sets @code{NF} (the number of
+fields; @pxref{Fields, ,Examining Fields}), @code{NR} (the number of
+records read so far; @pxref{Records, ,How Input is Split into Records}),
+@code{FNR} (the number of records read from this input file), and the
+value of @code{$0}.
+
+@cindex dark corner
+@strong{Note:} the new value of @code{$0} is used in testing
+the patterns of any subsequent rules. The original value
+of @code{$0} that triggered the rule which executed @code{getline}
+is lost (d.c.).
+By contrast, the @code{next} statement reads a new record
+but immediately begins processing it normally, starting with the first
+rule in the program. @xref{Next Statement, ,The @code{next} Statement}.
+
+@node Getline/Variable, Getline/File, Plain Getline, Getline
+@subsection Using @code{getline} Into a Variable
+
+You can use @samp{getline @var{var}} to read the next record from
+@code{awk}'s input into the variable @var{var}. No other processing is
+done.
+
+For example, suppose the next line is a comment, or a special string,
+and you want to read it, without triggering
+any rules. This form of @code{getline} allows you to read that line
+and store it in a variable so that the main
+read-a-line-and-check-each-rule loop of @code{awk} never sees it.
+
+The following example swaps every two lines of input. For example, given:
+
+@example
+wan
+tew
+free
+phore
+@end example
+
+@noindent
+it outputs:
+
+@example
+tew
+wan
+phore
+free
+@end example
+
+@noindent
+Here's the program:
+
+@example
+@group
+awk '@{
+ if ((getline tmp) > 0) @{
+ print tmp
+ print $0
+ @} else
+ print $0
+@}'
+@end group
+@end example
+
+The @code{getline} command used in this way sets only the variables
+@code{NR} and @code{FNR} (and of course, @var{var}). The record is not
+split into fields, so the values of the fields (including @code{$0}) and
+the value of @code{NF} do not change.
+
+@node Getline/File, Getline/Variable/File, Getline/Variable, Getline
+@subsection Using @code{getline} from a File
+
+@cindex input redirection
+@cindex redirection of input
+Use @samp{getline < @var{file}} to read
+the next record from the file
+@var{file}. Here @var{file} is a string-valued expression that
+specifies the file name. @samp{< @var{file}} is called a @dfn{redirection}
+since it directs input to come from a different place.
+
+For example, the following
+program reads its input record from the file @file{secondary.input} when it
+encounters a first field with a value equal to 10 in the current input
+file.
+
+@example
+@group
+awk '@{
+ if ($1 == 10) @{
+ getline < "secondary.input"
+ print
+ @} else
+ print
+@}'
+@end group
+@end example
+
+Since the main input stream is not used, the values of @code{NR} and
+@code{FNR} are not changed. But the record read is split into fields in
+the normal manner, so the values of @code{$0} and other fields are
+changed. So is the value of @code{NF}.
+
+@node Getline/Variable/File, Getline/Pipe, Getline/File, Getline
+@subsection Using @code{getline} Into a Variable from a File
+
+Use @samp{getline @var{var} < @var{file}} to read input from the file
+@var{file} and put it in the variable @var{var}. As above, @var{file}
+is a string-valued expression that specifies the file from which to read.
+
+In this version of @code{getline}, none of the built-in variables are
+changed, and the record is not split into fields. The only variable
+changed is @var{var}.
+
+For example, the following program copies all the input files to the
+output, except for records that say @w{@samp{@@include @var{filename}}}.
+Such a record is replaced by the contents of the file
+@var{filename}.
+
+@example
+@group
+awk '@{
+ if (NF == 2 && $1 == "@@include") @{
+ while ((getline line < $2) > 0)
+ print line
+ close($2)
+ @} else
+ print
+@}'
+@end group
+@end example
+
+Note here how the name of the extra input file is not built into
+the program; it is taken directly from the data, from the second field on
+the @samp{@@include} line.
+
+The @code{close} function is called to ensure that if two identical
+@samp{@@include} lines appear in the input, the entire specified file is
+included twice.
+@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes}.
+
+One deficiency of this program is that it does not process nested
+@samp{@@include} statements
+(@samp{@@include} statements in included files)
+the way a true macro preprocessor would.
+@xref{Igawk Program, ,An Easy Way to Use Library Functions}, for a program
+that does handle nested @samp{@@include} statements.
+
+@node Getline/Pipe, Getline/Variable/Pipe, Getline/Variable/File, Getline
+@subsection Using @code{getline} from a Pipe
+
+@cindex input pipeline
+@cindex pipeline, input
+You can pipe the output of a command into @code{getline}, using
+@samp{@var{command} | getline}. In
+this case, the string @var{command} is run as a shell command and its output
+is piped into @code{awk} to be used as input. This form of @code{getline}
+reads one record at a time from the pipe.
+
+For example, the following program copies its input to its output, except for
+lines that begin with @samp{@@execute}, which are replaced by the output
+produced by running the rest of the line as a shell command:
+
+@example
+@group
+awk '@{
+ if ($1 == "@@execute") @{
+ tmp = substr($0, 10)
+ while ((tmp | getline) > 0)
+ print
+ close(tmp)
+ @} else
+ print
+@}'
+@end group
+@end example
+
+@noindent
+The @code{close} function is called to ensure that if two identical
+@samp{@@execute} lines appear in the input, the command is run for
+each one.
+@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes}.
+@c Exercise!!
+@c This example is unrealistic, since you could just use system
+
+Given the input:
+
+@example
+@group
+foo
+bar
+baz
+@@execute who
+bletch
+@end group
+@end example
+
+@noindent
+the program might produce:
+
+@example
+@group
+foo
+bar
+baz
+arnold ttyv0 Jul 13 14:22
+miriam ttyp0 Jul 13 14:23 (murphy:0)
+bill ttyp1 Jul 13 14:23 (murphy:0)
+bletch
+@end group
+@end example
+
+@noindent
+Notice that this program ran the command @code{who} and printed the result.
+(If you try this program yourself, you will of course get different results,
+showing you who is logged in on your system.)
+
+This variation of @code{getline} splits the record into fields, sets the
+value of @code{NF} and recomputes the value of @code{$0}. The values of
+@code{NR} and @code{FNR} are not changed.
+
+@node Getline/Variable/Pipe, Getline Summary, Getline/Pipe, Getline
+@subsection Using @code{getline} Into a Variable from a Pipe
+
+When you use @samp{@var{command} | getline @var{var}}, the
+output of the command @var{command} is sent through a pipe to
+@code{getline} and into the variable @var{var}. For example, the
+following program reads the current date and time into the variable
+@code{current_time}, using the @code{date} utility, and then
+prints it.
+
+@example
+@group
+awk 'BEGIN @{
+ "date" | getline current_time
+ close("date")
+ print "Report printed on " current_time
+@}'
+@end group
+@end example
+
+In this version of @code{getline}, none of the built-in variables are
+changed, and the record is not split into fields.
+
+@node Getline Summary, , Getline/Variable/Pipe, Getline
+@subsection Summary of @code{getline} Variants
+
+With all the forms of @code{getline}, even though @code{$0} and @code{NF}
+may be updated, the record will not be tested against all the patterns
+in the @code{awk} program, in the way that would happen if the record
+were read normally by the main processing loop of @code{awk}. However,
+the new record is tested against any subsequent rules.
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex limitations
+@cindex implementation limits
+Many @code{awk} implementations limit the number of pipelines an @code{awk}
+program may have open to just one! In @code{gawk}, there is no such limit.
+You can open as many pipelines as the underlying operating system will
+permit.
+
+The following table summarizes the six variants of @code{getline},
+listing which built-in variables are set by each one.
+
+@iftex
+@page
+@end iftex
+@c @cartouche
+@table @code
+@item getline
+sets @code{$0}, @code{NF}, @code{FNR}, and @code{NR}.
+
+@item getline @var{var}
+sets @var{var}, @code{FNR}, and @code{NR}.
+
+@item getline < @var{file}
+sets @code{$0} and @code{NF}.
+
+@item getline @var{var} < @var{file}
+sets @var{var}.
+
+@item @var{command} | getline
+sets @code{$0} and @code{NF}.
+
+@item @var{command} | getline @var{var}
+sets @var{var}.
+@end table
+@c @end cartouche
+
+@node Printing, Expressions, Reading Files, Top
+@chapter Printing Output
+
+@cindex printing
+@cindex output
+One of the most common actions is to @dfn{print}, or output,
+some or all of the input. You use the @code{print} statement
+for simple output. You use the @code{printf} statement
+for fancier formatting. Both are described in this chapter.
+
+@menu
+* Print:: The @code{print} statement.
+* Print Examples:: Simple examples of @code{print} statements.
+* Output Separators:: The output separators and how to change them.
+* OFMT:: Controlling Numeric Output With @code{print}.
+* Printf:: The @code{printf} statement.
+* Redirection:: How to redirect output to multiple files and
+ pipes.
+* Special Files:: File name interpretation in @code{gawk}.
+ @code{gawk} allows access to inherited file
+ descriptors.
+* Close Files And Pipes:: Closing Input and Output Files and Pipes.
+@end menu
+
+@node Print, Print Examples, Printing, Printing
+@section The @code{print} Statement
+@cindex @code{print} statement
+
+The @code{print} statement does output with simple, standardized
+formatting. You specify only the strings or numbers to be printed, in a
+list separated by commas. They are output, separated by single spaces,
+followed by a newline. The statement looks like this:
+
+@example
+print @var{item1}, @var{item2}, @dots{}
+@end example
+
+@noindent
+The entire list of items may optionally be enclosed in parentheses. The
+parentheses are necessary if any of the item expressions uses the @samp{>}
+relational operator; otherwise it could be confused with a redirection
+(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
+
+The items to be printed can be constant strings or numbers, fields of the
+current record (such as @code{$1}), variables, or any @code{awk}
+expressions.
+Numeric values are converted to strings, and then printed.
+
+The @code{print} statement is completely general for
+computing @emph{what} values to print. However, with two exceptions,
+you cannot specify @emph{how} to print them---how many
+columns, whether to use exponential notation or not, and so on.
+(For the exceptions, @pxref{Output Separators}, and
+@ref{OFMT, ,Controlling Numeric Output with @code{print}}.)
+For that, you need the @code{printf} statement
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).
+
+The simple statement @samp{print} with no items is equivalent to
+@samp{print $0}: it prints the entire current record. To print a blank
+line, use @samp{print ""}, where @code{""} is the empty string.
+
+To print a fixed piece of text, use a string constant such as
+@w{@code{"Don't Panic"}} as one item. If you forget to use the
+double-quote characters, your text will be taken as an @code{awk}
+expression, and you will probably get an error. Keep in mind that a
+space is printed between any two items.
+
+Each @code{print} statement makes at least one line of output. But it
+isn't limited to one line. If an item value is a string that contains a
+newline, the newline is output along with the rest of the string. A
+single @code{print} can make any number of lines this way.
+
+@node Print Examples, Output Separators, Print, Printing
+@section Examples of @code{print} Statements
+
+Here is an example of printing a string that contains embedded newlines
+(the @samp{\n} is an escape sequence, used to represent the newline
+character; see @ref{Escape Sequences}):
+
+@example
+@group
+$ awk 'BEGIN @{ print "line one\nline two\nline three" @}'
+@print{} line one
+@print{} line two
+@print{} line three
+@end group
+@end example
+
+Here is an example that prints the first two fields of each input record,
+with a space between them:
+
+@example
+@group
+$ awk '@{ print $1, $2 @}' inventory-shipped
+@print{} Jan 13
+@print{} Feb 15
+@print{} Mar 15
+@dots{}
+@end group
+@end example
+
+@cindex common mistakes
+@cindex mistakes, common
+@cindex errors, common
+A common mistake in using the @code{print} statement is to omit the comma
+between two items. This often has the effect of making the items run
+together in the output, with no space. The reason for this is that
+juxtaposing two string expressions in @code{awk} means to concatenate
+them. Here is the same program, without the comma:
+
+@example
+@group
+$ awk '@{ print $1 $2 @}' inventory-shipped
+@print{} Jan13
+@print{} Feb15
+@print{} Mar15
+@dots{}
+@end group
+@end example
+
+To someone unfamiliar with the file @file{inventory-shipped}, neither
+example's output makes much sense. A heading line at the beginning
+would make it clearer. Let's add some headings to our table of months
+(@code{$1}) and green crates shipped (@code{$2}). We do this using the
+@code{BEGIN} pattern
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns})
+to force the headings to be printed only once:
+
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, $2 @}' inventory-shipped
+@end example
+
+@noindent
+Did you already guess what happens? When run, the program prints
+the following:
+
+@example
+@group
+Month Crates
+----- ------
+Jan 13
+Feb 15
+Mar 15
+@dots{}
+@end group
+@end example
+
+@noindent
+The headings and the table data don't line up! We can fix this by printing
+some spaces between the two fields:
+
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, " ", $2 @}' inventory-shipped
+@end example
+
+You can imagine that this way of lining up columns can get pretty
+complicated when you have many columns to fix. Counting spaces for two
+or three columns can be simple, but more than this and you can get
+lost quite easily. This is why the @code{printf} statement was
+created (@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing});
+one of its specialties is lining up columns of data.
+
+@cindex line continuation
+As a side point,
+you can continue either a @code{print} or @code{printf} statement simply
+by putting a newline after any comma
+(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}).
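+
+For example (a trivial sketch):
+
+@example
+@group
+awk 'BEGIN @{ print "Month",
+             "Crates" @}'
+@end group
+@end example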
+
+@node Output Separators, OFMT, Print Examples, Printing
+@section Output Separators
+
+@cindex output field separator, @code{OFS}
+@cindex output record separator, @code{ORS}
+@vindex OFS
+@vindex ORS
+As mentioned previously, a @code{print} statement contains a list
+of items, separated by commas. In the output, the items are normally
+separated by single spaces. This need not be the case; a
+single space is only the default. You can specify any string of
+characters to use as the @dfn{output field separator} by setting the
+built-in variable @code{OFS}. The initial value of this variable
+is the string @w{@code{" "}}, that is, a single space.
+
+The output from an entire @code{print} statement is called an
+@dfn{output record}. Each @code{print} statement outputs one output
+record and then outputs a string called the @dfn{output record separator}.
+The built-in variable @code{ORS} specifies this string. The initial
+value of @code{ORS} is the string @code{"\n"}, i.e.@: a newline
+character; thus, normally each @code{print} statement makes a separate line.
+
+You can change how output fields and records are separated by assigning
+new values to the variables @code{OFS} and/or @code{ORS}. The usual
+place to do this is in the @code{BEGIN} rule
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}), so
+that it happens before any input is processed. You may also do this
+with assignments on the command line, before the names of your input
+files, or using the @samp{-v} command line option
+(@pxref{Options, ,Command Line Options}).
+
+@ignore
+Exercise,
+Rewrite the
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, " ", $2 @}' inventory-shipped
+@end example
+program by using a new value of @code{OFS}.
+@end ignore
+
+The following example prints the first and second fields of each input
+record separated by a semicolon, with a blank line added after each
+line:
+
+@example
+@group
+$ awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @}
+> @{ print $1, $2 @}' BBS-list
+@print{} aardvark;555-5553
+@print{}
+@print{} alpo-net;555-3412
+@print{}
+@print{} barfly;555-7685
+@dots{}
+@end group
+@end example
+
+If the value of @code{ORS} does not contain a newline, all your output
+will be run together on a single line, unless you output newlines some
+other way.
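+
+For example, setting @code{ORS} to a single space runs all of the
+names in @file{BBS-list} together on one line:
+
+@example
+@group
+$ awk 'BEGIN @{ ORS = " " @} @{ print $1 @}' BBS-list
+@print{} aardvark alpo-net barfly @dots{}
+@end group
+@end example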
+
+@node OFMT, Printf, Output Separators, Printing
+@section Controlling Numeric Output with @code{print}
+@vindex OFMT
+@cindex numeric output format
+@cindex format, numeric output
+@cindex output format specifier, @code{OFMT}
+When you use the @code{print} statement to print numeric values,
+@code{awk} internally converts the number to a string of characters,
+and prints that string. @code{awk} uses the @code{sprintf} function
+to do this conversion
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+For now, it suffices to say that the @code{sprintf}
+function accepts a @dfn{format specification} that tells it how to format
+numbers (or strings), and that there are a number of different ways in which
+numbers can be formatted. The different format specifications are discussed
+more fully in
+@ref{Control Letters, , Format-Control Letters}.
+
+The built-in variable @code{OFMT} contains the default format specification
+that @code{print} uses with @code{sprintf} when it wants to convert a
+number to a string for printing.
+The default value of @code{OFMT} is @code{"%.6g"}.
+By supplying different format specifications
+as the value of @code{OFMT}, you can change how @code{print} will print
+your numbers. As a brief example:
+
+@example
+@group
+$ awk 'BEGIN @{
+> OFMT = "%.0f" # print numbers as integers (rounds)
+> print 17.23 @}'
+@print{} 17
+@end group
+@end example
+
+@noindent
+@cindex dark corner
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+According to the POSIX standard, @code{awk}'s behavior will be undefined
+if @code{OFMT} contains anything but a floating point conversion specification
+(d.c.).
+
+@node Printf, Redirection, OFMT, Printing
+@section Using @code{printf} Statements for Fancier Printing
+@cindex formatted output
+@cindex output, formatted
+
+If you want more precise control over the output format than
+@code{print} gives you, use @code{printf}. With @code{printf} you can
+specify the width to use for each item, and you can specify various
+formatting choices for numbers (such as what radix to use, whether to
+print an exponent, whether to print a sign, and how many digits to print
+after the decimal point). You do this by supplying a string, called
+the @dfn{format string}, which controls how and where to print the other
+arguments.
+
+@menu
+* Basic Printf:: Syntax of the @code{printf} statement.
+* Control Letters:: Format-control letters.
+* Format Modifiers:: Format-specification modifiers.
+* Printf Examples:: Several examples.
+@end menu
+
+@node Basic Printf, Control Letters, Printf, Printf
+@subsection Introduction to the @code{printf} Statement
+
+@cindex @code{printf} statement, syntax of
+The @code{printf} statement looks like this:
+
+@example
+printf @var{format}, @var{item1}, @var{item2}, @dots{}
+@end example
+
+@noindent
+The entire list of arguments may optionally be enclosed in parentheses. The
+parentheses are necessary if any of the item expressions use the @samp{>}
+relational operator; otherwise it could be confused with a redirection
+(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
+
+@cindex format string
+The difference between @code{printf} and @code{print} is the @var{format}
+argument. This is an expression whose value is taken as a string; it
+specifies how to output each of the other arguments. It is called
+the @dfn{format string}.
+
+The format string is very similar to that in the ANSI C library function
+@code{printf}. Most of @var{format} is text to be output verbatim.
+Scattered among this text are @dfn{format specifiers}, one per item.
+Each format specifier says to output the next item in the argument list
+at that place in the format.
+
+The @code{printf} statement does not automatically append a newline to its
+output. It outputs only what the format string specifies. So if you want
+a newline, you must include one in the format string. The output separator
+variables @code{OFS} and @code{ORS} have no effect on @code{printf}
+statements. For example:
+
+@example
+@group
+BEGIN @{
+ ORS = "\nOUCH!\n"; OFS = "!"
+ msg = "Don't Panic!"; printf "%s\n", msg
+@}
+@end group
+@end example
+
+This program still prints the familiar @samp{Don't Panic!} message.
+
+@node Control Letters, Format Modifiers, Basic Printf, Printf
+@subsection Format-Control Letters
+@cindex @code{printf}, format-control characters
+@cindex format specifier
+
+A format specifier starts with the character @samp{%} and ends with a
+@dfn{format-control letter}; it tells the @code{printf} statement how
+to output one item. (If you actually want to output a @samp{%}, write
+@samp{%%}.) The format-control letter specifies what kind of value to
+print. The rest of the format specifier is made up of optional
+@dfn{modifiers} which are parameters to use, such as the field width.
+
+Here is a list of the format-control letters:
+
+@table @code
+@item c
+This prints a number as an ASCII character. Thus, @samp{printf "%c",
+65} outputs the letter @samp{A}. The output for a string value is
+the first character of the string.
+
+@iftex
+@page
+@end iftex
+@item d
+@itemx i
+These are equivalent. They both print a decimal integer.
+The @samp{%i} specification is for compatibility with ANSI C.
+
+@item e
+@itemx E
+This prints a number in scientific (exponential) notation.
+For example,
+
+@example
+printf "%4.3e\n", 1950
+@end example
+
+@noindent
+prints @samp{1.950e+03}, with a total of four significant figures of
+which three follow the decimal point. The @samp{4.3} are modifiers,
+discussed below. @samp{%E} uses @samp{E} instead of @samp{e} in the output.
+
+@item f
+This prints a number in floating point notation.
+For example,
+
+@example
+printf "%4.3f", 1950
+@end example
+
+@noindent
+prints @samp{1950.000}. Here the @samp{4.3} modifiers (discussed below)
+specify a minimum field width of four characters and three digits
+after the decimal point.
+
+@item g
+@itemx G
+This prints a number in either scientific notation or floating point
+notation, whichever uses fewer characters. If the result is printed in
+scientific notation, @samp{%G} uses @samp{E} instead of @samp{e}.
+
+@item o
+This prints an unsigned octal integer.
+(In octal, or base-eight notation, the digits run from @samp{0} to @samp{7};
+the decimal number eight is represented as @samp{10} in octal.)
+
+@item s
+This prints a string.
+
+@item x
+@itemx X
+This prints an unsigned hexadecimal integer.
+(In hexadecimal, or base-16 notation, the digits are @samp{0} through @samp{9}
+and @samp{a} through @samp{f}. The hexadecimal digit @samp{f} represents
+the decimal number 15.) @samp{%X} uses the letters @samp{A} through @samp{F}
+instead of @samp{a} through @samp{f}.
+
+@item %
+This isn't really a format-control letter, but it does have a meaning
+when used after a @samp{%}: the sequence @samp{%%} outputs one
+@samp{%}. It does not consume an argument, and it ignores any
+modifiers.
+@end table
+
+@cindex dark corner
+When using the integer format-control letters for values that are outside
+the range of a C @code{long} integer, @code{gawk} will switch to the
+@samp{%g} format specifier. Other versions of @code{awk} may print
+invalid values, or do something else entirely (d.c.).
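+
+To see several of these conversions side by side (a small sketch):
+
+@example
+@group
+$ awk 'BEGIN @{ printf "%d %o %x %e %g\n", 255, 255, 255, 255, 255 @}'
+@print{} 255 377 ff 2.550000e+02 255
+@end group
+@end example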
+
+@node Format Modifiers, Printf Examples, Control Letters, Printf
+@subsection Modifiers for @code{printf} Formats
+
+@cindex @code{printf}, modifiers
+@cindex modifiers (in format specifiers)
+A format specification can also include @dfn{modifiers} that can control
+how much of the item's value is printed and how much space it gets. The
+modifiers come between the @samp{%} and the format-control letter.
+In the examples below, we use the bullet symbol ``@bullet{}'' to represent
+spaces in the output. Here are the possible modifiers, in the order in
+which they may appear:
+
+@table @code
+@item -
+The minus sign, used before the width modifier (see below),
+says to left-justify
+the argument within its specified width. Normally the argument
+is printed right-justified in the specified width. Thus,
+
+@example
+printf "%-4s", "foo"
+@end example
+
+@noindent
+prints @samp{foo@bullet{}}.
+
+@item @var{space}
+For numeric conversions, prefix positive values with a space, and
+negative values with a minus sign.
+
+@item +
+The plus sign, used before the width modifier (see below),
+says to always supply a sign for numeric conversions, even if the data
+to be formatted is positive. The @samp{+} overrides the space modifier.
+
+@item #
+Use an ``alternate form'' for certain control letters.
+For @samp{%o}, supply a leading zero.
+For @samp{%x}, and @samp{%X}, supply a leading @samp{0x} or @samp{0X} for
+a non-zero result.
+For @samp{%e}, @samp{%E}, and @samp{%f}, the result will always contain a
+decimal point.
+For @samp{%g}, and @samp{%G}, trailing zeros are not removed from the result.
+
+@cindex dark corner
+@item 0
+A leading @samp{0} (zero) acts as a flag, that indicates output should be
+padded with zeros instead of spaces.
+This applies even to non-numeric output formats (d.c.).
+This flag only has an effect when the field width is wider than the
+value to be printed.
+
+@item @var{width}
+This is a number specifying the desired minimum width of a field. Inserting any
+number between the @samp{%} sign and the format control character forces the
+field to be expanded to this width. The default way to do this is to
+pad with spaces on the left. For example,
+
+@example
+printf "%4s", "foo"
+@end example
+
+@noindent
+prints @samp{@bullet{}foo}.
+
+The value of @var{width} is a minimum width, not a maximum. If the item
+value requires more than @var{width} characters, it can be as wide as
+necessary. Thus,
+
+@example
+printf "%4s", "foobar"
+@end example
+
+@noindent
+prints @samp{foobar}.
+
+Preceding the @var{width} with a minus sign causes the output to be
+padded with spaces on the right, instead of on the left.
+
+@item .@var{prec}
+This is a number that specifies the precision to use when printing.
+For the @samp{e}, @samp{E}, and @samp{f} formats, this specifies the
+number of digits you want printed to the right of the decimal point.
+For the @samp{g}, and @samp{G} formats, it specifies the maximum number
+of significant digits. For the @samp{d}, @samp{o}, @samp{i}, @samp{u},
+@samp{x}, and @samp{X} formats, it specifies the minimum number of
+digits to print. For a string, it specifies the maximum number of
+characters from the string that should be printed. Thus,
+
+@example
+printf "%.4s", "foobar"
+@end example
+
+@noindent
+prints @samp{foob}.
+@end table
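+
+Here is a quick sketch showing several of these modifiers together:
+
+@example
+@group
+$ awk 'BEGIN @{ printf "|%+d|% d|%05d|%#o|%#x|\n", 42, 42, 42, 42, 42 @}'
+@print{} |+42| 42|00042|052|0x2a|
+@end group
+@end example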
+
+The C library @code{printf}'s dynamic @var{width} and @var{prec}
+capability (for example, @code{"%*.*s"}) is supported. Instead of
+supplying explicit @var{width} and/or @var{prec} values in the format
+string, you pass them in the argument list. For example:
+
+@example
+w = 5
+p = 3
+s = "abcdefg"
+printf "%*.*s\n", w, p, s
+@end example
+
+@noindent
+is exactly equivalent to
+
+@example
+s = "abcdefg"
+printf "%5.3s\n", s
+@end example
+
+@noindent
+Both programs output @samp{@w{@bullet{}@bullet{}abc}}.
+
+Earlier versions of @code{awk} did not support this capability.
+If you must use such a version, you may simulate this feature by using
+concatenation to build up the format string, like so:
+
+@example
+w = 5
+p = 3
+s = "abcdefg"
+printf "%" w "." p "s\n", s
+@end example
+
+@noindent
+This is not particularly easy to read, but it does work.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+C programmers may be used to supplying additional @samp{l} and @samp{h}
+flags in @code{printf} format strings. These are not valid in @code{awk}.
+Most @code{awk} implementations silently ignore these flags.
+If @samp{--lint} is provided on the command line
+(@pxref{Options, ,Command Line Options}),
+@code{gawk} will warn about their use. If @samp{--posix} is supplied,
+their use is a fatal error.
+
+@node Printf Examples, , Format Modifiers, Printf
+@subsection Examples Using @code{printf}
+
+Here is how to use @code{printf} to make an aligned table:
+
+@example
+awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end example
+
+@noindent
+prints the names of bulletin boards (@code{$1}) of the file
+@file{BBS-list} as a string of 10 characters, left justified. It also
+prints the phone numbers (@code{$2}) afterward on the line. This
+produces an aligned two-column table of names and phone numbers:
+
+@example
+@group
+$ awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@print{} aardvark 555-5553
+@print{} alpo-net 555-3412
+@print{} barfly 555-7685
+@print{} bites 555-1675
+@print{} camelot 555-0542
+@print{} core 555-2912
+@print{} fooey 555-1234
+@print{} foot 555-6699
+@print{} macfoo 555-6480
+@print{} sdace 555-3430
+@print{} sabafoo 555-2127
+@end group
+@end example
+
+Did you notice that we did not specify that the phone numbers be printed
+as numbers? They had to be printed as strings because the numbers are
+separated by a dash.
+If we had tried to print the phone numbers as numbers, all we would have
+gotten would have been the first three digits, @samp{555}.
+This would have been pretty confusing.
+
+We did not specify a width for the phone numbers because they are the
+last things on their lines. We don't need to put spaces after them.
+
+We could make our table look even nicer by adding headings to the tops
+of the columns. To do this, we use the @code{BEGIN} pattern
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns})
+to force the header to be printed only once, at the beginning of
+the @code{awk} program:
+
+@example
+@group
+awk 'BEGIN @{ print "Name Number"
+ print "---- ------" @}
+ @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end group
+@end example
+
+Did you notice that we mixed @code{print} and @code{printf} statements in
+the above example? We could have used just @code{printf} statements to get
+the same results:
+
+@example
+@group
+awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number"
+ printf "%-10s %s\n", "----", "------" @}
+ @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end group
+@end example
+
+@noindent
+By printing each column heading with the same format specification
+used for the elements of the column, we have made sure that the headings
+are aligned just like the columns.
+
+The fact that the same format specification is used three times can be
+emphasized by storing it in a variable, like this:
+
+@example
+@group
+awk 'BEGIN @{ format = "%-10s %s\n"
+ printf format, "Name", "Number"
+ printf format, "----", "------" @}
+ @{ printf format, $1, $2 @}' BBS-list
+@end group
+@end example
+
+@c !!! exercise
+See if you can use the @code{printf} statement to line up the headings and
+table data for our @file{inventory-shipped} example covered earlier in the
+section on the @code{print} statement
+(@pxref{Print, ,The @code{print} Statement}).
+
+@node Redirection, Special Files, Printf, Printing
+@section Redirecting Output of @code{print} and @code{printf}
+
+@cindex output redirection
+@cindex redirection of output
+So far we have been dealing only with output that prints to the standard
+output, usually your terminal. Both @code{print} and @code{printf} can
+also send their output to other places.
+This is called @dfn{redirection}.
+
+A redirection appears after the @code{print} or @code{printf} statement.
+Redirections in @code{awk} are written just like redirections in shell
+commands, except that they are written inside the @code{awk} program.
+
+There are three forms of output redirection: output to a file,
+output appended to a file, and output through a pipe to another
+command.
+They are all shown for
+the @code{print} statement, but they work identically for @code{printf}
+also.
+
+@table @code
+@item print @var{items} > @var{output-file}
+This type of redirection prints the items into the output file
+@var{output-file}. The file name @var{output-file} can be any
+expression. Its value is changed to a string and then used as a
+file name (@pxref{Expressions}).
+
+When this type of redirection is used, the @var{output-file} is erased
+before the first output is written to it. Subsequent writes
+to the same @var{output-file} do not
+erase @var{output-file}, but append to it. If @var{output-file} does
+not exist, then it is created.
+
+For example, here is how an @code{awk} program can write a list of
+BBS names to a file @file{name-list} and a list of phone numbers to a
+file @file{phone-list}. Each output file contains one name or number
+per line.
+
+@example
+@group
+$ awk '@{ print $2 > "phone-list"
+> print $1 > "name-list" @}' BBS-list
+@end group
+@group
+$ cat phone-list
+@print{} 555-5553
+@print{} 555-3412
+@dots{}
+@end group
+@group
+$ cat name-list
+@print{} aardvark
+@print{} alpo-net
+@dots{}
+@end group
+@end example
+
+@item print @var{items} >> @var{output-file}
+This type of redirection prints the items into the pre-existing output file
+@var{output-file}. The difference between this and the
+single-@samp{>} redirection is that the old contents (if any) of
+@var{output-file} are not erased. Instead, the @code{awk} output is
+appended to the file.
+If @var{output-file} does not exist, then it is created.
+
+@cindex pipes for output
+@cindex output, piping
+@item print @var{items} | @var{command}
+It is also possible to send output to another program through a pipe
+instead of into a
+file. This type of redirection opens a pipe to @var{command} and writes
+the values of @var{items} through this pipe, to another process created
+to execute @var{command}.
+
+The redirection argument @var{command} is actually an @code{awk}
+expression. Its value is converted to a string, whose contents give the
+shell command to be run.
+
+For example, this produces two files, one unsorted list of BBS names
+and one list sorted in reverse alphabetical order:
+
+@example
+awk '@{ print $1 > "names.unsorted"
+ command = "sort -r > names.sorted"
+ print $1 | command @}' BBS-list
+@end example
+
+Here the unsorted list is written with an ordinary redirection while
+the sorted list is written by piping through the @code{sort} utility.
+
+This example uses redirection to mail a message to a mailing
+list @samp{bug-system}. This might be useful when trouble is encountered
+in an @code{awk} script run periodically for system maintenance.
+
+@example
+report = "mail bug-system"
+print "Awk script failed:", $0 | report
+m = ("at record number " FNR " of " FILENAME)
+print m | report
+close(report)
+@end example
+
+The message is built using string concatenation and saved in the variable
+@code{m}. It is then sent down the pipeline to the @code{mail} program.
+
+We call the @code{close} function here because it's a good idea to close
+the pipe as soon as all the intended output has been sent to it.
+@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes},
+for more information
+on this. This example also illustrates the use of a variable to represent
+a @var{file} or @var{command}: it is not necessary to always
+use a string constant. Using a variable is generally a good idea,
+since @code{awk} requires that the string value be spelled identically
+every time the same file or pipe is referred to.
+@end table
+
+Redirecting output using @samp{>}, @samp{>>}, or @samp{|} asks the system
+to open a file or pipe only if the particular @var{file} or @var{command}
+you've specified has not already been written to by your program, or if
+it has been closed since it was last written to.
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex limitations
+@cindex implementation limits
+Many @code{awk} implementations limit the number of pipelines an @code{awk}
+program may have open to just one! In @code{gawk}, there is no such limit.
+You can open as many pipelines as the underlying operating system will
+permit.
+
+@node Special Files, Close Files And Pipes, Redirection, Printing
+@section Special File Names in @code{gawk}
+@cindex standard input
+@cindex standard output
+@cindex standard error output
+@cindex file descriptors
+
+Running programs conventionally have three input and output streams
+already available to them for reading and writing. These are known as
+the @dfn{standard input}, @dfn{standard output}, and @dfn{standard error
+output}. These streams are, by default, connected to your terminal, but
+they are often redirected with the shell, via the @samp{<}, @samp{<<},
+@samp{>}, @samp{>>}, @samp{>&} and @samp{|} operators. Standard error
+is typically used for writing error messages; the reason we have two separate
+streams, standard output and standard error, is so that they can be
+redirected separately.
+
+@cindex differences between @code{gawk} and @code{awk}
+In other implementations of @code{awk}, the only way to write an error
+message to standard error in an @code{awk} program is as follows:
+
+@example
+print "Serious error detected!" | "cat 1>&2"
+@end example
+
+@noindent
+This works by opening a pipeline to a shell command which can access the
+standard error stream which it inherits from the @code{awk} process.
+This is far from elegant, and is also inefficient, since it requires a
+separate process. So people writing @code{awk} programs often
+neglect to do this. Instead, they send the error messages to the
+terminal, like this:
+
+@example
+@group
+print "Serious error detected!" > "/dev/tty"
+@end group
+@end example
+
+@noindent
+This usually has the same effect, but not always: although the
+standard error stream is usually the terminal, it can be redirected, and
+when that happens, writing to the terminal is not correct. In fact, if
+@code{awk} is run from a background job, it may not have a terminal at all.
+Then opening @file{/dev/tty} will fail.
+
+@code{gawk} provides special file names for accessing the three standard
+streams. When you redirect input or output in @code{gawk}, if the file name
+matches one of these special names, then @code{gawk} directly uses the
+stream it stands for.
+
+@cindex @file{/dev/stdin}
+@cindex @file{/dev/stdout}
+@cindex @file{/dev/stderr}
+@cindex @file{/dev/fd}
+@c @cartouche
+@table @file
+@item /dev/stdin
+The standard input (file descriptor 0).
+
+@item /dev/stdout
+The standard output (file descriptor 1).
+
+@item /dev/stderr
+The standard error output (file descriptor 2).
+
+@item /dev/fd/@var{N}
+The file associated with file descriptor @var{N}. Such a file must have
+been opened by the program initiating the @code{awk} execution (typically
+the shell). Unless you take special pains in the shell from which
+you invoke @code{gawk}, only descriptors 0, 1 and 2 are available.
+@end table
+@c @end cartouche
+
+The file names @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
+are aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and @file{/dev/fd/2},
+respectively, but they are more self-explanatory.
+
+The proper way to write an error message in a @code{gawk} program
+is to use @file{/dev/stderr}, like this:
+
+@example
+print "Serious error detected!" > "/dev/stderr"
+@end example
+
+@code{gawk} also provides special file names that give access to information
+about the running @code{gawk} process. Each of these ``files'' provides
+a single record of information. To read them more than once, you must
+first close them with the @code{close} function
+(@pxref{Close Files And Pipes, ,Closing Input and Output Files and Pipes}).
+The file names are:
+
+@cindex process information
+@cindex @file{/dev/pid}
+@cindex @file{/dev/pgrpid}
+@cindex @file{/dev/ppid}
+@cindex @file{/dev/user}
+@c @cartouche
+@table @file
+@item /dev/pid
+Reading this file returns the process ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/ppid
+Reading this file returns the parent process ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/pgrpid
+Reading this file returns the process group ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/user
+Reading this file returns a single record terminated with a newline.
+The fields are separated with spaces. The fields represent the
+following information:
+
+@table @code
+@item $1
+The return value of the @code{getuid} system call
+(the real user ID number).
+
+@item $2
+The return value of the @code{geteuid} system call
+(the effective user ID number).
+
+@item $3
+The return value of the @code{getgid} system call
+(the real group ID number).
+
+@item $4
+The return value of the @code{getegid} system call
+(the effective group ID number).
+@end table
+
+If there are any additional fields, they are the group IDs returned by
+the @code{getgroups} system call.
+(Multiple groups may not be supported on all systems.)
+@end table
+@c @end cartouche
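+
+For example, here is a small sketch that reads @file{/dev/user} with
+@code{getline} and reports the real and effective user ID numbers
+from the first two fields:
+
+@example
+@group
+BEGIN @{
+    getline < "/dev/user"
+    print "real uid:", $1, "effective uid:", $2
+@}
+@end group
+@end example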
+
+These special file names may be used on the command line as data
+files, as well as for I/O redirections within an @code{awk} program.
+They may not be used as source files with the @samp{-f} option.
+
+Recognition of these special file names is disabled if @code{gawk} is in
+compatibility mode (@pxref{Options, ,Command Line Options}).
+
+@strong{Caution}: Unless your system actually has a @file{/dev/fd} directory
+(or any of the other special files listed above),
+the interpretation of these file names is done by @code{gawk} itself.
+For example, using @samp{/dev/fd/4} for output will actually write on
+file descriptor 4, and not on a new file descriptor that was @code{dup}'ed
+from file descriptor 4. Most of the time this does not matter; however, it
+is important to @emph{not} close any of the files related to file descriptors
+0, 1, and 2. If you do close one of these files, unpredictable behavior
+will result.
+
+The special files that provide process-related information may disappear
+in a future version of @code{gawk}.
+@xref{Future Extensions, ,Probable Future Extensions}.
+
+@node Close Files And Pipes, , Special Files, Printing
+@section Closing Input and Output Files and Pipes
+@cindex closing input files and pipes
+@cindex closing output files and pipes
+@findex close
+
+If the same file name or the same shell command is used with
+@code{getline}
+(@pxref{Getline, ,Explicit Input with @code{getline}})
+more than once during the execution of an @code{awk}
+program, the file is opened (or the command is executed) only the first time.
+At that time, the first record of input is read from that file or command.
+The next time the same file or command is used in @code{getline}, another
+record is read from it, and so on.
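+
+For example, in the following sketch the two @code{getline} calls read
+the first and second records of @file{BBS-list}, respectively; the
+second call simply continues where the first one left off:
+
+@example
+@group
+BEGIN @{
+    getline first < "BBS-list"    # reads the first record
+    getline second < "BBS-list"   # reads the second record
+    print first
+    print second
+@}
+@end group
+@end example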
+
+Similarly, when a file or pipe is opened for output, the file name or command
+associated with
+it is remembered by @code{awk} and subsequent writes to the same file or
+command are appended to the previous writes. The file or pipe stays
+open until @code{awk} exits.
+
+This implies that if you want to start reading the same file again from
+the beginning, or if you want to rerun a shell command (rather than
+reading more output from the command), you must take special steps.
+What you must do is use the @code{close} function, as follows:
+
+@example
+close(@var{filename})
+@end example
+
+@noindent
+or
+
+@example
+close(@var{command})
+@end example
+
+The argument @var{filename} or @var{command} can be any expression. Its
+value must @emph{exactly} match the string that was used to open the file or
+start the command (spaces and other ``irrelevant'' characters
+included). For example, if you open a pipe with this:
+
+@example
+"sort -r names" | getline foo
+@end example
+
+@noindent
+then you must close it with this:
+
+@example
+close("sort -r names")
+@end example
+
+Once this function call is executed, the next @code{getline} from that
+file or command, or the next @code{print} or @code{printf} to that
+file or command, will reopen the file or rerun the command.
+
+Because the expression that you use to close a file or pipeline must
+exactly match the expression used to open the file or run the command,
+it is good practice to use a variable to store the file name or command.
+The previous example would become
+
+@example
+sortcom = "sort -r names"
+sortcom | getline foo
+@dots{}
+close(sortcom)
+@end example
+
+@noindent
+This helps avoid hard-to-find typographical errors in your @code{awk}
+programs.
+
+Here are some reasons why you might need to close an output file:
+
+@itemize @bullet
+@item
+To write a file and read it back later on in the same @code{awk}
+program. Close the file when you are finished writing it; then
+you can start reading it with @code{getline}.
+
+@item
+To write numerous files, successively, in the same @code{awk}
+program. If you don't close the files, eventually you may exceed a
+system limit on the number of open files in one process. So close
+each one when you are finished writing it.
+
+@item
+To make a command finish. When you redirect output through a pipe,
+the command reading the pipe normally continues to try to read input
+as long as the pipe is open. Often this means the command cannot
+really do its work until the pipe is closed. For example, if you
+redirect output to the @code{mail} program, the message is not
+actually sent until the pipe is closed.
+
+@item
+To run the same program a second time, with the same arguments.
+This is not the same thing as giving more input to the first run!
+
+For example, suppose you pipe output to the @code{mail} program. If you
+output several lines redirected to this pipe without closing it, they make
+a single message of several lines. By contrast, if you close the pipe
+after each line of output, then each line makes a separate message.
+@end itemize
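+
+For instance, here is a minimal sketch of the first case above: the
+main rule writes names to a temporary file (the name @file{names.tmp}
+is made up), and the @code{END} rule closes the file and then reads it
+back with @code{getline}:
+
+@example
+@group
+BEGIN @{ tmpfile = "names.tmp" @}
+@{ print $1 > tmpfile @}
+END @{
+    close(tmpfile)        # finish writing before reading back
+    while ((getline name < tmpfile) > 0)
+        print "saw", name
+    close(tmpfile)
+@}
+@end group
+@end example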
+
+@vindex ERRNO
+@cindex differences between @code{gawk} and @code{awk}
+@code{close} returns a value of zero if the close succeeded.
+Otherwise, the value will be non-zero.
+In this case, @code{gawk} sets the variable @code{ERRNO} to a string
+describing the error that occurred.
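+
+For example, a program could check for trouble when closing the mail
+pipeline from the earlier example, along these lines:
+
+@example
+@group
+report = "mail bug-system"
+print "Awk script failed:", $0 | report
+if (close(report) != 0)
+    print "mail pipe failed:", ERRNO > "/dev/stderr"
+@end group
+@end example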
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex portability issues
+If you use more files than the system allows you to have open,
+@code{gawk} will attempt to multiplex the available open files among
+your data files. @code{gawk}'s ability to do this depends upon the
+facilities of your operating system: it may not always work. It is
+therefore both good practice and good portability advice to always
+use @code{close} on your files when you are done with them.
+
+@node Expressions, Patterns and Actions, Printing, Top
+@chapter Expressions
+@cindex expression
+
+Expressions are the basic building blocks of @code{awk} patterns
+and actions. An expression evaluates to a value, which you can print, test,
+store in a variable or pass to a function. Additionally, an expression
+can assign a new value to a variable or a field, with an assignment operator.
+
+An expression can serve as a pattern or action statement on its own.
+Most other kinds of
+statements contain one or more expressions which specify data on which to
+operate. As in other languages, expressions in @code{awk} include
+variables, array references, constants, and function calls, as well as
+combinations of these with various operators.
+
+@menu
+* Constants:: String, numeric, and regexp constants.
+* Using Constant Regexps:: When and how to use a regexp constant.
+* Variables:: Variables give names to values for later use.
+* Conversion:: The conversion of strings to numbers and vice
+ versa.
+* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-},
+ etc.)
+* Concatenation:: Concatenating strings.
+* Assignment Ops:: Changing the value of a variable or a field.
+* Increment Ops:: Incrementing the numeric value of a variable.
+* Truth Values:: What is ``true'' and what is ``false''.
+* Typing and Comparison:: How variables acquire types, and how this
+ affects comparison of numbers and strings with
+ @samp{<}, etc.
+* Boolean Ops:: Combining comparison expressions using boolean
+ operators @samp{||} (``or''), @samp{&&}
+ (``and'') and @samp{!} (``not'').
+* Conditional Exp:: Conditional expressions select between two
+ subexpressions under control of a third
+ subexpression.
+* Function Calls:: A function call is an expression.
+* Precedence:: How various operators nest.
+@end menu
+
+@node Constants, Using Constant Regexps, Expressions, Expressions
+@section Constant Expressions
+@cindex constants, types of
+@cindex string constants
+
+The simplest type of expression is the @dfn{constant}, which always has
+the same value. There are three types of constants: numeric constants,
+string constants, and regular expression constants.
+
+@menu
+* Scalar Constants:: Numeric and string constants.
+* Regexp Constants:: Regular Expression constants.
+@end menu
+
+@node Scalar Constants, Regexp Constants, Constants, Constants
+@subsection Numeric and String Constants
+
+@cindex numeric constant
+@cindex numeric value
+A @dfn{numeric constant} stands for a number. This number can be an
+integer, a decimal fraction, or a number in scientific (exponential)
+notation.@footnote{The internal representation uses double-precision
+floating point numbers. If you don't know what that means, then don't
+worry about it.} Here are some examples of numeric constants, which all
+have the same value:
+
+@example
+105
+1.05e+2
+1050e-1
+@end example
+
+A string constant consists of a sequence of characters enclosed in
+double-quote marks. For example:
+
+@example
+"parrot"
+@end example
+
+@noindent
+@cindex differences between @code{gawk} and @code{awk}
+represents the string whose contents are @samp{parrot}. Strings in
+@code{gawk} can be of any length and they can contain any of the possible
+eight-bit ASCII characters including ASCII NUL (character code zero).
+Other @code{awk}
+implementations may have difficulty with some character codes.
+
+@node Regexp Constants, , Scalar Constants, Constants
+@subsection Regular Expression Constants
+
+@cindex @code{~} operator
+@cindex @code{!~} operator
+A regexp constant is a regular expression description enclosed in
+slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in
+@code{awk} programs are constant, but the @samp{~} and @samp{!~}
+matching operators can also match computed or ``dynamic'' regexps
+(which are just ordinary strings or variables that contain a regexp).
+
+@node Using Constant Regexps, Variables, Constants, Expressions
+@section Using Regular Expression Constants
+
+When used on the right hand side of the @samp{~} or @samp{!~}
+operators, a regexp constant merely stands for the regexp that is to be
+matched.
+
+@cindex dark corner
+Regexp constants (such as @code{/foo/}) may be used like simple expressions.
+When a
+regexp constant appears by itself, it has the same meaning as if it appeared
+in a pattern, i.e.@: @samp{($0 ~ /foo/)} (d.c.)
+(@pxref{Expression Patterns, ,Expressions as Patterns}).
+This means that the two code segments,
+
+@example
+if ($0 ~ /barfly/ || $0 ~ /camelot/)
+ print "found"
+@end example
+
+@noindent
+and
+
+@example
+if (/barfly/ || /camelot/)
+ print "found"
+@end example
+
+@noindent
+are exactly equivalent.
+
+One rather bizarre consequence of this rule is that the following
+boolean expression is valid, but does not do what the user probably
+intended:
+
+@example
+# note that /foo/ is on the left of the ~
+if (/foo/ ~ $1) print "found foo"
+@end example
+
+@noindent
+This code is ``obviously'' testing @code{$1} for a match against the regexp
+@code{/foo/}. But in fact, the expression @samp{/foo/ ~ $1} actually means
+@samp{($0 ~ /foo/) ~ $1}. In other words, first match the input record
+against the regexp @code{/foo/}. The result will be either zero or one,
+depending upon the success or failure of the match. Then match that result
+against the first field in the record.
+
+Since it is unlikely that you would ever really wish to make this kind of
+test, @code{gawk} will issue a warning when it sees this construct in
+a program.
+
+Another consequence of this rule is that the assignment statement
+
+@example
+matches = /foo/
+@end example
+
+@noindent
+will assign either zero or one to the variable @code{matches}, depending
+upon the contents of the current input record.
+
+This feature of the language was never well documented until the
+POSIX specification.
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex dark corner
+Constant regular expressions are also used as the first argument for
+the @code{gensub}, @code{sub} and @code{gsub} functions, and as the
+second argument of the @code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+Modern implementations of @code{awk}, including @code{gawk}, allow
+the third argument of @code{split} to be a regexp constant, while some
+older implementations do not (d.c.).
+
+This can lead to confusion when attempting to use regexp constants
+as arguments to user defined functions
+(@pxref{User-defined, , User-defined Functions}).
+For example:
+
+@example
+function mysub(pat, repl, str, global)
+@{
+ if (global)
+ gsub(pat, repl, str)
+ else
+ sub(pat, repl, str)
+ return str
+@}
+
+@{
+ @dots{}
+ text = "hi! hi yourself!"
+ mysub(/hi/, "howdy", text, 1)
+ @dots{}
+@}
+@end example
+
+In this example, the programmer wishes to pass a regexp constant to the
+user-defined function @code{mysub}, which will in turn pass it on to
+either @code{sub} or @code{gsub}. However, what really happens is that
+the @code{pat} parameter will be either one or zero, depending upon whether
+or not @code{$0} matches @code{/hi/}.
+
+As it is unlikely that you would ever really wish to pass a truth value
+in this way, @code{gawk} will issue a warning when it sees a regexp
+constant used as a parameter to a user-defined function.
+
+@node Variables, Conversion, Using Constant Regexps, Expressions
+@section Variables
+
+Variables are ways of storing values at one point in your program for
+use later in another part of your program. You can manipulate them
+entirely within your program text, and you can also assign values to
+them on the @code{awk} command line.
+
+@menu
+* Using Variables:: Using variables in your programs.
+* Assignment Options:: Setting variables on the command line and a
+ summary of command line syntax. This is an
+ advanced method of input.
+@end menu
+
+@node Using Variables, Assignment Options, Variables, Variables
+@subsection Using Variables in a Program
+
+@cindex variables, user-defined
+@cindex user-defined variables
+Variables let you give names to values and refer to them later. You have
+already seen variables in many of the examples. The name of a variable
+must be a sequence of letters, digits and underscores, but it may not begin
+with a digit. Case is significant in variable names; @code{a} and @code{A}
+are distinct variables.
+
+A variable name is a valid expression by itself; it represents the
+variable's current value. Variables are given new values with
+@dfn{assignment operators}, @dfn{increment operators} and
+@dfn{decrement operators}.
+@xref{Assignment Ops, ,Assignment Expressions}.
+
+A few variables have special built-in meanings, such as @code{FS}, the
+field separator, and @code{NF}, the number of fields in the current
+input record. @xref{Built-in Variables}, for a list of them. These
+built-in variables can be used and assigned just like all other
+variables, but their values are also used or changed automatically by
+@code{awk}. All built-in variable names are entirely upper-case.
+
+Variables in @code{awk} can be assigned either numeric or string
+values. By default, variables are initialized to the empty string, which
+is zero if converted to a number. There is no need to
+``initialize'' each variable explicitly in @code{awk},
+the way you would in C and in most other traditional languages.
+
+@node Assignment Options, , Using Variables, Variables
+@subsection Assigning Variables on the Command Line
+
+You can set any @code{awk} variable by including a @dfn{variable assignment}
+among the arguments on the command line when you invoke @code{awk}
+(@pxref{Other Arguments, ,Other Command Line Arguments}). Such an assignment has
+this form:
+
+@example
+@var{variable}=@var{text}
+@end example
+
+@noindent
+With it, you can set a variable either at the beginning of the
+@code{awk} run or in between input files.
+
+If you precede the assignment with the @samp{-v} option, like this:
+
+@example
+-v @var{variable}=@var{text}
+@end example
+
+@noindent
+then the variable is set at the very beginning, before even the
+@code{BEGIN} rules are run. The @samp{-v} option and its assignment
+must precede all the file name arguments, as well as the program text.
+(@xref{Options, ,Command Line Options}, for more information about
+the @samp{-v} option.)
+
+Otherwise, the variable assignment is performed at a time determined by
+its position among the input file arguments: after the processing of the
+preceding input file argument. For example:
+
+@example
+awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list
+@end example
+
+@noindent
+prints the value of field number @code{n} for all input records. Before
+the first file is read, the command line sets the variable @code{n}
+equal to four. This causes the fourth field to be printed in lines from
+the file @file{inventory-shipped}. After the first file has finished,
+but before the second file is started, @code{n} is set to two, so that the
+second field is printed in lines from @file{BBS-list}.
+
+@example
+@group
+$ awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list
+@print{} 15
+@print{} 24
+@dots{}
+@print{} 555-5553
+@print{} 555-3412
+@dots{}
+@end group
+@end example
+
+Command line arguments are made available for explicit examination by
+the @code{awk} program in an array named @code{ARGV}
+(@pxref{ARGC and ARGV, ,Using @code{ARGC} and @code{ARGV}}).
+
+@cindex dark corner
+@code{awk} processes the values of command line assignments for escape
+sequences (d.c.) (@pxref{Escape Sequences}).
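+
+For instance, in the following sketch the @samp{\n} in the @samp{-v}
+assignment becomes a real newline before the program ever sees the
+value:
+
+@example
+@group
+$ awk -v 'msg=hello\nworld' 'BEGIN @{ print msg @}'
+@print{} hello
+@print{} world
+@end group
+@end example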
+
+@node Conversion, Arithmetic Ops, Variables, Expressions
+@section Conversion of Strings and Numbers
+
+@cindex conversion of strings and numbers
+Strings are converted to numbers, and numbers to strings, if the context
+of the @code{awk} program demands it. For example, if the value of
+either @code{foo} or @code{bar} in the expression @samp{foo + bar}
+happens to be a string, it is converted to a number before the addition
+is performed. If numeric values appear in string concatenation, they
+are converted to strings. Consider this:
+
+@example
+two = 2; three = 3
+print (two three) + 4
+@end example
+
+@noindent
+This prints the (numeric) value 27. The numeric values of
+the variables @code{two} and @code{three} are converted to strings and
+concatenated together, and the resulting string is converted back to the
+number 23, to which four is then added.
+
+@cindex null string
+@cindex empty string
+@cindex type conversion
+If, for some reason, you need to force a number to be converted to a
+string, concatenate the empty string, @code{""}, with that number.
+To force a string to be converted to a number, add zero to that string.
+
+A string is converted to a number by interpreting any numeric prefix
+of the string as numerals:
+@code{"2.5"} converts to 2.5, @code{"1e3"} converts to 1000, and @code{"25fix"}
+has a numeric value of 25.
+Strings that can't be interpreted as valid numbers are converted to
+zero.
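+
+For example, here is a small sketch of both techniques:
+
+@example
+@group
+str = 3.5 ""       # concatenating "" gives the string "3.5"
+num = "3.5" + 0    # adding zero gives the number 3.5
+@end group
+@end example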
+
+@vindex CONVFMT
+The exact manner in which numbers are converted into strings is controlled
+by the @code{awk} built-in variable @code{CONVFMT} (@pxref{Built-in Variables}).
+Numbers are converted using the @code{sprintf} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation})
+with @code{CONVFMT} as the format
+specifier.
+
+@code{CONVFMT}'s default value is @code{"%.6g"}, which prints a value with
+at least six significant digits. For some applications you will want to
+change it to specify more precision. Double precision on most modern
+machines gives you 16 or 17 decimal digits of precision.
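+
+For example, this sketch shows the effect of changing @code{CONVFMT}
+on a number that is forced to a string by concatenating the null
+string:
+
+@example
+@group
+BEGIN @{
+    x = 2 / 3
+    print (x "")          # default CONVFMT; prints 0.666667
+    CONVFMT = "%.12g"
+    print (x "")          # prints 0.666666666667
+@}
+@end group
+@end example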
+
+Strange results can happen if you set @code{CONVFMT} to a string that doesn't
+tell @code{sprintf} how to format floating point numbers in a useful way.
+For example, if you forget the @samp{%} in the format, all numbers will be
+converted to the same constant string.
+
+@cindex dark corner
+As a special case, if a number is an integer, then the result of converting
+it to a string is @emph{always} an integer, no matter what the value of
+@code{CONVFMT} may be. Given the following code fragment:
+
+@example
+CONVFMT = "%2.2f"
+a = 12
+b = a ""
+@end example
+
+@noindent
+@code{b} has the value @code{"12"}, not @code{"12.00"} (d.c.).
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@vindex OFMT
+Prior to the POSIX standard, @code{awk} specified that the value
+of @code{OFMT} was used for converting numbers to strings. @code{OFMT}
+specifies the output format to use when printing numbers with @code{print}.
+@code{CONVFMT} was introduced in order to separate the semantics of
+conversion from the semantics of printing. Both @code{CONVFMT} and
+@code{OFMT} have the same default value: @code{"%.6g"}. In the vast majority
+of cases, old @code{awk} programs will not change their behavior.
+However, this use of @code{OFMT} is something to keep in mind if you must
+port your program to other implementations of @code{awk}; we recommend
+that instead of changing your programs, you just port @code{gawk} itself!
+@xref{Print, ,The @code{print} Statement},
+for more information on the @code{print} statement.
+
+@node Arithmetic Ops, Concatenation, Conversion, Expressions
+@section Arithmetic Operators
+@cindex arithmetic operators
+@cindex operators, arithmetic
+@cindex addition
+@cindex subtraction
+@cindex multiplication
+@cindex division
+@cindex remainder
+@cindex quotient
+@cindex exponentiation
+
+The @code{awk} language uses the common arithmetic operators when
+evaluating expressions. All of these arithmetic operators follow normal
+precedence rules, and work as you would expect them to.
+
+Here is a file @file{grades} containing a list of student names and
+three test scores per student (it's a small class):
+
+@example
+Pat 100 97 58
+Sandy 84 72 93
+Chris 72 92 89
+@end example
+
+@noindent
+This program takes the file @file{grades} and prints the average
+of each student's scores.
+
+@example
+$ awk '@{ sum = $2 + $3 + $4 ; avg = sum / 3
+> print $1, avg @}' grades
+@print{} Pat 85
+@print{} Sandy 83
+@print{} Chris 84.3333
+@end example
+
+This table lists the arithmetic operators in @code{awk}, in order from
+highest precedence to lowest:
+
+@c sigh. this seems necessary
+@iftex
+@page
+@end iftex
+@c @cartouche
+@table @code
+@item - @var{x}
+Negation.
+
+@item + @var{x}
+Unary plus. The expression is converted to a number.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item @var{x} ^ @var{y}
+@itemx @var{x} ** @var{y}
+Exponentiation: @var{x} raised to the @var{y} power. @samp{2 ^ 3} has
+the value eight. The character sequence @samp{**} is equivalent to
+@samp{^}. (The POSIX standard only specifies the use of @samp{^}
+for exponentiation.)
+
+@item @var{x} * @var{y}
+Multiplication.
+
+@item @var{x} / @var{y}
+Division. Since all numbers in @code{awk} are
+real numbers, the result is not rounded to an integer: @samp{3 / 4}
+has the value 0.75.
+
+@item @var{x} % @var{y}
+@cindex differences between @code{gawk} and @code{awk}
+Remainder. The quotient is rounded toward zero to an integer,
+multiplied by @var{y} and this result is subtracted from @var{x}.
+This operation is sometimes known as ``trunc-mod.'' The following
+relation always holds:
+
+@example
+b * int(a / b) + (a % b) == a
+@end example
+
+One possibly undesirable effect of this definition of remainder is that
+@code{@var{x} % @var{y}} is negative if @var{x} is negative. Thus,
+
+@example
+-17 % 8 = -1
+@end example
+
+In other @code{awk} implementations, the signedness of the remainder
+may be machine dependent.
+@c !!! what does posix say?
+
+@item @var{x} + @var{y}
+Addition.
+
+@item @var{x} - @var{y}
+Subtraction.
+@end table
+@c @end cartouche
+
+For maximum portability, do not use the @samp{**} operator.
+
+Unary plus and minus have the same precedence,
+the multiplication operators all have the same precedence, and
+addition and subtraction have the same precedence.
+
+@node Concatenation, Assignment Ops, Arithmetic Ops, Expressions
+@section String Concatenation
+
+@cindex string operators
+@cindex operators, string
+@cindex concatenation
+There is only one string operation: concatenation. It does not have a
+specific operator to represent it. Instead, concatenation is performed by
+writing expressions next to one another, with no operator. For example:
+
+@example
+@group
+$ awk '@{ print "Field number one: " $1 @}' BBS-list
+@print{} Field number one: aardvark
+@print{} Field number one: alpo-net
+@dots{}
+@end group
+@end example
+
+Without the space in the string constant after the @samp{:}, the line
+would run together. For example:
+
+@example
+@group
+$ awk '@{ print "Field number one:" $1 @}' BBS-list
+@print{} Field number one:aardvark
+@print{} Field number one:alpo-net
+@dots{}
+@end group
+@end example
+
+Since string concatenation does not have an explicit operator, it is
+often necessary to ensure that it happens where you want it to by
+using parentheses to enclose
+the items to be concatenated. For example, the
+following code fragment does not concatenate @code{file} and @code{name}
+as you might expect:
+
+@example
+file = "file"
+name = "name"
+print "something meaningful" > file name
+@end example
+
+@noindent
+It is necessary to use the following:
+
+@example
+print "something meaningful" > (file name)
+@end example
+
+We recommend that you use parentheses around concatenation in all but the
+most common contexts (such as on the right-hand side of @samp{=}).
+
+@node Assignment Ops, Increment Ops, Concatenation, Expressions
+@section Assignment Expressions
+@cindex assignment operators
+@cindex operators, assignment
+@cindex expression, assignment
+
+An @dfn{assignment} is an expression that stores a new value into a
+variable. For example, let's assign the value one to the variable
+@code{z}:
+
+@example
+z = 1
+@end example
+
+After this expression is executed, the variable @code{z} has the value one.
+Whatever old value @code{z} had before the assignment is forgotten.
+
+Assignments can store string values also. For example, this would store
+the value @code{"this food is good"} in the variable @code{message}:
+
+@example
+thing = "food"
+predicate = "good"
+message = "this " thing " is " predicate
+@end example
+
+@noindent
+(This also illustrates string concatenation.)
+
+The @samp{=} sign is called an @dfn{assignment operator}. It is the
+simplest assignment operator because the value of the right-hand
+operand is stored unchanged.
+
+@cindex side effect
+Most operators (addition, concatenation, and so on) have no effect
+except to compute a value. If you ignore the value, you might as well
+not use the operator. An assignment operator is different; it does
+produce a value, but even if you ignore the value, the assignment still
+makes itself felt through the alteration of the variable. We call this
+a @dfn{side effect}.
+
+@cindex lvalue
+@cindex rvalue
+The left-hand operand of an assignment need not be a variable
+(@pxref{Variables}); it can also be a field
+(@pxref{Changing Fields, ,Changing the Contents of a Field}) or
+an array element (@pxref{Arrays, ,Arrays in @code{awk}}).
+These are all called @dfn{lvalues},
+which means they can appear on the left-hand side of an assignment operator.
+The right-hand operand may be any expression; it produces the new value
+which the assignment stores in the specified variable, field or array
+element. (Such values are called @dfn{rvalues}).
+
+@cindex types of variables
+It is important to note that variables do @emph{not} have permanent types.
+The type of a variable is simply the type of whatever value it happens
+to hold at the moment. In the following program fragment, the variable
+@code{foo} has a numeric value at first, and a string value later on:
+
+@example
+foo = 1
+print foo
+foo = "bar"
+print foo
+@end example
+
+@noindent
+When the second assignment gives @code{foo} a string value, the fact that
+it previously had a numeric value is forgotten.
+
+String values that cannot be interpreted as numbers have a numeric value of
+zero. After executing this code, the value of @code{foo} is five:
+
+@example
+foo = "a string"
+foo = foo + 5
+@end example
+
+@noindent
+(Note that using a variable as a number and then later as a string can
+be confusing and is poor programming style. The above examples illustrate how
+@code{awk} works, @emph{not} how you should write your own programs!)
+
+An assignment is an expression, so it has a value: the same value that
+is assigned. Thus, @samp{z = 1} as an expression has the value one.
+One consequence of this is that you can write multiple assignments together:
+
+@example
+x = y = z = 0
+@end example
+
+@noindent
+stores the value zero in all three variables. It does this because the
+value of @samp{z = 0}, which is zero, is stored into @code{y}, and then
+the value of @samp{y = z = 0}, which is zero, is stored into @code{x}.
+
+You can use an assignment anywhere an expression is called for. For
+example, it is valid to write @samp{x != (y = 1)} to set @code{y} to one
+and then test whether @code{x} equals one. But this style tends to make
+programs hard to read; except in a one-shot program, you should
+not use such nesting of assignments.
+
+Aside from @samp{=}, there are several other assignment operators that
+do arithmetic with the old value of the variable. For example, the
+operator @samp{+=} computes a new value by adding the right-hand value
+to the old value of the variable. Thus, the following assignment adds
+five to the value of @code{foo}:
+
+@example
+foo += 5
+@end example
+
+@noindent
+This is equivalent to the following:
+
+@example
+foo = foo + 5
+@end example
+
+@noindent
+Use whichever one makes the meaning of your program clearer.
+
+There are situations where using @samp{+=} (or any assignment operator)
+is @emph{not} the same as simply repeating the left-hand operand in the
+right-hand expression. For example:
+
+@cindex Rankin, Pat
+@example
+@group
+# Thanks to Pat Rankin for this example
+BEGIN @{
+ foo[rand()] += 5
+ for (x in foo)
+ print x, foo[x]
+
+ bar[rand()] = bar[rand()] + 5
+ for (x in bar)
+ print x, bar[x]
+@}
+@end group
+@end example
+
+@noindent
+The indices of @code{bar} are guaranteed to be different, because
+@code{rand} will return different values each time it is called.
+(Arrays and the @code{rand} function haven't been covered yet.
+@xref{Arrays, ,Arrays in @code{awk}},
+and see @ref{Numeric Functions, ,Numeric Built-in Functions}, for more information).
+This example illustrates an important fact about the assignment
+operators: the left-hand expression is only evaluated @emph{once}.
+
+It is also up to the implementation whether the left-hand expression
+or the right-hand expression is evaluated first.
+Consider this example:
+
+@example
+i = 1
+a[i += 2] = i + 1
+@end example
+
+@noindent
+The value of @code{a[3]} could be either two or four.
+
+Here is a table of the arithmetic assignment operators. In each
+case, the right-hand operand is an expression whose value is converted
+to a number.
+
+@c @cartouche
+@table @code
+@item @var{lvalue} += @var{increment}
+Adds @var{increment} to the value of @var{lvalue} to make the new value
+of @var{lvalue}.
+
+@item @var{lvalue} -= @var{decrement}
+Subtracts @var{decrement} from the value of @var{lvalue}.
+
+@item @var{lvalue} *= @var{coefficient}
+Multiplies the value of @var{lvalue} by @var{coefficient}.
+
+@item @var{lvalue} /= @var{divisor}
+Divides the value of @var{lvalue} by @var{divisor}.
+
+@item @var{lvalue} %= @var{modulus}
+Sets @var{lvalue} to its remainder by @var{modulus}.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item @var{lvalue} ^= @var{power}
+@itemx @var{lvalue} **= @var{power}
+Raises @var{lvalue} to the power @var{power}.
+(Only the @samp{^=} operator is specified by POSIX.)
+@end table
+@c @end cartouche
+
+For maximum portability, do not use the @samp{**=} operator.
+
+@node Increment Ops, Truth Values, Assignment Ops, Expressions
+@section Increment and Decrement Operators
+
+@cindex increment operators
+@cindex operators, increment
+@dfn{Increment} and @dfn{decrement operators} increase or decrease the value of
+a variable by one. You could do the same thing with an assignment operator, so
+the increment operators add no power to the @code{awk} language; but they
+are convenient abbreviations for very common operations.
+
+The operator to add one is written @samp{++}. It can be used to increment
+a variable either before or after taking its value.
+
+To pre-increment a variable @var{v}, write @samp{++@var{v}}. This adds
+one to the value of @var{v} and that new value is also the value of this
+expression. The assignment expression @samp{@var{v} += 1} is completely
+equivalent.
+
+Writing the @samp{++} after the variable specifies post-increment. This
+increments the variable value just the same; the difference is that the
+value of the increment expression itself is the variable's @emph{old}
+value. Thus, if @code{foo} has the value four, then the expression @samp{foo++}
+has the value four, but it changes the value of @code{foo} to five.
+
+The post-increment @samp{foo++} is nearly equivalent to writing @samp{(foo
++= 1) - 1}. It is not perfectly equivalent because all numbers in
+@code{awk} are floating point: in floating point, @samp{foo + 1 - 1} does
+not necessarily equal @code{foo}. But the difference is minute as
+long as you stick to numbers that are fairly small (less than 10e12).
+
+Any lvalue can be incremented. Fields and array elements are incremented
+just like variables. (Use @samp{$(i++)} when you wish to do a field reference
+and a variable increment at the same time. The parentheses are necessary
+because of the precedence of the field reference operator, @samp{$}.)
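+
+For example, this sketch prints a different field from each successive
+record, referencing a field and incrementing the counter in a single
+expression:
+
+@example
+@group
+BEGIN @{ i = 1 @}
+@{ print $(i++) @}    # field 1 of record 1, field 2 of record 2, @dots{}
+@end group
+@end example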
+
+@cindex decrement operators
+@cindex operators, decrement
+The decrement operator @samp{--} works just like @samp{++} except that
+it subtracts one instead of adding. Like @samp{++}, it can be used before
+the lvalue to pre-decrement or after it to post-decrement.
+
+Here is a summary of increment and decrement expressions.
+
+@c @cartouche
+@table @code
+@item ++@var{lvalue}
+This expression increments @var{lvalue} and the new value becomes the
+value of the expression.
+
+@item @var{lvalue}++
+This expression increments @var{lvalue}, but
+the value of the expression is the @emph{old} value of @var{lvalue}.
+
+@item --@var{lvalue}
+Like @samp{++@var{lvalue}}, but instead of adding, it subtracts. It
+decrements @var{lvalue} and delivers the value that results.
+
+@item @var{lvalue}--
+Like @samp{@var{lvalue}++}, but instead of adding, it subtracts. It
+decrements @var{lvalue}. The value of the expression is the @emph{old}
+value of @var{lvalue}.
+@end table
+@c @end cartouche
+
+@node Truth Values, Typing and Comparison, Increment Ops, Expressions
+@section True and False in @code{awk}
+@cindex truth values
+@cindex logical true
+@cindex logical false
+
+Many programming languages have a special representation for the concepts
+of ``true'' and ``false.'' Such languages usually use the special
+constants @code{true} and @code{false}, or perhaps their upper-case
+equivalents.
+
+@cindex null string
+@cindex empty string
+@code{awk} is different. It borrows a very simple concept of true and
+false from C. In @code{awk}, any non-zero numeric value, @emph{or} any
+non-empty string value is true. Any other value (zero or the null
+string, @code{""}) is false. The following program will print @samp{A strange
+truth value} three times:
+
+@example
+BEGIN @{
+ if (3.1415927)
+ print "A strange truth value"
+ if ("Four Score And Seven Years Ago")
+ print "A strange truth value"
+ if (j = 57)
+ print "A strange truth value"
+@}
+@end example
+
+@cindex dark corner
+There is a surprising consequence of the ``non-zero or non-null'' rule:
+The string constant @code{"0"} is actually true, since it is non-null (d.c.).
+
+@node Typing and Comparison, Boolean Ops, Truth Values, Expressions
+@section Variable Typing and Comparison Expressions
+@cindex comparison expressions
+@cindex expression, comparison
+@cindex expression, matching
+@cindex relational operators
+@cindex operators, relational
+@cindex regexp match/non-match operators
+@cindex variable typing
+@cindex types of variables
+
+@c 2e: consider splitting this section into subsections
+
+Unlike other programming languages, @code{awk} variables do not have a
+fixed type. Instead, they can be either a number or a string, depending
+upon the value that is assigned to them.
+
+@cindex numeric string
+The 1992 POSIX standard introduced
+the concept of a @dfn{numeric string}, which is simply a string that looks
+like a number, for example, @code{@w{" +2"}}. This concept is used
+for determining the type of a variable.
+
+The type of the variable is important, since the types of two variables
+determine how they are compared.
+
+In @code{gawk}, variable typing follows these rules.
+
+@enumerate 1
+@item
+A numeric literal or the result of a numeric operation has the @var{numeric}
+attribute.
+
+@item
+A string literal or the result of a string operation has the @var{string}
+attribute.
+
+@item
+Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} elements,
+@code{ENVIRON} elements and the
+elements of an array created by @code{split} that are numeric strings
+have the @var{strnum} attribute. Otherwise, they have the @var{string}
+attribute.
+Uninitialized variables also have the @var{strnum} attribute.
+
+@item
+Attributes propagate across assignments, but are not changed by
+any use.
+@c (Although a use may cause the entity to acquire an additional
+@c value such that it has both a numeric and string value -- this leaves the
+@c attribute unchanged.)
+@c This is important but not relevant
+@end enumerate
+
+The last rule is particularly important. In the following program,
+@code{a} has numeric type, even though it is later used in a string
+operation.
+
+@example
+BEGIN @{
+ a = 12.345
+ b = a " is a cute number"
+ print b
+@}
+@end example
+
+When two operands are compared, either string comparison or numeric comparison
+may be used, depending on the attributes of the operands, according to the
+following, symmetric, matrix:
+
+@c thanks to Karl Berry, kb@cs.umb.edu, for major help with TeX tables
+@tex
+\centerline{
+\vbox{\bigskip % space above the table (about 1 linespace)
+% Because we have vertical rules, we can't let TeX insert interline space
+% in its usual way.
+\offinterlineskip
+%
+% Define the table template. & separates columns, and \cr ends the
+% template (and each row). # is replaced by the text of that entry on
+% each row. The template for the first column breaks down like this:
+% \strut -- a way to make each line have the height and depth
+% of a normal line of type, since we turned off interline spacing.
+% \hfil -- infinite glue; has the effect of right-justifying in this case.
+% # -- replaced by the text (for instance, `STRNUM', in the last row).
+% \quad -- about the width of an `M'. Just separates the columns.
+%
+% The second column (\vrule#) is what generates the vertical rule that
+% spans table rows.
+%
+% The doubled && before the next entry means `repeat the following
+% template as many times as necessary on each line' -- in our case, twice.
+%
+% The template itself, \quad#\hfil, left-justifies with a little space before.
+%
+\halign{\strut\hfil#\quad&\vrule#&&\quad#\hfil\cr
+ &&STRING &NUMERIC &STRNUM\cr
+% The \omit tells TeX to skip inserting the template for this column on
+% this particular row. In this case, we only want a little extra space
+% to separate the heading row from the rule below it. the depth 2pt --
+% `\vrule depth 2pt' is that little space.
+\omit &depth 2pt\cr
+% This is the horizontal rule below the heading. Since it has nothing to
+% do with the columns of the table, we use \noalign to get it in there.
+\noalign{\hrule}
+% Like above, this time a little more space.
+\omit &depth 4pt\cr
+% The remaining rows have nothing special about them.
+STRING &&string &string &string\cr
+NUMERIC &&string &numeric &numeric\cr
+STRNUM &&string &numeric &numeric\cr
+}}}
+@end tex
+@ifinfo
+@display
+ +----------------------------------------------
+ | STRING NUMERIC STRNUM
+--------+----------------------------------------------
+ |
+STRING | string string string
+ |
+NUMERIC | string numeric numeric
+ |
+STRNUM | string numeric numeric
+--------+----------------------------------------------
+@end display
+@end ifinfo
+
+The basic idea is that user input that looks numeric, and @emph{only}
+user input, should be treated as numeric, even though it is actually
+made of characters, and is therefore also a string.
+
+@dfn{Comparison expressions} compare strings or numbers for
+relationships such as equality. They are written using @dfn{relational
+operators}, which are a superset of those in C. Here is a table of
+them:
+
+@cindex relational operators
+@cindex operators, relational
+@cindex @code{<} operator
+@cindex @code{<=} operator
+@cindex @code{>} operator
+@cindex @code{>=} operator
+@cindex @code{==} operator
+@cindex @code{!=} operator
+@cindex @code{~} operator
+@cindex @code{!~} operator
+@cindex @code{in} operator
+@c @cartouche
+@table @code
+@item @var{x} < @var{y}
+True if @var{x} is less than @var{y}.
+
+@item @var{x} <= @var{y}
+True if @var{x} is less than or equal to @var{y}.
+
+@item @var{x} > @var{y}
+True if @var{x} is greater than @var{y}.
+
+@item @var{x} >= @var{y}
+True if @var{x} is greater than or equal to @var{y}.
+
+@item @var{x} == @var{y}
+True if @var{x} is equal to @var{y}.
+
+@item @var{x} != @var{y}
+True if @var{x} is not equal to @var{y}.
+
+@item @var{x} ~ @var{y}
+True if the string @var{x} matches the regexp denoted by @var{y}.
+
+@item @var{x} !~ @var{y}
+True if the string @var{x} does not match the regexp denoted by @var{y}.
+
+@item @var{subscript} in @var{array}
+True if the array @var{array} has an element with the subscript @var{subscript}.
+@end table
+@c @end cartouche
+
+Comparison expressions have the value one if true and zero if false.
+
+When comparing operands of mixed types, numeric operands are converted
+to strings using the value of @code{CONVFMT}
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+Strings are compared
+by comparing the first character of each, then the second character of each,
+and so on. Thus @code{"10"} is less than @code{"9"}. If there are two
+strings where one is a prefix of the other, the shorter string is less than
+the longer one. Thus @code{"abc"} is less than @code{"abcd"}.
+
+@cindex common mistakes
+@cindex mistakes, common
+@cindex errors, common
+It is very easy to accidentally mistype the @samp{==} operator, and
+leave off one of the @samp{=}s. The result is still valid @code{awk}
+code, but the program will not do what you mean:
+
+@example
+if (a = b) # oops! should be a == b
+ @dots{}
+else
+ @dots{}
+@end example
+
+@noindent
+Unless @code{b} happens to be zero or the null string, the @code{if}
+part of the test will always succeed. Because the operators are
+so similar, this kind of error is very difficult to spot when
+scanning the source code.
+
+Here are some sample expressions, how @code{gawk} compares them, and what
+the result of the comparison is.
+
+@table @code
+@item 1.5 <= 2.0
+numeric comparison (true)
+
+@item "abc" >= "xyz"
+string comparison (false)
+
+@item 1.5 != " +2"
+string comparison (true)
+
+@item "1e2" < "3"
+string comparison (true)
+
+@item a = 2; b = "2"
+@itemx a == b
+string comparison (true)
+
+@item a = 2; b = " +2"
+@itemx a == b
+string comparison (false)
+@end table
+
+In this example,
+
+@example
+@group
+$ echo 1e2 3 | awk '@{ print ($1 < $2) ? "true" : "false" @}'
+@print{} false
+@end group
+@end example
+
+@noindent
+the result is @samp{false} since both @code{$1} and @code{$2} are numeric
+strings and thus both have the @var{strnum} attribute,
+dictating a numeric comparison.
+
+The purpose of the comparison rules and the use of numeric strings is
+to attempt to produce the behavior that is ``least surprising,'' while
+still ``doing the right thing.''
+
+@cindex comparisons, string vs. regexp
+@cindex string comparison vs. regexp comparison
+@cindex regexp comparison vs. string comparison
+String comparisons and regular expression comparisons are very different.
+For example,
+
+@example
+x == "foo"
+@end example
+
+@noindent
+has the value of one, or is true, if the variable @code{x}
+is precisely @samp{foo}. By contrast,
+
+@example
+x ~ /foo/
+@end example
+
+@noindent
+has the value one if @code{x} contains @samp{foo}, such as
+@code{"Oh, what a fool am I!"}.
+
+The right hand operand of the @samp{~} and @samp{!~} operators may be
+either a regexp constant (@code{/@dots{}/}), or an ordinary
+expression, in which case the value of the expression as a string is used as a
+dynamic regexp (@pxref{Regexp Usage, ,How to Use Regular Expressions}; also
+@pxref{Computed Regexps, ,Using Dynamic Regexps}).
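+
+For example, in this sketch the contents of the variable @code{pat}
+are used as a dynamic regexp to test the first field:
+
+@example
+@group
+BEGIN @{ pat = "fo+" @}
+$1 ~ pat @{ print @}
+@end group
+@end example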
+
+@cindex regexp as expression
+In recent implementations of @code{awk}, a constant regular
+expression in slashes by itself is also an expression. The regexp
+@code{/@var{regexp}/} is an abbreviation for this comparison expression:
+
+@example
+$0 ~ /@var{regexp}/
+@end example
+
+One special place where @code{/foo/} is @emph{not} an abbreviation for
+@samp{$0 ~ /foo/} is when it is the right-hand operand of @samp{~} or
+@samp{!~}!
+@xref{Using Constant Regexps, ,Using Regular Expression Constants},
+where this is discussed in more detail.
+
+@c This paragraph has been here since day 1, and has always bothered
+@c me, especially since the expression doesn't really make a lot of
+@c sense. So, just take it out.
+@ignore
+In some contexts it may be necessary to write parentheses around the
+regexp to avoid confusing the @code{gawk} parser. For example,
+@samp{(/x/ - /y/) > threshold} is not allowed, but @samp{((/x/) - (/y/))
+> threshold} parses properly.
+@end ignore
+
+@node Boolean Ops, Conditional Exp, Typing and Comparison, Expressions
+@section Boolean Expressions
+@cindex expression, boolean
+@cindex boolean expressions
+@cindex operators, boolean
+@cindex boolean operators
+@cindex logical operations
+@cindex operations, logical
+@cindex short-circuit operators
+@cindex operators, short-circuit
+@cindex and operator
+@cindex or operator
+@cindex not operator
+@cindex @code{&&} operator
+@cindex @code{||} operator
+@cindex @code{!} operator
+
+A @dfn{boolean expression} is a combination of comparison expressions or
+matching expressions, using the boolean operators ``or''
+(@samp{||}), ``and'' (@samp{&&}), and ``not'' (@samp{!}), along with
+parentheses to control nesting. The truth value of the boolean expression is
+computed by combining the truth values of the component expressions.
+Boolean expressions are also referred to as @dfn{logical expressions}.
+The terms are equivalent.
+
+Boolean expressions can be used wherever comparison and matching
+expressions can be used. They can be used in @code{if}, @code{while},
+@code{do} and @code{for} statements
+(@pxref{Statements, ,Control Statements in Actions}).
+They have numeric values (one if true, zero if false), which come into play
+if the result of the boolean expression is stored in a variable, or
+used in arithmetic.
+
+In addition, every boolean expression is also a valid pattern, so
+you can use one as a pattern to control the execution of rules.
+
+Here are descriptions of the three boolean operators, with examples.
+
+@c @cartouche
+@table @code
+@item @var{boolean1} && @var{boolean2}
+True if both @var{boolean1} and @var{boolean2} are true. For example,
+the following statement prints the current input record if it contains
+both @samp{2400} and @samp{foo}.
+
+@example
+if ($0 ~ /2400/ && $0 ~ /foo/) print
+@end example
+
+The subexpression @var{boolean2} is evaluated only if @var{boolean1}
+is true. This can make a difference when @var{boolean2} contains
+expressions that have side effects: in the case of @samp{$0 ~ /foo/ &&
+($2 == bar++)}, the variable @code{bar} is not incremented if there is
+no @samp{foo} in the record.
+
+@item @var{boolean1} || @var{boolean2}
+True if at least one of @var{boolean1} or @var{boolean2} is true.
+For example, the following statement prints all records in the input
+that contain @emph{either} @samp{2400} or
+@samp{foo}, or both.
+
+@example
+if ($0 ~ /2400/ || $0 ~ /foo/) print
+@end example
+
+The subexpression @var{boolean2} is evaluated only if @var{boolean1}
+is false. This can make a difference when @var{boolean2} contains
+expressions that have side effects.
+
+@item ! @var{boolean}
+True if @var{boolean} is false. For example, the following program prints
+all records in the input file @file{BBS-list} that do @emph{not} contain the
+string @samp{foo}.
+
+@c A better example would be `if (! (subscript in array)) ...' but we
+@c haven't done anything with arrays or `in' yet. Sigh.
+@example
+awk '@{ if (! ($0 ~ /foo/)) print @}' BBS-list
+@end example
+@end table
+@c @end cartouche
+
+The @samp{&&} and @samp{||} operators are called @dfn{short-circuit}
+operators because of the way they work. Evaluation of the full expression
+is ``short-circuited'' if the result can be determined part way through
+its evaluation.
+
+@cindex line continuation
+You can continue a statement that uses @samp{&&} or @samp{||} simply
+by putting a newline after them. But you cannot put a newline in front
+of either of these operators without using backslash continuation
+(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}).
+
+The actual value of an expression using the @samp{!} operator will be
+either one or zero, depending upon the truth value of the expression it
+is applied to.
+
+The @samp{!} operator is often useful for changing the sense of a flag
+variable from false to true and back again. For example, the following
+program is one way to print lines in between special bracketing lines:
+
+@example
+$1 == "START" @{ interested = ! interested @}
+interested == 1 @{ print @}
+$1 == "END" @{ interested = ! interested @}
+@end example
+
+@noindent
+The variable @code{interested}, like all @code{awk} variables, starts
+out initialized to zero, which is also false. When a line is seen whose
+first field is @samp{START}, the value of @code{interested} is toggled
+to true, using @samp{!}. The next rule prints lines as long as
+@code{interested} is true. When a line is seen whose first field is
+@samp{END}, @code{interested} is toggled back to false.
+@ignore
+We should discuss using `next' in the two rules that toggle the
+variable, to avoid printing the bracketing lines, but that's more
+distraction than really needed.
+@end ignore
+
+@node Conditional Exp, Function Calls, Boolean Ops, Expressions
+@section Conditional Expressions
+@cindex conditional expression
+@cindex expression, conditional
+
+A @dfn{conditional expression} is a special kind of expression with
+three operands. It allows you to use one expression's value to select
+one of two other expressions.
+
+The conditional expression is the same as in the C language:
+
+@example
+@var{selector} ? @var{if-true-exp} : @var{if-false-exp}
+@end example
+
+@noindent
+There are three subexpressions. The first, @var{selector}, is always
+computed first. If it is ``true'' (not zero and not null) then
+@var{if-true-exp} is computed next and its value becomes the value of
+the whole expression. Otherwise, @var{if-false-exp} is computed next
+and its value becomes the value of the whole expression.
+
+For example, this expression produces the absolute value of @code{x}:
+
+@example
+x > 0 ? x : -x
+@end example
+
+Each time the conditional expression is computed, exactly one of
+@var{if-true-exp} and @var{if-false-exp} is computed; the other is ignored.
+This is important when the expressions contain side effects. For example,
+this conditional expression examines element @code{i} of either array
+@code{a} or array @code{b}, and increments @code{i}.
+
+@example
+x == y ? a[i++] : b[i++]
+@end example
+
+@noindent
+This is guaranteed to increment @code{i} exactly once, because each time
+only one of the two increment expressions is executed,
+and the other is not.
+@xref{Arrays, ,Arrays in @code{awk}},
+for more information about arrays.
+
+@cindex differences between @code{gawk} and @code{awk}
+@cindex line continuation
+As a minor @code{gawk} extension,
+you can continue a statement that uses @samp{?:} simply
+by putting a newline after either character.
+However, you cannot put a newline in front
+of either character without using backslash continuation
+(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}).
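+
+For example, this assignment (a purely illustrative sketch) takes
+advantage of the extension by breaking the line after the @samp{?}
+and the @samp{:}:
+
+@example
+max = (a > b ?
+       a :
+       b)
+@end example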
+
+@node Function Calls, Precedence, Conditional Exp, Expressions
+@section Function Calls
+@cindex function call
+@cindex calling a function
+
+A @dfn{function} is a name for a particular calculation. Because it has
+a name, you can ask for it by name at any point in the program. For
+example, the function @code{sqrt} computes the square root of a number.
+
+A fixed set of functions are @dfn{built-in}, which means they are
+available in every @code{awk} program. The @code{sqrt} function is one
+of these. @xref{Built-in, ,Built-in Functions}, for a list of built-in
+functions and their descriptions. In addition, you can define your own
+functions for use in your program.
+@xref{User-defined, ,User-defined Functions}, for how to do this.
+
+@cindex arguments in function call
+The way to use a function is with a @dfn{function call} expression,
+which consists of the function name followed immediately by a list of
+@dfn{arguments} in parentheses. The arguments are expressions which
+provide the raw materials for the function's calculations.
+When there is more than one argument, they are separated by commas. If
+there are no arguments, write just @samp{()} after the function name.
+Here are some examples:
+
+@example
+sqrt(x^2 + y^2) @i{one argument}
+atan2(y, x) @i{two arguments}
+rand() @i{no arguments}
+@end example
+
+@strong{Do not put any space between the function name and the
+open-parenthesis!} A user-defined function name looks just like the name of
+a variable, and space would make the expression look like concatenation
+of a variable with an expression inside parentheses. Space before the
+parenthesis is harmless with built-in functions, but it is best not to get
+into the habit of using space to avoid mistakes with user-defined
+functions.
+
+Each function expects a particular number of arguments. For example, the
+@code{sqrt} function must be called with a single argument, the number
+to take the square root of:
+
+@example
+sqrt(@var{argument})
+@end example
+
+Some of the built-in functions allow you to omit the final argument.
+If you do so, they use a reasonable default.
+@xref{Built-in, ,Built-in Functions}, for full details. If arguments
+are omitted in calls to user-defined functions, then those arguments are
+treated as local variables, initialized to the empty string
+(@pxref{User-defined, ,User-defined Functions}).
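+
+For example (a small illustration, assuming the default value of
+@code{FS}):
+
+@example
+n = split("a b c", pieces)   # third argument omitted; FS is used
+print substr("gawk", 2)      # prints `awk'; length defaults to the rest
+@end example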
+
+Like every other expression, the function call has a value, which is
+computed by the function based on the arguments you give it. In this
+example, the value of @samp{sqrt(@var{argument})} is the square root of
+@var{argument}. A function can also have side effects, such as assigning
+values to certain variables or doing I/O.
+
+Here is a command to read numbers, one number per line, and print the
+square root of each one:
+
+@example
+@group
+$ awk '@{ print "The square root of", $1, "is", sqrt($1) @}'
+1
+@print{} The square root of 1 is 1
+3
+@print{} The square root of 3 is 1.73205
+5
+@print{} The square root of 5 is 2.23607
+@kbd{Control-d}
+@end group
+@end example
+
+@node Precedence, , Function Calls, Expressions
+@section Operator Precedence (How Operators Nest)
+@cindex precedence
+@cindex operator precedence
+
+@dfn{Operator precedence} determines how operators are grouped, when
+different operators appear close by in one expression. For example,
+@samp{*} has higher precedence than @samp{+}; thus, @samp{a + b * c}
+means to multiply @code{b} and @code{c}, and then add @code{a} to the
+product (i.e.@: @samp{a + (b * c)}).
+
+You can overrule the precedence of the operators by using parentheses.
+You can think of the precedence rules as saying where the
+parentheses are assumed to be if you do not write parentheses yourself. In
+fact, it is wise to always use parentheses whenever you have an unusual
+combination of operators, because other people who read the program may
+not remember what the precedence is in this case. You might forget,
+too; then you could make a mistake. Explicit parentheses will help prevent
+any such mistake.
+
+When operators of equal precedence are used together, the leftmost
+operator groups first, except for the assignment, conditional and
+exponentiation operators, which group in the opposite order.
+Thus, @samp{a - b + c} groups as @samp{(a - b) + c}, and
+@samp{a = b = c} groups as @samp{a = (b = c)}.
+
+The precedence of prefix unary operators does not matter as long as only
+unary operators are involved, because there is only one way to interpret
+them---innermost first. Thus, @samp{$++i} means @samp{$(++i)} and
+@samp{++$x} means @samp{++($x)}. However, when another operator follows
+the operand, then the precedence of the unary operators can matter.
+Thus, @samp{$x^2} means @samp{($x)^2}, but @samp{-x^2} means
+@samp{-(x^2)}, because @samp{-} has lower precedence than @samp{^}
+while @samp{$} has higher precedence.
+
+Here is a table of @code{awk}'s operators, in order from highest
+precedence to lowest:
+
+@c use @code in the items, looks better in TeX w/o all the quotes
+@table @code
+@item (@dots{})
+Grouping.
+
+@item $
+Field.
+
+@item ++ --
+Increment, decrement.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item ^ **
+Exponentiation. These operators group right-to-left.
+(The @samp{**} operator is not specified by POSIX.)
+
+@item + - !
+Unary plus, minus, logical ``not''.
+
+@item * / %
+Multiplication, division, modulus.
+
+@item + -
+Addition, subtraction.
+
+@item @r{Concatenation}
+No special token is used to indicate concatenation.
+The operands are simply written side by side.
+
+@item < <= == !=
+@itemx > >= >> |
+Relational, and redirection.
+The relational operators and the redirections have the same precedence
+level. Characters such as @samp{>} serve both as relationals and as
+redirections; the context distinguishes between the two meanings.
+
+Note that the I/O redirection operators in @code{print} and @code{printf}
+statements belong to the statement level, not to expressions. The
+redirection does not produce an expression which could be the operand of
+another operator. As a result, it does not make sense to use a
+redirection operator near another operator of lower precedence, without
+parentheses. Such combinations, for example @samp{print foo > a ? b : c},
+result in syntax errors.
+The correct way to write this statement is @samp{print foo > (a ? b : c)}.
+
+@item ~ !~
+Matching, non-matching.
+
+@item in
+Array membership.
+
+@item &&
+Logical ``and''.
+
+@item ||
+Logical ``or''.
+
+@item ?:
+Conditional. This operator groups right-to-left.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item = += -= *=
+@itemx /= %= ^= **=
+Assignment. These operators group right-to-left.
+(The @samp{**=} operator is not specified by POSIX.)
+@end table
+
+@node Patterns and Actions, Statements, Expressions, Top
+@chapter Patterns and Actions
+@cindex pattern, definition of
+
+As you have already seen, each @code{awk} statement consists of
+a pattern with an associated action. This chapter describes how
+you build patterns and actions.
+
+@menu
+* Pattern Overview:: What goes into a pattern.
+* Action Overview:: What goes into an action.
+@end menu
+
+@node Pattern Overview, Action Overview, Patterns and Actions, Patterns and Actions
+@section Pattern Elements
+
+Patterns in @code{awk} control the execution of rules: a rule is
+executed when its pattern matches the current input record. This
+section explains all about how to write patterns.
+
+@menu
+* Kinds of Patterns:: A list of all kinds of patterns.
+* Regexp Patterns:: Using regexps as patterns.
+* Expression Patterns:: Any expression can be used as a pattern.
+* Ranges:: Pairs of patterns specify record ranges.
+* BEGIN/END:: Specifying initialization and cleanup rules.
+* Empty:: The empty pattern, which matches every record.
+@end menu
+
+@node Kinds of Patterns, Regexp Patterns, Pattern Overview, Pattern Overview
+@subsection Kinds of Patterns
+@cindex patterns, types of
+
+Here is a summary of the types of patterns supported in @code{awk}.
+
+@table @code
+@item /@var{regular expression}/
+A regular expression as a pattern. It matches when the text of the
+input record fits the regular expression.
+(@xref{Regexp, ,Regular Expressions}.)
+
+@item @var{expression}
+A single expression. It matches when its value
+is non-zero (if a number) or non-null (if a string).
+(@xref{Expression Patterns, ,Expressions as Patterns}.)
+
+@item @var{pat1}, @var{pat2}
+A pair of patterns separated by a comma, specifying a range of records.
+The range includes both the initial record that matches @var{pat1}, and
+the final record that matches @var{pat2}.
+(@xref{Ranges, ,Specifying Record Ranges with Patterns}.)
+
+@item BEGIN
+@itemx END
+Special patterns for you to supply start-up or clean-up actions for your
+@code{awk} program.
+(@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}.)
+
+@item @var{empty}
+The empty pattern matches every input record.
+(@xref{Empty, ,The Empty Pattern}.)
+@end table
+
+@node Regexp Patterns, Expression Patterns, Kinds of Patterns, Pattern Overview
+@subsection Regular Expressions as Patterns
+
+We have been using regular expressions as patterns since our early examples.
+This kind of pattern is simply a regexp constant in the pattern part of
+a rule. Its meaning is @samp{$0 ~ /@var{pattern}/}.
+The pattern matches when the input record matches the regexp.
+For example:
+
+@example
+/foo|bar|baz/ @{ buzzwords++ @}
+END @{ print buzzwords, "buzzwords seen" @}
+@end example
+
+@node Expression Patterns, Ranges, Regexp Patterns, Pattern Overview
+@subsection Expressions as Patterns
+
+Any @code{awk} expression is valid as an @code{awk} pattern.
+Then the pattern matches if the expression's value is non-zero (if a
+number) or non-null (if a string).
+
+The expression is reevaluated each time the rule is tested against a new
+input record. If the expression uses fields such as @code{$1}, the
+value depends directly on the new input record's text; otherwise, it
+depends only on what has happened so far in the execution of the
+@code{awk} program, but that may still be useful.
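+
+For instance, this rule (a small illustration) matches based only on
+how many records have been read so far, not on the text of the current
+record:
+
+@example
+NR % 2 == 0    @{ print @}   # print every second record
+@end example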
+
+A very common kind of expression used as a pattern is the comparison
+expression, using the comparison operators described in
+@ref{Typing and Comparison, ,Variable Typing and Comparison Expressions}.
+
+Regexp matching and non-matching are also very common expressions.
+The left operand of the @samp{~} and @samp{!~} operators is a string.
+The right operand is either a constant regular expression enclosed in
+slashes (@code{/@var{regexp}/}), or any expression, whose string value
+is used as a dynamic regular expression
+(@pxref{Computed Regexps, , Using Dynamic Regexps}).
+
+The following example prints the second field of each input record
+whose first field is precisely @samp{foo}.
+
+@example
+$ awk '$1 == "foo" @{ print $2 @}' BBS-list
+@end example
+
+@noindent
+(There is no output, since there is no BBS site named ``foo''.)
+Contrast this with the following regular expression match, which would
+accept any record with a first field that contains @samp{foo}:
+
+@example
+@group
+$ awk '$1 ~ /foo/ @{ print $2 @}' BBS-list
+@print{} 555-1234
+@print{} 555-6699
+@print{} 555-6480
+@print{} 555-2127
+@end group
+@end example
+
+Boolean expressions are also commonly used as patterns.
+Whether the pattern
+matches an input record depends on whether its subexpressions match.
+
+For example, the following command prints all records in
+@file{BBS-list} that contain both @samp{2400} and @samp{foo}.
+
+@example
+$ awk '/2400/ && /foo/' BBS-list
+@print{} fooey 555-1234 2400/1200/300 B
+@end example
+
+The following command prints all records in
+@file{BBS-list} that contain @emph{either} @samp{2400} or @samp{foo}, or
+both.
+
+@example
+@group
+$ awk '/2400/ || /foo/' BBS-list
+@print{} alpo-net 555-3412 2400/1200/300 A
+@print{} bites 555-1675 2400/1200/300 A
+@print{} fooey 555-1234 2400/1200/300 B
+@print{} foot 555-6699 1200/300 B
+@print{} macfoo 555-6480 1200/300 A
+@print{} sdace 555-3430 2400/1200/300 A
+@print{} sabafoo 555-2127 1200/300 C
+@end group
+@end example
+
+The following command prints all records in
+@file{BBS-list} that do @emph{not} contain the string @samp{foo}.
+
+@example
+@group
+$ awk '! /foo/' BBS-list
+@print{} aardvark 555-5553 1200/300 B
+@print{} alpo-net 555-3412 2400/1200/300 A
+@print{} barfly 555-7685 1200/300 A
+@print{} bites 555-1675 2400/1200/300 A
+@print{} camelot 555-0542 300 C
+@print{} core 555-2912 1200/300 C
+@print{} sdace 555-3430 2400/1200/300 A
+@end group
+@end example
+
+The subexpressions of a boolean operator in a pattern can be constant regular
+expressions, comparisons, or any other @code{awk} expressions. Range
+patterns are not expressions, so they cannot appear inside boolean
+patterns. Likewise, the special patterns @code{BEGIN} and @code{END},
+which never match any input record, are not expressions and cannot
+appear inside boolean patterns.
+
+A regexp constant as a pattern is also a special case of an expression
+pattern. @code{/foo/} as an expression has the value one if @samp{foo}
+appears in the current input record; thus, as a pattern, @code{/foo/}
+matches any record containing @samp{foo}.
+
+@node Ranges, BEGIN/END, Expression Patterns, Pattern Overview
+@subsection Specifying Record Ranges with Patterns
+
+@cindex range pattern
+@cindex pattern, range
+@cindex matching ranges of lines
+A @dfn{range pattern} is made of two patterns separated by a comma, of
+the form @samp{@var{begpat}, @var{endpat}}. It matches ranges of
+consecutive input records. The first pattern, @var{begpat}, controls
+where the range begins, and the second one, @var{endpat}, controls where
+it ends. For example,
+
+@example
+awk '$1 == "on", $1 == "off"'
+@end example
+
+@noindent
+prints every record between @samp{on}/@samp{off} pairs, inclusive.
+
+A range pattern starts out by matching @var{begpat}
+against every input record; when a record matches @var{begpat}, the
+range pattern becomes @dfn{turned on}. The range pattern matches this
+record. As long as it stays turned on, it automatically matches every
+input record read. It also matches @var{endpat} against
+every input record; when that succeeds, the range pattern is turned
+off again for the following record. Then it goes back to checking
+@var{begpat} against each record.
+
+The record that turns on the range pattern and the one that turns it
+off both match the range pattern. If you don't want to operate on
+these records, you can write @code{if} statements in the rule's action
+to distinguish them from the records you are interested in.
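+
+For example, this variation on the previous rule prints the records
+between the @samp{on}/@samp{off} pairs, but skips the bracketing
+records themselves:
+
+@example
+$1 == "on", $1 == "off"  @{
+    if ($1 != "on" && $1 != "off")
+        print
+@}
+@end example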
+
+It is possible for a pattern to be turned both on and off by the same
+record, if the record satisfies both conditions. Then the action is
+executed for just that record.
+
+For example, suppose you have text between two identical markers (say
+the @samp{%} symbol) that you wish to ignore. You might try to
+combine a range pattern that describes the delimited text with the
+@code{next} statement
+(not discussed yet, @pxref{Next Statement, , The @code{next} Statement}),
+which causes @code{awk} to skip any further processing of the current
+record and start over again with the next input record. Such a program
+would look like this:
+
+@example
+/^%$/,/^%$/ @{ next @}
+ @{ print @}
+@end example
+
+@noindent
+@cindex skipping lines between markers
+This program fails because the range pattern is both turned on and turned off
+by the first line with just a @samp{%} on it. To accomplish this task, you
+must write the program this way, using a flag:
+
+@example
+/^%$/ @{ skip = ! skip; next @}
+skip == 1 @{ next @} # skip lines with `skip' set
+@end example
+
+Note that in a range pattern, the @samp{,} has the lowest precedence
+(is evaluated last) of all the operators. Thus, for example, the
+following program attempts to combine a range pattern with another,
+simpler test.
+
+@example
+echo Yes | awk '/1/,/2/ || /Yes/'
+@end example
+
+The author of this program intended it to mean @samp{(/1/,/2/) || /Yes/}.
+However, @code{awk} interprets this as @samp{/1/, (/2/ || /Yes/)}.
+This cannot be changed or worked around; range patterns do not combine
+with other patterns.
+
+@node BEGIN/END, Empty, Ranges, Pattern Overview
+@subsection The @code{BEGIN} and @code{END} Special Patterns
+
+@cindex @code{BEGIN} special pattern
+@cindex pattern, @code{BEGIN}
+@cindex @code{END} special pattern
+@cindex pattern, @code{END}
+@code{BEGIN} and @code{END} are special patterns. They are not used to
+match input records. Rather, they supply start-up or
+clean-up actions for your @code{awk} script.
+
+@menu
+* Using BEGIN/END:: How and why to use BEGIN/END rules.
+* I/O And BEGIN/END:: I/O issues in BEGIN/END rules.
+@end menu
+
+@node Using BEGIN/END, I/O And BEGIN/END, BEGIN/END, BEGIN/END
+@subsubsection Startup and Cleanup Actions
+
+A @code{BEGIN} rule is executed, once, before the first input record
+has been read. An @code{END} rule is executed, once, after all the
+input has been read. For example:
+
+@example
+@group
+$ awk '
+> BEGIN @{ print "Analysis of \"foo\"" @}
+> /foo/ @{ ++n @}
+> END @{ print "\"foo\" appears " n " times." @}' BBS-list
+@print{} Analysis of "foo"
+@print{} "foo" appears 4 times.
+@end group
+@end example
+
+This program finds the number of records in the input file @file{BBS-list}
+that contain the string @samp{foo}. The @code{BEGIN} rule prints a title
+for the report. There is no need to use the @code{BEGIN} rule to
+initialize the counter @code{n} to zero, as @code{awk} does this
+automatically (@pxref{Variables}).
+
+The second rule increments the variable @code{n} every time a
+record containing the pattern @samp{foo} is read. The @code{END} rule
+prints the value of @code{n} at the end of the run.
+
+The special patterns @code{BEGIN} and @code{END} cannot be used in ranges
+or with boolean operators (indeed, they cannot be used with any operators).
+
+An @code{awk} program may have multiple @code{BEGIN} and/or @code{END}
+rules. They are executed in the order they appear, all the @code{BEGIN}
+rules at start-up and all the @code{END} rules at termination.
+@code{BEGIN} and @code{END} rules may be intermixed with other rules.
+This feature was added in the 1987 version of @code{awk}, and is included
+in the POSIX standard. The original (1978) version of @code{awk}
+required you to put the @code{BEGIN} rule at the beginning of the
+program, and the @code{END} rule at the end, and only allowed one of
+each. This is no longer required, but it is a good idea in terms of
+program organization and readability.
+
+Multiple @code{BEGIN} and @code{END} rules are useful for writing
+library functions, since each library file can have its own @code{BEGIN} and/or
+@code{END} rule to do its own initialization and/or cleanup. Note that
+the order in which library functions are named on the command line
+controls the order in which their @code{BEGIN} and @code{END} rules are
+executed. Therefore you have to be careful to write such rules in
+library files so that the order in which they are executed doesn't matter.
+@xref{Options, ,Command Line Options}, for more information on
+using library functions.
+@xref{Library Functions, ,A Library of @code{awk} Functions},
+for a number of useful library functions.
+
+@cindex dark corner
+If an @code{awk} program only has a @code{BEGIN} rule, and no other
+rules, then the program exits after the @code{BEGIN} rule has been run.
+(The original version of @code{awk} used to keep reading and ignoring input
+until end of file was seen.) However, if an @code{END} rule exists,
+then the input will be read, even if there are no other rules in
+the program. This is necessary in case the @code{END} rule checks the
+@code{FNR} and @code{NR} variables (d.c.).
+
+@code{BEGIN} and @code{END} rules must have actions; there is no default
+action for these rules since there is no current record when they run.
+
+@node I/O And BEGIN/END, , Using BEGIN/END, BEGIN/END
+@subsubsection Input/Output from @code{BEGIN} and @code{END} Rules
+
+@cindex I/O from @code{BEGIN} and @code{END}
+There are several (sometimes subtle) issues involved when doing I/O
+from a @code{BEGIN} or @code{END} rule.
+
+The first has to do with the value of @code{$0} in a @code{BEGIN}
+rule. Since @code{BEGIN} rules are executed before any input is read,
+there simply is no input record, and therefore no fields, when
+executing @code{BEGIN} rules. References to @code{$0} and the fields
+yield a null string or zero, depending upon the context. One way
+to give @code{$0} a real value is to execute a @code{getline} command
+without a variable (@pxref{Getline, ,Explicit Input with @code{getline}}).
+Another way is to simply assign a value to it.
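+
+For example, this sketch reads the first record while still in the
+@code{BEGIN} rule, so that @code{$0} and the fields have real values:
+
+@example
+BEGIN @{
+    getline    # read the first input record
+    print "the first record is", $0
+@}
+@end example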
+
+@cindex differences between @code{gawk} and @code{awk}
+The second point is similar to the first, but from the other direction.
+Inside an @code{END} rule, what is the value of @code{$0} and @code{NF}?
+Traditionally, due largely to implementation issues, @code{$0} and
+@code{NF} were @emph{undefined} inside an @code{END} rule.
+The POSIX standard specified that @code{NF} was available in an @code{END}
+rule, containing the number of fields from the last input record.
+Due most probably to an oversight, the standard does not say that @code{$0}
+is also preserved, although logically one would think that it should be.
+In fact, @code{gawk} does preserve the value of @code{$0} for use in
+@code{END} rules. Be aware, however, that Unix @code{awk}, and possibly
+other implementations, do not.
+
+The third point follows from the first two. What is the meaning of
+@samp{print} inside a @code{BEGIN} or @code{END} rule? The meaning is
+the same as always, @samp{print $0}. If @code{$0} is the null string,
+then this prints an empty line. Many long-time @code{awk} programmers
+use @samp{print} in @code{BEGIN} and @code{END} rules, to mean
+@samp{@w{print ""}}, relying on @code{$0} being null. While you might
+generally get away with this in @code{BEGIN} rules, in @code{gawk} at
+least, it is a very bad idea in @code{END} rules. It is also poor
+style, since if you want an empty line in the output, you
+should say so explicitly in your program.
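+
+In other words, when you want a blank line, write it explicitly:
+
+@example
+END @{ print "" @}
+@end example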
+
+@node Empty, , BEGIN/END, Pattern Overview
+@subsection The Empty Pattern
+
+@cindex empty pattern
+@cindex pattern, empty
+An empty (i.e.@: non-existent) pattern is considered to match @emph{every}
+input record. For example, the program:
+
+@example
+awk '@{ print $1 @}' BBS-list
+@end example
+
+@noindent
+prints the first field of every record.
+
+@node Action Overview, , Pattern Overview, Patterns and Actions
+@section Overview of Actions
+@cindex action, definition of
+@cindex curly braces
+@cindex action, curly braces
+@cindex action, separating statements
+
+An @code{awk} program or script consists of a series of
+rules and function definitions, interspersed. (Functions are
+described later. @xref{User-defined, ,User-defined Functions}.)
+
+A rule contains a pattern and an action, either of which (but not
+both) may be
+omitted. The purpose of the @dfn{action} is to tell @code{awk} what to do
+once a match for the pattern is found. Thus, in outline, an @code{awk}
+program generally looks like this:
+
+@example
+@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]}
+@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]}
+@dots{}
+function @var{name}(@var{args}) @{ @dots{} @}
+@dots{}
+@end example
+
+An action consists of one or more @code{awk} @dfn{statements}, enclosed
+in curly braces (@samp{@{} and @samp{@}}). Each statement specifies one
+thing to be done. The statements are separated by newlines or
+semicolons.
+
+The curly braces around an action must be used even if the action
+contains only one statement, or even if it contains no statements at
+all. However, if you omit the action entirely, omit the curly braces as
+well. An omitted action is equivalent to @samp{@{ print $0 @}}.
+
+@example
+/foo/ @{ @} # match foo, do nothing - empty action
+/foo/ # match foo, print the record - omitted action
+@end example
+
+Here are the kinds of statements supported in @code{awk}:
+
+@itemize @bullet
+@item
+Expressions, which can call functions or assign values to variables
+(@pxref{Expressions}). Executing
+this kind of statement simply computes the value of the expression.
+This is useful when the expression has side effects
+(@pxref{Assignment Ops, ,Assignment Expressions}).
+
+@item
+Control statements, which specify the control flow of @code{awk}
+programs. The @code{awk} language gives you C-like constructs
+(@code{if}, @code{for}, @code{while}, and @code{do}) as well as a few
+special ones (@pxref{Statements, ,Control Statements in Actions}).
+
+@item
+Compound statements, which consist of one or more statements enclosed in
+curly braces. A compound statement is used in order to put several
+statements together in the body of an @code{if}, @code{while}, @code{do}
+or @code{for} statement.
+
+@item
+Input statements, using the @code{getline} command
+(@pxref{Getline, ,Explicit Input with @code{getline}}), the @code{next}
+statement (@pxref{Next Statement, ,The @code{next} Statement}),
+and the @code{nextfile} statement
+(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}).
+
+@item
+Output statements, @code{print} and @code{printf}.
+@xref{Printing, ,Printing Output}.
+
+@item
+Deletion statements, for deleting array elements.
+@xref{Delete, ,The @code{delete} Statement}.
+@end itemize
+
+@iftex
+The next chapter covers control statements in detail.
+@end iftex
+
+@node Statements, Built-in Variables, Patterns and Actions, Top
+@chapter Control Statements in Actions
+@cindex control statement
+
+@dfn{Control statements} such as @code{if}, @code{while}, and so on
+control the flow of execution in @code{awk} programs. Most of the
+control statements in @code{awk} are patterned on similar statements in
+C.
+
+All the control statements start with special keywords such as @code{if}
+and @code{while}, to distinguish them from simple expressions.
+
+@cindex compound statement
+@cindex statement, compound
+Many control statements contain other statements; for example, the
+@code{if} statement contains another statement which may or may not be
+executed. The contained statement is called the @dfn{body}. If you
+want to include more than one statement in the body, group them into a
+single @dfn{compound statement} with curly braces, separating them with
+newlines or semicolons.
+
+@menu
+* If Statement:: Conditionally execute some @code{awk}
+ statements.
+* While Statement:: Loop until some condition is satisfied.
+* Do Statement:: Do specified action while looping until some
+ condition is satisfied.
+* For Statement:: Another looping statement, that provides
+ initialization and increment clauses.
+* Break Statement:: Immediately exit the innermost enclosing loop.
+* Continue Statement:: Skip to the end of the innermost enclosing
+ loop.
+* Next Statement:: Stop processing the current input record.
+* Nextfile Statement:: Stop processing the current file.
+* Exit Statement:: Stop execution of @code{awk}.
+@end menu
+
+@node If Statement, While Statement, Statements, Statements
+@section The @code{if}-@code{else} Statement
+
+@cindex @code{if}-@code{else} statement
+The @code{if}-@code{else} statement is @code{awk}'s decision-making
+statement. It looks like this:
+
+@example
+if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]}
+@end example
+
+@noindent
+The @var{condition} is an expression that controls what the rest of the
+statement will do. If @var{condition} is true, @var{then-body} is
+executed; otherwise, @var{else-body} is executed.
+The @code{else} part of the statement is
+optional. The condition is considered false if its value is zero or
+the null string, and true otherwise.
+
+Here is an example:
+
+@example
+if (x % 2 == 0)
+ print "x is even"
+else
+ print "x is odd"
+@end example
+
+In this example, if the expression @samp{x % 2 == 0} is true (that is,
+the value of @code{x} is evenly divisible by two), then the first @code{print}
+statement is executed, otherwise the second @code{print} statement is
+executed.
+
+If the @code{else} appears on the same line as @var{then-body}, and
+@var{then-body} is not a compound statement (i.e.@: not surrounded by
+curly braces), then a semicolon must separate @var{then-body} from
+@code{else}. To illustrate this, let's rewrite the previous example:
+
+@example
+if (x % 2 == 0) print "x is even"; else
+ print "x is odd"
+@end example
+
+@noindent
+If you forget the @samp{;}, @code{awk} won't be able to interpret the
+statement, and you will get a syntax error.
+
+We would not actually write this example this way, because a human
+reader might fail to see the @code{else} if it were not the first thing
+on its line.
+
+@node While Statement, Do Statement, If Statement, Statements
+@section The @code{while} Statement
+@cindex @code{while} statement
+@cindex loop
+@cindex body of a loop
+
+In programming, a @dfn{loop} means a part of a program that can
+be executed two or more times in succession.
+
+The @code{while} statement is the simplest looping statement in
+@code{awk}. It repeatedly executes a statement as long as a condition is
+true. It looks like this:
+
+@example
+while (@var{condition})
+ @var{body}
+@end example
+
+@noindent
+Here @var{body} is a statement that we call the @dfn{body} of the loop,
+and @var{condition} is an expression that controls how long the loop
+keeps running.
+
+The first thing the @code{while} statement does is test @var{condition}.
+If @var{condition} is true, it executes the statement @var{body}.
+@ifinfo
+(The @var{condition} is true when the value
+is not zero and not a null string.)
+@end ifinfo
+After @var{body} has been executed,
+@var{condition} is tested again, and if it is still true, @var{body} is
+executed again. This process repeats until @var{condition} is no longer
+true. If @var{condition} is initially false, the body of the loop is
+never executed, and @code{awk} continues with the statement following
+the loop.
+
+This example prints the first three fields of each record, one per line.
+
+@example
+awk '@{ i = 1
+ while (i <= 3) @{
+ print $i
+ i++
+ @}
+@}' inventory-shipped
+@end example
+
+@noindent
+Here the body of the loop is a compound statement enclosed in braces,
+containing two statements.
+
+The loop works like this: first, the value of @code{i} is set to one.
+Then, the @code{while} tests whether @code{i} is less than or equal to
+three. This is true when @code{i} equals one, so the @code{i}-th
+field is printed. Then the @samp{i++} increments the value of @code{i}
+and the loop repeats. The loop terminates when @code{i} reaches four.
+
+As you can see, a newline is not required between the condition and the
+body; but using one makes the program clearer unless the body is a
+compound statement or is very simple. The newline after the open-brace
+that begins the compound statement is not required either, but the
+program would be harder to read without it.
+
+@node Do Statement, For Statement, While Statement, Statements
+@section The @code{do}-@code{while} Statement
+
+The @code{do} loop is a variation of the @code{while} looping statement.
+The @code{do} loop executes the @var{body} once, and then repeats @var{body}
+as long as @var{condition} is true. It looks like this:
+
+@example
+do
+ @var{body}
+while (@var{condition})
+@end example
+
+Even if @var{condition} is false at the start, @var{body} is executed at
+least once (and only once, unless executing @var{body} makes
+@var{condition} true). Contrast this with the corresponding
+@code{while} statement:
+
+@example
+while (@var{condition})
+ @var{body}
+@end example
+
+@noindent
+This statement does not execute @var{body} even once if @var{condition}
+is false to begin with.
+
+Here is an example of a @code{do} statement:
+
+@example
+awk '@{ i = 1
+ do @{
+ print $0
+ i++
+ @} while (i <= 10)
+@}'
+@end example
+
+@noindent
+This program prints each input record ten times. It isn't a very
+realistic example, since in this case an ordinary @code{while} would do
+just as well. But this reflects actual experience; there is only
+occasionally a real use for a @code{do} statement.
+
+@node For Statement, Break Statement, Do Statement, Statements
+@section The @code{for} Statement
+@cindex @code{for} statement
+
+The @code{for} statement makes it more convenient to count iterations of a
+loop. The general form of the @code{for} statement looks like this:
+
+@example
+for (@var{initialization}; @var{condition}; @var{increment})
+ @var{body}
+@end example
+
+@noindent
+The @var{initialization}, @var{condition} and @var{increment} parts are
+arbitrary @code{awk} expressions, and @var{body} stands for any
+@code{awk} statement.
+
+The @code{for} statement starts by executing @var{initialization}.
+Then, as long
+as @var{condition} is true, it repeatedly executes @var{body} and then
+@var{increment}. Typically @var{initialization} sets a variable to
+either zero or one, @var{increment} adds one to it, and @var{condition}
+compares it against the desired number of iterations.
+
+Here is an example of a @code{for} statement:
+
+@example
+@group
+awk '@{ for (i = 1; i <= 3; i++)
+ print $i
+@}' inventory-shipped
+@end group
+@end example
+
+@noindent
+This prints the first three fields of each input record, one field per
+line.
+
+You cannot set more than one variable in the
+@var{initialization} part unless you use a multiple assignment statement
+such as @samp{x = y = 0}, which is possible only if all the initial values
+are equal. (But you can initialize additional variables by writing
+their assignments as separate statements preceding the @code{for} loop.)
+
+The same is true of the @var{increment} part; to increment additional
+variables, you must write separate statements at the end of the loop.
+The C compound expression, using C's comma operator, would be useful in
+this context, but it is not supported in @code{awk}.
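+
+For instance, this sketch (with arbitrary variable names) initializes a
+second variable before the loop and updates it inside the body:
+
+@example
+j = NF
+for (i = 1; i < j; i++) @{
+    print $i, $j
+    j--
+@}
+@end example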
+
+Most often, @var{increment} is an increment expression, as in the
+example above. But this is not required; it can be any expression
+whatever. For example, this statement prints all the powers of two
+between one and 100:
+
+@example
+for (i = 1; i <= 100; i *= 2)
+ print i
+@end example
+
+Any of the three expressions in the parentheses following the @code{for} may
+be omitted if there is nothing to be done there. Thus, @w{@samp{for (; x
+> 0;)}} is equivalent to @w{@samp{while (x > 0)}}. If the
+@var{condition} is omitted, it is treated as @var{true}, effectively
+yielding an @dfn{infinite loop} (i.e.@: a loop that will never
+terminate).
+
+In most cases, a @code{for} loop is an abbreviation for a @code{while}
+loop, as shown here:
+
+@example
+@var{initialization}
+while (@var{condition}) @{
+ @var{body}
+ @var{increment}
+@}
+@end example
+
+@noindent
+The only exception is when the @code{continue} statement
+(@pxref{Continue Statement, ,The @code{continue} Statement}) is used
+inside the loop; changing a @code{for} statement to a @code{while}
+statement in this way can change the effect of the @code{continue}
+statement inside the loop.
+
+There is an alternate version of the @code{for} loop, for iterating over
+all the indices of an array:
+
+@example
+for (i in array)
+ @var{do something with} array[i]
+@end example
+
+@noindent
+@xref{Scanning an Array, ,Scanning All Elements of an Array},
+for more information on this version of the @code{for} loop.
+
+The @code{awk} language has a @code{for} statement in addition to a
+@code{while} statement because often a @code{for} loop is both less work to
+type and more natural to think of. Counting the number of iterations is
+very common in loops. It can be easier to think of this counting as part
+of looping rather than as something to do inside the loop.
+
+The next section has more complicated examples of @code{for} loops.
+
+@node Break Statement, Continue Statement, For Statement, Statements
+@section The @code{break} Statement
+@cindex @code{break} statement
+@cindex loops, exiting
+
+The @code{break} statement jumps out of the innermost @code{for},
+@code{while}, or @code{do} loop that encloses it. The
+following example finds the smallest divisor of any integer, and also
+identifies prime numbers:
+
+@example
+awk '# find smallest divisor of num
+ @{ num = $1
+ for (div = 2; div*div <= num; div++)
+ if (num % div == 0)
+ break
+ if (num % div == 0)
+ printf "Smallest divisor of %d is %d\n", num, div
+ else
+ printf "%d is prime\n", num
+ @}'
+@end example
+
+When the remainder is zero in the first @code{if} statement, @code{awk}
+immediately @dfn{breaks out} of the containing @code{for} loop. This means
+that @code{awk} proceeds immediately to the statement following the loop
+and continues processing. (This is very different from the @code{exit}
+statement which stops the entire @code{awk} program.
+@xref{Exit Statement, ,The @code{exit} Statement}.)
+
+Here is another program equivalent to the previous one. It illustrates how
+the @var{condition} of a @code{for} or @code{while} could just as well be
+replaced with a @code{break} inside an @code{if}:
+
+@example
+@group
+awk '# find smallest divisor of num
+ @{ num = $1
+ for (div = 2; ; div++) @{
+ if (num % div == 0) @{
+ printf "Smallest divisor of %d is %d\n", num, div
+ break
+ @}
+ if (div*div > num) @{
+ printf "%d is prime\n", num
+ break
+ @}
+ @}
+@}'
+@end group
+@end example
+
+@cindex @code{break}, outside of loops
+@cindex historical features
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@cindex dark corner
+As described above, the @code{break} statement has no meaning when
+used outside the body of a loop. However, although it was never documented,
+historical implementations of @code{awk} have treated the @code{break}
+statement outside of a loop as if it were a @code{next} statement
+(@pxref{Next Statement, ,The @code{next} Statement}).
+Recent versions of Unix @code{awk} no longer allow this usage.
+@code{gawk} will support this use of @code{break} only if @samp{--traditional}
+has been specified on the command line
+(@pxref{Options, ,Command Line Options}).
+Otherwise, it will be treated as an error, since the POSIX standard
+specifies that @code{break} should only be used inside the body of a
+loop (d.c.).
+
+@node Continue Statement, Next Statement, Break Statement, Statements
+@section The @code{continue} Statement
+
+@cindex @code{continue} statement
+The @code{continue} statement, like @code{break}, is used only inside
+@code{for}, @code{while}, and @code{do} loops. It skips
+over the rest of the loop body, causing the next cycle around the loop
+to begin immediately. Contrast this with @code{break}, which jumps out
+of the loop altogether.
+
+@c The point of this program was to illustrate the use of continue with
+@c a while loop. But Karl Berry points out that that is done adequately
+@c below, and that this example is very un-awk-like. So for now, we'll
+@c omit it.
+@ignore
+In Texinfo source files, text that the author wishes to ignore can be
+enclosed between lines that start with @samp{@@ignore} and end with
+@samp{@@end ignore}. Here is a program that strips out lines between
+@samp{@@ignore} and @samp{@@end ignore} pairs.
+
+@example
+BEGIN @{
+ while (getline > 0) @{
+ if (/^@@ignore/)
+ ignoring = 1
+ else if (/^@@end[ \t]+ignore/) @{
+ ignoring = 0
+ continue
+ @}
+ if (ignoring)
+ continue
+ print
+ @}
+@}
+@end example
+
+When an @samp{@@ignore} is seen, the @code{ignoring} flag is set to one (true).
+When @samp{@@end ignore} is seen, the flag is reset to zero (false). As long
+as the flag is true, the input record is not printed, because the
+@code{continue} restarts the @code{while} loop, skipping over the @code{print}
+statement.
+
+@c Exercise!!!
+@c How could this program be written to make better use of the awk language?
+@end ignore
+
+The @code{continue} statement in a @code{for} loop directs @code{awk} to
+skip the rest of the body of the loop, and resume execution with the
+increment-expression of the @code{for} statement. The following program
+illustrates this fact:
+
+@example
+awk 'BEGIN @{
+ for (x = 0; x <= 20; x++) @{
+ if (x == 5)
+ continue
+ printf "%d ", x
+ @}
+ print ""
+@}'
+@end example
+
+@noindent
+This program prints all the numbers from zero to 20, except for five, for
+which the @code{printf} is skipped. Since the increment @samp{x++}
+is not skipped, @code{x} does not remain stuck at five. Contrast the
+@code{for} loop above with this @code{while} loop:
+
+@example
+awk 'BEGIN @{
+ x = 0
+ while (x <= 20) @{
+ if (x == 5)
+ continue
+ printf "%d ", x
+ x++
+ @}
+ print ""
+@}'
+@end example
+
+@noindent
+This program loops forever once @code{x} gets to five.
+
+@cindex @code{continue}, outside of loops
+@cindex historical features
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@cindex dark corner
+As described above, the @code{continue} statement has no meaning when
+used outside the body of a loop. However, although it was never documented,
+historical implementations of @code{awk} have treated the @code{continue}
+statement outside of a loop as if it were a @code{next} statement
+(@pxref{Next Statement, ,The @code{next} Statement}).
+Recent versions of Unix @code{awk} no longer allow this usage.
+@code{gawk} will support this use of @code{continue} only if
+@samp{--traditional} has been specified on the command line
+(@pxref{Options, ,Command Line Options}).
+Otherwise, it will be treated as an error, since the POSIX standard
+specifies that @code{continue} should only be used inside the body of a
+loop (d.c.).
+
+@node Next Statement, Nextfile Statement, Continue Statement, Statements
+@section The @code{next} Statement
+@cindex @code{next} statement
+
+The @code{next} statement forces @code{awk} to immediately stop processing
+the current record and go on to the next record. This means that no
+further rules are executed for the current record. The rest of the
+current rule's action is not executed either.
+
+Contrast this with the effect of the @code{getline} function
+(@pxref{Getline, ,Explicit Input with @code{getline}}). That too causes
+@code{awk} to read the next record immediately, but it does not alter the
+flow of control in any way. So the rest of the current action executes
+with a new input record.
+
+At the highest level, @code{awk} program execution is a loop that reads
+an input record and then tests each rule's pattern against it. If you
+think of this loop as a @code{for} statement whose body contains the
+rules, then the @code{next} statement is analogous to a @code{continue}
+statement: it skips to the end of the body of this implicit loop, and
+executes the increment (which reads another record).
+
+For example, if your @code{awk} program works only on records with four
+fields, and you don't want it to fail when given bad input, you might
+use this rule near the beginning of the program:
+
+@example
+@group
+NF != 4 @{
+ err = sprintf("%s:%d: skipped: NF != 4\n", FILENAME, FNR)
+ print err > "/dev/stderr"
+ next
+@}
+@end group
+@end example
+
+@noindent
+so that the following rules will not see the bad record. The error
+message is redirected to the standard error output stream, as error
+messages should be. @xref{Special Files, ,Special File Names in @code{gawk}}.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+According to the POSIX standard, the behavior is undefined if
+the @code{next} statement is used in a @code{BEGIN} or @code{END} rule.
+@code{gawk} will treat it as a syntax error.
+Although POSIX permits it,
+some other @code{awk} implementations don't allow the @code{next}
+statement inside function bodies
+(@pxref{User-defined, ,User-defined Functions}).
+Just as any other @code{next} statement, a @code{next} inside a
+function body reads the next record and starts processing it with the
+first rule in the program.
+
+If the @code{next} statement causes the end of the input to be reached,
+then the code in any @code{END} rules will be executed.
+@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}.
+
+@node Nextfile Statement, Exit Statement, Next Statement, Statements
+@section The @code{nextfile} Statement
+@cindex @code{nextfile} statement
+@cindex differences between @code{gawk} and @code{awk}
+
+@code{gawk} provides the @code{nextfile} statement,
+which is similar to the @code{next} statement.
+However, instead of abandoning processing of the current record, the
+@code{nextfile} statement instructs @code{gawk} to stop processing the
+current data file.
+
+Upon execution of the @code{nextfile} statement, @code{FILENAME} is
+updated to the name of the next data file listed on the command line,
+@code{FNR} is reset to one, @code{ARGIND} is incremented, and processing
+starts over with the first rule in the program. @xref{Built-in Variables}.
+
+If the @code{nextfile} statement causes the end of the input to be reached,
+then the code in any @code{END} rules will be executed.
+@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}.
+
+The @code{nextfile} statement is a @code{gawk} extension; it is not
+(currently) available in any other @code{awk} implementation.
+@xref{Nextfile Function, ,Implementing @code{nextfile} as a Function},
+for a user-defined function you can use to simulate the @code{nextfile}
+statement.
+
+The @code{nextfile} statement is useful when you have many data
+files to process, and you do not expect to need
+every record in every file.
+Normally, in order to move on to
+the next data file, you would have to continue scanning the unwanted
+records. The @code{nextfile} statement accomplishes this much more
+efficiently.
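+
+For example, a program along these lines looks at no more than the
+first ten records of each data file:
+
+@example
+FNR > 10  @{ nextfile @}
+          @{ print FILENAME, $0 @}
+@end example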
+
+@cindex @code{next file} statement
+@strong{Caution:} Versions of @code{gawk} prior to 3.0 used two
+words (@samp{next file}) for the @code{nextfile} statement. This was
+changed in 3.0 to one word, since the treatment of @samp{file} was
+inconsistent. When it appeared after @code{next}, it was a keyword.
+Otherwise, it was a regular identifier. The old usage is still
+accepted. However, @code{gawk} will generate a warning message, and
+support for @code{next file} will eventually be discontinued in a
+future version of @code{gawk}.
+
+@node Exit Statement, , Nextfile Statement, Statements
+@section The @code{exit} Statement
+
+@cindex @code{exit} statement
+The @code{exit} statement causes @code{awk} to immediately stop
+executing the current rule and to stop processing input; any remaining input
+is ignored. It looks like this:
+
+@example
+exit @r{[}@var{return code}@r{]}
+@end example
+
+If an @code{exit} statement is executed from a @code{BEGIN} rule the
+program stops processing everything immediately. No input records are
+read. However, if an @code{END} rule is present, it is executed
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}).
+
+If @code{exit} is used as part of an @code{END} rule, it causes
+the program to stop immediately.
+
+An @code{exit} statement that is not part
+of a @code{BEGIN} or @code{END} rule stops the execution of any further
+automatic rules for the current record, skips reading any remaining input
+records, and executes
+the @code{END} rule if there is one.
+
+If you do not want the @code{END} rule to do its job in this case, you
+can set a variable to non-zero before the @code{exit} statement, and check
+that variable in the @code{END} rule.
+@xref{Assert Function, ,Assertions},
+for an example that does this.
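+
+Here is a minimal sketch of that technique (the variable name
+@code{skip_end} and the condition are arbitrary):
+
+@example
+@{
+    if ($0 ~ /give up/) @{
+        skip_end = 1
+        exit 1
+    @}
+@}
+END @{
+    if (skip_end)
+        exit 1
+    print "normal cleanup goes here"
+@}
+@end example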
+
+@cindex dark corner
+If an argument is supplied to @code{exit}, its value is used as the exit
+status code for the @code{awk} process. If no argument is supplied,
+@code{exit} returns status zero (success). In the case where an argument
+is supplied to a first @code{exit} statement, and then @code{exit} is
+called a second time with no argument, the previously supplied exit value
+is used (d.c.).
+
+For example, let's say you've discovered an error condition you really
+don't know how to handle. Conventionally, programs report this by
+exiting with a non-zero status. Your @code{awk} program can do this
+using an @code{exit} statement with a non-zero argument. Here is an
+example:
+
+@example
+@group
+BEGIN @{
+ if (("date" | getline date_now) < 0) @{
+ print "Can't get system date" > "/dev/stderr"
+ exit 1
+ @}
+ print "current date is", date_now
+ close("date")
+@}
+@end group
+@end example
+
+@node Built-in Variables, Arrays, Statements, Top
+@chapter Built-in Variables
+@cindex built-in variables
+
+Most @code{awk} variables are available for you to use for your own
+purposes; they never change except when your program assigns values to
+them, and never affect anything except when your program examines them.
+However, a few variables in @code{awk} have special built-in meanings.
+Some of them @code{awk} examines automatically, so that they enable you
+to tell @code{awk} how to do certain things. Others are set
+automatically by @code{awk}, so that they carry information from the
+internal workings of @code{awk} to your program.
+
+This chapter documents all the built-in variables of @code{gawk}. Most
+of them are also documented in the chapters describing their areas of
+activity.
+
+@menu
+* User-modified:: Built-in variables that you change to control
+ @code{awk}.
+* Auto-set:: Built-in variables where @code{awk} gives you
+ information.
+* ARGC and ARGV:: Ways to use @code{ARGC} and @code{ARGV}.
+@end menu
+
+@node User-modified, Auto-set, Built-in Variables, Built-in Variables
+@section Built-in Variables that Control @code{awk}
+@cindex built-in variables, user modifiable
+
+This is an alphabetical list of the variables which you can change to
+control how @code{awk} does certain things. Those variables that are
+specific to @code{gawk} are marked with an asterisk, @samp{*}.
+
+@table @code
+@vindex CONVFMT
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+@item CONVFMT
+This string controls conversion of numbers to
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
+It works by being passed, in effect, as the first argument to the
+@code{sprintf} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+Its default value is @code{"%.6g"}.
+@code{CONVFMT} was introduced by the POSIX standard.
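+
+For example, this sketch shows the effect of changing @code{CONVFMT}
+(the format @code{"%2.2f"} is chosen purely for illustration):
+
+@example
+BEGIN @{
+    CONVFMT = "%2.2f"
+    pi = 3.14159
+    s = pi ""    # concatenation converts pi using CONVFMT
+    print s      # prints 3.14
+@}
+@end example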
+
+@vindex FIELDWIDTHS
+@item FIELDWIDTHS *
+This is a space-separated list of column widths that tells @code{gawk}
+how to split input with fixed, columnar boundaries. It is an
+experimental feature. Assigning to @code{FIELDWIDTHS}
+overrides the use of @code{FS} for field splitting.
+@xref{Constant Size, ,Reading Fixed-width Data}, for more information.
+
+If @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}), then @code{FIELDWIDTHS}
+has no special meaning, and field splitting operations are done based
+exclusively on the value of @code{FS}.
+
+@vindex FS
+@item FS
+@code{FS} is the input field separator
+(@pxref{Field Separators, ,Specifying How Fields are Separated}).
+The value is a single-character string or a multi-character regular
+expression that matches the separations between fields in an input
+record. If the value is the null string (@code{""}), then each
+character in the record becomes a separate field.
+
+The default value is @w{@code{" "}}, a string consisting of a single
+space. As a special exception, this value means that any
+sequence of spaces and tabs is a single separator. It also causes
+spaces and tabs at the beginning and end of a record to be ignored.
+
+You can set the value of @code{FS} on the command line using the
+@samp{-F} option:
+
+@example
+awk -F, '@var{program}' @var{input-files}
+@end example
+
+If @code{gawk} is using @code{FIELDWIDTHS} for field-splitting,
+assigning a value to @code{FS} will cause @code{gawk} to return to
+the normal, @code{FS}-based, field splitting. An easy way to do this
+is to simply say @samp{FS = FS}, perhaps with an explanatory comment.
+
+@vindex IGNORECASE
+@item IGNORECASE *
+If @code{IGNORECASE} is non-zero or non-null, then all string comparisons,
+and all regular expression matching are case-independent. Thus, regexp
+matching with @samp{~} and @samp{!~}, and the @code{gensub},
+@code{gsub}, @code{index}, @code{match}, @code{split} and @code{sub}
+functions, record termination with @code{RS}, and field splitting with
+@code{FS} all ignore case when doing their particular regexp operations.
+@xref{Case-sensitivity, ,Case-sensitivity in Matching}.
+
+If @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}),
+then @code{IGNORECASE} has no special meaning, and string
+and regexp operations are always case-sensitive.
+
+@vindex OFMT
+@item OFMT
+This string controls conversion of numbers to
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers}) for
+printing with the @code{print} statement. It works by being passed, in
+effect, as the first argument to the @code{sprintf} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+Its default value is @code{"%.6g"}. Earlier versions of @code{awk}
+also used @code{OFMT} to specify the format for converting numbers to
+strings in general expressions; this is now done by @code{CONVFMT}.
+
+@vindex OFS
+@item OFS
+This is the output field separator (@pxref{Output Separators}). It is
+output between the fields output by a @code{print} statement. Its
+default value is @w{@code{" "}}, a string consisting of a single space.
+
+@vindex ORS
+@item ORS
+This is the output record separator. It is output at the end of every
+@code{print} statement. Its default value is @code{"\n"}.
+(@xref{Output Separators}.)
+
+@vindex RS
+@item RS
+This is @code{awk}'s input record separator. Its default value is a string
+containing a single newline character, which means that an input record
+consists of a single line of text.
+It can also be the null string, in which case records are separated by
+runs of blank lines, or a regexp, in which case records are separated by
+matches of the regexp in the input text.
+(@xref{Records, ,How Input is Split into Records}.)
+
+@vindex SUBSEP
+@item SUBSEP
+@code{SUBSEP} is the subscript separator. It has the default value of
+@code{"\034"}, and is used to separate the parts of the indices of a
+multi-dimensional array. Thus, the expression @code{@w{foo["A", "B"]}}
+really accesses @code{foo["A\034B"]}
+(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).
+@end table
+
+@node Auto-set, ARGC and ARGV, User-modified, Built-in Variables
+@section Built-in Variables that Convey Information
+@cindex built-in variables, convey information
+
+This is an alphabetical list of the variables that are set
+automatically by @code{awk} on certain occasions in order to provide
+information to your program. Those variables that are specific to
+@code{gawk} are marked with an asterisk, @samp{*}.
+
+@table @code
+@vindex ARGC
+@vindex ARGV
+@item ARGC
+@itemx ARGV
+The command-line arguments available to @code{awk} programs are stored in
+an array called @code{ARGV}. @code{ARGC} is the number of command-line
+arguments present. @xref{Other Arguments, ,Other Command Line Arguments}.
+Unlike most @code{awk} arrays,
+@code{ARGV} is indexed from zero to @code{ARGC} @minus{} 1. For example:
+
+@example
+@group
+$ awk 'BEGIN @{
+> for (i = 0; i < ARGC; i++)
+> print ARGV[i]
+> @}' inventory-shipped BBS-list
+@print{} awk
+@print{} inventory-shipped
+@print{} BBS-list
+@end group
+@end example
+
+@noindent
+In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]}
+contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains
+@code{"BBS-list"}. The value of @code{ARGC} is three, one more than the
+index of the last element in @code{ARGV}, since the elements are numbered
+from zero.
+
+The names @code{ARGC} and @code{ARGV}, as well as the convention of indexing
+the array from zero to @code{ARGC} @minus{} 1, are derived from the C language's
+method of accessing command line arguments.
+@xref{ARGC and ARGV, , Using @code{ARGC} and @code{ARGV}}, for information
+about how @code{awk} uses these variables.
+
+@vindex ARGIND
+@item ARGIND *
+The index in @code{ARGV} of the current file being processed.
+Every time @code{gawk} opens a new data file for processing, it sets
+@code{ARGIND} to the index in @code{ARGV} of the file name.
+When @code{gawk} is processing the input files, it is always
+true that @samp{FILENAME == ARGV[ARGIND]}.
+
+This variable is useful in file processing; it allows you to tell how far
+along you are in the list of data files, and to distinguish between
+successive instances of the same filename on the command line.
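+
+For example, this rule (a sketch) announces each data file as
+@code{gawk} starts to read it:
+
+@example
+FNR == 1 @{ printf "Reading file %d: %s\n", ARGIND, FILENAME @}
+@end example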
+
+While you can change the value of @code{ARGIND} within your @code{awk}
+program, @code{gawk} will automatically set it to a new value when the
+next file is opened.
+
+This variable is a @code{gawk} extension. In other @code{awk} implementations,
+or if @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}),
+it is not special.
+
+@vindex ENVIRON
+@item ENVIRON
+An associative array that contains the values of the environment. The array
+indices are the environment variable names; the values are the values of
+the particular environment variables. For example,
+@code{ENVIRON["HOME"]} might be @file{/home/arnold}. Changing this array
+does not affect the environment passed on to any programs that
+@code{awk} may spawn via redirection or the @code{system} function.
+(In a future version of @code{gawk}, it may do so.)
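+
+For example, this one-liner prints the user's home directory (shown
+here with the sample value used above):
+
+@example
+$ awk 'BEGIN @{ print ENVIRON["HOME"] @}'
+@print{} /home/arnold
+@end example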
+
+Some operating systems may not have environment variables.
+On such systems, the @code{ENVIRON} array is empty (except for
+@w{@code{ENVIRON["AWKPATH"]}}).
+
+@vindex ERRNO
+@item ERRNO *
+If a system error occurs either doing a redirection for @code{getline},
+during a read for @code{getline}, or during a @code{close} operation,
+then @code{ERRNO} will contain a string describing the error.
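+
+For example (a sketch; the exact message is system-dependent):
+
+@example
+# "no-such-file" is just an illustrative file name
+if ((getline line < "no-such-file") < 0)
+    print "cannot read no-such-file:", ERRNO > "/dev/stderr"
+@end example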
+
+This variable is a @code{gawk} extension. In other @code{awk} implementations,
+or if @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}),
+it is not special.
+
+@cindex dark corner
+@vindex FILENAME
+@item FILENAME
+This is the name of the file that @code{awk} is currently reading.
+When no data files are listed on the command line, @code{awk} reads
+from the standard input, and @code{FILENAME} is set to @code{"-"}.
+@code{FILENAME} is changed each time a new file is read
+(@pxref{Reading Files, ,Reading Input Files}).
+Inside a @code{BEGIN} rule, the value of @code{FILENAME} is
+@code{""}, since there are no input files being processed
+yet.@footnote{Some early implementations of Unix @code{awk} initialized
+@code{FILENAME} to @code{"-"}, even if there were data files to be
+processed. This behavior was incorrect, and should not be relied
+upon in your programs.} (d.c.)
+
+@vindex FNR
+@item FNR
+@code{FNR} is the current record number in the current file. @code{FNR} is
+incremented each time a new record is read
+(@pxref{Getline, ,Explicit Input with @code{getline}}). It is reinitialized
+to zero each time a new input file is started.
+
+@vindex NF
+@item NF
+@code{NF} is the number of fields in the current input record.
+@code{NF} is set each time a new record is read, when a new field is
+created, or when @code{$0} changes (@pxref{Fields, ,Examining Fields}).
+
+@vindex NR
+@item NR
+This is the number of input records @code{awk} has processed since
+the beginning of the program's execution
+(@pxref{Records, ,How Input is Split into Records}).
+@code{NR} is set each time a new record is read.
+
+@vindex RLENGTH
+@item RLENGTH
+@code{RLENGTH} is the length of the substring matched by the
+@code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+@code{RLENGTH} is set by invoking the @code{match} function. Its value
+is the length of the matched string, or @minus{}1 if no match was found.
+
+@vindex RSTART
+@item RSTART
+@code{RSTART} is the start-index in characters of the substring matched by the
+@code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+@code{RSTART} is set by invoking the @code{match} function. Its value
+is the position in the string where the matched substring starts, or zero
+if no match was found.
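+
+For example, after a successful call to @code{match}, the matched text
+itself can be extracted this way (a minimal sketch):
+
+@example
+if (match($0, regex))
+    matched = substr($0, RSTART, RLENGTH)
+@end example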
+
+@vindex RT
+@item RT *
+@code{RT} is set each time a record is read. It contains the input text
+that matched the text denoted by @code{RS}, the record separator.
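+
+For example (a brief sketch using a regexp record separator):
+
+@example
+$ printf 'one;two.' | gawk 'BEGIN @{ RS = "[;.]" @}
+>                           @{ print NR, $0, RT @}'
+@print{} 1 one ;
+@print{} 2 two .
+@end example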
+
+This variable is a @code{gawk} extension. In other @code{awk} implementations,
+or if @code{gawk} is in compatibility mode
+(@pxref{Options, ,Command Line Options}),
+it is not special.
+@end table
+
+@cindex dark corner
+A side note about @code{NR} and @code{FNR}.
+@code{awk} simply increments both of these variables
+each time it reads a record, instead of setting them to the absolute
+value of the number of records read. This means that your program can
+change these variables, and their new values will be incremented for
+each record (d.c.). For example:
+
+@example
+@group
+$ echo '1
+> 2
+> 3
+> 4' | awk 'NR == 2 @{ NR = 17 @}
+> @{ print NR @}'
+@print{} 1
+@print{} 17
+@print{} 18
+@print{} 19
+@end group
+@end example
+
+@noindent
+Before @code{FNR} was added to the @code{awk} language
+(@pxref{V7/SVR3.1, ,Major Changes between V7 and SVR3.1}),
+many @code{awk} programs used this feature to track the number of
+records in a file by resetting @code{NR} to zero when @code{FILENAME}
+changed.
+
+@node ARGC and ARGV, , Auto-set, Built-in Variables
+@section Using @code{ARGC} and @code{ARGV}
+
+In @ref{Auto-set, , Built-in Variables that Convey Information},
+you saw this program describing the information contained in @code{ARGC}
+and @code{ARGV}:
+
+@example
+@group
+$ awk 'BEGIN @{
+> for (i = 0; i < ARGC; i++)
+> print ARGV[i]
+> @}' inventory-shipped BBS-list
+@print{} awk
+@print{} inventory-shipped
+@print{} BBS-list
+@end group
+@end example
+
+@noindent
+In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]}
+contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains
+@code{"BBS-list"}.
+
+Notice that the @code{awk} program is not entered in @code{ARGV}. The
+other special command line options, with their arguments, are also not
+entered. But variable assignments on the command line @emph{are}
+treated as arguments, and do show up in the @code{ARGV} array.
+
+Your program can alter @code{ARGC} and the elements of @code{ARGV}.
+Each time @code{awk} reaches the end of an input file, it uses the next
+element of @code{ARGV} as the name of the next input file. By storing a
+different string there, your program can change which files are read.
+You can use @code{"-"} to represent the standard input. By storing
+additional elements and incrementing @code{ARGC} you can cause
+additional files to be read.
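+
+For example, this @code{BEGIN} rule (a sketch; the file name is only
+illustrative) adds one more file to the end of the list:
+
+@example
+BEGIN @{
+    ARGV[ARGC] = "extra-file"   # illustrative file name
+    ARGC++
+@}
+@end example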
+
+If you decrease the value of @code{ARGC}, that eliminates input files
+from the end of the list. By recording the old value of @code{ARGC}
+elsewhere, your program can treat the eliminated arguments as
+something other than file names.
+
+To eliminate a file from the middle of the list, store the null string
+(@code{""}) into @code{ARGV} in place of the file's name. As a
+special feature, @code{awk} ignores file names that have been
+replaced with the null string.
+You may also use the @code{delete} statement to remove elements from
+@code{ARGV} (@pxref{Delete, ,The @code{delete} Statement}).
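+
+For example (a sketch), either of the following removes the argument
+currently in @code{ARGV[2]} from the list of files to be read:
+
+@example
+ARGV[2] = ""      # ignored because it is the null string
+delete ARGV[2]    # or remove the element entirely
+@end example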
+
+All of these actions are typically done from the @code{BEGIN} rule,
+before actual processing of the input begins.
+@xref{Split Program, ,Splitting a Large File Into Pieces}, and see
+@ref{Tee Program, ,Duplicating Output Into Multiple Files}, for an example
+of each way of removing elements from @code{ARGV}.
+
+The following fragment processes @code{ARGV} in order to examine, and
+then remove, command line options.
+
+@example
+@group
+BEGIN @{
+ for (i = 1; i < ARGC; i++) @{
+ if (ARGV[i] == "-v")
+ verbose = 1
+ else if (ARGV[i] == "-d")
+ debug = 1
+@end group
+@group
+        else if (ARGV[i] ~ /^-/) @{
+            e = sprintf("%s: unrecognized option -- %c",
+                    ARGV[0], substr(ARGV[i], 2, 1))
+ print e > "/dev/stderr"
+ @} else
+ break
+ delete ARGV[i]
+ @}
+@}
+@end group
+@end example
+
+@node Arrays, Built-in, Built-in Variables, Top
+@chapter Arrays in @code{awk}
+
+An @dfn{array} is a table of values, called @dfn{elements}. The
+elements of an array are distinguished by their indices. @dfn{Indices}
+may be either numbers or strings. @code{awk} maintains a single set
+of names that may be used for naming variables, arrays and functions
+(@pxref{User-defined, ,User-defined Functions}).
+Thus, you cannot have a variable and an array with the same name in the
+same @code{awk} program.
+
+@menu
+* Array Intro:: Introduction to Arrays
+* Reference to Elements:: How to examine one element of an array.
+* Assigning Elements:: How to change an element of an array.
+* Array Example:: Basic Example of an Array
+* Scanning an Array:: A variation of the @code{for} statement. It
+ loops through the indices of an array's
+ existing elements.
+* Delete:: The @code{delete} statement removes an element
+ from an array.
+* Numeric Array Subscripts:: How to use numbers as subscripts in
+ @code{awk}.
+* Uninitialized Subscripts:: Using Uninitialized variables as subscripts.
+* Multi-dimensional:: Emulating multi-dimensional arrays in
+ @code{awk}.
+* Multi-scanning:: Scanning multi-dimensional arrays.
+@end menu
+
+@node Array Intro, Reference to Elements, Arrays, Arrays
+@section Introduction to Arrays
+
+@cindex arrays
+The @code{awk} language provides one-dimensional @dfn{arrays} for storing groups
+of related strings or numbers.
+
+Every @code{awk} array must have a name. Array names have the same
+syntax as variable names; any valid variable name would also be a valid
+array name. But you cannot use one name in both ways (as an array and
+as a variable) in one @code{awk} program.
+
+Arrays in @code{awk} superficially resemble arrays in other programming
+languages; but there are fundamental differences. In @code{awk}, you
+don't need to specify the size of an array before you start to use it.
+Additionally, any number or string in @code{awk} may be used as an
+array index, not just consecutive integers.
+
+In most other languages, you have to @dfn{declare} an array and specify
+how many elements or components it contains. In such languages, the
+declaration causes a contiguous block of memory to be allocated for that
+many elements. An index in the array usually must be a nonnegative integer; for
+example, the index zero specifies the first element in the array, which is
+actually stored at the beginning of the block of memory. Index one
+specifies the second element, which is stored in memory right after the
+first element, and so on. It is impossible to add more elements to the
+array, because it has room for only as many elements as you declared.
+(Some languages allow arbitrary starting and ending indices,
+e.g., @samp{15 .. 27}, but the size of the array is still fixed when
+the array is declared.)
+
+A contiguous array of four elements might look like this,
+conceptually, if the element values are eight, @code{"foo"},
+@code{""} and 30:
+
+@iftex
+@c from Karl Berry, much thanks for the help.
+@tex
+\bigskip % space above the table (about 1 linespace)
+\offinterlineskip
+\newdimen\width \width = 1.5cm
+\newdimen\hwidth \hwidth = 4\width \advance\hwidth by 2pt % 5 * 0.4pt
+\centerline{\vbox{
+\halign{\strut\hfil\ignorespaces#&&\vrule#&\hbox to\width{\hfil#\unskip\hfil}\cr
+\noalign{\hrule width\hwidth}
+ &&{\tt 8} &&{\tt "foo"} &&{\tt ""} &&{\tt 30} &&\quad value\cr
+\noalign{\hrule width\hwidth}
+\noalign{\smallskip}
+ &\omit&0&\omit &1 &\omit&2 &\omit&3 &\omit&\quad index\cr
+}
+}}
+@end tex
+@end iftex
+@ifinfo
+@example
++---------+---------+--------+---------+
+| 8 | "foo" | "" | 30 | @r{value}
++---------+---------+--------+---------+
+ 0 1 2 3 @r{index}
+@end example
+@end ifinfo
+
+@noindent
+Only the values are stored; the indices are implicit from the order of
+the values. Eight is the value at index zero, because eight appears in the
+position with zero elements before it.
+
+@cindex arrays, definition of
+@cindex associative arrays
+@cindex arrays, associative
+Arrays in @code{awk} are different: they are @dfn{associative}. This means
+that each array is a collection of pairs: an index, and its corresponding
+array element value:
+
+@example
+@r{Element} 4 @r{Value} 30
+@r{Element} 2 @r{Value} "foo"
+@r{Element} 1 @r{Value} 8
+@r{Element} 3 @r{Value} ""
+@end example
+
+@noindent
+We have shown the pairs in jumbled order because their order is irrelevant.
+
+One advantage of associative arrays is that new pairs can be added
+at any time. For example, suppose we add to the above array a tenth element
+whose value is @w{@code{"number ten"}}. The result is this:
+
+@example
+@r{Element} 10 @r{Value} "number ten"
+@r{Element} 4 @r{Value} 30
+@r{Element} 2 @r{Value} "foo"
+@r{Element} 1 @r{Value} 8
+@r{Element} 3 @r{Value} ""
+@end example
+
+@noindent
+@cindex sparse arrays
+@cindex arrays, sparse
+Now the array is @dfn{sparse}, which just means some indices are missing:
+it has elements 1--4 and 10, but doesn't have elements 5, 6, 7, 8, or 9.
+@c ok, I should spell out the above, but ...
+
+Another consequence of associative arrays is that the indices don't
+have to be positive integers. Any number, or even a string, can be
+an index. For example, here is an array which translates words from
+English into French:
+
+@example
+@r{Element} "dog" @r{Value} "chien"
+@r{Element} "cat" @r{Value} "chat"
+@r{Element} "one" @r{Value} "un"
+@r{Element} 1 @r{Value} "un"
+@end example
+
+@noindent
+Here we decided to translate the number one in both spelled-out and
+numeric form---thus illustrating that a single array can have both
+numbers and strings as indices.
+(In fact, array subscripts are always strings; this is discussed
+in more detail in
+@ref{Numeric Array Subscripts, ,Using Numbers to Subscript Arrays}.)
+
+When @code{awk} creates an array for you, e.g., with the @code{split}
+built-in function,
+that array's indices are consecutive integers starting at one.
+(@xref{String Functions, ,Built-in Functions for String Manipulation}.)
+
+@node Reference to Elements, Assigning Elements, Array Intro, Arrays
+@section Referring to an Array Element
+@cindex array reference
+@cindex element of array
+@cindex reference to array
+
+The principal way of using an array is to refer to one of its elements.
+An array reference is an expression which looks like this:
+
+@example
+@var{array}[@var{index}]
+@end example
+
+@noindent
+Here, @var{array} is the name of an array. The expression @var{index} is
+the index of the element of the array that you want.
+
+The value of the array reference is the current value of that array
+element. For example, @code{foo[4.3]} is an expression for the element
+of array @code{foo} at index @samp{4.3}.
+
+If you refer to an array element that has no recorded value, the value
+of the reference is @code{""}, the null string. This includes elements
+to which you have not assigned any value, and elements that have been
+deleted (@pxref{Delete, ,The @code{delete} Statement}). Such a reference
+automatically creates that array element, with the null string as its value.
+(In some cases, this is unfortunate, because it might waste memory inside
+@code{awk}.)
+
+@cindex arrays, presence of elements
+@cindex arrays, the @code{in} operator
+You can find out if an element exists in an array at a certain index with
+the expression:
+
+@example
+@var{index} in @var{array}
+@end example
+
+@noindent
+This expression tests whether or not the particular index exists,
+without the side effect of creating that element if it is not present.
+The expression has the value one (true) if @code{@var{array}[@var{index}]}
+exists, and zero (false) if it does not exist.
+
+For example, to test whether the array @code{frequencies} contains the
+index @samp{2}, you could write this statement:
+
+@example
+if (2 in frequencies)
+ print "Subscript 2 is present."
+@end example
+
+Note that this is @emph{not} a test of whether or not the array
+@code{frequencies} contains an element whose @emph{value} is two.
+(There is no way to do that except to scan all the elements.) Also, this
+@emph{does not} create @code{frequencies[2]}, while the following
+(incorrect) alternative would do so:
+
+@example
+if (frequencies[2] != "")
+ print "Subscript 2 is present."
+@end example
+
+@node Assigning Elements, Array Example, Reference to Elements, Arrays
+@section Assigning Array Elements
+@cindex array assignment
+@cindex element assignment
+
+Array elements are lvalues: they can be assigned values just like
+@code{awk} variables:
+
+@example
+@var{array}[@var{subscript}] = @var{value}
+@end example
+
+@noindent
+Here @var{array} is the name of your array. The expression
+@var{subscript} is the index of the element of the array that you want
+to assign a value. The expression @var{value} is the value you are
+assigning to that element of the array.
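+
+For example, the following statement (a minimal sketch) stores the
+current input record in the array @code{line}, at the index given by
+the current record number:
+
+@example
+line[NR] = $0
+@end example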
+
+@node Array Example, Scanning an Array, Assigning Elements, Arrays
+@section Basic Array Example
+
+The following program takes a list of lines, each beginning with a line
+number, and prints them out in order of line number. The line numbers are
+not in order, however, when they are first read: they are scrambled. This
+program sorts the lines by making an array using the line numbers as
+subscripts. It then prints out the lines in sorted order of their numbers.
+It is a very simple program, and gets confused if it encounters repeated
+numbers, gaps, or lines that don't begin with a number.
+
+@example
+@c file eg/misc/arraymax.awk
+@{
+ if ($1 > max)
+ max = $1
+ arr[$1] = $0
+@}
+
+END @{
+ for (x = 1; x <= max; x++)
+ print arr[x]
+@}
+@c endfile
+@end example
+
+The first rule keeps track of the largest line number seen so far;
+it also stores each line into the array @code{arr}, at an index that
+is the line's number.
+
+The second rule runs after all the input has been read, to print out
+all the lines.
+
+When this program is run with the following input:
+
+@example
+@group
+@c file eg/misc/arraymax.data
+5 I am the Five man
+2 Who are you? The new number two!
+4 . . . And four on the floor
+1 Who is number one?
+3 I three you.
+@c endfile
+@end group
+@end example
+
+@noindent
+its output is this:
+
+@example
+1 Who is number one?
+2 Who are you? The new number two!
+3 I three you.
+4 . . . And four on the floor
+5 I am the Five man
+@end example
+
+If a line number is repeated, the last line with a given number overrides
+the others.
+
+Gaps in the line numbers can be handled with an easy improvement to the
+program's @code{END} rule:
+
+@example
+END @{
+ for (x = 1; x <= max; x++)
+ if (x in arr)
+ print arr[x]
+@}
+@end example
+
+@node Scanning an Array, Delete, Array Example, Arrays
+@section Scanning All Elements of an Array
+@cindex @code{for (x in @dots{})}
+@cindex arrays, special @code{for} statement
+@cindex scanning an array
+
+In programs that use arrays, you often need a loop that executes
+once for each element of an array. In other languages, where arrays are
+contiguous and indices are limited to positive integers, this is
+easy: you can
+find all the valid indices by counting from the lowest index
+up to the highest. This
+technique won't do the job in @code{awk}, since any number or string
+can be an array index. So @code{awk} has a special kind of @code{for}
+statement for scanning an array:
+
+@example
+for (@var{var} in @var{array})
+ @var{body}
+@end example
+
+@noindent
+This loop executes @var{body} once for each index in @var{array} that your
+program has previously used, with the
+variable @var{var} set to that index.
+
+Here is a program that uses this form of the @code{for} statement. The
+first rule scans the input records and notes which words appear (at
+least once) in the input, by storing a one into the array @code{used} with
+the word as index. The second rule scans the elements of @code{used} to
+find all the distinct words that appear in the input. It prints each
+word that is more than 10 characters long, and also prints the number of
+such words. @xref{String Functions, ,Built-in Functions for String Manipulation}, for more information
+on the built-in function @code{length}.
+
+@example
+# Record a 1 for each word that is used at least once.
+@{
+ for (i = 1; i <= NF; i++)
+ used[$i] = 1
+@}
+
+# Find number of distinct words more than 10 characters long.
+END @{
+ for (x in used)
+ if (length(x) > 10) @{
+ ++num_long_words
+ print x
+ @}
+ print num_long_words, "words longer than 10 characters"
+@}
+@end example
+
+@noindent
+@xref{Word Sorting, ,Generating Word Usage Counts},
+for a more detailed example of this type.
+
+The order in which elements of the array are accessed by this statement
+is determined by the internal arrangement of the array elements within
+@code{awk} and cannot be controlled or changed. This can lead to
+problems if new elements are added to @var{array} by statements in
+the loop body; you cannot predict whether or not the @code{for} loop will
+reach them. Similarly, changing @var{var} inside the loop may produce
+strange results. It is best to avoid such things.
+
+@node Delete, Numeric Array Subscripts, Scanning an Array, Arrays
+@section The @code{delete} Statement
+@cindex @code{delete} statement
+@cindex deleting elements of arrays
+@cindex removing elements of arrays
+@cindex arrays, deleting an element
+
+You can remove an individual element of an array using the @code{delete}
+statement:
+
+@example
+delete @var{array}[@var{index}]
+@end example
+
+Once you have deleted an array element, you can no longer obtain any
+value the element once had. It is as if you had never referred
+to it and had never given it any value.
+
+Here is an example of deleting elements in an array:
+
+@example
+for (i in frequencies)
+ delete frequencies[i]
+@end example
+
+@noindent
+This example removes all the elements from the array @code{frequencies}.
+
+If you delete an element, a subsequent @code{for} statement to scan the array
+will not report that element, and the @code{in} operator to check for
+the presence of that element will return zero (i.e.@: false):
+
+@example
+delete foo[4]
+if (4 in foo)
+ print "This will never be printed"
+@end example
+
+It is important to note that deleting an element is @emph{not} the
+same as assigning it a null value (the empty string, @code{""}).
+
+@example
+foo[4] = ""
+if (4 in foo)
+ print "This is printed, even though foo[4] is empty"
+@end example
+
+It is not an error to delete an element that does not exist.
+
+@cindex arrays, deleting entire contents
+@cindex deleting entire arrays
+@cindex differences between @code{gawk} and @code{awk}
+You can delete all the elements of an array with a single statement,
+by leaving off the subscript in the @code{delete} statement.
+
+@example
+delete @var{array}
+@end example
+
+This ability is a @code{gawk} extension; it is not available in
+compatibility mode (@pxref{Options, ,Command Line Options}).
+
+Using this version of the @code{delete} statement is about three times
+more efficient than the equivalent loop that deletes each element one
+at a time.
+
+@cindex portability issues
+The following statement provides a portable, but non-obvious way to clear
+out an array.
+
+@cindex Brennan, Michael
+@example
+@group
+# thanks to Michael Brennan for pointing this out
+split("", array)
+@end group
+@end example
+
+The @code{split} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation})
+clears out the target array first. This call asks it to split
+apart the null string. Since there is no data to split out, the
+function simply clears the array and then returns.
+
+@node Numeric Array Subscripts, Uninitialized Subscripts, Delete, Arrays
+@section Using Numbers to Subscript Arrays
+
+An important aspect of arrays to remember is that @emph{array subscripts
+are always strings}. If you use a numeric value as a subscript,
+it will be converted to a string value before it is used for subscripting
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+@cindex conversions, during subscripting
+@cindex numbers, used as subscripts
+@vindex CONVFMT
+This means that the value of the built-in variable @code{CONVFMT} can potentially
+affect how your program accesses elements of an array. For example:
+
+@example
+xyz = 12.153
+data[xyz] = 1
+CONVFMT = "%2.2f"
+@group
+if (xyz in data)
+ printf "%s is in data\n", xyz
+else
+ printf "%s is not in data\n", xyz
+@end group
+@end example
+
+@noindent
+This prints @samp{12.15 is not in data}. The first statement gives
+@code{xyz} a numeric value. Assigning to
+@code{data[xyz]} subscripts @code{data} with the string value @code{"12.153"}
+(using the default conversion value of @code{CONVFMT}, @code{"%.6g"}),
+and assigns one to @code{data["12.153"]}. The program then changes
+the value of @code{CONVFMT}. The test @samp{(xyz in data)} generates a new
+string value from @code{xyz}, this time @code{"12.15"}, since the value of
+@code{CONVFMT} now allows only two digits after the decimal point. This
+test fails, since @code{"12.15"} is a different string from @code{"12.153"}.
+
+According to the rules for conversions
+(@pxref{Conversion, ,Conversion of Strings and Numbers}), integer
+values are always converted to strings as integers, no matter what the
+value of @code{CONVFMT} may happen to be. So the usual case of:
+
+@example
+for (i = 1; i <= maxsub; i++)
+ @i{do something with} array[i]
+@end example
+
+@noindent
+will work, no matter what the value of @code{CONVFMT}.
+
+Like many things in @code{awk}, the majority of the time things work
+as you would expect them to work. But it is useful to have a precise
+knowledge of the actual rules, since sometimes they can have a subtle
+effect on your programs.
+
+@node Uninitialized Subscripts, Multi-dimensional, Numeric Array Subscripts, Arrays
+@section Using Uninitialized Variables as Subscripts
+
+@cindex uninitialized variables, as array subscripts
+@cindex array subscripts, uninitialized variables
+Suppose you want to print your input data in reverse order.
+A reasonable attempt at a program to do so (with some test
+data) might look like this:
+
+@example
+$ echo 'line 1
+> line 2
+> line 3' | awk '@{ l[lines] = $0; ++lines @}
+> END @{
+> for (i = lines-1; i >= 0; --i)
+> print l[i]
+> @}'
+@print{} line 3
+@print{} line 2
+@end example
+
+Unfortunately, the very first line of input data did not come out in the
+output!
+
+At first glance, this program should have worked. The variable @code{lines}
+is uninitialized, and uninitialized variables have the numeric value zero.
+So, the value of @code{l[0]} should have been printed.
+
+The issue here is that subscripts for @code{awk} arrays are @strong{always}
+strings. And uninitialized variables, when used as strings, have the
+value @code{""}, not zero. Thus, @samp{line 1} ended up stored in
+@code{l[""]}.
+
+The following version of the program works correctly:
+
+@example
+@{ l[lines++] = $0 @}
+END @{
+ for (i = lines - 1; i >= 0; --i)
+ print l[i]
+@}
+@end example
+
+Here, the @samp{++} forces @code{lines} to be numeric, thus making
+the ``old value'' numeric zero, which is then converted to @code{"0"}
+as the array subscript.
+
+@cindex null string, as array subscript
+@cindex dark corner
+As we have just seen, even though it is somewhat unusual, the null string
+(@code{""}) is a valid array subscript (d.c.). If @samp{--lint} is provided
+on the command line (@pxref{Options, ,Command Line Options}),
+@code{gawk} will warn about the use of the null string as a subscript.
+
+@node Multi-dimensional, Multi-scanning, Uninitialized Subscripts, Arrays
+@section Multi-dimensional Arrays
+
+@cindex subscripts in arrays
+@cindex arrays, multi-dimensional subscripts
+@cindex multi-dimensional subscripts
+A multi-dimensional array is an array in which an element is identified
+by a sequence of indices, instead of a single index. For example, a
+two-dimensional array requires two indices. The usual way (in most
+languages, including @code{awk}) to refer to an element of a
+two-dimensional array named @code{grid} is with
+@code{grid[@var{x},@var{y}]}.
+
+@vindex SUBSEP
+Multi-dimensional arrays are supported in @code{awk} through
+concatenation of indices into one string. What happens is that
+@code{awk} converts the indices into strings
+(@pxref{Conversion, ,Conversion of Strings and Numbers}) and
+concatenates them together, with a separator between them. This creates
+a single string that describes the values of the separate indices. The
+combined string is used as a single index into an ordinary,
+one-dimensional array. The separator used is the value of the built-in
+variable @code{SUBSEP}.
+
+For example, suppose we evaluate the expression @samp{foo[5,12] = "value"}
+when the value of @code{SUBSEP} is @code{"@@"}. The numbers five and 12 are
+converted to strings and
+concatenated with an @samp{@@} between them, yielding @code{"5@@12"}; thus,
+the array element @code{foo["5@@12"]} is set to @code{"value"}.
+
+Once the element's value is stored, @code{awk} has no record of whether
+it was stored with a single index or a sequence of indices. The two
+expressions @samp{foo[5,12]} and @w{@samp{foo[5 SUBSEP 12]}} are always
+equivalent.
+
+The default value of @code{SUBSEP} is the string @code{"\034"},
+which contains a non-printing character that is unlikely to appear in an
+@code{awk} program or in most input data.
+
+The usefulness of choosing an unlikely character comes from the fact
+that index values that contain a string matching @code{SUBSEP} lead to
+combined strings that are ambiguous. Suppose that @code{SUBSEP} were
+@code{"@@"}; then @w{@samp{foo["a@@b", "c"]}} and @w{@samp{foo["a",
+"b@@c"]}} would be indistinguishable because both would actually be
+stored as @samp{foo["a@@b@@c"]}.
+
+You can test whether a particular index-sequence exists in a
+``multi-dimensional'' array with the same operator @samp{in} used for single
+dimensional arrays. Instead of a single index as the left-hand operand,
+write the whole sequence of indices, separated by commas, in
+parentheses:
+
+@example
+(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array}
+@end example
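+
+For example, to test for the element stored by @samp{foo[5,12] = "value"}
+above (a short sketch):
+
+@example
+if ((5, 12) in foo)
+    print "foo[5,12] exists"
+@end example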
+
+The following example treats its input as a two-dimensional array of
+fields; it rotates this array 90 degrees clockwise and prints the
+result. It assumes that all lines have the same number of
+elements.
+
+@example
+@group
+awk '@{
+ if (max_nf < NF)
+ max_nf = NF
+ max_nr = NR
+ for (x = 1; x <= NF; x++)
+ vector[x, NR] = $x
+@}
+@end group
+
+@group
+END @{
+ for (x = 1; x <= max_nf; x++) @{
+ for (y = max_nr; y >= 1; --y)
+ printf("%s ", vector[x, y])
+ printf("\n")
+ @}
+@}'
+@end group
+@end example
+
+@noindent
+When given the input:
+
+@example
+@group
+1 2 3 4 5 6
+2 3 4 5 6 1
+3 4 5 6 1 2
+4 5 6 1 2 3
+@end group
+@end example
+
+@noindent
+it produces:
+
+@example
+@group
+4 3 2 1
+5 4 3 2
+6 5 4 3
+1 6 5 4
+2 1 6 5
+3 2 1 6
+@end group
+@end example
+
+@node Multi-scanning, , Multi-dimensional, Arrays
+@section Scanning Multi-dimensional Arrays
+
+There is no special @code{for} statement for scanning a
+``multi-dimensional'' array; there cannot be one, because in truth there
+are no multi-dimensional arrays or elements; there is only a
+multi-dimensional @emph{way of accessing} an array.
+
+However, if your program has an array that is always accessed as
+multi-dimensional, you can get the effect of scanning it by combining
+the scanning @code{for} statement
+(@pxref{Scanning an Array, ,Scanning All Elements of an Array}) with the
+@code{split} built-in function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+It works like this:
+
+@example
+for (combined in array) @{
+ split(combined, separate, SUBSEP)
+ @dots{}
+@}
+@end example
+
+@noindent
+This sets @code{combined} to
+each concatenated, combined index in the array, and splits it
+into the individual indices by breaking it apart where the value of
+@code{SUBSEP} appears. The split-out indices become the elements of
+the array @code{separate}.
+
+Thus, suppose you have previously stored a value in @code{array[1, "foo"]};
+then an element with index @code{"1\034foo"} exists in
+@code{array}. (Recall that the default value of @code{SUBSEP} is
+the character with code 034.) Sooner or later the @code{for} statement
+will find that index and do an iteration with @code{combined} set to
+@code{"1\034foo"}. Then the @code{split} function is called as
+follows:
+
+@example
+split("1\034foo", separate, "\034")
+@end example
+
+@noindent
+The result of this is to set @code{separate[1]} to @code{"1"} and
+@code{separate[2]} to @code{"foo"}. Presto, the original sequence of
+separate indices has been recovered.
+
+@node Built-in, User-defined, Arrays, Top
+@chapter Built-in Functions
+
+@c 2e: USE TEXINFO-2 FUNCTION DEFINITION STUFF!!!!!!!!!!!!!
+@cindex built-in functions
+@dfn{Built-in} functions are functions that are always available for
+your @code{awk} program to call. This chapter defines all the built-in
+functions in @code{awk}; some of them are mentioned in other sections,
+but they are summarized here for your convenience. (You can also define
+new functions yourself. @xref{User-defined, ,User-defined Functions}.)
+
+@menu
+* Calling Built-in:: How to call built-in functions.
+* Numeric Functions:: Functions that work with numbers, including
+ @code{int}, @code{sin} and @code{rand}.
+* String Functions:: Functions for string manipulation, such as
+ @code{split}, @code{match}, and
+ @code{sprintf}.
+* I/O Functions:: Functions for files and shell commands.
+* Time Functions:: Functions for dealing with time stamps.
+@end menu
+
+@node Calling Built-in, Numeric Functions, Built-in, Built-in
+@section Calling Built-in Functions
+
+To call a built-in function, write the name of the function followed
+by arguments in parentheses. For example, @samp{atan2(y + z, 1)}
+is a call to the function @code{atan2}, with two arguments.
+
+Whitespace is ignored between the built-in function name and the
+open-parenthesis, but we recommend that you avoid using whitespace
+there. User-defined functions do not permit whitespace in this way, and
+you will find it easier to avoid mistakes by following a simple
+convention which always works: no whitespace after a function name.
+
+@cindex differences between @code{gawk} and @code{awk}
+Each built-in function accepts a certain number of arguments.
+In some cases, arguments can be omitted. The defaults for omitted
+arguments vary from function to function and are described under the
+individual functions. In some @code{awk} implementations, extra
+arguments given to built-in functions are ignored. However, in @code{gawk},
+it is a fatal error to give extra arguments to a built-in function.
+
+When a function is called, expressions that create the function's actual
+parameters are evaluated completely before the function call is performed.
+For example, in the code fragment:
+
+@example
+i = 4
+j = sqrt(i++)
+@end example
+
+@noindent
+the variable @code{i} is set to five before @code{sqrt} is called
+with a value of four for its actual parameter.
+
+@cindex evaluation, order of
+@cindex order of evaluation
+The order of evaluation of the expressions used for the function's
+parameters is undefined. Thus, you should not write programs that
+assume that parameters are evaluated from left to right or from
+right to left. For example,
+
+@example
+i = 5
+j = atan2(++i, i *= 2)
+@end example
+
+If the order of evaluation is left to right, then @code{i} first becomes
+six, and then 12, and @code{atan2} is called with the two arguments six
+and 12. But if the order of evaluation is right to left, @code{i}
+first becomes 10, and then 11, and @code{atan2} is called with the
+two arguments 11 and 10.
+
+@node Numeric Functions, String Functions, Calling Built-in, Built-in
+@section Numeric Built-in Functions
+
+Here is a full list of built-in functions that work with numbers.
+Optional parameters are enclosed in square brackets (``['' and ``]'').
+
+@table @code
+@item int(@var{x})
+@findex int
+This produces the nearest integer to @var{x}, located between @var{x} and zero,
+truncated toward zero.
+
+For example, @code{int(3)} is three, @code{int(3.9)} is three, @code{int(-3.9)}
+is @minus{}3, and @code{int(-3)} is @minus{}3 as well.
+
+@item sqrt(@var{x})
+@findex sqrt
+This gives you the positive square root of @var{x}. It reports an error
+if @var{x} is negative. Thus, @code{sqrt(4)} is two.
+
+@item exp(@var{x})
+@findex exp
+This gives you the exponential of @var{x} (@code{e ^ @var{x}}), or reports
+an error if @var{x} is out of range. The range of values @var{x} can have
+depends on your machine's floating point representation.
+
+@item log(@var{x})
+@findex log
+This gives you the natural logarithm of @var{x}, if @var{x} is positive;
+otherwise, it reports an error.
+
+@item sin(@var{x})
+@findex sin
+This gives you the sine of @var{x}, with @var{x} in radians.
+
+@item cos(@var{x})
+@findex cos
+This gives you the cosine of @var{x}, with @var{x} in radians.
+
+@item atan2(@var{y}, @var{x})
+@findex atan2
+This gives you the arctangent of @code{@var{y} / @var{x}} in radians.
+
+@item rand()
+@findex rand
+This gives you a random number. The values of @code{rand} are
+uniformly-distributed between zero and one.
+The value could be zero but is never one.
+
+Often you want random integers instead. Here is a user-defined function
+you can use to obtain a random non-negative integer less than @var{n}:
+
+@example
+function randint(n) @{
+ return int(n * rand())
+@}
+@end example
+
+@noindent
+The multiplication produces a random real number greater than or equal
+to zero and less than @code{n}. We then make it an integer (using
+@code{int}) between zero and @code{n} @minus{} 1, inclusive.
+
+Here is an example where a similar function is used to produce
+random integers between one and @var{n}. This program
+prints a new random number for each input record.
+
+@example
+@group
+awk '
+# Function to roll a simulated die.
+function roll(n) @{ return 1 + int(rand() * n) @}
+@end group
+
+@group
+# Roll 3 six-sided dice and
+# print total number of points.
+@{
+ printf("%d points\n",
+ roll(6)+roll(6)+roll(6))
+@}'
+@end group
+@end example
+
+@cindex seed for random numbers
+@cindex random numbers, seed of
+@comment MAWK uses a different seed each time.
+@strong{Caution:} In most @code{awk} implementations, including @code{gawk},
+@code{rand} starts generating numbers from the same
+starting number, or @dfn{seed}, each time you run @code{awk}. Thus,
+a program will generate the same results each time you run it.
+The numbers are random within one @code{awk} run, but predictable
+from run to run. This is convenient for debugging, but if you want
+a program to do different things each time it is used, you must change
+the seed to a value that will be different in each run. To do this,
+use @code{srand}.
+
+@item srand(@r{[}@var{x}@r{]})
+@findex srand
+The function @code{srand} sets the starting point, or seed,
+for generating random numbers to the value @var{x}.
+
+Each seed value leads to a particular sequence of random
+numbers.@footnote{Computer generated random numbers really are not truly
+random. They are technically known as ``pseudo-random.'' This means
+that while the numbers in a sequence appear to be random, you can in
+fact generate the same sequence of random numbers over and over again.}
+Thus, if you set the seed to the same value a second time, you will get
+the same sequence of random numbers again.
+
+If you omit the argument @var{x}, as in @code{srand()}, then the current
+date and time of day are used for a seed. This is the way to get random
+numbers that are truly unpredictable.
+
+The return value of @code{srand} is the previous seed. This makes it
+easy to keep track of the seeds for use in consistently reproducing
+sequences of random numbers.
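+
+For example, the following sketch saves the old seed while installing a
+known one, so that the same sequence can be replayed later:
+
+@example
+old_seed = srand(42)    # remember previous seed, start a known sequence
+@dots{}
+srand(42)               # replay the same sequence of random numbers
+@end example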
+@end table
+
+@node String Functions, I/O Functions, Numeric Functions, Built-in
+@section Built-in Functions for String Manipulation
+
+The functions in this section look at or change the text of one or more
+strings.
+Optional parameters are enclosed in square brackets (``['' and ``]'').
+
+@table @code
+@item index(@var{in}, @var{find})
+@findex index
+This searches the string @var{in} for the first occurrence of the string
+@var{find}, and returns the position in characters where that occurrence
+begins in the string @var{in}. For example:
+
+@example
+$ awk 'BEGIN @{ print index("peanut", "an") @}'
+@print{} 3
+@end example
+
+@noindent
+If @var{find} is not found, @code{index} returns zero.
+(Remember that string indices in @code{awk} start at one.)
+
+@item length(@r{[}@var{string}@r{]})
+@findex length
+This gives you the number of characters in @var{string}. If
+@var{string} is a number, the length of the digit string representing
+that number is returned. For example, @code{length("abcde")} is five. By
+contrast, @code{length(15 * 35)} works out to three. How? Well, 15 * 35 =
+525, and 525 is then converted to the string @code{"525"}, which has
+three characters.
+
+If no argument is supplied, @code{length} returns the length of @code{$0}.
+
+@cindex historical features
+@cindex portability issues
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+In older versions of @code{awk}, you could call the @code{length} function
+without any parentheses. Doing so is marked as ``deprecated'' in the
+POSIX standard. This means that while you can do this in your
+programs, it is a feature that can eventually be removed from a future
+version of the standard. Therefore, for maximal portability of your
+@code{awk} programs, you should always supply the parentheses.
+
+@item match(@var{string}, @var{regexp})
+@findex match
+The @code{match} function searches the string, @var{string}, for the
+longest, leftmost substring matched by the regular expression,
+@var{regexp}. It returns the character position, or @dfn{index}, of
+where that substring begins (one, if it starts at the beginning of
+@var{string}). If no match is found, it returns zero.
+
+@vindex RSTART
+@vindex RLENGTH
+The @code{match} function sets the built-in variable @code{RSTART} to
+the index. It also sets the built-in variable @code{RLENGTH} to the
+length in characters of the matched substring. If no match is found,
+@code{RSTART} is set to zero, and @code{RLENGTH} to @minus{}1.
+
+For example:
+
+@example
+@group
+@c file eg/misc/findpat.sh
+awk '@{
+ if ($1 == "FIND")
+ regex = $2
+ else @{
+ where = match($0, regex)
+ if (where != 0)
+ print "Match of", regex, "found at", \
+ where, "in", $0
+ @}
+@}'
+@c endfile
+@end group
+@end example
+
+@noindent
+This program looks for lines that match the regular expression stored in
+the variable @code{regex}. This regular expression can be changed. If the
+first word on a line is @samp{FIND}, @code{regex} is changed to be the
+second word on that line. Therefore, given:
+
+@example
+@c file eg/misc/findpat.data
+FIND ru+n
+My program runs
+but not very quickly
+FIND Melvin
+JF+KM
+This line is property of Reality Engineering Co.
+Melvin was here.
+@c endfile
+@end example
+
+@noindent
+@code{awk} prints:
+
+@example
+Match of ru+n found at 12 in My program runs
+Match of Melvin found at 1 in Melvin was here.
+@end example
+
+@item split(@var{string}, @var{array} @r{[}, @var{fieldsep}@r{]})
+@findex split
+This divides @var{string} into pieces separated by @var{fieldsep},
+and stores the pieces in @var{array}. The first piece is stored in
+@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
+forth. The string value of the third argument, @var{fieldsep}, is
+a regexp describing where to split @var{string} (much as @code{FS} can
+be a regexp describing where to split input records). If
+the @var{fieldsep} is omitted, the value of @code{FS} is used.
+@code{split} returns the number of elements created.
+
+The @code{split} function splits strings into pieces in a
+manner similar to the way input lines are split into fields. For example:
+
+@example
+split("cul-de-sac", a, "-")
+@end example
+
+@noindent
+splits the string @samp{cul-de-sac} into three fields using @samp{-} as the
+separator. It sets the contents of the array @code{a} as follows:
+
+@example
+a[1] = "cul"
+a[2] = "de"
+a[3] = "sac"
+@end example
+
+@noindent
+The value returned by this call to @code{split} is three.
+
+As with input field-splitting, when the value of @var{fieldsep} is
+@w{@code{" "}}, leading and trailing whitespace is ignored, and the elements
+are separated by runs of whitespace.
+
+@cindex differences between @code{gawk} and @code{awk}
+Also as with input field-splitting, if @var{fieldsep} is the null string, each
+individual character in the string is split into its own array element.
+(This is a @code{gawk}-specific extension.)
+
+@cindex dark corner
+Recent implementations of @code{awk}, including @code{gawk}, allow
+the third argument to be a regexp constant (@code{/abc/}), as well as a
+string (d.c.). The POSIX standard allows this as well.
+
+Before splitting the string, @code{split} deletes any previously existing
+elements in the array @var{array} (d.c.).
+
+@item sprintf(@var{format}, @var{expression1},@dots{})
+@findex sprintf
+This returns (without printing) the string that @code{printf} would
+have printed out with the same arguments
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).
+For example:
+
+@example
+sprintf("pi = %.2f (approx.)", 22/7)
+@end example
+
+@noindent
+returns the string @w{@code{"pi = 3.14 (approx.)"}}.
+
+@ignore
+2e: For sub, gsub, and gensub, either here or in the "how much matches"
+ section, we need some explanation that it is possible to match the
+ null string when using closures like *. E.g.,
+
+ $ echo abc | awk '{ gsub(/m*/, "X"); print }'
+ @print{} XaXbXc
+
+ Although this makes a certain amount of sense, it can be very
+    surprising.
+@end ignore
+
+@item sub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]})
+@findex sub
+The @code{sub} function alters the value of @var{target}.
+It searches this value, which is treated as a string, for the
+leftmost longest substring matched by the regular expression, @var{regexp},
+extending this match as far as possible. Then the entire string is
+changed by replacing the matched text with @var{replacement}.
+The modified string becomes the new value of @var{target}.
+
+This function is peculiar because @var{target} is not simply
+used to compute a value, and not just any expression will do: it
+must be a variable, field or array element, so that @code{sub} can
+store a modified value there. If this argument is omitted, then the
+default is to use and alter @code{$0}.
+
+For example:
+
+@example
+str = "water, water, everywhere"
+sub(/at/, "ith", str)
+@end example
+
+@noindent
+sets @code{str} to @w{@code{"wither, water, everywhere"}}, by replacing the
+leftmost, longest occurrence of @samp{at} with @samp{ith}.
+
+The @code{sub} function returns the number of substitutions made (either
+one or zero).
+
+If the special character @samp{&} appears in @var{replacement}, it
+stands for the precise substring that was matched by @var{regexp}. (If
+the regexp can match more than one string, then this precise substring
+may vary.) For example:
+
+@example
+awk '@{ sub(/candidate/, "& and his wife"); print @}'
+@end example
+
+@noindent
+changes the first occurrence of @samp{candidate} to @samp{candidate
+and his wife} on each input line.
+
+Here is another example:
+
+@example
+awk 'BEGIN @{
+   str = "daabaaa"
+   sub(/a+/, "c&c", str)
+   print str
+@}'
+@print{} dcaacbaaa
+@end example
+
+@noindent
+This shows how @samp{&} can represent a non-constant string, and also
+illustrates the ``leftmost, longest'' rule in regexp matching
+(@pxref{Leftmost Longest, ,How Much Text Matches?}).
+
+The effect of this special character (@samp{&}) can be turned off by putting a
+backslash before it in the string. As usual, to insert one backslash in
+the string, you must write two backslashes. Therefore, write @samp{\\&}
+in a string constant to include a literal @samp{&} in the replacement.
+For example, here is how to replace the first @samp{|} on each line with
+an @samp{&}:
+
+@example
+awk '@{ sub(/\|/, "\\&"); print @}'
+@end example
+
+@strong{Note:} As mentioned above, the third argument to @code{sub} must
+be a variable, field or array reference.
+Some versions of @code{awk} allow the third argument to
+be an expression which is not an lvalue. In such a case, @code{sub}
+would still search for the pattern and return zero or one, but the result of
+the substitution (if any) would be thrown away because there is no place
+to put it. Such versions of @code{awk} accept expressions like
+this:
+
+@example
+sub(/USA/, "United States", "the USA and Canada")
+@end example
+
+@noindent
+This is considered erroneous in @code{gawk}.
+
+@item gsub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]})
+@findex gsub
+This is similar to the @code{sub} function, except @code{gsub} replaces
+@emph{all} of the longest, leftmost, @emph{non-overlapping} matching
+substrings it can find. The @samp{g} in @code{gsub} stands for
+``global,'' which means replace everywhere. For example:
+
+@example
+awk '@{ gsub(/Britain/, "United Kingdom"); print @}'
+@end example
+
+@noindent
+replaces all occurrences of the string @samp{Britain} with @samp{United
+Kingdom} for all input records.
+
+The @code{gsub} function returns the number of substitutions made. If
+the variable to be searched and altered, @var{target}, is
+omitted, then the entire input record, @code{$0}, is used.
+
+As in @code{sub}, the characters @samp{&} and @samp{\} are special,
+and the third argument must be an lvalue.
+@end table
+
+@table @code
+@item gensub(@var{regexp}, @var{replacement}, @var{how} @r{[}, @var{target}@r{]})
+@findex gensub
+@code{gensub} is a general substitution function. Like @code{sub} and
+@code{gsub}, it searches the target string @var{target} for matches of
+the regular expression @var{regexp}. Unlike @code{sub} and
+@code{gsub}, the modified string is returned as the result of the
+function, and the original target string is @emph{not} changed. If
+@var{how} is a string beginning with @samp{g} or @samp{G}, then it
+replaces all matches of @var{regexp} with @var{replacement}.
+Otherwise, @var{how} is a number indicating which match of @var{regexp}
+to replace. If no @var{target} is supplied, @code{$0} is used instead.
+
+@code{gensub} provides an additional feature that is not available
+in @code{sub} or @code{gsub}: the ability to specify components of
+a regexp in the replacement text. This is done by using parentheses
+in the regexp to mark the components, and then specifying @samp{\@var{n}}
+in the replacement text, where @var{n} is a digit from one to nine.
+For example:
+
+@example
+@group
+$ gawk '
+> BEGIN @{
+> a = "abc def"
+> b = gensub(/(.+) (.+)/, "\\2 \\1", "g", a)
+> print b
+> @}'
+@print{} def abc
+@end group
+@end example
+
+@noindent
+As described above for @code{sub}, you must type two backslashes in order
+to get one into the string.
+
+In the replacement text, the sequence @samp{\0} represents the entire
+matched text, as does the character @samp{&}.
+
+This example shows how you can use the third argument to control
+which match of the regexp should be changed.
+
+@example
+$ echo a b c a b c |
+> gawk '@{ print gensub(/a/, "AA", 2) @}'
+@print{} a b c AA b c
+@end example
+
+In this case, @code{$0} is used as the default target string.
+@code{gensub} returns the new string as its result, which is
+passed directly to @code{print} for printing.
+
+If the @var{how} argument is a string that does not begin with @samp{g} or
+@samp{G}, or if it is a number that is less than zero, only one
+substitution is performed.
+
+@cindex differences between @code{gawk} and @code{awk}
+@code{gensub} is a @code{gawk} extension; it is not available
+in compatibility mode (@pxref{Options, ,Command Line Options}).
+
+@item substr(@var{string}, @var{start} @r{[}, @var{length}@r{]})
+@findex substr
+This returns a @var{length}-character-long substring of @var{string},
+starting at character number @var{start}. The first character of a
+string is character number one. For example,
+@code{substr("washington", 5, 3)} returns @code{"ing"}.
+
+If @var{length} is not present, this function returns the whole suffix of
+@var{string} that begins at character number @var{start}. For example,
+@code{substr("washington", 5)} returns @code{"ington"}. The whole
+suffix is also returned
+if @var{length} is greater than the number of characters remaining
+in the string, counting from character number @var{start}.
+
+@cindex case conversion
+@cindex conversion of case
+@item tolower(@var{string})
+@findex tolower
+This returns a copy of @var{string}, with each upper-case character
+in the string replaced with its corresponding lower-case character.
+Non-alphabetic characters are left unchanged. For example,
+@code{tolower("MiXeD cAsE 123")} returns @code{"mixed case 123"}.
+
+@item toupper(@var{string})
+@findex toupper
+This returns a copy of @var{string}, with each lower-case character
+in the string replaced with its corresponding upper-case character.
+Non-alphabetic characters are left unchanged. For example,
+@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}.
+@end table
+
+@c fakenode --- for prepinfo
+@subheading More About @samp{\} and @samp{&} with @code{sub}, @code{gsub} and @code{gensub}
+
+@cindex escape processing, @code{sub} et. al.
+When using @code{sub}, @code{gsub} or @code{gensub}, and trying to get literal
+backslashes and ampersands into the replacement text, you need to remember
+that there are several levels of @dfn{escape processing} going on.
+
+First, there is the @dfn{lexical} level, which is when @code{awk} reads
+your program, and builds an internal copy of your program that can
+be executed.
+
+Then there is the run-time level, when @code{awk} actually scans the
+replacement string to determine what to generate.
+
+At both levels, @code{awk} looks for a defined set of characters that
+can come after a backslash. At the lexical level, it looks for the
+escape sequences listed in @ref{Escape Sequences}.
+Thus, for every @samp{\} that @code{awk} will process at the run-time
+level, you type two @samp{\}s at the lexical level.
+When a character that is not valid for an escape sequence follows the
+@samp{\}, Unix @code{awk} and @code{gawk} both simply remove the initial
+@samp{\}, and put the following character into the string. Thus, for
+example, @code{"a\qb"} is treated as @code{"aqb"}.
+
+At the run-time level, the various functions handle sequences of
+@samp{\} and @samp{&} differently. The situation is (sadly) somewhat complex.
+
+Historically, the @code{sub} and @code{gsub} functions treated the two
+character sequence @samp{\&} specially; this sequence was replaced in
+the generated text with a single @samp{&}. Any other @samp{\} within
+the @var{replacement} string that did not precede an @samp{&} was passed
+through unchanged. To illustrate with a table:
+
+@c Thank to Karl Berry for help with the TeX stuff.
+@tex
+\vbox{\bigskip
+% This table has lots of &'s and \'s, so unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+% But then we need character for escape and tab.
+@catcode`! = 4
+@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr
+ You type!@code{sub} sees!@code{sub} generates@cr
+@hrulefill!@hrulefill!@hrulefill@cr
+ @code{\&}! @code{&}!the matched text@cr
+ @code{\\&}! @code{\&}!a literal @samp{&}@cr
+ @code{\\\&}! @code{\&}!a literal @samp{&}@cr
+@code{\\\\&}! @code{\\&}!a literal @samp{\&}@cr
+@code{\\\\\&}! @code{\\&}!a literal @samp{\&}@cr
+@code{\\\\\\&}! @code{\\\&}!a literal @samp{\\&}@cr
+ @code{\\q}! @code{\q}!a literal @samp{\q}@cr
+}
+@bigskip}
+@end tex
+@ifinfo
+@display
+ You type @code{sub} sees @code{sub} generates
+ -------- ---------- ---------------
+ @code{\&} @code{&} the matched text
+ @code{\\&} @code{\&} a literal @samp{&}
+ @code{\\\&} @code{\&} a literal @samp{&}
+ @code{\\\\&} @code{\\&} a literal @samp{\&}
+ @code{\\\\\&} @code{\\&} a literal @samp{\&}
+@code{\\\\\\&} @code{\\\&} a literal @samp{\\&}
+ @code{\\q} @code{\q} a literal @samp{\q}
+@end display
+@end ifinfo
+
+@noindent
+This table shows both the lexical level processing, where
+an odd number of backslashes becomes an even number at the run time level,
+and the run-time processing done by @code{sub}.
+(For the sake of simplicity, the rest of the tables below only show the
+case of even numbers of @samp{\}s entered at the lexical level.)
+
+The problem with the historical approach is that there is no way to get
+a literal @samp{\} followed by the matched text.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+The 1992 POSIX standard attempted to fix this problem. The standard
+says that @code{sub} and @code{gsub} look for either a @samp{\} or an @samp{&}
+after the @samp{\}. If either one follows a @samp{\}, that character is
+output literally. The interpretation of @samp{\} and @samp{&} then becomes
+like this:
+
+@c thanks to Karl Berry for formatting this table
+@tex
+\vbox{\bigskip
+% This table has lots of &'s and \'s, so unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+% But then we need character for escape and tab.
+@catcode`! = 4
+@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr
+ You type!@code{sub} sees!@code{sub} generates@cr
+@hrulefill!@hrulefill!@hrulefill@cr
+ @code{&}! @code{&}!the matched text@cr
+ @code{\\&}! @code{\&}!a literal @samp{&}@cr
+@code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text@cr
+@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr
+}
+@bigskip}
+@end tex
+@ifinfo
+@display
+ You type @code{sub} sees @code{sub} generates
+ -------- ---------- ---------------
+ @code{&} @code{&} the matched text
+ @code{\\&} @code{\&} a literal @samp{&}
+ @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
+@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
+@end display
+@end ifinfo
+
+@noindent
+This would appear to solve the problem.
+Unfortunately, the phrasing of the standard is unusual. It
+says, in effect, that @samp{\} turns off the special meaning of any
+following character, but that for anything other than @samp{\} and @samp{&},
+such special meaning is undefined. This wording leads to two problems.
+
+@enumerate
+@item
+Backslashes must now be doubled in the @var{replacement} string, breaking
+historical @code{awk} programs.
+
+@item
+To make sure that an @code{awk} program is portable, @emph{every} character
+in the @var{replacement} string must be preceded with a
+backslash.@footnote{This consequence was certainly unintended.}
+@c I can say that, 'cause I was involved in making this change
+@end enumerate
+
+The POSIX standard is under revision.@footnote{As of December 1995,
+with final approval and publication hopefully sometime in 1996.}
+Because of the above problems, proposed text for the revised standard
+reverts to rules that correspond more closely to the original
+practice. The proposed rules have special cases that make it possible
+to produce a @samp{\} preceding the matched text.
+
+@tex
+\vbox{\bigskip
+% This table has lots of &'s and \'s, so unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+% But then we need character for escape and tab.
+@catcode`! = 4
+@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr
+ You type!@code{sub} sees!@code{sub} generates@cr
+@hrulefill!@hrulefill!@hrulefill@cr
+@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr
+@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text@cr
+ @code{\\&}! @code{\&}!a literal @samp{&}@cr
+ @code{\\q}! @code{\q}!a literal @samp{\q}@cr
+}
+@bigskip}
+@end tex
+@ifinfo
+@display
+ You type @code{sub} sees @code{sub} generates
+ -------- ---------- ---------------
+@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
+ @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} a literal @samp{&}
+ @code{\\q} @code{\q} a literal @samp{\q}
+@end display
+@end ifinfo
+
+In a nutshell, at the run-time level, there are now three special sequences
+of characters, @samp{\\\&}, @samp{\\&} and @samp{\&}, whereas historically,
+there was only one. However, as in the historical case, any @samp{\} that
+is not part of one of these three sequences is not special, and appears
+in the output literally.
+
+@code{gawk} 3.0 follows these proposed POSIX rules for @code{sub} and
+@code{gsub}.
+@c As much as we think it's a lousy idea. You win some, you lose some. Sigh.
+Whether these proposed rules will actually become codified into the
+standard is unknown at this point. Subsequent @code{gawk} releases will
+track the standard and implement whatever the final version specifies;
+this @value{DOCUMENT} will be updated as well.
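+
+For example, here is a small illustrative sketch of the rules that
+@code{gawk} 3.0 follows. The program text @code{"\\&"} becomes
+@samp{\&} at the run-time level, producing a literal @samp{&}, while
+@code{"\\\\&"} becomes @samp{\\&}, producing a literal @samp{\}
+followed by the matched text:
+
+@example
+$ echo abc | gawk '@{ gsub(/b/, "\\&"); print @}'
+@print{} a&c
+$ echo abc | gawk '@{ gsub(/b/, "\\\\&"); print @}'
+@print{} a\bc
+@end example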
+
+The rules for @code{gensub} are considerably simpler. At the run-time
+level, whenever @code{gawk} sees a @samp{\}, if the following character
+is a digit, then the text that matched the corresponding parenthesized
+subexpression is placed in the generated output. Otherwise,
+no matter what the character after the @samp{\} is, that character will
+appear in the generated text, and the @samp{\} will not.
+
+@tex
+\vbox{\bigskip
+% This table has lots of &'s and \'s, so unspecialize them.
+\catcode`\& = \other \catcode`\\ = \other
+% But then we need character for escape and tab.
+@catcode`! = 4
+@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr
+ You type!@code{gensub} sees!@code{gensub} generates@cr
+@hrulefill!@hrulefill!@hrulefill@cr
+ @code{&}! @code{&}!the matched text@cr
+ @code{\\&}! @code{\&}!a literal @samp{&}@cr
+ @code{\\\\}! @code{\\}!a literal @samp{\}@cr
+ @code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text@cr
+@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr
+ @code{\\q}! @code{\q}!a literal @samp{q}@cr
+}
+@bigskip}
+@end tex
+@ifinfo
+@display
+ You type @code{gensub} sees @code{gensub} generates
+ -------- ------------- ------------------
+ @code{&} @code{&} the matched text
+ @code{\\&} @code{\&} a literal @samp{&}
+ @code{\\\\} @code{\\} a literal @samp{\}
+ @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
+@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
+ @code{\\q} @code{\q} a literal @samp{q}
+@end display
+@end ifinfo
+
+Because of the complexity of the lexical and run-time level processing,
+and the special cases for @code{sub} and @code{gsub},
+we recommend the use of @code{gawk} and @code{gensub} when you have
+to do substitutions.
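+
+For example (an illustrative sketch), the parenthesized subexpression
+feature of @code{gensub} makes it easy to rearrange matched text:
+
+@example
+$ echo hello world |
+> gawk '@{ print gensub(/(hello) (world)/, "\\2 \\1", 1) @}'
+@print{} world hello
+@end example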
+
+@node I/O Functions, Time Functions, String Functions, Built-in
+@section Built-in Functions for Input/Output
+
+The following functions are related to Input/Output (I/O).
+Optional parameters are enclosed in square brackets (``['' and ``]'').
+
+@table @code
+@item close(@var{filename})
+@findex close
+Close the file @var{filename}, for input or output. The argument may
+alternatively be a shell command that was used for redirecting to or
+from a pipe; then the pipe is closed.
+@xref{Close Files And Pipes, ,Closing Input and Output Files and Pipes},
+for more information.
+
+@item fflush(@r{[}@var{filename}@r{]})
+@findex fflush
+@cindex portability issues
+@cindex flushing buffers
+@cindex buffers, flushing
+@cindex buffering output
+@cindex output, buffering
+Flush any buffered output associated with @var{filename}, which is either a
+file opened for writing, or a shell command for redirecting output to
+a pipe.
+
+Many utility programs will @dfn{buffer} their output; they save information
+to be written to a disk file or terminal in memory, until there is enough
+for it to be worthwhile to send the data to the output device.
+This is often more efficient than writing
+every little bit of information as soon as it is ready. However, sometimes
+it is necessary to force a program to @dfn{flush} its buffers; that is,
+write the information to its destination, even if a buffer is not full.
+This is the purpose of the @code{fflush} function; @code{gawk} also
+buffers its output, and @code{fflush} can be used to force
+@code{gawk} to flush its buffers.
+
+@code{fflush} is a recent (1994) addition to the Bell Labs research
+version of @code{awk}; it is not part of the POSIX standard, and will
+not be available if @samp{--posix} has been specified on the command
+line (@pxref{Options, ,Command Line Options}).
+
+@code{gawk} extends the @code{fflush} function in two ways. The first
+is to allow no argument at all. In this case, the buffer for the
+standard output is flushed. The second way is to allow the null string
+(@w{@code{""}}) as the argument. In this case, the buffers for
+@emph{all} open output files and pipes are flushed.
+
+@code{fflush} returns zero if the buffer was successfully flushed,
+and nonzero otherwise.
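+
+For example (a sketch; the file name is arbitrary):
+
+@example
+print "log entry" > "/tmp/log"
+fflush("/tmp/log")   # flush just this file
+print "data" | "sort"
+fflush()             # flush the standard output
+fflush("")           # flush all open output files and pipes
+@end example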
+
+@item system(@var{command})
+@findex system
+@cindex interaction, @code{awk} and other programs
+The @code{system} function allows the user to execute operating system commands
+and then return to the @code{awk} program. The @code{system} function
+executes the command given by the string @var{command}. It returns, as
+its value, the status returned by the command that was executed.
+
+For example, if the following fragment of code is put in your @code{awk}
+program:
+
+@example
+END @{
+ system("date | mail -s 'awk run done' root")
+@}
+@end example
+
+@noindent
+the system administrator will be sent mail when the @code{awk} program
+finishes processing input and begins its end-of-input processing.
+
+Note that redirecting @code{print} or @code{printf} into a pipe is often
+enough to accomplish your task. However, if your @code{awk}
+program is interactive, @code{system} is useful for cranking up large
+self-contained programs, such as a shell or an editor.
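+
+For example (a sketch), the mail notification shown above could also be
+produced without @code{system}, by redirecting @code{print} into a pipe:
+
+@example
+END @{
+    print "awk run done" | "mail -s 'awk run done' root"
+    close("mail -s 'awk run done' root")
+@}
+@end example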
+
+Some operating systems cannot implement the @code{system} function.
+@code{system} causes a fatal error if it is not supported.
+@end table
+
+@c fakenode --- for prepinfo
+@subheading Controlling Output Buffering with @code{system}
+@cindex flushing buffers
+@cindex buffers, flushing
+@cindex buffering output
+@cindex output, buffering
+
+The @code{fflush} function provides explicit control over output buffering for
+individual files and pipes. However, its use is not portable to many other
+@code{awk} implementations. An alternative method to flush output
+buffers is by calling @code{system} with a null string as its argument:
+
+@example
+system("") # flush output
+@end example
+
+@noindent
+@code{gawk} treats this use of the @code{system} function as a special
+case, and is smart enough not to run a shell (or other command
+interpreter) with the empty command. Therefore, with @code{gawk}, this
+idiom is not only useful, it is efficient. While this method should work
+with other @code{awk} implementations, it will not necessarily avoid
+starting an unnecessary shell. (Other implementations may only
+flush the buffer associated with the standard output, and not necessarily
+all buffered output.)
+
+If you think about what a programmer expects, it makes sense that
+@code{system} should flush any pending output. The following program:
+
+@example
+BEGIN @{
+ print "first print"
+ system("echo system echo")
+ print "second print"
+@}
+@end example
+
+@noindent
+must print
+
+@example
+first print
+system echo
+second print
+@end example
+
+@noindent
+and not
+
+@example
+system echo
+first print
+second print
+@end example
+
+If @code{awk} did not flush its buffers before calling @code{system}, the
+latter (undesirable) output is what you would see.
+
+@node Time Functions, , I/O Functions, Built-in
+@section Functions for Dealing with Time Stamps
+
+@cindex timestamps
+@cindex time of day
+A common use for @code{awk} programs is the processing of log files
+containing time stamp information, indicating when a
+particular log record was written. Many programs log their time stamp
+in the form returned by the @code{time} system call, which is the
+number of seconds since a particular epoch. On POSIX systems,
+it is the number of seconds since Midnight, January 1, 1970, UTC.
+
+In order to make it easier to process such log files, and to produce
+useful reports, @code{gawk} provides two functions for working with time
+stamps. Both of these are @code{gawk} extensions; they are not specified
+in the POSIX standard, nor are they in any other known version
+of @code{awk}.
+
+Optional parameters are enclosed in square brackets (``['' and ``]'').
+
+@table @code
+@item systime()
+@findex systime
+This function returns the current time as the number of seconds since
+the system epoch. On POSIX systems, this is the number of seconds
+since Midnight, January 1, 1970, UTC. It may be a different number on
+other systems.
+
+@item strftime(@r{[}@var{format} @r{[}, @var{timestamp}@r{]]})
+@findex strftime
+This function returns a string. It is similar to the function of the
+same name in ANSI C. The time specified by @var{timestamp} is used to
+produce a string, based on the contents of the @var{format} string.
+The @var{timestamp} is in the same format as the value returned by the
+@code{systime} function. If no @var{timestamp} argument is supplied,
+@code{gawk} will use the current time of day as the time stamp.
+If no @var{format} argument is supplied, @code{strftime} uses
+@code{@w{"%a %b %d %H:%M:%S %Z %Y"}}. This format string produces
+output (almost) equivalent to that of the @code{date} utility.
+(Versions of @code{gawk} prior to 3.0 require the @var{format} argument.)
+@end table
+
+The @code{systime} function allows you to compare a time stamp from a
+log file with the current time of day. In particular, it is easy to
+determine how long ago a particular record was logged. It also allows
+you to produce log records using the ``seconds since the epoch'' format.
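+
+For example, assuming that the first field of each log record is such a
+time stamp (an assumption made only for this sketch), the following rule
+reports how long ago each record was written:
+
+@example
+@{ print $0, "logged", systime() - $1, "seconds ago" @}
+@end example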
+
+The @code{strftime} function allows you to easily turn a time stamp
+into human-readable information. It is similar in nature to the @code{sprintf}
+function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}),
+in that it copies non-format specification characters verbatim to the
+returned string, while substituting date and time values for format
+specifications in the @var{format} string.
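+
+For example (an illustrative sketch; the output depends on the current
+time and locale):
+
+@example
+gawk 'BEGIN @{ print strftime("It is now %I:%M %p on %A, %B %d.") @}'
+@end example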
+
+@code{strftime} is guaranteed by the ANSI C standard to support
+the following date format specifications:
+
+@table @code
+@item %a
+The locale's abbreviated weekday name.
+
+@item %A
+The locale's full weekday name.
+
+@item %b
+The locale's abbreviated month name.
+
+@item %B
+The locale's full month name.
+
+@item %c
+The locale's ``appropriate'' date and time representation.
+
+@item %d
+The day of the month as a decimal number (01--31).
+
+@item %H
+The hour (24-hour clock) as a decimal number (00--23).
+
+@item %I
+The hour (12-hour clock) as a decimal number (01--12).
+
+@item %j
+The day of the year as a decimal number (001--366).
+
+@item %m
+The month as a decimal number (01--12).
+
+@item %M
+The minute as a decimal number (00--59).
+
+@item %p
+The locale's equivalent of the AM/PM designations associated
+with a 12-hour clock.
+
+@item %S
+The second as a decimal number (00--61).@footnote{Occasionally there are
+minutes in a year with one or two leap seconds, which is why the
+seconds can go up to 61.}
+
+@item %U
+The week number of the year (the first Sunday as the first day of week one)
+as a decimal number (00--53).
+
+@item %w
+The weekday as a decimal number (0--6). Sunday is day zero.
+
+@item %W
+The week number of the year (the first Monday as the first day of week one)
+as a decimal number (00--53).
+
+@item %x
+The locale's ``appropriate'' date representation.
+
+@item %X
+The locale's ``appropriate'' time representation.
+
+@item %y
+The year without century as a decimal number (00--99).
+
+@item %Y
+The year with century as a decimal number (e.g., 1995).
+
+@item %Z
+The time zone name or abbreviation, or no characters if
+no time zone is determinable.
+
+@item %%
+A literal @samp{%}.
+@end table
+
+If a conversion specifier is not one of the above, the behavior is
+undefined.@footnote{This is because ANSI C leaves the
+behavior of the C version of @code{strftime} undefined, and @code{gawk}
+will use the system's version of @code{strftime} if it's there.
+Typically, the conversion specifier will either not appear in the
+returned string, or it will appear literally.}
+
+@cindex locale, definition of
+Informally, a @dfn{locale} is the geographic place in which a program
+is meant to run. For example, a common way to abbreviate the date
+September 4, 1991 in the United States would be ``9/4/91''.
+In many countries in Europe, however, it would be abbreviated ``4.9.91''.
+Thus, the @samp{%x} specification in a @code{"US"} locale might produce
+@samp{9/4/91}, while in a @code{"EUROPE"} locale, it might produce
+@samp{4.9.91}. The ANSI C standard defines a default @code{"C"}
+locale, which is an environment that is typical of what most C programmers
+are used to.
+
+A public-domain C version of @code{strftime} is supplied with @code{gawk}
+for systems that are not yet fully ANSI-compliant. If that version is
+used to compile @code{gawk} (@pxref{Installation, ,Installing @code{gawk}}),
+then the following additional format specifications are available:
+
+@table @code
+@item %D
+Equivalent to specifying @samp{%m/%d/%y}.
+
+@item %e
+The day of the month, padded with a space if it is only one digit.
+
+@item %h
+Equivalent to @samp{%b}, above.
+
+@item %n
+A newline character (ASCII LF).
+
+@item %r
+Equivalent to specifying @samp{%I:%M:%S %p}.
+
+@item %R
+Equivalent to specifying @samp{%H:%M}.
+
+@item %T
+Equivalent to specifying @samp{%H:%M:%S}.
+
+@item %t
+A tab character.
+
+@item %k
+The hour (24-hour clock) as a decimal number (0--23).
+Single digit numbers are padded with a space.
+
+@item %l
+The hour (12-hour clock) as a decimal number (1--12).
+Single digit numbers are padded with a space.
+
+@item %C
+The century, as a number between 00 and 99.
+
+@item %u
+The weekday as a decimal number
+[1 (Monday)--7].
+
+@cindex ISO 8601
+@item %V
+The week number of the year (the first Monday as the first
+day of week one) as a decimal number (01--53).
+The method for determining the week number is as specified by ISO 8601
+(to wit: if the week containing January 1 has four or more days in the
+new year, then it is week one, otherwise it is week 53 of the previous year
+and the next week is week one).
+
+@item %G
+The year with century of the ISO week number, as a decimal number.
+
+For example, January 1, 1993, is in week 53 of 1992. Thus, the year
+of its ISO week number is 1992, even though its year is 1993.
+Similarly, December 31, 1973, is in week 1 of 1974. Thus, the year
+of its ISO week number is 1974, even though its year is 1973.
+
+@item %g
+The year without century of the ISO week number, as a decimal number (00--99).
+
+@item %Ec %EC %Ex %Ey %EY %Od %Oe %OH %OI
+@itemx %Om %OM %OS %Ou %OU %OV %Ow %OW %Oy
+These are ``alternate representations'' for the specifications
+that use only the second letter (@samp{%c}, @samp{%C}, and so on).
+They are recognized, but their normal representations are
+used.@footnote{If you don't understand any of this, don't worry about
+it; these facilities are meant to make it easier to ``internationalize''
+programs.}
+(These facilitate compliance with the POSIX @code{date} utility.)
+
+@item %v
+The date in VMS format (e.g., 20-JUN-1991).
+
+@cindex RFC-822
+@cindex RFC-1036
+@item %z
+The time zone offset in a +HHMM format (e.g., the format necessary to
+produce RFC-822/RFC-1036 date headers).
+@end table
+
+This example is an @code{awk} implementation of the POSIX
+@code{date} utility. Normally, the @code{date} utility prints the
+current date and time of day in a well known format. However, if you
+provide an argument to it that begins with a @samp{+}, @code{date}
+will copy non-format specifier characters to the standard output, and
+will interpret the current time according to the format specifiers in
+the string. For example:
+
+@example
+$ date '+Today is %A, %B %d, %Y.'
+@print{} Today is Thursday, July 11, 1991.
+@end example
+
+Here is the @code{gawk} version of the @code{date} utility.
+It has a shell ``wrapper'', to handle the @samp{-u} option,
+which requires that @code{date} run as if the time zone
+was set to UTC.
+
+@example
+@group
+#! /bin/sh
+#
+# date --- approximate the P1003.2 'date' command
+
+case $1 in
+-u) TZ=GMT0 # use UTC
+ export TZ
+ shift ;;
+esac
+@end group
+
+@group
+gawk 'BEGIN @{
+ format = "%a %b %d %H:%M:%S %Z %Y"
+ exitval = 0
+@end group
+
+@group
+ if (ARGC > 2)
+ exitval = 1
+ else if (ARGC == 2) @{
+ format = ARGV[1]
+ if (format ~ /^\+/)
+ format = substr(format, 2) # remove leading +
+ @}
+ print strftime(format)
+ exit exitval
+@}' "$@@"
+@end group
+@end example
+
+@node User-defined, Invoking Gawk, Built-in, Top
+@chapter User-defined Functions
+
+@cindex user-defined functions
+@cindex functions, user-defined
+Complicated @code{awk} programs can often be simplified by defining
+your own functions. User-defined functions can be called just like
+built-in ones (@pxref{Function Calls}), but it is up to you to define
+them---to tell @code{awk} what they should do.
+
+@menu
+* Definition Syntax:: How to write definitions and what they mean.
+* Function Example:: An example function definition and what it
+ does.
+* Function Caveats:: Things to watch out for.
+* Return Statement:: Specifying the value a function returns.
+@end menu
+
+@node Definition Syntax, Function Example, User-defined, User-defined
+@section Function Definition Syntax
+@cindex defining functions
+@cindex function definition
+
+Definitions of functions can appear anywhere between the rules of an
+@code{awk} program. Thus, the general form of an @code{awk} program is
+extended to include sequences of rules @emph{and} user-defined function
+definitions.
+There is no need in @code{awk} to put the definition of a function
+before all uses of the function. This is because @code{awk} reads the
+entire program before starting to execute any of it.
+
+The definition of a function named @var{name} looks like this:
+
+@example
+function @var{name}(@var{parameter-list})
+@{
+ @var{body-of-function}
+@}
+@end example
+
+@cindex names, use of
+@cindex namespaces
+@noindent
+@var{name} is the name of the function to be defined. A valid function
+name is like a valid variable name: a sequence of letters, digits and
+underscores, not starting with a digit.
+Within a single @code{awk} program, any particular name can only be
+used as a variable, array or function.
+
+@var{parameter-list} is a list of the function's arguments and local
+variable names, separated by commas. When the function is called,
+the argument names are used to hold the argument values given in
+the call. The local variables are initialized to the empty string.
+A function cannot have two parameters with the same name.
+
+The @var{body-of-function} consists of @code{awk} statements. It is the
+most important part of the definition, because it says what the function
+should actually @emph{do}. The argument names exist to give the body a
+way to talk about the arguments; local variables, to give the body
+places to keep temporary values.
+
+Argument names are not distinguished syntactically from local variable
+names; instead, the number of arguments supplied when the function is
+called determines how many argument variables there are. Thus, if three
+argument values are given, the first three names in @var{parameter-list}
+are arguments, and the rest are local variables.
+
+It follows that if the number of arguments is not the same in all calls
+to the function, some of the names in @var{parameter-list} may be
+arguments on some occasions and local variables on others. Another
+way to think of this is that omitted arguments default to the
+null string.
+
+Usually when you write a function you know how many names you intend to
+use for arguments and how many you intend to use as local variables. It is
+conventional to place some extra space between the arguments and
+the local variables, to document how your function is supposed to be used.
+
+@cindex variable shadowing
+During execution of the function body, the arguments and local variable
+values hide or @dfn{shadow} any variables of the same names used in the
+rest of the program. The shadowed variables are not accessible in the
+function definition, because there is no way to name them while their
+names have been taken away for the local variables. All other variables
+used in the @code{awk} program can be referenced or set normally in the
+function's body.
+
+The arguments and local variables last only as long as the function body
+is executing. Once the body finishes, you can once again access the
+variables that were shadowed while the function was running.
+
+@cindex recursive function
+@cindex function, recursive
+The function body can contain expressions which call functions. They
+can even call this function, either directly or by way of another
+function. When this happens, we say the function is @dfn{recursive}.
+
+@cindex @code{awk} language, POSIX version
+@cindex POSIX @code{awk}
+In many @code{awk} implementations, including @code{gawk},
+the keyword @code{function} may be
+abbreviated @code{func}. However, POSIX only specifies the use of
+the keyword @code{function}. This actually has some practical implications.
+If @code{gawk} is in POSIX-compatibility mode
+(@pxref{Options, ,Command Line Options}), then the following
+statement will @emph{not} define a function:
+
+@example
+func foo() @{ a = sqrt($1) ; print a @}
+@end example
+
+@noindent
+Instead it defines a rule that, for each record, concatenates the value
+of the variable @samp{func} with the return value of the function @samp{foo}.
+If the resulting string is non-null, the action is executed.
+This is probably not what was desired. (@code{awk} accepts this input as
+syntactically valid, since functions may be used before they are defined
+in @code{awk} programs.)
+
+@cindex portability issues
+To ensure that your @code{awk} programs are portable, always use the
+keyword @code{function} when defining a function.
+
+@node Function Example, Function Caveats, Definition Syntax, User-defined
+@section Function Definition Examples
+
+Here is an example of a user-defined function, called @code{myprint}, that
+takes a number and prints it in a specific format.
+
+@example
+function myprint(num)
+@{
+ printf "%6.3g\n", num
+@}
+@end example
+
+@noindent
+To illustrate, here is an @code{awk} rule which uses our @code{myprint}
+function:
+
+@example
+$3 > 0 @{ myprint($3) @}
+@end example
+
+@noindent
+This program prints, in our special format, all the third fields that
+contain a positive number in our input. Therefore, when given:
+
+@example
+ 1.2 3.4 5.6 7.8
+ 9.10 11.12 -13.14 15.16
+17.18 19.20 21.22 23.24
+@end example
+
+@noindent
+this program, using our function to format the results, prints:
+
+@example
+ 5.6
+ 21.2
+@end example
+
+This function deletes all the elements in an array.
+
+@example
+function delarray(a, i)
+@{
+ for (i in a)
+ delete a[i]
+@}
+@end example
+
+When working with arrays, it is often necessary to delete all the elements
+in an array and start over with a new list of elements
+(@pxref{Delete, ,The @code{delete} Statement}).
+Instead of having
+to repeat this loop everywhere in your program that you need to clear out
+an array, your program can just call @code{delarray}.
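+
+For example (a sketch; the array name @code{seen} is arbitrary), to clear
+an array at the beginning of each new input file:
+
+@example
+FNR == 1    @{ delarray(seen) @}
+@end example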
+
+Here is an example of a recursive function. It takes a string
+as an input parameter, and returns the string in backwards order.
+
+@example
+function rev(str, start)
+@{
+ if (start == 0)
+ return ""
+
+ return (substr(str, start, 1) rev(str, start - 1))
+@}
+@end example
+
+If this function is in a file named @file{rev.awk}, we can test it
+this way:
+
+@example
+$ echo "Don't Panic!" |
+> gawk --source '@{ print rev($0, length($0)) @}' -f rev.awk
+@print{} !cinaP t'noD
+@end example
+
+Here is an example that uses the built-in function @code{strftime}.
+(@xref{Time Functions, ,Functions for Dealing with Time Stamps},
+for more information on @code{strftime}.)
+The C @code{ctime} function takes a timestamp and returns it in a string,
+formatted in a well known fashion. Here is an @code{awk} version:
+
+@example
+@c file eg/lib/ctime.awk
+@group
+# ctime.awk
+#
+# awk version of C ctime(3) function
+
+function ctime(ts, format)
+@{
+ format = "%a %b %d %H:%M:%S %Z %Y"
+ if (ts == 0)
+ ts = systime() # use current time as default
+ return strftime(format, ts)
+@}
+@c endfile
+@end group
+@end example
+
+@node Function Caveats, Return Statement, Function Example, User-defined
+@section Calling User-defined Functions
+
+@cindex call by value
+@cindex call by reference
+@cindex calling a function
+@cindex function call
+@dfn{Calling a function} means causing the function to run and do its job.
+A function call is an expression, and its value is the value returned by
+the function.
+
+A function call consists of the function name followed by the arguments
+in parentheses. What you write in the call for the arguments are
+@code{awk} expressions; each time the call is executed, these
+expressions are evaluated, and the values are the actual arguments. For
+example, here is a call to @code{foo} with three arguments (the first
+being a string concatenation):
+
+@example
+foo(x y, "lose", 4 * z)
+@end example
+
+@strong{Caution:} whitespace characters (spaces and tabs) are not allowed
+between the function name and the open-parenthesis of the argument list.
+If you write whitespace by mistake, @code{awk} might think that you mean
+to concatenate a variable with an expression in parentheses. However, it
+notices that you used a function name and not a variable name, and reports
+an error.
+
+@cindex call by value
+When a function is called, it is given a @emph{copy} of the values of
+its arguments. This is known as @dfn{call by value}. The caller may use
+a variable as the expression for the argument, but the called function
+does not know this: it only knows what value the argument had. For
+example, if you write this code:
+
+@example
+foo = "bar"
+z = myfunc(foo)
+@end example
+
+@noindent
+then you should not think of the argument to @code{myfunc} as being
+``the variable @code{foo}.'' Instead, think of the argument as the
+string value, @code{"bar"}.
+
+If the function @code{myfunc} alters the values of its local variables,
+this has no effect on any other variables. Thus, if @code{myfunc}
+does this:
+
+@example
+@group
+function myfunc(str)
+@{
+ print str
+ str = "zzz"
+ print str
+@}
+@end group
+@end example
+
+@noindent
+to change its first argument variable @code{str}, this @emph{does not}
+change the value of @code{foo} in the caller. The role of @code{foo} in
+calling @code{myfunc} ended when its value, @code{"bar"}, was computed.
+If @code{str} also exists outside of @code{myfunc}, the function body
+cannot alter this outer value, because it is shadowed during the
+execution of @code{myfunc} and cannot be seen or changed from there.
+
+@cindex call by reference
+However, when arrays are the parameters to functions, they are @emph{not}
+copied. Instead, the array itself is made available for direct manipulation
+by the function. This is usually called @dfn{call by reference}.
+Changes made to an array parameter inside the body of a function @emph{are}
+visible outside that function.
+@ifinfo
+This can be @strong{very} dangerous if you do not watch what you are
+doing. For example:
+@end ifinfo
+@iftex
+@emph{This can be very dangerous if you do not watch what you are
+doing.} For example:
+@end iftex
+
+@example
+function changeit(array, ind, nvalue)
+@{
+ array[ind] = nvalue
+@}
+
+BEGIN @{
+ a[1] = 1; a[2] = 2; a[3] = 3
+ changeit(a, 2, "two")
+ printf "a[1] = %s, a[2] = %s, a[3] = %s\n",
+ a[1], a[2], a[3]
+@}
+@end example
+
+@noindent
+This program prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because
+@code{changeit} stores @code{"two"} in the second element of @code{a}.
+
+@cindex undefined functions
+@cindex functions, undefined
+Some @code{awk} implementations allow you to call a function that
+has not been defined, and only report a problem at run-time when the
+program actually tries to call the function. For example:
+
+@example
+@group
+BEGIN @{
+ if (0)
+ foo()
+ else
+ bar()
+@}
+function bar() @{ @dots{} @}
+# note that `foo' is not defined
+@end group
+@end example
+
+@noindent
+Since the @samp{if} statement will never be true, it is not really a
+problem that @code{foo} has not been defined. Usually though, it is a
+problem if a program calls an undefined function.
+
+@ignore
+At one point, I had gawk dieing on this, but later decided that this might
+break old programs and/or test suites.
+@end ignore
+
+If @samp{--lint} has been specified
+(@pxref{Options, ,Command Line Options}),
+@code{gawk} will report about calls to undefined functions.
+
+@node Return Statement, , Function Caveats, User-defined
+@section The @code{return} Statement
+@cindex @code{return} statement
+
+The body of a user-defined function can contain a @code{return} statement.
+This statement returns control to the rest of the @code{awk} program. It
+can also be used to return a value for use in the rest of the @code{awk}
+program. It looks like this:
+
+@example
+return @r{[}@var{expression}@r{]}
+@end example
+
+The @var{expression} part is optional. If it is omitted, then the returned
+value is undefined and, therefore, unpredictable.
+
+A @code{return} statement with no value expression is assumed at the end of
+every function definition. So if control reaches the end of the function
+body, then the function returns an unpredictable value. @code{awk}
+will @emph{not} warn you if you use the return value of such a function.
+
+Sometimes, you want to write a function for what it does, not for
+what it returns. Such a function corresponds to a @code{void} function
+in C or to a @code{procedure} in Pascal. Thus, it may be appropriate to not
+return any value; you should simply bear in mind that if you use the return
+value of such a function, you do so at your own risk.
+
+Here is an example of a user-defined function that returns a value
+for the largest number among the elements of an array:
+
+@example
+@group
+function maxelt(vec, i, ret)
+@{
+ for (i in vec) @{
+ if (ret == "" || vec[i] > ret)
+ ret = vec[i]
+ @}
+ return ret
+@}
+@end group
+@end example
+
+@noindent
+You call @code{maxelt} with one argument, which is an array name. The local
+variables @code{i} and @code{ret} are not intended to be arguments;
+while there is nothing to stop you from passing two or three arguments
+to @code{maxelt}, the results would be strange. The extra space before
+@code{i} in the function parameter list indicates that @code{i} and
+@code{ret} are not supposed to be arguments. This is a convention that
+you should follow when you define functions.
+
+Here is a program that uses our @code{maxelt} function. It loads an
+array, calls @code{maxelt}, and then reports the maximum number in that
+array:
+
+@example
+@group
+awk '
+function maxelt(vec, i, ret)
+@{
+ for (i in vec) @{
+ if (ret == "" || vec[i] > ret)
+ ret = vec[i]
+ @}
+ return ret
+@}
+@end group
+
+@group
+# Load all fields of each record into nums.
+@{
+ for(i = 1; i <= NF; i++)
+ nums[NR, i] = $i
+@}
+
+END @{
+ print maxelt(nums)
+@}'
+@end group
+@end example
+
+Given the following input:
+
+@example
+@group
+ 1 5 23 8 16
+44 3 5 2 8 26
+256 291 1396 2962 100
+-6 467 998 1101
+99385 11 0 225
+@end group
+@end example
+
+@noindent
+our program tells us (predictably) that @code{99385} is the largest number
+in our array.
+
+@node Invoking Gawk, Library Functions, User-defined, Top
+@chapter Running @code{awk}
+@cindex command line
+@cindex invocation of @code{gawk}
+@cindex arguments, command line
+@cindex options, command line
+@cindex long options
+@cindex options, long
+
+There are two ways to run @code{awk}: with an explicit program, or with
+one or more program files. Here are templates for both of them; items
+enclosed in @samp{@r{[}@dots{}@r{]}} in these templates are optional.
+
+Besides traditional one-letter POSIX-style options, @code{gawk} also
+supports GNU long options.
+
+@example
+awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{}
+awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
+@end example
+
+@cindex empty program
+@cindex dark corner
+It is possible to invoke @code{awk} with an empty program:
+
+@example
+$ awk '' datafile1 datafile2
+@end example
+
+@noindent
+Doing so makes little sense though; @code{awk} will simply exit
+silently when given an empty program (d.c.). If @samp{--lint} has
+been specified on the command line, @code{gawk} will issue a
+warning that the program is empty.
+
+@menu
+* Options:: Command line options and their meanings.
+* Other Arguments:: Input file names and variable assignments.
+* AWKPATH Variable:: Searching directories for @code{awk} programs.
+* Obsolete:: Obsolete Options and/or features.
+* Undocumented:: Undocumented Options and Features.
+* Known Bugs:: Known Bugs in @code{gawk}.
+@end menu
+
+@node Options, Other Arguments, Invoking Gawk, Invoking Gawk
+@section Command Line Options
+
+Options begin with a dash, and consist of a single character.
+GNU style long options consist of two dashes and a keyword.
+The keyword can be abbreviated, as long as the abbreviation allows the option
+to be uniquely identified. If the option takes an argument, then the
+keyword is either immediately followed by an equals sign (@samp{=}) and the
+argument's value, or the keyword and the argument's value are separated
+by whitespace. For brevity, the discussion below only refers to the
+traditional short options; however, the long and short options are
+interchangeable in all contexts.
+
+Each long option for @code{gawk} has a corresponding
+POSIX-style option. The options and their meanings are as follows:
+
+@table @code
+@item -F @var{fs}
+@itemx --field-separator @var{fs}
+@cindex @code{-F} option
+@cindex @code{--field-separator} option
+Sets the @code{FS} variable to @var{fs}
+(@pxref{Field Separators, ,Specifying How Fields are Separated}).
+
+@item -f @var{source-file}
+@itemx --file @var{source-file}
+@cindex @code{-f} option
+@cindex @code{--file} option
+Indicates that the @code{awk} program is to be found in @var{source-file}
+instead of in the first non-option argument.
+
+@item -v @var{var}=@var{val}
+@itemx --assign @var{var}=@var{val}
+@cindex @code{-v} option
+@cindex @code{--assign} option
+Sets the variable @var{var} to the value @var{val} @strong{before}
+execution of the program begins. Such variable values are available
+inside the @code{BEGIN} rule
+(@pxref{Other Arguments, ,Other Command Line Arguments}).
+
+The @samp{-v} option can only set one variable, but you can use
+it more than once, setting another variable each time, like this:
+@samp{awk @w{-v foo=1} @w{-v bar=2} @dots{}}.
+
+@item -mf=@var{NNN}
+@itemx -mr=@var{NNN}
+Set various memory limits to the value @var{NNN}. The @samp{f} flag sets
+the maximum number of fields, and the @samp{r} flag sets the maximum
+record size. These two flags and the @samp{-m} option are from the
+Bell Labs research version of Unix @code{awk}. They are provided
+for compatibility, but otherwise ignored by
+@code{gawk}, since @code{gawk} has no predefined limits.
+
+@item -W @var{gawk-opt}
+@cindex @code{-W} option
+Following the POSIX standard, options that are implementation
+specific are supplied as arguments to the @samp{-W} option. With @code{gawk},
+these arguments may be separated by commas, or quoted and separated by
+whitespace. Case is ignored when processing these options. These options
+also have corresponding GNU style long options.
+See below.
+
+@item --
+Signals the end of the command line options. The following arguments
+are not treated as options even if they begin with @samp{-}. This
+interpretation of @samp{--} follows the POSIX argument parsing
+conventions.
+
+This is useful if you have file names that start with @samp{-},
+or in shell scripts, if you have file names that will be specified
+by the user which could start with @samp{-}.
+@end table
+
+The following @code{gawk}-specific options are available:
+
+@table @code
+@item -W traditional
+@itemx -W compat
+@itemx --traditional
+@itemx --compat
+@cindex @code{--compat} option
+@cindex @code{--traditional} option
+@cindex compatibility mode
+Specifies @dfn{compatibility mode}, in which the GNU extensions to
+the @code{awk} language are disabled, so that @code{gawk} behaves just
+like the Bell Labs research version of Unix @code{awk}.
+@samp{--traditional} is the preferred form of this option.
+@xref{POSIX/GNU, ,Extensions in @code{gawk} Not in POSIX @code{awk}},
+which summarizes the extensions. Also see
+@ref{Compatibility Mode, ,Downward Compatibility and Debugging}.
+
+@item -W copyleft
+@itemx -W copyright
+@itemx --copyleft
+@itemx --copyright
+@cindex @code{--copyleft} option
+@cindex @code{--copyright} option
+Print the short version of the General Public License.
+This option may disappear in a future version of @code{gawk}.
+
+@item -W help
+@itemx -W usage
+@itemx --help
+@itemx --usage
+@cindex @code{--help} option
+@cindex @code{--usage} option
+Print a ``usage'' message summarizing the short and long style options
+that @code{gawk} accepts, and then exit.
+
+@item -W lint
+@itemx --lint
+@cindex @code{--lint} option
+Warn about constructs that are dubious or non-portable to
+other @code{awk} implementations.
+Some warnings are issued when @code{gawk} first reads your program. Others
+are issued at run-time, as your program executes.
+
+@item -W lint-old
+@itemx --lint-old
+@cindex @code{--lint-old} option
+Warn about constructs that are not available in
+the original Version 7 Unix version of @code{awk}
+(@pxref{V7/SVR3.1, , Major Changes between V7 and SVR3.1}).
+
+@item -W posix
+@itemx --posix
+@cindex @code{--posix} option
+@cindex POSIX mode
+Operate in strict POSIX mode. This disables all @code{gawk}
+extensions (just like @samp{--traditional}), and adds the following additional
+restrictions:
+
+@c IMPORTANT! Keep this list in sync with the one in node POSIX
+
+@itemize @bullet
+@item
+@code{\x} escape sequences are not recognized
+(@pxref{Escape Sequences}).
+
+@item
+The synonym @code{func} for the keyword @code{function} is not
+recognized (@pxref{Definition Syntax, ,Function Definition Syntax}).
+
+@item
+The operators @samp{**} and @samp{**=} cannot be used in
+place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators},
+and also @pxref{Assignment Ops, ,Assignment Expressions}).
+
+@item
+Specifying @samp{-Ft} on the command line does not set the value
+of @code{FS} to be a single tab character
+(@pxref{Field Separators, ,Specifying How Fields are Separated}).
+
+@item
+The @code{fflush} built-in function is not supported
+(@pxref{I/O Functions, , Built-in Functions for Input/Output}).
+@end itemize
+
+If you supply both @samp{--traditional} and @samp{--posix} on the
+command line, @samp{--posix} will take precedence. @code{gawk}
+will also issue a warning if both options are supplied.
+
+@item -W re-interval
+@itemx --re-interval
+Allow interval expressions
+(@pxref{Regexp Operators, , Regular Expression Operators}),
+in regexps.
+Because interval expressions were traditionally not available in @code{awk},
+@code{gawk} does not provide them by default. This prevents old @code{awk}
+programs from breaking.
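+
+For example (an illustrative sketch):
+
+@example
+$ echo aaa | gawk --re-interval '/a@{2,3@}/ @{ print "matched" @}'
+@print{} matched
+@end example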
+
+@item -W source @var{program-text}
+@itemx --source @var{program-text}
+@cindex @code{--source} option
+Program source code is taken from the @var{program-text}. This option
+allows you to mix source code in files with source
+code that you enter on the command line. This is particularly useful
+when you have library functions that you wish to use from your command line
+programs (@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
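+
+For example (a sketch; the function and file names are hypothetical, and
+@file{myfuncs.awk} is assumed to define @code{myfunc}):
+
+@example
+gawk --source 'BEGIN @{ print myfunc(42) @}' -f myfuncs.awk
+@end example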
+
+@item -W version
+@itemx --version
+@cindex @code{--version} option
+Prints version information for this particular copy of @code{gawk}.
+This allows you to determine if your copy of @code{gawk} is up to date
+with respect to whatever the Free Software Foundation is currently
+distributing.
+It is also useful for bug reports
+(@pxref{Bugs, , Reporting Problems and Bugs}).
+@end table
+
+Any other options are flagged as invalid with a warning message, but
+are otherwise ignored.
+
+In compatibility mode, as a special case, if the value of @var{fs} supplied
+to the @samp{-F} option is @samp{t}, then @code{FS} is set to the tab
+character (@code{"\t"}). This is only true for @samp{--traditional}, and not
+for @samp{--posix}
+(@pxref{Field Separators, ,Specifying How Fields are Separated}).
+
+The @samp{-f} option may be used more than once on the command line.
+If it is, @code{awk} reads its program source from all of the named files, as
+if they had been concatenated together into one big file. This is
+useful for creating libraries of @code{awk} functions. Useful functions
+can be written once, and then retrieved from a standard place, instead
+of having to be included into each individual program.
+
+You can type in a program at the terminal and still use library functions,
+by specifying @samp{-f /dev/tty}. @code{awk} will read a file from the terminal
+to use as part of the @code{awk} program. After typing your program,
+type @kbd{Control-d} (the end-of-file character) to terminate it.
+(You may also use @samp{-f -} to read program source from the standard
+input, but then you will not be able to also use the standard input as a
+source of data.)
+
+Because it is clumsy using the standard @code{awk} mechanisms to mix source
+file and command line @code{awk} programs, @code{gawk} provides the
+@samp{--source} option. This does not require you to pre-empt the standard
+input for your source code, and allows you to easily mix command line
+and library source code
+(@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
+
+If no @samp{-f} or @samp{--source} option is specified, then @code{gawk}
+will use the first non-option command line argument as the text of the
+program source code.
+
+@cindex @code{POSIXLY_CORRECT} environment variable
+@cindex environment variable, @code{POSIXLY_CORRECT}
+If the environment variable @code{POSIXLY_CORRECT} exists,
+then @code{gawk} will behave in strict POSIX mode, exactly as if
+you had supplied the @samp{--posix} command line option.
+Many GNU programs look for this environment variable to turn on
+strict POSIX mode. If you supply @samp{--lint} on the command line,
+and @code{gawk} turns on POSIX mode because of @code{POSIXLY_CORRECT},
+then it will print a warning message indicating that POSIX
+mode is in effect.
+
+You would typically set this variable in your shell's startup file.
+For a Bourne compatible shell (such as Bash), you would add these
+lines to the @file{.profile} file in your home directory.
+
+@example
+@group
+POSIXLY_CORRECT=true
+export POSIXLY_CORRECT
+@end group
+@end example
+
+For a @code{csh} compatible shell,@footnote{Not recommended.}
+you would add this line to the @file{.login} file in your home directory.
+
+@example
+setenv POSIXLY_CORRECT true
+@end example
+
+@node Other Arguments, AWKPATH Variable, Options, Invoking Gawk
+@section Other Command Line Arguments
+
+Any additional arguments on the command line are normally treated as
+input files to be processed in the order specified. However, an
+argument that has the form @code{@var{var}=@var{value}}, assigns
+the value @var{value} to the variable @var{var}---it does not specify a
+file at all.
+
+@vindex ARGIND
+@vindex ARGV
+All these arguments are made available to your @code{awk} program in the
+@code{ARGV} array (@pxref{Built-in Variables}). Command line options
+and the program text (if present) are omitted from @code{ARGV}.
+All other arguments, including variable assignments, are
+included. As each element of @code{ARGV} is processed, @code{gawk}
+sets the variable @code{ARGIND} to the index in @code{ARGV} of the
+current element.
+
+The distinction between file name arguments and variable-assignment
+arguments is made when @code{awk} is about to open the next input file.
+At that point in execution, it checks the ``file name'' to see whether
+it is really a variable assignment; if so, @code{awk} sets the variable
+instead of reading a file.
+
+Therefore, the variables actually receive the given values after all
+previously specified files have been read. In particular, the values of
+variables assigned in this fashion are @emph{not} available inside a
+@code{BEGIN} rule
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}),
+since such rules are run before @code{awk} begins scanning the argument list.
+
+@cindex dark corner
+The variable values given on the command line are processed for escape
+sequences (d.c.) (@pxref{Escape Sequences}).
+
+In some earlier implementations of @code{awk}, when a variable assignment
+occurred before any file names, the assignment would happen @emph{before}
+the @code{BEGIN} rule was executed. @code{awk}'s behavior was thus
+inconsistent; some command line assignments were available inside the
+@code{BEGIN} rule, while others were not. However,
+some applications came to depend
+upon this ``feature.'' When @code{awk} was changed to be more consistent,
+the @samp{-v} option was added to accommodate applications that depended
+upon the old behavior.
+
+The variable assignment feature is most useful for assigning to variables
+such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and
+output formats, before scanning the data files. It is also useful for
+controlling state if multiple passes are needed over a data file. For
+example:
+
+@cindex multiple passes over data
+@cindex passes, multiple
+@example
+awk 'pass == 1 @{ @var{pass 1 stuff} @}
+ pass == 2 @{ @var{pass 2 stuff} @}' pass=1 mydata pass=2 mydata
+@end example
+
+Given the variable assignment feature, the @samp{-F} option for setting
+the value of @code{FS} is not
+strictly necessary. It remains for historical compatibility.
+
+@node AWKPATH Variable, Obsolete, Other Arguments, Invoking Gawk
+@section The @code{AWKPATH} Environment Variable
+@cindex @code{AWKPATH} environment variable
+@cindex environment variable, @code{AWKPATH}
+@cindex search path
+@cindex directory search
+@cindex path, search
+@cindex differences between @code{gawk} and @code{awk}
+
+The previous section described how @code{awk} program files can be named
+on the command line with the @samp{-f} option. In most @code{awk}
+implementations, you must supply a precise path name for each program
+file, unless the file is in the current directory.
+
+@cindex search path, for source files
+But in @code{gawk}, if the file name supplied to the @samp{-f} option
+does not contain a @samp{/}, then @code{gawk} searches a list of
+directories (called the @dfn{search path}), one by one, looking for a
+file with the specified name.
+
+The search path is a string consisting of directory names
+separated by colons. @code{gawk} gets its search path from the
+@code{AWKPATH} environment variable. If that variable does not exist,
+@code{gawk} uses a default path, which is
+@samp{.:/usr/local/share/awk}.@footnote{Your version of @code{gawk}
+may use a directory that is different than @file{/usr/local/share/awk}; it
+will depend upon how @code{gawk} was built and installed. The actual
+directory will be the value of @samp{$(datadir)} generated when
+@code{gawk} was configured. You probably don't need to worry about this
+though.} (Programs written for use by
+system administrators should use an @code{AWKPATH} variable that
+does not include the current directory, @file{.}.)
+
+The search path feature is particularly useful for building up libraries
+of useful @code{awk} functions. The library files can be placed in a
+standard directory that is in the default path, and then specified on
+the command line with a short file name. Otherwise, the full file name
+would have to be typed for each file.
+
+By using both the @samp{--source} and @samp{-f} options, your command line
+@code{awk} programs can use facilities in @code{awk} library files.
+@xref{Library Functions, , A Library of @code{awk} Functions}.
+
+Path searching is not done if @code{gawk} is in compatibility mode.
+This is true for both @samp{--traditional} and @samp{--posix}.
+@xref{Options, ,Command Line Options}.
+
+@strong{Note:} if you want files in the current directory to be found,
+you must include the current directory in the path, either by including
+@file{.} explicitly in the path, or by writing a null entry in the
+path. (A null entry is indicated by starting or ending the path with a
+colon, or by placing two colons next to each other (@samp{::}).) If the
+current directory is not included in the path, then files cannot be
+found in the current directory. This path search mechanism is identical
+to the shell's.
+@c someday, @cite{The Bourne Again Shell}....
+
+Starting with version 3.0, if @code{AWKPATH} is not defined in the
+environment, @code{gawk} will place its default search path into
+@code{ENVIRON["AWKPATH"]}. This makes it easy to determine
+the actual search path @code{gawk} will use.
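+
+For example (assuming @code{AWKPATH} is not set in the environment; the
+directory shown depends on how @code{gawk} was configured):
+
+@example
+$ gawk 'BEGIN @{ print ENVIRON["AWKPATH"] @}'
+@print{} .:/usr/local/share/awk
+@end example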
+
+@node Obsolete, Undocumented, AWKPATH Variable, Invoking Gawk
+@section Obsolete Options and/or Features
+
+@cindex deprecated options
+@cindex obsolete options
+@cindex deprecated features
+@cindex obsolete features
+This section describes features and/or command line options from
+previous releases of @code{gawk} that are either not available in the
+current version, or that are still supported but deprecated (meaning that
+they will @emph{not} be in the next release).
+
+@c update this section for each release!
+
+For version @value{VERSION} of @code{gawk}, there are no command line options
+or other deprecated features from the previous version of @code{gawk}.
+@iftex
+This section
+@end iftex
+@ifinfo
+This node
+@end ifinfo
+is thus essentially a place holder,
+in case some option becomes obsolete in a future version of @code{gawk}.
+
+@ignore
+@c This is pretty old news...
+The public-domain version of @code{strftime} that is distributed with
+@code{gawk} changed for the 2.14 release. The @samp{%V} conversion specifier
+that used to generate the date in VMS format was changed to @samp{%v}.
+This is because the POSIX standard for the @code{date} utility now
+specifies a @samp{%V} conversion specifier.
+@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for details.
+@end ignore
+
+@node Undocumented, Known Bugs, Obsolete, Invoking Gawk
+@section Undocumented Options and Features
+@cindex undocumented features
+
+This section intentionally left blank.
+
+@c Read The Source, Luke!
+
+@ignore
+@c If these came out in the Info file or TeX document, then they wouldn't
+@c be undocumented, would they?
+
+@code{gawk} has one undocumented option:
+
+@table @code
+@item -W nostalgia
+@itemx --nostalgia
+Print the message @code{"awk: bailing out near line 1"} and dump core.
+This option was inspired by the common behavior of very early versions of
+Unix @code{awk}, and by a t--shirt.
+@end table
+
+Early versions of @code{awk} did not require any separator (either
+a newline or @samp{;}) between the rules in @code{awk} programs. Thus,
+it was common to see one-line programs like:
+
+@example
+awk '@{ sum += $1 @} END @{ print sum @}'
+@end example
+
+@code{gawk} actually supports this, but it is purposely undocumented
+since it is considered bad style. The correct way to write such a program
+is either
+
+@example
+awk '@{ sum += $1 @} ; END @{ print sum @}'
+@end example
+
+@noindent
+or
+
+@example
+awk '@{ sum += $1 @}
+ END @{ print sum @}' data
+@end example
+
+@noindent
+@xref{Statements/Lines, ,@code{awk} Statements Versus Lines}, for a fuller
+explanation.
+
+@end ignore
+
+@node Known Bugs, , Undocumented, Invoking Gawk
+@section Known Bugs in @code{gawk}
+@cindex bugs, known in @code{gawk}
+@cindex known bugs
+
+@itemize @bullet
+@item
+The @samp{-F} option for changing the value of @code{FS}
+(@pxref{Options, ,Command Line Options})
+is not necessary given the command line variable
+assignment feature; it remains only for backwards compatibility.
+
+@item
+If your system actually has support for @file{/dev/fd} and the
+associated @file{/dev/stdin}, @file{/dev/stdout}, and
+@file{/dev/stderr} files, you may get different output from @code{gawk}
+than you would get on a system without those files. When @code{gawk}
+interprets these files internally, it synchronizes output to the
+standard output with output to @file{/dev/stdout}, while on a system
+with those files, the output is actually to different open files
+(@pxref{Special Files, ,Special File Names in @code{gawk}}).
+
+@item
+Syntactically invalid single character programs tend to overflow
+the parse stack, generating a rather unhelpful message. Such programs
+are surprisingly difficult to diagnose in the completely general case,
+and the effort to do so really is not worth it.
+
+@item
+The word ``GNU'' is incorrectly capitalized in at least one
+file in the source code.
+@end itemize
+
+@node Library Functions, Sample Programs, Invoking Gawk, Top
+@chapter A Library of @code{awk} Functions
+
+@c 2e: USE TEXINFO-2 FUNCTION DEFINITION STUFF!!!!!!!!!!!!!
+This chapter presents a library of useful @code{awk} functions. The
+sample programs presented later
+(@pxref{Sample Programs, ,Practical @code{awk} Programs})
+use these functions.
+The functions are presented here in a progression from simple to complex.
+
+@ref{Extract Program, ,Extracting Programs from Texinfo Source Files},
+presents a program that you can use to extract the source code for
+these example library functions and programs from the Texinfo source
+for this @value{DOCUMENT}.
+(This has already been done as part of the @code{gawk} distribution.)
+
+If you have written one or more useful, general purpose @code{awk} functions,
+and would like to contribute them for a subsequent edition of this @value{DOCUMENT},
+please contact the author. @xref{Bugs, ,Reporting Problems and Bugs},
+for information on doing this. Don't just send code, as you will be
+required to either place your code in the public domain,
+publish it under the GPL (@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}),
+or assign the copyright in it to the Free Software Foundation.
+
+@menu
+* Portability Notes:: What to do if you don't have @code{gawk}.
+* Nextfile Function:: Two implementations of a @code{nextfile}
+ function.
+* Assert Function:: A function for assertions in @code{awk}
+ programs.
+* Ordinal Functions:: Functions for using characters as numbers and
+ vice versa.
+* Join Function:: A function to join an array into a string.
+* Mktime Function:: A function to turn a date into a timestamp.
+* Gettimeofday Function:: A function to get formatted times.
+* Filetrans Function:: A function for handling data file transitions.
+* Getopt Function:: A function for processing command line
+ arguments.
+* Passwd Functions:: Functions for getting user information.
+* Group Functions:: Functions for getting group information.
+* Library Names:: How to best name private global variables in
+ library functions.
+@end menu
+
+@node Portability Notes, Nextfile Function, Library Functions, Library Functions
+@section Simulating @code{gawk}-specific Features
+@cindex portability issues
+
+The programs in this chapter and in
+@ref{Sample Programs, ,Practical @code{awk} Programs},
+freely use features that are specific to @code{gawk}.
+This section briefly discusses how you can rewrite these programs for
+different implementations of @code{awk}.
+
+Diagnostic error messages are sent to @file{/dev/stderr}.
+Use @samp{| "cat 1>&2"} instead of @samp{> "/dev/stderr"}, if your system
+does not have a @file{/dev/stderr}, or if you cannot use @code{gawk}.
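+
+For example, a diagnostic written like this:
+
+@example
+print "serious error detected!" > "/dev/stderr"
+@end example
+
+@noindent
+would be rewritten this way for such systems:
+
+@example
+print "serious error detected!" | "cat 1>&2"
+@end example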
+
+A number of programs use @code{nextfile}
+(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}),
+to skip any remaining input in the input file.
+@ref{Nextfile Function, ,Implementing @code{nextfile} as a Function},
+shows you how to write a function that will do the same thing.
+
+Finally, some of the programs choose to ignore upper-case and lower-case
+distinctions in their input. They do this by assigning one to @code{IGNORECASE}.
+You can achieve the same effect by adding the following rule to the
+beginning of the program:
+
+@example
+# ignore case
+@{ $0 = tolower($0) @}
+@end example
+
+@noindent
+Also, verify that all regexp and string constants used in
+comparisons only use lower-case letters.
+
+@node Nextfile Function, Assert Function, Portability Notes, Library Functions
+@section Implementing @code{nextfile} as a Function
+
+@cindex skipping input files
+@cindex input files, skipping
+The @code{nextfile} statement presented in
+@ref{Nextfile Statement, ,The @code{nextfile} Statement},
+is a @code{gawk}-specific extension. It is not available in other
+implementations of @code{awk}. This section shows two versions of a
+@code{nextfile} function that you can use to simulate @code{gawk}'s
+@code{nextfile} statement if you cannot use @code{gawk}.
+
+Here is a first attempt at writing a @code{nextfile} function.
+
+@example
+@group
+# nextfile --- skip remaining records in current file
+
+# this should be read in before the "main" awk program
+
+function nextfile() @{ _abandon_ = FILENAME; next @}
+
+_abandon_ == FILENAME @{ next @}
+@end group
+@end example
+
+This file should be included before the main program, because it supplies
+a rule that must be executed first. This rule compares the current data
+file's name (which is always in the @code{FILENAME} variable) to a private
+variable named @code{_abandon_}. If the file name matches, then the action
+part of the rule executes a @code{next} statement, to go on to the next
+record. (The use of @samp{_} in the variable name is a convention.
+It is discussed more fully in
+@ref{Library Names, , Naming Library Function Global Variables}.)
+
+The use of the @code{next} statement effectively creates a loop that reads
+all the records from the current data file.
+Eventually, the end of the file is reached, and
+a new data file is opened, changing the value of @code{FILENAME}.
+Once this happens, the comparison of @code{_abandon_} to @code{FILENAME}
+fails, and execution continues with the first rule of the ``real'' program.
+
+The @code{nextfile} function itself simply sets the value of @code{_abandon_}
+and then executes a @code{next} statement to start the loop
+going.@footnote{Some implementations of @code{awk} do not allow you to
+execute @code{next} from within a function body. Some other work-around
+will be necessary if you use such a version.}
+@c mawk is what we're talking about.
+
+This initial version has a subtle problem. What happens if the same data
+file is listed @emph{twice} on the command line, one right after the other,
+or even with just a variable assignment between the two occurrences of
+the file name?
+
+@c @findex nextfile
+@c do it this way, since all the indices are merged
+@cindex @code{nextfile} function
+In such a case,
+this code will skip right through the file a second time, even though
+it should stop when it gets to the end of the first occurrence.
+Here is a second version of @code{nextfile} that remedies this problem.
+
+@example
+@group
+@c file eg/lib/nextfile.awk
+# nextfile --- skip remaining records in current file
+# correctly handle successive occurrences of the same file
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May, 1993
+
+# this should be read in before the "main" awk program
+
+function nextfile() @{ _abandon_ = FILENAME; next @}
+
+_abandon_ == FILENAME @{
+ if (FNR == 1)
+ _abandon_ = ""
+ else
+ next
+@}
+@c endfile
+@end group
+@end example
+
+The @code{nextfile} function has not changed. It sets @code{_abandon_}
+equal to the current file name and then executes a @code{next} statement.
+The @code{next} statement reads the next record and increments @code{FNR},
+so @code{FNR} is guaranteed to have a value of at least two.
+However, if @code{nextfile} is called for the last record in the file,
+then @code{awk} will close the current data file and move on to the next
+one. Upon doing so, @code{FILENAME} will be set to the name of the new file,
+and @code{FNR} will be reset to one. If this next file is the same as
+the previous one, @code{_abandon_} will still be equal to @code{FILENAME}.
+However, @code{FNR} will be equal to one, telling us that this is a new
+occurrence of the file, and not the one we were reading when the
+@code{nextfile} function was executed. In that case, @code{_abandon_}
+is reset to the empty string, so that further executions of this rule
+will fail (until the next time that @code{nextfile} is called).
+
+If @code{FNR} is not one, then we are still in the original data file,
+and the program executes a @code{next} statement to skip through it.
+
+An important question to ask at this point is: ``Given that the
+functionality of @code{nextfile} can be provided with a library file,
+why is it built into @code{gawk}?'' Adding features for little reason
+leads to larger, slower programs that are
+harder to maintain.
+
+The answer is that building @code{nextfile} into @code{gawk} provides
+significant gains in efficiency. If the @code{nextfile} function is executed
+at the beginning of a large data file, @code{awk} still has to scan the entire
+file, splitting it up into records, just to skip over it. The built-in
+@code{nextfile} can simply close the file immediately and proceed to the
+next one, saving a lot of time. This is particularly important in
+@code{awk}, since @code{awk} programs are generally I/O bound (i.e.@:
+they spend most of their time doing input and output, instead of performing
+computations).
+
+@node Assert Function, Ordinal Functions, Nextfile Function, Library Functions
+@section Assertions
+
+@cindex assertions
+@cindex @code{assert}, C version
+When writing large programs, it is often useful to be able to know
+that a condition or set of conditions is true. Before proceeding with a
+particular computation, you make a statement about what you believe to be
+the case. Such a statement is known as an
+``assertion.'' The C language provides an @code{<assert.h>} header file
+and corresponding @code{assert} macro that the programmer can use to make
+assertions. If an assertion fails, the @code{assert} macro arranges to
+print a diagnostic message describing the condition that should have
+been true but was not, and then it kills the program. In C, using
+@code{assert} looks like this:
+
+@example
+#include <assert.h>
+
+int myfunc(int a, double b)
+@{
+ assert(a <= 5 && b >= 17);
+ @dots{}
+@}
+@end example
+
+If the assertion failed, the program would print a message similar to
+this:
+
+@example
+prog.c:5: assertion failed: a <= 5 && b >= 17
+@end example
+
+@findex assert
+The ANSI C language makes it possible to turn the condition into a string for use
+in printing the diagnostic message. This is not possible in @code{awk}, so
+this @code{assert} function also requires a string version of the condition
+that is being tested.
+
+@example
+@c @group
+@c file eg/lib/assert.awk
+# assert --- assert that a condition is true. Otherwise exit.
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May, 1993
+
+function assert(condition, string)
+@{
+ if (! condition) @{
+ printf("%s:%d: assertion failed: %s\n",
+ FILENAME, FNR, string) > "/dev/stderr"
+ _assert_exit = 1
+ exit 1
+ @}
+@}
+
+END @{
+ if (_assert_exit)
+ exit 1
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{assert} function tests the @code{condition} parameter. If it
+is false, it prints a message to standard error, using the @code{string}
+parameter to describe the failed condition. It then sets the variable
+@code{_assert_exit} to one, and executes the @code{exit} statement.
+The @code{exit} statement jumps to the @code{END} rule. If the @code{END}
+rule finds @code{_assert_exit} to be true, then it exits immediately.
+
+The purpose of the @code{END} rule with its test is to
+keep any other @code{END} rules from running. When an assertion fails, the
+program should exit immediately.
+If no assertions fail, then @code{_assert_exit} will still be
+false when the @code{END} rule is run normally, and the rest of the
+program's @code{END} rules will execute.
+For all of this to work correctly, @file{assert.awk} must be the
+first source file read by @code{awk}.
+
+You would use this function in your programs this way:
+
+@example
+function myfunc(a, b)
+@{
+ assert(a <= 5 && b >= 17, "a <= 5 && b >= 17")
+ @dots{}
+@}
+@end example
+
+@noindent
+If the assertion failed, you would see a message like this:
+
+@example
+mydata:1357: assertion failed: a <= 5 && b >= 17
+@end example
+
+There is a problem with this version of @code{assert} that may not
+be possible to work around. An @code{END} rule is automatically added
+to the program calling @code{assert}. Normally, if a program consists
+of just a @code{BEGIN} rule, the input files and/or standard input are
+not read. However, now that the program has an @code{END} rule, @code{awk}
+will attempt to read the input data files, or standard input
+(@pxref{Using BEGIN/END, , Startup and Cleanup Actions}),
+most likely causing the program to hang, waiting for input.
+
+@cindex backslash continuation
+Just a note on programming style. You may have noticed that the @code{END}
+rule uses backslash continuation, with the open brace on a line by
+itself. This is so that it more closely resembles the way functions
+are written. Many of the examples
+@iftex
+in this chapter and the next one
+@end iftex
+use this style. You can decide for yourself if you like writing
+your @code{BEGIN} and @code{END} rules this way,
+or not.
+
+@node Ordinal Functions, Join Function, Assert Function, Library Functions
+@section Translating Between Characters and Numbers
+
+@cindex numeric character values
+@cindex values of characters as numbers
+One commercial implementation of @code{awk} supplies a built-in function,
+@code{ord}, which takes a character and returns the numeric value for that
+character in the machine's character set. If the string passed to
+@code{ord} has more than one character, only the first one is used.
+
+The inverse of this function is @code{chr} (from the function of the same
+name in Pascal), which takes a number and returns the corresponding character.
+
+Both functions can be written very nicely in @code{awk}; there is no real
+reason to build them into the @code{awk} interpreter.
+
+@findex ord
+@findex chr
+@example
+@c @group
+@c file eg/lib/ord.awk
+# ord.awk --- do ord and chr
+#
+# Global identifiers:
+# _ord_: numerical values indexed by characters
+# _ord_init: function to initialize _ord_
+#
+# Arnold Robbins
+# arnold@@gnu.ai.mit.edu
+# Public Domain
+# 16 January, 1992
+# 20 July, 1992, revised
+
+BEGIN @{ _ord_init() @}
+@c endfile
+@c @end group
+
+@c @group
+@c file eg/lib/ord.awk
+function _ord_init( low, high, i, t)
+@{
+ low = sprintf("%c", 7) # BEL is ascii 7
+ if (low == "\a") @{ # regular ascii
+ low = 0
+ high = 127
+ @} else if (sprintf("%c", 128 + 7) == "\a") @{
+ # ascii, mark parity
+ low = 128
+ high = 255
+ @} else @{ # ebcdic(!)
+ low = 0
+ high = 255
+ @}
+
+ for (i = low; i <= high; i++) @{
+ t = sprintf("%c", i)
+ _ord_[t] = i
+ @}
+@}
+@c endfile
+@c @end group
+@end example
+
+@cindex character sets
+@cindex character encodings
+@cindex ASCII
+@cindex EBCDIC
+@cindex mark parity
+Some explanation of the numbers used by @code{_ord_init} is worthwhile.
+The most prominent character set in use today is ASCII. Although an
+eight-bit byte can hold 256 distinct values (from zero to 255), ASCII only
+defines characters that use the values from zero to 127.@footnote{ASCII
+has been extended in many countries to use the values from 128 to 255
+for country-specific characters. If your system uses these extensions,
+you can simplify @code{_ord_init} to simply loop from zero to 255.}
+At least one computer manufacturer that we know of
+@c Pr1me, blech
+uses ASCII, but with mark parity, meaning that the leftmost bit in the byte
+is always one. What this means is that on those systems, characters
+have numeric values from 128 to 255.
+Finally, large mainframe systems use the EBCDIC character set, which
+uses all 256 values.
+While there are other character sets in use on some older systems,
+they are not really worth worrying about.
+
+@example
+@group
+@c file eg/lib/ord.awk
+function ord(str, c)
+@{
+ # only first character is of interest
+ c = substr(str, 1, 1)
+ return _ord_[c]
+@}
+@c endfile
+@end group
+
+@group
+@c file eg/lib/ord.awk
+function chr(c)
+@{
+ # force c to be numeric by adding 0
+ return sprintf("%c", c + 0)
+@}
+@c endfile
+@end group
+
+@c @group
+@c file eg/lib/ord.awk
+#### test code ####
+# BEGIN \
+# @{
+# for (;;) @{
+# printf("enter a character: ")
+# if (getline var <= 0)
+# break
+# printf("ord(%s) = %d\n", var, ord(var))
+# @}
+# @}
+@c endfile
+@c @end group
+@end example
+
+An obvious improvement to these functions would be to move the code for the
+@code{@w{_ord_init}} function into the body of the @code{BEGIN} rule. It was
+written this way initially for ease of development.
+
+There is a ``test program'' in a @code{BEGIN} rule, for testing the
+function. It is commented out for production use.
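+
+Assuming an ASCII system and that @file{ord.awk} has been loaded,
+a quick interactive check (an illustrative sketch) looks like this:
+
+@example
+$ gawk -f ord.awk --source 'BEGIN @{ print ord("A"), chr(97) @}'
+@print{} 65 a
+@end example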
+
+@node Join Function, Mktime Function, Ordinal Functions, Library Functions
+@section Merging an Array Into a String
+
+@cindex merging strings
+When doing string processing, it is often useful to be able to join
+all the strings in an array into one long string. The following function,
+@code{join}, accomplishes this task. It is used later in several of
+the application programs
+(@pxref{Sample Programs, ,Practical @code{awk} Programs}).
+
+Good function design is important; this function needs to be general, but it
+should also have a reasonable default behavior. It is called with an array
+and the beginning and ending indices of the elements in the array to be
+merged. This assumes that the array indices are numeric---a reasonable
+assumption since the array was likely created with @code{split}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+@findex join
+@example
+@group
+@c file eg/lib/join.awk
+# join.awk --- join an array into a string
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+function join(array, start, end, sep, result, i)
+@{
+ if (sep == "")
+ sep = " "
+ else if (sep == SUBSEP) # magic value
+ sep = ""
+ result = array[start]
+ for (i = start + 1; i <= end; i++)
+ result = result sep array[i]
+ return result
+@}
+@c endfile
+@end group
+@end example
+
+An optional additional argument is the separator to use when joining the
+strings back together. If the caller supplies a non-empty value,
+@code{join} uses it. If it is not supplied, it will have a null
+value. In this case, @code{join} uses a single blank as a default
+separator for the strings. If the value is equal to @code{SUBSEP},
+then @code{join} joins the strings with no separator between them.
+@code{SUBSEP} serves as a ``magic'' value to indicate that there should
+be no separation between the component strings.
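+
+For example, the following fragment (an illustrative sketch) shows the
+effect of the different separator values:
+
+@example
+n = split("one two three", parts, " ")
+print join(parts, 1, n)          # prints "one two three"
+print join(parts, 1, n, "-")     # prints "one-two-three"
+print join(parts, 1, n, SUBSEP)  # prints "onetwothree"
+@end example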
+
+It would be nice if @code{awk} had an assignment operator for concatenation.
+The lack of an explicit operator for concatenation makes string operations
+more difficult than they really need to be.
+
+@node Mktime Function, Gettimeofday Function, Join Function, Library Functions
+@section Turning Dates Into Timestamps
+
+The @code{systime} function built in to @code{gawk}
+returns the current time of day as
+a timestamp in ``seconds since the Epoch.'' This timestamp
+can be converted into a printable date of almost infinitely variable
+format using the built-in @code{strftime} function.
+(For more information on @code{systime} and @code{strftime},
+@pxref{Time Functions, ,Functions for Dealing with Time Stamps}.)
+
+@cindex converting dates to timestamps
+@cindex dates, converting to timestamps
+@cindex timestamps, converting from dates
+An interesting but difficult problem is to convert a readable representation
+of a date back into a timestamp. The ANSI C library provides a @code{mktime}
+function that does the basic job, converting a canonical representation of a
+date into a timestamp.
+
+It would appear at first glance that @code{gawk} would have to supply a
+@code{mktime} built-in function that was simply a ``hook'' to the C language
+version. In fact though, @code{mktime} can be implemented entirely in
+@code{awk}.
+
+Here is a version of @code{mktime} for @code{awk}. It takes a simple
+representation of the date and time, and converts it into a timestamp.
+
+The code is presented here intermixed with explanatory prose. In
+@ref{Extract Program, ,Extracting Programs from Texinfo Source Files},
+you will see how the Texinfo source file for this @value{DOCUMENT}
+can be processed to extract the code into a single source file.
+
+The program begins with a descriptive comment and a @code{BEGIN} rule
+that initializes a table @code{_tm_months}. This table is a two-dimensional
+array that has the lengths of the months. The first index is zero for
+regular years, and one for leap years. The values are the same for all the
+months in both kinds of years, except for February; thus the use of multiple
+assignment.
+
+@example
+@c @group
+@c file eg/lib/mktime.awk
+# mktime.awk --- convert a canonical date representation
+# into a timestamp
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN \
+@{
+ # Initialize table of month lengths
+ _tm_months[0,1] = _tm_months[1,1] = 31
+ _tm_months[0,2] = 28; _tm_months[1,2] = 29
+ _tm_months[0,3] = _tm_months[1,3] = 31
+ _tm_months[0,4] = _tm_months[1,4] = 30
+ _tm_months[0,5] = _tm_months[1,5] = 31
+ _tm_months[0,6] = _tm_months[1,6] = 30
+ _tm_months[0,7] = _tm_months[1,7] = 31
+ _tm_months[0,8] = _tm_months[1,8] = 31
+ _tm_months[0,9] = _tm_months[1,9] = 30
+ _tm_months[0,10] = _tm_months[1,10] = 31
+ _tm_months[0,11] = _tm_months[1,11] = 30
+ _tm_months[0,12] = _tm_months[1,12] = 31
+@}
+@c endfile
+@c @end group
+@end example
+
+The benefit of merging multiple @code{BEGIN} rules
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns})
+is particularly clear when writing library files. Functions in library
+files can cleanly initialize their own private data and also provide clean-up
+actions in private @code{END} rules.
+
+The next function is a simple one that computes whether a given year is or
+is not a leap year. If a year is evenly divisible by four, but not evenly
+divisible by 100, or if it is evenly divisible by 400, then it is a leap
+year. Thus, 1904 was a leap year, 1900 was not, but 2000 will be.
+@c Change this after the year 2000 to ``2000 was'' (:-)
+
+@findex _tm_isleap
+@example
+@group
+@c file eg/lib/mktime.awk
+# decide if a year is a leap year
+function _tm_isleap(year, ret)
+@{
+ ret = (year % 4 == 0 && year % 100 != 0) ||
+ (year % 400 == 0)
+
+ return ret
+@}
+@c endfile
+@end group
+@end example
+
+This function is only used a few times in this file, and its computation
+could have been written @dfn{in-line} (at the point where it's used).
+Making it a separate function made the original development easier, and also
+avoids the possibility of typing errors when duplicating the code in
+multiple places.
+
+The next function is more interesting. It does most of the work of
+generating a timestamp, which is converting a date and time into some number
+of seconds since the Epoch. The caller passes an array (rather
+imaginatively named @code{a}) containing six
+values: the year including century, the month as a number between one and 12,
+the day of the month, the hour as a number between zero and 23, the minute in
+the hour, and the seconds within the minute.
+
+The function uses several local variables to precompute the number of
+seconds in an hour, seconds in a day, and seconds in a year. Often,
+similar C code simply writes out the expression in-line, expecting the
+compiler to do @dfn{constant folding}. E.g., most C compilers would
+turn @samp{60 * 60} into @samp{3600} at compile time, instead of recomputing
+it every time at run time. Precomputing these values makes the
+function more efficient.
+
+@findex _tm_addup
+@example
+@c @group
+@c file eg/lib/mktime.awk
+# convert a date into seconds
+function _tm_addup(a, total, yearsecs, daysecs,
+ hoursecs, i, j)
+@{
+ hoursecs = 60 * 60
+ daysecs = 24 * hoursecs
+ yearsecs = 365 * daysecs
+
+ total = (a[1] - 1970) * yearsecs
+
+@group
+ # extra day for leap years
+ for (i = 1970; i < a[1]; i++)
+ if (_tm_isleap(i))
+ total += daysecs
+@end group
+
+@group
+ j = _tm_isleap(a[1])
+ for (i = 1; i < a[2]; i++)
+ total += _tm_months[j, i] * daysecs
+@end group
+
+ total += (a[3] - 1) * daysecs
+ total += a[4] * hoursecs
+ total += a[5] * 60
+ total += a[6]
+
+ return total
+@}
+@c endfile
+@c @end group
+@end example
+
+The function starts with a first approximation of all the seconds between
+Midnight, January 1, 1970,@footnote{This is the Epoch on POSIX systems.
+It may be different on other systems.} and the beginning of the current
+year. It then goes through all those years, and for every leap year,
+adds an additional day's worth of seconds.
+
+The variable @code{j} holds one if the current year is a leap year,
+and zero if it is not.
+For every month in the current year prior to the current month, it adds
+the number of seconds in the month, using the appropriate entry in the
+@code{_tm_months} array.
+
+Finally, it adds in the seconds for the number of days prior to the current
+day, and the number of hours, minutes, and seconds in the current day.
+
+The result is a count of seconds since January 1, 1970. This value is not
+yet what is needed though. The reason why is described shortly.
+
+The main @code{mktime} function takes a single character string argument.
+This string is a representation of a date and time in a ``canonical''
+(fixed) form. This string should be
+@code{"@var{year} @var{month} @var{day} @var{hour} @var{minute} @var{second}"}.
+
+@findex mktime
+@example
+@c @group
+@c file eg/lib/mktime.awk
+# mktime --- convert a date into seconds,
+# compensate for time zone
+
+function mktime(str, res1, res2, a, b, i, j, t, diff)
+@{
+ i = split(str, a, " ") # don't rely on FS
+
+ if (i != 6)
+ return -1
+
+ # force numeric
+ for (j in a)
+ a[j] += 0
+
+@group
+ # validate
+ if (a[1] < 1970 ||
+ a[2] < 1 || a[2] > 12 ||
+ a[3] < 1 || a[3] > 31 ||
+ a[4] < 0 || a[4] > 23 ||
+ a[5] < 0 || a[5] > 59 ||
+ a[6] < 0 || a[6] > 61 )
+ return -1
+@end group
+
+ res1 = _tm_addup(a)
+ t = strftime("%Y %m %d %H %M %S", res1)
+
+ if (_tm_debug)
+ printf("(%s) -> (%s)\n", str, t) > "/dev/stderr"
+
+ split(t, b, " ")
+ res2 = _tm_addup(b)
+
+ diff = res1 - res2
+
+ if (_tm_debug)
+ printf("diff = %d seconds\n", diff) > "/dev/stderr"
+
+ res1 += diff
+
+ return res1
+@}
+@c endfile
+@c @end group
+@end example
+
+The function first splits the string into an array, using spaces and tabs as
+separators. If there are not six elements in the array, it returns an
+error, signaled as the value @minus{}1.
+Next, it forces each element of the array to be numeric, by adding zero to it.
+The following @samp{if} statement then makes sure that each element is
+within an allowable range. (This checking could be extended further, e.g.,
+to make sure that the day of the month is within the correct range for the
+particular month supplied.) All of this is essentially preliminary set-up
+and error checking.
+
+Recall that @code{_tm_addup} generated a value in seconds since Midnight,
+January 1, 1970. This value is not directly usable as the result we want,
+@emph{since the calculation does not account for the local timezone}. In other
+words, the value represents the count in seconds since the Epoch, but only
+for UTC (Universal Coordinated Time). If the local timezone is east or west
+of UTC, then some number of hours should be either added to, or subtracted from
+the resulting timestamp.
+
+For example, 6:23 p.m. in Atlanta, Georgia (USA), is normally five hours west
+of (behind) UTC. It is only four hours behind UTC if daylight savings
+time is in effect.
+If you are calling @code{mktime} in Atlanta, with the argument
+@code{@w{"1993 5 23 18 23 12"}}, the result from @code{_tm_addup} will be
+for 6:23 p.m. UTC, which is only 2:23 p.m. in Atlanta. It is necessary to
+add another four hours worth of seconds to the result.
+
+How can @code{mktime} determine how far away it is from UTC? This is
+surprisingly easy. The returned timestamp represents the time passed to
+@code{mktime} @emph{as UTC}. This timestamp can be fed back to
+@code{strftime}, which will format it as a @emph{local} time; i.e.@: as
+if it already had the UTC difference added in to it. This is done by
+giving @code{@w{"%Y %m %d %H %M %S"}} to @code{strftime} as the format
+argument. It returns the computed timestamp in the original string
+format. The result represents a time that accounts for the UTC
+difference. When the new time is converted back to a timestamp, the
+difference between the two timestamps is the difference (in seconds)
+between the local timezone and UTC. This difference is then added back
+to the original result. An example demonstrating this is presented below.
+
+Finally, there is a ``main'' program for testing the function.
+
+@example
+@c @group
+@c file eg/lib/mktime.awk
+BEGIN @{
+ if (_tm_test) @{
+ printf "Enter date as yyyy mm dd hh mm ss: "
+ getline _tm_test_date
+
+ t = mktime(_tm_test_date)
+ r = strftime("%Y %m %d %H %M %S", t)
+ printf "Got back (%s)\n", r
+ @}
+@}
+@c endfile
+@c @end group
+@end example
+
+The entire program uses two variables that can be set on the command
+line to control debugging output and to enable the test in the final
+@code{BEGIN} rule. Here is the result of a test run. (Note that debugging
+output is to standard error, and test output is to standard output.)
+
+@example
+@c @group
+$ gawk -f mktime.awk -v _tm_test=1 -v _tm_debug=1
+@print{} Enter date as yyyy mm dd hh mm ss: 1993 5 23 15 35 10
+@error{} (1993 5 23 15 35 10) -> (1993 05 23 11 35 10)
+@error{} diff = 14400 seconds
+@print{} Got back (1993 05 23 15 35 10)
+@c @end group
+@end example
+
+The time entered was 3:35 p.m. (15:35 on a 24-hour clock), on May 23, 1993.
+The first line
+of debugging output shows the resulting time as UTC---four hours ahead of
+the local time zone. The second line shows that the difference is 14400
+seconds, which is four hours. (The difference is only four hours, since
+daylight savings time is in effect during May.)
+The final line of test output shows that the timezone compensation
+algorithm works; the returned time is the same as the entered time.
+
+This program does not solve the general problem of turning an arbitrary date
+representation into a timestamp. That problem is very involved. However,
+the @code{mktime} function provides a foundation upon which to build. Other
+software can convert month names into numeric months, and AM/PM times into
+24-hour clocks, to generate the ``canonical'' format that @code{mktime}
+requires.
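+
+For example, a small front end along the following lines (purely a
+hypothetical sketch, not part of the distributed library; the function
+name @code{canonicalize} and the variables are invented for
+illustration) could turn a date such as
+@samp{May 23 1993 3:35:10 PM} into the canonical form:
+
+@example
+BEGIN @{
+    # map short month names to month numbers
+    n = split("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec",
+              _names, " ")
+    for (i = 1; i <= n; i++)
+        _monthnum[_names[i]] = i
+@}
+
+# canonicalize --- convert "Mon dd yyyy hh:mm:ss AM/PM"
+#                  into "yyyy mm dd hh mm ss"
+function canonicalize(date,    f, hour)
+@{
+    split(date, f, /[ :]+/)
+    hour = f[4] + 0
+    if (f[7] == "PM" && hour != 12)
+        hour += 12
+    else if (f[7] == "AM" && hour == 12)
+        hour = 0
+    return sprintf("%d %d %d %d %d %d",
+                   f[3], _monthnum[f[1]], f[2], hour, f[5], f[6])
+@}
+@end example
+
+@noindent
+The result of @code{canonicalize} can then be handed directly to
+@code{mktime}.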
+
+@node Gettimeofday Function, Filetrans Function, Mktime Function, Library Functions
+@section Managing the Time of Day
+
+@cindex formatted timestamps
+@cindex timestamps, formatted
+The @code{systime} and @code{strftime} functions described in
+@ref{Time Functions, ,Functions for Dealing with Time Stamps},
+provide the minimum functionality necessary for dealing with the time of day
+in human readable form. While @code{strftime} is extensive, the control
+formats are not necessarily easy to remember or intuitively obvious when
+reading a program.
+
+The following function, @code{gettimeofday}, populates a user-supplied array
+with pre-formatted time information. It returns a string with the current
+time formatted in the same way as the @code{date} utility.
+
+@findex gettimeofday
+@example
+@c @group
+@c file eg/lib/gettime.awk
+# gettimeofday --- get the time of day in a usable format
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain, May 1993
+#
+# Returns a string in the format of output of date(1)
+# Populates the array argument time with individual values:
+# time["second"] -- seconds (0 - 59)
+# time["minute"] -- minutes (0 - 59)
+# time["hour"] -- hours (0 - 23)
+# time["althour"] -- hours (0 - 12)
+# time["monthday"] -- day of month (1 - 31)
+# time["month"] -- month of year (1 - 12)
+# time["monthname"] -- name of the month
+# time["shortmonth"] -- short name of the month
+# time["year"] -- year within century (0 - 99)
+# time["fullyear"] -- year with century (19xx or 20xx)
+# time["weekday"] -- day of week (Sunday = 0)
+# time["altweekday"] -- day of week (Monday = 0)
+# time["weeknum"] -- week number, Sunday first day
+# time["altweeknum"] -- week number, Monday first day
+# time["dayname"] -- name of weekday
+# time["shortdayname"] -- short name of weekday
+# time["yearday"] -- day of year (0 - 365)
+# time["timezone"] -- abbreviation of timezone name
+# time["ampm"] -- AM or PM designation
+
+@group
+function gettimeofday(time, ret, now, i)
+@{
+ # get time once, avoids unnecessary system calls
+ now = systime()
+
+ # return date(1)-style output
+ ret = strftime("%a %b %d %H:%M:%S %Z %Y", now)
+
+ # clear out target array
+ for (i in time)
+ delete time[i]
+@end group
+
+@group
+ # fill in values, force numeric values to be
+ # numeric by adding 0
+ time["second"] = strftime("%S", now) + 0
+ time["minute"] = strftime("%M", now) + 0
+ time["hour"] = strftime("%H", now) + 0
+ time["althour"] = strftime("%I", now) + 0
+ time["monthday"] = strftime("%d", now) + 0
+ time["month"] = strftime("%m", now) + 0
+ time["monthname"] = strftime("%B", now)
+ time["shortmonth"] = strftime("%b", now)
+ time["year"] = strftime("%y", now) + 0
+ time["fullyear"] = strftime("%Y", now) + 0
+ time["weekday"] = strftime("%w", now) + 0
+ time["altweekday"] = strftime("%u", now) + 0
+ time["dayname"] = strftime("%A", now)
+ time["shortdayname"] = strftime("%a", now)
+ time["yearday"] = strftime("%j", now) + 0
+ time["timezone"] = strftime("%Z", now)
+ time["ampm"] = strftime("%p", now)
+ time["weeknum"] = strftime("%U", now) + 0
+ time["altweeknum"] = strftime("%W", now) + 0
+
+ return ret
+@}
+@end group
+@c endfile
+@end example
+
+The string indices are easier to use and read than the various formats
+required by @code{strftime}. The @code{alarm} program presented in
+@ref{Alarm Program, ,An Alarm Clock Program},
+uses this function.
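+
+For example, a rule like the following (just an illustrative sketch)
+prints a short, readable summary of the current time for each record:
+
+@example
+@{
+    gettimeofday(now)
+    printf("%s, %02d:%02d %s\n",
+           now["dayname"], now["althour"],
+           now["minute"], now["ampm"])
+@}
+@end example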
+
+@c exercise!!!
+The @code{gettimeofday} function is presented above as it was written. A
+more general design for this function would have allowed the user to supply
+an optional timestamp value that would have been used instead of the current
+time.
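+
+One way to write such a variant is sketched below (the name
+@code{gettimeofday_at} and its second parameter are inventions for
+illustration, and only a few of the array entries are shown):
+
+@example
+function gettimeofday_at(time, stamp,    ret, now, i)
+@{
+    # use the supplied timestamp if there is one,
+    # otherwise fall back to the current time
+    now = (stamp == "") ? systime() : stamp + 0
+
+    ret = strftime("%a %b %d %H:%M:%S %Z %Y", now)
+
+    for (i in time)      # clear out target array
+        delete time[i]
+
+    time["second"] = strftime("%S", now) + 0
+    time["minute"] = strftime("%M", now) + 0
+    time["hour"]   = strftime("%H", now) + 0
+    # @dots{} remaining entries filled in as in gettimeofday()
+
+    return ret
+@}
+@end example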
+
+@node Filetrans Function, Getopt Function, Gettimeofday Function, Library Functions
+@section Noting Data File Boundaries
+
+@cindex per file initialization and clean-up
+The @code{BEGIN} and @code{END} rules are each executed exactly once, at
+the beginning and end respectively of your @code{awk} program
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}).
+We (the @code{gawk} authors) once had a user who mistakenly thought that the
+@code{BEGIN} rule was executed at the beginning of each data file and the
+@code{END} rule was executed at the end of each data file. When informed
+that this was not the case, the user requested that we add new special
+patterns to @code{gawk}, named @code{BEGIN_FILE} and @code{END_FILE}, that
+would have the desired behavior. He even supplied us the code to do so.
+
+However, after a little thought, I came up with the following library program.
+It arranges to call two user-supplied functions, @code{beginfile} and
+@code{endfile}, at the beginning and end of each data file.
+Besides solving the problem in only nine(!) lines of code, it does so
+@emph{portably}; this will work with any implementation of @code{awk}.
+
+@example
+@c @group
+# transfile.awk
+#
+# Give the user a hook for filename transitions
+#
+# The user must supply functions beginfile() and endfile()
+# that each take the name of the file being started or
+# finished, respectively.
+#
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, January 1992
+# Public Domain
+
+FILENAME != _oldfilename \
+@{
+ if (_oldfilename != "")
+ endfile(_oldfilename)
+ _oldfilename = FILENAME
+ beginfile(FILENAME)
+@}
+
+END @{ endfile(FILENAME) @}
+@c @end group
+@end example
+
+This file must be loaded before the user's ``main'' program, so that the
+rule it supplies will be executed first.
+
+This rule relies on @code{awk}'s @code{FILENAME} variable that
+automatically changes for each new data file. The current file name is
+saved in a private variable, @code{_oldfilename}. If @code{FILENAME} does
+not equal @code{_oldfilename}, then a new data file is being processed, and
+it is necessary to call @code{endfile} for the old file. Since
+@code{endfile} should only be called if a file has been processed, the
+program first checks to make sure that @code{_oldfilename} is not the null
+string. The program then assigns the current file name to
+@code{_oldfilename}, and calls @code{beginfile} for the file.
+Since, like all @code{awk} variables, @code{_oldfilename} will be
+initialized to the null string, this rule executes correctly even for the
+first data file.
+
+The program also supplies an @code{END} rule, to do the final processing for
+the last file. Since this @code{END} rule comes before any @code{END} rules
+supplied in the ``main'' program, @code{endfile} will be called first. Once
+again the value of multiple @code{BEGIN} and @code{END} rules should be clear.
+
+@findex beginfile
+@findex endfile
+This version has the same problem as the first version of @code{nextfile}
+(@pxref{Nextfile Function, ,Implementing @code{nextfile} as a Function}).
+If the same data file occurs twice in a row on the command line, then
+@code{endfile} and @code{beginfile} will not be executed at the end of the
+first pass and at the beginning of the second pass.
+This version solves the problem.
+
+@example
+@c @group
+@c file eg/lib/ftrans.awk
+# ftrans.awk --- handle data file transitions
+#
+# user supplies beginfile() and endfile() functions
+#
+# Arnold Robbins, arnold@@gnu.ai.mit.edu. November 1992
+# Public Domain
+
+FNR == 1 @{
+ if (_filename_ != "")
+ endfile(_filename_)
+ _filename_ = FILENAME
+ beginfile(FILENAME)
+@}
+
+END @{ endfile(_filename_) @}
+@c endfile
+@c @end group
+@end example
+
+In @ref{Wc Program, ,Counting Things},
+you will see how this library function can be used, and
+how it simplifies writing the main program.
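+
+In the meantime, a pair of user-supplied functions as simple as the
+following (a minimal illustrative sketch) is enough to report each
+data file transition:
+
+@example
+function beginfile(fname) @{ print "starting", fname @}
+
+function endfile(fname)   @{ print "done with", fname @}
+@end example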
+
+@node Getopt Function, Passwd Functions, Filetrans Function, Library Functions
+@section Processing Command Line Options
+
+@cindex @code{getopt}, C version
+@cindex processing arguments
+@cindex argument processing
+Most utilities on POSIX compatible systems take options or ``switches'' on
+the command line that can be used to change the way a program behaves.
+@code{awk} is an example of such a program
+(@pxref{Options, ,Command Line Options}).
+Often, options take @dfn{arguments}, data that the program needs to
+correctly obey the command line option. For example, @code{awk}'s
+@samp{-F} option requires a string to use as the field separator.
+The first occurrence on the command line of either @samp{--} or a
+string that does not begin with @samp{-} ends the options.
+
+Most Unix systems provide a C function named @code{getopt} for processing
+command line arguments. The programmer provides a string describing the one
+letter options. If an option requires an argument, it is followed in the
+string with a colon. @code{getopt} is also passed the
+count and values of the command line arguments, and is called in a loop.
+@code{getopt} processes the command line arguments for option letters.
+Each time around the loop, it returns a single character representing the
+next option letter that it found, or @samp{?} if it found an invalid option.
+When it returns @minus{}1, there are no options left on the command line.
+
+When using @code{getopt}, options that do not take arguments can be
+grouped together. Furthermore, options that take arguments require that the
+argument be present. The argument can immediately follow the option letter,
+or it can be a separate command line argument.
+
+Given a hypothetical program that takes
+three command line options, @samp{-a}, @samp{-b}, and @samp{-c}, and
+@samp{-b} requires an argument, all of the following are valid ways of
+invoking the program:
+
+@example
+@c @group
+prog -a -b foo -c data1 data2 data3
+prog -ac -bfoo -- data1 data2 data3
+prog -acbfoo data1 data2 data3
+@c @end group
+@end example
+
+Notice that when the argument is grouped with its option, the rest of
+the command line argument is considered to be the option's argument.
+In the above example, @samp{-acbfoo} indicates that all of the
+@samp{-a}, @samp{-b}, and @samp{-c} options were supplied,
+and that @samp{foo} is the argument to the @samp{-b} option.
+
+@code{getopt} provides four external variables that the programmer can use.
+
+@table @code
+@item optind
+The index in the argument value array (@code{argv}) where the first
+non-option command line argument can be found.
+
+@item optarg
+The string value of the argument to an option.
+
+@item opterr
+Usually @code{getopt} prints an error message when it finds an invalid
+option. Setting @code{opterr} to zero disables this feature. (An
+application might wish to print its own error message.)
+
+@item optopt
+The letter representing the command line option.
+While not usually documented, most versions supply this variable.
+@end table
+
+The following C fragment shows how @code{getopt} might process command line
+arguments for @code{awk}.
+
+@example
+@group
+int
+main(int argc, char *argv[])
+@{
+ @dots{}
+ /* print our own message */
+ opterr = 0;
+@end group
+@group
+ while ((c = getopt(argc, argv, "v:f:F:W:")) != -1) @{
+ switch (c) @{
+ case 'f': /* file */
+ @dots{}
+ break;
+ case 'F': /* field separator */
+ @dots{}
+ break;
+ case 'v': /* variable assignment */
+ @dots{}
+ break;
+ case 'W': /* extension */
+ @dots{}
+ break;
+ case '?':
+ default:
+ usage();
+ break;
+ @}
+ @}
+ @dots{}
+@}
+@end group
+@end example
+
+As a side point, @code{gawk} actually uses the GNU @code{getopt_long}
+function to process both normal and GNU-style long options
+(@pxref{Options, ,Command Line Options}).
+
+The abstraction provided by @code{getopt} is very useful, and would be quite
+handy in @code{awk} programs as well. Here is an @code{awk} version of
+@code{getopt}. This function highlights one of the greatest weaknesses in
+@code{awk}, which is that it is very poor at manipulating single characters.
+Repeated calls to @code{substr} are necessary for accessing individual
+characters (@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+The discussion walks through the code a bit at a time.
+
+@example
+@c @group
+@c file eg/lib/getopt.awk
+# getopt --- do C library getopt(3) function in awk
+#
+# arnold@@gnu.ai.mit.edu
+# Public domain
+#
+# Initial version: March, 1991
+# Revised: May, 1993
+
+# External variables:
+# Optind -- index of ARGV for first non-option argument
+# Optarg -- string value of argument to current option
+# Opterr -- if non-zero, print our own diagnostic
+# Optopt -- current option letter
+
+# Returns
+# -1 at end of options
+# ? for unrecognized option
+# <c> a character representing the current option
+
+# Private Data
+# _opti index in multi-flag option, e.g., -abc
+@c endfile
+@c @end group
+@end example
+
+The function starts out with some documentation: who wrote the code,
+and when it was revised, followed by a list of the global variables it uses,
+what the return values are and what they mean, and any global variables that
+are ``private'' to this library function. Such documentation is essential
+for any program, and particularly for library functions.
+
+@findex getopt
+@example
+@c @group
+@c file eg/lib/getopt.awk
+function getopt(argc, argv, options, optl, thisopt, i)
+@{
+ optl = length(options)
+ if (optl == 0) # no options given
+ return -1
+
+ if (argv[Optind] == "--") @{ # all done
+ Optind++
+ _opti = 0
+ return -1
+ @} else if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) @{
+ _opti = 0
+ return -1
+ @}
+@c endfile
+@c @end group
+@end example
+
+The function first checks that it was indeed called with a string of options
+(the @code{options} parameter). If @code{options} has a zero length,
+@code{getopt} immediately returns @minus{}1.
+
+The next thing to check for is the end of the options. A @samp{--} ends the
+command line options, as does any command line argument that does not begin
+with a @samp{-}. @code{Optind} is used to step through the array of command
+line arguments; it retains its value across calls to @code{getopt}, since it
+is a global variable.
+
+The regexp used, @code{@w{/^-[^: \t\n\f\r\v\b]/}}, is
+perhaps a bit of overkill; it checks for a @samp{-} followed by anything
+that is not whitespace and not a colon.
+If the current command line argument does not match this pattern,
+it is not an option, and it ends option processing.
+
+@example
+@group
+@c file eg/lib/getopt.awk
+ if (_opti == 0)
+ _opti = 2
+ thisopt = substr(argv[Optind], _opti, 1)
+ Optopt = thisopt
+ i = index(options, thisopt)
+ if (i == 0) @{
+ if (Opterr)
+ printf("%c -- invalid option\n",
+ thisopt) > "/dev/stderr"
+ if (_opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return "?"
+ @}
+@c endfile
+@end group
+@end example
+
+The @code{_opti} variable tracks the position in the current command line
+argument (@code{argv[Optind]}). In the case that multiple options were
+grouped together with one @samp{-} (e.g., @samp{-abx}), it is necessary
+to return them to the user one at a time.
+
+If @code{_opti} is equal to zero, it is set to two, the index in the string
+of the next character to look at (we skip the @samp{-}, which is at position
+one). The variable @code{thisopt} holds the character, obtained with
+@code{substr}. It is saved in @code{Optopt} for the main program to use.
+
+If @code{thisopt} is not in the @code{options} string, then it is an
+invalid option. If @code{Opterr} is non-zero, @code{getopt} prints an error
+message on the standard error that is similar to the message from the C
+version of @code{getopt}.
+
+Since the option is invalid, it is necessary to skip it and move on to the
+next option character. If @code{_opti} is greater than or equal to the
+length of the current command line argument, then it is necessary to move on
+to the next one, so @code{Optind} is incremented and @code{_opti} is reset
+to zero. Otherwise, @code{Optind} is left alone and @code{_opti} is merely
+incremented.
+
+In any case, since the option was invalid, @code{getopt} returns @samp{?}.
+The main program can examine @code{Optopt} if it needs to know what the
+invalid option letter actually was.
+
+@example
+@group
+@c file eg/lib/getopt.awk
+ if (substr(options, i + 1, 1) == ":") @{
+ # get option argument
+ if (length(substr(argv[Optind], _opti + 1)) > 0)
+ Optarg = substr(argv[Optind], _opti + 1)
+ else
+ Optarg = argv[++Optind]
+ _opti = 0
+ @} else
+ Optarg = ""
+@c endfile
+@end group
+@end example
+
+If the option requires an argument, the option letter is followed by a colon
+in the @code{options} string. If there are remaining characters in the
+current command line argument (@code{argv[Optind]}), then the rest of that
+string is assigned to @code{Optarg}. Otherwise, the next command line
+argument is used (@samp{-xFOO} vs. @samp{@w{-x FOO}}). In either case,
+@code{_opti} is reset to zero, since there are no more characters left to
+examine in the current command line argument.
+
+@example
+@c @group
+@c file eg/lib/getopt.awk
+ if (_opti == 0 || _opti >= length(argv[Optind])) @{
+ Optind++
+ _opti = 0
+ @} else
+ _opti++
+ return thisopt
+@}
+@c endfile
+@c @end group
+@end example
+
+Finally, if @code{_opti} is either zero or greater than the length of the
+current command line argument, it means this element in @code{argv} is
+through being processed, so @code{Optind} is incremented to point to the
+next element in @code{argv}. If neither condition is true, then only
+@code{_opti} is incremented, so that the next option letter can be processed
+on the next call to @code{getopt}.
+
+@example
+@c @group
+@c file eg/lib/getopt.awk
+BEGIN @{
+ Opterr = 1 # default is to diagnose
+ Optind = 1 # skip ARGV[0]
+
+ # test program
+ if (_getopt_test) @{
+ while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
+ printf("c = <%c>, optarg = <%s>\n",
+ _go_c, Optarg)
+ printf("non-option arguments:\n")
+ for (; Optind < ARGC; Optind++)
+ printf("\tARGV[%d] = <%s>\n",
+ Optind, ARGV[Optind])
+ @}
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{BEGIN} rule initializes both @code{Opterr} and @code{Optind} to one.
+@code{Opterr} is set to one, since the default behavior is for @code{getopt}
+to print a diagnostic message upon seeing an invalid option. @code{Optind}
+is set to one, since there's no reason to look at the program name, which is
+in @code{ARGV[0]}.
+
+The rest of the @code{BEGIN} rule is a simple test program. Here is the
+result of two sample runs of the test program.
+
+@example
+@group
+$ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x
+@print{} c = <a>, optarg = <>
+@print{} c = <c>, optarg = <>
+@print{} c = <b>, optarg = <ARG>
+@print{} non-option arguments:
+@print{} ARGV[3] = <bax>
+@print{} ARGV[4] = <-x>
+@end group
+
+@group
+$ awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc
+@print{} c = <a>, optarg = <>
+@error{} x -- invalid option
+@print{} c = <?>, optarg = <>
+@print{} non-option arguments:
+@print{} ARGV[4] = <xyz>
+@print{} ARGV[5] = <abc>
+@end group
+@end example
+
+The first @samp{--} terminates the arguments to @code{awk}, so that it does
+not try to interpret the @samp{-a} etc. as its own options.
+
+Several of the sample programs presented in
+@ref{Sample Programs, ,Practical @code{awk} Programs},
+use @code{getopt} to process their arguments.
+
+@node Passwd Functions, Group Functions, Getopt Function, Library Functions
+@section Reading the User Database
+
+@cindex @file{/dev/user}
+The @file{/dev/user} special file
+(@pxref{Special Files, ,Special File Names in @code{gawk}})
+provides access to the current user's real and effective user and group id
+numbers, and if available, the user's supplementary group set.
+However, since these are numbers, they do not provide very useful
+information to the average user. There needs to be some way to find the
+user information associated with the user and group numbers. This
+section presents a suite of functions for retrieving information from the
+user database. @xref{Group Functions, ,Reading the Group Database},
+for a similar suite that retrieves information from the group database.
+
+@cindex @code{getpwent}, C version
+@cindex user information
+@cindex login information
+@cindex account information
+@cindex password file
+The POSIX standard does not define the file where user information is
+kept. Instead, it provides the @code{<pwd.h>} header file
+and several C language subroutines for obtaining user information.
+The primary function is @code{getpwent}, for ``get password entry.''
+The ``password'' comes from the original user database file,
+@file{/etc/passwd}, which kept user information, along with the
+encrypted passwords (hence the name).
+
+While an @code{awk} program could simply read @file{/etc/passwd} directly
+(the format is well known), because of the way password
+files are handled on networked systems,
+this file may not contain complete information about the system's set of users.
+
+@cindex @code{pwcat} program
+To be sure of being
+able to produce a readable, complete version of the user database, it is
+necessary to write a small C program that calls @code{getpwent}.
+@code{getpwent} is defined to return a pointer to a @code{struct passwd}.
+Each time it is called, it returns the next entry in the database.
+When there are no more entries, it returns @code{NULL}, the null pointer.
+When this happens, the C program should call @code{endpwent} to close the
+database.
+Here is @code{pwcat}, a C program that ``cats'' the password database.
+
+@findex pwcat.c
+@example
+@c @group
+@c file eg/lib/pwcat.c
+/*
+ * pwcat.c
+ *
+ * Generate a printable version of the password database
+ *
+ * Arnold Robbins
+ * arnold@@gnu.ai.mit.edu
+ * May 1993
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include <pwd.h>
+
+int
+main(argc, argv)
+int argc;
+char **argv;
+@{
+ struct passwd *p;
+
+ while ((p = getpwent()) != NULL)
+ printf("%s:%s:%d:%d:%s:%s:%s\n",
+ p->pw_name, p->pw_passwd, p->pw_uid,
+ p->pw_gid, p->pw_gecos, p->pw_dir, p->pw_shell);
+
+ endpwent();
+ exit(0);
+@}
+@c endfile
+@c @end group
+@end example
+
+If you don't understand C, don't worry about it.
+The output from @code{pwcat} is the user database, in the traditional
+@file{/etc/passwd} format of colon-separated fields. The fields are:
+
+@table @asis
+@item Login name
+The user's login name.
+
+@item Encrypted password
+The user's encrypted password. This may not be available on some systems.
+
+@item User-ID
+The user's numeric user-id number.
+
+@item Group-ID
+The user's numeric group-id number.
+
+@item Full name
+The user's full name, and perhaps other information associated with the
+user.
+
+@item Home directory
+The user's login, or ``home'' directory (familiar to shell programmers as
+@code{$HOME}).
+
+@item Login shell
+The program that will be run when the user logs in. This is usually a
+shell, such as Bash (the GNU Bourne-Again shell).
+@end table
+
+Here are a few lines representative of @code{pwcat}'s output.
+
+@example
+@c @group
+$ pwcat
+@print{} root:3Ov02d5VaUPB6:0:1:Operator:/:/bin/sh
+@print{} nobody:*:65534:65534::/:
+@print{} daemon:*:1:1::/:
+@print{} sys:*:2:2::/:/bin/csh
+@print{} bin:*:3:3::/bin:
+@print{} arnold:xyzzy:2076:10:Arnold Robbins:/home/arnold:/bin/sh
+@print{} miriam:yxaay:112:10:Miriam Robbins:/home/miriam:/bin/sh
+@dots{}
+@c @end group
+@end example
+
+With that introduction, here is a group of functions for getting user
+information. There are several functions here, corresponding to the C
+functions of the same name.
+
+@findex _pw_init
+@example
+@c file eg/lib/passwdawk.in
+@group
+# passwd.awk --- access password file information
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN @{
+ # tailor this to suit your system
+ _pw_awklib = "/usr/local/libexec/awk/"
+@}
+@end group
+
+function _pw_init( oldfs, oldrs, olddol0, pwcat)
+@{
+ if (_pw_inited)
+ return
+ oldfs = FS
+ oldrs = RS
+ olddol0 = $0
+ FS = ":"
+ RS = "\n"
+ pwcat = _pw_awklib "pwcat"
+ while ((pwcat | getline) > 0) @{
+ _pw_byname[$1] = $0
+ _pw_byuid[$3] = $0
+ _pw_bycount[++_pw_total] = $0
+ @}
+ close(pwcat)
+ _pw_count = 0
+ _pw_inited = 1
+ FS = oldfs
+ RS = oldrs
+ $0 = olddol0
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{BEGIN} rule sets a private variable to the directory where
+@code{pwcat} is stored. Since it is used to help out an @code{awk} library
+routine, we have chosen to put it in @file{/usr/local/libexec/awk}.
+You might want it to be in a different directory on your system.
+
+The function @code{_pw_init} keeps three copies of the user information
+in three associative arrays. The arrays are indexed by user name
+(@code{_pw_byname}), by user-id number (@code{_pw_byuid}), and by order of
+occurrence (@code{_pw_bycount}).
+
+The variable @code{_pw_inited} is used for efficiency; @code{_pw_init} only
+needs to be called once.
+
+Since this function uses @code{getline} to read information from
+@code{pwcat}, it first saves the values of @code{FS}, @code{RS}, and
+@code{$0}. Doing so is necessary, since these functions could be called
+from anywhere within a user's program, and the user may have his or her
+own values for @code{FS} and @code{RS}.
+@ignore
+Problem, what if FIELDWIDTHS is in use? Sigh.
+@end ignore
+
+The main part of the function uses a loop to read database lines, split
+the line into fields, and then store the line into each array as necessary.
+When the loop is done, @code{@w{_pw_init}} cleans up by closing the pipeline,
+setting @code{@w{_pw_inited}} to one, and restoring @code{FS}, @code{RS}, and
+@code{$0}. The use of @code{@w{_pw_count}} will be explained below.
+
+@findex getpwnam
+@example
+@group
+@c file eg/lib/passwdawk.in
+function getpwnam(name)
+@{
+ _pw_init()
+ if (name in _pw_byname)
+ return _pw_byname[name]
+ return ""
+@}
+@c endfile
+@end group
+@end example
+
+The @code{getpwnam} function takes a user name as a string argument. If that
+user is in the database, it returns the appropriate line. Otherwise it
+returns the null string.
+
+@findex getpwuid
+@example
+@group
+@c file eg/lib/passwdawk.in
+function getpwuid(uid)
+@{
+ _pw_init()
+ if (uid in _pw_byuid)
+ return _pw_byuid[uid]
+ return ""
+@}
+@c endfile
+@end group
+@end example
+
+Similarly,
+the @code{getpwuid} function takes a user-id number as its argument. If that
+user number is in the database, it returns the appropriate line. Otherwise it
+returns the null string.
+
+@findex getpwent
+@example
+@c @group
+@c file eg/lib/passwdawk.in
+function getpwent()
+@{
+ _pw_init()
+ if (_pw_count < _pw_total)
+ return _pw_bycount[++_pw_count]
+ return ""
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{getpwent} function simply steps through the database, one entry at
+a time. It uses @code{_pw_count} to track its current position in the
+@code{_pw_bycount} array.
+
+@findex endpwent
+@example
+@c @group
+@c file eg/lib/passwdawk.in
+function endpwent()
+@{
+ _pw_count = 0
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{@w{endpwent}} function resets @code{@w{_pw_count}} to zero, so that
+subsequent calls to @code{getpwent} will start over again.
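+
+For example, a program that needed to list every login name could step
+through the database with @code{getpwent}, split each returned record
+apart at the colons, and print the first field. The following fragment
+is only a sketch of how the functions might be used; it is not part of
+the library itself.
+
+@example
+# sketch: print all login names, one per line
+BEGIN @{
+    while ((record = getpwent()) != "") @{
+        split(record, fields, ":")
+        print fields[1]    # the login name
+    @}
+    endpwent()
+@}
+@end example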
+
+A conscious design decision in this suite is that each subroutine calls
+@code{@w{_pw_init}} to initialize the database arrays. The overhead of running
+a separate process to generate the user database, and the I/O to scan it,
+will only be incurred if the user's main program actually calls one of these
+functions. If this library file is loaded along with a user's program, but
+none of the routines are ever called, then there is no extra run-time overhead.
+(The alternative would be to move the body of @code{@w{_pw_init}} into a
+@code{BEGIN} rule, which would always run @code{pwcat}. This simplifies the
+code but runs an extra process that may never be needed.)
+
+In turn, calling @code{_pw_init} is not too expensive, since the
+@code{_pw_inited} variable keeps the program from reading the data more than
+once. If you are worried about squeezing every last cycle out of your
+@code{awk} program, the check of @code{_pw_inited} could be moved out of
+@code{_pw_init} and duplicated in all the other functions. In practice,
+this is not necessary, since most @code{awk} programs are I/O bound, and it
+would clutter up the code.
+
+The @code{id} program in @ref{Id Program, ,Printing Out User Information},
+uses these functions.
+
+@node Group Functions, Library Names, Passwd Functions, Library Functions
+@section Reading the Group Database
+
+@cindex @code{getgrent}, C version
+@cindex group information
+@cindex account information
+@cindex group file
+Much of the discussion presented in
+@ref{Passwd Functions, ,Reading the User Database},
+applies to the group database as well. Although there has traditionally
+been a well known file, @file{/etc/group}, in a well known format, the POSIX
+standard only provides a set of C library routines
+(@code{<grp.h>} and @code{getgrent})
+for accessing the information.
+Even though this file may exist, it likely does not have
+complete information. Therefore, as with the user database, it is necessary
+to have a small C program that generates the group database as its output.
+
+@cindex @code{grcat} program
+Here is @code{grcat}, a C program that ``cats'' the group database.
+
+@findex grcat.c
+@example
+@c @group
+@c file eg/lib/grcat.c
+/*
+ * grcat.c
+ *
+ * Generate a printable version of the group database
+ *
+ * Arnold Robbins, arnold@@gnu.ai.mit.edu
+ * May 1993
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include <grp.h>
+
+@group
+int
+main(argc, argv)
+int argc;
+char **argv;
+@{
+ struct group *g;
+ int i;
+@end group
+
+ while ((g = getgrent()) != NULL) @{
+ printf("%s:%s:%d:", g->gr_name, g->gr_passwd,
+ g->gr_gid);
+ for (i = 0; g->gr_mem[i] != NULL; i++) @{
+ printf("%s", g->gr_mem[i]);
+ if (g->gr_mem[i+1] != NULL)
+ putchar(',');
+ @}
+ putchar('\n');
+ @}
+ endgrent();
+ exit(0);
+@}
+@c endfile
+@c @end group
+@end example
+
+Each line in the group database represents one group. The fields are
+separated with colons, and represent the following information.
+
+@table @asis
+@item Group Name
+The name of the group.
+
+@item Group Password
+The encrypted group password. In practice, this field is never used. It is
+usually empty, or set to @samp{*}.
+
+@item Group ID Number
+The numeric group-id number. This number should be unique within the file.
+
+@item Group Member List
+A comma-separated list of user names. These users are members of the group.
+Most Unix systems allow users to be members of several groups
+simultaneously. If your system does, then reading @file{/dev/user} will
+return those group-id numbers in @code{$5} through @code{$NF}.
+(Note that @file{/dev/user} is a @code{gawk} extension;
+@pxref{Special Files, ,Special File Names in @code{gawk}}.)
+@end table
+
+@iftex
+@page
+@end iftex
+Here is what running @code{grcat} might produce:
+
+@example
+@group
+$ grcat
+@print{} wheel:*:0:arnold
+@print{} nogroup:*:65534:
+@print{} daemon:*:1:
+@print{} kmem:*:2:
+@print{} staff:*:10:arnold,miriam,andy
+@print{} other:*:20:
+@dots{}
+@end group
+@end example
+
+Here are the functions for obtaining information from the group database.
+There are several, modeled after the C library functions of the same names.
+
+@findex _gr_init
+@example
+@group
+@c file eg/lib/groupawk.in
+# group.awk --- functions for dealing with the group file
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN \
+@{
+ # Change to suit your system
+ _gr_awklib = "/usr/local/libexec/awk/"
+@}
+@c endfile
+@end group
+
+@group
+@c file eg/lib/groupawk.in
+function _gr_init( oldfs, oldrs, olddol0, grcat, n, a, i)
+@{
+ if (_gr_inited)
+ return
+@end group
+
+@group
+ oldfs = FS
+ oldrs = RS
+ olddol0 = $0
+ FS = ":"
+ RS = "\n"
+@end group
+
+@group
+ grcat = _gr_awklib "grcat"
+ while ((grcat | getline) > 0) @{
+ if ($1 in _gr_byname)
+ _gr_byname[$1] = _gr_byname[$1] "," $4
+ else
+ _gr_byname[$1] = $0
+ if ($3 in _gr_bygid)
+ _gr_bygid[$3] = _gr_bygid[$3] "," $4
+ else
+ _gr_bygid[$3] = $0
+
+ n = split($4, a, "[ \t]*,[ \t]*")
+@end group
+@group
+ for (i = 1; i <= n; i++)
+ if (a[i] in _gr_groupsbyuser)
+ _gr_groupsbyuser[a[i]] = \
+ _gr_groupsbyuser[a[i]] " " $1
+ else
+ _gr_groupsbyuser[a[i]] = $1
+@end group
+
+@group
+ _gr_bycount[++_gr_count] = $0
+ @}
+@end group
+@group
+ close(grcat)
+ _gr_count = 0
+ _gr_inited++
+ FS = oldfs
+ RS = oldrs
+ $0 = olddol0
+@}
+@c endfile
+@end group
+@end example
+
+The @code{BEGIN} rule sets a private variable to the directory where
+@code{grcat} is stored. Since it is used to help out an @code{awk} library
+routine, we have chosen to put it in @file{/usr/local/libexec/awk}. You might
+want it to be in a different directory on your system.
+
+These routines follow the same general outline as the user database routines
+(@pxref{Passwd Functions, ,Reading the User Database}).
+The @code{@w{_gr_inited}} variable is used to
+ensure that the database is scanned no more than once.
+The @code{@w{_gr_init}} function first saves @code{FS}, @code{RS}, and
+@code{$0}, and then sets @code{FS} and @code{RS} to the correct values for
+scanning the group information.
+
+The group information is stored in several associative arrays.
+The arrays are indexed by group name (@code{@w{_gr_byname}}), by group-id number
+(@code{@w{_gr_bygid}}), and by position in the database (@code{@w{_gr_bycount}}).
+There is an additional array indexed by user name
+(@code{@w{_gr_groupsbyuser}}); each of its elements is a space-separated
+list of the groups that the user belongs to.
+
+Unlike the user database, it is possible to have multiple records in the
+database for the same group. This is common when a group has a large number
+of members. Such a pair of entries might look like:
+
+@example
+tvpeople:*:101:johny,jay,arsenio
+tvpeople:*:101:david,conan,tom,joan
+@end example
+
+For this reason, @code{_gr_init} looks to see if a group name or
+group-id number has already been seen. If it has, then the user names are
+simply concatenated onto the previous list of users. (There is actually a
+subtle problem with the code presented above. Suppose that the first
+record for a group lists no members; the code then adds the names from
+later records with a leading comma. It also doesn't check that there is
+a @code{$4} at all.)
+
+Finally, @code{_gr_init} closes the pipeline to @code{grcat}, restores
+@code{FS}, @code{RS}, and @code{$0}, initializes @code{_gr_count} to zero
+(it is used later), and makes @code{_gr_inited} non-zero.
+
+@findex getgrnam
+@example
+@c @group
+@c file eg/lib/groupawk.in
+function getgrnam(group)
+@{
+ _gr_init()
+ if (group in _gr_byname)
+ return _gr_byname[group]
+ return ""
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{getgrnam} function takes a group name as its argument. If that
+group exists, the corresponding record is returned. Otherwise, @code{getgrnam}
+returns the null string.
+
+@findex getgrgid
+@example
+@c @group
+@c file eg/lib/groupawk.in
+function getgrgid(gid)
+@{
+ _gr_init()
+ if (gid in _gr_bygid)
+ return _gr_bygid[gid]
+ return ""
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{getgrgid} function is similar; it takes a numeric group-id and
+looks up the information associated with that group-id.
+
+@findex getgruser
+@example
+@group
+@c file eg/lib/groupawk.in
+function getgruser(user)
+@{
+ _gr_init()
+ if (user in _gr_groupsbyuser)
+ return _gr_groupsbyuser[user]
+ return ""
+@}
+@c endfile
+@end group
+@end example
+
+The @code{getgruser} function does not have a C counterpart. It takes a
+user name, and returns the list of groups that have the user as a member.
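+
+For instance, splitting the returned list apart makes it easy to look at
+each group name individually. The following fragment is only an
+illustration; the user name is taken from the sample data shown earlier.
+
+@example
+# sketch: print each group that a given user belongs to
+BEGIN @{
+    n = split(getgruser("arnold"), glist, " ")
+    for (i = 1; i <= n; i++)
+        print glist[i]
+@}
+@end example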
+
+@findex getgrent
+@example
+@c @group
+@c file eg/lib/groupawk.in
+function getgrent()
+@{
+ _gr_init()
+    if (++_gr_count in _gr_bycount)
+ return _gr_bycount[_gr_count]
+ return ""
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{getgrent} function steps through the database one entry at a time.
+It uses @code{_gr_count} to track its position in the list.
+
+@findex endgrent
+@example
+@group
+@c file eg/lib/groupawk.in
+function endgrent()
+@{
+ _gr_count = 0
+@}
+@c endfile
+@end group
+@end example
+
+@code{endgrent} resets @code{_gr_count} to zero so that @code{getgrent} can
+start over again.
+
+As with the user database routines, each function calls @code{_gr_init} to
+initialize the arrays. Doing so only incurs the extra overhead of running
+@code{grcat} if these functions are used (as opposed to moving the body of
+@code{_gr_init} into a @code{BEGIN} rule).
+
+Most of the work is in scanning the database and building the various
+associative arrays. The functions that the user calls are themselves very
+simple, relying on @code{awk}'s associative arrays to do the work.
+
+The @code{id} program in @ref{Id Program, ,Printing Out User Information},
+uses these functions.
+
+@node Library Names, , Group Functions, Library Functions
+@section Naming Library Function Global Variables
+
+@cindex namespace issues in @code{awk}
+@cindex documenting @code{awk} programs
+@cindex programs, documenting
+Due to the way the @code{awk} language evolved, variables are either
+@dfn{global} (usable by the entire program), or @dfn{local} (usable just by
+a specific function). There is no intermediate state analogous to
+@code{static} variables in C.
+
+Library functions often need to have global variables that they can use to
+preserve state information between calls to the function. For example,
+@code{getopt} uses the variable @code{_opti}
+(@pxref{Getopt Function, ,Processing Command Line Options}),
+and @code{mktime} uses the @code{_tm_months} array
+(@pxref{Mktime Function, ,Turning Dates Into Timestamps}).
+Such variables are called @dfn{private}, since the only functions that need to
+use them are the ones in the library.
+
+When writing a library function, you should try to choose names for your
+private variables so that they will not conflict with any variables used by
+either another library function or a user's main program. For example, a
+name like @samp{i} or @samp{j} is not a good choice, since user programs
+often use variable names like these for their own purposes.
+
+The example programs shown in this chapter all start the names of their
+private variables with an underscore (@samp{_}). Users generally don't use
+leading underscores in their variable names, so this convention immediately
+decreases the chances that the variable name will be accidentally shared
+with the user's program.
+
+In addition, several of the library functions use a prefix that helps
+indicate what function or set of functions uses the variables. For example,
+@code{_tm_months} in @code{mktime}
+(@pxref{Mktime Function, ,Turning Dates Into Timestamps}), and
+@code{_pw_byname} in the user data base routines
+(@pxref{Passwd Functions, ,Reading the User Database}).
+This convention is recommended, since it even further decreases the chance
+of inadvertent conflict among variable names.
+Note that this convention can be used equally well for variable names
+and for private function names.
+
+While I could have re-written all the library routines to use this
+convention, I did not do so, in order to show how my own @code{awk}
+programming style has evolved, and to provide some basis for this
+discussion.
+
+As a final note on variable naming, if a function makes global variables
+available for use by a main program, it is a good convention to start that
+variable's name with a capital letter.
+For example, @code{getopt}'s @code{Opterr} and @code{Optind} variables
+(@pxref{Getopt Function, ,Processing Command Line Options}).
+The leading capital letter indicates that it is global, while the fact that
+the variable name is not all capital letters indicates that the variable is
+not one of @code{awk}'s built-in variables, like @code{FS}.
+
+It is also important that @emph{all} variables in library functions
+that do not need to save state are in fact declared local. If this is
+not done, the variable could accidentally be used in the user's program,
+leading to bugs that are very difficult to track down.
+
+@example
+function lib_func(x, y, l1, l2)
+@{
+ @dots{}
+ @var{use variable} some_var # some_var could be local
+ @dots{} # but is not by oversight
+@}
+@end example
+
+@cindex Tcl
+A different convention, common in the Tcl community, is to use a single
+associative array to hold the values needed by the library function(s), or
+``package.'' This significantly decreases the number of actual global names
+in use. For example, the functions described in
+@ref{Passwd Functions, , Reading the User Database},
+might have used @code{@w{PW_data["inited"]}}, @code{@w{PW_data["total"]}},
+@code{@w{PW_data["count"]}} and @code{@w{PW_data["awklib"]}}, instead of
+@code{@w{_pw_inited}}, @code{@w{_pw_awklib}}, @code{@w{_pw_total}},
+and @code{@w{_pw_count}}.
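+
+Under that convention, the initialization test at the top of
+@code{@w{_pw_init}} might look something like the following sketch.
+(The library presented in this chapter does not actually use this
+style; the fragment is only an illustration of the idea.)
+
+@example
+function _pw_init(    oldfs, oldrs, olddol0, pwcat)
+@{
+    if (PW_data["inited"])    # one array element per piece of state
+        return
+    @dots{}
+    PW_data["inited"] = 1
+@}
+@end example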
+
+The conventions presented in this section are exactly that: conventions. You
+are not required to write your programs this way; we merely recommend that
+you do so.
+
+@node Sample Programs, Language History, Library Functions, Top
+@chapter Practical @code{awk} Programs
+
+This chapter presents a potpourri of @code{awk} programs for your reading
+enjoyment.
+@iftex
+There are two sections. The first presents @code{awk}
+versions of several common POSIX utilities.
+The second is a grab-bag of interesting programs.
+@end iftex
+
+Many of these programs use the library functions presented in
+@ref{Library Functions, ,A Library of @code{awk} Functions}.
+
+@menu
+* Clones:: Clones of common utilities.
+* Miscellaneous Programs:: Some interesting @code{awk} programs.
+@end menu
+
+@node Clones, Miscellaneous Programs, Sample Programs, Sample Programs
+@section Re-inventing Wheels for Fun and Profit
+
+This section presents a number of POSIX utilities that are implemented in
+@code{awk}. Re-inventing these programs in @code{awk} is often enjoyable,
+since the algorithms can be very clearly expressed, and usually the code is
+very concise and simple. This is true because @code{awk} does so much for you.
+
+It should be noted that these programs are not necessarily intended to
+replace the installed versions on your system. Instead, their
+purpose is to illustrate @code{awk} language programming for ``real world''
+tasks.
+
+The programs are presented in alphabetical order.
+
+@menu
+* Cut Program:: The @code{cut} utility.
+* Egrep Program:: The @code{egrep} utility.
+* Id Program:: The @code{id} utility.
+* Split Program:: The @code{split} utility.
+* Tee Program:: The @code{tee} utility.
+* Uniq Program:: The @code{uniq} utility.
+* Wc Program:: The @code{wc} utility.
+@end menu
+
+@node Cut Program, Egrep Program, Clones, Clones
+@subsection Cutting Out Fields and Columns
+
+@cindex @code{cut} utility
+The @code{cut} utility selects, or ``cuts,'' either characters or fields
+from its standard
+input and sends them to its standard output. @code{cut} can cut out either
+a list of characters, or a list of fields. By default, fields are separated
+by tabs, but you may supply a command line option to change the field
+@dfn{delimiter}, i.e.@: the field separator character. @code{cut}'s definition
+of fields is less general than @code{awk}'s.
+
+A common use of @code{cut} might be to pull out just the login names of
+logged-on users from the output of @code{who}. For example, the following
+pipeline generates a sorted, unique list of the logged-on users:
+
+@example
+who | cut -c1-8 | sort | uniq
+@end example
+
+The options for @code{cut} are:
+
+@table @code
+@item -c @var{list}
+Use @var{list} as the list of characters to cut out. Items within the list
+may be separated by commas, and ranges of characters can be separated with
+dashes. The list @samp{1-8,15,22-35} specifies characters one through
+eight, 15, and 22 through 35.
+
+@item -f @var{list}
+Use @var{list} as the list of fields to cut out.
+
+@item -d @var{delim}
+Use @var{delim} as the field separator character instead of the tab
+character.
+
+@item -s
+Suppress printing of lines that do not contain the field delimiter.
+@end table
+
+The @code{awk} implementation of @code{cut} uses the @code{getopt} library
+function (@pxref{Getopt Function, ,Processing Command Line Options}),
+and the @code{join} library function
+(@pxref{Join Function, ,Merging an Array Into a String}).
+
+The program begins with a comment describing the options and a @code{usage}
+function which prints out a usage message and exits. @code{usage} is called
+if invalid arguments are supplied.
+
+@findex cut.awk
+@example
+@c @group
+@c file eg/prog/cut.awk
+# cut.awk --- implement cut in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Options:
+# -f list Cut fields
+# -d c Field delimiter character
+# -c list Cut characters
+#
+# -s Suppress lines without the delimiter character
+
+function usage( e1, e2)
+@{
+ e1 = "usage: cut [-f list] [-d c] [-s] [files...]"
+ e2 = "usage: cut [-c list] [files...]"
+ print e1 > "/dev/stderr"
+ print e2 > "/dev/stderr"
+ exit 1
+@}
+@c endfile
+@c @end group
+@end example
+
+@noindent
+The variables @code{e1} and @code{e2} are used so that the function
+fits nicely on the
+@iftex
+page.
+@end iftex
+@ifinfo
+screen.
+@end ifinfo
+
+Next comes a @code{BEGIN} rule that parses the command line options.
+It sets @code{FS} to a single tab character, since that is @code{cut}'s
+default field separator. The output field separator is also set to be the
+same as the input field separator. Then @code{getopt} is used to step
+through the command line options. One or the other of the variables
+@code{by_fields} or @code{by_chars} is set to true, to indicate that
+processing should be done by fields or by characters respectively.
+When cutting by characters, the output field separator is set to the null
+string.
+
+@example
+@c @group
+@c file eg/prog/cut.awk
+BEGIN \
+@{
+ FS = "\t" # default
+ OFS = FS
+ while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) @{
+ if (c == "f") @{
+ by_fields = 1
+ fieldlist = Optarg
+ @} else if (c == "c") @{
+ by_chars = 1
+ fieldlist = Optarg
+ OFS = ""
+ @} else if (c == "d") @{
+ if (length(Optarg) > 1) @{
+ printf("Using first character of %s" \
+ " for delimiter\n", Optarg) > "/dev/stderr"
+ Optarg = substr(Optarg, 1, 1)
+ @}
+ FS = Optarg
+ OFS = FS
+ if (FS == " ") # defeat awk semantics
+ FS = "[ ]"
+ @} else if (c == "s")
+ suppress++
+ else
+ usage()
+ @}
+
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+@c endfile
+@c @end group
+@end example
+
+Special care is taken when the field delimiter is a space. Using
+@code{@w{" "}} (a single space) for the value of @code{FS} is
+incorrect---@code{awk} would
+separate fields with runs of spaces and/or tabs, and we want them to be
+separated with individual spaces. Also, note that after @code{getopt} is
+through, we have to clear out all the elements of @code{ARGV} from one to
+@code{Optind}, so that @code{awk} will not try to process the command line
+options as file names.
+
+After dealing with the command line options, the program verifies that the
+options make sense. Only one or the other of @samp{-c} and @samp{-f} should
+be used, and both require a field list. Then either @code{set_fieldlist} or
+@code{set_charlist} is called to pull apart the list of fields or
+characters.
+
+@example
+@c @group
+@c file eg/prog/cut.awk
+ if (by_fields && by_chars)
+ usage()
+
+ if (by_fields == 0 && by_chars == 0)
+ by_fields = 1 # default
+
+ if (fieldlist == "") @{
+ print "cut: needs list for -c or -f" > "/dev/stderr"
+ exit 1
+ @}
+
+@group
+ if (by_fields)
+ set_fieldlist()
+ else
+ set_charlist()
+@}
+@c endfile
+@end group
+@end example
+
+Here is @code{set_fieldlist}. It first splits the field list apart
+at the commas, into an array. Then, for each element of the array, it
+looks to see if it is actually a range, and if so splits it apart. The range
+is verified to make sure the first number is smaller than the second.
+Each number in the list is added to the @code{flist} array, which simply
+lists the fields that will be printed.
+Normal field splitting is used; the program simply lets @code{awk}
+handle the job of splitting the input into fields.
+
+@example
+@c @group
+@c file eg/prog/cut.awk
+function set_fieldlist( n, m, i, j, k, f, g)
+@{
+ n = split(fieldlist, f, ",")
+ j = 1 # index in flist
+ for (i = 1; i <= n; i++) @{
+ if (index(f[i], "-") != 0) @{ # a range
+ m = split(f[i], g, "-")
+ if (m != 2 || g[1] >= g[2]) @{
+ printf("bad field list: %s\n",
+ f[i]) > "/dev/stderr"
+ exit 1
+ @}
+ for (k = g[1]; k <= g[2]; k++)
+ flist[j++] = k
+ @} else
+ flist[j++] = f[i]
+ @}
+ nfields = j - 1
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{set_charlist} function is more complicated than @code{set_fieldlist}.
+The idea here is to use @code{gawk}'s @code{FIELDWIDTHS} variable
+(@pxref{Constant Size, ,Reading Fixed-width Data}),
+which describes constant width input. When using a character list, that is
+exactly what we have.
+
+Setting up @code{FIELDWIDTHS} is more complicated than simply listing the
+fields that need to be printed. We have to keep track of the fields to be
+printed, and also the intervening characters that have to be skipped.
+For example, suppose you wanted characters one through eight, 15, and
+22 through 35. You would use @samp{-c 1-8,15,22-35}. The necessary value
+for @code{FIELDWIDTHS} would be @code{@w{"8 6 1 6 14"}}. This gives us five
+fields, and what should be printed are @code{$1}, @code{$3}, and @code{$5}.
+The intermediate fields are ``filler,'' stuff in between the desired data.
+
+@code{flist} lists the fields to be printed, and @code{t} tracks the
+complete field list, including filler fields.
+
+@example
+@c @group
+@c file eg/prog/cut.awk
+function set_charlist( field, i, j, f, g, t,
+                          filler, last, len, n, m)
+@{
+ field = 1 # count total fields
+ n = split(fieldlist, f, ",")
+ j = 1 # index in flist
+ for (i = 1; i <= n; i++) @{
+ if (index(f[i], "-") != 0) @{ # range
+ m = split(f[i], g, "-")
+ if (m != 2 || g[1] >= g[2]) @{
+                printf("bad character list: %s\n",
+ f[i]) > "/dev/stderr"
+ exit 1
+ @}
+ len = g[2] - g[1] + 1
+ if (g[1] > 1) # compute length of filler
+ filler = g[1] - last - 1
+ else
+ filler = 0
+ if (filler)
+ t[field++] = filler
+ t[field++] = len # length of field
+ last = g[2]
+ flist[j++] = field - 1
+ @} else @{
+ if (f[i] > 1)
+ filler = f[i] - last - 1
+ else
+ filler = 0
+ if (filler)
+ t[field++] = filler
+ t[field++] = 1
+ last = f[i]
+ flist[j++] = field - 1
+ @}
+ @}
+@group
+ FIELDWIDTHS = join(t, 1, field - 1)
+ nfields = j - 1
+@}
+@end group
+@c endfile
+@end example
+
+Here is the rule that actually processes the data. If the @samp{-s} option
+was given, then @code{suppress} will be true. The first @code{if} statement
+makes sure that the input record does have the field separator. If
+@code{cut} is processing fields, @code{suppress} is true, and the field
+separator character is not in the record, then the record is skipped.
+
+If the record is valid, then at this point, @code{gawk} has split the data
+into fields, either using the character in @code{FS} or using fixed-length
+fields and @code{FIELDWIDTHS}. The loop goes through the list of fields
+that should be printed. If the corresponding field has data in it, it is
+printed. If the next field also has data, then the separator character is
+written out in between the fields.
+
+@c 2e: Could use `index($0, FS) != 0' instead of `$0 !~ FS', below
+
+@example
+@c @group
+@c file eg/prog/cut.awk
+@{
+ if (by_fields && suppress && $0 !~ FS)
+ next
+
+ for (i = 1; i <= nfields; i++) @{
+ if ($flist[i] != "") @{
+ printf "%s", $flist[i]
+ if (i < nfields && $flist[i+1] != "")
+ printf "%s", OFS
+ @}
+ @}
+ print ""
+@}
+@c endfile
+@c @end group
+@end example
+
+This version of @code{cut} relies on @code{gawk}'s @code{FIELDWIDTHS}
+variable to do the character-based cutting. While it would be possible in
+other @code{awk} implementations to use @code{substr}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}),
+it would also be extremely painful to do so.
+The @code{FIELDWIDTHS} variable supplies an elegant solution to the problem
+of picking the input line apart by characters.
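+
+As a stand-alone illustration of the technique, the following
+@code{gawk} fragment uses the @code{FIELDWIDTHS} value computed above
+for @samp{-c 1-8,15,22-35}, and prints just the wanted pieces of each
+input line. It is not part of @file{cut.awk} itself; it merely shows
+the effect of @code{FIELDWIDTHS}.
+
+@example
+# sketch: fixed-width splitting for -c 1-8,15,22-35
+BEGIN @{ FIELDWIDTHS = "8 6 1 6 14" @}
+@{ print $1 $3 $5 @}    # $2 and $4 are filler
+@end example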
+
+@node Egrep Program, Id Program, Cut Program, Clones
+@subsection Searching for Regular Expressions in Files
+
+@cindex @code{egrep} utility
+The @code{egrep} utility searches files for patterns. It uses regular
+expressions that are almost identical to those available in @code{awk}
+(@pxref{Regexp Constants, ,Regular Expression Constants}). It is used this way:
+
+@example
+egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{}
+@end example
+
+The @var{pattern} is a regexp.
+In typical usage, the regexp is quoted to prevent the shell from expanding
+any of the special characters as file name wildcards.
+Normally, @code{egrep} prints the
+lines that matched. If multiple file names are provided on the command
+line, each output line is preceded by the name of the file and a colon.
+
+The options are:
+
+@table @code
+@item -c
+Print out a count of the lines that matched the pattern, instead of the
+lines themselves.
+
+@item -s
+Be silent. No output is produced, and the exit value indicates whether
+or not the pattern was matched.
+
+@item -v
+Invert the sense of the test. @code{egrep} prints the lines that do
+@emph{not} match the pattern, and exits successfully if the pattern was not
+matched.
+
+@item -i
+Ignore case distinctions in both the pattern and the input data.
+
+@item -l
+Only print the names of the files that matched, not the lines that matched.
+
+@item -e @var{pattern}
+Use @var{pattern} as the regexp to match. The purpose of the @samp{-e}
+option is to allow patterns that start with a @samp{-}.
+@end table
+
+This version uses the @code{getopt} library function
+(@pxref{Getopt Function, ,Processing Command Line Options}),
+and the file transition library program
+(@pxref{Filetrans Function, ,Noting Data File Boundaries}).
+
+The program begins with a descriptive comment, and then a @code{BEGIN} rule
+that processes the command line arguments with @code{getopt}. The @samp{-i}
+(ignore case) option is particularly easy with @code{gawk}; we just use the
+@code{IGNORECASE} built-in variable
+(@pxref{Built-in Variables}).
+
+@findex egrep.awk
+@example
+@c @group
+@c file eg/prog/egrep.awk
+# egrep.awk --- simulate egrep in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Options:
+# -c count of lines
+# -s silent - use exit value
+# -v invert test, success if no match
+# -i ignore case
+# -l print filenames only
+# -e argument is pattern
+
+BEGIN @{
+ while ((c = getopt(ARGC, ARGV, "ce:svil")) != -1) @{
+ if (c == "c")
+ count_only++
+ else if (c == "s")
+ no_print++
+ else if (c == "v")
+ invert++
+ else if (c == "i")
+ IGNORECASE = 1
+ else if (c == "l")
+ filenames_only++
+ else if (c == "e")
+ pattern = Optarg
+ else
+ usage()
+ @}
+@c endfile
+@c @end group
+@end example
+
+Next comes the code that handles the @code{egrep}-specific behavior. If no
+pattern was supplied with @samp{-e}, the first non-option argument on the
+command line is used. The @code{awk} command line arguments up to (but not
+including) @code{ARGV[Optind]}
+are cleared, so that @code{awk} won't try to process them as files. If no
+files were specified, the standard input is used, and if multiple files were
+specified, we make sure to note this so that the file names can precede the
+matched lines in the output.
+
+The last two lines are commented out, since they are not needed in
+@code{gawk}. They should be uncommented if you have to use another version
+of @code{awk}.
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+ if (pattern == "")
+ pattern = ARGV[Optind++]
+
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+ if (Optind >= ARGC) @{
+ ARGV[1] = "-"
+ ARGC = 2
+ @} else if (ARGC - Optind > 1)
+ do_filenames++
+
+# if (IGNORECASE)
+# pattern = tolower(pattern)
+@}
+@c endfile
+@c @end group
+@end example
+
+The next set of lines should be uncommented if you are not using
+@code{gawk}. This rule translates all the characters in the input line
+into lower-case if the @samp{-i} option was specified. The rule is
+commented out since it is not necessary with @code{gawk}.
+@c bug: if a match happens, we output the translated line, not the original
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+#@{
+# if (IGNORECASE)
+# $0 = tolower($0)
+#@}
+@c endfile
+@c @end group
+@end example
+
+The @code{beginfile} function is called by the rule in @file{ftrans.awk}
+when each new file is processed. In this case, it is very simple; all it
+does is initialize a variable @code{fcount} to zero. @code{fcount} tracks
+how many lines in the current file matched the pattern.
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+function beginfile(junk)
+@{
+ fcount = 0
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{endfile} function is called after each file has been processed.
+It is used only when the user wants a count of the number of lines that
+matched. @code{no_print} will be true only if the exit status is desired.
+@code{count_only} will be true if line counts are desired. @code{egrep}
+will therefore only print line counts if printing and counting are enabled.
+The output format must be adjusted depending upon the number of files to be
+processed. Finally, @code{fcount} is added to @code{total}, so that we
+know how many lines altogether matched the pattern.
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+function endfile(file)
+@{
+ if (! no_print && count_only)
+ if (do_filenames)
+ print file ":" fcount
+ else
+ print fcount
+
+ total += fcount
+@}
+@c endfile
+@c @end group
+@end example
+
+This rule does most of the work of matching lines. The variable
+@code{matches} will be true if the line matched the pattern. If the user
+wants lines that did not match, the sense of @code{matches} is inverted
+using the @samp{!} operator. @code{fcount} is incremented with the value of
+@code{matches}, which will be either one or zero, depending upon a
+successful or unsuccessful match. If the line did not match, the
+@code{next} statement just moves on to the next record.
+
+There are several optimizations for performance in the following few lines
+of code. If the user only wants exit status (@code{no_print} is true), and
+we don't have to count lines, then it is enough to know that one line in
+this file matched, and we can skip on to the next file with @code{nextfile}.
+Along similar lines, if we are only printing file names, and we
+don't need to count lines, we can print the file name, and then skip to the
+next file with @code{nextfile}.
+
+Finally, each line is printed, with a leading filename and colon if
+necessary.
+
+@ignore
+2e: note, probably better to recode the last few lines as
+ if (! count_only) @{
+ if (no_print)
+ nextfile
+
+ if (filenames_only) @{
+ print FILENAME
+ nextfile
+ @}
+
+ if (do_filenames)
+ print FILENAME ":" $0
+ else
+ print
+ @}
+@end ignore
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+@{
+ matches = ($0 ~ pattern)
+ if (invert)
+ matches = ! matches
+
+ fcount += matches # 1 or 0
+
+ if (! matches)
+ next
+
+ if (no_print && ! count_only)
+ nextfile
+
+ if (filenames_only && ! count_only) @{
+ print FILENAME
+ nextfile
+ @}
+
+ if (do_filenames && ! count_only)
+ print FILENAME ":" $0
+ else if (! count_only)
+ print
+@}
+@c endfile
+@c @end group
+@end example
+
+@c @strong{Exercise}: rearrange the code inside @samp{if (! count_only)}.
+
+The @code{END} rule takes care of producing the correct exit status. If
+there were no matches, the exit status is one; otherwise it is zero.
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+END \
+@{
+ if (total == 0)
+ exit 1
+ exit 0
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{usage} function prints a usage message in case of invalid options
+and then exits.
+
+@example
+@c @group
+@c file eg/prog/egrep.awk
+function usage( e)
+@{
+ e = "Usage: egrep [-csvil] [-e pat] [files ...]"
+ print e > "/dev/stderr"
+ exit 1
+@}
+@c endfile
+@c @end group
+@end example
+
+The variable @code{e} is used so that the function fits nicely
+on the printed page.
+
+@node Id Program, Split Program, Egrep Program, Clones
+@subsection Printing Out User Information
+
+@cindex @code{id} utility
+The @code{id} utility lists a user's real and effective user-id numbers,
+real and effective group-id numbers, and the user's group set, if any.
+@code{id} will only print the effective user-id and group-id if they are
+different from the real ones. If possible, @code{id} will also supply the
+corresponding user and group names. The output might look like this:
+
+@example
+$ id
+@print{} uid=2076(arnold) gid=10(staff) groups=10(staff),4(tty)
+@end example
+
+This information is exactly what is provided by @code{gawk}'s
+@file{/dev/user} special file (@pxref{Special Files, ,Special File Names in @code{gawk}}).
+However, the @code{id} utility provides a more palatable output than just a
+string of numbers.
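+
+For comparison, the raw record that @file{/dev/user} would supply for
+the user shown above is just a list of numbers, along these lines
+(a hypothetical line; the exact group set depends on the system):
+
+@example
+2076 2076 10 10 10 4
+@end example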
+
+Here is a simple version of @code{id} written in @code{awk}.
+It uses the user database library functions
+(@pxref{Passwd Functions, ,Reading the User Database}),
+and the group database library functions
+(@pxref{Group Functions, ,Reading the Group Database}).
+
+The program is fairly straightforward. All the work is done in the
+@code{BEGIN} rule. The user and group id numbers are obtained from
+@file{/dev/user}. If there is no support for @file{/dev/user}, the program
+gives up.
+
+The code is repetitive. The entry in the user database for the real user-id
+number is split into parts at the @samp{:}. The name is the first field.
+Similar code is used for the effective user-id number, and the group
+numbers.
+
+@findex id.awk
+@example
+@c @group
+@c file eg/prog/id.awk
+# id.awk --- implement id in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# output is:
+# uid=12(foo) euid=34(bar) gid=3(baz) \
+# egid=5(blat) groups=9(nine),2(two),1(one)
+
+BEGIN \
+@{
+ if ((getline < "/dev/user") < 0) @{
+ err = "id: no /dev/user support - cannot run"
+ print err > "/dev/stderr"
+ exit 1
+ @}
+ close("/dev/user")
+
+ uid = $1
+ euid = $2
+ gid = $3
+ egid = $4
+
+ printf("uid=%d", uid)
+ pw = getpwuid(uid)
+@group
+ if (pw != "") @{
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ @}
+@end group
+
+ if (euid != uid) @{
+ printf(" euid=%d", euid)
+ pw = getpwuid(euid)
+ if (pw != "") @{
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ @}
+ @}
+
+ printf(" gid=%d", gid)
+ pw = getgrgid(gid)
+ if (pw != "") @{
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ @}
+
+ if (egid != gid) @{
+ printf(" egid=%d", egid)
+ pw = getgrgid(egid)
+ if (pw != "") @{
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ @}
+ @}
+
+ if (NF > 4) @{
+ printf(" groups=");
+ for (i = 5; i <= NF; i++) @{
+ printf("%d", $i)
+ pw = getgrgid($i)
+ if (pw != "") @{
+ split(pw, a, ":")
+ printf("(%s)", a[1])
+ @}
+ if (i < NF)
+ printf(",")
+ @}
+ @}
+ print ""
+@}
+@c endfile
+@c @end group
+@end example
+
+@c exercise!!!
+@ignore
+The POSIX version of @code{id} takes arguments that control which
+information is printed. Modify this version to accept the same
+arguments and perform in the same way.
+@end ignore
+
+@node Split Program, Tee Program, Id Program, Clones
+@subsection Splitting a Large File Into Pieces
+
+@cindex @code{split} utility
+The @code{split} program splits large text files into smaller pieces. By default,
+the output files are named @file{xaa}, @file{xab}, and so on. Each file has
+1000 lines in it, with the likely exception of the last file. To change the
+number of lines in each file, you supply a number on the command line
+preceded with a minus, e.g., @samp{-500} for files with 500 lines in them
+instead of 1000. To change the name of the output files to something like
+@file{myfileaa}, @file{myfileab}, and so on, you supply an additional
+argument that specifies the filename.
+
+Here is a version of @code{split} in @code{awk}. It uses the @code{ord} and
+@code{chr} functions presented in
+@ref{Ordinal Functions, ,Translating Between Characters and Numbers}.
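+
+The only piece of those functions that @file{split.awk} relies on is the
+ability to move from one letter to the next. For example (a small
+sketch, assuming @file{ord.awk} is loaded ahead of this fragment so that
+its @code{BEGIN} rule runs first):
+
+@example
+# sketch: advance a one-letter suffix
+BEGIN @{
+    s = "a"
+    s = chr(ord(s) + 1)    # s is now "b"
+    print s
+@}
+@end example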
+
+The program first sets its defaults, and then tests to make sure there are
+not too many arguments. It then looks at each argument in turn. The
+first argument could be a minus followed by a number. If it is, this happens
+to look like a negative number, so it is made positive, and that is the
+count of lines. The data file name is skipped over, and the final argument
+is used as the prefix for the output file names.
+
+@findex split.awk
+@example
+@c @group
+@c file eg/prog/split.awk
+# split.awk --- do split in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# usage: split [-num] [file] [outname]
+
+BEGIN \
+@{
+ outfile = "x" # default
+ count = 1000
+ if (ARGC > 4)
+ usage()
+
+ i = 1
+ if (ARGV[i] ~ /^-[0-9]+$/) @{
+ count = -ARGV[i]
+ ARGV[i] = ""
+ i++
+ @}
+ # test argv in case reading from stdin instead of file
+ if (i in ARGV)
+ i++ # skip data file name
+ if (i in ARGV) @{
+ outfile = ARGV[i]
+ ARGV[i] = ""
+ @}
+
+ s1 = s2 = "a"
+ out = (outfile s1 s2)
+@}
+@c endfile
+@c @end group
+@end example
+
+The next rule does most of the work. @code{tcount} (temporary count) tracks
+how many lines have been printed to the output file so far. If it is greater
+than @code{count}, it is time to close the current file and start a new one.
+@code{s1} and @code{s2} track the current suffixes for the file name. If
+they are both @samp{z}, the file is just too big. Otherwise, @code{s1}
+moves to the next letter in the alphabet and @code{s2} starts over again at
+@samp{a}.
+
+@example
+@c @group
+@c file eg/prog/split.awk
+@{
+ if (++tcount > count) @{
+ close(out)
+ if (s2 == "z") @{
+ if (s1 == "z") @{
+ printf("split: %s is too large to split\n", \
+ FILENAME) > "/dev/stderr"
+ exit 1
+ @}
+ s1 = chr(ord(s1) + 1)
+ s2 = "a"
+ @} else
+ s2 = chr(ord(s2) + 1)
+ out = (outfile s1 s2)
+ tcount = 1
+ @}
+ print > out
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{usage} function simply prints an error message and exits.
+
+@example
+@c @group
+@c file eg/prog/split.awk
+function usage( e)
+@{
+ e = "usage: split [-num] [file] [outname]"
+ print e > "/dev/stderr"
+ exit 1
+@}
+@c endfile
+@c @end group
+@end example
+
+@noindent
+The variable @code{e} is used so that the function
+fits nicely on the
+@iftex
+page.
+@end iftex
+@ifinfo
+screen.
+@end ifinfo
+
+This program is a bit sloppy; it relies on @code{awk} to close the last file
+for it automatically, instead of doing it in an @code{END} rule.
+
+@node Tee Program, Uniq Program, Split Program, Clones
+@subsection Duplicating Output Into Multiple Files
+
+@cindex @code{tee} utility
+The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies
+its standard input to its standard output, and also duplicates it to the
+files named on the command line. Its usage is:
+
+@example
+tee @r{[}-a@r{]} file @dots{}
+@end example
+
+The @samp{-a} option tells @code{tee} to append to the named files, instead of
+truncating them and starting over.
+
+The @code{BEGIN} rule first makes a copy of all the command line arguments
+into an array named @code{copy}.
+@code{ARGV[0]} is not copied, since it is not needed.
+@code{tee} cannot use @code{ARGV} directly, since @code{awk} will attempt to
+process each file named in @code{ARGV} as input data.
+
+If the first argument is @samp{-a}, then the flag variable
+@code{append} is set to true, and both @code{ARGV[1]} and
+@code{copy[1]} are deleted. If @code{ARGC} is less than two, then no file
+names were supplied, and @code{tee} prints a usage message and exits.
+Finally, @code{awk} is forced to read the standard input by setting
+@code{ARGV[1]} to @code{"-"}, and @code{ARGC} to two.
+
+@c 2e: the `ARGC--' in the `if (ARGV[1] == "-a")' isn't needed.
+
+@findex tee.awk
+@example
+@c @group
+@c file eg/prog/tee.awk
+# tee.awk --- tee in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+# Revised December 1995
+
+BEGIN \
+@{
+ for (i = 1; i < ARGC; i++)
+ copy[i] = ARGV[i]
+
+ if (ARGV[1] == "-a") @{
+ append = 1
+ delete ARGV[1]
+ delete copy[1]
+ ARGC--
+ @}
+ if (ARGC < 2) @{
+ print "usage: tee [-a] file ..." > "/dev/stderr"
+ exit 1
+ @}
+ ARGV[1] = "-"
+ ARGC = 2
+@}
+@c endfile
+@c @end group
+@end example
+
+The single rule does all the work. Since there is no pattern, it is
+executed for each line of input. The body of the rule simply prints the
+line into each file on the command line, and then to the standard output.
+
+@example
+@group
+@c file eg/prog/tee.awk
+@{
+ # moving the if outside the loop makes it run faster
+ if (append)
+ for (i in copy)
+ print >> copy[i]
+ else
+ for (i in copy)
+ print > copy[i]
+ print
+@}
+@c endfile
+@end group
+@end example
+
+It would have been possible to code the loop this way:
+
+@example
+for (i in copy)
+ if (append)
+ print >> copy[i]
+ else
+ print > copy[i]
+@end example
+
+@noindent
+This is more concise, but it is also less efficient. The @samp{if} is
+tested for each record and for each output file. By duplicating the loop
+body, the @samp{if} is only tested once for each input record. If there are
+@var{N} input records and @var{M} output files, the first method only
+executes @var{N} @samp{if} statements, while the second would execute
+@var{N}@code{*}@var{M} @samp{if} statements.
+
+Finally, the @code{END} rule cleans up, by closing all the output files.
+
+@example
+@c @group
+@c file eg/prog/tee.awk
+END \
+@{
+ for (i in copy)
+ close(copy[i])
+@}
+@c endfile
+@c @end group
+@end example
+
+@node Uniq Program, Wc Program, Tee Program, Clones
+@subsection Printing Non-duplicated Lines of Text
+
+@cindex @code{uniq} utility
+The @code{uniq} utility reads sorted lines of data on its standard input,
+and (by default) removes duplicate lines. In other words, only unique lines
+are printed, hence the name. @code{uniq} has a number of options. The usage is:
+
+@example
+uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]}
+@end example
+
+The option meanings are:
+
+@table @code
+@item -d
+Only print repeated lines.
+
+@item -u
+Only print non-repeated lines.
+
+@item -c
+Count lines. This option overrides @samp{-d} and @samp{-u}. Both repeated
+and non-repeated lines are counted.
+
+@item -@var{n}
+Skip @var{n} fields before comparing lines. The definition of fields is the
+same as @code{awk}'s default: non-whitespace characters separated by runs of
+spaces and/or tabs.
+
+@item +@var{n}
+Skip @var{n} characters before comparing lines. Any fields specified with
+@samp{-@var{n}} are skipped first.
+
+@item @var{input file}
+Data is read from the input file named on the command line, instead of from
+the standard input.
+
+@item @var{output file}
+The generated output is sent to the named output file, instead of to the
+standard output.
+@end table
+
+Normally @code{uniq} behaves as if both the @samp{-d} and @samp{-u} options
+had been provided.
+
+Here is an @code{awk} implementation of @code{uniq}. It uses the
+@code{getopt} library function
+(@pxref{Getopt Function, ,Processing Command Line Options}),
+and the @code{join} library function
+(@pxref{Join Function, ,Merging an Array Into a String}).
+
+The program begins with a @code{usage} function and then a brief outline of
+the options and their meanings in a comment.
+
+The @code{BEGIN} rule deals with the command line arguments and options. It
+uses a trick to get @code{getopt} to handle options of the form @samp{-25},
+treating such an option as the option letter @samp{2} with an argument of
+@samp{5}. If two or more digits were indeed supplied (@code{Optarg} looks
+like a number), the option digit is
+concatenated with @code{Optarg}, and the result is added to zero to make
+it into a number. For example, @samp{-25} yields the digit @samp{2} with
+@code{Optarg} equal to @code{"5"}; concatenating and adding zero produces
+the number 25. If there is only one digit in the option, then
+@code{Optarg} is not needed, and @code{Optind} must be decremented so that
+@code{getopt} will process it next time. This code is admittedly a bit
+tricky.
+
+If no options were supplied, then the default is taken, to print both
+repeated and non-repeated lines. The output file, if provided, is assigned
+to @code{outputfile}. Earlier, @code{outputfile} was initialized to the
+standard output, @file{/dev/stdout}.
+
+@findex uniq.awk
+@example
+@c @group
+@c file eg/prog/uniq.awk
+# uniq.awk --- do uniq in awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+function usage( e)
+@{
+ e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]"
+ print e > "/dev/stderr"
+ exit 1
+@}
+
+# -c count lines. overrides -d and -u
+# -d only repeated lines
+# -u only non-repeated lines
+# -n skip n fields
+# +n skip n characters, skip fields first
+
+BEGIN \
+@{
+ count = 1
+ outputfile = "/dev/stdout"
+ opts = "udc0:1:2:3:4:5:6:7:8:9:"
+ while ((c = getopt(ARGC, ARGV, opts)) != -1) @{
+ if (c == "u")
+ non_repeated_only++
+ else if (c == "d")
+ repeated_only++
+ else if (c == "c")
+ do_count++
+ else if (index("0123456789", c) != 0) @{
+ # getopt requires args to options
+ # this messes us up for things like -5
+ if (Optarg ~ /^[0-9]+$/)
+ fcount = (c Optarg) + 0
+ else @{
+ fcount = c + 0
+ Optind--
+ @}
+ @} else
+ usage()
+ @}
+
+ if (ARGV[Optind] ~ /^\+[0-9]+$/) @{
+ charcount = substr(ARGV[Optind], 2) + 0
+ Optind++
+ @}
+
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+
+ if (repeated_only == 0 && non_repeated_only == 0)
+ repeated_only = non_repeated_only = 1
+
+ if (ARGC - Optind == 2) @{
+ outputfile = ARGV[ARGC - 1]
+ ARGV[ARGC - 1] = ""
+ @}
+@}
+@c endfile
+@c @end group
+@end example
+
+The following function, @code{are_equal}, compares the current line,
+@code{$0}, to the
+previous line, @code{last}. It handles skipping fields and characters.
+
+If no field count and no character count were specified, @code{are_equal}
+simply returns one or zero depending upon the result of a simple string
+comparison of @code{last} and @code{$0}. Otherwise, things get more
+complicated.
+
+If fields have to be skipped, each line is broken into an array using
+@code{split}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}),
+and then the desired fields are joined back into a line using @code{join}.
+The joined lines are stored in @code{clast} and @code{cline}.
+If no fields are skipped, @code{clast} and @code{cline} are set to
+@code{last} and @code{$0} respectively.
+
+Finally, if characters are skipped, @code{substr} is used to strip off the
+leading @code{charcount} characters in @code{clast} and @code{cline}. The
+two strings are then compared, and @code{are_equal} returns the result.
+
+@example
+@c @group
+@c file eg/prog/uniq.awk
+function are_equal( n, m, clast, cline, alast, aline)
+@{
+ if (fcount == 0 && charcount == 0)
+ return (last == $0)
+
+ if (fcount > 0) @{
+ n = split(last, alast)
+ m = split($0, aline)
+ clast = join(alast, fcount+1, n)
+ cline = join(aline, fcount+1, m)
+ @} else @{
+ clast = last
+ cline = $0
+ @}
+ if (charcount) @{
+ clast = substr(clast, charcount + 1)
+ cline = substr(cline, charcount + 1)
+ @}
+
+ return (clast == cline)
+@}
+@c endfile
+@c @end group
+@end example
+
+The following two rules are the body of the program. The first one is
+executed only for the very first line of data. It sets @code{last} equal to
+@code{$0}, so that subsequent lines of text have something to be compared to.
+
+The second rule does the work. The variable @code{equal} will be one or zero
+depending upon the results of @code{are_equal}'s comparison. If @code{uniq}
+is counting lines (the @samp{-c} option), then the @code{count} variable is
+incremented if the lines are equal. Otherwise the previous line is printed
+along with its count, and @code{count} is reset, since the two lines are not
+equal.
+
+If @code{uniq} is not counting, @code{count} is incremented if the lines are
+equal. Otherwise, if @code{uniq} is printing only repeated lines and more
+than one line has been seen, or if @code{uniq} is printing only non-repeated
+lines and only one line has been seen, then the previous line is printed,
+and @code{count} is reset.
+
+Finally, similar logic is used in the @code{END} rule to print the final
+line of input data.
+
+@example
+@c @group
+@c file eg/prog/uniq.awk
+@group
+NR == 1 @{
+ last = $0
+ next
+@}
+@end group
+
+@{
+ equal = are_equal()
+
+ if (do_count) @{ # overrides -d and -u
+ if (equal)
+ count++
+ else @{
+ printf("%4d %s\n", count, last) > outputfile
+ last = $0
+ count = 1 # reset
+ @}
+ next
+ @}
+
+ if (equal)
+ count++
+ else @{
+ if ((repeated_only && count > 1) ||
+ (non_repeated_only && count == 1))
+ print last > outputfile
+ last = $0
+ count = 1
+ @}
+@}
+
+@group
+END @{
+ if (do_count)
+ printf("%4d %s\n", count, last) > outputfile
+ else if ((repeated_only && count > 1) ||
+ (non_repeated_only && count == 1))
+ print last > outputfile
+@}
+@end group
+@c endfile
+@c @end group
+@end example
+
+@node Wc Program, , Uniq Program, Clones
+@subsection Counting Things
+
+@cindex @code{wc} utility
+The @code{wc} (word count) utility counts lines, words, and characters in
+one or more input files. Its usage is:
+
+@example
+wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]}
+@end example
+
+If no files are specified on the command line, @code{wc} reads its standard
+input. If there are multiple files, it will also print total counts for all
+the files. The options and their meanings are:
+
+@table @code
+@item -l
+Only count lines.
+
+@item -w
+Only count words.
+A ``word'' is a contiguous sequence of non-whitespace characters, separated
+by spaces and/or tabs. Happily, this is the normal way @code{awk} separates
+fields in its input data.
+
+@item -c
+Only count characters.
+@end table
+
+Implementing @code{wc} in @code{awk} is particularly elegant, since
+@code{awk} does a lot of the work for us; it splits lines into words (i.e.@:
+fields) and counts them, it counts lines (i.e.@: records) for us, and it can
+easily tell us how long a line is.
+
+This version uses the @code{getopt} library function
+(@pxref{Getopt Function, ,Processing Command Line Options}),
+and the file transition functions
+(@pxref{Filetrans Function, ,Noting Data File Boundaries}).
+
+This version has one major difference from traditional versions of @code{wc}.
+Our version always prints the counts in the order lines, words,
+and characters. Traditional versions note the order of the @samp{-l},
+@samp{-w}, and @samp{-c} options on the command line, and print the counts
+in that order.
+
+The @code{BEGIN} rule does the argument processing.
+The variable @code{print_total} will
+be true if more than one file was named on the command line.
+
+@findex wc.awk
+@example
+@c @group
+@c file eg/prog/wc.awk
+# wc.awk --- count lines, words, characters
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Options:
+# -l only count lines
+# -w only count words
+# -c only count characters
+#
+# Default is to count lines, words, characters
+
+BEGIN @{
+ # let getopt print a message about
+ # invalid options. we ignore them
+ while ((c = getopt(ARGC, ARGV, "lwc")) != -1) @{
+ if (c == "l")
+ do_lines = 1
+ else if (c == "w")
+ do_words = 1
+ else if (c == "c")
+ do_chars = 1
+ @}
+ for (i = 1; i < Optind; i++)
+ ARGV[i] = ""
+
+ # if no options, do all
+ if (! do_lines && ! do_words && ! do_chars)
+ do_lines = do_words = do_chars = 1
+
+    print_total = (ARGC - i > 1)
+@}
+@c endfile
+@c @end group
+@end example
+
+The @code{beginfile} function is simple; it just resets the counts of lines,
+words, and characters to zero, and saves the current file name in
+@code{fname}.
+
+The @code{endfile} function adds the current file's numbers to the running
+totals of lines, words, and characters. It then prints out those numbers
+for the file that was just read. It relies on @code{beginfile} to reset the
+numbers for the following data file.
+
+@example
+@c @group
+@c file eg/prog/wc.awk
+function beginfile(file)
+@{
+ chars = lines = words = 0
+ fname = FILENAME
+@}
+
+function endfile(file)
+@{
+ tchars += chars
+ tlines += lines
+ twords += words
+@group
+ if (do_lines)
+ printf "\t%d", lines
+@end group
+ if (do_words)
+ printf "\t%d", words
+ if (do_chars)
+ printf "\t%d", chars
+ printf "\t%s\n", fname
+@}
+@c endfile
+@c @end group
+@end example
+
+There is one rule that is executed for each line. It adds the length of the
+record to @code{chars}. It has to add one, since the newline character
+separating records (the value of @code{RS}) is not part of the record
+itself. @code{lines} is incremented for each line read, and @code{words} is
+incremented by the value of @code{NF}, the number of ``words'' on this
+line.@footnote{Examine the code in
+@ref{Filetrans Function, ,Noting Data File Boundaries}.
+Why must @code{wc} use a separate @code{lines} variable, instead of using
+the value of @code{FNR} in @code{endfile}?}
+
+Finally, the @code{END} rule simply prints the totals for all the files.
+
+@example
+@c @group
+@c file eg/prog/wc.awk
+# do per line
+@{
+ chars += length($0) + 1 # get newline
+ lines++
+ words += NF
+@}
+
+END @{
+ if (print_total) @{
+ if (do_lines)
+ printf "\t%d", tlines
+ if (do_words)
+ printf "\t%d", twords
+ if (do_chars)
+ printf "\t%d", tchars
+ print "\ttotal"
+ @}
+@}
+@c endfile
+@c @end group
+@end example
+
+@node Miscellaneous Programs, , Clones, Sample Programs
+@section A Grab Bag of @code{awk} Programs
+
+This section is a large ``grab bag'' of miscellaneous programs.
+We hope you find them both interesting and enjoyable.
+
+@menu
+* Dupword Program:: Finding duplicated words in a document.
+* Alarm Program:: An alarm clock.
+* Translate Program:: A program similar to the @code{tr} utility.
+* Labels Program:: Printing mailing labels.
+* Word Sorting:: A program to produce a word usage count.
+* History Sorting:: Eliminating duplicate entries from a history
+ file.
+* Extract Program:: Pulling out programs from Texinfo source
+ files.
+* Simple Sed:: A Simple Stream Editor.
+* Igawk Program:: A wrapper for @code{awk} that includes files.
+@end menu
+
+@node Dupword Program, Alarm Program, Miscellaneous Programs, Miscellaneous Programs
+@subsection Finding Duplicated Words in a Document
+
+A common error when writing large amounts of prose is to accidentally
+duplicate words. Often you will see this in text as something like ``the
+the program does the following @dots{}.'' When the text is on-line, often
+the duplicated words occur at the end of one line and the beginning of
+another, making them very difficult to spot.
+@c as here!
+
+This program, @file{dupword.awk}, scans through a file one line at a time,
+and looks for adjacent occurrences of the same word. It also saves the last
+word on a line (in the variable @code{prev}) for comparison with the first
+word on the next line.
+
+The first statement makes sure that the line is all lower-case, so that,
+for example,
+``The'' and ``the'' compare equal to each other. The second statement
+removes all non-alphanumeric and non-whitespace characters from the line, so
+that punctuation does not affect the comparison either. This sometimes
+leads to reports of duplicated words that really are different, but this is
+unusual.
+
+@findex dupword.awk
+@example
+@group
+@c file eg/prog/dupword.awk
+# dupword --- find duplicate words in text
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# December 1991
+
+@{
+ $0 = tolower($0)
+ gsub(/[^A-Za-z0-9 \t]/, "");
+ if ($1 == prev)
+ printf("%s:%d: duplicate %s\n",
+ FILENAME, FNR, $1)
+ for (i = 2; i <= NF; i++)
+ if ($i == $(i-1))
+ printf("%s:%d: duplicate %s\n",
+ FILENAME, FNR, $i)
+ prev = $NF
+@}
+@c endfile
+@end group
+@end example
+
+@node Alarm Program, Translate Program, Dupword Program, Miscellaneous Programs
+@subsection An Alarm Clock Program
+
+The following program is a simple ``alarm clock'' program.
+You give it a time of day, and an optional message. At the given time,
+it prints the message on the standard output. In addition, you can give it
+the number of times to repeat the message, and also a delay between
+repetitions.
+
+This program uses the @code{gettimeofday} function from
+@ref{Gettimeofday Function, ,Managing the Time of Day}.
+
+All the work is done in the @code{BEGIN} rule. The first part is argument
+checking and setting of defaults: the delay, the count, and the message to
+print. If the user supplied a message that does not contain the ASCII BEL
+character (known as the ``alert'' character, @samp{\a}), then the BEL
+character is added to the message. (On many systems, printing the ASCII
+BEL generates some sort of audible alert. Thus, when the alarm goes off,
+the system calls attention to itself, in case the user is not looking at
+their computer or terminal.)
+
+@findex alarm.awk
+@example
+@c @group
+@c file eg/prog/alarm.awk
+# alarm --- set an alarm
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# usage: alarm time [ "message" [ count [ delay ] ] ]
+
+BEGIN \
+@{
+ # Initial argument sanity checking
+ usage1 = "usage: alarm time ['message' [count [delay]]]"
+ usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
+
+ if (ARGC < 2) @{
+ print usage1 > "/dev/stderr"
+ print usage2 > "/dev/stderr"
+ exit 1
+ @} else if (ARGC == 5) @{
+ delay = ARGV[4] + 0
+ count = ARGV[3] + 0
+ message = ARGV[2]
+ @} else if (ARGC == 4) @{
+ count = ARGV[3] + 0
+ message = ARGV[2]
+ @} else if (ARGC == 3) @{
+ message = ARGV[2]
+ @} else if (ARGV[1] !~ /[0-9]?[0-9]:[0-9][0-9]/) @{
+ print usage1 > "/dev/stderr"
+ print usage2 > "/dev/stderr"
+ exit 1
+ @}
+
+ # set defaults for once we reach the desired time
+ if (delay == 0)
+ delay = 180 # 3 minutes
+ if (count == 0)
+ count = 5
+@group
+ if (message == "")
+ message = sprintf("\aIt is now %s!\a", ARGV[1])
+ else if (index(message, "\a") == 0)
+ message = "\a" message "\a"
+@end group
+@c endfile
+@end example
+
+The next section of code turns the alarm time into hours and minutes,
+and converts it if necessary to a 24-hour clock. Then it turns that
+time into a count of the seconds since midnight. Next it turns the current
+time into a count of seconds since midnight. The difference between the two
+is how long to wait before setting off the alarm.
+
+@example
+@c @group
+@c file eg/prog/alarm.awk
+ # split up dest time
+ split(ARGV[1], atime, ":")
+ hour = atime[1] + 0 # force numeric
+ minute = atime[2] + 0 # force numeric
+
+ # get current broken down time
+ gettimeofday(now)
+
+ # if time given is 12-hour hours and it's after that
+ # hour, e.g., `alarm 5:30' at 9 a.m. means 5:30 p.m.,
+ # then add 12 to real hour
+ if (hour < 12 && now["hour"] > hour)
+ hour += 12
+
+ # set target time in seconds since midnight
+ target = (hour * 60 * 60) + (minute * 60)
+
+ # get current time in seconds since midnight
+ current = (now["hour"] * 60 * 60) + \
+ (now["minute"] * 60) + now["second"]
+
+ # how long to sleep for
+ naptime = target - current
+ if (naptime <= 0) @{
+ print "time is in the past!" > "/dev/stderr"
+ exit 1
+ @}
+@c endfile
+@c @end group
+@end example
+
+Finally, the program uses the @code{system} function
+(@pxref{I/O Functions, ,Built-in Functions for Input/Output})
+to call the @code{sleep} utility. The @code{sleep} utility simply pauses
+for the given number of seconds. If the exit status is not zero,
+the program assumes that @code{sleep} was interrupted, and exits. If
+@code{sleep} exited with an OK status (zero), then the program prints the
+message in a loop, again using @code{sleep} to delay for however many
+seconds are necessary.
+
+@example
+@c @group
+@c file eg/prog/alarm.awk
+ # zzzzzz..... go away if interrupted
+ if (system(sprintf("sleep %d", naptime)) != 0)
+ exit 1
+
+ # time to notify!
+ command = sprintf("sleep %d", delay)
+ for (i = 1; i <= count; i++) @{
+ print message
+ # if sleep command interrupted, go away
+ if (system(command) != 0)
+ break
+ @}
+
+ exit 0
+@}
+@c endfile
+@c @end group
+@end example
+
+@node Translate Program, Labels Program, Alarm Program, Miscellaneous Programs
+@subsection Transliterating Characters
+
+The system @code{tr} utility transliterates characters. For example, it is
+often used to map upper-case letters into lower-case, for further
+processing.
+
+@example
+@var{generate data} | tr '[A-Z]' '[a-z]' | @var{process data} @dots{}
+@end example
+
+You give @code{tr} two lists of characters enclosed in square brackets.
+Usually, the lists are quoted to keep the shell from attempting to do a
+filename expansion.@footnote{On older, non-POSIX systems, @code{tr} often
+does not require that the lists be enclosed in square brackets and quoted.
+This is a feature.} When processing the input, the
+first character in the first list is replaced with the first character in the
+second list, the second character in the first list is replaced with the
+second character in the second list, and so on.
+If there are more characters in the ``from'' list than in the ``to'' list,
+the last character of the ``to'' list is used for the remaining characters
+in the ``from'' list.
+
+Some time ago,
+@c early or mid-1989!
+a user proposed to us that we add a transliteration function to @code{gawk}.
+Being opposed to ``creeping featurism,'' I wrote the following program to
+prove that character transliteration could be done with a user-level
+function. This program is not as complete as the system @code{tr} utility,
+but it will do most of the job.
+
+The @code{translate} program demonstrates one of the few weaknesses of
+standard
+@code{awk}: dealing with individual characters is very painful, requiring
+repeated use of the @code{substr}, @code{index}, and @code{gsub} built-in
+functions
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@footnote{This
+program was written before @code{gawk} acquired the ability to
+split each character in a string into separate array elements.
+How might this ability simplify the program?}
+
+There are two functions. The first, @code{stranslate}, takes three
+arguments.
+
+@table @code
+@item from
+A list of characters to translate from.
+
+@item to
+A list of characters to translate to.
+
+@item target
+The string to do the translation on.
+@end table
+
+Associative arrays make the translation part fairly easy. @code{t_ar} holds
+the ``to'' characters, indexed by the ``from'' characters. Then a simple
+loop goes through @code{from}, one character at a time. For each character
+in @code{from}, if the character appears in @code{target}, @code{gsub}
+is used to change it to the corresponding @code{to} character.
+
+The @code{translate} function simply calls @code{stranslate} using @code{$0}
+as the target. The main program sets two global variables, @code{FROM} and
+@code{TO}, from the command line, and then changes @code{ARGV} so that
+@code{awk} will read from the standard input.
+
+Finally, the processing rule simply calls @code{translate} for each record.
+
+@findex translate.awk
+@example
+@c @group
+@c file eg/prog/translate.awk
+# translate --- do tr like stuff
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# August 1989
+
+# bugs: does not handle things like: tr A-Z a-z, it has
+# to be spelled out. However, if `to' is shorter than `from',
+# the last character in `to' is used for the rest of `from'.
+
+function stranslate(from, to, target, lf, lt, t_ar, i, c)
+@{
+ lf = length(from)
+ lt = length(to)
+ for (i = 1; i <= lt; i++)
+ t_ar[substr(from, i, 1)] = substr(to, i, 1)
+ if (lt < lf)
+ for (; i <= lf; i++)
+ t_ar[substr(from, i, 1)] = substr(to, lt, 1)
+ for (i = 1; i <= lf; i++) @{
+ c = substr(from, i, 1)
+ if (index(target, c) > 0)
+ gsub(c, t_ar[c], target)
+ @}
+ return target
+@}
+
+@group
+function translate(from, to)
+@{
+ return $0 = stranslate(from, to, $0)
+@}
+@end group
+
+# main program
+BEGIN @{
+ if (ARGC < 3) @{
+ print "usage: translate from to" > "/dev/stderr"
+ exit
+ @}
+ FROM = ARGV[1]
+ TO = ARGV[2]
+ ARGC = 2
+ ARGV[1] = "-"
+@}
+
+@{
+ translate(FROM, TO)
+ print
+@}
+@c endfile
+@c @end group
+@end example
+
+While it is possible to do character transliteration in a user-level
+function, it is not necessarily efficient, and we started to consider adding
+a built-in function. However, shortly after writing this program, we learned
+that the System V Release 4 @code{awk} had added the @code{toupper} and
+@code{tolower} functions. These functions handle the vast majority of the
+cases where character transliteration is necessary, and so we chose to
+simply add those functions to @code{gawk} as well, and then leave well
+enough alone.
+
+An obvious improvement to this program would be to set up the
+@code{t_ar} array only once, in a @code{BEGIN} rule. However, this
+assumes that the ``from'' and ``to'' lists
+will never change throughout the lifetime of the program.
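+
+As a rough sketch of that change (with @code{t_ar}, @code{lf}, @code{lt},
+and @code{i} becoming global variables), the table construction might be
+moved into the existing @code{BEGIN} rule, leaving @code{stranslate} with
+only the @code{gsub} loop:
+
+@example
+BEGIN @{
+    # @dots{} argument checking as before @dots{}
+    FROM = ARGV[1]
+    TO = ARGV[2]
+    ARGC = 2
+    ARGV[1] = "-"
+
+    # build the translation table just once
+    lf = length(FROM)
+    lt = length(TO)
+    for (i = 1; i <= lt; i++)
+        t_ar[substr(FROM, i, 1)] = substr(TO, i, 1)
+    if (lt < lf)
+        for (; i <= lf; i++)
+            t_ar[substr(FROM, i, 1)] = substr(TO, lt, 1)
+@}
+@end example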
+
+@node Labels Program, Word Sorting, Translate Program, Miscellaneous Programs
+@subsection Printing Mailing Labels
+
+Here is a ``real world''@footnote{``Real world'' is defined as
+``a program actually used to get something done.''}
+program. This script reads lists of names and
+addresses, and generates mailing labels. Each page of labels has 20 labels
+on it, two across and ten down. The addresses are guaranteed to be no more
+than five lines of data. Each address is separated from the next by a blank
+line.
+
+The basic idea is to read 20 labels worth of data. Each line of each label
+is stored in the @code{line} array. The single rule takes care of filling
+the @code{line} array and printing the page when 20 labels have been read.
+
+The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that
+@code{awk} will split records at blank lines
+(@pxref{Records, ,How Input is Split into Records}).
+It sets @code{MAXLINES} to 100, since @code{MAXLINES} is the maximum number
+of lines on the page (20 * 5 = 100).
+
+Most of the work is done in the @code{printpage} function.
+The label lines are stored sequentially in the @code{line} array. But they
+have to be printed horizontally; @code{line[1]} next to @code{line[6]},
+@code{line[2]} next to @code{line[7]}, and so on. Two loops are used to
+accomplish this. The outer loop, controlled by @code{i}, steps through
+the data ten lines at a time; each group of ten lines makes up one row
+of labels. The inner loop,
+controlled by @code{j}, goes through the lines within the row.
+As @code{j} goes from zero to four, @samp{i+j} is the @code{j}'th line in
+the row, and @samp{i+j+5} is the entry next to it. The output ends up
+looking something like this:
+
+@example
+line 1 line 6
+line 2 line 7
+line 3 line 8
+line 4 line 9
+line 5 line 10
+@end example
+
+As a final note, at lines 21 and 61, an extra blank line is printed, to keep
+the output lined up on the labels. This is dependent on the particular
+brand of labels in use when the program was written. You will also note
+that there are two blank lines at the top and two blank lines at the bottom.
+
+The @code{END} rule arranges to flush the final page of labels; there may
+not have been an even multiple of 20 labels in the data.
+
+@findex labels.awk
+@example
+@c @group
+@c file eg/prog/labels.awk
+# labels.awk
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# June 1992
+
+# Program to print labels. Each label is 5 lines of data
+# that may have blank lines. The label sheets have 2
+# blank lines at the top and 2 at the bottom.
+
+BEGIN @{ RS = "" ; MAXLINES = 100 @}
+
+function printpage( i, j)
+@{
+ if (Nlines <= 0)
+ return
+
+ printf "\n\n" # header
+
+ for (i = 1; i <= Nlines; i += 10) @{
+ if (i == 21 || i == 61)
+ print ""
+ for (j = 0; j < 5; j++) @{
+ if (i + j > MAXLINES)
+ break
+ printf " %-41s %s\n", line[i+j], line[i+j+5]
+ @}
+ print ""
+ @}
+
+ printf "\n\n" # footer
+
+ for (i in line)
+ line[i] = ""
+@}
+
+# main rule
+@{
+ if (Count >= 20) @{
+ printpage()
+ Count = 0
+ Nlines = 0
+ @}
+ n = split($0, a, "\n")
+ for (i = 1; i <= n; i++)
+ line[++Nlines] = a[i]
+ for (; i <= 5; i++)
+ line[++Nlines] = ""
+ Count++
+@}
+
+END \
+@{
+ printpage()
+@}
+@c endfile
+@c @end group
+@end example
+
+@node Word Sorting, History Sorting, Labels Program, Miscellaneous Programs
+@subsection Generating Word Usage Counts
+
+The following @code{awk} program prints
+the number of occurrences of each word in its input. It illustrates the
+associative nature of @code{awk} arrays by using strings as subscripts. It
+also demonstrates the @samp{for @var{x} in @var{array}} construction.
+Finally, it shows how @code{awk} can be used in conjunction with other
+utility programs to do a useful task of some complexity with a minimum of
+effort. Some explanations follow the program listing.
+
+@example
+awk '
+# Print list of word frequencies
+@{
+ for (i = 1; i <= NF; i++)
+ freq[$i]++
+@}
+
+END @{
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+@}'
+@end example
+
+The first thing to notice about this program is that it has two rules. The
+first rule, because it has an empty pattern, is executed on every line of
+the input. It uses @code{awk}'s field-accessing mechanism
+(@pxref{Fields, ,Examining Fields}) to pick out the individual words from
+the line, and the built-in variable @code{NF} (@pxref{Built-in Variables})
+to know how many fields are available.
+
+For each input word, an element of the array @code{freq} is incremented to
+reflect that the word has been seen an additional time.
+
+The second rule, because it has the pattern @code{END}, is not executed
+until the input has been exhausted. It prints out the contents of the
+@code{freq} table that has been built up inside the first action.
+
+This program has several problems that would prevent it from being
+useful by itself on real text files:
+
+@itemize @bullet
+@item
+Words are detected using the @code{awk} convention that fields are
+separated by whitespace and that other characters in the input (except
+newlines) don't have any special meaning to @code{awk}. This means that
+punctuation characters count as part of words.
+
+@item
+The @code{awk} language considers upper- and lower-case characters to be
+distinct. Therefore, @samp{bartender} and @samp{Bartender} are not treated
+as the same word. This is undesirable since, in normal text, words
+are capitalized if they begin sentences, and a frequency analyzer should not
+be sensitive to capitalization.
+
+@iftex
+@page
+@end iftex
+@item
+The output does not come out in any useful order. You're more likely to be
+interested in which words occur most frequently, or in having an alphabetized
+table of how frequently each word occurs.
+@end itemize
+
+The way to solve these problems is to use some of the more advanced
+features of the @code{awk} language. First, we use @code{tolower} to remove
+case distinctions. Next, we use @code{gsub} to remove punctuation
+characters. Finally, we use the system @code{sort} utility to process the
+output of the @code{awk} script. Here is the new version of
+the program:
+
+@findex wordfreq.awk
+@example
+@c file eg/prog/wordfreq.awk
+# Print list of word frequencies
+@{
+ $0 = tolower($0) # remove case distinctions
+ gsub(/[^a-z0-9_ \t]/, "", $0) # remove punctuation
+ for (i = 1; i <= NF; i++)
+ freq[$i]++
+@}
+@c endfile
+
+END @{
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+@}
+@end example
+
+Assuming we have saved this program in a file named @file{wordfreq.awk},
+and that the data is in @file{file1}, the following pipeline
+
+@example
+awk -f wordfreq.awk file1 | sort +1 -nr
+@end example
+
+@noindent
+produces a table of the words appearing in @file{file1} in order of
+decreasing frequency.
+
+The @code{awk} program suitably massages the data and produces a word
+frequency table, which is not ordered.
+
+The @code{awk} script's output is then sorted by the @code{sort} utility and
+printed on the terminal. The options given to @code{sort} in this example
+specify to sort using the second field of each input line (skipping one field),
+that the sort keys should be treated as numeric quantities (otherwise
+@samp{15} would come before @samp{5}), and that the sorting should be done
+in descending (reverse) order.
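+
+On systems whose @code{sort} accepts the newer POSIX @samp{-k} syntax for
+specifying sort keys, an equivalent pipeline might be written as:
+
+@example
+awk -f wordfreq.awk file1 | sort -k 2 -nr
+@end example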
+
+We could have even done the @code{sort} from within the program, by
+changing the @code{END} action to:
+
+@example
+@c file eg/prog/wordfreq.awk
+END @{
+ sort = "sort +1 -nr"
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word] | sort
+ close(sort)
+@}
+@c endfile
+@end example
+
+You would have to use this way of sorting on systems that do not
+have true pipes.
+
+See the general operating system documentation for more information on how
+to use the @code{sort} program.
+
+@node History Sorting, Extract Program, Word Sorting, Miscellaneous Programs
+@subsection Removing Duplicates from Unsorted Text
+
+The @code{uniq} program
+(@pxref{Uniq Program, ,Printing Non-duplicated Lines of Text}),
+removes duplicate lines from @emph{sorted} data.
+
+Suppose, however, that you need to remove duplicate lines from a data file,
+while preserving the order in which the lines appear. A good example of
+this might be a shell history file. The history file keeps a copy of all
+the commands you have entered, and it is not unusual to repeat a command
+several times in a row. Occasionally you might wish to compact the history
+by removing duplicate entries. Yet it is desirable to maintain the order
+of the original commands.
+
+This simple program does the job. It uses two arrays. The @code{data}
+array is indexed by the text of each line.
+For each line, @code{data[$0]} is incremented.
+
+If a particular line has not
+been seen before, then @code{data[$0]} will be zero.
+In that case, the text of the line is stored in @code{lines[count]}.
+Each element of @code{lines} is a unique command, and the indices of
+@code{lines} indicate the order in which those lines were encountered.
+The @code{END} rule simply prints out the lines, in order.
+
+@cindex Rakitzis, Byron
+@findex histsort.awk
+@example
+@group
+@c file eg/prog/histsort.awk
+# histsort.awk --- compact a shell history file
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+# Thanks to Byron Rakitzis for the general idea
+@{
+ if (data[$0]++ == 0)
+ lines[++count] = $0
+@}
+
+END @{
+ for (i = 1; i <= count; i++)
+ print lines[i]
+@}
+@c endfile
+@end group
+@end example
+
+This program also provides a foundation for generating other useful
+information. For example, using the following @code{print} statement in the
+@code{END} rule would indicate how often a particular command was used.
+
+@example
+print data[lines[i]], lines[i]
+@end example
+
+This works because @code{data[$0]} was incremented each time a line was
+seen.
+
+@node Extract Program, Simple Sed, History Sorting, Miscellaneous Programs
+@subsection Extracting Programs from Texinfo Source Files
+
+@iftex
+Both this chapter and the previous chapter
+(@ref{Library Functions, ,A Library of @code{awk} Functions}),
+present a large number of @code{awk} programs.
+@end iftex
+@ifinfo
+The nodes
+@ref{Library Functions, ,A Library of @code{awk} Functions},
+and @ref{Sample Programs, ,Practical @code{awk} Programs},
+are the top level nodes for a large number of @code{awk} programs.
+@end ifinfo
+If you wish to experiment with these programs, it is tedious to have to type
+them in by hand. Here we present a program that can extract parts of a
+Texinfo input file into separate files.
+
+This @value{DOCUMENT} is written in Texinfo, the GNU project's document
+formatting language. A single Texinfo source file can be used to produce both
+printed and on-line documentation.
+@iftex
+Texinfo is fully documented in @cite{Texinfo---The GNU Documentation Format},
+available from the Free Software Foundation.
+@end iftex
+@ifinfo
+The Texinfo language is described fully, starting with
+@ref{Top, , Introduction, texi, Texinfo---The GNU Documentation Format}.
+@end ifinfo
+
+For our purposes, it is enough to know three things about Texinfo input
+files.
+
+@itemize @bullet
+@item
+The ``at'' symbol, @samp{@@}, is special in Texinfo, much like @samp{\} in C
+or @code{awk}. Literal @samp{@@} symbols are represented in Texinfo source
+files as @samp{@@@@}.
+
+@item
+Comments start with either @samp{@@c} or @samp{@@comment}.
+The file extraction program will work by using special comments that start
+at the beginning of a line.
+
+@item
+Example text that should not be split across a page boundary is bracketed
+between lines containing @samp{@@group} and @samp{@@end group} commands.
+@end itemize
+
+The following program, @file{extract.awk}, reads through a Texinfo source
+file, and does two things, based on the special comments.
+Upon seeing @samp{@w{@@c system @dots{}}},
+it runs a command, by extracting the command text from the
+control line and passing it on to the @code{system} function
+(@pxref{I/O Functions, ,Built-in Functions for Input/Output}).
+Upon seeing @samp{@@c file @var{filename}}, each subsequent line is sent to
+the file @var{filename}, until @samp{@@c endfile} is encountered.
+The rules in @file{extract.awk} will match either @samp{@@c} or
+@samp{@@comment} by letting the @samp{omment} part be optional.
+Lines containing @samp{@@group} and @samp{@@end group} are simply removed.
+@file{extract.awk} uses the @code{join} library function
+(@pxref{Join Function, ,Merging an Array Into a String}).
+
+The example programs in the on-line Texinfo source for @cite{@value{TITLE}}
+(@file{gawk.texi}) have all been bracketed inside @samp{file}
+and @samp{endfile} lines. The @code{gawk} distribution uses a copy of
+@file{extract.awk} to extract the sample
+programs and install many of them in a standard directory, where
+@code{gawk} can find them.
+
+@file{extract.awk} begins by setting @code{IGNORECASE} to one, so that
+mixed upper-case and lower-case letters in the directives won't matter.
+
+The first rule handles calling @code{system}, checking that a command was
+given (@code{NF} is at least three), and also checking that the command
+exited with a zero exit status, signifying OK.
+
+@findex extract.awk
+@example
+@c @group
+@c file eg/prog/extract.awk
+# extract.awk --- extract files and run programs
+# from texinfo files
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# May 1993
+
+BEGIN @{ IGNORECASE = 1 @}
+
+@group
+/^@@c(omment)?[ \t]+system/ \
+@{
+ if (NF < 3) @{
+ e = (FILENAME ":" FNR)
+ e = (e ": badly formed `system' line")
+ print e > "/dev/stderr"
+ next
+ @}
+ $1 = ""
+ $2 = ""
+ stat = system($0)
+ if (stat != 0) @{
+ e = (FILENAME ":" FNR)
+ e = (e ": warning: system returned " stat)
+ print e > "/dev/stderr"
+ @}
+@}
+@end group
+@c endfile
+@end example
+
+@noindent
+The variable @code{e} is used so that the rule
+fits nicely on the
+@iftex
+page.
+@end iftex
+@ifinfo
+screen.
+@end ifinfo
+
+The second rule handles moving data into files. It verifies that a file
+name was given in the directive. If the file named is not the current file,
+then the current file is closed. This means that an @samp{@@c endfile} was
+not given for that file. (We should probably print a diagnostic in this
+case, although at the moment we do not.)
+
+The @samp{for} loop does the work. It reads lines using @code{getline}
+(@pxref{Getline, ,Explicit Input with @code{getline}}).
+For an unexpected end of file, it calls the @code{@w{unexpected_eof}}
+function. If the line is an ``endfile'' line, then it breaks out of
+the loop.
+If the line is an @samp{@@group} or @samp{@@end group} line, then it
+ignores it, and goes on to the next line.
+
+Most of the work is in the following few lines. If the line has no @samp{@@}
+symbols, it can be printed directly. Otherwise, each leading @samp{@@} must be
+stripped off.
+
+To remove the @samp{@@} symbols, the line is split into separate elements of
+the array @code{a}, using the @code{split} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+Each element of @code{a} that is empty indicates two successive @samp{@@}
+symbols in the original line. For each two empty elements (@samp{@@@@} in
+the original file), we have to add back in a single @samp{@@} symbol.
+
+When the processing of the array is finished, @code{join} is called with the
+value of @code{SUBSEP}, to rejoin the pieces back into a single
+line. That line is then printed to the output file.
+
+@example
+@c @group
+@c file eg/prog/extract.awk
+/^@@c(omment)?[ \t]+file/ \
+@{
+@group
+ if (NF != 3) @{
+ e = (FILENAME ":" FNR ": badly formed `file' line")
+ print e > "/dev/stderr"
+ next
+ @}
+@end group
+ if ($3 != curfile) @{
+ if (curfile != "")
+ close(curfile)
+ curfile = $3
+ @}
+
+ for (;;) @{
+ if ((getline line) <= 0)
+ unexpected_eof()
+ if (line ~ /^@@c(omment)?[ \t]+endfile/)
+ break
+ else if (line ~ /^@@(end[ \t]+)?group/)
+ continue
+ if (index(line, "@@") == 0) @{
+ print line > curfile
+ continue
+ @}
+ n = split(line, a, "@@")
+@group
+ # if a[1] == "", means leading @@,
+ # don't add one back in.
+@end group
+ for (i = 2; i <= n; i++) @{
+ if (a[i] == "") @{ # was an @@@@
+ a[i] = "@@"
+ if (a[i+1] == "")
+ i++
+ @}
+ @}
+ print join(a, 1, n, SUBSEP) > curfile
+ @}
+@}
+@c endfile
+@c @end group
+@end example
+
+An important thing to note is the use of the @samp{>} redirection.
+Output done with @samp{>} only opens the file once; it stays open and
+subsequent output is appended to the file
+(@pxref{Redirection, , Redirecting Output of @code{print} and @code{printf}}).
+This allows us to easily mix program text and explanatory prose for the same
+sample source file (as has been done here!) without any hassle. The file is
+only closed when a new data file name is encountered, or at the end of the
+input file.
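+
+For example, in a hypothetical fragment like the following, both
+@code{print} statements write to the same open file; the second
+@code{print} appends to what the first wrote, instead of truncating
+@file{names.out} each time:
+
+@example
+@{
+    print $1 > "names.out"
+    print $2 > "names.out"
+@}
+@end example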
+
+Finally, the function @code{@w{unexpected_eof}} prints an appropriate
+error message and then exits.
+
+The @code{END} rule handles the final cleanup, closing the open file.
+
+@example
+@c file eg/prog/extract.awk
+@group
+function unexpected_eof()
+@{
+ printf("%s:%d: unexpected EOF or error\n", \
+ FILENAME, FNR) > "/dev/stderr"
+ exit 1
+@}
+@end group
+
+END @{
+ if (curfile)
+ close(curfile)
+@}
+@c endfile
+@end example
+
+@node Simple Sed, Igawk Program, Extract Program, Miscellaneous Programs
+@subsection A Simple Stream Editor
+
+@cindex @code{sed} utility
+The @code{sed} utility is a ``stream editor,'' a program that reads a
+stream of data, makes changes to it, and passes the modified data on.
+It is often used to make global changes to a large file, or to a stream
+of data generated by a pipeline of commands.
+
+While @code{sed} is a complicated program in its own right, its most common
+use is to perform global substitutions in the middle of a pipeline:
+
+@example
+command1 < orig.data | sed 's/old/new/g' | command2 > result
+@end example
+
+Here, the @samp{s/old/new/g} tells @code{sed} to look for the regexp
+@samp{old} on each input line, and replace it with the text @samp{new},
+globally (i.e.@: all the occurrences on a line). This is similar to
+@code{awk}'s @code{gsub} function
+(@pxref{String Functions, , Built-in Functions for String Manipulation}).
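+
+For comparison, a roughly equivalent substitution could be done directly
+with @code{gsub} in a one-line @code{awk} program:
+
+@example
+command1 < orig.data | awk '@{ gsub(/old/, "new"); print @}' | command2 > result
+@end example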
+
+The following program, @file{awksed.awk}, accepts at least two command line
+arguments: the pattern to look for and the text to replace it with. Any
+additional arguments are treated as data file names to process. If none
+are provided, the standard input is used.
+
+@cindex Brennan, Michael
+@cindex @code{awksed}
+@cindex simple stream editor
+@cindex stream editor, simple
+@example
+@c @group
+@c file eg/prog/awksed.awk
+# awksed.awk --- do s/foo/bar/g using just print
+# Thanks to Michael Brennan for the idea
+
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# August 1995
+
+function usage()
+@{
+ print "usage: awksed pat repl [files...]" > "/dev/stderr"
+ exit 1
+@}
+
+BEGIN @{
+ # validate arguments
+ if (ARGC < 3)
+ usage()
+
+ RS = ARGV[1]
+ ORS = ARGV[2]
+
+ # don't use arguments as files
+ ARGV[1] = ARGV[2] = ""
+@}
+
+# look ma, no hands!
+@{
+ if (RT == "")
+ printf "%s", $0
+ else
+ print
+@}
+@c endfile
+@c @end group
+@end example
+
+The program relies on @code{gawk}'s ability to have @code{RS} be a regexp
+and on the setting of @code{RT} to the actual text that terminated the
+record (@pxref{Records, ,How Input is Split into Records}).
+
+The idea is to have @code{RS} be the pattern to look for. @code{gawk}
+will automatically set @code{$0} to the text between matches of the pattern.
+This is text that we wish to keep, unmodified. Then, by setting @code{ORS}
+to the replacement text, a simple @code{print} statement will output the
+text we wish to keep, followed by the replacement text.
+
+There is one wrinkle to this scheme: what should be done if the last record
+does not end with text that matches @code{RS}? Unconditionally using a
+@code{print} statement would print the replacement text after the final
+record, which is not correct.
+
+However, if the file did not end in text that matches @code{RS}, @code{RT}
+will be set to the null string. In this case, we can print @code{$0} using
+@code{printf}
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).
+
+The @code{BEGIN} rule handles the setup, checking for the right number
+of arguments, and calling @code{usage} if there is a problem. Then it sets
+@code{RS} and @code{ORS} from the command line arguments, and sets
+@code{ARGV[1]} and @code{ARGV[2]} to the null string, so that they will
+not be treated as file names
+(@pxref{ARGC and ARGV, , Using @code{ARGC} and @code{ARGV}}).
+
+The @code{usage} function prints an error message and exits.
+
+Finally, the single rule handles the printing scheme outlined above,
+using @code{print} or @code{printf} as appropriate, depending upon the
+value of @code{RT}.
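+
+For example, assuming @file{awksed.awk} is in the current directory, the
+earlier @code{sed} pipeline might be rewritten to use it like so:
+
+@example
+command1 < orig.data | gawk -f awksed.awk 'old' 'new' | command2 > result
+@end example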
+
+@ignore
+Exercise, compare the performance of this version with the more
+straightforward:
+
+BEGIN {
+ pat = ARGV[1]
+ repl = ARGV[2]
+ ARGV[1] = ARGV[2] = ""
+}
+
+{ gsub(pat, repl); print }
+
+Exercise: what are the advantages and disadvantages of this version vs. sed?
+ Advantage: egrep regexps
+ speed (?)
+ Disadvantage: no & in replacement text
+
+Others?
+@end ignore
+
+@node Igawk Program, , Simple Sed, Miscellaneous Programs
+@subsection An Easy Way to Use Library Functions
+
+Using library functions in @code{awk} can be very beneficial. It
+encourages code re-use and the writing of general functions. Programs are
+smaller, and therefore clearer.
+However, using library functions is only easy when writing @code{awk}
+programs; it is painful when running them, requiring multiple @samp{-f}
+options. If @code{gawk} is unavailable, then so too are the @code{AWKPATH}
+environment variable and the ability to put @code{awk} functions into a
+library directory (@pxref{Options, ,Command Line Options}).
+
+It would be nice to be able to write programs like so:
+
+@example
+# library functions
+@@include getopt.awk
+@@include join.awk
+@dots{}
+
+# main program
+BEGIN @{
+ while ((c = getopt(ARGC, ARGV, "a:b:cde")) != -1)
+ @dots{}
+ @dots{}
+@}
+@end example
+
+The following program, @file{igawk.sh}, provides this service.
+It simulates @code{gawk}'s searching of the @code{AWKPATH} variable,
+and also allows @dfn{nested} includes; i.e.@: a file that has been included
+with @samp{@@include} can contain further @samp{@@include} statements.
+@code{igawk} will make an effort to only include files once, so that nested
+includes don't accidentally include a library function twice.
+
+@code{igawk} should behave externally just like @code{gawk}. This means it
+should accept all of @code{gawk}'s command line arguments, including the
+ability to have multiple source files specified via @samp{-f}, and the
+ability to mix command line and library source files.
+
+The program is written using the POSIX Shell (@code{sh}) command language.
+The way the program works is as follows:
+
+@enumerate
+@item
+Loop through the arguments, saving anything that doesn't represent
+@code{awk} source code for later, when the expanded program is run.
+
+@item
+For any arguments that do represent @code{awk} text, put the arguments into
+a temporary file that will be expanded. There are two cases.
+
+@enumerate a
+@item
+Literal text, provided with @samp{--source} or @samp{--source=}. This
+text is just echoed directly. The @code{echo} program will automatically
+supply a trailing newline.
+
+@item
+File names provided with @samp{-f}. We use a neat trick, and echo
+@samp{@@include @var{filename}} into the temporary file. Since the file
+inclusion program will work the way @code{gawk} does, this will get the text
+of the file included into the program at the correct point.
+@end enumerate
+
+@item
+Run an @code{awk} program (naturally) over the temporary file to expand
+@samp{@@include} statements. The expanded program is placed in a second
+temporary file.
+
+@item
+Run the expanded program with @code{gawk} and any other original command line
+arguments that the user supplied (such as the data file names).
+@end enumerate
+
+The initial part of the program turns on shell tracing if the first
+argument was @samp{debug}. Otherwise, a shell @code{trap} statement
+arranges to clean up any temporary files on program exit or upon an
+interrupt.
+
+@c 2e: For the temp file handling, go with Darrel's ig=${TMP:-/tmp}/igs.$$
+@c 2e: or something as similar as possible.
+
+The next part loops through all the command line arguments.
+There are several cases of interest.
+
+@table @code
+@item --
+This ends the arguments to @code{igawk}. Anything else should be passed on
+to the user's @code{awk} program without being evaluated.
+
+@item -W
+This indicates that the next option is specific to @code{gawk}. To make
+argument processing easier, the @samp{-W} is appended to the front of the
+remaining arguments and the loop continues. (This is an @code{sh}
+programming trick. Don't worry about it if you are not familiar with
+@code{sh}.)
+
+@item -v
+@itemx -F
+These are saved and passed on to @code{gawk}.
+
+@item -f
+@itemx --file
+@itemx --file=
+@itemx -Wfile=
+The file name is saved to the temporary file @file{/tmp/ig.s.$$} with an
+@samp{@@include} statement.
+The @code{sed} utility is used to remove the leading option part of the
+argument (e.g., @samp{--file=}).
+
+@item --source
+@itemx --source=
+@itemx -Wsource=
+The source text is echoed into @file{/tmp/ig.s.$$}.
+
+@iftex
+@page
+@end iftex
+@item --version
+@itemx -Wversion
+@code{igawk} prints its version number, runs @samp{gawk --version}
+to get the @code{gawk} version information, and then exits.
+@end table
+
+If none of @samp{-f}, @samp{--file}, @samp{-Wfile}, @samp{--source},
+or @samp{-Wsource} were supplied, then the first non-option argument
+should be the @code{awk} program. If there are no command line
+arguments left, @code{igawk} prints an error message and exits.
+Otherwise, the first argument is echoed into @file{/tmp/ig.s.$$}.
+
+In any case, after the arguments have been processed,
+@file{/tmp/ig.s.$$} contains the complete text of the original @code{awk}
+program.
+
+The @samp{$$} in @code{sh} represents the current process ID number.
+It is often used in shell programs to generate unique temporary file
+names. This allows multiple users to run @code{igawk} without worrying
+that the temporary file names will clash.
+
+@cindex @code{sed} utility
+Here's the program:
+
+@findex igawk.sh
+@example
+@c @group
+@c file eg/prog/igawk.sh
+#! /bin/sh
+
+# igawk --- like gawk but do @@include processing
+# Arnold Robbins, arnold@@gnu.ai.mit.edu, Public Domain
+# July 1993
+
+if [ "$1" = debug ]
+then
+ set -x
+ shift
+else
+ # cleanup on exit, hangup, interrupt, quit, termination
+ trap 'rm -f /tmp/ig.[se].$$' 0 1 2 3 15
+fi
+
+while [ $# -ne 0 ] # loop over arguments
+do
+ case $1 in
+ --) shift; break;;
+
+ -W) shift
+ set -- -W"$@@"
+ continue;;
+
+ -[vF]) opts="$opts $1 '$2'"
+ shift;;
+
+ -[vF]*) opts="$opts '$1'" ;;
+
+ -f) echo @@include "$2" >> /tmp/ig.s.$$
+ shift;;
+
+ -f*) f=`echo "$1" | sed 's/-f//'`
+ echo @@include "$f" >> /tmp/ig.s.$$ ;;
+
+ -?file=*) # -Wfile or --file
+ f=`echo "$1" | sed 's/-.file=//'`
+ echo @@include "$f" >> /tmp/ig.s.$$ ;;
+
+ -?file) # get arg, $2
+ echo @@include "$2" >> /tmp/ig.s.$$
+ shift;;
+
+ -?source=*) # -Wsource or --source
+ t=`echo "$1" | sed 's/-.source=//'`
+ echo "$t" >> /tmp/ig.s.$$ ;;
+
+ -?source) # get arg, $2
+ echo "$2" >> /tmp/ig.s.$$
+ shift;;
+
+ -?version)
+ echo igawk: version 1.0 1>&2
+ gawk --version
+ exit 0 ;;
+
+ -[W-]*) opts="$opts '$1'" ;;
+
+ *) break;;
+ esac
+ shift
+done
+
+if [ ! -s /tmp/ig.s.$$ ]
+then
+ if [ -z "$1" ]
+ then
+ echo igawk: no program! 1>&2
+ exit 1
+ else
+ echo "$1" > /tmp/ig.s.$$
+ shift
+ fi
+fi
+
+# at this point, /tmp/ig.s.$$ has the program
+@c endfile
+@c @end group
+@end example
+
+The @code{awk} program to process @samp{@@include} directives reads through
+the program, one line at a time using @code{getline}
+(@pxref{Getline, ,Explicit Input with @code{getline}}).
+The input file names and @samp{@@include} statements are managed using a
+stack. As each @samp{@@include} is encountered, the current file name is
+``pushed'' onto the stack, and the file named in the @samp{@@include}
+directive becomes
+the current file name. As each file is finished, the stack is ``popped,''
+and the previous input file becomes the current input file again.
+The process is started by making the original file the first one on the
+stack.
+
+The @code{pathto} function does the work of finding the full path to a
+file. It simulates @code{gawk}'s behavior when searching the @code{AWKPATH}
+environment variable
+(@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
+If a file name has a @samp{/} in it, no path search
+is done. Otherwise, the file name is concatenated with the name of each
+directory in the path, and an attempt is made to open the generated file
+name. The only way in @code{awk} to test if a file can be read is to go
+ahead and try to read it with @code{getline}; that is what @code{pathto}
+does. If the file can be read, it is closed, and the file name is
+returned.
+@ignore
+An alternative way to test for the file's existence would be to call
+@samp{system("test -r " t)}, which uses the @code{test} utility to
+see if the file exists and is readable. The disadvantage to this method
+is that it requires creating an extra process, and can thus be slightly
+slower.
+@end ignore
+
+@example
+@c @group
+@c file eg/prog/igawk.sh
+gawk -- '
+# process @@include directives
+
+function pathto(file, i, t, junk)
+@{
+ if (index(file, "/") != 0)
+ return file
+
+ for (i = 1; i <= ndirs; i++) @{
+ t = (pathlist[i] "/" file)
+ if ((getline junk < t) > 0) @{
+ # found it
+ close(t)
+ return t
+ @}
+ @}
+ return ""
+@}
+@c endfile
+@c @end group
+@end example
+
+The main program is contained inside one @code{BEGIN} rule. The first thing it
+does is set up the @code{pathlist} array that @code{pathto} uses. After
+splitting the path on @samp{:}, null elements are replaced with @code{"."},
+which represents the current directory.
+
+@example
+@c @group
+@c file eg/prog/igawk.sh
+BEGIN @{
+ path = ENVIRON["AWKPATH"]
+ ndirs = split(path, pathlist, ":")
+ for (i = 1; i <= ndirs; i++) @{
+ if (pathlist[i] == "")
+ pathlist[i] = "."
+ @}
+@c endfile
+@c @end group
+@end example
+
+The stack is initialized with @code{ARGV[1]}, which will be @file{/tmp/ig.s.$$}.
+The main loop comes next. Input lines are read in succession. Lines that
+do not start with @samp{@@include} are printed verbatim.
+
+If the line does start with @samp{@@include}, the file name is in @code{$2}.
+@code{pathto} is called to generate the full path. If it cannot find the
+file, we print an error message and continue.
+
+The next thing to check is if the file has been included already. The
+@code{processed} array is indexed by the full file name of each included
+file, and it tracks this information for us. If the file has been
+seen, a warning message is printed. Otherwise, the new file name is
+pushed onto the stack and processing continues.
+
+Finally, when @code{getline} encounters the end of the input file, the file
+is closed and the stack is popped. When @code{stackptr} is less than zero,
+the program is done.
+
+@example
+@c @group
+@c file eg/prog/igawk.sh
+ stackptr = 0
+ input[stackptr] = ARGV[1] # ARGV[1] is first file
+
+ for (; stackptr >= 0; stackptr--) @{
+ while ((getline < input[stackptr]) > 0) @{
+ if (tolower($1) != "@@include") @{
+ print
+ continue
+ @}
+ fpath = pathto($2)
+ if (fpath == "") @{
+ printf("igawk:%s:%d: cannot find %s\n", \
+ input[stackptr], FNR, $2) > "/dev/stderr"
+ continue
+ @}
+@group
+ if (! (fpath in processed)) @{
+ processed[fpath] = input[stackptr]
+ input[++stackptr] = fpath
+ @} else
+ print $2, "included in", input[stackptr], \
+ "already included in", \
+ processed[fpath] > "/dev/stderr"
+ @}
+@end group
+@group
+ close(input[stackptr])
+ @}
+@}' /tmp/ig.s.$$ > /tmp/ig.e.$$
+@end group
+@c endfile
+@c @end group
+@end example
+
+The last step is to call @code{gawk} with the expanded program and the original
+options and command line arguments that the user supplied. @code{gawk}'s
+exit status is passed back on to @code{igawk}'s calling program.
+
+@c this causes more problems than it solves, so leave it out.
+@ignore
+The special file @file{/dev/null} is passed as a data file to @code{gawk}
+to handle an interesting case. Suppose that the user's program only has
+a @code{BEGIN} rule, and there are no data files to read. The program should exit without reading any data
+files. However, suppose that an included library file defines an @code{END}
+rule of its own. In this case, @code{gawk} will hang, reading standard
+input. In order to avoid this, @file{/dev/null} is explicitly added to the
+command line. Reading from @file{/dev/null} always returns an immediate
+end of file indication.
+
+@c Hmm. Add /dev/null if $# is 0? Still messes up ARGV. Sigh.
+@end ignore
+
+@example
+@c @group
+@c file eg/prog/igawk.sh
+eval gawk -f /tmp/ig.e.$$ $opts -- "$@@"
+
+exit $?
+@c endfile
+@c @end group
+@end example
+
+This version of @code{igawk} represents my third attempt at this program.
+There are three key simplifications that made the program work better.
+
+@enumerate
+@item
+Using @samp{@@include} even for the files named with @samp{-f} makes building
+the initial collected @code{awk} program much simpler; all the
+@samp{@@include} processing can be done once.
+
+@item
+The @code{pathto} function doesn't try to save the line read with
+@code{getline} when testing for the file's accessibility. Trying to save
+this line for use with the main program complicates things considerably.
+@c what problem does this engender though - exercise
+@c answer, reading from "-" or /dev/stdin
+
+@item
+Using a @code{getline} loop in the @code{BEGIN} rule does it all in one
+place. It is not necessary to call out to a separate loop for processing
+nested @samp{@@include} statements.
+@end enumerate
+
+Also, this program illustrates that it is often worthwhile to combine
+@code{sh} and @code{awk} programming together. You can usually accomplish
+quite a lot, without having to resort to low-level programming in C or C++, and it
+is frequently easier to do certain kinds of string and argument manipulation
+using the shell than it is in @code{awk}.
+
+Finally, @code{igawk} shows that it is not always necessary to add new
+features to a program; they can often be layered on top. With @code{igawk},
+there is no real reason to build @samp{@@include} processing into
+@code{gawk} itself.
+
+As an additional example of this, consider the idea of having two
+files in a directory in the search path.
+
+@table @file
+@item default.awk
+This file would contain a set of default library functions, such
+as @code{getopt} and @code{assert}.
+
+@item site.awk
+This file would contain library functions that are specific to a site or
+installation, i.e.@: locally developed functions.
+Having a separate file allows @file{default.awk} to change with
+new @code{gawk} releases, without requiring the system administrator to
+update it each time by adding the local functions.
+@end table
+
+One user
+@c Karl Berry, karl@ileaf.com, 10/95
+suggested that @code{gawk} be modified to automatically read these files
+upon startup. Instead, it would be very simple to modify @code{igawk}
+to do this. Since @code{igawk} can process nested @samp{@@include}
+directives, @file{default.awk} could simply contain @samp{@@include}
+statements for the desired library functions.
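+
+Under this scheme, a hypothetical @file{default.awk} might contain nothing
+more than @samp{@@include} directives for the desired functions:
+
+@example
+# default.awk --- site-wide default library functions
+@@include getopt.awk
+@@include join.awk
+@@include assert.awk
+@end example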
+
+@c Exercise: make this change
+
+@node Language History, Gawk Summary, Sample Programs, Top
+@chapter The Evolution of the @code{awk} Language
+
+This @value{DOCUMENT} describes the GNU implementation of @code{awk}, which follows
+the POSIX specification. Many @code{awk} users are only familiar
+with the original @code{awk} implementation in Version 7 Unix.
+(This implementation was the basis for @code{awk} in Berkeley Unix,
+through 4.3--Reno. The 4.4 release of Berkeley Unix uses @code{gawk} 2.15.2
+for its version of @code{awk}.) This chapter briefly describes the
+evolution of the @code{awk} language, with cross references to other parts
+of the @value{DOCUMENT} where you can find more information.
+
+@menu
+* V7/SVR3.1:: The major changes between V7 and System V
+ Release 3.1.
+* SVR4:: Minor changes between System V Releases 3.1
+ and 4.
+* POSIX:: New features from the POSIX standard.
+* BTL:: New features from the AT&T Bell Laboratories
+ version of @code{awk}.
+* POSIX/GNU:: The extensions in @code{gawk} not in POSIX
+ @code{awk}.
+@end menu
+
+@node V7/SVR3.1, SVR4, Language History, Language History
+@section Major Changes between V7 and SVR3.1
+
+The @code{awk} language evolved considerably between the release of
+Version 7 Unix (1978) and the new version first made generally available in
+System V Release 3.1 (1987). This section summarizes the changes, with
+cross-references to further details.
+
+@itemize @bullet
+@item
+The requirement for @samp{;} to separate rules on a line
+(@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}).
+
+@item
+User-defined functions, and the @code{return} statement
+(@pxref{User-defined, ,User-defined Functions}).
+
+@item
+The @code{delete} statement (@pxref{Delete, ,The @code{delete} Statement}).
+
+@item
+The @code{do}-@code{while} statement
+(@pxref{Do Statement, ,The @code{do}-@code{while} Statement}).
+
+@item
+The built-in functions @code{atan2}, @code{cos}, @code{sin}, @code{rand} and
+@code{srand} (@pxref{Numeric Functions, ,Numeric Built-in Functions}).
+
+@item
+The built-in functions @code{gsub}, @code{sub}, and @code{match}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+@item
+The built-in functions @code{close}, and @code{system}
+(@pxref{I/O Functions, ,Built-in Functions for Input/Output}).
+
+@item
+The @code{ARGC}, @code{ARGV}, @code{FNR}, @code{RLENGTH}, @code{RSTART},
+and @code{SUBSEP} built-in variables (@pxref{Built-in Variables}).
+
+@item
+The conditional expression using the ternary operator @samp{?:}
+(@pxref{Conditional Exp, ,Conditional Expressions}).
+
+@item
+The exponentiation operator @samp{^}
+(@pxref{Arithmetic Ops, ,Arithmetic Operators}) and its assignment operator
+form @samp{^=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+
+@item
+C-compatible operator precedence, which breaks some old @code{awk}
+programs (@pxref{Precedence, ,Operator Precedence (How Operators Nest)}).
+
+@item
+Regexps as the value of @code{FS}
+(@pxref{Field Separators, ,Specifying How Fields are Separated}), and as the
+third argument to the @code{split} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+@item
+Dynamic regexps as operands of the @samp{~} and @samp{!~} operators
+(@pxref{Regexp Usage, ,How to Use Regular Expressions}).
+
+@item
+The escape sequences @samp{\b}, @samp{\f}, and @samp{\r}
+(@pxref{Escape Sequences}).
+(Some vendors have updated their old versions of @code{awk} to
+recognize @samp{\r}, @samp{\b}, and @samp{\f}, but this is not
+something you can rely on.)
+
+@item
+Redirection of input for the @code{getline} function
+(@pxref{Getline, ,Explicit Input with @code{getline}}).
+
+@item
+Multiple @code{BEGIN} and @code{END} rules
+(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}).
+
+@item
+Multi-dimensional arrays
+(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).
+@end itemize
+
+@node SVR4, POSIX, V7/SVR3.1, Language History
+@section Changes between SVR3.1 and SVR4
+
+@cindex @code{awk} language, V.4 version
+The System V Release 4 version of Unix @code{awk} added these features
+(some of which originated in @code{gawk}):
+
+@itemize @bullet
+@item
+The @code{ENVIRON} variable (@pxref{Built-in Variables}).
+
+@item
+Multiple @samp{-f} options on the command line
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @samp{-v} option for assigning variables before program execution begins
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @samp{--} option for terminating command line options.
+
+@item
+The @samp{\a}, @samp{\v}, and @samp{\x} escape sequences
+(@pxref{Escape Sequences}).
+
+@item
+A defined return value for the @code{srand} built-in function
+(@pxref{Numeric Functions, ,Numeric Built-in Functions}).
+
+@item
+The @code{toupper} and @code{tolower} built-in string functions
+for case translation
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+@item
+A cleaner specification for the @samp{%c} format-control letter in the
+@code{printf} function
+(@pxref{Control Letters, ,Format-Control Letters}).
+
+@item
+The ability to dynamically pass the field width and precision (@code{"%*.*d"})
+in the argument list of the @code{printf} function
+(@pxref{Control Letters, ,Format-Control Letters}).
+
+@item
+The use of regexp constants such as @code{/foo/} as expressions, where
+they are equivalent to using the matching operator, as in @samp{$0 ~ /foo/}
+(@pxref{Using Constant Regexps, ,Using Regular Expression Constants}).
+@end itemize
+
+@node POSIX, BTL, SVR4, Language History
+@section Changes between SVR4 and POSIX @code{awk}
+
+The POSIX Command Language and Utilities standard for @code{awk}
+introduced the following changes into the language:
+
+@itemize @bullet
+@item
+The use of @samp{-W} for implementation-specific options.
+
+@item
+The use of @code{CONVFMT} for controlling the conversion of numbers
+to strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+@item
+The concept of a numeric string, and tighter comparison rules to go
+with it (@pxref{Typing and Comparison, ,Variable Typing and Comparison Expressions}).
+
+@item
+More complete documentation of many of the previously undocumented
+features of the language.
+@end itemize
+
+The following common extensions are not permitted by the POSIX
+standard:
+
+@c IMPORTANT! Keep this list in sync with the one in node Options
+
+@itemize @bullet
+@item
+@code{\x} escape sequences are not recognized
+(@pxref{Escape Sequences}).
+
+@item
+The synonym @code{func} for the keyword @code{function} is not
+recognized (@pxref{Definition Syntax, ,Function Definition Syntax}).
+
+@item
+The operators @samp{**} and @samp{**=} cannot be used in
+place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators},
+and also @pxref{Assignment Ops, ,Assignment Expressions}).
+
+@item
+Specifying @samp{-Ft} on the command line does not set the value
+of @code{FS} to be a single tab character
+(@pxref{Field Separators, ,Specifying How Fields are Separated}).
+
+@item
+The @code{fflush} built-in function is not supported
+(@pxref{I/O Functions, , Built-in Functions for Input/Output}).
+@end itemize
+
+@node BTL, POSIX/GNU, POSIX, Language History
+@section Extensions in the AT&T Bell Laboratories @code{awk}
+
+@cindex Kernighan, Brian
+Brian Kernighan, one of the original designers of Unix @code{awk},
+has made his version available via anonymous @code{ftp}
+(@pxref{Other Versions, ,Other Freely Available @code{awk} Implementations}).
+This section describes extensions in his version of @code{awk} that are
+not in POSIX @code{awk}.
+
+@itemize @bullet
+@item
+The @samp{-mf=@var{NNN}} and @samp{-mr=@var{NNN}} command line options
+to set the maximum number of fields, and the maximum
+record size, respectively
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @code{fflush} built-in function for flushing buffered output
+(@pxref{I/O Functions, ,Built-in Functions for Input/Output}).
+
+@ignore
+@item
+The @code{SYMTAB} array, that allows access to the internal symbol
+table of @code{awk}. This feature is not documented, largely because
+it is somewhat shakily implemented. For instance, you cannot access arrays
+or array elements through it.
+@end ignore
+@end itemize
+
+@node POSIX/GNU, , BTL, Language History
+@section Extensions in @code{gawk} Not in POSIX @code{awk}
+
+@cindex compatibility mode
+The GNU implementation, @code{gawk}, adds a number of features.
+This section lists them in the order they were added to @code{gawk}.
+They can all be disabled with either the @samp{--traditional} or
+@samp{--posix} options
+(@pxref{Options, ,Command Line Options}).
+
+Version 2.10 of @code{gawk} introduced these features:
+
+@itemize @bullet
+@item
+The @code{AWKPATH} environment variable for specifying a path search for
+the @samp{-f} command line option
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @code{IGNORECASE} variable and its effects
+(@pxref{Case-sensitivity, ,Case-sensitivity in Matching}).
+
+@item
+The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr}, and
+@file{/dev/fd/@var{n}} file name interpretation
+(@pxref{Special Files, ,Special File Names in @code{gawk}}).
+@end itemize
+
+Version 2.13 of @code{gawk} introduced these features:
+
+@itemize @bullet
+@item
+The @code{FIELDWIDTHS} variable and its effects
+(@pxref{Constant Size, ,Reading Fixed-width Data}).
+
+@item
+The @code{systime} and @code{strftime} built-in functions for obtaining
+and printing time stamps
+(@pxref{Time Functions, ,Functions for Dealing with Time Stamps}).
+
+@item
+The @samp{-W lint} option to provide source code and run time error
+and portability checking
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @samp{-W compat} option to turn off these extensions
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @samp{-W posix} option for full POSIX compliance
+(@pxref{Options, ,Command Line Options}).
+@end itemize
+
+Version 2.14 of @code{gawk} introduced these features:
+
+@itemize @bullet
+@item
+The @code{next file} statement for skipping to the next data file
+(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}).
+@end itemize
+
+Version 2.15 of @code{gawk} introduced these features:
+
+@itemize @bullet
+@item
+The @code{ARGIND} variable, which tracks the movement of @code{FILENAME}
+through @code{ARGV} (@pxref{Built-in Variables}).
+
+@item
+The @code{ERRNO} variable, which contains the system error message when
+@code{getline} returns @minus{}1, or when @code{close} fails
+(@pxref{Built-in Variables}).
+
+@item
+The ability to use GNU-style long named options that start with @samp{--}
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @samp{--source} option for mixing command line and library
+file source code
+(@pxref{Options, ,Command Line Options}).
+
+@item
+The @file{/dev/pid}, @file{/dev/ppid}, @file{/dev/pgrpid}, and
+@file{/dev/user} file name interpretation
+(@pxref{Special Files, ,Special File Names in @code{gawk}}).
+@end itemize
+
+Version 3.0 of @code{gawk} introduced these features:
+
+@itemize @bullet
+@item
+The @code{next file} statement became @code{nextfile}
+(@pxref{Nextfile Statement, ,The @code{nextfile} Statement}).
+
+@item
+The @samp{--lint-old} option to
+warn about constructs that are not available in
+the original Version 7 Unix version of @code{awk}
+(@pxref{V7/SVR3.1, , Major Changes between V7 and SVR3.1}).
+
+@item
+The @samp{--traditional} option was added as a better name for
+@samp{--compat} (@pxref{Options, ,Command Line Options}).
+
+@item
+The ability for @code{FS} to be a null string, and for the third
+argument to @code{split} to be the null string
+(@pxref{Single Character Fields, , Making Each Character a Separate Field}).
+
+@item
+The ability for @code{RS} to be a regexp
+(@pxref{Records, , How Input is Split into Records}).
+
+@item
+The @code{RT} variable
+(@pxref{Records, , How Input is Split into Records}).
+
+@item
+The @code{gensub} function for more powerful text manipulation
+(@pxref{String Functions, , Built-in Functions for String Manipulation}).
+
+@item
+The @code{strftime} function acquired a default time format,
+allowing it to be called with no arguments
+(@pxref{Time Functions, , Functions for Dealing with Time Stamps}).
+
+@item
+Full support for both POSIX and GNU regexps
+(@pxref{Regexp, , Regular Expressions}).
+
+@item
+The @samp{--re-interval} option to provide interval expressions in regexps
+(@pxref{Regexp Operators, , Regular Expression Operators}).
+
+@item
+@code{IGNORECASE} changed, now applying to string comparison as well
+as regexp operations
+(@pxref{Case-sensitivity, ,Case-sensitivity in Matching}).
+
+@item
+The @samp{-m} option and the @code{fflush} function from the
+Bell Labs research version of @code{awk}
+(@pxref{Options, ,Command Line Options}; also
+@pxref{I/O Functions, ,Built-in Functions for Input/Output}).
+
+@item
+The use of GNU Autoconf to control the configuration process
+(@pxref{Quick Installation, , Compiling @code{gawk} for Unix}).
+
+@item
+Amiga support
+(@pxref{Amiga Installation, ,Installing @code{gawk} on an Amiga}).
+
+@c XXX ADD MORE STUFF HERE
+
+@end itemize
+
+@node Gawk Summary, Installation, Language History, Top
+@appendix @code{gawk} Summary
+
+This appendix provides a brief summary of the @code{gawk} command line and the
+@code{awk} language. It is designed to serve as a ``quick reference.'' It is
+therefore terse, but complete.
+
+@menu
+* Command Line Summary:: Recapitulation of the command line.
+* Language Summary:: A terse review of the language.
+* Variables/Fields:: Variables, fields, and arrays.
+* Rules Summary:: Patterns and Actions, and their component
+ parts.
+* Actions Summary:: Quick overview of actions.
+* Functions Summary:: Defining and calling functions.
+* Historical Features:: Some undocumented but supported ``features''.
+@end menu
+
+@node Command Line Summary, Language Summary, Gawk Summary, Gawk Summary
+@appendixsec Command Line Options Summary
+
+The command line consists of options to @code{gawk} itself, the
+@code{awk} program text (if not supplied via the @samp{-f} option), and
+values to be made available in the @code{ARGC} and @code{ARGV}
+predefined @code{awk} variables:
+
+@example
+gawk @r{[@var{POSIX or GNU style options}]} -f @var{source-file} @r{[@code{--}]} @var{file} @dots{}
+gawk @r{[@var{POSIX or GNU style options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
+@end example
+
+The options that @code{gawk} accepts are:
+
+@table @code
+@item -F @var{fs}
+@itemx --field-separator @var{fs}
+Use @var{fs} for the input field separator (the value of the @code{FS}
+predefined variable).
+
+@item -f @var{program-file}
+@itemx --file @var{program-file}
+Read the @code{awk} program source from the file @var{program-file}, instead
+of from the first command line argument.
+
+@item -mf=@var{NNN}
+@itemx -mr=@var{NNN}
+The @samp{f} flag sets
+the maximum number of fields, and the @samp{r} flag sets the maximum
+record size. These options are ignored by @code{gawk}, since @code{gawk}
+has no predefined limits; they are only for compatibility with the
+Bell Labs research version of Unix @code{awk}.
+
+@item -v @var{var}=@var{val}
+@itemx --assign @var{var}=@var{val}
+Assign the variable @var{var} the value @var{val} before program execution
+begins.
+
+@item -W traditional
+@itemx -W compat
+@itemx --traditional
+@itemx --compat
+Use compatibility mode, in which @code{gawk} extensions are turned
+off.
+
+@item -W copyleft
+@itemx -W copyright
+@itemx --copyleft
+@itemx --copyright
+Print the short version of the General Public License on the error
+output. This option may disappear in a future version of @code{gawk}.
+
+@item -W help
+@itemx -W usage
+@itemx --help
+@itemx --usage
+Print a relatively short summary of the available options on the error output.
+
+@item -W lint
+@itemx --lint
+Give warnings about dubious or non-portable @code{awk} constructs.
+
+@item -W lint-old
+@itemx --lint-old
+Warn about constructs that are not available in
+the original Version 7 Unix version of @code{awk}.
+
+@item -W posix
+@itemx --posix
+Use POSIX compatibility mode, in which @code{gawk} extensions
+are turned off and additional restrictions apply.
+
+@item -W re-interval
+@itemx --re-interval
+Allow interval expressions
+(@pxref{Regexp Operators, , Regular Expression Operators}),
+in regexps.
+
+@item -W source=@var{program-text}
+@itemx --source @var{program-text}
+Use @var{program-text} as @code{awk} program source code. This option allows
+mixing command line source code with source code from files, and is
+particularly useful for mixing command line programs with library functions.
+
+@item -W version
+@itemx --version
+Print version information for this particular copy of @code{gawk} on the error
+output.
+
+@item --
+Signal the end of options. This is useful to allow further arguments to the
+@code{awk} program itself to start with a @samp{-}. This is mainly for
+consistency with POSIX argument parsing conventions.
+@end table
+
+Any other options are flagged as invalid, but are otherwise ignored.
+@xref{Options, ,Command Line Options}, for more details.
+
+@node Language Summary, Variables/Fields, Command Line Summary, Gawk Summary
+@appendixsec Language Summary
+
+An @code{awk} program consists of a sequence of zero or more pattern-action
+statements and optional function definitions. One or the other of the
+pattern and action may be omitted.
+
+@example
+@var{pattern} @{ @var{action statements} @}
+@var{pattern}
+ @{ @var{action statements} @}
+
+function @var{name}(@var{parameter list}) @{ @var{action statements} @}
+@end example
+
+@code{gawk} first reads the program source from the
+@var{program-file}(s), if specified, or from the first non-option
+argument on the command line. The @samp{-f} option may be used multiple
+times on the command line. @code{gawk} reads the program text from all
+the @var{program-file} files, effectively concatenating them in the
+order they are specified. This is useful for building libraries of
+@code{awk} functions, without having to include them in each new
+@code{awk} program that uses them. To use a library function in a file
+from a program typed in on the command line, specify
+@samp{--source '@var{program}'}, and type your program in between the single
+quotes.
+@xref{Options, ,Command Line Options}.
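+
+As an illustration, the following command line combines a library file
+with program text given directly on the command line.  (The file name
+@file{mylib.awk}, the function @code{myfunc}, and the data file
+@file{data} are hypothetical names, used here only for the sketch.)
+
+@example
+gawk -f mylib.awk --source '@{ print myfunc($1) @}' data
+@end example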
+
+The environment variable @code{AWKPATH} specifies a search path to use
+when finding source files named with the @samp{-f} option. The default
+path, which is
+@samp{.:/usr/local/share/awk}@footnote{The path may use a directory
+other than @file{/usr/local/share/awk}, depending upon how @code{gawk}
+was built and installed.} is used if @code{AWKPATH} is not set.
+If a file name given to the @samp{-f} option contains a @samp{/} character,
+no path search is performed.
+@xref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}.
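+
+For example, with a Bourne-style shell, the search path might be set up
+as follows.  (The directory and file names here are only illustrative.)
+
+@example
+AWKPATH=".:/usr/local/share/awk:$HOME/awklib"
+export AWKPATH
+gawk -f myprog data
+@end example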
+
+@code{gawk} compiles the program into an internal form, and then proceeds to
+read each file named in the @code{ARGV} array.
+The initial values of @code{ARGV} come from the command line arguments.
+If there are no files named
+on the command line, @code{gawk} reads the standard input.
+
+If a ``file'' named on the command line has the form
+@samp{@var{var}=@var{val}}, it is treated as a variable assignment: the
+variable @var{var} is assigned the value @var{val}.
+If any of the files have a value that is the null string, that
+element in the list is skipped.
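+
+For example, the following command (the file names are only
+illustrative) reads @file{file1} with the default field separator, and
+then reads @file{file2} with @code{FS} set to @samp{:}:
+
+@example
+gawk '@{ print $1 @}' file1 FS=: file2
+@end example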
+
+For each record in the input, @code{gawk} tests to see if it matches any
+@var{pattern} in the @code{awk} program. For each pattern that the record
+matches, the associated @var{action} is executed.
+
+@node Variables/Fields, Rules Summary, Language Summary, Gawk Summary
+@appendixsec Variables and Fields
+
+@code{awk} variables are not declared; they come into existence when they are
+first used. Their values are either floating-point numbers or strings.
+@code{awk} also has one-dimensional arrays; multidimensional arrays
+may be simulated. There are several predefined variables that
+@code{awk} sets as a program runs; these are summarized below.
+
+@menu
+* Fields Summary:: Input field splitting.
+* Built-in Summary:: @code{awk}'s built-in variables.
+* Arrays Summary:: Using arrays.
+* Data Type Summary:: Values in @code{awk} are numbers or strings.
+@end menu
+
+@node Fields Summary, Built-in Summary, Variables/Fields, Variables/Fields
+@appendixsubsec Fields
+
+As each input line is read, @code{gawk} splits the line into
+@var{fields}, using the value of the @code{FS} variable as the field
+separator. If @code{FS} is a single character, fields are separated by
+that character. Otherwise, @code{FS} is expected to be a full regular
+expression. In the special case that @code{FS} is a single space,
+fields are separated by runs of spaces and/or tabs.
+If @code{FS} is the null string (@code{""}), then each individual
+character in the record becomes a separate field.
+Note that the value
+of @code{IGNORECASE} (@pxref{Case-sensitivity, ,Case-sensitivity in Matching})
+also affects how fields are split when @code{FS} is a regular expression.
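+
+Here is a small sketch of splitting fields on a single character and on
+a regular expression:
+
+@example
+$ echo aXbXc | gawk 'BEGIN @{ FS = "X" @} @{ print $2 @}'
+@print{} b
+$ echo a12b345c | gawk 'BEGIN @{ FS = "[0-9]+" @} @{ print $3 @}'
+@print{} c
+@end example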
+
+Each field in the input line may be referenced by its position, @code{$1},
+@code{$2}, and so on. @code{$0} is the whole line. The value of a field may
+be assigned to as well. Field numbers need not be constants:
+
+@example
+n = 5
+print $n
+@end example
+
+@noindent
+prints the fifth field in the input line. The variable @code{NF} is set to
+the total number of fields in the input line.
+
+References to non-existent fields (i.e.@: fields after @code{$NF}) return
+the null string. However, assigning to a non-existent field (e.g.,
+@code{$(NF+2) = 5}) increases the value of @code{NF}, creates any
+intervening fields with the null string as their value, and causes the
+value of @code{$0} to be recomputed, with the fields being separated by
+the value of @code{OFS}.
+@xref{Reading Files, ,Reading Input Files}.
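+
+For instance, the following sketch assigns to a field two beyond the
+last one, which updates @code{NF} and rebuilds @code{$0}:
+
+@example
+$ echo a b c | gawk '@{ $(NF+2) = "x"; print NF; print $0 @}'
+@print{} 5
+@print{} a b c  x
+@end example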
+
+@node Built-in Summary, Arrays Summary, Fields Summary, Variables/Fields
+@appendixsubsec Built-in Variables
+
+@code{gawk}'s built-in variables are:
+
+@table @code
+@item ARGC
+The number of elements in @code{ARGV}. See below for what is actually
+included in @code{ARGV}.
+
+@item ARGIND
+The index in @code{ARGV} of the current file being processed.
+When @code{gawk} is processing the input data files,
+it is always true that @samp{FILENAME == ARGV[ARGIND]}.
+
+@item ARGV
+The array of command line arguments. The array is indexed from zero to
+@code{ARGC} @minus{} 1. Dynamically changing @code{ARGC} and
+the contents of @code{ARGV}
+can control the files used for data. A null-valued element in
+@code{ARGV} is ignored. @code{ARGV} does not include the options to
+@code{awk} or the text of the @code{awk} program itself.
+
+@item CONVFMT
+The conversion format to use when converting numbers to strings.
+
+@item FIELDWIDTHS
+A space-separated list of numbers describing the widths of
+fixed-width input data.
+
+@item ENVIRON
+An array of environment variable values. The array
+is indexed by variable name, each element being the value of that
+variable. Thus, the environment variable @code{HOME} is
+@code{ENVIRON["HOME"]}. One possible value might be @file{/home/arnold}.
+
+Changing this array does not affect the environment seen by programs
+which @code{gawk} spawns via redirection or the @code{system} function.
+(This may change in a future version of @code{gawk}.)
+
+Some operating systems do not have environment variables.
+The @code{ENVIRON} array is empty when running on these systems.
+
+@item ERRNO
+The system error message when an error occurs using @code{getline}
+or @code{close}.
+
+@item FILENAME
+The name of the current input file. If no files are specified on the command
+line, the value of @code{FILENAME} is the null string.
+
+@item FNR
+The input record number in the current input file.
+
+@item FS
+The input field separator, a space by default.
+
+@item IGNORECASE
+The case-sensitivity flag for string comparisons and regular expression
+operations. If @code{IGNORECASE} has a non-zero value, then pattern
+matching in rules, record separating with @code{RS}, field splitting
+with @code{FS}, regular expression matching with @samp{~} and
+@samp{!~}, and the @code{gensub}, @code{gsub}, @code{index},
+@code{match}, @code{split} and @code{sub} built-in functions all
+ignore case when doing regular expression operations, and all string
+comparisons are done ignoring case.
+
+@item NF
+The number of fields in the current input record.
+
+@item NR
+The total number of input records seen so far.
+
+@item OFMT
+The output format for numbers for the @code{print} statement,
+@code{"%.6g"} by default.
+
+@item OFS
+The output field separator, a space by default.
+
+@item ORS
+The output record separator, by default a newline.
+
+@item RS
+The input record separator, by default a newline.
+If @code{RS} is set to the null string, then records are separated by
+blank lines. When @code{RS} is set to the null string, then the newline
+character always acts as a field separator, in addition to whatever value
+@code{FS} may have. If @code{RS} is set to a multi-character
+string, it denotes a regexp; input text matching the regexp
+separates records.
+
+@item RT
+The input text that matched the text denoted by @code{RS},
+the record separator.
+
+@item RSTART
+The index of the first character of the string last matched by
+@code{match}; zero if no match.
+
+@item RLENGTH
+The length of the string last matched by @code{match}; @minus{}1 if no match.
+
+@item SUBSEP
+The string used to separate multiple subscripts in array elements, by
+default @code{"\034"}.
+@end table
+
+@xref{Built-in Variables}, for more information.
+
+@node Arrays Summary, Data Type Summary, Built-in Summary, Variables/Fields
+@appendixsubsec Arrays
+
+Arrays are subscripted with an expression between square brackets
+(@samp{[} and @samp{]}). Array subscripts are @emph{always} strings;
+numbers are converted to strings as necessary, following the standard
+conversion rules
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+If you use multiple expressions separated by commas inside the square
+brackets, then the array subscript is a string consisting of the
+concatenation of the individual subscript values, converted to strings,
+separated by the subscript separator (the value of @code{SUBSEP}).
+
+The special operator @code{in} may be used in a conditional context
+to see if an array has an index consisting of a particular value.
+
+@example
+if (val in array)
+ print array[val]
+@end example
+
+If the array has multiple subscripts, use @samp{(i, j, @dots{}) in @var{array}}
+to test for existence of an element.
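+
+For example, here is a small sketch using a two-subscript array (the
+array name @code{grid} is arbitrary):
+
+@example
+grid[3, 4] = "full"
+if ((3, 4) in grid)
+    print grid[3, 4]
+@end example
+
+@noindent
+The subscript actually stored is @code{"3" SUBSEP "4"}.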
+
+The @code{in} construct may also be used in a @code{for} loop to iterate
+over all the elements of an array.
+@xref{Scanning an Array, ,Scanning All Elements of an Array}.
+
+You can remove an element from an array using the @code{delete} statement.
+
+You can clear an entire array using @samp{delete @var{array}}.
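+
+For example (using an arbitrary array named @code{freq}):
+
+@example
+delete freq["the"]    # remove a single element
+delete freq           # remove the entire array
+@end example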
+
+@xref{Arrays, ,Arrays in @code{awk}}.
+
+@node Data Type Summary, , Arrays Summary, Variables/Fields
+@appendixsubsec Data Types
+
+The value of an @code{awk} expression is always either a number
+or a string.
+
+Some contexts (such as arithmetic operators) require numeric
+values. They convert strings to numbers by interpreting the text
+of the string as a number. If the string does not look like a
+number, it converts to zero.
+
+Other contexts (such as concatenation) require string values.
+They convert numbers to strings by effectively printing them
+with @code{sprintf}.
+@xref{Conversion, ,Conversion of Strings and Numbers}, for the details.
+
+To force conversion of a string value to a number, simply add zero
+to it. If the value you start with is already a number, this
+does not change it.
+
+To force conversion of a numeric value to a string, concatenate it with
+the null string.
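+
+A minimal sketch of both conversions:
+
+@example
+str = "3.5rods"
+num = str + 0     # num is now the number 3.5
+text = num ""     # text is now the string "3.5"
+@end example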
+
+Comparisons are done numerically if both operands are numeric, or if
+one is numeric and the other is a numeric string. Otherwise one or
+both operands are converted to strings and a string comparison is
+performed. Fields, @code{getline} input, @code{FILENAME}, @code{ARGV}
+elements, @code{ENVIRON} elements and the elements of an array created
+by @code{split} are the only items that can be numeric strings. String
+constants, such as @code{"3.1415927"}, are not numeric strings; they are
+string constants. The full rules for comparisons are described in
+@ref{Typing and Comparison, ,Variable Typing and Comparison Expressions}.
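+
+For example, two fields that look like numbers compare numerically,
+while two string constants always compare as strings:
+
+@example
+$ echo 10 9 | gawk '@{ print ($1 < $2) @}'
+@print{} 0
+$ gawk 'BEGIN @{ print ("10" < "9") @}'
+@print{} 1
+@end example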
+
+Uninitialized variables have the string value @code{""} (the null, or
+empty, string). In contexts where a number is required, this is
+equivalent to zero.
+
+@xref{Variables}, for more information on variable naming and initialization;
+@pxref{Conversion, ,Conversion of Strings and Numbers}, for more information
+on how variable values are interpreted.
+
+@node Rules Summary, Actions Summary, Variables/Fields, Gawk Summary
+@appendixsec Patterns
+
+@menu
+* Pattern Summary:: Quick overview of patterns.
+* Regexp Summary:: Quick overview of regular expressions.
+@end menu
+
+An @code{awk} program is mostly composed of rules, each consisting of a
+pattern followed by an action. The action is enclosed in @samp{@{} and
+@samp{@}}. Either the pattern may be missing, or the action may be
+missing, but not both. If the pattern is missing, the
+action is executed for every input record. A missing action is
+equivalent to @samp{@w{@{ print @}}}, which prints the entire line.
+
+@c These paragraphs repeated for both patterns and actions. I don't
+@c like this, but I also don't see any way around it. Update both copies
+@c if they need fixing.
+Comments begin with the @samp{#} character, and continue until the end of the
+line. Blank lines may be used to separate statements. Statements normally
+end with a newline; however, this is not the case for lines ending in a
+@samp{,}, @samp{@{}, @samp{?}, @samp{:}, @samp{&&}, or @samp{||}. Lines
+ending in @code{do} or @code{else} also have their statements automatically
+continued on the following line. In other cases, a line can be continued by
+ending it with a @samp{\}, in which case the newline is ignored.
+
+Multiple statements may be put on one line by separating each one with
+a @samp{;}.
+This applies to both the statements within the action part of a rule (the
+usual case), and to the rule statements.
+
+@xref{Comments, ,Comments in @code{awk} Programs}, for information on
+@code{awk}'s commenting convention;
+@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}, for a
+description of the line continuation mechanism in @code{awk}.
+
+@node Pattern Summary, Regexp Summary, Rules Summary, Rules Summary
+@appendixsubsec Pattern Summary
+
+@code{awk} patterns may be one of the following:
+
+@example
+/@var{regular expression}/
+@var{relational expression}
+@var{pattern} && @var{pattern}
+@var{pattern} || @var{pattern}
+@var{pattern} ? @var{pattern} : @var{pattern}
+(@var{pattern})
+! @var{pattern}
+@var{pattern1}, @var{pattern2}
+BEGIN
+END
+@end example
+
+@code{BEGIN} and @code{END} are two special kinds of patterns that are not
+tested against the input. The action parts of all @code{BEGIN} rules are
+concatenated as if all the statements had been written in a single @code{BEGIN}
+rule. They are executed before any of the input is read. Similarly, all the
+@code{END} rules are concatenated, and executed when all the input is exhausted (or
+when an @code{exit} statement is executed). @code{BEGIN} and @code{END}
+patterns cannot be combined with other patterns in pattern expressions.
+@code{BEGIN} and @code{END} rules cannot have missing action parts.
+
+For @code{/@var{regular-expression}/} patterns, the associated statement is
+executed for each input record that matches the regular expression. Regular
+expressions are summarized below.
+
+A @var{relational expression} may use any of the operators defined below in
+the section on actions. These generally test whether certain fields match
+certain regular expressions.
+
+The @samp{&&}, @samp{||}, and @samp{!} operators are logical ``and,''
+logical ``or,'' and logical ``not,'' respectively, as in C. They do
+short-circuit evaluation, also as in C, and are used for combining more
+primitive pattern expressions. As in most languages, parentheses may be
+used to change the order of evaluation.
+
+The @samp{?:} operator is like the same operator in C. If the first
+pattern matches, then the second pattern is matched against the input
+record; otherwise, the third is matched. Only one of the second and
+third patterns is matched.
+
+The @samp{@var{pattern1}, @var{pattern2}} form of a pattern is called a
+range pattern. It matches all input lines starting with a line that
+matches @var{pattern1}, and continuing until a line that matches
+@var{pattern2}, inclusive. A range pattern cannot be used as an operand
+of any of the pattern operators.
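+
+For example, the following rule prints every group of lines starting
+with a line that matches @samp{on} and ending with a line that matches
+@samp{off}:
+
+@example
+/^on$/, /^off$/  @{ print @}
+@end example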
+
+@xref{Pattern Overview, ,Pattern Elements}.
+
+@node Regexp Summary, , Pattern Summary, Rules Summary
+@appendixsubsec Regular Expressions
+
+Regular expressions are based on POSIX EREs (extended regular expressions).
+The escape sequences allowed in string constants are also valid in
+regular expressions (@pxref{Escape Sequences}).
+Regexps are composed of characters as follows:
+
+@table @code
+@item @var{c}
+matches the character @var{c} (assuming @var{c} is none of the characters
+listed below).
+
+@item \@var{c}
+matches the literal character @var{c}.
+
+@item .
+matches any character, @emph{including} newline.
+In strict POSIX mode, @samp{.} does not match the @sc{nul}
+character, which is a character with all bits equal to zero.
+
+@item ^
+matches the beginning of a string.
+
+@item $
+matches the end of a string.
+
+@item [@var{abc}@dots{}]
+matches any of the characters @var{abc}@dots{} (character list).
+
+@item [[:@var{class}:]]
+matches any character in the character class @var{class}. Allowable classes
+are @code{alnum}, @code{alpha}, @code{blank}, @code{cntrl},
+@code{digit}, @code{graph}, @code{lower}, @code{print}, @code{punct},
+@code{space}, @code{upper}, and @code{xdigit}.
+
+@item [[.@var{symbol}.]]
+matches the multi-character collating symbol @var{symbol}.
+@code{gawk} does not currently support collating symbols.
+
+@item [[=@var{chars}=]]
+matches any of the equivalent characters in @var{chars}.
+@code{gawk} does not currently support equivalence classes.
+
+@item [^@var{abc}@dots{}]
+matches any character except @var{abc}@dots{} and newline (negated
+character list).
+
+@item @var{r1}|@var{r2}
+matches either @var{r1} or @var{r2} (alternation).
+
+@item @var{r1r2}
+matches @var{r1}, and then @var{r2} (concatenation).
+
+@item @var{r}+
+matches one or more @var{r}'s.
+
+@item @var{r}*
+matches zero or more @var{r}'s.
+
+@item @var{r}?
+matches zero or one @var{r}'s.
+
+@item (@var{r})
+matches @var{r} (grouping).
+
+@item @var{r}@{@var{n}@}
+@itemx @var{r}@{@var{n},@}
+@itemx @var{r}@{@var{n},@var{m}@}
+matches exactly @var{n}, @var{n} or more, or @var{n} to @var{m}
+occurrences of @var{r} (interval expressions).
+
+@item \y
+matches the empty string at either the beginning or the
+end of a word.
+
+@item \B
+matches the empty string within a word.
+
+@item \<
+matches the empty string at the beginning of a word.
+
+@item \>
+matches the empty string at the end of a word.
+
+@item \w
+matches any word-constituent character (alphanumeric characters and
+the underscore).
+
+@item \W
+matches any character that is not word-constituent.
+
+@item \`
+matches the empty string at the beginning of a buffer (same as a string
+in @code{gawk}).
+
+@item \'
+matches the empty string at the end of a buffer.
+@end table
+
+The various command line options
+control how @code{gawk} interprets characters in regexps.
+
+@c NOTE!!! Keep this in sync with the same table in the regexp chapter!
+@table @asis
+@item No options
+In the default case, @code{gawk} provides all the facilities of
+POSIX regexps and the GNU regexp operators described above.
+However, interval expressions are not supported.
+
+@item @code{--posix}
+Only POSIX regexps are supported, the GNU operators are not special
+(e.g., @samp{\w} matches a literal @samp{w}). Interval expressions
+are allowed.
+
+@item @code{--traditional}
+Traditional Unix @code{awk} regexps are matched. The GNU operators
+are not special, interval expressions are not available, and neither
+are the POSIX character classes (@code{[[:alnum:]]} and so on).
+Characters described by octal and hexadecimal escape sequences are
+treated literally, even if they represent regexp metacharacters.
+
+@item @code{--re-interval}
+Allow interval expressions in regexps, even if @samp{--traditional}
+has been provided.
+@end table
+
+@xref{Regexp, ,Regular Expressions}.
+
+@node Actions Summary, Functions Summary, Rules Summary, Gawk Summary
+@appendixsec Actions
+
+Action statements are enclosed in braces, @samp{@{} and @samp{@}}.
+A missing action statement is equivalent to @samp{@w{@{ print @}}}.
+
+Action statements consist of the usual assignment, conditional, and looping
+statements found in most languages. The operators, control statements,
+and Input/Output statements available are similar to those in C.
+
+@c These paragraphs repeated for both patterns and actions. I don't
+@c like this, but I also don't see any way around it. Update both copies
+@c if they need fixing.
+Comments begin with the @samp{#} character, and continue until the end of the
+line. Blank lines may be used to separate statements. Statements normally
+end with a newline; however, this is not the case for lines ending in a
+@samp{,}, @samp{@{}, @samp{?}, @samp{:}, @samp{&&}, or @samp{||}. Lines
+ending in @code{do} or @code{else} also have their statements automatically
+continued on the following line. In other cases, a line can be continued by
+ending it with a @samp{\}, in which case the newline is ignored.
+
+Multiple statements may be put on one line by separating each one with
+a @samp{;}.
+This applies to both the statements within the action part of a rule (the
+usual case), and to the rule statements.
+
+@xref{Comments, ,Comments in @code{awk} Programs}, for information on
+@code{awk}'s commenting convention;
+@pxref{Statements/Lines, ,@code{awk} Statements Versus Lines}, for a
+description of the line continuation mechanism in @code{awk}.
+
+@menu
+* Operator Summary:: @code{awk} operators.
+* Control Flow Summary:: The control statements.
+* I/O Summary:: The I/O statements.
+* Printf Summary:: A summary of @code{printf}.
+* Special File Summary:: Special file names interpreted internally.
+* Built-in Functions Summary:: Built-in numeric and string functions.
+* Time Functions Summary:: Built-in time functions.
+* String Constants Summary:: Escape sequences in strings.
+@end menu
+
+@node Operator Summary, Control Flow Summary, Actions Summary, Actions Summary
+@appendixsubsec Operators
+
+The operators in @code{awk}, in order of decreasing precedence, are:
+
+@table @code
+@item (@dots{})
+Grouping.
+
+@item $
+Field reference.
+
+@item ++ --
+Increment and decrement, both prefix and postfix.
+
+@item ^
+Exponentiation (@samp{**} may also be used, and @samp{**=} for the assignment
+operator, but they are not specified in the POSIX standard).
+
+@item + - !
+Unary plus, unary minus, and logical negation.
+
+@item * / %
+Multiplication, division, and modulus.
+
+@item + -
+Addition and subtraction.
+
+@item @var{space}
+String concatenation.
+
+@item < <= > >= != ==
+The usual relational operators.
+
+@item ~ !~
+Regular expression match, negated match.
+
+@item in
+Array membership.
+
+@item &&
+Logical ``and''.
+
+@item ||
+Logical ``or''.
+
+@item ?:
+A conditional expression. This has the form @samp{@var{expr1} ?
+@var{expr2} : @var{expr3}}. If @var{expr1} is true, the value of the
+expression is @var{expr2}; otherwise it is @var{expr3}. Only one of
+@var{expr2} and @var{expr3} is evaluated.
+
+@item = += -= *= /= %= ^=
+Assignment. Both absolute assignment (@code{@var{var}=@var{value}})
+and operator assignment (the other forms) are supported.
+@end table
+
+@xref{Expressions}.
+
+@node Control Flow Summary, I/O Summary, Operator Summary, Actions Summary
+@appendixsubsec Control Statements
+
+The control statements are as follows:
+
+@example
+if (@var{condition}) @var{statement} @r{[} else @var{statement} @r{]}
+while (@var{condition}) @var{statement}
+do @var{statement} while (@var{condition})
+for (@var{expr1}; @var{expr2}; @var{expr3}) @var{statement}
+for (@var{var} in @var{array}) @var{statement}
+break
+continue
+delete @var{array}[@var{index}]
+delete @var{array}
+exit @r{[} @var{expression} @r{]}
+@{ @var{statements} @}
+@end example
+
+@xref{Statements, ,Control Statements in Actions}.
+
+@node I/O Summary, Printf Summary, Control Flow Summary, Actions Summary
+@appendixsubsec I/O Statements
+
+The Input/Output statements are as follows:
+
+@table @code
+@item getline
+Set @code{$0} from next input record; set @code{NF}, @code{NR}, @code{FNR}.
+@xref{Getline, ,Explicit Input with @code{getline}}.
+
+@item getline <@var{file}
+Set @code{$0} from next record of @var{file}; set @code{NF}.
+
+@item getline @var{var}
+Set @var{var} from next input record; set @code{NF}, @code{FNR}.
+
+@item getline @var{var} <@var{file}
+Set @var{var} from next record of @var{file}.
+
+@item @var{command} | getline
+Run @var{command}, piping its output into @code{getline}; sets @code{$0},
+@code{NF}, @code{NR}.
+
+@item @var{command} | getline @var{var}
+Run @var{command}, piping its output into @code{getline}; sets @var{var}.
+
+@item next
+Stop processing the current input record. The next input record is read and
+processing starts over with the first pattern in the @code{awk} program.
+If the end of the input data is reached, the @code{END} rule(s), if any,
+are executed.
+@xref{Next Statement, ,The @code{next} Statement}.
+
+@item nextfile
+Stop processing the current input file. The next input record read comes
+from the next input file. @code{FILENAME} is updated, @code{FNR} is set to one,
+@code{ARGIND} is incremented,
+and processing starts over with the first pattern in the @code{awk} program.
+If the end of the input data is reached, the @code{END} rule(s), if any,
+are executed.
+Earlier versions of @code{gawk} used @samp{next file}; this usage is still
+supported, but is considered to be deprecated.
+@xref{Nextfile Statement, ,The @code{nextfile} Statement}.
+
+@item print
+Prints the current record.
+@xref{Printing, ,Printing Output}.
+
+@item print @var{expr-list}
+Prints expressions.
+
+@item print @var{expr-list} > @var{file}
+Prints expressions to @var{file}. If @var{file} does not exist, it is
+created. If it does exist, its contents are deleted the first time the
+@code{print} is executed.
+
+@item print @var{expr-list} >> @var{file}
+Prints expressions to @var{file}. The previous contents of @var{file}
+are retained, and the output of @code{print} is appended to the file.
+
+@item print @var{expr-list} | @var{command}
+Prints expressions, sending the output down a pipe to @var{command}.
+The pipeline to the command stays open until the @code{close} function
+is called.
+
+@item printf @var{fmt, expr-list}
+Format and print.
+
+@item printf @var{fmt, expr-list} > @var{file}
+Format and print to @var{file}. If @var{file} does not exist, it is
+created. If it does exist, its contents are deleted the first time the
+@code{printf} is executed.
+
+@item printf @var{fmt, expr-list} >> @var{file}
+Format and print to @var{file}. The previous contents of @var{file}
+are retained, and the output of @code{printf} is appended to the file.
+
+@item printf @var{fmt, expr-list} | @var{command}
+Format and print, sending the output down a pipe to @var{command}.
+The pipeline to the command stays open until the @code{close} function
+is called.
+@end table
+
+@code{getline} returns zero on end of file, and @minus{}1 on an error.
+In the event of an error, @code{getline} will set @code{ERRNO} to
+the value of a system-dependent string that describes the error.
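+
+For example, a cautious reading loop checks the return value explicitly
+(the file name @file{datafile} is only illustrative):
+
+@example
+while ((ret = (getline line < "datafile")) > 0)
+    print line
+if (ret < 0)
+    print "error reading datafile: " ERRNO > "/dev/stderr"
+@end example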
+
+@node Printf Summary, Special File Summary, I/O Summary, Actions Summary
+@appendixsubsec @code{printf} Summary
+
+Conversion specifications have the form
+@code{%}[@var{flag}][@var{width}][@code{.}@var{prec}]@var{format}.
+@c whew!
+Items in brackets are optional.
+
+The @code{awk} @code{printf} statement and @code{sprintf} function
+accept the following conversion specification formats:
+
+@table @code
+@item %c
+An ASCII character. If the argument used for @samp{%c} is numeric, it is
+treated as a character and printed. Otherwise, the argument is assumed to
+be a string, and only the first character of that string is printed.
+
+@item %d
+@itemx %i
+A decimal number (the integer part).
+
+@item %e
+@itemx %E
+A floating point number of the form
+@samp{@r{[}-@r{]}d.dddddde@r{[}+-@r{]}dd}.
+The @samp{%E} format uses @samp{E} instead of @samp{e}.
+
+@item %f
+A floating point number of the form
+@r{[}@code{-}@r{]}@code{ddd.dddddd}.
+
+@item %g
+@itemx %G
+Use either the @samp{%e} or @samp{%f} formats, whichever produces a shorter
+string, with non-significant zeros suppressed.
+@samp{%G} will use @samp{%E} instead of @samp{%e}.
+
+@item %o
+An unsigned octal number (again, an integer).
+
+@item %s
+A character string.
+
+@item %x
+@itemx %X
+An unsigned hexadecimal number (an integer).
+The @samp{%X} format uses @samp{A} through @samp{F} instead of
+@samp{a} through @samp{f} for decimal 10 through 15.
+
+@item %%
+A single @samp{%} character; no argument is converted.
+@end table
+
+There are optional, additional parameters that may lie between the @samp{%}
+and the control letter:
+
+@table @code
+@item -
+The expression should be left-justified within its field.
+
+@item @var{space}
+For numeric conversions, prefix positive values with a space, and
+negative values with a minus sign.
+
+@item +
+The plus sign, used before the width modifier (see below),
+says to always supply a sign for numeric conversions, even if the data
+to be formatted is positive. The @samp{+} overrides the space modifier.
+
+@item #
+Use an ``alternate form'' for certain control letters.
+For @samp{o}, supply a leading zero.
+For @samp{x} and @samp{X}, supply a leading @samp{0x} or @samp{0X} for
+a non-zero result.
+For @samp{e}, @samp{E}, and @samp{f}, the result will always contain a
+decimal point.
+For @samp{g} and @samp{G}, trailing zeros are not removed from the result.
+
+@item 0
+A leading @samp{0} (zero) acts as a flag indicating that output should be
+padded with zeros instead of spaces.
+This applies even to non-numeric output formats.
+This flag only has an effect when the field width is wider than the
+value to be printed.
+
+@item @var{width}
+The field should be padded to this width. The field is normally padded
+with spaces. If the @samp{0} flag has been used, it is padded with zeros.
+
+@item .@var{prec}
+A number that specifies the precision to use when printing.
+For the @samp{e}, @samp{E}, and @samp{f} formats, this specifies the
+number of digits you want printed to the right of the decimal point.
+For the @samp{g} and @samp{G} formats, it specifies the maximum number
+of significant digits. For the @samp{d}, @samp{o}, @samp{i}, @samp{u},
+@samp{x}, and @samp{X} formats, it specifies the minimum number of
+digits to print. For the @samp{s} format, it specifies the maximum number of
+characters from the string that should be printed.
+@end table
+
+Either or both of the @var{width} and @var{prec} values may be specified
+as @samp{*}. In that case, the particular value is taken from the argument
+list.
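+
+For example, the following sketch takes both the width and the
+precision from the argument list:
+
+@example
+printf "%*.*f\n", 10, 2, 3.14159    # prints "      3.14"
+@end example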
+
+@xref{Printf, ,Using @code{printf} Statements for Fancier Printing}.
+
+@node Special File Summary, Built-in Functions Summary, Printf Summary, Actions Summary
+@appendixsubsec Special File Names
+
+When doing I/O redirection from either @code{print} or @code{printf} into a
+file, or via @code{getline} from a file, @code{gawk} recognizes certain special
+file names internally. These file names allow access to open file descriptors
+inherited from @code{gawk}'s parent process (usually the shell). The
+file names are:
+
+@table @file
+@item /dev/stdin
+The standard input.
+
+@item /dev/stdout
+The standard output.
+
+@item /dev/stderr
+The standard error output.
+
+@item /dev/fd/@var{n}
+The file denoted by the open file descriptor @var{n}.
+@end table
+
+In addition, reading the following files provides process-related information
+about the running @code{gawk} program. All returned records are terminated
+with a newline.
+
+@table @file
+@item /dev/pid
+Returns the process ID of the current process.
+
+@item /dev/ppid
+Returns the parent process ID of the current process.
+
+@item /dev/pgrpid
+Returns the process group ID of the current process.
+
+@item /dev/user
+At least four space-separated fields, containing the return values of
+the @code{getuid}, @code{geteuid}, @code{getgid}, and @code{getegid}
+system calls.
+If there are any additional fields, they are the group IDs returned by
+the @code{getgroups} system call.
+(Multiple groups may not be supported on all systems.)
+@end table
+
+@noindent
+These file names may also be used on the command line to name data files.
+These file names are only recognized internally if you do not
+actually have files with these names on your system.
+
+@xref{Special Files, ,Special File Names in @code{gawk}}, for a longer description that
+provides the motivation for this feature.
+
+@node Built-in Functions Summary, Time Functions Summary, Special File Summary, Actions Summary
+@appendixsubsec Built-in Functions
+
+@code{awk} provides a number of built-in functions for performing
+numeric operations, string related operations, and I/O related operations.
+
+The built-in arithmetic functions are:
+
+@table @code
+@item atan2(@var{y}, @var{x})
+the arctangent of @var{y/x} in radians.
+
+@item cos(@var{expr})
+the cosine in radians.
+
+@item exp(@var{expr})
+the exponential function (@code{e ^ @var{expr}}).
+
+@item int(@var{expr})
+truncates to integer.
+
+@item log(@var{expr})
+the natural logarithm of @var{expr}.
+
+@item rand()
+a random number between zero and one.
+
+@item sin(@var{expr})
+the sine in radians.
+
+@item sqrt(@var{expr})
+the square root function.
+
+@item srand(@r{[}@var{expr}@r{]})
+use @var{expr} as a new seed for the random number generator. If no @var{expr}
+is provided, the time of day is used. The return value is the previous
+seed for the random number generator.
+@end table
+
+@iftex
+@page
+@end iftex
+@code{awk} has the following built-in string functions:
+
+@table @code
+@item gensub(@var{regex}, @var{subst}, @var{how} @r{[}, @var{target}@r{]})
+If @var{how} is a string beginning with @samp{g} or @samp{G}, then
+replace each match of @var{regex} in @var{target} with @var{subst}.
+Otherwise, replace the @var{how}'th occurrence. If @var{target} is not
+supplied, use @code{$0}. The return value is the changed string; the
+original @var{target} is not modified. Within @var{subst},
+@samp{\@var{n}}, where @var{n} is a digit from one to nine, can be used to
+indicate the text that matched the @var{n}'th parenthesized
+subexpression.
+
+@item gsub(@var{regex}, @var{subst} @r{[}, @var{target}@r{]})
+for each substring matching the regular expression @var{regex} in the string
+@var{target}, substitute the string @var{subst}, and return the number of
+substitutions. If @var{target} is not supplied, use @code{$0}.
+
+@item index(@var{str}, @var{search})
+returns the index of the string @var{search} in the string @var{str}, or
+zero if
+@var{search} is not present.
+
+@item length(@r{[}@var{str}@r{]})
+returns the length of the string @var{str}. The length of @code{$0}
+is returned if no argument is supplied.
+
+@item match(@var{str}, @var{regex})
+returns the position in @var{str} where the regular expression @var{regex}
+occurs, or zero if @var{regex} is not present, and sets the values of
+@code{RSTART} and @code{RLENGTH}.
+
+@item split(@var{str}, @var{arr} @r{[}, @var{regex}@r{]})
+splits the string @var{str} into the array @var{arr} on the regular expression
+@var{regex}, and returns the number of elements. If @var{regex} is omitted,
+@code{FS} is used instead. @var{regex} can be the null string, causing
+each character to be placed into its own array element.
+The array @var{arr} is cleared first.
+
+@item sprintf(@var{fmt}, @var{expr-list})
+prints @var{expr-list} according to @var{fmt}, and returns the resulting string.
+
+@item sub(@var{regex}, @var{subst} @r{[}, @var{target}@r{]})
+just like @code{gsub}, but only the first matching substring is replaced.
+
+@item substr(@var{str}, @var{index} @r{[}, @var{len}@r{]})
+returns the @var{len}-character substring of @var{str} starting at @var{index}.
+If @var{len} is omitted, the rest of @var{str} is used.
+
+@item tolower(@var{str})
+returns a copy of the string @var{str}, with all the upper-case characters in
+@var{str} translated to their corresponding lower-case counterparts.
+Non-alphabetic characters are left unchanged.
+
+@item toupper(@var{str})
+returns a copy of the string @var{str}, with all the lower-case characters in
+@var{str} translated to their corresponding upper-case counterparts.
+Non-alphabetic characters are left unchanged.
+@end table
+
+The I/O related functions are:
+
+@table @code
+@item close(@var{expr})
+Close the open file or pipe denoted by @var{expr}.
+
+@item fflush(@r{[}@var{expr}@r{]})
+Flush any buffered output for the output file or pipe denoted by @var{expr}.
+If @var{expr} is omitted, standard output is flushed.
+If @var{expr} is the null string (@code{""}), all output buffers are flushed.
+
+@item system(@var{cmd-line})
+Execute the command @var{cmd-line}, and return the exit status.
+If your operating system does not support @code{system}, calling it will
+generate a fatal error.
+
+@samp{system("")} can be used to force @code{awk} to flush any pending
+output. This is more portable, but less obvious, than calling @code{fflush}.
+@end table
+
+@node Time Functions Summary, String Constants Summary, Built-in Functions Summary, Actions Summary
+@appendixsubsec Time Functions
+
+The following two functions are available for getting the current
+time of day, and for formatting time stamps.
+
+@table @code
+@item systime()
+returns the current time of day as the number of seconds since a particular
+epoch (Midnight, January 1, 1970 UTC, on POSIX systems).
+
+@item strftime(@r{[}@var{format}@r{[}, @var{timestamp}@r{]]})
+formats @var{timestamp} according to the specification in @var{format}.
+The current time of day is used if no @var{timestamp} is supplied.
+A default format equivalent to the output of the @code{date} utility is used if
+no @var{format} is supplied.
+@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for the
+details on the conversion specifiers that @code{strftime} accepts.
+@end table
+
+@iftex
+@xref{Built-in, ,Built-in Functions}, for a description of all of
+@code{awk}'s built-in functions.
+@end iftex
+
+@node String Constants Summary, , Time Functions Summary, Actions Summary
+@appendixsubsec String Constants
+
+String constants in @code{awk} are sequences of characters enclosed
+in double quotes (@code{"}). Within strings, certain @dfn{escape sequences}
+are recognized, as in C. These are:
+
+@table @code
+@item \\
+A literal backslash.
+
+@item \a
+The ``alert'' character; usually the ASCII BEL character.
+
+@item \b
+Backspace.
+
+@item \f
+Formfeed.
+
+@item \n
+Newline.
+
+@item \r
+Carriage return.
+
+@item \t
+Horizontal tab.
+
+@item \v
+Vertical tab.
+
+@item \x@var{hex digits}
+The character represented by the string of hexadecimal digits following
+the @samp{\x}. As in ANSI C, all following hexadecimal digits are
+considered part of the escape sequence. For example, @code{"\x1B"} is a
+string containing the ASCII ESC (escape) character. (The @samp{\x}
+escape sequence is not in POSIX @code{awk}.)
+
+@item \@var{ddd}
+The character represented by the one, two, or three digit sequence of octal
+digits. Thus, @code{"\033"} is also a string containing the ASCII ESC
+(escape) character.
+
+@item \@var{c}
+The literal character @var{c}, if @var{c} is not one of the above.
+@end table
+
+The escape sequences may also be used inside constant regular expressions
+(e.g., the regexp @code{@w{/[@ \t\f\n\r\v]/}} matches whitespace
+characters).
+
+@xref{Escape Sequences}.
+
+@node Functions Summary, Historical Features, Actions Summary, Gawk Summary
+@appendixsec User-defined Functions
+
+Functions in @code{awk} are defined as follows:
+
+@example
+function @var{name}(@var{parameter list}) @{ @var{statements} @}
+@end example
+
+Actual parameters supplied in the function call are used to instantiate
+the formal parameters declared in the function. Arrays are passed by
+reference, other variables are passed by value.
+
+If there are fewer arguments passed than there are names in @var{parameter list},
+the extra names are given the null string as their value. Extra names have the
+effect of local variables.
+
+The open-parenthesis in a function call of a user-defined function must
+immediately follow the function name, without any intervening white space.
+This is to avoid a syntactic ambiguity with the concatenation operator.
+
+The word @code{func} may be used in place of @code{function} (but not in
+POSIX @code{awk}).
+
+Use the @code{return} statement to return a value from a function.
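+
+For example, here is a small sketch of a function that uses two extra
+parameters, @code{i} and @code{result}, as local variables:
+
+@example
+function rev(str,   i, result)
+@{
+    result = ""
+    for (i = length(str); i > 0; i--)
+        result = result substr(str, i, 1)
+    return result
+@}
+@end example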
+
+@xref{User-defined, ,User-defined Functions}.
+
+@node Historical Features, , Functions Summary, Gawk Summary
+@appendixsec Historical Features
+
+@cindex historical features
+There are two features of historical @code{awk} implementations that
+@code{gawk} supports.
+
+First, it is possible to call the @code{length} built-in function not only
+with no arguments, but even without parentheses!
+
+@example
+a = length
+@end example
+
+@noindent
+is the same as either of
+
+@example
+a = length()
+a = length($0)
+@end example
+
+@noindent
+For example:
+
+@example
+$ echo abcdef | awk '@{ print length @}'
+@print{} 6
+@end example
+
+@noindent
+This feature is marked as ``deprecated'' in the POSIX standard, and
+@code{gawk} will issue a warning about its use if @samp{--lint} is
+specified on the command line.
+(The ability to use @code{length} this way was actually an accident of the
+original Unix @code{awk} implementation. If any built-in function used
+@code{$0} as its default argument, it was possible to call that function
+without the parentheses. In particular, it was common practice to use
+the @code{length} function in this fashion, and this usage was documented
+in the @code{awk} manual page.)
+
+The other historical feature is the use of either the @code{break} statement,
+or the @code{continue} statement
+outside the body of a @code{while}, @code{for}, or @code{do} loop. Traditional
+@code{awk} implementations have treated such usage as equivalent to the
+@code{next} statement. More recent versions of Unix @code{awk} do not allow
+it. @code{gawk} supports this usage if @samp{--traditional} has been
+specified.
+
+@xref{Options, ,Command Line Options}, for more information about the
+@samp{--posix} and @samp{--lint} options.
+
+@node Installation, Notes, Gawk Summary, Top
+@appendix Installing @code{gawk}
+
+This appendix provides instructions for installing @code{gawk} on the
+various platforms that are supported by the developers. The primary
+developers support Unix (and one day, GNU), while the other ports were
+contributed. The file @file{ACKNOWLEDGMENT} in the @code{gawk}
+distribution lists the electronic mail addresses of the people who did
+the respective ports, and they are also provided in
+@ref{Bugs, , Reporting Problems and Bugs}.
+
+@menu
+* Gawk Distribution:: What is in the @code{gawk} distribution.
+* Unix Installation:: Installing @code{gawk} under various versions
+ of Unix.
+* VMS Installation:: Installing @code{gawk} on VMS.
+* PC Installation:: Installing and Compiling @code{gawk} on MS-DOS
+ and OS/2
+* Atari Installation:: Installing @code{gawk} on the Atari ST.
+* Amiga Installation:: Installing @code{gawk} on an Amiga.
+* Bugs:: Reporting Problems and Bugs.
+* Other Versions:: Other freely available @code{awk}
+ implementations.
+@end menu
+
+@node Gawk Distribution, Unix Installation, Installation, Installation
+@appendixsec The @code{gawk} Distribution
+
+This section first describes how to get the @code{gawk}
+distribution, how to extract it, and then what is in the various files and
+subdirectories.
+
+@menu
+* Getting:: How to get the distribution.
+* Extracting:: How to extract the distribution.
+* Distribution contents:: What is in the distribution.
+@end menu
+
+@node Getting, Extracting, Gawk Distribution, Gawk Distribution
+@appendixsubsec Getting the @code{gawk} Distribution
+@cindex getting @code{gawk}
+@cindex anonymous @code{ftp}
+@cindex @code{ftp}, anonymous
+@cindex Free Software Foundation
+There are three ways you can get GNU software.
+
+@enumerate
+@item
+You can copy it from someone else who already has it.
+
+@cindex Free Software Foundation
+@item
+You can order @code{gawk} directly from the Free Software Foundation.
+Software distributions are available for Unix, MS-DOS, and VMS, on
+tape, CD-ROM, or floppies (MS-DOS only). The address is:
+
+@quotation
+Free Software Foundation @*
+59 Temple Place---Suite 330 @*
+Boston, MA 02111-1307 USA @*
+Phone: +1-617-542-5942 @*
+Fax (including Japan): +1-617-542-2652 @*
+E-mail: @code{gnu@@prep.ai.mit.edu} @*
+@end quotation
+
+@noindent
+Ordering from the FSF directly contributes to the support of the foundation
+and to the production of more free software.
+
+@item
+You can get @code{gawk} by using anonymous @code{ftp} to the Internet host
+@code{ftp.gnu.ai.mit.edu}, in the directory @file{/pub/gnu}.
+
+Here is a list of alternate @code{ftp} sites from which you can obtain GNU
+software. When a site is listed as ``@var{site}@code{:}@var{directory}'' the
+@var{directory} indicates the directory where GNU software is kept.
+You should use a site that is geographically close to you.
+
+@table @asis
+@item Asia:
+@table @code
+@item cair-archive.kaist.ac.kr:/pub/gnu
+@itemx ftp.cs.titech.ac.jp
+@itemx ftp.nectec.or.th:/pub/mirrors/gnu
+@itemx utsun.s.u-tokyo.ac.jp:/ftpsync/prep
+@end table
+
+@item Australia:
+@table @code
+@item archie.au:/gnu
+(@code{archie.oz} or @code{archie.oz.au} for ACSnet)
+@end table
+
+@item Africa:
+@table @code
+@item ftp.sun.ac.za:/pub/gnu
+@end table
+
+@item Middle East:
+@table @code
+@item ftp.technion.ac.il:/pub/unsupported/gnu
+@end table
+
+@item Europe:
+@table @code
+@item archive.eu.net
+@itemx ftp.denet.dk
+@itemx ftp.eunet.ch
+@itemx ftp.funet.fi:/pub/gnu
+@itemx ftp.ieunet.ie:pub/gnu
+@itemx ftp.informatik.rwth-aachen.de:/pub/gnu
+@itemx ftp.informatik.tu-muenchen.de
+@itemx ftp.luth.se:/pub/unix/gnu
+@itemx ftp.mcc.ac.uk
+@itemx ftp.stacken.kth.se
+@itemx ftp.sunet.se:/pub/gnu
+@itemx ftp.univ-lyon1.fr:pub/gnu
+@itemx ftp.win.tue.nl:/pub/gnu
+@itemx irisa.irisa.fr:/pub/gnu
+@itemx isy.liu.se
+@itemx nic.switch.ch:/mirror/gnu
+@itemx src.doc.ic.ac.uk:/gnu
+@itemx unix.hensa.ac.uk:/pub/uunet/systems/gnu
+@end table
+
+@item South America:
+@table @code
+@item ftp.inf.utfsm.cl:/pub/gnu
+@itemx ftp.unicamp.br:/pub/gnu
+@end table
+
+@item Western Canada:
+@table @code
+@item ftp.cs.ubc.ca:/mirror2/gnu
+@end table
+
+@item USA:
+@table @code
+@item col.hp.com:/mirrors/gnu
+@itemx f.ms.uky.edu:/pub3/gnu
+@itemx ftp.cc.gatech.edu:/pub/gnu
+@itemx ftp.cs.columbia.edu:/archives/gnu/prep
+@itemx ftp.digex.net:/pub/gnu
+@itemx ftp.hawaii.edu:/mirrors/gnu
+@itemx ftp.kpc.com:/pub/mirror/gnu
+@end table
+
+@iftex
+@page
+@end iftex
+@item USA (continued):
+@table @code
+@itemx ftp.uu.net:/systems/gnu
+@itemx gatekeeper.dec.com:/pub/GNU
+@itemx jaguar.utah.edu:/gnustuff
+@itemx labrea.stanford.edu
+@itemx mrcnext.cso.uiuc.edu:/pub/gnu
+@itemx vixen.cso.uiuc.edu:/gnu
+@itemx wuarchive.wustl.edu:/systems/gnu
+@end table
+@end table
+@end enumerate
+
+@node Extracting, Distribution contents, Getting, Gawk Distribution
+@appendixsubsec Extracting the Distribution
+@code{gawk} is distributed as a @code{tar} file compressed with the
+GNU Zip program, @code{gzip}.
+
+Once you have the distribution (for example,
+@file{gawk-@value{VERSION}.0.tar.gz}), first use @code{gzip} to expand the
+file, and then use @code{tar} to extract it. You can use the following
+pipeline to produce the @code{gawk} distribution:
+
+@example
+# Under System V, add 'o' to the tar flags
+gzip -d -c gawk-@value{VERSION}.0.tar.gz | tar -xvpf -
+@end example
+
+@noindent
+This will create a directory named @file{gawk-@value{VERSION}.0} in the current
+directory.
+
+The distribution file name is of the form
+@file{gawk-@var{V}.@var{R}.@var{n}.tar.gz}.
+The @var{V} represents the major version of @code{gawk},
+the @var{R} represents the current release of version @var{V}, and
+the @var{n} represents a @dfn{patch level}, meaning that minor bugs have
+been fixed in the release. The current patch level is 0, but when
+retrieving distributions, you should get the version with the highest
+version, release, and patch level. (Note that release levels greater than
+or equal to 90 denote ``beta,'' or non-production software; you may not wish
+to retrieve such a version unless you don't mind experimenting.)
+
+If you are not on a Unix system, you will need to make other arrangements
+for getting and extracting the @code{gawk} distribution. You should consult
+a local expert.
+
+@node Distribution contents, , Extracting, Gawk Distribution
+@appendixsubsec Contents of the @code{gawk} Distribution
+
+The @code{gawk} distribution has a number of C source files,
+documentation files,
+subdirectories and files related to the configuration process
+(@pxref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}),
+and several subdirectories related to different, non-Unix,
+operating systems.
+
+@table @asis
+@item various @samp{.c}, @samp{.y}, and @samp{.h} files
+These files are the actual @code{gawk} source code.
+@end table
+
+@iftex
+@page
+@end iftex
+@table @file
+@item README
+@itemx README_d/README.*
+Descriptive files: @file{README} for @code{gawk} under Unix, and the
+rest for the various hardware and software combinations.
+
+@item INSTALL
+A file providing an overview of the configuration and installation process.
+
+@item PORTS
+A list of systems to which @code{gawk} has been ported, and which
+have successfully run the test suite.
+
+@item ACKNOWLEDGMENT
+A list of the people who contributed major parts of the code or documentation.
+
+@item ChangeLog
+A detailed list of source code changes as bugs are fixed or improvements made.
+
+@item NEWS
+A list of changes to @code{gawk} since the last release or patch.
+
+@item COPYING
+The GNU General Public License.
+
+@item FUTURES
+A brief list of features and/or changes being contemplated for future
+releases, with some indication of the time frame for the feature, based
+on its difficulty.
+
+@item LIMITATIONS
+A list of those factors that limit @code{gawk}'s performance.
+Most of these depend on the hardware or operating system software, and
+are not limits in @code{gawk} itself.
+
+@item POSIX.STD
+A description of one area where the POSIX standard for @code{awk} is
+incorrect, and how @code{gawk} handles the problem.
+
+@item PROBLEMS
+A file describing known problems with the current release.
+
+@item doc/gawk.1
+The @code{troff} source for a manual page describing @code{gawk}.
+This is distributed for the convenience of Unix users.
+
+@item doc/gawk.texi
+The Texinfo source file for this @value{DOCUMENT}.
+It should be processed with @TeX{} to produce a printed document, and
+with @code{makeinfo} to produce an Info file.
+
+@item doc/gawk.info
+The generated Info file for this @value{DOCUMENT}.
+
+@item doc/igawk.1
+The @code{troff} source for a manual page describing the @code{igawk}
+program presented in
+@ref{Igawk Program, ,An Easy Way to Use Library Functions}.
+
+@item doc/Makefile.in
+The input file used during the configuration process to generate the
+actual @file{Makefile} for creating the documentation.
+
+@item Makefile.in
+@itemx acconfig.h
+@itemx aclocal.m4
+@itemx configh.in
+@itemx configure.in
+@itemx configure
+@itemx custom.h
+@itemx missing/*
+These files and subdirectory are used when configuring @code{gawk}
+for various Unix systems. They are explained in detail in
+@ref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}.
+
+@item awklib/extract.awk
+@itemx awklib/Makefile.in
+The @file{awklib} directory contains a copy of @file{extract.awk}
+(@pxref{Extract Program, ,Extracting Programs from Texinfo Source Files}),
+which can be used to extract the sample programs from the Texinfo
+source file for this @value{DOCUMENT}, and a @file{Makefile.in} file, which
+@code{configure} uses to generate a @file{Makefile}.
+As part of the process of building @code{gawk}, the library functions from
+@ref{Library Functions, , A Library of @code{awk} Functions},
+and the @code{igawk} program from
+@ref{Igawk Program, , An Easy Way to Use Library Functions},
+are extracted into ready to use files.
+They are installed as part of the installation process.
+
+@item amiga/*
+Files needed for building @code{gawk} on an Amiga.
+@xref{Amiga Installation, ,Installing @code{gawk} on an Amiga}, for details.
+
+@item atari/*
+Files needed for building @code{gawk} on an Atari ST.
+@xref{Atari Installation, ,Installing @code{gawk} on the Atari ST}, for details.
+
+@item pc/*
+Files needed for building @code{gawk} under MS-DOS and OS/2.
+@xref{PC Installation, ,MS-DOS and OS/2 Installation and Compilation}, for details.
+
+@item vms/*
+Files needed for building @code{gawk} under VMS.
+@xref{VMS Installation, ,How to Compile and Install @code{gawk} on VMS}, for details.
+
+@item test/*
+A test suite for
+@code{gawk}. You can use @samp{make check} from the top-level @code{gawk}
+directory to run your version of @code{gawk} against the test suite.
+If @code{gawk} successfully passes @samp{make check}, then you can
+be confident of a successful port.
+@end table
+
+@node Unix Installation, VMS Installation, Gawk Distribution, Installation
+@appendixsec Compiling and Installing @code{gawk} on Unix
+
+Usually, you can compile and install @code{gawk} by typing only two
+commands. However, if you use an unusual system, you may need
+to configure @code{gawk} for your system yourself.
+
+@menu
+* Quick Installation:: Compiling @code{gawk} under Unix.
+* Configuration Philosophy:: How it's all supposed to work.
+@end menu
+
+@node Quick Installation, Configuration Philosophy, Unix Installation, Unix Installation
+@appendixsubsec Compiling @code{gawk} for Unix
+
+@cindex installation, unix
+After you have extracted the @code{gawk} distribution, @code{cd}
+to @file{gawk-@value{VERSION}.0}. Like most GNU software,
+@code{gawk} is configured
+automatically for your Unix system by running the @code{configure} program.
+This program is a Bourne shell script that was generated automatically using
+GNU @code{autoconf}.
+@iftex
+(The @code{autoconf} software is
+described fully in
+@cite{Autoconf---Generating Automatic Configuration Scripts},
+which is available from the Free Software Foundation.)
+@end iftex
+@ifinfo
+(The @code{autoconf} software is described fully starting with
+@ref{Top, , Introduction, autoconf, Autoconf---Generating Automatic Configuration Scripts}.)
+@end ifinfo
+
+To configure @code{gawk}, simply run @code{configure}:
+
+@example
+sh ./configure
+@end example
+
+This produces a @file{Makefile} and @file{config.h} tailored to your system.
+The @file{config.h} file describes various facts about your system.
+You may wish to edit the @file{Makefile} to
+change the @code{CFLAGS} variable, which controls
+the command line options that are passed to the C compiler (such as
+optimization levels, or compiling for debugging).
+
+Alternatively, you can add your own values for most @code{make}
+variables, such as @code{CC} and @code{CFLAGS}, on the command line when
+running @code{configure}:
+
+@example
+CC=cc CFLAGS=-g sh ./configure
+@end example
+
+@noindent
+See the file @file{INSTALL} in the @code{gawk} distribution for
+all the details.
+
+After you have run @code{configure}, and possibly edited the @file{Makefile},
+type:
+
+@example
+make
+@end example
+
+@noindent
+and shortly thereafter, you should have an executable version of @code{gawk}.
+That's all there is to it!
+(If these steps do not work, please send in a bug report;
+@pxref{Bugs, ,Reporting Problems and Bugs}.)
+
+@node Configuration Philosophy, , Quick Installation, Unix Installation
+@appendixsubsec The Configuration Process
+
+@cindex configuring @code{gawk}
+(This section is of interest only if you know something about using the
+C language and the Unix operating system.)
+
+The source code for @code{gawk} generally attempts to adhere to formal
+standards wherever possible. This means that @code{gawk} uses library
+routines that are specified by the ANSI C standard and by the POSIX
+operating system interface standard. When using an ANSI C compiler,
+function prototypes are used to help improve the compile-time checking.
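+
+For illustration only, here is the difference between an old-style
+declaration and an ANSI C prototype for a hypothetical function (a
+generic C sketch; it is not taken from the @code{gawk} sources):
+
+@example
+/* old-style (K&R) declaration: argument types are not checked */
+extern char *locate_field();
+
+/* ANSI C prototype: the compiler checks argument number and types */
+extern char *locate_field(const char *record, int field_number);
+@end example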
+
+Many Unix systems do not support all of either the ANSI or the
+POSIX standards. The @file{missing} subdirectory in the @code{gawk}
+distribution contains replacement versions of those subroutines that are
+most likely to be missing.
+
+The @file{config.h} file that is created by the @code{configure} program
+contains definitions that describe features of the particular operating
+system where you are attempting to compile @code{gawk}. This file describes
+three things: which header files are available, so that they can be
+correctly included; which (supposedly) standard functions are actually
+present in your C libraries; and other miscellaneous facts about your
+variant of Unix. For example, there may not be an @code{st_blksize}
+element in the @code{stat} structure. In this case, @samp{HAVE_ST_BLKSIZE}
+would be undefined.
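+
+To make this concrete, here is a hypothetical excerpt showing the kind
+of definitions that @code{configure} writes into @file{config.h}; the
+exact macro names and values depend entirely on your system:
+
+@example
+/* header files that were found on this system */
+#define HAVE_STRING_H 1
+#define HAVE_UNISTD_H 1
+
+/* library functions that were found */
+#define HAVE_STRTOD 1
+
+/* the stat structure on this system has no st_blksize element */
+/* #undef HAVE_ST_BLKSIZE */
+@end example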
+
+@cindex @code{custom.h} configuration file
+It is possible for your C compiler to lie to @code{configure}. It may
+do so by not exiting with an error when a library function is not
+available. To get around this, you can edit the file @file{custom.h}.
+Use an @samp{#ifdef} that is appropriate for your system, and either
+@code{#define} any constants that @code{configure} should have defined but
+didn't, or @code{#undef} any constants that @code{configure} defined and
+should not have. @file{custom.h} is automatically included by
+@file{config.h}.
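+
+As a sketch only (the system test macro @code{FOO_SYSTEM} and the
+constant being adjusted are invented for illustration), an entry in
+@file{custom.h} might look something like this:
+
+@example
+/* On FOO systems, configure wrongly believes strtod() is present. */
+#ifdef FOO_SYSTEM
+#undef HAVE_STRTOD
+#endif
+@end example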
+
+It is also possible that the @code{configure} program generated by
+@code{autoconf}
+will not work on your system in some other fashion. If you do have a problem,
+the file
+@file{configure.in} is the input for @code{autoconf}. You may be able to
+change this file, and generate a new version of @code{configure} that will
+work on your system. @xref{Bugs, ,Reporting Problems and Bugs}, for
+information on how to report problems in configuring @code{gawk}. The same
+mechanism may be used to send in updates to @file{configure.in} and/or
+@file{custom.h}.
+
+@node VMS Installation, PC Installation, Unix Installation, Installation
+@appendixsec How to Compile and Install @code{gawk} on VMS
+
+@c based on material from Pat Rankin <rankin@eql.caltech.edu>
+
+@cindex installation, vms
+This section describes how to compile and install @code{gawk} under VMS.
+
+@menu
+* VMS Compilation:: How to compile @code{gawk} under VMS.
+* VMS Installation Details:: How to install @code{gawk} under VMS.
+* VMS Running:: How to run @code{gawk} under VMS.
+* VMS POSIX:: Alternate instructions for VMS POSIX.
+@end menu
+
+@node VMS Compilation, VMS Installation Details, VMS Installation, VMS Installation
+@appendixsubsec Compiling @code{gawk} on VMS
+
+To compile @code{gawk} under VMS, there is a @code{DCL} command procedure that
+will issue all the necessary @code{CC} and @code{LINK} commands, and there is
+also a @file{Makefile} for use with the @code{MMS} utility. From the source
+directory, use either
+
+@example
+$ @@[.VMS]VMSBUILD.COM
+@end example
+
+@noindent
+or
+
+@example
+$ MMS/DESCRIPTION=[.VMS]DESCRIP.MMS GAWK
+@end example
+
+Depending upon which C compiler you are using, follow one of the sets
+of instructions in this table:
+
+@table @asis
+@item VAX C V3.x
+Use either @file{vmsbuild.com} or @file{descrip.mms} as is. These use
+@code{CC/OPTIMIZE=NOLINE}, which is essential for Version 3.0.
+
+@item VAX C V2.x
+You must have Version 2.3 or 2.4; older ones won't work. Edit either
+@file{vmsbuild.com} or @file{descrip.mms} according to the comments in them.
+For @file{vmsbuild.com}, this just entails removing two @samp{!} delimiters.
+Also edit @file{config.h} (which is a copy of file @file{[.config]vms-conf.h})
+and comment out or delete the two lines @samp{#define __STDC__ 0} and
+@samp{#define VAXC_BUILTINS} near the end.
+
+@item GNU C
+Edit @file{vmsbuild.com} or @file{descrip.mms}; the changes are different
+from those for VAX C V2.x, but equally straightforward. No changes to
+@file{config.h} should be needed.
+
+@item DEC C
+Edit @file{vmsbuild.com} or @file{descrip.mms} according to their comments.
+No changes to @file{config.h} should be needed.
+@end table
+
+@code{gawk} has been tested under VAX/VMS 5.5-1 using VAX C V3.2,
+GNU C 1.40 and 2.3. It should work without modifications for VMS V4.6 and up.
+
+@node VMS Installation Details, VMS Running, VMS Compilation, VMS Installation
+@appendixsubsec Installing @code{gawk} on VMS
+
+To install @code{gawk}, all you need is a ``foreign'' command, which is
+a @code{DCL} symbol whose value begins with a dollar sign. For example:
+
+@example
+$ GAWK :== $disk1:[gnubin]GAWK
+@end example
+
+@noindent
+(Substitute the actual location of @code{gawk.exe} for
+@samp{$disk1:[gnubin]}.) The symbol should be placed in the
+@file{login.com} of any user who wishes to run @code{gawk},
+so that it will be defined every time the user logs on.
+Alternatively, the symbol may be placed in the system-wide
+@file{sylogin.com} procedure, which will allow all users
+to run @code{gawk}.
+
+Optionally, the help entry can be loaded into a VMS help library:
+
+@example
+$ LIBRARY/HELP SYS$HELP:HELPLIB [.VMS]GAWK.HLP
+@end example
+
+@noindent
+(You may want to substitute a site-specific help library rather than
+the standard VMS library @samp{HELPLIB}.) After loading the help text,
+
+@example
+$ HELP GAWK
+@end example
+
+@noindent
+will provide information about both the @code{gawk} implementation and the
+@code{awk} programming language.
+
+The logical name @samp{AWK_LIBRARY} can designate a default location
+for @code{awk} program files. For the @samp{-f} option, if the specified
+filename has no device or directory path information in it, @code{gawk}
+will look in the current directory first, then in the directory specified
+by the translation of @samp{AWK_LIBRARY} if the file was not found.
+If, after searching in both directories, the file is still not found,
+@code{gawk} appends the suffix @samp{.awk} to the filename and retries
+the search. If @samp{AWK_LIBRARY} is not defined, that portion of the
+file search fails benignly.
+
+@node VMS Running, VMS POSIX, VMS Installation Details, VMS Installation
+@appendixsubsec Running @code{gawk} on VMS
+
+Command line parsing and quoting conventions are significantly different
+on VMS, so examples in this @value{DOCUMENT} or from other sources often need minor
+changes. They @emph{are} minor though, and all @code{awk} programs
+should run correctly.
+
+Here are a couple of trivial tests:
+
+@example
+$ gawk -- "BEGIN @{print ""Hello, World!""@}"
+$ gawk -"W" version
+! could also be -"W version" or "-W version"
+@end example
+
+@noindent
+Note that upper-case and mixed-case text must be quoted.
+
+The VMS port of @code{gawk} includes a @code{DCL}-style interface in addition
+to the original shell-style interface (see the help entry for details).
+One side-effect of dual command line parsing is that if there is only a
+single parameter (as in the quoted string program above), the command
+becomes ambiguous. To work around this, the normally optional @samp{--}
+flag is required to force Unix style rather than @code{DCL} parsing. If any
+other dash-type options (or multiple parameters such as data files to be
+processed) are present, there is no ambiguity and @samp{--} can be omitted.
+
+The default search path when looking for @code{awk} program files specified
+by the @samp{-f} option is @code{"SYS$DISK:[],AWK_LIBRARY:"}. The logical
+name @samp{AWKPATH} can be used to override this default. The format
+of @samp{AWKPATH} is a comma-separated list of directory specifications.
+When defining it, the value should be quoted so that it retains a single
+translation, and not a multi-translation @code{RMS} searchlist.
+
+@node VMS POSIX, , VMS Running, VMS Installation
+@appendixsubsec Building and Using @code{gawk} on VMS POSIX
+
+Ignore the instructions above, although @file{vms/gawk.hlp} should still
+be made available in a help library. Make sure that the @code{configure}
+script is executable; use @samp{chmod +x}
+on it if necessary. Then execute the following commands:
+
+@example
+@group
+$ POSIX
+psx> CC=vms/posix-cc.sh configure
+psx> CC=c89 make gawk
+@end group
+@end example
+
+@noindent
+The first command will construct files @file{config.h} and @file{Makefile}
+out of templates. The second command will compile and link @code{gawk}.
+@ignore
+Due to a @code{make} bug in VMS POSIX V1.0 and V1.1,
+the file @file{awktab.c} must be given as an explicit target or it will
+not be built and the final link step will fail.
+@end ignore
+Ignore the warning
+@code{"Could not find lib m in lib list"}; it is harmless, caused by the
+explicit use of @samp{-lm} as a linker option which is not needed
+under VMS POSIX. Under V1.1 (but not V1.0) a problem with the @code{yacc}
+skeleton @file{/etc/yyparse.c} will cause a compiler warning for
+@file{awktab.c}, followed by a linker warning about compilation warnings
+in the resulting object module. These warnings can be ignored.
+
+Once built, @code{gawk} will work like any other shell utility. Unlike
+the normal VMS port of @code{gawk}, no special command line manipulation is
+needed in the VMS POSIX environment.
+
+@c Rewritten by Scott Deifik <scottd@amgen.com>
+@c and Darrel Hankerson <hankedr@mail.auburn.edu>
+@node PC Installation, Atari Installation, VMS Installation, Installation
+@appendixsec MS-DOS and OS/2 Installation and Compilation
+
+@cindex installation, MS-DOS and OS/2
+If you have received a binary distribution prepared by the DOS
+maintainers, then @code{gawk} and the necessary support files will appear
+under the @file{gnu} directory, with executables in @file{gnu/bin},
+libraries in @file{gnu/lib/awk}, and manual pages under @file{gnu/man}.
+This is designed for easy installation to a @file{/gnu} directory on your
+drive, but the files can be installed anywhere, provided @code{AWKPATH} is
+set properly. Regardless of the installation directory, the first line of
+@file{igawk.cmd} and @file{igawk.bat} (in @file{gnu/bin}) may need to be
+edited.
+
+The binary distribution will contain a separate file describing the
+contents. In particular, it may include more than one version of the
+@code{gawk} executable. OS/2 binary distributions may have a
+different arrangement, but installation is similar.
+
+The OS/2 and MS-DOS versions of @code{gawk} search for program files as
+described in @ref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}.
+However, semicolons (rather than colons) separate elements
+in the @code{AWKPATH} variable. If @code{AWKPATH} is not set or is empty,
+then the default search path is @code{@w{".;c:/lib/awk;c:/gnu/lib/awk"}}.
+
+An @code{sh}-like shell (as opposed to @code{command.com} under MS-DOS
+or @code{cmd.exe} under OS/2) may be useful for @code{awk} programming.
+Ian Stewartson has written an excellent shell for MS-DOS and OS/2, and a
+@code{ksh} clone and GNU Bash are available for OS/2. The file
+@file{README_d/README.pc} in the @code{gawk} distribution contains
+information on these shells. Users of Stewartson's shell on DOS should
+examine its documentation on handling of command-lines. In particular,
+the setting for @code{gawk} in the shell configuration may need to be
+changed, and the @code{ignoretype} option may also be of interest.
+
+@code{gawk} can be compiled for MS-DOS and OS/2 using the GNU development tools
+from DJ Delorie (DJGPP, MS-DOS-only) or Eberhard Mattes (EMX, MS-DOS and OS/2).
+Microsoft C can be used to build 16-bit versions for MS-DOS and OS/2. The file
+@file{README_d/README.pc} in the @code{gawk} distribution contains additional
+notes, and @file{pc/Makefile} contains important notes on compilation options.
+
+To build @code{gawk}, copy the files in the @file{pc} directory to the
+directory with the rest of the @code{gawk} sources. The @file{Makefile}
+contains a configuration section with comments, and may need to be
+edited in order to work with your @code{make} utility.
+
+The @file{Makefile} contains a number of targets for building various MS-DOS
+and OS/2 versions. A list of targets will be printed if the @code{make}
+command is given without a target. As an example, to build @code{gawk}
+using the DJGPP tools, enter @samp{make djgpp}.
+
+Using @code{make} to run the standard tests and to install @code{gawk}
+requires additional Unix-like tools, including @code{sh}, @code{sed}, and
+@code{cp}. In order to run the tests, the @file{test/*.ok} files may need to
+be converted so that they have the usual DOS-style end-of-line markers. Most
+of the tests will work properly with Stewartson's shell along with the
+companion utilities or appropriate GNU utilities. However, some editing of
+@file{test/Makefile} is required. It is recommended that the file
+@file{pc/Makefile.tst} be copied to @file{test/Makefile} as a
+replacement. Details can be found in @file{README_d/README.pc}.
+
+@node Atari Installation, Amiga Installation, PC Installation, Installation
+@appendixsec Installing @code{gawk} on the Atari ST
+
+@c based on material from Michal Jaegermann <michal@gortel.phys.ualberta.ca>
+
+@cindex atari
+@cindex installation, atari
+There are no substantial differences when installing @code{gawk} on
+various Atari models. Compiled @code{gawk} executables do not require
+a large amount of memory with most @code{awk} programs, and should run on
+all Motorola-processor-based models (referred to from here on as the ST,
+even though that is not strictly accurate).
+
+In order to use @code{gawk}, you need to have a shell, either text or
+graphics, that does not map all the characters of a command line to
+upper-case. Maintaining case distinction in option flags is very
+important (@pxref{Options, ,Command Line Options}).
+These days this is the default, and it may only be a problem for some
+very old machines. If your system does not preserve the case of option
+flags, you will need to upgrade your tools. Support for I/O
+redirection is necessary to make it easy to import @code{awk} programs
+from other environments. Pipes are nice to have, but not vital.
+
+@menu
+* Atari Compiling:: Compiling @code{gawk} on Atari
+* Atari Using:: Running @code{gawk} on Atari
+@end menu
+
+@node Atari Compiling, Atari Using, Atari Installation, Atari Installation
+@appendixsubsec Compiling @code{gawk} on the Atari ST
+
+A proper compilation of @code{gawk} sources when @code{sizeof(int)}
+differs from @code{sizeof(void *)} requires an ANSI C compiler. An initial
+port was done with @code{gcc}. You may actually prefer executables
+where @code{int}s are four bytes wide, but the other variant works as well.
+
+You may need quite a bit of memory when trying to recompile the @code{gawk}
+sources, as some source files (@file{regex.c} in particular) are quite
+big. If you run out of memory while compiling such a file, try reducing
+the optimization level for that particular file.
+
+@cindex Linux
+With a reasonable shell (Bash will do), and in particular if you run
+Linux, MiNT or a similar operating system, you have a pretty good
+chance that the @code{configure} utility will succeed. Otherwise, sample
+versions of @file{config.h} and @file{Makefile.st} are provided in the
+@file{atari} subdirectory; they can be edited and copied to the
+corresponding files in the main source directory. Even if
+@code{configure} does produce something, it may be advisable to compare
+its results with the sample versions and make adjustments if necessary.
+
+Some @code{gawk} source code fragments depend on a preprocessor define
+@samp{atarist}. This basically assumes the TOS environment with @code{gcc}.
+Modify these sections as appropriate if they are not right for your
+environment. Also see the remarks about @code{AWKPATH} and @code{envsep} in
+@ref{Atari Using, ,Running @code{gawk} on the Atari ST}.
+
+As shipped, the sample @file{config.h} claims that the @code{system}
+function is missing from the libraries, which is not true, and an
+alternative implementation of this function is provided in
+@file{atari/system.c}. Depending upon your particular combination of
+shell and operating system, you may wish to change the file to indicate
+that @code{system} is available.
+
+@node Atari Using, , Atari Compiling, Atari Installation
+@appendixsubsec Running @code{gawk} on the Atari ST
+
+An executable version of @code{gawk} should be placed, as usual,
+anywhere in your @code{PATH} where your shell can find it.
+
+While executing, @code{gawk} creates a number of temporary files. When
+using @code{gcc} libraries for TOS, @code{gawk} looks for either of
+the environment variables @code{TEMP} or @code{TMPDIR}, in that order.
+If either one is found, its value is assumed to be a directory for
+temporary files. This directory must exist, and if you can spare the
+memory, it is a good idea to put it on a RAM drive. If neither
+@code{TEMP} nor @code{TMPDIR} is found, then @code{gawk} uses the
+current directory for its temporary files.
+
+The ST version of @code{gawk} searches for its program files as described in
+@ref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}.
+The default value for the @code{AWKPATH} variable is taken from
+@code{DEFPATH} defined in @file{Makefile}. The sample @code{gcc}/TOS
+@file{Makefile} for the ST in the distribution sets @code{DEFPATH} to
+@code{@w{".,c:\lib\awk,c:\gnu\lib\awk"}}. The search path can be
+modified by explicitly setting @code{AWKPATH} to whatever you wish.
+Note that colons cannot be used on the ST to separate elements in the
+@code{AWKPATH} variable, since they have another, reserved, meaning.
+Instead, you must use a comma to separate elements in the path. When
+recompiling, the separating character can be modified by initializing
+the @code{envsep} variable in @file{atari/gawkmisc.atr} to another
+value.
+
+Although @code{awk} allows great flexibility in doing I/O redirections
+from within a program, this facility should be used with care on the ST
+running under TOS. In some circumstances the OS routines for file
+handle pool processing lose track of certain events, causing the
+computer to crash, and requiring a reboot. Often a warm reboot is
+sufficient. Fortunately, this happens infrequently, and in rather
+esoteric situations. In particular, avoid having one part of an
+@code{awk} program using @code{print} statements explicitly redirected
+to @code{"/dev/stdout"}, while other @code{print} statements use the
+default standard output, and a calling shell has redirected standard
+output to a file.
+
+When @code{gawk} is compiled with the ST version of @code{gcc} and its
+usual libraries, it will accept both @samp{/} and @samp{\} as path separators.
+While this is convenient, remember that this removes one technically
+valid character (@samp{/}) from your file names, and that it may create
+problems for external programs called via the @code{system} function,
+which may not support this convention. Whenever it is possible
+that a file created by @code{gawk} will be used by some other program,
+use only backslashes. Also remember that in @code{awk}, backslashes in
+strings have to be doubled in order to get literal backslashes
+(@pxref{Escape Sequences}).
+
+@node Amiga Installation, Bugs, Atari Installation, Installation
+@appendixsec Installing @code{gawk} on an Amiga
+
+@cindex amiga
+@cindex installation, amiga
+You can install @code{gawk} on an Amiga system using a Unix emulation
+environment available via anonymous @code{ftp} from
+@code{wuarchive.wustl.edu} in the directory @file{pub/aminet/dev/gcc}.
+This includes a shell based on @code{pdksh}. The primary component of
+this environment is a Unix emulation library, @file{ixemul.lib}.
+@c could really use more background here, who wrote this, etc.
+
+A more complete distribution for the Amiga is available on
+the FreshFish CD-ROM from:
+
+@quotation
+Amiga Library Services @*
+610 North Alma School Road, Suite 18 @*
+Chandler, AZ 85224 USA @*
+Phone: +1-602-491-0048 @*
+FAX: +1-602-491-0048 @*
+E-mail: @code{orders@@amigalib.com}
+@end quotation
+
+Once you have the distribution, you can configure @code{gawk} simply by
+running @code{configure}:
+
+@example
+configure -v m68k-cbm-amigados
+@end example
+
+Then run @code{make}, and you should be all set!
+(If these steps do not work, please send in a bug report;
+@pxref{Bugs, ,Reporting Problems and Bugs}.)
+
+@node Bugs, Other Versions, Amiga Installation, Installation
+@appendixsec Reporting Problems and Bugs
+
+If you have problems with @code{gawk} or think that you have found a bug,
+please report it to the developers; we cannot promise to do anything,
+but we might well want to fix it.
+
+Before reporting a bug, make sure you have actually found a real bug.
+Carefully reread the documentation and see if it really says you can do
+what you're trying to do. If it's not clear whether you should be able
+to do something or not, report that too; it's a bug in the documentation!
+
+Before reporting a bug or trying to fix it yourself, try to isolate it
+to the smallest possible @code{awk} program and input data file that
+reproduces the problem. Then send us the program and data file,
+some idea of what kind of Unix system you're using, and the exact results
+@code{gawk} gave you. Also say what you expected to occur; this will help
+us decide whether the problem was really in the documentation.
+
+Once you have a precise problem, there are two e-mail addresses you
+can send mail to.
+
+@table @asis
+@item Internet:
+@samp{bug-gnu-utils@@prep.ai.mit.edu}
+
+@item UUCP:
+@samp{uunet!prep.ai.mit.edu!bug-gnu-utils}
+@end table
+
+Please include the
+version number of @code{gawk} you are using. You can get this information
+with the command @samp{gawk --version}.
+You should send a carbon copy of your mail to Arnold Robbins, who can
+be reached at @samp{arnold@@gnu.ai.mit.edu}.
+
+@cindex @code{comp.lang.awk}
+@strong{Important!} Do @emph{not} try to report bugs in @code{gawk} by
+posting to the Usenet/Internet newsgroup @code{comp.lang.awk}.
+While the @code{gawk} developers do occasionally read this newsgroup,
+there is no guarantee that we will see your posting. The steps described
+above are the official, recognized ways for reporting bugs.
+
+Non-bug suggestions are always welcome as well. If you have questions
+about things that are unclear in the documentation or are just obscure
+features, ask Arnold Robbins; he will try to help you out, although he
+may not have the time to fix the problem. You can send him electronic
+mail at the Internet address above.
+
+If you find bugs in one of the non-Unix ports of @code{gawk}, please send
+an electronic mail message to the person who maintains that port. They
+are listed below, and also in the @file{README} file in the @code{gawk}
+distribution. Information in the @code{README} file should be considered
+authoritative if it conflicts with this @value{DOCUMENT}.
+
+The people maintaining the non-Unix ports of @code{gawk} are:
+
+@cindex Deifik, Scott
+@cindex Fish, Fred
+@cindex Hankerson, Darrel
+@cindex Jaegermann, Michal
+@cindex Rankin, Pat
+@cindex Rommel, Kai Uwe
+@table @asis
+@item MS-DOS
+Scott Deifik, @samp{scottd@@amgen.com}, and
+Darrel Hankerson, @samp{hankedr@@mail.auburn.edu}.
+
+@item OS/2
+Kai Uwe Rommel, @samp{rommel@@ars.de}.
+
+@item VMS
+Pat Rankin, @samp{rankin@@eql.caltech.edu}.
+
+@item Atari ST
+Michal Jaegermann, @samp{michal@@gortel.phys.ualberta.ca}.
+
+@item Amiga
+Fred Fish, @samp{fnf@@amigalib.com}.
+@end table
+
+If your bug is also reproducible under Unix, please send copies of your
+report to the general GNU bug list, as well as to Arnold Robbins, at the
+addresses listed above.
+
+@node Other Versions, , Bugs, Installation
+@appendixsec Other Freely Available @code{awk} Implementations
+
+There are two other freely available @code{awk} implementations.
+This section briefly describes where to get them.
+
+@table @asis
+@cindex Kernighan, Brian
+@cindex anonymous @code{ftp}
+@cindex @code{ftp}, anonymous
+@item Unix @code{awk}
+Brian Kernighan has been able to make his implementation of
+@code{awk} freely available. You can get it via anonymous @code{ftp}
+to the host @code{@w{netlib.att.com}}. Change directory to
+@file{/netlib/research}. Use ``binary'' or ``image'' mode, and
+retrieve @file{awk.bundle.Z}.
+
+This is a shell archive that has been compressed with the @code{compress}
+utility. It can be uncompressed with either @code{uncompress} or the
+GNU @code{gunzip} utility.
+
+This version requires an ANSI C compiler; GCC (the GNU C compiler)
+works quite nicely.
+
+@cindex Brennan, Michael
+@cindex @code{mawk}
+@item @code{mawk}
+Michael Brennan has written an independent implementation of @code{awk},
+called @code{mawk}. It is available under the GPL
+(@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}),
+just as @code{gawk} is.
+
+You can get it via anonymous @code{ftp} to the host
+@code{@w{oxy.edu}}. Change directory to @file{/public}. Use ``binary''
+or ``image'' mode, and retrieve @file{mawk1.2.1.tar.gz} (or the latest
+version that is there).
+
+@code{gunzip} may be used to decompress this file. Installation
+is similar to @code{gawk}'s
+(@pxref{Unix Installation, , Compiling and Installing @code{gawk} on Unix}).
+@end table
+
+@node Notes, Glossary, Installation, Top
+@appendix Implementation Notes
+
+This appendix contains information mainly of interest to implementors and
+maintainers of @code{gawk}. Everything in it applies specifically to
+@code{gawk}, and not to other implementations.
+
+@menu
+* Compatibility Mode:: How to disable certain @code{gawk} extensions.
+* Additions:: Making Additions To @code{gawk}.
+* Future Extensions:: New features that may be implemented one day.
+* Improvements:: Suggestions for improvements by volunteers.
+@end menu
+
+@node Compatibility Mode, Additions, Notes, Notes
+@appendixsec Downward Compatibility and Debugging
+
+@xref{POSIX/GNU, ,Extensions in @code{gawk} Not in POSIX @code{awk}},
+for a summary of the GNU extensions to the @code{awk} language and program.
+All of these features can be turned off by invoking @code{gawk} with the
+@samp{--traditional} option, or with the @samp{--posix} option.
+
+If @code{gawk} is compiled for debugging with @samp{-DDEBUG}, then there
+is one more option available on the command line:
+
+@table @code
+@item -W parsedebug
+@itemx --parsedebug
+Print out the parse stack information as the program is being parsed.
+@end table
+
+This option is intended only for serious @code{gawk} developers,
+and not for the casual user. It probably has not even been compiled into
+your version of @code{gawk}, since it slows down execution.
+
+@node Additions, Future Extensions, Compatibility Mode, Notes
+@appendixsec Making Additions to @code{gawk}
+
+If you should find that you wish to enhance @code{gawk} in a significant
+fashion, you are perfectly free to do so. That is the point of having
+free software; the source code is available, and you are free to change
+it as you wish (@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}).
+
+This section discusses the ways you might wish to change @code{gawk},
+and any considerations you should bear in mind.
+
+@menu
+* Adding Code:: Adding code to the main body of @code{gawk}.
+* New Ports:: Porting @code{gawk} to a new operating system.
+@end menu
+
+@node Adding Code, New Ports, Additions, Additions
+@appendixsubsec Adding New Features
+
+@cindex adding new features
+@cindex features, adding
+You are free to add any new features you like to @code{gawk}.
+However, if you want your changes to be incorporated into the @code{gawk}
+distribution, there are several steps that you need to take in order to
+make it possible for me to include your changes.
+
+@enumerate 1
+@item
+Get the latest version.
+It is much easier for me to integrate changes if they are relative to
+the most recent distributed version of @code{gawk}. If your version of
+@code{gawk} is very old, I may not be able to integrate your changes at all.
+@xref{Getting, ,Getting the @code{gawk} Distribution},
+for information on getting the latest version of @code{gawk}.
+
+@item
+@iftex
+Follow the @cite{GNU Coding Standards}.
+@end iftex
+@ifinfo
+See @inforef{Top, , Version, standards, GNU Coding Standards}.
+@end ifinfo
+This document describes how GNU software should be written. If you haven't
+read it, please do so, preferably @emph{before} starting to modify @code{gawk}.
+(The @cite{GNU Coding Standards} are available as part of the Autoconf
+distribution, from the FSF.)
+
+@cindex @code{gawk} coding style
+@cindex coding style used in @code{gawk}
+@item
+Use the @code{gawk} coding style.
+The C code for @code{gawk} follows the instructions in the
+@cite{GNU Coding Standards}, with minor exceptions. The code is formatted
+using the traditional ``K&R'' style, particularly as regards the placement
+of braces and the use of tabs. In brief, the coding rules for @code{gawk}
+are:
+
+@itemize @bullet
+@item
+Use old style (non-prototype) function headers when defining functions.
+
+@item
+Put the name of the function at the beginning of its own line.
+
+@item
+Put the return type of the function, even if it is @code{int}, on the
+line above the line with the name and arguments of the function.
+
+@item
+The declarations for the function arguments should not be indented.
+
+@item
+Put spaces around parentheses used in control structures
+(@code{if}, @code{while}, @code{for}, @code{do}, @code{switch}
+and @code{return}).
+
+@item
+Do not put spaces in front of parentheses used in function calls.
+
+@item
+Put spaces around all C operators, and after commas in function calls.
+
+@item
+Do not use the comma operator to produce multiple side-effects, except
+in @code{for} loop initialization and increment parts, and in macro bodies.
+
+@item
+Use real tabs for indenting, not spaces.
+
+@item
+Use the ``K&R'' brace layout style.
+
+@item
+Use comparisons against @code{NULL} and @code{'\0'} in the conditions of
+@code{if}, @code{while} and @code{for} statements, and in the @code{case}s
+of @code{switch} statements, instead of just the
+plain pointer or character value.
+
+@item
+Use the @code{TRUE}, @code{FALSE}, and @code{NULL} symbolic constants,
+and the character constant @code{'\0'} where appropriate, instead of @code{1}
+and @code{0}.
+
+@item
+Provide one-line descriptive comments for each function.
+
+@item
+Do not use @samp{#elif}. Many older Unix C compilers cannot handle it.
+@end itemize
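+
+For illustration only, here is a small function, not taken from the
+@code{gawk} sources, laid out according to these rules:
+
+@example
+/* is_blank --- is the string empty or all whitespace? */
+
+static int
+is_blank(str)
+char *str;
+@{
+        if (str == NULL)
+                return TRUE;
+        while (*str != '\0') @{
+                if (*str != ' ' && *str != '\t')
+                        return FALSE;
+                str++;
+        @}
+        return TRUE;
+@}
+@end example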
+
+If I have to reformat your code to follow the coding style used in
+@code{gawk}, I may not bother.
+
+@item
+Be prepared to sign the appropriate paperwork.
+In order for the FSF to distribute your changes, you must either place
+those changes in the public domain, and submit a signed statement to that
+effect, or assign the copyright in your changes to the FSF.
+Both of these actions are easy to do, and @emph{many} people have done so
+already. If you have questions, please contact me
+(@pxref{Bugs, , Reporting Problems and Bugs}),
+or @code{gnu@@prep.ai.mit.edu}.
+
+@item
+Update the documentation.
+Along with your new code, please supply new sections and/or chapters
+for this @value{DOCUMENT}. If at all possible, please use real
+Texinfo, instead of just supplying unformatted ASCII text (although
+even that is better than no documentation at all).
+Conventions to be followed in @cite{@value{TITLE}} are provided
+after the @samp{@@bye} at the end of the Texinfo source file.
+If possible, please update the man page as well.
+
+You will also have to sign paperwork for your documentation changes.
+
+@item
+Submit changes as context diffs or unified diffs.
+Use @samp{diff -c -r -N} or @samp{diff -u -r -N} to compare
+the original @code{gawk} source tree with your version.
+(I find context diffs to be more readable, but unified diffs are
+more compact.)
+I recommend using the GNU version of @code{diff}.
+Send the output produced by either run of @code{diff} to me when you
+submit your changes.
+@xref{Bugs, , Reporting Problems and Bugs}, for the electronic mail
+information.
+
+Using this format makes it easy for me to apply your changes to the
+master version of the @code{gawk} source code (using @code{patch}).
+If I have to apply the changes manually, using a text editor, I may
+not do so, particularly if there are lots of changes.
+@end enumerate
+
+Although this sounds like a lot of work, please remember that while you
+may write the new code, I have to maintain it and support it, and if it
+isn't possible for me to do that with a minimum of extra work, then I
+probably will not.
+
+@node New Ports, , Adding Code, Additions
+@appendixsubsec Porting @code{gawk} to a New Operating System
+
+@cindex porting @code{gawk}
+If you wish to port @code{gawk} to a new operating system, there are
+several steps to follow.
+
+@enumerate 1
+@item
+Follow the guidelines in
+@ref{Adding Code, ,Adding New Features},
+concerning coding style, submission of diffs, and so on.
+
+@item
+When doing a port, bear in mind that your code must co-exist peacefully
+with the rest of @code{gawk}, and the other ports. Avoid gratuitous
+changes to the system-independent parts of the code. If at all possible,
+avoid sprinkling @samp{#ifdef}s just for your port throughout the
+code.
+
+If the changes needed for a particular system affect too much of the
+code, I probably will not accept them. In such a case, you will, of course,
+be able to distribute your changes on your own, as long as you comply
+with the GPL
+(@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE}).
+
+@item
+A number of the files that come with @code{gawk} are maintained by other
+people at the Free Software Foundation. Thus, you should not change them
+unless it is for a very good reason. That is, changes are not out of the
+question, but changes to these files will be scrutinized extra carefully.
+The files are @file{alloca.c}, @file{getopt.h}, @file{getopt.c},
+@file{getopt1.c}, @file{regex.h}, @file{regex.c}, @file{dfa.h},
+@file{dfa.c}, @file{install-sh}, and @file{mkinstalldirs}.
+
+@item
+Be willing to continue to maintain the port.
+Non-Unix operating systems are supported by volunteers who maintain
+the code needed to compile and run @code{gawk} on their systems. If no one
+volunteers to maintain a port, that port becomes unsupported, and it may
+be necessary to remove it from the distribution.
+
+@item
+Supply an appropriate @file{gawkmisc.???} file.
+Each port has its own @file{gawkmisc.???} that implements certain
+operating system specific functions. This is cleaner than a plethora of
+@samp{#ifdef}s scattered throughout the code. The @file{gawkmisc.c} in
+the main source directory includes the appropriate
+@file{gawkmisc.???} file from each subdirectory.
+Be sure to update it as well.
+
+Each port's @file{gawkmisc.???} file has a suffix reminiscent of the machine
+or operating system for the port. For example, @file{pc/gawkmisc.pc} and
+@file{vms/gawkmisc.vms}. The use of separate suffixes, instead of plain
+@file{gawkmisc.c}, makes it possible to move files from a port's subdirectory
+into the main subdirectory, without accidentally destroying the real
+@file{gawkmisc.c} file. (Currently, this is only an issue for the MS-DOS
+and OS/2 ports.)
+
+@item
+Supply a @file{Makefile} and any other C source and header files that are
+necessary for your operating system. All your code should be in a
+separate subdirectory, with a name that is the same as, or reminiscent
+of, either your operating system or the computer system. If possible,
+try to structure things so that it is not necessary to move files out
+of the subdirectory into the main source directory. If that is not
+possible, then be sure to avoid using names for your files that
+duplicate the names of files in the main source directory.
+
+@item
+Update the documentation.
+Please write a section (or sections) for this @value{DOCUMENT} describing the
+installation and compilation steps needed to install and/or compile
+@code{gawk} for your system.
+
+@item
+Be prepared to sign the appropriate paperwork.
+In order for the FSF to distribute your code, you must either place
+your code in the public domain, and submit a signed statement to that
+effect, or assign the copyright in your code to the FSF.
+@ifinfo
+Both of these actions are easy to do, and @emph{many} people have done so
+already. If you have questions, please contact me, or
+@code{gnu@@prep.ai.mit.edu}.
+@end ifinfo
+@end enumerate
+
+Following these steps will make it much easier to integrate your changes
+into @code{gawk}, and have them co-exist happily with the code for other
+operating systems that is already there.
+
+In the code that you supply, and that you maintain, feel free to use a
+coding style and brace layout that suits your taste.
+
+@c why should this be needed? sigh
+@iftex
+@page
+@end iftex
+@node Future Extensions, Improvements, Additions, Notes
+@appendixsec Probable Future Extensions
+
+@ignore
+From emory!scalpel.netlabs.com!lwall Tue Oct 31 12:43:17 1995
+Return-Path: <emory!scalpel.netlabs.com!lwall>
+Message-Id: <9510311732.AA28472@scalpel.netlabs.com>
+To: arnold@skeeve.atl.ga.us (Arnold D. Robbins)
+Subject: Re: May I quote you?
+In-Reply-To: Your message of "Tue, 31 Oct 95 09:11:00 EST."
+ <m0tAHPQ-00014MC@skeeve.atl.ga.us>
+Date: Tue, 31 Oct 95 09:32:46 -0800
+From: Larry Wall <emory!scalpel.netlabs.com!lwall>
+
+: Greetings. I am working on the release of gawk 3.0. Part of it will be a
+: thoroughly updated manual. One of the sections deals with planned future
+: extensions and enhancements. I have the following at the beginning
+: of it:
+:
+: @cindex PERL
+: @cindex Wall, Larry
+: @display
+: @i{AWK is a language similar to PERL, only considerably more elegant.} @*
+: Arnold Robbins
+: @sp 1
+: @i{Hey!} @*
+: Larry Wall
+: @end display
+:
+: Before I actually release this for publication, I wanted to get your
+: permission to quote you. (Hopefully, in the spirit of much of GNU, the
+: implied humor is visible... :-)
+
+I think that would be fine.
+
+Larry
+@end ignore
+
+@cindex PERL
+@cindex Wall, Larry
+@display
+@i{AWK is a language similar to PERL, only considerably more elegant.}
+Arnold Robbins
+
+@i{Hey!}
+Larry Wall
+@end display
+
+This section briefly lists extensions and possible improvements
+that indicate the directions we are
+currently considering for @code{gawk}. The file @file{FUTURES} in the
+@code{gawk} distribution lists these extensions as well.
+
+This is a list of probable future changes that will be usable by the
+@code{awk} language programmer.
+
+@c these are ordered by likelihood
+@table @asis
+@item Localization
+The GNU project is starting to support multiple languages.
+It will at least be possible to make @code{gawk} print its warnings and
+error messages in languages other than English.
+It may be possible for @code{awk} programs to also use the multiple
+language facilities, separate from @code{gawk} itself.
+
+@item Databases
+It may be possible to map a GDBM/NDBM/SDBM file into an @code{awk} array.
+
+@item A @code{PROCINFO} Array
+The special files that provide process-related information
+(@pxref{Special Files, ,Special File Names in @code{gawk}})
+may be superseded by a @code{PROCINFO} array that would provide the same
+information, in an easier to access fashion.
+
+@item More @code{lint} warnings
+There are more things that could be checked for portability.
+
+@item Control of subprocess environment
+Changes made in @code{gawk} to the array @code{ENVIRON} may be
+propagated to subprocesses run by @code{gawk}.
+
+@ignore
+@item @code{RECLEN} variable for fixed length records
+Along with @code{FIELDWIDTHS}, this would speed up the processing of
+fixed-length records.
+
+@item A @code{restart} keyword
+After modifying @code{$0}, @code{restart} would restart the pattern
+matching loop, without reading a new record from the input.
+
+@item A @samp{|&} redirection
+The @samp{|&} redirection, in place of @samp{|}, would open a two-way
+pipeline for communication with a sub-process (via @code{getline} and
+@code{print} and @code{printf}).
+
+@item Function valued variables
+It would be possible to assign the name of a user-defined or built-in
+function to a regular @code{awk} variable, and then call the function
+indirectly, by using the regular variable. This would make it possible
+to write general purpose sorting and comparing routines, for example,
+by simply passing the name of one function into another.
+
+@item A built-in @code{stat} function
+The @code{stat} function would provide an easy-to-use hook to the
+@code{stat} system call so that @code{awk} programs could determine information
+about files.
+
+@item A built-in @code{ftw} function
+Combined with function valued variables and the @code{stat} function,
+@code{ftw} (file tree walk) would make it easy for an @code{awk} program
+to walk an entire file tree.
+@end ignore
+@end table
+
+This is a list of probable improvements that will make @code{gawk}
+perform better.
+
+@table @asis
+@item An Improved Version of @code{dfa}
+The @code{dfa} pattern matcher from GNU @code{grep} has some
+problems. Either a new version or a fixed one will deal with some
+important regexp matching issues.
+
+@item Use of @code{mmap}
+On systems that support the @code{mmap} system call, its use would provide
+much faster file input, and considerably simplified input buffer management.
+
+@item Use of GNU @code{malloc}
+The GNU version of @code{malloc} could potentially speed up @code{gawk},
+since it relies heavily on the use of dynamic memory allocation.
+
+@item Use of the @code{rx} regexp library
+The @code{rx} regular expression library could potentially speed up
+all regexp operations that require knowing the exact location of matches.
+This includes record termination, field and array splitting,
+and the @code{sub}, @code{gsub}, @code{gensub} and @code{match} functions.
+@end table
+
+@node Improvements, , Future Extensions, Notes
+@appendixsec Suggestions for Improvements
+
+Here are some projects that would-be @code{gawk} hackers might like to take
+on. They vary in size from a few days to a few weeks of programming,
+depending on which one you choose and how fast a programmer you are. Please
+send any improvements you write to the maintainers at the GNU project.
+@xref{Adding Code, , Adding New Features},
+for guidelines to follow when adding new features to @code{gawk}.
+@xref{Bugs, ,Reporting Problems and Bugs}, for information on
+contacting the maintainers.
+
+@enumerate
+@item
+Compilation of @code{awk} programs: @code{gawk} uses a Bison (YACC-like)
+parser to convert the script given it into a syntax tree; the syntax
+tree is then executed by a simple recursive evaluator. This method incurs
+a lot of overhead, since the recursive evaluator performs many procedure
+calls to do even the simplest things.
+
+It should be possible for @code{gawk} to convert the script's parse tree
+into a C program which the user would then compile, using the normal
+C compiler and a special @code{gawk} library to provide all the needed
+functions (regexps, fields, associative arrays, type coercion, and so
+on).
+
+An easier possibility might be for an intermediate phase of @code{awk} to
+convert the parse tree into a linear byte code form like the one used
+in GNU Emacs Lisp. The recursive evaluator would then be replaced by
+a straight line byte code interpreter that would be intermediate in speed
+between running a compiled program and doing what @code{gawk} does
+now.
+
+@item
+The programs in the test suite could be documented in this @value{DOCUMENT}.
+
+@item
+See the @file{FUTURES} file for more ideas. Contact us if you would
+seriously like to tackle any of the items listed there.
+@end enumerate
+
+@node Glossary, Copying, Notes, Top
+@appendix Glossary
+
+@table @asis
+@item Action
+A series of @code{awk} statements attached to a rule. If the rule's
+pattern matches an input record, @code{awk} executes the
+rule's action. Actions are always enclosed in curly braces.
+@xref{Action Overview, ,Overview of Actions}.
+
+@item Amazing @code{awk} Assembler
+Henry Spencer at the University of Toronto wrote a retargetable assembler
+completely as @code{awk} scripts. It is thousands of lines long, including
+machine descriptions for several eight-bit microcomputers.
+It is a good example of a
+program that would have been better written in another language.
+
+@item Amazingly Workable Formatter (@code{awf})
+Henry Spencer at the University of Toronto wrote a formatter that accepts
+a large subset of the @samp{nroff -ms} and @samp{nroff -man} formatting
+commands, using @code{awk} and @code{sh}.
+
+@item ANSI
+The American National Standards Institute. This organization produces
+many standards, among them the standards for the C and C++ programming
+languages.
+
+@item Assignment
+An @code{awk} expression that changes the value of some @code{awk}
+variable or data object. An object that you can assign to is called an
+@dfn{lvalue}. The assigned values are called @dfn{rvalues}.
+@xref{Assignment Ops, ,Assignment Expressions}.
+
+@item @code{awk} Language
+The language in which @code{awk} programs are written.
+
+@item @code{awk} Program
+An @code{awk} program consists of a series of @dfn{patterns} and
+@dfn{actions}, collectively known as @dfn{rules}. For each input record
+given to the program, the program's rules are all processed in turn.
+@code{awk} programs may also contain function definitions.
+
+@item @code{awk} Script
+Another name for an @code{awk} program.
+
+@item Bash
+The GNU version of the standard shell (the Bourne-Again shell).
+See ``Bourne Shell.''
+
+@item BBS
+See ``Bulletin Board System.''
+
+@item Boolean Expression
+Named after the English mathematician Boole. See ``Logical Expression.''
+
+@item Bourne Shell
+The standard shell (@file{/bin/sh}) on Unix and Unix-like systems,
+originally written by Steven R.@: Bourne.
+Many shells (Bash, @code{ksh}, @code{pdksh}, @code{zsh}) are
+generally upwardly compatible with the Bourne shell.
+
+@item Built-in Function
+The @code{awk} language provides built-in functions that perform various
+numerical, time stamp related, and string computations. Examples are
+@code{sqrt} (for the square root of a number) and @code{substr} (for a
+substring of a string). @xref{Built-in, ,Built-in Functions}.
+
+@item Built-in Variable
+@code{ARGC}, @code{ARGIND}, @code{ARGV}, @code{CONVFMT}, @code{ENVIRON},
+@code{ERRNO}, @code{FIELDWIDTHS}, @code{FILENAME}, @code{FNR}, @code{FS},
+@code{IGNORECASE}, @code{NF}, @code{NR}, @code{OFMT}, @code{OFS}, @code{ORS},
+@code{RLENGTH}, @code{RSTART}, @code{RS}, @code{RT}, and @code{SUBSEP},
+are the variables that have special meaning to @code{awk}.
+Changing some of them affects @code{awk}'s running environment.
+Several of these variables are specific to @code{gawk}.
+@xref{Built-in Variables}.
+
+@item Braces
+See ``Curly Braces.''
+
+@item Bulletin Board System
+A computer system allowing users to log in and read and/or leave messages
+for other users of the system, much like leaving paper notes on a bulletin
+board.
+
+@item C
+The system programming language that most GNU software is written in. The
+@code{awk} programming language has C-like syntax, and this @value{DOCUMENT}
+points out similarities between @code{awk} and C when appropriate.
+
+@cindex ISO 8859-1
+@cindex ISO Latin-1
+@item Character Set
+The set of numeric codes used by a computer system to represent the
+characters (letters, numbers, punctuation, etc.) of a particular country
+or place. The most common character set in use today is ASCII (American
+Standard Code for Information Interchange). Many European
+countries use an extension of ASCII known as ISO-8859-1 (ISO Latin-1).
+
+@item CHEM
+A preprocessor for @code{pic} that reads descriptions of molecules
+and produces @code{pic} input for drawing them. It was written in @code{awk}
+by Brian Kernighan and Jon Bentley, and is available from
+@code{@w{netlib@@research.att.com}}.
+
+@item Compound Statement
+A series of @code{awk} statements, enclosed in curly braces. Compound
+statements may be nested.
+@xref{Statements, ,Control Statements in Actions}.
+
+@item Concatenation
+Concatenating two strings means sticking them together, one after another,
+giving a new string. For example, the string @samp{foo} concatenated with
+the string @samp{bar} gives the string @samp{foobar}.
+@xref{Concatenation, ,String Concatenation}.
+
+@item Conditional Expression
+An expression using the @samp{?:} ternary operator, such as
+@samp{@var{expr1} ? @var{expr2} : @var{expr3}}. The expression
+@var{expr1} is evaluated; if the result is true, the value of the whole
+expression is the value of @var{expr2}, otherwise the value is
+@var{expr3}. In either case, only one of @var{expr2} and @var{expr3}
+is evaluated. @xref{Conditional Exp, ,Conditional Expressions}.
+
+@item Comparison Expression
+A relation that is either true or false, such as @samp{(a < b)}.
+Comparison expressions are used in @code{if}, @code{while}, @code{do},
+and @code{for}
+statements, and in patterns to select which input records to process.
+@xref{Typing and Comparison, ,Variable Typing and Comparison Expressions}.
+
+@item Curly Braces
+The characters @samp{@{} and @samp{@}}. Curly braces are used in
+@code{awk} for delimiting actions, compound statements, and function
+bodies.
+
+@item Dark Corner
+An area in the language where specifications often were (or still
+are) not clear, leading to unexpected or undesirable behavior.
+Such areas are marked in this @value{DOCUMENT} with ``(d.c.)'' in the
+text, and are indexed under the heading ``dark corner.''
+
+@item Data Objects
+These are numbers and strings of characters. Numbers are converted into
+strings and vice versa, as needed.
+@xref{Conversion, ,Conversion of Strings and Numbers}.
+
+@item Double Precision
+An internal representation of numbers that can have fractional parts.
+Double precision numbers keep track of more digits than do single precision
+numbers, but operations on them are more expensive. This is the way
+@code{awk} stores numeric values. It is the C type @code{double}.
+
+@item Dynamic Regular Expression
+A dynamic regular expression is a regular expression written as an
+ordinary expression. It could be a string constant, such as
+@code{"foo"}, but it may also be an expression whose value can vary.
+@xref{Computed Regexps, , Using Dynamic Regexps}.
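+
+For instance, in this sketch the regexp is built at run time from the
+hypothetical variable @code{prefix}:
+
+@example
+pat = "^" prefix          # pat varies with the value of prefix
+if ($0 ~ pat)
+    print "record starts with the prefix"
+@end example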
+
+@item Environment
+A collection of strings, of the form @var{name@code{=}val}, that each
+program has available to it. Users generally place values into the
+environment in order to provide information to various programs. Typical
+examples are the environment variables @code{HOME} and @code{PATH}.
+
+@item Empty String
+See ``Null String.''
+
+@item Escape Sequences
+A special sequence of characters used for describing non-printing
+characters, such as @samp{\n} for newline, or @samp{\033} for the ASCII
+ESC (escape) character. @xref{Escape Sequences}.
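+
+For example, the following statement produces two lines of output,
+because of the embedded @samp{\n}:
+
+@example
+print "first line\nsecond line"
+@end example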
+
+@item Field
+When @code{awk} reads an input record, it splits the record into pieces
+separated by whitespace (or by a separator regexp which you can
+change by setting the built-in variable @code{FS}). Such pieces are
+called fields. If the pieces are of fixed length, you can use the built-in
+variable @code{FIELDWIDTHS} to describe their lengths.
+@xref{Field Separators, ,Specifying How Fields are Separated},
+and
+@ref{Constant Size, , Reading Fixed-width Data}.
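+
+For example, this rule prints the first and third fields of every
+input record:
+
+@example
+@{ print $1, $3 @}
+@end example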
+
+@item Floating Point Number
+Often referred to in mathematical terms as a ``rational'' number, this is
+just a number that can have a fractional part.
+See ``Double Precision'' and ``Single Precision.''
+
+@item Format
+Format strings are used to control the appearance of output in the
+@code{printf} statement. Also, data conversions from numbers to strings
+are controlled by the format string contained in the built-in variable
+@code{CONVFMT}. @xref{Control Letters, ,Format-Control Letters}.
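+
+For instance, assuming hypothetical variables @code{name} and
+@code{price}, a format string controls both column width and precision:
+
+@example
+printf "%-10s %6.2f\n", name, price
+@end example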
+
+@item Function
+A specialized group of statements used to encapsulate general
+or program-specific tasks. @code{awk} has a number of built-in
+functions, and also allows you to define your own.
+@xref{Built-in, ,Built-in Functions},
+and @ref{User-defined, ,User-defined Functions}.
+
+@item FSF
+See ``Free Software Foundation.''
+
+@item Free Software Foundation
+A non-profit organization dedicated
+to the production and distribution of freely distributable software.
+It was founded by Richard M.@: Stallman, the author of the original
+Emacs editor. GNU Emacs is the most widely used version of Emacs today.
+
+@item @code{gawk}
+The GNU implementation of @code{awk}.
+
+@item General Public License
+This document describes the terms under which @code{gawk} and its source
+code may be distributed. (@pxref{Copying, ,GNU GENERAL PUBLIC LICENSE})
+
+@item GNU
+``GNU's not Unix''. An on-going project of the Free Software Foundation
+to create a complete, freely distributable, POSIX-compliant computing
+environment.
+
+@item GPL
+See ``General Public License.''
+
+@item Hexadecimal
+Base 16 notation, where the digits are @code{0}-@code{9} and
+@code{A}-@code{F}, with @samp{A}
+representing 10, @samp{B} representing 11, and so on up to @samp{F} for 15.
+Hexadecimal numbers are written in C using a leading @samp{0x},
+to indicate their base. Thus, @code{0x12} is 18 (one times 16 plus 2).
+
+@item I/O
+Abbreviation for ``Input/Output,'' the act of moving data into and/or
+out of a running program.
+
+@item Input Record
+A single chunk of data read in by @code{awk}. Usually, an @code{awk} input
+record consists of one line of text.
+@xref{Records, ,How Input is Split into Records}.
+
+@item Integer
+A whole number, i.e.@: a number that does not have a fractional part.
+
+@item Keyword
+In the @code{awk} language, a keyword is a word that has special
+meaning. Keywords are reserved and may not be used as variable names.
+
+@code{gawk}'s keywords are:
+@code{BEGIN},
+@code{END},
+@code{if},
+@code{else},
+@code{while},
+@code{do@dots{}while},
+@code{for},
+@code{for@dots{}in},
+@code{break},
+@code{continue},
+@code{delete},
+@code{next},
+@code{nextfile},
+@code{function},
+@code{func},
+and @code{exit}.
+
+@item Logical Expression
+An expression using the operators for logic, AND, OR, and NOT, written
+@samp{&&}, @samp{||}, and @samp{!} in @code{awk}. Often called Boolean
+expressions, after the mathematician George Boole, who pioneered this
+kind of mathematical logic.
+
+@item Lvalue
+An expression that can appear on the left side of an assignment
+operator. In most languages, lvalues can be variables or array
+elements. In @code{awk}, a field designator can also be used as an
+lvalue.
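+
+For example, a field designator may appear on the left side of an
+assignment:
+
+@example
+$3 = "unknown"    # replaces the third field of the current record
+@end example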
+
+@item Null String
+A string with no characters in it. It is represented explicitly in
+@code{awk} programs by placing two double-quote characters next to
+each other (@code{""}). It can appear in input data by having two successive
+occurrences of the field separator appear next to each other.
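+
+For example, this test on a hypothetical variable @code{s} succeeds
+when @code{s} is the null string:
+
+@example
+if (s == "")
+    print "s is empty"
+@end example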
+
+@item Number
+A numeric valued data object. The @code{gawk} implementation uses double
+precision floating point to represent numbers.
+Very old @code{awk} implementations use single precision floating
+point.
+
+@item Octal
+Base-eight notation, where the digits are @code{0}-@code{7}.
+Octal numbers are written in C using a leading @samp{0},
+to indicate their base. Thus, @code{013} is 11 (one times 8 plus 3).
+
+@item Pattern
+Patterns tell @code{awk} which input records are interesting to which
+rules.
+
+A pattern is an arbitrary conditional expression against which input is
+tested. If the condition is satisfied, the pattern is said to @dfn{match}
+the input record. A typical pattern might compare the input record against
+a regular expression. @xref{Pattern Overview, ,Pattern Elements}.
+
+@item POSIX
+The name for a series of standards being developed by the IEEE
+that specify a Portable Operating System interface. The ``IX'' denotes
+the Unix heritage of these standards. The main standard of interest for
+@code{awk} users is
+@cite{IEEE Standard for Information Technology, Standard 1003.2-1992,
+Portable Operating System Interface (POSIX) Part 2: Shell and Utilities}.
+Informally, this standard is often referred to as simply ``P1003.2.''
+
+@item Private
+Variables and/or functions that are meant for use exclusively by library
+functions, and not for the main @code{awk} program. Special care must be
+taken when naming such variables and functions.
+@xref{Library Names, , Naming Library Function Global Variables}.
+
+@item Range (of input lines)
+A sequence of consecutive lines from the input file. A pattern
+can specify ranges of input lines for @code{awk} to process, or it can
+specify single lines. @xref{Pattern Overview, ,Pattern Elements}.
+
+@item Recursion
+When a function calls itself, either directly or indirectly.
+If this isn't clear, refer to the entry for ``recursion.''
+
+@item Redirection
+Redirection means performing input from other than the standard input
+stream, or output to other than the standard output stream.
+
+You can redirect the output of the @code{print} and @code{printf} statements
+to a file or a system command, using the @samp{>}, @samp{>>}, and @samp{|}
+operators. You can redirect input to the @code{getline} statement using
+the @samp{<} and @samp{|} operators.
+@xref{Redirection, ,Redirecting Output of @code{print} and @code{printf}},
+and @ref{Getline, ,Explicit Input with @code{getline}}.
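+
+For instance (the file and command names here are only illustrative):
+
+@example
+print $1 > "names.out"    # redirect output to a file
+"date" | getline now      # read a command's output with getline
+@end example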
+
+@item Regexp
+Short for @dfn{regular expression}. A regexp is a pattern that denotes a
+set of strings, possibly an infinite set. For example, the regexp
+@samp{R.*xp} matches any string starting with the letter @samp{R}
+and ending with the letters @samp{xp}. In @code{awk}, regexps are
+used in patterns and in conditional expressions. Regexps may contain
+escape sequences. @xref{Regexp, ,Regular Expressions}.
+
+@item Regular Expression
+See ``regexp.''
+
+@item Regular Expression Constant
+A regular expression constant is a regular expression written within
+slashes, such as @code{/foo/}. This regular expression is chosen
+when you write the @code{awk} program, and cannot be changed during
+its execution. @xref{Regexp Usage, ,How to Use Regular Expressions}.
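+
+For example, the pattern in the following rule is a regexp constant:
+
+@example
+/^#/  @{ next @}    # skip lines that begin with `#'
+@end example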
+
+@item Rule
+A segment of an @code{awk} program that specifies how to process single
+input records. A rule consists of a @dfn{pattern} and an @dfn{action}.
+@code{awk} reads an input record; then, for each rule, if the input record
+satisfies the rule's pattern, @code{awk} executes the rule's action.
+Otherwise, the rule does nothing for that input record.
+
+@item Rvalue
+A value that can appear on the right side of an assignment operator.
+In @code{awk}, essentially every expression has a value. These values
+are rvalues.
+
+@item @code{sed}
+See ``Stream Editor.''
+
+@item Short-Circuit
+The nature of the @code{awk} logical operators @samp{&&} and @samp{||}.
+If the value of the entire expression can be deduced from evaluating just
+the left-hand side of these operators, the right-hand side will not
+be evaluated
+(@pxref{Boolean Ops, ,Boolean Expressions}).
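+
+For example, with hypothetical variables @code{n} and @code{total},
+the division below is never attempted when @code{n} is zero:
+
+@example
+if (n != 0 && total / n > 100)
+    print "large average"
+@end example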
+
+@item Side Effect
+A side effect occurs when an expression has an effect aside from merely
+producing a value. Assignment expressions, increment and decrement
+expressions and function calls have side effects.
+@xref{Assignment Ops, ,Assignment Expressions}.
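+
+For instance, each of the following statements does something besides
+produce a value (the variables are hypothetical):
+
+@example
+j = i++                  # incrementing i is a side effect
+n = sub(/foo/, "bar")    # modifying $0 is a side effect
+@end example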
+
+@item Single Precision
+An internal representation of numbers that can have fractional parts.
+Single precision numbers keep track of fewer digits than do double precision
+numbers, but operations on them are less expensive in terms of CPU time.
+This is the type used by some very old versions of @code{awk} to store
+numeric values. It is the C type @code{float}.
+
+@item Space
+The character generated by hitting the space bar on the keyboard.
+
+@item Special File
+A file name interpreted internally by @code{gawk}, instead of being handed
+directly to the underlying operating system. For example, @file{/dev/stderr}.
+@xref{Special Files, ,Special File Names in @code{gawk}}.
+
+@item Stream Editor
+A program that reads records from an input stream and processes them one
+or more at a time. This is in contrast with batch programs, which may
+expect to read their input files in their entirety before starting to do
+anything, and with interactive programs, which require input from the
+user.
+
+@item String
+A datum consisting of a sequence of characters, such as @samp{I am a
+string}. Constant strings are written with double-quotes in the
+@code{awk} language, and may contain escape sequences.
+@xref{Escape Sequences}.
+
+@item Tab
+The character generated by hitting the @kbd{TAB} key on the keyboard.
+It usually expands to up to eight spaces upon output.
+
+@item Unix
+A computer operating system originally developed in the early 1970's at
+AT&T Bell Laboratories. It initially became popular in universities around
+the world, and later moved into commercial environments as a software
+development system and network server system. There are many commercial
+versions of Unix, as well as several work-alike systems whose source code
+is freely available (such as Linux, NetBSD, and FreeBSD).
+
+@item Whitespace
+A sequence of space or tab characters occurring inside an input record or a
+string.
+@end table
+
+@node Copying, Index, Glossary, Top
+@unnumbered GNU GENERAL PUBLIC LICENSE
+@center Version 2, June 1991
+
+@display
+Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
+59 Temple Place --- Suite 330, Boston, MA 02111-1307, USA
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@c fakenode --- for prepinfo
+@unnumberedsec Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software---to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+@iftex
+@c fakenode --- for prepinfo
+@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end iftex
+@ifinfo
+@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end ifinfo
+
+@enumerate 0
+@item
+This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The ``Program'', below,
+refers to any such program or work, and a ``work based on the Program''
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term ``modification''.) Each licensee is addressed as ``you''.
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+@item
+You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+@item
+You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+@enumerate a
+@item
+You must cause the modified files to carry prominent notices
+stating that you changed the files and the date of any change.
+
+@item
+You must cause any work that you distribute or publish, that in
+whole or in part contains or is derived from the Program or any
+part thereof, to be licensed as a whole at no charge to all third
+parties under the terms of this License.
+
+@item
+If the modified program normally reads commands interactively
+when run, you must cause it, when started running for such
+interactive use in the most ordinary way, to print or display an
+announcement including an appropriate copyright notice and a
+notice that there is no warranty (or else, saying that you provide
+a warranty) and that users may redistribute the program under
+these conditions, and telling the user how to view a copy of this
+License. (Exception: if the Program itself is interactive but
+does not normally print such an announcement, your work based on
+the Program is not required to print an announcement.)
+@end enumerate
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+@item
+You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+@enumerate a
+@item
+Accompany it with the complete corresponding machine-readable
+source code, which must be distributed under the terms of Sections
+1 and 2 above on a medium customarily used for software interchange; or,
+
+@item
+Accompany it with a written offer, valid for at least three
+years, to give any third party, for a charge no more than your
+cost of physically performing source distribution, a complete
+machine-readable copy of the corresponding source code, to be
+distributed under the terms of Sections 1 and 2 above on a medium
+customarily used for software interchange; or,
+
+@item
+Accompany it with the information you received as to the offer
+to distribute corresponding source code. (This alternative is
+allowed only for non-commercial distribution and only if you
+received the program in object code or executable form with such
+an offer, in accord with Subsection b above.)
+@end enumerate
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+@item
+You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+@item
+You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+@item
+Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+@item
+If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+@item
+If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+@item
+The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and ``any
+later version'', you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+@item
+If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+@iftex
+@c fakenode --- for prepinfo
+@heading NO WARRANTY
+@end iftex
+@ifinfo
+@center NO WARRANTY
+@end ifinfo
+
+@item
+BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW@. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE@. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU@. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+@item
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+@end enumerate
+
+@iftex
+@c fakenode --- for prepinfo
+@heading END OF TERMS AND CONDITIONS
+@end iftex
+@ifinfo
+@center END OF TERMS AND CONDITIONS
+@end ifinfo
+
+@page
+@c fakenode --- for prepinfo
+@unnumberedsec How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the ``copyright'' line and a pointer to where the full notice is found.
+
+@smallexample
+@var{one line to give the program's name and an idea of what it does.}
+Copyright (C) 19@var{yy} @var{name of author}
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE@. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place --- Suite 330, Boston, MA 02111-1307, USA.
+@end smallexample
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+@smallexample
+Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
+Gnomovision comes with ABSOLUTELY NO WARRANTY; for details
+type `show w'. This is free software, and you are welcome
+to redistribute it under certain conditions; type `show c'
+for details.
+@end smallexample
+
+The hypothetical commands @samp{show w} and @samp{show c} should show
+the appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than @samp{show w} and
+@samp{show c}; they could even be mouse-clicks or menu items---whatever
+suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a ``copyright disclaimer'' for the program, if
+necessary. Here is a sample; alter the names:
+
+@smallexample
+@group
+Yoyodyne, Inc., hereby disclaims all copyright
+interest in the program `Gnomovision'
+(which makes passes at compilers) written
+by James Hacker.
+
+@var{signature of Ty Coon}, 1 April 1989
+Ty Coon, President of Vice
+@end group
+@end smallexample
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
+@node Index, , Copying, Top
+@unnumbered Index
+@printindex cp
+
+@summarycontents
+@contents
+@bye
+
+Unresolved Issues:
+------------------
+1. From ADR.
+
+ Robert J. Chassell points out that awk programs should have some indication
+ of how to use them. It would be useful to perhaps have a "programming
+ style" section of the manual that would include this and other tips.
+
+2. The default AWKPATH search path should be configurable via `configure'.
+   The default, and how this changes, need to be documented.
+
+Consistency issues:
+ /.../ regexps are in @code, not @samp
+ ".." strings are in @code, not @samp
+ no @print before @dots
+ values of expressions in the text (@code{x} has the value 15),
+ should be in roman, not @code
+ Use tab and not TAB
+ Use ESC and not ESCAPE
+ Use space and not blank to describe the space bar's character
+ The term "blank" is thus basically reserved for "blank lines" etc.
+ The `(d.c.)' should appear inside the closing `.' of a sentence
+ It should come before (pxref{...})
+ " " should have an @w{} around it
+ Use "non-" everywhere
+ Use @code{ftp} when talking about anonymous ftp
+ Use upper-case and lower-case, not "upper case" and "lower case"
+ Use alphanumeric, not alpha-numeric
+ Use --foo, not -Wfoo when describing long options
+ Use findex for all programs and functions in the example chapters
+ Use "Bell Labs" or "AT&T Bell Laboratories", but not
+ "AT&T Bell Labs".
+ Use "behavior" instead of "behaviour".
+ Use "zeros" instead of "zeroes".
+ Use "Input/Output", not "input/output". Also "I/O", not "i/o".
+ Use @code{do}, and not @code{do}-@code{while}, except where
+ actually discussing the do-while.
+ The words "a", "and", "as", "between", "for", "from", "in", "of",
+ "on", "that", "the", "to", "with", and "without",
+ should not be capitalized in @chapter, @section etc.
+ "Into" and "How" should.
+ Search for @dfn; make sure important items are also indexed.
+ "e.g." should always be followed by a comma.
+ "i.e." should never be followed by a comma, and should be followed
+ by `@:'.
+ The numbers zero through ten should be spelled out, except when
+ talking about file descriptor numbers. > 10 and < 0, it's
+ ok to use numbers.
+ In tables, put command line options in @code, while in the text,
+ put them in @samp.
+ When using @strong, use "Note:" or "Caution:" with colons and
+ not exclamation points. Do not surround the paragraphs
+ with @quotation ... @end quotation.
+
+Date: Wed, 13 Apr 94 15:20:52 -0400
+From: rsm@gnu.ai.mit.edu (Richard Stallman)
+To: gnu-prog@gnu.ai.mit.edu
+Subject: A reminder: no pathnames in GNU
+
+It's a GNU convention to use the term "file name" for the name of a
+file, never "pathname". We use the term "path" for search paths,
+which are lists of file names. Using it for a single file name as
+well is potentially confusing to users.
+
+So please check any documentation you maintain, if you think you might
+have used "pathname".
+
+Note that "file name" should be two words when it appears as ordinary
+text. It's ok as one word when it's a metasyntactic variable, though.
+
+Suggestions:
+------------
+Enhance FIELDWIDTHS with some way to indicate "the rest of the record".
+E.g., a length of 0 or -1 or something. Maybe "n"?
+
+Make FIELDWIDTHS be an array?
+
+What if FIELDWIDTHS has invalid values in it?
diff --git a/doc/igawk.1 b/doc/igawk.1
new file mode 100644
index 00000000..b3e19c20
--- /dev/null
+++ b/doc/igawk.1
@@ -0,0 +1,73 @@
+.TH IGAWK 1 "Oct 13 1995" "Free Software Foundation" "Utility Commands"
+.SH NAME
+igawk \- gawk with include files
+.SH SYNOPSIS
+.B igawk
+[ all
+.I gawk
+options ]
+.B \-f
+.I program-file
+[
+.B \-\^\-
+] file .\^.\^.
+.br
+.B igawk
+[ all
+.I gawk
+options ]
+[
+.B \-\^\-
+]
+.I program-text
+file .\^.\^.
+.SH DESCRIPTION
+.I Igawk
+is a simple shell script that adds the ability to have ``include files'' to
+.IR gawk (1).
+.PP
+AWK programs for
+.I igawk
+are the same as for
+.IR gawk ,
+except that, in addition, you may have lines like
+.RS
+.sp
+.ft B
+@include getopt.awk
+.ft R
+.sp
+.RE
+in your program to include the file
+.B getopt.awk
+from either the current directory or one of the other directories
+in the search path.
+.SH OPTIONS
+See
+.IR gawk (1)
+for a full description of the AWK language and the options that
+.I gawk
+supports.
+.SH EXAMPLES
+.nf
+.ft B
+cat << EOF > test.awk
+@include getopt.awk
+.sp
+BEGIN {
+ while (getopt(ARGC, ARGV, "am:q") != \-1)
+ \&.\^.\^.
+}
+EOF
+.sp
+igawk \-f test.awk
+.ft R
+.fi
+.SH SEE ALSO
+.IR gawk (1)
+.PP
+.IR "AWK Language Programming" ,
+Edition 1.0, published by the Free Software Foundation, 1995.
+.SH AUTHOR
+Arnold Robbins
+.RB ( arnold@gnu.ai.mit.edu ).
diff --git a/support/texinfo.tex b/doc/texinfo.tex
index ce11b7b8..ea80e1d0 100644
--- a/support/texinfo.tex
+++ b/doc/texinfo.tex
@@ -1,6 +1,6 @@
%% TeX macros to handle texinfo files
-% Copyright (C) 1985, 86, 88, 90, 91, 92, 1993 Free Software Foundation, Inc.
+% Copyright (C) 1985, 86, 88, 90, 91, 92, 93, 1994 Free Software Foundation, Inc.
%This texinfo.tex file is free software; you can redistribute it and/or
%modify it under the terms of the GNU General Public License as
@@ -14,22 +14,38 @@
%You should have received a copy of the GNU General Public License
%along with this texinfo.tex file; see the file COPYING. If not, write
-%to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
-%USA.
+%to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+%Boston, MA 02111-1307, USA.
%In other words, you are welcome to use, share and improve this program.
%You are forbidden to forbid anyone else to use, share and improve
%what you give them. Help stamp out software-hoarding!
-\def\texinfoversion{2.115}
+
+% Send bug reports to bug-texinfo@prep.ai.mit.edu.
+% Please include a *precise* test case in each bug report.
+
+
+% Make it possible to create a .fmt file just by loading this file:
+% if the underlying format is not loaded, start by loading it now.
+% Added by gildea November 1993.
+\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi
+
+% This automatically updates the version number based on RCS.
+\def\deftexinfoversion$#1: #2 ${\def\texinfoversion{#2}}
+\deftexinfoversion$Revision: 2.159 $
\message{Loading texinfo package [Version \texinfoversion]:}
-% Print the version number if in a .fmt file.
-\everyjob{\message{[Texinfo version \texinfoversion]}\message{}}
+% If in a .fmt file, print the version number
+% and turn on active characters that we couldn't do earlier because
+% they might have appeared in the input file name.
+\everyjob{\message{[Texinfo version \texinfoversion]}\message{}
+ \catcode`+=\active \catcode`\_=\active}
% Save some parts of plain tex whose names we will redefine.
+\let\ptextilde=\~
\let\ptexlbrace=\{
\let\ptexrbrace=\}
\let\ptexdots=\dots
@@ -44,7 +60,18 @@
\let\ptexl=\l
\let\ptexL=\L
-\def\tie{\penalty 10000\ } % Save plain tex definition of ~.
+% Be sure we're in horizontal mode when doing a tie, since we make space
+% equivalent to this in @example-like environments. Otherwise, a space
+% at the beginning of a line will start with \penalty -- and
+% since \penalty is valid in vertical mode, we'd end up putting the
+% penalty on the vertical list instead of in the new paragraph.
+{\catcode`@ = 11
+ % Avoid using \@M directly, because that causes trouble
+ % if the definition is written into an index file.
+ \global\let\tiepenalty = \@M
+ \gdef\tie{\leavevmode\penalty\tiepenalty\ }
+}
+\let\~ = \tie % And make it available as @~.
\message{Basics,}
\chardef\other=12
@@ -53,6 +80,19 @@
% starts a new line in the output.
\newlinechar = `^^J
+% Set up fixed words for English.
+\ifx\putwordChapter\undefined{\gdef\putwordChapter{Chapter}}\fi%
+\def\putwordInfo{Info}%
+\ifx\putwordSee\undefined{\gdef\putwordSee{See}}\fi%
+\ifx\putwordsee\undefined{\gdef\putwordsee{see}}\fi%
+\ifx\putwordfile\undefined{\gdef\putwordfile{file}}\fi%
+\ifx\putwordpage\undefined{\gdef\putwordpage{page}}\fi%
+\ifx\putwordsection\undefined{\gdef\putwordsection{section}}\fi%
+\ifx\putwordSection\undefined{\gdef\putwordSection{Section}}\fi%
+\ifx\putwordTableofContents\undefined{\gdef\putwordTableofContents{Table of Contents}}\fi%
+\ifx\putwordShortContents\undefined{\gdef\putwordShortContents{Short Contents}}\fi%
+\ifx\putwordAppendix\undefined{\gdef\putwordAppendix{Appendix}}\fi%
+
% Ignore a token.
%
\def\gobble#1{}
@@ -144,9 +184,14 @@
% Do @cropmarks to get crop marks
\def\cropmarks{\let\onepageout=\croppageout }
+\newinsert\margin \dimen\margin=\maxdimen
+
\def\pagebody#1{\vbox to\pageheight{\boxmaxdepth=\maxdepth #1}}
{\catcode`\@ =11
\gdef\pagecontents#1{\ifvoid\topins\else\unvbox\topins\fi
+% marginal hacks, juha@viisa.uucp (Juha Takala)
+\ifvoid\margin\else % marginal info is present
+ \rlap{\kern\hsize\vbox to\z@{\kern1pt\box\margin \vss}}\fi
\dimen@=\dp#1 \unvbox#1
\ifvoid\footins\else\vskip\skip\footins\footnoterule \unvbox\footins\fi
\ifr@ggedbottom \kern-\dimen@ \vfil \fi}
@@ -297,13 +342,13 @@
% Single-spacing is done by various environments (specifically, in
% \nonfillstart and \quotations).
-\newskip\singlespaceskip \singlespaceskip = \baselineskip
+\newskip\singlespaceskip \singlespaceskip = 12.5pt
\def\singlespace{%
-% Why was this kern here? It messes up equalizing space above and below
-% environments. --karl, 6may93
-%{\advance \baselineskip by -\singlespaceskip
-%\kern \baselineskip}%
-\baselineskip=\singlespaceskip
+ % Why was this kern here? It messes up equalizing space above and below
+ % environments. --karl, 6may93
+ %{\advance \baselineskip by -\singlespaceskip
+ %\kern \baselineskip}%
+ \setleading \singlespaceskip
}
%% Simple single-character @ commands
@@ -335,6 +380,15 @@
% @. is an end-of-sentence period.
\def\.{.\spacefactor=3000 }
+% @enddots{} is an end-of-sentence ellipsis.
+\gdef\enddots{$\mathinner{\ldotp\ldotp\ldotp\ldotp}$\spacefactor=3000}
+
+% @! is an end-of-sentence bang.
+\gdef\!{!\spacefactor=3000 }
+
+% @? is an end-of-sentence query.
+\gdef\?{?\spacefactor=3000 }
+
% @w prevents a word break. Without the \leavevmode, @w at the
% beginning of a paragraph, when TeX is still in vertical mode, would
% produce a whole line of output instead of starting the paragraph.
@@ -389,8 +443,8 @@
\obeylines
\fi
%
- % We do @comment here in case we are called inside an environment,
- % such as @example, where each end-of-line in the input causes an
+ % Do @comment since we are called inside an environment such as
+ % @example, where each end-of-line in the input causes an
% end-of-line in the output. We don't want the end-of-line after
% the `@group' to put extra space in the output. Since @group
% should appear on a line by itself (according to the Texinfo
@@ -482,6 +536,15 @@ where each line of input produces a line of output.}
\def\nofillexdentyyy #1{{\advance \leftskip by -\exdentamount
\leftline{\hskip\leftskip{\rm#1}}}}
+% @inmargin{TEXT} puts TEXT in the margin next to the current paragraph.
+
+\def\inmargin#1{%
+\strut\vadjust{\nobreak\kern-\strutdepth
+ \vtop to \strutdepth{\baselineskip\strutdepth\vss
+ \llap{\rightskip=\inmarginspacing \vbox{\noindent #1}}\null}}}
+\newskip\inmarginspacing \inmarginspacing=1cm
+\def\strutdepth{\dp\strutbox}
+
%\hbox{{\rm#1}}\hfil\break}}
% @include file insert text of that file as input.
@@ -577,6 +640,15 @@ where each line of input produces a line of output.}
\let\printindex = \relax
\let\pxref = \relax
\let\settitle = \relax
+ \let\setchapternewpage = \relax
+ \let\setchapterstyle = \relax
+ \let\everyheading = \relax
+ \let\evenheading = \relax
+ \let\oddheading = \relax
+ \let\everyfooting = \relax
+ \let\evenfooting = \relax
+ \let\oddfooting = \relax
+ \let\headings = \relax
\let\include = \relax
\let\lowersections = \relax
\let\down = \relax
@@ -584,18 +656,27 @@ where each line of input produces a line of output.}
\let\up = \relax
\let\set = \relax
\let\clear = \relax
+ \let\item = \relax
+ \let\message = \relax
}
% Ignore @ignore ... @end ignore.
%
\def\ignore{\doignore{ignore}}
-% Also ignore @ifinfo, @menu, and @direntry text.
+% Also ignore @ifinfo, @ifhtml, @html, @menu, and @direntry text.
%
\def\ifinfo{\doignore{ifinfo}}
+\def\ifhtml{\doignore{ifhtml}}
+\def\html{\doignore{html}}
\def\menu{\doignore{menu}}
\def\direntry{\doignore{direntry}}
+% @dircategory CATEGORY -- specify a category of the dir file
+% which this file should belong to. Ignore this in TeX.
+
+\def\dircategory{\comment}
+
% Ignore text until a line `@end #1'.
%
\def\doignore#1{\begingroup
@@ -717,7 +798,10 @@ where each line of input produces a line of output.}
\else \setzzz{#1}#2\endsetzzz % Remove the trailing space \setxxx inserted.
\fi
}
-\def\setzzz#1#2 \endsetzzz{\expandafter\xdef\csname SET#1\endcsname{#2}}
+% Can't use \xdef to pre-expand #2 and save some time, since \temp or
+% \next or other control sequences that we've defined might get us into
+% an infinite loop. Consider `@set foo @cite{bar}'.
+\def\setzzz#1#2 \endsetzzz{\expandafter\gdef\csname SET#1\endcsname{#2}}
% @clear VAR clears (i.e., unsets) the variable VAR.
%
@@ -819,15 +903,15 @@ where each line of input produces a line of output.}
\def\donoderef{\ifx\lastnode\relax\else
\expandafter\expandafter\expandafter\setref{\lastnode}\fi
-\let\lastnode=\relax}
+\global\let\lastnode=\relax}
\def\unnumbnoderef{\ifx\lastnode\relax\else
\expandafter\expandafter\expandafter\unnumbsetref{\lastnode}\fi
-\let\lastnode=\relax}
+\global\let\lastnode=\relax}
\def\appendixnoderef{\ifx\lastnode\relax\else
\expandafter\expandafter\expandafter\appendixsetref{\lastnode}\fi
-\let\lastnode=\relax}
+\global\let\lastnode=\relax}
\let\refill=\relax
@@ -846,7 +930,7 @@ where each line of input produces a line of output.}
\outer\def\bye{\pagealignmacro\tracingstats=1\ptexend}
\def\inforef #1{\inforefzzz #1,,,,**}
-\def\inforefzzz #1,#2,#3,#4**{See Info file \file{\ignorespaces #3{}},
+\def\inforefzzz #1,#2,#3,#4**{\putwordSee{} \putwordInfo{} \putwordfile{} \file{\ignorespaces #3{}},
node \samp{\ignorespaces#1{}}}
\message{fonts,}
@@ -862,28 +946,39 @@ where each line of input produces a line of output.}
%% Try out Computer Modern fonts at \magstephalf
\let\mainmagstep=\magstephalf
+% Set the font macro #1 to the font named #2, adding on the
+% specified font prefix (normally `cm').
+\def\setfont#1#2{\font#1=\fontprefix#2}
+
+% Use cm as the default font prefix.
+% To specify the font prefix, you must define \fontprefix
+% before you read in texinfo.tex.
+\ifx\fontprefix\undefined
+\def\fontprefix{cm}
+\fi
+
\ifx\bigger\relax
\let\mainmagstep=\magstep1
-\font\textrm=cmr12
-\font\texttt=cmtt12
+\setfont\textrm{r12}
+\setfont\texttt{tt12}
\else
-\font\textrm=cmr10 scaled \mainmagstep
-\font\texttt=cmtt10 scaled \mainmagstep
+\setfont\textrm{r10 scaled \mainmagstep}
+\setfont\texttt{tt10 scaled \mainmagstep}
\fi
% Instead of cmb10, you many want to use cmbx10.
% cmbx10 is a prettier font on its own, but cmb10
% looks better when embedded in a line with cmr10.
-\font\textbf=cmb10 scaled \mainmagstep
-\font\textit=cmti10 scaled \mainmagstep
-\font\textsl=cmsl10 scaled \mainmagstep
-\font\textsf=cmss10 scaled \mainmagstep
-\font\textsc=cmcsc10 scaled \mainmagstep
+\setfont\textbf{b10 scaled \mainmagstep}
+\setfont\textit{ti10 scaled \mainmagstep}
+\setfont\textsl{sl10 scaled \mainmagstep}
+\setfont\textsf{ss10 scaled \mainmagstep}
+\setfont\textsc{csc10 scaled \mainmagstep}
\font\texti=cmmi10 scaled \mainmagstep
\font\textsy=cmsy10 scaled \mainmagstep
% A few fonts for @defun, etc.
-\font\defbf=cmbx10 scaled \magstep1 %was 1314
-\font\deftt=cmtt10 scaled \magstep1
+\setfont\defbf{bx10 scaled \magstep1} %was 1314
+\setfont\deftt{tt10 scaled \magstep1}
\def\df{\let\tentt=\deftt \let\tenbf = \defbf \bf}
% Fonts for indices and small examples.
@@ -891,66 +986,66 @@ where each line of input produces a line of output.}
% because texinfo normally uses the slanted fonts for that.
% Do not make many font distinctions in general in the index, since they
% aren't very useful.
-\font\ninett=cmtt9
-\font\indrm=cmr9
-\font\indit=cmsl9
+\setfont\ninett{tt9}
+\setfont\indrm{r9}
+\setfont\indit{sl9}
\let\indsl=\indit
\let\indtt=\ninett
\let\indsf=\indrm
\let\indbf=\indrm
-\let\indsc=\indrm
+\setfont\indsc{csc10 at 9pt}
\font\indi=cmmi9
\font\indsy=cmsy9
% Fonts for headings
-\font\chaprm=cmbx12 scaled \magstep2
-\font\chapit=cmti12 scaled \magstep2
-\font\chapsl=cmsl12 scaled \magstep2
-\font\chaptt=cmtt12 scaled \magstep2
-\font\chapsf=cmss12 scaled \magstep2
+\setfont\chaprm{bx12 scaled \magstep2}
+\setfont\chapit{ti12 scaled \magstep2}
+\setfont\chapsl{sl12 scaled \magstep2}
+\setfont\chaptt{tt12 scaled \magstep2}
+\setfont\chapsf{ss12 scaled \magstep2}
\let\chapbf=\chaprm
-\font\chapsc=cmcsc10 scaled\magstep3
+\setfont\chapsc{csc10 scaled\magstep3}
\font\chapi=cmmi12 scaled \magstep2
\font\chapsy=cmsy10 scaled \magstep3
-\font\secrm=cmbx12 scaled \magstep1
-\font\secit=cmti12 scaled \magstep1
-\font\secsl=cmsl12 scaled \magstep1
-\font\sectt=cmtt12 scaled \magstep1
-\font\secsf=cmss12 scaled \magstep1
-\font\secbf=cmbx12 scaled \magstep1
-\font\secsc=cmcsc10 scaled\magstep2
+\setfont\secrm{bx12 scaled \magstep1}
+\setfont\secit{ti12 scaled \magstep1}
+\setfont\secsl{sl12 scaled \magstep1}
+\setfont\sectt{tt12 scaled \magstep1}
+\setfont\secsf{ss12 scaled \magstep1}
+\setfont\secbf{bx12 scaled \magstep1}
+\setfont\secsc{csc10 scaled\magstep2}
\font\seci=cmmi12 scaled \magstep1
\font\secsy=cmsy10 scaled \magstep2
-% \font\ssecrm=cmbx10 scaled \magstep1 % This size an font looked bad.
-% \font\ssecit=cmti10 scaled \magstep1 % The letters were too crowded.
-% \font\ssecsl=cmsl10 scaled \magstep1
-% \font\ssectt=cmtt10 scaled \magstep1
-% \font\ssecsf=cmss10 scaled \magstep1
+% \setfont\ssecrm{bx10 scaled \magstep1} % This size an font looked bad.
+% \setfont\ssecit{cmti10 scaled \magstep1} % The letters were too crowded.
+% \setfont\ssecsl{sl10 scaled \magstep1}
+% \setfont\ssectt{tt10 scaled \magstep1}
+% \setfont\ssecsf{ss10 scaled \magstep1}
-%\font\ssecrm=cmb10 scaled 1315 % Note the use of cmb rather than cmbx.
-%\font\ssecit=cmti10 scaled 1315 % Also, the size is a little larger than
-%\font\ssecsl=cmsl10 scaled 1315 % being scaled magstep1.
-%\font\ssectt=cmtt10 scaled 1315
-%\font\ssecsf=cmss10 scaled 1315
+%\setfont\ssecrm{b10 scaled 1315} % Note the use of cmb rather than cmbx.
+%\setfont\ssecit{ti10 scaled 1315} % Also, the size is a little larger than
+%\setfont\ssecsl{sl10 scaled 1315} % being scaled magstep1.
+%\setfont\ssectt{tt10 scaled 1315}
+%\setfont\ssecsf{ss10 scaled 1315}
%\let\ssecbf=\ssecrm
-\font\ssecrm=cmbx12 scaled \magstephalf
-\font\ssecit=cmti12 scaled \magstephalf
-\font\ssecsl=cmsl12 scaled \magstephalf
-\font\ssectt=cmtt12 scaled \magstephalf
-\font\ssecsf=cmss12 scaled \magstephalf
-\font\ssecbf=cmbx12 scaled \magstephalf
-\font\ssecsc=cmcsc10 scaled \magstep1
+\setfont\ssecrm{bx12 scaled \magstephalf}
+\setfont\ssecit{ti12 scaled \magstephalf}
+\setfont\ssecsl{sl12 scaled \magstephalf}
+\setfont\ssectt{tt12 scaled \magstephalf}
+\setfont\ssecsf{ss12 scaled \magstephalf}
+\setfont\ssecbf{bx12 scaled \magstephalf}
+\setfont\ssecsc{csc10 scaled \magstep1}
\font\sseci=cmmi12 scaled \magstephalf
\font\ssecsy=cmsy10 scaled \magstep1
% The smallcaps and symbol fonts should actually be scaled \magstep1.5,
% but that is not a standard magnification.
% Fonts for title page:
-\font\titlerm = cmbx12 scaled \magstep3
+\setfont\titlerm{bx12 scaled \magstep3}
\let\authorrm = \secrm
% In order for the font changes to affect most math symbols and letters,
@@ -1006,9 +1101,9 @@ where each line of input produces a line of output.}
\newcount\fontdepth \fontdepth=0
% Fonts for short table of contents.
-\font\shortcontrm=cmr12
-\font\shortcontbf=cmbx12
-\font\shortcontsl=cmsl12
+\setfont\shortcontrm{r12}
+\setfont\shortcontbf{bx12}
+\setfont\shortcontsl{sl12}
%% Add scribe-like font environments, plus @l for inline lisp (usually sans
%% serif) and @ii for TeX italic
@@ -1035,11 +1130,10 @@ where each line of input produces a line of output.}
\def\restorehyphenation{\hyphenchar\font = `- }
\def\t#1{%
- {\tt \nohyphenation \rawbackslash \frenchspacing #1}%
+ {\tt \rawbackslash \frenchspacing #1}%
\null
}
-\let\ttfont = \t
-%\def\samp #1{`{\tt \rawbackslash \frenchspacing #1}'\null}
+\let\ttfont=\t
\def\samp #1{`\tclose{#1}'\null}
\def\key #1{{\tt \nohyphenation \uppercase{#1}}\null}
\def\ctrl #1{{\tt \rawbackslash \hat}#1}
@@ -1070,7 +1164,7 @@ where each line of input produces a line of output.}
}
% We *must* turn on hyphenation at `-' and `_' in \code.
-% Otherwise, it is too hard to avoid overful hboxes
+% Otherwise, it is too hard to avoid overfull hboxes
% in the Emacs manual, the Library manual, etc.
% Unfortunately, TeX uses one parameter (\hyphenchar) to control
@@ -1088,6 +1182,7 @@ where each line of input produces a line of output.}
% ever called. -- mycroft
\global\def\indexbreaks{\catcode`\-=\active \let-\realdash \catcode`\_=\active \let_\realunder}
}
+
\def\realdash{-}
\def\realunder{_}
\def\codedash{-\discretionary{}{}{}}
@@ -1475,7 +1570,7 @@ July\or August\or September\or October\or November\or December\fi
\def\tablez #1#2#3#4#5#6{%
\aboveenvbreak %
\begingroup %
-\def\Edescription{\Etable}% Neccessary kludge.
+\def\Edescription{\Etable}% Necessary kludge.
\let\itemindex=#1%
\ifnum 0#3>0 \advance \leftskip by #3\mil \fi %
\ifnum 0#4>0 \tableindent=#4\mil \fi %
@@ -1638,6 +1733,159 @@ July\or August\or September\or October\or November\or December\fi
\vadjust{\penalty 1200}}%
\flushcr}
+% @multitable macros
+% Amy Hendrickson, 8/18/94
+%
+% @multitable ... @end multitable will make as many columns as desired.
+% Contents of each column will wrap at width given in preamble. Width
+% can be specified either with sample text given in a template line,
+% or in percent of \hsize, the current width of text on page.
+
+% Table can continue over pages but will only break between lines.
+
+% To make preamble:
+%
+% Either define widths of columns in terms of percent of \hsize:
+% @multitable @percentofhsize .2 .3 .5
+% @item ...
+%
+% Numbers following @percentofhsize are the percent of the total
+% current hsize to be used for each column. You may use as many
+% columns as desired.
+
+% Or use a template:
+% @multitable {Column 1 template} {Column 2 template} {Column 3 template}
+% @item ...
+% using the widest term desired in each column.
+
+
+% Each new table line starts with @item, each subsequent new column
+% starts with @tab. Empty columns may be produced by supplying @tab's
+% with nothing between them for as many times as empty columns are needed,
+% ie, @tab@tab@tab will produce two empty columns.
+
+% @item, @tab, @multicolumn or @endmulticolumn do not need to be on their
+% own lines, but it will not hurt if they are.
+
+% Sample multitable:
+
+% @multitable {Column 1 template} {Column 2 template} {Column 3 template}
+% @item first col stuff @tab second col stuff @tab third col
+% @item
+% first col stuff
+% @tab
+% second col stuff
+% @tab
+% third col
+% @item first col stuff @tab second col stuff
+% @tab Many paragraphs of text may be used in any column.
+%
+% They will wrap at the width determined by the template.
+% @item@tab@tab This will be in third column.
+% @end multitable
+
+% Default dimensions may be reset by user.
+% @intableparskip will set vertical space between paragraphs in table.
+% @intableparindent will set paragraph indent in table.
+% @spacebetweencols will set horizontal space to be left between columns.
+% @spacebetweenlines will set vertical space to be left between lines.
+
+%%%%
+% Dimensions
+
+\newdimen\intableparskip
+\newdimen\intableparindent
+\newdimen\spacebetweencols
+\newdimen\spacebetweenlines
+\intableparskip=0pt
+\intableparindent=6pt
+\spacebetweencols=12pt
+\spacebetweenlines=12pt
+
+%%%%
+% Macros used to set up halign preamble:
+\let\endsetuptable\relax
+\def\xendsetuptable{\endsetuptable}
+\let\percentofhsize\relax
+\def\xpercentofhsize{\percentofhsize}
+\newif\ifsetpercent
+
+\newcount\colcount
+\def\setuptable#1{\def\firstarg{#1}%
+\ifx\firstarg\xendsetuptable\let\go\relax%
+\else
+ \ifx\firstarg\xpercentofhsize\global\setpercenttrue%
+ \else
+ \ifsetpercent
+ \if#1.\else%
+ \global\advance\colcount by1 %
+ \expandafter\xdef\csname col\the\colcount\endcsname{.#1\hsize}%
+ \fi
+ \else
+ \global\advance\colcount by1
+ \setbox0=\hbox{#1}%
+ \expandafter\xdef\csname col\the\colcount\endcsname{\the\wd0}%
+ \fi%
+ \fi%
+ \let\go\setuptable%
+\fi\go}
+%%%%
+% multitable syntax
+\def\tab{&}
+
+%%%%
+% @multitable ... @end multitable definitions:
+
+\def\multitable#1\item{\bgroup
+\let\item\cr
+\tolerance=9500
+\hbadness=9500
+\parskip=\intableparskip
+\parindent=\intableparindent
+\overfullrule=0pt
+\global\colcount=0\relax%
+\def\Emultitable{\global\setpercentfalse\global\everycr{}\cr\egroup\egroup}%
+ % To parse everything between @multitable and @item :
+\def\one{#1}\expandafter\setuptable\one\endsetuptable
+ % Need to reset this to 0 after \setuptable.
+\global\colcount=0\relax%
+ %
+ % This preamble sets up a generic column definition, which will
+ % be used as many times as user calls for columns.
+ % \vtop will set a single line and will also let text wrap and
+ % continue for many paragraphs if desired.
+\halign\bgroup&\global\advance\colcount by 1\relax%
+\vtop{\hsize=\expandafter\csname col\the\colcount\endcsname
+ % In order to keep entries from bumping into each other
+ % we will add a \leftskip of \spacebetweencols to all columns after
+ % the first one.
+ % If a template has been used, we will add \spacebetweencols
+ % to the width of each template entry.
+ % If user has set preamble in terms of percent of \hsize
+ % we will use that dimension as the width of the column, and
+ % the \leftskip will keep entries from bumping into each other.
+ % Table will start at left margin and final column will justify at
+ % right margin.
+\ifnum\colcount=1
+\else
+ \ifsetpercent
+ \else
+ % If user has <not> set preamble in terms of percent of \hsize
+ % we will advance \hsize by \spacebetweencols
+ \advance\hsize by \spacebetweencols
+ \fi
+ % In either case we will make \leftskip=\spacebetweencols:
+\leftskip=\spacebetweencols
+\fi
+\noindent##}\cr%
+ % \everycr will reset column counter, \colcount, at the end of
+ % each line. Every column entry will cause \colcount to advance by one.
+ % The table preamble
+ % looks at the current \colcount to find the correct column width.
+\global\everycr{\noalign{\nointerlineskip\vskip\spacebetweenlines
+\filbreak%% keeps underfull box messages off when table breaks over pages.
+\global\colcount=0\relax}}}
+
\message{indexing,}
% Index generation facilities
@@ -1712,6 +1960,32 @@ July\or August\or September\or October\or November\or December\fi
\def\singlecodeindexer #1{\doind{\indexname}{\code{#1}}}
\def\indexdummies{%
+% Take care of the plain tex accent commands.
+\def\"{\realbackslash "}%
+\def\`{\realbackslash `}%
+\def\'{\realbackslash '}%
+\def\^{\realbackslash ^}%
+\def\~{\realbackslash ~}%
+\def\={\realbackslash =}%
+\def\b{\realbackslash b}%
+\def\c{\realbackslash c}%
+\def\d{\realbackslash d}%
+\def\u{\realbackslash u}%
+\def\v{\realbackslash v}%
+\def\H{\realbackslash H}%
+% Take care of the plain tex special European modified letters.
+\def\oe{\realbackslash oe}%
+\def\ae{\realbackslash ae}%
+\def\aa{\realbackslash aa}%
+\def\OE{\realbackslash OE}%
+\def\AE{\realbackslash AE}%
+\def\AA{\realbackslash AA}%
+\def\o{\realbackslash o}%
+\def\O{\realbackslash O}%
+\def\l{\realbackslash l}%
+\def\L{\realbackslash L}%
+\def\ss{\realbackslash ss}%
+% Take care of texinfo commands likely to appear in an index entry.
\def\_{{\realbackslash _}}%
\def\w{\realbackslash w }%
\def\bf{\realbackslash bf }%
@@ -1740,8 +2014,15 @@ July\or August\or September\or October\or November\or December\fi
\def\kbd##1{\realbackslash kbd {##1}}%
\def\dfn##1{\realbackslash dfn {##1}}%
\def\emph##1{\realbackslash emph {##1}}%
+\unsepspaces
}
+% If an index command is used in an @example environment, any spaces
+% therein should become regular spaces in the raw index file, not the
+% expansion of \tie (\\leavevmode \penalty \@M \ ).
+{\obeyspaces
+ \gdef\unsepspaces{\obeyspaces\let =\space}}
+
% \indexnofonts no-ops all font-change commands.
% This is used when outputting the strings to sort the index by.
\def\indexdummyfont#1{#1}
@@ -1749,6 +2030,31 @@ July\or August\or September\or October\or November\or December\fi
\def\indexdummydots{...}
\def\indexnofonts{%
+% Just ignore accents.
+\let\"=\indexdummyfont
+\let\`=\indexdummyfont
+\let\'=\indexdummyfont
+\let\^=\indexdummyfont
+\let\~=\indexdummyfont
+\let\==\indexdummyfont
+\let\b=\indexdummyfont
+\let\c=\indexdummyfont
+\let\d=\indexdummyfont
+\let\u=\indexdummyfont
+\let\v=\indexdummyfont
+\let\H=\indexdummyfont
+% Take care of the plain tex special European modified letters.
+\def\oe{oe}%
+\def\ae{ae}%
+\def\aa{aa}%
+\def\OE{OE}%
+\def\AE{AE}%
+\def\AA{AA}%
+\def\o{o}%
+\def\O{O}%
+\def\l{l}%
+\def\L{L}%
+\def\ss{ss}%
\let\w=\indexdummyfont
\let\t=\indexdummyfont
\let\r=\indexdummyfont
@@ -1781,7 +2087,14 @@ July\or August\or September\or October\or November\or December\fi
\let\indexbackslash=0 %overridden during \printindex.
+\let\SETmarginindex=\relax %initialize!
+% workhorse for all \fooindexes
+% #1 is name of index, #2 is stuff to put there
\def\doind #1#2{%
+% Put the index entry in the margin if desired.
+\ifx\SETmarginindex\relax\else%
+\insert\margin{\hbox{\vrule height8pt depth3pt width0pt #2}}%
+\fi%
{\count10=\lastpenalty %
{\indexdummies % Must do this here, since \bf, etc expand at this stage
\escapechar=`\\%
@@ -1960,7 +2273,7 @@ July\or August\or September\or October\or November\or December\fi
%
% Insert the text of the index entry. TeX will do line-breaking on it.
#1%
- % The following is kluged to not output a line of dots in the index if
+ % The following is kludged to not output a line of dots in the index if
% there are no page numbers. The next person who breaks this will be
% cursed by a Unix daemon.
\def\tempa{{\rm }}%
@@ -1985,7 +2298,7 @@ July\or August\or September\or October\or November\or December\fi
% Like \dotfill except takes at least 1 em.
\def\indexdotfill{\cleaders
- \hbox{$\mathsurround=0pt \mkern1.5mu . \mkern1.5mu$}\hskip 1em plus 1fill}
+ \hbox{$\mathsurround=0pt \mkern1.5mu ${\it .}$ \mkern1.5mu$}\hskip 1em plus 1fill}
\def\primary #1{\line{#1\hfil}}
@@ -2217,13 +2530,13 @@ July\or August\or September\or October\or November\or December\fi
\def\chapteryyy #1{\numhead0{#1}} % normally numhead0 calls chapterzzz
\def\chapterzzz #1{\seccheck{chapter}%
\secno=0 \subsecno=0 \subsubsecno=0
-\global\advance \chapno by 1 \message{Chapter \the\chapno}%
+\global\advance \chapno by 1 \message{\putwordChapter \the\chapno}%
\chapmacro {#1}{\the\chapno}%
\gdef\thissection{#1}%
\gdef\thischaptername{#1}%
% We don't substitute the actual chapter name into \thischapter
% because we don't want its macros evaluated now.
-\xdef\thischapter{Chapter \the\chapno: \noexpand\thischaptername}%
+\xdef\thischapter{\putwordChapter{} \the\chapno: \noexpand\thischaptername}%
{\chapternofonts%
\edef\temp{{\realbackslash chapentry {#1}{\the\chapno}{\noexpand\folio}}}%
\escapechar=`\\%
@@ -2239,13 +2552,13 @@ July\or August\or September\or October\or November\or December\fi
\def\appendixzzz #1{\seccheck{appendix}%
\secno=0 \subsecno=0 \subsubsecno=0
\global\advance \appendixno by 1 \message{Appendix \appendixletter}%
-\chapmacro {#1}{Appendix \appendixletter}%
+\chapmacro {#1}{\putwordAppendix{} \appendixletter}%
\gdef\thissection{#1}%
\gdef\thischaptername{#1}%
-\xdef\thischapter{Appendix \appendixletter: \noexpand\thischaptername}%
+\xdef\thischapter{\putwordAppendix{} \appendixletter: \noexpand\thischaptername}%
{\chapternofonts%
\edef\temp{{\realbackslash chapentry
- {#1}{Appendix \appendixletter}{\noexpand\folio}}}%
+ {#1}{\putwordAppendix{} \appendixletter}{\noexpand\folio}}}%
\escapechar=`\\%
\write \contentsfile \temp %
\appendixnoderef %
@@ -2298,7 +2611,7 @@ July\or August\or September\or October\or November\or December\fi
\penalty 10000 %
}}
-\outer\def\appenixsection{\parsearg\appendixsecyyy}
+\outer\def\appendixsection{\parsearg\appendixsecyyy}
\outer\def\appendixsec{\parsearg\appendixsecyyy}
\def\appendixsecyyy #1{\apphead1{#1}} % normally calls appendixsectionzzz
\def\appendixsectionzzz #1{\seccheck{appendixsection}%
@@ -2604,6 +2917,7 @@ July\or August\or September\or October\or November\or December\fi
\unnumbchapmacro{#1}\def\thischapter{}%
\begingroup % Set up to handle contents files properly.
\catcode`\\=0 \catcode`\{=1 \catcode`\}=2 \catcode`\@=11
+ \catcode`\^=7 % to see ^^e4 as \"a etc. juha@piuha.ydi.vtt.fi
\raggedbottom % Worry more about breakpoints than the bottom.
\advance\hsize by -\contentsrightmargin % Don't use the full line length.
}
@@ -2611,7 +2925,7 @@ July\or August\or September\or October\or November\or December\fi
% Normal (long) toc.
\outer\def\contents{%
- \startcontents{Table of Contents}%
+ \startcontents{\putwordTableofContents}%
\input \jobname.toc
\endgroup
\vfill \eject
@@ -2619,7 +2933,7 @@ July\or August\or September\or October\or November\or December\fi
% And just the chapters.
\outer\def\summarycontents{%
- \startcontents{Short Contents}%
+ \startcontents{\putwordShortContents}%
%
\let\chapentry = \shortchapentry
\let\unnumbchapentry = \shortunnumberedentry
@@ -2658,7 +2972,7 @@ July\or August\or September\or October\or November\or December\fi
% We could simplify the code here by writing out an \appendixentry
% command in the toc file for appendices, instead of using \chapentry
% for both, but it doesn't seem worth it.
-\setbox0 = \hbox{\shortcontrm Appendix }
+\setbox0 = \hbox{\shortcontrm \putwordAppendix }
\newdimen\shortappendixwidth \shortappendixwidth = \wd0
\def\shortchaplabel#1{%
@@ -2729,9 +3043,10 @@ July\or August\or September\or October\or November\or December\fi
% can't do that in the \entry macro, since index entries might consist
% of hyphenated-identifiers-that-do-not-fit-on-a-line-and-nothing-else.)
%
+% \turnoffactive is for the sake of @" used for umlauts.
\def\tocentry#1#2{\begingroup
\hyphenpenalty = 10000
- \entry{#1}{#2}%
+ \entry{\turnoffactive #1}{\turnoffactive #2}%
\endgroup}
% Space between chapter (or whatever) number and the title.
@@ -2812,6 +3127,7 @@ July\or August\or September\or October\or November\or December\fi
\catcode`\>=12
\escapechar=`\\
%
+\let\~=\ptextilde
\let\{=\ptexlbrace
\let\}=\ptexrbrace
\let\.=\ptexdot
@@ -2986,9 +3302,9 @@ July\or August\or September\or October\or November\or December\fi
\let\Esmallexample = \nonfillfinish
%
% Smaller interline space and fonts for small examples.
- \baselineskip 10pt
+ \setleading{10pt}%
\indexfonts \tt
- \rawbackslash % output the \ character from the current font
+ \rawbackslash % make \ output the \ character from the current font (tt)
\gobble
}
@@ -3024,23 +3340,26 @@ July\or August\or September\or October\or November\or December\fi
\advance\leftskip by 0pt plus 1fill
\gobble}
-% @quotation does normal linebreaking and narrows the margins.
+% @quotation does normal linebreaking (hence we can't use \nonfillstart)
+% and narrows the margins.
%
\def\quotation{%
-\begingroup\inENV %This group ends at the end of the @quotation body
-{\parskip=0pt % because we will skip by \parskip too, later
-\aboveenvbreak}%
-\singlespace
-\parindent=0pt
-\let\Equotation = \nonfillfinish
-% @cartouche defines \nonarrowing to inhibit narrowing
-% at next level down.
-\ifx\nonarrowing\relax
-\advance \leftskip by \lispnarrowing
-\advance \rightskip by \lispnarrowing
-\exdentamount=\lispnarrowing
-\let\nonarrowing=\relax
-\fi}
+ \begingroup\inENV %This group ends at the end of the @quotation body
+ {\parskip=0pt \aboveenvbreak}% because \aboveenvbreak inserts \parskip
+ \singlespace
+ \parindent=0pt
+ % We have retained a nonzero parskip for the environment, since we're
+ % doing normal filling. So to avoid extra space below the environment...
+ \def\Equotation{\parskip = 0pt \nonfillfinish}%
+ %
+ % @cartouche defines \nonarrowing to inhibit narrowing at next level down.
+ \ifx\nonarrowing\relax
+ \advance\leftskip by \lispnarrowing
+ \advance\rightskip by \lispnarrowing
+ \exdentamount = \lispnarrowing
+ \let\nonarrowing = \relax
+ \fi
+}
\message{defuns,}
% Define formatter for defuns
@@ -3072,6 +3391,9 @@ July\or August\or September\or October\or November\or December\fi
\gdef\functionparens{\boldbrax\let&=\amprm\parencount=0 }
\gdef\boldbrax{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb}
+% This is used to turn on special parens
+% but make & act ordinary (given that it's active).
+\gdef\boldbraxnoamp{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb\let&=\ampnr}
% Definitions of (, ) and & used in args for functions.
% This is the definition of ( outside of all parentheses.
@@ -3280,8 +3602,9 @@ July\or August\or September\or October\or November\or December\fi
\def\deftypefunargs #1{%
% Expand, preventing hyphenation at `-' chars.
% Note that groups don't affect changes in \hyphenchar.
-\functionparens
-\code{#1}%
+% Use \boldbraxnoamp, not \functionparens, so that & is not special.
+\boldbraxnoamp
+\tclose{#1}% avoid \code because of side effects on active chars
\interlinepenalty=10000
\advance\rightskip by 0pt plus 1fil
\endgraf\penalty 10000\vskip -\parskip\penalty 10000%
@@ -3317,7 +3640,7 @@ July\or August\or September\or October\or November\or December\fi
% #1 is the data type, #2 the name, #3 the args.
\def\deftypefunheaderx #1#2 #3\relax{%
\doind {fn}{\code{#2}}% Make entry in function index
-\begingroup\defname {\code{#1} #2}{Function}%
+\begingroup\defname {\defheaderxcond#1\relax$$$#2}{Function}%
\deftypefunargs {#3}\endgroup %
\catcode 61=\other % Turn off change made in \defparsebody
}
@@ -3326,6 +3649,10 @@ July\or August\or September\or October\or November\or December\fi
\def\deftypefn{\defmethparsebody\Edeftypefn\deftypefnx\deftypefnheader}
+% \defheaderxcond#1\relax$$$
+% puts #1 in @code, followed by a space, but does nothing if #1 is null.
+\def\defheaderxcond#1#2$$${\ifx#1\relax\else\code{#1#2} \fi}
+
% #1 is the classification. #2 is the data type. #3 is the name and args.
\def\deftypefnheader #1#2#3{\deftypefnheaderx{#1}{#2}#3 \relax}
% #1 is the classification, #2 the data type, #3 the name, #4 the args.
@@ -3334,7 +3661,7 @@ July\or August\or September\or October\or November\or December\fi
\begingroup
\normalparens % notably, turn off `&' magic, which prevents
% at least some C++ text from working
-\defname {\code{#2} #3}{#1}%
+\defname {\defheaderxcond#2\relax$$$#3}{#1}%
\deftypefunargs {#4}\endgroup %
\catcode 61=\other % Turn off change made in \defparsebody
}
@@ -3462,7 +3789,7 @@ July\or August\or September\or October\or November\or December\fi
% #1 is the data type. #2 is the name.
\def\deftypevarheader #1#2{%
\doind {vr}{\code{#2}}% Make entry in variables index
-\begingroup\defname {\code{#1} #2}{Variable}%
+\begingroup\defname {\defheaderxcond#1\relax$$$#2}{Variable}%
\interlinepenalty=10000
\endgraf\penalty 10000\vskip -\parskip\penalty 10000
\endgroup}
@@ -3472,7 +3799,7 @@ July\or August\or September\or October\or November\or December\fi
\def\deftypevr{\defvrparsebody\Edeftypevr\deftypevrx\deftypevrheader}
\def\deftypevrheader #1#2#3{\doind {vr}{\code{#3}}%
-\begingroup\defname {\code{#2} #3}{#1}
+\begingroup\defname {\defheaderxcond#2\relax$$$#3}{#1}
\interlinepenalty=10000
\endgraf\penalty 10000\vskip -\parskip\penalty 10000
\endgroup}
@@ -3533,54 +3860,62 @@ July\or August\or September\or October\or November\or December\fi
% file, #5 the name of the printed manual. All but the node name can be
% omitted.
%
-\def\pxref#1{see \xrefX[#1,,,,,,,]}
-\def\xref#1{See \xrefX[#1,,,,,,,]}
+\def\pxref#1{\putwordsee{} \xrefX[#1,,,,,,,]}
+\def\xref#1{\putwordSee{} \xrefX[#1,,,,,,,]}
\def\ref#1{\xrefX[#1,,,,,,,]}
-\def\xrefX[#1,#2,#3,#4,#5,#6]{\begingroup%
-\def\printedmanual{\ignorespaces #5}%
-\def\printednodename{\ignorespaces #3}%
-%
-\setbox1=\hbox{\printedmanual}%
-\setbox0=\hbox{\printednodename}%
-\ifdim \wd0=0pt%
-% No printed node name was explicitly given.
-\ifx SETxref-automatic-section-title %
-% This line should make the actual chapter or section title appear inside
-% the square brackets. Use the real section title if we have it.
-\ifdim \wd1>0pt%
-% It is in another manual, so we don't have it.
-\def\printednodename{\ignorespaces #1} \else%
-% We know the real title if we have the xref values.
-\ifhavexrefs \def\printednodename{\refx{#1-title}}%
-% Otherwise just copy the Info node name.
-\else \def\printednodename{\ignorespaces #1} \fi%
-\fi\def\printednodename{#1-title}%
-\else% This line just uses the node name.
-\def\printednodename{\ignorespaces #1}%
-\fi% ends \ifx SETxref-automatic-section-title
-\fi% ends \ifdim \wd0
-%
-%
-% If we use \unhbox0 and \unhbox1 to print the node names, TeX does
-% not insert empty discretionaries after hyphens, which means that it
-% will not find a line break at a hyphen in a node names. Since some
-% manuals are best written with fairly long node names, containing
-% hyphens, this is a loss. Therefore, we simply give the text of
-% the node name again, so it is as if TeX is seeing it for the first
-% time.
-\ifdim \wd1>0pt
-section ``\printednodename'' in \cite{\printedmanual}%
-\else%
-\turnoffactive%
-\refx{#1-snt}{} [\printednodename], page\tie\refx{#1-pg}{}%
-\fi
+\def\xrefX[#1,#2,#3,#4,#5,#6]{\begingroup
+ \def\printedmanual{\ignorespaces #5}%
+ \def\printednodename{\ignorespaces #3}%
+ \setbox1=\hbox{\printedmanual}%
+ \setbox0=\hbox{\printednodename}%
+ \ifdim \wd0 = 0pt
+ % No printed node name was explicitly given.
+ \expandafter\ifx\csname SETxref-automatic-section-title\endcsname\relax
+ % Use the node name inside the square brackets.
+ \def\printednodename{\ignorespaces #1}%
+ \else
+      % Make the actual chapter/section title appear inside
+      % the square brackets. Use the real section title if we have it.
+ \ifdim \wd1>0pt%
+ % It is in another manual, so we don't have it.
+ \def\printednodename{\ignorespaces #1}%
+ \else
+ \ifhavexrefs
+ % We know the real title if we have the xref values.
+ \def\printednodename{\refx{#1-title}{}}%
+ \else
+ % Otherwise just copy the Info node name.
+ \def\printednodename{\ignorespaces #1}%
+ \fi%
+ \fi
+ \fi
+ \fi
+ %
+ % If we use \unhbox0 and \unhbox1 to print the node names, TeX does not
+ % insert empty discretionaries after hyphens, which means that it will
+  % not find a line break at a hyphen in a node name.  Since some manuals
+ % are best written with fairly long node names, containing hyphens, this
+ % is a loss. Therefore, we give the text of the node name again, so it
+ % is as if TeX is seeing it for the first time.
+ \ifdim \wd1 > 0pt
+ \putwordsection{} ``\printednodename'' in \cite{\printedmanual}%
+ \else
+ % _ (for example) has to be the character _ for the purposes of the
+ % control sequence corresponding to the node, but it has to expand
+ % into the usual \leavevmode...\vrule stuff for purposes of
+ % printing. So we \turnoffactive for the \refx-snt, back on for the
+ % printing, back off for the \refx-pg.
+ {\turnoffactive \refx{#1-snt}{}}%
+ \space [\printednodename],\space
+ \turnoffactive \putwordpage\tie\refx{#1-pg}{}%
+ \fi
\endgroup}
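
For reference, a cross reference that hands \xrefX every argument would look like
this in the Texinfo source (all names invented; the trailing arguments may be
omitted, as noted before the old definition):

    @xref{Node Name, Cross Ref Label, Printed Node Title, info-file, The Printed Manual}.

and the automatic section title behaviour tested through \csname SET... above
would be requested with

    @set xref-automatic-section-title

assuming @set defines the corresponding \SET... control sequence, as the test implies.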
% \dosetq is the interface for calls from other macros
% Use \turnoffactive so that punctuation chars such as underscore
% work in node names.
-\def\dosetq #1#2{{\let\folio=0 \turnoffactive%
+\def\dosetq #1#2{{\let\folio=0 \turnoffactive \auxhat%
\edef\next{\write\auxfile{\internalsetq {#1}{#2}}}%
\next}}
@@ -3599,21 +3934,21 @@ section ``\printednodename'' in \cite{\printedmanual}%
\def\Ynothing{}
\def\Ysectionnumberandtype{%
-\ifnum\secno=0 Chapter\xreftie\the\chapno %
-\else \ifnum \subsecno=0 Section\xreftie\the\chapno.\the\secno %
+\ifnum\secno=0 \putwordChapter\xreftie\the\chapno %
+\else \ifnum \subsecno=0 \putwordSection\xreftie\the\chapno.\the\secno %
\else \ifnum \subsubsecno=0 %
-Section\xreftie\the\chapno.\the\secno.\the\subsecno %
+\putwordSection\xreftie\the\chapno.\the\secno.\the\subsecno %
\else %
-Section\xreftie\the\chapno.\the\secno.\the\subsecno.\the\subsubsecno %
+\putwordSection\xreftie\the\chapno.\the\secno.\the\subsecno.\the\subsubsecno %
\fi \fi \fi }
\def\Yappendixletterandtype{%
-\ifnum\secno=0 Appendix\xreftie'char\the\appendixno{}%
-\else \ifnum \subsecno=0 Section\xreftie'char\the\appendixno.\the\secno %
+\ifnum\secno=0 \putwordAppendix\xreftie'char\the\appendixno{}%
+\else \ifnum \subsecno=0 \putwordSection\xreftie'char\the\appendixno.\the\secno %
\else \ifnum \subsubsecno=0 %
-Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno %
+\putwordSection\xreftie'char\the\appendixno.\the\secno.\the\subsecno %
\else %
-Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
+\putwordSection\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
\fi \fi \fi }
\gdef\xreftie{'tie}
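
These \putword... hooks presumably receive English defaults elsewhere in this
patch; the point of routing "Chapter", "Section" and "Appendix" through them is
that a translated manual can override the words, e.g. (German chosen only for
illustration):

    \global\def\putwordChapter{Kapitel}
    \global\def\putwordSection{Abschnitt}
    \global\def\putwordAppendix{Anhang}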
@@ -3701,6 +4036,15 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
\catcode `\&=\other
% `\+ does not work, so use 43.
\catcode 43=\other
+% Make the characters 128-255 be printing characters
+{%
+ \count 1=128
+ \def\loop{%
+ \catcode\count 1=\other
+ \advance\count 1 by 1
+ \ifnum \count 1<256 \loop \fi
+ }%
+}%
% the aux file uses ' as the escape.
% Turn off \ as an escape so we do not lose on
% entries which were dumped with control sequences in their names.
@@ -3710,6 +4054,7 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
\catcode `\{=1 \catcode `\}=2
\catcode `\%=\other
\catcode `\'=0
+\catcode`\^=7 % to make ^^e4 etc usable in xref tags
\catcode `\\=\other
\openin 1 \jobname.aux
\ifeof 1 \else \closein 1 \input \jobname.aux \global\havexrefstrue
@@ -3896,6 +4241,8 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
\global\tolerance=700
\global\hfuzz=1pt
\global\contentsrightmargin=0pt
+\global\deftypemargin=0pt
+\global\defbodyindent=.5cm
\global\pagewidth=\hsize
\global\pageheight=\vsize
@@ -3925,6 +4272,32 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
\global\pageheight=\vsize
}
+% Allow control of the text dimensions.  Parameters, in order: text height;
+% text width; \voffset; \hoffset (!); binding offset.  All five require a
+% dimension; the header is additional, and any added length extends the
+% bottom of the page.
+
+\def\changepagesizes#1#2#3#4#5{
+ \global\vsize= #1
+ \advance\vsize by \topskip
+ \global\voffset= #3
+ \global\hsize= #2
+ \global\outerhsize=\hsize
+ \global\advance\outerhsize by 0.5in
+ \global\outervsize=\vsize
+ \global\advance\outervsize by 0.6in
+ \global\pagewidth=\hsize
+ \global\pageheight=\vsize
+ \global\normaloffset= #4
+ \global\bindingoffset= #5}
+
+% This layout is compatible with LaTeX on A4 paper.
+
+\def\afourlatex{\changepagesizes{22cm}{15cm}{7mm}{4.6mm}{5mm}}
+
+% Use @afourwide to print on European A4 paper in wide format.
+\def\afourwide{\afourpaper
+\changepagesizes{9.5in}{6.5in}{\hoffset}{\normaloffset}{\bindingoffset}}
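
A sketch of how a further preset could be layered on \changepagesizes (the
\afivepaper name and its dimensions are invented; the argument order is text
height, text width, \voffset, \hoffset, binding offset, per the comment above):

    \def\afivepaper{\changepagesizes{17cm}{11cm}{7mm}{4.6mm}{5mm}}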
+
% Define macros to output various characters with catcode for normal text.
\catcode`\"=\other
\catcode`\~=\other
@@ -3966,19 +4339,13 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
\def~{{\tt \char '176}}
\chardef\hat=`\^
\catcode`\^=\active
+\def\auxhat{\def^{'hat}}
\def^{{\tt \hat}}
\catcode`\_=\active
\def_{\ifusingtt\normalunderscore\_}
% Subroutine for the previous macro.
-\def\_{\lvvmode \kern.06em \vbox{\hrule width.3em height.1ex}}
-
-% \lvvmode is equivalent in function to \leavevmode.
-% Using \leavevmode runs into trouble when written out to
-% an index file due to the expansion of \leavevmode into ``\unhbox
-% \voidb@x'' ---which looks to TeX like ``\unhbox \voidb\x'' due to our
-% magic tricks with @.
-\def\lvvmode{\vbox to 0pt{}}
+\def\_{\leavevmode \kern.06em \vbox{\hrule width.3em height.1ex}}
\catcode`\|=\active
\def|{{\tt \char '174}}
@@ -3993,21 +4360,19 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
%\catcode 27=\active
%\def^^[{$\diamondsuit$}
-% Used sometimes to turn off (effectively) the active characters
-% even after parsing them.
-\def\turnoffactive{\let"=\normaldoublequote
-\let~=\normaltilde
-\let^=\normalcaret
-\let_=\normalunderscore
-\let|=\normalverticalbar
-\let<=\normalless
-\let>=\normalgreater
-\let+=\normalplus}
-
% Set up an active definition for =, but don't enable it most of the time.
{\catcode`\==\active
\global\def={{\tt \char 61}}}
+\catcode`+=\active
+\catcode`\_=\active
+
+% If a .fmt file is being used, characters that might appear in a file
+% name cannot be active until we have parsed the command line.
+% So turn them off again, and have \everyjob (or @setfilename) turn them on.
+% \otherifyactive is called near the end of this file.
+\def\otherifyactive{\catcode`+=\other \catcode`\_=\other}
+
\catcode`\@=0
% \rawbackslashxx output one backslash character in current font
@@ -4028,6 +4393,32 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
% \catcode 17=0 % Define control-q
\catcode`\\=\active
+% Used sometimes to turn off (effectively) the active characters
+% even after parsing them.
+@def@turnoffactive{@let"=@normaldoublequote
+@let\=@realbackslash
+@let~=@normaltilde
+@let^=@normalcaret
+@let_=@normalunderscore
+@let|=@normalverticalbar
+@let<=@normalless
+@let>=@normalgreater
+@let+=@normalplus}
+
+@def@normalturnoffactive{@let"=@normaldoublequote
+@let\=@normalbackslash
+@let~=@normaltilde
+@let^=@normalcaret
+@let_=@normalunderscore
+@let|=@normalverticalbar
+@let<=@normalless
+@let>=@normalgreater
+@let+=@normalplus}
+
+% Make _ and + \other characters, temporarily.
+% This is canceled by @fixbackslash.
+@otherifyactive
+
% If a .fmt file is being used, we don't want the `\input texinfo' to show up.
% That is what \eatinput is for; after that, the `\' should revert to printing
% a backslash.
@@ -4038,8 +4429,11 @@ Section\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno %
% On the other hand, perhaps the file did not have a `\input texinfo'. Then
% the first `\{ in the file would cause an error. This macro tries to fix
% that, assuming it is called before the first `\' could plausibly occur.
+% Also turn back on the active characters that might appear in the input
+% file name, in case we are not using a pre-dumped format.
%
-@gdef@fixbackslash{@ifx\@eatinput @let\ = @normalbackslash @fi}
+@gdef@fixbackslash{@ifx\@eatinput @let\ = @normalbackslash @fi
+ @catcode`+=@active @catcode`@_=@active}
%% These look ok in all fonts, so just make them not special. The @rm below
%% makes sure that the current font starts out as the newly loaded cmr10
diff --git a/eval.c b/eval.c
index 5006ccf7..36e4d678 100644
--- a/eval.c
+++ b/eval.c
@@ -6,7 +6,7 @@
* Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,8 +19,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "awk.h"
@@ -34,7 +34,9 @@ static NODE *op_assign P((NODE *tree));
static NODE *func_call P((NODE *name, NODE *arg_list));
static NODE *match_op P((NODE *tree));
+#if __GNUC__ < 2
NODE *_t; /* used as a temporary in macros */
+#endif
#ifdef MSDOS
double _msc51bug; /* to get around a bug in MSC 5.1 */
#endif
@@ -49,12 +51,12 @@ int CONVFMTidx;
* the val variable allows return/continue/break-out-of-context to be
* caught and diagnosed
*/
-#define PUSH_BINDING(stack, x, val) (memcpy ((char *)(stack), (char *)(x), sizeof (jmp_buf)), val++)
-#define RESTORE_BINDING(stack, x, val) (memcpy ((char *)(x), (char *)(stack), sizeof (jmp_buf)), val--)
+#define PUSH_BINDING(stack, x, val) (memcpy((char *)(stack), (char *)(x), sizeof(jmp_buf)), val++)
+#define RESTORE_BINDING(stack, x, val) (memcpy((char *)(x), (char *)(stack), sizeof(jmp_buf)), val--)
-static jmp_buf loop_tag; /* always the current binding */
-static int loop_tag_valid = 0; /* nonzero when loop_tag valid */
-static int func_tag_valid = 0;
+static jmp_buf loop_tag; /* always the current binding */
+static int loop_tag_valid = FALSE; /* nonzero when loop_tag valid */
+static int func_tag_valid = FALSE;
static jmp_buf func_tag;
extern int exiting, exit_val;
@@ -62,7 +64,7 @@ extern int exiting, exit_val;
 * This table is used by the regexp routines to do case independent
* matching. Basically, every ascii character maps to itself, except
* uppercase letters map to lower case ones. This table has 256
- * entries, which may be overkill. Note also that if the system this
+ * entries, for ISO 8859-1. Note also that if the system this
* is compiled on doesn't use 7-bit ascii, casetable[] should not be
* defined to the linker, so gawk should not load.
*
@@ -99,6 +101,24 @@ char casetable[] = {
'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
/* 'x' 'y' 'z' '{' '|' '}' '~' */
'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+#ifndef USE_PURE_ASCII
+ '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+ '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+ '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+ '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+ '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+ '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+ '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
+ '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+ '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+ '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+#else
'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
@@ -115,12 +135,14 @@ char casetable[] = {
'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+#endif
};
#else
#include "You lose. You will need a translation table for your character set."
#endif
/*
+ * interpret:
* Tree is a bunch of rules to run. Returns zero if it hit an exit()
* statement
*/
@@ -135,7 +157,7 @@ register NODE *volatile tree;
register NODE *volatile t = NULL; /* temporary */
NODE **volatile lhs; /* lhs == Left Hand Side for assigns, etc */
NODE *volatile stable_tree;
- int volatile traverse = 1; /* True => loop thru tree (Node_rule_list) */
+ int volatile traverse = TRUE; /* True => loop thru tree (Node_rule_list) */
/* avoid false source indications */
source = NULL;
@@ -147,7 +169,7 @@ register NODE *volatile tree;
source = tree->source_file;
switch (tree->type) {
case Node_rule_node:
- traverse = 0; /* False => one for-loop iteration only */
+ traverse = FALSE; /* False => one for-loop iteration only */
/* FALL THROUGH */
case Node_rule_list:
for (t = tree; t != NULL; t = t->rnode) {
@@ -169,8 +191,8 @@ register NODE *volatile tree;
default:
cant_happen();
}
- if (!traverse) /* case Node_rule_node */
- break; /* don't loop */
+ if (! traverse) /* case Node_rule_node */
+ break; /* don't loop */
}
break;
@@ -180,11 +202,10 @@ register NODE *volatile tree;
break;
case Node_K_if:
- if (eval_condition(tree->lnode)) {
+ if (eval_condition(tree->lnode))
(void) interpret(tree->rnode->lnode);
- } else {
+ else
(void) interpret(tree->rnode->rnode);
- }
break;
case Node_K_while:
@@ -263,6 +284,8 @@ register NODE *volatile tree;
if (t->type == Node_param_list)
t = stack_ptr[t->param_cnt];
stable_tree = tree;
+ if ((t->flags & SCALAR) != 0)
+ fatal("attempt to use scalar as array");
for (assoc_scan(t, (struct search *)&l);
l.retval;
assoc_next((struct search *)&l)) {
@@ -288,40 +311,40 @@ register NODE *volatile tree;
}
case Node_K_break:
- if (loop_tag_valid == 0) {
+ if (! loop_tag_valid) {
/*
* Old AT&T nawk treats break outside of loops like
* next. New ones catch it at parse time. Allow it if
- * do_unix is on, and complain if lint.
+ * do_traditional is on, and complain if lint.
*/
- static int warned = 0;
+ static int warned = FALSE;
if (do_lint && ! warned) {
- warning("use of `break' outside of loop is not portable");
- warned = 1;
+ warning("use of `break' outside a loop is not portable");
+ warned = TRUE;
}
- if (! do_unix)
- fatal("use of `break' outside of loop is not allowed");
+ if (! do_traditional || do_posix)
+ fatal("use of `break' outside a loop is not allowed");
longjmp(rule_tag, TAG_CONTINUE);
} else
longjmp(loop_tag, TAG_BREAK);
break;
case Node_K_continue:
- if (loop_tag_valid == 0) {
+ if (! loop_tag_valid) {
/*
* Old AT&T nawk treats continue outside of loops like
* next. New ones catch it at parse time. Allow it if
- * do_unix is on, and complain if lint.
+ * do_traditional is on, and complain if lint.
*/
- static int warned = 0;
+ static int warned = FALSE;
if (do_lint && ! warned) {
- warning("use of `continue' outside of loop is not portable");
- warned = 1;
+ warning("use of `continue' outside a loop is not portable");
+ warned = TRUE;
}
- if (! do_unix)
- fatal("use of `continue' outside of loop is not allowed");
+ if (! do_traditional || do_posix)
+ fatal("use of `continue' outside a loop is not allowed");
longjmp(rule_tag, TAG_CONTINUE);
} else
longjmp(loop_tag, TAG_CONTINUE);
@@ -336,7 +359,7 @@ register NODE *volatile tree;
break;
case Node_K_delete:
- if (tree->rnode != NULL)
+ if (tree->rnode != NULL) /* delete array */
do_delete(tree->lnode, tree->rnode);
else
assoc_clear(tree->lnode);
@@ -358,8 +381,8 @@ register NODE *volatile tree;
* any are executed." This implies that the rest of the rules
* are not done. So we immediately break out of the main loop.
*/
- exiting = 1;
- if (tree->lnode) {
+ exiting = TRUE;
+ if (tree->lnode != NULL) {
t = tree_eval(tree->lnode);
exit_val = (int) force_number(t);
free_temp(t);
@@ -388,11 +411,12 @@ register NODE *volatile tree;
return 1;
}
-/* evaluate a subtree */
+/* r_tree_eval --- evaluate a subtree */
NODE *
-r_tree_eval(tree)
+r_tree_eval(tree, iscond)
register NODE *tree;
+int iscond;
{
register NODE *r, *t1, *t2; /* return value & temporary subtrees */
register NODE **lhs;
@@ -402,24 +426,30 @@ register NODE *tree;
#ifdef _CRAY
long lx2;
#endif
+ char namebuf[100];
#ifdef DEBUG
if (tree == NULL)
return Nnull_string;
- if (tree->type == Node_val) {
- if ((char)tree->stref <= 0) cant_happen();
+ else if (tree->type == Node_val) {
+ if (tree->stref <= 0)
+ cant_happen();
return tree;
- }
- if (tree->type == Node_var) {
- if ((char)tree->var_value->stref <= 0) cant_happen();
+ } else if (tree->type == Node_var) {
+ if (tree->var_value->stref <= 0)
+ cant_happen();
return tree->var_value;
}
#endif
if (tree->type == Node_param_list) {
+ int paramnum = tree->param_cnt + 1;
+
tree = stack_ptr[tree->param_cnt];
if (tree == NULL)
return Nnull_string;
+ sprintf(namebuf, "parameter #%d", paramnum);
+ tree->vname = namebuf;
}
switch (tree->type) {
@@ -439,7 +469,7 @@ register NODE *tree;
/* Builtins */
case Node_builtin:
- return ((*tree->proc) (tree->subnode));
+ return (*tree->proc)(tree->subnode);
case Node_K_getline:
return (do_getline(tree));
@@ -464,11 +494,12 @@ register NODE *tree;
case Node_ORS:
case Node_OFMT:
case Node_CONVFMT:
- lhs = get_lhs(tree, (Func_ptr *)0);
+ lhs = get_lhs(tree, (Func_ptr *) NULL);
return *lhs;
case Node_var_array:
- fatal("attempt to use array `%s' in a scalar context", tree->vname);
+ fatal("attempt to use array `%s' in a scalar context",
+ tree->vname);
case Node_unary_minus:
t1 = tree_eval(tree->subnode);
@@ -496,6 +527,8 @@ register NODE *tree;
{
Func_ptr after_assign = NULL;
+ if (iscond && do_lint)
+ warning("assignment used in conditional context");
r = tree_eval(tree->rnode);
lhs = get_lhs(tree->lnode, &after_assign);
if (r != *lhs) {
@@ -506,6 +539,7 @@ register NODE *tree;
unref(save);
}
free_temp(r);
+ tree->lnode->flags |= SCALAR;
if (after_assign)
(*after_assign)();
return *lhs;
@@ -662,9 +696,7 @@ register NODE *tree;
if (x2 == 0)
fatal("division by zero attempted");
#ifdef _CRAY
- /*
- * special case for integer division, put in for Cray
- */
+ /* special case for integer division, put in for Cray */
lx2 = x2;
if (lx2 == 0)
return tmp_number(x1 / x2);
@@ -678,12 +710,12 @@ register NODE *tree;
case Node_mod:
if (x2 == 0)
fatal("division by zero attempted in mod");
-#ifndef FMOD_MISSING
- return tmp_number(fmod (x1, x2));
-#else
+#ifdef HAVE_FMOD
+ return tmp_number(fmod(x1, x2));
+#else /* ! HAVE_FMOD */
(void) modf(x1 / x2, &x);
return tmp_number(x1 - x * x2);
-#endif
+#endif /* ! HAVE_FMOD */
case Node_plus:
return tmp_number(x1 + x2);
@@ -692,7 +724,8 @@ register NODE *tree;
return tmp_number(x1 - x2);
case Node_var_array:
- fatal("attempt to use array `%s' in a scalar context", tree->vname);
+ fatal("attempt to use array `%s' in a scalar context",
+ tree->vname);
default:
fatal("illegal type (%d) in tree_eval", tree->type);
@@ -700,7 +733,8 @@ register NODE *tree;
return 0;
}
-/* Is TREE true or false? Returns 0==false, non-zero==true */
+/* eval_condition --- is TREE true or false? Returns 0==false, non-zero==true */
+
static int
eval_condition(tree)
register NODE *tree;
@@ -709,7 +743,7 @@ register NODE *tree;
register int ret;
if (tree == NULL) /* Null trees are the easiest kinds */
- return 1;
+ return TRUE;
if (tree->type == Node_line_range) {
/*
* Node_line_range is kind of like Node_match, EXCEPT: the
@@ -724,15 +758,15 @@ register NODE *tree;
* able to begin and end on a single input record, so this
* isn't an ELSE IF, as noted above.
*/
- if (!tree->triggered)
- if (!eval_condition(tree->condpair->lnode))
- return 0;
+ if (! tree->triggered)
+ if (! eval_condition(tree->condpair->lnode))
+ return FALSE;
else
- tree->triggered = 1;
+ tree->triggered = TRUE;
/* Else we are triggered */
if (eval_condition(tree->condpair->rnode))
- tree->triggered = 0;
- return 1;
+ tree->triggered = FALSE;
+ return TRUE;
}
/*
@@ -740,26 +774,27 @@ register NODE *tree;
* false, anything else is true
*/
- t1 = tree_eval(tree);
+ t1 = m_tree_eval(tree, TRUE);
if (t1->flags & MAYBE_NUM)
(void) force_number(t1);
if (t1->flags & NUMBER)
- ret = t1->numbr != 0.0;
+ ret = (t1->numbr != 0.0);
else
- ret = t1->stlen != 0;
+ ret = (t1->stlen != 0);
free_temp(t1);
return ret;
}
-/*
- * compare two nodes, returning negative, 0, positive
- */
+/* cmp_nodes --- compare two nodes, returning negative, 0, positive */
+
int
cmp_nodes(t1, t2)
register NODE *t1, *t2;
{
register int ret;
register size_t len1, len2;
+ register int l;
+ int ldiff;
if (t1 == t2)
return 0;
@@ -768,20 +803,35 @@ register NODE *t1, *t2;
if (t2->flags & MAYBE_NUM)
(void) force_number(t2);
if ((t1->flags & NUMBER) && (t2->flags & NUMBER)) {
- if (t1->numbr == t2->numbr) return 0;
- else if (t1->numbr - t2->numbr < 0) return -1;
- else return 1;
+ if (t1->numbr == t2->numbr)
+ return 0;
+ /* don't subtract, in case one or both are infinite */
+ else if (t1->numbr < t2->numbr)
+ return -1;
+ else
+ return 1;
}
(void) force_string(t1);
(void) force_string(t2);
len1 = t1->stlen;
len2 = t2->stlen;
+ ldiff = len1 - len2;
if (len1 == 0 || len2 == 0)
- return len1 - len2;
- ret = memcmp(t1->stptr, t2->stptr, len1 <= len2 ? len1 : len2);
- return ret == 0 ? len1-len2 : ret;
+ return ldiff;
+ l = (ldiff <= 0 ? len1 : len2);
+ if (IGNORECASE) {
+ register unsigned char *cp1 = (unsigned char *) t1->stptr;
+ register unsigned char *cp2 = (unsigned char *) t2->stptr;
+
+ for (ret = 0; l-- > 0 && ret == 0; cp1++, cp2++)
+ ret = casetable[*cp1] - casetable[*cp2];
+ } else
+ ret = memcmp(t1->stptr, t2->stptr, l);
+ return (ret == 0 ? ldiff : ret);
}
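
The IGNORECASE branch above folds case through casetable[] one byte at a time
rather than calling tolower(), which is what lets the ISO 8859-1 rows added to
that table earlier in this patch take effect in string comparisons.  A minimal
standalone sketch of the same table-driven comparison, independent of gawk's
NODE machinery:

#include <stdio.h>

/* Toy 256-entry case-folding table, built at startup instead of being
   written out by hand the way casetable[] is in eval.c. */
static unsigned char fold[256];

static void init_fold(void)
{
	int i;

	for (i = 0; i < 256; i++)
		fold[i] = (unsigned char) i;
	for (i = 'A'; i <= 'Z'; i++)
		fold[i] = (unsigned char) (i - 'A' + 'a');
}

/* Compare the way cmp_nodes() does with IGNORECASE set: negative, zero
   or positive, breaking ties on length. */
static int fold_cmp(const char *s1, long len1, const char *s2, long len2)
{
	const unsigned char *p1 = (const unsigned char *) s1;
	const unsigned char *p2 = (const unsigned char *) s2;
	long l = (len1 < len2 ? len1 : len2);
	int ret = 0;

	while (l-- > 0 && ret == 0)
		ret = fold[*p1++] - fold[*p2++];
	return (ret != 0 ? ret : (int) (len1 - len2));
}

int main(void)
{
	init_fold();
	printf("%d\n", fold_cmp("FOO", 3, "foo", 3));	/* prints 0 */
	printf("%d\n", fold_cmp("Bar", 3, "baz", 3));	/* negative */
	return 0;
}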
+/* op_assign --- do +=, -=, etc. */
+
static NODE *
op_assign(tree)
register NODE *tree;
@@ -806,6 +856,7 @@ register NODE *tree;
unref(*lhs);
*lhs = make_number(lval +
(tree->type == Node_preincrement ? 1.0 : -1.0));
+ tree->lnode->flags |= SCALAR;
if (after_assign)
(*after_assign)();
return *lhs;
@@ -815,6 +866,7 @@ register NODE *tree;
unref(*lhs);
*lhs = make_number(lval +
(tree->type == Node_postincrement ? 1.0 : -1.0));
+ tree->lnode->flags |= SCALAR;
if (after_assign)
(*after_assign)();
return tmp_number(lval);
@@ -859,9 +911,7 @@ register NODE *tree;
if (rval == (AWKNUM) 0)
fatal("division by zero attempted in /=");
#ifdef _CRAY
- /*
- * special case for integer division, put in for Cray
- */
+ /* special case for integer division, put in for Cray */
ltemp = rval;
if (ltemp == 0) {
*lhs = make_number(lval / rval);
@@ -871,20 +921,20 @@ register NODE *tree;
if (ltemp * lval == rval)
*lhs = make_number((AWKNUM) ltemp);
else
-#endif
+#endif /* _CRAY */
*lhs = make_number(lval / rval);
break;
case Node_assign_mod:
if (rval == (AWKNUM) 0)
fatal("division by zero attempted in %=");
-#ifndef FMOD_MISSING
+#ifdef HAVE_FMOD
*lhs = make_number(fmod(lval, rval));
-#else
+#else /* ! HAVE_FMOD */
(void) modf(lval / rval, &t1);
t2 = lval - rval * t1;
*lhs = make_number(t2);
-#endif
+#endif /* ! HAVE_FMOD */
break;
case Node_assign_plus:
@@ -897,11 +947,14 @@ register NODE *tree;
default:
cant_happen();
}
+ tree->lnode->flags |= SCALAR;
if (after_assign)
(*after_assign)();
return *lhs;
}
+/* func_call --- call a function, call by reference for arrays */
+
NODE **stack_ptr;
static NODE *
@@ -913,36 +966,31 @@ NODE *arg_list; /* Node_expression_list of calling args. */
NODE *n, *f;
jmp_buf volatile func_tag_stack;
jmp_buf volatile loop_tag_stack;
- int volatile save_loop_tag_valid = 0;
+ int volatile save_loop_tag_valid = FALSE;
NODE **volatile save_stack, *save_ret_node;
NODE **volatile local_stack = NULL, **sp;
int count;
extern NODE *ret_node;
- /*
- * retrieve function definition node
- */
+ /* retrieve function definition node */
f = lookup(name->stptr);
- if (!f || f->type != Node_func)
+ if (f == NULL || f->type != Node_func)
fatal("function `%s' not defined", name->stptr);
#ifdef FUNC_TRACE
fprintf(stderr, "function %s called\n", name->stptr);
#endif
count = f->lnode->param_cnt;
- if (count)
+ if (count > 0)
emalloc(local_stack, NODE **, count*sizeof(NODE *), "func_call");
sp = local_stack;
- /*
- * for each calling arg. add NODE * on stack
- */
- for (argp = arg_list; count && argp != NULL; argp = argp->rnode) {
+ /* for each calling arg. add NODE * on stack */
+ for (argp = arg_list; count > 0 && argp != NULL; argp = argp->rnode) {
arg = argp->lnode;
getnode(r);
r->type = Node_var;
- /*
- * call by reference for arrays; see below also
- */
+
+ /* call by reference for arrays; see below also */
if (arg->type == Node_param_list)
arg = stack_ptr[arg->param_cnt];
if (arg->type == Node_var_array)
@@ -951,6 +999,8 @@ NODE *arg_list; /* Node_expression_list of calling args. */
n = tree_eval(arg);
r->lnode = dupnode(n);
r->rnode = (NODE *) NULL;
+ if ((n->flags & SCALAR) != 0)
+ r->flags |= SCALAR;
free_temp(n);
}
*sp++ = r;
@@ -960,13 +1010,13 @@ NODE *arg_list; /* Node_expression_list of calling args. */
warning(
"function `%s' called with more arguments than declared",
name->stptr);
- /*
- * add remaining params. on stack with null value
- */
+
+ /* add remaining params. on stack with null value */
while (count-- > 0) {
getnode(r);
r->type = Node_var;
r->lnode = Nnull_string;
+ r->flags &= ~SCALAR;
r->rnode = (NODE *) NULL;
*sp++ = r;
}
@@ -987,7 +1037,7 @@ NODE *arg_list; /* Node_expression_list of calling args. */
save_loop_tag_valid = (volatile int) loop_tag_valid;
PUSH_BINDING(loop_tag_stack, loop_tag, junk);
- loop_tag_valid = 0;
+ loop_tag_valid = FALSE;
}
save_stack = stack_ptr;
stack_ptr = local_stack;
@@ -1032,7 +1082,7 @@ NODE *arg_list; /* Node_expression_list of calling args. */
}
while (count-- > 0) {
n = *sp++;
- /* if n is an (local) array, all the elements should be freed */
+ /* if n is a local array, all the elements should be freed */
if (n->type == Node_var_array)
assoc_clear(n);
unref(n->lnode);
@@ -1049,12 +1099,13 @@ NODE *arg_list; /* Node_expression_list of calling args. */
RESTORE_BINDING(loop_tag_stack, loop_tag, junk);
}
- if (!(r->flags & PERM))
+ if ((r->flags & PERM) == 0)
r->flags |= TEMP;
return r;
}
/*
+ * r_get_lhs:
* This returns a POINTER to a node pointer. get_lhs(ptr) is the current
* value of the var, or where to store the var's new value
*/
@@ -1072,30 +1123,32 @@ Func_ptr *assign;
switch (ptr->type) {
case Node_var_array:
- fatal("attempt to use array `%s' in a scalar context", ptr->vname);
+ fatal("attempt to use array `%s' in a scalar context",
+ ptr->vname);
+
case Node_var:
aptr = &(ptr->var_value);
#ifdef DEBUG
- if ((char)ptr->var_value->stref <= 0)
+ if (ptr->var_value->stref <= 0)
cant_happen();
#endif
break;
case Node_FIELDWIDTHS:
aptr = &(FIELDWIDTHS_node->var_value);
- if (assign)
+ if (assign != NULL)
*assign = set_FIELDWIDTHS;
break;
case Node_RS:
aptr = &(RS_node->var_value);
- if (assign)
+ if (assign != NULL)
*assign = set_RS;
break;
case Node_FS:
aptr = &(FS_node->var_value);
- if (assign)
+ if (assign != NULL)
*assign = set_FS;
break;
@@ -1103,7 +1156,7 @@ Func_ptr *assign;
unref(FNR_node->var_value);
FNR_node->var_value = make_number((AWKNUM) FNR);
aptr = &(FNR_node->var_value);
- if (assign)
+ if (assign != NULL)
*assign = set_FNR;
break;
@@ -1111,7 +1164,7 @@ Func_ptr *assign;
unref(NR_node->var_value);
NR_node->var_value = make_number((AWKNUM) NR);
aptr = &(NR_node->var_value);
- if (assign)
+ if (assign != NULL)
*assign = set_NR;
break;
@@ -1121,39 +1174,37 @@ Func_ptr *assign;
unref(NF_node->var_value);
NF_node->var_value = make_number((AWKNUM) NF);
aptr = &(NF_node->var_value);
- if (assign)
+ if (assign != NULL)
*assign = set_NF;
break;
case Node_IGNORECASE:
- unref(IGNORECASE_node->var_value);
- IGNORECASE_node->var_value = make_number((AWKNUM) IGNORECASE);
aptr = &(IGNORECASE_node->var_value);
- if (assign)
+ if (assign != NULL)
*assign = set_IGNORECASE;
break;
case Node_OFMT:
aptr = &(OFMT_node->var_value);
- if (assign)
+ if (assign != NULL)
*assign = set_OFMT;
break;
case Node_CONVFMT:
aptr = &(CONVFMT_node->var_value);
- if (assign)
+ if (assign != NULL)
*assign = set_CONVFMT;
break;
case Node_ORS:
aptr = &(ORS_node->var_value);
- if (assign)
+ if (assign != NULL)
*assign = set_ORS;
break;
case Node_OFS:
aptr = &(OFS_node->var_value);
- if (assign)
+ if (assign != NULL)
*assign = set_OFS;
break;
@@ -1172,7 +1223,7 @@ Func_ptr *assign;
fatal("attempt to access field %d", field_num);
if (field_num == 0 && field0_valid) { /* short circuit */
aptr = &fields_arr[0];
- if (assign)
+ if (assign != NULL)
*assign = reset_record;
break;
}
@@ -1181,13 +1232,18 @@ Func_ptr *assign;
}
case Node_subscript:
n = ptr->lnode;
- if (n->type == Node_param_list)
+ if (n->type == Node_param_list) {
+ int i = n->param_cnt + 1;
+
n = stack_ptr[n->param_cnt];
+ if ((n->flags & SCALAR) != 0)
+ fatal("attempt to use scalar parameter %d as an array", i);
+ }
aptr = assoc_lookup(n, concat_exp(ptr->rnode));
break;
case Node_func:
- fatal ("`%s' is a function, assignment is not allowed",
+ fatal("`%s' is a function, assignment is not allowed",
ptr->lnode->param);
default:
cant_happen();
@@ -1195,6 +1251,8 @@ Func_ptr *assign;
return aptr;
}
+/* match_op --- do ~ and !~ */
+
static NODE *
match_op(tree)
register NODE *tree;
@@ -1202,10 +1260,11 @@ register NODE *tree;
register NODE *t1;
register Regexp *rp;
int i;
- int match = 1;
+ int match = TRUE;
+ int kludge_need_start = FALSE; /* XXX --- see below */
if (tree->type == Node_nomatch)
- match = 0;
+ match = FALSE;
if (tree->type == Node_regex)
t1 = *get_field(0, (Func_ptr *) 0);
else {
@@ -1213,25 +1272,51 @@ register NODE *tree;
tree = tree->rnode;
}
rp = re_update(tree);
- i = research(rp, t1->stptr, 0, t1->stlen, 0);
- i = (i == -1) ^ (match == 1);
+ /*
+ * XXX
+ *
+ * Any place where research() is called with a last parameter of
+ * FALSE, we need to use the avoid_dfa test. This is the only place
+ * at the moment.
+ *
+ * A new or improved dfa that distinguishes beginning/end of
+ * string from beginning/end of line will allow us to get rid of
+ * this temporary hack.
+ *
+ * The avoid_dfa() function is in re.c; it is not very smart.
+ */
+ if (avoid_dfa(tree, t1->stptr, t1->stlen))
+ kludge_need_start = TRUE;
+ i = research(rp, t1->stptr, 0, t1->stlen, kludge_need_start);
+ i = (i == -1) ^ (match == TRUE);
free_temp(t1);
return tmp_number((AWKNUM) i);
}
+/* set_IGNORECASE --- update IGNORECASE as appropriate */
+
void
set_IGNORECASE()
{
- static int warned = 0;
+ static int warned = FALSE;
- if ((do_lint || do_unix) && ! warned) {
- warned = 1;
+ if ((do_lint || do_traditional) && ! warned) {
+ warned = TRUE;
warning("IGNORECASE not supported in compatibility mode");
}
- IGNORECASE = (force_number(IGNORECASE_node->var_value) != 0.0);
+ if (do_traditional)
+ IGNORECASE = FALSE;
+ else if (IGNORECASE_node->var_value->flags & STRING)
+ IGNORECASE = (force_string(IGNORECASE_node->var_value)->stlen > 0);
+ else if (IGNORECASE_node->var_value->flags & NUMBER)
+ IGNORECASE = (force_number(IGNORECASE_node->var_value) != 0.0);
+ else
+ IGNORECASE = FALSE; /* shouldn't happen */
set_FS_if_not_FIELDWIDTHS();
}
+/* set_OFS --- update OFS related variables when OFS assigned to */
+
void
set_OFS()
{
@@ -1240,6 +1325,8 @@ set_OFS()
OFS[OFSlen] = '\0';
}
+/* set_ORS --- update ORS related variables when ORS assigned to */
+
void
set_ORS()
{
@@ -1248,6 +1335,8 @@ set_ORS()
ORS[ORSlen] = '\0';
}
+/* fmt_ok --- is the conversion format a valid one? */
+
NODE **fmt_list = NULL;
static int fmt_ok P((NODE *n));
static int fmt_index P((NODE *n));
@@ -1256,10 +1345,30 @@ static int
fmt_ok(n)
NODE *n;
{
- /* to be done later */
+ NODE *tmp = force_string(n);
+ char *p = tmp->stptr;
+
+ if (*p++ != '%')
+ return 0;
+ while (*p && strchr(" +-#", *p) != NULL) /* flags */
+ p++;
+ while (*p && isdigit(*p)) /* width - %*.*g is NOT allowed */
+ p++;
+ if (*p == '\0' || (*p != '.' && ! isdigit(*p)))
+ return 0;
+ if (*p == '.')
+ p++;
+ while (*p && isdigit(*p)) /* precision */
+ p++;
+ if (*p == '\0' || strchr("efgEG", *p) == NULL)
+ return 0;
+ if (*++p != '\0')
+ return 0;
return 1;
}
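
As written, fmt_ok() accepts a single %-conversion with optional flags and width,
insists on a `.' before the final e, f, g, E or G, and rejects anything else, so
the default "%.6g" passes while "%d" or "%*.*g" draws the lint warning issued
from fmt_index() below.  A throwaway sketch of the same check outside gawk
(hypothetical name, same logic):

#include <stdio.h>
#include <string.h>
#include <ctype.h>

/* Same shape as fmt_ok(): one %-conversion, optional flags and width,
   a mandatory `.', optional precision digits, then e, f, g, E or G. */
static int fmt_ok_sketch(const char *p)
{
	if (*p++ != '%')
		return 0;
	while (*p != '\0' && strchr(" +-#", *p) != NULL)	/* flags */
		p++;
	while (*p != '\0' && isdigit((unsigned char) *p))	/* width */
		p++;
	if (*p == '\0' || (*p != '.' && ! isdigit((unsigned char) *p)))
		return 0;
	if (*p == '.')
		p++;
	while (*p != '\0' && isdigit((unsigned char) *p))	/* precision */
		p++;
	if (*p == '\0' || strchr("efgEG", *p) == NULL)
		return 0;
	return (*++p == '\0');
}

int main(void)
{
	printf("%s -> %d\n", "%.6g", fmt_ok_sketch("%.6g"));	/* 1: default CONVFMT */
	printf("%s -> %d\n", "%2.2f", fmt_ok_sketch("%2.2f"));	/* 1 */
	printf("%s -> %d\n", "%d", fmt_ok_sketch("%d"));	/* 0: not a float format */
	return 0;
}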
+/* fmt_index --- track values of OFMT and CONVFMT to keep semantics correct */
+
static int
fmt_index(n)
NODE *n;
@@ -1278,8 +1387,12 @@ NODE *n;
}
/* not found */
n->stptr[n->stlen] = '\0';
- if (!fmt_ok(n))
- warning("bad FMT specification");
+ if (do_lint && ! fmt_ok(n))
+ warning("bad %sFMT specification",
+ n == CONVFMT_node->var_value ? "CONV"
+ : n == OFMT_node->var_value ? "O"
+ : "");
+
if (fmt_hiwater >= fmt_num) {
fmt_num *= 2;
emalloc(fmt_list, NODE **, fmt_num, "fmt_index");
@@ -1288,6 +1401,8 @@ NODE *n;
return fmt_hiwater++;
}
+/* set_OFMT --- track OFMT correctly */
+
void
set_OFMT()
{
@@ -1295,6 +1410,8 @@ set_OFMT()
OFMT = fmt_list[OFMTidx]->stptr;
}
+/* set_CONVFMT --- track CONVFMT correctly */
+
void
set_CONVFMT()
{
diff --git a/field.c b/field.c
index fa7dc3e9..4b638b2c 100644
--- a/field.c
+++ b/field.c
@@ -6,7 +6,7 @@
* Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,44 +19,48 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "awk.h"
-typedef void (* Setfunc) P((int, char*, int, NODE *));
+typedef void (* Setfunc) P((long, char *, long, NODE *));
-static long (*parse_field) P((int, char **, int, NODE *,
+static long (*parse_field) P((long, char **, int, NODE *,
Regexp *, Setfunc, NODE *));
static void rebuild_record P((void));
-static long re_parse_field P((int, char **, int, NODE *,
+static long re_parse_field P((long, char **, int, NODE *,
Regexp *, Setfunc, NODE *));
-static long def_parse_field P((int, char **, int, NODE *,
+static long def_parse_field P((long, char **, int, NODE *,
Regexp *, Setfunc, NODE *));
-static long sc_parse_field P((int, char **, int, NODE *,
+static long null_parse_field P((long, char **, int, NODE *,
Regexp *, Setfunc, NODE *));
-static long fw_parse_field P((int, char **, int, NODE *,
+static long sc_parse_field P((long, char **, int, NODE *,
Regexp *, Setfunc, NODE *));
-static void set_element P((int, char *, int, NODE *));
+static long fw_parse_field P((long, char **, int, NODE *,
+ Regexp *, Setfunc, NODE *));
+static void set_element P((long num, char * str, long len, NODE *arr));
static void grow_fields_arr P((long num));
-static void set_field P((int num, char *str, int len, NODE *dummy));
+static void set_field P((long num, char *str, long len, NODE *dummy));
-static Regexp *FS_regexp = NULL;
static char *parse_extent; /* marks where to restart parse of record */
-static long parse_high_water=0; /* field number that we have parsed so far */
+static long parse_high_water = 0; /* field number that we have parsed so far */
static long nf_high_water = 0; /* size of fields_arr */
static int resave_fs;
static NODE *save_FS; /* save current value of FS when line is read,
* to be used in deferred parsing
*/
+static NODE **nodes; /* permanent repository of field nodes */
+static int *FIELDWIDTHS = NULL;
NODE **fields_arr; /* array of pointers to the field nodes */
int field0_valid; /* $(>0) has not been changed yet */
-int default_FS;
-static NODE **nodes; /* permanent repository of field nodes */
-static int *FIELDWIDTHS = NULL;
+int default_FS; /* 1 when FS == " " */
+Regexp *FS_regexp = NULL;
+
+/* init_fields --- set up the fields array to start with */
void
init_fields()
@@ -67,12 +71,14 @@ init_fields()
emalloc(nodes, NODE **, sizeof(NODE *), "init_fields");
getnode(n);
*n = *Nnull_string;
+ n->flags |= SCALAR;
fields_arr[0] = nodes[0] = n;
parse_extent = fields_arr[0]->stptr;
save_FS = dupnode(FS_node->var_value);
- field0_valid = 1;
+ field0_valid = TRUE;
}
+/* grow_fields --- acquire new fields as needed */
static void
grow_fields_arr(num)
@@ -81,22 +87,25 @@ long num;
register int t;
register NODE *n;
- erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field");
- erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "set_field");
- for (t = nf_high_water+1; t <= num; t++) {
+ erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "grow_fields_arr");
+ erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "grow_fields_arr");
+ for (t = nf_high_water + 1; t <= num; t++) {
getnode(n);
*n = *Nnull_string;
+ n->flags |= SCALAR;
fields_arr[t] = nodes[t] = n;
}
nf_high_water = num;
}
+/* set_field --- set the value of a particular field */
+
/*ARGSUSED*/
static void
set_field(num, str, len, dummy)
-int num;
+long num;
char *str;
-int len;
+long len;
NODE *dummy; /* not used -- just to make interface same as set_element */
{
register NODE *n;
@@ -110,30 +119,34 @@ NODE *dummy; /* not used -- just to make interface same as set_element */
fields_arr[num] = n;
}
-/* Someone assigned a value to $(something). Fix up $0 to be right */
+/* rebuild_record --- Someone assigned a value to $(something).
+ Fix up $0 to be right */
+
static void
rebuild_record()
{
- register size_t tlen;
+ /*
+ * use explicit unsigned longs for lengths, in case
+ * a size_t isn't big enough.
+ */
+ register unsigned long tlen;
+ register unsigned long ofslen;
register NODE *tmp;
NODE *ofs;
char *ops;
register char *cops;
register NODE **ptr;
- register size_t ofslen;
tlen = 0;
ofs = force_string(OFS_node->var_value);
ofslen = ofs->stlen;
- ptr = &fields_arr[NF];
- while (ptr > &fields_arr[0]) {
+ for (ptr = &fields_arr[NF]; ptr > &fields_arr[0]; ptr--) {
tmp = force_string(*ptr);
tlen += tmp->stlen;
- ptr--;
}
tlen += (NF - 1) * ofslen;
- if ((long)tlen < 0)
- tlen = 0;
+ if ((long) tlen < 0)
+ tlen = 0;
emalloc(ops, char *, tlen + 2, "rebuild_record");
cops = ops;
ops[0] = '\0';
@@ -157,10 +170,11 @@ rebuild_record()
tmp = make_str_node(ops, tlen, ALREADY_MALLOCED);
unref(fields_arr[0]);
fields_arr[0] = tmp;
- field0_valid = 1;
+ field0_valid = TRUE;
}
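
rebuild_record() above uses two passes: add up the field and OFS lengths first,
allocate the new record once, then copy everything in.  A standalone sketch of
the same join, with gawk's NODE plumbing left out (names invented):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Join nf strings with a separator using the same two-pass scheme as
   rebuild_record(): measure everything, allocate once, then copy. */
static char *join_fields(char **fields, int nf, const char *ofs)
{
	size_t tlen = 0, ofslen = strlen(ofs), l;
	char *ops, *cops;
	int i;

	for (i = 0; i < nf; i++)
		tlen += strlen(fields[i]);
	if (nf > 1)
		tlen += (size_t) (nf - 1) * ofslen;

	ops = malloc(tlen + 1);
	if (ops == NULL)
		return NULL;
	cops = ops;
	for (i = 0; i < nf; i++) {
		l = strlen(fields[i]);
		memcpy(cops, fields[i], l);
		cops += l;
		if (i + 1 < nf) {
			memcpy(cops, ofs, ofslen);
			cops += ofslen;
		}
	}
	*cops = '\0';
	return ops;
}

int main(void)
{
	char *f[] = { "a", "bb", "ccc" };
	char *rec = join_fields(f, 3, ":");

	if (rec != NULL) {
		puts(rec);	/* prints a:bb:ccc */
		free(rec);
	}
	return 0;
}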
/*
+ * set_record:
* setup $0, but defer parsing rest of line until reference is made to $(>0)
* or to NF. At that point, parse only as much as necessary.
*/
@@ -173,17 +187,21 @@ int freeold;
register int i;
NF = -1;
- for (i = 1; i <= parse_high_water; i++) {
+ for (i = 1; i <= parse_high_water; i++)
unref(fields_arr[i]);
- }
+
parse_high_water = 0;
+ /*
+ * $0 = $0 should resplit using the current value of FS, thus,
+ * this if is executed orthogonally to the value of freeold.
+ */
+ if (resave_fs) {
+ resave_fs = FALSE;
+ unref(save_FS);
+ save_FS = dupnode(FS_node->var_value);
+ }
if (freeold) {
unref(fields_arr[0]);
- if (resave_fs) {
- resave_fs = 0;
- unref(save_FS);
- save_FS = dupnode(FS_node->var_value);
- }
nodes[0]->stptr = buf;
nodes[0]->stlen = cnt;
nodes[0]->stref = 1;
@@ -191,16 +209,20 @@ int freeold;
fields_arr[0] = nodes[0];
}
fields_arr[0]->flags |= MAYBE_NUM;
- field0_valid = 1;
+ field0_valid = TRUE;
}
+/* reset_record --- start over again with current $0 */
+
void
reset_record()
{
(void) force_string(fields_arr[0]);
- set_record(fields_arr[0]->stptr, fields_arr[0]->stlen, 0);
+ set_record(fields_arr[0]->stptr, fields_arr[0]->stlen, FALSE);
}
+/* set_NF --- handle what happens to $0 and fields when NF is changed */
+
void
set_NF()
{
@@ -213,17 +235,19 @@ set_NF()
unref(fields_arr[i]);
fields_arr[i] = Nnull_string;
}
- field0_valid = 0;
+ field0_valid = FALSE;
}
/*
- * this is called both from get_field() and from do_split()
+ * re_parse_field --- parse fields using a regexp.
+ *
+ * This is called both from get_field() and from do_split()
* via (*parse_field)(). This variation is for when FS is a regular
* expression -- either user-defined or because RS=="" and FS==" "
*/
static long
re_parse_field(up_to, buf, len, fs, rp, set, n)
-int up_to; /* parse only up to this field number */
+long up_to; /* parse only up to this field number */
char **buf; /* on input: string to parse; on output: point to start next */
int len;
NODE *fs;
@@ -232,7 +256,7 @@ Setfunc set; /* routine to set the value of the parsed field */
NODE *n;
{
register char *scan = *buf;
- register int nf = parse_high_water;
+ register long nf = parse_high_water;
register char *field;
register char *end = scan + len;
@@ -241,31 +265,31 @@ NODE *n;
if (len == 0)
return nf;
- if (*RS == 0 && default_FS)
+ if (RS_is_null && default_FS)
while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n'))
scan++;
field = scan;
while (scan < end
- && research(rp, scan, 0, (end - scan), 1) != -1
+ && research(rp, scan, 0, (end - scan), TRUE) != -1
&& nf < up_to) {
if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
scan++;
if (scan == end) {
- (*set)(++nf, field, (int)(scan - field), n);
+ (*set)(++nf, field, (long)(scan - field), n);
up_to = nf;
break;
}
continue;
}
(*set)(++nf, field,
- (int)(scan + RESTART(rp, scan) - field), n);
+ (long)(scan + RESTART(rp, scan) - field), n);
scan += REEND(rp, scan);
field = scan;
if (scan == end) /* FS at end of record */
- (*set)(++nf, field, 0, n);
+ (*set)(++nf, field, 0L, n);
}
if (nf != up_to && scan < end) {
- (*set)(++nf, scan, (int)(end - scan), n);
+ (*set)(++nf, scan, (long)(end - scan), n);
scan = end;
}
*buf = scan;
@@ -273,13 +297,15 @@ NODE *n;
}
/*
- * this is called both from get_field() and from do_split()
+ * def_parse_field --- default field parsing.
+ *
+ * This is called both from get_field() and from do_split()
* via (*parse_field)(). This variation is for when FS is a single space
* character.
*/
static long
def_parse_field(up_to, buf, len, fs, rp, set, n)
-int up_to; /* parse only up to this field number */
+long up_to; /* parse only up to this field number */
char **buf; /* on input: string to parse; on output: point to start next */
int len;
NODE *fs;
@@ -288,7 +314,7 @@ Setfunc set; /* routine to set the value of the parsed field */
NODE *n;
{
register char *scan = *buf;
- register int nf = parse_high_water;
+ register long nf = parse_high_water;
register char *field;
register char *end = scan + len;
char sav;
@@ -324,7 +350,7 @@ NODE *n;
field = scan;
while (*scan != ' ' && *scan != '\t')
scan++;
- (*set)(++nf, field, (int)(scan - field), n);
+ (*set)(++nf, field, (long)(scan - field), n);
if (scan == end)
break;
}
@@ -337,13 +363,47 @@ NODE *n;
}
/*
- * this is called both from get_field() and from do_split()
+ * null_parse_field --- each character is a separate field
+ *
+ * This is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is the null string.
+ */
+static long
+null_parse_field(up_to, buf, len, fs, rp, set, n)
+long up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len;
+NODE *fs;
+Regexp *rp;
+Setfunc set; /* routine to set the value of the parsed field */
+NODE *n;
+{
+ register char *scan = *buf;
+ register long nf = parse_high_water;
+ register char *end = scan + len;
+
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+
+ for (; nf < up_to && scan < end; scan++)
+ (*set)(++nf, scan, 1L, n);
+
+ *buf = scan;
+ return nf;
+}
+
+/*
+ * sc_parse_field --- single character field separator
+ *
+ * This is called both from get_field() and from do_split()
* via (*parse_field)(). This variation is for when FS is a single character
* other than space.
*/
static long
sc_parse_field(up_to, buf, len, fs, rp, set, n)
-int up_to; /* parse only up to this field number */
+long up_to; /* parse only up to this field number */
char **buf; /* on input: string to parse; on output: point to start next */
int len;
NODE *fs;
@@ -353,9 +413,10 @@ NODE *n;
{
register char *scan = *buf;
register char fschar;
- register int nf = parse_high_water;
+ register long nf = parse_high_water;
register char *field;
register char *end = scan + len;
+ int onecase;
char sav;
if (up_to == HUGE)
@@ -363,11 +424,15 @@ NODE *n;
if (len == 0)
return nf;
- if (*RS == 0 && fs->stlen == 0)
+ if (RS_is_null && fs->stlen == 0)
fschar = '\n';
else
fschar = fs->stptr[0];
+ onecase = (IGNORECASE && isalpha(fschar));
+ if (onecase)
+ fschar = casetable[fschar];
+
/* before doing anything save the char at *end */
sav = *end;
/* because it will be destroyed now: */
@@ -375,14 +440,19 @@ NODE *n;
for (; nf < up_to;) {
field = scan;
- while (*scan != fschar)
- scan++;
- (*set)(++nf, field, (int)(scan - field), n);
+ if (onecase) {
+ while (casetable[*scan] != fschar)
+ scan++;
+ } else {
+ while (*scan != fschar)
+ scan++;
+ }
+ (*set)(++nf, field, (long)(scan - field), n);
if (scan == end)
break;
scan++;
if (scan == end) { /* FS at end of record */
- (*set)(++nf, field, 0, n);
+ (*set)(++nf, field, 0L, n);
break;
}
}
@@ -395,12 +465,14 @@ NODE *n;
}
/*
- * this is called both from get_field() and from do_split()
+ * fw_parse_field --- field parsing using FIELDWIDTHS spec
+ *
+ * This is called both from get_field() and from do_split()
 * via (*parse_field)(). This variation is for when fields have fixed widths.
*/
static long
fw_parse_field(up_to, buf, len, fs, rp, set, n)
-int up_to; /* parse only up to this field number */
+long up_to; /* parse only up to this field number */
char **buf; /* on input: string to parse; on output: point to start next */
int len;
NODE *fs;
@@ -419,7 +491,7 @@ NODE *n;
for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) {
if (len > end - scan)
len = end - scan;
- (*set)(++nf, scan, len, n);
+ (*set)(++nf, scan, (long) len, n);
scan += len;
}
if (len == -1)
@@ -429,9 +501,11 @@ NODE *n;
return nf;
}
+/* get_field --- return a particular $n */
+
NODE **
get_field(requested, assign)
-register int requested;
+register long requested;
Func_ptr *assign; /* this field is on the LHS of an assign */
{
/*
@@ -439,27 +513,28 @@ Func_ptr *assign; /* this field is on the LHS of an assign */
* then the whole line must be rebuilt
*/
if (requested == 0) {
- if (!field0_valid) {
+ if (! field0_valid) {
/* first, parse remainder of input record */
if (NF == -1) {
NF = (*parse_field)(HUGE-1, &parse_extent,
fields_arr[0]->stlen -
(parse_extent - fields_arr[0]->stptr),
save_FS, FS_regexp, set_field,
- (NODE *)NULL);
+ (NODE *) NULL);
parse_high_water = NF;
}
rebuild_record();
+ reset_record(); /* clear out fields array */
}
- if (assign)
+ if (assign != NULL)
*assign = reset_record;
return &fields_arr[0];
}
/* assert(requested > 0); */
- if (assign)
- field0_valid = 0; /* $0 needs reconstruction */
+ if (assign != NULL)
+ field0_valid = FALSE; /* $0 needs reconstruction */
if (requested <= parse_high_water) /* already parsed this field */
return &fields_arr[requested];
@@ -472,11 +547,11 @@ Func_ptr *assign; /* this field is on the LHS of an assign */
if (parse_high_water == 0) /* starting at the beginning */
parse_extent = fields_arr[0]->stptr;
parse_high_water = (*parse_field)(requested, &parse_extent,
- fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr),
- save_FS, FS_regexp, set_field, (NODE *)NULL);
+ fields_arr[0]->stlen - (parse_extent - fields_arr[0]->stptr),
+ save_FS, FS_regexp, set_field, (NODE *) NULL);
/*
- * if we reached the end of the record, set NF to the number of
+ * if we reached the end of the record, set NF to the number of
* fields so far. Note that requested might actually refer to
* a field that is beyond the end of the record, but we won't
* set NF to that value at this point, since this is only a
@@ -489,7 +564,7 @@ Func_ptr *assign; /* this field is on the LHS of an assign */
requested = parse_high_water;
}
if (parse_high_water < requested) { /* requested beyond end of record */
- if (assign) { /* expand record */
+ if (assign != NULL) { /* expand record */
register int i;
if (requested > nf_high_water)
@@ -508,11 +583,13 @@ Func_ptr *assign; /* this field is on the LHS of an assign */
return &fields_arr[requested];
}
+/* set_element --- set an array element, used by do_split() */
+
static void
set_element(num, s, len, n)
-int num;
+long num;
char *s;
-int len;
+long len;
NODE *n;
{
register NODE *it;
@@ -522,6 +599,8 @@ NODE *n;
*assoc_lookup(n, tmp_number((AWKNUM) (num))) = it;
}
+/* do_split --- implement split(), semantics are same as for field splitting */
+
NODE *
do_split(tree)
NODE *tree;
@@ -529,7 +608,7 @@ NODE *tree;
NODE *t1, *t2, *t3, *tmp;
NODE *fs;
char *s;
- long (*parseit)P((int, char **, int, NODE *,
+ long (*parseit) P((long, char **, int, NODE *,
Regexp *, Setfunc, NODE *));
Regexp *rp = NULL;
@@ -538,7 +617,7 @@ NODE *tree;
* do dupnode(), to avoid problems like
* x = split(a[1], a, "blah")
* since we assoc_clear the array. gack.
- * this also gives up complete call by value semantics.
+ * this also gives us complete call by value semantics.
*/
tmp = tree_eval(tree->lnode);
t1 = dupnode(tmp);
@@ -552,7 +631,7 @@ NODE *tree;
if (t2->type == Node_param_list)
t2 = stack_ptr[t2->param_cnt];
if (t2->type != Node_var && t2->type != Node_var_array)
- fatal("second argument of split is not a variable");
+ fatal("second argument of split is not an array");
assoc_clear(t2);
if (t3->re_flags & FS_DFLT) {
@@ -561,7 +640,9 @@ NODE *tree;
rp = FS_regexp;
} else {
tmp = force_string(tree_eval(t3->re_exp));
- if (tmp->stlen == 1) {
+ if (tmp->stlen == 0)
+ parseit = null_parse_field;
+ else if (tmp->stlen == 1) {
if (tmp->stptr[0] == ' ')
parseit = def_parse_field;
else
@@ -574,75 +655,14 @@ NODE *tree;
}
s = t1->stptr;
- tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int)t1->stlen,
+ tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int) t1->stlen,
fs, rp, set_element, t2));
unref(t1);
free_temp(t3);
return tmp;
}
-void
-set_FS()
-{
- char buf[10];
- NODE *fs;
-
- /*
- * If changing the way fields are split, obey least-suprise
- * semantics, and force $0 to be split totally.
- */
- if (fields_arr != NULL)
- (void) get_field(HUGE - 1, 0);
-
- buf[0] = '\0';
- default_FS = 0;
- if (FS_regexp) {
- refree(FS_regexp);
- FS_regexp = NULL;
- }
- fs = force_string(FS_node->var_value);
- if (fs->stlen > 1)
- parse_field = re_parse_field;
- else if (*RS == 0) {
- parse_field = sc_parse_field;
- if (fs->stlen == 1) {
- if (fs->stptr[0] == ' ') {
- default_FS = 1;
- strcpy(buf, "[ \t\n]+");
- } else if (fs->stptr[0] != '\n')
- sprintf(buf, "[%c\n]", fs->stptr[0]);
- }
- } else {
- parse_field = def_parse_field;
- if (fs->stptr[0] == ' ' && fs->stlen == 1)
- default_FS = 1;
- else if (fs->stptr[0] != ' ' && fs->stlen == 1) {
- if (IGNORECASE == 0)
- parse_field = sc_parse_field;
- else if (fs->stptr[0] == '\\')
- /* yet another special case */
- strcpy(buf, "[\\\\]");
- else
- sprintf(buf, "[%c]", fs->stptr[0]);
- }
- }
- if (buf[0]) {
- FS_regexp = make_regexp(buf, strlen(buf), IGNORECASE, 1);
- parse_field = re_parse_field;
- } else if (parse_field == re_parse_field) {
- FS_regexp = make_regexp(fs->stptr, fs->stlen, IGNORECASE, 1);
- } else
- FS_regexp = NULL;
- resave_fs = 1;
-}
-
-void
-set_RS()
-{
- (void) force_string(RS_node->var_value);
- RS = RS_node->var_value->stptr;
- set_FS();
-}
+/* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */
void
set_FIELDWIDTHS()
@@ -651,14 +671,14 @@ set_FIELDWIDTHS()
char *end;
register int i;
static int fw_alloc = 1;
- static int warned = 0;
+ static int warned = FALSE;
extern double strtod();
if (do_lint && ! warned) {
- warned = 1;
+ warned = TRUE;
warning("use of FIELDWIDTHS is a gawk extension");
}
- if (do_unix) /* quick and dirty, does the trick */
+ if (do_traditional) /* quick and dirty, does the trick */
return;
/*
@@ -693,3 +713,79 @@ set_FS_if_not_FIELDWIDTHS()
if (parse_field != fw_parse_field)
set_FS();
}
+
+/* set_FS --- handle things when FS is assigned to */
+
+void
+set_FS()
+{
+ char buf[10];
+ NODE *fs;
+ static NODE *save_fs = NULL;
+ static NODE *save_rs = NULL;
+
+ /*
+ * If changing the way fields are split, obey least-surprise
+ * semantics, and force $0 to be split totally.
+ */
+ if (fields_arr != NULL)
+ (void) get_field(HUGE - 1, 0);
+
+ if (save_fs && cmp_nodes(FS_node->var_value, save_fs) == 0
+ && save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0)
+ return;
+ unref(save_fs);
+ save_fs = dupnode(FS_node->var_value);
+ unref(save_rs);
+ save_rs = dupnode(RS_node->var_value);
+ resave_fs = TRUE;
+ buf[0] = '\0';
+ default_FS = FALSE;
+ if (FS_regexp) {
+ refree(FS_regexp);
+ FS_regexp = NULL;
+ }
+ fs = force_string(FS_node->var_value);
+ if (! do_traditional && fs->stlen == 0)
+ parse_field = null_parse_field;
+ else if (fs->stlen > 1)
+ parse_field = re_parse_field;
+ else if (RS_is_null) {
+ parse_field = sc_parse_field;
+ if (fs->stlen == 1) {
+ if (fs->stptr[0] == ' ') {
+ default_FS = TRUE;
+ strcpy(buf, "[ \t\n]+");
+ } else if (fs->stptr[0] != '\n')
+ sprintf(buf, "[%c\n]", fs->stptr[0]);
+ }
+ } else {
+ parse_field = def_parse_field;
+ if (fs->stptr[0] == ' ' && fs->stlen == 1)
+ default_FS = TRUE;
+ else if (fs->stptr[0] != ' ' && fs->stlen == 1) {
+ if (! IGNORECASE)
+ parse_field = sc_parse_field;
+ else if (fs->stptr[0] == '\\')
+ /* yet another special case */
+ strcpy(buf, "[\\\\]");
+ else
+ sprintf(buf, "[%c]", fs->stptr[0]);
+ }
+ }
+ if (buf[0] != '\0') {
+ FS_regexp = make_regexp(buf, strlen(buf), IGNORECASE, TRUE);
+ parse_field = re_parse_field;
+ } else if (parse_field == re_parse_field) {
+ FS_regexp = make_regexp(fs->stptr, fs->stlen, IGNORECASE, TRUE);
+ } else
+ FS_regexp = NULL;
+}
+
+/* using_fieldwidths --- is FS or FIELDWIDTHS in use? */
+
+int
+using_fieldwidths()
+{
+ return parse_field == fw_parse_field;
+}
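To summarize the user-visible behavior this field.c rework adds, here is a hedged sketch
(assuming a gawk built from this tree; splitting on a null FS and IGNORECASE-sensitive
field splitting are gawk extensions, and both are skipped when do_traditional is set):

   echo hello | gawk 'BEGIN { FS = "" } { print NF, $1, $5 }'

prints "5 h o" -- with a null FS, null_parse_field() makes every character its own field.

   gawk 'BEGIN { n = split("abc", ch, ""); print n, ch[1], ch[3] }'

prints "3 a c" -- do_split() routes an empty separator through the same parser.

   echo aXbxc | gawk 'BEGIN { FS = "x"; IGNORECASE = 1 } { print NF }'

prints "3" -- a single-character FS now honors IGNORECASE, via the onecase handling in
sc_parse_field() or the case-insensitive regexp built in set_FS().

   echo 2024Jan15 | gawk 'BEGIN { FIELDWIDTHS = "4 3 2" } { print $2 }'

prints "Jan" -- fixed-width splitting through fw_parse_field(), as set_FIELDWIDTHS() arranges.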
diff --git a/gawk.texi b/gawk.texi
deleted file mode 100644
index b2802623..00000000
--- a/gawk.texi
+++ /dev/null
@@ -1,11270 +0,0 @@
-\input texinfo @c -*-texinfo-*-
-@c %**start of header (This is for running Texinfo on a region.)
-@setfilename gawk.info
-@settitle The GAWK Manual
-@c @smallbook
-@c %**end of header (This is for running Texinfo on a region.)
-
-@ifinfo
-@synindex fn cp
-@synindex vr cp
-@end ifinfo
-@iftex
-@syncodeindex fn cp
-@syncodeindex vr cp
-@end iftex
-
-@c If "finalout" is commented out, the printed output will show
-@c black boxes that mark lines that are too long. Thus, it is
-@c unwise to comment it out when running a master in case there are
-@c overfulls which are deemed okay.
-
-@iftex
-@finalout
-@end iftex
-
-@c ===> NOTE! <==
-@c Determine the edition number in *four* places by hand:
-@c 1. First ifinfo section 2. title page 3. copyright page 4. top node
-@c To find the locations, search for !!set
-
-@ifinfo
-This file documents @code{awk}, a program that you can use to select
-particular records in a file and perform operations upon them.
-
-This is Edition 0.15 of @cite{The GAWK Manual}, @*
-for the 2.15 version of the GNU implementation @*
-of AWK.
-
-Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
-
-Permission is granted to make and distribute verbatim copies of
-this manual provided the copyright notice and this permission notice
-are preserved on all copies.
-
-@ignore
-Permission is granted to process this file through TeX and print the
-results, provided the printed document carries copying permission
-notice identical to this one except for the removal of this paragraph
-(this paragraph not being relevant to the printed manual).
-
-@end ignore
-Permission is granted to copy and distribute modified versions of this
-manual under the conditions for verbatim copying, provided that the entire
-resulting derived work is distributed under the terms of a permission
-notice identical to this one.
-
-Permission is granted to copy and distribute translations of this manual
-into another language, under the above conditions for modified versions,
-except that this permission notice may be stated in a translation approved
-by the Foundation.
-@end ifinfo
-
-@setchapternewpage odd
-
-@c !!set edition, date, version
-@titlepage
-@title The GAWK Manual
-@subtitle Edition 0.15
-@subtitle April 1993
-@author Diane Barlow Close
-@author Arnold D. Robbins
-@author Paul H. Rubin
-@author Richard Stallman
-
-@c Include the Distribution inside the titlepage environment so
-@c that headings are turned off. Headings on and off do not work.
-
-@page
-@vskip 0pt plus 1filll
-Copyright @copyright{} 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
-@sp 2
-
-@c !!set edition, date, version
-This is Edition 0.15 of @cite{The GAWK Manual}, @*
-for the 2.15 version of the GNU implementation @*
-of AWK.
-
-@sp 2
-Published by the Free Software Foundation @*
-675 Massachusetts Avenue @*
-Cambridge, MA 02139 USA @*
-Printed copies are available for $20 each.
-
-Permission is granted to make and distribute verbatim copies of
-this manual provided the copyright notice and this permission notice
-are preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of this
-manual under the conditions for verbatim copying, provided that the entire
-resulting derived work is distributed under the terms of a permission
-notice identical to this one.
-
-Permission is granted to copy and distribute translations of this manual
-into another language, under the above conditions for modified versions,
-except that this permission notice may be stated in a translation approved
-by the Foundation.
-@end titlepage
-
-@ifinfo
-@node Top, Preface, (dir), (dir)
-@comment node-name, next, previous, up
-@top General Introduction
-@c Preface or Licensing nodes should come right after the Top
-@c node, in `unnumbered' sections, then the chapter, `What is gawk'.
-
-This file documents @code{awk}, a program that you can use to select
-particular records in a file and perform operations upon them.
-
-@c !!set edition, date, version
-This is Edition 0.15 of @cite{The GAWK Manual}, @*
-for the 2.15 version of the GNU implementation @*
-of AWK.
-
-@end ifinfo
-
-@menu
-* Preface:: What you can do with @code{awk}; brief history
- and acknowledgements.
-* Copying:: Your right to copy and distribute @code{gawk}.
-* This Manual:: Using this manual.
- Includes sample input files that you can use.
-* Getting Started:: A basic introduction to using @code{awk}.
- How to run an @code{awk} program.
- Command line syntax.
-* Reading Files:: How to read files and manipulate fields.
-* Printing:: How to print using @code{awk}. Describes the
- @code{print} and @code{printf} statements.
- Also describes redirection of output.
-* One-liners:: Short, sample @code{awk} programs.
-* Patterns:: The various types of patterns
- explained in detail.
-* Actions:: The various types of actions are
- introduced here. Describes
- expressions and the various operators in
- detail. Also describes comparison expressions.
-* Expressions:: Expressions are the basic building
- blocks of statements.
-* Statements:: The various control statements are
- described in detail.
-* Arrays:: The description and use of arrays.
- Also includes array-oriented control
- statements.
-* Built-in:: The built-in functions are summarized here.
-* User-defined:: User-defined functions are described in detail.
-* Built-in Variables:: Built-in Variables
-* Command Line:: How to run @code{gawk}.
-* Language History:: The evolution of the @code{awk} language.
-* Installation:: Installing @code{gawk} under
- various operating systems.
-* Gawk Summary:: @code{gawk} Options and Language Summary.
-* Sample Program:: A sample @code{awk} program with a
- complete explanation.
-* Bugs:: Reporting Problems and Bugs.
-* Notes:: Something about the
- implementation of @code{gawk}.
-* Glossary:: An explanation of some unfamiliar terms.
-* Index::
-@end menu
-
-@node Preface, Copying, Top, Top
-@comment node-name, next, previous, up
-@unnumbered Preface
-
-@iftex
-@cindex what is @code{awk}
-@end iftex
-If you are like many computer users, you would frequently like to make
-changes in various text files wherever certain patterns appear, or
-extract data from parts of certain lines while discarding the rest. To
-write a program to do this in a language such as C or Pascal is a
-time-consuming inconvenience that may take many lines of code. The job
-may be easier with @code{awk}.
-
-The @code{awk} utility interprets a special-purpose programming language
-that makes it possible to handle simple data-reformatting jobs easily
-with just a few lines of code.
-
-The GNU implementation of @code{awk} is called @code{gawk}; it is fully
-upward compatible with the System V Release 4 version of
-@code{awk}. @code{gawk} is also upward compatible with the @sc{posix}
-(draft) specification of the @code{awk} language. This means that all
-properly written @code{awk} programs should work with @code{gawk}.
-Thus, we usually don't distinguish between @code{gawk} and other @code{awk}
-implementations in this manual.@refill
-
-@cindex uses of @code{awk}
-This manual teaches you what @code{awk} does and how you can use
-@code{awk} effectively. You should already be familiar with basic
-system commands such as @code{ls}. Using @code{awk} you can: @refill
-
-@itemize @bullet
-@item
-manage small, personal databases
-
-@item
-generate reports
-
-@item
-validate data
-@item
-produce indexes, and perform other document preparation tasks
-
-@item
-even experiment with algorithms that can be adapted later to other computer
-languages
-@end itemize
-
-@iftex
-This manual has the difficult task of being both tutorial and reference.
-If you are a novice, feel free to skip over details that seem too complex.
-You should also ignore the many cross references; they are for the
-expert user, and for the on-line Info version of the manual.
-@end iftex
-
-@menu
-* History:: The history of @code{gawk} and
- @code{awk}. Acknowledgements.
-@end menu
-
-@node History, , Preface, Preface
-@comment node-name, next, previous, up
-@unnumberedsec History of @code{awk} and @code{gawk}
-
-@cindex acronym
-@cindex history of @code{awk}
-The name @code{awk} comes from the initials of its designers: Alfred V.
-Aho, Peter J. Weinberger, and Brian W. Kernighan. The original version of
-@code{awk} was written in 1977. In 1985 a new version made the programming
-language more powerful, introducing user-defined functions, multiple input
-streams, and computed regular expressions.
-This new version became generally available with System V Release 3.1.
-The version in System V Release 4 added some new features and also cleaned
-up the behavior in some of the ``dark corners'' of the language.
-The specification for @code{awk} in the @sc{posix} Command Language
-and Utilities standard further clarified the language based on feedback
-from both the @code{gawk} designers, and the original @code{awk}
-designers.@refill
-
-The GNU implementation, @code{gawk}, was written in 1986 by Paul Rubin
-and Jay Fenlason, with advice from Richard Stallman. John Woods
-contributed parts of the code as well. In 1988 and 1989, David Trueman, with
-help from Arnold Robbins, thoroughly reworked @code{gawk} for compatibility
-with the newer @code{awk}. Current development (1992) focuses on bug fixes,
-performance improvements, and standards compliance.
-
-We need to thank many people for their assistance in producing this
-manual. Jay Fenlason contributed many ideas and sample programs. Richard
-Mlynarik and Robert J. Chassell gave helpful comments on early drafts of this
-manual. The paper @cite{A Supplemental Document for @code{awk}} by John W.
-Pierce of the Chemistry Department at UC San Diego, pinpointed several
-issues relevant both to @code{awk} implementation and to this manual, that
-would otherwise have escaped us. David Trueman, Pat Rankin, and Michal
-Jaegermann also contributed sections of the manual.@refill
-
-The following people provided many helpful comments on this edition of
-the manual: Rick Adams, Michael Brennan, Rich Burridge, Diane Close,
-Christopher (``Topher'') Eliot, Michael Lijewski, Pat Rankin, Miriam Robbins,
-and Michal Jaegermann. Robert J. Chassell provided much valuable advice on
-the use of Texinfo.
-
-Finally, we would like to thank Brian Kernighan of Bell Labs for invaluable
-assistance during the testing and debugging of @code{gawk}, and for
-help in clarifying numerous points about the language.@refill
-
-@node Copying, This Manual, Preface, Top
-@unnumbered GNU GENERAL PUBLIC LICENSE
-@center Version 2, June 1991
-
-@display
-Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
-675 Mass Ave, Cambridge, MA 02139, USA
-
-Everyone is permitted to copy and distribute verbatim copies
-of this license document, but changing it is not allowed.
-@end display
-
-@c fakenode --- for prepinfo
-@unnumberedsec Preamble
-
- The licenses for most software are designed to take away your
-freedom to share and change it. By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software---to make sure the software is free for all its users. This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must show them these terms so they know their
-rights.
-
- We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
- Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
- Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
-@iftex
-@c fakenode --- for prepinfo
-@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-@end iftex
-@ifinfo
-@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-@end ifinfo
-
-@enumerate
-@item
-This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License. The ``Program'', below,
-refers to any such program or work, and a ``work based on the Program''
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language. (Hereinafter, translation is included without limitation in
-the term ``modification''.) Each licensee is addressed as ``you''.
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
-@item
-You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
-@item
-You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
-@enumerate a
-@item
-You must cause the modified files to carry prominent notices
-stating that you changed the files and the date of any change.
-
-@item
-You must cause any work that you distribute or publish, that in
-whole or in part contains or is derived from the Program or any
-part thereof, to be licensed as a whole at no charge to all third
-parties under the terms of this License.
-
-@item
-If the modified program normally reads commands interactively
-when run, you must cause it, when started running for such
-interactive use in the most ordinary way, to print or display an
-announcement including an appropriate copyright notice and a
-notice that there is no warranty (or else, saying that you provide
-a warranty) and that users may redistribute the program under
-these conditions, and telling the user how to view a copy of this
-License. (Exception: if the Program itself is interactive but
-does not normally print such an announcement, your work based on
-the Program is not required to print an announcement.)
-@end enumerate
-
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-@item
-You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
-@enumerate a
-@item
-Accompany it with the complete corresponding machine-readable
-source code, which must be distributed under the terms of Sections
-1 and 2 above on a medium customarily used for software interchange; or,
-
-@item
-Accompany it with a written offer, valid for at least three
-years, to give any third party, for a charge no more than your
-cost of physically performing source distribution, a complete
-machine-readable copy of the corresponding source code, to be
-distributed under the terms of Sections 1 and 2 above on a medium
-customarily used for software interchange; or,
-
-@item
-Accompany it with the information you received as to the offer
-to distribute corresponding source code. (This alternative is
-allowed only for noncommercial distribution and only if you
-received the program in object code or executable form with such
-an offer, in accord with Subsection b above.)
-@end enumerate
-
-The source code for a work means the preferred form of the work for
-making modifications to it. For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable. However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-@item
-You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License. Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-@item
-You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Program or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
-@item
-Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
-@item
-If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all. For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-@item
-If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded. In such case, this License incorporates
-the limitation as if written in the body of this License.
-
-@item
-The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies a version number of this License which applies to it and ``any
-later version'', you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation. If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
-@item
-If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission. For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this. Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
-@iftex
-@c fakenode --- for prepinfo
-@heading NO WARRANTY
-@end iftex
-@ifinfo
-@center NO WARRANTY
-@end ifinfo
-
-@item
-BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
-@item
-IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-@end enumerate
-
-@iftex
-@c fakenode --- for prepinfo
-@heading END OF TERMS AND CONDITIONS
-@end iftex
-@ifinfo
-@center END OF TERMS AND CONDITIONS
-@end ifinfo
-
-@page
-@c fakenode --- for prepinfo
-@unnumberedsec How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the ``copyright'' line and a pointer to where the full notice is found.
-
-@smallexample
-@var{one line to give the program's name and a brief idea of what it does.}
-Copyright (C) 19@var{yy} @var{name of author}
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-@end smallexample
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
-@smallexample
-Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
-Gnomovision comes with ABSOLUTELY NO WARRANTY; for details
-type `show w'.
-This is free software, and you are welcome to redistribute it
-under certain conditions; type `show c' for details.
-@end smallexample
-
-The hypothetical commands @samp{show w} and @samp{show c} should show
-the appropriate parts of the General Public License. Of course, the
-commands you use may be called something other than @samp{show w} and
-@samp{show c}; they could even be mouse-clicks or menu items---whatever
-suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a ``copyright disclaimer'' for the program, if
-necessary. Here is a sample; alter the names:
-
-@smallexample
-Yoyodyne, Inc., hereby disclaims all copyright interest in the program
-`Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-@var{signature of Ty Coon}, 1 April 1989
-Ty Coon, President of Vice
-@end smallexample
-
-This General Public License does not permit incorporating your program into
-proprietary programs. If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library. If this is what you want to do, use the GNU Library General
-Public License instead of this License.
-
-@node This Manual, Getting Started, Copying, Top
-@chapter Using this Manual
-@cindex manual, using this
-@cindex using this manual
-@cindex language, @code{awk}
-@cindex program, @code{awk}
-@cindex @code{awk} language
-@cindex @code{awk} program
-
-The term @code{awk} refers to a particular program, and to the language you
-use to tell this program what to do. When we need to be careful, we call
-the program ``the @code{awk} utility'' and the language ``the @code{awk}
-language.'' The term @code{gawk} refers to a version of @code{awk} developed
-as part of the GNU project. The purpose of this manual is to explain
-both the
-@code{awk} language and how to run the @code{awk} utility.@refill
-
-While concentrating on the features of @code{gawk}, the manual will also
-attempt to describe important differences between @code{gawk} and other
-@code{awk} implementations. In particular, any features that are not
-in the @sc{posix} standard for @code{awk} will be noted. @refill
-
-The term @dfn{@code{awk} program} refers to a program written by you in
-the @code{awk} programming language.@refill
-
-@xref{Getting Started, ,Getting Started with @code{awk}}, for the bare
-essentials you need to know to start using @code{awk}.
-
-Some useful ``one-liners'' are included to give you a feel for the
-@code{awk} language (@pxref{One-liners, ,Useful ``One-liners''}).
-
-@ignore
-@strong{I deleted four paragraphs here because they would confuse the
-beginner more than help him. They mention terms such as ``field,''
-``pattern,'' ``action,'' ``built-in function'' which the beginner
-doesn't know.}
-
-@strong{If you can find a way to introduce several of these concepts here,
-enough to give the reader a map of what is to follow, that might
-be useful. I'm not sure that can be done without taking up more
-space than ought to be used here. There may be no way to win.}
-
-@strong{ADR: I'd like to tackle this in phase 2 of my editing.}
-@end ignore
-
-A sample @code{awk} program has been provided for you
-(@pxref{Sample Program}).@refill
-
-If you find terms that you aren't familiar with, try looking them
-up in the glossary (@pxref{Glossary}).@refill
-
-The entire @code{awk} language is summarized for quick reference in
-@ref{Gawk Summary, ,@code{gawk} Summary}. Look there if you just need
-to refresh your memory about a particular feature.@refill
-
-Most of the time complete @code{awk} programs are used as examples, but in
-some of the more advanced sections, only the part of the @code{awk} program
-that illustrates the concept being described is shown.@refill
-
-@menu
-* Sample Data Files:: Sample data files for use in the @code{awk}
- programs illustrated in this manual.
-@end menu
-
-@node Sample Data Files, , This Manual, This Manual
-@section Data Files for the Examples
-
-@cindex input file, sample
-@cindex sample input file
-@cindex @file{BBS-list} file
-Many of the examples in this manual take their input from two sample
-data files. The first, called @file{BBS-list}, represents a list of
-computer bulletin board systems together with information about those systems.
-The second data file, called @file{inventory-shipped}, contains
-information about shipments on a monthly basis. Each line of these
-files is one @dfn{record}.
-
-In the file @file{BBS-list}, each record contains the name of a computer
-bulletin board, its phone number, the board's baud rate, and a code for
-the number of hours it is operational. An @samp{A} in the last column
-means the board operates 24 hours a day. A @samp{B} in the last
-column means the board operates evening and weekend hours, only. A
-@samp{C} means the board operates only on weekends.
-
-@example
-aardvark 555-5553 1200/300 B
-alpo-net 555-3412 2400/1200/300 A
-barfly 555-7685 1200/300 A
-bites 555-1675 2400/1200/300 A
-camelot 555-0542 300 C
-core 555-2912 1200/300 C
-fooey 555-1234 2400/1200/300 B
-foot 555-6699 1200/300 B
-macfoo 555-6480 1200/300 A
-sdace 555-3430 2400/1200/300 A
-sabafoo 555-2127 1200/300 C
-@end example
-
-@cindex @file{inventory-shipped} file
-The second data file, called @file{inventory-shipped}, represents
-information about shipments during the year.
-Each record contains the month of the year, the number
-of green crates shipped, the number of red boxes shipped, the number of
-orange bags shipped, and the number of blue packages shipped,
-respectively. There are 16 entries, covering the 12 months of one year
-and 4 months of the next year.@refill
-
-@example
-Jan 13 25 15 115
-Feb 15 32 24 226
-Mar 15 24 34 228
-Apr 31 52 63 420
-May 16 34 29 208
-Jun 31 42 75 492
-Jul 24 34 67 436
-Aug 15 34 47 316
-Sep 13 55 37 277
-Oct 29 54 68 525
-Nov 20 87 82 577
-Dec 17 35 61 401
-
-Jan 21 36 64 620
-Feb 26 58 80 652
-Mar 24 75 70 495
-Apr 21 70 74 514
-@end example
-
-@ifinfo
-If you are reading this in GNU Emacs using Info, you can copy the regions
-of text showing these sample files into your own test files. This way you
-can try out the examples shown in the remainder of this document. You do
-this by using the command @kbd{M-x write-region} to copy text from the Info
-file into a file for use with @code{awk}
-(@xref{Misc File Ops, , , emacs, GNU Emacs Manual},
-for more information). Using this information, create your own
-@file{BBS-list} and @file{inventory-shipped} files, and practice what you
-learn in this manual.
-@end ifinfo
-
-@node Getting Started, Reading Files, This Manual, Top
-@chapter Getting Started with @code{awk}
-@cindex script, definition of
-@cindex rule, definition of
-@cindex program, definition of
-@cindex basic function of @code{gawk}
-
-The basic function of @code{awk} is to search files for lines (or other
-units of text) that contain certain patterns. When a line matches one
-of the patterns, @code{awk} performs specified actions on that line.
-@code{awk} keeps processing input lines in this way until the end of the
-input file is reached.@refill
-
-When you run @code{awk}, you specify an @code{awk} @dfn{program} which
-tells @code{awk} what to do. The program consists of a series of
-@dfn{rules}. (It may also contain @dfn{function definitions}, but that
-is an advanced feature, so we will ignore it for now.
-@xref{User-defined, ,User-defined Functions}.) Each rule specifies one
-pattern to search for, and one action to perform when that pattern is found.
-
-Syntactically, a rule consists of a pattern followed by an action. The
-action is enclosed in curly braces to separate it from the pattern.
-Rules are usually separated by newlines. Therefore, an @code{awk}
-program looks like this:
-
-@example
-@var{pattern} @{ @var{action} @}
-@var{pattern} @{ @var{action} @}
-@dots{}
-@end example
-
-@menu
-* Very Simple:: A very simple example.
-* Two Rules:: A less simple one-line example with two rules.
-* More Complex:: A more complex example.
-* Running gawk:: How to run @code{gawk} programs;
- includes command line syntax.
-* Comments:: Adding documentation to @code{gawk} programs.
-* Statements/Lines:: Subdividing or combining statements into lines.
-* When:: When to use @code{gawk} and
- when to use other things.
-@end menu
-
-@node Very Simple, Two Rules, Getting Started, Getting Started
-@section A Very Simple Example
-
-@cindex @samp{print $0}
-The following command runs a simple @code{awk} program that searches the
-input file @file{BBS-list} for the string of characters: @samp{foo}. (A
-string of characters is usually called a @dfn{string}.
-The term @dfn{string} is perhaps based on similar usage in English, such
-as ``a string of pearls,'' or, ``a string of cars in a train.'')
-
-@example
-awk '/foo/ @{ print $0 @}' BBS-list
-@end example
-
-@noindent
-When lines containing @samp{foo} are found, they are printed, because
-@w{@samp{print $0}} means print the current line. (Just @samp{print} by
-itself means the same thing, so we could have written that
-instead.)
-
-You will notice that slashes, @samp{/}, surround the string @samp{foo}
-in the actual @code{awk} program. The slashes indicate that @samp{foo}
-is a pattern to search for. This type of pattern is called a
-@dfn{regular expression}, and is covered in more detail later
-(@pxref{Regexp, ,Regular Expressions as Patterns}). There are
-single-quotes around the @code{awk} program so that the shell won't
-interpret any of it as special shell characters.@refill
-
-Here is what this program prints:
-
-@example
-@group
-fooey 555-1234 2400/1200/300 B
-foot 555-6699 1200/300 B
-macfoo 555-6480 1200/300 A
-sabafoo 555-2127 1200/300 C
-@end group
-@end example
-
-@cindex action, default
-@cindex pattern, default
-@cindex default action
-@cindex default pattern
-In an @code{awk} rule, either the pattern or the action can be omitted,
-but not both. If the pattern is omitted, then the action is performed
-for @emph{every} input line. If the action is omitted, the default
-action is to print all lines that match the pattern.
-
-Thus, we could leave out the action (the @code{print} statement and the curly
-braces) in the above example, and the result would be the same: all
-lines matching the pattern @samp{foo} would be printed. By comparison,
-omitting the @code{print} statement but retaining the curly braces makes an
-empty action that does nothing; then no lines would be printed.
-
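For example (a small hedged illustration of the three cases, using the BBS-list sample file
described earlier):

   awk '/foo/' BBS-list

prints every line containing "foo" (action omitted, so the default action prints each matching line);

   awk '{ print $0 }' BBS-list

prints every line of the file (pattern omitted, so the action runs on every input line); and

   awk '/foo/ { }' BBS-list

prints nothing, because the explicit empty action replaces the default.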
-@node Two Rules, More Complex, Very Simple, Getting Started
-@section An Example with Two Rules
-@cindex how @code{awk} works
-
-The @code{awk} utility reads the input files one line at a
-time. For each line, @code{awk} tries the patterns of each of the rules.
-If several patterns match then several actions are run, in the order in
-which they appear in the @code{awk} program. If no patterns match, then
-no actions are run.
-
-After processing all the rules (perhaps none) that match the line,
-@code{awk} reads the next line (however,
-@pxref{Next Statement, ,The @code{next} Statement}). This continues
-until the end of the file is reached.@refill
-
-For example, the @code{awk} program:
-
-@example
-/12/ @{ print $0 @}
-/21/ @{ print $0 @}
-@end example
-
-@noindent
-contains two rules. The first rule has the string @samp{12} as the
-pattern and @samp{print $0} as the action. The second rule has the
-string @samp{21} as the pattern and also has @samp{print $0} as the
-action. Each rule's action is enclosed in its own pair of braces.
-
-This @code{awk} program prints every line that contains the string
-@samp{12} @emph{or} the string @samp{21}. If a line contains both
-strings, it is printed twice, once by each rule.
-
-If we run this program on our two sample data files, @file{BBS-list} and
-@file{inventory-shipped}, as shown here:
-
-@example
-awk '/12/ @{ print $0 @}
- /21/ @{ print $0 @}' BBS-list inventory-shipped
-@end example
-
-@noindent
-we get the following output:
-
-@example
-aardvark 555-5553 1200/300 B
-alpo-net 555-3412 2400/1200/300 A
-barfly 555-7685 1200/300 A
-bites 555-1675 2400/1200/300 A
-core 555-2912 1200/300 C
-fooey 555-1234 2400/1200/300 B
-foot 555-6699 1200/300 B
-macfoo 555-6480 1200/300 A
-sdace 555-3430 2400/1200/300 A
-sabafoo 555-2127 1200/300 C
-sabafoo 555-2127 1200/300 C
-Jan 21 36 64 620
-Apr 21 70 74 514
-@end example
-
-@noindent
-Note how the line in @file{BBS-list} beginning with @samp{sabafoo}
-was printed twice, once for each rule.
-
-@node More Complex, Running gawk, Two Rules, Getting Started
-@comment node-name, next, previous, up
-@section A More Complex Example
-
-Here is an example to give you an idea of what typical @code{awk}
-programs do. This example shows how @code{awk} can be used to
-summarize, select, and rearrange the output of another utility. It uses
-features that haven't been covered yet, so don't worry if you don't
-understand all the details.
-
-@example
-ls -l | awk '$5 == "Nov" @{ sum += $4 @}
- END @{ print sum @}'
-@end example
-
-This command prints the total number of bytes in all the files in the
-current directory that were last modified in November (of any year).
-(In the C shell you would need to type a semicolon and then a backslash
-at the end of the first line; in a @sc{posix}-compliant shell, such as the
-Bourne shell or the Bourne-Again shell, you can type the example as shown.)
-
-The @w{@samp{ls -l}} part of this example is a command that gives you a
-listing of the files in a directory, including file size and date.
-Its output looks like this:@refill
-
-@example
--rw-r--r-- 1 close 1933 Nov 7 13:05 Makefile
--rw-r--r-- 1 close 10809 Nov 7 13:03 gawk.h
--rw-r--r-- 1 close 983 Apr 13 12:14 gawk.tab.h
--rw-r--r-- 1 close 31869 Jun 15 12:20 gawk.y
--rw-r--r-- 1 close 22414 Nov 7 13:03 gawk1.c
--rw-r--r-- 1 close 37455 Nov 7 13:03 gawk2.c
--rw-r--r-- 1 close 27511 Dec 9 13:07 gawk3.c
--rw-r--r-- 1 close 7989 Nov 7 13:03 gawk4.c
-@end example
-
-@noindent
-The first field contains read-write permissions, the second field contains
-the number of links to the file, and the third field identifies the owner of
-the file. The fourth field contains the size of the file in bytes. The
-fifth, sixth, and seventh fields contain the month, day, and time,
-respectively, that the file was last modified. Finally, the eighth field
-contains the name of the file.
-
-The @code{$5 == "Nov"} in our @code{awk} program is an expression that
-tests whether the fifth field of the output from @w{@samp{ls -l}}
-matches the string @samp{Nov}. Each time a line has the string
-@samp{Nov} in its fifth field, the action @samp{@{ sum += $4 @}} is
-performed. This adds the fourth field (the file size) to the variable
-@code{sum}. As a result, when @code{awk} has finished reading all the
-input lines, @code{sum} is the sum of the sizes of files whose
-lines matched the pattern. (This works because @code{awk} variables
-are automatically initialized to zero.)@refill
-
-After the last line of output from @code{ls} has been processed, the
-@code{END} rule is executed, and the value of @code{sum} is
-printed. In this example, the value of @code{sum} would be 80600.@refill
-
-These more advanced @code{awk} techniques are covered in later sections
-(@pxref{Actions, ,Overview of Actions}). Before you can move on to more
-advanced @code{awk} programming, you have to know how @code{awk} interprets
-your input and displays your output. By manipulating fields and using
-@code{print} statements, you can produce some very useful and spectacular
-looking reports.@refill
-
-@node Running gawk, Comments, More Complex, Getting Started
-@section How to Run @code{awk} Programs
-
-@ignore
-Date: Mon, 26 Aug 91 09:48:10 +0200
-From: gatech!vsoc07.cern.ch!matheys (Jean-Pol Matheys (CERN - ECP Division))
-To: uunet.UU.NET!skeeve!arnold
-Subject: RE: status check
-
-The introduction of Chapter 2 (i.e. before 2.1) should include
-the whole of section 2.4 - it's better to tell people how to run awk programs
-before giving any examples
-
-ADR --- he's right. but for now, don't do this because the rest of the
-chapter would need some rewriting.
-@end ignore
-
-@cindex command line formats
-@cindex running @code{awk} programs
-There are several ways to run an @code{awk} program. If the program is
-short, it is easiest to include it in the command that runs @code{awk},
-like this:
-
-@example
-awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
-@end example
-
-@noindent
-where @var{program} consists of a series of patterns and actions, as
-described earlier.
-
-When the program is long, it is usually more convenient to put it in a file
-and run it with a command like this:
-
-@example
-awk -f @var{program-file} @var{input-file1} @var{input-file2} @dots{}
-@end example
-
-@menu
-* One-shot:: Running a short throw-away @code{awk} program.
-* Read Terminal:: Using no input files (input from
- terminal instead).
-* Long:: Putting permanent @code{awk} programs in files.
-* Executable Scripts:: Making self-contained @code{awk} programs.
-@end menu
-
-@node One-shot, Read Terminal, Running gawk, Running gawk
-@subsection One-shot Throw-away @code{awk} Programs
-
-Once you are familiar with @code{awk}, you will often type simple
-programs at the moment you want to use them. Then you can write the
-program as the first argument of the @code{awk} command, like this:
-
-@example
-awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
-@end example
-
-@noindent
-where @var{program} consists of a series of @var{patterns} and
-@var{actions}, as described earlier.
-
-@cindex single quotes, why needed
-This command format instructs the shell to start @code{awk} and use the
-@var{program} to process records in the input file(s). There are single
-quotes around @var{program} so that the shell doesn't interpret any
-@code{awk} characters as special shell characters. They also cause the
-shell to treat all of @var{program} as a single argument for
-@code{awk} and allow @var{program} to be more than one line long.@refill
-
-This format is also useful for running short or medium-sized @code{awk}
-programs from shell scripts, because it avoids the need for a separate
-file for the @code{awk} program. A self-contained shell script is more
-reliable since there are no other files to misplace.
-
-@node Read Terminal, Long, One-shot, Running gawk
-@subsection Running @code{awk} without Input Files
-
-@cindex standard input
-@cindex input, standard
-You can also run @code{awk} without any input files. If you type the
-command line:@refill
-
-@example
-awk '@var{program}'
-@end example
-
-@noindent
-then @code{awk} applies the @var{program} to the @dfn{standard input},
-which usually means whatever you type on the terminal. This continues
-until you indicate end-of-file by typing @kbd{Control-d}.
-
-For example, if you execute this command:
-
-@example
-awk '/th/'
-@end example
-
-@noindent
-whatever you type next is taken as data for that @code{awk}
-program. If you go on to type the following data:
-
-@example
-Kathy
-Ben
-Tom
-Beth
-Seth
-Karen
-Thomas
-@kbd{Control-d}
-@end example
-
-@noindent
-then @code{awk} prints this output:
-
-@example
-Kathy
-Beth
-Seth
-@end example
-
-@noindent
-@cindex case sensitivity
-@cindex pattern, case sensitive
-as matching the pattern @samp{th}. Notice that it did not recognize
-@samp{Thomas} as matching the pattern. The @code{awk} language is
-@dfn{case sensitive}, and matches patterns exactly. (However, you can
-override this with the variable @code{IGNORECASE}.
-@xref{Case-sensitivity, ,Case-sensitivity in Matching}.)
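-
-For example, with @code{gawk} you could make the same program match
-@samp{Thomas} as well, by turning on @code{IGNORECASE} in a @code{BEGIN}
-rule (both features are described later):
-
-@example
-awk 'BEGIN @{ IGNORECASE = 1 @} ; /th/'
-@end example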
-
-@node Long, Executable Scripts, Read Terminal, Running gawk
-@subsection Running Long Programs
-
-@cindex running long programs
-@cindex @samp{-f} option
-@cindex program file
-@cindex file, @code{awk} program
-Sometimes your @code{awk} programs can be very long. In this case it is
-more convenient to put the program into a separate file. To tell
-@code{awk} to use that file for its program, you type:@refill
-
-@example
-awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{}
-@end example
-
-The @samp{-f} instructs the @code{awk} utility to get the @code{awk} program
-from the file @var{source-file}. Any file name can be used for
-@var{source-file}. For example, you could put the program:@refill
-
-@example
-/th/
-@end example
-
-@noindent
-into the file @file{th-prog}. Then this command:
-
-@example
-awk -f th-prog
-@end example
-
-@noindent
-does the same thing as this one:
-
-@example
-awk '/th/'
-@end example
-
-@noindent
-which was explained earlier (@pxref{Read Terminal, ,Running @code{awk} without Input Files}).
-Note that you don't usually need single quotes around the file name that you
-specify with @samp{-f}, because most file names don't contain any of the shell's
-special characters. Notice that in @file{th-prog}, the @code{awk}
-program did not have single quotes around it. The quotes are only needed
-for programs that are provided on the @code{awk} command line.
-
-If you want to identify your @code{awk} program files clearly as such,
-you can add the extension @file{.awk} to the file name. This doesn't
-affect the execution of the @code{awk} program, but it does make
-``housekeeping'' easier.
-
-@node Executable Scripts, , Long, Running gawk
-@c node-name, next, previous, up
-@subsection Executable @code{awk} Programs
-@cindex executable scripts
-@cindex scripts, executable
-@cindex self contained programs
-@cindex program, self contained
-@cindex @samp{#!}
-
-Once you have learned @code{awk}, you may want to write self-contained
-@code{awk} scripts, using the @samp{#!} script mechanism. You can do
-this on many Unix systems @footnote{The @samp{#!} mechanism works on
-Unix systems derived from Berkeley Unix, System V Release 4, and some System
-V Release 3 systems.} (and someday on GNU).@refill
-
-For example, you could create a text file named @file{hello}, containing
-the following (where @samp{BEGIN} is a feature we have not yet
-discussed):
-
-@example
-#! /bin/awk -f
-
-# a sample awk program
-BEGIN @{ print "hello, world" @}
-@end example
-
-@noindent
-After making this file executable (with the @code{chmod} command), you
-can simply type:
-
-@example
-hello
-@end example
-
-@noindent
-at the shell, and the system will arrange to run @code{awk} @footnote{The
-line beginning with @samp{#!} lists the full pathname of an interpreter
-to be run, and an optional initial command line argument to pass to that
-interpreter. The operating system then runs the interpreter with the given
-argument and the full argument list of the executed program. The first argument
-in the list is the full pathname of the @code{awk} program. The rest of the
-argument list will either be options to @code{awk}, or data files,
-or both.} as if you had typed:@refill
-
-@example
-awk -f hello
-@end example
-
-@noindent
-Self-contained @code{awk} scripts are useful when you want to write a
-program which users can invoke without knowing that the program is
-written in @code{awk}.
-
-@cindex shell scripts
-@cindex scripts, shell
-If your system does not support the @samp{#!} mechanism, you can get a
-similar effect using a regular shell script. It would look something
-like this:
-
-@example
-: The colon makes sure this script is executed by the Bourne shell.
-awk '@var{program}' "$@@"
-@end example
-
-Using this technique, it is @emph{vital} to enclose the @var{program} in
-single quotes to protect it from interpretation by the shell. If you
-omit the quotes, only a shell wizard can predict the results.
-
-The @samp{"$@@"} causes the shell to forward all the command line
-arguments to the @code{awk} program, without interpretation. The first
-line, which starts with a colon, is used so that this shell script will
-work even if invoked by a user who uses the C shell.
-@c Someday: (See @cite{The Bourne Again Shell}, by ??.)
-
-@node Comments, Statements/Lines, Running gawk, Getting Started
-@section Comments in @code{awk} Programs
-@cindex @samp{#}
-@cindex comments
-@cindex use of comments
-@cindex documenting @code{awk} programs
-@cindex programs, documenting
-
-A @dfn{comment} is some text that is included in a program for the sake
-of human readers, and that is not really part of the program. Comments
-can explain what the program does, and how it works. Nearly all
-programming languages have provisions for comments, because programs are
-typically hard to understand without their extra help.
-
-In the @code{awk} language, a comment starts with the sharp sign
-character, @samp{#}, and continues to the end of the line. The
-@code{awk} language ignores the rest of a line following a sharp sign.
-For example, we could have put the following into @file{th-prog}:@refill
-
-@smallexample
-# This program finds records containing the pattern @samp{th}. This is how
-# you continue comments on additional lines.
-/th/
-@end smallexample
-
-You can put comment lines into keyboard-composed throw-away @code{awk}
-programs also, but this usually isn't very useful; the purpose of a
-comment is to help you or another person understand the program at
-a later time.@refill
-
-@node Statements/Lines, When, Comments, Getting Started
-@section @code{awk} Statements versus Lines
-
-Most often, each line in an @code{awk} program is a separate statement or
-separate rule, like this:
-
-@example
-awk '/12/ @{ print $0 @}
- /21/ @{ print $0 @}' BBS-list inventory-shipped
-@end example
-
-But sometimes statements can be more than one line, and lines can
-contain several statements. You can split a statement into multiple
-lines by inserting a newline after any of the following:@refill
-
-@example
-, @{ ? : || && do else
-@end example
-
-@noindent
-A newline at any other point is considered the end of the statement.
-(Splitting lines after @samp{?} and @samp{:} is a minor @code{gawk}
-extension. The @samp{?} and @samp{:} referred to here is the
-three operand conditional expression described in
-@ref{Conditional Exp, ,Conditional Expressions}.)@refill
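-
-For example, you could split a pattern immediately after the @samp{&&}
-operator, with no backslash needed:
-
-@example
-awk '$1 == "fooey" &&
-     $4 == "B"      @{ print $2 @}' BBS-list
-@end example
-
-@noindent
-The newline after @samp{&&} does not end the pattern.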
-
-@cindex backslash continuation
-@cindex continuation of lines
-If you would like to split a single statement into two lines at a point
-where a newline would terminate it, you can @dfn{continue} it by ending the
-first line with a backslash character, @samp{\}. This is allowed
-absolutely anywhere in the statement, even in the middle of a string or
-regular expression. For example:
-
-@example
-awk '/This program is too long, so continue it\
- on the next line/ @{ print $1 @}'
-@end example
-
-@noindent
-We have generally not used backslash continuation in the sample programs in
-this manual. Since in @code{gawk} there is no limit on the length of a line,
-it is never strictly necessary; it just makes programs prettier. We have
-preferred to make them even prettier by keeping the statements short.
-Backslash continuation is most useful when your @code{awk} program is in a
-separate source file, instead of typed in on the command line. You should
-also note that many @code{awk} implementations are more picky about where
-you may use backslash continuation. For maximal portability of your @code{awk}
-programs, it is best not to split your lines in the middle of a regular
-expression or a string.@refill
-
-@strong{Warning: backslash continuation does not work as described above
-with the C shell.} Continuation with backslash works for @code{awk}
-programs in files, and also for one-shot programs @emph{provided} you
-are using a @sc{posix}-compliant shell, such as the Bourne shell or the
-Bourne-again shell. But the C shell used on Berkeley Unix behaves
-differently! There, you must use two backslashes in a row, followed by
-a newline.@refill
-
-@cindex multiple statements on one line
-When @code{awk} statements within one rule are short, you might want to put
-more than one of them on a line. You do this by separating the statements
-with a semicolon, @samp{;}.
-This also applies to the rules themselves.
-Thus, the previous program could have been written:@refill
-
-@example
-/12/ @{ print $0 @} ; /21/ @{ print $0 @}
-@end example
-
-@noindent
-@strong{Note:} the requirement that rules on the same line must be
-separated with a semicolon is a recent change in the @code{awk}
-language; it was done for consistency with the treatment of statements
-within an action.
-
-@node When, , Statements/Lines, Getting Started
-@section When to Use @code{awk}
-
-@cindex when to use @code{awk}
-@cindex applications of @code{awk}
-You might wonder how @code{awk} can be useful to you. Using additional
-utility programs, more advanced patterns, field separators, arithmetic
-statements, and other selection criteria, you can produce much more
-complex output. The @code{awk} language is very useful for producing
-reports from large amounts of raw data, such as summarizing information
-from the output of other utility programs like @code{ls}.
-(@xref{More Complex, ,A More Complex Example}.)
-
-Programs written with @code{awk} are usually much smaller than they would
-be in other languages. This makes @code{awk} programs easy to compose and
-use. Often @code{awk} programs can be quickly composed at your terminal,
-used once, and thrown away. Since @code{awk} programs are interpreted, you
-can avoid the usually lengthy edit-compile-test-debug cycle of software
-development.
-
-Complex programs have been written in @code{awk}, including a complete
-retargetable assembler for 8-bit microprocessors (@pxref{Glossary}, for
-more information) and a microcode assembler for a special purpose Prolog
-computer. However, @code{awk}'s capabilities are strained by tasks of
-such complexity.
-
-If you find yourself writing @code{awk} scripts of more than, say, a few
-hundred lines, you might consider using a different programming
-language. Emacs Lisp is a good choice if you need sophisticated string
-or pattern matching capabilities. The shell is also good at string and
-pattern matching; in addition, it allows powerful use of the system
-utilities. More conventional languages, such as C, C++, and Lisp, offer
-better facilities for system programming and for managing the complexity
-of large programs. Programs in these languages may require more lines
-of source code than the equivalent @code{awk} programs, but they are
-easier to maintain and usually run more efficiently.@refill
-
-@node Reading Files, Printing, Getting Started, Top
-@chapter Reading Input Files
-
-@cindex reading files
-@cindex input
-@cindex standard input
-@vindex FILENAME
-In the typical @code{awk} program, all input is read either from the
-standard input (by default the keyboard, but often a pipe from another
-command) or from files whose names you specify on the @code{awk} command
-line. If you specify input files, @code{awk} reads them in order, reading
-all the data from one before going on to the next. The name of the current
-input file can be found in the built-in variable @code{FILENAME}
-(@pxref{Built-in Variables}).@refill
-
-The input is read in units called records, and processed by the
-rules one record at a time. By default, each record is one line. Each
-record is split automatically into fields, to make it more
-convenient for a rule to work on its parts.
-
-On rare occasions you will need to use the @code{getline} command,
-which can do explicit input from any number of files
-(@pxref{Getline, ,Explicit Input with @code{getline}}).@refill
-
-@menu
-* Records:: Controlling how data is split into records.
-* Fields:: An introduction to fields.
-* Non-Constant Fields:: Non-constant Field Numbers.
-* Changing Fields:: Changing the Contents of a Field.
-* Field Separators:: The field separator and how to change it.
-* Constant Size:: Reading constant width data.
-* Multiple Line:: Reading multi-line records.
-* Getline:: Reading files under explicit program control
- using the @code{getline} function.
-* Close Input:: Closing an input file (so you can read from
- the beginning once more).
-@end menu
-
-@node Records, Fields, Reading Files, Reading Files
-@section How Input is Split into Records
-
-@cindex record separator
-The @code{awk} language divides its input into records and fields.
-Records are separated by a character called the @dfn{record separator}.
-By default, the record separator is the newline character, defining
-a record to be a single line of text.@refill
-
-@iftex
-@cindex changing the record separator
-@end iftex
-@vindex RS
-Sometimes you may want to use a different character to separate your
-records. You can use a different character by changing the built-in
-variable @code{RS}. The value of @code{RS} is a string that says how
-to separate records; the default value is @code{"\n"}, the string containing
-just a newline character. This is why records are, by default, single lines.
-
-@code{RS} can have any string as its value, but only the first character
-of the string is used as the record separator. The other characters are
-ignored. @code{RS} is exceptional in this regard; @code{awk} uses the
-full value of all its other built-in variables.@refill
-
-@ignore
-Someday this should be true!
-
-The value of @code{RS} is not limited to a one-character string. It can
-be any regular expression (@pxref{Regexp, ,Regular Expressions as Patterns}).
-In general, each record
-ends at the next string that matches the regular expression; the next
-record starts at the end of the matching string. This general rule is
-actually at work in the usual case, where @code{RS} contains just a
-newline: a record ends at the beginning of the next matching string (the
-next newline in the input) and the following record starts just after
-the end of this string (at the first character of the following line).
-The newline, since it matches @code{RS}, is not part of either record.@refill
-@end ignore
-
-You can change the value of @code{RS} in the @code{awk} program with the
-assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
-The new record-separator character should be enclosed in quotation marks to make
-a string constant. Often the right time to do this is at the beginning
-of execution, before any input has been processed, so that the very
-first record will be read with the proper separator. To do this, use
-the special @code{BEGIN} pattern
-(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}). For
-example:@refill
-
-@example
-awk 'BEGIN @{ RS = "/" @} ; @{ print $0 @}' BBS-list
-@end example
-
-@noindent
-changes the value of @code{RS} to @code{"/"}, before reading any input.
-This is a string whose first character is a slash; as a result, records
-are separated by slashes. Then the input file is read, and the second
-rule in the @code{awk} program (the action with no pattern) prints each
-record. Since each @code{print} statement adds a newline at the end of
-its output, the effect of this @code{awk} program is to copy the input
-with each slash changed to a newline.
-
-Another way to change the record separator is on the command line,
-using the variable-assignment feature
-(@pxref{Command Line, ,Invoking @code{awk}}).@refill
-
-@example
-awk '@{ print $0 @}' RS="/" BBS-list
-@end example
-
-@noindent
-This sets @code{RS} to @samp{/} before processing @file{BBS-list}.
-
-Reaching the end of an input file terminates the current input record,
-even if the last character in the file is not the character in @code{RS}.
-
-@ignore
-@c merge the preceding paragraph and this stuff into one paragraph
-@c and put it in an `expert info' section.
-This produces correct behavior in the vast majority of cases, although
-the following (extreme) pipeline prints a surprising @samp{1}. (There
-is one field, consisting of a newline.)
-
-@example
-echo | awk 'BEGIN @{ RS = "a" @} ; @{ print NF @}'
-@end example
-
-@end ignore
-
-The empty string, @code{""} (a string of no characters), has a special meaning
-as the value of @code{RS}: it means that records are separated only
-by blank lines. @xref{Multiple Line, ,Multiple-Line Records}, for more details.
-
-@cindex number of records, @code{NR} or @code{FNR}
-@vindex NR
-@vindex FNR
-The @code{awk} utility keeps track of the number of records that have
-been read so far from the current input file. This value is stored in a
-built-in variable called @code{FNR}. It is reset to zero when a new
-file is started. Another built-in variable, @code{NR}, is the total
-number of input records read so far from all files. It starts at zero
-but is never automatically reset to zero.
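-
-For example, you could watch both counters by printing them for every
-record of two input files:
-
-@example
-awk '@{ print FILENAME, FNR, NR @}' BBS-list inventory-shipped
-@end example
-
-@noindent
-While @file{BBS-list} is being read, @code{FNR} and @code{NR} are equal;
-once @code{awk} moves on to @file{inventory-shipped}, @code{FNR} starts
-over at one while @code{NR} keeps counting.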
-
-If you change the value of @code{RS} in the middle of an @code{awk} run,
-the new value is used to delimit subsequent records, but the record
-currently being processed (and records already processed) are not
-affected.
-
-@node Fields, Non-Constant Fields, Records, Reading Files
-@section Examining Fields
-
-@cindex examining fields
-@cindex fields
-@cindex accessing fields
-When @code{awk} reads an input record, the record is
-automatically separated or @dfn{parsed} by the interpreter into chunks
-called @dfn{fields}. By default, fields are separated by whitespace,
-like words in a line.
-Whitespace in @code{awk} means any string of one or more spaces and/or
-tabs; other characters such as newline, formfeed, and so on, that are
-considered whitespace by other languages are @emph{not} considered
-whitespace by @code{awk}.@refill
-
-The purpose of fields is to make it more convenient for you to refer to
-these pieces of the record. You don't have to use them---you can
-operate on the whole record if you wish---but fields are what make
-simple @code{awk} programs so powerful.
-
-@cindex @code{$} (field operator)
-@cindex operators, @code{$}
-To refer to a field in an @code{awk} program, you use a dollar-sign,
-@samp{$}, followed by the number of the field you want. Thus, @code{$1}
-refers to the first field, @code{$2} to the second, and so on. For
-example, suppose the following is a line of input:@refill
-
-@example
-This seems like a pretty nice example.
-@end example
-
-@noindent
-Here the first field, or @code{$1}, is @samp{This}; the second field, or
-@code{$2}, is @samp{seems}; and so on. Note that the last field,
-@code{$7}, is @samp{example.}. Because there is no space between the
-@samp{e} and the @samp{.}, the period is considered part of the seventh
-field.@refill
-
-No matter how many fields there are, the last field in a record can be
-represented by @code{$NF}. So, in the example above, @code{$NF} would
-be the same as @code{$7}, which is @samp{example.}. Why this works is
-explained below (@pxref{Non-Constant Fields, ,Non-constant Field Numbers}).
-If you try to refer to a field beyond the last one, such as @code{$8}
-when the record has only 7 fields, you get the empty string.@refill
-
-@vindex NF
-@cindex number of fields, @code{NF}
-Plain @code{NF}, with no @samp{$}, is a built-in variable whose value
-is the number of fields in the current record.
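-
-For example, the following program could print the number of fields and
-the last field of every record:
-
-@example
-awk '@{ print NF, $NF @}' BBS-list
-@end example
-
-@noindent
-Each line of @file{BBS-list} has four fields, so this prints @samp{4}
-followed by the @samp{A}, @samp{B}, or @samp{C} code in the last field.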
-
-@code{$0}, which looks like an attempt to refer to the zeroth field, is
-a special case: it represents the whole input record. This is what you
-would use if you weren't interested in fields.
-
-Here are some more examples:
-
-@example
-awk '$1 ~ /foo/ @{ print $0 @}' BBS-list
-@end example
-
-@noindent
-This example prints each record in the file @file{BBS-list} whose first
-field contains the string @samp{foo}. The operator @samp{~} is called a
-@dfn{matching operator} (@pxref{Comparison Ops, ,Comparison Expressions});
-it tests whether a string (here, the field @code{$1}) matches a given regular
-expression.@refill
-
-By contrast, the following example:
-
-@example
-awk '/foo/ @{ print $1, $NF @}' BBS-list
-@end example
-
-@noindent
-looks for @samp{foo} in @emph{the entire record} and prints the first
-field and the last field for each input record containing a
-match.@refill
-
-@node Non-Constant Fields, Changing Fields, Fields, Reading Files
-@section Non-constant Field Numbers
-
-The number of a field does not need to be a constant. Any expression in
-the @code{awk} language can be used after a @samp{$} to refer to a
-field. The value of the expression specifies the field number. If the
-value is a string, rather than a number, it is converted to a number.
-Consider this example:@refill
-
-@example
-awk '@{ print $NR @}'
-@end example
-
-@noindent
-Recall that @code{NR} is the number of records read so far: 1 in the
-first record, 2 in the second, etc. So this example prints the first
-field of the first record, the second field of the second record, and so
-on. For the twentieth record, field number 20 is printed; most likely,
-the record has fewer than 20 fields, so this prints a blank line.
-
-Here is another example of using expressions as field numbers:
-
-@example
-awk '@{ print $(2*2) @}' BBS-list
-@end example
-
-The @code{awk} language must evaluate the expression @code{(2*2)} and use
-its value as the number of the field to print. The @samp{*} sign
-represents multiplication, so the expression @code{2*2} evaluates to 4.
-The parentheses are used so that the multiplication is done before the
-@samp{$} operation; they are necessary whenever there is a binary
-operator in the field-number expression. This example, then, prints the
-hours of operation (the fourth field) for every line of the file
-@file{BBS-list}.@refill
-
-If the field number you compute is zero, you get the entire record.
-Thus, @code{$(2-2)} has the same value as @code{$0}. Negative field
-numbers are not allowed.
-
-The number of fields in the current record is stored in the built-in
-variable @code{NF} (@pxref{Built-in Variables}). The expression
-@code{$NF} is not a special feature: it is the direct consequence of
-evaluating @code{NF} and using its value as a field number.
-
-@node Changing Fields, Field Separators, Non-Constant Fields, Reading Files
-@section Changing the Contents of a Field
-
-@cindex field, changing contents of
-@cindex changing contents of a field
-@cindex assignment to fields
-You can change the contents of a field as seen by @code{awk} within an
-@code{awk} program; this changes what @code{awk} perceives as the
-current input record. (The actual input is untouched: @code{awk} never
-modifies the input file.)
-
-Consider this example:
-
-@smallexample
-awk '@{ $3 = $2 - 10; print $2, $3 @}' inventory-shipped
-@end smallexample
-
-@noindent
-The @samp{-} sign represents subtraction, so this program reassigns
-field three, @code{$3}, to be the value of field two minus ten,
-@code{$2 - 10}. (@xref{Arithmetic Ops, ,Arithmetic Operators}.)
-Then field two, and the new value for field three, are printed.
-
-In order for this to work, the text in field @code{$2} must make sense
-as a number; the string of characters must be converted to a number in
-order for the computer to do arithmetic on it. The number resulting
-from the subtraction is converted back to a string of characters which
-then becomes field three.
-@xref{Conversion, ,Conversion of Strings and Numbers}.@refill
-
-When you change the value of a field (as perceived by @code{awk}), the
-text of the input record is recalculated to contain the new field where
-the old one was. Therefore, @code{$0} changes to reflect the altered
-field. Thus,
-
-@smallexample
-awk '@{ $2 = $2 - 10; print $0 @}' inventory-shipped
-@end smallexample
-
-@noindent
-prints a copy of the input file, with 10 subtracted from the second
-field of each line.
-
-You can also assign contents to fields that are out of range. For
-example:
-
-@smallexample
-awk '@{ $6 = ($5 + $4 + $3 + $2) ; print $6 @}' inventory-shipped
-@end smallexample
-
-@noindent
-We've just created @code{$6}, whose value is the sum of fields
-@code{$2}, @code{$3}, @code{$4}, and @code{$5}. The @samp{+} sign
-represents addition. For the file @file{inventory-shipped}, @code{$6}
-represents the total number of parcels shipped for a particular month.
-
-Creating a new field changes the internal @code{awk} copy of the current
-input record---the value of @code{$0}. Thus, if you do @samp{print $0}
-after adding a field, the record printed includes the new field, with
-the appropriate number of field separators between it and the previously
-existing fields.
-
-This recomputation affects and is affected by several features not yet
-discussed, in particular, the @dfn{output field separator}, @code{OFS},
-which is used to separate the fields (@pxref{Output Separators}), and
-@code{NF} (the number of fields; @pxref{Fields, ,Examining Fields}).
-For example, the value of @code{NF} is set to the number of the highest
-field you create.@refill
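-
-For example, you could watch this happen:
-
-@smallexample
-echo a b | awk '@{ $5 = "e" ; print NF ; print $0 @}'
-@end smallexample
-
-@noindent
-This prints @samp{5} and then @samp{a b   e}; assigning to @code{$5}
-brought the empty fields @code{$3} and @code{$4} into existence, and
-@code{NF} became five.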
-
-Note, however, that merely @emph{referencing} an out-of-range field
-does @emph{not} change the value of either @code{$0} or @code{NF}.
-Referencing an out-of-range field merely produces a null string. For
-example:@refill
-
-@smallexample
-if ($(NF+1) != "")
- print "can't happen"
-else
- print "everything is normal"
-@end smallexample
-
-@noindent
-should print @samp{everything is normal}, because @code{NF+1} is certain
-to be out of range. (@xref{If Statement, ,The @code{if} Statement},
-for more information about @code{awk}'s @code{if-else} statements.)@refill
-
-It is important to note that assigning to a field will change the
-value of @code{$0}, but will not change the value of @code{NF},
-even when you assign the null string to a field. For example:
-
-@smallexample
-echo a b c d | awk '@{ OFS = ":"; $2 = "" ; print ; print NF @}'
-@end smallexample
-
-@noindent
-prints
-
-@smallexample
-a::c:d
-4
-@end smallexample
-
-@noindent
-The field is still there, it just has an empty value. You can tell
-because there are two colons in a row.
-
-@node Field Separators, Constant Size, Changing Fields, Reading Files
-@section Specifying how Fields are Separated
-@vindex FS
-@cindex fields, separating
-@cindex field separator, @code{FS}
-@cindex @samp{-F} option
-
-(This section is rather long; it describes one of the most fundamental
-operations in @code{awk}. If you are a novice with @code{awk}, we
-recommend that you re-read this section after you have studied the
-section on regular expressions, @ref{Regexp, ,Regular Expressions as Patterns}.)
-
-The way @code{awk} splits an input record into fields is controlled by
-the @dfn{field separator}, which is a single character or a regular
-expression. @code{awk} scans the input record for matches for the
-separator; the fields themselves are the text between the matches. For
-example, if the field separator is @samp{oo}, then the following line:
-
-@smallexample
-moo goo gai pan
-@end smallexample
-
-@noindent
-would be split into three fields: @samp{m}, @samp{@ g} and @samp{@ gai@
-pan}.
-
-The field separator is represented by the built-in variable @code{FS}.
-Shell programmers take note! @code{awk} does not use the name @code{IFS}
-which is used by the shell.@refill
-
-You can change the value of @code{FS} in the @code{awk} program with the
-assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
-Often the right time to do this is at the beginning of execution,
-before any input has been processed, so that the very first record
-will be read with the proper separator. To do this, use the special
-@code{BEGIN} pattern
-(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).
-For example, here we set the value of @code{FS} to the string
-@code{","}:@refill
-
-@smallexample
-awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}'
-@end smallexample
-
-@noindent
-Given the input line,
-
-@smallexample
-John Q. Smith, 29 Oak St., Walamazoo, MI 42139
-@end smallexample
-
-@noindent
-this @code{awk} program extracts the string @samp{@ 29 Oak St.}.
-
-@cindex field separator, choice of
-@cindex regular expressions as field separators
-Sometimes your input data will contain separator characters that don't
-separate fields the way you thought they would. For instance, the
-person's name in the example we've been using might have a title or
-suffix attached, such as @samp{John Q. Smith, LXIX}. From input
-containing such a name:
-
-@smallexample
-John Q. Smith, LXIX, 29 Oak St., Walamazoo, MI 42139
-@end smallexample
-
-@noindent
-the previous sample program would extract @samp{@ LXIX}, instead of
-@samp{@ 29 Oak St.}. If you were expecting the program to print the
-address, you would be surprised. So choose your data layout and
-separator characters carefully to prevent such problems.
-
-As you know, by default, fields are separated by whitespace sequences
-(spaces and tabs), not by single spaces: two spaces in a row do not
-delimit an empty field. The default value of the field separator is a
-string @w{@code{" "}} containing a single space. If this value were
-interpreted in the usual way, each space character would separate
-fields, so two spaces in a row would make an empty field between them.
-The reason this does not happen is that a single space as the value of
-@code{FS} is a special case: it is taken to specify the default manner
-of delimiting fields.
-
-If @code{FS} is any other single character, such as @code{","}, then
-each occurrence of that character separates two fields. Two consecutive
-occurrences delimit an empty field. If the character occurs at the
-beginning or the end of the line, that too delimits an empty field. The
-space character is the only single character which does not follow these
-rules.
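-
-For example, you could count the fields in a line containing two commas
-in a row:
-
-@smallexample
-echo 'a,,b' | awk 'BEGIN @{ FS = "," @} ; @{ print NF @}'
-@end smallexample
-
-@noindent
-This prints @samp{3}; the text between the two commas is an empty
-second field.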
-
-More generally, the value of @code{FS} may be a string containing any
-regular expression. Then each match in the record for the regular
-expression separates fields. For example, the assignment:@refill
-
-@smallexample
-FS = ", \t"
-@end smallexample
-
-@noindent
-makes every occurrence in an input line of a comma followed by a
-space and a tab act as a field separator. (@samp{\t} stands for a
-tab.)@refill
-
-For a less trivial example of a regular expression, suppose you want
-single spaces to separate fields the way single commas were used above.
-You can set @code{FS} to @w{@code{"[@ ]"}}. This regular expression
-matches a single space and nothing else.
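-
-For example, with this value of @code{FS}, two spaces in a row no longer
-count as a single separator:
-
-@smallexample
-echo 'a  b' | awk 'BEGIN @{ FS = "[ ]" @} ; @{ print NF @}'
-@end smallexample
-
-@noindent
-This prints @samp{3}, since the second field is empty; with the default
-value of @code{FS} it would print @samp{2}.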
-
-@c the following index entry is an overfull hbox. --mew 30jan1992
-@cindex field separator: on command line
-@cindex command line, setting @code{FS} on
-@code{FS} can be set on the command line. You use the @samp{-F} argument to
-do so. For example:
-
-@smallexample
-awk -F, '@var{program}' @var{input-files}
-@end smallexample
-
-@noindent
-sets @code{FS} to be the @samp{,} character. Notice that the argument uses
-a capital @samp{F}. Contrast this with @samp{-f}, which specifies a file
-containing an @code{awk} program. Case is significant in command options:
-the @samp{-F} and @samp{-f} options have nothing to do with each other.
-You can use both options at the same time to set the @code{FS} variable
-@emph{and} get an @code{awk} program from a file.@refill
-
-@c begin expert info
-The value used for the argument to @samp{-F} is processed in exactly the
-same way as assignments to the built-in variable @code{FS}. This means that
-if the field separator contains special characters, they must be escaped
-appropriately. For example, to use a @samp{\} as the field separator, you
-would have to type:
-
-@smallexample
-# same as FS = "\\"
-awk -F\\\\ '@dots{}' files @dots{}
-@end smallexample
-
-@noindent
-Since @samp{\} is used for quoting in the shell, @code{awk} will see
-@samp{-F\\}. Then @code{awk} processes the @samp{\\} for escape
-characters (@pxref{Constants, ,Constant Expressions}), finally yielding
-a single @samp{\} to be used for the field separator.
-@c end expert info
-
-As a special case, in compatibility mode
-(@pxref{Command Line, ,Invoking @code{awk}}), if the
-argument to @samp{-F} is @samp{t}, then @code{FS} is set to the tab
-character. (This is because if you type @samp{-F\t}, without the quotes,
-at the shell, the @samp{\} gets deleted, so @code{awk} figures that you
-really want your fields to be separated with tabs, and not @samp{t}s.
-Use @samp{-v FS="t"} on the command line if you really do want to separate
-your fields with @samp{t}s.)@refill
-
-For example, let's use an @code{awk} program file called @file{baud.awk}
-that contains the pattern @code{/300/}, and the action @samp{print $1}.
-Here is the program:
-
-@smallexample
-/300/ @{ print $1 @}
-@end smallexample
-
-Let's also set @code{FS} to be the @samp{-} character, and run the
-program on the file @file{BBS-list}. The following command prints a
-list of the names of the bulletin boards that operate at 300 baud and
-the first three digits of their phone numbers:@refill
-
-@smallexample
-awk -F- -f baud.awk BBS-list
-@end smallexample
-
-@noindent
-It produces this output:
-
-@smallexample
-aardvark 555
-alpo
-barfly 555
-bites 555
-camelot 555
-core 555
-fooey 555
-foot 555
-macfoo 555
-sdace 555
-sabafoo 555
-@end smallexample
-
-@noindent
-Note the second line of output. If you check the original file, you will
-see that the second line looked like this:
-
-@smallexample
-alpo-net 555-3412 2400/1200/300 A
-@end smallexample
-
-The @samp{-} as part of the system's name was used as the field
-separator, instead of the @samp{-} in the phone number that was
-originally intended. This demonstrates why you have to be careful in
-choosing your field and record separators.
-
-The following program searches the system password file, and prints
-the entries for users who have no password:
-
-@smallexample
-awk -F: '$2 == ""' /etc/passwd
-@end smallexample
-
-@noindent
-Here we use the @samp{-F} option on the command line to set the field
-separator. Note that fields in @file{/etc/passwd} are separated by
-colons. The second field represents a user's encrypted password, but if
-the field is empty, that user has no password.
-
-@c begin expert info
-According to the @sc{posix} standard, @code{awk} is supposed to behave
-as if each record is split into fields at the time that it is read.
-In particular, this means that you can change the value of @code{FS}
-after a record is read, but before any of the fields are referenced.
-The value of the fields (i.e. how they were split) should reflect the
-old value of @code{FS}, not the new one.
-
-However, many implementations of @code{awk} do not do this. Instead,
-they defer splitting the fields until a field reference actually happens,
-using the @emph{current} value of @code{FS}! This behavior can be difficult
-to diagnose. The following example illustrates the results of the two methods.
-(The @code{sed} command prints just the first line of @file{/etc/passwd}.)
-
-@smallexample
-sed 1q /etc/passwd | awk '@{ FS = ":" ; print $1 @}'
-@end smallexample
-
-@noindent
-will usually print
-
-@smallexample
-root
-@end smallexample
-
-@noindent
-on an incorrect implementation of @code{awk}, while @code{gawk}
-will print something like
-
-@smallexample
-root:nSijPlPhZZwgE:0:0:Root:/:
-@end smallexample
-@c end expert info
-
-@c begin expert info
-There is an important difference between the two cases of @samp{FS = @w{" "}}
-(a single blank) and @samp{FS = @w{"[ \t]+"}} (which is a regular expression
-matching one or more blanks or tabs). For both values of @code{FS}, fields
-are separated by runs of blanks and/or tabs. However, when the value of
-@code{FS} is @code{" "}, @code{awk} will strip leading and trailing whitespace
-from the record, and then decide where the fields are.
-
-For example, the following expression prints @samp{b}:
-
-@smallexample
-echo ' a b c d ' | awk '@{ print $2 @}'
-@end smallexample
-
-@noindent
-However, the following prints @samp{a}:
-
-@smallexample
-echo ' a b c d ' | awk 'BEGIN @{ FS = "[ \t]+" @} ; @{ print $2 @}'
-@end smallexample
-
-@noindent
-In this case, the first field is null.
-
-The stripping of leading and trailing whitespace also comes into
-play whenever @code{$0} is recomputed. For instance, this pipeline
-
-@smallexample
-echo ' a b c d' | awk '@{ print; $2 = $2; print @}'
-@end smallexample
-
-@noindent
-produces this output:
-
-@smallexample
- a b c d
-a b c d
-@end smallexample
-
-@noindent
-The first @code{print} statement prints the record as it was read,
-with leading whitespace intact. The assignment to @code{$2} rebuilds
-@code{$0} by concatenating @code{$1} through @code{$NF} together,
-separated by the value of @code{OFS}. Since the leading whitespace
-was ignored when finding @code{$1}, it is not part of the new @code{$0}.
-Finally, the last @code{print} statement prints the new @code{$0}.
-@c end expert info
-
-The following table summarizes how fields are split, based on the
-value of @code{FS}.
-
-@table @code
-@item FS == " "
-Fields are separated by runs of whitespace. Leading and trailing
-whitespace are ignored. This is the default.
-
-@item FS == @var{any single character}
-Fields are separated by each occurrence of the character. Multiple
-successive occurrences delimit empty fields, as do leading and
-trailing occurrences.
-
-@item FS == @var{regexp}
-Fields are separated by occurrences of characters that match @var{regexp}.
-Leading and trailing matches of @var{regexp} delimit empty fields.
-@end table
-
-@node Constant Size, Multiple Line, Field Separators, Reading Files
-@section Reading Fixed-width Data
-
-(This section discusses an advanced, experimental feature. If you are
-a novice @code{awk} user, you may wish to skip it on the first reading.)
-
-@code{gawk} 2.13 introduced a new facility for dealing with fixed-width fields
-with no distinctive field separator. Data of this nature arises typically
-in one of at least two ways: the input for old FORTRAN programs where
-numbers are run together, and the output of programs that did not anticipate
-the use of their output as input for other programs.
-
-An example of the latter is a table where all the columns are lined up by
-the use of a variable number of spaces and @emph{empty fields are just
-spaces}. Clearly, @code{awk}'s normal field splitting based on @code{FS}
-will not work well in this case. (Although a portable @code{awk} program
-can use a series of @code{substr} calls on @code{$0}, this is awkward and
-inefficient for a large number of fields.)@refill
-
-The splitting of an input record into fixed-width fields is specified by
-assigning a string containing space-separated numbers to the built-in
-variable @code{FIELDWIDTHS}. Each number specifies the width of the field
-@emph{including} columns between fields. If you want to ignore the columns
-between fields, you can specify the width as a separate field that is
-subsequently ignored.
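-
-For example, if each record contained two four-column values separated by
-two columns of padding, a fragment along these lines could pick out just
-the values, treating the padding as a throw-away field:
-
-@smallexample
-BEGIN @{ FIELDWIDTHS = "4 2 4" @}
-@{ print $1, $3 @}    # $2 holds the two padding columns and is ignored
-@end smallexample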
-
-The following data is the output of the @code{w} utility. It is useful
-to illustrate the use of @code{FIELDWIDTHS}.
-
-@smallexample
- 10:06pm  up 21 days, 14:04,  23 users
-User     tty       login@  idle   JCPU   PCPU  what
-hzuo     ttyV0     8:58pm            9      5  vi p24.tex
-hzang    ttyV3     6:37pm    50                -csh
-eklye    ttyV5     9:53pm            7      1  em thes.tex
-dportein ttyV6     8:17pm  1:47                -csh
-gierd    ttyD3    10:00pm     1                elm
-dave     ttyD4     9:47pm            4      4  w
-brent    ttyp0    26Jun91  4:46  26:46   4:41  bash
-dave     ttyq4    26Jun9115days     46     46  wnewmail
-@end smallexample
-
-The following program takes the above input, converts the idle time to
-number of seconds and prints out the first two fields and the calculated
-idle time. (This program uses a number of @code{awk} features that
-haven't been introduced yet.)@refill
-
-@smallexample
-BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @}
-NR > 2 @{
- idle = $4
- sub(/^ */, "", idle) # strip leading spaces
- if (idle == "") idle = 0
- if (idle ~ /:/) @{ split(idle, t, ":"); idle = t[1] * 60 + t[2] @}
- if (idle ~ /days/) @{ idle *= 24 * 60 * 60 @}
-
- print $1, $2, idle
-@}
-@end smallexample
-
-Here is the result of running the program on the data:
-
-@smallexample
-hzuo ttyV0 0
-hzang ttyV3 50
-eklye ttyV5 0
-dportein ttyV6 107
-gierd ttyD3 1
-dave ttyD4 0
-brent ttyp0 286
-dave ttyq4 1296000
-@end smallexample
-
-Another (possibly more practical) example of fixed-width input data
-would be the input from a deck of balloting cards. In some parts of
-the United States, voters make their choices by punching holes in computer
-cards. These cards are then processed to count the votes for any particular
-candidate or on any particular issue. Since a voter may choose not to
-vote on some issue, any column on the card may be empty. An @code{awk}
-program for processing such data could use the @code{FIELDWIDTHS} feature
-to simplify reading the data.@refill
-
-@c of course, getting gawk to run on a system with card readers is
-@c another story!
-
-This feature is still experimental, and will likely evolve over time.
-
-@node Multiple Line, Getline, Constant Size, Reading Files
-@section Multiple-Line Records
-
-@cindex multiple line records
-@cindex input, multiple line records
-@cindex reading files, multiple line records
-@cindex records, multiple line
-In some data bases, a single line cannot conveniently hold all the
-information in one entry. In such cases, you can use multi-line
-records.
-
-The first step in doing this is to choose your data format: when records
-are not defined as single lines, how do you want to define them?
-What should separate records?
-
-One technique is to use an unusual character or string to separate
-records. For example, you could use the formfeed character (written
-@code{\f} in @code{awk}, as in C) to separate them, making each record
-a page of the file. To do this, just set the variable @code{RS} to
-@code{"\f"} (a string containing the formfeed character). Any
-other character could equally well be used, as long as it won't be part
-of the data in a record.@refill
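-
-For example, a short program along these lines could report how many such
-page-records a file contains (the file name @file{report} is only an
-illustration):
-
-@example
-awk 'BEGIN @{ RS = "\f" @} ; END @{ print NR, "pages" @}' report
-@end example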
-
-@ignore
-Another technique is to have blank lines separate records. The string
-@code{"^\n+"} is a regular expression that matches any sequence of
-newlines starting at the beginning of a line---in other words, it
-matches a sequence of blank lines. If you set @code{RS} to this string,
-a record always ends at the first blank line encountered. In
-addition, a regular expression always matches the longest possible
-sequence when there is a choice. So the next record doesn't start until
-the first nonblank line that follows---no matter how many blank lines
-appear in a row, they are considered one record-separator.
-@end ignore
-
-Another technique is to have blank lines separate records. By a special
-dispensation, a null string as the value of @code{RS} indicates that
-records are separated by one or more blank lines. If you set @code{RS}
-to the null string, a record always ends at the first blank line
-encountered. And the next record doesn't start until the first nonblank
-line that follows---no matter how many blank lines appear in a row, they
-are considered one record-separator. (Reaching the end of the input
-also ends the final record, even if the last line of the file is not
-followed by a blank line.)@refill
-
-The second step is to separate the fields in the record. One way to do
-this is to put each field on a separate line: to do this, just set the
-variable @code{FS} to the string @code{"\n"}. (This simple regular
-expression matches a single newline.)
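-
-For example, the two settings could be combined to print the first line
-of each blank-line-separated entry (the file name @file{addresses} is
-only an illustration):
-
-@example
-awk 'BEGIN @{ RS = "" ; FS = "\n" @} ; @{ print $1 @}' addresses
-@end example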
-
-Another way to separate fields is to divide each of the lines into fields
-in the normal manner. This happens by default as a result of a special
-feature: when @code{RS} is set to the null string, the newline character
-@emph{always} acts as a field separator. This is in addition to whatever
-field separations result from @code{FS}.
-
-The original motivation for this special exception was probably so that
-you get useful behavior in the default case (i.e., @w{@code{FS == " "}}).
-This feature can be a problem if you really don't want the
-newline character to separate fields, since there is no way to
-prevent it. However, you can work around this by using the @code{split}
-function to break up the record manually
-(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
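-
-For example, a rule along these lines could recover the individual lines
-of each record itself, without relying on the automatic field splitting:
-
-@example
-@{
-    n = split($0, line, "\n")
-    print "this entry has", n, "lines; the first is:", line[1]
-@}
-@end example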
-
-@ignore
-Here are two ways to use records separated by blank lines and break each
-line into fields normally:
-
-@example
-awk 'BEGIN @{ RS = ""; FS = "[ \t\n]+" @} @{ print $1 @}' BBS-list
-
-@exdent @r{or}
-
-awk 'BEGIN @{ RS = "^\n+"; FS = "[ \t\n]+" @} @{ print $1 @}' BBS-list
-@end example
-@end ignore
-
-@ignore
-Here is how to use records separated by blank lines and break each
-line into fields normally:
-
-@example
-awk 'BEGIN @{ RS = ""; FS = "[ \t\n]+" @} ; @{ print $1 @}' BBS-list
-@end example
-@end ignore
-
-@node Getline, Close Input, Multiple Line, Reading Files
-@section Explicit Input with @code{getline}
-
-@findex getline
-@cindex input, explicit
-@cindex explicit input
-@cindex input, @code{getline} command
-@cindex reading files, @code{getline} command
-So far we have been getting our input files from @code{awk}'s main
-input stream---either the standard input (usually your terminal) or the
-files specified on the command line. The @code{awk} language has a
-special built-in command called @code{getline} that
-can be used to read input under your explicit control.@refill
-
-This command is quite complex and should @emph{not} be used by
-beginners. It is covered here because this is the chapter on input.
-The examples that follow the explanation of the @code{getline} command
-include material that has not been covered yet. Therefore, come back
-and study the @code{getline} command @emph{after} you have reviewed the
-rest of this manual and have a good knowledge of how @code{awk} works.
-
-@vindex ERRNO
-@cindex differences: @code{gawk} and @code{awk}
-@code{getline} returns 1 if it finds a record, and 0 if the end of the
-file is encountered. If there is some error in getting a record, such
-as a file that cannot be opened, then @code{getline} returns @minus{}1.
-In this case, @code{gawk} sets the variable @code{ERRNO} to a string
-describing the error that occurred.
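-
-For example, using the @samp{getline @var{var} < @var{file}} form
-described below, a @code{BEGIN} rule along these lines could read a whole
-file and complain if it cannot be opened (the file name @file{data} is
-only an illustration):
-
-@example
-BEGIN @{
-    while ((ret = (getline line < "data")) > 0)
-        print line
-    if (ret == -1)
-        print "could not read data:", ERRNO
-    else
-        close("data")
-@}
-@end example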
-
-In the following examples, @var{command} stands for a string value that
-represents a shell command.
-
-@table @code
-@item getline
-The @code{getline} command can be used without arguments to read input
-from the current input file. All it does in this case is read the next
-input record and split it up into fields. This is useful if you've
-finished processing the current record, but you want to do some special
-processing @emph{right now} on the next record. Here's an
-example:@refill
-
-@example
-awk '@{
- if (t = index($0, "/*")) @{
- if (t > 1)
- tmp = substr($0, 1, t - 1)
- else
- tmp = ""
- u = index(substr($0, t + 2), "*/")
- while (u == 0) @{
- getline
- t = -1
- u = index($0, "*/")
- @}
- if (u <= length($0) - 2)
- $0 = tmp substr($0, t + u + 3)
- else
- $0 = tmp
- @}
- print $0
-@}'
-@end example
-
-This @code{awk} program deletes all C-style comments, @samp{/* @dots{}
-*/}, from the input. By replacing the @samp{print $0} with other
-statements, you could perform more complicated processing on the
-decommented input, like searching for matches of a regular
-expression. (This program has a subtle problem---can you spot it?)
-
-@c the program to remove comments doesn't work if one
-@c comment ends and another begins on the same line. (Your
-@c idea for restart would be useful here). --- brennan@boeing.com
-
-This form of the @code{getline} command sets @code{NF} (the number of
-fields; @pxref{Fields, ,Examining Fields}), @code{NR} (the number of
-records read so far; @pxref{Records, ,How Input is Split into Records}),
-@code{FNR} (the number of records read from this input file), and the
-value of @code{$0}.
-
-@strong{Note:} the new value of @code{$0} is used in testing
-the patterns of any subsequent rules. The original value
-of @code{$0} that triggered the rule which executed @code{getline}
-is lost. By contrast, the @code{next} statement reads a new record
-but immediately begins processing it normally, starting with the first
-rule in the program. @xref{Next Statement, ,The @code{next} Statement}.
-
-@item getline @var{var}
-This form of @code{getline} reads a record into the variable @var{var}.
-This is useful when you want your program to read the next record from
-the current input file, but you don't want to subject the record to the
-normal input processing.
-
-For example, suppose the next line is a comment, or a special string,
-and you want to read it, but you must make certain that it won't trigger
-any rules. This version of @code{getline} allows you to read that line
-and store it in a variable so that the main
-read-a-line-and-check-each-rule loop of @code{awk} never sees it.
-
-The following example swaps every two lines of input. For example, given:
-
-@example
-wan
-tew
-free
-phore
-@end example
-
-@noindent
-it outputs:
-
-@example
-tew
-wan
-phore
-free
-@end example
-
-@noindent
-Here's the program:
-
-@example
-@group
-awk '@{
- if ((getline tmp) > 0) @{
- print tmp
- print $0
- @} else
- print $0
-@}'
-@end group
-@end example
-
-The @code{getline} function used in this way sets only the variables
-@code{NR} and @code{FNR} (and of course, @var{var}). The record is not
-split into fields, so the values of the fields (including @code{$0}) and
-the value of @code{NF} do not change.@refill
-
-@item getline < @var{file}
-@cindex input redirection
-@cindex redirection of input
-This form of the @code{getline} function takes its input from the file
-@var{file}. Here @var{file} is a string-valued expression that
-specifies the file name. @samp{< @var{file}} is called a @dfn{redirection}
-since it directs input to come from a different place.
-
-This form is useful if you want to read your input from a particular
-file, instead of from the main input stream. For example, the following
-program reads its input record from the file @file{foo.input} when it
-encounters a first field with a value equal to 10 in the current input
-file.@refill
-
-@example
-awk '@{
- if ($1 == 10) @{
- getline < "foo.input"
- print
- @} else
- print
-@}'
-@end example
-
-Since the main input stream is not used, the values of @code{NR} and
-@code{FNR} are not changed. But the record read is split into fields in
-the normal manner, so the values of @code{$0} and other fields are
-changed. So is the value of @code{NF}.
-
-This does not cause the record to be tested against all the patterns
-in the @code{awk} program, in the way that would happen if the record
-were read normally by the main processing loop of @code{awk}. However
-the new record is tested against any subsequent rules, just as when
-@code{getline} is used without a redirection.
-
-@item getline @var{var} < @var{file}
-This form of the @code{getline} function takes its input from the file
-@var{file} and puts it in the variable @var{var}. As above, @var{file}
-is a string-valued expression that specifies the file from which to read.
-
-In this version of @code{getline}, none of the built-in variables are
-changed, and the record is not split into fields. The only variable
-changed is @var{var}.
-
-For example, the following program copies all the input files to the
-output, except for records that say @w{@samp{@@include @var{filename}}}.
-Such a record is replaced by the contents of the file
-@var{filename}.@refill
-
-@example
-awk '@{
- if (NF == 2 && $1 == "@@include") @{
- while ((getline line < $2) > 0)
- print line
- close($2)
- @} else
- print
-@}'
-@end example
-
-Note here how the name of the extra input file is not built into
-the program; it is taken from the data, from the second field on
-the @samp{@@include} line.@refill
-
-The @code{close} function is called to ensure that if two identical
-@samp{@@include} lines appear in the input, the entire specified file is
-included twice, rather than the second read simply continuing from where
-the first one left off. @xref{Close Input, ,Closing Input Files and Pipes}.@refill
-
-One deficiency of this program is that it does not process nested
-@samp{@@include} statements the way a true macro preprocessor would.
-
-@item @var{command} | getline
-You can @dfn{pipe} the output of a command into @code{getline}. A pipe is
-simply a way to link the output of one program to the input of another. In
-this case, the string @var{command} is run as a shell command and its output
-is piped into @code{awk} to be used as input. This form of @code{getline}
-reads one record from the pipe.
-
-For example, the following program copies input to output, except for lines
-that begin with @samp{@@execute}, which are replaced by the output produced by
-running the rest of the line as a shell command:
-
-@example
-awk '@{
- if ($1 == "@@execute") @{
- tmp = substr($0, 10)
- while ((tmp | getline) > 0)
- print
- close(tmp)
- @} else
- print
-@}'
-@end example
-
-@noindent
-The @code{close} function is called to ensure that if two identical
-@samp{@@execute} lines appear in the input, the command is run for
-each one. @xref{Close Input, ,Closing Input Files and Pipes}.
-
-Given the input:
-
-@example
-foo
-bar
-baz
-@@execute who
-bletch
-@end example
-
-@noindent
-the program might produce:
-
-@example
-foo
-bar
-baz
-hack ttyv0 Jul 13 14:22
-hack ttyp0 Jul 13 14:23 (gnu:0)
-hack ttyp1 Jul 13 14:23 (gnu:0)
-hack ttyp2 Jul 13 14:23 (gnu:0)
-hack ttyp3 Jul 13 14:23 (gnu:0)
-bletch
-@end example
-
-@noindent
-Notice that this program ran the command @code{who} and printed the result.
-(If you try this program yourself, you will get different results, showing
-you who is logged in on your system.)
-
-This variation of @code{getline} splits the record into fields, sets the
-value of @code{NF} and recomputes the value of @code{$0}. The values of
-@code{NR} and @code{FNR} are not changed.
-
-@item @var{command} | getline @var{var}
-The output of the command @var{command} is sent through a pipe to
-@code{getline} and into the variable @var{var}. For example, the
-following program reads the current date and time into the variable
-@code{current_time}, using the @code{date} utility, and then
-prints it.@refill
-
-@example
-awk 'BEGIN @{
-     "date" | getline current_time
-     close("date")
-     print "Report printed on " current_time
-@}'
-@end example
-
-In this version of @code{getline}, none of the built-in variables are
-changed, and the record is not split into fields.
-@end table
-
-@node Close Input, , Getline, Reading Files
-@section Closing Input Files and Pipes
-@cindex closing input files and pipes
-@findex close
-
-If the same file name or the same shell command is used with
-@code{getline} more than once during the execution of an @code{awk}
-program, the file is opened (or the command is executed) only the first time.
-At that time, the first record of input is read from that file or command.
-The next time the same file or command is used in @code{getline}, another
-record is read from it, and so on.
-
-This implies that if you want to start reading the same file again from
-the beginning, or if you want to rerun a shell command (rather than
-reading more output from the command), you must take special steps.
-What you must do is use the @code{close} function, as follows:
-
-@example
-close(@var{filename})
-@end example
-
-@noindent
-or
-
-@example
-close(@var{command})
-@end example
-
-The argument @var{filename} or @var{command} can be any expression. Its
-value must exactly equal the string that was used to open the file or
-start the command---for example, if you open a pipe with this:
-
-@example
-"sort -r names" | getline foo
-@end example
-
-@noindent
-then you must close it with this:
-
-@example
-close("sort -r names")
-@end example
-
-Once this function call is executed, the next @code{getline} from that
-file or command will reopen the file or rerun the command.
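-
-For example, here is a sketch of a program that reads the same file
-twice, closing it between the two passes. (The file name @file{data}
-is just a placeholder for this sketch.)
-
-@example
-awk 'BEGIN @{
-     while ((getline line < "data") > 0)
-          nlines++              # first pass: count the records
-     close("data")
-     print "the file has", nlines, "records:"
-     while ((getline line < "data") > 0)
-          print line            # second pass: reread from the start
-     close("data")
-@}'
-@end example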
-
-@iftex
-@vindex ERRNO
-@cindex differences: @code{gawk} and @code{awk}
-@end iftex
-@code{close} returns a value of zero if the close succeeded.
-Otherwise, the value will be non-zero.
-In this case, @code{gawk} sets the variable @code{ERRNO} to a string
-describing the error that occurred.
-
-@node Printing, One-liners, Reading Files, Top
-@chapter Printing Output
-
-@cindex printing
-@cindex output
-One of the most common things that actions do is to output or @dfn{print}
-some or all of the input. For simple output, use the @code{print}
-statement. For fancier formatting use the @code{printf} statement.
-Both are described in this chapter.
-
-@menu
-* Print:: The @code{print} statement.
-* Print Examples:: Simple examples of @code{print} statements.
-* Output Separators:: The output separators and how to change them.
-* OFMT:: Controlling Numeric Output With @code{print}.
-* Printf:: The @code{printf} statement.
-* Redirection:: How to redirect output to multiple
- files and pipes.
-* Special Files:: File name interpretation in @code{gawk}.
- @code{gawk} allows access to
- inherited file descriptors.
-@end menu
-
-@node Print, Print Examples, Printing, Printing
-@section The @code{print} Statement
-@cindex @code{print} statement
-
-The @code{print} statement does output with simple, standardized
-formatting. You specify only the strings or numbers to be printed, in a
-list separated by commas. They are output, separated by single spaces,
-followed by a newline. The statement looks like this:
-
-@example
-print @var{item1}, @var{item2}, @dots{}
-@end example
-
-@noindent
-The entire list of items may optionally be enclosed in parentheses. The
-parentheses are necessary if any of the item expressions uses a
-relational operator; otherwise it could be confused with a redirection
-(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
-The relational operators are @samp{==},
-@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and
-@samp{!~} (@pxref{Comparison Ops, ,Comparison Expressions}).@refill
-
-The items printed can be constant strings or numbers, fields of the
-current record (such as @code{$1}), variables, or any @code{awk}
-expressions. The @code{print} statement is completely general for
-computing @emph{what} values to print. With two exceptions,
-you cannot specify @emph{how} to print them---how many
-columns, whether to use exponential notation or not, and so on.
-(@xref{Output Separators}, and
-@ref{OFMT, ,Controlling Numeric Output with @code{print}}.)
-For that, you need the @code{printf} statement
-(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
-
-The simple statement @samp{print} with no items is equivalent to
-@samp{print $0}: it prints the entire current record. To print a blank
-line, use @samp{print ""}, where @code{""} is the null, or empty,
-string.
-
-To print a fixed piece of text, use a string constant such as
-@w{@code{"Hello there"}} as one item. If you forget to use the
-double-quote characters, your text will be taken as an @code{awk}
-expression, and you will probably get an error. Keep in mind that a
-space is printed between any two items.
-
-Most often, each @code{print} statement makes one line of output. But it
-isn't limited to one line. If an item value is a string that contains a
-newline, the newline is output along with the rest of the string. A
-single @code{print} can make any number of lines this way.
-
-@node Print Examples, Output Separators, Print, Printing
-@section Examples of @code{print} Statements
-
-Here is an example of printing a string that contains embedded newlines:
-
-@example
-awk 'BEGIN @{ print "line one\nline two\nline three" @}'
-@end example
-
-@noindent
-produces output like this:
-
-@example
-line one
-line two
-line three
-@end example
-
-Here is an example that prints the first two fields of each input record,
-with a space between them:
-
-@example
-awk '@{ print $1, $2 @}' inventory-shipped
-@end example
-
-@noindent
-Its output looks like this:
-
-@example
-Jan 13
-Feb 15
-Mar 15
-@dots{}
-@end example
-
-A common mistake in using the @code{print} statement is to omit the comma
-between two items. This often has the effect of making the items run
-together in the output, with no space. The reason for this is that
-juxtaposing two string expressions in @code{awk} means to concatenate
-them. For example, without the comma:
-
-@example
-awk '@{ print $1 $2 @}' inventory-shipped
-@end example
-
-@noindent
-prints:
-
-@example
-@group
-Jan13
-Feb15
-Mar15
-@dots{}
-@end group
-@end example
-
-Neither example's output makes much sense to someone unfamiliar with the
-file @file{inventory-shipped}. A heading line at the beginning would make
-it clearer. Let's add some headings to our table of months (@code{$1}) and
-green crates shipped (@code{$2}). We do this using the @code{BEGIN} pattern
-(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}) to force the headings to be printed only once:
-
-@example
-awk 'BEGIN @{ print "Month Crates"
-             print "----- ------" @}
-           @{ print $1, $2 @}' inventory-shipped
-@end example
-
-@noindent
-Did you already guess what happens? This program prints the following:
-
-@example
-@group
-Month Crates
------ ------
-Jan 13
-Feb 15
-Mar 15
-@dots{}
-@end group
-@end example
-
-@noindent
-The headings and the table data don't line up! We can fix this by printing
-some spaces between the two fields:
-
-@example
-awk 'BEGIN @{ print "Month Crates"
-             print "----- ------" @}
-           @{ print $1, " ", $2 @}' inventory-shipped
-@end example
-
-You can imagine that this way of lining up columns can get pretty
-complicated when you have many columns to fix. Counting spaces for two
-or three columns can be simple, but more than this and you can get
-``lost'' quite easily. This is why the @code{printf} statement was
-created (@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing});
-one of its specialties is lining up columns of data.@refill
-
-@node Output Separators, OFMT, Print Examples, Printing
-@section Output Separators
-
-@cindex output field separator, @code{OFS}
-@vindex OFS
-@vindex ORS
-@cindex output record separator, @code{ORS}
-As mentioned previously, a @code{print} statement contains a list
-of items, separated by commas. In the output, the items are normally
-separated by single spaces. But they do not have to be spaces; a
-single space is only the default. You can specify any string of
-characters to use as the @dfn{output field separator} by setting the
-built-in variable @code{OFS}. The initial value of this variable
-is the string @w{@code{" "}}, that is, just a single space.@refill
-
-The output from an entire @code{print} statement is called an
-@dfn{output record}. Each @code{print} statement outputs one output
-record and then outputs a string called the @dfn{output record separator}.
-The built-in variable @code{ORS} specifies this string. The initial
-value of the variable is the string @code{"\n"} containing a newline
-character; thus, normally each @code{print} statement makes a separate line.
-
-You can change how output fields and records are separated by assigning
-new values to the variables @code{OFS} and/or @code{ORS}. The usual
-place to do this is in the @code{BEGIN} rule
-(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}), so
-that it happens before any input is processed. You may also do this
-with assignments on the command line, before the names of your input
-files.@refill
-
-The following example prints the first and second fields of each input
-record separated by a semicolon, with a blank line added after each
-line:@refill
-
-@example
-@group
-awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @}
-           @{ print $1, $2 @}' BBS-list
-@end group
-@end example
-
-If the value of @code{ORS} does not contain a newline, all your output
-will be run together on a single line, unless you output newlines some
-other way.
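-
-As a small sketch of what happens in that case, setting @code{ORS} to a
-single space in the @code{BEGIN} rule runs all the output records
-together on one line:
-
-@example
-awk 'BEGIN @{ ORS = " " @}
-           @{ print $1 @}' BBS-list
-@end example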
-
-@node OFMT, Printf, Output Separators, Printing
-@section Controlling Numeric Output with @code{print}
-@vindex OFMT
-When you use the @code{print} statement to print numeric values,
-@code{awk} internally converts the number to a string of characters,
-and prints that string. @code{awk} uses the @code{sprintf} function
-to do this conversion. For now, it suffices to say that the @code{sprintf}
-function accepts a @dfn{format specification} that tells it how to format
-numbers (or strings), and that there are a number of different ways that
-numbers can be formatted. The different format specifications are discussed
-more fully in
-@ref{Printf, ,Using @code{printf} Statements for Fancier Printing}.@refill
-
-The built-in variable @code{OFMT} contains the default format specification
-that @code{print} uses with @code{sprintf} when it wants to convert a
-number to a string for printing. By supplying different format specifications
-as the value of @code{OFMT}, you can change how @code{print} will print
-your numbers. As a brief example:
-
-@example
-@group
-awk 'BEGIN @{ OFMT = "%d"   # print numbers as integers
-             print 17.23 @}'
-@end group
-@end example
-
-@noindent
-will print @samp{17}.
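-
-Similarly, a fixed number of decimal places can be requested. As
-another small sketch:
-
-@example
-awk 'BEGIN @{ OFMT = "%.2f"   # two digits after the decimal point
-             print 3.14159 @}'
-@end example
-
-@noindent
-will print @samp{3.14}.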
-
-@node Printf, Redirection, OFMT, Printing
-@section Using @code{printf} Statements for Fancier Printing
-@cindex formatted output
-@cindex output, formatted
-
-If you want more precise control over the output format than
-@code{print} gives you, use @code{printf}. With @code{printf} you can
-specify the width to use for each item, and you can specify various
-stylistic choices for numbers (such as what radix to use, whether to
-print an exponent, whether to print a sign, and how many digits to print
-after the decimal point). You do this by specifying a string, called
-the @dfn{format string}, which controls how and where to print the other
-arguments.
-
-@menu
-* Basic Printf:: Syntax of the @code{printf} statement.
-* Control Letters:: Format-control letters.
-* Format Modifiers:: Format-specification modifiers.
-* Printf Examples:: Several examples.
-@end menu
-
-@node Basic Printf, Control Letters, Printf, Printf
-@subsection Introduction to the @code{printf} Statement
-
-@cindex @code{printf} statement, syntax of
-The @code{printf} statement looks like this:@refill
-
-@example
-printf @var{format}, @var{item1}, @var{item2}, @dots{}
-@end example
-
-@noindent
-The entire list of arguments may optionally be enclosed in parentheses. The
-parentheses are necessary if any of the item expressions uses a
-relational operator; otherwise it could be confused with a redirection
-(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
-The relational operators are @samp{==},
-@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and
-@samp{!~} (@pxref{Comparison Ops, ,Comparison Expressions}).@refill
-
-@cindex format string
-The difference between @code{printf} and @code{print} is the argument
-@var{format}. This is an expression whose value is taken as a string; it
-specifies how to output each of the other arguments. It is called
-the @dfn{format string}.
-
-The format string is the same as in the @sc{ansi} C library function
-@code{printf}. Most of @var{format} is text to be output verbatim.
-Scattered among this text are @dfn{format specifiers}, one per item.
-Each format specifier says to output the next item at that place in the
-format.@refill
-
-The @code{printf} statement does not automatically append a newline to its
-output. It outputs only what the format specifies. So if you want
-a newline, you must include one in the format. The output separator
-variables @code{OFS} and @code{ORS} have no effect on @code{printf}
-statements.@refill
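-
-As a brief illustration, the first statement below produces its text
-with no trailing newline at all, while the second supplies the newline
-explicitly in its format string:
-
-@example
-printf "no newline here"
-printf "newline at the end\n"
-@end example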
-
-@node Control Letters, Format Modifiers, Basic Printf, Printf
-@subsection Format-Control Letters
-@cindex @code{printf}, format-control characters
-@cindex format specifier
-
-A format specifier starts with the character @samp{%} and ends with a
-@dfn{format-control letter}; it tells the @code{printf} statement how
-to output one item. (If you actually want to output a @samp{%}, write
-@samp{%%}.) The format-control letter specifies what kind of value to
-print. The rest of the format specifier is made up of optional
-@dfn{modifiers} which are parameters such as the field width to use.@refill
-
-Here is a list of the format-control letters:
-
-@table @samp
-@item c
-This prints a number as an ASCII character. Thus, @samp{printf "%c",
-65} outputs the letter @samp{A}. The output for a string value is
-the first character of the string.
-
-@item d
-This prints a decimal integer.
-
-@item i
-This also prints a decimal integer.
-
-@item e
-This prints a number in scientific (exponential) notation.
-For example,
-
-@example
-printf "%4.3e", 1950
-@end example
-
-@noindent
-prints @samp{1.950e+03}, with a total of four significant figures of
-which three follow the decimal point. The @samp{4.3} are @dfn{modifiers},
-discussed below.
-
-@item f
-This prints a number in floating point notation.
-
-@item g
-This prints a number in either scientific notation or floating point
-notation, whichever uses fewer characters.
-@ignore
-From: gatech!ames!elroy!cit-vax!EQL.Caltech.Edu!rankin (Pat Rankin)
-
-In the description of printf formats (p.43), the information for %g
-is incorrect (mainly, it's too much of an oversimplification). It's
-wrong in the AWK book too, and in the gawk man page. I suggested to
-David Trueman before 2.13 was released that the latter be revised, so
-that it matched gawk's behavior (rather than trying to change gawk to
-match the docs ;-). The documented description is nice and simple, but
-it doesn't match the actual underlying behavior of %g in the various C
-run-time libraries that gawk relies on. The precision value for g format
-is different than for f and e formats, so it's inaccurate to say 'g' is
-the shorter of 'e' or 'f'. For 'g', precision represents the number of
-significant digits rather than the number of decimal places, and it has
-special rules about how to format numbers with range between 10E-1 and
-10E-4. All in all, it's pretty messy, and I had to add that clumsy
-GFMT_WORKAROUND code because the VMS run-time library doesn't conform to
-the ANSI-C specifications.
-@end ignore
-
-@item o
-This prints an unsigned octal integer.
-
-@item s
-This prints a string.
-
-@item x
-This prints an unsigned hexadecimal integer.
-
-@item X
-This prints an unsigned hexadecimal integer. However, for the values 10
-through 15, it uses the letters @samp{A} through @samp{F} instead of
-@samp{a} through @samp{f}.
-
-@item %
-This isn't really a format-control letter, but it does have a meaning
-when used after a @samp{%}: the sequence @samp{%%} outputs one
-@samp{%}. It does not consume an argument.
-@end table
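-
-As a quick illustration of several of these letters, the following
-sketch prints the number 65 and a short string in different forms:
-
-@example
-printf "%c %d %o %x %s\n", 65, 65, 65, 65, "sixty-five"
-@end example
-
-@noindent
-prints @samp{A 65 101 41 sixty-five}.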
-
-@node Format Modifiers, Printf Examples, Control Letters, Printf
-@subsection Modifiers for @code{printf} Formats
-
-@cindex @code{printf}, modifiers
-@cindex modifiers (in format specifiers)
-A format specification can also include @dfn{modifiers} that can control
-how much of the item's value is printed and how much space it gets. The
-modifiers come between the @samp{%} and the format-control letter. Here
-are the possible modifiers, in the order in which they may appear:
-
-@table @samp
-@item -
-The minus sign, used before the width modifier, says to left-justify
-the argument within its specified width. Normally the argument
-is printed right-justified in the specified width. Thus,
-
-@example
-printf "%-4s", "foo"
-@end example
-
-@noindent
-prints @samp{foo }.
-
-@item @var{width}
-This is a number representing the desired width of a field. Inserting any
-number between the @samp{%} sign and the format control character forces the
-field to be expanded to this width. The default way to do this is to
-pad with spaces on the left. For example,
-
-@example
-printf "%4s", "foo"
-@end example
-
-@noindent
-prints @samp{ foo}.
-
-The value of @var{width} is a minimum width, not a maximum. If the item
-value requires more than @var{width} characters, it can be as wide as
-necessary. Thus,
-
-@example
-printf "%4s", "foobar"
-@end example
-
-@noindent
-prints @samp{foobar}.
-
-Preceding the @var{width} with a minus sign causes the output to be
-padded with spaces on the right, instead of on the left.
-
-@item .@var{prec}
-This is a number that specifies the precision to use when printing.
-This specifies the number of digits you want printed to the right of the
-decimal point. For a string, it specifies the maximum number of
-characters from the string that should be printed.
-@end table
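-
-As a small sketch that combines a width with a precision, the statement
-
-@example
-printf "%8.2f\n", 3.14159
-@end example
-
-@noindent
-rounds the value to two decimal places and right-justifies it in a
-field eight characters wide, so it is printed as @samp{3.14} preceded
-by four spaces.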
-
-The C library @code{printf}'s dynamic @var{width} and @var{prec}
-capability (for example, @code{"%*.*s"}) is supported. Instead of
-supplying explicit @var{width} and/or @var{prec} values in the format
-string, you pass them in the argument list. For example:@refill
-
-@example
-w = 5
-p = 3
-s = "abcdefg"
-printf "<%*.*s>\n", w, p, s
-@end example
-
-@noindent
-is exactly equivalent to
-
-@example
-s = "abcdefg"
-printf "<%5.3s>\n", s
-@end example
-
-@noindent
-Both programs output @samp{@w{<@bullet{}@bullet{}abc>}}. (We have
-used the bullet symbol ``@bullet{}'' to represent a space, to clearly
-show you that there are two spaces in the output.)@refill
-
-Earlier versions of @code{awk} did not support this capability. You may
-simulate it by using concatenation to build up the format string,
-like so:@refill
-
-@example
-w = 5
-p = 3
-s = "abcdefg"
-printf "<%" w "." p "s>\n", s
-@end example
-
-@noindent
-This is not particularly easy to read, however.
-
-@node Printf Examples, , Format Modifiers, Printf
-@subsection Examples of Using @code{printf}
-
-Here is how to use @code{printf} to make an aligned table:
-
-@example
-awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list
-@end example
-
-@noindent
-prints the names of bulletin boards (@code{$1}) of the file
-@file{BBS-list} as a string of 10 characters, left justified. It also
-prints the phone numbers (@code{$2}) afterward on the line. This
-produces an aligned two-column table of names and phone numbers:@refill
-
-@example
-@group
-aardvark   555-5553
-alpo-net   555-3412
-barfly     555-7685
-bites      555-1675
-camelot    555-0542
-core       555-2912
-fooey      555-1234
-foot       555-6699
-macfoo     555-6480
-sdace      555-3430
-sabafoo    555-2127
-@end group
-@end example
-
-Did you notice that we did not specify that the phone numbers be printed
-as numbers? They had to be printed as strings because the numbers are
-separated by a dash. This dash would be interpreted as a minus sign if
-we had tried to print the phone numbers as numbers. This would have led
-to some pretty confusing results.
-
-We did not specify a width for the phone numbers because they are the
-last things on their lines. We don't need to put spaces after them.
-
-We could make our table look even nicer by adding headings to the tops
-of the columns. To do this, use the @code{BEGIN} pattern
-(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns})
-to force the header to be printed only once, at the beginning of
-the @code{awk} program:@refill
-
-@example
-@group
-awk 'BEGIN @{ print "Name       Number"
-             print "----       ------" @}
-           @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
-@end group
-@end example
-
-Did you notice that we mixed @code{print} and @code{printf} statements in
-the above example? We could have used just @code{printf} statements to get
-the same results:
-
-@example
-@group
-awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number"
-             printf "%-10s %s\n", "----", "------" @}
-           @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
-@end group
-@end example
-
-@noindent
-By outputting each column heading with the same format specification
-used for the elements of the column, we have made sure that the headings
-are aligned just like the columns.
-
-The fact that the same format specification is used three times can be
-emphasized by storing it in a variable, like this:
-
-@example
-awk 'BEGIN @{ format = "%-10s %s\n"
-             printf format, "Name", "Number"
-             printf format, "----", "------" @}
-           @{ printf format, $1, $2 @}' BBS-list
-@end example
-
-See if you can use the @code{printf} statement to line up the headings and
-table data for our @file{inventory-shipped} example covered earlier in the
-section on the @code{print} statement
-(@pxref{Print, ,The @code{print} Statement}).@refill
-
-@node Redirection, Special Files, Printf, Printing
-@section Redirecting Output of @code{print} and @code{printf}
-
-@cindex output redirection
-@cindex redirection of output
-So far we have been dealing only with output that prints to the standard
-output, usually your terminal. Both @code{print} and @code{printf} can
-also send their output to other places.
-This is called @dfn{redirection}.@refill
-
-A redirection appears after the @code{print} or @code{printf} statement.
-Redirections in @code{awk} are written just like redirections in shell
-commands, except that they are written inside the @code{awk} program.
-
-@menu
-* File/Pipe Redirection:: Redirecting Output to Files and Pipes.
-* Close Output:: How to close output files and pipes.
-@end menu
-
-@node File/Pipe Redirection, Close Output, Redirection, Redirection
-@subsection Redirecting Output to Files and Pipes
-
-Here are the three forms of output redirection. They are all shown for
-the @code{print} statement, but they work identically for @code{printf}
-also.@refill
-
-@table @code
-@item print @var{items} > @var{output-file}
-This type of redirection prints the items onto the output file
-@var{output-file}. The file name @var{output-file} can be any
-expression. Its value is changed to a string and then used as a
-file name (@pxref{Expressions, ,Expressions as Action Statements}).@refill
-
-When this type of redirection is used, the @var{output-file} is erased
-before the first output is written to it. Subsequent writes do not
-erase @var{output-file}, but append to it. If @var{output-file} does
-not exist, then it is created.@refill
-
-For example, here is how one @code{awk} program can write a list of
-BBS names to a file @file{name-list} and a list of phone numbers to a
-file @file{phone-list}. Each output file contains one name or number
-per line.
-
-@smallexample
-awk '@{ print $2 > "phone-list"
-        print $1 > "name-list" @}' BBS-list
-@end smallexample
-
-@item print @var{items} >> @var{output-file}
-This type of redirection prints the items onto the output file
-@var{output-file}. The difference between this and the
-single-@samp{>} redirection is that the old contents (if any) of
-@var{output-file} are not erased. Instead, the @code{awk} output is
-appended to the file.
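-
-For instance, this small sketch appends one line to the file
-@file{phone-list} for every record of @file{BBS-list}; the first run
-simply adds to whatever @file{phone-list} already contains:
-
-@smallexample
-awk '@{ print $2 >> "phone-list" @}' BBS-list
-@end smallexample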
-
-@cindex pipes for output
-@cindex output, piping
-@item print @var{items} | @var{command}
-It is also possible to send output through a @dfn{pipe} instead of into a
-file. This type of redirection opens a pipe to @var{command} and writes
-the values of @var{items} through this pipe, to another process created
-to execute @var{command}.@refill
-
-The redirection argument @var{command} is actually an @code{awk}
-expression. Its value is converted to a string, whose contents give the
-shell command to be run.
-
-For example, this produces two files, one unsorted list of BBS names
-and one list sorted in reverse alphabetical order:
-
-@smallexample
-awk '@{ print $1 > "names.unsorted"
-        print $1 | "sort -r > names.sorted" @}' BBS-list
-@end smallexample
-
-Here the unsorted list is written with an ordinary redirection while
-the sorted list is written by piping through the @code{sort} utility.
-
-Here is an example that uses redirection to mail a message to a mailing
-list @samp{bug-system}. This might be useful when trouble is encountered
-in an @code{awk} script run periodically for system maintenance.
-
-@smallexample
-report = "mail bug-system"
-print "Awk script failed:", $0 | report
-print "at record number", FNR, "of", FILENAME | report
-close(report)
-@end smallexample
-
-We call the @code{close} function here because it's a good idea to close
-the pipe as soon as all the intended output has been sent to it.
-@xref{Close Output, ,Closing Output Files and Pipes}, for more information
-on this. This example also illustrates the use of a variable to represent
-a @var{file} or @var{command}: it is not necessary to always
-use a string constant. Using a variable is generally a good idea,
-since @code{awk} requires you to spell the string value identically
-every time.
-@end table
-
-Redirecting output using @samp{>}, @samp{>>}, or @samp{|} asks the system
-to open a file or pipe only if the particular @var{file} or @var{command}
-you've specified has not already been written to by your program, or if
-it has been closed since it was last written to.@refill
-
-@node Close Output, , File/Pipe Redirection, Redirection
-@subsection Closing Output Files and Pipes
-@cindex closing output files and pipes
-@findex close
-
-When a file or pipe is opened, the file name or command associated with
-it is remembered by @code{awk} and subsequent writes to the same file or
-command are appended to the previous writes. The file or pipe stays
-open until @code{awk} exits. This is usually convenient.
-
-Sometimes there is a reason to close an output file or pipe earlier
-than that. To do this, use the @code{close} function, as follows:
-
-@example
-close(@var{filename})
-@end example
-
-@noindent
-or
-
-@example
-close(@var{command})
-@end example
-
-The argument @var{filename} or @var{command} can be any expression.
-Its value must exactly equal the string used to open the file or pipe
-to begin with---for example, if you open a pipe with this:
-
-@example
-print $1 | "sort -r > names.sorted"
-@end example
-
-@noindent
-then you must close it with this:
-
-@example
-close("sort -r > names.sorted")
-@end example
-
-Here are some reasons why you might need to close an output file:
-
-@itemize @bullet
-@item
-To write a file and read it back later on in the same @code{awk}
-program. Close the file when you are finished writing it; then
-you can start reading it with @code{getline}
-(@pxref{Getline, ,Explicit Input with @code{getline}});
-a brief sketch of this appears after the list.@refill
-
-@item
-To write numerous files, successively, in the same @code{awk}
-program. If you don't close the files, eventually you may exceed a
-system limit on the number of open files in one process. So close
-each one when you are finished writing it.
-
-@item
-To make a command finish. When you redirect output through a pipe,
-the command reading the pipe normally continues to try to read input
-as long as the pipe is open. Often this means the command cannot
-really do its work until the pipe is closed. For example, if you
-redirect output to the @code{mail} program, the message is not
-actually sent until the pipe is closed.
-
-@item
-To run the same program a second time, with the same arguments.
-This is not the same thing as giving more input to the first run!
-
-For example, suppose you pipe output to the @code{mail} program. If you
-output several lines redirected to this pipe without closing it, they make
-a single message of several lines. By contrast, if you close the pipe
-after each line of output, then each line makes a separate message.
-@end itemize
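-
-To illustrate the first reason, here is a sketch that writes two lines
-to a file, closes it, and then reads the file back in. (The file name
-@file{tmpfile} is just a placeholder for this sketch.)
-
-@example
-awk 'BEGIN @{
-     print "first line" > "tmpfile"
-     print "second line" > "tmpfile"
-     close("tmpfile")
-     while ((getline line < "tmpfile") > 0)
-          print "read back:", line
-     close("tmpfile")
-@}'
-@end example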
-
-@iftex
-@vindex ERRNO
-@cindex differences: @code{gawk} and @code{awk}
-@end iftex
-@code{close} returns a value of zero if the close succeeded.
-Otherwise, the value will be non-zero.
-In this case, @code{gawk} sets the variable @code{ERRNO} to a string
-describing the error that occurred.
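-
-For example, this sketch opens the sort pipe shown earlier, and reports
-a failure of @code{close}, using @code{ERRNO} for the reason:
-
-@example
-command = "sort -r > names.sorted"
-print $1 | command
-if (close(command) != 0)
-     print "close of", command, "failed:", ERRNO
-@end example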
-
-@node Special Files, , Redirection, Printing
-@section Standard I/O Streams
-@cindex standard input
-@cindex standard output
-@cindex standard error output
-@cindex file descriptors
-
-Running programs conventionally have three input and output streams
-already available to them for reading and writing. These are known as
-the @dfn{standard input}, @dfn{standard output}, and @dfn{standard error
-output}. These streams are, by default, terminal input and output, but
-they are often redirected with the shell, via the @samp{<}, @samp{<<},
-@samp{>}, @samp{>>}, @samp{>&} and @samp{|} operators. Standard error
-is used only for writing error messages; the reason we have two separate
-streams, standard output and standard error, is so that they can be
-redirected separately.
-
-@iftex
-@cindex differences: @code{gawk} and @code{awk}
-@end iftex
-In other implementations of @code{awk}, the only way to write an error
-message to standard error in an @code{awk} program is as follows:
-
-@smallexample
-print "Serious error detected!\n" | "cat 1>&2"
-@end smallexample
-
-@noindent
-This works by opening a pipeline to a shell command that can access the
-standard error stream it inherits from the @code{awk} process.
-This is far from elegant, and is also inefficient, since it requires a
-separate process. So people writing @code{awk} programs have often
-neglected to do this. Instead, they have sent the error messages to the
-terminal, like this:
-
-@smallexample
-@group
-NF != 4 @{
- printf("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/tty"
-@}
-@end group
-@end smallexample
-
-@noindent
-This has the same effect most of the time, but not always: although the
-standard error stream is usually the terminal, it can be redirected, and
-when that happens, writing to the terminal is not correct. In fact, if
-@code{awk} is run from a background job, it may not have a terminal at all.
-Then opening @file{/dev/tty} will fail.
-
-@code{gawk} provides special file names for accessing the three standard
-streams. When you redirect input or output in @code{gawk}, if the file name
-matches one of these special names, then @code{gawk} directly uses the
-stream it stands for.
-
-@cindex @file{/dev/stdin}
-@cindex @file{/dev/stdout}
-@cindex @file{/dev/stderr}
-@cindex @file{/dev/fd/}
-@table @file
-@item /dev/stdin
-The standard input (file descriptor 0).
-
-@item /dev/stdout
-The standard output (file descriptor 1).
-
-@item /dev/stderr
-The standard error output (file descriptor 2).
-
-@item /dev/fd/@var{N}
-The file associated with file descriptor @var{N}. Such a file must have
-been opened by the program initiating the @code{awk} execution (typically
-the shell). Unless you take special pains, only descriptors 0, 1 and 2
-are available.
-@end table
-
-The file names @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
-are aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and @file{/dev/fd/2},
-respectively, but they are more self-explanatory.
-
-The proper way to write an error message in a @code{gawk} program
-is to use @file{/dev/stderr}, like this:
-
-@smallexample
-NF != 4 @{
- printf("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/stderr"
-@}
-@end smallexample
-
-@code{gawk} also provides special file names that give access to information
-about the running @code{gawk} process. Each of these ``files'' provides
-a single record of information. To read them more than once, you must
-first close them with the @code{close} function
-(@pxref{Close Input, ,Closing Input Files and Pipes}).
-The filenames are:
-
-@cindex @file{/dev/pid}
-@cindex @file{/dev/pgrpid}
-@cindex @file{/dev/ppid}
-@cindex @file{/dev/user}
-@table @file
-@item /dev/pid
-Reading this file returns the process ID of the current process,
-in decimal, terminated with a newline.
-
-@item /dev/ppid
-Reading this file returns the parent process ID of the current process,
-in decimal, terminated with a newline.
-
-@item /dev/pgrpid
-Reading this file returns the process group ID of the current process,
-in decimal, terminated with a newline.
-
-@item /dev/user
-Reading this file returns a single record terminated with a newline.
-The fields are separated with blanks. The fields represent the
-following information:
-
-@table @code
-@item $1
-The value of the @code{getuid} system call.
-
-@item $2
-The value of the @code{geteuid} system call.
-
-@item $3
-The value of the @code{getgid} system call.
-
-@item $4
-The value of the @code{getegid} system call.
-@end table
-
-If there are any additional fields, they are the group IDs returned by
-the @code{getgroups} system call.
-(Multiple groups may not be supported on all systems.)@refill
-@end table
-
-These special file names may be used on the command line as data
-files, as well as for I/O redirections within an @code{awk} program.
-They may not be used as source files with the @samp{-f} option.
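-
-For example, here is a sketch of a rule that reads @file{/dev/user}
-with @code{getline} and reports the effective user ID, which is the
-second field of the record described above:
-
-@smallexample
-BEGIN @{
-     getline < "/dev/user"
-     print "effective uid is", $2
-     close("/dev/user")
-@}
-@end smallexample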
-
-Recognition of these special file names is disabled if @code{gawk} is in
-compatibility mode (@pxref{Command Line, ,Invoking @code{awk}}).
-
-@quotation
-@strong{Caution}: Unless your system actually has a @file{/dev/fd} directory
-(or any of the other above listed special files),
-the interpretation of these file names is done by @code{gawk} itself.
-For example, using @samp{/dev/fd/4} for output will actually write on
-file descriptor 4, and not on a new file descriptor that was @code{dup}'ed
-from file descriptor 4. Most of the time this does not matter; however, it
-is important to @emph{not} close any of the files related to file descriptors
-0, 1, and 2. If you do close one of these files, unpredictable behavior
-will result.
-@end quotation
-
-@node One-liners, Patterns, Printing, Top
-@chapter Useful ``One-liners''
-
-@cindex one-liners
-Useful @code{awk} programs are often short, just a line or two. Here is a
-collection of useful, short programs to get you started. Some of these
-programs contain constructs that haven't been covered yet. The description
-of the program will give you a good idea of what is going on, but please
-read the rest of the manual to become an @code{awk} expert!
-
-@c Per suggestions from Michal Jaegermann
-@ifinfo
-Since you are reading this in Info, each line of the example code is
-enclosed in quotes, to represent text that you would type literally.
-The examples themselves represent shell commands that use single quotes
-to keep the shell from interpreting the contents of the program.
-When reading the examples, focus on the text between the open and close
-quotes.
-@end ifinfo
-
-@table @code
-@item awk '@{ if (NF > max) max = NF @}
-@itemx @ @ @ @ @ END @{ print max @}'
-This program prints the maximum number of fields on any input line.
-
-@item awk 'length($0) > 80'
-This program prints every line longer than 80 characters. The sole
-rule has a relational expression as its pattern, and has no action (so the
-default action, printing the record, is used).
-
-@item awk 'NF > 0'
-This program prints every line that has at least one field. This is an
-easy way to delete blank lines from a file (or rather, to create a new
-file similar to the old file but from which the blank lines have been
-deleted).
-
-@item awk '@{ if (NF > 0) print @}'
-This program also prints every line that has at least one field. Here we
-allow the rule to match every line, then decide in the action whether
-to print.
-
-@item awk@ 'BEGIN@ @{@ for (i = 1; i <= 7; i++)
-@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ print int(101 * rand()) @}'
-This program prints 7 random numbers from 0 to 100, inclusive.
-
-@item ls -l @var{files} | awk '@{ x += $4 @} ; END @{ print "total bytes: " x @}'
-This program prints the total number of bytes used by @var{files}.
-
-@item expand@ @var{file}@ |@ awk@ '@{ if (x < length()) x = length() @}
-@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ END @{ print "maximum line length is " x @}'
-This program prints the maximum line length of @var{file}. The input
-is piped through the @code{expand} program to change tabs into spaces,
-so the widths compared are actually the right-margin columns.
-
-@item awk 'BEGIN @{ FS = ":" @}
-@itemx @ @ @ @ @ @{ print $1 | "sort" @}' /etc/passwd
-This program prints a sorted list of the login names of all users.
-
-@item awk '@{ nlines++ @}
-@itemx @ @ @ @ @ END@ @{ print nlines @}'
-This program counts lines in a file.
-
-@item awk 'END @{ print NR @}'
-This program also counts lines in a file, but lets @code{awk} do the work.
-
-@item awk '@{ print NR, $0 @}'
-This program adds line numbers to all its input files,
-similar to @samp{cat -n}.
-@end table
-
-@node Patterns, Actions, One-liners, Top
-@chapter Patterns
-@cindex pattern, definition of
-
-Patterns in @code{awk} control the execution of rules: a rule is
-executed when its pattern matches the current input record. This
-chapter tells all about how to write patterns.
-
-@menu
-* Kinds of Patterns:: A list of all kinds of patterns.
- The following subsections describe
- them in detail.
-* Regexp:: Regular expressions such as @samp{/foo/}.
-* Comparison Patterns:: Comparison expressions such as @code{$1 > 10}.
-* Boolean Patterns:: Combining comparison expressions.
-* Expression Patterns:: Any expression can be used as a pattern.
-* Ranges:: Pairs of patterns specify record ranges.
-* BEGIN/END:: Specifying initialization and cleanup rules.
-* Empty:: The empty pattern, which matches every record.
-@end menu
-
-@node Kinds of Patterns, Regexp, Patterns, Patterns
-@section Kinds of Patterns
-@cindex patterns, types of
-
-Here is a summary of the types of patterns supported in @code{awk}.
-@c At the next rewrite, check to see that this order matches the
-@c order in the text. It might not matter to a reader, but it's good
-@c style. Also, it might be nice to mention all the topics of sections
-@c that follow in this list; that way people can scan and know when to
-@c expect a specific topic. Specifically please also make an entry
-@c for Boolean operators as patterns in the right place. --mew
-
-@table @code
-@item /@var{regular expression}/
-A regular expression as a pattern. It matches when the text of the
-input record fits the regular expression.
-(@xref{Regexp, ,Regular Expressions as Patterns}.)@refill
-
-@item @var{expression}
-A single expression. It matches when its value is nonzero (if a
-number) or nonnull (if a string).
-(@xref{Expression Patterns, ,Expressions as Patterns}.)@refill
-
-@item @var{pat1}, @var{pat2}
-A pair of patterns separated by a comma, specifying a range of records.
-(@xref{Ranges, ,Specifying Record Ranges with Patterns}.)
-
-@item BEGIN
-@itemx END
-Special patterns to supply start-up or clean-up information to
-@code{awk}. (@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.)
-
-@item @var{null}
-The empty pattern matches every input record.
-(@xref{Empty, ,The Empty Pattern}.)@refill
-@end table
-
-
-@node Regexp, Comparison Patterns, Kinds of Patterns, Patterns
-@section Regular Expressions as Patterns
-@cindex pattern, regular expressions
-@cindex regexp
-@cindex regular expressions as patterns
-
-A @dfn{regular expression}, or @dfn{regexp}, is a way of describing a
-class of strings. A regular expression enclosed in slashes (@samp{/})
-is an @code{awk} pattern that matches every input record whose text
-belongs to that class.
-
-The simplest regular expression is a sequence of letters, numbers, or
-both. Such a regexp matches any string that contains that sequence.
-Thus, the regexp @samp{foo} matches any string containing @samp{foo}.
-Therefore, the pattern @code{/foo/} matches any input record containing
-@samp{foo}. Other kinds of regexps let you specify more complicated
-classes of strings.
-
-@menu
-* Regexp Usage:: How to Use Regular Expressions
-* Regexp Operators:: Regular Expression Operators
-* Case-sensitivity:: How to do case-insensitive matching.
-@end menu
-
-@node Regexp Usage, Regexp Operators, Regexp, Regexp
-@subsection How to Use Regular Expressions
-
-A regular expression can be used as a pattern by enclosing it in
-slashes. Then the regular expression is matched against the
-entire text of each record. (Normally, it only needs
-to match some part of the text in order to succeed.) For example, this
-prints the second field of each record that contains @samp{foo} anywhere:
-
-@example
-awk '/foo/ @{ print $2 @}' BBS-list
-@end example
-
-@cindex regular expression matching operators
-@cindex string-matching operators
-@cindex operators, string-matching
-@cindex operators, regexp matching
-@cindex regexp search operators
-Regular expressions can also be used in comparison expressions. Then
-you can specify the string to match against; it need not be the entire
-current input record. These comparison expressions can be used as
-patterns or in @code{if}, @code{while}, @code{for}, and @code{do} statements.
-
-@table @code
-@item @var{exp} ~ /@var{regexp}/
-This is true if the expression @var{exp} (taken as a character string)
-is matched by @var{regexp}. The following example matches, or selects,
-all input records with the upper-case letter @samp{J} somewhere in the
-first field:@refill
-
-@example
-awk '$1 ~ /J/' inventory-shipped
-@end example
-
-So does this:
-
-@example
-awk '@{ if ($1 ~ /J/) print @}' inventory-shipped
-@end example
-
-@item @var{exp} !~ /@var{regexp}/
-This is true if the expression @var{exp} (taken as a character string)
-is @emph{not} matched by @var{regexp}. The following example matches,
-or selects, all input records whose first field @emph{does not} contain
-the upper-case letter @samp{J}:@refill
-
-@example
-awk '$1 !~ /J/' inventory-shipped
-@end example
-@end table
-
-@cindex computed regular expressions
-@cindex regular expressions, computed
-@cindex dynamic regular expressions
-The right hand side of a @samp{~} or @samp{!~} operator need not be a
-constant regexp (i.e., a string of characters between slashes). It may
-be any expression. The expression is evaluated, and converted if
-necessary to a string; the contents of the string are used as the
-regexp. A regexp that is computed in this way is called a @dfn{dynamic
-regexp}. For example:
-
-@example
-identifier_regexp = "[A-Za-z_][A-Za-z_0-9]+"
-$0 ~ identifier_regexp
-@end example
-
-@noindent
-sets @code{identifier_regexp} to a regexp that describes @code{awk}
-variable names, and tests if the input record matches this regexp.
-
-@node Regexp Operators, Case-sensitivity, Regexp Usage, Regexp
-@subsection Regular Expression Operators
-@cindex metacharacters
-@cindex regular expression metacharacters
-
-You can combine regular expressions with the following characters,
-called @dfn{regular expression operators}, or @dfn{metacharacters}, to
-increase the power and versatility of regular expressions.
-
-Here is a table of metacharacters. All characters not listed in the
-table stand for themselves.
-
-@table @code
-@item ^
-This matches the beginning of the string or the beginning of a line
-within the string. For example:
-
-@example
-^@@chapter
-@end example
-
-@noindent
-matches the @samp{@@chapter} at the beginning of a string, and can be used
-to identify chapter beginnings in Texinfo source files.
-
-@item $
-This is similar to @samp{^}, but it matches only at the end of a string
-or the end of a line within the string. For example:
-
-@example
-p$
-@end example
-
-@noindent
-matches a record that ends with a @samp{p}.
-
-@item .
-This matches any single character except a newline. For example:
-
-@example
-.P
-@end example
-
-@noindent
-matches any single character followed by a @samp{P} in a string. Using
-concatenation we can make regular expressions like @samp{U.A}, which
-matches any three-character sequence that begins with @samp{U} and ends
-with @samp{A}.
-
-@item [@dots{}]
-This is called a @dfn{character set}. It matches any one of the
-characters that are enclosed in the square brackets. For example:
-
-@example
-[MVX]
-@end example
-
-@noindent
-matches any one of the characters @samp{M}, @samp{V}, or @samp{X} in a
-string.@refill
-
-Ranges of characters are indicated by using a hyphen between the beginning
-and ending characters, and enclosing the whole thing in brackets. For
-example:@refill
-
-@example
-[0-9]
-@end example
-
-@noindent
-matches any digit.
-
-To include the character @samp{\}, @samp{]}, @samp{-} or @samp{^} in a
-character set, put a @samp{\} in front of it. For example:
-
-@example
-[d\]]
-@end example
-
-@noindent
-matches either @samp{d}, or @samp{]}.@refill
-
-This treatment of @samp{\} is compatible with other @code{awk}
-implementations, and is also mandated by the @sc{posix} Command Language
-and Utilities standard. The regular expressions in @code{awk} are a superset
-of the @sc{posix} specification for Extended Regular Expressions (EREs).
-@sc{posix} EREs are based on the regular expressions accepted by the
-traditional @code{egrep} utility.
-
-In @code{egrep} syntax, backslash is not syntactically special within
-square brackets. This means that special tricks have to be used to
-represent the characters @samp{]}, @samp{-} and @samp{^} as members of a
-character set.
-
-In @code{egrep} syntax, to match @samp{-}, write it as @samp{---},
-which is a range containing only @w{@samp{-}.} You may also give @samp{-}
-as the first or last character in the set. To match @samp{^}, put it
-anywhere except as the first character of a set. To match a @samp{]},
-make it the first character in the set. For example:@refill
-
-@example
-[]d^]
-@end example
-
-@noindent
-matches either @samp{]}, @samp{d} or @samp{^}.@refill
-
-@item [^ @dots{}]
-This is a @dfn{complemented character set}. The first character after
-the @samp{[} @emph{must} be a @samp{^}. It matches any characters
-@emph{except} those in the square brackets (or newline). For example:
-
-@example
-[^0-9]
-@end example
-
-@noindent
-matches any character that is not a digit.
-
-@item |
-This is the @dfn{alternation operator} and it is used to specify
-alternatives. For example:
-
-@example
-^P|[0-9]
-@end example
-
-@noindent
-matches any string that matches either @samp{^P} or @samp{[0-9]}. This
-means it matches any string that contains a digit or starts with @samp{P}.
-
-The alternation applies to the largest possible regexps on either side.
-
-@item (@dots{})
-Parentheses are used for grouping in regular expressions as in
-arithmetic. They can be used to concatenate regular expressions
-containing the alternation operator, @samp{|}.
-
-@item *
-This symbol means that the preceding regular expression is to be
-repeated as many times as possible to find a match. For example:
-
-@example
-ph*
-@end example
-
-@noindent
-applies the @samp{*} symbol to the preceding @samp{h} and looks for matches
-to one @samp{p} followed by any number of @samp{h}s. This will also match
-just @samp{p} if no @samp{h}s are present.
-
-The @samp{*} repeats the @emph{smallest} possible preceding expression.
-(Use parentheses if you wish to repeat a larger expression.) It finds
-as many repetitions as possible. For example:
-
-@example
-awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample
-@end example
-
-@noindent
-prints every record in the input containing a string of the form
-@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on.@refill
-
-@item +
-This symbol is similar to @samp{*}, but the preceding expression must be
-matched at least once. This means that:
-
-@example
-wh+y
-@end example
-
-@noindent
-would match @samp{why} and @samp{whhy} but not @samp{wy}, whereas
-@samp{wh*y} would match all three of these strings. This is a simpler
-way of writing the last @samp{*} example:
-
-@example
-awk '/\(c[ad]+r x\)/ @{ print @}' sample
-@end example
-
-@item ?
-This symbol is similar to @samp{*}, but the preceding expression can be
-matched once or not at all. For example:
-
-@example
-fe?d
-@end example
-
-@noindent
-will match @samp{fed} and @samp{fd}, but nothing else.@refill
-
-@item \
-This is used to suppress the special meaning of a character when
-matching. For example:
-
-@example
-\$
-@end example
-
-@noindent
-matches the character @samp{$}.
-
-The escape sequences used for string constants
-(@pxref{Constants, ,Constant Expressions}) are
-valid in regular expressions as well; they are also introduced by a
-@samp{\}.@refill
-@end table
-
-In regular expressions, the @samp{*}, @samp{+}, and @samp{?} operators have
-the highest precedence, followed by concatenation, and finally by @samp{|}.
-As in arithmetic, parentheses can change how operators are grouped.@refill
-
-@node Case-sensitivity, , Regexp Operators, Regexp
-@subsection Case-sensitivity in Matching
-
-Case is normally significant in regular expressions, both when matching
-ordinary characters (i.e., not metacharacters), and inside character
-sets. Thus a @samp{w} in a regular expression matches only a lower case
-@samp{w} and not an upper case @samp{W}.
-
-The simplest way to do a case-independent match is to use a character
-set: @samp{[Ww]}. However, this can be cumbersome if you need to use it
-often; and it can make the regular expressions harder for humans to
-read. There are two other alternatives that you might prefer.
-
-One way to do a case-insensitive match at a particular point in the
-program is to convert the data to a single case, using the
-@code{tolower} or @code{toupper} built-in string functions (which we
-haven't discussed yet;
-@pxref{String Functions, ,Built-in Functions for String Manipulation}).
-For example:@refill
-
-@example
-tolower($1) ~ /foo/ @{ @dots{} @}
-@end example
-
-@noindent
-converts the first field to lower case before matching against it.
-
-Another method is to set the variable @code{IGNORECASE} to a nonzero
-value (@pxref{Built-in Variables}). When @code{IGNORECASE} is not zero,
-@emph{all} regexp operations ignore case. Changing the value of
-@code{IGNORECASE} dynamically controls the case sensitivity of your
-program as it runs. Case is significant by default because
-@code{IGNORECASE} (like most variables) is initialized to zero.
-
-@example
-x = "aB"
-if (x ~ /ab/) @dots{} # this test will fail
-
-IGNORECASE = 1
-if (x ~ /ab/) @dots{} # now it will succeed
-@end example
-
-In general, you cannot use @code{IGNORECASE} to make certain rules
-case-insensitive and other rules case-sensitive, because there is no way
-to set @code{IGNORECASE} just for the pattern of a particular rule. To
-do this, you must use character sets or @code{tolower}. However, one
-thing you can do only with @code{IGNORECASE} is turn case-sensitivity on
-or off dynamically for all the rules at once.@refill
-
-@code{IGNORECASE} can be set on the command line, or in a @code{BEGIN}
-rule. Setting @code{IGNORECASE} from the command line is a way to make
-a program case-insensitive without having to edit it.
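-
-For example, assuming the program is stored in a file named
-@file{prog.awk} (a name chosen just for this sketch), the following
-command line runs it with all regexp matching done without regard to
-case:
-
-@example
-gawk -v IGNORECASE=1 -f prog.awk BBS-list
-@end example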
-
-The value of @code{IGNORECASE} has no effect if @code{gawk} is in
-compatibility mode (@pxref{Command Line, ,Invoking @code{awk}}).
-Case is always significant in compatibility mode.@refill
-
-@node Comparison Patterns, Boolean Patterns, Regexp, Patterns
-@section Comparison Expressions as Patterns
-@cindex comparison expressions as patterns
-@cindex pattern, comparison expressions
-@cindex relational operators
-@cindex operators, relational
-
-@dfn{Comparison patterns} test relationships such as equality between
-two strings or numbers. They are a special case of expression patterns
-(@pxref{Expression Patterns, ,Expressions as Patterns}). They are written
-with @dfn{relational operators}, which are a superset of those in C.
-Here is a table of them:@refill
-
-@table @code
-@item @var{x} < @var{y}
-True if @var{x} is less than @var{y}.
-
-@item @var{x} <= @var{y}
-True if @var{x} is less than or equal to @var{y}.
-
-@item @var{x} > @var{y}
-True if @var{x} is greater than @var{y}.
-
-@item @var{x} >= @var{y}
-True if @var{x} is greater than or equal to @var{y}.
-
-@item @var{x} == @var{y}
-True if @var{x} is equal to @var{y}.
-
-@item @var{x} != @var{y}
-True if @var{x} is not equal to @var{y}.
-
-@item @var{x} ~ @var{y}
-True if @var{x} matches the regular expression described by @var{y}.
-
-@item @var{x} !~ @var{y}
-True if @var{x} does not match the regular expression described by @var{y}.
-@end table
-
-The operands of a relational operator are compared as numbers if they
-are both numbers. Otherwise they are converted to, and compared as,
-strings (@pxref{Conversion, ,Conversion of Strings and Numbers},
-for the detailed rules). Strings are compared by comparing the first
-character of each, then the second character of each,
-and so on, until there is a difference. If the two strings are equal until
-the shorter one runs out, the shorter one is considered to be less than the
-longer one. Thus, @code{"10"} is less than @code{"9"}, and @code{"abc"}
-is less than @code{"abcd"}.@refill
-
-The left operand of the @samp{~} and @samp{!~} operators is a string.
-The right operand is either a constant regular expression enclosed in
-slashes (@code{/@var{regexp}/}), or any expression, whose string value
-is used as a dynamic regular expression
-(@pxref{Regexp Usage, ,How to Use Regular Expressions}).@refill
-
-The following example prints the second field of each input record
-whose first field is precisely @samp{foo}.
-
-@example
-awk '$1 == "foo" @{ print $2 @}' BBS-list
-@end example
-
-@noindent
-Contrast this with the following regular expression match, which would
-accept any record with a first field that contains @samp{foo}:
-
-@example
-awk '$1 ~ "foo" @{ print $2 @}' BBS-list
-@end example
-
-@noindent
-or, equivalently, this one:
-
-@example
-awk '$1 ~ /foo/ @{ print $2 @}' BBS-list
-@end example
-
-@node Boolean Patterns, Expression Patterns, Comparison Patterns, Patterns
-@section Boolean Operators and Patterns
-@cindex patterns, boolean
-@cindex boolean patterns
-
-A @dfn{boolean pattern} is an expression which combines other patterns
-using the @dfn{boolean operators} ``or'' (@samp{||}), ``and''
-(@samp{&&}), and ``not'' (@samp{!}). Whether the boolean pattern
-matches an input record depends on whether its subpatterns match.
-
-For example, the following command prints all records in the input file
-@file{BBS-list} that contain both @samp{2400} and @samp{foo}.@refill
-
-@example
-awk '/2400/ && /foo/' BBS-list
-@end example
-
-The following command prints all records in the input file
-@file{BBS-list} that contain @emph{either} @samp{2400} or @samp{foo}, or
-both.@refill
-
-@example
-awk '/2400/ || /foo/' BBS-list
-@end example
-
-The following command prints all records in the input file
-@file{BBS-list} that do @emph{not} contain the string @samp{foo}.
-
-@example
-awk '! /foo/' BBS-list
-@end example
-
-Note that boolean patterns are a special case of expression patterns
-(@pxref{Expression Patterns, ,Expressions as Patterns}); they are
-expressions that use the boolean operators.
-@xref{Boolean Ops, ,Boolean Expressions}, for complete information
-on the boolean operators.@refill
-
-The subpatterns of a boolean pattern can be constant regular
-expressions, comparisons, or any other @code{awk} expressions. Range
-patterns are not expressions, so they cannot appear inside boolean
-patterns. Likewise, the special patterns @code{BEGIN} and @code{END},
-which never match any input record, are not expressions and cannot
-appear inside boolean patterns.
-
-@node Expression Patterns, Ranges, Boolean Patterns, Patterns
-@section Expressions as Patterns
-
-Any @code{awk} expression is also valid as an @code{awk} pattern.
-Then the pattern ``matches'' if the expression's value is nonzero (if a
-number) or nonnull (if a string).
-
-The expression is reevaluated each time the rule is tested against a new
-input record. If the expression uses fields such as @code{$1}, the
-value depends directly on the new input record's text; otherwise, it
-depends only on what has happened so far in the execution of the
-@code{awk} program, but that may still be useful.
-
-Comparison patterns are actually a special case of this. For
-example, the expression @code{$5 == "foo"} has the value 1 when the
-value of @code{$5} equals @code{"foo"}, and 0 otherwise; therefore, this
-expression as a pattern matches when the two values are equal.
-
-Boolean patterns are also special cases of expression patterns.
-
-A constant regexp as a pattern is also a special case of an expression
-pattern. @code{/foo/} as an expression has the value 1 if @samp{foo}
-appears in the current input record; thus, as a pattern, @code{/foo/}
-matches any record containing @samp{foo}.
-
-Other implementations of @code{awk} that are not yet @sc{posix} compliant
-are less general than @code{gawk}: they allow comparison expressions, and
-boolean combinations thereof (optionally with parentheses), but not
-necessarily other kinds of expressions.
-
-@node Ranges, BEGIN/END, Expression Patterns, Patterns
-@section Specifying Record Ranges with Patterns
-
-@cindex range pattern
-@cindex patterns, range
-A @dfn{range pattern} is made of two patterns separated by a comma, of
-the form @code{@var{begpat}, @var{endpat}}. It matches ranges of
-consecutive input records. The first pattern @var{begpat} controls
-where the range begins, and the second one @var{endpat} controls where
-it ends. For example,@refill
-
-@example
-awk '$1 == "on", $1 == "off"'
-@end example
-
-@noindent
-prints every record between @samp{on}/@samp{off} pairs, inclusive.
-
-A range pattern starts out by matching @var{begpat}
-against every input record; when a record matches @var{begpat}, the
-range pattern becomes @dfn{turned on}. The range pattern matches this
-record. As long as it stays turned on, it automatically matches every
-input record read. It also matches @var{endpat} against
-every input record; when that succeeds, the range pattern is turned
-off again for the following record. Now it goes back to checking
-@var{begpat} against each record.
-
-The record that turns on the range pattern and the one that turns it
-off both match the range pattern. If you don't want to operate on
-these records, you can write @code{if} statements in the rule's action
-to distinguish them.
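-
-For instance, a rule along the lines of the following (the @samp{on}
-and @samp{off} markers follow the earlier example and are purely
-illustrative) prints the records inside each range but skips the two
-boundary records themselves:
-
-@example
-$1 == "on", $1 == "off" @{
-    if ($1 == "on" || $1 == "off")
-        next
-    print
-@}
-@end example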
-
-It is possible for a pattern to be turned both on and off by the same
-record, if both conditions are satisfied by that record. Then the action is
-executed for just that record.
-
-@node BEGIN/END, Empty, Ranges, Patterns
-@section @code{BEGIN} and @code{END} Special Patterns
-
-@cindex @code{BEGIN} special pattern
-@cindex patterns, @code{BEGIN}
-@cindex @code{END} special pattern
-@cindex patterns, @code{END}
-@code{BEGIN} and @code{END} are special patterns. They are not used to
-match input records. Rather, they are used for supplying start-up or
-clean-up information to your @code{awk} script. A @code{BEGIN} rule is
-executed, once, before the first input record has been read. An @code{END}
-rule is executed, once, after all the input has been read. For
-example:@refill
-
-@example
-awk 'BEGIN @{ print "Analysis of `foo'" @}
- /foo/ @{ ++foobar @}
- END @{ print "`foo' appears " foobar " times." @}' BBS-list
-@end example
-
-This program finds the number of records in the input file @file{BBS-list}
-that contain the string @samp{foo}. The @code{BEGIN} rule prints a title
-for the report. There is no need to use the @code{BEGIN} rule to
-initialize the counter @code{foobar} to zero, as @code{awk} does this
-for us automatically (@pxref{Variables}).
-
-The second rule increments the variable @code{foobar} every time a
-record containing the pattern @samp{foo} is read. The @code{END} rule
-prints the value of @code{foobar} at the end of the run.@refill
-
-The special patterns @code{BEGIN} and @code{END} cannot be used in ranges
-or with boolean operators (indeed, they cannot be used with any operators).
-
-An @code{awk} program may have multiple @code{BEGIN} and/or @code{END}
-rules. They are executed in the order they appear, all the @code{BEGIN}
-rules at start-up and all the @code{END} rules at termination.
-
-Multiple @code{BEGIN} and @code{END} sections are useful for writing
-library functions, since each library can have its own @code{BEGIN} or
-@code{END} rule to do its own initialization and/or cleanup. Note that
-the order in which library functions are named on the command line
-controls the order in which their @code{BEGIN} and @code{END} rules are
-executed. Therefore you have to be careful to write such rules in
-library files so that the order in which they are executed doesn't matter.
-@xref{Command Line, ,Invoking @code{awk}}, for more information on
-using library functions.
-
-If an @code{awk} program only has a @code{BEGIN} rule, and no other
-rules, then the program exits after the @code{BEGIN} rule has been run.
-(Older versions of @code{awk} used to keep reading and ignoring input
-until end of file was seen.) However, if an @code{END} rule exists as
-well, then the input will be read, even if there are no other rules in
-the program. This is necessary in case the @code{END} rule checks the
-@code{NR} variable.
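-
-For example, this program prints a single line and exits at once,
-without waiting for any input:
-
-@example
-awk 'BEGIN @{ print "hello, world" @}'
-@end example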
-
-@code{BEGIN} and @code{END} rules must have actions; there is no default
-action for these rules since there is no current record when they run.
-
-@node Empty, , BEGIN/END, Patterns
-@comment node-name, next, previous, up
-@section The Empty Pattern
-
-@cindex empty pattern
-@cindex pattern, empty
-An empty pattern is considered to match @emph{every} input record. For
-example, the program:@refill
-
-@example
-awk '@{ print $1 @}' BBS-list
-@end example
-
-@noindent
-prints the first field of every record.
-
-@node Actions, Expressions, Patterns, Top
-@chapter Overview of Actions
-@cindex action, definition of
-@cindex curly braces
-@cindex action, curly braces
-@cindex action, separating statements
-
-An @code{awk} program or script consists of a series of
-rules and function definitions, interspersed. (Functions are
-described later. @xref{User-defined, ,User-defined Functions}.)
-
-A rule contains a pattern and an action, either of which may be
-omitted. The purpose of the @dfn{action} is to tell @code{awk} what to do
-once a match for the pattern is found. Thus, the entire program
-looks somewhat like this:
-
-@example
-@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]}
-@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]}
-@dots{}
-function @var{name} (@var{args}) @{ @dots{} @}
-@dots{}
-@end example
-
-An action consists of one or more @code{awk} @dfn{statements}, enclosed
-in curly braces (@samp{@{} and @samp{@}}). Each statement specifies one
-thing to be done. The statements are separated by newlines or
-semicolons.
-
-The curly braces around an action must be used even if the action
-contains only one statement, or even if it contains no statements at
-all. However, if you omit the action entirely, omit the curly braces as
-well. (An omitted action is equivalent to @samp{@{ print $0 @}}.)
-
-Here are the kinds of statements supported in @code{awk}:
-
-@itemize @bullet
-@item
-Expressions, which can call functions or assign values to variables
-(@pxref{Expressions, ,Expressions as Action Statements}). Executing
-this kind of statement simply computes the value of the expression and
-then ignores it. This is useful when the expression has side effects
-(@pxref{Assignment Ops, ,Assignment Expressions}).@refill
-
-@item
-Control statements, which specify the control flow of @code{awk}
-programs. The @code{awk} language gives you C-like constructs
-(@code{if}, @code{for}, @code{while}, and so on) as well as a few
-special ones (@pxref{Statements, ,Control Statements in Actions}).@refill
-
-@item
-Compound statements, which consist of one or more statements enclosed in
-curly braces. A compound statement is used in order to put several
-statements together in the body of an @code{if}, @code{while}, @code{do}
-or @code{for} statement.
-
-@item
-Input control, using the @code{getline} command
-(@pxref{Getline, ,Explicit Input with @code{getline}}), and the @code{next}
-statement (@pxref{Next Statement, ,The @code{next} Statement}).
-
-@item
-Output statements, @code{print} and @code{printf}.
-@xref{Printing, ,Printing Output}.@refill
-
-@item
-Deletion statements, for deleting array elements.
-@xref{Delete, ,The @code{delete} Statement}.@refill
-@end itemize
-
-@iftex
-The next two chapters cover in detail expressions and control
-statements, respectively. We go on to treat arrays and built-in
-functions, both of which are used in expressions. Then we proceed
-to discuss how to define your own functions.
-@end iftex
-
-@node Expressions, Statements, Actions, Top
-@chapter Expressions as Action Statements
-@cindex expression
-
-Expressions are the basic building block of @code{awk} actions. An
-expression evaluates to a value, which you can print, test, store in a
-variable or pass to a function. But beyond that, an expression can
-assign a new value to a variable or a field, with an assignment
-operator.
-
-An expression can serve as a statement on its own. Most other kinds of
-statements contain one or more expressions which specify data to be
-operated on. As in other languages, expressions in @code{awk} include
-variables, array references, constants, and function calls, as well as
-combinations of these with various operators.
-
-@menu
-* Constants:: String, numeric, and regexp constants.
-* Variables:: Variables give names to values for later use.
-* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, etc.)
-* Concatenation:: Concatenating strings.
-* Comparison Ops:: Comparison of numbers and strings
- with @samp{<}, etc.
-* Boolean Ops:: Combining comparison expressions
- using boolean operators
- @samp{||} (``or''), @samp{&&} (``and'') and @samp{!} (``not'').
-
-* Assignment Ops:: Changing the value of a variable or a field.
-* Increment Ops:: Incrementing the numeric value of a variable.
-
-* Conversion:: The conversion of strings to numbers
- and vice versa.
-* Values:: The whole truth about numbers and strings.
-* Conditional Exp:: Conditional expressions select
- between two subexpressions under control
- of a third subexpression.
-* Function Calls:: A function call is an expression.
-* Precedence:: How various operators nest.
-@end menu
-
-@node Constants, Variables, Expressions, Expressions
-@section Constant Expressions
-@cindex constants, types of
-@cindex string constants
-
-The simplest type of expression is the @dfn{constant}, which always has
-the same value. There are three types of constants: numeric constants,
-string constants, and regular expression constants.
-
-@cindex numeric constant
-@cindex numeric value
-A @dfn{numeric constant} stands for a number. This number can be an
-integer, a decimal fraction, or a number in scientific (exponential)
-notation. Note that all numeric values are represented within
-@code{awk} in double-precision floating point. Here are some examples
-of numeric constants, which all have the same value:
-
-@example
-105
-1.05e+2
-1050e-1
-@end example
-
-A string constant consists of a sequence of characters enclosed in
-double-quote marks. For example:
-
-@example
-"parrot"
-@end example
-
-@noindent
-@iftex
-@cindex differences between @code{gawk} and @code{awk}
-@end iftex
-represents the string whose contents are @samp{parrot}. Strings in
-@code{gawk} can be of any length and they can contain all the possible
-8-bit ASCII characters including ASCII NUL. Other @code{awk}
-implementations may have difficulty with some character codes.@refill
-
-@cindex escape sequence notation
-Some characters cannot be included literally in a string constant. You
-represent them instead with @dfn{escape sequences}, which are character
-sequences beginning with a backslash (@samp{\}).
-
-One use of an escape sequence is to include a double-quote character in
-a string constant. Since a plain double-quote would end the string, you
-must use @samp{\"} to represent a single double-quote character as a
-part of the string.
-The
-backslash character itself is another character that cannot be
-included normally; you write @samp{\\} to put one backslash in the
-string. Thus, the string whose contents are the two characters
-@samp{"\} must be written @code{"\"\\"}.
-
-Another use of backslash is to represent unprintable characters
-such as newline. While there is nothing to stop you from writing most
-of these characters directly in a string constant, they may look ugly.
-
-Here is a table of all the escape sequences used in @code{awk}:
-
-@table @code
-@item \\
-Represents a literal backslash, @samp{\}.
-
-@item \a
-Represents the ``alert'' character, control-g, ASCII code 7.
-
-@item \b
-Represents a backspace, control-h, ASCII code 8.
-
-@item \f
-Represents a formfeed, control-l, ASCII code 12.
-
-@item \n
-Represents a newline, control-j, ASCII code 10.
-
-@item \r
-Represents a carriage return, control-m, ASCII code 13.
-
-@item \t
-Represents a horizontal tab, control-i, ASCII code 9.
-
-@item \v
-Represents a vertical tab, control-k, ASCII code 11.
-
-@item \@var{nnn}
-Represents the octal value @var{nnn}, where @var{nnn} are one to three
-digits between 0 and 7. For example, the code for the ASCII ESC
-(escape) character is @samp{\033}.@refill
-
-@item \x@var{hh}@dots{}
-Represents the hexadecimal value @var{hh}, where @var{hh} are hexadecimal
-digits (@samp{0} through @samp{9} and either @samp{A} through @samp{F} or
-@samp{a} through @samp{f}). Like the same construct in @sc{ansi} C, the escape
-sequence continues until the first non-hexadecimal digit is seen. However,
-using more than two hexadecimal digits produces undefined results. (The
-@samp{\x} escape sequence is not allowed in @sc{posix} @code{awk}.)@refill
-@end table
-
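-As a simple illustration of these escape sequences, the following
-program uses @samp{\"} and @samp{\t} in a single string constant:
-
-@example
-awk 'BEGIN @{ print "\"hello\"\tworld" @}'
-@end example
-
-@noindent
-It prints @samp{"hello"}, then a tab, then @samp{world}.
-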
-A @dfn{constant regexp} is a regular expression description enclosed in
-slashes, such as @code{/^beginning and end$/}. Most regexps used in
-@code{awk} programs are constant, but the @samp{~} and @samp{!~}
-operators can also match computed or ``dynamic'' regexps
-(@pxref{Regexp Usage, ,How to Use Regular Expressions}).@refill
-
-Constant regexps may be used like simple expressions. When a
-constant regexp is not on the right hand side of the @samp{~} or
-@samp{!~} operators, it has the same meaning as if it appeared
-in a pattern, i.e. @samp{($0 ~ /foo/)}
-(@pxref{Expression Patterns, ,Expressions as Patterns}).
-This means that the two code segments,@refill
-
-@example
-if ($0 ~ /barfly/ || $0 ~ /camelot/)
- print "found"
-@end example
-
-@noindent
-and
-
-@example
-if (/barfly/ || /camelot/)
- print "found"
-@end example
-
-@noindent
-are exactly equivalent. One rather bizarre consequence of this rule is
-that the following boolean expression is legal, but does not do what the user
-intended:@refill
-
-@example
-if (/foo/ ~ $1) print "found foo"
-@end example
-
-This code is ``obviously'' testing @code{$1} for a match against the regexp
-@code{/foo/}. But in fact, the expression @code{(/foo/ ~ $1)} actually means
-@code{(($0 ~ /foo/) ~ $1)}. In other words, first match the input record
-against the regexp @code{/foo/}. The result will be either a 0 or a 1,
-depending upon the success or failure of the match. Then match that result
-against the first field in the record.@refill
-
-Since it is unlikely that you would ever really wish to make this kind of
-test, @code{gawk} will issue a warning when it sees this construct in
-a program.@refill
-
-Another consequence of this rule is that the assignment statement
-
-@example
-matches = /foo/
-@end example
-
-@noindent
-will assign either 0 or 1 to the variable @code{matches}, depending
-upon the contents of the current input record.
-
-Constant regular expressions are also used as the first argument for
-the @code{sub} and @code{gsub} functions
-(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
-
-This feature of the language was never well documented until the
-@sc{posix} specification.
-
-You may be wondering, when is
-
-@example
-$1 ~ /foo/ @{ @dots{} @}
-@end example
-
-@noindent
-preferable to
-
-@example
-$1 ~ "foo" @{ @dots{} @}
-@end example
-
-Since the right-hand sides of both @samp{~} operators are constants,
-it is more efficient to use the @samp{/foo/} form: @code{awk} can note
-that you have supplied a regexp and store it internally in a form that
-makes pattern matching more efficient. In the second form, @code{awk}
-must first convert the string into this internal form, and then perform
-the pattern matching. The first form is also better style; it shows
-clearly that you intend a regexp match.
-
-@node Variables, Arithmetic Ops, Constants, Expressions
-@section Variables
-@cindex variables, user-defined
-@cindex user-defined variables
-@c there should be more than one subsection, ideally. Not a big deal.
-@c But usually there are supposed to be at least two. One way to get
-@c around this is to write the info in the subsection as the info in the
-@c section itself and not have any subsections.. --mew
-
-Variables let you give names to values and refer to them later. You have
-already seen variables in many of the examples. The name of a variable
-must be a sequence of letters, digits and underscores, but it may not begin
-with a digit. Case is significant in variable names; @code{a} and @code{A}
-are distinct variables.
-
-A variable name is a valid expression by itself; it represents the
-variable's current value. Variables are given new values with
-@dfn{assignment operators} and @dfn{increment operators}.
-@xref{Assignment Ops, ,Assignment Expressions}.
-
-A few variables have special built-in meanings, such as @code{FS}, the
-field separator, and @code{NF}, the number of fields in the current
-input record. @xref{Built-in Variables}, for a list of them. These
-built-in variables can be used and assigned just like all other
-variables, but their values are also used or changed automatically by
-@code{awk}. Each built-in variable's name is made entirely of upper case
-letters.
-
-Variables in @code{awk} can be assigned either numeric or string
-values. By default, variables are initialized to the null string, which
-is effectively zero if converted to a number. There is no need to
-``initialize'' each variable explicitly in @code{awk}, the way you
-would in C or most other traditional languages.
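-
-For example, the following program sums the numbers in the first
-field of its input. The variable @code{sum} is never explicitly
-initialized; its first use simply starts from the null string, which
-acts as zero:
-
-@example
-awk '@{ sum += $1 @}
-     END @{ print sum @}'
-@end example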
-
-@menu
-* Assignment Options:: Setting variables on the command line
- and a summary of command line syntax.
- This is an advanced method of input.
-@end menu
-
-@node Assignment Options, , Variables, Variables
-@subsection Assigning Variables on the Command Line
-
-You can set any @code{awk} variable by including a @dfn{variable assignment}
-among the arguments on the command line when you invoke @code{awk}
-(@pxref{Command Line, ,Invoking @code{awk}}). Such an assignment has
-this form:@refill
-
-@example
-@var{variable}=@var{text}
-@end example
-
-@noindent
-With it, you can set a variable either at the beginning of the
-@code{awk} run or in between input files.
-
-If you precede the assignment with the @samp{-v} option, like this:
-
-@example
--v @var{variable}=@var{text}
-@end example
-
-@noindent
-then the variable is set at the very beginning, before even the
-@code{BEGIN} rules are run. The @samp{-v} option and its assignment
-must precede all the file name arguments, as well as the program text.
-
-Otherwise, the variable assignment is performed at a time determined by
-its position among the input file arguments: after the processing of the
-preceding input file argument. For example:
-
-@example
-awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list
-@end example
-
-@noindent
-prints the value of field number @code{n} for all input records. Before
-the first file is read, the command line sets the variable @code{n}
-equal to 4. This causes the fourth field to be printed in lines from
-the file @file{inventory-shipped}. After the first file has finished,
-but before the second file is started, @code{n} is set to 2, so that the
-second field is printed in lines from @file{BBS-list}.
-
-Command line arguments are made available for explicit examination by
-the @code{awk} program in an array named @code{ARGV}
-(@pxref{Built-in Variables}).@refill
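-
-For instance, the following program prints the elements of
-@code{ARGV}, one per line; the file names here are only placeholders.
-(The built-in variable @code{ARGC} holds the number of elements in
-@code{ARGV}.)
-
-@example
-awk 'BEGIN @{
-    for (i = 0; i < ARGC; i++)
-        print i, ARGV[i]
-@}' file1 file2
-@end example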
-
-@code{awk} processes the values of command line assignments for escape
-sequences (@pxref{Constants, ,Constant Expressions}).
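-
-For example, with an assignment like the following (the variable name
-is arbitrary), @code{msg} ends up containing an actual tab character,
-not the two characters @samp{\t}:
-
-@example
-awk -v msg='one\ttwo' 'BEGIN @{ print msg @}'
-@end example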
-
-@node Arithmetic Ops, Concatenation, Variables, Expressions
-@section Arithmetic Operators
-@cindex arithmetic operators
-@cindex operators, arithmetic
-@cindex addition
-@cindex subtraction
-@cindex multiplication
-@cindex division
-@cindex remainder
-@cindex quotient
-@cindex exponentiation
-
-The @code{awk} language uses the common arithmetic operators when
-evaluating expressions. All of these arithmetic operators follow normal
-precedence rules, and work as you would expect them to. This example
-divides field three by field four, adds field two, stores the result
-into field one, and prints the resulting altered input record:
-
-@example
-awk '@{ $1 = $2 + $3 / $4; print @}' inventory-shipped
-@end example
-
-The arithmetic operators in @code{awk} are:
-
-@table @code
-@item @var{x} + @var{y}
-Addition.
-
-@item @var{x} - @var{y}
-Subtraction.
-
-@item - @var{x}
-Negation.
-
-@item + @var{x}
-Unary plus. No real effect on the expression.
-
-@item @var{x} * @var{y}
-Multiplication.
-
-@item @var{x} / @var{y}
-Division. Since all numbers in @code{awk} are double-precision
-floating point, the result is not rounded to an integer: @code{3 / 4}
-has the value 0.75.
-
-@item @var{x} % @var{y}
-@iftex
-@cindex differences between @code{gawk} and @code{awk}
-@end iftex
-Remainder. The quotient is rounded toward zero to an integer,
-multiplied by @var{y} and this result is subtracted from @var{x}.
-This operation is sometimes known as ``trunc-mod.'' The following
-relation always holds:
-
-@example
-b * int(a / b) + (a % b) == a
-@end example
-
-One possibly undesirable effect of this definition of remainder is that
-@code{@var{x} % @var{y}} is negative if @var{x} is negative. Thus,
-
-@example
--17 % 8 = -1
-@end example
-
-In other @code{awk} implementations, the signedness of the remainder
-may be machine dependent.
-
-@item @var{x} ^ @var{y}
-@itemx @var{x} ** @var{y}
-Exponentiation: @var{x} raised to the @var{y} power. @code{2 ^ 3} has
-the value 8. The character sequence @samp{**} is equivalent to
-@samp{^}. (The @sc{posix} standard only specifies the use of @samp{^}
-for exponentiation.)
-@end table
-
-@node Concatenation, Comparison Ops, Arithmetic Ops, Expressions
-@section String Concatenation
-
-@cindex string operators
-@cindex operators, string
-@cindex concatenation
-There is only one string operation: concatenation. It does not have a
-specific operator to represent it. Instead, concatenation is performed by
-writing expressions next to one another, with no operator. For example:
-
-@example
-awk '@{ print "Field number one: " $1 @}' BBS-list
-@end example
-
-@noindent
-produces, for the first record in @file{BBS-list}:
-
-@example
-Field number one: aardvark
-@end example
-
-Without the space in the string constant after the @samp{:}, the line
-would run together. For example:
-
-@example
-awk '@{ print "Field number one:" $1 @}' BBS-list
-@end example
-
-@noindent
-produces, for the first record in @file{BBS-list}:
-
-@example
-Field number one:aardvark
-@end example
-
-Since string concatenation does not have an explicit operator, it is
-often necessary to ensure that it happens where you want it to by
-enclosing the items to be concatenated in parentheses. For example, the
-following code fragment does not concatenate @code{file} and @code{name}
-as you might expect:
-
-@example
-file = "file"
-name = "name"
-print "something meaningful" > file name
-@end example
-
-@noindent
-It is necessary to use the following:
-
-@example
-print "something meaningful" > (file name)
-@end example
-
-We recommend you use parentheses around concatenation in all but the
-most common contexts (such as in the right-hand operand of @samp{=}).
-
-@ignore
-@code{gawk} actually now allows a concatenation on the right hand
-side of a @code{>} redirection, but other @code{awk}s don't. So for
-now we won't mention that fact.
-@end ignore
-
-@node Comparison Ops, Boolean Ops, Concatenation, Expressions
-@section Comparison Expressions
-@cindex comparison expressions
-@cindex expressions, comparison
-@cindex relational operators
-@cindex operators, relational
-@cindex regexp operators
-
-@dfn{Comparison expressions} compare strings or numbers for
-relationships such as equality. They are written using @dfn{relational
-operators}, which are a superset of those in C. Here is a table of
-them:
-
-@table @code
-@item @var{x} < @var{y}
-True if @var{x} is less than @var{y}.
-
-@item @var{x} <= @var{y}
-True if @var{x} is less than or equal to @var{y}.
-
-@item @var{x} > @var{y}
-True if @var{x} is greater than @var{y}.
-
-@item @var{x} >= @var{y}
-True if @var{x} is greater than or equal to @var{y}.
-
-@item @var{x} == @var{y}
-True if @var{x} is equal to @var{y}.
-
-@item @var{x} != @var{y}
-True if @var{x} is not equal to @var{y}.
-
-@item @var{x} ~ @var{y}
-True if the string @var{x} matches the regexp denoted by @var{y}.
-
-@item @var{x} !~ @var{y}
-True if the string @var{x} does not match the regexp denoted by @var{y}.
-
-@item @var{subscript} in @var{array}
-True if array @var{array} has an element with the subscript @var{subscript}.
-@end table
-
-Comparison expressions have the value 1 if true and 0 if false.
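-
-Since these values are ordinary numbers, a comparison can be used
-directly in arithmetic. For example, this program counts how many
-input records have more than four fields:
-
-@example
-awk '@{ count += (NF > 4) @}
-     END @{ print count @}'
-@end example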
-
-The rules @code{gawk} uses for performing comparisons are based on those
-in draft 11.2 of the @sc{posix} standard. The @sc{posix} standard introduced
-the concept of a @dfn{numeric string}, which is simply a string that looks
-like a number, for example, @code{@w{" +2"}}.
-
-@vindex CONVFMT
-When performing a relational operation, @code{gawk} considers the type of an
-operand to be the type it received on its last @emph{assignment}, rather
-than the type of its last @emph{use}
-(@pxref{Values, ,Numeric and String Values}).
-This type is @emph{unknown} when the operand is from an ``external'' source:
-field variables, command line arguments, array elements resulting from a
-@code{split} operation, and the value of an @code{ENVIRON} element.
-In this case only, if the operand is a numeric string, then it is
-considered to be of both string type and numeric type. If at least one
-operand of a comparison is of string type only, then a string
-comparison is performed. Any numeric operand will be converted to a
-string using the value of @code{CONVFMT}
-(@pxref{Conversion, ,Conversion of Strings and Numbers}).
-If one operand of a comparison is numeric, and the other operand is
-either numeric or both numeric and string, then @code{gawk} does a
-numeric comparison. If both operands have both types, then the
-comparison is numeric. Strings are compared
-by comparing the first character of each, then the second character of each,
-and so on. Thus @code{"10"} is less than @code{"9"}. If there are two
-strings where one is a prefix of the other, the shorter string is less than
-the longer one. Thus @code{"abc"} is less than @code{"abcd"}.@refill
-
-Here are some sample expressions, how @code{gawk} compares them, and what
-the result of the comparison is.
-
-@table @code
-@item 1.5 <= 2.0
-numeric comparison (true)
-
-@item "abc" >= "xyz"
-string comparison (false)
-
-@item 1.5 != " +2"
-string comparison (true)
-
-@item "1e2" < "3"
-string comparison (true)
-
-@item a = 2; b = "2"
-@itemx a == b
-string comparison (true)
-@end table
-
-In this example,
-
-@example
-echo 1e2 3 | awk '@{ print ($1 < $2) ? "true" : "false" @}'
-@end example
-
-@noindent
-prints @samp{false} since both @code{$1} and @code{$2} are numeric
-strings and thus have both string and numeric types, which dictates
-a numeric comparison.
-
-The purpose of the comparison rules and the use of numeric strings is
-to attempt to produce the behavior that is ``least surprising,'' while
-still ``doing the right thing.''
-
-String comparisons and regular expression comparisons are very different.
-For example,
-
-@example
-$1 == "foo"
-@end example
-
-@noindent
-has the value of 1, or is true, if the first field of the current input
-record is precisely @samp{foo}. By contrast,
-
-@example
-$1 ~ /foo/
-@end example
-
-@noindent
-has the value 1 if the first field contains @samp{foo}, such as @samp{foobar}.
-
-The right hand operand of the @samp{~} and @samp{!~} operators may be
-either a constant regexp (@code{/@dots{}/}), or it may be an ordinary
-expression, in which case the value of the expression as a string is a
-dynamic regexp (@pxref{Regexp Usage, ,How to Use Regular Expressions}).
-
-@cindex regexp as expression
-In very recent implementations of @code{awk}, a constant regular
-expression in slashes by itself is also an expression. The regexp
-@code{/@var{regexp}/} is an abbreviation for this comparison expression:
-
-@example
-$0 ~ /@var{regexp}/
-@end example
-
-In some contexts it may be necessary to write parentheses around the
-regexp to avoid confusing the @code{gawk} parser. For example,
-@code{(/x/ - /y/) > threshold} is not allowed, but @code{((/x/) - (/y/))
-> threshold} parses properly.
-
-One special place where @code{/foo/} is @emph{not} an abbreviation for
-@code{$0 ~ /foo/} is when it is the right-hand operand of @samp{~} or
-@samp{!~}! @xref{Constants, ,Constant Expressions}, where this is
-discussed in more detail.
-
-@node Boolean Ops, Assignment Ops, Comparison Ops, Expressions
-@section Boolean Expressions
-@cindex expressions, boolean
-@cindex boolean expressions
-@cindex operators, boolean
-@cindex boolean operators
-@cindex logical operations
-@cindex and operator
-@cindex or operator
-@cindex not operator
-
-A @dfn{boolean expression} is a combination of comparison expressions or
-matching expressions, using the boolean operators ``or''
-(@samp{||}), ``and'' (@samp{&&}), and ``not'' (@samp{!}), along with
-parentheses to control nesting. The truth of the boolean expression is
-computed by combining the truth values of the component expressions.
-
-Boolean expressions can be used wherever comparison and matching
-expressions can be used. They can be used in @code{if}, @code{while}
-@code{do} and @code{for} statements. They have numeric values (1 if true,
-0 if false), which come into play if the result of the boolean expression
-is stored in a variable, or used in arithmetic.@refill
-
-In addition, every boolean expression is also a valid boolean pattern, so
-you can use it as a pattern to control the execution of rules.
-
-Here are descriptions of the three boolean operators, with an example of
-each. It may be instructive to compare these examples with the
-analogous examples of boolean patterns
-(@pxref{Boolean Patterns, ,Boolean Operators and Patterns}), which
-use the same boolean operators in patterns instead of expressions.@refill
-
-@table @code
-@item @var{boolean1} && @var{boolean2}
-True if both @var{boolean1} and @var{boolean2} are true. For example,
-the following statement prints the current input record if it contains
-both @samp{2400} and @samp{foo}.@refill
-
-@smallexample
-if ($0 ~ /2400/ && $0 ~ /foo/) print
-@end smallexample
-
-The subexpression @var{boolean2} is evaluated only if @var{boolean1}
-is true. This can make a difference when @var{boolean2} contains
-expressions that have side effects: in the case of @code{$0 ~ /foo/ &&
-($2 == bar++)}, the variable @code{bar} is not incremented if there is
-no @samp{foo} in the record.
-
-@item @var{boolean1} || @var{boolean2}
-True if at least one of @var{boolean1} or @var{boolean2} is true.
-For example, the following command prints all records in the input
-file @file{BBS-list} that contain @emph{either} @samp{2400} or
-@samp{foo}, or both.@refill
-
-@smallexample
-awk '@{ if ($0 ~ /2400/ || $0 ~ /foo/) print @}' BBS-list
-@end smallexample
-
-The subexpression @var{boolean2} is evaluated only if @var{boolean1}
-is false. This can make a difference when @var{boolean2} contains
-expressions that have side effects.
-
-@item !@var{boolean}
-True if @var{boolean} is false. For example, the following program prints
-all records in the input file @file{BBS-list} that do @emph{not} contain the
-string @samp{foo}.
-
-@smallexample
-awk '@{ if (! ($0 ~ /foo/)) print @}' BBS-list
-@end smallexample
-@end table
-
-@node Assignment Ops, Increment Ops, Boolean Ops, Expressions
-@section Assignment Expressions
-@cindex assignment operators
-@cindex operators, assignment
-@cindex expressions, assignment
-
-An @dfn{assignment} is an expression that stores a new value into a
-variable. For example, let's assign the value 1 to the variable
-@code{z}:@refill
-
-@example
-z = 1
-@end example
-
-After this expression is executed, the variable @code{z} has the value 1.
-Whatever old value @code{z} had before the assignment is forgotten.
-
-Assignments can store string values also. For example, this would store
-the value @code{"this food is good"} in the variable @code{message}:
-
-@example
-thing = "food"
-predicate = "good"
-message = "this " thing " is " predicate
-@end example
-
-@noindent
-(This also illustrates concatenation of strings.)
-
-The @samp{=} sign is called an @dfn{assignment operator}. It is the
-simplest assignment operator because the value of the right-hand
-operand is stored unchanged.
-
-@cindex side effect
-Most operators (addition, concatenation, and so on) have no effect
-except to compute a value. If you ignore the value, you might as well
-not use the operator. An assignment operator is different; it does
-produce a value, but even if you ignore the value, the assignment still
-makes itself felt through the alteration of the variable. We call this
-a @dfn{side effect}.
-
-@cindex lvalue
-The left-hand operand of an assignment need not be a variable
-(@pxref{Variables}); it can also be a field
-(@pxref{Changing Fields, ,Changing the Contents of a Field}) or
-an array element (@pxref{Arrays, ,Arrays in @code{awk}}).
-These are all called @dfn{lvalues},
-which means they can appear on the left-hand side of an assignment operator.
-The right-hand operand may be any expression; it produces the new value
-which the assignment stores in the specified variable, field or array
-element.@refill
-
-It is important to note that variables do @emph{not} have permanent types.
-The type of a variable is simply the type of whatever value it happens
-to hold at the moment. In the following program fragment, the variable
-@code{foo} has a numeric value at first, and a string value later on:
-
-@example
-foo = 1
-print foo
-foo = "bar"
-print foo
-@end example
-
-@noindent
-When the second assignment gives @code{foo} a string value, the fact that
-it previously had a numeric value is forgotten.
-
-An assignment is an expression, so it has a value: the same value that
-is assigned. Thus, @code{z = 1} as an expression has the value 1.
-One consequence of this is that you can write multiple assignments together:
-
-@example
-x = y = z = 0
-@end example
-
-@noindent
-stores the value 0 in all three variables. It does this because the
-value of @code{z = 0}, which is 0, is stored into @code{y}, and then
-the value of @code{y = z = 0}, which is 0, is stored into @code{x}.
-
-You can use an assignment anywhere an expression is called for. For
-example, it is valid to write @code{x != (y = 1)} to set @code{y} to 1
-and then test whether @code{x} equals 1. But this style tends to make
-programs hard to read; except in a one-shot program, you should
-rewrite it to get rid of such nesting of assignments. This is never very
-hard.
-
-Aside from @samp{=}, there are several other assignment operators that
-do arithmetic with the old value of the variable. For example, the
-operator @samp{+=} computes a new value by adding the right-hand value
-to the old value of the variable. Thus, the following assignment adds
-5 to the value of @code{foo}:
-
-@example
-foo += 5
-@end example
-
-@noindent
-This is precisely equivalent to the following:
-
-@example
-foo = foo + 5
-@end example
-
-@noindent
-Use whichever one makes the meaning of your program clearer.
-
-Here is a table of the arithmetic assignment operators. In each
-case, the right-hand operand is an expression whose value is converted
-to a number.
-
-@table @code
-@item @var{lvalue} += @var{increment}
-Adds @var{increment} to the value of @var{lvalue} to make the new value
-of @var{lvalue}.
-
-@item @var{lvalue} -= @var{decrement}
-Subtracts @var{decrement} from the value of @var{lvalue}.
-
-@item @var{lvalue} *= @var{coefficient}
-Multiplies the value of @var{lvalue} by @var{coefficient}.
-
-@item @var{lvalue} /= @var{quotient}
-Divides the value of @var{lvalue} by @var{quotient}.
-
-@item @var{lvalue} %= @var{modulus}
-Sets @var{lvalue} to its remainder by @var{modulus}.
-
-@item @var{lvalue} ^= @var{power}
-@itemx @var{lvalue} **= @var{power}
-Raises @var{lvalue} to the power @var{power}.
-(Only the @code{^=} operator is specified by @sc{posix}.)
-@end table
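-
-For example, this small (purely illustrative) fragment exercises
-several of these operators in turn:
-
-@example
-x = 10
-x -= 3    # x is now 7
-x *= 4    # x is now 28
-x %= 5    # x is now 3
-x ^= 2    # x is now 9
-print x
-@end example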
-
-@ignore
-From: gatech!ames!elroy!cit-vax!EQL.Caltech.Edu!rankin (Pat Rankin)
- In the discussion of assignment operators, it states that
-``foo += 5'' "is precisely equivalent to" ``foo = foo + 5'' (p.77). That
-may be true for simple variables, but it's not true for expressions with
-side effects, like array references. For proof, try
- BEGIN {
- foo[rand()] += 5; for (x in foo) print x, foo[x]
- bar[rand()] = bar[rand()] + 5; for (x in bar) print x, bar[x]
- }
-I suspect that the original statement is simply untrue--that '+=' is more
-efficient in all cases.
-
-ADR --- Try to add something about this here for the next go 'round.
-@end ignore
-
-@node Increment Ops, Conversion, Assignment Ops, Expressions
-@section Increment Operators
-
-@cindex increment operators
-@cindex operators, increment
-@dfn{Increment operators} increase or decrease the value of a variable
-by 1. You could do the same thing with an assignment operator, so
-the increment operators add no power to the @code{awk} language; but they
-are convenient abbreviations for something very common.
-
-The operator to add 1 is written @samp{++}. It can be used to increment
-a variable either before or after taking its value.
-
-To pre-increment a variable @var{v}, write @code{++@var{v}}. This adds
-1 to the value of @var{v} and that new value is also the value of this
-expression. The assignment expression @code{@var{v} += 1} is completely
-equivalent.
-
-Writing the @samp{++} after the variable specifies post-increment. This
-increments the variable value just the same; the difference is that the
-value of the increment expression itself is the variable's @emph{old}
-value. Thus, if @code{foo} has the value 4, then the expression @code{foo++}
-has the value 4, but it changes the value of @code{foo} to 5.
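-
-The following fragment makes the difference concrete:
-
-@example
-foo = 4
-print foo++    # prints 4; foo is now 5
-print ++foo    # prints 6; foo is now 6
-@end example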
-
-The post-increment @code{foo++} is nearly equivalent to writing @code{(foo
-+= 1) - 1}. It is not perfectly equivalent because all numbers in
-@code{awk} are floating point: in floating point, @code{foo + 1 - 1} does
-not necessarily equal @code{foo}. But the difference is minute as
-long as you stick to numbers that are fairly small (less than a trillion).
-
-Any lvalue can be incremented. Fields and array elements are incremented
-just like variables. (Use @samp{$(i++)} when you wish to do a field reference
-and a variable increment at the same time. The parentheses are necessary
-because of the precedence of the field reference operator, @samp{$}.)
-@c expert information in the last parenthetical remark
-
-The decrement operator @samp{--} works just like @samp{++} except that
-it subtracts 1 instead of adding. Like @samp{++}, it can be used before
-the lvalue to pre-decrement or after it to post-decrement.
-
-Here is a summary of increment and decrement expressions.
-
-@table @code
-@item ++@var{lvalue}
-This expression increments @var{lvalue} and the new value becomes the
-value of this expression.
-
-@item @var{lvalue}++
-This expression causes the contents of @var{lvalue} to be incremented.
-The value of the expression is the @emph{old} value of @var{lvalue}.
-
-@item --@var{lvalue}
-Like @code{++@var{lvalue}}, but instead of adding, it subtracts. It
-decrements @var{lvalue} and delivers the value that results.
-
-@item @var{lvalue}--
-Like @code{@var{lvalue}++}, but instead of adding, it subtracts. It
-decrements @var{lvalue}. The value of the expression is the @emph{old}
-value of @var{lvalue}.
-@end table
-
-@node Conversion, Values, Increment Ops, Expressions
-@section Conversion of Strings and Numbers
-
-@cindex conversion of strings and numbers
-Strings are converted to numbers, and numbers to strings, if the context
-of the @code{awk} program demands it. For example, if the value of
-either @code{foo} or @code{bar} in the expression @code{foo + bar}
-happens to be a string, it is converted to a number before the addition
-is performed. If numeric values appear in string concatenation, they
-are converted to strings. Consider this:@refill
-
-@example
-two = 2; three = 3
-print (two three) + 4
-@end example
-
-@noindent
-This eventually prints the (numeric) value 27. The numeric values of
-the variables @code{two} and @code{three} are converted to strings and
-concatenated together, and the resulting string is converted back to the
-number 23, to which 4 is then added.
-
-If, for some reason, you need to force a number to be converted to a
-string, concatenate the null string with that number. To force a string
-to be converted to a number, add zero to that string.
-
-A string is converted to a number by interpreting a numeric prefix
-of the string as numerals:
-@code{"2.5"} converts to 2.5, @code{"1e3"} converts to 1000, and @code{"25fix"}
-has a numeric value of 25.
-Strings that can't be interpreted as valid numbers are converted to
-zero.
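-
-The following fragment (the variable names are arbitrary) illustrates
-both of these rules:
-
-@example
-str = "3.1 is pi"
-num = str + 0        # forces a number; num is 3.1
-text = (num + 1) ""  # forces a string; text is "4.1"
-print num, text
-@end example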
-
-@vindex CONVFMT
-The exact manner in which numbers are converted into strings is controlled
-by the @code{awk} built-in variable @code{CONVFMT} (@pxref{Built-in Variables}).
-Numbers are converted using a special version of the @code{sprintf} function
-(@pxref{Built-in, ,Built-in Functions}) with @code{CONVFMT} as the format
-specifier.@refill
-
-@code{CONVFMT}'s default value is @code{"%.6g"}, which prints a value with
-at least six significant digits. For some applications you will want to
-change it to specify more precision. Double precision on most modern
-machines gives you 16 or 17 decimal digits of precision.
-
-Strange results can happen if you set @code{CONVFMT} to a string that doesn't
-tell @code{sprintf} how to format floating point numbers in a useful way.
-For example, if you forget the @samp{%} in the format, all numbers will be
-converted to the same constant string.@refill
-
-As a special case, if a number is an integer, then the result of converting
-it to a string is @emph{always} an integer, no matter what the value of
-@code{CONVFMT} may be. Given the following code fragment:
-
-@example
-CONVFMT = "%2.2f"
-a = 12
-b = a ""
-@end example
-
-@noindent
-@code{b} has the value @code{"12"}, not @code{"12.00"}.
-
-@ignore
-For the 2.14 version, describe the ``stickyness'' of conversions. Right now
-the manual assumes everywhere that variables are either numbers or strings;
-in fact both kinds of values may be valid. If both happen to be valid, a
-conversion isn't necessary and isn't done. Revising the manual to be
-consistent with this, though, is too big a job to tackle at the moment.
-
-7/92: This has sort of been done, only the section isn't completely right!
- What to do?
-7/92: Pretty much fixed, at least for the short term, thanks to text
- from David.
-@end ignore
-
-@vindex OFMT
-Prior to the @sc{posix} standard, @code{awk} specified that the value
-of @code{OFMT} was used for converting numbers to strings. @code{OFMT}
-specifies the output format to use when printing numbers with @code{print}.
-@code{CONVFMT} was introduced in order to separate the semantics of
-conversions from the semantics of printing. Both @code{CONVFMT} and
-@code{OFMT} have the same default value: @code{"%.6g"}. In the vast majority
-of cases, old @code{awk} programs will not change their behavior.
-However, this use of @code{OFMT} is something to keep in mind if you must
-port your program to other implementations of @code{awk}; we recommend
-that instead of changing your programs, you just port @code{gawk} itself!@refill
-
-@node Values, Conditional Exp, Conversion, Expressions
-@section Numeric and String Values
-@cindex conversion of strings and numbers
-
-Through most of this manual, we present @code{awk} values (such as constants,
-fields, or variables) as @emph{either} numbers @emph{or} strings. This is
-a convenient way to think about them, since typically they are used in only
-one way, or the other.
-
-In truth though, @code{awk} values can be @emph{both} string and
-numeric, at the same time. Internally, @code{awk} represents values
-with a string, a (floating point) number, and an indication that one,
-the other, or both representations of the value are valid.
-
-Keeping track of both kinds of values is important for execution
-efficiency: a variable can acquire a string value the first time it
-is used as a string, and then that string value can be used until the
-variable is assigned a new value. Thus, if a variable with only a numeric
-value is used in several concatenations in a row, it only has to be given
-a string representation once. The numeric value remains valid, so that
-no conversion back to a number is necessary if the variable is later used
-in an arithmetic expression.
-
-Tracking both kinds of values is also important for precise numerical
-calculations. Consider the following:
-
-@smallexample
-a = 123.321
-CONVFMT = "%3.1f"
-b = a " is a number"
-c = a + 1.654
-@end smallexample
-
-@noindent
-The variable @code{a} receives a string value in the concatenation and
-assignment to @code{b}. The string value of @code{a} is @code{"123.3"}.
-If the numeric value was lost when it was converted to a string, then the
-numeric use of @code{a} in the last statement would lose information.
-@code{c} would be assigned the value 124.954 instead of 124.975.
-Such errors accumulate rapidly, and very adversely affect numeric
-computations.@refill
-
-Once a numeric value acquires a corresponding string value, it stays valid
-until a new assignment is made. If @code{CONVFMT}
-(@pxref{Conversion, ,Conversion of Strings and Numbers}) changes in the
-meantime, the old string value will still be used. For example:@refill
-
-@smallexample
-BEGIN @{
- CONVFMT = "%2.2f"
- a = 123.456
- b = a "" # force `a' to have string value too
- printf "a = %s\n", a
- CONVFMT = "%.6g"
- printf "a = %s\n", a
- a += 0 # make `a' numeric only again
- printf "a = %s\n", a # use `a' as string
-@}
-@end smallexample
-
-@noindent
-This program prints @samp{a = 123.46} twice, and then prints
-@samp{a = 123.456}.
-
-@xref{Conversion, ,Conversion of Strings and Numbers}, for the rules that
-specify how string values are made from numeric values.
-
-@node Conditional Exp, Function Calls, Values, Expressions
-@section Conditional Expressions
-@cindex conditional expression
-@cindex expression, conditional
-
-A @dfn{conditional expression} is a special kind of expression with
-three operands. It allows you to use one expression's value to select
-one of two other expressions.
-
-The conditional expression looks the same as in the C language:
-
-@example
-@var{selector} ? @var{if-true-exp} : @var{if-false-exp}
-@end example
-
-@noindent
-There are three subexpressions. The first, @var{selector}, is always
-computed first. If it is ``true'' (not zero and not null) then
-@var{if-true-exp} is computed next and its value becomes the value of
-the whole expression. Otherwise, @var{if-false-exp} is computed next
-and its value becomes the value of the whole expression.@refill
-
-For example, this expression produces the absolute value of @code{x}:
-
-@example
-x > 0 ? x : -x
-@end example
-
-Each time the conditional expression is computed, exactly one of
-@var{if-true-exp} and @var{if-false-exp} is computed; the other is ignored.
-This is important when the expressions contain side effects. For example,
-this conditional expression examines element @code{i} of either array
-@code{a} or array @code{b}, and increments @code{i}.
-
-@example
-x == y ? a[i++] : b[i++]
-@end example
-
-@noindent
-This is guaranteed to increment @code{i} exactly once, because each
-time exactly one of the two increment expressions is executed and the
-other is not.
-
-@node Function Calls, Precedence, Conditional Exp, Expressions
-@section Function Calls
-@cindex function call
-@cindex calling a function
-
-A @dfn{function} is a name for a particular calculation. Because it has
-a name, you can ask for it by name at any point in the program. For
-example, the function @code{sqrt} computes the square root of a number.
-
-A fixed set of functions are @dfn{built-in}, which means they are
-available in every @code{awk} program. The @code{sqrt} function is one
-of these. @xref{Built-in, ,Built-in Functions}, for a list of built-in
-functions and their descriptions. In addition, you can define your own
-functions in the program for use elsewhere in the same program.
-@xref{User-defined, ,User-defined Functions}, for how to do this.@refill
-
-@cindex arguments in function call
-The way to use a function is with a @dfn{function call} expression,
-which consists of the function name followed by a list of
-@dfn{arguments} in parentheses. The arguments are expressions which
-give the raw materials for the calculation that the function will do.
-When there is more than one argument, they are separated by commas. If
-there are no arguments, write just @samp{()} after the function name.
-Here are some examples:
-
-@example
-sqrt(x^2 + y^2) # @r{One argument}
-atan2(y, x) # @r{Two arguments}
-rand() # @r{No arguments}
-@end example
-
-@strong{Do not put any space between the function name and the
-open-parenthesis!} A user-defined function name looks just like the name of
-a variable, and space would make the expression look like concatenation
-of a variable with an expression inside parentheses. Space before the
-parenthesis is harmless with built-in functions, but it is best not
-to get into the habit of using space, so as to avoid mistakes with
-user-defined functions.
-
-Each function expects a particular number of arguments. For example, the
-@code{sqrt} function must be called with a single argument, the number
-to take the square root of:
-
-@example
-sqrt(@var{argument})
-@end example
-
-Some of the built-in functions allow you to omit the final argument.
-If you do so, they use a reasonable default.
-@xref{Built-in, ,Built-in Functions}, for full details. If arguments
-are omitted in calls to user-defined functions, then those arguments are
-treated as local variables, initialized to the null string
-(@pxref{User-defined, ,User-defined Functions}).@refill
-
-Like every other expression, the function call has a value, which is
-computed by the function based on the arguments you give it. In this
-example, the value of @code{sqrt(@var{argument})} is the square root of the
-argument. A function can also have side effects, such as assigning the
-values of certain variables or doing I/O.
-
-Here is a command to read numbers, one number per line, and print the
-square root of each one:
-
-@example
-awk '@{ print "The square root of", $1, "is", sqrt($1) @}'
-@end example
-
-@node Precedence, , Function Calls, Expressions
-@section Operator Precedence (How Operators Nest)
-@cindex precedence
-@cindex operator precedence
-
-@dfn{Operator precedence} determines how operators are grouped, when
-different operators appear close by in one expression. For example,
-@samp{*} has higher precedence than @samp{+}; thus, @code{a + b * c}
-means to multiply @code{b} and @code{c}, and then add @code{a} to the
-product (i.e., @code{a + (b * c)}).
-
-You can overrule the precedence of the operators by using parentheses.
-You can think of the precedence rules as saying where the
-parentheses are assumed if you do not write parentheses yourself. In
-fact, it is wise to always use parentheses whenever you have an unusual
-combination of operators, because other people who read the program may
-not remember what the precedence is in this case. You might forget,
-too; then you could make a mistake. Explicit parentheses will help prevent
-any such mistake.
-
-When operators of equal precedence are used together, the leftmost
-operator groups first, except for the assignment, conditional and
-exponentiation operators, which group in the opposite order.
-Thus, @code{a - b + c} groups as @code{(a - b) + c};
-@code{a = b = c} groups as @code{a = (b = c)}.@refill
-
-The precedence of prefix unary operators does not matter as long as only
-unary operators are involved, because there is only one way to parse
-them---innermost first. Thus, @code{$++i} means @code{$(++i)} and
-@code{++$x} means @code{++($x)}. However, when another operator follows
-the operand, then the precedence of the unary operators can matter.
-Thus, @code{$x^2} means @code{($x)^2}, but @code{-x^2} means
-@code{-(x^2)}, because @samp{-} has lower precedence than @samp{^}
-while @samp{$} has higher precedence.
-
-Here is a table of the operators of @code{awk}, in order of increasing
-precedence:
-
-@table @asis
-@item assignment
-@samp{=}, @samp{+=}, @samp{-=}, @samp{*=}, @samp{/=}, @samp{%=},
-@samp{^=}, @samp{**=}. These operators group right-to-left.
-(The @samp{**=} operator is not specified by @sc{posix}.)
-
-@item conditional
-@samp{?:}. This operator groups right-to-left.
-
-@item logical ``or''.
-@samp{||}.
-
-@item logical ``and''.
-@samp{&&}.
-
-@item array membership
-@samp{in}.
-
-@item matching
-@samp{~}, @samp{!~}.
-
-@item relational, and redirection
-The relational operators and the redirections have the same precedence
-level. Characters such as @samp{>} serve both as relationals and as
-redirections; the context distinguishes between the two meanings.
-
-The relational operators are @samp{<}, @samp{<=}, @samp{==}, @samp{!=},
-@samp{>=} and @samp{>}.
-
-The I/O redirection operators are @samp{<}, @samp{>}, @samp{>>} and
-@samp{|}.
-
-Note that I/O redirection operators in @code{print} and @code{printf}
-statements belong to the statement level, not to expressions. The
-redirection does not produce an expression which could be the operand of
-another operator. As a result, it does not make sense to use a
-redirection operator near another operator of lower precedence, without
-parentheses. Such combinations, for example @samp{print foo > a ? b :
-c}, result in syntax errors.
-
-@item concatenation
-No special token is used to indicate concatenation.
-The operands are simply written side by side.
-
-@item add, subtract
-@samp{+}, @samp{-}.
-
-@item multiply, divide, mod
-@samp{*}, @samp{/}, @samp{%}.
-
-@item unary plus, minus, ``not''
-@samp{+}, @samp{-}, @samp{!}.
-
-@item exponentiation
-@samp{^}, @samp{**}. These operators group right-to-left.
-(The @samp{**} operator is not specified by @sc{posix}.)
-
-@item increment, decrement
-@samp{++}, @samp{--}.
-
-@item field
-@samp{$}.
-@end table
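-
-One consequence of these precedence levels is that concatenation
-binds less tightly than the arithmetic operators. For example,
-
-@example
-print -12 " " -24
-@end example
-
-@noindent
-prints @samp{-12-24}: since subtraction has higher precedence than
-concatenation, the string @code{" "} is converted to zero, 24 is
-subtracted from it, and that result is then concatenated with
-@samp{-12}. Writing @code{print -12 " " (-24)} produces the intended
-@samp{-12 -24}.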
-
-@node Statements, Arrays, Expressions, Top
-@chapter Control Statements in Actions
-@cindex control statement
-
-@dfn{Control statements} such as @code{if}, @code{while}, and so on
-control the flow of execution in @code{awk} programs. Most of the
-control statements in @code{awk} are patterned on similar statements in
-C.
-
-All the control statements start with special keywords such as @code{if}
-and @code{while}, to distinguish them from simple expressions.
-
-Many control statements contain other statements; for example, the
-@code{if} statement contains another statement which may or may not be
-executed. The contained statement is called the @dfn{body}. If you
-want to include more than one statement in the body, group them into a
-single compound statement with curly braces, separating them with
-newlines or semicolons.
-
-@menu
-* If Statement:: Conditionally execute
- some @code{awk} statements.
-* While Statement:: Loop until some condition is satisfied.
-* Do Statement:: Do specified action while looping until some
- condition is satisfied.
-* For Statement:: Another looping statement, that provides
- initialization and increment clauses.
-* Break Statement:: Immediately exit the innermost enclosing loop.
-* Continue Statement:: Skip to the end of the innermost
- enclosing loop.
-* Next Statement:: Stop processing the current input record.
-* Next File Statement:: Stop processing the current file.
-* Exit Statement:: Stop execution of @code{awk}.
-@end menu
-
-@node If Statement, While Statement, Statements, Statements
-@section The @code{if} Statement
-
-@cindex @code{if} statement
-The @code{if}-@code{else} statement is @code{awk}'s decision-making
-statement. It looks like this:@refill
-
-@example
-if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]}
-@end example
-
-@noindent
-@var{condition} is an expression that controls what the rest of the
-statement will do. If @var{condition} is true, @var{then-body} is
-executed; otherwise, @var{else-body} is executed (assuming that the
-@code{else} clause is present). The @code{else} part of the statement is
-optional. The condition is considered false if its value is zero or
-the null string, and true otherwise.@refill
-
-Here is an example:
-
-@example
-if (x % 2 == 0)
- print "x is even"
-else
- print "x is odd"
-@end example
-
-In this example, if the expression @code{x % 2 == 0} is true (that is,
-the value of @code{x} is divisible by 2), then the first @code{print}
-statement is executed; otherwise, the second @code{print} statement is
-performed.@refill
-
-If the @code{else} appears on the same line as @var{then-body}, and
-@var{then-body} is not a compound statement (i.e., not surrounded by
-curly braces), then a semicolon must separate @var{then-body} from
-@code{else}. To illustrate this, let's rewrite the previous example:
-
-@example
-awk '@{ if (x % 2 == 0) print "x is even"; else
- print "x is odd" @}'
-@end example
-
-@noindent
-If you forget the @samp{;}, @code{awk} won't be able to parse the
-statement, and you will get a syntax error.
-
-We would not actually write this example this way, because a human
-reader might fail to see the @code{else} if it were not the first thing
-on its line.
-
-@node While Statement, Do Statement, If Statement, Statements
-@section The @code{while} Statement
-@cindex @code{while} statement
-@cindex loop
-@cindex body of a loop
-
-In programming, a @dfn{loop} means a part of a program that is (or at least can
-be) executed two or more times in succession.
-
-The @code{while} statement is the simplest looping statement in
-@code{awk}. It repeatedly executes a statement as long as a condition is
-true. It looks like this:
-
-@example
-while (@var{condition})
- @var{body}
-@end example
-
-@noindent
-Here @var{body} is a statement that we call the @dfn{body} of the loop,
-and @var{condition} is an expression that controls how long the loop
-keeps running.
-
-The first thing the @code{while} statement does is test @var{condition}.
-If @var{condition} is true, it executes the statement @var{body}.
-(@var{condition} is true when the value
-is not zero and not a null string.) After @var{body} has been executed,
-@var{condition} is tested again, and if it is still true, @var{body} is
-executed again. This process repeats until @var{condition} is no longer
-true. If @var{condition} is initially false, the body of the loop is
-never executed.@refill
-
-This example prints the first three fields of each record, one per line.
-
-@example
-awk '@{ i = 1
- while (i <= 3) @{
- print $i
- i++
- @}
-@}'
-@end example
-
-@noindent
-Here the body of the loop is a compound statement enclosed in braces,
-containing two statements.
-
-The loop works like this: first, the value of @code{i} is set to 1.
-Then, the @code{while} tests whether @code{i} is less than or equal to
-three. This is the case when @code{i} equals one, so the @code{i}-th
-field is printed. Then the @code{i++} increments the value of @code{i}
-and the loop repeats. The loop terminates when @code{i} reaches 4.
-
-As you can see, a newline is not required between the condition and the
-body; but using one makes the program clearer unless the body is a
-compound statement or is very simple. The newline after the open-brace
-that begins the compound statement is not required either, but the
-program would be hard to read without it.
-
-@node Do Statement, For Statement, While Statement, Statements
-@section The @code{do}-@code{while} Statement
-
-The @code{do} loop is a variation of the @code{while} looping statement.
-The @code{do} loop executes the @var{body} once, then repeats @var{body}
-as long as @var{condition} is true. It looks like this:
-
-@example
-do
- @var{body}
-while (@var{condition})
-@end example
-
-Even if @var{condition} is false at the start, @var{body} is executed at
-least once (and only once, unless executing @var{body} makes
-@var{condition} true). Contrast this with the corresponding
-@code{while} statement:
-
-@example
-while (@var{condition})
- @var{body}
-@end example
-
-@noindent
-This statement does not execute @var{body} even once if @var{condition}
-is false to begin with.
-
-Here is an example of a @code{do} statement:
-
-@example
-awk '@{ i = 1
- do @{
- print $0
- i++
- @} while (i <= 10)
-@}'
-@end example
-
-@noindent
-prints each input record ten times. It isn't a very realistic example,
-since in this case an ordinary @code{while} would do just as well. But
-this reflects actual experience; there is only occasionally a real use
-for a @code{do} statement.@refill
-
-@node For Statement, Break Statement, Do Statement, Statements
-@section The @code{for} Statement
-@cindex @code{for} statement
-
-The @code{for} statement makes it more convenient to count iterations of a
-loop. The general form of the @code{for} statement looks like this:@refill
-
-@example
-for (@var{initialization}; @var{condition}; @var{increment})
- @var{body}
-@end example
-
-@noindent
-This statement starts by executing @var{initialization}. Then, as long
-as @var{condition} is true, it repeatedly executes @var{body} and then
-@var{increment}. Typically @var{initialization} sets a variable to
-either zero or one, @var{increment} adds 1 to it, and @var{condition}
-compares it against the desired number of iterations.
-
-Here is an example of a @code{for} statement:
-
-@example
-@group
-awk '@{ for (i = 1; i <= 3; i++)
- print $i
-@}'
-@end group
-@end example
-
-@noindent
-This prints the first three fields of each input record, one field per
-line.
-
-In the @code{for} statement, @var{body} stands for any statement, but
-@var{initialization}, @var{condition} and @var{increment} are just
-expressions. You cannot set more than one variable in the
-@var{initialization} part unless you use a multiple assignment statement
-such as @code{x = y = 0}, which is possible only if all the initial values
-are equal. (But you can initialize additional variables by writing
-their assignments as separate statements preceding the @code{for} loop.)
-
-The same is true of the @var{increment} part; to increment additional
-variables, you must write separate statements at the end of the loop.
-The C compound expression, using C's comma operator, would be useful in
-this context, but it is not supported in @code{awk}.
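-
-For example, here is a sketch that steps two variables together, with
-the extra initialization and the extra increment written as separate
-statements (the variable names are arbitrary; @code{i} counts up while
-@code{j} counts down):
-
-@example
-awk 'BEGIN @{
-       j = 10
-       for (i = 1; i <= 10; i++) @{
-               print i, j
-               j--
-       @}
-@}'
-@end example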
-
-Most often, @var{increment} is an increment expression, as in the
-example above. But this is not required; it can be any expression
-whatever. For example, this statement prints all the powers of 2
-between 1 and 100:
-
-@example
-for (i = 1; i <= 100; i *= 2)
- print i
-@end example
-
-Any of the three expressions in the parentheses following the @code{for} may
-be omitted if there is nothing to be done there. Thus, @w{@samp{for (;x
-> 0;)}} is equivalent to @w{@samp{while (x > 0)}}. If the
-@var{condition} is omitted, it is treated as @var{true}, effectively
-yielding an @dfn{infinite loop} (i.e., a loop that will never
-terminate).@refill
-
-In most cases, a @code{for} loop is an abbreviation for a @code{while}
-loop, as shown here:
-
-@example
-@var{initialization}
-while (@var{condition}) @{
- @var{body}
- @var{increment}
-@}
-@end example
-
-@noindent
-The only exception is when the @code{continue} statement
-(@pxref{Continue Statement, ,The @code{continue} Statement}) is used
-inside the loop; changing a @code{for} statement to a @code{while}
-statement in this way can change the effect of the @code{continue}
-statement inside the loop.@refill
-
-There is an alternate version of the @code{for} loop, for iterating over
-all the indices of an array:
-
-@example
-for (i in array)
- @var{do something with} array[i]
-@end example
-
-@noindent
-@xref{Arrays, ,Arrays in @code{awk}}, for more information on this
-version of the @code{for} loop.
-
-The @code{awk} language has a @code{for} statement in addition to a
-@code{while} statement because often a @code{for} loop is both less work to
-type and more natural to think of. Counting the number of iterations is
-very common in loops. It can be easier to think of this counting as part
-of looping rather than as something to do inside the loop.
-
-The next section has more complicated examples of @code{for} loops.
-
-@node Break Statement, Continue Statement, For Statement, Statements
-@section The @code{break} Statement
-@cindex @code{break} statement
-@cindex loops, exiting
-
-The @code{break} statement jumps out of the innermost @code{for},
-@code{while}, or @code{do}-@code{while} loop that encloses it. The
-following example finds the smallest divisor of any integer, and also
-identifies prime numbers:@refill
-
-@smallexample
-awk '# find smallest divisor of num
- @{ num = $1
- for (div = 2; div*div <= num; div++)
- if (num % div == 0)
- break
- if (num % div == 0)
- printf "Smallest divisor of %d is %d\n", num, div
- else
- printf "%d is prime\n", num @}'
-@end smallexample
-
-When the remainder is zero in the first @code{if} statement, @code{awk}
-immediately @dfn{breaks out} of the containing @code{for} loop. This means
-that @code{awk} proceeds immediately to the statement following the loop
-and continues processing. (This is very different from the @code{exit}
-statement which stops the entire @code{awk} program.
-@xref{Exit Statement, ,The @code{exit} Statement}.)@refill
-
-Here is another program equivalent to the previous one. It illustrates how
-the @var{condition} of a @code{for} or @code{while} could just as well be
-replaced with a @code{break} inside an @code{if}:
-
-@smallexample
-@group
-awk '# find smallest divisor of num
- @{ num = $1
- for (div = 2; ; div++) @{
- if (num % div == 0) @{
- printf "Smallest divisor of %d is %d\n", num, div
- break
- @}
- if (div*div > num) @{
- printf "%d is prime\n", num
- break
- @}
- @}
-@}'
-@end group
-@end smallexample
-
-@node Continue Statement, Next Statement, Break Statement, Statements
-@section The @code{continue} Statement
-
-@cindex @code{continue} statement
-The @code{continue} statement, like @code{break}, is used only inside
-@code{for}, @code{while}, and @code{do}-@code{while} loops. It skips
-over the rest of the loop body, causing the next cycle around the loop
-to begin immediately. Contrast this with @code{break}, which jumps out
-of the loop altogether. Here is an example:@refill
-
-@example
-# print names that don't contain the string "ignore"
-
-# first, save the text of each line
-@{ names[NR] = $0 @}
-
-# print what we're interested in
-END @{
- for (x in names) @{
- if (names[x] ~ /ignore/)
- continue
- print names[x]
- @}
-@}
-@end example
-
-If one of the input records contains the string @samp{ignore}, this
-example skips the print statement for that record, and continues back to
-the first statement in the loop.
-
-This is not a practical example of @code{continue}, since it would be
-just as easy to write the loop like this:
-
-@example
-for (x in names)
- if (names[x] !~ /ignore/)
- print names[x]
-@end example
-
-@ignore
-from brennan@boeing.com:
-
-page 90, section 9.6. The example is too artificial as
-the one line program
-
- !/ignore/
-
-does the same thing.
-@end ignore
-@c ADR --- he's right, but don't worry about this for now
-
-The @code{continue} statement in a @code{for} loop directs @code{awk} to
-skip the rest of the body of the loop, and resume execution with the
-increment-expression of the @code{for} statement. The following program
-illustrates this fact:@refill
-
-@example
-awk 'BEGIN @{
- for (x = 0; x <= 20; x++) @{
- if (x == 5)
- continue
- printf ("%d ", x)
- @}
- print ""
-@}'
-@end example
-
-@noindent
-This program prints all the numbers from 0 to 20, except for 5, for
-which the @code{printf} is skipped. Since the increment @code{x++}
-is not skipped, @code{x} does not remain stuck at 5. Contrast the
-@code{for} loop above with the @code{while} loop:
-
-@example
-awk 'BEGIN @{
- x = 0
- while (x <= 20) @{
- if (x == 5)
- continue
- printf ("%d ", x)
- x++
- @}
- print ""
-@}'
-@end example
-
-@noindent
-This program loops forever once @code{x} gets to 5.
-
-As described above, the @code{continue} statement has no meaning when
-used outside the body of a loop. However, although it was never documented,
-historical implementations of @code{awk} have treated the @code{continue}
-statement outside of a loop as if it were a @code{next} statement
-(@pxref{Next Statement, ,The @code{next} Statement}).
-By default, @code{gawk} silently supports this usage. However, if
-@samp{-W posix} has been specified on the command line
-(@pxref{Command Line, ,Invoking @code{awk}}),
-it will be treated as an error, since the @sc{posix} standard specifies
-that @code{continue} should only be used inside the body of a loop.@refill
-
-@node Next Statement, Next File Statement, Continue Statement, Statements
-@section The @code{next} Statement
-@cindex @code{next} statement
-
-The @code{next} statement forces @code{awk} to immediately stop processing
-the current record and go on to the next record. This means that no
-further rules are executed for the current record. The rest of the
-current rule's action is not executed either.
-
-Contrast this with the effect of the @code{getline} function
-(@pxref{Getline, ,Explicit Input with @code{getline}}). That too causes
-@code{awk} to read the next record immediately, but it does not alter the
-flow of control in any way. So the rest of the current action executes
-with a new input record.
-
-At the highest level, @code{awk} program execution is a loop that reads
-an input record and then tests each rule's pattern against it. If you
-think of this loop as a @code{for} statement whose body contains the
-rules, then the @code{next} statement is analogous to a @code{continue}
-statement: it skips to the end of the body of this implicit loop, and
-executes the increment (which reads another record).
-
-For example, if your @code{awk} program works only on records with four
-fields, and you don't want it to fail when given bad input, you might
-use this rule near the beginning of the program:
-
-@smallexample
-NF != 4 @{
- printf("line %d skipped: doesn't have 4 fields", FNR) > "/dev/stderr"
- next
-@}
-@end smallexample
-
-@noindent
-so that the following rules will not see the bad record. The error
-message is redirected to the standard error output stream, as error
-messages should be. @xref{Special Files, ,Standard I/O Streams}.
-
-According to the @sc{posix} standard, the behavior is undefined if
-the @code{next} statement is used in a @code{BEGIN} or @code{END} rule.
-@code{gawk} will treat it as a syntax error.
-
-If the @code{next} statement causes the end of the input to be reached,
-then the code in the @code{END} rules, if any, will be executed.
-@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.
-
-@node Next File Statement, Exit Statement, Next Statement, Statements
-@section The @code{next file} Statement
-
-@cindex @code{next file} statement
-The @code{next file} statement is similar to the @code{next} statement.
-However, instead of abandoning processing of the current record, the
-@code{next file} statement instructs @code{awk} to stop processing the
-current data file.
-
-Upon execution of the @code{next file} statement, @code{FILENAME} is
-updated to the name of the next data file listed on the command line,
-@code{FNR} is reset to 1, and processing starts over with the first
-rule in the program.  @xref{Built-in Variables}.
-
-If the @code{next file} statement causes the end of the input to be reached,
-then the code in the @code{END} rules, if any, will be executed.
-@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.
-
-The @code{next file} statement is a @code{gawk} extension; it is not
-(currently) available in any other @code{awk} implementation. You can
-simulate its behavior by creating a library file named @file{nextfile.awk},
-with the following contents. (This sample program uses user-defined
-functions, a feature that has not been presented yet.
-@xref{User-defined, ,User-defined Functions},
-for more information.)@refill
-
-@smallexample
-# nextfile --- function to skip remaining records in current file
-
-# this should be read in before the "main" awk program
-
-function nextfile() @{ _abandon_ = FILENAME; next @}
-
-_abandon_ == FILENAME && FNR > 1 @{ next @}
-_abandon_ == FILENAME && FNR == 1 @{ _abandon_ = "" @}
-@end smallexample
-
-The @code{nextfile} function simply sets a ``private'' variable@footnote{Since
-all variables in @code{awk} are global, this program uses the common
-practice of prefixing the variable name with an underscore. In fact, it
-also suffixes the variable name with an underscore, as extra insurance
-against using a variable name that might be used in some other library
-file.} to the name of the current data file, and then retrieves the next
-record. Since this file is read before the main @code{awk} program,
-the rules that follow the function definition will be executed before the
-rules in the main program. The first rule continues to skip records as long as
-the name of the input file has not changed, and this is not the first
-record in the file. This rule is sufficient most of the time. But what if
-the @emph{same} data file is named twice in a row on the command line?
-This rule would not process the data file the second time. The second rule
-catches this case: If the data file name is what was being skipped, but
-@code{FNR} is 1, then this is the second time the file is being processed,
-and it should not be skipped.
-
-The @code{next file} statement is useful if you have many data files to
-process, and, due to the nature of the data, you do not expect to need
-every record in every file. Without it, in order to move on to the next
-data file, you would have to keep scanning the unwanted records (as
-described above). The @code{next file} statement accomplishes this much
-more efficiently.
-
-@ignore
-Would it make sense down the road to nuke `next file' in favor of
-semantics that would make this work?
-
- function nextfile() { ARGIND++ ; next }
-@end ignore
-
-@node Exit Statement, , Next File Statement, Statements
-@section The @code{exit} Statement
-
-@cindex @code{exit} statement
-The @code{exit} statement causes @code{awk} to immediately stop
-executing the current rule and to stop processing input; any remaining input
-is ignored.@refill
-
-If an @code{exit} statement is executed from a @code{BEGIN} rule the
-program stops processing everything immediately. No input records are
-read. However, if an @code{END} rule is present, it is executed
-(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).
-
-If @code{exit} is used as part of an @code{END} rule, it causes
-the program to stop immediately.
-
-An @code{exit} statement that is part of an ordinary rule (that is, not part
-of a @code{BEGIN} or @code{END} rule) stops the execution of any further
-automatic rules, but the @code{END} rule is executed if there is one.
-If you do not want the @code{END} rule to do its job in this case, you
-can set a variable to nonzero before the @code{exit} statement, and check
-that variable in the @code{END} rule.
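-
-A sketch of that technique might look like this (the pattern
-@code{/fatal error/} and the variable name @code{aborting} are
-arbitrary):
-
-@example
-/fatal error/  @{ aborting = 1; exit @}
-
-END @{
-    if (aborting)
-        exit 1
-    @dots{}   # normal end-of-job processing
-@}
-@end example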
-
-If an argument is supplied to @code{exit}, its value is used as the exit
-status code for the @code{awk} process. If no argument is supplied,
-@code{exit} returns status zero (success).@refill
-
-For example, let's say you've discovered an error condition you really
-don't know how to handle. Conventionally, programs report this by
-exiting with a nonzero status. Your @code{awk} program can do this
-using an @code{exit} statement with a nonzero argument. Here's an
-example of this:@refill
-
-@example
-@group
-BEGIN @{
- if (("date" | getline date_now) < 0) @{
- print "Can't get system date" > "/dev/stderr"
- exit 4
- @}
-@}
-@end group
-@end example
-
-@node Arrays, Built-in, Statements, Top
-@chapter Arrays in @code{awk}
-
-An @dfn{array} is a table of values, called @dfn{elements}. The
-elements of an array are distinguished by their indices. @dfn{Indices}
-may be either numbers or strings. Each array has a name, which looks
-like a variable name, but must not be in use as a variable name in the
-same @code{awk} program.
-
-@menu
-* Array Intro:: Introduction to Arrays
-* Reference to Elements:: How to examine one element of an array.
-* Assigning Elements:: How to change an element of an array.
-* Array Example:: Basic Example of an Array
-* Scanning an Array:: A variation of the @code{for} statement.
- It loops through the indices of
- an array's existing elements.
-* Delete:: The @code{delete} statement removes
- an element from an array.
-* Numeric Array Subscripts:: How to use numbers as subscripts in @code{awk}.
-* Multi-dimensional:: Emulating multi-dimensional arrays in @code{awk}.
-* Multi-scanning:: Scanning multi-dimensional arrays.
-@end menu
-
-@node Array Intro, Reference to Elements, Arrays, Arrays
-@section Introduction to Arrays
-
-@cindex arrays
-The @code{awk} language has one-dimensional @dfn{arrays} for storing groups
-of related strings or numbers.
-
-Every @code{awk} array must have a name. Array names have the same
-syntax as variable names; any valid variable name would also be a valid
-array name. But you cannot use one name in both ways (as an array and
-as a variable) in one @code{awk} program.
-
-Arrays in @code{awk} superficially resemble arrays in other programming
-languages; but there are fundamental differences. In @code{awk}, you
-don't need to specify the size of an array before you start to use it.
-Additionally, any number or string in @code{awk} may be used as an
-array index.
-
-In most other languages, you have to @dfn{declare} an array and specify
-how many elements or components it contains. In such languages, the
-declaration causes a contiguous block of memory to be allocated for that
-many elements. An index in the array must be a positive integer; for
-example, the index 0 specifies the first element in the array, which is
-actually stored at the beginning of the block of memory. Index 1
-specifies the second element, which is stored in memory right after the
-first element, and so on. It is impossible to add more elements to the
-array, because it has room for only as many elements as you declared.
-
-A contiguous array of four elements might look like this,
-conceptually, if the element values are @code{8}, @code{"foo"},
-@code{""} and @code{30}:@refill
-
-@example
-+---------+---------+--------+---------+
-| 8 | "foo" | "" | 30 | @r{value}
-+---------+---------+--------+---------+
- 0 1 2 3 @r{index}
-@end example
-
-@noindent
-Only the values are stored; the indices are implicit from the order of
-the values. @code{8} is the value at index 0, because @code{8} appears in the
-position with 0 elements before it.
-
-@cindex arrays, definition of
-@cindex associative arrays
-Arrays in @code{awk} are different: they are @dfn{associative}. This means
-that each array is a collection of pairs: an index, and its corresponding
-array element value:
-
-@example
-@r{Element} 4 @r{Value} 30
-@r{Element} 2 @r{Value} "foo"
-@r{Element} 1 @r{Value} 8
-@r{Element} 3 @r{Value} ""
-@end example
-
-@noindent
-We have shown the pairs in jumbled order because their order is irrelevant.
-
-One advantage of an associative array is that new pairs can be added
-at any time. For example, suppose we add to the above array a tenth element
-whose value is @w{@code{"number ten"}}. The result is this:
-
-@example
-@r{Element} 10 @r{Value} "number ten"
-@r{Element} 4 @r{Value} 30
-@r{Element} 2 @r{Value} "foo"
-@r{Element} 1 @r{Value} 8
-@r{Element} 3 @r{Value} ""
-@end example
-
-@noindent
-Now the array is @dfn{sparse} (i.e., some indices are missing): it has
-elements 1--4 and 10, but doesn't have elements 5, 6, 7, 8, or 9.@refill
-
-Another consequence of associative arrays is that the indices don't
-have to be positive integers. Any number, or even a string, can be
-an index. For example, here is an array which translates words from
-English into French:
-
-@example
-@r{Element} "dog" @r{Value} "chien"
-@r{Element} "cat" @r{Value} "chat"
-@r{Element} "one" @r{Value} "un"
-@r{Element} 1 @r{Value} "un"
-@end example
-
-@noindent
-Here we decided to translate the number 1 in both spelled-out and
-numeric form---thus illustrating that a single array can have both
-numbers and strings as indices.
-
-When @code{awk} creates an array for you, e.g., with the @code{split}
-built-in function,
-that array's indices are consecutive integers starting at 1.
-(@xref{String Functions, ,Built-in Functions for String Manipulation}.)
-
-@node Reference to Elements, Assigning Elements, Array Intro, Arrays
-@section Referring to an Array Element
-@cindex array reference
-@cindex element of array
-@cindex reference to array
-
-The principal way of using an array is to refer to one of its elements.
-An array reference is an expression which looks like this:
-
-@example
-@var{array}[@var{index}]
-@end example
-
-@noindent
-Here, @var{array} is the name of an array. The expression @var{index} is
-the index of the element of the array that you want.
-
-The value of the array reference is the current value of that array
-element. For example, @code{foo[4.3]} is an expression for the element
-of array @code{foo} at index 4.3.
-
-If you refer to an array element that has no recorded value, the value
-of the reference is @code{""}, the null string. This includes elements
-to which you have not assigned any value, and elements that have been
-deleted (@pxref{Delete, ,The @code{delete} Statement}). Such a reference
-automatically creates that array element, with the null string as its value.
-(In some cases, this is unfortunate, because it might waste memory inside
-@code{awk}).
-
-@cindex arrays, presence of elements
-You can find out if an element exists in an array at a certain index with
-the expression:
-
-@example
-@var{index} in @var{array}
-@end example
-
-@noindent
-This expression tests whether or not the particular index exists,
-without the side effect of creating that element if it is not present.
-The expression has the value 1 (true) if @code{@var{array}[@var{index}]}
-exists, and 0 (false) if it does not exist.@refill
-
-For example, to test whether the array @code{frequencies} contains the
-index @code{"2"}, you could write this statement:@refill
-
-@smallexample
-if ("2" in frequencies) print "Subscript \"2\" is present."
-@end smallexample
-
-Note that this is @emph{not} a test of whether or not the array
-@code{frequencies} contains an element whose @emph{value} is @code{"2"}.
-(There is no way to do that except to scan all the elements.) Also, this
-@emph{does not} create @code{frequencies["2"]}, while the following
-(incorrect) alternative would do so:@refill
-
-@smallexample
-if (frequencies["2"] != "") print "Subscript \"2\" is present."
-@end smallexample
-
-@node Assigning Elements, Array Example, Reference to Elements, Arrays
-@section Assigning Array Elements
-@cindex array assignment
-@cindex element assignment
-
-Array elements are lvalues: they can be assigned values just like
-@code{awk} variables:
-
-@example
-@var{array}[@var{subscript}] = @var{value}
-@end example
-
-@noindent
-Here @var{array} is the name of your array. The expression
-@var{subscript} is the index of the element of the array that you want
-to assign a value. The expression @var{value} is the value you are
-assigning to that element of the array.@refill
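-
-For example, the following statement saves the current input line in
-the array @code{arr}, using the record number as the subscript (a
-minimal sketch; the array name @code{arr} is arbitrary):
-
-@example
-arr[NR] = $0
-@end example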
-
-@node Array Example, Scanning an Array, Assigning Elements, Arrays
-@section Basic Example of an Array
-
-The following program takes a list of lines, each beginning with a line
-number, and prints them out in order of line number. The line numbers are
-not in order, however, when they are first read: they are scrambled. This
-program sorts the lines by making an array using the line numbers as
-subscripts. It then prints out the lines in sorted order of their numbers.
-It is a very simple program, and gets confused if it encounters repeated
-numbers, gaps, or lines that don't begin with a number.@refill
-
-@example
-@{
- if ($1 > max)
- max = $1
- arr[$1] = $0
-@}
-
-END @{
- for (x = 1; x <= max; x++)
- print arr[x]
-@}
-@end example
-
-The first rule keeps track of the largest line number seen so far;
-it also stores each line into the array @code{arr}, at an index that
-is the line's number.
-
-The second rule runs after all the input has been read, to print out
-all the lines.
-
-When this program is run with the following input:
-
-@example
-5 I am the Five man
-2 Who are you? The new number two!
-4 . . . And four on the floor
-1 Who is number one?
-3 I three you.
-@end example
-
-@noindent
-its output is this:
-
-@example
-1 Who is number one?
-2 Who are you? The new number two!
-3 I three you.
-4 . . . And four on the floor
-5 I am the Five man
-@end example
-
-If a line number is repeated, the last line with a given number overrides
-the others.
-
-Gaps in the line numbers can be handled with an easy improvement to the
-program's @code{END} rule:
-
-@example
-END @{
- for (x = 1; x <= max; x++)
- if (x in arr)
- print arr[x]
-@}
-@end example
-
-@node Scanning an Array, Delete, Array Example, Arrays
-@section Scanning all Elements of an Array
-@cindex @code{for (x in @dots{})}
-@cindex arrays, special @code{for} statement
-@cindex scanning an array
-
-In programs that use arrays, often you need a loop that executes
-once for each element of an array. In other languages, where arrays are
-contiguous and indices are limited to positive integers, this is
-easy: the largest index is one less than the length of the array, and you can
-find all the valid indices by counting from zero up to that value. This
-technique won't do the job in @code{awk}, since any number or string
-may be an array index. So @code{awk} has a special kind of @code{for}
-statement for scanning an array:
-
-@example
-for (@var{var} in @var{array})
- @var{body}
-@end example
-
-@noindent
-This loop executes @var{body} once for each different value that your
-program has previously used as an index in @var{array}, with the
-variable @var{var} set to that index.@refill
-
-Here is a program that uses this form of the @code{for} statement. The
-first rule scans the input records and notes which words appear (at
-least once) in the input, by storing a 1 into the array @code{used} with
-the word as index. The second rule scans the elements of @code{used} to
-find all the distinct words that appear in the input. It prints each
-word that is more than 10 characters long, and also prints the number of
-such words. @xref{Built-in, ,Built-in Functions}, for more information
-on the built-in function @code{length}.
-
-@smallexample
-# Record a 1 for each word that is used at least once.
-@{
- for (i = 1; i <= NF; i++)
- used[$i] = 1
-@}
-
-# Find number of distinct words more than 10 characters long.
-END @{
- for (x in used)
- if (length(x) > 10) @{
- ++num_long_words
- print x
- @}
- print num_long_words, "words longer than 10 characters"
-@}
-@end smallexample
-
-@noindent
-@xref{Sample Program}, for a more detailed example of this type.
-
-The order in which elements of the array are accessed by this statement
-is determined by the internal arrangement of the array elements within
-@code{awk} and cannot be controlled or changed. This can lead to
-problems if new elements are added to @var{array} by statements in
-@var{body}; you cannot predict whether or not the @code{for} loop will
-reach them. Similarly, changing @var{var} inside the loop can produce
-strange results. It is best to avoid such things.@refill
-
-@node Delete, Numeric Array Subscripts, Scanning an Array, Arrays
-@section The @code{delete} Statement
-@cindex @code{delete} statement
-@cindex deleting elements of arrays
-@cindex removing elements of arrays
-@cindex arrays, deleting an element
-
-You can remove an individual element of an array using the @code{delete}
-statement:
-
-@example
-delete @var{array}[@var{index}]
-@end example
-
-Once an array element has been deleted, it is as if you had never
-referred to it and had never given it any value. You can no longer
-obtain any value the element once had.
-
-Here is an example of deleting elements in an array:
-
-@example
-for (i in frequencies)
- delete frequencies[i]
-@end example
-
-@noindent
-This example removes all the elements from the array @code{frequencies}.
-
-If you delete an element, a subsequent @code{for} statement to scan the array
-will not report that element, and the @code{in} operator to check for
-the presence of that element will return 0:
-
-@example
-delete foo[4]
-if (4 in foo)
- print "This will never be printed"
-@end example
-
-It is not an error to delete an element which does not exist.
-
-@node Numeric Array Subscripts, Multi-dimensional, Delete, Arrays
-@section Using Numbers to Subscript Arrays
-
-An important aspect of arrays to remember is that array subscripts
-are @emph{always} strings. If you use a numeric value as a subscript,
-it will be converted to a string value before it is used for subscripting
-(@pxref{Conversion, ,Conversion of Strings and Numbers}).
-
-@cindex conversions, during subscripting
-@cindex numbers, used as subscripts
-@vindex CONVFMT
-This means that the value of @code{CONVFMT} can potentially
-affect how your program accesses elements of an array. For example:
-
-@example
-a = b = 12.153
-data[a] = 1
-CONVFMT = "%2.2f"
-if (b in data)
- printf "%s is in data", b
-else
- printf "%s is not in data", b
-@end example
-
-@noindent
-should print @samp{12.15 is not in data}. The first statement gives
-both @code{a} and @code{b} the same numeric value. Assigning to
-@code{data[a]} first gives @code{a} the string value @code{"12.153"}
-(using the default conversion value of @code{CONVFMT}, @code{"%.6g"}),
-and then assigns 1 to @code{data["12.153"]}. The program then changes
-the value of @code{CONVFMT}. The test @samp{(b in data)} forces @code{b}
-to be converted to a string, this time @code{"12.15"}, since the value of
-@code{CONVFMT} only allows two significant digits. This test fails,
-since @code{"12.15"} is a different string from @code{"12.153"}.@refill
-
-According to the rules for conversions
-(@pxref{Conversion, ,Conversion of Strings and Numbers}), integer
-values are always converted to strings as integers, no matter what the
-value of @code{CONVFMT} may happen to be. So the usual case of@refill
-
-@example
-for (i = 1; i <= maxsub; i++)
- @i{do something with} array[i]
-@end example
-
-@noindent
-will work, no matter what the value of @code{CONVFMT}.
-
-Like many things in @code{awk}, the majority of the time things work
-as you would expect them to work. But it is useful to have a precise
-knowledge of the actual rules, since sometimes they can have a subtle
-effect on your programs.
-
-@node Multi-dimensional, Multi-scanning, Numeric Array Subscripts, Arrays
-@section Multi-dimensional Arrays
-
-@c the following index entry is an overfull hbox. --mew 30jan1992
-@cindex subscripts in arrays
-@cindex arrays, multi-dimensional subscripts
-@cindex multi-dimensional subscripts
-A multi-dimensional array is an array in which an element is identified
-by a sequence of indices, not a single index. For example, a
-two-dimensional array requires two indices. The usual way (in most
-languages, including @code{awk}) to refer to an element of a
-two-dimensional array named @code{grid} is with
-@code{grid[@var{x},@var{y}]}.
-
-@vindex SUBSEP
-Multi-dimensional arrays are supported in @code{awk} through
-concatenation of indices into one string. What happens is that
-@code{awk} converts the indices into strings
-(@pxref{Conversion, ,Conversion of Strings and Numbers}) and
-concatenates them together, with a separator between them. This creates
-a single string that describes the values of the separate indices. The
-combined string is used as a single index into an ordinary,
-one-dimensional array. The separator used is the value of the built-in
-variable @code{SUBSEP}.@refill
-
-For example, suppose we evaluate the expression @code{foo[5,12]="value"}
-when the value of @code{SUBSEP} is @code{"@@"}. The numbers 5 and 12 are
-converted to strings and
-concatenated with an @samp{@@} between them, yielding @code{"5@@12"}; thus,
-the array element @code{foo["5@@12"]} is set to @code{"value"}.@refill
-
-Once the element's value is stored, @code{awk} has no record of whether
-it was stored with a single index or a sequence of indices. The two
-expressions @code{foo[5,12]} and @w{@code{foo[5 SUBSEP 12]}} always have
-the same value.
-
-The default value of @code{SUBSEP} is the string @code{"\034"},
-which contains a nonprinting character that is unlikely to appear in an
-@code{awk} program or in the input data.
-
-The usefulness of choosing an unlikely character comes from the fact
-that index values that contain a string matching @code{SUBSEP} lead to
-combined strings that are ambiguous. Suppose that @code{SUBSEP} were
-@code{"@@"}; then @w{@code{foo["a@@b", "c"]}} and @w{@code{foo["a",
-"b@@c"]}} would be indistinguishable because both would actually be
-stored as @code{foo["a@@b@@c"]}. Because @code{SUBSEP} is
-@code{"\034"}, such confusion can arise only when an index
-contains the character with ASCII code 034, which is a rare
-event.@refill
-
-You can test whether a particular index-sequence exists in a
-``multi-dimensional'' array with the same operator @code{in} used for single
-dimensional arrays. Instead of a single index as the left-hand operand,
-write the whole sequence of indices, separated by commas, in
-parentheses:@refill
-
-@example
-(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array}
-@end example
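-
-For example, to test whether the index sequence @w{(2, 4)} has been used
-in the array @code{cell} (an arbitrary name chosen for this sketch), you
-could write:
-
-@example
-if ((2, 4) in cell)
-    print "cell[2, 4] exists"
-@end example
-
-@noindent
-As with one-dimensional arrays, this test does not create the element.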
-
-The following example treats its input as a two-dimensional array of
-fields; it rotates this array 90 degrees clockwise and prints the
-result. It assumes that all lines have the same number of
-elements.
-
-@example
-awk '@{
- if (max_nf < NF)
- max_nf = NF
- max_nr = NR
- for (x = 1; x <= NF; x++)
- vector[x, NR] = $x
-@}
-
-END @{
- for (x = 1; x <= max_nf; x++) @{
- for (y = max_nr; y >= 1; --y)
- printf("%s ", vector[x, y])
- printf("\n")
- @}
-@}'
-@end example
-
-@noindent
-When given the input:
-
-@example
-@group
-1 2 3 4 5 6
-2 3 4 5 6 1
-3 4 5 6 1 2
-4 5 6 1 2 3
-@end group
-@end example
-
-@noindent
-it produces:
-
-@example
-@group
-4 3 2 1
-5 4 3 2
-6 5 4 3
-1 6 5 4
-2 1 6 5
-3 2 1 6
-@end group
-@end example
-
-@node Multi-scanning, , Multi-dimensional, Arrays
-@section Scanning Multi-dimensional Arrays
-
-There is no special @code{for} statement for scanning a
-``multi-dimensional'' array; there cannot be one, because in truth there
-are no multi-dimensional arrays or elements; there is only a
-multi-dimensional @emph{way of accessing} an array.
-
-However, if your program has an array that is always accessed as
-multi-dimensional, you can get the effect of scanning it by combining
-the scanning @code{for} statement
-(@pxref{Scanning an Array, ,Scanning all Elements of an Array}) with the
-@code{split} built-in function
-(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
-It works like this:@refill
-
-@example
-for (combined in @var{array}) @{
- split(combined, separate, SUBSEP)
- @dots{}
-@}
-@end example
-
-@noindent
-This finds each concatenated, combined index in the array, and splits it
-into the individual indices by breaking it apart where the value of
-@code{SUBSEP} appears. The split-out indices become the elements of
-the array @code{separate}.
-
-Thus, suppose you have previously stored a value in @code{@var{array}[1,
-"foo"]}; then an element with index @code{"1\034foo"} exists in
-@var{array}. (Recall that the default value of @code{SUBSEP} contains
-the character with code 034.) Sooner or later the @code{for} statement
-will find that index and do an iteration with @code{combined} set to
-@code{"1\034foo"}. Then the @code{split} function is called as
-follows:
-
-@example
-split("1\034foo", separate, "\034")
-@end example
-
-@noindent
-The result of this is to set @code{separate[1]} to 1 and @code{separate[2]}
-to @code{"foo"}. Presto, the original sequence of separate indices has
-been recovered.
-
-@node Built-in, User-defined, Arrays, Top
-@chapter Built-in Functions
-
-@cindex built-in functions
-@dfn{Built-in} functions are functions that are always available for
-your @code{awk} program to call. This chapter defines all the built-in
-functions in @code{awk}; some of them are mentioned in other sections,
-but they are summarized here for your convenience. (You can also define
-new functions yourself. @xref{User-defined, ,User-defined Functions}.)
-
-@menu
-* Calling Built-in:: How to call built-in functions.
-* Numeric Functions:: Functions that work with numbers,
- including @code{int}, @code{sin} and @code{rand}.
-* String Functions:: Functions for string manipulation,
- such as @code{split}, @code{match}, and @code{sprintf}.
-* I/O Functions:: Functions for files and shell commands.
-* Time Functions:: Functions for dealing with time stamps.
-@end menu
-
-@node Calling Built-in, Numeric Functions, Built-in, Built-in
-@section Calling Built-in Functions
-
-To call a built-in function, write the name of the function followed
-by arguments in parentheses. For example, @code{atan2(y + z, 1)}
-is a call to the function @code{atan2}, with two arguments.
-
-Whitespace is ignored between the built-in function name and the
-open-parenthesis, but we recommend that you avoid using whitespace
-there. User-defined functions do not permit whitespace in this way, and
-you will find it easier to avoid mistakes by following a simple
-convention which always works: no whitespace after a function name.
-
-Each built-in function accepts a certain number of arguments. In most
-cases, any extra arguments given to built-in functions are ignored. The
-defaults for omitted arguments vary from function to function and are
-described under the individual functions.
-
-When a function is called, expressions that create the function's actual
-parameters are evaluated completely before the function call is performed.
-For example, in the code fragment:
-
-@example
-i = 4
-j = sqrt(i++)
-@end example
-
-@noindent
-the variable @code{i} is set to 5 before @code{sqrt} is called
-with a value of 4 for its actual parameter.
-
-@node Numeric Functions, String Functions, Calling Built-in, Built-in
-@section Numeric Built-in Functions
-@c I didn't make all the examples small because a couple of them were
-@c short already. --mew 29jan1992
-
-Here is a full list of built-in functions that work with numbers:
-
-@table @code
-@item int(@var{x})
-This gives you the integer part of @var{x}, truncated toward 0. This
-produces the nearest integer to @var{x}, located between @var{x} and 0.
-
-For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)}
-is @minus{}3, and @code{int(-3)} is @minus{}3 as well.@refill
-
-@item sqrt(@var{x})
-This gives you the positive square root of @var{x}. It reports an error
-if @var{x} is negative. Thus, @code{sqrt(4)} is 2.@refill
-
-@item exp(@var{x})
-This gives you the exponential of @var{x}, or reports an error if
-@var{x} is out of range. The range of values @var{x} can have depends
-on your machine's floating point representation.@refill
-
-@item log(@var{x})
-This gives you the natural logarithm of @var{x}, if @var{x} is positive;
-otherwise, it reports an error.@refill
-
-@item sin(@var{x})
-This gives you the sine of @var{x}, with @var{x} in radians.
-
-@item cos(@var{x})
-This gives you the cosine of @var{x}, with @var{x} in radians.
-
-@item atan2(@var{y}, @var{x})
-This gives you the arctangent of @code{@var{y} / @var{x}} in radians.
-
-@item rand()
-This gives you a random number. The values of @code{rand} are
-uniformly-distributed between 0 and 1. The value is never 0 and never
-1.
-
-Often you want random integers instead. Here is a user-defined function
-you can use to obtain a random nonnegative integer less than @var{n}:
-
-@example
-function randint(n) @{
- return int(n * rand())
-@}
-@end example
-
-@noindent
-The multiplication produces a random real number greater than 0 and less
-than @var{n}. We then make it an integer (using @code{int}) between 0
-and @code{@var{n} @minus{} 1}.
-
-Here is an example where a similar function is used to produce
-random integers between 1 and @var{n}. Note that this program will
-print a new random number for each input record.
-
-@smallexample
-awk '
-# Function to roll a simulated die.
-function roll(n) @{ return 1 + int(rand() * n) @}
-
-# Roll 3 six-sided dice and print total number of points.
-@{
- printf("%d points\n", roll(6)+roll(6)+roll(6))
-@}'
-@end smallexample
-
-@strong{Note:} @code{rand} starts generating numbers from the same
-point, or @dfn{seed}, each time you run @code{awk}. This means that
-a program will produce the same results each time you run it.
-The numbers are random within one @code{awk} run, but predictable
-from run to run. This is convenient for debugging, but if you want
-a program to do different things each time it is used, you must change
-the seed to a value that will be different in each run. To do this,
-use @code{srand}.
-
-@item srand(@var{x})
-The function @code{srand} sets the starting point, or @dfn{seed},
-for generating random numbers to the value @var{x}.
-
-Each seed value leads to a particular sequence of ``random'' numbers.
-Thus, if you set the seed to the same value a second time, you will get
-the same sequence of ``random'' numbers again.
-
-If you omit the argument @var{x}, as in @code{srand()}, then the current
-date and time of day are used for a seed. This is the way to get random
-numbers that are truly unpredictable.
-
-The return value of @code{srand} is the previous seed. This makes it
-easy to keep track of the seeds for use in consistently reproducing
-sequences of random numbers.
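-
-For example, in this sketch the second call to @code{srand} returns the
-seed installed by the first call:
-
-@example
-awk 'BEGIN @{
-    srand(42)
-    previous = srand(1)
-    print previous        # prints 42
-@}'
-@end example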
-@end table
-
-@node String Functions, I/O Functions, Numeric Functions, Built-in
-@section Built-in Functions for String Manipulation
-
-The functions in this section look at or change the text of one or more
-strings.
-
-@table @code
-@item index(@var{in}, @var{find})
-@findex index
-This searches the string @var{in} for the first occurrence of the string
-@var{find}, and returns the position in characters where that occurrence
-begins in the string @var{in}. For example:@refill
-
-@smallexample
-awk 'BEGIN @{ print index("peanut", "an") @}'
-@end smallexample
-
-@noindent
-prints @samp{3}. If @var{find} is not found, @code{index} returns 0.
-(Remember that string indices in @code{awk} start at 1.)
-
-@item length(@var{string})
-@findex length
-This gives you the number of characters in @var{string}. If
-@var{string} is a number, the length of the digit string representing
-that number is returned. For example, @code{length("abcde")} is 5. By
-contrast, @code{length(15 * 35)} works out to 3. How? Well, 15 * 35 =
-525, and 525 is then converted to the string @samp{"525"}, which has
-three characters.
-
-If no argument is supplied, @code{length} returns the length of @code{$0}.
-
-In older versions of @code{awk}, you could call the @code{length} function
-without any parentheses. Doing so is marked as ``deprecated'' in the
-@sc{posix} standard. This means that while you can do this in your
-programs, it is a feature that can eventually be removed from a future
-version of the standard. Therefore, for maximal portability of your
-@code{awk} programs you should always supply the parentheses.
-
-@item match(@var{string}, @var{regexp})
-@findex match
-The @code{match} function searches the string, @var{string}, for the
-longest, leftmost substring matched by the regular expression,
-@var{regexp}. It returns the character position, or @dfn{index}, of
-where that substring begins (1, if it starts at the beginning of
-@var{string}). If no match is found, it returns 0.
-
-@vindex RSTART
-@vindex RLENGTH
-The @code{match} function sets the built-in variable @code{RSTART} to
-the index. It also sets the built-in variable @code{RLENGTH} to the
-length in characters of the matched substring. If no match is found,
-@code{RSTART} is set to 0, and @code{RLENGTH} to @minus{}1.
-
-For example:
-
-@smallexample
-awk '@{
- if ($1 == "FIND")
- regex = $2
- else @{
- where = match($0, regex)
- if (where)
- print "Match of", regex, "found at", where, "in", $0
- @}
-@}'
-@end smallexample
-
-@noindent
-This program looks for lines that match the regular expression stored in
-the variable @code{regex}. This regular expression can be changed. If the
-first word on a line is @samp{FIND}, @code{regex} is changed to be the
-second word on that line. Therefore, given:
-
-@smallexample
-FIND fo*bar
-My program was a foobar
-But none of it would doobar
-FIND Melvin
-JF+KM
-This line is property of The Reality Engineering Co.
-This file created by Melvin.
-@end smallexample
-
-@noindent
-@code{awk} prints:
-
-@smallexample
-Match of fo*bar found at 18 in My program was a foobar
-Match of Melvin found at 22 in This file created by Melvin.
-@end smallexample
-
-@item split(@var{string}, @var{array}, @var{fieldsep})
-@findex split
-This divides @var{string} into pieces separated by @var{fieldsep},
-and stores the pieces in @var{array}. The first piece is stored in
-@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
-forth. The string value of the third argument, @var{fieldsep}, is
-a regexp describing where to split @var{string} (much as @code{FS} can
-be a regexp describing where to split input records). If
-the @var{fieldsep} is omitted, the value of @code{FS} is used.
-@code{split} returns the number of elements created.@refill
-
-The @code{split} function, then, splits strings into pieces in a
-manner similar to the way input lines are split into fields. For example:
-
-@smallexample
-split("auto-da-fe", a, "-")
-@end smallexample
-
-@noindent
-splits the string @samp{auto-da-fe} into three fields using @samp{-} as the
-separator. It sets the contents of the array @code{a} as follows:
-
-@smallexample
-a[1] = "auto"
-a[2] = "da"
-a[3] = "fe"
-@end smallexample
-
-@noindent
-The value returned by this call to @code{split} is 3.
-
-As with input field-splitting, when the value of @var{fieldsep} is
-@code{" "}, leading and trailing whitespace is ignored, and the elements
-are separated by runs of whitespace.
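-
-For example, under these rules a call like the following (the variable
-names are arbitrary):
-
-@smallexample
-n = split("   hello   world ", pieces, " ")
-@end smallexample
-
-@noindent
-sets @code{n} to 2, @code{pieces[1]} to @code{"hello"}, and
-@code{pieces[2]} to @code{"world"}; the leading and trailing blanks do
-not produce empty elements.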
-
-@item sprintf(@var{format}, @var{expression1},@dots{})
-@findex sprintf
-This returns (without printing) the string that @code{printf} would
-have printed out with the same arguments
-(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).
-For example:@refill
-
-@smallexample
-sprintf("pi = %.2f (approx.)", 22/7)
-@end smallexample
-
-@noindent
-returns the string @w{@code{"pi = 3.14 (approx.)"}}.
-
-@item sub(@var{regexp}, @var{replacement}, @var{target})
-@findex sub
-The @code{sub} function alters the value of @var{target}.
-It searches this value, which should be a string, for the
-leftmost substring matched by the regular expression, @var{regexp},
-extending this match as far as possible. Then the entire string is
-changed by replacing the matched text with @var{replacement}.
-The modified string becomes the new value of @var{target}.
-
-This function is peculiar because @var{target} is not simply
-used to compute a value, and not just any expression will do: it
-must be a variable, field or array reference, so that @code{sub} can
-store a modified value there. If this argument is omitted, then the
-default is to use and alter @code{$0}.
-
-For example:@refill
-
-@smallexample
-str = "water, water, everywhere"
-sub(/at/, "ith", str)
-@end smallexample
-
-@noindent
-sets @code{str} to @w{@code{"wither, water, everywhere"}}, by replacing the
-leftmost, longest occurrence of @samp{at} with @samp{ith}.
-
-The @code{sub} function returns the number of substitutions made (either
-one or zero).
-
-If the special character @samp{&} appears in @var{replacement}, it
-stands for the precise substring that was matched by @var{regexp}. (If
-the regexp can match more than one string, then this precise substring
-may vary.) For example:@refill
-
-@smallexample
-awk '@{ sub(/candidate/, "& and his wife"); print @}'
-@end smallexample
-
-@noindent
-changes the first occurrence of @samp{candidate} to @samp{candidate
-and his wife} on each input line.
-
-Here is another example:
-
-@smallexample
-awk 'BEGIN @{
- str = "daabaaa"
-    sub(/a+/, "c&c", str)
- print str
-@}'
-@end smallexample
-
-@noindent
-prints @samp{dcaacbaaa}. This shows how @samp{&} can represent a non-constant
-string, and also illustrates the ``leftmost, longest'' rule.
-
-The effect of this special character (@samp{&}) can be turned off by putting a
-backslash before it in the string. As usual, to insert one backslash in
-the string, you must write two backslashes. Therefore, write @samp{\\&}
-in a string constant to include a literal @samp{&} in the replacement.
-For example, here is how to replace the first @samp{|} on each line with
-an @samp{&}:@refill
-
-@smallexample
-awk '@{ sub(/\|/, "\\&"); print @}'
-@end smallexample
-
-@strong{Note:} as mentioned above, the third argument to @code{sub} must
-be an lvalue. Some versions of @code{awk} allow the third argument to
-be an expression which is not an lvalue. In such a case, @code{sub}
-would still search for the pattern and return 0 or 1, but the result of
-the substitution (if any) would be thrown away because there is no place
-to put it. Such versions of @code{awk} accept expressions like
-this:@refill
-
-@smallexample
-sub(/USA/, "United States", "the USA and Canada")
-@end smallexample
-
-@noindent
-But that is considered erroneous in @code{gawk}.
-
-@item gsub(@var{regexp}, @var{replacement}, @var{target})
-@findex gsub
-This is similar to the @code{sub} function, except @code{gsub} replaces
-@emph{all} of the longest, leftmost, @emph{nonoverlapping} matching
-substrings it can find. The @samp{g} in @code{gsub} stands for
-``global,'' which means replace everywhere. For example:@refill
-
-@smallexample
-awk '@{ gsub(/Britain/, "United Kingdom"); print @}'
-@end smallexample
-
-@noindent
-replaces all occurrences of the string @samp{Britain} with @samp{United
-Kingdom} for all input records.@refill
-
-The @code{gsub} function returns the number of substitutions made. If
-the variable to be searched and altered, @var{target}, is
-omitted, then the entire input record, @code{$0}, is used.@refill
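-
-For example, the following sketch uses the return value to report how
-many changes were made to each record; since the third argument is
-omitted, @code{$0} itself is modified:
-
-@smallexample
-awk '@{ n = gsub(/foo/, "bar"); print n, $0 @}'
-@end smallexample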
-
-As in @code{sub}, the characters @samp{&} and @samp{\} are special, and
-the third argument must be an lvalue.
-
-@item substr(@var{string}, @var{start}, @var{length})
-@findex substr
-This returns a @var{length}-character-long substring of @var{string},
-starting at character number @var{start}. The first character of a
-string is character number one. For example,
-@code{substr("washington", 5, 3)} returns @code{"ing"}.@refill
-
-If @var{length} is not present, this function returns the whole suffix of
-@var{string} that begins at character number @var{start}. For example,
-@code{substr("washington", 5)} returns @code{"ington"}. This is also
-the case if @var{length} is greater than the number of characters remaining
-in the string, counting from character number @var{start}.
-
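-The following fragment, a minimal sketch, illustrates both of these
-behaviors:
-
-@smallexample
-awk 'BEGIN @{
-    print substr("washington", 5, 50)    # prints "ington"
-    print substr("washington", 5)        # also prints "ington"
-@}'
-@end smallexample
-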
-@item tolower(@var{string})
-@findex tolower
-This returns a copy of @var{string}, with each upper-case character
-in the string replaced with its corresponding lower-case character.
-Nonalphabetic characters are left unchanged. For example,
-@code{tolower("MiXeD cAsE 123")} returns @code{"mixed case 123"}.
-
-@item toupper(@var{string})
-@findex toupper
-This returns a copy of @var{string}, with each lower-case character
-in the string replaced with its corresponding upper-case character.
-Nonalphabetic characters are left unchanged. For example,
-@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}.
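-
-As a quick illustration (a minimal sketch), the following one-line
-program prints a copy of each input line, converted to upper case:
-
-@smallexample
-awk '@{ print toupper($0) @}'
-@end smallexample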
-@end table
-
-@node I/O Functions, Time Functions, String Functions, Built-in
-@section Built-in Functions for Input/Output
-
-@table @code
-@item close(@var{filename})
-Close the file @var{filename}, for input or output. The argument may
-alternatively be a shell command that was used for redirecting to or
-from a pipe; then the pipe is closed.
-
-@xref{Close Input, ,Closing Input Files and Pipes}, regarding closing
-input files and pipes. @xref{Close Output, ,Closing Output Files and Pipes},
-regarding closing output files and pipes.@refill
-
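-For example, the following sketch (the file name @file{testfile} is
-only illustrative) writes a file, closes it so that the data is
-actually flushed to disk, and then reads it back:
-
-@smallexample
-BEGIN @{
-    print "first line" > "testfile"
-    print "second line" > "testfile"
-    close("testfile")      # flush and close the output file
-    while ((getline line < "testfile") > 0)
-        print line
-    close("testfile")      # close the input end as well
-@}
-@end smallexample
-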
-@item system(@var{command})
-@findex system
-@c the following index entry is an overfull hbox. --mew 30jan1992
-@cindex interaction, @code{awk} and other programs
-The @code{system} function allows the user to execute operating system commands
-and then return to the @code{awk} program. The @code{system} function
-executes the command given by the string @var{command}. It returns, as
-its value, the status returned by the command that was executed.
-
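-The return value can be tested like any other expression. In the
-following sketch (the @code{grep} command and the file name are only
-illustrative), the exit status of @code{grep} decides what is printed:
-
-@smallexample
-END @{
-    # grep exits with a zero status only if it found a match
-    if (system("grep ERROR logfile > /dev/null") != 0)
-        print "no errors found"
-@}
-@end smallexample
-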
-For example, if the following fragment of code is put in your @code{awk}
-program:
-
-@smallexample
-END @{
- system("mail -s 'awk run done' operator < /dev/null")
-@}
-@end smallexample
-
-@noindent
-the system operator will be sent mail when the @code{awk} program
-finishes processing input and begins its end-of-input processing.
-
-Note that much the same result can be obtained by redirecting
-@code{print} or @code{printf} into a pipe. However, if your @code{awk}
-program is interactive, @code{system} is useful for cranking up large
-self-contained programs, such as a shell or an editor.@refill
-
-Some operating systems cannot implement the @code{system} function.
-@code{system} causes a fatal error if it is not supported.
-@end table
-
-@c fakenode --- for prepinfo
-@subheading Controlling Output Buffering with @code{system}
-@cindex flushing buffers
-@cindex buffers, flushing
-@cindex buffering output
-@cindex output, buffering
-
-Many utility programs will @dfn{buffer} their output; they save information
-to be written to a disk file or terminal in memory, until there is enough
-to be written in one operation. This is often more efficient than writing
-every little bit of information as soon as it is ready. However, sometimes
-it is necessary to force a program to @dfn{flush} its buffers; that is,
-write the information to its destination, even if a buffer is not full.
-You can do this from your @code{awk} program by calling @code{system}
-with a null string as its argument:
-
-@example
-system("") # flush output
-@end example
-
-@noindent
-@code{gawk} treats this use of the @code{system} function as a special
-case, and is smart enough not to run a shell (or other command
-interpreter) with the empty command. Therefore, with @code{gawk}, this
-idiom is not only useful, it is efficient. While this idiom should work
-with other @code{awk} implementations, it will not necessarily avoid
-starting an unnecessary shell.
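-
-For instance, in a sketch like the following (the @code{cat} command is
-only illustrative), the call to @code{system("")} ensures that the
-output of @code{print} appears before the output of the command:
-
-@example
-print "contents of", FILENAME ":"
-system("")               # flush awk's output buffer
-system("cat " FILENAME)  # now run the command
-@end example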
-@ignore
-Need a better explanation, perhaps in a separate paragraph. Explain that
-for
-
-awk 'BEGIN { print "hi"
- system("echo hello")
- print "howdy" }'
-
-that the output had better be
-
- hi
- hello
- howdy
-
-and not
-
- hello
- hi
- howdy
-
-which it would be if awk did not flush its buffers before calling system.
-@end ignore
-
-@node Time Functions, , I/O Functions, Built-in
-@section Functions for Dealing with Time Stamps
-
-@cindex time stamps
-@cindex time of day
-A common use for @code{awk} programs is the processing of log files.
-Log files often contain time stamp information, indicating when a
-particular log record was written. Many programs log their time stamp
-in the form returned by the @code{time} system call, which is the
-number of seconds since a particular epoch. On @sc{posix} systems,
-it is the number of seconds since Midnight, January 1, 1970, @sc{utc}.
-
-In order to make it easier to process such log files, and to easily produce
-useful reports, @code{gawk} provides two functions for working with time
-stamps. Both of these are @code{gawk} extensions; they are not specified
-in the @sc{posix} standard, nor are they in any other known version
-of @code{awk}.
-
-@table @code
-@item systime()
-@findex systime
-This function returns the current time as the number of seconds since
-the system epoch. On @sc{posix} systems, this is the number of seconds
-since Midnight, January 1, 1970, @sc{utc}. It may be a different number on
-other systems.
-
-@item strftime(@var{format}, @var{timestamp})
-@findex strftime
-This function returns a string. It is similar to the function of the
-same name in the @sc{ansi} C standard library. The time specified by
-@var{timestamp} is used to produce a string, based on the contents
-of the @var{format} string.
-@end table
-
-The @code{systime} function allows you to compare a time stamp from a
-log file with the current time of day. In particular, it is easy to
-determine how long ago a particular record was logged. It also allows
-you to produce log records using the ``seconds since the epoch'' format.
-
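-For instance, if the last field of each record holds such a time stamp
-(an assumption made only for this sketch), the age of each record can
-be computed like this:
-
-@smallexample
-@{ print "record is", systime() - $NF, "seconds old" @}
-@end smallexample
-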
-The @code{strftime} function allows you to easily turn a time stamp
-into human-readable information. It is similar in nature to the @code{sprintf}
-function, copying non-format specification characters verbatim to the
-returned string, and substituting date and time values for format
-specifications in the @var{format} string. If no @var{timestamp} argument
-is supplied, @code{gawk} will use the current time of day as the
-time stamp.@refill
-
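-As a minimal sketch, the following program prints the current date and
-time using a few of the format specifications listed below:
-
-@smallexample
-awk 'BEGIN @{ print strftime("%A %B %d %Y, %H:%M:%S") @}'
-@end smallexample
-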
-@code{strftime} is guaranteed by the @sc{ansi} C standard to support
-the following date format specifications:
-
-@table @code
-@item %a
-The locale's abbreviated weekday name.
-
-@item %A
-The locale's full weekday name.
-
-@item %b
-The locale's abbreviated month name.
-
-@item %B
-The locale's full month name.
-
-@item %c
-The locale's ``appropriate'' date and time representation.
-
-@item %d
-The day of the month as a decimal number (01--31).
-
-@item %H
-The hour (24-hour clock) as a decimal number (00--23).
-
-@item %I
-The hour (12-hour clock) as a decimal number (01--12).
-
-@item %j
-The day of the year as a decimal number (001--366).
-
-@item %m
-The month as a decimal number (01--12).
-
-@item %M
-The minute as a decimal number (00--59).
-
-@item %p
-The locale's equivalent of the AM/PM designations associated
-with a 12-hour clock.
-
-@item %S
-The second as a decimal number (00--61). (Occasionally there are
-minutes in a year with one or two leap seconds, which is why the
-seconds can go from 0 all the way to 61.)
-
-@item %U
-The week number of the year (the first Sunday as the first day of week 1)
-as a decimal number (00--53).
-
-@item %w
-The weekday as a decimal number (0--6). Sunday is day 0.
-
-@item %W
-The week number of the year (the first Monday as the first day of week 1)
-as a decimal number (00--53).
-
-@item %x
-The locale's ``appropriate'' date representation.
-
-@item %X
-The locale's ``appropriate'' time representation.
-
-@item %y
-The year without century as a decimal number (00--99).
-
-@item %Y
-The year with century as a decimal number.
-
-@item %Z
-The time zone name or abbreviation, or no characters if
-no time zone is determinable.
-
-@item %%
-A literal @samp{%}.
-@end table
-
-@c The parenthetical remark here should really be a footnote, but
-@c it gave formatting problems at the FSF. So for now put it in
-@c parentheses.
-If a conversion specifier is not one of the above, the behavior is
-undefined. (This is because the @sc{ansi} standard for C leaves the
-behavior of the C version of @code{strftime} undefined, and @code{gawk}
-will use the system's version of @code{strftime} if it's there.
-Typically, the conversion specifier will either not appear in the
-returned string, or it will appear literally.)
-
-Informally, a @dfn{locale} is the geographic place in which a program
-is meant to run. For example, a common way to abbreviate the date
-September 4, 1991 in the United States would be ``9/4/91''.
-In many countries in Europe, however, it would be abbreviated ``4.9.91''.
-Thus, the @samp{%x} specification in a @code{"US"} locale might produce
-@samp{9/4/91}, while in a @code{"EUROPE"} locale, it might produce
-@samp{4.9.91}. The @sc{ansi} C standard defines a default @code{"C"}
-locale, which is an environment that is typical of what most C programmers
-are used to.
-
-A public-domain C version of @code{strftime} is shipped with @code{gawk}
-for systems that are not yet fully @sc{ansi}-compliant. If that version is
-used to compile @code{gawk} (@pxref{Installation, ,Installing @code{gawk}}),
-then the following additional format specifications are available:@refill
-
-@table @code
-@item %D
-Equivalent to specifying @samp{%m/%d/%y}.
-
-@item %e
-The day of the month, padded with a blank if it is only one digit.
-
-@item %h
-Equivalent to @samp{%b}, above.
-
-@item %n
-A newline character (ASCII LF).
-
-@item %r
-Equivalent to specifying @samp{%I:%M:%S %p}.
-
-@item %R
-Equivalent to specifying @samp{%H:%M}.
-
-@item %T
-Equivalent to specifying @samp{%H:%M:%S}.
-
-@item %t
-A TAB character.
-
-@item %k
-The hour (24-hour clock) as a decimal number (0--23).
-Single-digit numbers are padded with a blank.
-
-@item %l
-The hour (12-hour clock) as a decimal number (1--12).
-Single-digit numbers are padded with a blank.
-
-@item %C
-The century, as a number between 00 and 99.
-
-@item %u
-The weekday as a decimal number (1--7). Monday is day 1.
-
-@item %V
-The week number of the year (the first Monday as the first day of week 1)
-as a decimal number (01--53).
-The method for determining the week number is as specified by ISO 8601
-(to wit: if the week containing January 1 has four or more days in the
-new year, then it is week 1, otherwise it is week 53 of the previous year
-and the next week is week 1).@refill
-
-@item %Ec %EC %Ex %Ey %EY %Od %Oe %OH %OI
-@itemx %Om %OM %OS %Ou %OU %OV %Ow %OW %Oy
-These are ``alternate representations'' for the specifications
-that use only the second letter (@samp{%c}, @samp{%C}, and so on).
-They are recognized, but their normal representations are used.
-(These facilitate compliance with the @sc{posix} @code{date}
-utility.)@refill
-
-@item %v
-The date in VMS format (e.g. 20-JUN-1991).
-@end table
-
-Here are two examples that use @code{strftime}. The first is an
-@code{awk} version of the C @code{ctime} function. (This is a
-user-defined function, which we have not discussed yet.
-@xref{User-defined, ,User-defined Functions}, for more information.)
-
-@smallexample
-# ctime.awk
-#
-# awk version of C ctime(3) function
-
-function ctime(ts, format)
-@{
- format = "%a %b %e %H:%M:%S %Z %Y"
- if (ts == 0)
- ts = systime() # use current time as default
- return strftime(format, ts)
-@}
-@end smallexample
-
-This next example is an @code{awk} implementation of the @sc{posix}
-@code{date} utility. Normally, the @code{date} utility prints the
-current date and time of day in a well-known format. However, if you
-provide an argument to it that begins with a @samp{+}, @code{date}
-will copy non-format specifier characters to the standard output, and
-will interpret the current time according to the format specifiers in
-the string. For example:
-
-@smallexample
-date '+Today is %A, %B %d, %Y.'
-@end smallexample
-
-@noindent
-might print
-
-@smallexample
-Today is Thursday, July 11, 1991.
-@end smallexample
-
-Here is the @code{awk} version of the @code{date} utility.
-
-@smallexample
-#! /usr/bin/gawk -f
-#
-# date --- implement the P1003.2 Draft 11 'date' command
-#
-# Bug: does not recognize the -u argument.
-
-BEGIN \
-@{
- format = "%a %b %e %H:%M:%S %Z %Y"
- exitval = 0
-
- if (ARGC > 2)
- exitval = 1
- else if (ARGC == 2) @{
- format = ARGV[1]
- if (format ~ /^\+/)
- format = substr(format, 2) # remove leading +
- @}
- print strftime(format)
- exit exitval
-@}
-@end smallexample
-
-@node User-defined, Built-in Variables, Built-in, Top
-@chapter User-defined Functions
-
-@cindex user-defined functions
-@cindex functions, user-defined
-Complicated @code{awk} programs can often be simplified by defining
-your own functions. User-defined functions can be called just like
-built-in ones (@pxref{Function Calls}), but it is up to you to define
-them---to tell @code{awk} what they should do.
-
-@menu
-* Definition Syntax:: How to write definitions and what they mean.
-* Function Example:: An example function definition and
- what it does.
-* Function Caveats:: Things to watch out for.
-* Return Statement:: Specifying the value a function returns.
-@end menu
-
-@node Definition Syntax, Function Example, User-defined, User-defined
-@section Syntax of Function Definitions
-@cindex defining functions
-@cindex function definition
-
-Definitions of functions can appear anywhere between the rules of the
-@code{awk} program. Thus, the general form of an @code{awk} program is
-extended to include sequences of rules @emph{and} user-defined function
-definitions.
-
-The definition of a function named @var{name} looks like this:
-
-@example
-function @var{name} (@var{parameter-list}) @{
- @var{body-of-function}
-@}
-@end example
-
-@noindent
-@var{name} is the name of the function to be defined. A valid function
-name is like a valid variable name: a sequence of letters, digits and
-underscores, not starting with a digit. Functions share the same pool
-of names as variables and arrays.
-
-@var{parameter-list} is a list of the function's arguments and local
-variable names, separated by commas. When the function is called,
-the argument names are used to hold the argument values given in
-the call. The local variables are initialized to the null string.
-
-The @var{body-of-function} consists of @code{awk} statements. It is the
-most important part of the definition, because it says what the function
-should actually @emph{do}. The argument names exist to give the body a
-way to talk about the arguments; local variables, to give the body
-places to keep temporary values.
-
-Argument names are not distinguished syntactically from local variable
-names; instead, the number of arguments supplied when the function is
-called determines how many argument variables there are. Thus, if three
-argument values are given, the first three names in @var{parameter-list}
-are arguments, and the rest are local variables.
-
-It follows that if the number of arguments is not the same in all calls
-to the function, some of the names in @var{parameter-list} may be
-arguments on some occasions and local variables on others. Another
-way to think of this is that omitted arguments default to the
-null string.
-
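-As a sketch of this behavior (the function and variable names are
-purely illustrative), the second name in the following parameter list
-serves as a local variable in one call and as an argument in the other:
-
-@example
-function greet(name, greeting)
-@{
-    if (greeting == "")
-        greeting = "Hello"
-    print greeting ", " name
-@}
-
-BEGIN @{
-    greet("world")             # prints "Hello, world"
-    greet("world", "Howdy")    # prints "Howdy, world"
-@}
-@end example
-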
-Usually when you write a function you know how many names you intend to
-use for arguments and how many you intend to use as locals. By
-convention, you should write an extra space between the arguments and
-the locals, so other people can follow how your function is
-supposed to be used.
-
-During execution of the function body, the arguments and local variable
-values hide or @dfn{shadow} any variables of the same names used in the
-rest of the program. The shadowed variables are not accessible in the
-function definition, because there is no way to name them while their
-names have been taken away for the local variables. All other variables
-used in the @code{awk} program can be referenced or set normally in the
-function definition.
-
-The arguments and local variables last only as long as the function body
-is executing. Once the body finishes, the shadowed variables come back.
-
-The function body can contain expressions which call functions. They
-can even call this function, either directly or by way of another
-function. When this happens, we say the function is @dfn{recursive}.
-
-There is no need in @code{awk} to put the definition of a function
-before all uses of the function. This is because @code{awk} reads the
-entire program before starting to execute any of it.
-
-In many @code{awk} implementations, the keyword @code{function} may be
-abbreviated @code{func}. However, @sc{posix} only specifies the use of
-the keyword @code{function}. This actually has some practical implications.
-If @code{gawk} is in @sc{posix}-compatibility mode
-(@pxref{Command Line, ,Invoking @code{awk}}), then the following
-statement will @emph{not} define a function:@refill
-
-@example
-func foo() @{ a = sqrt($1) ; print a @}
-@end example
-
-@noindent
-Instead it defines a rule that, for each record, concatenates the value
-of the variable @samp{func} with the return value of the function @samp{foo},
-and based on the truth value of the result, executes the corresponding action.
-This is probably not what was desired. (@code{awk} accepts this input as
-syntactically valid, since functions may be used before they are defined
-in @code{awk} programs.)
-
-@node Function Example, Function Caveats, Definition Syntax, User-defined
-@section Function Definition Example
-
-Here is an example of a user-defined function, called @code{myprint}, that
-takes a number and prints it in a specific format.
-
-@example
-function myprint(num)
-@{
- printf "%6.3g\n", num
-@}
-@end example
-
-@noindent
-To illustrate, here is an @code{awk} rule which uses our @code{myprint}
-function:
-
-@example
-$3 > 0 @{ myprint($3) @}
-@end example
-
-@noindent
-This program prints, in our special format, all the third fields that
-contain a positive number in our input. Therefore, when given:
-
-@example
- 1.2 3.4 5.6 7.8
- 9.10 11.12 -13.14 15.16
-17.18 19.20 21.22 23.24
-@end example
-
-@noindent
-this program, using our function to format the results, prints:
-
-@example
- 5.6
- 21.2
-@end example
-
-Here is a rather contrived example of a recursive function. It prints a
-string backwards:
-
-@example
-function rev (str, len) @{
- if (len == 0) @{
- printf "\n"
- return
- @}
- printf "%c", substr(str, len, 1)
- rev(str, len - 1)
-@}
-@end example
-
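-Assuming the definition of @code{rev} is part of the same program, a
-rule such as the following (a minimal sketch) prints each input line
-reversed:
-
-@example
-@{ rev($0, length($0)) @}
-@end example
-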
-@node Function Caveats, Return Statement, Function Example, User-defined
-@section Calling User-defined Functions
-
-@dfn{Calling a function} means causing the function to run and do its job.
-A function call is an expression, and its value is the value returned by
-the function.
-
-A function call consists of the function name followed by the arguments
-in parentheses. What you write in the call for the arguments are
-@code{awk} expressions; each time the call is executed, these
-expressions are evaluated, and the values are the actual arguments. For
-example, here is a call to @code{foo} with three arguments (the first
-being a string concatenation):
-
-@example
-foo(x y, "lose", 4 * z)
-@end example
-
-@quotation
-@strong{Caution:} whitespace characters (spaces and tabs) are not allowed
-between the function name and the open-parenthesis of the argument list.
-If you write whitespace by mistake, @code{awk} might think that you mean
-to concatenate a variable with an expression in parentheses. However, it
-notices that you used a function name and not a variable name, and reports
-an error.
-@end quotation
-
-@cindex call by value
-When a function is called, it is given a @emph{copy} of the values of
-its arguments. This is called @dfn{call by value}. The caller may use
-a variable as the expression for the argument, but the called function
-does not know this: it only knows what value the argument had. For
-example, if you write this code:
-
-@example
-foo = "bar"
-z = myfunc(foo)
-@end example
-
-@noindent
-then you should not think of the argument to @code{myfunc} as being
-``the variable @code{foo}.'' Instead, think of the argument as the
-string value, @code{"bar"}.
-
-If the function @code{myfunc} alters the values of its local variables,
-this has no effect on any other variables. In particular, if @code{myfunc}
-does this:
-
-@example
-function myfunc (win) @{
- print win
- win = "zzz"
- print win
-@}
-@end example
-
-@noindent
-to change its first argument variable @code{win}, this @emph{does not}
-change the value of @code{foo} in the caller. The role of @code{foo} in
-calling @code{myfunc} ended when its value, @code{"bar"}, was computed.
-If @code{win} also exists outside of @code{myfunc}, the function body
-cannot alter this outer value, because it is shadowed during the
-execution of @code{myfunc} and cannot be seen or changed from there.
-
-@cindex call by reference
-However, when arrays are the parameters to functions, they are @emph{not}
-copied. Instead, the array itself is made available for direct manipulation
-by the function. This is usually called @dfn{call by reference}.
-Changes made to an array parameter inside the body of a function @emph{are}
-visible outside that function.
-@ifinfo
-This can be @strong{very} dangerous if you do not watch what you are
-doing. For example:@refill
-@end ifinfo
-@iftex
-@emph{This can be very dangerous if you do not watch what you are
-doing.} For example:@refill
-@end iftex
-
-@example
-function changeit (array, ind, nvalue) @{
- array[ind] = nvalue
-@}
-
-BEGIN @{
- a[1] = 1 ; a[2] = 2 ; a[3] = 3
- changeit(a, 2, "two")
- printf "a[1] = %s, a[2] = %s, a[3] = %s\n", a[1], a[2], a[3]
- @}
-@end example
-
-@noindent
-prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because calling
-@code{changeit} stores @code{"two"} in the second element of @code{a}.
-
-@node Return Statement, , Function Caveats, User-defined
-@section The @code{return} Statement
-@cindex @code{return} statement
-
-The body of a user-defined function can contain a @code{return} statement.
-This statement returns control to the rest of the @code{awk} program. It
-can also be used to return a value for use in the rest of the @code{awk}
-program. It looks like this:@refill
-
-@example
-return @var{expression}
-@end example
-
-The @var{expression} part is optional. If it is omitted, then the returned
-value is undefined and, therefore, unpredictable.
-
-A @code{return} statement with no value expression is assumed at the end of
-every function definition. So if control reaches the end of the function
-body, then the function returns an unpredictable value. @code{awk}
-will not warn you if you use the return value of such a function; you will
-simply get unpredictable or unexpected results.
-
-Here is an example of a user-defined function that returns a value
-for the largest number among the elements of an array:@refill
-
-@example
-@group
-function maxelt (vec, i, ret) @{
- for (i in vec) @{
- if (ret == "" || vec[i] > ret)
- ret = vec[i]
- @}
- return ret
-@}
-@end group
-@end example
-
-@noindent
-You call @code{maxelt} with one argument, which is an array name. The local
-variables @code{i} and @code{ret} are not intended to be arguments;
-while there is nothing to stop you from passing two or three arguments
-to @code{maxelt}, the results would be strange. The extra space before
-@code{i} in the function parameter list is to indicate that @code{i} and
-@code{ret} are not supposed to be arguments. This is a convention which
-you should follow when you define functions.
-
-Here is a program that uses our @code{maxelt} function. It loads an
-array, calls @code{maxelt}, and then reports the maximum number in that
-array:@refill
-
-@example
-@group
-awk '
-function maxelt (vec, i, ret) @{
- for (i in vec) @{
- if (ret == "" || vec[i] > ret)
- ret = vec[i]
- @}
- return ret
-@}
-@end group
-
-@group
-# Load all fields of each record into nums.
-@{
- for(i = 1; i <= NF; i++)
- nums[NR, i] = $i
-@}
-
-END @{
- print maxelt(nums)
-@}'
-@end group
-@end example
-
-Given the following input:
-
-@example
-@group
- 1 5 23 8 16
-44 3 5 2 8 26
-256 291 1396 2962 100
--6 467 998 1101
-99385 11 0 225
-@end group
-@end example
-
-@noindent
-our program tells us (predictably) that:
-
-@example
-99385
-@end example
-
-@noindent
-is the largest number in our array.
-
-@node Built-in Variables, Command Line, User-defined, Top
-@chapter Built-in Variables
-@cindex built-in variables
-
-Most @code{awk} variables are available for you to use for your own
-purposes; they never change except when your program assigns values to
-them, and never affect anything except when your program examines them.
-
-A few variables have special built-in meanings. Some of them @code{awk}
-examines automatically, so that they enable you to tell @code{awk} how
-to do certain things. Others are set automatically by @code{awk}, so
-that they carry information from the internal workings of @code{awk} to
-your program.
-
-This chapter documents all the built-in variables of @code{gawk}. Most
-of them are also documented in the chapters where their areas of
-activity are described.
-
-@menu
-* User-modified:: Built-in variables that you change
- to control @code{awk}.
-* Auto-set:: Built-in variables where @code{awk}
- gives you information.
-@end menu
-
-@node User-modified, Auto-set, Built-in Variables, Built-in Variables
-@section Built-in Variables that Control @code{awk}
-@cindex built-in variables, user modifiable
-
-This is a list of the variables which you can change to control how
-@code{awk} does certain things.
-
-@table @code
-@iftex
-@vindex CONVFMT
-@end iftex
-@item CONVFMT
-This string is used by @code{awk} to control conversion of numbers to
-strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
-It works by being passed, in effect, as the first argument to the
-@code{sprintf} function. Its default value is @code{"%.6g"}.
-@code{CONVFMT} was introduced by the @sc{posix} standard.@refill
-
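-For example, this sketch shows how changing @code{CONVFMT} affects the
-string form of a number:
-
-@example
-BEGIN @{
-    CONVFMT = "%2.2f"
-    a = 12.153
-    b = a ""       # concatenating with "" converts a to a string
-    print b        # prints "12.15"
-@}
-@end example
-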
-@iftex
-@vindex FIELDWIDTHS
-@end iftex
-@item FIELDWIDTHS
-This is a space separated list of columns that tells @code{gawk}
-how to manage input with fixed, columnar boundaries. It is an
-experimental feature that is still evolving. Assigning to @code{FIELDWIDTHS}
-overrides the use of @code{FS} for field splitting.
-@xref{Constant Size, ,Reading Fixed-width Data}, for more information.@refill
-
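-For example, the following sketch splits each record into three
-fixed-width fields of six, four, and eight characters (the widths are
-only illustrative):
-
-@example
-BEGIN @{ FIELDWIDTHS = "6 4 8" @}
-@{ print $2 @}
-@end example
-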
-If @code{gawk} is in compatibility mode
-(@pxref{Command Line, ,Invoking @code{awk}}), then @code{FIELDWIDTHS}
-has no special meaning, and field splitting operations are done based
-exclusively on the value of @code{FS}.@refill
-
-@iftex
-@vindex FS
-@end iftex
-@item FS
-@code{FS} is the input field separator
-(@pxref{Field Separators, ,Specifying how Fields are Separated}).
-The value is a single-character string or a multi-character regular
-expression that matches the separations between fields in an input
-record.@refill
-
-The default value is @w{@code{" "}}, a string consisting of a single
-space. As a special exception, this value actually means that any
-sequence of spaces and tabs is a single separator. It also causes
-spaces and tabs at the beginning or end of a line to be ignored.
-
-You can set the value of @code{FS} on the command line using the
-@samp{-F} option:
-
-@example
-awk -F, '@var{program}' @var{input-files}
-@end example
-
-If @code{gawk} is using @code{FIELDWIDTHS} for field-splitting,
-assigning a value to @code{FS} will cause @code{gawk} to return to
-the normal, regexp-based, field splitting.
-
-@item IGNORECASE
-@iftex
-@vindex IGNORECASE
-@end iftex
-If @code{IGNORECASE} is nonzero, then @emph{all} regular expression
-matching is done in a case-independent fashion. In particular, regexp
-matching with @samp{~} and @samp{!~}, and the @code{gsub} @code{index},
-@code{match}, @code{split} and @code{sub} functions all ignore case when
-doing their particular regexp operations. @strong{Note:} since field
-splitting with the value of the @code{FS} variable is also a regular
-expression operation, that too is done with case ignored.
-@xref{Case-sensitivity, ,Case-sensitivity in Matching}.
-
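-For example, with the following sketch (the file name is only
-illustrative), lines containing @samp{volcano}, @samp{Volcano}, or
-@samp{VOLCANO} are all printed:
-
-@example
-awk 'BEGIN @{ IGNORECASE = 1 @}
-/volcano/ @{ print @}' datafile
-@end example
-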
-If @code{gawk} is in compatibility mode
-(@pxref{Command Line, ,Invoking @code{awk}}), then @code{IGNORECASE} has
-no special meaning, and regexp operations are always case-sensitive.@refill
-
-@item OFMT
-@iftex
-@vindex OFMT
-@end iftex
-This string is used by @code{awk} to control conversion of numbers to
-strings (@pxref{Conversion, ,Conversion of Strings and Numbers}) for
-printing with the @code{print} statement.
-It works by being passed, in effect, as the first argument to the
-@code{sprintf} function. Its default value is @code{"%.6g"}.
-Earlier versions of @code{awk} also used @code{OFMT} to specify the
-format for converting numbers to strings in general expressions; this
-has been taken over by @code{CONVFMT}.@refill
-
-@item OFS
-@iftex
-@vindex OFS
-@end iftex
-This is the output field separator (@pxref{Output Separators}). It is
-output between the fields output by a @code{print} statement. Its
-default value is @w{@code{" "}}, a string consisting of a single space.
-
-@item ORS
-@iftex
-@vindex ORS
-@end iftex
-This is the output record separator. It is output at the end of every
-@code{print} statement. Its default value is a string containing a
-single newline character, which could be written as @code{"\n"}.
-(@xref{Output Separators}.)@refill
-
-@item RS
-@iftex
-@vindex RS
-@end iftex
-This is @code{awk}'s input record separator. Its default value is a string
-containing a single newline character, which means that an input record
-consists of a single line of text.
-(@xref{Records, ,How Input is Split into Records}.)@refill
-
-@item SUBSEP
-@iftex
-@vindex SUBSEP
-@end iftex
-@code{SUBSEP} is the subscript separator. It has the default value of
-@code{"\034"}, and is used to separate the parts of the name of a
-multi-dimensional array. Thus, if you access @code{foo[12,3]}, it
-really accesses @code{foo["12\0343"]}
-(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).@refill
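-
-The following sketch makes this equivalence explicit:
-
-@example
-BEGIN @{
-    foo[12, 3] = "value"
-    if (("12" SUBSEP "3") in foo)   # same element as foo[12, 3]
-        print foo[12, 3]
-@}
-@end example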
-@end table
-
-@node Auto-set, , User-modified, Built-in Variables
-@section Built-in Variables that Convey Information
-
-This is a list of the variables that are set automatically by @code{awk}
-on certain occasions so as to provide information to your program.
-
-@table @code
-@item ARGC
-@itemx ARGV
-@iftex
-@vindex ARGC
-@vindex ARGV
-@end iftex
-The command-line arguments available to @code{awk} programs are stored in
-an array called @code{ARGV}. @code{ARGC} is the number of command-line
-arguments present. @xref{Command Line, ,Invoking @code{awk}}.
-@code{ARGV} is indexed from zero to @w{@code{ARGC - 1}}. For example:@refill
-
-@example
-awk 'BEGIN @{
- for (i = 0; i < ARGC; i++)
- print ARGV[i]
- @}' inventory-shipped BBS-list
-@end example
-
-@noindent
-In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]}
-contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains
-@code{"BBS-list"}. The value of @code{ARGC} is 3, one more than the
-index of the last element in @code{ARGV} since the elements are numbered
-from zero.@refill
-
-The names @code{ARGC} and @code{ARGV}, as well as the convention of indexing
-the array from 0 to @w{@code{ARGC - 1}}, are derived from the C language's
-method of accessing command line arguments.@refill
-
-Notice that the @code{awk} program is not entered in @code{ARGV}. The
-other special command line options, with their arguments, are also not
-entered. But variable assignments on the command line @emph{are}
-treated as arguments, and do show up in the @code{ARGV} array.
-
-Your program can alter @code{ARGC} and the elements of @code{ARGV}.
-Each time @code{awk} reaches the end of an input file, it uses the next
-element of @code{ARGV} as the name of the next input file. By storing a
-different string there, your program can change which files are read.
-You can use @code{"-"} to represent the standard input. By storing
-additional elements and incrementing @code{ARGC} you can cause
-additional files to be read.
-
-If you decrease the value of @code{ARGC}, that eliminates input files
-from the end of the list. By recording the old value of @code{ARGC}
-elsewhere, your program can treat the eliminated arguments as
-something other than file names.
-
-To eliminate a file from the middle of the list, store the null string
-(@code{""}) into @code{ARGV} in place of the file's name. As a
-special feature, @code{awk} ignores file names that have been
-replaced with the null string.
-
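-For example, the following sketch (the file name @file{skipme} is only
-illustrative) removes a particular file from the list before any input
-is read:
-
-@example
-BEGIN @{
-    for (i = 1; i < ARGC; i++)
-        if (ARGV[i] == "skipme")
-            ARGV[i] = ""      # awk ignores null file names
-@}
-@end example
-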
-@ignore
-see getopt.awk in the examples...
-@end ignore
-
-@item ARGIND
-@vindex ARGIND
-The index in @code{ARGV} of the current file being processed.
-Every time @code{gawk} opens a new data file for processing, it sets
-@code{ARGIND} to the index in @code{ARGV} of the file name. Thus, the
-condition @samp{FILENAME == ARGV[ARGIND]} is always true.
-
-This variable is useful in file processing; it allows you to tell how far
-along you are in the list of data files, and to distinguish between
-multiple successive instances of the same filename on the command line.
-
-While you can change the value of @code{ARGIND} within your @code{awk}
-program, @code{gawk} will automatically set it to a new value when the
-next file is opened.
-
-This variable is a @code{gawk} extension; in other @code{awk} implementations
-it is not special.
-
-@item ENVIRON
-@vindex ENVIRON
-This is an array that contains the values of the environment. The array
-indices are the environment variable names; the values are the values of
-the particular environment variables. For example,
-@code{ENVIRON["HOME"]} might be @file{/u/close}. Changing this array
-does not affect the environment passed on to any programs that
-@code{awk} may spawn via redirection or the @code{system} function.
-(In a future version of @code{gawk}, it may do so.)
-
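-For example, the following sketch (assuming the @code{HOME} and
-@code{PATH} environment variables are set) prints their values:
-
-@example
-BEGIN @{
-    print "home is", ENVIRON["HOME"]
-    print "path is", ENVIRON["PATH"]
-@}
-@end example
-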
-Some operating systems may not have environment variables.
-On such systems, the array @code{ENVIRON} is empty.
-
-@item ERRNO
-@iftex
-@vindex ERRNO
-@end iftex
-If a system error occurs either doing a redirection for @code{getline},
-during a read for @code{getline}, or during a @code{close} operation,
-then @code{ERRNO} will contain a string describing the error.
-
-This variable is a @code{gawk} extension; in other @code{awk} implementations
-it is not special.
-
-@item FILENAME
-@iftex
-@vindex FILENAME
-@end iftex
-This is the name of the file that @code{awk} is currently reading.
-If @code{awk} is reading from the standard input (in other words,
-there are no files listed on the command line),
-@code{FILENAME} is set to @code{"-"}.
-@code{FILENAME} is changed each time a new file is read
-(@pxref{Reading Files, ,Reading Input Files}).@refill
-
-@item FNR
-@iftex
-@vindex FNR
-@end iftex
-@code{FNR} is the current record number in the current file. @code{FNR} is
-incremented each time a new record is read
-(@pxref{Getline, ,Explicit Input with @code{getline}}). It is reinitialized
-to 0 each time a new input file is started.@refill
-
-@item NF
-@iftex
-@vindex NF
-@end iftex
-@code{NF} is the number of fields in the current input record.
-@code{NF} is set each time a new record is read, when a new field is
-created, or when @code{$0} changes (@pxref{Fields, ,Examining Fields}).@refill
-
-@item NR
-@iftex
-@vindex NR
-@end iftex
-This is the number of input records @code{awk} has processed since
-the beginning of the program's execution.
-(@pxref{Records, ,How Input is Split into Records}).
-@code{NR} is set each time a new record is read.@refill
-
-@item RLENGTH
-@iftex
-@vindex RLENGTH
-@end iftex
-@code{RLENGTH} is the length of the substring matched by the
-@code{match} function
-(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
-@code{RLENGTH} is set by invoking the @code{match} function. Its value
-is the length of the matched string, or @minus{}1 if no match was found.@refill
-
-@item RSTART
-@iftex
-@vindex RSTART
-@end iftex
-@code{RSTART} is the start-index in characters of the substring matched by the
-@code{match} function
-(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
-@code{RSTART} is set by invoking the @code{match} function. Its value
-is the position within the string where the matched substring starts, or 0
-if no match was found.@refill
-@end table
-
-@node Command Line, Language History, Built-in Variables, Top
-@c node-name, next, previous, up
-@chapter Invoking @code{awk}
-@cindex command line
-@cindex invocation of @code{gawk}
-@cindex arguments, command line
-@cindex options, command line
-@cindex long options
-@cindex options, long
-
-There are two ways to run @code{awk}: with an explicit program, or with
-one or more program files. Here are templates for both of them; items
-enclosed in @samp{@r{[}@dots{}@r{]}} in these templates are optional.
-
-Besides traditional one-letter @sc{posix}-style options, @code{gawk} also
-supports GNU long named options.
-
-@example
-awk @r{[@var{POSIX or GNU style options}]} -f progfile @r{[@code{--}]} @var{file} @dots{}
-awk @r{[@var{POSIX or GNU style options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
-@end example
-
-@menu
-* Options:: Command line options and their meanings.
-* Other Arguments:: Input file names and variable assignments.
-* AWKPATH Variable:: Searching directories for @code{awk} programs.
-* Obsolete:: Obsolete Options and/or features.
-* Undocumented:: Undocumented Options and Features.
-@end menu
-
-@node Options, Other Arguments, Command Line, Command Line
-@section Command Line Options
-
-Options begin with a minus sign, and consist of a single character.
-GNU style long named options consist of two minus signs and
-a keyword that can be abbreviated if the abbreviation allows the option
-to be uniquely identified. If the option takes an argument, then the
-keyword is immediately followed by an equals sign (@samp{=}) and the
-argument's value. For brevity, the discussion below only refers to the
-traditional short options; however, the long and short options are
-interchangeable in all contexts.
-
-Each long named option for @code{gawk} has a corresponding
-@sc{posix}-style option. The options and their meanings are as follows:
-
-@table @code
-@item -F @var{fs}
-@itemx --field-separator=@var{fs}
-@iftex
-@cindex @code{-F} option
-@end iftex
-@cindex @code{--field-separator} option
-Sets the @code{FS} variable to @var{fs}
-(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
-
-@item -f @var{source-file}
-@itemx --file=@var{source-file}
-@iftex
-@cindex @code{-f} option
-@end iftex
-@cindex @code{--file} option
-Indicates that the @code{awk} program is to be found in @var{source-file}
-instead of in the first non-option argument.
-
-@item -v @var{var}=@var{val}
-@itemx --assign=@var{var}=@var{val}
-@cindex @samp{-v} option
-@cindex @code{--assign} option
-Sets the variable @var{var} to the value @var{val} @emph{before}
-execution of the program begins. Such variable values are available
-inside the @code{BEGIN} rule (see below for a fuller explanation).
-
-The @samp{-v} option can only set one variable, but you can use
-it more than once, setting another variable each time, like this:
-@samp{@w{-v foo=1} @w{-v bar=2}}.
-
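-For example, the following sketch (the variable name, its value, and
-the file name are only illustrative) uses a variable assigned with
-@samp{-v} inside a pattern, printing only the third record of the file:
-
-@example
-awk -v n=3 'NR == n @{ print @}' datafile
-@end example
-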
-@item -W @var{gawk-opt}
-@cindex @samp{-W} option
-Following the @sc{posix} standard, options that are implementation
-specific are supplied as arguments to the @samp{-W} option. With @code{gawk},
-these arguments may be separated by commas, or quoted and separated by
-whitespace. Case is ignored when processing these options. These options
-also have corresponding GNU style long named options. The following
-@code{gawk}-specific options are available:
-
-@table @code
-@item -W compat
-@itemx --compat
-@cindex @code{--compat} option
-Specifies @dfn{compatibility mode}, in which the GNU extensions in
-@code{gawk} are disabled, so that @code{gawk} behaves just like Unix
-@code{awk}.
-@xref{POSIX/GNU, ,Extensions in @code{gawk} not in POSIX @code{awk}},
-which summarizes the extensions. Also see
-@ref{Compatibility Mode, ,Downward Compatibility and Debugging}.@refill
-
-@item -W copyleft
-@itemx -W copyright
-@itemx --copyleft
-@itemx --copyright
-@cindex @code{--copyleft} option
-@cindex @code{--copyright} option
-Print the short version of the General Public License.
-This option may disappear in a future version of @code{gawk}.
-
-@item -W help
-@itemx -W usage
-@itemx --help
-@itemx --usage
-@cindex @code{--help} option
-@cindex @code{--usage} option
-Print a ``usage'' message summarizing the short and long style options
-that @code{gawk} accepts, and then exit.
-
-@item -W lint
-@itemx --lint
-@cindex @code{--lint} option
-Provide warnings about constructs that are dubious or non-portable to
-other @code{awk} implementations.
-Some warnings are issued when @code{gawk} first reads your program. Others
-are issued at run-time, as your program executes.
-
-@item -W posix
-@itemx --posix
-@cindex @code{--posix} option
-Operate in strict @sc{posix} mode. This disables all @code{gawk}
-extensions (just like @code{-W compat}), and adds the following additional
-restrictions:
-
-@itemize @bullet{}
-@item
-@code{\x} escape sequences are not recognized
-(@pxref{Constants, ,Constant Expressions}).@refill
-
-@item
-The synonym @code{func} for the keyword @code{function} is not
-recognized (@pxref{Definition Syntax, ,Syntax of Function Definitions}).
-
-@item
-The operators @samp{**} and @samp{**=} cannot be used in
-place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators},
-and also @pxref{Assignment Ops, ,Assignment Expressions}).@refill
-
-@item
-Specifying @samp{-Ft} on the command line does not set the value
-of @code{FS} to be a single tab character
-(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
-@end itemize
-
-Although you can supply both @samp{-W compat} and @samp{-W posix} on the
-command line, @samp{-W posix} will take precedence.
-
-@item -W source=@var{program-text}
-@itemx --source=@var{program-text}
-@cindex @code{--source} option
-Program source code is taken from the @var{program-text}. This option
-allows you to mix @code{awk} source code in files with program source
-code that you would enter on the command line. This is particularly useful
-when you have library functions that you wish to use from your command line
-programs (@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
-
-@item -W version
-@itemx --version
-@cindex @code{--version} option
-Prints version information for this particular copy of @code{gawk}.
-This is so you can determine if your copy of @code{gawk} is up to date
-with respect to whatever the Free Software Foundation is currently
-distributing. This option may disappear in a future version of @code{gawk}.
-@end table
-
-@item --
-Signals the end of the command line options. The following arguments
-are not treated as options even if they begin with @samp{-}. This
-interpretation of @samp{--} follows the @sc{posix} argument parsing
-conventions.
-
-This is useful if you have file names that start with @samp{-},
-or in shell scripts, if you have file names that will be specified
-by the user which could start with @samp{-}.
-@end table
-
-Any other options are flagged as invalid with a warning message, but
-are otherwise ignored.
-
-In compatibility mode, as a special case, if the value of @var{fs} supplied
-to the @samp{-F} option is @samp{t}, then @code{FS} is set to the tab
-character (@code{"\t"}). This is only true for @samp{-W compat}, and not
-for @samp{-W posix}
-(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
-
-If the @samp{-f} option is @emph{not} used, then the first non-option
-command line argument is expected to be the program text.
-
-The @samp{-f} option may be used more than once on the command line.
-If it is, @code{awk} reads its program source from all of the named files, as
-if they had been concatenated together into one big file. This is
-useful for creating libraries of @code{awk} functions. Useful functions
-can be written once, and then retrieved from a standard place, instead
-of having to be included into each individual program. You can still
-type in a program at the terminal and use library functions, by specifying
-@samp{-f /dev/tty}. @code{awk} will read a file from the terminal
-to use as part of the @code{awk} program. After typing your program,
-type @kbd{Control-d} (the end-of-file character) to terminate it.
-(You may also use @samp{-f -} to read program source from the standard
-input, but then you will not be able to also use the standard input as a
-source of data.)
-
-Because mixing source files and command line @code{awk} programs is
-clumsy with the standard @code{awk} mechanisms, @code{gawk} provides the
-@samp{--source} option. This does not require you to pre-empt the standard
-input for your source code, and allows you to easily mix command line
-and library source code
-(@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
-
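-For example, a command line like the following sketch (the function
-name and file names are purely illustrative) combines a library file
-with program text given directly on the command line:
-
-@example
-gawk --source='@{ print myfunc($1) @}' -f mylib.awk datafile
-@end example
-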
-If no @samp{-f} or @samp{--source} option is specified, then @code{gawk}
-will use the first non-option command line argument as the text of the
-program source code.
-
-@node Other Arguments, AWKPATH Variable, Options, Command Line
-@section Other Command Line Arguments
-
-Any additional arguments on the command line are normally treated as
-input files to be processed in the order specified. However, an
-argument that has the form @code{@var{var}=@var{value}}, means to assign
-the value @var{value} to the variable @var{var}---it does not specify a
-file at all.
-
-@vindex ARGV
-All these arguments are made available to your @code{awk} program in the
-@code{ARGV} array (@pxref{Built-in Variables}). Command line options
-and the program text (if present) are omitted from the @code{ARGV}
-array. All other arguments, including variable assignments, are
-included.
-
-The distinction between file name arguments and variable-assignment
-arguments is made when @code{awk} is about to open the next input file.
-At that point in execution, it checks the ``file name'' to see whether
-it is really a variable assignment; if so, @code{awk} sets the variable
-instead of reading a file.
-
-Therefore, the variables actually receive the specified values after all
-previously specified files have been read. In particular, the values of
-variables assigned in this fashion are @emph{not} available inside a
-@code{BEGIN} rule
-(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}),
-since such rules are run before @code{awk} begins scanning the argument list.
-The values given on the command line are processed for escape sequences
-(@pxref{Constants, ,Constant Expressions}).@refill
-
-In some earlier implementations of @code{awk}, when a variable assignment
-occurred before any file names, the assignment would happen @emph{before}
-the @code{BEGIN} rule was executed. Some applications came to depend
-upon this ``feature.'' When @code{awk} was changed to be more consistent,
-the @samp{-v} option was added to accommodate applications that depended
-upon this old behavior.
-
-The variable assignment feature is most useful for assigning to variables
-such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and
-output formats, before scanning the data files. It is also useful for
-controlling state if multiple passes are needed over a data file. For
-example:@refill
-
-@cindex multiple passes over data
-@cindex passes, multiple
-@smallexample
-awk 'pass == 1 @{ @var{pass 1 stuff} @}
- pass == 2 @{ @var{pass 2 stuff} @}' pass=1 datafile pass=2 datafile
-@end smallexample
-
-Given the variable assignment feature, the @samp{-F} option is not
-strictly necessary. It remains for historical compatibility.
-
-@node AWKPATH Variable, Obsolete, Other Arguments, Command Line
-@section The @code{AWKPATH} Environment Variable
-@cindex @code{AWKPATH} environment variable
-@cindex search path
-@cindex directory search
-@cindex path, search
-@iftex
-@cindex differences between @code{gawk} and @code{awk}
-@end iftex
-
-The previous section described how @code{awk} program files can be named
-on the command line with the @samp{-f} option. In some @code{awk}
-implementations, you must supply a precise path name for each program
-file, unless the file is in the current directory.
-
-But in @code{gawk}, if the file name supplied in the @samp{-f} option
-does not contain a @samp{/}, then @code{gawk} searches a list of
-directories (called the @dfn{search path}), one by one, looking for a
-file with the specified name.
-
-The search path is actually a string consisting of directory names
-separated by colons. @code{gawk} gets its search path from the
-@code{AWKPATH} environment variable. If that variable does not exist,
-@code{gawk} uses the default path, which is
-@samp{.:/usr/lib/awk:/usr/local/lib/awk}. (Programs written by
-system administrators should use an @code{AWKPATH} variable that
-does not include the current directory, @samp{.}.)@refill
-
-The search path feature is particularly useful for building up libraries
-of useful @code{awk} functions. The library files can be placed in a
-standard directory that is in the default path, and then specified on
-the command line with a short file name. Otherwise, the full file name
-would have to be typed for each file.
-
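-For example, with a setting such as the following (the directory and
-file names are only illustrative, and a Bourne-style shell is assumed),
-a library file installed in one of those directories can be named on
-the command line without its full path:
-
-@example
-AWKPATH=".:$HOME/awklib:/usr/local/lib/awk"
-export AWKPATH
-gawk -f getopt.awk -f myprog.awk datafile
-@end example
-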
-By combining the @samp{--source} and @samp{-f} options, your command line
-@code{awk} programs can use facilities in @code{awk} library files.
-
-Path searching is not done if @code{gawk} is in compatibility mode.
-This is true for both @samp{-W compat} and @samp{-W posix}.
-@xref{Options, ,Command Line Options}.
-
-@strong{Note:} if you want files in the current directory to be found,
-you must include the current directory in the path, either by writing
-@file{.} as an entry in the path, or by writing a null entry in the
-path. (A null entry is indicated by starting or ending the path with a
-colon, or by placing two colons next to each other (@samp{::}).) If the
-current directory is not included in the path, then files cannot be
-found in the current directory. This path search mechanism is identical
-to the shell's.
-@c someday, @cite{The Bourne Again Shell}....
-
-@node Obsolete, Undocumented, AWKPATH Variable, Command Line
-@section Obsolete Options and/or Features
-
-@cindex deprecated options
-@cindex obsolete options
-@cindex deprecated features
-@cindex obsolete features
-This section describes features and/or command line options from the
-previous release of @code{gawk} that are either not available in the
-current version, or that are still supported but deprecated (meaning that
-they will @emph{not} be in the next release).
-
-@c update this section for each release!
-
-For version 2.15 of @code{gawk}, the following command line options
-from version 2.11.1 are no longer recognized.
-
-@table @samp
-@ignore
-@item -nostalgia
-Use @samp{-W nostalgia} instead.
-@end ignore
-
-@item -c
-Use @samp{-W compat} instead.
-
-@item -V
-Use @samp{-W version} instead.
-
-@item -C
-Use @samp{-W copyright} instead.
-
-@item -a
-@itemx -e
-These options produce an ``unrecognized option'' error message but have
-no effect on the execution of @code{gawk}. The @sc{posix} standard now
-specifies traditional @code{awk} regular expressions for the @code{awk} utility.
-@end table
-
-The public-domain version of @code{strftime} that is distributed with
-@code{gawk} changed for the 2.14 release. The @samp{%V} conversion specifier
-that used to generate the date in VMS format was changed to @samp{%v}.
-This is because the @sc{posix} standard for the @code{date} utility now
-specifies a @samp{%V} conversion specifier.
-@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for details.
-
-@node Undocumented, , Obsolete, Command Line
-@section Undocumented Options and Features
-
-This section intentionally left blank.
-
-@c Read The Source, Luke!
-
-@ignore
-@c If these came out in the Info file or TeX manual, then they wouldn't
-@c be undocumented, would they?
-
-@code{gawk} has one undocumented option:
-
-@table @samp
-@item -W nostalgia
-Print the message @code{"awk: bailing out near line 1"} and dump core.
-This option was inspired by the common behavior of very early versions of
-Unix @code{awk}, and by a t-shirt.
-@end table
-
-Early versions of @code{awk} used to not require any separator (either
-a newline or @samp{;}) between the rules in @code{awk} programs. Thus,
-it was common to see one-line programs like:
-
-@example
-awk '@{ sum += $1 @} END @{ print sum @}'
-@end example
-
-@code{gawk} actually supports this, but it is purposely undocumented
-since it is considered bad style. The correct way to write such a program
-is either
-
-@example
-awk '@{ sum += $1 @} ; END @{ print sum @}'
-@end example
-
-@noindent
-or
-
-@example
-awk '@{ sum += $1 @}
- END @{ print sum @}' data
-@end example
-
-@noindent
-@xref{Statements/Lines, ,@code{awk} Statements versus Lines}, for a fuller
-explanation.@refill
-
-As an accident of the implementation of the original Unix @code{awk}, if
-a built-in function used @code{$0} as its default argument, it was possible
-to call that function without the parentheses. In particular, it was
-common practice to use the @code{length} function in this fashion.
-For example, the pipeline:
-
-@example
-echo abcdef | awk '@{ print length @}'
-@end example
-
-@noindent
-would print @samp{6}.
-
-For backwards compatibility with old programs, @code{gawk} supports
-this usage, but only for the @code{length} function. New programs should
-@emph{not} call the @code{length} function this way. In particular,
-this usage will not be portable to other @sc{posix} compliant versions
-of @code{awk}. It is also poor style.
-
-@end ignore
-
-@node Language History, Installation, Command Line, Top
-@chapter The Evolution of the @code{awk} Language
-
-This manual describes the GNU implementation of @code{awk}, which is patterned
-after the @sc{posix} specification. Many @code{awk} users are only familiar
-with the original @code{awk} implementation in Version 7 Unix, which is also
-the basis for the version in Berkeley Unix (through 4.3--Reno). This chapter
-briefly describes the evolution of the @code{awk} language.
-
-@menu
-* V7/S5R3.1:: The major changes between V7 and
- System V Release 3.1.
-* S5R4:: Minor changes between System V
- Releases 3.1 and 4.
-* POSIX:: New features from the @sc{posix} standard.
-* POSIX/GNU:: The extensions in @code{gawk}
- not in @sc{posix} @code{awk}.
-@end menu
-
-@node V7/S5R3.1, S5R4, Language History, Language History
-@section Major Changes between V7 and S5R3.1
-
-The @code{awk} language evolved considerably between the release of
-Version 7 Unix (1978) and the new version first made widely available in
-System V Release 3.1 (1987). This section summarizes the changes, with
-cross-references to further details.
-
-@itemize @bullet
-@item
-The requirement for @samp{;} to separate rules on a line
-(@pxref{Statements/Lines, ,@code{awk} Statements versus Lines}).
-
-@item
-User-defined functions, and the @code{return} statement
-(@pxref{User-defined, ,User-defined Functions}).
-
-@item
-The @code{delete} statement (@pxref{Delete, ,The @code{delete} Statement}).
-
-@item
-The @code{do}-@code{while} statement
-(@pxref{Do Statement, ,The @code{do}-@code{while} Statement}).@refill
-
-@item
-The built-in functions @code{atan2}, @code{cos}, @code{sin}, @code{rand} and
-@code{srand} (@pxref{Numeric Functions, ,Numeric Built-in Functions}).
-
-@item
-The built-in functions @code{gsub}, @code{sub}, and @code{match}
-(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
-
-@item
-The built-in functions @code{close}, which closes an open file, and
-@code{system}, which allows the user to execute operating system
-commands (@pxref{I/O Functions, ,Built-in Functions for Input/Output}).@refill
-@c Does the above verbiage prevents an overfull hbox? --mew, rjc 24jan1992
-
-@item
-The @code{ARGC}, @code{ARGV}, @code{FNR}, @code{RLENGTH}, @code{RSTART},
-and @code{SUBSEP} built-in variables (@pxref{Built-in Variables}).
-
-@item
-The conditional expression using the operators @samp{?} and @samp{:}
-(@pxref{Conditional Exp, ,Conditional Expressions}).@refill
-
-@item
-The exponentiation operator @samp{^}
-(@pxref{Arithmetic Ops, ,Arithmetic Operators}) and its assignment operator
-form @samp{^=} (@pxref{Assignment Ops, ,Assignment Expressions}).@refill
-
-@item
-C-compatible operator precedence, which breaks some old @code{awk}
-programs (@pxref{Precedence, ,Operator Precedence (How Operators Nest)}).
-
-@item
-Regexps as the value of @code{FS}
-(@pxref{Field Separators, ,Specifying how Fields are Separated}), and as the
-third argument to the @code{split} function
-(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
-
-@item
-Dynamic regexps as operands of the @samp{~} and @samp{!~} operators
-(@pxref{Regexp Usage, ,How to Use Regular Expressions}).
-
-@item
-Escape sequences (@pxref{Constants, ,Constant Expressions}) in regexps.@refill
-
-@item
-The escape sequences @samp{\b}, @samp{\f}, and @samp{\r}
-(@pxref{Constants, ,Constant Expressions}).
-
-@item
-Redirection of input for the @code{getline} function
-(@pxref{Getline, ,Explicit Input with @code{getline}}).@refill
-
-@item
-Multiple @code{BEGIN} and @code{END} rules
-(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).@refill
-
-@item
-Simulated multi-dimensional arrays
-(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).@refill
-@end itemize
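-
-As a short, purely illustrative example, the following program combines
-several of the features listed above: a user-defined function with a
-@code{return} statement, and the conditional expression.
-
-@example
-function max(a, b) @{ return a > b ? a : b @}
-      @{ longest = max(longest, length($0)) @}
-END   @{ print "longest line has", longest, "characters" @}
-@end example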
-
-@node S5R4, POSIX, V7/S5R3.1, Language History
-@section Changes between S5R3.1 and S5R4
-
-The System V Release 4 version of Unix @code{awk} added these features
-(some of which originated in @code{gawk}):
-
-@itemize @bullet
-@item
-The @code{ENVIRON} variable (@pxref{Built-in Variables}).
-
-@item
-Multiple @samp{-f} options on the command line
-(@pxref{Command Line, ,Invoking @code{awk}}).@refill
-
-@item
-The @samp{-v} option for assigning variables before program execution begins
-(@pxref{Command Line, ,Invoking @code{awk}}).@refill
-
-@item
-The @samp{--} option for terminating command line options.
-
-@item
-The @samp{\a}, @samp{\v}, and @samp{\x} escape sequences
-(@pxref{Constants, ,Constant Expressions}).@refill
-
-@item
-A defined return value for the @code{srand} built-in function
-(@pxref{Numeric Functions, ,Numeric Built-in Functions}).
-
-@item
-The @code{toupper} and @code{tolower} built-in string functions
-for case translation
-(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
-
-@item
-A cleaner specification for the @samp{%c} format-control letter in the
-@code{printf} function
-(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
-
-@item
-The ability to dynamically pass the field width and precision (@code{"%*.*d"})
-in the argument list of the @code{printf} function
-(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
-
-@item
-The use of constant regexps such as @code{/foo/} as expressions, where
-they are equivalent to use of the matching operator, as in @code{$0 ~
-/foo/} (@pxref{Constants, ,Constant Expressions}).
-@end itemize
-
-@node POSIX, POSIX/GNU, S5R4, Language History
-@section Changes between S5R4 and POSIX @code{awk}
-
-The @sc{posix} Command Language and Utilities standard for @code{awk}
-introduced the following changes into the language:
-
-@itemize @bullet
-@item
-The use of @samp{-W} for implementation-specific options.
-
-@item
-The use of @code{CONVFMT} for controlling the conversion of numbers
-to strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
-
-@item
-The concept of a numeric string, and tighter comparison rules to go
-with it (@pxref{Comparison Ops, ,Comparison Expressions}).
-
-@item
-More complete documentation of many of the previously undocumented
-features of the language.
-@end itemize
-
-@node POSIX/GNU, , POSIX, Language History
-@section Extensions in @code{gawk} not in POSIX @code{awk}
-
-The GNU implementation, @code{gawk}, adds these features:
-
-@itemize @bullet
-@item
-The @code{AWKPATH} environment variable for specifying a path search for
-the @samp{-f} command line option
-(@pxref{Command Line, ,Invoking @code{awk}}).@refill
-
-@item
-The various @code{gawk} specific features available via the @samp{-W}
-command line option (@pxref{Command Line, ,Invoking @code{awk}}).
-
-@item
-The @code{ARGIND} variable, which tracks the movement of @code{FILENAME}
-through @code{ARGV} (@pxref{Built-in Variables}).
-
-@item
-The @code{ERRNO} variable, which contains the system error message when
-@code{getline} returns @minus{}1, or when @code{close} fails
-(@pxref{Built-in Variables}).
-
-@item
-The @code{IGNORECASE} variable and its effects
-(@pxref{Case-sensitivity, ,Case-sensitivity in Matching}).@refill
-
-@item
-The @code{FIELDWIDTHS} variable and its effects
-(@pxref{Constant Size, ,Reading Fixed-width Data}).@refill
-
-@item
-The @code{next file} statement for skipping to the next data file
-(@pxref{Next File Statement, ,The @code{next file} Statement}).@refill
-
-@item
-The @code{systime} and @code{strftime} built-in functions for obtaining
-and printing time stamps
-(@pxref{Time Functions, ,Functions for Dealing with Time Stamps}).@refill
-
-@item
-The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr}, and
-@file{/dev/fd/@var{n}} file name interpretation
-(@pxref{Special Files, ,Standard I/O Streams}).@refill
-
-@item
-The @samp{-W compat} option to turn off these extensions
-(@pxref{Command Line, ,Invoking @code{awk}}).@refill
-
-@item
-The @samp{-W posix} option for full @sc{posix} compliance
-(@pxref{Command Line, ,Invoking @code{awk}}).@refill
-
-@end itemize
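-
-As a brief, illustrative example, this program uses two of these
-extensions, writing a time stamp to the standard error stream via the
-special file name @file{/dev/stderr}:
-
-@example
-BEGIN @{ print strftime("run started at %c", systime()) > "/dev/stderr" @}
-@end example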
-
-@node Installation, Gawk Summary, Language History, Top
-@chapter Installing @code{gawk}
-
-This chapter provides instructions for installing @code{gawk} on the
-various platforms that are supported by the developers. The primary
-developers support Unix (and one day, GNU), while the other ports were
-contributed. The file @file{ACKNOWLEDGMENT} in the @code{gawk}
-distribution lists the electronic mail addresses of the people who did
-the respective ports.@refill
-
-@menu
-* Gawk Distribution:: What is in the @code{gawk} distribution.
-* Unix Installation:: Installing @code{gawk} under various versions
- of Unix.
-* VMS Installation:: Installing @code{gawk} on VMS.
-* MS-DOS Installation:: Installing @code{gawk} on MS-DOS.
-* Atari Installation:: Installing @code{gawk} on the Atari ST.
-@end menu
-
-@node Gawk Distribution, Unix Installation, Installation, Installation
-@section The @code{gawk} Distribution
-
-This section first describes how to get and extract the @code{gawk}
-distribution, and then discusses what is in the various files and
-subdirectories.
-
-@menu
-* Extracting:: How to get and extract the distribution.
-* Distribution contents:: What is in the distribution.
-@end menu
-
-@node Extracting, Distribution contents, Gawk Distribution, Gawk Distribution
-@subsection Getting the @code{gawk} Distribution
-
-@cindex getting gawk
-@cindex anonymous ftp
-@cindex anonymous uucp
-@cindex ftp, anonymous
-@cindex uucp, anonymous
-@code{gawk} is distributed as a @code{tar} file compressed with the
-GNU Zip program, @code{gzip}. You can
-get it via anonymous @code{ftp} to the Internet host @code{prep.ai.mit.edu}.
-Like all GNU software, it is also archived at other well-known systems,
-from which you may be able to obtain the distribution using some form of
-anonymous @code{uucp}.
-You can also order @code{gawk} on tape or CD-ROM directly from the
-Free Software Foundation. (The address is on the copyright page.)
-Doing so directly contributes to the support of the foundation and to
-the production of more free software.
-
-Once you have the distribution (for example,
-@file{gawk-2.15.0.tar.z}), first use @code{gzip} to expand the
-file, and then use @code{tar} to extract it. You can use the following
-pipeline to produce the @code{gawk} distribution:
-
-@example
-# Under System V, add 'o' to the tar flags
-gzip -d -c gawk-2.15.0.tar.z | tar -xvpf -
-@end example
-
-@noindent
-This will create a directory named @file{gawk-2.15} in the current
-directory.
-
-The distribution file name is of the form @file{gawk-2.15.@var{n}.tar.z}.
-The @var{n} represents a @dfn{patchlevel}, meaning that minor bugs have
-been fixed in the major release. The current patchlevel is 0, but when
-retrieving distributions, you should get the version with the highest
-patchlevel.@refill
-
-If you are not on a Unix system, you will need to make other arrangements
-for getting and extracting the @code{gawk} distribution. You should consult
-a local expert.
-
-@node Distribution contents, , Extracting, Gawk Distribution
-@subsection Contents of the @code{gawk} Distribution
-
-@code{gawk} has a number of C source files, documentation files,
-subdirectories and files related to the configuration process
-(@pxref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}),
-and several subdirectories related to different, non-Unix,
-operating systems.@refill
-
-@table @asis
-@item various @samp{.c}, @samp{.y}, and @samp{.h} files
-
-The C and YACC source files are the actual @code{gawk} source code.
-@end table
-
-@table @file
-@item README
-@itemx README.VMS
-@itemx README.dos
-@itemx README.rs6000
-@itemx README.ultrix
-Descriptive files: @file{README} for @code{gawk} under Unix, and the
-rest for the various hardware and software combinations.
-
-@item PORTS
-A list of systems to which @code{gawk} has been ported, and which
-have successfully run the test suite.
-
-@item ACKNOWLEDGMENT
-A list of the people who contributed major parts of the code or documentation.
-
-@item NEWS
-A list of changes to @code{gawk} since the last release or patch.
-
-@item COPYING
-The GNU General Public License.
-
-@item FUTURES
-A brief list of features and/or changes being contemplated for future
-releases, with some indication of the time frame for the feature, based
-on its difficulty.
-
-@item LIMITATIONS
-A list of those factors that limit @code{gawk}'s performance.
-Most of these depend on the hardware or operating system software, and
-are not limits in @code{gawk} itself.@refill
-
-@item PROBLEMS
-A file describing known problems with the current release.
-
-@item gawk.1
-The @code{troff} source for a manual page describing @code{gawk}.
-
-@item gawk.texinfo
-@ifinfo
-The @code{texinfo} source file for this Info file.
-It should be processed with @TeX{} to produce a printed manual, and
-with @code{makeinfo} to produce the Info file.@refill
-@end ifinfo
-@iftex
-The @code{texinfo} source file for this manual.
-It should be processed with @TeX{} to produce a printed manual, and
-with @code{makeinfo} to produce the Info file.@refill
-@end iftex
-
-@item Makefile.in
-@itemx config
-@itemx config.in
-@itemx configure
-@itemx missing
-@itemx mungeconf
-These files and subdirectories are used when configuring @code{gawk}
-for various Unix systems. They are explained in detail in
-@ref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}.@refill
-
-@item atari
-Files needed for building @code{gawk} on an Atari ST.
-@xref{Atari Installation, ,Installing @code{gawk} on the Atari ST}, for details.
-
-@item pc
-Files needed for building @code{gawk} under MS-DOS.
-@xref{MS-DOS Installation, ,Installing @code{gawk} on MS-DOS}, for details.
-
-@item vms
-Files needed for building @code{gawk} under VMS.
-@xref{VMS Installation, ,Compiling Installing and Running @code{gawk} on VMS}, for details.
-
-@item test
-Many interesting @code{awk} programs, provided as a test suite for
-@code{gawk}. You can use @samp{make test} from the top level @code{gawk}
-directory to run your version of @code{gawk} against the test suite.
-@c There are many programs here that are useful in their own right.
-If @code{gawk} successfully passes @samp{make test} then you can
-be confident of a successful port.@refill
-@end table
-
-@node Unix Installation, VMS Installation, Gawk Distribution, Installation
-@section Compiling and Installing @code{gawk} on Unix
-
-Often, you can compile and install @code{gawk} by typing only two
-commands. However, if you do not use a supported system, you may need
-to configure @code{gawk} for your system yourself.
-
-@menu
-* Quick Installation:: Compiling @code{gawk} on a
- supported Unix version.
-* Configuration Philosophy:: How it's all supposed to work.
-* New Configurations:: What to do if there is no supplied
- configuration for your system.
-@end menu
-
-@node Quick Installation, Configuration Philosophy, Unix Installation, Unix Installation
-@subsection Compiling @code{gawk} for a Supported Unix Version
-
-@cindex installation, unix
-After you have extracted the @code{gawk} distribution, @code{cd}
-to @file{gawk-2.15}. Look in the @file{config} subdirectory for a
-file that matches your hardware/software combination. In general,
-only the software is relevant; for example @code{sunos41} is used
-for SunOS 4.1, on both Sun 3 and Sun 4 hardware.@refill
-
-If you find such a file, run the command:
-
-@example
-# assume you have SunOS 4.1
-./configure sunos41
-@end example
-
-This produces a @file{Makefile} and @file{config.h} tailored to your
-system. You may wish to edit the @file{Makefile} to use a different
-C compiler, such as @code{gcc}, the GNU C compiler, if you have it.
-You may also wish to change the @code{CFLAGS} variable, which controls
-the command line options that are passed to the C compiler (such as
-optimization levels, or compiling for debugging).@refill
-
-After you have configured @file{Makefile} and @file{config.h}, type:
-
-@example
-make
-@end example
-
-@noindent
-and shortly thereafter, you should have an executable version of @code{gawk}.
-That's all there is to it!
-
-@node Configuration Philosophy, New Configurations, Quick Installation, Unix Installation
-@subsection The Configuration Process
-
-(This section is of interest only if you know something about using the
-C language and the Unix operating system.)
-
-The source code for @code{gawk} generally attempts to adhere to industry
-standards wherever possible. This means that @code{gawk} uses library
-routines that are specified by the @sc{ansi} C standard and by the @sc{posix}
-operating system interface standard. When using an @sc{ansi} C compiler,
-function prototypes are provided to help improve the compile-time checking.
-
-Many older Unix systems do not support all of either the @sc{ansi} or the
-@sc{posix} standards. The @file{missing} subdirectory in the @code{gawk}
-distribution contains replacement versions of those subroutines that are
-most likely to be missing.
-
-The @file{config.h} file that is created by the @code{configure} program
-contains definitions that describe features of the particular operating
-system where you are attempting to compile @code{gawk}. For the most
-part, it lists which standard subroutines are @emph{not} available.
-For example, if your system lacks the @samp{getopt} routine, then
-@samp{GETOPT_MISSING} would be defined.
-
-@file{config.h} also defines constants that describe facts about your
-variant of Unix. For example, there may not be an @samp{st_blksize}
-element in the @code{stat} structure. In this case @samp{BLKSIZE_MISSING}
-would be defined.
-
-Based on the list in @file{config.h} of standard subroutines that are
-missing, @file{missing.c} will do a @samp{#include} of the appropriate
-file(s) from the @file{missing} subdirectory.@refill
-
-Conditionally compiled code in the other source files relies on the
-other definitions in the @file{config.h} file.
-
-Besides creating @file{config.h}, @code{configure} produces a @file{Makefile}
-from @file{Makefile.in}. There are a number of lines in @file{Makefile.in}
-that are system or feature specific. For example, there is a line that begins
-with @samp{##MAKE_ALLOCA_C##}. This is normally a comment line, since
-it starts with @samp{#}. If a configuration file has @samp{MAKE_ALLOCA_C}
-in it, then @code{configure} will delete the @samp{##MAKE_ALLOCA_C##}
-from the beginning of the line. This will enable the rules in the
-@file{Makefile} that use a C version of @samp{alloca}. There are several
-similar features that work in this fashion.@refill
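-
-For example (the exact rule shown here is only illustrative), a line in
-@file{Makefile.in} such as
-
-@example
-##MAKE_ALLOCA_C##ALLOCA = alloca.o
-@end example
-
-@noindent
-would have @samp{##MAKE_ALLOCA_C##} removed by @code{configure} when the
-configuration file contains @samp{MAKE_ALLOCA_C}, enabling that rule;
-otherwise the line remains a comment.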
-
-@node New Configurations, , Configuration Philosophy, Unix Installation
-@subsection Configuring @code{gawk} for a New System
-
-(This section is of interest only if you know something about using the
-C language and the Unix operating system, and if you have to install
-@code{gawk} on a system that is not supported by the @code{gawk} distribution.
-If you are a C or Unix novice, get help from a local expert.)
-
-If you need to configure @code{gawk} for a Unix system that is not
-supported in the distribution, first see
-@ref{Configuration Philosophy, ,The Configuration Process}.
-Then, copy @file{config.in} to @file{config.h}, and copy
-@file{Makefile.in} to @file{Makefile}.@refill
-
-Next, edit both files. Both files are liberally commented, and the
-necessary changes should be straightforward.
-
-While editing @file{config.h}, you need to determine what library
-routines you do or do not have by consulting your system documentation, or
-by perusing your actual libraries using the @code{ar} or @code{nm} utilities.
-In the worst case, simply do not define @emph{any} of the macros for missing
-subroutines. When you compile @code{gawk}, the final link-editing step
-will fail. The link editor will provide you with a list of unresolved external
-references---these are the missing subroutines. Edit @file{config.h} again
-and recompile, and you should be set.@refill
-
-Editing the @file{Makefile} should also be straightforward. Enable or
-disable the lines that begin with @samp{##MAKE_@var{whatever}##}, as
-appropriate. Select the correct C compiler and @code{CFLAGS} for it.
-Then run @code{make}.
-
-Getting a correct configuration is likely to be an iterative process.
-Do not be discouraged if it takes you several tries. If you have no
-luck whatsoever, please report your system type, and the steps you took.
-Once you do have a working configuration, please send it to the maintainers
-so that support for your system can be added to the official release.
-
-@xref{Bugs, ,Reporting Problems and Bugs}, for information on how to report
-problems in configuring @code{gawk}. You may also use the same mechanisms
-for sending in new configurations.@refill
-
-@node VMS Installation, MS-DOS Installation, Unix Installation, Installation
-@section Compiling, Installing, and Running @code{gawk} on VMS
-
-@c based on material from
-@c Pat Rankin <rankin@eql.caltech.edu>
-
-@cindex installation, vms
-This section describes how to compile and install @code{gawk} under VMS.
-
-@menu
-* VMS Compilation:: How to compile @code{gawk} under VMS.
-* VMS Installation Details:: How to install @code{gawk} under VMS.
-* VMS Running:: How to run @code{gawk} under VMS.
-* VMS POSIX:: Alternate instructions for VMS POSIX.
-@end menu
-
-@node VMS Compilation, VMS Installation Details, VMS Installation, VMS Installation
-@subsection Compiling @code{gawk} under VMS
-
-To compile @code{gawk} under VMS, there is a @code{DCL} command procedure that
-will issue all the necessary @code{CC} and @code{LINK} commands, and there is
-also a @file{Makefile} for use with the @code{MMS} utility. From the source
-directory, use either
-
-@smallexample
-$ @@[.VMS]VMSBUILD.COM
-@end smallexample
-
-@noindent
-or
-
-@smallexample
-$ MMS/DESCRIPTION=[.VMS]DESCRIP.MMS GAWK
-@end smallexample
-
-Depending upon which C compiler you are using, follow one of the sets
-of instructions in this table:
-
-@table @asis
-@item VAX C V3.x
-Use either @file{vmsbuild.com} or @file{descrip.mms} as is. These use
-@code{CC/OPTIMIZE=NOLINE}, which is essential for Version 3.0.
-
-@item VAX C V2.x
-You must have Version 2.3 or 2.4; older ones won't work. Edit either
-@file{vmsbuild.com} or @file{descrip.mms} according to the comments in them.
-For @file{vmsbuild.com}, this just entails removing two @samp{!} delimiters.
-Also edit @file{config.h} (which is a copy of file @file{[.config]vms-conf.h})
-and comment out or delete the two lines @samp{#define __STDC__ 0} and
-@samp{#define VAXC_BUILTINS} near the end.@refill
-
-@item GNU C
-Edit @file{vmsbuild.com} or @file{descrip.mms}; the changes are different
-from those for VAX C V2.x, but equally straightforward. No changes to
-@file{config.h} should be needed.
-
-@item DEC C
-Edit @file{vmsbuild.com} or @file{descrip.mms} according to their comments.
-No changes to @file{config.h} should be needed.
-@end table
-
-@code{gawk} 2.15 has been tested under VAX/VMS 5.5-1 using VAX C V3.2,
-GNU C 1.40 and 2.3. It should work without modifications for VMS V4.6 and up.
-
-@node VMS Installation Details, VMS Running, VMS Compilation, VMS Installation
-@subsection Installing @code{gawk} on VMS
-
-To install @code{gawk}, all you need is a ``foreign'' command, which is
-a @code{DCL} symbol whose value begins with a dollar sign.
-
-@smallexample
-$ GAWK :== $device:[directory]GAWK
-@end smallexample
-
-@noindent
-(Substitute the actual location of @code{gawk.exe} for
-@samp{device:[directory]}.) The symbol should be placed in the
-@file{login.com} of any user who wishes to run @code{gawk},
-so that it will be defined every time the user logs on.
-Alternatively, the symbol may be placed in the system-wide
-@file{sylogin.com} procedure, which will allow all users
-to run @code{gawk}.@refill
-
-Optionally, the help entry can be loaded into a VMS help library:
-
-@smallexample
-$ LIBRARY/HELP SYS$HELP:HELPLIB [.VMS]GAWK.HLP
-@end smallexample
-
-@noindent
-(You may want to substitute a site-specific help library rather than
-the standard VMS library @samp{HELPLIB}.) After loading the help text,
-
-@c this is so tiny, but `should' be smallexample for consistency sake...
-@c I didn't because it was so short. --mew 29jan1992
-@example
-$ HELP GAWK
-@end example
-
-@noindent
-will provide information about both the @code{gawk} implementation and the
-@code{awk} programming language.
-
-The logical name @samp{AWK_LIBRARY} can designate a default location
-for @code{awk} program files. For the @samp{-f} option, if the specified
-filename has no device or directory path information in it, @code{gawk}
-will look in the current directory first, then in the directory specified
-by the translation of @samp{AWK_LIBRARY} if the file was not found.
-If after searching in both directories, the file still is not found,
-then @code{gawk} appends the suffix @samp{.awk} to the filename and the
-file search will be re-tried. If @samp{AWK_LIBRARY} is not defined, that
-portion of the file search will fail benignly.@refill
-
-@node VMS Running, VMS POSIX, VMS Installation Details, VMS Installation
-@subsection Running @code{gawk} on VMS
-
-Command line parsing and quoting conventions are significantly different
-on VMS, so examples in this manual or from other sources often need minor
-changes. They @emph{are} minor though, and all @code{awk} programs
-should run correctly.
-
-Here are a couple of trivial tests:
-
-@smallexample
-$ gawk -- "BEGIN @{print ""Hello, World!""@}"
-$ gawk -"W" version ! could also be -"W version" or "-W version"
-@end smallexample
-
-@noindent
-Note that upper-case and mixed-case text must be quoted.
-
-The VMS port of @code{gawk} includes a @code{DCL}-style interface in addition
-to the original shell-style interface (see the help entry for details).
-One side-effect of dual command line parsing is that if there is only a
-single parameter (as in the quoted string program above), the command
-becomes ambiguous. To work around this, the normally optional @samp{--}
-flag is required to force Unix style rather than @code{DCL} parsing. If any
-other dash-type options (or multiple parameters such as data files to be
-processed) are present, there is no ambiguity and @samp{--} can be omitted.
-
-The default search path when looking for @code{awk} program files specified
-by the @samp{-f} option is @code{"SYS$DISK:[],AWK_LIBRARY:"}. The logical
-name @samp{AWKPATH} can be used to override this default. The format
-of @samp{AWKPATH} is a comma-separated list of directory specifications.
-When defining it, the value should be quoted so that it retains a single
-translation, and not a multi-translation @code{RMS} searchlist.
-
-@node VMS POSIX, , VMS Running, VMS Installation
-@subsection Building and using @code{gawk} under VMS POSIX
-
-Ignore the instructions above, although @file{vms/gawk.hlp} should still
-be made available in a help library. Make sure that the two scripts,
-@file{configure} and @file{mungeconf}, are executable; use @samp{chmod +x}
-on them if necessary. Then execute the following commands:
-
-@smallexample
-$ POSIX
-psx> configure vms-posix
-psx> make awktab.c gawk
-@end smallexample
-
-@noindent
-The first command will construct files @file{config.h} and @file{Makefile}
-out of templates. The second command will compile and link @code{gawk}.
-Due to a @code{make} bug in VMS POSIX V1.0 and V1.1,
-the file @file{awktab.c} must be given as an explicit target or it will
-not be built and the final link step will fail. Ignore the warning
-@samp{"Could not find lib m in lib list"}; it is harmless, caused by the
-explicit use of @samp{-lm} as a linker option which is not needed
-under VMS POSIX. Under V1.1 (but not V1.0) a problem with the @code{yacc}
-skeleton @file{/etc/yyparse.c} will cause a compiler warning for
-@file{awktab.c}, followed by a linker warning about compilation warnings
-in the resulting object module. These warnings can be ignored.@refill
-
-Once built, @code{gawk} will work like any other shell utility. Unlike
-the normal VMS port of @code{gawk}, no special command line manipulation is
-needed in the VMS POSIX environment.
-
-@node MS-DOS Installation, Atari Installation, VMS Installation, Installation
-@section Installing @code{gawk} on MS-DOS
-
-@cindex installation, ms-dos
-The first step is to get all the files in the @code{gawk} distribution
-onto your PC. Move all the files from the @file{pc} directory into
-the main directory where the other files are. Edit the file
-@file{make.bat} so that it will be an acceptable MS-DOS batch file.
-This means making sure that all lines are terminated with the ASCII
-carriage return and line feed characters.
-
-@code{gawk} has only been compiled with version 5.1 of the Microsoft
-C compiler. The file @file{make.bat} from the @file{pc} directory
-assumes that you have this compiler.
-
-Copy the file @file{setargv.obj} from the library directory where it
-resides to the @code{gawk} source code directory.
-
-Run @file{make.bat}. This will compile @code{gawk} for you, and link it.
-That's all there is to it!
-
-@node Atari Installation, , MS-DOS Installation, Installation
-@section Installing @code{gawk} on the Atari ST
-
-@c based on material from
-@c Michal Jaegermann <ntomczak@vm.ucs.ualberta.ca>
-
-@cindex installation, atari
-This section assumes that you are running TOS. It applies to other Atari
-models (STe, TT) as well.
-
-In order to use @code{gawk}, you need to have a shell, either text or
-graphics, that does not map all the characters of a command line to
-upper case. Maintaining case distinction in option flags is very
-important (@pxref{Command Line, ,Invoking @code{awk}}). Popular shells
-like @code{gulam} or @code{gemini} will work, as will newer versions of
-@code{desktop}. Support for I/O redirection is necessary to make it easy
-to import @code{awk} programs from other environments. Pipes are nice to have,
-but not vital.
-
-If you have received an executable version of @code{gawk}, place it,
-as usual, anywhere in your @code{PATH} where your shell will find it.
-
-While executing, @code{gawk} creates a number of temporary files.
-@code{gawk} looks for either of the environment variables @code{TEMP}
-or @code{TMPDIR}, in that order. If either one is found, its value
-is assumed to be a directory for temporary files. This directory
-must exist, and if you can spare the memory, it is a good idea to
-put it on a @sc{ram} drive. If neither @code{TEMP} nor @code{TMPDIR}
-are found, then @code{gawk} uses the current directory for its
-temporary files.
-
-The ST version of @code{gawk} searches for its program files as
-described in @ref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}.
-On the ST, the default value for the @code{AWKPATH} variable is
-@code{@w{".,c:\lib\awk,c:\gnu\lib\awk"}}.
-The search path can be modified by explicitly setting @code{AWKPATH} to
-whatever you wish. Note that colons cannot be used on the ST to separate
-elements in the @code{AWKPATH} variable, since they have another, reserved,
-meaning. Instead, you must use a comma to separate elements in the path.
-If you are recompiling @code{gawk} on the ST, then you can choose a new
-default search path, by setting the value of @samp{DEFPATH} in the file
-@file{...\config\atari}. You may choose a different separator character
-by setting the value of @samp{ENVSEP} in the same file. The new values will
-be used when creating the header file @file{config.h}.@refill
-
-@ignore
-As a last resort, small
-adjustments can be made directly on the executable version of @code{gawk}
-using a binary editor.@refill
-@end ignore
-
-Although @code{awk} allows great flexibility in doing I/O redirections
-from within a program, this facility should be used with care on the ST.
-In some circumstances the OS routines for file handle pool processing
-lose track of certain events, causing the computer to crash, and requiring
-a reboot. Often a warm reboot is sufficient. Fortunately, this happens
-infrequently, and in rather esoteric situations. In particular, avoid
-having one part of an @code{awk} program using @code{print}
-statements explicitly redirected to @code{"/dev/stdout"}, while other
-@code{print} statements use the default standard output, and a
-calling shell has redirected standard output to a file.@refill
-@c whew!
-
-When @code{gawk} is compiled with the ST version of @code{gcc} and its
-usual libraries, it will accept both @samp{/} and @samp{\} as path separators.
-While this is convenient, it should be remembered that this removes one,
-technically legal, character (@samp{/}) from your file names, and that
-it may create problems for external programs, called via the @code{system()}
-function, which may not support this convention. Whenever it is possible
-that a file created by @code{gawk} will be used by some other program,
-use only backslashes. Also remember that in @code{awk}, backslashes in
-strings have to be doubled in order to get literal backslashes.
-
-The initial port of @code{gawk} to the ST was done with @code{gcc}.
-If you wish to recompile @code{gawk} from scratch, you will need to use
-a compiler that accepts @sc{ansi} standard C (such as @code{gcc}, Turbo C,
-or Prospero C). If @code{sizeof(int) != @w{sizeof(int *)}}, the correctness
-of the generated code depends heavily on the fact that all function calls
-have function prototypes in the current scope. If your compiler does
-not accept function prototypes, you will probably have to add a
-number of casts to the code.@refill
-
-If you are using @code{gcc}, make sure that you have up-to-date libraries.
-Older versions have problems with some library functions (@code{atan2()},
-@code{strftime()}, the @samp{%g} conversion in @code{sprintf()}) which
-may affect the operation of @code{gawk}.
-
-In the @file{atari} subdirectory of the @code{gawk} distribution is
-a version of the @code{system()} function that has been tested with
-@code{gulam} and @code{msh}; it should work with other shells as well.
-With @code{gulam}, it passes the string to be executed without spawning
-an extra copy of a shell. It is possible to replace this version of
-@code{system()} with a similar function from a library or from some other
-source if that version would be a better choice for the shell you prefer.
-
-The files needed to recompile @code{gawk} on the ST can be found in
-the @file{atari} directory. The provided files and instructions below
-assume that you have the GNU C compiler (@code{gcc}), the @code{gulam} shell,
-and an ST version of @code{sed}. The @file{Makefile} is set up to use
-@file{byacc} as a @file{yacc} replacement. With a different set of tools some
-adjustments and/or editing will be needed.@refill
-
-@code{cd} to the @file{atari} directory. Copy @file{Makefile.st} to
-@file{makefile} in the source (parent) directory. Possibly adjust
-@file{../config/atari} to suit your system. Execute the script @file{mkconf.g}
-which will create the header file @file{../config.h}. Go back to the source
-directory. If you are not using @code{gcc}, check the file @file{missing.c}.
-It may be necessary to change forward slashes in the references to files
-from the @file{atari} subdirectory into backslashes. Type @code{make} and
-enjoy.@refill
-
-Compilation with @code{gcc} of some of the bigger modules, like
-@file{awk_tab.c}, may require a full four megabytes of memory. On smaller
-machines you would need to cut down on optimizations, or you would have to
-switch to another, less memory hungry, compiler.@refill
-
-@node Gawk Summary, Sample Program, Installation, Top
-@appendix @code{gawk} Summary
-
-This appendix provides a brief summary of the @code{gawk} command line and the
-@code{awk} language. It is designed to serve as a ``quick reference.'' It is
-therefore terse, but complete.
-
-@menu
-* Command Line Summary:: Recapitulation of the command line.
-* Language Summary:: A terse review of the language.
-* Variables/Fields:: Variables, fields, and arrays.
-* Rules Summary:: Patterns and Actions, and their
- component parts.
-* Functions Summary:: Defining and calling functions.
-* Historical Features:: Some undocumented but supported ``features''.
-@end menu
-
-@node Command Line Summary, Language Summary, Gawk Summary, Gawk Summary
-@appendixsec Command Line Options Summary
-
-The command line consists of options to @code{gawk} itself, the
-@code{awk} program text (if not supplied via the @samp{-f} option), and
-values to be made available in the @code{ARGC} and @code{ARGV}
-predefined @code{awk} variables:
-
-@example
-awk @r{[@var{POSIX or GNU style options}]} -f source-file @r{[@code{--}]} @var{file} @dots{}
-awk @r{[@var{POSIX or GNU style options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
-@end example
-
-The options that @code{gawk} accepts are:
-
-@table @code
-@item -F @var{fs}
-@itemx --field-separator=@var{fs}
-Use @var{fs} for the input field separator (the value of the @code{FS}
-predefined variable).
-
-@item -f @var{program-file}
-@itemx --file=@var{program-file}
-Read the @code{awk} program source from the file @var{program-file}, instead
-of from the first command line argument.
-
-@item -v @var{var}=@var{val}
-@itemx --assign=@var{var}=@var{val}
-Assign the variable @var{var} the value @var{val} before program execution
-begins.
-
-@item -W compat
-@itemx --compat
-Specifies compatibility mode, in which @code{gawk} extensions are turned
-off.
-
-@item -W copyleft
-@itemx -W copyright
-@itemx --copyleft
-@itemx --copyright
-Print the short version of the General Public License on the error
-output. This option may disappear in a future version of @code{gawk}.
-
-@item -W help
-@itemx -W usage
-@itemx --help
-@itemx --usage
-Print a relatively short summary of the available options on the error output.
-
-@item -W lint
-@itemx --lint
-Give warnings about dubious or non-portable @code{awk} constructs.
-
-@item -W posix
-@itemx --posix
-Specifies @sc{posix} compatibility mode, in which @code{gawk} extensions
-are turned off and additional restrictions apply.
-
-@item -W source=@var{program-text}
-@itemx --source=@var{program-text}
-Use @var{program-text} as @code{awk} program source code. This option allows
-mixing command line source code with source code from files, and is
-particularly useful for mixing command line programs with library functions.
-
-@item -W version
-@itemx --version
-Print version information for this particular copy of @code{gawk} on the error
-output. This option may disappear in a future version of @code{gawk}.
-
-@item --
-Signal the end of options. This is useful to allow further arguments to the
-@code{awk} program itself to start with a @samp{-}. This is mainly for
-consistency with the argument parsing conventions of @sc{posix}.
-@end table
-
-Any other options are flagged as invalid, but are otherwise ignored.
-@xref{Command Line, ,Invoking @code{awk}}, for more details.
-
-@node Language Summary, Variables/Fields, Command Line Summary, Gawk Summary
-@appendixsec Language Summary
-
-An @code{awk} program consists of a sequence of pattern-action statements
-and optional function definitions.
-
-@example
-@var{pattern} @{ @var{action statements} @}
-
-function @var{name}(@var{parameter list}) @{ @var{action statements} @}
-@end example
-
-@code{gawk} first reads the program source from the
-@var{program-file}(s) if specified, or from the first non-option
-argument on the command line. The @samp{-f} option may be used multiple
-times on the command line. @code{gawk} reads the program text from all
-the @var{program-file} files, effectively concatenating them in the
-order they are specified. This is useful for building libraries of
-@code{awk} functions, without having to include them in each new
-@code{awk} program that uses them. To use a library function in a file
-from a program typed in on the command line, specify @samp{-f /dev/tty};
-then type your program, and end it with a @kbd{Control-d}.
-@xref{Command Line, ,Invoking @code{awk}}.@refill
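-
-For example (the file names here are only illustrative), a library of
-functions kept in @file{myfuncs.awk} can be combined with a program typed
-at the terminal:
-
-@example
-gawk -f myfuncs.awk -f /dev/tty data
-@end example
-
-@noindent
-@code{gawk} reads the library first, then the program you type (ended
-with a @kbd{Control-d}), and then processes @file{data}.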
-
-The environment variable @code{AWKPATH} specifies a search path to use
-when finding source files named with the @samp{-f} option. The default
-path, which is
-@samp{.:/usr/lib/awk:/usr/local/lib/awk} is used if @code{AWKPATH} is not set.
-If a file name given to the @samp{-f} option contains a @samp{/} character,
-no path search is performed.
-@xref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable},
-for a full description of the @code{AWKPATH} environment variable.@refill
-
-@code{gawk} compiles the program into an internal form, and then proceeds to
-read each file named in the @code{ARGV} array. If there are no files named
-on the command line, @code{gawk} reads the standard input.
-
-If a ``file'' named on the command line has the form
-@samp{@var{var}=@var{val}}, it is treated as a variable assignment: the
-variable @var{var} is assigned the value @var{val}.
-If any element in the list has a value that is the null string, that
-element is skipped.@refill
-
-For each line in the input, @code{gawk} tests to see if it matches any
-@var{pattern} in the @code{awk} program. For each pattern that the line
-matches, the associated @var{action} is executed.
-
-@node Variables/Fields, Rules Summary, Language Summary, Gawk Summary
-@appendixsec Variables and Fields
-
-@code{awk} variables are dynamic; they come into existence when they are
-first used. Their values are either floating-point numbers or strings.
-@code{awk} also has one-dimensional arrays; multi-dimensional arrays
-may be simulated. There are several predefined variables that
-@code{awk} sets as a program runs; these are summarized below.
-
-@menu
-* Fields Summary:: Input field splitting.
-* Built-in Summary:: @code{awk}'s built-in variables.
-* Arrays Summary:: Using arrays.
-* Data Type Summary:: Values in @code{awk} are numbers or strings.
-@end menu
-
-@node Fields Summary, Built-in Summary, Variables/Fields, Variables/Fields
-@appendixsubsec Fields
-
-As each input line is read, @code{gawk} splits the line into
-@var{fields}, using the value of the @code{FS} variable as the field
-separator. If @code{FS} is a single character, fields are separated by
-that character. Otherwise, @code{FS} is expected to be a full regular
-expression. In the special case that @code{FS} is a single blank,
-fields are separated by runs of blanks and/or tabs. Note that the value
-of @code{IGNORECASE} (@pxref{Case-sensitivity, ,Case-sensitivity in Matching})
-also affects how fields are split when @code{FS} is a regular expression.@refill
-
-Each field in the input line may be referenced by its position, @code{$1},
-@code{$2}, and so on. @code{$0} is the whole line. The value of a field may
-be assigned to as well. Field numbers need not be constants:
-
-@example
-n = 5
-print $n
-@end example
-
-@noindent
-prints the fifth field in the input line. The variable @code{NF} is set to
-the total number of fields in the input line.
-
-References to nonexistent fields (i.e., fields after @code{$NF}) return
-the null string. However, assigning to a nonexistent field (e.g.,
-@code{$(NF+2) = 5}) increases the value of @code{NF}, creates any
-intervening fields with the null string as their value, and causes the
-value of @code{$0} to be recomputed, with the fields being separated by
-the value of @code{OFS}.@refill
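-
-For example, this illustrative pipeline assigns to a field two past the
-last one:
-
-@example
-echo a b c | awk '@{ $(NF+2) = "new"; print NF; print @}'
-@end example
-
-@noindent
-This prints @samp{5}, and then @samp{a b c  new}; @code{$4} was created
-with the null string as its value, so two @code{OFS} separators appear
-before @samp{new}.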
-
-@xref{Reading Files, ,Reading Input Files}, for a full description of the
-way @code{awk} defines and uses fields.
-
-@node Built-in Summary, Arrays Summary, Fields Summary, Variables/Fields
-@appendixsubsec Built-in Variables
-
-@code{awk}'s built-in variables are:
-
-@table @code
-@item ARGC
-The number of command line arguments (not including options or the
-@code{awk} program itself).
-
-@item ARGIND
-The index in @code{ARGV} of the current file being processed.
-It is always true that @samp{FILENAME == ARGV[ARGIND]}.
-
-@item ARGV
-The array of command line arguments. The array is indexed from 0 to
-@code{ARGC} @minus{} 1. Dynamically changing the contents of @code{ARGV}
-can control the files used for data.@refill
-
-@item CONVFMT
-The conversion format to use when converting numbers to strings.
-
-@item FIELDWIDTHS
-A space separated list of numbers describing the fixed-width input data.
-
-@item ENVIRON
-An array containing the values of the environment variables. The array
-is indexed by variable name, each element being the value of that
-variable. Thus, the environment variable @code{HOME} would be in
-@code{ENVIRON["HOME"]}. Its value might be @file{/u/close}.
-
-Changing this array does not affect the environment seen by programs
-which @code{gawk} spawns via redirection or the @code{system} function.
-(This may change in a future version of @code{gawk}.)
-
-Some operating systems do not have environment variables.
-The array @code{ENVIRON} is empty when running on these systems.
-
-@item ERRNO
-The system error message when an error occurs using @code{getline}
-or @code{close}.
-
-@item FILENAME
-The name of the current input file. If no files are specified on the command
-line, the value of @code{FILENAME} is @samp{-}.
-
-@item FNR
-The input record number in the current input file.
-
-@item FS
-The input field separator, a blank by default.
-
-@item IGNORECASE
-The case-sensitivity flag for regular expression operations. If
-@code{IGNORECASE} has a nonzero value, then pattern matching in rules,
-field splitting with @code{FS}, regular expression matching with
-@samp{~} and @samp{!~}, and the @code{gsub}, @code{index}, @code{match},
-@code{split} and @code{sub} predefined functions all ignore case
-when doing regular expression operations.@refill
-
-@item NF
-The number of fields in the current input record.
-
-@item NR
-The total number of input records seen so far.
-
-@item OFMT
-The output format for numbers for the @code{print} statement,
-@code{"%.6g"} by default.
-
-@item OFS
-The output field separator, a blank by default.
-
-@item ORS
-The output record separator, by default a newline.
-
-@item RS
-The input record separator, by default a newline. @code{RS} is exceptional
-in that only the first character of its string value is used for separating
-records. If @code{RS} is set to the null string, then records are separated
-by blank lines; in that case, the newline character always acts as a field
-separator, in addition to whatever value @code{FS} may have.@refill
-
-@item RSTART
-The index of the first character matched by @code{match}; 0 if no match.
-
-@item RLENGTH
-The length of the string matched by @code{match}; @minus{}1 if no match.
-
-@item SUBSEP
-The string used to separate multiple subscripts in array elements, by
-default @code{"\034"}.
-@end table
-
-@xref{Built-in Variables}, for more information.
-
-@node Arrays Summary, Data Type Summary, Built-in Summary, Variables/Fields
-@appendixsubsec Arrays
-
-Arrays are subscripted with an expression between square brackets
-(@samp{[} and @samp{]}). Array subscripts are @emph{always} strings;
-numbers are converted to strings as necessary, following the standard
-conversion rules
-(@pxref{Conversion, ,Conversion of Strings and Numbers}).@refill
-
-If you use multiple expressions separated by commas inside the square
-brackets, then the array subscript is a string consisting of the
-concatenation of the individual subscript values, converted to strings,
-separated by the subscript separator (the value of @code{SUBSEP}).
-
-The special operator @code{in} may be used in an @code{if} or
-@code{while} statement to see if an array has an index consisting of a
-particular value.
-
-@example
-if (val in array)
- print array[val]
-@end example
-
-If the array has multiple subscripts, use @code{(i, j, @dots{}) in array}
-to test for existence of an element.
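-
-For example:
-
-@example
-pos["x", "y"] = 1
-if (("x", "y") in pos)
-    print "element exists"
-@end example
-
-@noindent
-The actual subscript stored is the string @code{"x" SUBSEP "y"}.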
-
-The @code{in} construct may also be used in a @code{for} loop to iterate
-over all the elements of an array.
-@xref{Scanning an Array, ,Scanning all Elements of an Array}.@refill
-
-An element may be deleted from an array using the @code{delete} statement.
-
-@xref{Arrays, ,Arrays in @code{awk}}, for more detailed information.
-
-@node Data Type Summary, , Arrays Summary, Variables/Fields
-@appendixsubsec Data Types
-
-The value of an @code{awk} expression is always either a number
-or a string.
-
-Certain contexts (such as arithmetic operators) require numeric
-values. They convert strings to numbers by interpreting the text
-of the string as a numeral. If the string does not look like a
-numeral, it converts to 0.
-
-Certain contexts (such as concatenation) require string values.
-They convert numbers to strings by effectively printing them
-with @code{sprintf}.
-@xref{Conversion, ,Conversion of Strings and Numbers}, for the details.@refill
-
-To force conversion of a string value to a number, simply add 0
-to it. If the value you start with is already a number, this
-does not change it.
-
-To force conversion of a numeric value to a string, concatenate it with
-the null string.
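-
-For example:
-
-@example
-num = "3.5" + 0      # force the string "3.5" to the number 3.5
-str = 27 ""          # force the number 27 to the string "27"
-@end example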
-
-The @code{awk} language defines comparisons as being done numerically if
-both operands are numeric, or if one is numeric and the other is a numeric
-string. Otherwise one or both operands are converted to strings and a
-string comparison is performed.
-
-Uninitialized variables have the string value @code{""} (the null, or
-empty, string). In contexts where a number is required, this is
-equivalent to 0.
-
-@xref{Variables}, for more information on variable naming and initialization;
-@pxref{Conversion, ,Conversion of Strings and Numbers}, for more information
-on how variable values are interpreted.@refill
-
-@node Rules Summary, Functions Summary, Variables/Fields, Gawk Summary
-@appendixsec Patterns and Actions
-
-@menu
-* Pattern Summary:: Quick overview of patterns.
-* Regexp Summary:: Quick overview of regular expressions.
-* Actions Summary:: Quick overview of actions.
-@end menu
-
-An @code{awk} program is mostly composed of rules, each consisting of a
-pattern followed by an action. The action is enclosed in @samp{@{} and
-@samp{@}}. Either the pattern may be missing, or the action may be
-missing, but, of course, not both. If the pattern is missing, the
-action is executed for every single line of input. A missing action is
-equivalent to this action,
-
-@example
-@{ print @}
-@end example
-
-@noindent
-which prints the entire line.
-
-Comments begin with the @samp{#} character, and continue until the end of the
-line. Blank lines may be used to separate statements. Normally, a statement
-ends with a newline; however, this is not the case for lines ending in a
-@samp{,}, @samp{@{}, @samp{?}, @samp{:}, @samp{&&}, or @samp{||}. Lines
-ending in @code{do} or @code{else} also have their statements automatically
-continued on the following line. In other cases, a line can be continued by
-ending it with a @samp{\}, in which case the newline is ignored.@refill
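-
-For example, the newline after the @samp{+} below would otherwise be a
-syntax error, so the @samp{\} is required:
-
-@example
-@{ total = $1 + \
-           $2 @}
-@end example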
-
-Multiple statements may be put on one line by separating them with a @samp{;}.
-This applies both to the statements within the action part of a rule (the
-usual case), and to the rules themselves.
-
-@xref{Comments, ,Comments in @code{awk} Programs}, for information on
-@code{awk}'s commenting convention;
-@pxref{Statements/Lines, ,@code{awk} Statements versus Lines}, for a
-description of the line continuation mechanism in @code{awk}.@refill
-
-@node Pattern Summary, Regexp Summary, Rules Summary, Rules Summary
-@appendixsubsec Patterns
-
-@code{awk} patterns may be one of the following:
-
-@example
-/@var{regular expression}/
-@var{relational expression}
-@var{pattern} && @var{pattern}
-@var{pattern} || @var{pattern}
-@var{pattern} ? @var{pattern} : @var{pattern}
-(@var{pattern})
-! @var{pattern}
-@var{pattern1}, @var{pattern2}
-BEGIN
-END
-@end example
-
-@code{BEGIN} and @code{END} are two special kinds of patterns that are not
-tested against the input. The action parts of all @code{BEGIN} rules are
-merged as if all the statements had been written in a single @code{BEGIN}
-rule. They are executed before any of the input is read. Similarly, all the
-@code{END} rules are merged, and executed when all the input is exhausted (or
-when an @code{exit} statement is executed). @code{BEGIN} and @code{END}
-patterns cannot be combined with other patterns in pattern expressions.
-@code{BEGIN} and @code{END} rules cannot have missing action parts.@refill
-
-For @samp{/@var{regular-expression}/} patterns, the associated statement is
-executed for each input line that matches the regular expression. Regular
-expressions are extensions of those in @code{egrep}, and are summarized below.
-
-A @var{relational expression} may use any of the operators defined below in
-the section on actions. These generally test whether certain fields match
-certain regular expressions.
-
-The @samp{&&}, @samp{||}, and @samp{!} operators are logical ``and,''
-logical ``or,'' and logical ``not,'' respectively, as in C. They do
-short-circuit evaluation, also as in C, and are used for combining more
-primitive pattern expressions. As in most languages, parentheses may be
-used to change the order of evaluation.
-
-The @samp{?:} operator is like the same operator in C. If the first
-pattern matches, then the second pattern is matched against the input
-record; otherwise, the third is matched. Only one of the second and
-third patterns is matched.
-
-The @samp{@var{pattern1}, @var{pattern2}} form of a pattern is called a
-range pattern. It matches all input lines starting with a line that
-matches @var{pattern1}, and continuing until a line that matches
-@var{pattern2}, inclusive. A range pattern cannot be used as an operand
-to any of the pattern operators.
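-
-For example, this illustrative rule prints every group of lines from a
-line containing @samp{start} through the next line containing @samp{stop},
-inclusive:
-
-@example
-/start/, /stop/  @{ print @}
-@end example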
-
-@xref{Patterns}, for a full description of the pattern part of @code{awk}
-rules.
-
-@node Regexp Summary, Actions Summary, Pattern Summary, Rules Summary
-@appendixsubsec Regular Expressions
-
-Regular expressions are the extended kind found in @code{egrep}.
-They are composed of characters as follows:
-
-@table @code
-@item @var{c}
-matches the character @var{c} (assuming @var{c} is a character with no
-special meaning in regexps).
-
-@item \@var{c}
-matches the literal character @var{c}.
-
-@item .
-matches any character except newline.
-
-@item ^
-matches the beginning of a line or a string.
-
-@item $
-matches the end of a line or a string.
-
-@item [@var{abc}@dots{}]
-matches any of the characters @var{abc}@dots{} (character class).
-
-@item [^@var{abc}@dots{}]
-matches any character except @var{abc}@dots{} and newline (negated
-character class).
-
-@item @var{r1}|@var{r2}
-matches either @var{r1} or @var{r2} (alternation).
-
-@item @var{r1r2}
-matches @var{r1}, and then @var{r2} (concatenation).
-
-@item @var{r}+
-matches one or more @var{r}'s.
-
-@item @var{r}*
-matches zero or more @var{r}'s.
-
-@item @var{r}?
-matches zero or one @var{r}'s.
-
-@item (@var{r})
-matches @var{r} (grouping).
-@end table
-
-@xref{Regexp, ,Regular Expressions as Patterns}, for a more detailed
-explanation of regular expressions.
-
-The escape sequences allowed in string constants are also valid in
-regular expressions (@pxref{Constants, ,Constant Expressions}).
-
-@node Actions Summary, , Regexp Summary, Rules Summary
-@appendixsubsec Actions
-
-Action statements are enclosed in braces, @samp{@{} and @samp{@}}.
-Action statements consist of the usual assignment, conditional, and looping
-statements found in most languages. The operators, control statements,
-and input/output statements available are patterned after those in C.
-
-@menu
-* Operator Summary:: @code{awk} operators.
-* Control Flow Summary:: The control statements.
-* I/O Summary:: The I/O statements.
-* Printf Summary:: A summary of @code{printf}.
-* Special File Summary:: Special file names interpreted internally.
-* Numeric Functions Summary:: Built-in numeric functions.
-* String Functions Summary:: Built-in string functions.
-* Time Functions Summary:: Built-in time functions.
-* String Constants Summary:: Escape sequences in strings.
-@end menu
-
-@node Operator Summary, Control Flow Summary, Actions Summary, Actions Summary
-@appendixsubsubsec Operators
-
-The operators in @code{awk}, in order of increasing precedence, are:
-
-@table @code
-@item = += -= *= /= %= ^=
-Assignment. Both absolute assignment (@code{@var{var}=@var{value}})
-and operator assignment (the other forms) are supported.
-
-@item ?:
-A conditional expression, as in C. This has the form @code{@var{expr1} ?
-@var{expr2} : @var{expr3}}. If @var{expr1} is true, the value of the
-expression is @var{expr2}; otherwise it is @var{expr3}. Only one of
-@var{expr2} and @var{expr3} is evaluated.@refill
-
-@item ||
-Logical ``or''.
-
-@item &&
-Logical ``and''.
-
-@item ~ !~
-Regular expression match, negated match.
-
-@item < <= > >= != ==
-The usual relational operators.
-
-@item @var{blank}
-String concatenation.
-
-@item + -
-Addition and subtraction.
-
-@item * / %
-Multiplication, division, and modulus.
-
-@item + - !
-Unary plus, unary minus, and logical negation.
-
-@item ^
-Exponentiation (@samp{**} may also be used, and @samp{**=} for the assignment
-operator, but they are not specified in the @sc{posix} standard).
-
-@item ++ --
-Increment and decrement, both prefix and postfix.
-
-@item $
-Field reference.
-@end table
-
-@xref{Expressions, ,Expressions as Action Statements}, for a full
-description of all the operators listed above.
-@xref{Fields, ,Examining Fields}, for a description of the field
-reference operator.@refill
-
-@node Control Flow Summary, I/O Summary, Operator Summary, Actions Summary
-@appendixsubsubsec Control Statements
-
-The control statements are as follows:
-
-@example
-if (@var{condition}) @var{statement} @r{[} else @var{statement} @r{]}
-while (@var{condition}) @var{statement}
-do @var{statement} while (@var{condition})
-for (@var{expr1}; @var{expr2}; @var{expr3}) @var{statement}
-for (@var{var} in @var{array}) @var{statement}
-break
-continue
-delete @var{array}[@var{index}]
-exit @r{[} @var{expression} @r{]}
-@{ @var{statements} @}
-@end example
-
-@xref{Statements, ,Control Statements in Actions}, for a full description
-of all the control statements listed above.
-
-@node I/O Summary, Printf Summary, Control Flow Summary, Actions Summary
-@appendixsubsubsec I/O Statements
-
-The input/output statements are as follows:
-
-@table @code
-@item getline
-Set @code{$0} from next input record; set @code{NF}, @code{NR}, @code{FNR}.
-
-@item getline <@var{file}
-Set @code{$0} from next record of @var{file}; set @code{NF}.
-
-@item getline @var{var}
-Set @var{var} from next input record; set @code{NF}, @code{FNR}.
-
-@item getline @var{var} <@var{file}
-Set @var{var} from next record of @var{file}.
-
-@item next
-Stop processing the current input record. The next input record is read and
-processing starts over with the first pattern in the @code{awk} program.
-If the end of the input data is reached, the @code{END} rule(s), if any,
-are executed.
-
-@item next file
-Stop processing the current input file. The next input record read comes
-from the next input file. @code{FILENAME} is updated, @code{FNR} is set to 1,
-and processing starts over with the first pattern in the @code{awk} program.
-If the end of the input data is reached, the @code{END} rule(s), if any,
-are executed.
-
-@item print
-Prints the current record.
-
-@item print @var{expr-list}
-Prints expressions.
-
-@item print @var{expr-list} > @var{file}
-Prints expressions on @var{file}.
-
-@item printf @var{fmt, expr-list}
-Format and print.
-
-@item printf @var{fmt, expr-list} > @var{file}
-Format and print on @var{file}.
-@end table
-
-Other input/output redirections are also allowed. For @code{print} and
-@code{printf}, @samp{>> @var{file}} appends output to the @var{file},
-and @samp{| @var{command}} writes on a pipe. In a similar fashion,
-@samp{@var{command} | getline} pipes input into @code{getline}.
-@code{getline} returns 0 on end of file, and @minus{}1 on an error.@refill
-
-@xref{Getline, ,Explicit Input with @code{getline}}, for a full description
-of the @code{getline} statement.
-@xref{Printing, ,Printing Output}, for a full description of @code{print} and
-@code{printf}. Finally, @pxref{Next Statement, ,The @code{next} Statement},
-for a description of how the @code{next} statement works.@refill
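-
-For example, one way to count the users currently logged in (a sketch
-that assumes a @code{who} command is available) is to read the command's
-output with @code{getline} and test its return value:
-
-@example
-BEGIN @{
-    while (("who" | getline line) > 0)
-        nusers++
-    close("who")
-    print nusers, "users are logged in"
-@}
-@end example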
-
-@node Printf Summary, Special File Summary, I/O Summary, Actions Summary
-@appendixsubsubsec @code{printf} Summary
-
-The @code{awk} @code{printf} statement and @code{sprintf} function
-accept the following conversion specification formats:
-
-@table @code
-@item %c
-An ASCII character. If the argument used for @samp{%c} is numeric, it is
-treated as a character and printed. Otherwise, the argument is assumed to
-be a string, and only the first character of that string is printed.
-
-@item %d
-@itemx %i
-A decimal number (the integer part).
-
-@item %e
-A floating point number of the form
-@samp{@r{[}-@r{]}d.ddddddE@r{[}+-@r{]}dd}.@refill
-
-@item %f
-A floating point number of the form
-@r{[}@code{-}@r{]}@code{ddd.dddddd}.
-
-@item %g
-Use @samp{%e} or @samp{%f} conversion, whichever produces a shorter string,
-with nonsignificant zeros suppressed.
-
-@item %o
-An unsigned octal number (again, an integer).
-
-@item %s
-A character string.
-
-@item %x
-An unsigned hexadecimal number (an integer).
-
-@item %X
-Like @samp{%x}, except use @samp{A} through @samp{F} instead of @samp{a}
-through @samp{f} for decimal 10 through 15.@refill
-
-@item %%
-A single @samp{%} character; no argument is converted.
-@end table
-
-There are optional, additional parameters that may lie between the @samp{%}
-and the control letter:
-
-@table @code
-@item -
-The expression should be left-justified within its field.
-
-@item @var{width}
-The field should be padded to this width. If @var{width} has a leading zero,
-then the field is padded with zeros. Otherwise it is padded with blanks.
-
-@item .@var{prec}
-A number specifying the maximum number of characters to print from a
-string, or the number of digits to print to the right of the decimal point.
-@end table
-
-Either or both of the @var{width} and @var{prec} values may be specified
-as @samp{*}. In that case, the particular value is taken from the argument
-list.
-
-@xref{Printf, ,Using @code{printf} Statements for Fancier Printing}, for
-examples and for a more detailed description.
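-
-For instance, the following statement (a sketch with hypothetical fields)
-left-justifies a string in a ten-character field, prints a number with
-two digits after the decimal point, and takes a dynamic width for the
-last value from the argument list:
-
-@example
-printf "%-10s %.2f %*d\n", $1, $2, 8, $3
-@end example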
-
-@node Special File Summary, Numeric Functions Summary, Printf Summary, Actions Summary
-@appendixsubsubsec Special File Names
-
-When doing I/O redirection from either @code{print} or @code{printf} into a
-file, or via @code{getline} from a file, @code{gawk} recognizes certain special
-file names internally. These file names allow access to open file descriptors
-inherited from @code{gawk}'s parent process (usually the shell). The
-file names are:
-
-@table @file
-@item /dev/stdin
-The standard input.
-
-@item /dev/stdout
-The standard output.
-
-@item /dev/stderr
-The standard error output.
-
-@item /dev/fd/@var{n}
-The file denoted by the open file descriptor @var{n}.
-@end table
-
-In addition, the following files provide process-related information
-about the running @code{gawk} program.
-
-@table @file
-@item /dev/pid
-Reading this file returns the process ID of the current process,
-in decimal, terminated with a newline.
-
-@item /dev/ppid
-Reading this file returns the parent process ID of the current process,
-in decimal, terminated with a newline.
-
-@item /dev/pgrpid
-Reading this file returns the process group ID of the current process,
-in decimal, terminated with a newline.
-
-@item /dev/user
-Reading this file returns a single record terminated with a newline.
-The fields are separated with blanks. The fields represent the
-following information:
-
-@table @code
-@item $1
-The value of the @code{getuid} system call.
-
-@item $2
-The value of the @code{geteuid} system call.
-
-@item $3
-The value of the @code{getgid} system call.
-
-@item $4
-The value of the @code{getegid} system call.
-@end table
-
-If there are any additional fields, they are the group IDs returned by
-the @code{getgroups} system call.
-(Multiple groups may not be supported on all systems.)@refill
-@end table
-
-@noindent
-These file names may also be used on the command line to name data files.
-These file names are only recognized internally if you do not
-actually have files by these names on your system.
-
-@xref{Special Files, ,Standard I/O Streams}, for a longer description that
-provides the motivation for this feature.
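-
-A typical use of these file names is writing diagnostics to the standard
-error output, as in this minimal sketch:
-
-@example
-@{
-    if (NF != 4)
-        print "line " NR ": expected 4 fields" > "/dev/stderr"
-@}
-@end example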
-
-@node Numeric Functions Summary, String Functions Summary, Special File Summary, Actions Summary
-@appendixsubsubsec Numeric Functions
-
-@code{awk} has the following predefined arithmetic functions:
-
-@table @code
-@item atan2(@var{y}, @var{x})
-returns the arctangent of @var{y/x} in radians.
-
-@item cos(@var{expr})
-returns the cosine of @var{expr}, which is in radians.
-
-@item exp(@var{expr})
-the exponential function.
-
-@item int(@var{expr})
-truncates to integer.
-
-@item log(@var{expr})
-the natural logarithm function.
-
-@item rand()
-returns a random number between 0 and 1.
-
-@item sin(@var{expr})
-returns the sine of @var{expr}, which is in radians.
-
-@item sqrt(@var{expr})
-the square root function.
-
-@item srand(@var{expr})
-use @var{expr} as a new seed for the random number generator. If no @var{expr}
-is provided, the time of day is used. The return value is the previous
-seed for the random number generator.
-@end table
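-
-For example, @code{srand}, @code{rand}, and @code{int} can be combined to
-simulate rolling a six-sided die (an illustrative sketch):
-
-@example
-BEGIN @{
-    srand()                     # seed from the time of day
-    roll = int(rand() * 6) + 1  # an integer from 1 to 6
-    print "you rolled a", roll
-@}
-@end example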
-
-@node String Functions Summary, Time Functions Summary, Numeric Functions Summary, Actions Summary
-@appendixsubsubsec String Functions
-
-@code{awk} has the following predefined string functions:
-
-@table @code
-@item gsub(@var{r}, @var{s}, @var{t})
-for each substring matching the regular expression @var{r} in the string
-@var{t}, substitute the string @var{s}, and return the number of substitutions.
-If @var{t} is not supplied, use @code{$0}.
-
-@item index(@var{s}, @var{t})
-returns the index of the string @var{t} in the string @var{s}, or 0 if
-@var{t} is not present.
-
-@item length(@var{s})
-returns the length of the string @var{s}. The length of @code{$0}
-is returned if no argument is supplied.
-
-@item match(@var{s}, @var{r})
-returns the position in @var{s} where the regular expression @var{r}
-occurs, or 0 if @var{r} is not present, and sets the values of @code{RSTART}
-and @code{RLENGTH}.
-
-@item split(@var{s}, @var{a}, @var{r})
-splits the string @var{s} into the array @var{a} on the regular expression
-@var{r}, and returns the number of fields. If @var{r} is omitted, @code{FS}
-is used instead.
-
-@item sprintf(@var{fmt}, @var{expr-list})
-formats @var{expr-list} according to @var{fmt}, and returns the resulting string.
-
-@item sub(@var{r}, @var{s}, @var{t})
-this is just like @code{gsub}, but only the first matching substring is
-replaced.
-
-@item substr(@var{s}, @var{i}, @var{n})
-returns the @var{n}-character substring of @var{s} starting at @var{i}.
-If @var{n} is omitted, the rest of @var{s} is used.
-
-@item tolower(@var{str})
-returns a copy of the string @var{str}, with all the upper-case characters in
-@var{str} translated to their corresponding lower-case counterparts.
-Nonalphabetic characters are left unchanged.
-
-@item toupper(@var{str})
-returns a copy of the string @var{str}, with all the lower-case characters in
-@var{str} translated to their corresponding upper-case counterparts.
-Nonalphabetic characters are left unchanged.
-
-@item system(@var{cmd-line})
-executes the command @var{cmd-line}, and returns the exit status.
-@end table
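-
-As a small illustration only, @code{gsub}, @code{toupper}, and
-@code{substr} can be combined to strip trailing whitespace from each
-line and capitalize its first character:
-
-@example
-@{
-    gsub(/[ \t]+$/, "")     # remove trailing blanks and tabs from $0
-    print toupper(substr($0, 1, 1)) substr($0, 2)
-@}
-@end example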
-
-@node Time Functions Summary, String Constants Summary, String Functions Summary, Actions Summary
-@appendixsubsubsec Built-in Time Functions
-
-The following two functions are available for getting the current
-time of day, and for formatting time stamps.
-
-@table @code
-@item systime()
-returns the current time of day as the number of seconds since a particular
-epoch (Midnight, January 1, 1970 @sc{utc}, on @sc{posix} systems).
-
-@item strftime(@var{format}, @var{timestamp})
-formats @var{timestamp} according to the specification in @var{format}.
-The current time of day is used if no @var{timestamp} is supplied.
-@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for the
-details on the conversion specifiers that @code{strftime} accepts.@refill
-@end table
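-
-For example, the current date and time can be printed in a readable form
-as follows (a sketch; the conversion specifiers shown are the common
-@sc{ansi} C ones):
-
-@example
-BEGIN @{ print strftime("it is now %H:%M:%S on %A, %B %d, %Y", systime()) @}
-@end example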
-
-@iftex
-@xref{Built-in, ,Built-in Functions}, for a description of all of
-@code{awk}'s built-in functions.
-@end iftex
-
-@node String Constants Summary, , Time Functions Summary, Actions Summary
-@appendixsubsubsec String Constants
-
-String constants in @code{awk} are sequences of characters enclosed
-between double quotes (@code{"}). Within strings, certain @dfn{escape sequences}
-are recognized, as in C. These are:
-
-@table @code
-@item \\
-A literal backslash.
-
-@item \a
-The ``alert'' character; usually the ASCII BEL character.
-
-@item \b
-Backspace.
-
-@item \f
-Formfeed.
-
-@item \n
-Newline.
-
-@item \r
-Carriage return.
-
-@item \t
-Horizontal tab.
-
-@item \v
-Vertical tab.
-
-@item \x@var{hex digits}
-The character represented by the string of hexadecimal digits following
-the @samp{\x}. As in @sc{ansi} C, all following hexadecimal digits are
-considered part of the escape sequence. (This feature should tell us
-something about language design by committee.) E.g., @code{"\x1B"} is a
-string containing the ASCII ESC (escape) character. (The @samp{\x}
-escape sequence is not in @sc{posix} @code{awk}.)
-
-@item \@var{ddd}
-The character represented by the 1-, 2-, or 3-digit sequence of octal
-digits. Thus, @code{"\033"} is also a string containing the ASCII ESC
-(escape) character.
-
-@item \@var{c}
-The literal character @var{c}.
-@end table
-
-The escape sequences may also be used inside constant regular expressions
-(e.g., the regexp @code{@w{/[@ \t\f\n\r\v]/}} matches whitespace
-characters).@refill
-
-@xref{Constants, ,Constant Expressions}.
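-
-For example, the following statement uses @samp{\t} and @samp{\n} to
-produce two tab-separated columns on two lines (a minimal sketch):
-
-@example
-BEGIN @{ printf "name\tvalue\nwidth\t80\n" @}
-@end example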
-
-@node Functions Summary, Historical Features, Rules Summary, Gawk Summary
-@appendixsec Functions
-
-Functions in @code{awk} are defined as follows:
-
-@example
-function @var{name}(@var{parameter list}) @{ @var{statements} @}
-@end example
-
-Actual parameters supplied in the function call are used to instantiate
-the formal parameters declared in the function. Arrays are passed by
-reference; other variables are passed by value.
-
-If there are fewer arguments passed than there are names in @var{parameter-list},
-the extra names are given the null string as their value. Extra names have the
-effect of local variables.
-
-The open-parenthesis in a function call of a user-defined function must
-immediately follow the function name, without any intervening white space.
-This is to avoid a syntactic ambiguity with the concatenation operator.
-
-The word @code{func} may be used in place of @code{function} (but not in
-@sc{posix} @code{awk}).
-
-Use the @code{return} statement to return a value from a function.
-
-@xref{User-defined, ,User-defined Functions}, for a more complete description.
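-
-Here is a small illustrative sketch of a user-defined function together
-with rules that call it:
-
-@example
-function max(a, b)
-@{
-    return (a > b) ? a : b
-@}
-
-@{ biggest = max(biggest, $1) @}
-END @{ print "largest first field:", biggest @}
-@end example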
-
-@node Historical Features, , Functions Summary, Gawk Summary
-@appendixsec Historical Features
-
-There are two features of historical @code{awk} implementations that
-@code{gawk} supports. First, it is possible to call the @code{length}
-built-in function not only with no arguments, but even without parentheses!
-
-@example
-a = length
-@end example
-
-@noindent
-is the same as either of
-
-@example
-a = length()
-a = length($0)
-@end example
-
-@noindent
-This feature is marked as ``deprecated'' in the @sc{posix} standard, and
-@code{gawk} will issue a warning about its use if @samp{-W lint} is
-specified on the command line.
-
-The other feature is the use of the @code{continue} statement outside the
-body of a @code{while}, @code{for}, or @code{do} loop. Traditional
-@code{awk} implementations have treated such usage as equivalent to the
-@code{next} statement. @code{gawk} will support this usage if @samp{-W posix}
-has not been specified.
-
-@node Sample Program, Bugs, Gawk Summary, Top
-@appendix Sample Program
-
-The following example is a complete @code{awk} program, which prints
-the number of occurrences of each word in its input. It illustrates the
-associative nature of @code{awk} arrays by using strings as subscripts. It
-also demonstrates the @samp{for @var{x} in @var{array}} construction.
-Finally, it shows how @code{awk} can be used in conjunction with other
-utility programs to do a useful task of some complexity with a minimum of
-effort. Some explanations follow the program listing.@refill
-
-@example
-awk '
-# Print list of word frequencies
-@{
- for (i = 1; i <= NF; i++)
- freq[$i]++
-@}
-
-END @{
- for (word in freq)
- printf "%s\t%d\n", word, freq[word]
-@}'
-@end example
-
-The first thing to notice about this program is that it has two rules. The
-first rule, because it has an empty pattern, is executed on every line of
-the input. It uses @code{awk}'s field-accessing mechanism
-(@pxref{Fields, ,Examining Fields}) to pick out the individual words from
-the line, and the built-in variable @code{NF} (@pxref{Built-in Variables})
-to know how many fields are available.@refill
-
-For each input word, an element of the array @code{freq} is incremented to
-reflect that the word has been seen an additional time.@refill
-
-The second rule, because it has the pattern @code{END}, is not executed
-until the input has been exhausted. It prints out the contents of the
-@code{freq} table that has been built up inside the first action.@refill
-
-Note that this program has several problems that would prevent it from being
-useful by itself on real text files:@refill
-
-@itemize @bullet
-@item
-Words are detected using the @code{awk} convention that fields are
-separated by whitespace and that other characters in the input (except
-newlines) don't have any special meaning to @code{awk}. This means that
-punctuation characters count as part of words.@refill
-
-@item
-The @code{awk} language considers upper and lower case characters to be
-distinct. Therefore, @samp{foo} and @samp{Foo} are not treated by this
-program as the same word. This is undesirable since in normal text, words
-are capitalized if they begin sentences, and a frequency analyzer should not
-be sensitive to that.@refill
-
-@item
-The output does not come out in any useful order. You're more likely to be
-interested in which words occur most frequently, or in having an alphabetized
-table of how frequently each word occurs.@refill
-@end itemize
-
-The way to solve these problems is to use some of the more advanced
-features of the @code{awk} language. First, we use @code{tolower} to remove
-case distinctions. Next, we use @code{gsub} to remove punctuation
-characters. Finally, we use the system @code{sort} utility to process the
-output of the @code{awk} script. Here is the new version of
-the program:@refill
-
-@example
-awk '
-# Print list of word frequencies
-@{
- $0 = tolower($0) # remove case distinctions
- gsub(/[^a-z0-9_ \t]/, "", $0) # remove punctuation
- for (i = 1; i <= NF; i++)
- freq[$i]++
-@}
-
-END @{
- for (word in freq)
- printf "%s\t%d\n", word, freq[word]
-@}'
-@end example
-
-Assuming we have saved this program in a file named @file{frequency.awk},
-and that the data is in @file{file1}, the following pipeline
-
-@example
-awk -f frequency.awk file1 | sort +1 -nr
-@end example
-
-@noindent
-produces a table of the words appearing in @file{file1} in order of
-decreasing frequency.
-
-The @code{awk} program suitably massages the data and produces a word
-frequency table, which is not ordered.
-
-The @code{awk} script's output is then sorted by the @code{sort} command and
-printed on the terminal. The options given to @code{sort} in this example
-specify to sort using the second field of each input line (skipping one field),
-that the sort keys should be treated as numeric quantities (otherwise
-@samp{15} would come before @samp{5}), and that the sorting should be done
-in descending (reverse) order.@refill
-
-We could have even done the @code{sort} from within the program, by
-changing the @code{END} action to:
-
-@example
-END @{
- sort = "sort +1 -nr"
- for (word in freq)
- printf "%s\t%d\n", word, freq[word] | sort
- close(sort)
-@}
-@end example
-
-See the general operating system documentation for more information on how
-to use the @code{sort} command.@refill
-
-@ignore
-@strong{ADR: I have some more substantial programs courtesy of Rick Adams
-at UUNET. I am planning on incorporating those either in addition to or
-instead of this program.}
-
-@strong{I would also like to incorporate the general @code{translate}
-function that I have written.}
-
-@strong{I have a ton of other sample programs to include too.}
-@end ignore
-
-@node Bugs, Notes, Sample Program, Top
-@appendix Reporting Problems and Bugs
-
-@c This chapter stolen shamelessly from the GNU m4 manual.
-@c This chapter has been unshamelessly altered to emulate changes made to
-@c make.texi from whence it was originally shamelessly stolen! :-} --mew
-
-If you have problems with @code{gawk} or think that you have found a bug,
-please report it to the developers; we cannot promise to do anything
-but we might well want to fix it.
-
-Before reporting a bug, make sure you have actually found a real bug.
-Carefully reread the documentation and see if it really says you can do
-what you're trying to do. If it's not clear whether you should be able
-to do something or not, report that too; it's a bug in the documentation!
-
-Before reporting a bug or trying to fix it yourself, try to isolate it
-to the smallest possible @code{awk} program and input data file that
-reproduces the problem. Then send us the program and data file,
-some idea of what kind of Unix system you're using, and the exact results
-@code{gawk} gave you. Also say what you expected to occur; this will help
-us decide whether the problem was really in the documentation.
-
-Once you have a precise problem, send e-mail to (Internet)
-@samp{bug-gnu-utils@@prep.ai.mit.edu} or (UUCP)
-@samp{mit-eddie!prep.ai.mit.edu!bug-gnu-utils}. Please include the
-version number of @code{gawk} you are using. You can get this information
-with the command @samp{gawk -W version '@{@}' /dev/null}.
-You should send carbon copies of your mail to David Trueman at
-@samp{david@@cs.dal.ca}, and to Arnold Robbins, who can be reached at
-@samp{arnold@@skeeve.atl.ga.us}. David is most likely to fix code
-problems, while Arnold is most likely to fix documentation problems.@refill
-
-Non-bug suggestions are always welcome as well. If you have questions
-about things that are unclear in the documentation or are just obscure
-features, ask Arnold Robbins; he will try to help you out, although he
-may not have the time to fix the problem. You can send him electronic mail at the Internet address
-above.
-
-If you find bugs in one of the non-Unix ports of @code{gawk}, please send
-an electronic mail message to the person who maintains that port. They
-are listed below, and also in the @file{README} file in the @code{gawk}
-distribution. Information in the @code{README} file should be considered
-authoritative if it conflicts with this manual.
-
-The people maintaining the non-Unix ports of @code{gawk} are:
-
-@table @asis
-@item MS-DOS
-The port to MS-DOS is maintained by Scott Deifik.
-His electronic mail address is @samp{scottd@@amgen.com}.
-
-@item VMS
-The port to VAX VMS is maintained by Pat Rankin.
-His electronic mail address is @samp{rankin@@eql.caltech.edu}.
-
-@item Atari ST
-The port to the Atari ST is maintained by Michal Jaegermann.
-His electronic mail address is @samp{ntomczak@@vm.ucs.ualberta.ca}.
-
-@end table
-
-If your bug is also reproducible under Unix, please send copies of your
-report to the general GNU bug list, as well as to Arnold Robbins and David
-Trueman, at the addresses listed above.
-
-@node Notes, Glossary, Bugs, Top
-@appendix Implementation Notes
-
-This appendix contains information mainly of interest to implementors and
-maintainers of @code{gawk}. Everything in it applies specifically to
-@code{gawk}, and not to other implementations.
-
-@menu
-* Compatibility Mode:: How to disable certain @code{gawk} extensions.
-* Future Extensions:: New features we may implement soon.
-* Improvements:: Suggestions for improvements by volunteers.
-@end menu
-
-@node Compatibility Mode, Future Extensions, Notes, Notes
-@appendixsec Downward Compatibility and Debugging
-
-@xref{POSIX/GNU, ,Extensions in @code{gawk} not in POSIX @code{awk}},
-for a summary of the GNU extensions to the @code{awk} language and program.
-All of these features can be turned off by invoking @code{gawk} with the
-@samp{-W compat} option, or with the @samp{-W posix} option.@refill
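-
-For example, to run a program with all @code{gawk} extensions disabled
-and strict @sc{posix} behavior, you might type (the file names here are
-illustrative):
-
-@example
-gawk -W posix -f program.awk datafile
-@end example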
-
-If @code{gawk} is compiled for debugging with @samp{-DDEBUG}, then there
-is one more option available on the command line:
-
-@table @samp
-@item -W parsedebug
-Print out the parse stack information as the program is being parsed.
-@end table
-
-This option is intended only for serious @code{gawk} developers,
-and not for the casual user. It probably has not even been compiled into
-your version of @code{gawk}, since it slows down execution.
-
-@node Future Extensions, Improvements, Compatibility Mode, Notes
-@appendixsec Probable Future Extensions
-
-This section briefly lists extensions that indicate the directions we are
-currently considering for @code{gawk}. The file @file{FUTURES} in the
-@code{gawk} distributions lists these extensions, as well as several others.
-
-@table @asis
-@item @code{RS} as a regexp
-The meaning of @code{RS} may be generalized along the lines of @code{FS}.
-
-@item Control of subprocess environment
-Changes made in @code{gawk} to the array @code{ENVIRON} may be
-propagated to subprocesses run by @code{gawk}.
-
-@item Databases
-It may be possible to map a GDBM/NDBM/SDBM file into an @code{awk} array.
-
-@item Single-character fields
-The null string, @code{""}, as a field separator, will cause field
-splitting and the @code{split} function to separate individual characters.
-Thus, @code{split("abcd", a, "")} would yield @code{a[1] == "a"},
-@code{a[2] == "b"}, and so on.
-
-@item More @code{lint} warnings
-There are more things that could be checked for portability.
-
-@item @code{RECLEN} variable for fixed length records
-Along with @code{FIELDWIDTHS}, this would speed up the processing of
-fixed-length records.
-
-@item @code{RT} variable to hold the record terminator
-It is occasionally useful to have access to the actual string of
-characters that matched the @code{RS} variable. The @code{RT}
-variable would hold these characters.
-
-@item A @code{restart} keyword
-After modifying @code{$0}, @code{restart} would restart the pattern
-matching loop, without reading a new record from the input.
-
-@item A @samp{|&} redirection
-The @samp{|&} redirection, in place of @samp{|}, would open a two-way
-pipeline for communication with a sub-process (via @code{getline} and
-@code{print} and @code{printf}).
-
-@item @code{IGNORECASE} affecting all comparisons
-The effects of the @code{IGNORECASE} variable may be generalized to
-all string comparisons, and not just regular expression operations.
-
-@item A way to mix command line source code and library files
-There may be a new option that would make it possible to easily use library
-functions from a program entered on the command line.
-@c probably a @samp{-s} option...
-
-@item GNU-style long options
-We will add GNU-style long options
-to @code{gawk} for compatibility with other GNU programs.
-(For example, @samp{--field-separator=:} would be equivalent to
-@samp{-F:}.)@refill
-
-@c this is @emph{very} long term --- not worth including right now.
-@ignore
-@item The C Comma Operator
-We may add the C comma operator, which takes the form
-@code{@var{expr1},@var{expr2}}. The first expression is evaluated, and the
-result is thrown away. The value of the full expression is the value of
-@var{expr2}.@refill
-@end ignore
-@end table
-
-@node Improvements, , Future Extensions, Notes
-@appendixsec Suggestions for Improvements
-
-Here are some projects that would-be @code{gawk} hackers might like to take
-on. They vary in size from a few days to a few weeks of programming,
-depending on which one you choose and how fast a programmer you are. Please
-send any improvements you write to the maintainers at the GNU
-project.@refill
-
-@enumerate
-@item
-Compilation of @code{awk} programs: @code{gawk} uses a Bison (YACC-like)
-parser to convert the script given it into a syntax tree; the syntax
-tree is then executed by a simple recursive evaluator. This method incurs
-a lot of overhead, since the recursive evaluator performs many procedure
-calls to do even the simplest things.@refill
-
-It should be possible for @code{gawk} to convert the script's parse tree
-into a C program which the user would then compile, using the normal
-C compiler and a special @code{gawk} library to provide all the needed
-functions (regexps, fields, associative arrays, type coercion, and so
-on).@refill
-
-An easier possibility might be for an intermediate phase of @code{awk} to
-convert the parse tree into a linear byte code form like the one used
-in GNU Emacs Lisp. The recursive evaluator would then be replaced by
-a straight line byte code interpreter that would be intermediate in speed
-between running a compiled program and doing what @code{gawk} does
-now.@refill
-
-This may actually happen for the 3.0 version of @code{gawk}.
-
-@item
-An error message section has not been included in this version of the
-manual. Perhaps some nice beta testers will document some of the messages
-for the future.
-
-@item
-The programs in the test suite could use documenting in this manual.
-
-@item
-The programs and data files in the manual should be available in
-separate files to facilitate experimentation.
-
-@item
-See the @file{FUTURES} file for more ideas. Contact us if you would
-seriously like to tackle any of the items listed there.
-@end enumerate
-
-@node Glossary, Index, Notes, Top
-@appendix Glossary
-
-@table @asis
-@item Action
-A series of @code{awk} statements attached to a rule. If the rule's
-pattern matches an input record, the @code{awk} language executes the
-rule's action. Actions are always enclosed in curly braces.
-@xref{Actions, ,Overview of Actions}.@refill
-
-@item Amazing @code{awk} Assembler
-Henry Spencer at the University of Toronto wrote a retargetable assembler
-completely as @code{awk} scripts. It is thousands of lines long, including
-machine descriptions for several 8-bit microcomputers.
-@c It is distributed with @code{gawk} (as part of the test suite) and
-It is a good example of a
-program that would have been better written in another language.@refill
-
-@item @sc{ansi}
-The American National Standards Institute. This organization produces
-many standards, among them the standard for the C programming language.
-
-@item Assignment
-An @code{awk} expression that changes the value of some @code{awk}
-variable or data object. An object that you can assign to is called an
-@dfn{lvalue}. @xref{Assignment Ops, ,Assignment Expressions}.@refill
-
-@item @code{awk} Language
-The language in which @code{awk} programs are written.
-
-@item @code{awk} Program
-An @code{awk} program consists of a series of @dfn{patterns} and
-@dfn{actions}, collectively known as @dfn{rules}. For each input record
-given to the program, the program's rules are all processed in turn.
-@code{awk} programs may also contain function definitions.@refill
-
-@item @code{awk} Script
-Another name for an @code{awk} program.
-
-@item Built-in Function
-The @code{awk} language provides built-in functions that perform various
-numerical, time stamp related, and string computations. Examples are
-@code{sqrt} (for the square root of a number) and @code{substr} (for a
-substring of a string). @xref{Built-in, ,Built-in Functions}.@refill
-
-@item Built-in Variable
-@code{ARGC}, @code{ARGIND}, @code{ARGV}, @code{CONVFMT}, @code{ENVIRON},
-@code{ERRNO}, @code{FIELDWIDTHS}, @code{FILENAME}, @code{FNR}, @code{FS},
-@code{IGNORECASE}, @code{NF}, @code{NR}, @code{OFMT}, @code{OFS}, @code{ORS},
-@code{RLENGTH}, @code{RSTART}, @code{RS}, and @code{SUBSEP},
-are the variables that have special
-meaning to @code{awk}. Changing some of them affects @code{awk}'s running
-environment. @xref{Built-in Variables}.@refill
-
-@item Braces
-See ``Curly Braces.''
-
-@item C
-The system programming language that most GNU software is written in. The
-@code{awk} programming language has C-like syntax, and this manual
-points out similarities between @code{awk} and C when appropriate.@refill
-
-@item CHEM
-A preprocessor for @code{pic} that reads descriptions of molecules
-and produces @code{pic} input for drawing them. It was written by
-Brian Kernighan, and is available from @code{netlib@@research.att.com}.@refill
-
-@item Compound Statement
-A series of @code{awk} statements, enclosed in curly braces. Compound
-statements may be nested.
-@xref{Statements, ,Control Statements in Actions}.@refill
-
-@item Concatenation
-Concatenating two strings means sticking them together, one after another,
-giving a new string. For example, the string @samp{foo} concatenated with
-the string @samp{bar} gives the string @samp{foobar}.
-@xref{Concatenation, ,String Concatenation}.@refill
-
-@item Conditional Expression
-An expression using the @samp{?:} ternary operator, such as
-@code{@var{expr1} ? @var{expr2} : @var{expr3}}. The expression
-@var{expr1} is evaluated; if the result is true, the value of the whole
-expression is the value of @var{expr2}; otherwise the value is
-@var{expr3}. In either case, only one of @var{expr2} and @var{expr3}
-is evaluated. @xref{Conditional Exp, ,Conditional Expressions}.@refill
-
-@item Constant Regular Expression
-A constant regular expression is a regular expression written within
-slashes, such as @samp{/foo/}. This regular expression is chosen
-when you write the @code{awk} program, and cannot be changed during
-its execution. @xref{Regexp Usage, ,How to Use Regular Expressions}.
-
-@item Comparison Expression
-A relation that is either true or false, such as @code{(a < b)}.
-Comparison expressions are used in @code{if}, @code{while}, and @code{for}
-statements, and in patterns to select which input records to process.
-@xref{Comparison Ops, ,Comparison Expressions}.@refill
-
-@item Curly Braces
-The characters @samp{@{} and @samp{@}}. Curly braces are used in
-@code{awk} for delimiting actions, compound statements, and function
-bodies.@refill
-
-@item Data Objects
-These are numbers and strings of characters. Numbers are converted into
-strings and vice versa, as needed.
-@xref{Conversion, ,Conversion of Strings and Numbers}.@refill
-
-@item Dynamic Regular Expression
-A dynamic regular expression is a regular expression written as an
-ordinary expression. It could be a string constant, such as
-@code{"foo"}, but it may also be an expression whose value may vary.
-@xref{Regexp Usage, ,How to Use Regular Expressions}.
-
-@item Escape Sequences
-A special sequence of characters used for describing nonprinting
-characters, such as @samp{\n} for newline, or @samp{\033} for the ASCII
-ESC (escape) character. @xref{Constants, ,Constant Expressions}.
-
-@item Field
-When @code{awk} reads an input record, it splits the record into pieces
-separated by whitespace (or by a separator regexp which you can
-change by setting the built-in variable @code{FS}). Such pieces are
-called fields. If the pieces are of fixed length, you can use the built-in
-variable @code{FIELDWIDTHS} to describe their lengths.
-@xref{Records, ,How Input is Split into Records}.@refill
-
-@item Format
-Format strings are used to control the appearance of output in the
-@code{printf} statement. Also, data conversions from numbers to strings
-are controlled by the format string contained in the built-in variable
-@code{CONVFMT}. @xref{Control Letters, ,Format-Control Letters}.@refill
-
-@item Function
-A specialized group of statements often used to encapsulate general
-or program-specific tasks. @code{awk} has a number of built-in
-functions, and also allows you to define your own.
-@xref{Built-in, ,Built-in Functions}.
-Also, see @ref{User-defined, ,User-defined Functions}.@refill
-
-@item @code{gawk}
-The GNU implementation of @code{awk}.
-
-@item GNU
-``GNU's not Unix''. An on-going project of the Free Software Foundation
-to create a complete, freely distributable, @sc{posix}-compliant computing
-environment.
-
-@item Input Record
-A single chunk of data read in by @code{awk}. Usually, an @code{awk} input
-record consists of one line of text.
-@xref{Records, ,How Input is Split into Records}.@refill
-
-@item Keyword
-In the @code{awk} language, a keyword is a word that has special
-meaning. Keywords are reserved and may not be used as variable names.
-
-@code{awk}'s keywords are:
-@code{if},
-@code{else},
-@code{while},
-@code{do@dots{}while},
-@code{for},
-@code{for@dots{}in},
-@code{break},
-@code{continue},
-@code{delete},
-@code{next},
-@code{function},
-@code{func},
-and @code{exit}.@refill
-
-@item Lvalue
-An expression that can appear on the left side of an assignment
-operator. In most languages, lvalues can be variables or array
-elements. In @code{awk}, a field designator can also be used as an
-lvalue.@refill
-
-@item Number
-A numeric valued data object. The @code{gawk} implementation uses double
-precision floating point to represent numbers.@refill
-
-@item Pattern
-Patterns tell @code{awk} which input records are interesting to which
-rules.
-
-A pattern is an arbitrary conditional expression against which input is
-tested. If the condition is satisfied, the pattern is said to @dfn{match}
-the input record. A typical pattern might compare the input record against
-a regular expression. @xref{Patterns}.@refill
-
-@item @sc{posix}
-The name for a series of standards being developed by the @sc{ieee}
-that specify a Portable Operating System interface. The ``IX'' denotes
-the Unix heritage of these standards. The main standard of interest for
-@code{awk} users is P1003.2, the Command Language and Utilities standard.
-
-@item Range (of input lines)
-A sequence of consecutive lines from the input file. A pattern
-can specify ranges of input lines for @code{awk} to process, or it can
-specify single lines. @xref{Patterns}.@refill
-
-@item Recursion
-When a function calls itself, either directly or indirectly.
-If this isn't clear, refer to the entry for ``recursion.''
-
-@item Redirection
-Redirection means performing input from other than the standard input
-stream, or output to other than the standard output stream.
-
-You can redirect the output of the @code{print} and @code{printf} statements
-to a file or a system command, using the @samp{>}, @samp{>>}, and @samp{|}
-operators. You can redirect input to the @code{getline} statement using
-the @samp{<} and @samp{|} operators.
-@xref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}.@refill
-
-@item Regular Expression
-See ``regexp.''
-
-@item Regexp
-Short for @dfn{regular expression}. A regexp is a pattern that denotes a
-set of strings, possibly an infinite set. For example, the regexp
-@samp{R.*xp} matches any string starting with the letter @samp{R}
-and ending with the letters @samp{xp}. In @code{awk}, regexps are
-used in patterns and in conditional expressions. Regexps may contain
-escape sequences. @xref{Regexp, ,Regular Expressions as Patterns}.@refill
-
-@item Rule
-A segment of an @code{awk} program that specifies how to process single
-input records. A rule consists of a @dfn{pattern} and an @dfn{action}.
-@code{awk} reads an input record; then, for each rule, if the input record
-satisfies the rule's pattern, @code{awk} executes the rule's action.
-Otherwise, the rule does nothing for that input record.@refill
-
-@item Side Effect
-A side effect occurs when an expression has an effect aside from merely
-producing a value. Assignment expressions, increment expressions and
-function calls have side effects. @xref{Assignment Ops, ,Assignment Expressions}.
-
-@item Special File
-A file name interpreted internally by @code{gawk}, instead of being handed
-directly to the underlying operating system. For example, @file{/dev/stdin}.
-@xref{Special Files, ,Standard I/O Streams}.
-
-@item Stream Editor
-A program that reads records from an input stream and processes them one
-or more at a time. This is in contrast with batch programs, which may
-expect to read their input files in their entirety before starting to do
-anything, and with interactive programs, which require input from the
-user.@refill
-
-@item String
-A datum consisting of a sequence of characters, such as @samp{I am a
-string}. Constant strings are written with double-quotes in the
-@code{awk} language, and may contain escape sequences.
-@xref{Constants, ,Constant Expressions}.
-
-@item Whitespace
-A sequence of blank or tab characters occurring inside an input record or a
-string.@refill
-@end table
-
-@node Index, , Glossary, Top
-@unnumbered Index
-@printindex cp
-
-@summarycontents
-@contents
-@bye
-
-Unresolved Issues:
-------------------
-1. From: ntomczak@vm.ucs.ualberta.ca (Michal Jaegermann)
- Examples of usage tend to suggest that /../ and ".." delimiters
- can be used for regular expressions, even if definition is consistently
- using /../. I am not sure what the real rules are and in particular
- what of the following is a bug and what is a feature:
- # This program matches everything
- '"\(" { print }'
- # This one complains about mismatched parenthesis
- '$0 ~ "\(" { print }'
- # This one behaves in an expected manner
- '/\(/ { print }'
- You may also try to use "\(" as an argument to match() to see what
- will happen.
-
-2. From ADR.
-
- The posix (and original Unix!) notion of awk values as both number
- and string values needs to be put into the manual. This involves
- major and minor rewrites of most of the manual, but should help in
- clarifying many of the weirder points of the language.
-
-3. From ADR.
-
- The manual should be reorganized. Expressions should be introduced
- early, building up to regexps as expressions, and from there to their
- use as patterns and then in actions. Built-in vars should come earlier
- in the manual too. The 'expert info' sections marked with comments
- should get their own sections or subsections with nodes and titles.
- The manual should be gone over thoroughly for indexing.
-
-4. From ADR.
-
- Robert J. Chassell points out that awk programs should have some indication
- of how to use them. It would be useful to perhaps have a "programming
- style" section of the manual that would include this and other tips.
-
-5. From ADR in response to moraes@uunet.ca
- (This would make the beginnings of a good "puzzles" section...)
-
- Date: Mon, 2 Dec 91 10:08:05 EST
- From: gatech!cc!arnold (Arnold Robbins)
- To: cs.dal.ca!david, uunet.ca!moraes
- Subject: redirecting to /dev/stderr
- Cc: skeeve!arnold, boeing.com!brennan, research.att.com!bwk
-
- In 2.13.3 the following program no longer dumps core:
-
- BEGIN { print "hello" > /dev/stderr ; exit(1) }
-
- Instead, it creates a file named `0' with the word `hello' in it. AWK
- semantics strikes again. The meaning of the statement is
-
- print "hello" > (($0 ~ /dev/) stderr)
-
- /dev/ tests $0 for the pattern `dev'. This yields a 0. The variable stderr,
- having never been used, has a null string in it. The concatenation yields
- a string value of "0" which is used as the file name. Sigh.
-
- I think with some more time I can come up with a decent fix, but it will
- probably only print a diagnostic with -Wlint.
-
- Arnold
-
diff --git a/gawkmisc.c b/gawkmisc.c
new file mode 100644
index 00000000..0660587e
--- /dev/null
+++ b/gawkmisc.c
@@ -0,0 +1,66 @@
+/*
+ * gawkmisc.c --- miscellanious gawk routines that are OS specific.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991 - 95 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include "awk.h"
+
+/* some old compilers don't grok #elif. sigh */
+
+#if defined(MSDOS) || defined(OS2)
+#include "gawkmisc.pc"
+#else
+#if defined(VMS)
+#include "vms/gawkmisc.vms"
+#else
+#if defined(atarist)
+#include "atari/gawkmisc.atr"
+#else
+#if defined(__amigados__)
+#include "amiga/gawkmisc.ami"
+#else
+#include "posix/gawkmisc.c"
+#endif
+#endif
+#endif
+#endif
+
+/* xmalloc --- provide this so that other GNU library routines work */
+
+#if __STDC__
+typedef void *pointer;
+#else
+typedef char *pointer;
+#endif
+
+pointer
+xmalloc(bytes)
+size_t bytes;
+{
+ pointer p;
+
+ emalloc(p, pointer, bytes, "xmalloc");
+
+ return p;
+}
+
diff --git a/getopt.c b/getopt.c
index fd142f5a..023281cd 100644
--- a/getopt.c
+++ b/getopt.c
@@ -3,7 +3,7 @@
"Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
before changing it!
- Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 1994
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95
Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
@@ -18,20 +18,19 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+ Ditto for AIX 3.2 and <stdlib.h>. */
+#ifndef _NO_PROTO
+#define _NO_PROTO
+#endif
+
#ifdef HAVE_CONFIG_H
-#if defined (emacs) || defined (CONFIG_BROKETS)
-/* We use <config.h> instead of "config.h" so that a compilation
- using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
- (which it would do because it found this file in $srcdir). */
#include <config.h>
-#else
-#include "config.h"
-#endif
#endif
-#ifndef __STDC__
+#if !defined (__STDC__) || !__STDC__
/* This is a separate conditional since some stdc systems
reject `defined (const)'. */
#ifndef const
@@ -39,11 +38,6 @@
#endif
#endif
-/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>. */
-#ifndef _NO_PROTO
-#define _NO_PROTO
-#endif
-
#include <stdio.h>
/* Comment out all this code if we are using the GNU C Library, and are not
@@ -59,18 +53,19 @@
/* This needs to come after some library #include
to get __GNU_LIBRARY__ defined. */
-#if defined(__GNU_LIBRARY__) || defined(STDC_HEADERS)
+#ifdef __GNU_LIBRARY__
/* Don't include stdlib.h for non-GNU C libraries because some of them
contain conflicting prototypes for getopt. */
#include <stdlib.h>
-#else
-extern char *getenv ();
-#endif /* __GNU_LIBRARY || STDC_HEADERS */
+#endif /* GNU C library. */
-/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a
- long-named option. Because this is not POSIX.2 compliant, it is
- being phased out. */
-/* #define GETOPT_COMPAT */
+/* This is for other GNU distributions with internationalized messages.
+ The GNU C Library itself does not yet support such messages. */
+#if HAVE_LIBINTL_H
+# include <libintl.h>
+#else
+# define gettext(msgid) (msgid)
+#endif
/* This version of `getopt' appears to the caller like standard Unix `getopt'
but it behaves differently for the user, since it allows the user
@@ -94,7 +89,7 @@ extern char *getenv ();
Also, when `ordering' is RETURN_IN_ORDER,
each non-option ARGV-element is returned here. */
-char *optarg = 0;
+char *optarg = NULL;
/* Index in ARGV of the next element to be scanned.
This is used for communication to and from the caller
@@ -164,8 +159,11 @@ static enum
{
REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
} ordering;
+
+/* Value of POSIXLY_CORRECT environment variable. */
+static char *posixly_correct;
-#if defined(__GNU_LIBRARY__) || defined(STDC_HEADERS)
+#ifdef __GNU_LIBRARY__
/* We want to avoid inclusion of string.h with non-GNU libraries
because there are many ways it can cause trouble.
On some systems, it contains special magic macros that don't work
@@ -177,6 +175,8 @@ static enum
/* Avoid depending on library functions or files
whose names are inconsistent. */
+char *getenv ();
+
static char *
my_index (str, chr)
const char *str;
@@ -192,19 +192,18 @@ my_index (str, chr)
}
/* If using GCC, we can safely declare strlen this way.
- If not using GCC, it is ok not to declare it.
- (Supposedly there are some machines where it might get a warning,
- but changing this conditional to __STDC__ is too risky.) */
+ If not using GCC, it is ok not to declare it. */
#ifdef __GNUC__
-#ifdef IN_GCC
-#include "gstddef.h"
-#else
-#include <stddef.h>
-#endif
-extern size_t strlen (const char *);
-#endif
-
-#endif /* __GNU_LIBRARY__ || STDC_HEADERS */
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+ That was relevant to code that was here before. */
+#if !defined (__STDC__) || !__STDC__
+/* gcc with -traditional declares the built-in strlen to return int,
+ and has done so at least since version 2.4.5. -- rms. */
+extern int strlen (const char *);
+#endif /* not __STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
/* Handle permutation of arguments. */
@@ -279,6 +278,42 @@ exchange (argv)
first_nonopt += (optind - last_nonopt);
last_nonopt = optind;
}
+
+/* Initialize the internal data when the first call is made. */
+
+static const char *
+_getopt_initialize (optstring)
+ const char *optstring;
+{
+ /* Start processing options with ARGV-element 1 (since ARGV-element 0
+ is the program name); the sequence of previously skipped
+ non-option ARGV-elements is empty. */
+
+ first_nonopt = last_nonopt = optind = 1;
+
+ nextchar = NULL;
+
+ posixly_correct = getenv ("POSIXLY_CORRECT");
+
+ /* Determine how to handle the ordering of options and nonoptions. */
+
+ if (optstring[0] == '-')
+ {
+ ordering = RETURN_IN_ORDER;
+ ++optstring;
+ }
+ else if (optstring[0] == '+')
+ {
+ ordering = REQUIRE_ORDER;
+ ++optstring;
+ }
+ else if (posixly_correct != NULL)
+ ordering = REQUIRE_ORDER;
+ else
+ ordering = PERMUTE;
+
+ return optstring;
+}
/* Scan elements of ARGV (whose length is ARGC) for option characters
given in OPTSTRING.
@@ -345,41 +380,18 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
int *longind;
int long_only;
{
- int option_index;
-
- optarg = 0;
-
- /* Initialize the internal data when the first call is made.
- Start processing options with ARGV-element 1 (since ARGV-element 0
- is the program name); the sequence of previously skipped
- non-option ARGV-elements is empty. */
+ optarg = NULL;
if (optind == 0)
{
- first_nonopt = last_nonopt = optind = 1;
-
- nextchar = NULL;
-
- /* Determine how to handle the ordering of options and nonoptions. */
-
- if (optstring[0] == '-')
- {
- ordering = RETURN_IN_ORDER;
- ++optstring;
- }
- else if (optstring[0] == '+')
- {
- ordering = REQUIRE_ORDER;
- ++optstring;
- }
- else if (getenv ("POSIXLY_CORRECT") != NULL)
- ordering = REQUIRE_ORDER;
- else
- ordering = PERMUTE;
+ optstring = _getopt_initialize (optstring);
+ optind = 1; /* Don't scan ARGV[0], the program name. */
}
if (nextchar == NULL || *nextchar == '\0')
{
+ /* Advance to the next ARGV-element. */
+
if (ordering == PERMUTE)
{
/* If we have just processed some options following some non-options,
@@ -390,21 +402,16 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
else if (last_nonopt != optind)
first_nonopt = optind;
- /* Now skip any additional non-options
+ /* Skip any additional non-options
and extend the range of non-options previously skipped. */
while (optind < argc
- && (argv[optind][0] != '-' || argv[optind][1] == '\0')
-#ifdef GETOPT_COMPAT
- && (longopts == NULL
- || argv[optind][0] != '+' || argv[optind][1] == '\0')
-#endif /* GETOPT_COMPAT */
- )
+ && (argv[optind][0] != '-' || argv[optind][1] == '\0'))
optind++;
last_nonopt = optind;
}
- /* Special ARGV-element `--' means premature end of options.
+ /* The special ARGV-element `--' means premature end of options.
Skip it like a null option,
then exchange with previous non-options as if it were an option,
then skip everything else like a non-option. */
@@ -437,12 +444,7 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
/* If we have come to a non-option and did not permute it,
either stop the scan or describe it to the caller and pass it by. */
- if ((argv[optind][0] != '-' || argv[optind][1] == '\0')
-#ifdef GETOPT_COMPAT
- && (longopts == NULL
- || argv[optind][0] != '+' || argv[optind][1] == '\0')
-#endif /* GETOPT_COMPAT */
- )
+ if ((argv[optind][0] != '-' || argv[optind][1] == '\0'))
{
if (ordering == REQUIRE_ORDER)
return EOF;
@@ -451,36 +453,48 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
}
/* We have found another option-ARGV-element.
- Start decoding its characters. */
+ Skip the initial punctuation. */
nextchar = (argv[optind] + 1
+ (longopts != NULL && argv[optind][1] == '-'));
}
+ /* Decode the current option-ARGV-element. */
+
+ /* Check whether the ARGV-element is a long option.
+
+ If long_only and the ARGV-element has the form "-f", where f is
+ a valid short option, don't consider it an abbreviated form of
+ a long option that starts with f. Otherwise there would be no
+ way to give the -f short option.
+
+ On the other hand, if there's a long option "fubar" and
+ the ARGV-element is "-fu", do consider that an abbreviation of
+ the long option, just like "--fu", and not "-f" with arg "u".
+
+ This distinction seems to be the most useful approach. */
+
if (longopts != NULL
- && ((argv[optind][0] == '-'
- && (argv[optind][1] == '-' || long_only))
-#ifdef GETOPT_COMPAT
- || argv[optind][0] == '+'
-#endif /* GETOPT_COMPAT */
- ))
+ && (argv[optind][1] == '-'
+ || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
{
+ char *nameend;
const struct option *p;
- char *s = nextchar;
+ const struct option *pfound = NULL;
int exact = 0;
int ambig = 0;
- const struct option *pfound = NULL;
int indfound;
+ int option_index;
- while (*s && *s != '=')
- s++;
+ for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+ /* Do nothing. */ ;
- /* Test all options for either exact match or abbreviated matches. */
- for (p = longopts, option_index = 0; p->name;
- p++, option_index++)
- if (!strncmp (p->name, nextchar, s - nextchar))
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, nextchar, nameend - nextchar))
{
- if (s - nextchar == strlen (p->name))
+ if (nameend - nextchar == strlen (p->name))
{
/* Exact match found. */
pfound = p;
@@ -495,14 +509,14 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
indfound = option_index;
}
else
- /* Second nonexact match found. */
+ /* Second or later nonexact match found. */
ambig = 1;
}
if (ambig && !exact)
{
if (opterr)
- fprintf (stderr, "%s: option `%s' is ambiguous\n",
+ fprintf (stderr, gettext ("%s: option `%s' is ambiguous\n"),
argv[0], argv[optind]);
nextchar += strlen (nextchar);
optind++;
@@ -513,27 +527,26 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
{
option_index = indfound;
optind++;
- if (*s)
+ if (*nameend)
{
/* Don't test has_arg with >, because some C compilers don't
allow it to be used on enums. */
if (pfound->has_arg)
- optarg = s + 1;
+ optarg = nameend + 1;
else
{
if (opterr)
- {
- if (argv[optind - 1][1] == '-')
- /* --option */
- fprintf (stderr,
- "%s: option `--%s' doesn't allow an argument\n",
- argv[0], pfound->name);
- else
- /* +option or -option */
- fprintf (stderr,
- "%s: option `%c%s' doesn't allow an argument\n",
- argv[0], argv[optind - 1][0], pfound->name);
- }
+ if (argv[optind - 1][1] == '-')
+ /* --option */
+ fprintf (stderr,
+ gettext ("%s: option `--%s' doesn't allow an argument\n"),
+ argv[0], pfound->name);
+ else
+ /* +option or -option */
+ fprintf (stderr,
+ gettext ("%s: option `%c%s' doesn't allow an argument\n"),
+ argv[0], argv[optind - 1][0], pfound->name);
+
nextchar += strlen (nextchar);
return '?';
}
@@ -545,8 +558,9 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
else
{
if (opterr)
- fprintf (stderr, "%s: option `%s' requires an argument\n",
- argv[0], argv[optind - 1]);
+ fprintf (stderr,
+ gettext ("%s: option `%s' requires an argument\n"),
+ argv[0], argv[optind - 1]);
nextchar += strlen (nextchar);
return optstring[0] == ':' ? ':' : '?';
}
@@ -561,25 +575,23 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
}
return pfound->val;
}
+
/* Can't find it as a long option. If this is not getopt_long_only,
or the option starts with '--' or is not a valid short
option, then it's an error.
Otherwise interpret it as a short option. */
if (!long_only || argv[optind][1] == '-'
-#ifdef GETOPT_COMPAT
- || argv[optind][0] == '+'
-#endif /* GETOPT_COMPAT */
|| my_index (optstring, *nextchar) == NULL)
{
if (opterr)
{
if (argv[optind][1] == '-')
/* --option */
- fprintf (stderr, "%s: unrecognized option `--%s'\n",
+ fprintf (stderr, gettext ("%s: unrecognized option `--%s'\n"),
argv[0], nextchar);
else
/* +option or -option */
- fprintf (stderr, "%s: unrecognized option `%c%s'\n",
+ fprintf (stderr, gettext ("%s: unrecognized option `%c%s'\n"),
argv[0], argv[optind][0], nextchar);
}
nextchar = (char *) "";
@@ -588,7 +600,7 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
}
}
- /* Look at and handle the next option-character. */
+ /* Look at and handle the next short option-character. */
{
char c = *nextchar++;
@@ -602,16 +614,13 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
{
if (opterr)
{
-#if 0
- if (c < 040 || c >= 0177)
- fprintf (stderr, "%s: unrecognized option, character code 0%o\n",
+ if (posixly_correct)
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, gettext ("%s: illegal option -- %c\n"),
argv[0], c);
else
- fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c);
-#else
- /* 1003.2 specifies the format of this message. */
- fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c);
-#endif
+ fprintf (stderr, gettext ("%s: invalid option -- %c\n"),
+ argv[0], c);
}
optopt = c;
return '?';
@@ -627,7 +636,7 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
optind++;
}
else
- optarg = 0;
+ optarg = NULL;
nextchar = NULL;
}
else
@@ -644,14 +653,10 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only)
{
if (opterr)
{
-#if 0
- fprintf (stderr, "%s: option `-%c' requires an argument\n",
- argv[0], c);
-#else
/* 1003.2 specifies the format of this message. */
- fprintf (stderr, "%s: option requires an argument -- %c\n",
- argv[0], c);
-#endif
+ fprintf (stderr,
+ gettext ("%s: option requires an argument -- %c\n"),
+ argv[0], c);
}
optopt = c;
if (optstring[0] == ':')
diff --git a/getopt.h b/getopt.h
index b0fc4ffb..dcfedc65 100644
--- a/getopt.h
+++ b/getopt.h
@@ -1,5 +1,5 @@
/* Declarations for getopt.
- Copyright (C) 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+ Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -13,7 +13,7 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
#ifndef _GETOPT_H
#define _GETOPT_H 1
@@ -76,7 +76,7 @@ extern int optopt;
struct option
{
-#ifdef __STDC__
+#if defined (__STDC__) && __STDC__
const char *name;
#else
char *name;
@@ -94,15 +94,15 @@ struct option
#define required_argument 1
#define optional_argument 2
-#ifdef __STDC__
-#if defined(__GNU_LIBRARY__)
+#if defined (__STDC__) && __STDC__
+#ifdef __GNU_LIBRARY__
/* Many other libraries have conflicting prototypes for getopt, with
differences in the consts, in stdlib.h. To avoid compilation
errors, only prototype getopt for the GNU C library. */
extern int getopt (int argc, char *const *argv, const char *shortopts);
#else /* not __GNU_LIBRARY__ */
extern int getopt ();
-#endif /* not __GNU_LIBRARY__ */
+#endif /* __GNU_LIBRARY__ */
extern int getopt_long (int argc, char *const *argv, const char *shortopts,
const struct option *longopts, int *longind);
extern int getopt_long_only (int argc, char *const *argv,
@@ -120,7 +120,7 @@ extern int getopt_long ();
extern int getopt_long_only ();
extern int _getopt_internal ();
-#endif /* not __STDC__ */
+#endif /* __STDC__ */
#ifdef __cplusplus
}
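
For reference, these declarations are used through `struct option` tables such as the optab in main.c below: when an entry's `flag` member is non-NULL, getopt_long() stores `val` through that pointer and returns 0 instead of an option character. A small self-contained sketch of that convention (illustrative option names, not gawk's):

	#include <stdio.h>
	#include <getopt.h>

	static int posix_flag = 0;	/* set directly through the flag pointer */

	int
	main(int argc, char **argv)
	{
		int c;
		static struct option longopts[] = {
			{ "posix", no_argument, &posix_flag, 1 },
			{ "file", required_argument, NULL, 'f' },
			{ NULL, 0, NULL, 0 }
		};

		while ((c = getopt_long(argc, argv, "f:", longopts, NULL)) != -1) {
			switch (c) {
			case 0:		/* a long option that only set a flag */
				break;
			case 'f':
				printf("file: %s\n", optarg);
				break;
			default:	/* '?': getopt_long already printed a message */
				return 2;
			}
		}
		if (posix_flag)
			printf("posix mode\n");
		return 0;
	}
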
diff --git a/getopt1.c b/getopt1.c
index 7739b512..6fea28c1 100644
--- a/getopt1.c
+++ b/getopt1.c
@@ -14,7 +14,7 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
#ifdef HAVE_CONFIG_H
#if defined (emacs) || defined (CONFIG_BROKETS)
diff --git a/install-sh b/install-sh
new file mode 100755
index 00000000..ab74c882
--- /dev/null
+++ b/install-sh
@@ -0,0 +1,238 @@
+#!/bin/sh
+#
+# install - install a program, script, or datafile
+# This comes from X11R5.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# `make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch.
+#
+
+
+# set DOITPROG to echo to test this script
+
+# Don't use :- since 4.3BSD and earlier shells don't like it.
+doit="${DOITPROG-}"
+
+
+# put in absolute paths if you don't have them in your path; or use env. vars.
+
+mvprog="${MVPROG-mv}"
+cpprog="${CPPROG-cp}"
+chmodprog="${CHMODPROG-chmod}"
+chownprog="${CHOWNPROG-chown}"
+chgrpprog="${CHGRPPROG-chgrp}"
+stripprog="${STRIPPROG-strip}"
+rmprog="${RMPROG-rm}"
+mkdirprog="${MKDIRPROG-mkdir}"
+
+transformbasename=""
+transformarg=""
+instcmd="$mvprog"
+chmodcmd="$chmodprog 0755"
+chowncmd=""
+chgrpcmd=""
+stripcmd=""
+rmcmd="$rmprog -f"
+mvcmd="$mvprog"
+src=""
+dst=""
+dir_arg=""
+
+while [ x"$1" != x ]; do
+ case $1 in
+ -c) instcmd="$cpprog"
+ shift
+ continue;;
+
+ -d) dir_arg=true
+ shift
+ continue;;
+
+ -m) chmodcmd="$chmodprog $2"
+ shift
+ shift
+ continue;;
+
+ -o) chowncmd="$chownprog $2"
+ shift
+ shift
+ continue;;
+
+ -g) chgrpcmd="$chgrpprog $2"
+ shift
+ shift
+ continue;;
+
+ -s) stripcmd="$stripprog"
+ shift
+ continue;;
+
+ -t=*) transformarg=`echo $1 | sed 's/-t=//'`
+ shift
+ continue;;
+
+ -b=*) transformbasename=`echo $1 | sed 's/-b=//'`
+ shift
+ continue;;
+
+ *) if [ x"$src" = x ]
+ then
+ src=$1
+ else
+ # this colon is to work around a 386BSD /bin/sh bug
+ :
+ dst=$1
+ fi
+ shift
+ continue;;
+ esac
+done
+
+if [ x"$src" = x ]
+then
+ echo "install: no input file specified"
+ exit 1
+else
+ true
+fi
+
+if [ x"$dir_arg" != x ]; then
+ dst=$src
+ src=""
+
+ if [ -d $dst ]; then
+ instcmd=:
+ else
+ instcmd=mkdir
+ fi
+else
+
+# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
+# might cause directories to be created, which would be especially bad
+# if $src (and thus $dsttmp) contains '*'.
+
+ if [ -f $src -o -d $src ]
+ then
+ true
+ else
+ echo "install: $src does not exist"
+ exit 1
+ fi
+
+ if [ x"$dst" = x ]
+ then
+ echo "install: no destination specified"
+ exit 1
+ else
+ true
+ fi
+
+# If destination is a directory, append the input filename; if your system
+# does not like double slashes in filenames, you may need to add some logic
+
+ if [ -d $dst ]
+ then
+ dst="$dst"/`basename $src`
+ else
+ true
+ fi
+fi
+
+## this sed command emulates the dirname command
+dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+
+# Make sure that the destination directory exists.
+# this part is taken from Noah Friedman's mkinstalldirs script
+
+# Skip lots of stat calls in the usual case.
+if [ ! -d "$dstdir" ]; then
+defaultIFS='
+'
+IFS="${IFS-${defaultIFS}}"
+
+oIFS="${IFS}"
+# Some sh's can't handle IFS=/ for some reason.
+IFS='%'
+set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
+IFS="${oIFS}"
+
+pathcomp=''
+
+while [ $# -ne 0 ] ; do
+ pathcomp="${pathcomp}${1}"
+ shift
+
+ if [ ! -d "${pathcomp}" ] ;
+ then
+ $mkdirprog "${pathcomp}"
+ else
+ true
+ fi
+
+ pathcomp="${pathcomp}/"
+done
+fi
+
+if [ x"$dir_arg" != x ]
+then
+ $doit $instcmd $dst &&
+
+ if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
+ if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
+ if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
+ if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
+else
+
+# If we're going to rename the final executable, determine the name now.
+
+ if [ x"$transformarg" = x ]
+ then
+ dstfile=`basename $dst`
+ else
+ dstfile=`basename $dst $transformbasename |
+ sed $transformarg`$transformbasename
+ fi
+
+# don't allow the sed command to completely eliminate the filename
+
+ if [ x"$dstfile" = x ]
+ then
+ dstfile=`basename $dst`
+ else
+ true
+ fi
+
+# Make a temp file name in the proper directory.
+
+ dsttmp=$dstdir/#inst.$$#
+
+# Move or copy the file name to the temp name
+
+ $doit $instcmd $src $dsttmp &&
+
+ trap "rm -f ${dsttmp}" 0 &&
+
+# and set any options; do chmod last to preserve setuid bits
+
+# If any of these fail, we abort the whole thing. If we want to
+# ignore errors from any of these, just make sure not to ignore
+# errors from the above "$doit $instcmd $src $dsttmp" command.
+
+ if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
+ if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
+ if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
+ if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
+
+# Now rename the file to the real destination.
+
+ $doit $rmcmd -f $dstdir/$dstfile &&
+ $doit $mvcmd $dsttmp $dstdir/$dstfile
+
+fi &&
+
+
+exit 0
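
For reference, the component-by-component directory creation loop above (taken from mkinstalldirs) amounts to creating every leading prefix of the destination path and tolerating "already exists" failures. A rough equivalent of that idea in C, as an illustrative sketch only:

	#include <stdio.h>
	#include <string.h>
	#include <errno.h>
	#include <sys/types.h>
	#include <sys/stat.h>

	/*
	 * Create each leading component of `path', much like the
	 * while-loop over path components in install-sh, or mkdir -p.
	 * Illustrative only; no handling of permissions or races.
	 */
	static int
	make_path(const char *path)
	{
		char buf[1024];
		char *p;

		if (strlen(path) >= sizeof(buf))
			return -1;
		strcpy(buf, path);
		for (p = buf + 1; *p != '\0'; p++) {
			if (*p != '/')
				continue;
			*p = '\0';			/* truncate to this prefix */
			if (mkdir(buf, 0755) != 0 && errno != EEXIST)
				return -1;
			*p = '/';			/* restore and keep going */
		}
		if (mkdir(buf, 0755) != 0 && errno != EEXIST)
			return -1;
		return 0;
	}

	int
	main(int argc, char **argv)
	{
		if (argc > 1 && make_path(argv[1]) != 0) {
			perror(argv[1]);
			return 1;
		}
		return 0;
	}
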
diff --git a/io.c b/io.c
index 03b73daa..5253af88 100644
--- a/io.c
+++ b/io.c
@@ -6,7 +6,7 @@
* Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,20 +19,31 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
-#if !defined(VMS) && !defined(VMS_POSIX) && !defined(_MSC_VER)
-#include <sys/param.h>
-#endif
#include "awk.h"
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif /* HAVE_SYS_PARAM_H */
+
+#ifdef HAVE_SYS_WAIT_H
+#include <sys/wait.h>
+#endif /* HAVE_SYS_WAIT_H */
+
#ifndef O_RDONLY
#include <fcntl.h>
#endif
-#if !defined(S_ISDIR) && defined(S_IFDIR)
+#include <assert.h>
+
+#if ! defined(S_ISREG) && defined(S_IFREG)
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#endif
+
+#if ! defined(S_ISDIR) && defined(S_IFDIR)
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
#endif
@@ -40,13 +51,11 @@
#define ENFILE EMFILE
#endif
-#ifndef atarist
-#define INVALID_HANDLE (-1)
-#else
-#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1)
+#ifdef atarist
+#include <stddef.h>
#endif
-#if defined(MSDOS) || defined(OS2) || defined(atarist)
+#if defined(MSDOS) || defined(OS2)
#define PIPES_SIMULATED
#endif
@@ -63,23 +72,22 @@ static IOBUF *gawk_popen P((char *cmd, struct redirect *rp));
static IOBUF *iop_open P((const char *file, const char *how));
static int gawk_pclose P((struct redirect *rp));
static int do_pathopen P((const char *file));
+static int get_a_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode));
static int str2mode P((const char *mode));
static void spec_setup P((IOBUF *iop, int len, int allocate));
static int specfdopen P((IOBUF *iop, const char *name, const char *mode));
static int pidopen P((IOBUF *iop, const char *name, const char *mode));
static int useropen P((IOBUF *iop, const char *name, const char *mode));
-extern FILE *fdopen();
-
-#if defined (MSDOS)
+#if defined (MSDOS) && !defined (__GO32__)
#include "popen.h"
-#define popen(c,m) os_popen(c,m)
-#define pclose(f) os_pclose(f)
+#define popen(c, m) os_popen(c, m)
+#define pclose(f) os_pclose(f)
#else
#if defined (OS2) /* OS/2, but not family mode */
#if defined (_MSC_VER)
-#define popen(c,m) _popen(c,m)
-#define pclose(f) _pclose(f)
+#define popen(c, m) _popen(c, m)
+#define pclose(f) _pclose(f)
#endif
#else
extern FILE *popen();
@@ -87,6 +95,10 @@ extern FILE *popen();
#endif
static struct redirect *red_head = NULL;
+static NODE *RS;
+static Regexp *RS_regexp;
+
+int RS_is_null;
extern int output_is_tty;
extern NODE *ARGC_node;
@@ -102,15 +114,17 @@ static jmp_buf filebuf; /* for do_nextfile() */
void
do_nextfile()
{
- (void) nextfile(1);
+ (void) nextfile(TRUE);
longjmp(filebuf, 1);
}
+/* nextfile --- move to the next input data file */
+
static IOBUF *
nextfile(skipping)
int skipping;
{
- static int i = 1;
+ static long i = 1;
static int files = 0;
NODE *arg;
static IOBUF *curfile = NULL;
@@ -128,16 +142,16 @@ int skipping;
} else
return curfile;
}
- for (; i < (int) (ARGC_node->lnode->numbr); i++) {
+ for (; i < (long) (ARGC_node->lnode->numbr); i++) {
arg = *assoc_lookup(ARGV_node, tmp_number((AWKNUM) i));
- if (arg->stptr[0] == '\0')
+ if (arg->stlen == 0)
continue;
arg->stptr[arg->stlen] = '\0';
- if (! do_unix) {
- ARGIND_node->var_value->numbr = i;
- ARGIND_node->var_value->flags = NUM|NUMBER;
+ if (! do_traditional) {
+ unref(ARGIND_node->var_value);
+ ARGIND_node->var_value = make_number((AWKNUM) i);
}
- if (!arg_assign(arg->stptr)) {
+ if (! arg_assign(arg->stptr)) {
files++;
curfile = iop_open(arg->stptr, "r");
if (curfile == NULL)
@@ -157,26 +171,29 @@ int skipping;
/* no args. -- use stdin */
/* FNR is init'ed to 0 */
FILENAME_node->var_value = make_string("-", 1);
- curfile = iop_alloc(fileno(stdin));
+ curfile = iop_alloc(fileno(stdin), "stdin");
}
return curfile;
}
+/* set_FNR --- update internal FNR from awk variable */
+
void
set_FNR()
{
FNR = (long) FNR_node->var_value->numbr;
}
+/* set_NR --- update internal NR from awk variable */
+
void
set_NR()
{
NR = (long) NR_node->var_value->numbr;
}
-/*
- * This reads in a record from the input file
- */
+/* inrec --- This reads in a record from the input file */
+
static int
inrec(iop)
IOBUF *iop;
@@ -185,19 +202,21 @@ IOBUF *iop;
register int cnt;
int retval = 0;
- cnt = get_a_record(&begin, iop, *RS, NULL);
+ cnt = get_a_record(&begin, iop, RS->stptr[0], RS_regexp, NULL);
if (cnt == EOF) {
cnt = 0;
retval = 1;
} else {
NR += 1;
FNR += 1;
- set_record(begin, cnt, 1);
+ set_record(begin, cnt, TRUE);
}
return retval;
}
+/* iop_close --- close an open IOP */
+
static int
iop_close(iop)
IOBUF *iop;
@@ -224,14 +243,15 @@ IOBUF *iop;
}
/* Don't close standard files or else crufty code elsewhere will lose */
- if (iop->fd == fileno(stdin) ||
- iop->fd == fileno(stdout) ||
- iop->fd == fileno(stderr))
+ if (iop->fd == fileno(stdin)
+ || iop->fd == fileno(stdout)
+ || iop->fd == fileno(stderr))
ret = 0;
else
ret = close(iop->fd);
if (ret == -1)
- warning("close of fd %d failed (%s)", iop->fd, strerror(errno));
+ warning("close of fd %d (`%s') failed (%s)", iop->fd,
+ iop->name, strerror(errno));
if ((iop->flag & IOP_NO_FREE) == 0) {
/*
* be careful -- $0 may still reference the buffer even though
@@ -247,36 +267,40 @@ IOBUF *iop;
fields_arr[0]->stlen);
unref(fields_arr[0]);
fields_arr [0] = t;
- reset_record ();
+ reset_record();
}
free(iop->buf);
}
- free((char *)iop);
+ free((char *) iop);
}
return ret == -1 ? 1 : 0;
}
+/* do_input --- the main input processing loop */
+
void
do_input()
{
IOBUF *iop;
extern int exiting;
- (void) setjmp(filebuf);
+ (void) setjmp(filebuf); /* for `nextfile' */
- while ((iop = nextfile(0)) != NULL) {
+ while ((iop = nextfile(FALSE)) != NULL) {
if (inrec(iop) == 0)
while (interpret(expression_value) && inrec(iop) == 0)
continue;
+#ifdef C_ALLOCA
/* recover any space from C based alloca */
(void) alloca(0);
-
+#endif
if (exiting)
break;
}
}
-/* Redirection for printf and print commands */
+/* redirect --- Redirection for printf and print commands */
+
struct redirect *
redirect(tree, errflg)
NODE *tree;
@@ -317,11 +341,11 @@ int *errflg;
what = "<";
break;
default:
- fatal ("invalid tree type %d in redirect()", tree->type);
+ fatal("invalid tree type %d in redirect()", tree->type);
break;
}
tmp = tree_eval(tree->subnode);
- if (do_lint && ! (tmp->flags & STR))
+ if (do_lint && (tmp->flags & STR) == 0)
warning("expression in `%s' redirection only has numeric value",
what);
tmp = force_string(tmp);
@@ -336,7 +360,7 @@ int *errflg;
if (strlen(rp->value) == tmp->stlen
&& STREQN(rp->value, str, tmp->stlen)
&& ((rp->flag & ~(RED_NOBUF|RED_EOF)) == tflag
- || (outflag
+ || (outflag != 0
&& (rp->flag & (RED_FILE|RED_WRITE)) == outflag)))
break;
if (rp == NULL) {
@@ -352,7 +376,7 @@ int *errflg;
rp->pid = 0; /* unlikely that we're worried about init */
rp->status = 0;
/* maintain list in most-recently-used first order */
- if (red_head)
+ if (red_head != NULL)
red_head->prev = rp;
rp->prev = NULL;
rp->next = red_head;
@@ -360,7 +384,8 @@ int *errflg;
}
while (rp->fp == NULL && rp->iop == NULL) {
if (rp->flag & RED_EOF)
- /* encountered EOF on file or pipe -- must be cleared
+ /*
+ * encountered EOF on file or pipe -- must be cleared
* by explicit close() before reading more
*/
return rp;
@@ -369,7 +394,7 @@ int *errflg;
switch (tree->type) {
case Node_redirect_output:
mode = "w";
- if (rp->flag & RED_USED)
+ if ((rp->flag & RED_USED) != 0)
mode = "a";
break;
case Node_redirect_append:
@@ -427,7 +452,8 @@ int *errflg;
* complain. The shell will complain on
* a bad command to a pipe.
*/
- *errflg = errno;
+ if (errflg != NULL)
+ *errflg = errno;
if (tree->type == Node_redirect_output
|| tree->type == Node_redirect_append)
fatal("can't redirect %s `%s' (%s)",
@@ -443,6 +469,24 @@ int *errflg;
return rp;
}
+/* getredirect --- find the struct redirect for this file or pipe */
+
+struct redirect *
+getredirect(str, len)
+char *str;
+int len;
+{
+ struct redirect *rp;
+
+ for (rp = red_head; rp != NULL; rp = rp->next)
+ if (strlen(rp->value) == len && STREQN(rp->value, str, len))
+ return rp;
+
+ return NULL;
+}
+
+/* close_one --- temporarily close an open file to re-use the fd */
+
static void
close_one()
{
@@ -454,10 +498,10 @@ close_one()
rplast = rp;
/* now work back up through the list */
for (rp = rplast; rp != NULL; rp = rp->prev)
- if (rp->fp && (rp->flag & RED_FILE)) {
+ if (rp->fp != NULL && (rp->flag & RED_FILE) != 0) {
rp->flag |= RED_USED;
errno = 0;
- if (fclose(rp->fp))
+ if (/* do_lint && */ fclose(rp->fp) != 0)
warning("close of \"%s\" failed (%s).",
rp->value, strerror(errno));
rp->fp = NULL;
@@ -468,6 +512,8 @@ close_one()
fatal("too many pipes or input files open");
}
+/* do_close --- completely close an open file or pipe */
+
NODE *
do_close(tree)
NODE *tree;
@@ -485,11 +531,13 @@ NODE *tree;
if (rp == NULL) /* no match */
return tmp_number((AWKNUM) 0.0);
fflush(stdout); /* synchronize regular output */
- tmp = tmp_number((AWKNUM)close_redir(rp, 0));
+ tmp = tmp_number((AWKNUM) close_redir(rp, FALSE));
rp = NULL;
return tmp;
}
+/* close_redir --- close an open file or pipe */
+
static int
close_redir(rp, exitwarn)
register struct redirect *rp;
@@ -505,10 +553,10 @@ int exitwarn;
errno = 0;
if ((rp->flag & (RED_PIPE|RED_WRITE)) == (RED_PIPE|RED_WRITE))
status = pclose(rp->fp);
- else if (rp->fp)
+ else if (rp->fp != NULL)
status = fclose(rp->fp);
- else if (rp->iop) {
- if (rp->flag & RED_PIPE)
+ else if (rp->iop != NULL) {
+ if ((rp->flag & RED_PIPE) != 0)
status = gawk_pclose(rp);
else {
status = iop_close(rp->iop);
@@ -516,14 +564,14 @@ int exitwarn;
}
}
- what = (rp->flag & RED_PIPE) ? "pipe" : "file";
+ what = ((rp->flag & RED_PIPE) != 0) ? "pipe" : "file";
if (exitwarn)
- warning("no explicit close of %s \"%s\" provided",
+ warning("no explicit close of %s `%s' provided",
what, rp->value);
/* SVR4 awk checks and warns about status of close */
- if (status) {
+ if (do_lint && status != 0) {
char *s = strerror(errno);
/*
@@ -534,37 +582,41 @@ int exitwarn;
warning("failure status (%d) on %s close of \"%s\" (%s)",
status, what, rp->value, s);
- if (! do_unix) {
+ if (! do_traditional) {
/* set ERRNO too so that program can get at it */
unref(ERRNO_node->var_value);
ERRNO_node->var_value = make_string(s, strlen(s));
}
}
- if (rp->next)
+ if (rp->next != NULL)
rp->next->prev = rp->prev;
- if (rp->prev)
+ if (rp->prev != NULL)
rp->prev->next = rp->next;
else
red_head = rp->next;
free(rp->value);
- free((char *)rp);
+ free((char *) rp);
return status;
}
+/* flush_io --- flush all open output files */
+
int
-flush_io ()
+flush_io()
{
register struct redirect *rp;
int status = 0;
errno = 0;
if (fflush(stdout)) {
- warning("error writing standard output (%s).", strerror(errno));
+ warning("error writing standard output (%s)", strerror(errno));
status++;
}
if (fflush(stderr)) {
- warning("error writing standard error (%s).", strerror(errno));
+#ifndef __amigados__ /* HACK (fnf) */
+ warning("error writing standard error (%s)", strerror(errno));
status++;
+#endif
}
for (rp = red_head; rp != NULL; rp = rp->next)
/* flush both files and pipes, what the heck */
@@ -579,8 +631,10 @@ flush_io ()
return status;
}
+/* close_io --- close all open files, called when exiting */
+
int
-close_io ()
+close_io()
{
register struct redirect *rp;
register struct redirect *next;
@@ -589,8 +643,10 @@ close_io ()
errno = 0;
for (rp = red_head; rp != NULL; rp = next) {
next = rp->next;
- /* close_redir() will print a message if needed */
- /* if do_lint, warn about lack of explicit close */
+ /*
+ * close_redir() will print a message if needed
+ * if do_lint, warn about lack of explicit close
+ */
if (close_redir(rp, do_lint))
status++;
rp = NULL;
@@ -601,12 +657,14 @@ close_io ()
* them, we just flush them, and do that across the board.
*/
if (fflush(stdout)) {
- warning("error writing standard output (%s).", strerror(errno));
+ warning("error writing standard output (%s)", strerror(errno));
status++;
}
if (fflush(stderr)) {
- warning("error writing standard error (%s).", strerror(errno));
+#ifndef __amigados__ /* HACK (fnf) */
+ warning("error writing standard error (%s)", strerror(errno));
status++;
+#endif
}
return status;
}
@@ -652,7 +710,7 @@ int
devopen(name, mode)
const char *name, *mode;
{
- int openfd = INVALID_HANDLE;
+ int openfd;
const char *cp;
char *ptr;
int flag = 0;
@@ -661,17 +719,18 @@ const char *name, *mode;
flag = str2mode(mode);
- if (do_unix)
+ if (STREQ(name, "-"))
+ openfd = fileno(stdin);
+ else
+ openfd = INVALID_HANDLE;
+
+ if (do_traditional)
goto strictopen;
-#ifdef VMS
- if ((openfd = vms_devopen(name, flag)) >= 0)
+ if ((openfd = os_devopen(name, flag)) >= 0)
return openfd;
-#endif /* VMS */
- if (STREQ(name, "-"))
- openfd = fileno(stdin);
- else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
+ if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
cp = name + 5;
if (STREQ(cp, "stdin") && (flag & O_RDONLY) == O_RDONLY)
@@ -682,7 +741,7 @@ const char *name, *mode;
openfd = fileno(stderr);
else if (STREQN(cp, "fd/", 3)) {
cp += 3;
- openfd = (int)strtod(cp, &ptr);
+ openfd = (int) strtod(cp, &ptr);
if (openfd <= INVALID_HANDLE || ptr == cp)
openfd = INVALID_HANDLE;
}
@@ -725,7 +784,7 @@ int allocate;
iop->flag = IOP_IS_INTERNAL;
}
-/* specfdopen --- open a fd special file */
+/* specfdopen --- open an fd special file */
static int
specfdopen(iop, name, mode)
@@ -738,38 +797,22 @@ const char *name, *mode;
fd = devopen(name, mode);
if (fd == INVALID_HANDLE)
return INVALID_HANDLE;
- tp = iop_alloc(fd);
- if (tp == NULL)
+ tp = iop_alloc(fd, name);
+ if (tp == NULL) {
+ /* don't leak fd's */
+ close(fd);
return INVALID_HANDLE;
+ }
*iop = *tp;
iop->flag |= IOP_NO_FREE;
free(tp);
return 0;
}
-/*
- * Following mess will improve in 2.16; this is written to avoid
- * long lines, avoid splitting #if with backslash, and avoid #elif
- * to maximize portability.
- */
-#ifndef GETPGRP_NOARG
-#if defined(__svr4__) || defined(BSD4_4) || defined(_POSIX_SOURCE)
-#define GETPGRP_NOARG
+#ifdef GETPGRP_VOID
+#define getpgrp_arg() /* nothing */
#else
-#if defined(i860) || defined(_AIX) || defined(hpux) || defined(VMS)
-#define GETPGRP_NOARG
-#else
-#if defined(OS2) || defined(MSDOS) || defined(AMIGA) || defined(atarist)
-#define GETPGRP_NOARG
-#endif
-#endif
-#endif
-#endif
-
-#ifdef GETPGRP_NOARG
-#define getpgrp_ARG /* nothing */
-#else
-#define getpgrp_ARG getpid()
+#define getpgrp_arg() getpid()
#endif
/* pidopen --- "open" /dev/pid, /dev/ppid, and /dev/pgrpid */
@@ -783,13 +826,13 @@ const char *name, *mode;
int i;
if (name[6] == 'g')
- sprintf(tbuf, "%d\n", getpgrp( getpgrp_ARG ));
+ sprintf(tbuf, "%d\n", getpgrp(getpgrp_arg()));
else if (name[6] == 'i')
sprintf(tbuf, "%d\n", getpid());
else
sprintf(tbuf, "%d\n", getppid());
i = strlen(tbuf);
- spec_setup(iop, i, 1);
+ spec_setup(iop, i, TRUE);
strcpy(iop->buf, tbuf);
return 0;
}
@@ -802,7 +845,7 @@ const char *name, *mode;
* $2 = geteuid()
* $3 = getgid()
* $4 = getegid()
- * If multiple groups are supported, the $5 through $NF are the
+ * If multiple groups are supported, then $5 through $NF are the
* supplementary group set.
*/
@@ -814,11 +857,7 @@ const char *name, *mode;
char tbuf[BUFSIZ], *cp;
int i;
#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0
-#if defined(atarist) || defined(__svr4__) || defined(__osf__) || defined(__bsdi__)
- gid_t groupset[NGROUPS_MAX];
-#else
- int groupset[NGROUPS_MAX];
-#endif
+ GETGROUPS_T groupset[NGROUPS_MAX];
int ngroups;
#endif
@@ -832,16 +871,15 @@ const char *name, *mode;
for (i = 0; i < ngroups; i++) {
*cp++ = ' ';
- sprintf(cp, "%d", (int)groupset[i]);
+ sprintf(cp, "%d", (int) groupset[i]);
cp += strlen(cp);
}
#endif
*cp++ = '\n';
*cp++ = '\0';
-
i = strlen(tbuf);
- spec_setup(iop, i, 1);
+ spec_setup(iop, i, TRUE);
strcpy(iop->buf, tbuf);
return 0;
}
@@ -859,7 +897,7 @@ const char *name, *mode;
static struct internal {
const char *name;
int compare;
- int (*fp) P((IOBUF*,const char *,const char *));
+ int (*fp) P((IOBUF *, const char *, const char *));
IOBUF iob;
} table[] = {
{ "/dev/fd/", 8, specfdopen },
@@ -875,11 +913,10 @@ const char *name, *mode;
flag = str2mode(mode);
- if (do_unix)
- goto strictopen;
-
if (STREQ(name, "-"))
openfd = fileno(stdin);
+ else if (do_traditional)
+ goto strictopen;
else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) {
int i;
@@ -888,7 +925,7 @@ const char *name, *mode;
iop = & table[i].iob;
if (iop->buf != NULL) {
- spec_setup(iop, 0, 0);
+ spec_setup(iop, 0, FALSE);
return iop;
} else if ((*table[i].fp)(iop, name, mode) == 0)
return iop;
@@ -907,17 +944,19 @@ strictopen:
if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 0)
if ((buf.st_mode & S_IFMT) == S_IFDIR)
fatal("file `%s' is a directory", name);
- iop = iop_alloc(openfd);
+ iop = iop_alloc(openfd, name);
return iop;
}
-#ifndef PIPES_SIMULATED
- /* real pipes */
+#ifndef PIPES_SIMULATED /* real pipes */
+
+/* wait_any --- wait for a child process, close associated pipe */
+
static int
wait_any(interesting)
int interesting; /* pid of interest, if any */
{
- SIGTYPE (*hstat)(), (*istat)(), (*qstat)();
+ RETSIGTYPE (*hstat)(), (*istat)(), (*qstat)();
int pid;
int status = 0;
struct redirect *redp;
@@ -927,10 +966,10 @@ int interesting; /* pid of interest, if any */
istat = signal(SIGINT, SIG_IGN);
qstat = signal(SIGQUIT, SIG_IGN);
for (;;) {
-#ifdef NeXT
- pid = wait((union wait *)&status);
-#else
+#ifdef HAVE_SYS_WAIT_H /* Posix compatible sys/wait.h */
pid = wait(&status);
+#else
+ pid = wait((union wait *)&status);
#endif /* NeXT */
if (interesting && pid == interesting) {
break;
@@ -939,13 +978,13 @@ int interesting; /* pid of interest, if any */
if (pid == redp->pid) {
redp->pid = -1;
redp->status = status;
- if (redp->fp) {
+ if (redp->fp != NULL) {
pclose(redp->fp);
- redp->fp = 0;
+ redp->fp = NULL;
}
- if (redp->iop) {
+ if (redp->iop != NULL) {
(void) iop_close(redp->iop);
- redp->iop = 0;
+ redp->iop = NULL;
}
break;
}
@@ -959,6 +998,8 @@ int interesting; /* pid of interest, if any */
return(status);
}
+/* gawk_popen --- open an IOBUF on a child process */
+
static IOBUF *
gawk_popen(cmd, rp)
char *cmd;
@@ -967,7 +1008,8 @@ struct redirect *rp;
int p[2];
register int pid;
- /* used to wait for any children to synchronize input and output,
+ /*
+ * used to wait for any children to synchronize input and output,
* but this could cause gawk to hang when it is started in a pipeline
* and thus has a child process feeding it input (shell dependant)
*/
@@ -983,10 +1025,7 @@ struct redirect *rp;
fatal("dup of pipe failed (%s)", strerror(errno));
if (close(p[0]) == -1 || close(p[1]) == -1)
fatal("close of pipe failed (%s)", strerror(errno));
- if (close(0) == -1)
- fatal("close of stdin in child failed (%s)",
- strerror(errno));
- execl("/bin/sh", "sh", "-c", cmd, 0);
+ execl("/bin/sh", "sh", "-c", cmd, NULL);
_exit(127);
}
if (pid == -1)
@@ -994,9 +1033,14 @@ struct redirect *rp;
rp->pid = pid;
if (close(p[1]) == -1)
fatal("close of pipe failed (%s)", strerror(errno));
- return (rp->iop = iop_alloc(p[0]));
+ rp->iop = iop_alloc(p[0], cmd);
+ if (rp->iop == NULL)
+ (void) close(p[0]);
+ return (rp->iop);
}
+/* gawk_pclose --- close an open child pipe */
+
static int
gawk_pclose(rp)
struct redirect *rp;
@@ -1013,10 +1057,16 @@ struct redirect *rp;
}
#else /* PIPES_SIMULATED */
- /* use temporary file rather than pipe */
- /* except if popen() provides real pipes too */
+
+/*
+ * use temporary file rather than pipe
+ * except if popen() provides real pipes too
+ */
#if defined(VMS) || defined(OS2) || defined (MSDOS)
+
+/* gawk_popen --- open an IOBUF on a child process */
+
static IOBUF *
gawk_popen(cmd, rp)
char *cmd;
@@ -1026,53 +1076,66 @@ struct redirect *rp;
if ((current = popen(cmd, "r")) == NULL)
return NULL;
- return (rp->iop = iop_alloc(fileno(current)));
+ rp->iop = iop_alloc(fileno(current), cmd);
+ if (rp->iop == NULL) {
+ (void) fclose(current);
+ current = NULL;
+ }
+ rp->ifp = current;
+ return (rp->iop);
}
+/* gawk_pclose --- close an open child pipe */
+
static int
gawk_pclose(rp)
struct redirect *rp;
{
int rval, aval, fd = rp->iop->fd;
- FILE *kludge = fdopen(fd, (char *) "r"); /* pclose needs FILE* w/ right fileno */
rp->iop->fd = dup(fd); /* kludge to allow close() + pclose() */
rval = iop_close(rp->iop);
rp->iop = NULL;
- aval = pclose(kludge);
+ aval = pclose(rp->ifp);
+ rp->ifp = NULL;
return (rval < 0 ? rval : aval);
}
-#else /* VMS || OS2 || MSDOS */
+#else /* not (VMS || OS2 || MSDOS) */
-static
-struct {
+static struct pipeinfo {
char *command;
char *name;
} pipes[_NFILE];
+/* gawk_popen --- open an IOBUF on a child process */
+
static IOBUF *
gawk_popen(cmd, rp)
char *cmd;
struct redirect *rp;
{
- extern char *strdup(const char *);
+ extern char *strdup P((const char *));
int current;
char *name;
static char cmdbuf[256];
- /* get a name to use. */
+ /* get a name to use */
if ((name = tempnam(".", "pip")) == NULL)
return NULL;
- sprintf(cmdbuf,"%s > %s", cmd, name);
+ sprintf(cmdbuf, "%s > %s", cmd, name);
system(cmdbuf);
- if ((current = open(name,O_RDONLY)) == INVALID_HANDLE)
+ if ((current = open(name, O_RDONLY)) == INVALID_HANDLE)
return NULL;
pipes[current].name = name;
pipes[current].command = strdup(cmd);
- rp->iop = iop_alloc(current);
- return (rp->iop = iop_alloc(current));
+ rp->iop = iop_alloc(current, name);
+ if (rp->iop == NULL)
+ (void) close(current);
+ return (rp->iop);
}
+/* gawk_pclose --- close an open child pipe */
+
static int
gawk_pclose(rp)
struct redirect *rp;
@@ -1092,10 +1155,12 @@ struct redirect *rp;
free(pipes[cur].command);
return rval;
}
-#endif /* VMS || OS2 || MSDOS */
+#endif /* not (VMS || OS2 || MSDOS) */
#endif /* PIPES_SIMULATED */
+/* do_getline --- read in a line, into var and with redirection, as needed */
+
NODE *
do_getline(tree)
NODE *tree;
@@ -1108,7 +1173,7 @@ NODE *tree;
while (cnt == EOF) {
if (tree->rnode == NULL) { /* no redirection */
- iop = nextfile(0);
+ iop = nextfile(FALSE);
if (iop == NULL) /* end of input */
return tmp_number((AWKNUM) 0.0);
} else {
@@ -1116,7 +1181,7 @@ NODE *tree;
rp = redirect(tree->rnode, &redir_error);
if (rp == NULL && redir_error) { /* failed redirect */
- if (! do_unix) {
+ if (! do_traditional) {
s = strerror(redir_error);
unref(ERRNO_node->var_value);
@@ -1130,22 +1195,24 @@ NODE *tree;
return tmp_number((AWKNUM) 0.0);
}
errcode = 0;
- cnt = get_a_record(&s, iop, *RS, & errcode);
- if (! do_unix && errcode != 0) {
- s = strerror(errcode);
+ cnt = get_a_record(&s, iop, RS->stptr[0], RS_regexp, &errcode);
+ if (errcode != 0) {
+ if (! do_traditional) {
+ s = strerror(errcode);
- unref(ERRNO_node->var_value);
- ERRNO_node->var_value = make_string(s, strlen(s));
+ unref(ERRNO_node->var_value);
+ ERRNO_node->var_value = make_string(s, strlen(s));
+ }
return tmp_number((AWKNUM) -1.0);
}
if (cnt == EOF) {
- if (rp) {
+ if (rp != NULL) {
/*
* Don't do iop_close() here if we are
* reading from a pipe; otherwise
* gawk_pclose will not be called.
*/
- if (!(rp->flag & RED_PIPE)) {
+ if ((rp->flag & RED_PIPE) == 0) {
(void) iop_close(iop);
rp->iop = NULL;
}
@@ -1154,12 +1221,12 @@ NODE *tree;
} else
continue; /* try another file */
}
- if (!rp) {
- NR += 1;
- FNR += 1;
+ if (rp == NULL) {
+ NR++;
+ FNR++;
}
if (tree->lnode == NULL) /* no optional var. */
- set_record(s, cnt, 1);
+ set_record(s, cnt, TRUE);
else { /* assignment to variable */
Func_ptr after_assign = NULL;
NODE **lhs;
@@ -1169,21 +1236,23 @@ NODE *tree;
*lhs = make_string(s, strlen(s));
(*lhs)->flags |= MAYBE_NUM;
/* we may have to regenerate $0 here! */
- if (after_assign)
+ if (after_assign != NULL)
(*after_assign)();
}
}
return tmp_number((AWKNUM) 1.0);
}
+/* pathopen --- pathopen with default file extension handling */
+
int
-pathopen (file)
+pathopen(file)
const char *file;
{
int fd = do_pathopen(file);
#ifdef DEFAULT_FILETYPE
- if (! do_unix && fd <= INVALID_HANDLE) {
+ if (! do_traditional && fd <= INVALID_HANDLE) {
char *file_awk;
int save = errno;
#ifdef VMS
@@ -1208,12 +1277,14 @@ const char *file;
return fd;
}
+/* do_pathopen --- search $AWKPATH for source file */
+
static int
-do_pathopen (file)
+do_pathopen(file)
const char *file;
{
- static const char *savepath = DEFPATH; /* defined in config.h */
- static int first = 1;
+ static const char *savepath = NULL;
+ static int first = TRUE;
const char *awkpath;
char *cp, trypath[BUFSIZ];
int fd;
@@ -1221,70 +1292,410 @@ const char *file;
if (STREQ(file, "-"))
return (0);
- if (do_unix)
+ if (do_traditional)
return (devopen(file, "r"));
if (first) {
- first = 0;
- if ((awkpath = getenv ("AWKPATH")) != NULL && *awkpath)
+ first = FALSE;
+ if ((awkpath = getenv("AWKPATH")) != NULL && *awkpath)
savepath = awkpath; /* used for restarting */
+ else
+ savepath = defpath;
}
awkpath = savepath;
/* some kind of path name, no search */
-#ifdef VMS /* (strchr not equal implies either or both not NULL) */
- if (strchr(file, ':') != strchr(file, ']')
- || strchr(file, '>') != strchr(file, '/'))
-#else /*!VMS*/
-#if defined(MSDOS) || defined(OS2)
- if (strchr(file, '/') != strchr(file, '\\')
- || strchr(file, ':') != NULL)
-#else
- if (strchr(file, '/') != NULL)
-#endif /*MSDOS*/
-#endif /*VMS*/
+ if (ispath(file))
return (devopen(file, "r"));
-#if defined(MSDOS) || defined(OS2)
- _searchenv(file, "AWKPATH", trypath);
- if (trypath[0] == '\0')
- _searchenv(file, "PATH", trypath);
- return (trypath[0] == '\0') ? 0 : devopen(trypath, "r");
-#else
do {
trypath[0] = '\0';
/* this should take into account limits on size of trypath */
- for (cp = trypath; *awkpath && *awkpath != ENVSEP; )
+ for (cp = trypath; *awkpath && *awkpath != envsep; )
*cp++ = *awkpath++;
 		if (cp != trypath) {	/* non-null element in path */
/* add directory punctuation only if needed */
-#ifdef VMS
- if (strchr(":]>/", *(cp-1)) == NULL)
-#else
-#if defined(MSDOS) || defined(OS2)
- if (strchr(":\\/", *(cp-1)) == NULL)
-#else
- if (*(cp-1) != '/')
-#endif
-#endif
+ if (! isdirpunct(*(cp-1)))
*cp++ = '/';
/* append filename */
- strcpy (cp, file);
+ strcpy(cp, file);
} else
- strcpy (trypath, file);
- if ((fd = devopen(trypath, "r")) >= 0)
+ strcpy(trypath, file);
+ if ((fd = devopen(trypath, "r")) > INVALID_HANDLE)
return (fd);
/* no luck, keep going */
- if(*awkpath == ENVSEP && awkpath[1] != '\0')
+ if(*awkpath == envsep && awkpath[1] != '\0')
awkpath++; /* skip colon */
- } while (*awkpath);
+ } while (*awkpath != '\0');
/*
- * You might have one of the awk
- * paths defined, WITHOUT the current working directory in it.
- * Therefore try to open the file in the current directory.
+ * You might have one of the awk paths defined, WITHOUT the current
+ * working directory in it. Therefore try to open the file in the
+ * current directory.
*/
return (devopen(file, "r"));
+}
+
+#ifdef TEST
+int bufsize = 8192;
+
+void
+fatal(s)
+char *s;
+{
+ printf("%s\n", s);
+ exit(1);
+}
#endif
+
+/* iop_alloc --- allocate an IOBUF structure for an open fd */
+
+IOBUF *
+iop_alloc(fd, name)
+int fd;
+const char *name;
+{
+ IOBUF *iop;
+ struct stat sbuf;
+
+ if (fd == INVALID_HANDLE)
+ return NULL;
+ emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc");
+ iop->flag = 0;
+ if (isatty(fd))
+ iop->flag |= IOP_IS_TTY;
+ iop->size = optimal_bufsize(fd, & sbuf);
+ if (do_lint && S_ISREG(sbuf.st_mode) && sbuf.st_size == 0)
+ warning("data file `%s' is empty", name);
+ iop->secsiz = -2;
+ errno = 0;
+ iop->fd = fd;
+ iop->off = iop->buf = NULL;
+ iop->cnt = 0;
+ iop->name = name;
+ return iop;
+}
+
+/*
+ * get_a_record:
+ * Get the next record. Uses a "split buffer" where the latter part is
+ * the normal read buffer and the head part is an "overflow" area that is used
+ * when a record spans the end of the normal buffer, in which case the first
+ * part of the record is copied into the overflow area just before the
+ * normal buffer. Thus, the eventual full record can be returned as a
+ * contiguous area of memory with a minimum of copying. The overflow area
+ * is expanded as needed, so that records are unlimited in length.
+ * We also mark both the end of the buffer and the end of the read() with
+ * a sentinel character (the current record separator) so that the inside
+ * loop can run as a single test.
+ *
+ * Note that since we know or can compute the end of the read and the end
+ * of the buffer, the sentinel character does not get in the way of regexp
+ * based searching, since we simply search up to that character, but not
+ * including it.
+ */
+
+static int
+get_a_record(out, iop, grRS, RSre, errcode)
+char **out; /* pointer to pointer to data */
+IOBUF *iop; /* input IOP */
+register int grRS; /* first char in RS->stptr */
+Regexp *RSre; /* regexp for RS */
+int *errcode; /* pointer to error variable */
+{
+ register char *bp = iop->off;
+ char *bufend;
+ char *start = iop->off; /* beginning of record */
+ char rs;
+ static Regexp *RS_null_re = NULL;
+ Regexp *rsre = NULL;
+ int continuing = FALSE; /* used for re matching */
+ int onecase;
+
+#define set_RT_to_null() \
+ (void)(! do_traditional && (unref(RT_node->var_value), \
+ RT_node->var_value = Nnull_string))
+
+#define set_RT(str, len) \
+ (void)(! do_traditional && (unref(RT_node->var_value), \
+ RT_node->var_value = make_string(str, len)))
+
+ /* first time through */
+ if (RS_null_re == NULL) {
+ RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE);
+ if (RS_null_re == NULL)
+ fatal("internal error: file `%s', line %d\n",
+ __FILE__, __LINE__);
+ }
+
+ if (iop->cnt == EOF) { /* previous read hit EOF */
+ *out = NULL;
+ set_RT_to_null();
+ return EOF;
+ }
+
+ if (grRS == FALSE) /* special case: RS == "" */
+ rs = '\n';
+ else
+ rs = (char) grRS;
+
+ onecase = (IGNORECASE && isalpha(rs));
+ if (onecase)
+ rs = casetable[rs];
+
+ /* set up sentinel */
+ if (iop->buf) {
+ bufend = iop->buf + iop->size + iop->secsiz;
+ *bufend = rs; /* add sentinel to buffer */
+ } else
+ bufend = NULL;
+
+ for (;;) { /* break on end of record, read error or EOF */
+/* buffer mgmt, chunk #1 */
+ /*
+ * Following code is entered on the first call of this routine
+ * for a new iop, or when we scan to the end of the buffer.
+ * In the latter case, we copy the current partial record to
+ * the space preceding the normal read buffer. If necessary,
+ * we expand this space. This is done so that we can return
+ * the record as a contiguous area of memory.
+ */
+ if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) {
+ char *oldbuf = NULL;
+ char *oldsplit = iop->buf + iop->secsiz;
+ long len; /* record length so far */
+
+ len = bp - start;
+ if (len > iop->secsiz) {
+ /* expand secondary buffer */
+ if (iop->secsiz == -2)
+ iop->secsiz = 256;
+ while (len > iop->secsiz)
+ iop->secsiz *= 2;
+ oldbuf = iop->buf;
+ emalloc(iop->buf, char *,
+ iop->size+iop->secsiz+2, "get_a_record");
+ bufend = iop->buf + iop->size + iop->secsiz;
+ *bufend = rs;
+ }
+ if (len > 0) {
+ char *newsplit = iop->buf + iop->secsiz;
+
+ if (start < oldsplit) {
+ memcpy(newsplit - len, start,
+ oldsplit - start);
+ memcpy(newsplit - (bp - oldsplit),
+ oldsplit, bp - oldsplit);
+ } else
+ memcpy(newsplit - len, start, len);
+ }
+ bp = iop->end = iop->off = iop->buf + iop->secsiz;
+ start = bp - len;
+ if (oldbuf != NULL) {
+ free(oldbuf);
+ oldbuf = NULL;
+ }
+ }
+/* buffer mgmt, chunk #2 */
+ /*
+ * Following code is entered whenever we have no more data to
+ * scan. In most cases this will read into the beginning of
+ * the main buffer, but in some cases (terminal, pipe etc.)
+ * we may be doing smallish reads into more advanced positions.
+ */
+ if (bp >= iop->end) {
+ if ((iop->flag & IOP_IS_INTERNAL) != 0) {
+ iop->cnt = EOF;
+ break;
+ }
+ iop->cnt = read(iop->fd, iop->end, bufend - iop->end);
+ if (iop->cnt == -1) {
+ if (! do_traditional && errcode != NULL) {
+ *errcode = errno;
+ iop->cnt = EOF;
+ break;
+ } else
+ fatal("error reading input file `%s': %s",
+ iop->name, strerror(errno));
+ } else if (iop->cnt == 0) {
+ /*
+ * hit EOF before matching RS, so end
+ * the record and set RT to ""
+ */
+ iop->cnt = EOF;
+ /* see comments below about this test */
+ if (! continuing) {
+ set_RT_to_null();
+ break;
+ }
+ }
+ if (iop->cnt != EOF) {
+ iop->end += iop->cnt;
+ *iop->end = rs; /* reset the sentinel */
+ }
+ }
+/* buffers are now setup and filled with data */
+/* search for RS, #1, regexp based, or RS = "" */
+ /*
+ * Attempt to simplify the code a bit. The case where
+ * RS = "" can also be described by a regexp, RS = "\n\n+".
+		 * The buffer management and searching code can thus now
+ * use a common case (the one for regexps) both when RS is
+ * a regexp, and when RS = "". This particularly benefits
+ * us for keeping track of how many newlines were matched
+ * in order to set RT.
+ */
+ if (! do_traditional && RSre != NULL) /* regexp */
+ rsre = RSre;
+ else if (grRS == FALSE) /* RS = "" */
+ rsre = RS_null_re;
+ else
+ rsre = NULL;
+
+ /*
+ * Look for regexp match of RS. Non-match conditions are:
+ * 1. No match at all
+ * 2. Match of a null string
+ * 3. Match ends at exact end of buffer
+ * Number 3 is subtle; we have to add more to the buffer
+ * in case the match would have extended further into the
+ * file, since regexp match by definition always matches the
+ * longest possible match.
+ *
+ * It is even more subtle than you might think. Suppose
+ * the re matches at exactly the end of file. We don't know
+ * that until we try to add more to the buffer. Thus, we
+ * set a flag to indicate, that if eof really does happen,
+		 * set a flag to indicate that, if eof really does happen,
+ */
+ continuing = FALSE;
+ if (rsre != NULL) {
+ again:
+ /* cases 1 and 2 are simple, just keep going */
+ if (research(rsre, start, 0, iop->end - start, TRUE) == -1
+ || RESTART(rsre, start) == REEND(rsre, start)) {
+ bp = iop->end;
+ continue;
+ }
+ /* case 3, regex match at exact end */
+ if (start + REEND(rsre, start) >= iop->end) {
+ if (iop->cnt != EOF) {
+ bp = iop->end;
+ continuing = TRUE;
+ continue;
+ }
+ }
+ /* got a match! */
+ /*
+ * Leading newlines at the beginning of the file
+ * should be ignored. Whew!
+ */
+ if (grRS == FALSE && RESTART(rsre, start) == 0) {
+ start += REEND(rsre, start);
+ goto again;
+ }
+ bp = start + RESTART(rsre, start);
+ set_RT(bp, REEND(rsre, start) - RESTART(rsre, start));
+ *bp = '\0';
+ iop->off = start + REEND(rsre, start);
+ break;
+ }
+/* search for RS, #2, RS = <single char> */
+ if (onecase) {
+ while (casetable[*bp++] != rs)
+ continue;
+ } else {
+ while (*bp++ != rs)
+ continue;
+ }
+ set_RT(bp - 1, 1);
+
+ if (bp <= iop->end)
+ break;
+ else
+ bp--;
+
+ if ((iop->flag & IOP_IS_INTERNAL) != 0)
+ iop->cnt = bp - start;
+ }
+ if (iop->cnt == EOF
+ && (((iop->flag & IOP_IS_INTERNAL) != 0) || start == bp)) {
+ *out = NULL;
+ set_RT_to_null();
+ return EOF;
+ }
+
+ if (do_traditional || rsre == NULL) {
+ char *bstart;
+
+ bstart = iop->off = bp;
+ bp--;
+ if (onecase ? casetable[*bp] != rs : *bp != rs) {
+ bp++;
+ bstart = bp;
+ }
+ *bp = '\0';
+ } else if (grRS == FALSE && iop->cnt == EOF) {
+ /*
+ * special case, delete trailing newlines,
+ * should never be more than one.
+ */
+ while (bp[-1] == '\n')
+ bp--;
+ *bp = '\0';
+ }
+
+ *out = start;
+ return bp - start;
+}
+
+#ifdef TEST
+int
+main(argc, argv)
+int argc;
+char *argv[];
+{
+ IOBUF *iop;
+ char *out;
+ int cnt;
+ char rs[2];
+
+ rs[0] = '\0';
+ if (argc > 1)
+ bufsize = atoi(argv[1]);
+ if (argc > 2)
+ rs[0] = *argv[2];
+ iop = iop_alloc(0, "stdin");
+ while ((cnt = get_a_record(&out, iop, rs[0], NULL, NULL)) > 0) {
+ fwrite(out, 1, cnt, stdout);
+ fwrite(rs, 1, 1, stdout);
+ }
+ return 0;
+}
+#endif
+
+/* set_RS --- update things as appropriate when RS is set */
+
+void
+set_RS()
+{
+ static NODE *save_rs = NULL;
+
+ if (save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0)
+ return;
+ unref(save_rs);
+ save_rs = dupnode(RS_node->var_value);
+ RS_is_null = FALSE;
+ RS = force_string(RS_node->var_value);
+ if (RS_regexp != NULL) {
+ refree(RS_regexp);
+ RS_regexp = NULL;
+ }
+ if (RS->stlen == 0)
+ RS_is_null = TRUE;
+ else if (RS->stlen > 1)
+ RS_regexp = make_regexp(RS->stptr, RS->stlen, IGNORECASE, TRUE);
+
+ set_FS_if_not_FIELDWIDTHS();
}
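
For reference, the split-buffer/sentinel scheme described in the new get_a_record() comment, planting the record separator just past the valid data so the inner scan needs only one test per character, can be seen in isolation in the following simplified standalone sketch (buffer growth, regexp record separators and RT handling omitted; names are illustrative, not gawk's):

	#include <stdio.h>
	#include <string.h>

	/*
	 * Scan buf[0..len-1] for the next record ending in rs.
	 * buf[len] must be writable: it is overwritten with a sentinel
	 * copy of rs so the scan loop needs no bounds check.
	 * Returns the record length and sets *next to the byte after the
	 * separator, or returns -1 when no separator is left.
	 */
	static int
	next_record(char *buf, int len, int rs, char **next)
	{
		char *bp = buf;

		buf[len] = rs;			/* plant the sentinel */
		while (*bp != rs)
			bp++;
		if (bp == buf + len)		/* only the sentinel matched */
			return -1;
		*bp = '\0';			/* terminate the record in place */
		*next = bp + 1;
		return bp - buf;
	}

	int
	main(void)
	{
		char data[] = "one\ntwo\nthree\n";
		char *p = data, *next;
		int n, left = strlen(data);

		while ((n = next_record(p, left, '\n', &next)) >= 0) {
			printf("record (%d): %s\n", n, p);
			left -= next - p;
			p = next;
		}
		return 0;
	}
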
diff --git a/iop.c b/iop.c
deleted file mode 100644
index af6ecbc6..00000000
--- a/iop.c
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * iop.c - do i/o related things.
- */
-
-/*
- * Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
- *
- * This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
- *
- * GAWK is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * GAWK is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "awk.h"
-
-#ifndef atarist
-#define INVALID_HANDLE (-1)
-#else
-#include <stddef.h>
-#include <fcntl.h>
-#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1)
-#endif /* atarist */
-
-
-#ifdef TEST
-int bufsize = 8192;
-
-void
-fatal(s)
-char *s;
-{
- printf("%s\n", s);
- exit(1);
-}
-#endif
-
-int
-optimal_bufsize(fd)
-int fd;
-{
- struct stat stb;
-
-#ifdef VMS
- /*
- * These values correspond with the RMS multi-block count used by
- * vms_open() in vms/vms_misc.c.
- */
- if (isatty(fd) > 0)
- return BUFSIZ;
- else if (fstat(fd, &stb) < 0)
- return 8*512; /* conservative in case of DECnet access */
- else
- return 32*512;
-
-#else
- /*
- * System V doesn't have the file system block size in the
- * stat structure. So we have to make some sort of reasonable
- * guess. We use stdio's BUFSIZ, since that is what it was
- * meant for in the first place.
- */
-#ifdef BLKSIZE_MISSING
-#define DEFBLKSIZE BUFSIZ
-#else
-#define DEFBLKSIZE (stb.st_blksize ? stb.st_blksize : BUFSIZ)
-#endif
-
-#ifdef TEST
- return bufsize;
-#else
-#ifndef atarist
- if (isatty(fd))
-#else
- /*
- * On ST redirected stdin does not have a name attached
- * (this could be hard to do to) and fstat would fail
- */
- if (0 == fd || isatty(fd))
-#endif /*atarist */
- return BUFSIZ;
-#ifndef BLKSIZE_MISSING
- /* VMS POSIX 1.0: st_blksize is never assigned a value, so zero it */
- stb.st_blksize = 0;
-#endif
- if (fstat(fd, &stb) == -1)
- fatal("can't stat fd %d (%s)", fd, strerror(errno));
- if (lseek(fd, (off_t)0, 0) == -1) /* not a regular file */
- return DEFBLKSIZE;
- if (stb.st_size > 0 && stb.st_size < DEFBLKSIZE) /* small file */
- return (stb.st_size);
- return (DEFBLKSIZE);
-#endif /*! TEST */
-#endif /*! VMS */
-}
-
-IOBUF *
-iop_alloc(fd)
-int fd;
-{
- IOBUF *iop;
-
- if (fd == INVALID_HANDLE)
- return NULL;
- emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc");
- iop->flag = 0;
- if (isatty(fd))
- iop->flag |= IOP_IS_TTY;
- iop->size = optimal_bufsize(fd);
- iop->secsiz = -2;
- errno = 0;
- iop->fd = fd;
- iop->off = iop->buf = NULL;
- iop->cnt = 0;
- return iop;
-}
-
-/*
- * Get the next record. Uses a "split buffer" where the latter part is
- * the normal read buffer and the head part is an "overflow" area that is used
- * when a record spans the end of the normal buffer, in which case the first
- * part of the record is copied into the overflow area just before the
- * normal buffer. Thus, the eventual full record can be returned as a
- * contiguous area of memory with a minimum of copying. The overflow area
- * is expanded as needed, so that records are unlimited in length.
- * We also mark both the end of the buffer and the end of the read() with
- * a sentinel character (the current record separator) so that the inside
- * loop can run as a single test.
- */
-int
-get_a_record(out, iop, grRS, errcode)
-char **out;
-IOBUF *iop;
-register int grRS;
-int *errcode;
-{
- register char *bp = iop->off;
- char *bufend;
- char *start = iop->off; /* beginning of record */
- char rs;
- int saw_newline = 0, eat_whitespace = 0; /* used iff grRS==0 */
-
- if (iop->cnt == EOF) { /* previous read hit EOF */
- *out = NULL;
- return EOF;
- }
-
- if (grRS == 0) { /* special case: grRS == "" */
- rs = '\n';
- } else
- rs = (char) grRS;
-
- /* set up sentinel */
- if (iop->buf) {
- bufend = iop->buf + iop->size + iop->secsiz;
- *bufend = rs;
- } else
- bufend = NULL;
-
- for (;;) { /* break on end of record, read error or EOF */
-
- /* Following code is entered on the first call of this routine
- * for a new iop, or when we scan to the end of the buffer.
- * In the latter case, we copy the current partial record to
- * the space preceding the normal read buffer. If necessary,
- * we expand this space. This is done so that we can return
- * the record as a contiguous area of memory.
- */
- if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) {
- char *oldbuf = NULL;
- char *oldsplit = iop->buf + iop->secsiz;
- long len; /* record length so far */
-
- len = bp - start;
- if (len > iop->secsiz) {
- /* expand secondary buffer */
- if (iop->secsiz == -2)
- iop->secsiz = 256;
- while (len > iop->secsiz)
- iop->secsiz *= 2;
- oldbuf = iop->buf;
- emalloc(iop->buf, char *,
- iop->size+iop->secsiz+2, "get_a_record");
- bufend = iop->buf + iop->size + iop->secsiz;
- *bufend = rs;
- }
- if (len > 0) {
- char *newsplit = iop->buf + iop->secsiz;
-
- if (start < oldsplit) {
- memcpy(newsplit - len, start,
- oldsplit - start);
- memcpy(newsplit - (bp - oldsplit),
- oldsplit, bp - oldsplit);
- } else
- memcpy(newsplit - len, start, len);
- }
- bp = iop->end = iop->off = iop->buf + iop->secsiz;
- start = bp - len;
- if (oldbuf) {
- free(oldbuf);
- oldbuf = NULL;
- }
- }
- /* Following code is entered whenever we have no more data to
- * scan. In most cases this will read into the beginning of
- * the main buffer, but in some cases (terminal, pipe etc.)
- * we may be doing smallish reads into more advanced positions.
- */
- if (bp >= iop->end) {
- if ((iop->flag & IOP_IS_INTERNAL) != 0) {
- iop->cnt = EOF;
- break;
- }
- iop->cnt = read(iop->fd, iop->end, bufend - iop->end);
- if (iop->cnt == -1) {
- if (! do_unix && errcode != NULL) {
- *errcode = errno;
- iop->cnt = EOF;
- break;
- } else
- fatal("error reading input: %s",
- strerror(errno));
- } else if (iop->cnt == 0) {
- iop->cnt = EOF;
- break;
- }
- iop->end += iop->cnt;
- *iop->end = rs;
- }
- if (grRS == 0) {
- extern int default_FS;
-
- if (default_FS && (bp == start || eat_whitespace)) {
- while (bp < iop->end
- && (*bp == ' ' || *bp == '\t' || *bp == '\n'))
- bp++;
- if (bp == iop->end) {
- eat_whitespace = 1;
- continue;
- } else
- eat_whitespace = 0;
- start = bp; /* skip leading white space */
- }
- if (saw_newline && *bp == rs) {
- bp++;
- break;
- }
- saw_newline = 0;
- }
-
- while (*bp++ != rs)
- ;
-
- if (bp <= iop->end) {
- if (grRS == 0)
- saw_newline = 1;
- else
- break;
- } else
- bp--;
-
- if ((iop->flag & IOP_IS_INTERNAL) != 0)
- iop->cnt = bp - start;
- }
- if (iop->cnt == EOF
- && (((iop->flag & IOP_IS_INTERNAL) != 0) || start == bp)) {
- *out = NULL;
- return EOF;
- }
-
- iop->off = bp;
- bp--;
- if (*bp != rs)
- bp++;
- *bp = '\0';
- if (grRS == 0) {
- /* there could be more newlines left, clean 'em out now */
- while (iop->off <= iop->end && *(iop->off) == rs)
- (iop->off)++;
-
- if (*--bp == rs)
- *bp = '\0';
- else
- bp++;
- }
-
- *out = start;
- return bp - start;
-}
-
-#ifdef TEST
-main(argc, argv)
-int argc;
-char *argv[];
-{
- IOBUF *iop;
- char *out;
- int cnt;
- char rs[2];
-
- rs[0] = 0;
- if (argc > 1)
- bufsize = atoi(argv[1]);
- if (argc > 2)
- rs[0] = *argv[2];
- iop = iop_alloc(0);
- while ((cnt = get_a_record(&out, iop, rs[0], NULL)) > 0) {
- fwrite(out, 1, cnt, stdout);
- fwrite(rs, 1, 1, stdout);
- }
-}
-#endif
diff --git a/main.c b/main.c
index c5a7f3a1..273b13b0 100644
--- a/main.c
+++ b/main.c
@@ -6,7 +6,7 @@
* Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,8 +19,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "awk.h"
@@ -33,33 +33,21 @@ static void cmdline_fs P((char *str));
static void init_args P((int argc0, int argc, char *argv0, char **argv));
static void init_vars P((void));
static void pre_assign P((char *v));
-SIGTYPE catchsig P((int sig, int code));
+RETSIGTYPE catchsig P((int sig, int code));
static void gawk_option P((char *optstr));
static void nostalgia P((void));
static void version P((void));
-char *gawk_name P((char *filespec));
-
-#ifdef MSDOS
-extern int isatty P((int));
-#endif
-
-extern void resetup P((void));
/* These nodes store all the special variables AWK uses */
-NODE *FS_node, *NF_node, *RS_node, *NR_node;
-NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node;
-NODE *CONVFMT_node;
-NODE *ERRNO_node;
-NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node;
-NODE *ENVIRON_node, *IGNORECASE_node;
-NODE *ARGC_node, *ARGV_node, *ARGIND_node;
-NODE *FIELDWIDTHS_node;
-
+NODE *ARGC_node, *ARGIND_node, *ARGV_node, *CONVFMT_node, *ENVIRON_node;
+NODE *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node, *FNR_node, *FS_node;
+NODE *IGNORECASE_node, *NF_node, *NR_node, *OFMT_node, *OFS_node;
+NODE *ORS_node, *RLENGTH_node, *RSTART_node, *RS_node, *RT_node, *SUBSEP_node;
+
long NF;
long NR;
long FNR;
int IGNORECASE;
-char *RS;
char *OFS;
char *ORS;
char *OFMT;
@@ -72,21 +60,20 @@ char *OFMT;
*/
char *CONVFMT = "%.6g";
-int errcount = 0; /* error counter, used by yyerror() */
+int errcount = 0; /* error counter, used by yyerror() */
-/* The global null string */
-NODE *Nnull_string;
+NODE *Nnull_string; /* The global null string */
/* The name the program was invoked under, for error messages */
const char *myname;
/* A block of AWK code to be run before running the program */
-NODE *begin_block = 0;
+NODE *begin_block = NULL;
/* A block of AWK code to be run after the last input file */
-NODE *end_block = 0;
+NODE *end_block = NULL;
-int exiting = 0; /* Was an "exit" statement executed? */
+int exiting = FALSE; /* Was an "exit" statement executed? */
int exit_val = 0; /* optional exit value */
#if defined(YYDEBUG) || defined(DEBUG)
@@ -94,17 +81,19 @@ extern int yydebug;
#endif
struct src *srcfiles = NULL; /* source file name(s) */
-int numfiles = -1; /* how many source files */
+long numfiles = -1; /* how many source files */
-int do_unix = 0; /* turn off gnu extensions */
-int do_posix = 0; /* turn off gnu and unix extensions */
-int do_lint = 0; /* provide warnings about questionable stuff */
-int do_nostalgia = 0; /* provide a blast from the past */
+int do_traditional = FALSE; /* no gnu extensions, add traditional weirdnesses */
+int do_posix = FALSE; /* turn off gnu and unix extensions */
+int do_lint = FALSE; /* provide warnings about questionable stuff */
+int do_lint_old = FALSE; /* warn about stuff not in V7 awk */
+int do_nostalgia = FALSE; /* provide a blast from the past */
+int do_intervals = FALSE; /* allow {...,...} in regexps */
-int in_begin_rule = 0; /* we're in a BEGIN rule */
-int in_end_rule = 0; /* we're in a END rule */
+int in_begin_rule = FALSE; /* we're in a BEGIN rule */
+int in_end_rule = FALSE;	/* we're in an END rule */
-int output_is_tty = 0; /* control flushing of output */
+int output_is_tty = FALSE; /* control flushing of output */
extern char *version_string; /* current version, for printing */
@@ -112,25 +101,30 @@ extern char *version_string; /* current version, for printing */
NODE *expression_value;
static struct option optab[] = {
- { "compat", no_argument, & do_unix, 1 },
+ { "compat", no_argument, & do_traditional, 1 },
+ { "traditional", no_argument, & do_traditional, 1 },
{ "lint", no_argument, & do_lint, 1 },
+ { "lint-old", no_argument, & do_lint_old, 1 },
{ "posix", no_argument, & do_posix, 1 },
{ "nostalgia", no_argument, & do_nostalgia, 1 },
{ "copyleft", no_argument, NULL, 'C' },
{ "copyright", no_argument, NULL, 'C' },
{ "field-separator", required_argument, NULL, 'F' },
{ "file", required_argument, NULL, 'f' },
+ { "re-interval", no_argument, & do_intervals, 1 },
+ { "source", required_argument, NULL, 's' },
{ "assign", required_argument, NULL, 'v' },
{ "version", no_argument, NULL, 'V' },
{ "usage", no_argument, NULL, 'u' },
{ "help", no_argument, NULL, 'u' },
- { "source", required_argument, NULL, 's' },
#ifdef DEBUG
{ "parsedebug", no_argument, NULL, 'D' },
#endif
- { 0, 0, 0, 0 }
+ { NULL, 0, NULL, '\0' }
};
+/* main --- process args, parse program, run it, clean up */
+
int
main(argc, argv)
int argc;
@@ -140,29 +134,21 @@ char **argv;
char *scan;
/* the + on the front tells GNU getopt not to rearrange argv */
const char *optlist = "+F:f:v:W:m:";
- int stopped_early = 0;
+ int stopped_early = FALSE;
int old_optind;
extern int optind;
extern int opterr;
extern char *optarg;
-#ifdef __EMX__
- _response(&argc, &argv);
- _wildcard(&argc, &argv);
- setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
-#endif
-
- (void) signal(SIGFPE, (SIGTYPE (*) P((int))) catchsig);
- (void) signal(SIGSEGV, (SIGTYPE (*) P((int))) catchsig);
+ (void) signal(SIGFPE, (RETSIGTYPE (*) P((int))) catchsig);
+ (void) signal(SIGSEGV, (RETSIGTYPE (*) P((int))) catchsig);
#ifdef SIGBUS
- (void) signal(SIGBUS, (SIGTYPE (*) P((int))) catchsig);
+ (void) signal(SIGBUS, (RETSIGTYPE (*) P((int))) catchsig);
#endif
myname = gawk_name(argv[0]);
- argv[0] = (char *)myname;
-#ifdef VMS
- vms_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
-#endif
+ argv[0] = (char *) myname;
+ os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
/* remove sccs gunk */
if (strncmp(version_string, "@(#)", 4) == 0)
@@ -196,14 +182,15 @@ char **argv;
memset(srcfiles, '\0', argc * sizeof(struct src));
/* we do error messages ourselves on invalid options */
- opterr = 0;
+ opterr = FALSE;
/* option processing. ready, set, go! */
for (optopt = 0, old_optind = 1;
(c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF;
optopt = 0, old_optind = optind) {
if (do_posix)
- opterr = 1;
+ opterr = TRUE;
+
switch (c) {
case 'F':
cmdline_fs(optarg);
@@ -214,13 +201,14 @@ char **argv;
* a la MKS awk, allow multiple -f options.
* this makes function libraries real easy.
* most of the magic is in the scanner.
- */
- /* The following is to allow for whitespace at the end
+ *
+ * The following is to allow for whitespace at the end
* of a #! /bin/gawk line in an executable file
*/
scan = optarg;
while (isspace(*scan))
scan++;
+
++numfiles;
srcfiles[numfiles].stype = SOURCEFILE;
if (*scan == '\0')
@@ -240,10 +228,10 @@ char **argv;
* -mr=nnn set record length, ditto
*/
if (do_lint)
- warning("-m[fr] option irrelevant");
+ warning("-m[fr] option irrelevant in gawk");
if ((optarg[0] != 'r' && optarg[0] != 'f')
|| optarg[1] != '=')
- warning("-m option usage: -m[fn]=nnn");
+ warning("-m option usage: -m[fr]=nnn");
break;
case 'W': /* gawk specific options */
@@ -251,18 +239,10 @@ char **argv;
break;
/* These can only come from long form options */
- case 'V':
- version();
- break;
-
case 'C':
copyleft();
break;
- case 'u':
- usage(0);
- break;
-
case 's':
if (optarg[0] == '\0')
warning("empty argument to --source ignored");
@@ -272,6 +252,14 @@ char **argv;
}
break;
+ case 'u':
+ usage(0);
+ break;
+
+ case 'V':
+ version();
+ break;
+
#ifdef DEBUG
case 'D':
yydebug = 2;
@@ -300,18 +288,18 @@ char **argv;
* getopt_long won't print the error message for us.
*/
if (! do_posix
- && (optopt == 0 || strchr(optlist, optopt) == NULL)) {
+ && (optopt == '\0' || strchr(optlist, optopt) == NULL)) {
/*
* can't just do optind--. In case of an
- * option with >=2 letters, getopt_long
+ * option with >= 2 letters, getopt_long
* won't have incremented optind.
*/
optind = old_optind;
- stopped_early = 1;
+ stopped_early = TRUE;
goto out;
- } else if (optopt)
+ } else if (optopt != '\0')
/* Use 1003.2 required message format */
- fprintf (stderr,
+ fprintf(stderr,
"%s: option requires an argument -- %c\n",
myname, optopt);
/* else
@@ -326,21 +314,45 @@ out:
/* check for POSIXLY_CORRECT environment variable */
if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) {
- do_posix = 1;
+ do_posix = TRUE;
if (do_lint)
warning(
"environment variable `POSIXLY_CORRECT' set: turning on --posix");
}
- /* POSIX compliance also implies no Unix extensions either */
- if (do_posix)
- do_unix = 1;
+ if (do_posix) {
+ if (do_traditional) /* both on command line */
+ warning("--posix overrides --traditional");
+ else
+ do_traditional = TRUE;
+ /*
+ * POSIX compliance also implies
+ * no GNU extensions either.
+ */
+ }
+
+ /*
+ * Tell the regex routines how they should work.
+ * Do this again, after argument processing, since do_posix
+ * and do_traditional are now paid attention to by resetup().
+ */
+ if (do_traditional || do_posix) {
+ resetup();
+
+ /* now handle RS and FS. have to be careful with FS */
+ set_RS();
+ if (using_fieldwidths()) {
+ set_FS();
+ set_FIELDWIDTHS();
+ } else
+ set_FS();
+ }
#ifdef DEBUG
setbuf(stdout, (char *) NULL); /* make debugging easier */
#endif
if (isatty(fileno(stdout)))
- output_is_tty = 1;
+ output_is_tty = TRUE;
/* No -f or --source options, use next arg */
if (numfiles == -1) {
if (optind > argc - 1 || stopped_early) /* no args left or no program */
@@ -349,14 +361,17 @@ out:
srcfiles[numfiles].val = argv[optind];
optind++;
}
+
init_args(optind, argc, (char *) myname, argv);
(void) tokexpand();
/* Read in the program */
- if (yyparse() || errcount)
+ if (yyparse() != 0 || errcount != 0)
exit(1);
/* recover any space from C based alloca */
+#ifdef C_ALLOCA
(void) alloca(0);
+#endif
/* Set up the field variables */
init_fields();
@@ -365,18 +380,18 @@ out:
&& end_block == NULL)
warning("no program");
- if (begin_block) {
- in_begin_rule = 1;
+ if (begin_block != NULL) {
+ in_begin_rule = TRUE;
(void) interpret(begin_block);
}
- in_begin_rule = 0;
- if (!exiting && (expression_value || end_block))
+ in_begin_rule = FALSE;
+ if (! exiting && (expression_value != NULL || end_block != NULL))
do_input();
- if (end_block) {
- in_end_rule = 1;
+ if (end_block != NULL) {
+ in_end_rule = TRUE;
(void) interpret(end_block);
}
- in_end_rule = 0;
+ in_end_rule = FALSE;
if (close_io() != 0 && exit_val == 0)
exit_val = 1;
exit(exit_val); /* more portable */
@@ -389,16 +404,11 @@ static void
usage(exitval)
int exitval;
{
- const char *opt1 = " -f progfile [--]";
-#if defined(MSDOS) || defined(OS2) || defined(VMS)
- const char *opt2 = " [--] \"program\"";
-#else
- const char *opt2 = " [--] 'program'";
-#endif
- const char *regops = " [POSIX or GNU style options]";
+ char *opt1 = " -f progfile [--]";
+ char *regops = " [POSIX or GNU style options]";
- fprintf(stderr, "Usage:\t%s%s%s file ...\n\t%s%s%s file ...\n",
- myname, regops, opt1, myname, regops, opt2);
+ fprintf(stderr, "Usage: %s%s%s file ...\n\t%s%s [--] %cprogram%c file ...\n",
+ myname, regops, opt1, myname, regops, quote, quote);
/* GNU long options info. Gack. */
fputs("POSIX options:\t\tGNU long options:\n", stderr);
@@ -411,6 +421,7 @@ int exitval;
fputs("\t-W copyright\t\t--copyright\n", stderr);
fputs("\t-W help\t\t\t--help\n", stderr);
fputs("\t-W lint\t\t\t--lint\n", stderr);
+ fputs("\t-W lint-old\t\t--lint-old\n", stderr);
#ifdef NOSTALGIA
fputs("\t-W nostalgia\t\t--nostalgia\n", stderr);
#endif
@@ -418,17 +429,21 @@ int exitval;
fputs("\t-W parsedebug\t\t--parsedebug\n", stderr);
#endif
fputs("\t-W posix\t\t--posix\n", stderr);
+ fputs("\t-W re-interval\t\t--re-interval\n", stderr);
fputs("\t-W source=program-text\t--source=program-text\n", stderr);
+ fputs("\t-W traditional\t\t--traditional\n", stderr);
fputs("\t-W usage\t\t--usage\n", stderr);
fputs("\t-W version\t\t--version\n", stderr);
exit(exitval);
}
+/* copyleft --- print out the short GNU copyright information */
+
static void
-copyleft ()
+copyleft()
{
static char blurb_part1[] =
-"Copyright (C) 1989, 1991, 1992, Free Software Foundation.\n\
+"Copyright (C) 1989, 1991-1995 Free Software Foundation.\n\
\n\
This program is free software; you can redistribute it and/or modify\n\
it under the terms of the GNU General Public License as published by\n\
@@ -444,27 +459,28 @@ GNU General Public License for more details.\n\
static char blurb_part3[] =
"You should have received a copy of the GNU General Public License\n\
along with this program; if not, write to the Free Software\n\
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n";
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n";
+ /* multiple blurbs are needed for some brain dead compilers. */
fputs(blurb_part1, stderr);
fputs(blurb_part2, stderr);
fputs(blurb_part3, stderr);
fflush(stderr);
}
+/* cmdline_fs --- set FS from the command line */
+
static void
cmdline_fs(str)
char *str;
{
register NODE **tmp;
- /* int len = strlen(str); *//* don't do that - we want to
- avoid mismatched types */
tmp = get_lhs(FS_node, (Func_ptr *) 0);
unref(*tmp);
/*
* Only if in full compatibility mode check for the stupid special
- * case so -F\t works as documented in awk even though the shell
+ * case so -F\t works as documented in awk book even though the shell
* hands us -Ft. Bleah!
*
 	 * Thankfully, Posix didn't propagate this "feature".
@@ -472,13 +488,15 @@ char *str;
if (str[0] == 't' && str[1] == '\0') {
if (do_lint)
warning("-Ft does not set FS to tab in POSIX awk");
- if (do_unix && ! do_posix)
+ if (do_traditional && ! do_posix)
str[0] = '\t';
}
*tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */
set_FS();
}
+/* init_args --- set up ARGV from stuff on the command line */
+
static void
init_args(argc0, argc, argv0, argv)
int argc0, argc;
@@ -488,7 +506,7 @@ char **argv;
int i, j;
NODE **aptr;
- ARGV_node = install("ARGV", node(Nnull_string, Node_var, (NODE *)NULL));
+ ARGV_node = install("ARGV", node(Nnull_string, Node_var_array, (NODE *) NULL));
aptr = assoc_lookup(ARGV_node, tmp_number(0.0));
*aptr = make_string(argv0, strlen(argv0));
(*aptr)->flags |= MAYBE_NUM;
@@ -519,25 +537,28 @@ struct varinit {
};
static struct varinit varinit[] = {
{&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT },
-{&NF_node, "NF", Node_NF, 0, -1, set_NF },
-{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, 0 },
-{&NR_node, "NR", Node_NR, 0, 0, set_NR },
-{&FNR_node, "FNR", Node_FNR, 0, 0, set_FNR },
-{&FS_node, "FS", Node_FS, " ", 0, 0 },
+{&NF_node, "NF", Node_NF, NULL, -1, set_NF },
+{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, NULL },
+{&NR_node, "NR", Node_NR, NULL, 0, set_NR },
+{&FNR_node, "FNR", Node_FNR, NULL, 0, set_FNR },
+{&FS_node, "FS", Node_FS, " ", 0, NULL },
{&RS_node, "RS", Node_RS, "\n", 0, set_RS },
-{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, 0, 0, 0 },
-{&FILENAME_node, "FILENAME", Node_var, "", 0, 0 },
+{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, NULL, 0, NULL },
+{&FILENAME_node, "FILENAME", Node_var, "", 0, NULL },
{&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS },
{&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS },
{&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT },
-{&RLENGTH_node, "RLENGTH", Node_var, 0, 0, 0 },
-{&RSTART_node, "RSTART", Node_var, 0, 0, 0 },
-{&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, 0 },
-{&ARGIND_node, "ARGIND", Node_var, 0, 0, 0 },
-{&ERRNO_node, "ERRNO", Node_var, 0, 0, 0 },
-{0, 0, Node_illegal, 0, 0, 0 },
+{&RLENGTH_node, "RLENGTH", Node_var, NULL, 0, NULL },
+{&RSTART_node, "RSTART", Node_var, NULL, 0, NULL },
+{&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, NULL },
+{&ARGIND_node, "ARGIND", Node_var, NULL, 0, NULL },
+{&ERRNO_node, "ERRNO", Node_var, NULL, 0, NULL },
+{&RT_node, "RT", Node_var, "", 0, NULL },
+{0, NULL, Node_illegal, NULL, 0, NULL },
};
+/* init_vars --- actually initialize everything in the symbol table */
+
static void
init_vars()
{
@@ -545,47 +566,60 @@ init_vars()
for (vp = varinit; vp->name; vp++) {
*(vp->spec) = install((char *) vp->name,
- node(vp->strval == 0 ? make_number(vp->numval)
+ node(vp->strval == NULL ? make_number(vp->numval)
: make_string((char *) vp->strval,
strlen(vp->strval)),
vp->type, (NODE *) NULL));
+ (*(vp->spec))->flags |= SCALAR;
if (vp->assign)
(*(vp->assign))();
}
}
+/* load_environ --- populate the ENVIRON array */
+
void
load_environ()
{
-#if !defined(MSDOS) && !defined(OS2) && !(defined(VMS) && defined(__DECC))
+#if ! (defined(MSDOS) && !defined(DJGPP)) && ! defined(OS2) && ! (defined(VMS) && defined(__DECC))
extern char **environ;
#endif
- register char *var, *val;
+ register char *var, *val, *cp;
NODE **aptr;
register int i;
ENVIRON_node = install("ENVIRON",
node(Nnull_string, Node_var, (NODE *) NULL));
- for (i = 0; environ[i]; i++) {
+ for (i = 0; environ[i] != NULL; i++) {
static char nullstr[] = "";
var = environ[i];
val = strchr(var, '=');
- if (val)
+ if (val != NULL)
*val++ = '\0';
else
val = nullstr;
- aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen (var)));
- *aptr = make_string(val, strlen (val));
- (*aptr)->flags |= MAYBE_NUM;
+ aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen(var)));
+ *aptr = make_string(val, strlen(val));
+ (*aptr)->flags |= (MAYBE_NUM|SCALAR);
/* restore '=' so that system() gets a valid environment */
if (val != nullstr)
*--val = '=';
}
+ /*
+ * Put AWKPATH into ENVIRON if it's not there.
+ * This allows querying it from outside gawk.
+ */
+ if ((cp = getenv("AWKPATH")) == NULL) {
+ aptr = assoc_lookup(ENVIRON_node, tmp_string("AWKPATH", 7));
+ *aptr = make_string(defpath, strlen(defpath));
+ (*aptr)->flags |= SCALAR;
+ }
}
-/* Process a command-line assignment */
+/* arg_assign --- process a command-line assignment */
+
char *
arg_assign(arg)
char *arg;
@@ -601,13 +635,13 @@ char *arg;
if (cp != NULL) {
*cp++ = '\0';
/* first check that the variable name has valid syntax */
- badvar = 0;
+ badvar = FALSE;
if (! isalpha(arg[0]) && arg[0] != '_')
- badvar = 1;
+ badvar = TRUE;
else
for (cp2 = arg+1; *cp2; cp2++)
if (! isalnum(*cp2) && *cp2 != '_') {
- badvar = 1;
+ badvar = TRUE;
break;
}
if (badvar)
@@ -618,31 +652,35 @@ char *arg;
* This makes sense, so we do it too.
*/
it = make_str_node(cp, strlen(cp), SCAN);
- it->flags |= MAYBE_NUM;
- var = variable(arg, 0);
+ it->flags |= (MAYBE_NUM|SCALAR);
+ var = variable(arg, FALSE, Node_var);
lhs = get_lhs(var, &after_assign);
unref(*lhs);
*lhs = it;
- if (after_assign)
+ if (after_assign != NULL)
(*after_assign)();
*--cp = '='; /* restore original text of ARGV */
}
return cp;
}
+/* pre_assign --- handle -v, print a message and die if a problem */
+
static void
pre_assign(v)
char *v;
{
- if (!arg_assign(v)) {
- fprintf (stderr,
- "%s: '%s' argument to -v not in 'var=value' form\n",
+ if (arg_assign(v) == NULL) {
+ fprintf(stderr,
+ "%s: `%s' argument to `-v' not in `var=value' form\n",
myname, v);
usage(1);
}
}
-SIGTYPE
+/* catchsig --- catch signals */
+
+RETSIGTYPE
catchsig(sig, code)
int sig, code;
{
@@ -672,21 +710,13 @@ char *optstr;
{
char *cp;
- for (cp = optstr; *cp; cp++) {
+ for (cp = optstr; *cp != '\0'; cp++) {
+		/* keep this switch sorted as options are added */
switch (*cp) {
case ' ':
case '\t':
case ',':
break;
- case 'v':
- case 'V':
- /* print version */
- if (strncasecmp(cp, "version", 7) != 0)
- goto unknown;
- else
- cp += 6;
- version();
- break;
case 'c':
case 'C':
if (strncasecmp(cp, "copyright", 9) == 0) {
@@ -697,7 +727,25 @@ char *optstr;
copyleft();
} else if (strncasecmp(cp, "compat", 6) == 0) {
cp += 5;
- do_unix = 1;
+ do_traditional = TRUE;
+ } else
+ goto unknown;
+ break;
+ case 'H':
+ case 'h':
+ if (strncasecmp(cp, "help", 4) != 0)
+ goto unknown;
+ cp += 3;
+ usage(0);
+ break;
+ case 'l':
+ case 'L':
+ if (strncasecmp(cp, "lint-old", 8) == 0) {
+ cp += 7;
+ do_lint_old = TRUE;
+ } else if (strncasecmp(cp, "lint", 4) == 0) {
+ cp += 3;
+ do_lint = TRUE;
} else
goto unknown;
break;
@@ -723,28 +771,13 @@ char *optstr;
if (strncasecmp(cp, "posix", 5) != 0)
goto unknown;
cp += 4;
- do_posix = do_unix = 1;
- break;
- case 'l':
- case 'L':
- if (strncasecmp(cp, "lint", 4) != 0)
- goto unknown;
- cp += 3;
- do_lint = 1;
+ do_posix = do_traditional = TRUE;
break;
- case 'H':
- case 'h':
- if (strncasecmp(cp, "help", 4) != 0)
+ case 'r':
+ case 'R':
+ if (strncasecmp(cp, "re-interval", 11) != 0)
goto unknown;
- cp += 3;
- usage(0);
- break;
- case 'U':
- case 'u':
- if (strncasecmp(cp, "usage", 5) != 0)
- goto unknown;
- cp += 4;
- usage(0);
+ do_intervals = TRUE;
break;
case 's':
case 'S':
@@ -759,6 +792,29 @@ char *optstr;
return;
}
break;
+ case 't':
+ case 'T':
+ if (strncasecmp(cp, "traditional", 11) != 0)
+ goto unknown;
+ do_traditional = TRUE;
+ cp += 11;
+ break;
+ case 'U':
+ case 'u':
+ if (strncasecmp(cp, "usage", 5) != 0)
+ goto unknown;
+ cp += 4;
+ usage(0);
+ break;
+ case 'v':
+ case 'V':
+ /* print version */
+ if (strncasecmp(cp, "version", 7) != 0)
+ goto unknown;
+ else
+ cp += 6;
+ version();
+ break;
default:
unknown:
fprintf(stderr, "'%c' -- unknown option, ignored\n",
@@ -786,43 +842,3 @@ version()
/* per GNU coding standards, exit successfully, do nothing else */
exit(0);
}
-
-/* this mess will improve in 2.16 */
-char *
-gawk_name(filespec)
-char *filespec;
-{
- char *p;
-
-#ifdef VMS /* "device:[root.][directory.subdir]GAWK.EXE;n" -> "GAWK" */
- char *q;
-
- p = strrchr(filespec, ']'); /* directory punctuation */
- q = strrchr(filespec, '>'); /* alternate <international> punct */
-
- if (p == NULL || q > p) p = q;
- p = strdup(p == NULL ? filespec : (p + 1));
- if ((q = strrchr(p, '.')) != NULL) *q = '\0'; /* strip .typ;vers */
-
- return p;
-#endif /*VMS*/
-
-#if defined(MSDOS) || defined(OS2) || defined(atarist)
- char *q;
-
- for (p = filespec; (p = strchr(p, '\\')); *p = '/')
- ;
- p = filespec;
- if ((q = strrchr(p, '/')))
- p = q + 1;
- if ((q = strchr(p, '.')))
- *q = '\0';
- strlwr(p);
-
- return (p == NULL ? filespec : p);
-#endif /* MSDOS || atarist */
-
- /* "path/name" -> "name" */
- p = strrchr(filespec, '/');
- return (p == NULL ? filespec : p + 1);
-}
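
For reference, a minimal standalone sketch of the `var=value' validation that
arg_assign() above performs: split at the first `=', and accept only names that
start with a letter or underscore and continue with alphanumerics or
underscores. The helper name is hypothetical; gawk itself goes on to install
the value into the symbol table rather than just reporting a verdict.

#include <ctype.h>
#include <string.h>

/* is_valid_assignment --- hypothetical check mirroring arg_assign() */
static int
is_valid_assignment(const char *arg)
{
	const char *eq = strchr(arg, '=');
	const char *p;

	if (eq == NULL || eq == arg)
		return 0;	/* no `=', or empty variable name */
	if (! isalpha((unsigned char) arg[0]) && arg[0] != '_')
		return 0;
	for (p = arg + 1; p < eq; p++)
		if (! isalnum((unsigned char) *p) && *p != '_')
			return 0;
	return 1;	/* e.g. "FS=:" passes, "2x=1" does not */
}
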
diff --git a/missing.c b/missing.c
index 39c620fc..aa0d03cb 100644
--- a/missing.c
+++ b/missing.c
@@ -2,33 +2,8 @@
* Do all necessary includes here, so that we don't have to worry about
* overlapping includes in the files in missing.d.
*/
-#include <stdio.h>
-#include <ctype.h>
-#include <errno.h>
-#if !defined(VMS) || (!defined(VAXC) && !defined(__DECC))
-#include <fcntl.h>
-#include <sys/types.h>
-#else /*VMS w/ VAXC or DECC*/
-#include <file.h>
-#include <types.h>
-#endif
-#include <varargs.h>
-
-#include "config.h"
+#include "awk.h"
-#ifndef __STDC__
-#define const
-#endif /* !__STDC__ */
-
-#ifdef STDC_HEADERS
-#include <string.h>
-#endif
-
-#ifdef TZSET_MISSING
-#include <sys/time.h>
-#else
-#include <time.h>
-#endif
#ifdef atarist
/*
@@ -37,53 +12,52 @@
*/
#include "atari/stack.c"
#include "atari/tmpnam.c"
-/* #include "atari/textrd.c" */ /* current libraries are correct bug fix */
#endif /* atarist */
-#ifdef SYSTEM_MISSING
+#ifndef HAVE_SYSTEM
#ifdef atarist
#include "atari/system.c"
#else
#include "missing/system.c"
#endif
-#endif /* SYSTEM_MISSING */
+#endif /* HAVE_SYSTEM */
-#ifdef MEMCMP_MISSING
+#ifndef HAVE_MEMCMP
#include "missing/memcmp.c"
-#endif /* MEMCMP_MISSING */
+#endif /* HAVE_MEMCMP */
-#ifdef MEMCPY_MISSING
+#ifndef HAVE_MEMCPY
#include "missing/memcpy.c"
-#endif /* MEMCPY_MISSING */
+#endif /* HAVE_MEMCPY */
-#ifdef MEMSET_MISSING
+#ifndef HAVE_MEMSET
#include "missing/memset.c"
-#endif /* MEMSET_MISSING */
+#endif /* HAVE_MEMSET */
-#ifdef RANDOM_MISSING
+#ifndef HAVE_RANDOM
#include "missing/random.c"
-#endif /* RANDOM_MISSING */
+#endif /* HAVE_RANDOM */
-#ifdef STRCASE_MISSING
+#ifndef HAVE_STRNCASECMP
#include "missing/strncasecmp.c"
-#endif /* STRCASE_MISSING */
+#endif /* HAVE_STRNCASECMP */
-#ifdef STRERROR_MISSING
+#ifndef HAVE_STRERROR
#include "missing/strerror.c"
-#endif /* STRERROR_MISSING */
+#endif /* HAVE_STRERROR */
-#ifdef STRFTIME_MISSING
+#ifndef HAVE_STRFTIME
#include "missing/strftime.c"
-#endif /* STRFTIME_MISSING */
+#endif /* HAVE_STRFTIME */
-#ifdef STRCHR_MISSING
+#ifndef HAVE_STRCHR
#include "missing/strchr.c"
-#endif /* STRCHR_MISSING */
+#endif /* HAVE_STRCHR */
-#ifdef STRTOD_MISSING
+#ifndef HAVE_STRTOD
#include "missing/strtod.c"
-#endif /* STRTOD_MISSING */
+#endif /* HAVE_STRTOD */
-#ifdef TZSET_MISSING
+#ifndef HAVE_TZSET
#include "missing/tzset.c"
-#endif /* TZSET_MISSING */
+#endif /* HAVE_TZSET */
diff --git a/missing/memset.c b/missing/memset.c
index 120bdcb4..1ff4458b 100644
--- a/missing/memset.c
+++ b/missing/memset.c
@@ -4,15 +4,17 @@
* We supply this routine for those systems that aren't standard yet.
*/
-char *
-memset (dest, val, l)
-register char *dest, val;
-register int l;
+void *
+memset(dest, val, l)
+void *dest;
+register int val;
+register size_t l;
{
register char *ret = dest;
+ register char *d = dest;
while (l--)
- *dest++ = val;
+ *d++ = val;
- return ret;
+ return ((void *) ret);
}
diff --git a/missing/strchr.c b/missing/strchr.c
index 76016d89..7da479fc 100644
--- a/missing/strchr.c
+++ b/missing/strchr.c
@@ -4,13 +4,25 @@
* We supply this routine for those systems that aren't standard yet.
*/
+#if 0
+#include <stdio.h>
+#endif
+
char *
-strchr (str, c)
+strchr(str, c)
register const char *str, c;
{
- for (; *str; str++)
- if (*str == c)
- return (char *) str;
+ if (c == '\0') {
+ /* thanks to Mike Brennan ... */
+ do {
+ if (*str == c)
+ return (char *) str;
+ } while (*str++);
+ } else {
+ for (; *str; str++)
+ if (*str == c)
+ return (char *) str;
+ }
return NULL;
}
@@ -22,7 +34,7 @@ register const char *str, c;
*/
char *
-strrchr (str, c)
+strrchr(str, c)
register const char *str, c;
{
register const char *save = NULL;
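
The rewrite of strchr() above adopts the standard rule that the terminating NUL
counts as part of the string, so searching for '\0' returns a pointer to the
terminator instead of NULL. A small usage sketch:

#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *s = "gawk";
	char *p = strchr(s, '\0');	/* points at the terminating NUL */

	printf("offset of NUL: %ld\n", (long) (p - s));	/* prints 4 */
	return 0;
}
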
diff --git a/missing/strerror.c b/missing/strerror.c
index badaf5dd..e49fdb65 100644
--- a/missing/strerror.c
+++ b/missing/strerror.c
@@ -6,7 +6,7 @@
* Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,10 +19,14 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
+#if 0
+#include <stdio.h>
+#endif
+
extern int sys_nerr;
extern char *sys_errlist[];
@@ -32,8 +36,8 @@ int n;
{
static char mesg[30];
- if (n < 0 || n > sys_nerr) {
- sprintf (mesg, "Unknown error (%d)", n);
+ if (n < 0 || n >= sys_nerr) {
+ sprintf(mesg, "Unknown error (%d)", n);
return mesg;
} else
return sys_errlist[n];
diff --git a/missing/strftime.3 b/missing/strftime.3
index 9efe8408..76fc02a0 100644
--- a/missing/strftime.3
+++ b/missing/strftime.3
@@ -274,6 +274,39 @@ is defined, then the following additional conversion is available:
.TP
.B %v
The date in VMS format (e.g. 20-JUN-1991).
+.SH MAIL HEADER EXTENSIONS
+If
+.B MAILHEADER_EXT
+is defined, then the following additional conversion is available:
+.TP
+.B %z
+The timezone offset in a +HHMM format (e.g. the format necessary to
+produce RFC-822/RFC-1036 date headers).
+.SH ISO DATE FORMAT EXTENSIONS
+If
+.B ISO_DATE_EXT
+is defined, then all of the conversions available with
+.BR POSIX2_DATE ,
+.BR SYSV_EXT ,
+and
+.B SUNOS_EXT
+are available, as well as the
+following additional conversions:
+.TP
+.B %G
+is replaced by the year with century of the ISO week number (see
+.BR %V ,
+above) as a decimal number.
+.TP
+.B %g
+is replaced by the year without century of the ISO week number,
+as a decimal number
+.RB ( 00 - 99 ).
+.PP
+For example, January 1, 1993, is in week 53 of 1992. Thus, the year
+of its ISO week number is 1992, even though its year is 1993.
+Similarly, December 31, 1973, is in week 1 of 1974. Thus, the year
+of its ISO week number is 1974, even though its year is 1973.
.SH SEE ALSO
.IR time (2),
.IR ctime (3),
@@ -285,8 +318,12 @@ setting of the
.B LC_TIME
environment variable.
.LP
-It is not clear what is ``appropriate'' for the C locale; the values
-returned are a best guess on the author's part.
+The ``appropriate'' values used for
+.BR %c ,
+.BR %x ,
+and
+.B %X
+are those specified by the 1003.2 standard for the POSIX locale.
.SH CAVEATS
The pre-processor symbol
.B POSIX_SEMANTICS
@@ -303,9 +340,7 @@ then there may be some performance improvements by not defining
.nf
Arnold Robbins
.sp
-INTERNET: arnold@skeeve.atl.ga.us
-UUCP: emory!skeeve!arnold
-Phone: +1 404 248 9324
+INTERNET: arnold@gnu.ai.mit.edu
.fi
.SH ACKNOWLEDGEMENTS
Thanks to Geoff Clare <gwc@root.co.uk> for helping debug earlier
@@ -314,3 +349,5 @@ Additional thanks to Arthur David Olsen <ado@elsie.nci.nih.gov>
for some code improvements.
Thanks also to Tor Lillqvist <tml@tik.vtt.fi>
for code fixes to the ISO 8601 code.
+Thanks to Hume Smith for pointing out a problem with the ISO 8601 code
+and to Arthur David Olsen for further discussions.
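
A short sketch of the ISO week conversions documented above, assuming a
strftime() that supports %V and %G (this implementation, or any C99 one). Per
the text above, January 1, 1993 falls in ISO week 53 of ISO year 1992:

#include <stdio.h>
#include <string.h>
#include <time.h>

int
main(void)
{
	struct tm tm;
	char buf[64];

	memset(&tm, 0, sizeof tm);
	tm.tm_year = 1993 - 1900;	/* January 1, 1993 ... */
	tm.tm_mon = 0;
	tm.tm_mday = 1;
	tm.tm_wday = 5;			/* ... which was a Friday */
	tm.tm_yday = 0;

	strftime(buf, sizeof buf, "%G week %V (calendar year %Y)", &tm);
	printf("%s\n", buf);	/* expected: 1992 week 53 (calendar year 1993) */
	return 0;
}
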
diff --git a/missing/strftime.c b/missing/strftime.c
index 629eb5f2..478471c3 100644
--- a/missing/strftime.c
+++ b/missing/strftime.c
@@ -10,9 +10,12 @@
* For extensions from SunOS, add SUNOS_EXT.
* For stuff needed to implement the P1003.2 date command, add POSIX2_DATE.
* For VMS dates, add VMS_EXT.
+ * For an RFC822 time format, add MAILHEADER_EXT.
+ * For ISO week years, add ISO_DATE_EXT.
* For complete POSIX semantics, add POSIX_SEMANTICS.
*
- * The code for %c, %x, and %X is my best guess as to what's "appropriate".
+ * The code for %c, %x, and %X now follows the 1003.2 specification for
+ * the POSIX locale.
* This version ignores LOCALE information.
* It also doesn't worry about multi-byte characters.
* So there.
@@ -26,6 +29,9 @@
* Updated April, 1993
* Updated February, 1994
* Updated May, 1994
+ * Updated January, 1995
+ * Updated September, 1995
+ * Updated January, 1996
*
* Fixes from ado@elsie.nci.nih.gov
* February 1991, May 1992
@@ -33,17 +39,12 @@
* May, 1993
* Further fixes from ado@elsie.nci.nih.gov
* February 1994
+ * %z code from chip@chinacat.unicom.com
+ * Applied September 1995
+ * %V code fixed (again) and %G, %g added,
+ * January 1996
*/
-/************ for gawk 2.15.5 ***************/
-#ifndef TZNAME_MISSING
-#define HAVE_TZNAME
-#endif
-#ifndef TM_ZONE_MISSING
-#define HAVE_TM_ZONE
-#endif
-/*********** end of for gawk 2.15.5 *********/
-
#ifndef GAWK
#include <stdio.h>
#include <ctype.h>
@@ -60,10 +61,18 @@
#define SUNOS_EXT 1 /* stuff in SunOS strftime routine */
#define POSIX2_DATE 1 /* stuff in Posix 1003.2 date command */
#define VMS_EXT 1 /* include %v for VMS date format */
+#define MAILHEADER_EXT 1 /* add %z for HHMM format */
+#define ISO_DATE_EXT 1 /* %G and %g for year of ISO week */
#ifndef GAWK
#define POSIX_SEMANTICS 1 /* call tzset() if TZ changes */
#endif
+#if defined(ISO_DATE_EXT)
+#if ! defined(POSIX2_DATE)
+#define POSIX2_DATE 1
+#endif
+#endif
+
#if defined(POSIX2_DATE)
#if ! defined(SYSV_EXT)
#define SYSV_EXT 1
@@ -111,8 +120,15 @@ adddecl(static int iso8601wknum(const struct tm *timeptr);)
#if !defined(OS2) && !defined(MSDOS) && defined(HAVE_TZNAME)
extern char *tzname[2];
extern int daylight;
+#ifdef SOLARIS
+extern long timezone, altzone;
+#else
+extern int timezone, altzone;
+#endif
#endif
+#undef min /* just in case */
+
/* min --- return minimum of two numbers */
#ifndef __STDC__
@@ -127,6 +143,8 @@ min(int a, int b)
return (a < b ? a : b);
}
+#undef max /* also, just in case */
+
/* max --- return maximum of two numbers */
#ifndef __STDC__
@@ -158,7 +176,8 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
char *endp = s + maxsize;
char *start = s;
auto char tbuf[100];
- int i;
+ long off;
+ int i, w, y;
static short first = 1;
#ifdef POSIX_SEMANTICS
static char *savetz = NULL;
@@ -166,9 +185,13 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
char *tz;
#endif /* POSIX_SEMANTICS */
#ifndef HAVE_TM_ZONE
+#ifndef HAVE_TM_NAME
+#ifndef HAVE_TZNAME
extern char *timezone();
struct timeval tv;
struct timezone zone;
+#endif /* HAVE_TZNAME */
+#endif /* HAVE_TM_NAME */
#endif /* HAVE_TM_ZONE */
/* various tables, useful in North America */
@@ -281,14 +304,7 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
break;
case 'c': /* appropriate date and time representation */
- sprintf(tbuf, "%s %s %2d %02d:%02d:%02d %d",
- days_a[range(0, timeptr->tm_wday, 6)],
- months_a[range(0, timeptr->tm_mon, 11)],
- range(1, timeptr->tm_mday, 31),
- range(0, timeptr->tm_hour, 23),
- range(0, timeptr->tm_min, 59),
- range(0, timeptr->tm_sec, 61),
- timeptr->tm_year + 1900);
+ strftime(tbuf, sizeof tbuf, "%a %b %e %H:%M:%S %Y", timeptr);
break;
case 'd': /* day of the month, 01 - 31 */
@@ -351,18 +367,11 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
break;
case 'x': /* appropriate date representation */
- sprintf(tbuf, "%s %s %2d %d",
- days_a[range(0, timeptr->tm_wday, 6)],
- months_a[range(0, timeptr->tm_mon, 11)],
- range(1, timeptr->tm_mday, 31),
- timeptr->tm_year + 1900);
+ strftime(tbuf, sizeof tbuf, "%m/%d/%y", timeptr);
break;
case 'X': /* appropriate time representation */
- sprintf(tbuf, "%02d:%02d:%02d",
- range(0, timeptr->tm_hour, 23),
- range(0, timeptr->tm_min, 59),
- range(0, timeptr->tm_sec, 61));
+ strftime(tbuf, sizeof tbuf, "%H:%M:%S", timeptr);
break;
case 'y': /* year without a century, 00 - 99 */
@@ -374,19 +383,75 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
sprintf(tbuf, "%d", 1900 + timeptr->tm_year);
break;
+#ifdef MAILHEADER_EXT
+ /*
+ * From: Chip Rosenthal <chip@chinacat.unicom.com>
+ * Date: Sun, 19 Mar 1995 00:33:29 -0600 (CST)
+ *
+ * Warning: the %z [code] is implemented by inspecting the
+ * timezone name conditional compile settings, and
+ * inferring a method to get timezone offsets. I've tried
+ * this code on a couple of machines, but I don't doubt
+ * there is some system out there that won't like it.
+ * Maybe the easiest thing to do would be to bracket this
+ * with an #ifdef that can turn it off. The %z feature
+ * would be an admittedly obscure one that most folks can
+ * live without, but it would be a great help to those of
+ * us that muck around with various message processors.
+ */
+ case 'z': /* time zone offset east of GMT e.g. -0600 */
+#ifdef HAVE_TM_NAME
+ /*
+ * Systems with tm_name probably have tm_tzadj as
+ * secs west of GMT. Convert to mins east of GMT.
+ */
+ off = -timeptr->tm_tzadj / 60;
+#else /* !HAVE_TM_NAME */
+#ifdef HAVE_TM_ZONE
+ /*
+ * Systems with tm_zone probably have tm_gmtoff as
+ * secs east of GMT. Convert to mins east of GMT.
+ */
+ off = timeptr->tm_gmtoff / 60;
+#else /* !HAVE_TM_ZONE */
+#if HAVE_TZNAME
+ /*
+ * Systems with tzname[] probably have timezone as
+ * secs west of GMT. Convert to mins east of GMT.
+ */
+ off = -(daylight ? timezone : altzone) / 60;
+#else /* !HAVE_TZNAME */
+ off = -zone.tz_minuteswest;
+#endif /* !HAVE_TZNAME */
+#endif /* !HAVE_TM_ZONE */
+#endif /* !HAVE_TM_NAME */
+ if (off < 0) {
+ tbuf[0] = '-';
+ off = -off;
+ } else {
+ tbuf[0] = '+';
+ }
+ sprintf(tbuf+1, "%02d%02d", off/60, off%60);
+ break;
+#endif /* MAILHEADER_EXT */
+
 		case 'Z':	/* time zone name or abbreviation */
#ifdef HAVE_TZNAME
- i = (daylight && timeptr->tm_isdst); /* 0 or 1 */
+ i = (daylight && timeptr->tm_isdst > 0); /* 0 or 1 */
strcpy(tbuf, tzname[i]);
#else
#ifdef HAVE_TM_ZONE
strcpy(tbuf, timeptr->tm_zone);
#else
+#ifdef HAVE_TM_NAME
+ strcpy(tbuf, timeptr->tm_name);
+#else
gettimeofday(& tv, & zone);
strcpy(tbuf, timezone(zone.tz_minuteswest,
- timeptr->tm_isdst));
-#endif
-#endif
+ timeptr->tm_isdst > 0));
+#endif /* HAVE_TM_NAME */
+#endif /* HAVE_TM_ZONE */
+#endif /* HAVE_TZNAME */
break;
#ifdef SYSV_EXT
@@ -462,19 +527,6 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
goto again;
case 'V': /* week of year according ISO 8601 */
-#if defined(GAWK) && defined(VMS_EXT)
- {
- extern int do_lint;
- extern void warning();
- static int warned = 0;
-
- if (! warned && do_lint) {
- warned = 1;
- warning(
- "conversion %%V added in P1003.2; for VMS style date, use %%v");
- }
- }
-#endif
sprintf(tbuf, "%02d", iso8601wknum(timeptr));
break;
@@ -484,6 +536,33 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr)
timeptr->tm_wday);
break;
#endif /* POSIX2_DATE */
+
+#ifdef ISO_DATE_EXT
+ case 'G':
+ case 'g':
+ /*
+ * Year of ISO week.
+ *
+ * If it's December but the ISO week number is one,
+ * that week is in next year.
+ * If it's January but the ISO week number is 52 or
+ * 53, that week is in last year.
+ * Otherwise, it's this year.
+ */
+ w = iso8601wknum(timeptr);
+ if (timeptr->tm_mon == 11 && w == 1)
+ y = 1900 + timeptr->tm_year + 1;
+ else if (timeptr->tm_mon == 0 && w >= 52)
+ y = 1900 + timeptr->tm_year - 1;
+ else
+ y = 1900 + timeptr->tm_year;
+
+ if (*format == 'G')
+ sprintf(tbuf, "%d", y);
+ else
+ sprintf(tbuf, "%02d", y % 100);
+ break;
+#endif /* ISO_DATE_EXT */
default:
tbuf[0] = '%';
tbuf[1] = *format;
@@ -539,7 +618,7 @@ iso8601wknum(const struct tm *timeptr)
* If the week (Monday to Sunday) containing January 1
* has four or more days in the new year, then it is week 1;
* otherwise it is the highest numbered week of the previous
- * (52 or 53) year, and the next week is week 1.
+ * year (52 or 53), and the next week is week 1.
*
* ADR: This means if Jan 1 was Monday through Thursday,
* it was week 1, otherwise week 52 or 53.
@@ -587,7 +666,7 @@ iso8601wknum(const struct tm *timeptr)
case 1: /* Monday */
break;
case 2: /* Tuesday */
- case 3: /* Wednedsday */
+ case 3: /* Wednesday */
case 4: /* Thursday */
weeknum++;
break;
@@ -612,6 +691,29 @@ iso8601wknum(const struct tm *timeptr)
}
break;
}
+
+ if (timeptr->tm_mon == 11) {
+ /*
+ * The last week of the year
+ * can be in week 1 of next year.
+ * Sigh.
+ *
+ * This can only happen if
+ * M T W
+ * 29 30 31
+ * 30 31
+ * 31
+ */
+ int wday, mday;
+
+ wday = timeptr->tm_wday;
+ mday = timeptr->tm_mday;
+ if ( (wday == 1 && (mday >= 29 && mday <= 31))
+ || (wday == 2 && (mday == 30 || mday == 31))
+ || (wday == 3 && mday == 31))
+ weeknum = 1;
+ }
+
return weeknum;
}
#endif
@@ -749,6 +851,7 @@ static char *array[] =
"(%%w) day of week (0..6, Sunday == 0) %w",
"(%%x) appropriate locale date representation %x",
"(%%y) last two digits of year (00..99) %y",
+ "(%%z) timezone offset east of GMT as HHMM (e.g. -0500) %z",
(char *) NULL
};
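
Once a GMT offset in minutes east is known, the %z conversion added above
reduces to a sign and two two-digit fields. A standalone sketch of just that
formatting step (the helper name and the sample offset are illustrative):

#include <stdio.h>

/* format_offset --- hypothetical helper: minutes east of GMT -> "+HHMM" */
static void
format_offset(long off, char *buf)
{
	if (off < 0) {
		buf[0] = '-';
		off = -off;
	} else
		buf[0] = '+';
	sprintf(buf + 1, "%02ld%02ld", off / 60, off % 60);
}

int
main(void)
{
	char buf[8];

	format_offset(-300L, buf);	/* e.g. US Eastern standard time */
	printf("%s\n", buf);		/* prints -0500 */
	return 0;
}
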
diff --git a/missing/system.c b/missing/system.c
index fffb39c1..8e613a3a 100644
--- a/missing/system.c
+++ b/missing/system.c
@@ -6,7 +6,7 @@
* Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,8 +19,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
extern void fatal();
diff --git a/missing/tzset.c b/missing/tzset.c
index 7e0af48a..678ec66d 100644
--- a/missing/tzset.c
+++ b/missing/tzset.c
@@ -11,8 +11,9 @@
*/
#if 0
-#include <sys/time.h>
+#include <time.h>
#endif
+#include <sys/time.h>
static char tz1[1024];
static char tz2[1024];
diff --git a/mkinstalldirs b/mkinstalldirs
new file mode 100755
index 00000000..0801ec2c
--- /dev/null
+++ b/mkinstalldirs
@@ -0,0 +1,32 @@
+#! /bin/sh
+# mkinstalldirs --- make directory hierarchy
+# Author: Noah Friedman <friedman@prep.ai.mit.edu>
+# Created: 1993-05-16
+# Last modified: 1994-03-25
+# Public domain
+
+errstatus=0
+
+for file in ${1+"$@"} ; do
+ set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'`
+ shift
+
+ pathcomp=
+ for d in ${1+"$@"} ; do
+ pathcomp="$pathcomp$d"
+ case "$pathcomp" in
+ -* ) pathcomp=./$pathcomp ;;
+ esac
+
+ if test ! -d "$pathcomp"; then
+ echo "mkdir $pathcomp" 1>&2
+ mkdir "$pathcomp" || errstatus=$?
+ fi
+
+ pathcomp="$pathcomp/"
+ done
+done
+
+exit $errstatus
+
+# mkinstalldirs ends here
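
The mkinstalldirs script above creates a path one component at a time, ignoring
components that already exist. A rough C equivalent of that loop, assuming
POSIX mkdir() (the helper name is hypothetical):

#include <errno.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>

/* make_dirs --- hypothetical "mkdir -p", one component at a time */
static int
make_dirs(const char *path)
{
	char buf[1024];
	char *p;

	if (strlen(path) >= sizeof buf)
		return -1;
	strcpy(buf, path);
	for (p = buf + 1; *p != '\0'; p++) {
		if (*p != '/')
			continue;
		*p = '\0';
		if (mkdir(buf, 0777) != 0 && errno != EEXIST)
			return -1;
		*p = '/';
	}
	if (mkdir(buf, 0777) != 0 && errno != EEXIST)
		return -1;
	return 0;
}
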
diff --git a/msg.c b/msg.c
index 0ddd0715..a9fe66db 100644
--- a/msg.c
+++ b/msg.c
@@ -6,7 +6,7 @@
* Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,8 +19,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "awk.h"
@@ -28,6 +28,11 @@
int sourceline = 0;
char *source = NULL;
+/* prototype needed for ansi / gcc */
+void err P((const char *s, const char *emsg, va_list argp));
+
+/* err --- print an error message with source line and file and record */
+
/* VARARGS2 */
void
err(s, emsg, argp)
@@ -39,15 +44,15 @@ va_list argp;
(void) fflush(stdout);
(void) fprintf(stderr, "%s: ", myname);
- if (sourceline) {
- if (source)
+ if (sourceline != 0) {
+ if (source != NULL)
(void) fprintf(stderr, "%s:", source);
else
(void) fprintf(stderr, "cmd. line:");
(void) fprintf(stderr, "%d: ", sourceline);
}
- if (FNR) {
+ if (FNR > 0) {
file = FILENAME_node->var_value->stptr;
(void) putc('(', stderr);
if (file)
@@ -60,58 +65,100 @@ va_list argp;
(void) fflush(stderr);
}
+/* msg --- take a varargs error message and print it */
+
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+void
+msg(char *mesg, ...)
+#else
/*VARARGS0*/
void
msg(va_alist)
va_dcl
+#endif
{
va_list args;
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ va_start(args, mesg);
+#else
char *mesg;
va_start(args);
mesg = va_arg(args, char *);
+#endif
err("", mesg, args);
va_end(args);
}
+/* warning --- print a warning message */
+
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+void
+warning(char *mesg, ...)
+#else
/*VARARGS0*/
void
warning(va_alist)
va_dcl
+#endif
{
va_list args;
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ va_start(args, mesg);
+#else
char *mesg;
va_start(args);
mesg = va_arg(args, char *);
+#endif
err("warning: ", mesg, args);
va_end(args);
}
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+void
+error(char *mesg, ...)
+#else
/*VARARGS0*/
void
error(va_alist)
va_dcl
+#endif
{
va_list args;
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ va_start(args, mesg);
+#else
char *mesg;
va_start(args);
mesg = va_arg(args, char *);
+#endif
err("error: ", mesg, args);
va_end(args);
}
+/* fatal --- print an error message and die */
+
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+void
+fatal(char *mesg, ...)
+#else
/*VARARGS0*/
void
fatal(va_alist)
va_dcl
+#endif
{
va_list args;
+#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
+ va_start(args, mesg);
+#else
char *mesg;
va_start(args);
mesg = va_arg(args, char *);
+#endif
err("fatal: ", mesg, args);
va_end(args);
#ifdef DEBUG
@@ -119,3 +166,4 @@ va_dcl
#endif
exit(2);
}
+
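
The msg.c changes above make msg(), warning(), error() and fatal() thin
wrappers that gather their variable arguments and hand them to err() with a
fixed prefix. A minimal sketch of the same pattern using only stdarg (the
names here are illustrative, not gawk's):

#include <stdarg.h>
#include <stdio.h>

/* report --- shared varargs sink, in the role of err() */
static void
report(const char *prefix, const char *fmt, va_list args)
{
	fprintf(stderr, "%s", prefix);
	vfprintf(stderr, fmt, args);
	fputc('\n', stderr);
}

/* warn_msg --- hypothetical wrapper, analogous to warning() */
static void
warn_msg(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	report("warning: ", fmt, args);
	va_end(args);
}

int
main(void)
{
	warn_msg("%s option irrelevant in gawk", "-m[fr]");
	return 0;
}
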
diff --git a/mungeconf b/mungeconf
deleted file mode 100755
index 95170450..00000000
--- a/mungeconf
+++ /dev/null
@@ -1,20 +0,0 @@
-#! /bin/sh
-
-# stdout is normally config.h
-
-case $# in
-2) ;;
-*) echo "Usage: mungeconf sysfile distfile" >&2 ; exit 2 ;;
-esac
-
-sed '/^#/d; /^MAKE_*/d' $1 | # strip comments and Makefile stuff
-sed '1s:.*:s~__SYSTEM__~&~:
-2,$s:^\([^ ]*\)[ ].*:s~^/\\* #define[ ]*\1.*~#define &~:' >sedscr
-
-sed -f sedscr $2
-
-echo
-echo '/* anything that follows is for system-specific short-term kludges */'
-grep '^#define' $1 # for system-specific short-term kludges
-
-rm -f sedscr
diff --git a/node.c b/node.c
index f6134b5e..cba10dfc 100644
--- a/node.c
+++ b/node.c
@@ -6,7 +6,7 @@
* Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,13 +19,13 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "awk.h"
-extern double strtod();
+/* r_force_number --- force a value to be numeric */
AWKNUM
r_force_number(n)
@@ -82,7 +82,7 @@ register NODE *n;
errno = 0;
save = *cpend;
*cpend = '\0';
- n->numbr = (AWKNUM) strtod((const char *)cp, &ptr);
+ n->numbr = (AWKNUM) strtod((const char *) cp, &ptr);
/* POSIX says trailing space is OK for NUMBER */
while (isspace(*ptr))
@@ -116,6 +116,8 @@ static const char *values[] = {
};
#define NVAL (sizeof(values)/sizeof(values[0]))
+/* r_force_string --- force a value to be a string */
+
NODE *
r_force_string(s)
register NODE *s;
@@ -125,11 +127,17 @@ register NODE *s;
double val;
#ifdef DEBUG
- if (s == NULL) cant_happen();
- if (s->type != Node_val) cant_happen();
- if ((s->flags & STR) && (s->stfmt == -1 || s->stfmt == CONVFMTidx)) return s;
- if (!(s->flags & NUM)) cant_happen();
- if (s->stref != 0) ; /*cant_happen();*/
+ if (s == NULL)
+ cant_happen();
+ if (s->type != Node_val)
+ cant_happen();
+ if ((s->flags & STR) != 0
+ && (s->stfmt == -1 || s->stfmt == CONVFMTidx))
+ return s;
+ if ((s->flags & NUM) == 0)
+ cant_happen();
+ if (s->stref <= 0)
+ cant_happen();
#endif
/* not an integral value, or out of range */
@@ -149,7 +157,7 @@ register NODE *s;
s->flags |= PERM; /* prevent from freeing by format_tree() */
r = format_tree(CONVFMT, fmt_list[CONVFMTidx]->stlen, dummy);
s->flags = oflags;
- s->stfmt = (char)CONVFMTidx;
+ s->stfmt = (char) CONVFMTidx;
s->stlen = r->stlen;
s->stptr = r->stptr;
freenode(r); /* Do not free_temp(r)! We want */
@@ -163,7 +171,7 @@ register NODE *s;
*/
sprintf(sp, CONVFMT, s->numbr);
s->stlen = strlen(sp);
- s->stfmt = (char)CONVFMTidx;
+ s->stfmt = (char) CONVFMTidx;
#endif /* GFMT_WORKAROUND */
} else {
/* integral value */
@@ -180,29 +188,33 @@ register NODE *s;
}
emalloc(s->stptr, char *, s->stlen + 2, "force_string");
memcpy(s->stptr, sp, s->stlen+1);
+#ifdef GFMT_WORKAROUND
no_malloc:
+#endif /* GFMT_WORKAROUND */
s->stref = 1;
s->flags |= STR;
return s;
}
/*
+ * dupnode:
* Duplicate a node. (For strings, "duplicate" means crank up the
* reference count.)
*/
+
NODE *
dupnode(n)
NODE *n;
{
register NODE *r;
- if (n->flags & TEMP) {
+ if ((n->flags & TEMP) != 0) {
n->flags &= ~TEMP;
n->flags |= MALLOC;
return n;
}
if ((n->flags & (MALLOC|STR)) == (MALLOC|STR)) {
- if (n->stref < 255)
+ if (n->stref < LONG_MAX)
n->stref++;
return n;
}
@@ -210,7 +222,7 @@ NODE *n;
*r = *n;
r->flags &= ~(PERM|TEMP);
r->flags |= MALLOC;
- if (n->type == Node_val && (n->flags & STR)) {
+ if (n->type == Node_val && (n->flags & STR) != 0) {
r->stref = 1;
emalloc(r->stptr, char *, r->stlen + 2, "dupnode");
memcpy(r->stptr, n->stptr, r->stlen);
@@ -219,7 +231,8 @@ NODE *n;
return r;
}
-/* this allocates a node with defined numbr */
+/* mk_number --- allocate a node with defined number */
+
NODE *
mk_number(x, flags)
AWKNUM x;
@@ -230,7 +243,7 @@ unsigned int flags;
getnode(r);
r->type = Node_val;
r->numbr = x;
- r->flags = flags;
+ r->flags = flags | SCALAR;
#ifdef DEBUG
r->stref = 1;
r->stptr = 0;
@@ -239,9 +252,8 @@ unsigned int flags;
return r;
}
-/*
- * Make a string node.
- */
+/* make_str_node --- make a string node */
+
NODE *
make_str_node(s, len, flags)
char *s;
@@ -252,7 +264,7 @@ int flags;
getnode(r);
r->type = Node_val;
- r->flags = (STRING|STR|MALLOC);
+ r->flags = (STRING|STR|MALLOC|SCALAR);
if (flags & ALREADY_MALLOCED)
r->stptr = s;
else {
@@ -261,7 +273,7 @@ int flags;
}
r->stptr[len] = '\0';
- if (flags & SCAN) { /* scan for escape sequences */
+ if ((flags & SCAN) != 0) { /* scan for escape sequences */
char *pf;
register char *ptm;
register int c;
@@ -293,6 +305,8 @@ int flags;
return r;
}
+/* tmp_string --- allocate a temporary string */
+
NODE *
tmp_string(s, len)
char *s;
@@ -305,6 +319,7 @@ size_t len;
return r;
}
+/* more_nodes --- allocate more nodes */
#define NODECHUNK 100
@@ -328,10 +343,13 @@ more_nodes()
}
#ifdef DEBUG
+/* freenode --- release a node back to the pool */
+
void
freenode(it)
NODE *it;
{
+ it->flags &= ~SCALAR;
#ifdef MPROF
it->stref = 0;
free((char *) it);
@@ -343,19 +361,21 @@ NODE *it;
}
#endif /* DEBUG */
+/* unref --- remove reference to a particular node */
+
void
unref(tmp)
register NODE *tmp;
{
if (tmp == NULL)
return;
- if (tmp->flags & PERM)
+ if ((tmp->flags & PERM) != 0)
return;
- if (tmp->flags & (MALLOC|TEMP)) {
+ if ((tmp->flags & (MALLOC|TEMP)) != 0) {
tmp->flags &= ~TEMP;
- if (tmp->flags & STR) {
+ if ((tmp->flags & STR) != 0) {
if (tmp->stref > 1) {
- if (tmp->stref != 255)
+ if (tmp->stref != LONG_MAX)
tmp->stref--;
return;
}
@@ -366,6 +386,8 @@ register NODE *tmp;
}
/*
+ * parse_escape:
+ *
* Parse a C escape sequence. STRING_PTR points to a variable containing a
* pointer to the string to parse. That pointer is updated past the
* characters we use. The value of the escape sequence is returned.
@@ -432,11 +454,11 @@ char **string_ptr;
return i;
case 'x':
if (do_lint) {
- static int didwarn;
+ static int didwarn = FALSE;
if (! didwarn) {
- didwarn = 1;
- warning("Posix does not allow \"\\x\" escapes");
+ didwarn = TRUE;
+ warning("POSIX does not allow \"\\x\" escapes");
}
}
if (do_posix)
@@ -446,7 +468,7 @@ char **string_ptr;
return ('x');
}
i = 0;
- while (1) {
+ for (;;) {
if (isxdigit((c = *(*string_ptr)++))) {
i *= 16;
if (isdigit(c))
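
dupnode() and unref() above reference-count malloc'd string values: duplicating
bumps stref, releasing decrements it and frees the text only when the last
reference goes away. A toy sketch of the same idea outside gawk's NODE type,
assuming POSIX strdup() (all names hypothetical):

#include <stdlib.h>
#include <string.h>

/* refstr --- toy reference-counted string, echoing stref/stptr above */
struct refstr {
	long ref;
	char *text;
};

static struct refstr *
refstr_new(const char *s)
{
	struct refstr *r = (struct refstr *) malloc(sizeof *r);

	if (r == NULL)
		return NULL;
	r->ref = 1;
	if ((r->text = strdup(s)) == NULL) {
		free(r);
		return NULL;
	}
	return r;
}

static struct refstr *
refstr_dup(struct refstr *r)	/* like dupnode(): just bump the count */
{
	r->ref++;
	return r;
}

static void
refstr_unref(struct refstr *r)	/* like unref(): free on last release */
{
	if (--r->ref == 0) {
		free(r->text);
		free(r);
	}
}
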
diff --git a/patchlevel.h b/patchlevel.h
index e44bc091..2867bba9 100644
--- a/patchlevel.h
+++ b/patchlevel.h
@@ -1 +1 @@
-#define PATCHLEVEL 6
+#define PATCHLEVEL 0
diff --git a/pc/ChangeLog b/pc/ChangeLog
new file mode 100644
index 00000000..570c031b
--- /dev/null
+++ b/pc/ChangeLog
@@ -0,0 +1,3 @@
+Wed Jan 10 22:58:55 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * ChangeLog created.
diff --git a/pc/Makefile b/pc/Makefile
new file mode 100644
index 00000000..cefd9c3d
--- /dev/null
+++ b/pc/Makefile
@@ -0,0 +1,299 @@
+# Makefile for gawk (GNU awk) 1 Sep 1995
+#
+# - for GNU C (djgpp) [executable for DOS (32-bit)]
+# - for GNU C (emx) [executable for OS/2 2.x or DOS (32-bit)]
+# - for Microsoft C 7	 [executable for DOS (16-bit)]
+# - for Microsoft C 6.00A [executable for OS/2 or DOS (16-bit)]
+# - for Microsoft C 5.1 [executable for OS/2 or DOS (16-bit)]
+
+# Tested with ndmake and dmake-3.8 under DOS and dmake and
+# GNU make under OS/2. Compiling with dmake under DOS may require
+# the DOS-only version of dmake (so that swapping works).
+
+default:
+ @echo "Enter $(MAK) target "
+ @echo " where 'target' is chosen from "
+ @echo " djgpp ... DOS 32-bit exe [GNU C, Delorie, v1 or v2] "
+ @echo " emx ..... OS/2 32-bit exe [emx/gcc; uses emxlibc.dll] "
+ @echo " emxbnd .. OS/2 and DOS 32-bit exe [emx/gcc] "
+ @echo " msc ..... DOS exe [Microsoft C 7] "
+ @echo " msc6 .... DOS exe [Microsoft C 6.00a] "
+ @echo " msc6os2 . OS/2 exe [Microsoft C 6.00a] "
+ @echo " msc6bnd . OS/2 and DOS exe [Microsoft C 6.00a] "
+ @echo " ----------------------------------------------------- "
+ @echo " test .... Perform tests (see README_d/README.pc) "
+ @echo " install . Install gawk under $(prefix)/ "
+
+# Support dropped in 3.0
+# @echo " msc51 DOS exe [Microsoft C 5.1] "
+# @echo " msc51bnd OS/2 and DOS exe [Microsoft C 5.1] "
+
+#======================= Configuration ==================================
+RSPFILE = gawk.rsp
+#
+# Choose method for passing arguments to the linker.
+#
+# If compiling under OS/2 or if make can pass long lines
+#LDRSP = $(GAWKOBJS)
+#LINKRSP = $(LDRSP)
+#
+# else if make == dmake
+# Response files for linker: dmake allows the macro expansion
+# $(macro_name:modifier_list:modifier_list:...)
+# The macro mktmp creates a temporary file for the linker.
+# The 't' modifier is for tokenization.
+#LDRSP = @$(mktmp $(<:t"\n"))
+#LINKRSP = @$(mktmp $(<:t"+\n") ) # Space before final paren req
+#
+# else use brain-dead approach (emxbnd will need 'tr').
+RSP = $(RSPFILE)
+LDRSP = @$(RSP)
+LINKRSP = $(LDRSP)
+#------------------------------------------------------------------------
+# Some makes do not define MAKE (and ndmake does not allow a define).
+# Define MAK to be your make command.
+#MAK = $(MAKE) $(MAKEFILE)
+MAK = $(MAKE)
+#MAKEFILE = -f Makefile
+#MAK = make45 $(MAKEFILE)
+#------------------------------------------------------------------------
+# Define the base directory for the install. "make install" will install
+# in bin, lib/awk, man, and info under $(prefix)/. Most likely, you should
+# edit config.h so that $(prefix)/lib/awk appears as part of DEFPATH.
+#prefix =
+prefix = c:/gnu
+#
+# Define the install method. Method 1 is Unix-like (and requires cat,
+# cp, mkdir, sed, and sh); method 2 uses gawk and batch files.
+install = 1
+#========================================================================
+# End of general configuration. Some platform-specific configuration
+# notes appear below.
+
+
+#========================================================================
+#========================== DJGPP =======================================
+#========================================================================
+
+LDJG = $(CC) $(LF) -o gawk $(LDRSP) $(LF2)
+#BDJG = coff2exe -s /djgpp/bin/go32.exe gawk
+BDJG = coff2exe gawk
+
+djgpp:
+ $(MAK) all \
+ CC=gcc O=.o CF=-O \
+ LINK=LDJG LF=-s LF2=-lm \
+ BIND=BDJG
+
+djgpp-debug:
+ $(MAK) all \
+ CC=gcc O=.o CF=-g \
+ LINK=LDJG LF2=-lm \
+ BIND=BDJG
+
+#========================================================================
+#========================== EMX =========================================
+#========================================================================
+
+# Link command for OS/2 versions.
+LEMX = $(CC) $(LF) -o $@ $(GAWKOBJS) gawk.def -lbsd $(LF2)
+
+# Link and bind for DOS and OS/2 versions.
+# emx-09 needs '-p' emx option here or in EMXOPT environ var.
+LEMXBND = $(CC) $(LF) -o a.out $(LDRSP) gawk.def -lbsd $(LF2)
+BEMX = emxbind -b /emx/bin/emxl.exe a.out $@ -p
+#BEMX = emxbind -b /emx/bin/emx.exe a.out $@ -p
+
+emx:
+ $(MAK) all \
+ "CC=gcc -Zomf" O=.obj "CF=-O -DOS2" \
+ LINK=LEMX "LF=-s -Zcrtdll -Zstack 512" RSP=
+
+emxbnd:
+ $(MAK) all \
+ CC=gcc O=.o "CF=-O -DOS2 -DMSDOS" OBJ=popen.o \
+ LINK=LEMXBND LF=-s \
+ BIND=BEMX "P=|tr \" \" \"\n\""
+
+emxbnd-debug:
+ $(MAK) all \
+ CC=gcc O=.o CF="-g -DOS2 -DMSDOS" OBJ=popen.o \
+ LINK=LEMXBND \
+ BIND=BEMX "P=|tr \" \" \"\n\""
+
+#========================================================================
+#========================== MSC =========================================
+#========================================================================
+
+# stdargv, glob, and director are from Stewartson's sh. These provide
+# globbing and enhanced argument-passing. MSC setargv.obj is a
+# more limited alternative (and it will permit a bound version).
+#STDARGV = stdargv.obj glob.obj director.obj
+STDARGV = setargv.obj
+
+# Optimization and library options:
+# Os == optimize for size, Ot == optimize for speed, G2 == 286 or better
+#MSCOPT = -Os -G2
+MSCOPT = -Ot # -G2
+# Alternate lib, does not use math coprocessor.
+#MSCLIB = llibca
+#MSCCL = -FPa
+# Emulator lib, uses math coprocessor if present.
+MSCLIB = llibce
+MSCCL = -FPi
+#MSCCL = -FPc
+
+LMSC = link $(LF) $(LINKRSP) $(STDARGV)/NOE,$@,,/NOD:llibce $(MSCLIB)$(LF2)/STACK:0x6f00;
+
+# CLMSC-linking works when building under OS/2
+CLMSC = $(CC) -o $@ $(LF) $(GAWKOBJS) $(STDARGV) $(LF2) -link /NOE/NOI/STACK:0x6f00
+
+BMSC = bind $@ /n DOSMAKEPIPE DOSCWAIT
+
+# Ugly hack: config.h defines __STDC__ if not defined on command-line.
+# OS/2 versions can't use -Za in getid.c. MSC7 uses stub headers in pc/
+# due to ANSI conflicts. MSC 5.1 defines __STDC__=0 regardless of ANSI flag.
+
+# dmake-3.8 runs out of memory under DOS. Request that dmake
+# swap itself out on these targets. Note that this won't have
+# any effect on the bound OS/2 and DOS version of dmake-3.8.
+
+.SWAP: msc msc-debug msc6 msc6os2 msc6bnd msc51 check
+
+msc:
+ $(MAK) all \
+ "CC=cl -nologo $(MSCCL)" O=.obj "CF=-AL -Ze -Ipc/include $(MSCOPT)" \
+ OBJ=popen.obj \
+ LINK=LMSC P=+
+
+msc-debug:
+ $(MAK) all \
+ "CC=cl $(MSCCL)" O=.obj "CF=-AL -Ze -Ipc/include -W2 -Zi -Od" \
+ OBJ=popen.obj \
+ LINK=LMSC LF2=/CO P=+
+
+msc6:
+ $(MAK) all \
+ "CC=cl -nologo $(MSCCL)" O=.obj "CF=-AL -Za $(MSCOPT)" \
+ OBJ=popen.obj \
+ LINK=LMSC P=+
+
+msc6os2:
+ $(MAK) all \
+ "CC=cl $(MSCCL)" O=.obj "CF=-AL -DOS2 -UMSDOS $(MSCOPT)" \
+ LINK=LMSC "LF2=p,gawk.def" P=+
+
+msc6bnd:
+ $(MAK) all \
+ "CC=cl $(MSCCL)" O=.obj "CF=-AL -DOS2 $(MSCOPT)" \
+ OBJ=popen.obj \
+ LINK=LMSC "LF2=p,gawk.def" P=+ \
+ BIND=BMSC
+
+# Support dropped in 3.0
+#msc51:
+# $(MAK) all \
+# "CC=cl $(MSCCL)" O=.obj "CF=-AL -Za -D_MSC_VER=510 $(MSCOPT)" \
+# OBJ=popen.obj \
+# LINK=LMSC P=+
+#
+#msc51bnd:
+# $(MAK) all \
+#	"CC=cl -AL $(MSCCL)" O=.obj "CF=-DOS2 -D_MSC_VER=510 $(MSCOPT)" \
+# OBJ=popen.obj \
+# LINK=CLMSC "LF=-Lp -Fb" "LF2=gawk.def"
+
+#========================================================================
+
+# Define BIND for BINDless compiles, otherwise $($(BIND)) may break.
+BIND = EMPTY
+EMPTY=
+
+CFLAGS = $(CF) -DGAWK -I. -DHAVE_CONFIG_H
+
+# object files
+AWKOBJS1 = array$O builtin$O eval$O field$O gawkmisc$O io$O main$O
+AWKOBJS2 = missing$O msg$O node$O re$O version$O
+AWKOBJS = $(AWKOBJS1) $(AWKOBJS2)
+
+ALLOBJS = $(AWKOBJS) awktab$O getid$O $(OBJ)
+
+# GNUOBJS
+# GNU stuff that gawk uses as library routines.
+GNUOBJS= getopt$O getopt1$O regex$O dfa$O
+
+GAWKOBJS = $(ALLOBJS) $(GNUOBJS)
+
+# clear out suffixes list
+# .SUFFIXES:
+.SUFFIXES: .c $O
+
+.c$O:
+ $(CC) -c $(CFLAGS) $<
+
+# rules to build gawk
+all : gawk.exe
+
+gawk.exe:: $(ALLOBJS) $(GNUOBJS) $(RSP)
+ $($(LINK))
+ $($(BIND))
+
+$(RSPFILE) : $(GAWKOBJS)
+ echo $(AWKOBJS1)$P > $@
+ echo $(AWKOBJS2)$P >> $@
+ echo awktab$O getid$O $(OBJ) $(GNUOBJS)$P >> $@
+
+$(ALLOBJS): awk.h dfa.h regex.h config.h
+
+gawkmisc$O: pc/gawkmisc.pc
+
+getopt$O: getopt.h
+
+getopt1$O: getopt.h
+
+main$O: patchlevel.h
+
+# A bug in ndmake requires the following rule
+awktab$O: awk.h awktab.c
+ $(CC) -c $(CFLAGS) awktab.c
+
+awktab.c: awk.y
+ bison -o $@ awk.y
+
+alloca$O: alloca.c
+
+#.PRECIOUS: install
+#.PHONY: install
+
+install: install$(install)
+
+install1:
+ echo extproc sh $(prefix)/bin/igawk.cmd > igawk.cmd
+ echo shift >> igawk.cmd
+ cat pc/awklib/igawk >> igawk.cmd
+ sed "s;igawk;$(prefix)/bin/igawk;" pc/awklib/igawk.bat > igawk.bat
+ sh mkinstal.sh $(prefix)/bin
+ sh mkinstal.sh $(prefix)/lib/awk $(prefix)/man/man1 $(prefix)/info
+ cp gawk.exe igawk.bat igawk.cmd pc/awklib/igawk $(prefix)/bin
+ cp awklib/eg/lib/* pc/awklib/igawk.awk $(prefix)/lib/awk
+ cp doc/*.1 $(prefix)/man/man1
+ cp doc/gawk.info $(prefix)/info
+
+# install2 is equivalent to install1, but doesn't require cp, sed, etc.
+install2:
+ gawk -v prefix=$(prefix) -f install.awk
+
+clean:
+ rm -rf gawk gawk.exe *.o *.obj core a.out $(RSPFILE)
+# cd doc && $(MAKE) clean
+# cd test && $(MAKE) clean
+# cd awklib && $(MAKE) clean
+
+awklib/eg: doc/gawk.texi
+ rm -fr awklib/eg
+ sh -c "cd awklib && ../gawk -f extract.awk ../doc/gawk.texi"
+
+check:
+ cd test && $(MAK) -k AWK=../gawk.exe
+
+test: check
diff --git a/pc/Makefile.emx b/pc/Makefile.emx
deleted file mode 100644
index 3be4a844..00000000
--- a/pc/Makefile.emx
+++ /dev/null
@@ -1,53 +0,0 @@
-# Makefile for gawk (GNU awk) using EMX/gcc
-#
-# This makefile is designed to work within the limits of the DOS
-# command-line length. OS/2 users can use Makefile.os2, which
-# has additional targets.
-#
-
-
-CC=gcc -O -s
-O=.o
-CFLAGS=-DOS2 -DMSDOS
-
-LFLAGS=
-LFLAGS2=gawk-32.def
-
-#BIND=emxbind -u /emx/bin/emx.exe $@
-BIND=
-
-OBJ2=getid$O popen$O
-
-AWKOBJS = main$O eval$O builtin$O msg$O iop$O io$O field$O array$O \
- node$O version$O missing$O re$O
-ALLOBJS = $(AWKOBJS) awktab$O
-GNUOBJS= getopt$O getopt1$O regex$O dfa$O
-
-.SUFFIXES: $O .c .y
-
-.c$O:
- $(CC) $(CFLAGS) -DGAWK -DHAVE_CONFIG_H -c $<
-
-all: gawk.exe
-
-gawk.exe: $(ALLOBJS) $(GNUOBJS) $(OBJ2)
- $(CC) -o $@ $(LFLAGS) @names2.lnk $(LFLAGS2)
- $(BIND)
-
-$(AWKOBJS): awk.h config.h
-dfa$O: awk.h config.h dfa.h
-regex$O: awk.h config.h regex.h
-main$O: patchlev.h
-awktab$O: awk.h awktab.c
-
-awktab.c: awk.y
- bison -o $@ awk.y
-
-clean:
- rm -f *.o core awk.output gmon.out make.out y.output
-
-.PHONY: test
-test:
- @echo Both dmake and GNU make require modifications to test/Makefile,
- @echo but here we go...
- cd test && $(MAKE) -k
diff --git a/pc/Makefile.msc b/pc/Makefile.msc
deleted file mode 100644
index 078aab54..00000000
--- a/pc/Makefile.msc
+++ /dev/null
@@ -1,68 +0,0 @@
-# Makefile for gawk (GNU awk) using Microsoft C
-#
-# This makefile is designed to work within the limits of the DOS
-# command-line length. OS/2 users can use Makefile.os2, which
-# has additional targets.
-#
-
-DEFS = -D_MSC_VER
-
-# For MSC 5.1
-#DEFS = -D_MSC_VER=510
-
-# MSC 6.00A has _MSC_VER predefined
-#DEFS =
-
-
-#DEBUG = -W3 -Zi -Od
-DEBUG=
-
-MODEL = L
-CC=cl -nologo -A$(MODEL)
-O=.obj
-
-# Disable MSC extensions with -Za so that __STDC__ is defined for MSC 6.00A
-# MSC 5.1 defines __STDC__=0 regardless of the ANSI flag
-CFLAGS = -Za $(DEFS) $(DEBUG)
-
-#LIBS = /NOD:$(MODEL)libce $(MODEL)libcer.lib
-LIBS =
-
-OBJ2=getid$O popen$O
-
-AWKOBJS = main$O eval$O builtin$O msg$O iop$O io$O field$O array$O \
- node$O version$O missing$O re$O
-ALLOBJS = $(AWKOBJS) awktab$O
-GNUOBJS= getopt$O getopt1$O regex$O dfa$O
-
-.SUFFIXES: $O .c .y
-
-.c$O:
- $(CC) $(CFLAGS) -DGAWK -DHAVE_CONFIG_H -c $<
-
-all: gawk.exe
-
-gawk.exe: $(ALLOBJS) $(GNUOBJS) $(OBJ2)
- link @names.lnk, $@,,$(LIBS) /NOE /st:30000;
-
-
-$(AWKOBJS): awk.h config.h
-dfa$O: awk.h config.h dfa.h
-regex$O: awk.h config.h regex.h
-main$O: patchlev.h
-
-# A bug in ndmake requires the following rule
-awktab$O: awk.h awktab.c
- $(CC) $(CFLAGS) -DGAWK -c awktab.c
-
-awktab.c: awk.y
- bison -o $@ awk.y
-
-clean:
- rm -f *.o *.obj core awk.output gmon.out make.out y.output
-
-.PHONY: test
-test:
- @echo Both dmake and GNU make require modifications to test/Makefile,
- @echo but here we go...
- cd test && $(MAKE) -k
diff --git a/pc/Makefile.os2 b/pc/Makefile.os2
deleted file mode 100644
index 635f12a8..00000000
--- a/pc/Makefile.os2
+++ /dev/null
@@ -1,125 +0,0 @@
-# Makefile for gawk (GNU awk) 17 Sep 1993
-#
-# - for GNU gcc (emx 0.8g kit) [executables for OS/2 2.x or DOS (32-bit)]
-# - for Microsoft C 6.00A [executables for OS/2 or MSDOS (16-bit)]
-# - for Microsoft C 5.1 [executable for OS/2 or DOS (16-bit)]
-
-# To use, enter "make -f Makefile.os2" (this makefile depends on its
-# name being "Makefile.os2").
-#
-# Tested with dmake 3.8 and GNU make 3.68 under OS/2
-
-default:
- @echo "Enter $(MAKE) -f Makefile.os2 target "
- @echo " where 'target' is chosen from "
- @echo " msc OS/2 exe [Microsoft C 6.00a] "
- @echo " mscbnd OS/2 and DOS exe [Microsoft C 6.00a] "
- @echo " mscdos DOS exe [Microsoft C 6.00a] "
- @echo " msc51bnd OS/2 and DOS exe [Microsoft C 5.1] "
- @echo " emx OS/2 32-bit exe [EMX/gcc; uses emxlibc.dll] "
- @echo " emxbnd OS/2 and DOS 32-bit exe [EMX/gcc] "
-
-
-# stdargv, glob, and director are from Stewartson's sh. These provide
-# globbing and enhanced argument-passing. MSC setargv.obj is a
-# more limited alternative (and it will permit a bound version).
-
-#STDARGV = stdargv.obj glob.obj director.obj
-STDARGV = setargv.obj
-
-msc:
- $(MAKE) -f Makefile.os2 all \
- CC="cl -nologo -AL" O=".obj" \
- CFLAGS="-D__STDC__ -DOS2 -UMSDOS" \
- OBJ2="" \
- LFLAGS="-Lp" \
- LFLAGS2="$(STDARGV) gawk.def -link /NOE /st:30000"
-
-mscbnd:
- $(MAKE) -f Makefile.os2 all \
- CC="cl -nologo -AL" O=".obj" \
- CFLAGS="-D__STDC__ -DOS2" \
- OBJ2="popen.obj" \
- LFLAGS="-Lp" \
- LFLAGS2="setargv.obj gawk.def -link /NOE /st:30000" \
- BIND="bind gawk /n DOSMAKEPIPE DOSCWAIT"
-
-msc-debug:
- $(MAKE) -f Makefile.os2 all \
- CC="cl -nologo -AL" O=".obj" \
- CFLAGS="-DOS2 -D__STDC__ -DDEBUG -DFUNC_TRACE -DMEMDEBUG -Zi -Od" \
- OBJ2="popen.obj" \
- LFLAGS="-Lp" \
- LFLAGS2="$(STDARGV) gawk.def -link /CO /NOE /st:30000"
-
-mscdos:
- $(MAKE) -f Makefile.os2 all \
- CC="cl -nologo -AL" O=".obj" \
- CFLAGS="-D__STDC__" \
- OBJ2="popen.obj" \
- LFLAGS="-Lr" \
- LFLAGS2="$(STDARGV) -link /NOE /st:30000"
-
-msc51bnd:
- $(MAKE) -f Makefile.os2 all \
- CC="cl -AL" O=".obj" \
- CFLAGS="-DOS2 -D_MSC_VER=510" \
- OBJ2="popen.obj" \
- LFLAGS="-Lp -Fb" \
- LFLAGS2="setargv.obj gawk.def -link /NOE /NOI /st:30000"
-
-emx:
- $(MAKE) -f Makefile.os2 all \
- CC="gcc -Zomf -Zmtd -O -s" O=".obj" \
- CFLAGS="-DOS2" \
- LFLAGS="" \
- LFLAGS2="gawk-32.def"
-
-emx-debug:
- $(MAKE) -f Makefile.os2 all \
- CC="gcc -g" O=".o" \
- CFLAGS="-DOS2" \
- LFLAGS="" \
- LFLAGS2="gawk-32.def"
-
-emxbnd:
- $(MAKE) -f Makefile.os2 all \
- CC="gcc -O -s" O=".o" \
- CFLAGS="-DOS2 -DMSDOS" \
- OBJ2="popen.o" \
- LFLAGS="" \
- LFLAGS2="gawk-32.def"
-
-AWKOBJS = main$O eval$O builtin$O msg$O iop$O io$O field$O array$O \
- node$O version$O missing$O re$O
-ALLOBJS = $(AWKOBJS) awktab$O getid$O
-GNUOBJS= getopt$O getopt1$O regex$O dfa$O
-
-.SUFFIXES: $O .c .y
-
-.c$O:
- $(CC) $(CFLAGS) -DGAWK -DHAVE_CONFIG_H -c $<
-
-all: gawk.exe
-
-gawk.exe: $(ALLOBJS) $(GNUOBJS) $(OBJ2)
- $(CC) -o $@ $(LFLAGS) $(ALLOBJS) $(GNUOBJS) $(OBJ2) $(LFLAGS2)
- $(BIND)
-
-$(AWKOBJS): awk.h config.h
-dfa$O: awk.h config.h dfa.h
-regex$O: awk.h config.h regex.h
-main$O: patchlevel.h
-awktab$O: awk.h awktab.c
-
-awktab.c: awk.y
- bison -o $@ awk.y
-
-clean:
- rm -f *.o *.obj core awk.output gmon.out make.out y.output
-
-.PHONY: test
-test:
- @echo Both dmake and GNU make require modifications to test/Makefile,
- @echo but here we go...
- cd test && $(MAKE) -k
diff --git a/pc/Makefile.tst b/pc/Makefile.tst
new file mode 100644
index 00000000..749d83ab
--- /dev/null
+++ b/pc/Makefile.tst
@@ -0,0 +1,376 @@
+# Makefile for GNU Awk test suite.
+#
+# Copyright (C) 1988-1995 the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+# ============================================================================
+# MS-DOS & OS/2 Notes: READ THEM!
+# ============================================================================
+
+# As of version 2.91, efforts to make this makefile run in MS-DOS and OS/2
+# have started in earnest. The following steps need to be followed in order
+# to run this makefile:
+#
+# 1. The first thing that you will need to do is to convert all of the
+# files ending in ".ok" in the test directory and all of the files ending
+# in ".good" (or ".goo") in the test/reg directory from having a linefeed
+# to having carriage return/linefeed at the end of each line. There are
+# various public domain UNIX to DOS converters and any should work.
+#
+# 2. You will need an sh-compatible shell. Please refer to the "README.pc"
+# file in the README_d directory for information about obtaining a copy.
+# You will also need various UNIX utilities. At a minimum, you will
+# need: rm, tr, cmp, cat, wc, and sh.
+# You should also have a UNIX-compatible date program.
+#
+# 3. You will need a \tmp directory on the same drive as the test directory
+# for the poundba (called poundbang in the UNIX makefile) test.
+#
+# The makefile has only been tested with dmake 3.8. After making all of these
+# changes, typing "dmake bigtest extra" should run successfully.
+
+# So far, the only MS-DOS & OS/2 shell that this has been found to work with
+# is Stewartson's sh 2.3. That version of sh will sometimes send long
+# command-line arguments to programs using the @ notation. You may need
+# to disable this feature of sh for programs that don't support it.  For more
+# information about the @ notation, please refer to
+# the sh documentation.
+
+# You will almost certainly need to change some of the values (MACROS)
+# defined on the next few lines.
+
+# .USESHELL is used by dmake.
+.USESHELL = yes
+
+# Using EMXSHELL=/bin/sh with emx versions can exhaust lower mem.
+# The .SWAP setting forces (DOS-only) dmake to swap itself out.
+#.SWAP: childin fflush
+
+# This won't work unless you have "sh" and set SHELL equal to it.
+#SHELL = e:\bin\sh.exe
+SHELL = /bin/sh
+
+# Point to gawk
+AWK = ../gawk.exe
+
+# Set your cmp command here
+CMP = cmp
+#CMP = gcmp
+
+# Set your "cp" command here. Note: It must take forward slashes.
+# 'command -c copy' will work for MS-DOS if "command=noexpand switch export" is
+# set in extend.lst.
+#CP = cp
+#CP = gcp
+CP = command -c copy
+
+# Set your unix-style date function here
+#DATE = date
+DATE = gdate
+
+# Set your mkdir command here.
+#MKDIR = /bin/mkdir
+MKDIR = command -c mkdir
+
+# ============================================================================
+# You shouldn't need to modify anything below this line.
+# ============================================================================
+
+srcdir = .
+
+bigtest: basic poundba gawk.extensions
+
+basic: msg swaplns messages argarray longwrds \
+ getline fstabplus compare arrayref rs fsrs rand \
+ fsbs negexp asgext anchgsub splitargv awkpath nfset reparse \
+ convfmt arrayparm paramdup nonl defref nofmtch litoct resplit \
+ rswhite prmarscl sclforin sclifin intprec childin noeffect \
+ numsubstr pcntplus prmreuse math fflush fldchg
+
+gawk.extensions: fieldwdth ignrcase posix manyfiles igncfs argtest \
+ badargs strftime gensub gnureops
+
+extra: regtes inftest
+
+poundba::
+# The need for "basename" has been removed for MS-DOS & OS/2 systems which
+# lack it.
+# cp $(AWK) /tmp/gawk && $(srcdir)/poundbang $(srcdir)/poundbang >_`basename $@`
+ $(CP) $(AWK) /tmp/gawk.exe && $(srcdir)/poundbang $(srcdir)/poundbang >_$@
+# rm -f /tmp/gawk
+ rm -f /tmp/gawk.exe
+# $(CMP) $(srcdir)/poundbang.ok _`basename $@` && rm -f _`basename $@`
+ $(CMP) $(srcdir)/poundbang.ok _$@ && rm -f _$@
+
+msg::
+ @echo 'Any output from "cmp" is bad news, although some differences'
+ @echo 'in floating point values are probably benign -- in particular,'
+ @echo 'some systems may omit a leading zero and the floating point'
+ @echo 'precision may lead to slightly different output in a few cases.'
+
+swaplns::
+ @echo 'If swaplns fails make sure that all of the .ok files have CR/LFs.'
+ @$(AWK) -f $(srcdir)/swaplns.awk $(srcdir)/swaplns.in >_$@
+ $(CMP) $(srcdir)/swaplns.ok _$@ && rm -f _$@
+
+messages::
+ @echo 'If messages fails, set sh to swap to disk only (in sh.rc).'
+ @$(AWK) -f $(srcdir)/messages.awk >out2 2>out3
+# { $(CMP) $(srcdir)/out1.ok out1 && $(CMP) $(srcdir)/out2.ok out2 && $(CMP) $(srcdir)/out3.ok out3 && rm -f out1 out2 out3; } || { test -d /dev/fd && echo IT IS OK THAT THIS TEST FAILED; }
+ { $(CMP) $(srcdir)/out1.ok out1 && $(CMP) $(srcdir)/out2.ok out2 && $(CMP) $(srcdir)/out3.ok out3; } || { test -d /dev/fd && echo OK TEST FAILED; }
+ rm -f out1 out2 out3
+
+argarray::
+ @case $(srcdir) in \
+ .) : ;; \
+ *) cp $(srcdir)/argarray.in . ;; \
+ esac
+ @TEST=test echo just a test | $(AWK) -f $(srcdir)/argarray.awk ./argarray.in - >_$@
+ $(CMP) $(srcdir)/argarray.ok _$@ && rm -f _$@
+
+fstabplus::
+ @echo '1 2' | $(AWK) -f $(srcdir)/fstabplus.awk >_$@
+ $(CMP) $(srcdir)/fstabplus.ok _$@ && rm -f _$@
+
+fsrs::
+ @$(AWK) -f $(srcdir)/fsrs.awk $(srcdir)/fsrs.in >_$@
+ $(CMP) $(srcdir)/fsrs.ok _$@ && rm -f _$@
+
+igncfs::
+ @$(AWK) -f $(srcdir)/igncfs.awk $(srcdir)/igncfs.in >_$@
+ $(CMP) $(srcdir)/igncfs.ok _$@ && rm -f _$@
+
+longwrds::
+ @$(AWK) -f $(srcdir)/longwrds.awk $(srcdir)/manpage | sort >_$@
+ $(CMP) $(srcdir)/longwrds.ok _$@ && rm -f _$@
+
+fieldwdth::
+ @echo '123456789' | $(AWK) -v FIELDWIDTHS="2 3 4" '{ print $$2}' >_$@
+ $(CMP) $(srcdir)/fieldwdth.ok _$@ && rm -f _$@
+
+ignrcase::
+ @echo xYz | $(AWK) -v IGNORECASE=1 '{ sub(/y/, ""); print}' >_$@
+ $(CMP) $(srcdir)/ignrcase.ok _$@ && rm -f _$@
+
+regtes::
+ @echo 'Some of the output from regtest is very system specific, do not'
+ @echo 'be distressed if your output differs from that distributed.'
+ @echo 'Manual inspection is called for.'
+ AWK=`pwd`/$(AWK) $(srcdir)/regtest
+
+posix::
+ @echo 'posix test may fail due to 1.500000e+000 not being equal to'
+ @echo '1.500000e+00 for MSC 7.0 gawk.'
+ @echo '1:2,3 4' | $(AWK) -f $(srcdir)/posix.awk >_$@
+# $(CMP) $(srcdir)/posix.ok _$@ && rm -f _$@
+ -$(CMP) $(srcdir)/posix.ok _$@ && rm -f _$@
+
+manyfiles::
+ @rm -rf junk
+# @mkdir junk
+ @$(MKDIR) junk
+ @$(AWK) 'BEGIN { for (i = 1; i <= 300; i++) print i, i}' >_$@
+ @$(AWK) -f $(srcdir)/manyfiles.awk _$@ _$@
+# @echo "This number better be 1 ->" | tr -d '\012'
+ @echo "This number better be 1 ->" | tr -d '\012\015'
+# @wc -l junk/* | $(AWK) '$$1 != 2' | wc -l
+ @wc -l "junk/*" | $(AWK) '$$1 != 2' | wc -l
+# The quotes above are for people with a "wc" that doesn't support sh's "@"
+# argument passing.
+ @rm -rf junk _$@
+
+compare::
+ @$(AWK) -f $(srcdir)/compare.awk 0 1 $(srcdir)/compare.in >_$@
+ $(CMP) $(srcdir)/compare.ok _$@ && rm -f _$@
+
+arrayref::
+ @$(AWK) -f $(srcdir)/arrayref.awk >_$@
+ $(CMP) $(srcdir)/arrayref.ok _$@ && rm -f _$@
+
+rs::
+ @$(AWK) -v RS="" '{ print $$1, $$2}' $(srcdir)/rs.in >_$@
+ $(CMP) $(srcdir)/rs.ok _$@ && rm -f _$@
+
+fsbs::
+ @$(AWK) -v FS='\' '{ print $$1, $$2 }' $(srcdir)/fsbs.in >_$@
+ $(CMP) $(srcdir)/fsbs.ok _$@ && rm -f _$@
+
+inftest::
+ @echo This test is very machine specific...
+ @echo 'MSC 7.0 gawk generates a floating point exception.'
+ @echo 'EMX gawk uses #INF rather than Inf.'
+# @$(AWK) -f $(srcdir)/inftest.awk >_$@
+ @-$(AWK) -f $(srcdir)/inftest.awk >_$@
+# $(CMP) $(srcdir)/inftest.ok _$@ && rm -f _$@
+ -$(CMP) $(srcdir)/inftest.ok _$@ && rm -f _$@
+
+getline::
+ @$(AWK) -f $(srcdir)/getline.awk $(srcdir)/getline.awk $(srcdir)/getline.awk >_$@
+ $(CMP) $(srcdir)/getline.ok _$@ && rm -f _$@
+
+rand::
+ @echo The following line should just be 19 random numbers between 1 and 100
+ @$(AWK) -f $(srcdir)/rand.awk
+
+negexp::
+ @$(AWK) 'BEGIN { a = -2; print 10^a }' >_$@
+ $(CMP) $(srcdir)/negexp.ok _$@ && rm -f _$@
+
+asgext::
+ @$(AWK) -f $(srcdir)/asgext.awk $(srcdir)/asgext.in >_$@
+ $(CMP) $(srcdir)/asgext.ok _$@ && rm -f _$@
+
+anchgsub::
+ @$(AWK) -f $(srcdir)/anchgsub.awk $(srcdir)/anchgsub.in >_$@
+ $(CMP) $(srcdir)/anchgsub.ok _$@ && rm -f _$@
+
+splitargv::
+ @$(AWK) -f $(srcdir)/splitargv.awk $(srcdir)/splitargv.in >_$@
+ $(CMP) $(srcdir)/splitargv.ok _$@ && rm -f _$@
+
+awkpath::
+# MS-DOS and OS/2 use ; as a PATH delimiter
+# @AWKPATH="$(srcdir):$(srcdir)/lib" $(AWK) -f awkpath.awk >_$@
+ @AWKPATH="$(srcdir);$(srcdir)/lib" $(AWK) -f awkpath.awk >_$@
+ $(CMP) $(srcdir)/awkpath.ok _$@ && rm -f _$@
+
+nfset::
+ @$(AWK) -f $(srcdir)/nfset.awk $(srcdir)/nfset.in >_$@
+ $(CMP) $(srcdir)/nfset.ok _$@ && rm -f _$@
+
+reparse::
+ @$(AWK) -f $(srcdir)/reparse.awk $(srcdir)/reparse.in >_$@
+ $(CMP) $(srcdir)/reparse.ok _$@ && rm -f _$@
+
+argtest::
+ @$(AWK) -f $(srcdir)/argtest.awk -x -y abc >_$@
+ $(CMP) $(srcdir)/argtest.ok _$@ && rm -f _$@
+
+badargs::
+# For MS-DOS & OS/2, we use " rather than ' in the usage statement.
+ @-$(AWK) -f 2>&1 | grep -v patchlevel >_$@
+# Next line converts " to ' for $(CMP) to work with UNIX badargs.ok
+ @cat _$@ | tr '\042' '\047' > _$@.2
+# $(CMP) $(srcdir)/badargs.ok _$@ && rm -f _$@
+ $(CMP) $(srcdir)/badargs.ok _$@.2 && rm -f _$@ _$@.2
+
+convfmt::
+ @$(AWK) -f $(srcdir)/convfmt.awk >_$@
+ $(CMP) $(srcdir)/convfmt.ok _$@ && rm -f _$@
+
+arrayparm::
+ @-AWKPATH=$(srcdir) $(AWK) -f arrayparm.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/arrayparm.ok _$@ && rm -f _$@
+
+paramdup::
+ @-AWKPATH=$(srcdir) $(AWK) -f paramdup.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/paramdup.ok _$@ && rm -f _$@
+
+nonl::
+# @-AWKPATH=$(srcdir) $(AWK) --lint -f nonl.awk /dev/null >_$@ 2>&1
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f nonl.awk NUL >_$@ 2>&1
+ $(CMP) $(srcdir)/nonl.ok _$@ && rm -f _$@
+
+defref::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f defref.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/defref.ok _$@ && rm -f _$@
+
+nofmtch::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f nofmtch.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/nofmtch.ok _$@ && rm -f _$@
+
+strftime::
+ : this test could fail on slow machines or on a second boundary,
+ : so if it does, double check the actual results
+# @date | $(AWK) '{ $$3 = sprintf("%02d", $$3 + 0) ; print }' > strftime.ok
+ @$(DATE) | $(AWK) '{ $$3 = sprintf("%02d", $$3 + 0) ; print }' > strftime.ok
+ @$(AWK) 'BEGIN { print strftime() }' >_$@
+ -$(CMP) strftime.ok _$@ && rm -f _$@ strftime.ok
+
+litoct::
+ @echo ab | $(AWK) --traditional -f $(srcdir)/litoct.awk >_$@
+ $(CMP) $(srcdir)/litoct.ok _$@ && rm -f _$@
+
+gensub::
+ @$(AWK) -f $(srcdir)/gensub.awk $(srcdir)/gensub.in >_$@
+ $(CMP) $(srcdir)/gensub.ok _$@ && rm -f _$@
+
+resplit::
+ @echo 'If resplit fails, check extend.lst and remove "unix" by the "gawk=" line'
+ @echo a:b:c d:e:f | $(AWK) '{ FS = ":"; $$0 = $$0; print $$2 }' > _$@
+ $(CMP) $(srcdir)/resplit.ok _$@ && rm -f _$@
+
+rswhite::
+ @$(AWK) -f $(srcdir)/rswhite.awk $(srcdir)/rswhite.in > _$@
+ $(CMP) $(srcdir)/rswhite.ok _$@ && rm -f _$@
+
+prmarscl::
+ @-AWKPATH=$(srcdir) $(AWK) -f prmarscl.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/prmarscl.ok _$@ && rm -f _$@
+
+sclforin::
+ @-AWKPATH=$(srcdir) $(AWK) -f sclforin.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/sclforin.ok _$@ && rm -f _$@
+
+sclifin::
+ @-AWKPATH=$(srcdir) $(AWK) -f sclifin.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/sclifin.ok _$@ && rm -f _$@
+
+intprec::
+ @-$(AWK) -f $(srcdir)/intprec.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/intprec.ok _$@ && rm -f _$@
+
+childin::
+ @echo hi | $(AWK) 'BEGIN { "cat" | getline; print; close("cat") }' > _$@
+ $(CMP) $(srcdir)/childin.ok _$@ && rm -f _$@
+
+noeffect::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f noeffect.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/noeffect.ok _$@ && rm -f _$@
+
+numsubstr::
+ @-AWKPATH=$(srcdir) $(AWK) -f numsubstr.awk $(srcdir)/numsubstr.in >_$@
+ $(CMP) $(srcdir)/numsubstr.ok _$@ && rm -f _$@
+
+gnureops::
+ @$(AWK) -f $(srcdir)/gnureops.awk >_$@
+ $(CMP) $(srcdir)/gnureops.ok _$@ && rm -f _$@
+
+pcntplus::
+ @$(AWK) -f $(srcdir)/pcntplus.awk >_$@
+ $(CMP) $(srcdir)/pcntplus.ok _$@ && rm -f _$@
+
+prmreuse::
+ @$(AWK) -f $(srcdir)/prmreuse.awk >_$@
+ $(CMP) $(srcdir)/prmreuse.ok _$@ && rm -f _$@
+
+math::
+ @$(AWK) -f $(srcdir)/math.awk >_$@
+ $(CMP) $(srcdir)/math.ok _$@ && rm -f _$@
+
+fflush::
+ @$(srcdir)/fflush.sh >_$@
+ $(CMP) $(srcdir)/fflush.ok _$@ && rm -f _$@
+
+fldchg::
+ @$(AWK) -f $(srcdir)/fldchg.awk $(srcdir)/fldchg.in >_$@
+ $(CMP) $(srcdir)/fldchg.ok _$@ && rm -f _$@
+
+clean:
+ rm -fr _* core junk
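The notes at the top of this makefile call for converting the .ok (and .good) comparison files from LF to CR/LF line endings before running the suite. Any UNIX-to-DOS converter will do; as a rough sketch only (the lf2crlf.awk name and the .new output suffix are invented for illustration and are not part of this change), the conversion could also be done with gawk itself:

# lf2crlf.awk --- hypothetical LF -> CR/LF converter, run before copying the
# comparison files to DOS; writes each converted file next to the original
BEGIN { ORS = "\r\n" }            # end every output record with CR/LF
{
    sub(/\r$/, "")                # avoid doubling the CR on already-converted files
    print > (FILENAME ".new")     # e.g. swaplns.ok becomes swaplns.ok.new
}

Run it as "gawk -f lf2crlf.awk *.ok" and then rename the .new files over the originals.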
diff --git a/pc/awklib/igawk b/pc/awklib/igawk
new file mode 100644
index 00000000..7c599dca
--- /dev/null
+++ b/pc/awklib/igawk
@@ -0,0 +1,85 @@
+#! /bin/sh
+
+# igawk --- like gawk but do @include processing
+# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain
+# July 1993
+
+igs=${TMP:-/tmp}/igs$$
+ige=${TMP:-/tmp}/ige$$
+
+if [ "$1" = debug ]
+then
+ set -x
+ shift
+else
+ # cleanup on exit, hangup, interrupt, quit, termination
+ #trap 'rm -f $igs $ige' 0 1 2 3 15
+ trap 'rm -f $igs $ige' 0 2 15
+fi
+
+while [ $# -ne 0 ] # loop over arguments
+do
+ case $1 in
+ --) shift; break;;
+
+ -W) shift
+ set -- -W"$@"
+ continue;;
+
+ -[vF]) opts="$opts $1 '$2'"
+ shift;;
+
+ -[vF]*) opts="$opts '$1'" ;;
+
+ -f) echo @include "$2" >> $igs
+ shift;;
+
+ -f*) f=`echo "$1" | sed 's/-f//'`
+ echo @include "$f" >> $igs ;;
+
+ -?file=*) # -Wfile or --file
+ f=`echo "$1" | sed 's/-.file=//'`
+ echo @include "$f" >> $igs ;;
+
+ -?file) # get arg, $2
+ echo @include "$2" >> $igs
+ shift;;
+
+ -?source=*) # -Wsource or --source
+ t=`echo "$1" | sed 's/-.source=//'`
+ echo "$t" >> $igs ;;
+
+ -?source) # get arg, $2
+ echo "$2" >> $igs
+ shift;;
+
+ -?version)
+ echo igawk: version 1.0 1>&2
+ gawk --version
+ exit 0 ;;
+
+ -[W-]*) opts="$opts '$1'" ;;
+
+ *) break;;
+ esac
+
+ shift
+done
+
+if [ ! -s $igs ]
+then
+ if [ -z "$1" ]
+ then
+ echo igawk: no program! 1>&2
+ exit 1
+ else
+ echo "$1" > $igs
+ shift
+ fi
+fi
+
+# at this point, $igs has the program
+gawk -f igawk.awk $igs > $ige
+eval gawk -f '$ige' $opts -- "$@"
+
+exit $?
diff --git a/pc/awklib/igawk.awk b/pc/awklib/igawk.awk
new file mode 100644
index 00000000..dc0ba405
--- /dev/null
+++ b/pc/awklib/igawk.awk
@@ -0,0 +1,51 @@
+# igawk.awk
+# process @include directives
+
+function pathto(file, i, t, junk)
+{
+ if (index(file, "/") != 0)
+ return file
+
+ for (i = 1; i <= ndirs; i++) {
+ t = (pathlist[i] "/" file)
+ if ((getline junk < t) > 0) {
+ # found it
+ close(t)
+ return t
+ }
+ }
+ return ""
+}
+BEGIN {
+ path = ENVIRON["AWKPATH"]
+ ndirs = split(path, pathlist, ";")
+ for (i = 1; i <= ndirs; i++) {
+ if (pathlist[i] == "")
+ pathlist[i] = "."
+ }
+ stackptr = 0
+ input[stackptr] = ARGV[1] # ARGV[1] is first file
+
+ for (; stackptr >= 0; stackptr--) {
+ while ((getline < input[stackptr]) > 0) {
+ if (tolower($1) != "@include") {
+ print
+ continue
+ }
+ fpath = pathto($2)
+ if (fpath == "") {
+ printf("igawk:%s:%d: cannot find %s\n", \
+ input[stackptr], FNR, $2) > "/dev/stderr"
+ continue
+ }
+ if (! (fpath in processed)) {
+ processed[fpath] = input[stackptr]
+ input[++stackptr] = fpath
+ } else
+ print $2, "included in", input[stackptr], \
+ "already included in", \
+ processed[fpath] > "/dev/stderr"
+ }
+ close(input[stackptr])
+ }
+}
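igawk.awk above searches the AWKPATH directories (split on ';' for DOS and OS/2) and keeps a stack of open files, so @include lines found inside an included file are expanded as well, and a duplicate include is reported instead of being read again. A small hypothetical example (main.awk and join.awk are invented names, not files in this patch):

# main.awk --- driver program; igawk.awk removes the @include line and
# substitutes the contents of join.awk located via AWKPATH
@include join.awk
{ n = split($0, parts, ":"); print join(parts, n, "-") }

# join.awk --- library file placed in one of the AWKPATH directories
function join(a, n, sep,    i, s)
{
    s = a[1]
    for (i = 2; i <= n; i++)
        s = s sep a[i]
    return s
}

With AWKPATH set to something like ".;c:/lib/awk", running "sh igawk -f main.awk data.txt" expands both the -f file and the nested @include into a single temporary program before gawk runs it.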
diff --git a/pc/awklib/igawk.bat b/pc/awklib/igawk.bat
new file mode 100644
index 00000000..bfc9b2a3
--- /dev/null
+++ b/pc/awklib/igawk.bat
@@ -0,0 +1 @@
+@sh igawk %1 %2 %3 %4 %5 %6 %7 %8 %9
\ No newline at end of file
diff --git a/pc/config.h b/pc/config.h
index 06b3beca..00255c42 100644
--- a/pc/config.h
+++ b/pc/config.h
@@ -1,19 +1,19 @@
+/* config.h. Generated automatically by configure. */
+/* configh.in. Generated automatically from configure.in by autoheader. */
/*
- * config.h -- configuration definitions for gawk.
- *
- * OS/2 or MS-DOS systems using emx/gcc or MSC
+ * acconfig.h -- configuration definitions for gawk.
*/
/*
- * Copyright (C) 1991-1993 the Free Software Foundation, Inc.
+ * Copyright (C) 1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
*
* GAWK is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -21,260 +21,193 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
-/*
- * This file isolates configuration dependencies for gnu awk.
- * You should know something about your system, perhaps by having
- * a manual handy, when you edit this file. You should copy config.h-dist
- * to config.h, and edit config.h. Do not modify config.h-dist, so that
- * it will be easy to apply any patches that may be distributed.
- *
- * The general idea is that systems conforming to the various standards
- * should need to do the least amount of changing. Definining the various
- * items in ths file usually means that your system is missing that
- * particular feature.
- *
- * The order of preference in standard conformance is ANSI C, POSIX,
- * and the SVID.
- *
- * If you have no clue as to what's going on with your system, try
- * compiling gawk without editing this file and see what shows up
- * missing in the link stage. From there, you can probably figure out
- * which defines to turn on.
- */
-/**************************/
-/* Miscellanious features */
-/**************************/
+/* Define if on AIX 3.
+ System headers sometimes define this.
+ We just want to avoid a redefinition error message. */
+#ifndef _ALL_SOURCE
+/* #undef _ALL_SOURCE */
+#endif
-/*
- * BLKSIZE_MISSING
- *
- * Check your /usr/include/sys/stat.h file. If the stat structure
- * does not have a member named st_blksize, define this. (This will
- * most likely be the case on most System V systems prior to V.4.)
- */
-#define BLKSIZE_MISSING 1
+/* Define if using alloca.c. */
+/* #undef C_ALLOCA */
-/*
- * SIGTYPE
- *
- * The return type of the routines passed to the signal function.
- * Modern systems use `void', older systems use `int'.
- * If left undefined, it will default to void.
- */
-/* #define SIGTYPE int */
+/* Define if type char is unsigned and you are not using gcc. */
+#ifndef __CHAR_UNSIGNED__
+/* #undef __CHAR_UNSIGNED__ */
+#endif
-/*
- * SIZE_T_MISSING
- *
- * If your system has no typedef for size_t, define this to get a default
- */
-/* #define SIZE_T_MISSING 1 */
+/* Define to empty if the keyword does not work. */
+/* #undef const */
-/*
- * CHAR_UNSIGNED
- *
- * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
- * then define this for use in regex.c
- */
-/* #define CHAR_UNSIGNED 1 */
+/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems.
+ This function is required for alloca.c support on those systems. */
+/* #undef CRAY_STACKSEG_END */
-/*
- * HAVE_UNDERSCORE_SETJMP
- *
- * Check in your /usr/include/setjmp.h file. If there are routines
- * there named _setjmp and _longjmp, then you should define this.
- * Typically only systems derived from Berkeley Unix have this.
- */
-/* #define HAVE_UNDERSCORE_SETJMP 1 */
+/* Define to the type of elements in the array set by `getgroups'.
+ Usually this is either `int' or `gid_t'. */
+#define GETGROUPS_T gid_t
-/***********************************************/
-/* Missing library subroutines or system calls */
-/***********************************************/
+/* Define if the `getpgrp' function takes no argument. */
+#define GETPGRP_VOID 1
-/*
- * MEMCMP_MISSING
- * MEMCPY_MISSING
- * MEMSET_MISSING
- *
- * These three routines are for manipulating blocks of memory. Most
- * likely they will either all three be present or all three be missing,
- * so they're grouped together.
- */
-/* #define MEMCMP_MISSING 1 */
-/* #define MEMCPY_MISSING 1 */
-/* #define MEMSET_MISSING 1 */
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef gid_t */
-/*
- * RANDOM_MISSING
- *
- * Your system does not have the random(3) suite of random number
- * generating routines. These are different than the old rand(3)
- * routines!
- */
-#define RANDOM_MISSING 1
+/* Define if you have alloca, as a function or macro. */
+#define HAVE_ALLOCA 1
-/*
- * STRCASE_MISSING
- *
- * Your system does not have the strcasemp() and strncasecmp()
- * routines that originated in Berkeley Unix.
- */
-/* #define STRCASE_MISSING 1 */
-#define strcasecmp stricmp
-#define strncasecmp strnicmp
+/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
+#define HAVE_ALLOCA_H 1
-/*
- * STRCHR_MISSING
- *
- * Your system does not have the strchr() and strrchr() functions.
- */
-/* #define STRCHR_MISSING 1 */
+/* Define if you don't have vprintf but do have _doprnt. */
+/* #undef HAVE_DOPRNT */
-/*
- * STRERROR_MISSING
- *
- * Your system lacks the ANSI C strerror() routine for returning the
- * strings associated with errno values.
- */
-/* #define STRERROR_MISSING 1 */
+/* Define if your struct stat has st_blksize. */
+#define HAVE_ST_BLKSIZE 1
-/*
- * STRTOD_MISSING
- *
- * Your system does not have the strtod() routine for converting
- * strings to double precision floating point values.
- */
-/* #define STRTOD_MISSING 1 */
+/* Define if your struct tm has tm_zone. */
+/* #undef HAVE_TM_ZONE */
-/*
- * STRFTIME_MISSING
- *
- * Your system lacks the ANSI C strftime() routine for formatting
- * broken down time values.
- */
-#define STRFTIME_MISSING 1
+/* Define if you don't have tm_zone but do have the external array
+ tzname. */
+#define HAVE_TZNAME 1
-/*
- * TZSET_MISSING
- *
- * If you have a 4.2 BSD vintage system, then the strftime() routine
- * supplied in the missing directory won't be enough, because it relies on the
- * tzset() routine from System V / Posix. Fortunately, there is an
- * emulation for tzset() too that should do the trick. If you don't
- * have tzset(), define this.
- */
-/* #define TZSET_MISSING 1 */
+/* Define if you have the vprintf function. */
+#define HAVE_VPRINTF 1
-/*
- * TZNAME_MISSING
- *
- * Some systems do not support the external variables tzname and daylight.
- * If this is the case *and* strftime() is missing, define this.
- */
-/* #define TZNAME_MISSING 1 */
+/* Define if on MINIX. */
+/* #undef _MINIX */
-/*
- * STDC_HEADERS
- *
- * If your system does have ANSI compliant header files that
- * provide prototypes for library routines, then define this.
- */
-#define STDC_HEADERS 1
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef pid_t */
-/*
- * NO_TOKEN_PASTING
- *
- * If your compiler define's __STDC__ but does not support token
- * pasting (tok##tok), then define this.
- */
-/* #define NO_TOKEN_PASTING 1 */
+/* Define if the system does not provide POSIX.1 features except
+ with this defined. */
+/* #undef _POSIX_1_SOURCE */
-/*****************************************************************/
-/* Stuff related to the Standard I/O Library. */
-/*****************************************************************/
-/* Much of this is (still, unfortunately) black magic in nature. */
-/* You may have to use some or all of these together to get gawk */
-/* to work correctly. */
-/*****************************************************************/
+/* Define if you need to in order for stat and other things to work. */
+/* #undef _POSIX_SOURCE */
-/*
- * NON_STD_SPRINTF
- *
- * Look in your /usr/include/stdio.h file. If the return type of the
- * sprintf() function is NOT `int', define this.
- */
-/* #define NON_STD_SPRINTF 1 */
+/* Define as the return type of signal handlers (int or void). */
+#define RETSIGTYPE void
-/*
- * VPRINTF_MISSING
- *
- * Define this if your system lacks vprintf() and the other routines
- * that go with it. This will trigger an attempt to use _doprnt().
- * If you don't have that, this attempt will fail and you are on your own.
- */
-/* #define VPRINTF_MISSING 1 */
+/* Define to `unsigned' if <sys/types.h> doesn't define. */
+/* #undef size_t */
-/*
- * Casts from size_t to int and back. These will become unnecessary
- * at some point in the future, but for now are required where the
- * two types are a different representation.
+/* If using the C implementation of alloca, define if you know the
+ direction of stack growth for your system; otherwise it will be
+ automatically deduced at run-time.
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown
*/
-/* #define SZTC */
-/* #define INTC */
+/* #undef STACK_DIRECTION */
-/*
- * SYSTEM_MISSING
- *
- * Define this if your library does not provide a system function
- * or you are not entirely happy with it and would rather use
- * a provided replacement (atari only).
- */
-/* #define SYSTEM_MISSING 1 */
+/* Define if you have the ANSI C header files. */
+#define STDC_HEADERS 1
-/*
- * FMOD_MISSING
- *
- * Define this if your system lacks the fmod() function and modf() will
- * be used instead.
- */
-/* #define FMOD_MISSING 1 */
+/* Define if you can safely include both <sys/time.h> and <time.h>. */
+#define TIME_WITH_SYS_TIME 1
+/* Define if your <sys/time.h> declares struct tm. */
+/* #undef TM_IN_SYS_TIME */
-/*******************************/
-/* Gawk configuration options. */
-/*******************************/
+/* Define to `int' if <sys/types.h> doesn't define. */
+/* #undef uid_t */
-/*
- * DEFPATH
- *
- * The default search path for the -f option of gawk. It is used
- * if the AWKPATH environment variable is undefined. The default
- * definition is provided here. Most likely you should not change
- * this.
- */
+/* #undef GETPGRP_IS_STANDARD */ /* getpgrp does/does not take an argument */
+/* #undef HAVE_BCOPY */ /* we have the bcopy function */
+#define HAVE_MEMCPY 1 /* we have the memcpy function */
+#define HAVE_STRINGIZE 1 /* can use ANSI # operator in cpp */
+#define HAVE_STRING_H 1 /* the <string.h> header file */
+#define REGEX_MALLOC 1 /* use malloc instead of alloca in regex.c */
+#define SPRINTF_RET int /* return type of sprintf */
-/* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
-/* #define ENVSEP ':' */
-#define ENVSEP ';'
+/* Define if you have the fmod function. */
+/* #undef HAVE_FMOD */
-/*
- * alloca already has a prototype defined - don't redefine it
- */
-/* #define ALLOCA_PROTO 1 */
+/* Define if you have the memcmp function. */
+#define HAVE_MEMCMP 1
-/*
- * srandom already has a prototype defined - don't redefine it
- */
-/* #define SRANDOM_PROTO 1 */
+/* Define if you have the memcpy function. */
+#define HAVE_MEMCPY 1
-/*
- * getpgrp() in sysvr4 and POSIX takes no argument
- */
-/* #define GETPGRP_NOARG 0 */
+/* Define if you have the memset function. */
+#define HAVE_MEMSET 1
+
+/* Define if you have the random function. */
+#define HAVE_RANDOM 1
+
+/* Define if you have the strchr function. */
+#define HAVE_STRCHR 1
+
+/* Define if you have the strerror function. */
+#define HAVE_STRERROR 1
+
+/* Define if you have the strftime function. */
+#define HAVE_STRFTIME 1
+
+/* Define if you have the strncasecmp function. */
+#define HAVE_STRNCASECMP 1
+
+/* Define if you have the strtod function. */
+#define HAVE_STRTOD 1
+
+/* Define if you have the system function. */
+#define HAVE_SYSTEM 1
+
+/* Define if you have the tzset function. */
+#define HAVE_TZSET 1
+
+/* Define if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define if you have the <signum.h> header file. */
+/* #undef HAVE_SIGNUM_H */
+
+/* Define if you have the <stdarg.h> header file. */
+#define HAVE_STDARG_H 1
+
+/* Define if you have the <strings.h> header file. */
+/* #undef HAVE_STRINGS_H */
+
+/* Define if you have the <sys/param.h> header file. */
+#define HAVE_SYS_PARAM_H 1
+
+/* Define if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+
+/* Library search path */
+#define DEFPATH ".;c:/lib/awk;c:/gnu/lib/awk"
+
+#if defined (_MSC_VER)
+#if !defined(__STDC__)
+# define __STDC__ 1
+#endif
+#undef HAVE_UNISTD_H
+#undef HAVE_SYS_PARAM_H
+#undef HAVE_RANDOM
+#define RANDOM_MISSING
+/* msc strftime is incomplete, use supplied version */
+#undef HAVE_STRFTIME
+/* #define HAVE_TM_ZONE */
+#define altzone timezone
+#if defined(OS2) /* declare alloca for bison */
+void * alloca(unsigned);
+#endif
+#endif
+
+#if defined (_MSC_VER) || defined(__EMX__)
+#define strcasecmp stricmp
+#define strncasecmp strnicmp
+#endif
-/* anything that follows is for system-specific short-term kludges */
+#if defined(DJGPP)
+# define HAVE_LIMITS_H
+#endif
diff --git a/pc/gawk-32.def b/pc/gawk-32.def
deleted file mode 100644
index 968a0cb1..00000000
--- a/pc/gawk-32.def
+++ /dev/null
@@ -1,3 +0,0 @@
-NAME gawk WINDOWCOMPAT NEWFILES
-DESCRIPTION 'GNU awk for OS/2'
-STACKSIZE 0x100000
diff --git a/pc/gawkmisc.pc b/pc/gawkmisc.pc
new file mode 100644
index 00000000..05db6c36
--- /dev/null
+++ b/pc/gawkmisc.pc
@@ -0,0 +1,134 @@
+/*
+ * gawkmisc.c --- miscellaneous gawk routines that are OS specific.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991 - 95 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+char quote = '"';
+char envsep = ';';
+#ifdef DEFPATH
+char *defpath = DEFPATH;
+#else
+char *defpath = ".;c:\\lib\\awk;c:\\gnu\\lib\\awk";
+#endif
+
+/* gawk_name --- pull out the "gawk" part from how the OS called us */
+
+char *
+gawk_name(filespec)
+const char *filespec;
+{
+ char *p, *q;
+
+ p = (char *) filespec; /* Sloppy... */
+
+ /* OS/2 allows / for directory separator too */
+ if ((q = strrchr(p, '\\')) != NULL)
+ p = q + 1;
+ if ((q = strrchr(p, '/')) != NULL)
+ p = q + 1;
+ if ((q = strchr(p, '.')) != NULL)
+ *q = '\0';
+ return strlwr(p);
+}
+
+/* os_arg_fixup --- fixup the command line */
+
+void
+os_arg_fixup(argcp, argvp)
+int *argcp;
+char ***argvp;
+{
+#ifdef __EMX__
+ _response(argcp, argvp);
+ _wildcard(argcp, argvp);
+ setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
+#endif
+ return;
+}
+
+/* os_devopen --- open special per-OS devices */
+
+int
+os_devopen(name, flag)
+const char *name;
+int flag;
+{
+ /* no-op */
+ return -1;
+}
+
+/* optimal_bufsize --- determine optimal buffer size */
+
+int
+optimal_bufsize(fd, stb)
+int fd;
+struct stat *stb;
+{
+ /* force all members to zero in case OS doesn't use all of them. */
+ memset(stb, '\0', sizeof(struct stat));
+
+ /*
+ * DOS doesn't have the file system block size in the
+ * stat structure. So we have to make some sort of reasonable
+ * guess. We use stdio's BUFSIZ, since that is what it was
+ * meant for in the first place.
+ */
+#define DEFBLKSIZE BUFSIZ
+
+ if (isatty(fd))
+ return BUFSIZ;
+ if (fstat(fd, stb) == -1)
+ fatal("can't stat fd %d (%s)", fd, strerror(errno));
+ if (lseek(fd, (off_t)0, 0) == -1) /* not a regular file */
+ return DEFBLKSIZE;
+ if (stb->st_size > 0 && stb->st_size < DEFBLKSIZE) /* small file */
+ return stb->st_size;
+ return DEFBLKSIZE;
+}
+
+/* ispath --- return true if path has directory components */
+
+int
+ispath(file)
+const char *file;
+{
+ for (; *file; file++) {
+ switch (*file) {
+ case '/':
+ case '\\':
+ case ':':
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* isdirpunct --- return true if char is a directory separator */
+
+int
+isdirpunct(c)
+int c;
+{
+ return (strchr(":\\/", c) != NULL);
+}
+
diff --git a/pc/getid.c b/pc/getid.c
index d645b39f..20cec884 100644
--- a/pc/getid.c
+++ b/pc/getid.c
@@ -1,69 +1,62 @@
-#ifdef _MSC_VER
-
-#ifdef OS2
-# define INCL_DOSPROCESS
-# include <os2.h>
-# if _MSC_VER == 510
-# define DosGetPID DosGetPid
-# endif
-#else
-# include <process.h>
-#endif
-
-#ifdef OS2
-int getpid(void)
-{
- PIDINFO PidInfo;
-
- DosGetPID(&PidInfo);
- return(PidInfo.pid);
-}
-#endif
-
-int getppid(void)
-{
-#ifdef OS2
- PIDINFO PidInfo;
-
- DosGetPID(&PidInfo);
- return(PidInfo.pidParent);
-#else
- return(0);
-#endif
-}
-
-unsigned int getuid (void)
-{
- return (0); /* root! */
-}
-
-
-unsigned int geteuid (void)
-{
- return (0);
-}
-
-
-unsigned int getgid (void)
-{
- return (0);
-}
-
-
-unsigned int getegid (void)
-{
- return (0);
-}
-
-
-char *getlogin (void)
-{
- return ("root");
-}
-
-#endif
-
-int getpgrp(void)
-{
- return (0);
-}
+#ifdef _MSC_VER
+
+#ifdef OS2
+# define INCL_DOSPROCESS
+# include <os2.h>
+# if _MSC_VER == 510
+# define DosGetPID DosGetPid
+# endif
+#else
+# include <process.h>
+#endif
+
+#ifdef OS2
+int getpid(void)
+{
+ PIDINFO PidInfo;
+
+ DosGetPID(&PidInfo);
+ return(PidInfo.pid);
+}
+#endif
+
+unsigned int getuid (void)
+{
+ return (0); /* root! */
+}
+
+unsigned int geteuid (void)
+{
+ return (0);
+}
+
+unsigned int getgid (void)
+{
+ return (0);
+}
+
+unsigned int getegid (void)
+{
+ return (0);
+}
+
+#endif
+
+int getpgrp(void)
+{
+ return (0);
+}
+
+#if defined(_MSC_VER) || defined(__GO32__)
+int getppid(void)
+{
+#ifdef OS2
+ PIDINFO PidInfo;
+
+ DosGetPID(&PidInfo);
+ return(PidInfo.pidParent);
+#else
+ return(0);
+#endif
+}
+#endif
diff --git a/pc/include/fcntl.h b/pc/include/fcntl.h
new file mode 100644
index 00000000..5ebc7a6b
--- /dev/null
+++ b/pc/include/fcntl.h
@@ -0,0 +1,3 @@
+#undef __STDC__
+#include <fcntl.h>
+#define __STDC__ 1
diff --git a/pc/include/stdio.h b/pc/include/stdio.h
new file mode 100644
index 00000000..25fb7c97
--- /dev/null
+++ b/pc/include/stdio.h
@@ -0,0 +1,3 @@
+#undef __STDC__
+#include <stdio.h>
+#define __STDC__ 1
diff --git a/pc/include/stdlib.h b/pc/include/stdlib.h
new file mode 100644
index 00000000..0789b242
--- /dev/null
+++ b/pc/include/stdlib.h
@@ -0,0 +1,3 @@
+#undef __STDC__
+#include <stdlib.h>
+#define __STDC__ 1
diff --git a/pc/include/string.h b/pc/include/string.h
new file mode 100644
index 00000000..48e942fa
--- /dev/null
+++ b/pc/include/string.h
@@ -0,0 +1,3 @@
+#undef __STDC__
+#include <string.h>
+#define __STDC__ 1
diff --git a/pc/include/sys/stat.h b/pc/include/sys/stat.h
new file mode 100644
index 00000000..904d056e
--- /dev/null
+++ b/pc/include/sys/stat.h
@@ -0,0 +1,3 @@
+#undef __STDC__
+#include <sys/stat.h>
+#define __STDC__ 1
diff --git a/pc/include/sys/types.h b/pc/include/sys/types.h
new file mode 100644
index 00000000..b942dda8
--- /dev/null
+++ b/pc/include/sys/types.h
@@ -0,0 +1,3 @@
+#undef __STDC__
+#include <sys/types.h>
+#define __STDC__ 1
diff --git a/pc/include/time.h b/pc/include/time.h
new file mode 100644
index 00000000..b4a9ea3f
--- /dev/null
+++ b/pc/include/time.h
@@ -0,0 +1,3 @@
+#undef __STDC__
+#include <time.h>
+#define __STDC__ 1
diff --git a/pc/install.awk b/pc/install.awk
new file mode 100644
index 00000000..1b125d47
--- /dev/null
+++ b/pc/install.awk
@@ -0,0 +1,61 @@
+# install.awk
+# awk script to handle "make install". Goal is to eliminate need for
+# extra utilities (such as sh, mkdir, and cp). This is a hack.
+
+function mkinstalldirs(dir, i, ii, j, jj, s, comp, mkdir)
+{
+ gsub("/", "\\", dir); ii = split(dir, s, " ")
+ print "@echo off" > install_bat
+ print "@echo off" > install_cmd
+ for (i = 1; i <= ii; i++) {
+ jj = split(s[i], comp, "\\"); dir = comp[1];
+ for (j = 1; j <= jj; dir=dir "\\" comp[++j]) {
+ if (substr(dir, length(dir)) == ":" || mkdir[dir]) continue;
+ printf("if not exist %s\\*.* mkdir %s\n", dir, dir) > install_bat
+ printf("if not exist %s\\* mkdir %s\n", dir, dir) > install_cmd
+ mkdir[dir] = 1
+ }
+ }
+ close(install_bat); close(install_cmd)
+ system(install)
+}
+
+function cp(s, j, n, comp)
+{
+ gsub("/", "\\", s); n = split(s, comp, " ");
+ print "@echo off" > install_bat
+ print "@echo off" > install_cmd
+ for (j = 1; j < n; j++) {
+ printf("copy %s %s\n", comp[j], comp[n]) > install_cmd
+ if (substr(comp[j], length(comp[j]), 1) == "*")
+ comp[j] = comp[j] ".*"
+ printf("copy %s %s\n", comp[j], comp[n]) > install_bat
+ }
+ close(install_bat); close(install_cmd)
+ system(install)
+}
+
+BEGIN{
+install = "installg"
+install_bat = install ".bat"; install_cmd = install ".cmd"
+igawk_cmd = prefix "/bin/igawk.cmd"
+igawk_bat = prefix "/bin/igawk.bat"
+igawk = "pc/awklib/igawk"
+
+# Make the bin directory
+mkinstalldirs(prefix "/bin");
+
+# Create igawk.cmd for OS/2
+printf("extproc sh %s/bin/igawk.cmd\nshift\n", prefix) > igawk_cmd
+while (getline < igawk) print $0 > igawk_cmd
+
+# Create igawk.bat for DOS
+printf("@sh %s/bin/igawk %%1 %%2 %%3 %%4 %%5 %%6 %%7 %%8 %%9", prefix) > igawk_bat
+
+# Do common
+cp(igawk " gawk.exe " prefix "/bin")
+mkinstalldirs(prefix "/lib/awk " prefix "/man/man1 " prefix "/info")
+cp("awklib/eg/lib/* pc/awklib/igawk.awk " prefix "/lib/awk");
+cp("doc/*.1 " prefix "/man/man1");
+cp("doc/gawk.info " prefix "/info");
+}
diff --git a/pc/makegawk.bat b/pc/makegawk.bat
deleted file mode 100644
index 2e69c7fa..00000000
--- a/pc/makegawk.bat
+++ /dev/null
@@ -1,65 +0,0 @@
-REM Simple brute force command file for building gawk under msdos
-REM
-REM *** This has only been tested using MSC 5.1 and MSC 6.00A ***
-REM
-REM Written by Arnold Robbins, May 1991
-REM Modified by Scott Deifik, July, 1992, Sep 1993
-REM Based on earlier makefile for dos
-REM
-REM Copyright (C) 1986, 1988, 1989, 1991, 1993 the Free Software Foundation, Inc.
-REM
-REM This file is part of GAWK, the GNU implementation of the
-REM AWK Progamming Language.
-REM
-REM GAWK is free software; you can redistribute it and/or modify
-REM it under the terms of the GNU General Public License as published by
-REM the Free Software Foundation; either version 2 of the License, or
-REM (at your option) any later version.
-REM
-REM GAWK is distributed in the hope that it will be useful,
-REM but WITHOUT ANY WARRANTY; without even the implied warranty of
-REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-REM GNU General Public License for more details.
-REM
-REM You should have received a copy of the GNU General Public License
-REM along with GAWK; see the file COPYING. If not, write to
-REM the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-REM
-REM compile debug flags: -DDEBUG -DFUNC_TRACE -DMEMDEBUG -Zi -Od
-REM
-
-set CFLAGS=-D_MSC_VER
-
-rem MSC 5.1 should use:
-rem set CFLAGS=-D_MSC_VER=510
-
-rem MSC 6.00A predefines _MSC_VER
-rem set CFLAGS=
-
-rem -Za sets ANSI flag so that __STDC__ is defined in MSC 6.00A
-rem (MSC 5.1 sets __STDC__=0 regardless of ANSI switch)
-
-cl -Za -c -AL %CFLAGS% -DGAWK array.c
-cl -Za -c -AL %CFLAGS% -DGAWK awktab.c
-cl -Za -c -AL %CFLAGS% -DGAWK builtin.c
-cl -Za -c -AL %CFLAGS% -DGAWK -DHAVE_CONFIG_H dfa.c
-cl -Za -c -AL %CFLAGS% -DGAWK eval.c
-cl -Za -c -AL %CFLAGS% -DGAWK field.c
-cl -Za -c -AL %CFLAGS% -DGAWK getid.c
-cl -Za -c -AL %CFLAGS% -DGAWK -DHAVE_CONFIG_H getopt.c
-cl -Za -c -AL %CFLAGS% -DGAWK -DHAVE_CONFIG_H getopt1.c
-cl -Za -c -AL %CFLAGS% -DGAWK io.c
-cl -Za -c -AL %CFLAGS% -DGAWK iop.c
-cl -Za -c -AL %CFLAGS% -DGAWK main.c
-cl -Za -c -AL %CFLAGS% -DGAWK missing.c
-cl -Za -c -AL %CFLAGS% -DGAWK msg.c
-cl -Za -c -AL %CFLAGS% -DGAWK node.c
-cl -Za -c -AL %CFLAGS% -DGAWK popen.c
-cl -Za -c -AL %CFLAGS% -DGAWK re.c
-REM You can ignore the warnings you will get
-cl -Za -c -AL %CFLAGS% -DGAWK -DHAVE_CONFIG_H regex.c
-cl -Za -c -AL %CFLAGS% -DGAWK version.c
-REM
-REM link debug flags: /CO /NOE /NOI /st:30000
-REM
-link @names.lnk,gawk.exe /NOE /NOI /st:30000;
diff --git a/pc/mkconf.cmd b/pc/mkconf.cmd
deleted file mode 100755
index 3d3eb3d3..00000000
--- a/pc/mkconf.cmd
+++ /dev/null
@@ -1,31 +0,0 @@
-extproc sh
-
-#! /bin/sh
-#
-# mkconf -- produce a config.h from a known configuration
-
-case "$#" in
-1) ;;
-*) echo "Usage: mkconf system_type" >&2
- echo "Known systems: `cd config; echo ;ls -C`" >&2
- exit 2
- ;;
-esac
-
-if [ -f config/$1 ]; then
- sh ./mungeconf.cmd config/$1 config.h-dist >config.h
- sed -n '/^#echo /s///p' config/$1
- sed -n '/^MAKE_.*/s//s,^##&## ,,/p' config/$1 >sedscr
- if [ -s sedscr ]
- then
- sed -f sedscr Makefile-dist >Makefile
- else
- cp Makefile-dist Makefile
- fi
- cmd /c del sedscr
-else
- echo "\`$1' is not a known configuration."
- echo "Either construct one based on the examples in the config directory,"
- echo "or copy config.h-dist to config.h and edit it."
- exit 1
-fi
diff --git a/pc/mkinstal.sh b/pc/mkinstal.sh
new file mode 100644
index 00000000..f5e45f33
--- /dev/null
+++ b/pc/mkinstal.sh
@@ -0,0 +1,33 @@
+#! /bin/sh
+# mkinstalldirs --- make directory hierarchy
+# Author: Noah Friedman <friedman@prep.ai.mit.edu>
+# Created: 1993-05-16
+# Last modified: 1994-03-25
+# Public domain
+
+errstatus=0
+
+for file in ${1+"$@"} ; do
+ #set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'`
+ set fnord `echo "$file" | sed 's/\([^:]\)\//\1 /g'`
+ shift
+
+ pathcomp=
+ for d in ${1+"$@"} ; do
+ pathcomp="$pathcomp$d"
+ case "$pathcomp" in
+ -* ) pathcomp=./$pathcomp ;;
+ esac
+
+ if test ! -d "$pathcomp"; then
+ echo "mkdir $pathcomp" 1>&2
+ mkdir "$pathcomp" || errstatus=$?
+ fi
+
+ pathcomp="$pathcomp/"
+ done
+done
+
+exit $errstatus
+
+# mkinstalldirs ends here
diff --git a/pc/mungeconf.cmd b/pc/mungeconf.cmd
deleted file mode 100755
index cbf1d12e..00000000
--- a/pc/mungeconf.cmd
+++ /dev/null
@@ -1,15 +0,0 @@
-extproc sh
-
-#! /bin/sh
-
-case $# in
-2) ;;
-*) echo "Usage: mungeconf sysfile distfile" >&2 ; exit 2 ;;
-esac
-
-sed '/^#/d; /^MAKE_*/d' $1 |
-sed '1s:.*:s~__SYSTEM__~&~:
-2,$s:^\([^ ]*\)[ ].*:s~^/\\* #define[ ]*\1.*~#define &~:' >sedscr
-sed -f sedscr $2
-
-cmd /c del sedscr
diff --git a/pc/names.lnk b/pc/names.lnk
deleted file mode 100644
index 5f02ae54..00000000
--- a/pc/names.lnk
+++ /dev/null
@@ -1,20 +0,0 @@
-array.obj+
-awktab.obj+
-builtin.obj+
-dfa.obj+
-eval.obj+
-field.obj+
-getid.obj+
-getopt.obj+
-getopt1.obj+
-io.obj+
-iop.obj+
-main.obj+
-missing.obj+
-msg.obj+
-node.obj+
-popen.obj+
-re.obj+
-version.obj+
-regex.obj+
-setargv.obj
diff --git a/pc/names2.lnk b/pc/names2.lnk
deleted file mode 100644
index fb9cff4f..00000000
--- a/pc/names2.lnk
+++ /dev/null
@@ -1,19 +0,0 @@
-main.o
-eval.o
-builtin.o
-msg.o
-iop.o
-io.o
-field.o
-array.o
-node.o
-version.o
-missing.o
-re.o
-awktab.o
-getopt.o
-getopt1.o
-regex.o
-dfa.o
-getid.o
-popen.o
diff --git a/pc/popen.c b/pc/popen.c
index cdb54a0a..c2eca24d 100644
--- a/pc/popen.c
+++ b/pc/popen.c
@@ -1,112 +1,124 @@
-#include "popen.h"
-#include <stdlib.h>
-#include <io.h>
-#include <string.h>
-#include <process.h>
-
-#ifdef OS2
-#ifdef _MSC_VER
-#define popen(c,m) _popen(c,m)
-#define pclose(f) _pclose(f)
-#endif
-#endif
-
-#ifndef _NFILE
-#define _NFILE 40
-#endif
-
-static char template[] = "piXXXXXX";
-typedef enum { unopened = 0, reading, writing } pipemode;
-static
-struct {
- char *command;
- char *name;
- pipemode pmode;
-} pipes[_NFILE];
-
-FILE *
-os_popen( char *command, char *mode ) {
- FILE *current;
- char *name;
- int cur;
- pipemode curmode;
-
-#if defined(OS2) && (_MSC_VER != 510)
- if (_osmode == OS2_MODE)
- return(popen(command, mode));
-#endif
-
- /*
- ** decide on mode.
- */
- if(strcmp(mode,"r") == 0)
- curmode = reading;
- else if(strcmp(mode,"w") == 0)
- curmode = writing;
- else
- return NULL;
- /*
- ** get a name to use.
- */
- if((name = tempnam(".","pip"))==NULL)
- return NULL;
- /*
- ** If we're reading, just call system to get a file filled with
- ** output.
- */
- if(curmode == reading) {
- char cmd[256];
- sprintf(cmd,"%s > %s",command,name);
- system(cmd);
- if((current = fopen(name,"r")) == NULL)
- return NULL;
- } else {
- if((current = fopen(name,"w")) == NULL)
- return NULL;
- }
- cur = fileno(current);
- pipes[cur].name = name;
- pipes[cur].pmode = curmode;
- pipes[cur].command = strdup(command);
- return current;
-}
-
-int
-os_pclose( FILE * current) {
- int cur = fileno(current),rval;
-
-#if defined(OS2) && (_MSC_VER != 510)
- if (_osmode == OS2_MODE)
- return(pclose(current));
-#endif
-
- /*
- ** check for an open file.
- */
- if(pipes[cur].pmode == unopened)
- return -1;
- if(pipes[cur].pmode == reading) {
- /*
- ** input pipes are just files we're done with.
- */
- rval = fclose(current);
- unlink(pipes[cur].name);
- } else {
- /*
- ** output pipes are temporary files we have
- ** to cram down the throats of programs.
- */
- char command[256];
- fclose(current);
- sprintf(command,"%s < %s",pipes[cur].command,pipes[cur].name);
- rval = system(command);
- unlink(pipes[cur].name);
- }
- /*
- ** clean up current pipe.
- */
- pipes[cur].pmode = unopened;
- free(pipes[cur].name);
- free(pipes[cur].command);
- return rval;
-}
+#include "popen.h"
+#include <stdlib.h>
+#include <io.h>
+#include <string.h>
+#include <process.h>
+
+#ifdef OS2
+#ifdef _MSC_VER
+#define popen(c,m) _popen(c,m)
+#define pclose(f) _pclose(f)
+#endif
+#endif
+
+#ifndef _NFILE
+#define _NFILE 40
+#endif
+
+static char template[] = "piXXXXXX";
+typedef enum { unopened = 0, reading, writing } pipemode;
+static
+struct {
+ char *command;
+ char *name;
+ pipemode pmode;
+} pipes[_NFILE];
+
+FILE *
+os_popen( char *command, char *mode ) {
+ FILE *current;
+ char *name;
+ int cur;
+ pipemode curmode;
+
+#if defined(OS2) && (_MSC_VER != 510)
+ if (_osmode == OS2_MODE)
+ return(popen(command, mode));
+#endif
+
+ /*
+ ** decide on mode.
+ */
+ if(strcmp(mode,"r") == 0)
+ curmode = reading;
+ else if(strcmp(mode,"w") == 0)
+ curmode = writing;
+ else
+ return NULL;
+ /*
+ ** get a name to use.
+ */
+ if((name = tempnam(".","pip"))==NULL)
+ return NULL;
+ /*
+ ** If we're reading, just call system to get a file filled with
+ ** output.
+ */
+ if(curmode == reading) {
+ if ((cur = dup(fileno(stdout))) == -1)
+ return NULL;
+ if ((current = freopen(name, "w", stdout)) == NULL)
+ return NULL;
+ system(command);
+ if (dup2(cur, fileno(stdout)) == -1)
+ return NULL;
+ close(cur);
+ if((current = fopen(name,"r")) == NULL)
+ return NULL;
+ } else {
+ if((current = fopen(name,"w")) == NULL)
+ return NULL;
+ }
+ cur = fileno(current);
+ pipes[cur].name = name;
+ pipes[cur].pmode = curmode;
+ pipes[cur].command = strdup(command);
+ return current;
+}
+
+int
+os_pclose( FILE * current) {
+ int cur = fileno(current),rval;
+
+#if defined(OS2) && (_MSC_VER != 510)
+ if (_osmode == OS2_MODE)
+ return(pclose(current));
+#endif
+
+ /*
+ ** check for an open file.
+ */
+ if(pipes[cur].pmode == unopened)
+ return -1;
+ if(pipes[cur].pmode == reading) {
+ /*
+ ** input pipes are just files we're done with.
+ */
+ rval = fclose(current);
+ unlink(pipes[cur].name);
+ } else {
+ /*
+ ** output pipes are temporary files we have
+ ** to cram down the throats of programs.
+ */
+ int fd;
+ fclose(current);
+ rval = -1;
+ if ((fd = dup(fileno(stdin))) != -1) {
+ if (current = freopen(pipes[cur].name, "r", stdin)) {
+ rval = system(pipes[cur].command);
+ fclose(current);
+ if (dup2(fd, fileno(stdin)) == -1) rval = -1;
+ close(fd);
+ }
+ }
+ unlink(pipes[cur].name);
+ }
+ /*
+ ** clean up current pipe.
+ */
+ pipes[cur].pmode = unopened;
+ free(pipes[cur].name);
+ free(pipes[cur].command);
+ return rval;
+}
diff --git a/posix/ChangeLog b/posix/ChangeLog
new file mode 100644
index 00000000..570c031b
--- /dev/null
+++ b/posix/ChangeLog
@@ -0,0 +1,3 @@
+Wed Jan 10 22:58:55 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * ChangeLog created.
diff --git a/posix/gawkmisc.c b/posix/gawkmisc.c
new file mode 100644
index 00000000..dce9e70a
--- /dev/null
+++ b/posix/gawkmisc.c
@@ -0,0 +1,115 @@
+/*
+ * gawkmisc.c --- miscellaneous gawk routines that are OS specific.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991 - 95 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+char quote = '\'';
+char *defpath = DEFPATH;
+char envsep = ':';
+
+/* gawk_name --- pull out the "gawk" part from how the OS called us */
+
+char *
+gawk_name(filespec)
+const char *filespec;
+{
+ char *p;
+
+ /* "path/name" -> "name" */
+ p = strrchr(filespec, '/');
+ return (p == NULL ? (char *) filespec : p + 1);
+}
+
+/* os_arg_fixup --- fixup the command line */
+
+void
+os_arg_fixup(argcp, argvp)
+int *argcp;
+char ***argvp;
+{
+ /* no-op */
+ return;
+}
+
+/* os_devopen --- open special per-OS devices */
+
+int
+os_devopen(name, flag)
+const char *name;
+int flag;
+{
+ /* no-op */
+ return INVALID_HANDLE;
+}
+
+/* optimal_bufsize --- determine optimal buffer size */
+
+int
+optimal_bufsize(fd, stb)
+int fd;
+struct stat *stb;
+{
+ /* force all members to zero in case OS doesn't use all of them. */
+ memset(stb, '\0', sizeof(struct stat));
+
+ /*
+ * System V.n, n < 4, doesn't have the file system block size in the
+ * stat structure. So we have to make some sort of reasonable
+ * guess. We use stdio's BUFSIZ, since that is what it was
+ * meant for in the first place.
+ */
+#ifdef HAVE_ST_BLKSIZE
+#define DEFBLKSIZE (stb->st_blksize ? stb->st_blksize : BUFSIZ)
+#else
+#define DEFBLKSIZE BUFSIZ
+#endif
+
+ if (isatty(fd))
+ return BUFSIZ;
+ if (fstat(fd, stb) == -1)
+ fatal("can't stat fd %d (%s)", fd, strerror(errno));
+ if (lseek(fd, (off_t)0, 0) == -1) /* not a regular file */
+ return DEFBLKSIZE;
+ if (stb->st_size > 0 && stb->st_size < DEFBLKSIZE) /* small file */
+ return stb->st_size;
+ return DEFBLKSIZE;
+}
+
+/* ispath --- return true if path has directory components */
+
+int
+ispath(file)
+const char *file;
+{
+ return (strchr(file, '/') != NULL);
+}
+
+/* isdirpunct --- return true if char is a directory separator */
+
+int
+isdirpunct(c)
+int c;
+{
+ return (c == '/');
+}
+
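[Editorial note, not part of the patch: optimal_bufsize() above returns st_blksize when the system provides it, BUFSIZ for ttys or older System V systems, and the file size for small regular files. A hedged sketch of how a caller might use it to size a read buffer; the helper name and ANSI-style prototype are invented for the example.]

#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

extern int optimal_bufsize(int fd, struct stat *stb);   /* from posix/gawkmisc.c */

/* Allocate an I/O buffer sized to what the OS says is best for `fd'. */
static char *alloc_io_buffer(int fd, size_t *sizep)
{
    struct stat sbuf;
    int n = optimal_bufsize(fd, &sbuf);   /* st_blksize if available, else BUFSIZ */

    *sizep = (size_t) n;
    return malloc(*sizep);
}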
diff --git a/protos.h b/protos.h
index 1d4ac998..3429b21f 100644
--- a/protos.h
+++ b/protos.h
@@ -3,10 +3,10 @@
*/
/*
- * Copyright (C) 1991, 1992, 1993 the Free Software Foundation, Inc.
+ * Copyright (C) 1991 -95 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,8 +19,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#ifdef __STDC__
@@ -32,27 +32,35 @@ extern aptr_t malloc P((MALLOC_ARG_T));
extern aptr_t realloc P((aptr_t, MALLOC_ARG_T));
extern aptr_t calloc P((MALLOC_ARG_T, MALLOC_ARG_T));
-#if !defined(sun) && !defined(__sun__)
extern void free P((aptr_t));
-#endif
extern char *getenv P((const char *));
+#if ! defined(HAVE_STRING_H) && ! defined(HAVE_STRINGS_H)
extern char *strcpy P((char *, const char *));
extern char *strcat P((char *, const char *));
-extern int strcmp P((const char *, const char *));
extern char *strncpy P((char *, const char *, size_t));
+extern int strcmp P((const char *, const char *));
extern int strncmp P((const char *, const char *, size_t));
-#ifndef VMS
-extern char *strerror P((int));
-#else
-extern char *strerror P((int,...));
-#endif
extern char *strchr P((const char *, int));
extern char *strrchr P((const char *, int));
extern char *strstr P((const char *s1, const char *s2));
extern size_t strlen P((const char *));
extern long strtol P((const char *, char **, int));
-#if !defined(_MSC_VER) && !defined(__GNU_LIBRARY__)
+
+extern aptr_t memset P((aptr_t, int, size_t));
+extern aptr_t memcpy P((aptr_t, const aptr_t, size_t));
+extern aptr_t memmove P((aptr_t, const aptr_t, size_t));
+extern aptr_t memchr P((const aptr_t, int, size_t));
+extern int memcmp P((const aptr_t, const aptr_t, size_t));
+#endif /* ! defined(HAVE_STRING_H) && ! defined(HAVE_STRINGS_H) */
+
+#ifndef VMS
+extern char *strerror P((int));
+#else
+extern char *strerror P((int,...));
+#endif
+
+#if ! defined(_MSC_VER) && ! defined(__GNU_LIBRARY__)
extern size_t strftime P((char *, size_t, const char *, const struct tm *));
#endif
#ifdef __STDC__
@@ -60,14 +68,10 @@ extern time_t time P((time_t *));
#else
extern long time();
#endif
-extern aptr_t memset P((aptr_t, int, size_t));
-extern aptr_t memcpy P((aptr_t, const aptr_t, size_t));
-extern aptr_t memmove P((aptr_t, const aptr_t, size_t));
-extern aptr_t memchr P((const aptr_t, int, size_t));
-extern int memcmp P((const aptr_t, const aptr_t, size_t));
-extern int fprintf P((FILE *, const char *, ...));
-#if !defined(MSDOS) && !defined(__GNU_LIBRARY__)
+extern FILE *fdopen P((int, const char *));
+extern int fprintf P((FILE *, const char *, ...));
+#if ! defined(MSDOS) && ! defined(__GNU_LIBRARY__)
#ifdef __STDC__
extern size_t fwrite P((const aptr_t, size_t, size_t, FILE *));
#else
@@ -106,17 +110,19 @@ extern int pipe P((int *));
extern int dup P((int));
extern int dup2 P((int,int));
extern int fork P(());
-extern int execl P((/* char *, char *, ... */));
+extern int execl P((const char *, const char *, ...));
#ifndef __STDC__
-extern int read P((int, char *, int));
+extern int read P((int, void *, unsigned int));
#endif
+#ifndef HAVE_SYS_WAIT_H
extern int wait P((int *));
+#endif
extern void _exit P((int));
-#ifdef NON_STD_SPRINTF
-extern char *sprintf P((char *, const char*, ...));
-#else
-extern int sprintf P((char *, const char*, ...));
-#endif /* SPRINTF_INT */
+#ifndef __STDC__
+extern long time P((long *));
+#endif
+
+extern SPRINTF_RET sprintf P((char *, const char *, ...));
#undef aptr_t
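[Editorial note, not part of the patch: the declarations above rely on the classic P() wrapper so the same header works with both ANSI and K&R compilers. The sketch below shows the general shape of that convention; it is an assumption about how P() is defined elsewhere in this tree, not text copied from the patch.]

/* On an ANSI compiler P(args) keeps the prototype; on a K&R compiler it
 * collapses to an empty parameter list.  The double parentheses in
 * `P((a, b))' make the whole argument list a single macro argument. */
#ifdef __STDC__
#define P(args) args
#else
#define P(args) ()
#endif

extern char *example_lookup P((const char *name, int flags));  /* hypothetical */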
diff --git a/re.c b/re.c
index cd11d495..497f7214 100644
--- a/re.c
+++ b/re.c
@@ -6,7 +6,7 @@
* Copyright (C) 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,13 +19,15 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "awk.h"
-/* Generate compiled regular expressions */
+static reg_syntax_t syn;
+
+/* make_regexp --- generate compiled regular expressions */
Regexp *
make_regexp(s, len, ignorecase, dfa)
@@ -40,14 +42,15 @@ int dfa;
char *temp;
char *end = s + len;
register char *dest;
- register int c;
+ register int c, c2;
/* Handle escaped characters first. */
- /* Build a copy of the string (in dest) with the
- escaped characters translated, and generate the regex
- from that.
- */
+ /*
+ * Build a copy of the string (in dest) with the
+ * escaped characters translated, and generate the regex
+ * from that.
+ */
emalloc(dest, char *, len + 2, "make_regexp");
temp = dest;
@@ -71,27 +74,42 @@ int dfa;
case '5':
case '6':
case '7':
- c = parse_escape(&src);
- if (c < 0)
+ c2 = parse_escape(&src);
+ if (c2 < 0)
cant_happen();
- *dest++ = (char)c;
+ /*
+ * Unix awk treats octal (and hex?) chars
+ * literally in re's, so escape regexp
+ * metacharacters.
+ */
+ if (do_traditional && ! do_posix && (isdigit(c) || c == 'x')
+ && strchr("()|*+?.^$\\[]", c2) != NULL)
+ *dest++ = '\\';
+ *dest++ = (char) c2;
break;
+ case 'y': /* normally \b */
+ /* gnu regex op */
+ if (! do_traditional) {
+ *dest++ = '\\';
+ *dest++ = 'b';
+ src++;
+ break;
+ }
+ /* else, fall through */
default:
*dest++ = '\\';
- *dest++ = (char)c;
+ *dest++ = (char) c;
src++;
break;
} /* switch */
- } else {
+ } else
*dest++ = *src++; /* not '\\' */
- }
} /* for */
*dest = '\0' ; /* Only necessary if we print dest ? */
emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
memset((char *) rp, 0, sizeof(*rp));
- emalloc(rp->pat.buffer, unsigned char *, 16, "make_regexp");
- rp->pat.allocated = 16;
+ rp->pat.allocated = 0; /* regex will allocate the buffer */
emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
if (ignorecase)
@@ -103,17 +121,19 @@ int dfa;
fatal("%s: /%s/", rerr, temp);
/* gack. this must be done *after* re_compile_pattern */
- rp->pat.newline_anchor = 0; /* don't get \n in middle of string */
- if (dfa && !ignorecase) {
- dfacomp(temp, len, &(rp->dfareg), 1);
- rp->dfa = 1;
+ rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */
+ if (dfa && ! ignorecase) {
+ dfacomp(temp, len, &(rp->dfareg), TRUE);
+ rp->dfa = TRUE;
} else
- rp->dfa = 0;
+ rp->dfa = FALSE;
free(temp);
return rp;
}
+/* research --- do a regexp search. use dfa if possible */
+
int
research(rp, str, start, len, need_start)
Regexp *rp;
@@ -123,34 +143,42 @@ register size_t len;
int need_start;
{
char *ret = str;
+ int try_backref;
+ /*
+ * Always do dfa search if can; if it fails, then even if
+ * need_start is true, we won't bother with the regex search.
+ */
if (rp->dfa) {
char save;
int count = 0;
- int try_backref;
/*
* dfa likes to stick a '\n' right after the matched
* text. So we just save and restore the character.
*/
save = str[start+len];
- ret = dfaexec(&(rp->dfareg), str+start, str+start+len, 1,
+ ret = dfaexec(&(rp->dfareg), str+start, str+start+len, TRUE,
&count, &try_backref);
str[start+len] = save;
}
if (ret) {
- if (need_start || rp->dfa == 0) {
+ if (need_start || rp->dfa == FALSE || try_backref) {
int result = re_search(&(rp->pat), str, start+len,
start, len, &(rp->regs));
/* recover any space from C based alloca */
+#ifdef C_ALLOCA
(void) alloca(0);
+#endif
return result;
} else
return 1;
- } else
+ } else
return -1;
}
+/* refree --- free up the dynamic memory used by a compiled regexp */
+
void
refree(rp)
Regexp *rp;
@@ -166,25 +194,29 @@ Regexp *rp;
free(rp);
}
+/* dfaerror --- print an error message for the dfa routines */
+
void
dfaerror(s)
const char *s;
{
- fatal(s);
+ fatal("%s", s);
}
+/* re_update --- recompile a dynamic regexp */
+
Regexp *
re_update(t)
NODE *t;
{
NODE *t1;
-# define CASE 1
+/* # define CASE 1 */
if ((t->re_flags & CASE) == IGNORECASE) {
- if (t->re_flags & CONST)
+ if ((t->re_flags & CONST) != 0)
return t->re_reg;
t1 = force_string(tree_eval(t->re_exp));
- if (t->re_text) {
+ if (t->re_text != NULL) {
if (cmp_nodes(t->re_text, t1) == 0) {
free_temp(t1);
return t->re_reg;
@@ -194,13 +226,13 @@ NODE *t;
t->re_text = dupnode(t1);
free_temp(t1);
}
- if (t->re_reg)
+ if (t->re_reg != NULL)
refree(t->re_reg);
- if (t->re_cnt)
+ if (t->re_cnt > 0)
t->re_cnt++;
if (t->re_cnt > 10)
t->re_cnt = 0;
- if (!t->re_text) {
+ if (t->re_text == NULL) {
t1 = force_string(tree_eval(t->re_exp));
t->re_text = dupnode(t1);
free_temp(t1);
@@ -212,11 +244,62 @@ NODE *t;
return t->re_reg;
}
+/* resetup --- choose what kind of regexps we match */
+
void
resetup()
{
- reg_syntax_t syn = RE_SYNTAX_AWK;
+ if (do_posix)
+ syn = RE_SYNTAX_POSIX_AWK; /* strict POSIX re's */
+ else if (do_traditional)
+ syn = RE_SYNTAX_AWK; /* traditional Unix awk re's */
+ else
+ syn = RE_SYNTAX_GNU_AWK; /* POSIX re's + GNU ops */
+
+ /*
+ * Interval expressions are off by default, since it's likely to
+ * break too many old programs to have them on.
+ */
+ if (do_intervals)
+ syn |= RE_INTERVALS;
(void) re_set_syntax(syn);
- dfasyntax(syn, 0);
+ dfasyntax(syn, FALSE);
+}
+
+/* avoid_dfa --- temporary kludge function until we have a new dfa.c */
+
+int
+avoid_dfa(re, str, len)
+NODE *re;
+char *str;
+size_t len;
+{
+ char *restr;
+ int relen;
+ int anchor, i;
+ char *end;
+
+ if ((re->re_flags & CONST) != 0) {
+ restr = re->re_exp->stptr;
+ relen = re->re_exp->stlen;
+ } else {
+ restr = re->re_text->stptr;
+ relen = re->re_text->stlen;
+ }
+
+ for (anchor = FALSE, i = 0; i < relen; i++) {
+ if (restr[i] == '^' || restr[i] == '$') {
+ anchor = TRUE;
+ break;
+ }
+ }
+ if (! anchor)
+ return FALSE;
+
+ for (end = str + len; str < end; str++)
+ if (*str == '\n')
+ return TRUE;
+
+ return FALSE;
}
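[Editorial note, not part of the patch: resetup() above reduces to a three-way syntax choice plus an opt-in interval-expression bit. A minimal sketch of that decision in isolation, using the same flag and syntax names as the patch; the helper name is invented.]

#include "regex.h"

static reg_syntax_t pick_syntax(int do_posix, int do_traditional, int do_intervals)
{
    reg_syntax_t syn;

    if (do_posix)
        syn = RE_SYNTAX_POSIX_AWK;      /* strict POSIX awk re's */
    else if (do_traditional)
        syn = RE_SYNTAX_AWK;            /* traditional Unix awk re's */
    else
        syn = RE_SYNTAX_GNU_AWK;        /* POSIX re's plus GNU operators */

    if (do_intervals)
        syn |= RE_INTERVALS;            /* {n,m} repetition, off by default */

    return syn;
}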
diff --git a/regex.c b/regex.c
index 9b508823..5d1e16fb 100644
--- a/regex.c
+++ b/regex.c
@@ -3,7 +3,7 @@
(Implements POSIX draft P10003.2/D11.2, except for
internationalization features.)
- Copyright (C) 1993-1995 Free Software Foundation, Inc.
+ Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -17,17 +17,18 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
/* AIX requires this to be the first thing in the file. */
#if defined (_AIX) && !defined (REGEX_MALLOC)
#pragma alloca
#endif
+#undef _GNU_SOURCE
#define _GNU_SOURCE
#ifdef HAVE_CONFIG_H
-#include "config.h"
+#include <config.h>
#endif
#if defined(STDC_HEADERS) && !defined(emacs)
@@ -37,6 +38,19 @@
#include <sys/types.h>
#endif
+/* This is for other GNU distributions with internationalized messages. */
+#if HAVE_LIBINTL_H || defined (_LIBC)
+# include <libintl.h>
+#else
+# define gettext(msgid) (msgid)
+#endif
+
+#ifndef gettext_noop
+/* This define is so xgettext can find the internationalizable
+ strings. */
+#define gettext_noop(String) String
+#endif
+
/* The `emacs' switch turns on certain matching commands
that make sense only in Emacs. */
#ifdef emacs
@@ -45,14 +59,35 @@
#include "buffer.h"
#include "syntax.h"
-/* Emacs uses `NULL' as a predicate. */
-#undef NULL
-
#else /* not emacs */
-/* We used to test for `BSTRING' here, but only GCC and Emacs define
- `BSTRING', as far as I know, and neither of them use this code. */
-#if HAVE_STRING_H || STDC_HEADERS
+/* If we are not linking with Emacs proper,
+ we can't use the relocating allocator
+ even if config.h says that we can. */
+#undef REL_ALLOC
+
+#if defined (STDC_HEADERS) || defined (_LIBC)
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
+ If nothing else has been done, use the method below. */
+#ifdef INHIBIT_STRING_HEADER
+#if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
+#if !defined (bzero) && !defined (bcopy)
+#undef INHIBIT_STRING_HEADER
+#endif
+#endif
+#endif
+
+/* This is the normal way of making sure we have a bcopy and a bzero.
+ This is used in most programs--a few other programs avoid this
+ by defining INHIBIT_STRING_HEADER. */
+#ifndef INHIBIT_STRING_HEADER
+#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
#include <string.h>
#ifndef bcmp
#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
@@ -66,23 +101,22 @@
#else
#include <strings.h>
#endif
-
-#ifdef STDC_HEADERS
-#include <stdlib.h>
-#else
-char *malloc ();
-char *realloc ();
#endif
-
/* Define the syntax stuff for \<, \>, etc. */
/* This must be nonzero for the wordchar and notwordchar pattern
commands in re_match_2. */
-#ifndef Sword
+#ifndef Sword
#define Sword 1
#endif
+#ifdef SWITCH_ENUM_BUG
+#define SWITCH_ENUM_CAST(x) ((int)(x))
+#else
+#define SWITCH_ENUM_CAST(x) (x)
+#endif
+
#ifdef SYNTAX_TABLE
extern char *re_syntax_table;
@@ -141,35 +175,37 @@ init_syntax_once ()
macros don't need to be guarded with references to isascii. ...
Defining isascii to 1 should let any compiler worth its salt
eliminate the && through constant folding." */
-#if ! defined (isascii) || defined (STDC_HEADERS)
-#undef isascii
-#define isascii(c) 1
+
+#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
+#define ISASCII(c) 1
+#else
+#define ISASCII(c) isascii(c)
#endif
#ifdef isblank
-#define ISBLANK(c) (isascii (c) && isblank (c))
+#define ISBLANK(c) (ISASCII (c) && isblank (c))
#else
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
-#define ISGRAPH(c) (isascii (c) && isgraph (c))
+#define ISGRAPH(c) (ISASCII (c) && isgraph (c))
#else
-#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
#endif
-#define ISPRINT(c) (isascii (c) && isprint (c))
-#define ISDIGIT(c) (isascii (c) && isdigit (c))
-#define ISALNUM(c) (isascii (c) && isalnum (c))
-#define ISALPHA(c) (isascii (c) && isalpha (c))
-#define ISCNTRL(c) (isascii (c) && iscntrl (c))
-#define ISLOWER(c) (isascii (c) && islower (c))
-#define ISPUNCT(c) (isascii (c) && ispunct (c))
-#define ISSPACE(c) (isascii (c) && isspace (c))
-#define ISUPPER(c) (isascii (c) && isupper (c))
-#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+#define ISPRINT(c) (ISASCII (c) && isprint (c))
+#define ISDIGIT(c) (ISASCII (c) && isdigit (c))
+#define ISALNUM(c) (ISASCII (c) && isalnum (c))
+#define ISALPHA(c) (ISASCII (c) && isalpha (c))
+#define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
+#define ISLOWER(c) (ISASCII (c) && islower (c))
+#define ISPUNCT(c) (ISASCII (c) && ispunct (c))
+#define ISSPACE(c) (ISASCII (c) && isspace (c))
+#define ISUPPER(c) (ISASCII (c) && isupper (c))
+#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
#ifndef NULL
-#define NULL 0
+#define NULL (void *)0
#endif
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
@@ -188,8 +224,8 @@ init_syntax_once ()
use `alloca' instead of `malloc'. This is because using malloc in
re_search* or re_match* could cause memory leaks when C-g is used in
Emacs; also, malloc is slower and causes storage fragmentation. On
- the other hand, malloc is more portable, and easier to debug.
-
+ the other hand, malloc is more portable, and easier to debug.
+
Because we sometimes use alloca, some routines have to be macros,
not functions -- `alloca'-allocated space disappears at the end of the
function it is called in. */
@@ -198,6 +234,7 @@ init_syntax_once ()
#define REGEX_ALLOCATE malloc
#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+#define REGEX_FREE free
#else /* not REGEX_MALLOC */
@@ -211,10 +248,12 @@ init_syntax_once ()
#if HAVE_ALLOCA_H
#include <alloca.h>
#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#if 0 /* It is a bad idea to declare alloca. We always cast the result. */
#ifndef _AIX /* Already did AIX, up at the top. */
char *alloca ();
#endif /* not _AIX */
-#endif /* not HAVE_ALLOCA_H */
+#endif
+#endif /* not HAVE_ALLOCA_H */
#endif /* not __GNUC__ */
#endif /* not alloca */
@@ -227,7 +266,41 @@ char *alloca ();
bcopy (source, destination, osize), \
destination)
+/* No need to do anything to free, after alloca. */
+#define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
+
+#endif /* not REGEX_MALLOC */
+
+/* Define how to allocate the failure stack. */
+
+#if defined (REL_ALLOC) && defined (REGEX_MALLOC)
+
+#define REGEX_ALLOCATE_STACK(size) \
+ r_alloc (&failure_stack_ptr, (size))
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ r_re_alloc (&failure_stack_ptr, (nsize))
+#define REGEX_FREE_STACK(ptr) \
+ r_alloc_free (&failure_stack_ptr)
+
+#else /* not using relocating allocator */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE_STACK malloc
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
+#define REGEX_FREE_STACK free
+
+#else /* not REGEX_MALLOC */
+
+#define REGEX_ALLOCATE_STACK alloca
+
+#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
+ REGEX_REALLOCATE (source, osize, nsize)
+/* No need to explicitly free anything. */
+#define REGEX_FREE_STACK(arg)
+
#endif /* not REGEX_MALLOC */
+#endif /* not using relocating allocator */
/* True if `size1' is non-NULL and PTR is pointing anywhere inside
@@ -239,34 +312,39 @@ char *alloca ();
/* (Re)Allocate N items of type T using malloc, or fail. */
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define RETALLOC_IF(addr, n, t) \
+ if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
#define BYTEWIDTH 8 /* In bits. */
#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+#undef MAX
+#undef MIN
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
typedef char boolean;
#define false 0
#define true 1
+
+static int re_match_2_internal ();
/* These are the command codes that appear in compiled regular
expressions. Some opcodes are followed by argument bytes. A
command code can specify any interpretation whatsoever for its
- arguments. Zero bytes may appear in the compiled regular expression.
-
- The value of `exactn' is needed in search.c (search_buffer) in Emacs.
- So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
- `exactn' we use here must also be 1. */
+ arguments. Zero bytes may appear in the compiled regular expression. */
typedef enum
{
no_op = 0,
+ /* Succeed right away--no more backtracking. */
+ succeed,
+
/* Followed by one byte giving n, then by n literal bytes. */
- exactn = 1,
+ exactn,
/* Matches any (more or less) character. */
anychar,
@@ -317,9 +395,9 @@ typedef enum
/* Analogously, for end of buffer/string. */
endbuf,
-
+
/* Followed by two byte relative address to which to jump. */
- jump,
+ jump,
/* Same as jump, but marks the end of an alternative. */
jump_past_alt,
@@ -327,11 +405,11 @@ typedef enum
/* Followed by two-byte relative address of place to resume at
in case of failure. */
on_failure_jump,
-
+
/* Like on_failure_jump, but pushes a placeholder instead of the
current string position when executed. */
on_failure_keep_string_jump,
-
+
/* Throw away latest failure point and then jump to following
two-byte relative address. */
pop_failure_jump,
@@ -428,7 +506,7 @@ extract_number (dest, source)
int *dest;
unsigned char *source;
{
- int temp = SIGN_EXTEND_CHAR (*(source + 1));
+ int temp = SIGN_EXTEND_CHAR (*(source + 1));
*dest = *source & 0377;
*dest += temp << 8;
}
@@ -456,7 +534,7 @@ static void
extract_number_and_incr (destination, source)
int *destination;
unsigned char **source;
-{
+{
extract_number (destination, *source);
*source += 2;
}
@@ -496,8 +574,6 @@ static int debug = 0;
if (debug) print_double_string (w, s1, sz1, s2, sz2)
-extern void printchar ();
-
/* Print the fastmap in human-readable form. */
void
@@ -505,14 +581,14 @@ print_fastmap (fastmap)
char *fastmap;
{
unsigned was_a_range = 0;
- unsigned i = 0;
-
+ unsigned i = 0;
+
while (i < (1 << BYTEWIDTH))
{
if (fastmap[i++])
{
was_a_range = 0;
- printchar (i - 1);
+ putchar (i - 1);
while (i < (1 << BYTEWIDTH) && fastmap[i])
{
was_a_range = 1;
@@ -521,11 +597,11 @@ print_fastmap (fastmap)
if (was_a_range)
{
printf ("-");
- printchar (i - 1);
+ putchar (i - 1);
}
}
}
- putchar ('\n');
+ putchar ('\n');
}
@@ -546,7 +622,7 @@ print_partial_compiled_pattern (start, end)
printf ("(null)\n");
return;
}
-
+
/* Loop over pattern commands. */
while (p < pend)
{
@@ -564,7 +640,7 @@ print_partial_compiled_pattern (start, end)
do
{
putchar ('/');
- printchar (*p++);
+ putchar (*p++);
}
while (--mcnt);
break;
@@ -595,7 +671,7 @@ print_partial_compiled_pattern (start, end)
printf ("/charset [%s",
(re_opcode_t) *(p - 1) == charset_not ? "^" : "");
-
+
assert (p + *p < pend);
for (c = 0; c < 256; c++)
@@ -611,18 +687,18 @@ print_partial_compiled_pattern (start, end)
/* Have we broken a range? */
else if (last + 1 != c && in_range)
{
- printchar (last);
+ putchar (last);
in_range = 0;
}
-
+
if (! in_range)
- printchar (c);
+ putchar (c);
last = c;
}
if (in_range)
- printchar (last);
+ putchar (last);
putchar (']');
@@ -656,7 +732,7 @@ print_partial_compiled_pattern (start, end)
case push_dummy_failure:
printf ("/push_dummy_failure");
break;
-
+
case maybe_pop_jump:
extract_number_and_incr (&mcnt, &p);
printf ("/maybe_pop_jump to %d", p + mcnt - start);
@@ -665,36 +741,36 @@ print_partial_compiled_pattern (start, end)
case pop_failure_jump:
extract_number_and_incr (&mcnt, &p);
printf ("/pop_failure_jump to %d", p + mcnt - start);
- break;
-
+ break;
+
case jump_past_alt:
extract_number_and_incr (&mcnt, &p);
printf ("/jump_past_alt to %d", p + mcnt - start);
- break;
-
+ break;
+
case jump:
extract_number_and_incr (&mcnt, &p);
printf ("/jump to %d", p + mcnt - start);
break;
- case succeed_n:
+ case succeed_n:
extract_number_and_incr (&mcnt, &p);
extract_number_and_incr (&mcnt2, &p);
printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
break;
-
- case jump_n:
+
+ case jump_n:
extract_number_and_incr (&mcnt, &p);
extract_number_and_incr (&mcnt2, &p);
printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
break;
-
- case set_number_at:
+
+ case set_number_at:
extract_number_and_incr (&mcnt, &p);
extract_number_and_incr (&mcnt2, &p);
printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
break;
-
+
case wordbound:
printf ("/wordbound");
break;
@@ -706,10 +782,10 @@ print_partial_compiled_pattern (start, end)
case wordbeg:
printf ("/wordbeg");
break;
-
+
case wordend:
printf ("/wordend");
-
+
#ifdef emacs
case before_dot:
printf ("/before_dot");
@@ -728,7 +804,7 @@ print_partial_compiled_pattern (start, end)
mcnt = *p++;
printf ("/%d", mcnt);
break;
-
+
case notsyntaxspec:
printf ("/notsyntaxspec");
mcnt = *p++;
@@ -739,7 +815,7 @@ print_partial_compiled_pattern (start, end)
case wordchar:
printf ("/wordchar");
break;
-
+
case notwordchar:
printf ("/notwordchar");
break;
@@ -799,7 +875,7 @@ print_double_string (where, string1, size1, string2, size2)
int size2;
{
unsigned this_char;
-
+
if (where == NULL)
printf ("(null)");
else
@@ -807,13 +883,13 @@ print_double_string (where, string1, size1, string2, size2)
if (FIRST_STRING_P (where))
{
for (this_char = where - string1; this_char < size1; this_char++)
- printchar (string1[this_char]);
+ putchar (string1[this_char]);
- where = string2;
+ where = string2;
}
for (this_char = where - string2; this_char < size2; this_char++)
- printchar (string2[this_char]);
+ putchar (string2[this_char]);
}
}
@@ -842,7 +918,9 @@ printchar (c)
/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
also be assigned to arbitrarily: each pattern buffer stores its own
syntax, so it can be changed between regex compilations. */
-reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
+/* This has no initializer because initialized variables in Emacs
+ become read-only after dumping. */
+reg_syntax_t re_syntax_options;
/* Specify the precise syntax of regexps for compilation. This provides
@@ -857,34 +935,492 @@ re_set_syntax (syntax)
reg_syntax_t syntax;
{
reg_syntax_t ret = re_syntax_options;
-
+
re_syntax_options = syntax;
return ret;
}
/* This table gives an error message for each of the error codes listed
- in regex.h. Obviously the order here has to be same as there. */
-
-static const char *re_error_msg[] =
- { NULL, /* REG_NOERROR */
- "No match", /* REG_NOMATCH */
- "Invalid regular expression", /* REG_BADPAT */
- "Invalid collation character", /* REG_ECOLLATE */
- "Invalid character class name", /* REG_ECTYPE */
- "Trailing backslash", /* REG_EESCAPE */
- "Invalid back reference", /* REG_ESUBREG */
- "Unmatched [ or [^", /* REG_EBRACK */
- "Unmatched ( or \\(", /* REG_EPAREN */
- "Unmatched \\{", /* REG_EBRACE */
- "Invalid content of \\{\\}", /* REG_BADBR */
- "Invalid range end", /* REG_ERANGE */
- "Memory exhausted", /* REG_ESPACE */
- "Invalid preceding regular expression", /* REG_BADRPT */
- "Premature end of regular expression", /* REG_EEND */
- "Regular expression too big", /* REG_ESIZE */
- "Unmatched ) or \\)", /* REG_ERPAREN */
+ in regex.h. Obviously the order here has to be same as there.
+ POSIX doesn't require that we do anything for REG_NOERROR,
+ but why not be nice? */
+
+static const char *re_error_msgid[] =
+ {
+ gettext_noop ("Success"), /* REG_NOERROR */
+ gettext_noop ("No match"), /* REG_NOMATCH */
+ gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
+ gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
+ gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
+ gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
+ gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
+ gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
+ gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
+ gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
+ gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
+ gettext_noop ("Invalid range end"), /* REG_ERANGE */
+ gettext_noop ("Memory exhausted"), /* REG_ESPACE */
+ gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
+ gettext_noop ("Premature end of regular expression"), /* REG_EEND */
+ gettext_noop ("Regular expression too big"), /* REG_ESIZE */
+ gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
};
+/* Avoiding alloca during matching, to placate r_alloc. */
+
+/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
+ searching and matching functions should not call alloca. On some
+ systems, alloca is implemented in terms of malloc, and if we're
+ using the relocating allocator routines, then malloc could cause a
+ relocation, which might (if the strings being searched are in the
+ ralloc heap) shift the data out from underneath the regexp
+ routines.
+
+ Here's another reason to avoid allocation: Emacs
+ processes input from X in a signal handler; processing X input may
+ call malloc; if input arrives while a matching routine is calling
+ malloc, then we're scrod. But Emacs can't just block input while
+ calling matching routines; then we don't notice interrupts when
+ they come in. So, Emacs blocks input around all regexp calls
+ except the matching calls, which it leaves unprotected, in the
+ faith that they will not malloc. */
+
+/* Normally, this is fine. */
+#define MATCH_MAY_ALLOCATE
+
+/* When using GNU C, we are not REALLY using the C alloca, no matter
+ what config.h may say. So don't take precautions for it. */
+#ifdef __GNUC__
+#undef C_ALLOCA
+#endif
+
+/* The match routines may not allocate if (1) they would do it with malloc
+ and (2) it's not safe for them to use malloc.
+ Note that if REL_ALLOC is defined, matching would not use malloc for the
+ failure stack, but we would still use it for the register vectors;
+ so REL_ALLOC should not affect this. */
+#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
+#undef MATCH_MAY_ALLOCATE
+#endif
+
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+ re_match_2 use a failure stack. These have to be macros because of
+ REGEX_ALLOCATE_STACK. */
+
+
+/* Number of failure points for which to initially allocate space
+ when matching. If this number is exceeded, we allocate more
+ space, so it is not a hard limit. */
+#ifndef INIT_FAILURE_ALLOC
+#define INIT_FAILURE_ALLOC 5
+#endif
+
+/* Roughly the maximum number of failure points on the stack. Would be
+ exactly that if always used MAX_FAILURE_SPACE each time we failed.
+ This is a variable only so users of regex can assign to it; we never
+ change it ourselves. */
+
+#ifdef INT_IS_16BIT
+
+#if defined (MATCH_MAY_ALLOCATE)
+long re_max_failures = 20000;
+#else
+long re_max_failures = 2000;
+#endif
+
+union fail_stack_elt
+{
+ unsigned char *pointer;
+ long integer;
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+typedef struct
+{
+ fail_stack_elt_t *stack;
+ unsigned long size;
+ unsigned long avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#else /* not INT_IS_16BIT */
+
+#if defined (MATCH_MAY_ALLOCATE)
+int re_max_failures = 20000;
+#else
+int re_max_failures = 2000;
+#endif
+
+union fail_stack_elt
+{
+ unsigned char *pointer;
+ int integer;
+};
+
+typedef union fail_stack_elt fail_stack_elt_t;
+
+typedef struct
+{
+ fail_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#endif /* INT_IS_16BIT */
+
+#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
+
+
+/* Define macros to initialize and free the failure stack.
+ Do `return -2' if the alloc fails. */
+
+#ifdef MATCH_MAY_ALLOCATE
+#define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (fail_stack_elt_t *) \
+ REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ \
+ if (fail_stack.stack == NULL) \
+ return -2; \
+ \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ fail_stack.avail = 0; \
+ } while (0)
+
+#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
+#else
+#define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.avail = 0; \
+ } while (0)
+
+#define RESET_FAIL_STACK()
+#endif
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+ Return 1 if succeeds, and 0 if either ran out of memory
+ allocating space for it or it was already too large.
+
+ REGEX_REALLOCATE_STACK requires `destination' be declared. */
+
+#define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
+ ? 0 \
+ : ((fail_stack).stack = (fail_stack_elt_t *) \
+ REGEX_REALLOCATE_STACK ((fail_stack).stack, \
+ (fail_stack).size * sizeof (fail_stack_elt_t), \
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+ \
+ (fail_stack).stack == NULL \
+ ? 0 \
+ : ((fail_stack).size <<= 1, \
+ 1)))
+
+
+/* Push pointer POINTER on FAIL_STACK.
+ Return 1 if was able to do so and 0 if ran out of memory allocating
+ space to do so. */
+#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
+ ((FAIL_STACK_FULL () \
+ && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
+ ? 0 \
+ : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
+ 1))
+
+/* Push a pointer value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_POINTER(item) \
+ fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
+
+/* This pushes an integer-valued item onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_INT(item) \
+ fail_stack.stack[fail_stack.avail++].integer = (item)
+
+/* Push a fail_stack_elt_t value onto the failure stack.
+ Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'. */
+#define PUSH_FAILURE_ELT(item) \
+ fail_stack.stack[fail_stack.avail++] = (item)
+
+/* These three POP... operations complement the three PUSH... operations.
+ All assume that `fail_stack' is nonempty. */
+#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
+#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
+#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
+
+/* Used to omit pushing failure point id's when we're not debugging. */
+#ifdef DEBUG
+#define DEBUG_PUSH PUSH_FAILURE_INT
+#define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT ()
+#else
+#define DEBUG_PUSH(item)
+#define DEBUG_POP(item_addr)
+#endif
+
+
+/* Push the information about the state we will need
+ if we ever fail back to it.
+
+ Requires variables fail_stack, regstart, regend, reg_info, and
+ num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
+ declared.
+
+ Does `return FAILURE_CODE' if runs out of memory. */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
+ do { \
+ char *destination; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ /* Can't be int, since there is not a shred of a guarantee that int \
+ is wide enough to hold a value of something to which pointer can \
+ be assigned */ \
+ s_reg_t this_reg; \
+ \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+ \
+ DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
+ \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
+ (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+ } \
+ \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
+ \
+ if (1) \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
+ \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ PUSH_FAILURE_POINTER (regstart[this_reg]); \
+ \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ PUSH_FAILURE_POINTER (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ELT (reg_info[this_reg].word); \
+ } \
+ \
+ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
+ PUSH_FAILURE_INT (lowest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
+ PUSH_FAILURE_INT (highest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_POINTER (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_POINTER (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+ } while (0)
+
+/* This is the number of items that are pushed and popped on the stack
+ for each register. */
+#define NUM_REG_ITEMS 3
+
+/* Individual items aside from the registers. */
+#ifdef DEBUG
+#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+#else
+#define NUM_NONREG_ITEMS 4
+#endif
+
+/* We push at most this many items on the stack. */
+#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* We actually push this many items. */
+#define NUM_FAILURE_ITEMS \
+ (((0 \
+ ? 0 : highest_active_reg - lowest_active_reg + 1) \
+ * NUM_REG_ITEMS) \
+ + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it. */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAIL_STACK pushes.
+
+ We restore into the parameters, all of which should be lvalues:
+ STR -- the saved data position.
+ PAT -- the saved pattern position.
+ LOW_REG, HIGH_REG -- the highest and lowest active registers.
+ REGSTART, REGEND -- arrays of string positions.
+ REG_INFO -- array of information about each subexpression.
+
+ Also assumes the variables `fail_stack' and (if debugging), `bufp',
+ `pend', `string1', `size1', `string2', and `size2'. */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{ \
+ DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
+ s_reg_t this_reg; \
+ const unsigned char *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_POINTER (); \
+ if (string_temp != NULL) \
+ str = (const char *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (unsigned char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
+ \
+ low_reg = (active_reg_t) POP_FAILURE_INT (); \
+ DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
+ \
+ if (1) \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ELT (); \
+ DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
+ \
+ regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ } \
+ else \
+ { \
+ for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
+ { \
+ reg_info[this_reg].word.integer = 0; \
+ regend[this_reg] = 0; \
+ regstart[this_reg] = 0; \
+ } \
+ highest_active_reg = high_reg; \
+ } \
+ \
+ set_regs_matched_done = 0; \
+ DEBUG_STATEMENT (nfailure_points_popped++); \
+} /* POP_FAILURE_POINT */
+
+
+
+/* Structure for per-register (a.k.a. per-group) information.
+ Other register information, such as the
+ starting and ending positions (which are addresses), and the list of
+ inner groups (which is a bits list) are maintained in separate
+ variables.
+
+ We are making a (strictly speaking) nonportable assumption here: that
+ the compiler will pack our bit fields into something that fits into
+ the type of `word', i.e., is something that fits into one item on the
+ failure stack. */
+
+
+/* Declarations and macros for re_match_2. */
+
+typedef union
+{
+ fail_stack_elt_t word;
+ struct
+ {
+ /* This field is one if this group can match the empty string,
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+#define MATCH_NULL_UNSET_VALUE 3
+ unsigned match_null_string_p : 2;
+ unsigned is_active : 1;
+ unsigned matched_something : 1;
+ unsigned ever_matched_something : 1;
+ } bits;
+} register_info_type;
+
+#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
+#define IS_ACTIVE(R) ((R).bits.is_active)
+#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
+
+
+/* Call this when have matched a real character; it sets `matched' flags
+ for the subexpressions which we are currently inside. Also records
+ that those subexprs have matched. */
+#define SET_REGS_MATCHED() \
+ do \
+ { \
+ if (!set_regs_matched_done) \
+ { \
+ active_reg_t r; \
+ set_regs_matched_done = 1; \
+ for (r = lowest_active_reg; r <= highest_active_reg; r++) \
+ { \
+ MATCHED_SOMETHING (reg_info[r]) \
+ = EVER_MATCHED_SOMETHING (reg_info[r]) \
+ = 1; \
+ } \
+ } \
+ } \
+ while (0)
+
+/* Registers are set to a sentinel when they haven't yet matched. */
+static char reg_unset_dummy;
+#define REG_UNSET_VALUE (&reg_unset_dummy)
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
/* Subroutine declarations and macros for regex_compile. */
static reg_errcode_t regex_compile _RE_ARGS((const char *pattern, size_t size,
@@ -907,15 +1443,17 @@ static reg_errcode_t compile_range _RE_ARGS((const char **p_ptr,
reg_syntax_t syntax,
unsigned char *b));
-/* Fetch the next character in the uncompiled pattern---translating it
+/* Fetch the next character in the uncompiled pattern---translating it
if necessary. Also cast from a signed character in the constant
string passed to us by the user to an unsigned char that we can use
as an array index (in, e.g., `translate'). */
+#ifndef PATFETCH
#define PATFETCH(c) \
do {if (p == pend) return REG_EEND; \
c = (unsigned char) *p++; \
- if (translate) c = translate[c]; \
+ if (translate) c = (unsigned char) translate[c]; \
} while (0)
+#endif
/* Fetch the next character in the uncompiled pattern, with no
translation. */
@@ -932,7 +1470,10 @@ static reg_errcode_t compile_range _RE_ARGS((const char **p_ptr,
cast the subscript to translate because some data is declared as
`char *', to avoid warnings when a string constant is passed. But
when we use a character as a subscript we must make it unsigned. */
-#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+#ifndef TRANSLATE
+#define TRANSLATE(d) \
+ (translate ? (char) translate[(unsigned char) (d)] : (d))
+#endif
/* Macros for outputting the compiled pattern into `buffer'. */
@@ -1061,7 +1602,7 @@ typedef struct
pattern_offset_t begalt_offset;
pattern_offset_t fixup_alt_jump;
pattern_offset_t inner_group_offset;
- pattern_offset_t laststart_offset;
+ pattern_offset_t laststart_offset;
regnum_t regnum;
} compile_stack_elt_t;
@@ -1104,7 +1645,7 @@ typedef struct
PATFETCH (c); \
} \
} \
- }
+ }
#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
@@ -1116,6 +1657,54 @@ typedef struct
|| STREQ (string, "punct") || STREQ (string, "graph") \
|| STREQ (string, "cntrl") || STREQ (string, "blank"))
+#ifndef MATCH_MAY_ALLOCATE
+
+/* If we cannot allocate large objects within re_match_2_internal,
+ we make the fail stack and register vectors global.
+ The fail stack, we grow to the maximum size when a regexp
+ is compiled.
+ The register vectors, we adjust in size each time we
+ compile a regexp, according to the number of registers it needs. */
+
+static fail_stack_type fail_stack;
+
+/* Size with which the following vectors are currently allocated.
+ That is so we can make them bigger as needed,
+ but never make them smaller. */
+static int regs_allocated_size;
+
+static const char ** regstart, ** regend;
+static const char ** old_regstart, ** old_regend;
+static const char **best_regstart, **best_regend;
+static register_info_type *reg_info;
+static const char **reg_dummy;
+static register_info_type *reg_info_dummy;
+
+/* Make the register vectors big enough for NUM_REGS registers,
+ but don't make them smaller. */
+
+static
+regex_grow_registers (num_regs)
+ int num_regs;
+{
+ if (num_regs > regs_allocated_size)
+ {
+ RETALLOC_IF (regstart, num_regs, const char *);
+ RETALLOC_IF (regend, num_regs, const char *);
+ RETALLOC_IF (old_regstart, num_regs, const char *);
+ RETALLOC_IF (old_regend, num_regs, const char *);
+ RETALLOC_IF (best_regstart, num_regs, const char *);
+ RETALLOC_IF (best_regend, num_regs, const char *);
+ RETALLOC_IF (reg_info, num_regs, register_info_type);
+ RETALLOC_IF (reg_dummy, num_regs, const char *);
+ RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
+
+ regs_allocated_size = num_regs;
+ }
+}
+
+#endif /* not MATCH_MAY_ALLOCATE */
+
static boolean group_in_compile_stack _RE_ARGS((compile_stack_type
compile_stack,
regnum_t regnum));
@@ -1134,10 +1723,14 @@ static boolean group_in_compile_stack _RE_ARGS((compile_stack_type
`fastmap_accurate' is zero;
`re_nsub' is the number of subexpressions in PATTERN;
`not_bol' and `not_eol' are zero;
-
+
The `fastmap' and `newline_anchor' fields are neither
examined nor set. */
+/* Return, freeing storage we allocated. */
+#define FREE_STACK_RETURN(value) \
+ return (free (compile_stack.stack), value)
+
static reg_errcode_t
regex_compile (pattern, size, syntax, bufp)
const char *pattern;
@@ -1149,22 +1742,22 @@ regex_compile (pattern, size, syntax, bufp)
`char *' (i.e., signed), we declare these variables as unsigned, so
they can be reliably used as array indices. */
register unsigned char c, c1;
-
- /* A random tempory spot in PATTERN. */
+
+ /* A random temporary spot in PATTERN. */
const char *p1;
/* Points to the end of the buffer, where we should append. */
register unsigned char *b;
-
+
/* Keeps track of unclosed groups. */
compile_stack_type compile_stack;
/* Points to the current (ending) position in the pattern. */
const char *p = pattern;
const char *pend = pattern + size;
-
+
/* How to translate the characters in the pattern. */
- char *translate = bufp->translate;
+ RE_TRANSLATE_TYPE translate = bufp->translate;
/* Address of the count-byte of the most recently inserted `exactn'
command. This makes it possible to tell if a new exact-match
@@ -1183,7 +1776,7 @@ regex_compile (pattern, size, syntax, bufp)
/* Place in the uncompiled pattern (i.e., the {) to
which to go back if the interval is invalid. */
const char *beg_interval;
-
+
/* Address of the place where a forward jump should go to the end of
the containing expression. Each alternative of an `or' -- except the
last -- ends with a forward jump of this sort. */
@@ -1199,9 +1792,9 @@ regex_compile (pattern, size, syntax, bufp)
if (debug)
{
unsigned debug_count;
-
+
for (debug_count = 0; debug_count < size; debug_count++)
- printchar (pattern[debug_count]);
+ putchar (pattern[debug_count]);
putchar ('\n');
}
#endif /* DEBUG */
@@ -1223,9 +1816,9 @@ regex_compile (pattern, size, syntax, bufp)
printer (for debugging) will think there's no pattern. We reset it
at the end. */
bufp->used = 0;
-
+
/* Always count groups, whether or not bufp->no_sub is set. */
- bufp->re_nsub = 0;
+ bufp->re_nsub = 0;
#if !defined (emacs) && !defined (SYNTAX_TABLE)
/* Initialize the syntax table. */
@@ -1244,7 +1837,7 @@ regex_compile (pattern, size, syntax, bufp)
{ /* Caller did not allocate a buffer. Do it for them. */
bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
}
- if (!bufp->buffer) return REG_ESPACE;
+ if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
bufp->allocated = INIT_BUF_SIZE;
}
@@ -1276,7 +1869,7 @@ regex_compile (pattern, size, syntax, bufp)
case '$':
{
if ( /* If at end of pattern, it's an operator. */
- p == pend
+ p == pend
/* If context independent, it's an operator. */
|| syntax & RE_CONTEXT_INDEP_ANCHORS
/* Otherwise, depends on what's next. */
@@ -1299,7 +1892,7 @@ regex_compile (pattern, size, syntax, bufp)
if (!laststart)
{
if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
+ FREE_STACK_RETURN (REG_BADRPT);
else if (!(syntax & RE_CONTEXT_INDEP_OPS))
goto normal_char;
}
@@ -1307,7 +1900,7 @@ regex_compile (pattern, size, syntax, bufp)
{
/* Are we optimizing this jump? */
boolean keep_string_p = false;
-
+
/* 1 means zero (many) matches is allowed. */
char zero_times_ok = 0, many_times_ok = 0;
@@ -1332,7 +1925,7 @@ regex_compile (pattern, size, syntax, bufp)
else if (syntax & RE_BK_PLUS_QM && c == '\\')
{
- if (p == pend) return REG_EESCAPE;
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
PATFETCH (c1);
if (!(c1 == '+' || c1 == '?'))
@@ -1355,7 +1948,7 @@ regex_compile (pattern, size, syntax, bufp)
/* Star, etc. applied to an empty pattern is equivalent
to an empty pattern. */
- if (!laststart)
+ if (!laststart)
break;
/* Now we know whether or not zero matches is allowed
@@ -1364,7 +1957,7 @@ regex_compile (pattern, size, syntax, bufp)
{ /* More than one repetition is allowed, so put in at the
end a backward relative jump from `b' to before the next
jump we're going to put in below (which jumps from
- laststart to after this jump).
+ laststart to after this jump).
But if we are at the `*' in the exact sequence `.*\n',
insert an unconditional jump backwards to the .,
@@ -1431,7 +2024,7 @@ regex_compile (pattern, size, syntax, bufp)
{
boolean had_char_class = false;
- if (p == pend) return REG_EBRACK;
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
/* Ensure that we have enough space to push a charset: the
opcode, the length count, and the bitset; 34 bytes in all. */
@@ -1441,7 +2034,7 @@ regex_compile (pattern, size, syntax, bufp)
/* We test `*p == '^' twice, instead of using an if
statement, so we only need one BUF_PUSH. */
- BUF_PUSH (*p == '^' ? charset_not : charset);
+ BUF_PUSH (*p == '^' ? charset_not : charset);
if (*p == '^')
p++;
@@ -1462,14 +2055,14 @@ regex_compile (pattern, size, syntax, bufp)
/* Read in characters and ranges, setting map bits. */
for (;;)
{
- if (p == pend) return REG_EBRACK;
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
PATFETCH (c);
/* \ might escape characters inside [...] and [^...]. */
if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
{
- if (p == pend) return REG_EESCAPE;
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
PATFETCH (c1);
SET_LIST_BIT (c1);
@@ -1485,20 +2078,20 @@ regex_compile (pattern, size, syntax, bufp)
/* Look ahead to see if it's a range when the last thing
was a character class. */
if (had_char_class && c == '-' && *p != ']')
- return REG_ERANGE;
+ FREE_STACK_RETURN (REG_ERANGE);
/* Look ahead to see if it's a range when the last thing
was a character: if this is a hyphen not at the
beginning or the end of a list, then it's the range
operator. */
- if (c == '-'
- && !(p - 2 >= pattern && p[-2] == '[')
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
&& !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
&& *p != ']')
{
reg_errcode_t ret
= compile_range (&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR) return ret;
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
}
else if (p[0] == '-' && p[1] != ']')
@@ -1507,9 +2100,9 @@ regex_compile (pattern, size, syntax, bufp)
/* Move past the `-'. */
PATFETCH (c1);
-
+
ret = compile_range (&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR) return ret;
+ if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
}
/* See if we're at the beginning of a possible character
@@ -1523,7 +2116,7 @@ regex_compile (pattern, size, syntax, bufp)
c1 = 0;
/* If pattern is `[[:'. */
- if (p == pend) return REG_EBRACK;
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
for (;;)
{
@@ -1536,7 +2129,7 @@ regex_compile (pattern, size, syntax, bufp)
str[c1] = '\0';
/* If isn't a word bracketed by `[:' and:`]':
- undo the ending character, the letters, and leave
+ undo the ending character, the letters, and leave
the leading `:' and `[' (but set bits for them). */
if (c == ':' && *p == ']')
{
@@ -1553,37 +2146,45 @@ regex_compile (pattern, size, syntax, bufp)
boolean is_space = STREQ (str, "space");
boolean is_upper = STREQ (str, "upper");
boolean is_xdigit = STREQ (str, "xdigit");
-
- if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+
+ if (!IS_CHAR_CLASS (str))
+ FREE_STACK_RETURN (REG_ECTYPE);
/* Throw away the ] at the end of the character
class. */
- PATFETCH (c);
+ PATFETCH (c);
- if (p == pend) return REG_EBRACK;
+ if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
{
+ /* This was split into 3 if's to
+ avoid an arbitrary limit in some compiler. */
if ( (is_alnum && ISALNUM (ch))
|| (is_alpha && ISALPHA (ch))
|| (is_blank && ISBLANK (ch))
- || (is_cntrl && ISCNTRL (ch))
- || (is_digit && ISDIGIT (ch))
+ || (is_cntrl && ISCNTRL (ch)))
+ SET_LIST_BIT (ch);
+ if ( (is_digit && ISDIGIT (ch))
|| (is_graph && ISGRAPH (ch))
|| (is_lower && ISLOWER (ch))
- || (is_print && ISPRINT (ch))
- || (is_punct && ISPUNCT (ch))
+ || (is_print && ISPRINT (ch)))
+ SET_LIST_BIT (ch);
+ if ( (is_punct && ISPUNCT (ch))
|| (is_space && ISSPACE (ch))
|| (is_upper && ISUPPER (ch))
|| (is_xdigit && ISXDIGIT (ch)))
- SET_LIST_BIT (ch);
+ SET_LIST_BIT (ch);
+ if ( translate && (is_upper || is_lower)
+ && (ISUPPER(ch) || ISLOWER(ch)))
+ SET_LIST_BIT (ch);
}
had_char_class = true;
}
else
{
c1++;
- while (c1--)
+ while (c1--)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT (':');
@@ -1599,8 +2200,8 @@ regex_compile (pattern, size, syntax, bufp)
/* Discard any (non)matching list bytes that are all 0 at the
end of the map. Decrease the map-length byte too. */
- while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
- b[-1]--;
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
b += b[-1];
}
break;
@@ -1642,7 +2243,7 @@ regex_compile (pattern, size, syntax, bufp)
case '\\':
- if (p == pend) return REG_EESCAPE;
+ if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
/* Do not translate the character after the \, so that we can
distinguish, e.g., \B from \b, even if we normally would
@@ -1660,7 +2261,7 @@ regex_compile (pattern, size, syntax, bufp)
regnum++;
if (COMPILE_STACK_FULL)
- {
+ {
RETALLOC (compile_stack.stack, compile_stack.size << 1,
compile_stack_elt_t);
if (compile_stack.stack == NULL) return REG_ESPACE;
@@ -1673,7 +2274,7 @@ regex_compile (pattern, size, syntax, bufp)
whole pattern moves because of realloc, they will still
be valid. */
COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
- COMPILE_STACK_TOP.fixup_alt_jump
+ COMPILE_STACK_TOP.fixup_alt_jump
= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
COMPILE_STACK_TOP.regnum = regnum;
@@ -1687,7 +2288,7 @@ regex_compile (pattern, size, syntax, bufp)
COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
BUF_PUSH_3 (start_memory, regnum, 0);
}
-
+
compile_stack.avail++;
fixup_alt_jump = 0;
@@ -1707,7 +2308,7 @@ regex_compile (pattern, size, syntax, bufp)
if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
goto normal_backslash;
else
- return REG_ERPAREN;
+ FREE_STACK_RETURN (REG_ERPAREN);
handle_close:
if (fixup_alt_jump)
@@ -1716,7 +2317,7 @@ regex_compile (pattern, size, syntax, bufp)
`pop_failure_jump' to pop. See comments at
`push_dummy_failure' in `re_match_2'. */
BUF_PUSH (push_dummy_failure);
-
+
/* We allocated space for this jump when we assigned
to `fixup_alt_jump', in the `handle_alt' case below. */
STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
@@ -1727,7 +2328,7 @@ regex_compile (pattern, size, syntax, bufp)
if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
goto normal_char;
else
- return REG_ERPAREN;
+ FREE_STACK_RETURN (REG_ERPAREN);
/* Since we just checked for an empty stack above, this
``can't happen''. */
@@ -1738,11 +2339,11 @@ regex_compile (pattern, size, syntax, bufp)
as in `(ab)c(de)' -- the second group is #2. */
regnum_t this_group_regnum;
- compile_stack.avail--;
+ compile_stack.avail--;
begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
fixup_alt_jump
= COMPILE_STACK_TOP.fixup_alt_jump
- ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
+ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
: 0;
laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
this_group_regnum = COMPILE_STACK_TOP.regnum;
@@ -1757,7 +2358,7 @@ regex_compile (pattern, size, syntax, bufp)
{
unsigned char *inner_group_loc
= bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
-
+
*inner_group_loc = regnum - this_group_regnum;
BUF_PUSH_3 (stop_memory, this_group_regnum,
regnum - this_group_regnum);
@@ -1786,10 +2387,10 @@ regex_compile (pattern, size, syntax, bufp)
jump (put in below, which in turn will jump to the next
(if any) alternative's such jump, etc.). The last such
jump jumps to the correct final destination. A picture:
- _____ _____
- | | | |
- | v | v
- a | b | c
+ _____ _____
+ | | | |
+ | v | v
+ a | b | c
If we are at `b', then fixup_alt_jump right now points to a
three-byte space after `a'. We'll put in the jump, set
@@ -1811,10 +2412,10 @@ regex_compile (pattern, size, syntax, bufp)
break;
- case '{':
+ case '{':
/* If \{ is a literal. */
if (!(syntax & RE_INTERVALS)
- /* If we're at `\{' and it's not the open-interval
+ /* If we're at `\{' and it's not the open-interval
operator. */
|| ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
|| (p - 2 == pattern && p == pend))
@@ -1834,7 +2435,7 @@ regex_compile (pattern, size, syntax, bufp)
if (syntax & RE_NO_BK_BRACES)
goto unfetch_interval;
else
- return REG_EBRACE;
+ FREE_STACK_RETURN (REG_EBRACE);
}
GET_UNSIGNED_NUMBER (lower_bound);
@@ -1853,13 +2454,13 @@ regex_compile (pattern, size, syntax, bufp)
{
if (syntax & RE_NO_BK_BRACES)
goto unfetch_interval;
- else
- return REG_BADBR;
+ else
+ FREE_STACK_RETURN (REG_BADBR);
}
- if (!(syntax & RE_NO_BK_BRACES))
+ if (!(syntax & RE_NO_BK_BRACES))
{
- if (c != '\\') return REG_EBRACE;
+ if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
PATFETCH (c);
}
@@ -1868,8 +2469,8 @@ regex_compile (pattern, size, syntax, bufp)
{
if (syntax & RE_NO_BK_BRACES)
goto unfetch_interval;
- else
- return REG_BADBR;
+ else
+ FREE_STACK_RETURN (REG_BADBR);
}
/* We just parsed a valid interval. */
@@ -1878,7 +2479,7 @@ regex_compile (pattern, size, syntax, bufp)
if (!laststart)
{
if (syntax & RE_CONTEXT_INVALID_OPS)
- return REG_BADRPT;
+ FREE_STACK_RETURN (REG_BADRPT);
else if (syntax & RE_CONTEXT_INDEP_OPS)
laststart = b;
else
@@ -1899,12 +2500,12 @@ regex_compile (pattern, size, syntax, bufp)
we're all done, the pattern will look like:
set_number_at <jump count> <upper bound>
set_number_at <succeed_n count> <lower bound>
- succeed_n <after jump addr> <succed_n count>
+ succeed_n <after jump addr> <succeed_n count>
<body of loop>
jump_n <succeed_n addr> <jump count>
(The upper bound and `jump_n' are omitted if
`upper_bound' is 1, though.) */
- else
+ else
{ /* If the upper bound is > 1, we need to insert
more at the end of the loop. */
unsigned nbytes = 10 + (upper_bound > 1) * 10;
@@ -1921,7 +2522,7 @@ regex_compile (pattern, size, syntax, bufp)
lower_bound);
b += 5;
- /* Code to initialize the lower bound. Insert
+ /* Code to initialize the lower bound. Insert
before the `succeed_n'. The `5' is the last two
bytes of this `set_number_at', plus 3 bytes of
the following `succeed_n'. */
@@ -1932,7 +2533,7 @@ regex_compile (pattern, size, syntax, bufp)
{ /* More than one repetition is allowed, so
append a backward jump to the `succeed_n'
that starts this interval.
-
+
When we've reached this during matching,
we'll have matched the interval once, so
jump back only `upper_bound - 1' times. */
@@ -1950,7 +2551,7 @@ regex_compile (pattern, size, syntax, bufp)
so everything is getting moved up by 5.
Conclusion: (b - 2) - (laststart + 3) + 5,
i.e., b - laststart.
-
+
We insert this at the beginning of the loop
so that if we fail during matching, we'll
reinitialize the bounds. */
@@ -1971,7 +2572,7 @@ regex_compile (pattern, size, syntax, bufp)
beg_interval = NULL;
/* normal_char and normal_backslash need `c'. */
- PATFETCH (c);
+ PATFETCH (c);
if (!(syntax & RE_NO_BK_BRACES))
{
@@ -1987,7 +2588,7 @@ regex_compile (pattern, size, syntax, bufp)
BUF_PUSH (at_dot);
break;
- case 's':
+ case 's':
laststart = b;
PATFETCH (c);
BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
@@ -2061,7 +2662,7 @@ regex_compile (pattern, size, syntax, bufp)
c1 = c - '0';
if (c1 > regnum)
- return REG_ESUBREG;
+ FREE_STACK_RETURN (REG_ESUBREG);
/* Can't back reference to a subexpression if inside of it. */
if (group_in_compile_stack (compile_stack, (regnum_t)c1))
@@ -2094,11 +2695,11 @@ regex_compile (pattern, size, syntax, bufp)
/* Expects the character in `c'. */
normal_char:
/* If no exactn currently being built. */
- if (!pending_exact
+ if (!pending_exact
/* If last exactn not at current position. */
|| pending_exact + *pending_exact + 1 != b
-
+
/* We have only one byte following the exactn for the count. */
|| *pending_exact == (1 << BYTEWIDTH) - 1
@@ -2113,27 +2714,32 @@ regex_compile (pattern, size, syntax, bufp)
: (p[0] == '\\' && p[1] == '{'))))
{
/* Start building a new exactn. */
-
+
laststart = b;
BUF_PUSH_2 (exactn, 0);
pending_exact = b - 1;
}
-
+
BUF_PUSH (c);
(*pending_exact)++;
break;
} /* switch (c) */
} /* while p != pend */
-
+
/* Through the pattern now. */
-
+
if (fixup_alt_jump)
STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
- if (!COMPILE_STACK_EMPTY)
- return REG_EPAREN;
+ if (!COMPILE_STACK_EMPTY)
+ FREE_STACK_RETURN (REG_EPAREN);
+
+ /* If we don't want backtracking, force success
+ the first time we reach the end of the compiled pattern. */
+ if (syntax & RE_NO_POSIX_BACKTRACKING)
+ BUF_PUSH (succeed);
free (compile_stack.stack);
@@ -2148,6 +2754,47 @@ regex_compile (pattern, size, syntax, bufp)
}
#endif /* DEBUG */
+#ifndef MATCH_MAY_ALLOCATE
+ /* Initialize the failure stack to the largest possible stack. This
+ isn't necessary unless we're trying to avoid calling alloca in
+ the search and match routines. */
+ {
+ int num_regs = bufp->re_nsub + 1;
+
+ /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
+ is strictly greater than re_max_failures, the largest possible stack
+ is 2 * re_max_failures failure points. */
+ if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
+ {
+ fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
+
+#ifdef emacs
+ if (! fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) xmalloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
+ else
+ fail_stack.stack
+ = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+#else /* not emacs */
+ if (! fail_stack.stack)
+ fail_stack.stack
+ = (fail_stack_elt_t *) malloc (fail_stack.size
+ * sizeof (fail_stack_elt_t));
+ else
+ fail_stack.stack
+ = (fail_stack_elt_t *) realloc (fail_stack.stack,
+ (fail_stack.size
+ * sizeof (fail_stack_elt_t)));
+#endif /* not emacs */
+ }
+
+ regex_grow_registers (num_regs);
+ }
+#endif /* not MATCH_MAY_ALLOCATE */
+
return REG_NOERROR;
} /* regex_compile */
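
A minimal sketch of the sizing rule behind the pre-allocation above (hypothetical helper, not part of regex.c): DOUBLE_FAIL_STACK refuses to grow the stack only once its size is already strictly greater than re_max_failures * MAX_FAILURE_ITEMS, so the final doubling can reach, but never exceed, twice that limit -- which is why the code pre-allocates 2 * re_max_failures * MAX_FAILURE_ITEMS slots.

/* Hypothetical illustration of the growth bound; SIZE must be positive.
   Mirrors the "refuse to double only when size > limit" test above. */
static unsigned
grown_stack_size (unsigned size, unsigned limit)
{
  while (size <= limit)
    size <<= 1;		/* the last doubling lands in (limit, 2 * limit] */
  return size;
}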
@@ -2188,14 +2835,14 @@ insert_op1 (op, loc, arg, end)
re_opcode_t op;
unsigned char *loc;
int arg;
- unsigned char *end;
+ unsigned char *end;
{
register unsigned char *pfrom = end;
register unsigned char *pto = end + 3;
while (pfrom != loc)
*--pto = *--pfrom;
-
+
store_op1 (op, loc, arg);
}
@@ -2207,14 +2854,14 @@ insert_op2 (op, loc, arg1, arg2, end)
re_opcode_t op;
unsigned char *loc;
int arg1, arg2;
- unsigned char *end;
+ unsigned char *end;
{
register unsigned char *pfrom = end;
register unsigned char *pto = end + 5;
while (pfrom != loc)
*--pto = *--pfrom;
-
+
store_op2 (op, loc, arg1, arg2);
}
@@ -2230,7 +2877,7 @@ at_begline_loc_p (pattern, p, syntax)
{
const char *prev = p - 2;
boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
-
+
return
/* After a subexpression? */
(*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
@@ -2249,8 +2896,8 @@ at_endline_loc_p (p, pend, syntax)
{
const char *next = p;
boolean next_backslash = *next == '\\';
- const char *next_next = p + 1 < pend ? p + 1 : NULL;
-
+ const char *next_next = p + 1 < pend ? p + 1 : 0;
+
return
/* Before a subexpression? */
(syntax & RE_NO_BK_PARENS ? *next == ')'
@@ -2261,7 +2908,7 @@ at_endline_loc_p (p, pend, syntax)
}
-/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
+/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
false if it's not. */
static boolean
@@ -2271,8 +2918,8 @@ group_in_compile_stack (compile_stack, regnum)
{
int this_element;
- for (this_element = compile_stack.avail - 1;
- this_element >= 0;
+ for (this_element = compile_stack.avail - 1;
+ this_element >= 0;
this_element--)
if (compile_stack.stack[this_element].regnum == regnum)
return true;
@@ -2286,16 +2933,16 @@ group_in_compile_stack (compile_stack, regnum)
starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
Then we set the translation of all bits between the starting and
ending characters (inclusive) in the compiled pattern B.
-
+
Return an error code.
-
+
We use these short variable names so we can use the same macros as
`regex_compile' itself. */
static reg_errcode_t
compile_range (p_ptr, pend, translate, syntax, b)
const char **p_ptr, *pend;
- char *translate;
+ RE_TRANSLATE_TYPE translate;
reg_syntax_t syntax;
unsigned char *b;
{
@@ -2303,7 +2950,7 @@ compile_range (p_ptr, pend, translate, syntax, b)
const char *p = *p_ptr;
int range_start, range_end;
-
+
if (p == pend)
return REG_ERANGE;
@@ -2312,10 +2959,11 @@ compile_range (p_ptr, pend, translate, syntax, b)
is set, the range endpoints will be negative if we fetch using a
signed char *.
- We also want to fetch the endpoints without translating them; the
+ We also want to fetch the endpoints without translating them; the
appropriate translation is done in the bit-setting loop below. */
- range_start = ((unsigned char *) p)[-2];
- range_end = ((unsigned char *) p)[0];
+ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */
+ range_start = ((const unsigned char *) p)[-2];
+ range_end = ((const unsigned char *) p)[0];
/* Have to increment the pointer into the pattern string, so the
caller isn't still at the ending character. */
@@ -2333,300 +2981,10 @@ compile_range (p_ptr, pend, translate, syntax, b)
{
SET_LIST_BIT (TRANSLATE (this_char));
}
-
+
return REG_NOERROR;
}
-/* Failure stack declarations and macros; both re_compile_fastmap and
- re_match_2 use a failure stack. These have to be macros because of
- REGEX_ALLOCATE. */
-
-
-/* Number of failure points for which to initially allocate space
- when matching. If this number is exceeded, we allocate more
- space, so it is not a hard limit. */
-#ifndef INIT_FAILURE_ALLOC
-#define INIT_FAILURE_ALLOC 5
-#endif
-
-/* Roughly the maximum number of failure points on the stack. Would be
- exactly that if always used MAX_FAILURE_SPACE each time we failed.
- This is a variable only so users of regex can assign to it; we never
- change it ourselves. */
-int re_max_failures = 2000;
-
-typedef const unsigned char *fail_stack_elt_t;
-
-typedef struct
-{
- fail_stack_elt_t *stack;
- unsigned size;
- unsigned avail; /* Offset of next open position. */
-} fail_stack_type;
-
-#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
-#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
-#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
-#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
-
-
-/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
-
-#define INIT_FAIL_STACK() \
- do { \
- fail_stack.stack = (fail_stack_elt_t *) \
- REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
- \
- if (fail_stack.stack == NULL) \
- return -2; \
- \
- fail_stack.size = INIT_FAILURE_ALLOC; \
- fail_stack.avail = 0; \
- } while (0)
-
-
-/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
-
- Return 1 if succeeds, and 0 if either ran out of memory
- allocating space for it or it was already too large.
-
- REGEX_REALLOCATE requires `destination' be declared. */
-
-#define DOUBLE_FAIL_STACK(fail_stack) \
- ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
- ? 0 \
- : ((fail_stack).stack = (fail_stack_elt_t *) \
- REGEX_REALLOCATE ((fail_stack).stack, \
- (fail_stack).size * sizeof (fail_stack_elt_t), \
- ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
- \
- (fail_stack).stack == NULL \
- ? 0 \
- : ((fail_stack).size <<= 1, \
- 1)))
-
-
-/* Push PATTERN_OP on FAIL_STACK.
-
- Return 1 if was able to do so and 0 if ran out of memory allocating
- space to do so. */
-#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
- ((FAIL_STACK_FULL () \
- && !DOUBLE_FAIL_STACK (fail_stack)) \
- ? 0 \
- : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
- 1))
-
-/* This pushes an item onto the failure stack. Must be a four-byte
- value. Assumes the variable `fail_stack'. Probably should only
- be called from within `PUSH_FAILURE_POINT'. */
-#define PUSH_FAILURE_ITEM(item) \
- fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
-
-/* The complement operation. Assumes `fail_stack' is nonempty. */
-#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
-
-/* Used to omit pushing failure point id's when we're not debugging. */
-#ifdef DEBUG
-#define DEBUG_PUSH PUSH_FAILURE_ITEM
-#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
-#else
-#define DEBUG_PUSH(item)
-#define DEBUG_POP(item_addr)
-#endif
-
-
-/* Push the information about the state we will need
- if we ever fail back to it.
-
- Requires variables fail_stack, regstart, regend, reg_info, and
- num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
- declared.
-
- Does `return FAILURE_CODE' if runs out of memory. */
-
-#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
- do { \
- char *destination; \
- /* Must be int, so when we don't save any registers, the arithmetic \
- of 0 + -1 isn't done as unsigned. */ \
- /* Can't be int, since there is not a shred of a guarantee that int \
- is wide enough to hold a value of something to which pointer can \
- be assigned */ \
- s_reg_t this_reg; \
- \
- DEBUG_STATEMENT (failure_id++); \
- DEBUG_STATEMENT (nfailure_points_pushed++); \
- DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
- DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
- DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
- \
- DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
- DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
- \
- /* Ensure we have enough space allocated for what we will push. */ \
- while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
- { \
- if (!DOUBLE_FAIL_STACK (fail_stack)) \
- return failure_code; \
- \
- DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
- (fail_stack).size); \
- DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
- }
-
-#define PUSH_FAILURE_POINT2(pattern_place, string_place, failure_code) \
- /* Push the info, starting with the registers. */ \
- DEBUG_PRINT1 ("\n"); \
- \
- PUSH_FAILURE_POINT_LOOP (); \
- \
- DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
- PUSH_FAILURE_ITEM (lowest_active_reg); \
- \
- DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
- PUSH_FAILURE_ITEM (highest_active_reg); \
- \
- DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
- DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
- PUSH_FAILURE_ITEM (pattern_place); \
- \
- DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
- DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
- size2); \
- DEBUG_PRINT1 ("'\n"); \
- PUSH_FAILURE_ITEM (string_place); \
- \
- DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
- DEBUG_PUSH (failure_id); \
- } while (0)
-
-/* Pulled out of PUSH_FAILURE_POINT() to shorten the definition
- of that macro. (for VAX C) */
-#define PUSH_FAILURE_POINT_LOOP() \
- for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
- this_reg++) \
- { \
- DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
- DEBUG_STATEMENT (num_regs_pushed++); \
- \
- DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
- PUSH_FAILURE_ITEM (regstart[this_reg]); \
- \
- DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
- PUSH_FAILURE_ITEM (regend[this_reg]); \
- \
- DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
- DEBUG_PRINT2 (" match_null=%d", \
- REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
- DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
- DEBUG_PRINT2 (" matched_something=%d", \
- MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT2 (" ever_matched=%d", \
- EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
- DEBUG_PRINT1 ("\n"); \
- PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
- }
-
-/* This is the number of items that are pushed and popped on the stack
- for each register. */
-#define NUM_REG_ITEMS 3
-
-/* Individual items aside from the registers. */
-#ifdef DEBUG
-#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
-#else
-#define NUM_NONREG_ITEMS 4
-#endif
-
-/* We push at most this many items on the stack. */
-#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
-
-/* We actually push this many items. */
-#define NUM_FAILURE_ITEMS \
- ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
- + NUM_NONREG_ITEMS)
-
-/* How many items can still be added to the stack without overflowing it. */
-#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
-
-
-/* Pops what PUSH_FAIL_STACK pushes.
-
- We restore into the parameters, all of which should be lvalues:
- STR -- the saved data position.
- PAT -- the saved pattern position.
- LOW_REG, HIGH_REG -- the highest and lowest active registers.
- REGSTART, REGEND -- arrays of string positions.
- REG_INFO -- array of information about each subexpression.
-
- Also assumes the variables `fail_stack' and (if debugging), `bufp',
- `pend', `string1', `size1', `string2', and `size2'. */
-
-#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
-{ \
- DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
- s_reg_t this_reg; \
- const unsigned char *string_temp; \
- \
- assert (!FAIL_STACK_EMPTY ()); \
- \
- /* Remove failure points and point to how many regs pushed. */ \
- DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
- DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
- DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
- \
- assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
- \
- DEBUG_POP (&failure_id); \
- DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
- \
- /* If the saved string location is NULL, it came from an \
- on_failure_keep_string_jump opcode, and we want to throw away the \
- saved NULL, thus retaining our current position in the string. */ \
- string_temp = POP_FAILURE_ITEM (); \
- if (string_temp != NULL) \
- str = (const char *) string_temp; \
- \
- DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
- DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
- DEBUG_PRINT1 ("'\n"); \
- \
- pat = (unsigned char *) POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
- DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
- \
- POP_FAILURE_POINT2 (low_reg, high_reg, regstart, regend, reg_info);
-
-/* Pulled out of POP_FAILURE_POINT() to shorten the definition
- of that macro. (for MSC 5.1) */
-#define POP_FAILURE_POINT2(low_reg, high_reg, regstart, regend, reg_info) \
- \
- /* Restore register info. */ \
- high_reg = (active_reg_t) POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
- \
- low_reg = (active_reg_t) POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
- \
- for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
- { \
- DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
- \
- reg_info[this_reg].word = POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
- \
- regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
- \
- regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
- DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
- } \
- \
- DEBUG_STATEMENT (nfailure_points_popped++); \
-} /* POP_FAILURE_POINT */
-
-
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
characters can start a string that matches the pattern. This fastmap
@@ -2634,7 +2992,7 @@ typedef struct
The caller must supply the address of a (1 << BYTEWIDTH)-byte data
area as BUFP->fastmap.
-
+
We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
the pattern buffer.
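
As a concrete illustration of how a fastmap is consumed, here is a minimal sketch (hypothetical helper, not part of regex.c) of the skip loop a searcher can run before attempting a full match, much as re_search_2 does further below:

/* Advance POS past characters that cannot begin a match, given a
   (1 << BYTEWIDTH)-byte fastmap in which a nonzero entry means "a match
   may start with this character".  Illustration only. */
static int
skip_with_fastmap (const char *fastmap, const char *string, int size, int pos)
{
  while (pos < size && !fastmap[(unsigned char) string[pos]])
    pos++;
  return pos;		/* first position worth handing to the matcher */
}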
@@ -2645,18 +3003,26 @@ re_compile_fastmap (bufp)
struct re_pattern_buffer *bufp;
{
int j, k;
+#ifdef MATCH_MAY_ALLOCATE
fail_stack_type fail_stack;
+#endif
#ifndef REGEX_MALLOC
char *destination;
#endif
/* We don't push any register information onto the failure stack. */
unsigned num_regs = 0;
-
+
register char *fastmap = bufp->fastmap;
unsigned char *pattern = bufp->buffer;
- const unsigned char *p = pattern;
+ unsigned char *p = pattern;
register unsigned char *pend = pattern + bufp->used;
+#ifdef REL_ALLOC
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
/* Assume that each path through the pattern can be null until
proven otherwise. We set this false at the bottom of switch
statement, to which we get only if a particular path doesn't
@@ -2667,32 +3033,36 @@ re_compile_fastmap (bufp)
boolean succeed_n_p = false;
assert (fastmap != NULL && p != NULL);
-
+
INIT_FAIL_STACK ();
bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
bufp->fastmap_accurate = 1; /* It will be when we're done. */
bufp->can_be_null = 0;
-
- while (p != pend || !FAIL_STACK_EMPTY ())
+
+ while (1)
{
- if (p == pend)
- {
- bufp->can_be_null |= path_can_be_null;
-
- /* Reset for next path. */
- path_can_be_null = true;
-
- p = fail_stack.stack[--fail_stack.avail];
+ if (p == pend || *p == succeed)
+ {
+ /* We have reached the (effective) end of pattern. */
+ if (!FAIL_STACK_EMPTY ())
+ {
+ bufp->can_be_null |= path_can_be_null;
+
+ /* Reset for next path. */
+ path_can_be_null = true;
+
+ p = fail_stack.stack[--fail_stack.avail].pointer;
+
+ continue;
+ }
+ else
+ break;
}
/* We should never be about to go beyond the end of the pattern. */
assert (p < pend);
-
-#ifdef SWITCH_ENUM_BUG
- switch ((int) ((re_opcode_t) *p++))
-#else
- switch ((re_opcode_t) *p++)
-#endif
+
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
{
/* I guess the idea here is to simply not bother with a fastmap
@@ -2702,7 +3072,7 @@ re_compile_fastmap (bufp)
that is all we do. */
case duplicate:
bufp->can_be_null = 1;
- return 0;
+ goto done;
/* Following are the cases which match a character. These end
@@ -2746,22 +3116,25 @@ re_compile_fastmap (bufp)
case anychar:
- /* `.' matches anything ... */
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- fastmap[j] = 1;
+ {
+ int fastmap_newline = fastmap['\n'];
- /* ... except perhaps newline. */
- if (!(bufp->syntax & RE_DOT_NEWLINE))
- fastmap['\n'] = 0;
+ /* `.' matches anything ... */
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
- /* Return if we have already set `can_be_null'; if we have,
- then the fastmap is irrelevant. Something's wrong here. */
- else if (bufp->can_be_null)
- return 0;
+ /* ... except perhaps newline. */
+ if (!(bufp->syntax & RE_DOT_NEWLINE))
+ fastmap['\n'] = fastmap_newline;
- /* Otherwise, have to check alternative paths. */
- break;
+ /* Return if we have already set `can_be_null'; if we have,
+ then the fastmap is irrelevant. Something's wrong here. */
+ else if (bufp->can_be_null)
+ goto done;
+ /* Otherwise, have to check alternative paths. */
+ break;
+ }
#ifdef emacs
case syntaxspec:
@@ -2788,7 +3161,7 @@ re_compile_fastmap (bufp)
case at_dot:
case after_dot:
continue;
-#endif /* not emacs */
+#endif /* emacs */
case no_op:
@@ -2811,10 +3184,10 @@ re_compile_fastmap (bufp)
case jump_past_alt:
case dummy_failure_jump:
EXTRACT_NUMBER_AND_INCR (j, p);
- p += j;
+ p += j;
if (j > 0)
continue;
-
+
/* Jump backward implies we just went through the body of a
loop and matched nothing. Opcode jumped to should be
`on_failure_jump' or `succeed_n'. Just treat it like an
@@ -2826,11 +3199,11 @@ re_compile_fastmap (bufp)
p++;
EXTRACT_NUMBER_AND_INCR (j, p);
- p += j;
-
+ p += j;
+
/* If what's on the stack is where we are now, pop it. */
- if (!FAIL_STACK_EMPTY ()
- && fail_stack.stack[fail_stack.avail - 1] == p)
+ if (!FAIL_STACK_EMPTY ()
+ && fail_stack.stack[fail_stack.avail - 1].pointer == p)
fail_stack.avail--;
continue;
@@ -2851,7 +3224,10 @@ re_compile_fastmap (bufp)
if (p + j < pend)
{
if (!PUSH_PATTERN_OP (p + j, fail_stack))
- return -2;
+ {
+ RESET_FAIL_STACK ();
+ return -2;
+ }
}
else
bufp->can_be_null = 1;
@@ -2867,7 +3243,7 @@ re_compile_fastmap (bufp)
case succeed_n:
/* Get to the number of times to succeed. */
- p += 2;
+ p += 2;
/* Increment p past the n for when k != 0. */
EXTRACT_NUMBER_AND_INCR (k, p);
@@ -2908,6 +3284,9 @@ re_compile_fastmap (bufp)
/* Set `can_be_null' for the last path (also the first path, if the
pattern is empty). */
bufp->can_be_null |= path_can_be_null;
+
+ done:
+ RESET_FAIL_STACK ();
return 0;
} /* re_compile_fastmap */
@@ -2942,7 +3321,7 @@ re_set_registers (bufp, regs, num_regs, starts, ends)
{
bufp->regs_allocated = REGS_UNALLOCATED;
regs->num_regs = 0;
- regs->start = regs->end = 0;
+ regs->start = regs->end = (regoff_t *) 0;
}
}
@@ -2958,7 +3337,7 @@ re_search (bufp, string, size, startpos, range, regs)
int size, startpos, range;
struct re_registers *regs;
{
- return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
+ return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
regs, size);
}
@@ -2966,17 +3345,17 @@ re_search (bufp, string, size, startpos, range, regs)
/* Using the compiled pattern in BUFP->buffer, first tries to match the
virtual concatenation of STRING1 and STRING2, starting first at index
STARTPOS, then at STARTPOS + 1, and so on.
-
+
STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
-
+
RANGE is how far to scan while trying to match. RANGE = 0 means try
only at STARTPOS; in general, the last start tried is STARTPOS +
RANGE.
-
+
In REGS, return the indices of the virtual concatenation of STRING1
and STRING2 that matched the entire BUFP->buffer and its contained
subexpressions.
-
+
Do not consider matching one past the index STOP in the virtual
concatenation of STRING1 and STRING2.
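
A hypothetical caller, shown only to make the parameters concrete (the pattern buffer is assumed to have been compiled already, e.g. via re_compile_pattern): search the 7-character virtual concatenation of "abc" and "defg", trying every starting position from 0 onward and allowing the match to run to the very end.

#include <regex.h>

/* Returns the starting offset of the first match, or -1 (no match) or
   -2 (internal error), exactly as re_search_2 itself reports. */
static int
find_in_two_pieces (struct re_pattern_buffer *bufp, struct re_registers *regs)
{
  const char *s1 = "abc";	/* size1 = 3 */
  const char *s2 = "defg";	/* size2 = 4 */

  /* startpos 0, range 7 (scan forward over all 7 positions), stop 7. */
  return re_search_2 (bufp, s1, 3, s2, 4, 0, 7, regs, 7);
}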
@@ -2996,18 +3375,19 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
{
int val;
register char *fastmap = bufp->fastmap;
- register char *translate = bufp->translate;
+ register RE_TRANSLATE_TYPE translate = bufp->translate;
int total_size = size1 + size2;
int endpos = startpos + range;
/* Check for out-of-range STARTPOS. */
if (startpos < 0 || startpos > total_size)
return -1;
-
+
/* Fix up RANGE if it might eventually take us outside
- the virtual concatenation of STRING1 and STRING2. */
- if (endpos < -1)
- range = -1 - startpos;
+ the virtual concatenation of STRING1 and STRING2.
+ Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
+ if (endpos < 0)
+ range = 0 - startpos;
else if (endpos > total_size)
range = total_size - startpos;
@@ -3021,14 +3401,25 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
range = 1;
}
+#ifdef emacs
+ /* In a forward search for something that starts with \=.
+ don't keep searching past point. */
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
+ {
+ range = PT - startpos;
+ if (range <= 0)
+ return -1;
+ }
+#endif /* emacs */
+
/* Update the fastmap now if not correct already. */
if (fastmap && !bufp->fastmap_accurate)
if (re_compile_fastmap (bufp) == -2)
return -2;
-
+
/* Loop through the string, looking for a place to start matching. */
for (;;)
- {
+ {
/* If a fastmap is supplied, skip quickly over characters that
cannot be the start of a match. If the pattern can match the
null string, however, we don't need to skip characters; we want
@@ -3045,7 +3436,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
lim = range - (size1 - startpos);
d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
-
+
/* Written out as an if-else to avoid testing `translate'
inside the loop. */
if (translate)
@@ -3062,7 +3453,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
else /* Searching backwards. */
{
register char c = (size1 == 0 || startpos >= size1
- ? string2[startpos - size1]
+ ? string2[startpos - size1]
: string1[startpos]);
if (!fastmap[(unsigned char) TRANSLATE (c)])
@@ -3075,104 +3466,43 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
&& !bufp->can_be_null)
return -1;
- val = re_match_2 (bufp, string1, size1, string2, size2,
- startpos, regs, stop);
+ val = re_match_2_internal (bufp, string1, size1, string2, size2,
+ startpos, regs, stop);
+#ifndef REGEX_MALLOC
+#ifdef C_ALLOCA
+ alloca (0);
+#endif
+#endif
+
if (val >= 0)
return startpos;
-
+
if (val == -2)
return -2;
advance:
- if (!range)
+ if (!range)
break;
- else if (range > 0)
+ else if (range > 0)
{
- range--;
+ range--;
startpos++;
}
else
{
- range++;
+ range++;
startpos--;
}
}
return -1;
} /* re_search_2 */
-/* Structure for per-register (a.k.a. per-group) information.
- This must not be longer than one word, because we push this value
- onto the failure stack. Other register information, such as the
- starting and ending positions (which are addresses), and the list of
- inner groups (which is a bits list) are maintained in separate
- variables.
-
- We are making a (strictly speaking) nonportable assumption here: that
- the compiler will pack our bit fields into something that fits into
- the type of `word', i.e., is something that fits into one item on the
- failure stack. */
-
-/* Declarations and macros for re_match_2. */
-
-typedef union
-{
- fail_stack_elt_t word;
- struct
- {
- /* This field is one if this group can match the empty string,
- zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
-#define MATCH_NULL_UNSET_VALUE 3
- unsigned match_null_string_p : 2;
- unsigned is_active : 1;
- unsigned matched_something : 1;
- unsigned ever_matched_something : 1;
- } bits;
-} register_info_type;
-
-#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
-#define IS_ACTIVE(R) ((R).bits.is_active)
-#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
-#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
-
-static boolean group_match_null_string_p _RE_ARGS((unsigned char **p,
- unsigned char *end,
- register_info_type *reg_info));
-static boolean alt_match_null_string_p _RE_ARGS((unsigned char *p,
- unsigned char *end,
- register_info_type *reg_info));
-static boolean common_op_match_null_string_p _RE_ARGS((unsigned char **p,
- unsigned char *end,
- register_info_type *reg_info));
-static int bcmp_translate _RE_ARGS((const char *s1, const char *s2,
- int len, char *translate));
-
-/* Call this when have matched a real character; it sets `matched' flags
- for the subexpressions which we are currently inside. Also records
- that those subexprs have matched. */
-#define SET_REGS_MATCHED() \
- do \
- { \
- active_reg_t r; \
- for (r = lowest_active_reg; r <= highest_active_reg; r++) \
- { \
- MATCHED_SOMETHING (reg_info[r]) \
- = EVER_MATCHED_SOMETHING (reg_info[r]) \
- = 1; \
- } \
- } \
- while (0)
-
-
/* This converts PTR, a pointer into one of the search strings `string1'
and `string2' into an offset from the beginning of that string. */
-#define POINTER_TO_OFFSET(ptr) \
- (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
-
-/* Registers are set to a sentinel when they haven't yet matched. */
-static char reg_unset_dummy;
-#define REG_UNSET_VALUE (&reg_unset_dummy)
-#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
-
+#define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) \
+ ? ((regoff_t) ((ptr) - string1)) \
+ : ((regoff_t) ((ptr) - string2 + size1)))
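
A standalone sketch of the same mapping (hypothetical name, illustration only): a pointer into either half of the virtual concatenation becomes a single offset, with positions in string2 biased by size1 just as the macro above does via FIRST_STRING_P.

#include <stddef.h>

static ptrdiff_t
pointer_to_offset (const char *ptr,
                   const char *string1, ptrdiff_t size1,
                   const char *string2)
{
  /* Offsets 0 .. size1 - 1 fall in string1; everything beyond is in
     string2, so bias those positions by size1. */
  if (ptr >= string1 && ptr < string1 + size1)
    return ptr - string1;
  return (ptr - string2) + size1;
}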
/* Macros for dealing with the split strings in re_match_2. */
@@ -3195,7 +3525,7 @@ static char reg_unset_dummy;
/* Test if at very beginning or at very end of the virtual concatenation
of `string1' and `string2'. If only one string, it's `string2'. */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
-#define AT_STRINGS_END(d) ((d) == end2)
+#define AT_STRINGS_END(d) ((d) == end2)
/* Test if D points to a character which is word-constituent. We have
@@ -3215,11 +3545,11 @@ static char reg_unset_dummy;
/* Free everything we malloc. */
-#ifdef REGEX_MALLOC
-#define FREE_VAR(var) if (var) free (var); var = NULL
+#ifdef MATCH_MAY_ALLOCATE
+#define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
#define FREE_VARIABLES() \
do { \
- FREE_VAR (fail_stack.stack); \
+ REGEX_FREE_STACK (fail_stack.stack); \
FREE_VAR (regstart); \
FREE_VAR (regend); \
FREE_VAR (old_regstart); \
@@ -3230,11 +3560,9 @@ static char reg_unset_dummy;
FREE_VAR (reg_dummy); \
FREE_VAR (reg_info_dummy); \
} while (0)
-#else /* not REGEX_MALLOC */
-/* Some MIPS systems (at least) want this to free alloca'd storage. */
-#define FREE_VARIABLES() alloca (0)
-#endif /* not REGEX_MALLOC */
-
+#else
+#define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
+#endif /* not MATCH_MAY_ALLOCATE */
/* These values must meet several constraints. They must not be valid
register values; since we have a limit of 255 registers (because
@@ -3257,17 +3585,35 @@ re_match (bufp, string, size, pos, regs)
const char *string;
int size, pos;
struct re_registers *regs;
- {
- return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
+{
+ int result = re_match_2_internal (bufp, NULL, 0, string, size,
+ pos, regs, size);
+#ifndef REGEX_MALLOC
+#ifdef C_ALLOCA
+ alloca (0);
+#endif
+#endif
+ return result;
}
#endif /* not emacs */
+static boolean group_match_null_string_p _RE_ARGS((unsigned char **p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static boolean alt_match_null_string_p _RE_ARGS((unsigned char *p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static boolean common_op_match_null_string_p _RE_ARGS((unsigned char **p,
+ unsigned char *end,
+ register_info_type *reg_info));
+static int bcmp_translate _RE_ARGS((const char *s1, const char *s2,
+ int len, char *translate));
/* re_match_2 matches the compiled pattern in BUFP against the
(virtual) concatenation of STRING1 and STRING2 (of length SIZE1
and SIZE2, respectively). We start matching at POS, and stop
matching at STOP.
-
+
If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
store offsets for the substring each group matched in REGS. See the
documentation for exactly how many groups we fill.
@@ -3285,6 +3631,27 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
struct re_registers *regs;
int stop;
{
+ int result = re_match_2_internal (bufp, string1, size1, string2, size2,
+ pos, regs, stop);
+#ifndef REGEX_MALLOC
+#ifdef C_ALLOCA
+ alloca (0);
+#endif
+#endif
+ return result;
+}
+
+/* This is a separate function so that we can force an alloca cleanup
+ afterwards. */
+static int
+re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int pos;
+ struct re_registers *regs;
+ int stop;
+{
/* General temporaries. */
int mcnt;
unsigned char *p1;
@@ -3298,13 +3665,17 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Where we are in the data, and the end of the current string. */
const char *d, *dend;
-
+
/* Where we are in the pattern, and the end of the pattern. */
unsigned char *p = bufp->buffer;
register unsigned char *pend = p + bufp->used;
+ /* Mark the opcode just after a start_memory, so we can test for an
+ empty subpattern when we get to the stop_memory. */
+ unsigned char *just_past_start_mem = 0;
+
/* We use this to map every character in the string. */
- char *translate = bufp->translate;
+ RE_TRANSLATE_TYPE translate = bufp->translate;
/* Failure point stack. Each place that can handle a failure further
down the line pushes a failure point on this stack. It consists of
@@ -3315,17 +3686,25 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
scanning the strings. If the latter is zero, the failure point is
a ``dummy''; if a failure happens and the failure point is a dummy,
it gets discarded and the next next one is tried. */
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
fail_stack_type fail_stack;
+#endif
#ifdef DEBUG
static unsigned failure_id = 0;
unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
#endif
+#ifdef REL_ALLOC
+ /* This holds the pointer to the failure stack, when
+ it is allocated relocatably. */
+ fail_stack_elt_t *failure_stack_ptr;
+#endif
+
/* We fill all the registers internally, independent of what we
return, for use in backreferences. The number here includes
an element for register zero. */
size_t num_regs = bufp->re_nsub + 1;
-
+
/* The currently active registers. */
active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
@@ -3337,14 +3716,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
matching and the regnum-th regend points to right after where we
stopped matching the regnum-th subexpression. (The zeroth register
keeps track of what the whole pattern matches.) */
- const char **regstart = 0, **regend = 0;
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **regstart, **regend;
+#endif
/* If a group that's operated upon by a repetition operator fails to
match anything, then the register for its start will need to be
restored because it will have been set to wherever in the string we
are when we last see its open-group operator. Similarly for a
register's end. */
- const char **old_regstart = 0, **old_regend = 0;
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **old_regstart, **old_regend;
+#endif
/* The is_active field of reg_info helps us keep track of which (possibly
nested) subexpressions we are currently in. The matched_something
@@ -3352,15 +3735,19 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
matched any of the pattern so far this time through the reg_num-th
subexpression. These two fields get reset each time through any
loop their register is in. */
- register_info_type *reg_info = 0;
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
+ register_info_type *reg_info;
+#endif
/* The following record the register info as found in the above
- variables when we find a match better than any we've seen before.
+ variables when we find a match better than any we've seen before.
This happens as we backtrack through the failure points, which in
turn happens only if we have not yet matched the entire string. */
unsigned best_regs_set = false;
- const char **best_regstart = 0, **best_regend = 0;
-
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **best_regstart, **best_regend;
+#endif
+
/* Logically, this is `best_regend[0]'. But we don't want to have to
allocate space for that if we're not allocating space for anything
else (see below). Also, we never need info about register 0 for
@@ -3371,19 +3758,25 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
and need to test it, it's not garbage. */
const char *match_end = NULL;
+ /* This helps SET_REGS_MATCHED avoid doing redundant work. */
+ int set_regs_matched_done = 0;
+
/* Used when we pop values we don't care about. */
- const char **reg_dummy = 0;
- register_info_type *reg_info_dummy = 0;
+#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
+ const char **reg_dummy;
+ register_info_type *reg_info_dummy;
+#endif
#ifdef DEBUG
/* Counts the total number of registers pushed. */
- unsigned num_regs_pushed = 0;
+ unsigned num_regs_pushed = 0;
#endif
DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
-
+
INIT_FAIL_STACK ();
-
+
+#ifdef MATCH_MAY_ALLOCATE
/* Do not bother to initialize all the register variables if there are
no groups in the pattern, as it takes a fair amount of time. If
there are groups, we include space for register 0 (the whole
@@ -3401,14 +3794,13 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
reg_dummy = REGEX_TALLOC (num_regs, const char *);
reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
- if (!(regstart && regend && old_regstart && old_regend && reg_info
- && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
+ && best_regstart && best_regend && reg_dummy && reg_info_dummy))
{
FREE_VARIABLES ();
return -2;
}
}
-#ifdef REGEX_MALLOC
else
{
/* We must initialize all our variables to NULL, so that
@@ -3417,7 +3809,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
= best_regend = reg_dummy = NULL;
reg_info = reg_info_dummy = (register_info_type *) NULL;
}
-#endif /* REGEX_MALLOC */
+#endif /* MATCH_MAY_ALLOCATE */
/* The starting position is bogus. */
if (pos < 0 || pos > size1 + size2)
@@ -3425,21 +3817,21 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
FREE_VARIABLES ();
return -1;
}
-
+
/* Initialize subexpression text positions to -1 to mark ones that no
start_memory/stop_memory has been seen for. Also initialize the
register information struct. */
for (mcnt = 1; mcnt < num_regs; mcnt++)
{
- regstart[mcnt] = regend[mcnt]
+ regstart[mcnt] = regend[mcnt]
= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
-
+
REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
IS_ACTIVE (reg_info[mcnt]) = 0;
MATCHED_SOMETHING (reg_info[mcnt]) = 0;
EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
}
-
+
/* We move `string1' into `string2' if the latter's empty -- but not if
`string1' is null. */
if (size2 == 0 && string1 != NULL)
@@ -3464,7 +3856,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
end_match_2 = string2 + stop - size1;
}
- /* `p' scans through the pattern as `d' scans through the data.
+ /* `p' scans through the pattern as `d' scans through the data.
`dend' is the end of the input string that `d' points within. `d'
is advanced into the following input string whenever necessary, but
this happens before fetching; therefore, at the beginning of the
@@ -3486,7 +3878,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
DEBUG_PRINT1 ("The string to match is: `");
DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
DEBUG_PRINT1 ("'\n");
-
+
/* This loops over pattern commands. It exits by returning from the
function if the match is complete, or it drops through if the match
fails at this starting point in the input data. */
@@ -3497,39 +3889,51 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
if (p == pend)
{ /* End of pattern means we might have succeeded. */
DEBUG_PRINT1 ("end of pattern ... ");
-
+
/* If we haven't matched the entire string, and we want the
longest match, try backtracking. */
if (d != end_match_2)
{
+ /* 1 if this match ends in the same string (string1 or string2)
+ as the best previous match. */
+ boolean same_str_p = (FIRST_STRING_P (match_end)
+ == MATCHING_IN_FIRST_STRING);
+ /* 1 if this match is the best seen so far. */
+ boolean best_match_p;
+
+ /* AIX compiler got confused when this was combined
+ with the previous declaration. */
+ if (same_str_p)
+ best_match_p = d > match_end;
+ else
+ best_match_p = !MATCHING_IN_FIRST_STRING;
+
DEBUG_PRINT1 ("backtracking.\n");
-
+
if (!FAIL_STACK_EMPTY ())
{ /* More failure points to try. */
- boolean same_str_p = (FIRST_STRING_P (match_end)
- == MATCHING_IN_FIRST_STRING);
/* If exceeds best match so far, save it. */
- if (!best_regs_set
- || (same_str_p && d > match_end)
- || (!same_str_p && !MATCHING_IN_FIRST_STRING))
+ if (!best_regs_set || best_match_p)
{
best_regs_set = true;
match_end = d;
-
+
DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
-
+
for (mcnt = 1; mcnt < num_regs; mcnt++)
{
best_regstart[mcnt] = regstart[mcnt];
best_regend[mcnt] = regend[mcnt];
}
}
- goto fail;
+ goto fail;
}
- /* If no failure points, don't restore garbage. */
- else if (best_regs_set)
+ /* If no failure points, don't restore garbage. And if
+ last match is real best match, don't restore second
+ best one. */
+ else if (best_regs_set && !best_match_p)
{
restore_best_regs:
/* Restore best match. It may happen that `dend ==
@@ -3538,7 +3942,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
strings `x-' and `y-z-', if the two strings are
not consecutive in memory. */
DEBUG_PRINT1 ("Restoring best registers.\n");
-
+
d = match_end;
dend = ((d >= string1 && d <= end1)
? end_match_1 : end_match_2);
@@ -3551,6 +3955,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
}
} /* d != end_match_2 */
+ succeed_label:
DEBUG_PRINT1 ("Accepting match.\n");
/* If caller wants register contents data back, do it. */
@@ -3565,7 +3970,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
regs->start = TALLOC (regs->num_regs, regoff_t);
regs->end = TALLOC (regs->num_regs, regoff_t);
if (regs->start == NULL || regs->end == NULL)
- return -2;
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
bufp->regs_allocated = REGS_REALLOCATE;
}
else if (bufp->regs_allocated == REGS_REALLOCATE)
@@ -3578,7 +3986,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
RETALLOC (regs->start, regs->num_regs, regoff_t);
RETALLOC (regs->end, regs->num_regs, regoff_t);
if (regs->start == NULL || regs->end == NULL)
- return -2;
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
}
}
else
@@ -3594,10 +4005,11 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
if (regs->num_regs > 0)
{
regs->start[0] = pos;
- regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
- : d - string2 + size1);
+ regs->end[0] = (MATCHING_IN_FIRST_STRING
+ ? ((regoff_t) (d - string1))
+ : ((regoff_t) (d - string2 + size1)));
}
-
+
/* Go through the first `min (num_regs, regs->num_regs)'
registers, since that is all we initialized. */
for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
@@ -3606,11 +4018,13 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
regs->start[mcnt] = regs->end[mcnt] = -1;
else
{
- regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
- regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
+ regs->start[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
+ regs->end[mcnt]
+ = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
}
}
-
+
/* If the regs structure we return has more elements than
were in the pattern, set the extra elements to -1. If
we (re)allocated the registers, this is the case,
@@ -3620,27 +4034,23 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
regs->start[mcnt] = regs->end[mcnt] = -1;
} /* regs && !bufp->no_sub */
- FREE_VARIABLES ();
DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
nfailure_points_pushed, nfailure_points_popped,
nfailure_points_pushed - nfailure_points_popped);
DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
- mcnt = d - pos - (MATCHING_IN_FIRST_STRING
- ? string1
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+ ? string1
: string2 - size1);
DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+ FREE_VARIABLES ();
return mcnt;
}
/* Otherwise match next pattern command. */
-#ifdef SWITCH_ENUM_BUG
- switch ((int) ((re_opcode_t) *p++))
-#else
- switch ((re_opcode_t) *p++)
-#endif
+ switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
{
/* Ignore these. Used to ignore the n of succeed_n's which
currently have n == 0. */
@@ -3648,6 +4058,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
DEBUG_PRINT1 ("EXECUTING no_op.\n");
break;
+ case succeed:
+ DEBUG_PRINT1 ("EXECUTING succeed.\n");
+ goto succeed_label;
/* Match the next n pattern characters exactly. The following
byte in the pattern defines n, and the n bytes after that
@@ -3663,7 +4076,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
do
{
PREFETCH ();
- if (translate[(unsigned char) *d++] != (char) *p++)
+ if ((unsigned char) translate[(unsigned char) *d++]
+ != (unsigned char) *p++)
goto fail;
}
while (--mcnt);
@@ -3717,7 +4131,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
p += 1 + *p;
if (!not) goto fail;
-
+
SET_REGS_MATCHED ();
d++;
break;
@@ -3734,9 +4148,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Find out if this group can match the empty string. */
p1 = p; /* To send to group_match_null_string_p. */
-
+
if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
- REG_MATCH_NULL_STRING_P (reg_info[*p])
+ REG_MATCH_NULL_STRING_P (reg_info[*p])
= group_match_null_string_p (&p1, pend, reg_info);
/* Save the position in the string where we were the last time
@@ -3747,7 +4161,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
? REG_UNSET (regstart[*p]) ? d : regstart[*p]
: regstart[*p];
- DEBUG_PRINT2 (" old_regstart: %d\n",
+ DEBUG_PRINT2 (" old_regstart: %d\n",
POINTER_TO_OFFSET (old_regstart[*p]));
regstart[*p] = d;
@@ -3755,10 +4169,13 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
IS_ACTIVE (reg_info[*p]) = 1;
MATCHED_SOMETHING (reg_info[*p]) = 0;
-
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
/* This is the new highest active register. */
highest_active_reg = *p;
-
+
/* If nothing was active before, this is the new lowest active
register. */
if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
@@ -3766,6 +4183,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Move past the register number and inner group count. */
p += 2;
+ just_past_start_mem = p;
+
break;
@@ -3774,7 +4193,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
number, and the number of inner groups. */
case stop_memory:
DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
-
+
/* We need to save the string position the last time we were at
this close-group operator in case the group is operated
upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
@@ -3783,7 +4202,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
? REG_UNSET (regend[*p]) ? d : regend[*p]
: regend[*p];
- DEBUG_PRINT2 (" old_regend: %d\n",
+ DEBUG_PRINT2 (" old_regend: %d\n",
POINTER_TO_OFFSET (old_regend[*p]));
regend[*p] = d;
@@ -3791,7 +4210,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* This register isn't active anymore. */
IS_ACTIVE (reg_info[*p]) = 0;
-
+
+ /* Clear this whenever we change the register activity status. */
+ set_regs_matched_done = 0;
+
/* If this was the only register active, nothing is active
anymore. */
if (lowest_active_reg == highest_active_reg)
@@ -3807,7 +4229,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
unsigned char r = *p - 1;
while (r > 0 && !IS_ACTIVE (reg_info[r]))
r--;
-
+
/* If we end up at register zero, that means that we saved
the registers as the result of an `on_failure_jump', not
a `start_memory', and we jumped to past the innermost
@@ -3823,18 +4245,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
else
highest_active_reg = r;
}
-
+
/* If just failed to match something this time around with a
group that's operated on by a repetition operator, try to
force exit from the ``loop'', and restore the register
information for this group that we had before trying this
last match. */
if ((!MATCHED_SOMETHING (reg_info[*p])
- || (re_opcode_t) p[-3] == start_memory)
- && (p + 2) < pend)
+ || just_past_start_mem == p - 1)
+ && (p + 2) < pend)
{
boolean is_a_jump_n = false;
-
+
p1 = p + 2;
mcnt = 0;
switch ((re_opcode_t) *p1++)
@@ -3849,12 +4271,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
if (is_a_jump_n)
p1 += 2;
break;
-
+
default:
/* do nothing */ ;
}
p1 += mcnt;
-
+
/* If the next operation is a jump backwards in the pattern
to an on_failure_jump right before the start_memory
corresponding to this stop_memory, exit from the loop
@@ -3868,36 +4290,35 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
failed match, e.g., with `(a*)*b' against `ab' for
regstart[1], and, e.g., with `((a*)*(b*)*)*'
against `aba' for regend[3].
-
+
Also restore the registers for inner groups for,
e.g., `((a*)(b*))*' against `aba' (register 3 would
otherwise get trashed). */
-
+
if (EVER_MATCHED_SOMETHING (reg_info[*p]))
{
- unsigned r;
-
+ unsigned r;
+
EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
-
+
/* Restore this and inner groups' (if any) registers. */
for (r = *p; r < *p + *(p + 1); r++)
{
regstart[r] = old_regstart[r];
/* xx why this test? */
- if ((s_reg_t) old_regend[r] >= (s_reg_t) regstart[r])
+ if (old_regend[r] >= regstart[r])
regend[r] = old_regend[r];
- }
+ }
}
p1++;
EXTRACT_NUMBER_AND_INCR (mcnt, p1);
PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
- PUSH_FAILURE_POINT2(p1 + mcnt, d, -2);
goto fail;
}
}
-
+
/* Move past the register number and the inner group count. */
p += 2;
break;
@@ -3914,16 +4335,16 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Can't back reference a group which we've never matched. */
if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
goto fail;
-
+
/* Where in input to try to start matching. */
d2 = regstart[regno];
-
+
/* Where to stop matching; if both the place to start and
the place to stop matching are in the same string, then
set to the place to stop, otherwise, for now have to use
the end of the first string. */
- dend2 = ((FIRST_STRING_P (regstart[regno])
+ dend2 = ((FIRST_STRING_P (regstart[regno])
== FIRST_STRING_P (regend[regno]))
? regend[regno] : end_match_1);
for (;;)
@@ -3947,19 +4368,22 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* How many characters left in this segment to match. */
mcnt = dend - d;
-
+
/* Want how many consecutive characters we can match in
one shot, so, if necessary, adjust the count. */
if (mcnt > dend2 - d2)
mcnt = dend2 - d2;
-
+
/* Compare that many; failure if mismatch, else move
past them. */
- if (translate
- ? bcmp_translate (d, d2, mcnt, translate)
+ if (translate
+ ? bcmp_translate (d, d2, mcnt, translate)
: bcmp (d, d2, mcnt))
goto fail;
d += mcnt, d2 += mcnt;
+
+ /* Do this because we've matched some characters. */
+ SET_REGS_MATCHED ();
}
}
break;
@@ -3970,7 +4394,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
`newline_anchor' is set, after newlines. */
case begline:
DEBUG_PRINT1 ("EXECUTING begline.\n");
-
+
if (AT_STRINGS_BEG (d))
{
if (!bufp->not_bol) break;
@@ -3991,7 +4415,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
{
if (!bufp->not_eol) break;
}
-
+
/* We have to ``prefetch'' the next character. */
else if ((d == end1 ? *string2 : *d) == '\n'
&& bufp->newline_anchor)
@@ -4025,7 +4449,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
then the . fails against the \n. But the next thing we want
to do is match the \n against the \n; if we restored the
string value, we would be back at the foo.
-
+
Because this is used only in specific cases, we don't need to
check all the things that `on_failure_jump' does, to make
sure the right things get saved on the stack. Hence we don't
@@ -4035,17 +4459,16 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
case; that seems worse than this. */
case on_failure_keep_string_jump:
DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
-
+
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
- PUSH_FAILURE_POINT2(p + mcnt, NULL, -2);
break;
/* Uses of on_failure_jump:
-
+
Each alternative starts with an on_failure_jump that points
to the beginning of the next alternative. Each alternative
except the last ends with a jump that in effect jumps past
@@ -4068,7 +4491,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
for that group and all inner ones, so that if we fail back
to this point, the group's information will be correct.
For example, in \(a*\)*\1, we need the preceding group,
- and in \(\(a*\)b*\)\2, we need the inner group. */
+ and in \(zz\(a*\)b*\)\2, we need the inner group. */
/* We can't use `p' to check ahead because we push
a failure point to `p + mcnt' after we do this. */
@@ -4094,7 +4517,6 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
DEBUG_PRINT1 (":\n");
PUSH_FAILURE_POINT (p + mcnt, d, -2);
- PUSH_FAILURE_POINT2(p + mcnt, d, -2);
break;
@@ -4112,18 +4534,34 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
would have to backtrack because of (as in, e.g., `a*a')
then we can change to pop_failure_jump, because we'll
never have to backtrack.
-
+
This is not true in the case of alternatives: in
`(a|ab)*' we do need to backtrack to the `ab' alternative
(e.g., if the string was `ab'). But instead of trying to
detect that here, the alternative has put on a dummy
failure point which is what we will end up popping. */
- /* Skip over open/close-group commands. */
- while (p2 + 2 < pend
- && ((re_opcode_t) *p2 == stop_memory
- || (re_opcode_t) *p2 == start_memory))
- p2 += 3; /* Skip over args, too. */
+ /* Skip over open/close-group commands.
+ If what follows this loop is a ...+ construct,
+ look at what begins its body, since we will have to
+ match at least one of that. */
+ while (1)
+ {
+ if (p2 + 2 < pend
+ && ((re_opcode_t) *p2 == stop_memory
+ || (re_opcode_t) *p2 == start_memory))
+ p2 += 3;
+ else if (p2 + 6 < pend
+ && (re_opcode_t) *p2 == dummy_failure_jump)
+ p2 += 6;
+ else
+ break;
+ }
+
+ p1 = p + mcnt;
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_finalize_jump' of this case. Examine what
+ follows. */
/* If we're at the end of the pattern, we can change. */
if (p2 == pend)
@@ -4141,23 +4579,19 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
{
register unsigned char c
= *p2 == (unsigned char) endline ? '\n' : p2[2];
- p1 = p + mcnt;
- /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
- to the `maybe_finalize_jump' of this case. Examine what
- follows. */
if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
{
p[-3] = (unsigned char) pop_failure_jump;
DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
c, p1[5]);
}
-
+
else if ((re_opcode_t) p1[3] == charset
|| (re_opcode_t) p1[3] == charset_not)
{
int not = (re_opcode_t) p1[3] == charset_not;
-
+
if (c < (unsigned char) (p1[4] * BYTEWIDTH)
&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
not = !not;
@@ -4171,6 +4605,58 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
}
}
}
+ else if ((re_opcode_t) *p2 == charset)
+ {
+#ifdef DEBUG
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+#endif
+
+ if ((re_opcode_t) p1[3] == exactn
+ && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
+ && (p2[1 + p1[4] / BYTEWIDTH]
+ & (1 << (p1[4] % BYTEWIDTH)))))
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset_not)
+ {
+ int idx;
+ /* We win if the charset_not inside the loop
+ lists every character listed in the charset after. */
+ for (idx = 0; idx < (int) p2[1]; idx++)
+ if (! (p2[2 + idx] == 0
+ || (idx < (int) p1[4]
+ && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
+ break;
+
+ if (idx == p2[1])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ else if ((re_opcode_t) p1[3] == charset)
+ {
+ int idx;
+ /* We win if the charset inside the loop
+ has no overlap with the one after the loop. */
+ for (idx = 0;
+ idx < (int) p2[1] && idx < (int) p1[4];
+ idx++)
+ if ((p2[2 + idx] & p1[5 + idx]) != 0)
+ break;
+
+ if (idx == p2[1] || idx == p1[4])
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
}
p -= 2; /* Point at relative address again. */
if ((re_opcode_t) p[-1] != pop_failure_jump)
@@ -4206,7 +4692,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
}
/* Note fall through. */
-
+
/* Unconditionally jump (without popping any failure points). */
case jump:
unconditional_jump:
@@ -4216,7 +4702,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
DEBUG_PRINT2 ("(to 0x%x).\n", p);
break;
-
+
/* We need this opcode so we can detect where alternatives end
in `group_match_null_string_p' et al. */
case jump_past_alt:
@@ -4234,7 +4720,6 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* It doesn't matter what we push for the string here. What
the code at `fail' tests is the value for the pattern. */
PUSH_FAILURE_POINT (0, 0, -2);
- PUSH_FAILURE_POINT2(0, 0, -2);
goto unconditional_jump;
@@ -4248,12 +4733,11 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* See comments just above at `dummy_failure_jump' about the
two zeroes. */
PUSH_FAILURE_POINT (0, 0, -2);
- PUSH_FAILURE_POINT2(0, 0, -2);
break;
/* Have to succeed matching what follows at least n times.
After that, handle like `on_failure_jump'. */
- case succeed_n:
+ case succeed_n:
EXTRACT_NUMBER (mcnt, p + 2);
DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
@@ -4274,8 +4758,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
goto on_failure;
}
break;
-
- case jump_n:
+
+ case jump_n:
EXTRACT_NUMBER (mcnt, p + 2);
DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
@@ -4284,13 +4768,13 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
{
mcnt--;
STORE_NUMBER (p + 2, mcnt);
- goto unconditional_jump;
+ goto unconditional_jump;
}
/* If don't have to jump any more, skip over the rest of command. */
- else
- p += 4;
+ else
+ p += 4;
break;
-
+
case set_number_at:
{
DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
@@ -4329,31 +4813,23 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
goto fail;
#ifdef emacs
-#ifdef emacs19
case before_dot:
DEBUG_PRINT1 ("EXECUTING before_dot.\n");
if (PTR_CHAR_POS ((unsigned char *) d) >= point)
goto fail;
break;
-
+
case at_dot:
DEBUG_PRINT1 ("EXECUTING at_dot.\n");
if (PTR_CHAR_POS ((unsigned char *) d) != point)
goto fail;
break;
-
+
case after_dot:
DEBUG_PRINT1 ("EXECUTING after_dot.\n");
if (PTR_CHAR_POS ((unsigned char *) d) <= point)
goto fail;
break;
-#else /* not emacs19 */
- case at_dot:
- DEBUG_PRINT1 ("EXECUTING at_dot.\n");
- if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
- goto fail;
- break;
-#endif /* not emacs19 */
case syntaxspec:
DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
@@ -4365,8 +4841,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
mcnt = (int) Sword;
matchsyntax:
PREFETCH ();
- if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
- goto fail;
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
+ goto fail;
SET_REGS_MATCHED ();
break;
@@ -4380,8 +4858,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
mcnt = (int) Sword;
matchnotsyntax:
PREFETCH ();
- if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
- goto fail;
+ /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
+ d++;
+ if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
+ goto fail;
SET_REGS_MATCHED ();
break;
@@ -4394,7 +4874,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
SET_REGS_MATCHED ();
d++;
break;
-
+
case notwordchar:
DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
PREFETCH ();
@@ -4404,7 +4884,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
d++;
break;
#endif /* not emacs */
-
+
default:
abort ();
}
@@ -4429,7 +4909,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
if (p < pend)
{
boolean is_a_jump_n = false;
-
+
/* If failed to a backwards jump that's part of a repetition
loop, need to pop this failure point and use the next one. */
switch ((re_opcode_t) *p)
@@ -4441,7 +4921,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
case jump:
p1 = p + 1;
EXTRACT_NUMBER_AND_INCR (mcnt, p1);
- p1 += mcnt;
+ p1 += mcnt;
if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
|| (!is_a_jump_n
@@ -4472,10 +4952,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* We are passed P pointing to a register number after a start_memory.
-
+
Return true if the pattern up to the corresponding stop_memory can
match the empty string, and false otherwise.
-
+
If we find the matching stop_memory, sets P to point to one past its number.
Otherwise, sets P to an undefined byte less than or equal to END.
@@ -4489,20 +4969,20 @@ group_match_null_string_p (p, end, reg_info)
int mcnt;
/* Point to after the args to the start_memory. */
unsigned char *p1 = *p + 2;
-
+
while (p1 < end)
{
/* Skip over opcodes that can match nothing, and return true or
false, as appropriate, when we get to one that can't, or to the
matching stop_memory. */
-
+
switch ((re_opcode_t) *p1)
{
/* Could be either a loop or a series of alternatives. */
case on_failure_jump:
p1++;
EXTRACT_NUMBER_AND_INCR (mcnt, p1);
-
+
/* If the next operation is not a jump backwards in the
pattern. */
@@ -4516,7 +4996,7 @@ group_match_null_string_p (p, end, reg_info)
/on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
/on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
- /exactn/1/c
+ /exactn/1/c
So, we have to first go through the first (n-1)
alternatives and then deal with the last one separately. */
@@ -4532,19 +5012,19 @@ group_match_null_string_p (p, end, reg_info)
is, including the ending `jump_past_alt' and
its number. */
- if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+ if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
reg_info))
return false;
/* Move to right after this alternative, including the
jump_past_alt. */
- p1 += mcnt;
+ p1 += mcnt;
/* Break if it's the beginning of an n-th alternative
that doesn't begin with an on_failure_jump. */
if ((re_opcode_t) *p1 != on_failure_jump)
break;
-
+
/* Still have to check that it's not an n-th
alternative that starts with an on_failure_jump. */
p1++;
@@ -4569,14 +5049,14 @@ group_match_null_string_p (p, end, reg_info)
} /* if mcnt > 0 */
break;
-
+
case stop_memory:
assert (p1[1] == **p);
*p = p1 + 2;
return true;
-
- default:
+
+ default:
if (!common_op_match_null_string_p (&p1, end, reg_info))
return false;
}
@@ -4589,7 +5069,7 @@ group_match_null_string_p (p, end, reg_info)
/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
It expects P to be the first byte of a single alternative and END one
byte past the last. The alternative can contain groups. */
-
+
static boolean
alt_match_null_string_p (p, end, reg_info)
unsigned char *p, *end;
@@ -4597,12 +5077,12 @@ alt_match_null_string_p (p, end, reg_info)
{
int mcnt;
unsigned char *p1 = p;
-
+
while (p1 < end)
{
- /* Skip over opcodes that can match nothing, and break when we get
+ /* Skip over opcodes that can match nothing, and break when we get
to one that can't. */
-
+
switch ((re_opcode_t) *p1)
{
/* It's a loop. */
@@ -4611,8 +5091,8 @@ alt_match_null_string_p (p, end, reg_info)
EXTRACT_NUMBER_AND_INCR (mcnt, p1);
p1 += mcnt;
break;
-
- default:
+
+ default:
if (!common_op_match_null_string_p (&p1, end, reg_info))
return false;
}
@@ -4623,8 +5103,8 @@ alt_match_null_string_p (p, end, reg_info)
/* Deals with the ops common to group_match_null_string_p and
- alt_match_null_string_p.
-
+ alt_match_null_string_p.
+
Sets P to one after the op and its arguments, if any. */
static boolean
@@ -4659,7 +5139,7 @@ common_op_match_null_string_p (p, end, reg_info)
reg_no = *p1;
assert (reg_no > 0 && reg_no <= MAX_REGNUM);
ret = group_match_null_string_p (&p1, end, reg_info);
-
+
/* Have to set this here in case we're checking a group which
contains a group and a back reference to it. */
@@ -4669,7 +5149,7 @@ common_op_match_null_string_p (p, end, reg_info)
if (!ret)
return false;
break;
-
+
/* If this is an optimized succeed_n for zero times, make the jump. */
case jump:
EXTRACT_NUMBER_AND_INCR (mcnt, p1);
@@ -4681,7 +5161,7 @@ common_op_match_null_string_p (p, end, reg_info)
case succeed_n:
/* Get to the number of times to succeed. */
- p1 += 2;
+ p1 += 2;
EXTRACT_NUMBER_AND_INCR (mcnt, p1);
if (mcnt == 0)
@@ -4694,7 +5174,7 @@ common_op_match_null_string_p (p, end, reg_info)
return false;
break;
- case duplicate:
+ case duplicate:
if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
return false;
break;
@@ -4714,12 +5194,12 @@ common_op_match_null_string_p (p, end, reg_info)
/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
bytes; nonzero otherwise. */
-
+
static int
bcmp_translate (s1, s2, len, translate)
const char *s1, *s2;
register int len;
- char *translate;
+ RE_TRANSLATE_TYPE translate;
{
register const unsigned char *p1 = (const unsigned char *) s1,
*p2 = (const unsigned char *) s2;
@@ -4736,10 +5216,10 @@ bcmp_translate (s1, s2, len, translate)
/* re_compile_pattern is the GNU regular expression compiler: it
compiles PATTERN (of length SIZE) and puts the result in BUFP.
Returns 0 if the pattern was valid, otherwise an error string.
-
+
Assumes the `allocated' (and perhaps `buffer') and `translate' fields
are set in BUFP on entry.
-
+
We call regex_compile to do the actual compilation. */
const char *
@@ -4749,28 +5229,30 @@ re_compile_pattern (pattern, length, bufp)
struct re_pattern_buffer *bufp;
{
reg_errcode_t ret;
-
+
/* GNU code is written to assume at least RE_NREGS registers will be set
(and at least one extra will be -1). */
bufp->regs_allocated = REGS_UNALLOCATED;
-
+
/* And GNU code determines whether or not to get register information
by passing null for the REGS argument to re_match, etc., not by
setting no_sub. */
bufp->no_sub = 0;
-
+
/* Match anchors at newline. */
bufp->newline_anchor = 1;
-
+
ret = regex_compile (pattern, length, re_syntax_options, bufp);
- return re_error_msg[(int) ret];
-}
+ if (!ret)
+ return NULL;
+ return gettext (re_error_msgid[(int) ret]);
+}
/* Entry points compatible with 4.2 BSD regex library. We don't define
- them if this is an Emacs or POSIX compilation. */
+ them unless specifically requested. */
-#if !defined (emacs) && !defined (_POSIX_SOURCE)
+#ifdef _REGEX_RE_COMP
/* BSD has one and only one pattern buffer. */
static struct re_pattern_buffer re_comp_buf;
@@ -4780,11 +5262,11 @@ re_comp (s)
const char *s;
{
reg_errcode_t ret;
-
+
if (!s)
{
if (!re_comp_buf.buffer)
- return "No previous regular expression";
+ return gettext ("No previous regular expression");
return 0;
}
@@ -4792,12 +5274,12 @@ re_comp (s)
{
re_comp_buf.buffer = (unsigned char *) malloc (200);
if (re_comp_buf.buffer == NULL)
- return "Memory exhausted";
+ return gettext (re_error_msgid[(int) REG_ESPACE]);
re_comp_buf.allocated = 200;
re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
if (re_comp_buf.fastmap == NULL)
- return "Memory exhausted";
+ return gettext (re_error_msgid[(int) REG_ESPACE]);
}
/* Since `re_exec' always passes NULL for the `regs' argument, we
@@ -4807,9 +5289,12 @@ re_comp (s)
re_comp_buf.newline_anchor = 1;
ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
-
- /* Yes, we're discarding `const' here. */
- return (char *) re_error_msg[(int) ret];
+
+ if (!ret)
+ return NULL;
+
+ /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
+ return (char *) gettext (re_error_msgid[(int) ret]);
}
@@ -4821,7 +5306,7 @@ re_exec (s)
return
0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
}
-#endif /* not emacs and not _POSIX_SOURCE */
+#endif /* _REGEX_RE_COMP */
/* POSIX.2 functions. Don't define these for Emacs. */
@@ -4864,7 +5349,7 @@ re_exec (s)
int
regcomp (preg, pattern, cflags)
regex_t *preg;
- const char *pattern;
+ const char *pattern;
int cflags;
{
reg_errcode_t ret;
@@ -4876,18 +5361,20 @@ regcomp (preg, pattern, cflags)
preg->buffer = 0;
preg->allocated = 0;
preg->used = 0;
-
+
/* Don't bother to use a fastmap when searching. This simplifies the
REG_NEWLINE case: if we used a fastmap, we'd have to put all the
characters after newlines into the fastmap. This way, we just try
every character. */
preg->fastmap = 0;
-
+
if (cflags & REG_ICASE)
{
unsigned i;
-
- preg->translate = (char *) malloc (CHAR_SET_SIZE);
+
+ preg->translate
+ = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
+ * sizeof (*(RE_TRANSLATE_TYPE)0));
if (preg->translate == NULL)
return (int) REG_ESPACE;
@@ -4911,38 +5398,38 @@ regcomp (preg, pattern, cflags)
preg->no_sub = !!(cflags & REG_NOSUB);
- /* POSIX says a null character in the pattern terminates it, so we
+ /* POSIX says a null character in the pattern terminates it, so we
can use strlen here in compiling the pattern. */
ret = regex_compile (pattern, strlen (pattern), syntax, preg);
-
+
/* POSIX doesn't distinguish between an unmatched open-group and an
unmatched close-group: both are REG_EPAREN. */
if (ret == REG_ERPAREN) ret = REG_EPAREN;
-
+
return (int) ret;
}
/* regexec searches for a given pattern, specified by PREG, in the
string STRING.
-
+
If NMATCH is zero or REG_NOSUB was set in the cflags argument to
`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
least NMATCH elements, and we set them to the offsets of the
corresponding matched substrings.
-
+
EFLAGS specifies `execution flags' which affect matching: if
REG_NOTBOL is set, then ^ does not match at the beginning of the
string; if REG_NOTEOL is set, then $ does not match at the end.
-
+
We return 0 if we find a match and REG_NOMATCH if not. */
int
regexec (preg, string, nmatch, pmatch, eflags)
const regex_t *preg;
- const char *string;
- size_t nmatch;
- regmatch_t pmatch[];
+ const char *string;
+ size_t nmatch;
+ regmatch_t pmatch[];
int eflags;
{
int ret;
@@ -4952,15 +5439,15 @@ regexec (preg, string, nmatch, pmatch, eflags)
boolean want_reg_info = !preg->no_sub && nmatch > 0;
private_preg = *preg;
-
+
private_preg.not_bol = !!(eflags & REG_NOTBOL);
private_preg.not_eol = !!(eflags & REG_NOTEOL);
-
+
/* The user has told us exactly how many registers to return
information about, via `nmatch'. We have to pass that on to the
matching routines. */
private_preg.regs_allocated = REGS_FIXED;
-
+
if (want_reg_info)
{
regs.num_regs = nmatch;
@@ -4974,7 +5461,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
ret = re_search (&private_preg, string, len,
/* start: */ 0, /* range: */ len,
want_reg_info ? &regs : (struct re_registers *) 0);
-
+
/* Copy the register information to the POSIX structure. */
if (want_reg_info)
{
@@ -5013,22 +5500,17 @@ regerror (errcode, preg, errbuf, errbuf_size)
size_t msg_size;
if (errcode < 0
- || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
- /* Only error codes returned by the rest of the code should be passed
+ || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
+ /* Only error codes returned by the rest of the code should be passed
to this routine. If we are given anything else, or if other regex
code generates an invalid error code, then the program has a bug.
Dump core so we can fix it. */
abort ();
- msg = re_error_msg[errcode];
-
- /* POSIX doesn't require that we do anything in this case, but why
- not be nice. */
- if (! msg)
- msg = "Success";
+ msg = gettext (re_error_msgid[errcode]);
msg_size = strlen (msg) + 1; /* Includes the null. */
-
+
if (errbuf_size != 0)
{
if (msg_size > errbuf_size)
@@ -5053,7 +5535,7 @@ regfree (preg)
if (preg->buffer != NULL)
free (preg->buffer);
preg->buffer = NULL;
-
+
preg->allocated = 0;
preg->used = 0;
diff --git a/regex.h b/regex.h
index ec9598e7..21cd9902 100644
--- a/regex.h
+++ b/regex.h
@@ -1,7 +1,7 @@
/* Definitions for data structures and routines for the regular
expression library, version 0.12.
- Copyright (C) 1985, 1989, 1990-1995 Free Software Foundation, Inc.
+ Copyright (C) 1985, 89, 90, 91, 92, 93, 95 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -12,10 +12,10 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */
#ifndef __REGEXP_LIBRARY_H__
#define __REGEXP_LIBRARY_H__
@@ -23,7 +23,7 @@
/* POSIX says that <sys/types.h> must be included (by the caller) before
<regex.h>. */
-#ifdef VMS
+#if !defined (_POSIX_C_SOURCE) && !defined (_POSIX_SOURCE) && defined (VMS)
/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
should be there. */
#include <stddef.h>
@@ -141,6 +141,10 @@ typedef unsigned long reg_syntax_t;
IF not set, then the GNU regex operators are recognized. */
#define RE_NO_GNU_OPS (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
+/* If this bit is set, succeed as soon as we match the whole pattern,
+ without further backtracking. */
+#define RE_NO_POSIX_BACKTRACKING (RE_NO_GNU_OPS << 1)
+
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
@@ -161,10 +165,11 @@ extern reg_syntax_t re_syntax_options;
| RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
#define RE_SYNTAX_GNU_AWK \
- (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) \
+ & ~(RE_DOT_NOT_NULL|RE_INTERVALS))
#define RE_SYNTAX_POSIX_AWK \
- (RE_SYNTAX_GNU_AWK | RE_NO_GNU_OPS)
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS| RE_NO_GNU_OPS)
#define RE_SYNTAX_GREP \
(RE_BK_PLUS_QM | RE_CHAR_CLASSES \
@@ -291,6 +296,10 @@ typedef enum
compiled, the `re_nsub' field is available. All other fields are
private to the regex routines. */
+#ifndef RE_TRANSLATE_TYPE
+#define RE_TRANSLATE_TYPE char *
+#endif
+
struct re_pattern_buffer
{
/* [[[begin pattern_buffer]]] */
@@ -317,7 +326,7 @@ struct re_pattern_buffer
comparing them, or zero for no translation. The translation
is applied to a pattern when it is compiled and to a string
when it is matched. */
- char *translate;
+ RE_TRANSLATE_TYPE translate;
/* Number of subexpressions found by the compiler. */
size_t re_nsub;
@@ -360,11 +369,6 @@ struct re_pattern_buffer
};
typedef struct re_pattern_buffer regex_t;
-
-
-/* search.c (search_buffer) in Emacs needs this one opcode value. It is
- defined both in `regex.c' and here. */
-#define RE_EXACTN_VALUE 1
/* Type for byte offsets within the string. POSIX mandates this. */
typedef int regoff_t;
@@ -405,7 +409,7 @@ typedef struct
unfortunately clutters up the declarations a bit, but I think it's
worth it. */
-#ifdef __STDC__
+#if __STDC__
#define _RE_ARGS(args) args
@@ -481,11 +485,13 @@ extern void re_set_registers
_RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
unsigned num_regs, regoff_t *starts, regoff_t *ends));
+#ifdef _REGEX_RE_COMP
#ifndef _CRAY
/* 4.2 bsd compatibility. */
extern char *re_comp _RE_ARGS ((const char *));
extern int re_exec _RE_ARGS ((const char *));
#endif
+#endif
/* POSIX compatibility. */
extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
diff --git a/stamp-h.in b/stamp-h.in
new file mode 100644
index 00000000..eaec84aa
--- /dev/null
+++ b/stamp-h.in
@@ -0,0 +1 @@
+Wed Dec 20 16:39:26 EST 1995
diff --git a/support/texindex.c b/support/texindex.c
deleted file mode 100644
index 02838f17..00000000
--- a/support/texindex.c
+++ /dev/null
@@ -1,1605 +0,0 @@
-/* Prepare Tex index dribble output into an actual index.
- Copyright (C) 1987 Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-
-#include <stdio.h>
-#include <ctype.h>
-#include <errno.h>
-
-#ifdef VMS
-#ifndef VAX11C
-#define noshare
-#endif
-
-#include <perror.h>
-#include <file.h>
-
-#define EXIT_SUCCESS ((1 << 28) | 1)
-#define EXIT_FATAL ((1 << 28) | 4)
-#define unlink delete
-#define tell(fd) lseek(fd, 0L, 1)
-
-#else /* Not VMS */
-
-#ifdef USG
-#include <sys/types.h>
-#include <sys/fcntl.h>
-#endif
-#include <sys/file.h>
-
-#define EXIT_SUCCESS 0
-#define EXIT_FATAL 1
-
-#endif /* Not VMS */
-
-
-#ifndef L_XTND
-#define L_XTND 2
-#endif
-
-#ifdef VMS
-extern noshare int sys_nerr;
-extern noshare char *sys_errlist[];
-#else
-extern int sys_nerr;
-extern char *sys_errlist[];
-#endif
-
-/* When sorting in core, this structure describes one line
- and the position and length of its first keyfield. */
-
-struct lineinfo
- {
- char *text; /* The actual text of the line */
- union
- { /* The start of the key (for textual comparison) */
- char *text;
- long number; /* or the numeric value (for numeric comparison) */
- } key;
- long keylen; /* Length of key field */
- };
-
-/* This structure describes a field to use as a sort key */
-
-struct keyfield
- {
- int startwords; /* # words to skip */
- int startchars; /* and # additional chars to skip, to start of field */
- int endwords; /* similar, from beg (or end) of line, to find end of field */
- int endchars;
- char ignore_blanks; /* Ignore spaces and tabs within the field */
- char fold_case; /* Convert upper case to lower before comparing */
- char reverse; /* Compare in reverse order */
- char numeric; /* Parse text as an integer and compare the integers */
- char positional; /* Sort according to position within the file */
- char braced; /* Count balanced-braced groupings as fields */
- };
-
-/* Vector of keyfields to use */
-
-struct keyfield keyfields[3];
-
-/* Number of keyfields stored in that vector. */
-
-int num_keyfields = 3;
-
-/* Vector of input file names, terminated with a zero (null pointer) */
-
-char **infiles;
-
-/* Vector of corresponding output file names, or zero meaning default it */
-
-char **outfiles;
-
-/* Length of `infiles' */
-
-int num_infiles;
-
-/* Pointer to the array of pointers to lines being sorted */
-
-char **linearray;
-
-/* The allocated length of `linearray'. */
-
-long nlines;
-
-/* Directory to use for temporary files. On Unix, it ends with a slash. */
-
-char *tempdir;
-
-/* Start of filename to use for temporary files. */
-
-char *tempbase;
-
-/* Number of last temporary file. */
-
-int tempcount;
-
-/* Number of last temporary file already deleted.
- Temporary files are deleted by `flush_tempfiles' in order of creation. */
-
-int last_deleted_tempcount;
-
-/* During in-core sort, this points to the base of the data block
- which contains all the lines of data. */
-
-char *text_base;
-
-/* Additional command switches */
-
-int keep_tempfiles; /* Nonzero means do not delete tempfiles -- for debugging */
-
-/* Forward declarations of functions in this file */
-
-void decode_command ();
-void sort_in_core ();
-void sort_offline ();
-char **parsefile ();
-char *find_field ();
-char *find_pos ();
-long find_value ();
-char *find_braced_pos ();
-char *find_braced_end ();
-void writelines ();
-int compare_full ();
-long readline ();
-int merge_files ();
-int merge_direct ();
-char *concat ();
-char *maketempname ();
-void flush_tempfiles ();
-char *tempcopy ();
-
-extern char *mktemp ();
-
-#define MAX_IN_CORE_SORT 500000
-
-int
-main (argc, argv)
- int argc;
- char **argv;
-{
- int i;
-
- tempcount = 0;
- last_deleted_tempcount = 0;
-
- /* Describe the kind of sorting to do. */
- /* The first keyfield uses the first braced field and folds case */
- keyfields[0].braced = 1;
- keyfields[0].fold_case = 1;
- keyfields[0].endwords = -1;
- keyfields[0].endchars = -1;
- /* The second keyfield uses the second braced field, numerically */
- keyfields[1].braced = 1;
- keyfields[1].numeric = 1;
- keyfields[1].startwords = 1;
- keyfields[1].endwords = -1;
- keyfields[1].endchars = -1;
- /* The third keyfield (which is ignored while discarding duplicates)
- compares the whole line */
- keyfields[2].endwords = -1;
- keyfields[2].endchars = -1;
-
- decode_command (argc, argv);
-
- tempbase = mktemp (concat ("txiXXXXXX", "", ""));
-
- /* Process input files completely, one by one. */
-
- for (i = 0; i < num_infiles; i++)
- {
- int desc;
- long ptr;
- char *outfile;
- char *p;
-
- desc = open (infiles[i], 0, 0);
- if (desc < 0) pfatal_with_name (infiles[i]);
- lseek (desc, 0, L_XTND);
- ptr = tell (desc);
- close (desc);
-
- outfile = outfiles[i];
- if (!outfile)
- {
- outfile = concat (infiles[i], "s", "");
- }
-
- if (ptr < MAX_IN_CORE_SORT)
- /* Sort a small amount of data */
- sort_in_core (infiles[i], ptr, outfile);
- else
- sort_offline (infiles[i], ptr, outfile);
- }
-
- flush_tempfiles (tempcount);
- exit (EXIT_SUCCESS);
-}
-
-/* This page decodes the command line arguments to set the parameter variables
- and set up the vector of keyfields and the vector of input files */
-
-void
-decode_command (argc, argv)
- int argc;
- char **argv;
-{
- int i;
- char **ip;
- char **op;
-
- /* Store default values into parameter variables */
-
-#ifdef VMS
- tempdir = "sys$scratch:";
-#else
- tempdir = "/tmp/";
-#endif
-
- keep_tempfiles = 0;
-
- /* Allocate argc input files, which must be enough. */
-
- infiles = (char **) xmalloc (argc * sizeof (char *));
- outfiles = (char **) xmalloc (argc * sizeof (char *));
- ip = infiles;
- op = outfiles;
-
- /* First find all switches that control the default kind-of-sort */
-
- for (i = 1; i < argc; i++)
- {
- int tem = classify_arg (argv[i]);
- char c;
- char *p;
-
- if (tem <= 0)
- {
- *ip++ = argv[i];
- *op++ = 0;
- continue;
- }
- if (tem > 1)
- {
- if (i + 1 == argc)
- fatal ("switch %s given with no argument following it", argv[i]);
- else if (!strcmp (argv[i], "-T"))
- tempdir = argv[i + 1];
- else if (!strcmp (argv[i], "-o"))
- *(op - 1) = argv[i + 1];
- i += tem - 1;
- continue;
- }
-
- p = &argv[i][1];
- while (c = *p++)
- switch (c)
- {
- case 'k':
- keep_tempfiles = 1;
- break;
-
- default:
- fatal ("invalid command switch %c", c);
- }
- switchdone: ;
- }
-
- /* Record number of keyfields, terminate list of filenames */
-
- num_infiles = ip - infiles;
- *ip = 0;
-}
-
-/* Return 0 for an argument that is not a switch;
- for a switch, return 1 plus the number of following arguments that the switch swallows.
-*/
-
-int
-classify_arg (arg)
- char *arg;
-{
- if (!strcmp (arg, "-T") || !strcmp (arg, "-o"))
- return 2;
- if (arg[0] == '-')
- return 1;
- return 0;
-}
-
-/* Create a name for a temporary file */
-
-char *
-maketempname (count)
- int count;
-{
- char tempsuffix[10];
- sprintf (tempsuffix, "%d", count);
- return concat (tempdir, tempbase, tempsuffix);
-}
-
-/* Delete all temporary files up to the specified count */
-
-void
-flush_tempfiles (to_count)
- int to_count;
-{
- if (keep_tempfiles) return;
- while (last_deleted_tempcount < to_count)
- unlink (maketempname (++last_deleted_tempcount));
-}
-
-/* Copy an input file into a temporary file, and return the temporary file name */
-
-#define BUFSIZE 1024
-
-char *
-tempcopy (idesc)
- int idesc;
-{
- char *outfile = maketempname (++tempcount);
- int odesc;
- char buffer[BUFSIZE];
-
- odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
-
- if (odesc < 0) pfatal_with_name (outfile);
-
- while (1)
- {
- int nread = read (idesc, buffer, BUFSIZE);
- write (odesc, buffer, nread);
- if (!nread) break;
- }
-
- close (odesc);
-
- return outfile;
-}
-
-/* Compare two lines, provided as pointers to pointers to text,
- according to the specified set of keyfields */
-
-int
-compare_full (line1, line2)
- char **line1, **line2;
-{
- int i;
-
- /* Compare using the first keyfield;
- if that does not distinguish the lines, try the second keyfield; and so on. */
-
- for (i = 0; i < num_keyfields; i++)
- {
- long length1, length2;
- char *start1 = find_field (&keyfields[i], *line1, &length1);
- char *start2 = find_field (&keyfields[i], *line2, &length2);
- int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
- start2, length2, *line2 - text_base);
- if (tem)
- {
- if (keyfields[i].reverse)
- return - tem;
- return tem;
- }
- }
-
- return 0; /* Lines match exactly */
-}
-
-/* Compare two lines described by structures
- in which the first keyfield is identified in advance.
- For positional sorting, assumes that the order of the lines in core
- reflects their nominal order. */
-
-int
-compare_prepared (line1, line2)
- struct lineinfo *line1, *line2;
-{
- int i;
- int tem;
- char *text1, *text2;
-
- /* Compare using the first keyfield, which has been found for us already */
- if (keyfields->positional)
- {
- if (line1->text - text_base > line2->text - text_base)
- tem = 1;
- else
- tem = -1;
- }
- else if (keyfields->numeric)
- tem = line1->key.number - line2->key.number;
- else
- tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, line2->key.text, line2->keylen, 0);
- if (tem)
- {
- if (keyfields->reverse)
- return - tem;
- return tem;
- }
-
- text1 = line1->text;
- text2 = line2->text;
-
- /* Compare using the second keyfield;
- if that does not distinguish the lines, try the third keyfield; and so on. */
-
- for (i = 1; i < num_keyfields; i++)
- {
- long length1, length2;
- char *start1 = find_field (&keyfields[i], text1, &length1);
- char *start2 = find_field (&keyfields[i], text2, &length2);
- int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
- start2, length2, text2 - text_base);
- if (tem)
- {
- if (keyfields[i].reverse)
- return - tem;
- return tem;
- }
- }
-
- return 0; /* Lines match exactly */
-}
-
-/* Like compare_full but more general.
- You can pass any strings, and you can say how many keyfields to use.
- `pos1' and `pos2' should indicate the nominal positional ordering of
- the two lines in the input. */
-
-int
-compare_general (str1, str2, pos1, pos2, use_keyfields)
- char *str1, *str2;
- long pos1, pos2;
- int use_keyfields;
-{
- int i;
-
- /* Compare using the first keyfield;
- if that does not distinguish the lines, try the second keyfield; and so on. */
-
- for (i = 0; i < use_keyfields; i++)
- {
- long length1, length2;
- char *start1 = find_field (&keyfields[i], str1, &length1);
- char *start2 = find_field (&keyfields[i], str2, &length2);
- int tem = compare_field (&keyfields[i], start1, length1, pos1, start2, length2, pos2);
- if (tem)
- {
- if (keyfields[i].reverse)
- return - tem;
- return tem;
- }
- }
-
- return 0; /* Lines match exactly */
-}
-
-/* Find the start and length of a field in `str' according to `keyfield'.
- A pointer to the starting character is returned, and the length
- is stored into the int that `lengthptr' points to. */
-
-char *
-find_field (keyfield, str, lengthptr)
- struct keyfield *keyfield;
- char *str;
- long *lengthptr;
-{
- char *start;
- char *end;
- char *(*fun) ();
-
- if (keyfield->braced) fun = find_braced_pos;
- else fun = find_pos;
-
- start = ( *fun )(str, keyfield->startwords, keyfield->startchars,
- keyfield->ignore_blanks);
- if (keyfield->endwords < 0)
- {
- if (keyfield->braced)
- end = find_braced_end (start);
- else
- {
- end = start;
- while (*end && *end != '\n') end++;
- }
- }
- else
- {
- end = ( *fun )(str, keyfield->endwords, keyfield->endchars, 0);
- if (end - str < start - str) end = start;
- }
- *lengthptr = end - start;
- return start;
-}
-
-/* Find a pointer to a specified place within `str',
- skipping (from the beginning) `words' words and then `chars' chars.
- If `ignore_blanks' is nonzero, we skip all blanks
- after finding the specified word. */
-
-char *
-find_pos (str, words, chars, ignore_blanks)
- char *str;
- int words, chars;
- int ignore_blanks;
-{
- int i;
- char *p = str;
-
- for (i = 0; i < words; i++)
- {
- char c;
- /* Find next bunch of nonblanks and skip them. */
- while ((c = *p) == ' ' || c == '\t') p++;
- while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) p++;
- if (!*p || *p == '\n') return p;
- }
-
- while (*p == ' ' || *p == '\t') p++;
-
- for (i = 0; i < chars; i++)
- {
- if (!*p || *p == '\n') break;
- p++;
- }
- return p;
-}
-
-/* Like find_pos but assumes that each field is surrounded by braces
- and that braces within fields are balanced. */
-
-char *
-find_braced_pos (str, words, chars, ignore_blanks)
- char *str;
- int words, chars;
- int ignore_blanks;
-{
- int i;
- int bracelevel;
- char *p = str;
- char c;
-
- for (i = 0; i < words; i++)
- {
- bracelevel = 1;
- while ((c = *p++) != '{' && c != '\n' && c);
- if (c != '{')
- return p - 1;
- while (bracelevel)
- {
- c = *p++;
- if (c == '{') bracelevel++;
- if (c == '}') bracelevel--;
-#if 0
- if (c == '\\' || c == '@') c = *p++; /* \ quotes braces and \ */
-#endif
- if (c == 0 || c == '\n') return p-1;
- }
- }
-
- while ((c = *p++) != '{' && c != '\n' && c);
-
- if (c != '{')
- return p-1;
-
- if (ignore_blanks)
- while ((c = *p) == ' ' || c == '\t') p++;
-
- for (i = 0; i < chars; i++)
- {
- if (!*p || *p == '\n') break;
- p++;
- }
- return p;
-}
-
-/* Find the end of the balanced-brace field which starts at `str'.
- The position returned is just before the closing brace. */
-
-char *
-find_braced_end (str)
- char *str;
-{
- int bracelevel;
- char *p = str;
- char c;
-
- bracelevel = 1;
- while (bracelevel)
- {
- c = *p++;
- if (c == '{') bracelevel++;
- if (c == '}') bracelevel--;
-#if 0
- if (c == '\\' || c == '@') c = *p++;
-#endif
- if (c == 0 || c == '\n') return p-1;
- }
- return p - 1;
-}
-
-long
-find_value (start, length)
- char *start;
- long length;
-{
- while (length != 0L) {
- if (isdigit(*start))
- return atol(start);
- length--;
- start++;
- }
- return 0l;
-}
-
-/* Vector used to translate characters for comparison.
- This is how we make all alphanumerics follow all else,
- and ignore case in the first sorting. */
-int char_order[256];
-
-init_char_order ()
-{
- int i;
- for (i = 1; i < 256; i++)
- char_order[i] = i;
-
- for (i = '0'; i <= '9'; i++)
- char_order[i] += 512;
-
- for (i = 'a'; i <= 'z'; i++) {
- char_order[i] = 512 + i;
- char_order[i + 'A' - 'a'] = 512 + i;
- }
-}
-
-/* Compare two fields (each specified as a start pointer and a character count)
- according to `keyfield'. The sign of the value reports the relation between the fields */
-
-int
-compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
- struct keyfield *keyfield;
- char *start1;
- long length1;
- long pos1;
- char *start2;
- long length2;
- long pos2;
-{
- if (keyfields->positional)
- {
- if (pos1 > pos2)
- return 1;
- else
- return -1;
- }
- if (keyfield->numeric)
- {
- long value = find_value (start1, length1) - find_value (start2, length2);
- if (value > 0) return 1;
- if (value < 0) return -1;
- return 0;
- }
- else
- {
- char *p1 = start1;
- char *p2 = start2;
- char *e1 = start1 + length1;
- char *e2 = start2 + length2;
-
- int fold_case = keyfield->fold_case;
-
- while (1)
- {
- int c1, c2;
-
- if (p1 == e1) c1 = 0;
- else c1 = *p1++;
- if (p2 == e2) c2 = 0;
- else c2 = *p2++;
-
- if (char_order[c1] != char_order[c2])
- return char_order[c1] - char_order[c2];
- if (!c1) break;
- }
-
- /* Strings are equal except possibly for case. */
- p1 = start1;
- p2 = start2;
- while (1)
- {
- int c1, c2;
-
- if (p1 == e1) c1 = 0;
- else c1 = *p1++;
- if (p2 == e2) c2 = 0;
- else c2 = *p2++;
-
- if (c1 != c2)
- /* Reverse sign here so upper case comes out last. */
- return c2 - c1;
- if (!c1) break;
- }
-
- return 0;
- }
-}
-
-/* A `struct linebuffer' is a structure which holds a line of text.
- `readline' reads a line from a stream into a linebuffer
- and works regardless of the length of the line. */
-
-struct linebuffer
- {
- long size;
- char *buffer;
- };
-
-/* Initialize a linebuffer for use */
-
-void
-initbuffer (linebuffer)
- struct linebuffer *linebuffer;
-{
- linebuffer->size = 200;
- linebuffer->buffer = (char *) xmalloc (200);
-}
-
-/* Read a line of text from `stream' into `linebuffer'.
- Return the length of the line. */
-
-long
-readline (linebuffer, stream)
- struct linebuffer *linebuffer;
- FILE *stream;
-{
- char *buffer = linebuffer->buffer;
- char *p = linebuffer->buffer;
- char *end = p + linebuffer->size;
-
- while (1)
- {
- int c = getc (stream);
- if (p == end)
- {
- buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
- p += buffer - linebuffer->buffer;
- end += buffer - linebuffer->buffer;
- linebuffer->buffer = buffer;
- }
- if (c < 0 || c == '\n')
- {
- *p = 0;
- break;
- }
- *p++ = c;
- }
-
- return p - buffer;
-}
-
-/* Sort an input file too big to sort in core. */
-
-void
-sort_offline (infile, nfiles, total, outfile)
- char *infile;
- long total;
- char *outfile;
-{
- int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; /* More than enough */
- char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
- FILE *istream = fopen (infile, "r");
- int i;
- struct linebuffer lb;
- long linelength;
- int failure = 0;
-
- initbuffer (&lb);
-
- /* Read in one line of input data. */
-
- linelength = readline (&lb, istream);
-
- if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
- {
- error ("%s: not a texinfo index file", infile);
- return;
- }
-
- /* Split up the input into `ntemps' temporary files, or maybe fewer,
- and put the new files' names into `tempfiles' */
-
- for (i = 0; i < ntemps; i++)
- {
- char *outname = maketempname (++tempcount);
- FILE *ostream = fopen (outname, "w");
- long tempsize = 0;
-
- if (!ostream) pfatal_with_name (outname);
- tempfiles[i] = outname;
-
- /* Copy lines into this temp file as long as it does not make file "too big"
- or until there are no more lines. */
-
- while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
- {
- tempsize += linelength + 1;
- fputs (lb.buffer, ostream);
- putc ('\n', ostream);
-
- /* Read another line of input data. */
-
- linelength = readline (&lb, istream);
- if (!linelength && feof (istream)) break;
-
- if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
- {
- error ("%s: not a texinfo index file", infile);
- failure = 1;
- goto fail;
- }
- }
- fclose (ostream);
- if (feof (istream)) break;
- }
-
- free (lb.buffer);
-
- fail:
- /* Record number of temp files we actually needed. */
-
- ntemps = i;
-
- /* Sort each tempfile into another tempfile.
- Delete the first set of tempfiles and put the names of the second into `tempfiles' */
-
- for (i = 0; i < ntemps; i++)
- {
- char *newtemp = maketempname (++tempcount);
- sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp);
- if (!keep_tempfiles)
- unlink (tempfiles[i]);
- tempfiles[i] = newtemp;
- }
-
- if (failure)
- return;
-
- /* Merge the tempfiles together and indexify */
-
- merge_files (tempfiles, ntemps, outfile);
-}
-
-/* Sort `infile', whose size is `total',
- assuming that is small enough to be done in-core,
- then indexify it and send the output to `outfile' (or to stdout). */
-
-void
-sort_in_core (infile, total, outfile)
- char *infile;
- long total;
- char *outfile;
-{
- char **nextline;
- char *data = (char *) xmalloc (total + 1);
- char *file_data;
- long file_size;
- int i;
- FILE *ostream = stdout;
- struct lineinfo *lineinfo;
-
- /* Read the contents of the file into the moby array `data' */
-
- int desc = open (infile, 0, 0);
-
- if (desc < 0)
- fatal ("failure reopening %s", infile);
- for (file_size = 0; ; )
- {
- if ((i = read (desc, data + file_size, total - file_size)) <= 0)
- break;
- file_size += i;
- }
- file_data = data;
- data[file_size] = 0;
-
- close (desc);
-
- if (file_size > 0 && data[0] != '\\' && data[0] != '@')
- {
- error ("%s: not a texinfo index file", infile);
- return;
- }
-
- init_char_order ();
-
- /* Sort routines want to know this address */
-
- text_base = data;
-
- /* Create the array of pointers to lines, with a default size frequently enough. */
-
- nlines = total / 50;
- if (!nlines) nlines = 2;
- linearray = (char **) xmalloc (nlines * sizeof (char *));
-
- /* `nextline' points to the next free slot in this array.
- `nlines' is the allocated size. */
-
- nextline = linearray;
-
- /* Parse the input file's data, and make entries for the lines. */
-
- nextline = parsefile (infile, nextline, file_data, file_size);
- if (nextline == 0)
- {
- error ("%s: not a texinfo index file", infile);
- return;
- }
-
- /* Sort the lines */
-
- /* If we have enough space, find the first keyfield of each line in advance.
- Make a `struct lineinfo' for each line, which records the keyfield
- as well as the line, and sort them. */
-
- lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
-
- if (lineinfo)
- {
- struct lineinfo *lp;
- char **p;
-
- for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
- {
- lp->text = *p;
- lp->key.text = find_field (keyfields, *p, &lp->keylen);
- if (keyfields->numeric)
- lp->key.number = find_value (lp->key.text, lp->keylen);
- }
-
- qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared);
-
- for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
- *p = lp->text;
-
- free (lineinfo);
- }
- else
- qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
-
- /* Open the output file */
-
- if (outfile)
- {
- ostream = fopen (outfile, "w");
- if (!ostream)
- pfatal_with_name (outfile);
- }
-
- writelines (linearray, nextline - linearray, ostream);
- if (outfile) fclose (ostream);
-
- free (linearray);
- free (data);
-}
-
-/* Parse an input string in core into lines.
- DATA is the input string, and SIZE is its length.
- Data goes in LINEARRAY starting at NEXTLINE.
- The value returned is the first entry in LINEARRAY still unused.
- Value 0 means input file contents are invalid. */
-
-char **
-parsefile (filename, nextline, data, size)
- char *filename;
- char **nextline;
- char *data;
- long size;
-{
- char *p, *end;
- char **line = nextline;
-
- p = data;
- end = p + size;
- *end = 0;
-
- while (p != end)
- {
- if (p[0] != '\\' && p[0] != '@')
- return 0;
-
- *line = p;
- while (*p && *p != '\n') p++;
- if (p != end) p++;
-
- line++;
- if (line == linearray + nlines)
- {
- char **old = linearray;
- linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4));
- line += linearray - old;
- }
- }
-
- return line;
-}
-
-/* Indexification is a filter applied to the sorted lines
- as they are being written to the output file.
- Multiple entries for the same name, with different page numbers,
- get combined into a single entry with multiple page numbers.
- The first braced field, which is used for sorting, is discarded.
- However, its first character is examined, folded to lower case,
- and if it is different from that in the previous line fed to us
- a \initial line is written with one argument, the new initial.
-
- If an entry has four braced fields, then the second and third
- constitute primary and secondary names.
- In this case, each change of primary name
- generates a \primary line which contains only the primary name,
- and in between these are \secondary lines which contain
- just a secondary name and page numbers.
-*/
-
-/* The last primary name we wrote a \primary entry for.
- If only one level of indexing is being done, this is the last name seen */
-char *lastprimary;
-int lastprimarylength; /* Length of storage allocated for lastprimary */
-
-/* Similar, for the secondary name. */
-char *lastsecondary;
-int lastsecondarylength;
-
-/* Zero if we are not in the middle of writing an entry.
- One if we have written the beginning of an entry but have not
- yet written any page numbers into it.
- Greater than one if we have written the beginning of an entry
- plus at least one page number. */
-int pending;
-
-/* The initial (for sorting purposes) of the last primary entry written.
- When this changes, a \initial {c} line is written */
-
-char * lastinitial;
-
-int lastinitiallength;
-
-/* When we need a string of length 1 for the value of lastinitial,
- store it here. */
-
-char lastinitial1[2];
-
-/* Initialize static storage for writing an index */
-
-void
-init_index ()
-{
- pending = 0;
- lastinitial = lastinitial1;
- lastinitial1[0] = 0;
- lastinitial1[1] = 0;
- lastinitiallength = 0;
- lastprimarylength = 100;
- lastprimary = (char *) xmalloc (lastprimarylength + 1);
- bzero (lastprimary, lastprimarylength + 1);
- lastsecondarylength = 100;
- lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
- bzero (lastsecondary, lastsecondarylength + 1);
-}
-
-/* Indexify. Merge entries for the same name,
- insert headers for each initial character, etc. */
-
-indexify (line, ostream)
- char *line;
- FILE *ostream;
-{
- char *primary, *secondary, *pagenumber;
- int primarylength, secondarylength, pagelength;
- int len = strlen (line);
- int nosecondary;
- int initiallength;
- char *initial;
- char initial1[2];
- register char *p;
-
- /* First, analyze the parts of the entry fed to us this time */
-
- p = find_braced_pos (line, 0, 0, 0);
- if (*p == '{')
- {
- initial = p;
- /* Get length of inner pair of braces starting at p,
- including that inner pair of braces. */
- initiallength = find_braced_end (p + 1) + 1 - p;
- }
- else
- {
- initial = initial1;
- initial1[0] = *p;
- initial1[1] = 0;
- initiallength = 1;
-
- if (initial1[0] >= 'a' && initial1[0] <= 'z')
- initial1[0] -= 040;
- }
-
- pagenumber = find_braced_pos (line, 1, 0, 0);
- pagelength = find_braced_end (pagenumber) - pagenumber;
- if (pagelength == 0)
- abort ();
-
- primary = find_braced_pos (line, 2, 0, 0);
- primarylength = find_braced_end (primary) - primary;
-
- secondary = find_braced_pos (line, 3, 0, 0);
- nosecondary = !*secondary;
- if (!nosecondary)
- secondarylength = find_braced_end (secondary) - secondary;
-
- /* If the primary is different from before, make a new primary entry */
- if (strncmp (primary, lastprimary, primarylength))
- {
- /* Close off current secondary entry first, if one is open */
- if (pending)
- {
- fputs ("}\n", ostream);
- pending = 0;
- }
-
- /* If this primary has a different initial, include an entry for the initial */
- if (initiallength != lastinitiallength ||
- strncmp (initial, lastinitial, initiallength))
- {
- fprintf (ostream, "\\initial {");
- fwrite (initial, 1, initiallength, ostream);
- fprintf (ostream, "}\n", initial);
- if (initial == initial1)
- {
- lastinitial = lastinitial1;
- *lastinitial1 = *initial1;
- }
- else
- {
- lastinitial = initial;
- }
- lastinitiallength = initiallength;
- }
-
- /* Make the entry for the primary. */
- if (nosecondary)
- fputs ("\\entry {", ostream);
- else
- fputs ("\\primary {", ostream);
- fwrite (primary, primarylength, 1, ostream);
- if (nosecondary)
- {
- fputs ("}{", ostream);
- pending = 1;
- }
- else
- fputs ("}\n", ostream);
-
- /* Record name of most recent primary */
- if (lastprimarylength < primarylength)
- {
- lastprimarylength = primarylength + 100;
- lastprimary = (char *) xrealloc (lastprimary,
- 1 + lastprimarylength);
- }
- strncpy (lastprimary, primary, primarylength);
- lastprimary[primarylength] = 0;
-
- /* There is no current secondary within this primary, now */
- lastsecondary[0] = 0;
- }
-
- /* Should not have an entry with no subtopic following one with a subtopic */
-
- if (nosecondary && *lastsecondary)
- error ("entry %s follows an entry with a secondary name", line);
-
- /* Start a new secondary entry if necessary */
- if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
- {
- if (pending)
- {
- fputs ("}\n", ostream);
- pending = 0;
- }
-
- /* Write the entry for the secondary. */
- fputs ("\\secondary {", ostream);
- fwrite (secondary, secondarylength, 1, ostream);
- fputs ("}{", ostream);
- pending = 1;
-
- /* Record name of most recent secondary */
- if (lastsecondarylength < secondarylength)
- {
- lastsecondarylength = secondarylength + 100;
- lastsecondary = (char *) xrealloc (lastsecondary,
- 1 + lastsecondarylength);
- }
- strncpy (lastsecondary, secondary, secondarylength);
- lastsecondary[secondarylength] = 0;
- }
-
- /* Here to add one more page number to the current entry */
- if (pending++ != 1)
- fputs (", ", ostream); /* Punctuate first, if this is not the first */
- fwrite (pagenumber, pagelength, 1, ostream);
-}
-
-/* Close out any unfinished output entry */
-
-void
-finish_index (ostream)
- FILE *ostream;
-{
- if (pending)
- fputs ("}\n", ostream);
- free (lastprimary);
- free (lastsecondary);
-}
-
-/* Copy the lines in the sorted order.
- Each line is copied out of the input file it was found in. */
-
-void
-writelines (linearray, nlines, ostream)
- char **linearray;
- int nlines;
- FILE *ostream;
-{
- char **stop_line = linearray + nlines;
- char **next_line;
-
- init_index ();
-
- /* Output the text of the lines, and free the buffer space */
-
- for (next_line = linearray; next_line != stop_line; next_line++)
- {
- /* If -u was specified, output the line only if distinct from previous one. */
- if (next_line == linearray
-	  /* Compare previous line with this one, using only the explicitly specified keyfields */
- || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
- {
- char *p = *next_line;
- char c;
- while ((c = *p++) && c != '\n');
- *(p-1) = 0;
- indexify (*next_line, ostream);
- }
- }
-
- finish_index (ostream);
-}
-
-/* Assume (and optionally verify) that each input file is sorted;
- merge them and output the result.
- Returns nonzero if any input file fails to be sorted.
-
- This is the high-level interface that can handle an unlimited number of files. */
-
-#define MAX_DIRECT_MERGE 10
-
-int
-merge_files (infiles, nfiles, outfile)
- char **infiles;
- int nfiles;
- char *outfile;
-{
- char **tempfiles;
- int ntemps;
- int i;
- int value = 0;
- int start_tempcount = tempcount;
-
- if (nfiles <= MAX_DIRECT_MERGE)
- return merge_direct (infiles, nfiles, outfile);
-
- /* Merge groups of MAX_DIRECT_MERGE input files at a time,
- making a temporary file to hold each group's result. */
-
- ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
- tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
- for (i = 0; i < ntemps; i++)
- {
- int nf = MAX_DIRECT_MERGE;
- if (i + 1 == ntemps)
- nf = nfiles - i * MAX_DIRECT_MERGE;
- tempfiles[i] = maketempname (++tempcount);
- value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
- }
-
- /* All temporary files that existed before are no longer needed
- since their contents have been merged into our new tempfiles.
- So delete them. */
- flush_tempfiles (start_tempcount);
-
- /* Now merge the temporary files we created. */
-
- merge_files (tempfiles, ntemps, outfile);
-
- free (tempfiles);
-
- return value;
-}
-
-/* Assume (and optionally verify) that each input file is sorted;
- merge them and output the result.
- Returns nonzero if any input file fails to be sorted.
-
- This version of merging will not work if the number of
- input files gets too high. Higher level functions
- use it only with a bounded number of input files. */
-
-int
-merge_direct (infiles, nfiles, outfile)
- char **infiles;
- int nfiles;
- char *outfile;
-{
- char **ip = infiles;
- struct linebuffer *lb1, *lb2;
- struct linebuffer **thisline, **prevline;
- FILE **streams;
- int i;
- int nleft;
- int lossage = 0;
- int *file_lossage;
- struct linebuffer *prev_out = 0;
- FILE *ostream = stdout;
-
- if (outfile)
- {
- ostream = fopen (outfile, "w");
- }
- if (!ostream) pfatal_with_name (outfile);
-
- init_index ();
-
- if (nfiles == 0)
- {
- if (outfile)
- fclose (ostream);
- return 0;
- }
-
- /* For each file, make two line buffers.
- Also, for each file, there is an element of `thisline'
- which points at any time to one of the file's two buffers,
- and an element of `prevline' which points to the other buffer.
- `thisline' is supposed to point to the next available line from the file,
- while `prevline' holds the last file line used,
- which is remembered so that we can verify that the file is properly sorted. */
-
- /* lb1 and lb2 contain one buffer each per file */
- lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
- lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
-
- /* thisline[i] points to the linebuffer holding the next available line in file i,
- or is zero if there are no lines left in that file. */
- thisline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
- /* prevline[i] points to the linebuffer holding the last used line from file i.
- This is just for verifying that file i is properly sorted. */
- prevline = (struct linebuffer **) xmalloc (nfiles * sizeof (struct linebuffer *));
- /* streams[i] holds the input stream for file i. */
- streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
- /* file_lossage[i] is nonzero if we already know file i is not properly sorted. */
- file_lossage = (int *) xmalloc (nfiles * sizeof (int));
-
- /* Allocate and initialize all that storage */
-
- for (i = 0; i < nfiles; i++)
- {
- initbuffer (&lb1[i]);
- initbuffer (&lb2[i]);
- thisline[i] = &lb1[i];
- prevline[i] = &lb2[i];
- file_lossage[i] = 0;
- streams[i] = fopen (infiles[i], "r");
- if (!streams[i])
- pfatal_with_name (infiles[i]);
-
- readline (thisline[i], streams[i]);
- }
-
- /* Keep count of number of files not at eof */
- nleft = nfiles;
-
- while (nleft)
- {
- struct linebuffer *best = 0;
- struct linebuffer *exch;
- int bestfile = -1;
- int i;
-
- /* Look at the next avail line of each file; choose the least one. */
-
- for (i = 0; i < nfiles; i++)
- {
- if (thisline[i] &&
- (!best ||
- 0 < compare_general (best->buffer, thisline[i]->buffer,
- (long) bestfile, (long) i, num_keyfields)))
- {
- best = thisline[i];
- bestfile = i;
- }
- }
-
- /* Output that line, unless it matches the previous one and we don't want duplicates */
-
- if (!(prev_out &&
- !compare_general (prev_out->buffer, best->buffer, 0L, 1L, num_keyfields - 1)))
- indexify (best->buffer, ostream);
- prev_out = best;
-
- /* Now make the line the previous of its file, and fetch a new line from that file */
-
- exch = prevline[bestfile];
- prevline[bestfile] = thisline[bestfile];
- thisline[bestfile] = exch;
-
- while (1)
- {
- /* If the file has no more, mark it empty */
-
- if (feof (streams[bestfile]))
- {
- thisline[bestfile] = 0;
- nleft--; /* Update the number of files still not empty */
- break;
- }
- readline (thisline[bestfile], streams[bestfile]);
- if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) break;
- }
- }
-
- finish_index (ostream);
-
- /* Free all storage and close all input streams */
-
- for (i = 0; i < nfiles; i++)
- {
- fclose (streams[i]);
- free (lb1[i].buffer);
- free (lb2[i].buffer);
- }
- free (file_lossage);
- free (lb1);
- free (lb2);
- free (thisline);
- free (prevline);
- free (streams);
-
- if (outfile)
- fclose (ostream);
-
- return lossage;
-}
-
-/* Print error message and exit. */
-
-fatal (s1, s2)
- char *s1, *s2;
-{
- error (s1, s2);
- exit (EXIT_FATAL);
-}
-
-/* Print error message. `s1' is printf control string, `s2' is arg for it. */
-
-error (s1, s2)
- char *s1, *s2;
-{
- printf ("texindex: ");
- printf (s1, s2);
- printf ("\n");
-}
-
-perror_with_name (name)
- char *name;
-{
- char *s;
-
- if (errno < sys_nerr)
- s = concat ("", sys_errlist[errno], " for %s");
- else
- s = "cannot open %s";
- error (s, name);
-}
-
-pfatal_with_name (name)
- char *name;
-{
- char *s;
-
- if (errno < sys_nerr)
- s = concat ("", sys_errlist[errno], " for %s");
- else
- s = "cannot open %s";
- fatal (s, name);
-}
-
-/* Return a newly-allocated string whose contents concatenate those of s1, s2, s3. */
-
-char *
-concat (s1, s2, s3)
- char *s1, *s2, *s3;
-{
- int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
- char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
-
- strcpy (result, s1);
- strcpy (result + len1, s2);
- strcpy (result + len1 + len2, s3);
- *(result + len1 + len2 + len3) = 0;
-
- return result;
-}
-
-/* Like malloc but get fatal error if memory is exhausted. */
-
-int
-xmalloc (size)
- int size;
-{
- int result = malloc (size);
- if (!result)
- fatal ("virtual memory exhausted", 0);
- return result;
-}
-
-
-int
-xrealloc (ptr, size)
- char *ptr;
- int size;
-{
- int result = realloc (ptr, size);
- if (!result)
- fatal ("virtual memory exhausted");
- return result;
-}
-
-bzero (b, length)
- register char *b;
- register int length;
-{
-#ifdef VMS
- short zero = 0;
- long max_str = 65535;
- long len;
-
- while (length > max_str)
- {
- (void) LIB$MOVC5 (&zero, &zero, &zero, &max_str, b);
- length -= max_str;
- b += max_str;
- }
- len = length;
- (void) LIB$MOVC5 (&zero, &zero, &zero, &len, b);
-#else
- while (length-- > 0)
- *b++ = 0;
-#endif /* not VMS */
-}
diff --git a/test/ChangeLog b/test/ChangeLog
new file mode 100644
index 00000000..570c031b
--- /dev/null
+++ b/test/ChangeLog
@@ -0,0 +1,3 @@
+Wed Jan 10 22:58:55 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * ChangeLog created.
diff --git a/test/Makefile b/test/Makefile
index 1ad2b2c7..1ec91c45 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,9 +1,10 @@
+# Generated automatically from Makefile.in by configure.
# Makefile for GNU Awk test suite.
#
# Copyright (C) 1988-1995 the Free Software Foundation, Inc.
#
# This file is part of GAWK, the GNU implementation of the
-# AWK Progamming Language.
+# AWK Programming Language.
#
# GAWK is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -16,27 +17,33 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with GAWK; see the file COPYING. If not, write to
-# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
SHELL = /bin/sh
+AWK = ../gawk
+CMP = cmp
+
+srcdir = .
bigtest: basic poundbang gawk.extensions
basic: msg swaplns messages argarray longwrds \
getline fstabplus compare arrayref rs fsrs rand \
fsbs negexp asgext anchgsub splitargv awkpath nfset reparse \
- convfmt arrayparm paramdup nonl
+ convfmt arrayparm paramdup nonl defref nofmtch litoct resplit \
+ rswhite prmarscl sclforin sclifin intprec childin noeffect \
+ numsubstr pcntplus prmreuse math fflush fldchg
gawk.extensions: fieldwdth ignrcase posix manyfiles igncfs argtest \
- badargs
+ badargs strftime gensub gnureops
extra: regtest inftest
poundbang::
- cp ../gawk /tmp && chmod +x poundbang && ./poundbang poundbang >tmp
+ cp $(AWK) /tmp/gawk && $(srcdir)/poundbang $(srcdir)/poundbang >_`basename $@`
rm -f /tmp/gawk
- cmp poundbang.good tmp && rm -f tmp
+ $(CMP) $(srcdir)/poundbang.ok _`basename $@` && rm -f _`basename $@`
msg::
@echo 'Any output from "cmp" is bad news, although some differences'
@@ -45,140 +52,228 @@ msg::
@echo 'precision may lead to slightly different output in a few cases.'
swaplns::
- @../gawk -f swaplns.awk data >tmp
- cmp swaplns.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/swaplns.awk $(srcdir)/swaplns.in >_$@
+ $(CMP) $(srcdir)/swaplns.ok _$@ && rm -f _$@
messages::
- @../gawk -f messages.awk >out2 2>out3
- { cmp out1.good out1 && cmp out2.good out2 && cmp out3.good out3 && rm -f out1 out2 out3; } || { test -d /dev/fd && echo IT IS OK THAT THIS TEST FAILED; }
+ @$(AWK) -f $(srcdir)/messages.awk >out2 2>out3
+ { $(CMP) $(srcdir)/out1.ok out1 && $(CMP) $(srcdir)/out2.ok out2 && $(CMP) $(srcdir)/out3.ok out3 && rm -f out1 out2 out3; } || { test -d /dev/fd && echo IT IS OK THAT THIS TEST FAILED; }
argarray::
- @TEST=test echo just a test | ../gawk -f argarray.awk argarray.awk - >tmp
- cmp argarray.good tmp && rm -f tmp
+ @case $(srcdir) in \
+ .) : ;; \
+ *) cp $(srcdir)/argarray.in . ;; \
+ esac
+ @TEST=test echo just a test | $(AWK) -f $(srcdir)/argarray.awk ./argarray.in - >_$@
+ $(CMP) $(srcdir)/argarray.ok _$@ && rm -f _$@
fstabplus::
- @echo '1 2' | ../gawk -f fstabplus >tmp
- cmp fstabplus.good tmp && rm -f tmp
+ @echo '1 2' | $(AWK) -f $(srcdir)/fstabplus.awk >_$@
+ $(CMP) $(srcdir)/fstabplus.ok _$@ && rm -f _$@
fsrs::
- @../gawk -f fsrs.awk fsrs.in >tmp
- cmp fsrs.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/fsrs.awk $(srcdir)/fsrs.in >_$@
+ $(CMP) $(srcdir)/fsrs.ok _$@ && rm -f _$@
igncfs::
- @../gawk -f igncfs.awk igncfs.in >tmp
- cmp igncfs.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/igncfs.awk $(srcdir)/igncfs.in >_$@
+ $(CMP) $(srcdir)/igncfs.ok _$@ && rm -f _$@
longwrds::
- @../gawk -f longwrds.awk manpage | sort >tmp
- cmp longwrds.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/longwrds.awk $(srcdir)/manpage | sort >_$@
+ $(CMP) $(srcdir)/longwrds.ok _$@ && rm -f _$@
fieldwdth::
- @echo '123456789' | ../gawk -v FIELDWIDTHS="2 3 4" '{ print $$2}' >tmp
- cmp fieldwdth.good tmp && rm -f tmp
+ @echo '123456789' | $(AWK) -v FIELDWIDTHS="2 3 4" '{ print $$2}' >_$@
+ $(CMP) $(srcdir)/fieldwdth.ok _$@ && rm -f _$@
ignrcase::
- @echo xYz | ../gawk -v IGNORECASE=1 '{ sub(/y/, ""); print}' >tmp
- cmp ignrcase.good tmp && rm -f tmp
+ @echo xYz | $(AWK) -v IGNORECASE=1 '{ sub(/y/, ""); print}' >_$@
+ $(CMP) $(srcdir)/ignrcase.ok _$@ && rm -f _$@
regtest::
@echo 'Some of the output from regtest is very system specific, do not'
@echo 'be distressed if your output differs from that distributed.'
@echo 'Manual inspection is called for.'
- AWK=`pwd`/../gawk ./regtest
+ AWK=`pwd`/$(AWK) $(srcdir)/regtest
posix::
- @echo '1:2,3 4' | ../gawk -f posix >tmp
- cmp posix.good tmp && rm -f tmp
+ @echo '1:2,3 4' | $(AWK) -f $(srcdir)/posix.awk >_$@
+ $(CMP) $(srcdir)/posix.ok _$@ && rm -f _$@
manyfiles::
@rm -rf junk
@mkdir junk
- @../gawk 'BEGIN { for (i = 1; i <= 300; i++) print i, i}' >tmp
- @../gawk -f manyfiles.awk tmp tmp
+ @$(AWK) 'BEGIN { for (i = 1; i <= 300; i++) print i, i}' >_$@
+ @$(AWK) -f $(srcdir)/manyfiles.awk _$@ _$@
@echo "This number better be 1 ->" | tr -d '\012'
- @wc -l junk/* | ../gawk '$$1 != 2' | wc -l
- @rm -rf junk tmp
+ @wc -l junk/* | $(AWK) '$$1 != 2' | wc -l
+ @rm -rf junk _$@
compare::
- @../gawk -f compare.awk 0 1 compare.in >tmp
- cmp compare.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/compare.awk 0 1 $(srcdir)/compare.in >_$@
+ $(CMP) $(srcdir)/compare.ok _$@ && rm -f _$@
arrayref::
- @../gawk -f arrayref >tmp
- cmp arrayref.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/arrayref.awk >_$@
+ $(CMP) $(srcdir)/arrayref.ok _$@ && rm -f _$@
rs::
- @../gawk -v RS="" '{ print $$1, $$2}' rs.data >tmp
- cmp rs.good tmp && rm -f tmp
+ @$(AWK) -v RS="" '{ print $$1, $$2}' $(srcdir)/rs.in >_$@
+ $(CMP) $(srcdir)/rs.ok _$@ && rm -f _$@
fsbs::
- @../gawk -v FS='\' '{ print $$1, $$2 }' fsbs.in >tmp
- cmp fsbs.good tmp && rm -f tmp
+ @$(AWK) -v FS='\' '{ print $$1, $$2 }' $(srcdir)/fsbs.in >_$@
+ $(CMP) $(srcdir)/fsbs.ok _$@ && rm -f _$@
inftest::
@echo This test is very machine specific...
- @../gawk -f inftest.awk >tmp
- cmp inftest.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/inftest.awk >_$@
+ $(CMP) $(srcdir)/inftest.ok _$@ && rm -f _$@
getline::
- @../gawk -f getline.awk getline.awk getline.awk >tmp
- cmp getline.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/getline.awk $(srcdir)/getline.awk $(srcdir)/getline.awk >_$@
+ $(CMP) $(srcdir)/getline.ok _$@ && rm -f _$@
rand::
@echo The following line should just be 19 random numbers between 1 and 100
- @../gawk -f rand.awk
+ @$(AWK) -f $(srcdir)/rand.awk
negexp::
- @../gawk 'BEGIN { a = -2; print 10^a }' >tmp
- cmp negexp.good tmp && rm -f tmp
+ @$(AWK) 'BEGIN { a = -2; print 10^a }' >_$@
+ $(CMP) $(srcdir)/negexp.ok _$@ && rm -f _$@
asgext::
- @../gawk -f asgext.awk asgext.in >tmp
- cmp asgext.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/asgext.awk $(srcdir)/asgext.in >_$@
+ $(CMP) $(srcdir)/asgext.ok _$@ && rm -f _$@
anchgsub::
- @../gawk -f anchgsub.awk anchgsub.in >tmp
- cmp anchgsub.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/anchgsub.awk $(srcdir)/anchgsub.in >_$@
+ $(CMP) $(srcdir)/anchgsub.ok _$@ && rm -f _$@
splitargv::
- @../gawk -f splitargv.awk splitargv.in >tmp
- cmp splitargv.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/splitargv.awk $(srcdir)/splitargv.in >_$@
+ $(CMP) $(srcdir)/splitargv.ok _$@ && rm -f _$@
awkpath::
- @AWKPATH=".:lib" ../gawk -f awkpath.awk >tmp
- cmp awkpath.good tmp && rm -f tmp
+ @AWKPATH="$(srcdir):$(srcdir)/lib" $(AWK) -f awkpath.awk >_$@
+ $(CMP) $(srcdir)/awkpath.ok _$@ && rm -f _$@
nfset::
- @../gawk -f nfset.awk nfset.in >tmp
- cmp nfset.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/nfset.awk $(srcdir)/nfset.in >_$@
+ $(CMP) $(srcdir)/nfset.ok _$@ && rm -f _$@
reparse::
- @../gawk -f reparse.awk reparse.in >tmp
- cmp reparse.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/reparse.awk $(srcdir)/reparse.in >_$@
+ $(CMP) $(srcdir)/reparse.ok _$@ && rm -f _$@
argtest::
- @../gawk -f argtest.awk -x -y abc >tmp
- cmp argtest.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/argtest.awk -x -y abc >_$@
+ $(CMP) $(srcdir)/argtest.ok _$@ && rm -f _$@
badargs::
- @-../gawk -f 2>&1 | grep -v patchlevel >tmp
- cmp badargs.good tmp && rm -f tmp
+ @-$(AWK) -f 2>&1 | grep -v patchlevel >_$@
+ $(CMP) $(srcdir)/badargs.ok _$@ && rm -f _$@
convfmt::
- @../gawk -f convfmt.awk >tmp
- cmp convfmt.good tmp && rm -f tmp
+ @$(AWK) -f $(srcdir)/convfmt.awk >_$@
+ $(CMP) $(srcdir)/convfmt.ok _$@ && rm -f _$@
arrayparm::
- @-../gawk -f arrayparm.awk >tmp 2>&1
- cmp arrayparm.good tmp && rm -f tmp
+ @-AWKPATH=$(srcdir) $(AWK) -f arrayparm.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/arrayparm.ok _$@ && rm -f _$@
paramdup::
- @-../gawk -f paramdup.awk >tmp 2>&1
- cmp paramdup.good tmp && rm -f tmp
+ @-AWKPATH=$(srcdir) $(AWK) -f paramdup.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/paramdup.ok _$@ && rm -f _$@
nonl::
- @-../gawk --lint -f nonl.awk /dev/null >tmp 2>&1
- cmp nonl.good tmp && rm -f tmp
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f nonl.awk /dev/null >_$@ 2>&1
+ $(CMP) $(srcdir)/nonl.ok _$@ && rm -f _$@
+
+defref::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f defref.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/defref.ok _$@ && rm -f _$@
+
+nofmtch::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f nofmtch.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/nofmtch.ok _$@ && rm -f _$@
+
+strftime::
+ : this test could fail on slow machines or on a second boundary,
+ : so if it does, double check the actual results
+ @date | $(AWK) '{ $$3 = sprintf("%02d", $$3 + 0) ; \
+ print > "strftime.ok" ; \
+ print strftime() > "'_$@'" }'
+ -$(CMP) strftime.ok _$@ && rm -f _$@ strftime.ok
+
+litoct::
+ @echo ab | $(AWK) --traditional -f $(srcdir)/litoct.awk >_$@
+ $(CMP) $(srcdir)/litoct.ok _$@ && rm -f _$@
+
+gensub::
+ @$(AWK) -f $(srcdir)/gensub.awk $(srcdir)/gensub.in >_$@
+ $(CMP) $(srcdir)/gensub.ok _$@ && rm -f _$@
+
+resplit::
+ @echo a:b:c d:e:f | $(AWK) '{ FS = ":"; $$0 = $$0; print $$2 }' > _$@
+ $(CMP) $(srcdir)/resplit.ok _$@ && rm -f _$@
+
+rswhite::
+ @$(AWK) -f $(srcdir)/rswhite.awk $(srcdir)/rswhite.in > _$@
+ $(CMP) $(srcdir)/rswhite.ok _$@ && rm -f _$@
+
+prmarscl::
+ @-AWKPATH=$(srcdir) $(AWK) -f prmarscl.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/prmarscl.ok _$@ && rm -f _$@
+
+sclforin::
+ @-AWKPATH=$(srcdir) $(AWK) -f sclforin.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/sclforin.ok _$@ && rm -f _$@
+
+sclifin::
+ @-AWKPATH=$(srcdir) $(AWK) -f sclifin.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/sclifin.ok _$@ && rm -f _$@
+
+intprec::
+ @-$(AWK) -f $(srcdir)/intprec.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/intprec.ok _$@ && rm -f _$@
+
+childin::
+ @echo hi | $(AWK) 'BEGIN { "cat" | getline; print; close("cat") }' > _$@
+ $(CMP) $(srcdir)/childin.ok _$@ && rm -f _$@
+
+noeffect::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f noeffect.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/noeffect.ok _$@ && rm -f _$@
+
+numsubstr::
+ @-AWKPATH=$(srcdir) $(AWK) -f numsubstr.awk $(srcdir)/numsubstr.in >_$@
+ $(CMP) $(srcdir)/numsubstr.ok _$@ && rm -f _$@
+
+gnureops::
+ @$(AWK) -f $(srcdir)/gnureops.awk >_$@
+ $(CMP) $(srcdir)/gnureops.ok _$@ && rm -f _$@
+
+pcntplus::
+ @$(AWK) -f $(srcdir)/pcntplus.awk >_$@
+ $(CMP) $(srcdir)/pcntplus.ok _$@ && rm -f _$@
+
+prmreuse::
+ @$(AWK) -f $(srcdir)/prmreuse.awk >_$@
+ $(CMP) $(srcdir)/prmreuse.ok _$@ && rm -f _$@
+
+math::
+ @$(AWK) -f $(srcdir)/math.awk >_$@
+ $(CMP) $(srcdir)/math.ok _$@ && rm -f _$@
+
+fflush::
+ @$(srcdir)/fflush.sh >_$@
+ $(CMP) $(srcdir)/fflush.ok _$@ && rm -f _$@
+
+fldchg::
+ @$(AWK) -f $(srcdir)/fldchg.awk $(srcdir)/fldchg.in >_$@
+ $(CMP) $(srcdir)/fldchg.ok _$@ && rm -f _$@
clean:
- rm -fr tmp core junk
+ rm -fr _* core junk
diff --git a/test/Makefile.in b/test/Makefile.in
new file mode 100644
index 00000000..52036d75
--- /dev/null
+++ b/test/Makefile.in
@@ -0,0 +1,279 @@
+# Makefile for GNU Awk test suite.
+#
+# Copyright (C) 1988-1995 the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+
+SHELL = /bin/sh
+AWK = ../gawk
+CMP = cmp
+
+srcdir = @srcdir@
+VPATH = @srcdir@
+
+bigtest: basic poundbang gawk.extensions
+
+basic: msg swaplns messages argarray longwrds \
+ getline fstabplus compare arrayref rs fsrs rand \
+ fsbs negexp asgext anchgsub splitargv awkpath nfset reparse \
+ convfmt arrayparm paramdup nonl defref nofmtch litoct resplit \
+ rswhite prmarscl sclforin sclifin intprec childin noeffect \
+ numsubstr pcntplus prmreuse math fflush fldchg
+
+gawk.extensions: fieldwdth ignrcase posix manyfiles igncfs argtest \
+ badargs strftime gensub gnureops
+
+extra: regtest inftest
+
+poundbang::
+ cp $(AWK) /tmp/gawk && $(srcdir)/poundbang $(srcdir)/poundbang >_`basename $@`
+ rm -f /tmp/gawk
+ $(CMP) $(srcdir)/poundbang.ok _`basename $@` && rm -f _`basename $@`
+
+msg::
+ @echo 'Any output from "cmp" is bad news, although some differences'
+ @echo 'in floating point values are probably benign -- in particular,'
+ @echo 'some systems may omit a leading zero and the floating point'
+ @echo 'precision may lead to slightly different output in a few cases.'
+
+swaplns::
+ @$(AWK) -f $(srcdir)/swaplns.awk $(srcdir)/swaplns.in >_$@
+ $(CMP) $(srcdir)/swaplns.ok _$@ && rm -f _$@
+
+messages::
+ @$(AWK) -f $(srcdir)/messages.awk >out2 2>out3
+ { $(CMP) $(srcdir)/out1.ok out1 && $(CMP) $(srcdir)/out2.ok out2 && $(CMP) $(srcdir)/out3.ok out3 && rm -f out1 out2 out3; } || { test -d /dev/fd && echo IT IS OK THAT THIS TEST FAILED; }
+
+argarray::
+ @case $(srcdir) in \
+ .) : ;; \
+ *) cp $(srcdir)/argarray.in . ;; \
+ esac
+ @TEST=test echo just a test | $(AWK) -f $(srcdir)/argarray.awk ./argarray.in - >_$@
+ $(CMP) $(srcdir)/argarray.ok _$@ && rm -f _$@
+
+fstabplus::
+ @echo '1 2' | $(AWK) -f $(srcdir)/fstabplus.awk >_$@
+ $(CMP) $(srcdir)/fstabplus.ok _$@ && rm -f _$@
+
+fsrs::
+ @$(AWK) -f $(srcdir)/fsrs.awk $(srcdir)/fsrs.in >_$@
+ $(CMP) $(srcdir)/fsrs.ok _$@ && rm -f _$@
+
+igncfs::
+ @$(AWK) -f $(srcdir)/igncfs.awk $(srcdir)/igncfs.in >_$@
+ $(CMP) $(srcdir)/igncfs.ok _$@ && rm -f _$@
+
+longwrds::
+ @$(AWK) -f $(srcdir)/longwrds.awk $(srcdir)/manpage | sort >_$@
+ $(CMP) $(srcdir)/longwrds.ok _$@ && rm -f _$@
+
+fieldwdth::
+ @echo '123456789' | $(AWK) -v FIELDWIDTHS="2 3 4" '{ print $$2}' >_$@
+ $(CMP) $(srcdir)/fieldwdth.ok _$@ && rm -f _$@
+
+ignrcase::
+ @echo xYz | $(AWK) -v IGNORECASE=1 '{ sub(/y/, ""); print}' >_$@
+ $(CMP) $(srcdir)/ignrcase.ok _$@ && rm -f _$@
+
+regtest::
+ @echo 'Some of the output from regtest is very system specific, do not'
+ @echo 'be distressed if your output differs from that distributed.'
+ @echo 'Manual inspection is called for.'
+ AWK=`pwd`/$(AWK) $(srcdir)/regtest
+
+posix::
+ @echo '1:2,3 4' | $(AWK) -f $(srcdir)/posix.awk >_$@
+ $(CMP) $(srcdir)/posix.ok _$@ && rm -f _$@
+
+manyfiles::
+ @rm -rf junk
+ @mkdir junk
+ @$(AWK) 'BEGIN { for (i = 1; i <= 300; i++) print i, i}' >_$@
+ @$(AWK) -f $(srcdir)/manyfiles.awk _$@ _$@
+ @echo "This number better be 1 ->" | tr -d '\012'
+ @wc -l junk/* | $(AWK) '$$1 != 2' | wc -l
+ @rm -rf junk _$@
+
+compare::
+ @$(AWK) -f $(srcdir)/compare.awk 0 1 $(srcdir)/compare.in >_$@
+ $(CMP) $(srcdir)/compare.ok _$@ && rm -f _$@
+
+arrayref::
+ @$(AWK) -f $(srcdir)/arrayref.awk >_$@
+ $(CMP) $(srcdir)/arrayref.ok _$@ && rm -f _$@
+
+rs::
+ @$(AWK) -v RS="" '{ print $$1, $$2}' $(srcdir)/rs.in >_$@
+ $(CMP) $(srcdir)/rs.ok _$@ && rm -f _$@
+
+fsbs::
+ @$(AWK) -v FS='\' '{ print $$1, $$2 }' $(srcdir)/fsbs.in >_$@
+ $(CMP) $(srcdir)/fsbs.ok _$@ && rm -f _$@
+
+inftest::
+ @echo This test is very machine specific...
+ @$(AWK) -f $(srcdir)/inftest.awk >_$@
+ $(CMP) $(srcdir)/inftest.ok _$@ && rm -f _$@
+
+getline::
+ @$(AWK) -f $(srcdir)/getline.awk $(srcdir)/getline.awk $(srcdir)/getline.awk >_$@
+ $(CMP) $(srcdir)/getline.ok _$@ && rm -f _$@
+
+rand::
+ @echo The following line should just be 19 random numbers between 1 and 100
+ @$(AWK) -f $(srcdir)/rand.awk
+
+negexp::
+ @$(AWK) 'BEGIN { a = -2; print 10^a }' >_$@
+ $(CMP) $(srcdir)/negexp.ok _$@ && rm -f _$@
+
+asgext::
+ @$(AWK) -f $(srcdir)/asgext.awk $(srcdir)/asgext.in >_$@
+ $(CMP) $(srcdir)/asgext.ok _$@ && rm -f _$@
+
+anchgsub::
+ @$(AWK) -f $(srcdir)/anchgsub.awk $(srcdir)/anchgsub.in >_$@
+ $(CMP) $(srcdir)/anchgsub.ok _$@ && rm -f _$@
+
+splitargv::
+ @$(AWK) -f $(srcdir)/splitargv.awk $(srcdir)/splitargv.in >_$@
+ $(CMP) $(srcdir)/splitargv.ok _$@ && rm -f _$@
+
+awkpath::
+ @AWKPATH="$(srcdir):$(srcdir)/lib" $(AWK) -f awkpath.awk >_$@
+ $(CMP) $(srcdir)/awkpath.ok _$@ && rm -f _$@
+
+nfset::
+ @$(AWK) -f $(srcdir)/nfset.awk $(srcdir)/nfset.in >_$@
+ $(CMP) $(srcdir)/nfset.ok _$@ && rm -f _$@
+
+reparse::
+ @$(AWK) -f $(srcdir)/reparse.awk $(srcdir)/reparse.in >_$@
+ $(CMP) $(srcdir)/reparse.ok _$@ && rm -f _$@
+
+argtest::
+ @$(AWK) -f $(srcdir)/argtest.awk -x -y abc >_$@
+ $(CMP) $(srcdir)/argtest.ok _$@ && rm -f _$@
+
+badargs::
+ @-$(AWK) -f 2>&1 | grep -v patchlevel >_$@
+ $(CMP) $(srcdir)/badargs.ok _$@ && rm -f _$@
+
+convfmt::
+ @$(AWK) -f $(srcdir)/convfmt.awk >_$@
+ $(CMP) $(srcdir)/convfmt.ok _$@ && rm -f _$@
+
+arrayparm::
+ @-AWKPATH=$(srcdir) $(AWK) -f arrayparm.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/arrayparm.ok _$@ && rm -f _$@
+
+paramdup::
+ @-AWKPATH=$(srcdir) $(AWK) -f paramdup.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/paramdup.ok _$@ && rm -f _$@
+
+nonl::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f nonl.awk /dev/null >_$@ 2>&1
+ $(CMP) $(srcdir)/nonl.ok _$@ && rm -f _$@
+
+defref::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f defref.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/defref.ok _$@ && rm -f _$@
+
+nofmtch::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f nofmtch.awk >_$@ 2>&1
+ $(CMP) $(srcdir)/nofmtch.ok _$@ && rm -f _$@
+
+strftime::
+ : this test could fail on slow machines or on a second boundary,
+ : so if it does, double check the actual results
+ @date | $(AWK) '{ $$3 = sprintf("%02d", $$3 + 0) ; \
+ print > "strftime.ok" ; \
+ print strftime() > "'_$@'" }'
+ -$(CMP) strftime.ok _$@ && rm -f _$@ strftime.ok
+
+litoct::
+ @echo ab | $(AWK) --traditional -f $(srcdir)/litoct.awk >_$@
+ $(CMP) $(srcdir)/litoct.ok _$@ && rm -f _$@
+
+gensub::
+ @$(AWK) -f $(srcdir)/gensub.awk $(srcdir)/gensub.in >_$@
+ $(CMP) $(srcdir)/gensub.ok _$@ && rm -f _$@
+
+resplit::
+ @echo a:b:c d:e:f | $(AWK) '{ FS = ":"; $$0 = $$0; print $$2 }' > _$@
+ $(CMP) $(srcdir)/resplit.ok _$@ && rm -f _$@
+
+rswhite::
+ @$(AWK) -f $(srcdir)/rswhite.awk $(srcdir)/rswhite.in > _$@
+ $(CMP) $(srcdir)/rswhite.ok _$@ && rm -f _$@
+
+prmarscl::
+ @-AWKPATH=$(srcdir) $(AWK) -f prmarscl.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/prmarscl.ok _$@ && rm -f _$@
+
+sclforin::
+ @-AWKPATH=$(srcdir) $(AWK) -f sclforin.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/sclforin.ok _$@ && rm -f _$@
+
+sclifin::
+ @-AWKPATH=$(srcdir) $(AWK) -f sclifin.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/sclifin.ok _$@ && rm -f _$@
+
+intprec::
+ @-$(AWK) -f $(srcdir)/intprec.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/intprec.ok _$@ && rm -f _$@
+
+childin::
+ @echo hi | $(AWK) 'BEGIN { "cat" | getline; print; close("cat") }' > _$@
+ $(CMP) $(srcdir)/childin.ok _$@ && rm -f _$@
+
+noeffect::
+ @-AWKPATH=$(srcdir) $(AWK) --lint -f noeffect.awk > _$@ 2>&1
+ $(CMP) $(srcdir)/noeffect.ok _$@ && rm -f _$@
+
+numsubstr::
+ @-AWKPATH=$(srcdir) $(AWK) -f numsubstr.awk $(srcdir)/numsubstr.in >_$@
+ $(CMP) $(srcdir)/numsubstr.ok _$@ && rm -f _$@
+
+gnureops::
+ @$(AWK) -f $(srcdir)/gnureops.awk >_$@
+ $(CMP) $(srcdir)/gnureops.ok _$@ && rm -f _$@
+
+pcntplus::
+ @$(AWK) -f $(srcdir)/pcntplus.awk >_$@
+ $(CMP) $(srcdir)/pcntplus.ok _$@ && rm -f _$@
+
+prmreuse::
+ @$(AWK) -f $(srcdir)/prmreuse.awk >_$@
+ $(CMP) $(srcdir)/prmreuse.ok _$@ && rm -f _$@
+
+math::
+ @$(AWK) -f $(srcdir)/math.awk >_$@
+ $(CMP) $(srcdir)/math.ok _$@ && rm -f _$@
+
+fflush::
+ @$(srcdir)/fflush.sh >_$@
+ $(CMP) $(srcdir)/fflush.ok _$@ && rm -f _$@
+
+fldchg::
+ @$(AWK) -f $(srcdir)/fldchg.awk $(srcdir)/fldchg.in >_$@
+ $(CMP) $(srcdir)/fldchg.ok _$@ && rm -f _$@
+
+clean:
+ rm -fr _* core junk
diff --git a/test/README b/test/README
new file mode 100644
index 00000000..725d7d98
--- /dev/null
+++ b/test/README
@@ -0,0 +1,15 @@
+Wed Aug 16 12:22:45 PDT 1995
+
+This directory contains the tests for gawk. The tests use the
+following conventions.
+
+Given some aspect of gawk named `foo', there will be one or more
+of the following files:
+
+foo.awk --- actual code for the test if not inline in the Makefile
+foo.in --- the data for the test, if it needs data
+foo.ok --- the expected results
+_foo --- the actual results; generated at run time
+
+The _foo file will be left around if a test fails, allowing you to
+compare actual and expected results, in case they differ.
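
As a minimal sketch only (the test name `foo' is the README's own placeholder, not an actual entry in the suite), a test following these conventions would be wired into test/Makefile.in with the same AWK/CMP/srcdir pattern used by the targets above:

	# hypothetical target: runs foo.awk on foo.in, compares against foo.ok,
	# and leaves _foo behind only if the comparison fails
	foo::
		@$(AWK) -f $(srcdir)/foo.awk $(srcdir)/foo.in >_$@
		$(CMP) $(srcdir)/foo.ok _$@ && rm -f _$@

The new target name would also be added to the `basic' (or `gawk.extensions') dependency list so that `bigtest' picks it up.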
diff --git a/test/anchgsub.good b/test/anchgsub.ok
index c33dfb95..c33dfb95 100644
--- a/test/anchgsub.good
+++ b/test/anchgsub.ok
diff --git a/test/argarray.in b/test/argarray.in
new file mode 100644
index 00000000..bc93338c
--- /dev/null
+++ b/test/argarray.in
@@ -0,0 +1 @@
+this is a simple test file
diff --git a/test/argarray.good b/test/argarray.ok
index 3bdcca63..18eb841c 100644
--- a/test/argarray.good
+++ b/test/argarray.ok
@@ -1,9 +1,9 @@
here we have 3 arguments
which are
gawk
- argarray.awk
+ ./argarray.in
-
Environment variable TEST=
and the current input file is called ""
-in main loop, this input file is known as "argarray.awk"
+in main loop, this input file is known as "./argarray.in"
in main loop, this input file is known as "-"
diff --git a/test/argtest.good b/test/argtest.ok
index 591bc641..591bc641 100644
--- a/test/argtest.good
+++ b/test/argtest.ok
diff --git a/test/arrayparm.good b/test/arrayparm.ok
index b315f7cf..b315f7cf 100644
--- a/test/arrayparm.good
+++ b/test/arrayparm.ok
diff --git a/test/arrayref b/test/arrayref.awk
index 144d41a0..144d41a0 100644
--- a/test/arrayref
+++ b/test/arrayref.awk
diff --git a/test/arrayref.good b/test/arrayref.ok
index 6ed281c7..6ed281c7 100644
--- a/test/arrayref.good
+++ b/test/arrayref.ok
diff --git a/test/asgext.good b/test/asgext.ok
index 2c0df70f..2c0df70f 100644
--- a/test/asgext.good
+++ b/test/asgext.ok
diff --git a/test/awkpath.good b/test/awkpath.ok
index 6cffe1b7..6cffe1b7 100644
--- a/test/awkpath.good
+++ b/test/awkpath.ok
diff --git a/test/badargs.good b/test/badargs.ok
index 76d8e860..a4652933 100644
--- a/test/badargs.good
+++ b/test/badargs.ok
@@ -1,5 +1,5 @@
gawk: option requires an argument -- f
-Usage: gawk [POSIX or GNU style options] -f progfile [--] file ...
+Usage: gawk [POSIX or GNU style options] -f progfile [--] file ...
gawk [POSIX or GNU style options] [--] 'program' file ...
POSIX options: GNU long options:
-f progfile --file=progfile
@@ -11,7 +11,10 @@ POSIX options: GNU long options:
-W copyright --copyright
-W help --help
-W lint --lint
+ -W lint-old --lint-old
-W posix --posix
+ -W re-interval --re-interval
-W source=program-text --source=program-text
+ -W traditional --traditional
-W usage --usage
-W version --version
diff --git a/test/childin.ok b/test/childin.ok
new file mode 100644
index 00000000..45b983be
--- /dev/null
+++ b/test/childin.ok
@@ -0,0 +1 @@
+hi
diff --git a/test/compare.good b/test/compare.ok
index 8241359b..8241359b 100644
--- a/test/compare.good
+++ b/test/compare.ok
diff --git a/test/convfmt.good b/test/convfmt.ok
index a7b66f78..a7b66f78 100644
--- a/test/convfmt.good
+++ b/test/convfmt.ok
diff --git a/test/csi1.out b/test/csi1.out
deleted file mode 100644
index f93c2cc2..00000000
--- a/test/csi1.out
+++ /dev/null
@@ -1,574 +0,0 @@
-Title: Ideal Charge Sensitive Amp
-Date: today
-Plotname: Transient analysis.
-Flags: real
-No. Variables: 7
-No. Points: 70
-Variables: 0 time time
- 1 v(1) voltage
- 2 v(3) voltage
- 3 v(5) voltage
- 4 v(9) voltage
- 5 v(11) voltage
- 6 v(13) voltage
-Values:
- 0 0.000000000000000e+00
- 0.000000000000000e+00
- 0.000000000000000e+00
- 0.000000000000000e+00
- 0.000000000000000e+00
- 0.000000000000000e+00
- 0.000000000000000e+00
-
- 1 1.000000000000000e-09
- -1.264149466030735e-09
- 1.264149466030735e-04
- 2.526984953580682e-04
- 2.521735549927725e-16
- 5.033500623385340e-16
- 1.004709971525236e-15
-
- 2 1.180906969374945e-09
- -1.514801380340722e-09
- 1.514801380340722e-04
- 3.028004880113196e-04
- 3.078631347571166e-16
- 6.145090159683228e-16
- 1.226588337655132e-15
-
- 3 1.542720908124834e-09
- -2.049689597483709e-09
- 2.049689597483709e-04
- 4.097115469383853e-04
- 4.437763676567463e-16
- 8.857978571714022e-16
- 1.768092896751413e-15
-
- 4 2.266348785624612e-09
- -3.265760932995932e-09
- 3.265760932995932e-04
- 6.527287612098135e-04
- 9.397999789660777e-16
- 1.875866006928358e-15
- 3.744278902267733e-15
-
- 5 3.713604540624168e-09
- -5.968710391500898e-09
- 5.968710391500899e-04
- 1.192640228419305e-03
- 4.345207513494314e-15
- 8.671579322257517e-15
- 1.730556654319970e-14
-
- 6 6.608116050623280e-09
- -1.165754959289845e-08
- 1.165754959289845e-03
- 2.327838222854165e-03
- 5.642967340561880e-14
- 1.124945877057287e-13
- 2.242619207939039e-13
-
- 7 1.179564303717826e-08
- -2.193530367475176e-08
- 2.193530367475176e-03
- 4.374658167824437e-03
- 9.992331799374361e-13
- 1.987384302704409e-12
- 3.952723001792399e-12
-
- 8 2.217069701028822e-08
- -4.233554991551749e-08
- 4.233554991551749e-03
- 8.421454467083355e-03
- 2.567075209989195e-11
- 5.079257750506960e-11
- 1.004987167234087e-10
-
- 9 4.292080495650814e-08
- -8.246787850423504e-08
- 8.246787850423504e-03
- 1.631955711542618e-02
- 7.429352627112179e-10
- 1.454666979009004e-09
- 2.848205166007601e-09
-
- 10 6.747377244914711e-08
- -1.288241599365203e-07
- 1.288241599365203e-02
- 2.533540213098864e-02
- 6.016036906368960e-09
- 1.168438224023758e-08
- 2.269242430687209e-08
-
- 11 1.027485608531171e-07
- -1.933367451151489e-07
- 1.933367451151489e-02
- 3.768347326653115e-02
- 4.547604757457383e-08
- 8.718365928170803e-08
- 1.671235294549662e-07
-
- 12 1.550811172440954e-07
- -2.846833995746566e-07
- 2.846833995746566e-02
- 5.474832541420137e-02
- 3.375250800116866e-07
- 6.342555725437884e-07
- 1.191537432922743e-06
-
- 13 2.124749343141491e-07
- -3.791439105081753e-07
- 3.791439105081753e-02
- 7.183486021773615e-02
- 1.485074359346451e-06
- 2.739473718610748e-06
- 5.050651248135159e-06
-
- 14 2.934050742073026e-07
- -5.028277031417205e-07
- 5.028277031417205e-02
- 9.325941790944214e-02
- 6.951921938590966e-06
- 1.246646151568946e-05
- 2.233163733376972e-05
-
- 15 3.899183279910254e-07
- -6.368485744692849e-07
- 6.368485744692850e-02
- 1.150948619702300e-01
- 2.649747455292758e-05
- 4.602250953893021e-05
- 7.978045912595374e-05
-
- 16 5.093968731264188e-07
- -7.843833530131612e-07
- 7.843833530131612e-02
- 1.371850504655500e-01
- 9.207624151396878e-05
- 1.537740811662592e-04
- 2.559528885476014e-04
-
- 17 6.489752711203596e-07
- -9.338518261925084e-07
- 9.338518261925084e-02
- 1.570273197910303e-01
- 2.792909973592655e-04
- 4.461073848024906e-04
- 7.086148431639843e-04
-
- 18 8.223850555731352e-07
- -1.089864299179190e-06
- 1.089864299179190e-01
- 1.742648556363447e-01
- 8.126979446078827e-04
- 1.228244125338940e-03
- 1.839630830557003e-03
-
- 19 1.039936620365588e-06
- -1.246638298441507e-06
- 1.246638298441507e-01
- 1.867092058644591e-01
- 2.285828604654097e-03
- 3.224697163278019e-03
- 4.483765996458879e-03
-
- 20 1.319715872115905e-06
- -1.397349357516515e-06
- 1.397349357516515e-01
- 1.916979805788835e-01
- 6.317689492712388e-03
- 8.164324401851115e-03
- 1.030606900115141e-02
-
- 21 1.686517899697324e-06
- -1.529891860605749e-06
- 1.529891860605749e-01
- 1.859565532106186e-01
- 1.718247093837089e-02
- 1.982736574760900e-02
- 2.201037442321020e-02
-
- 22 2.186647083767628e-06
- -1.629428398181613e-06
- 1.629428398181613e-01
- 1.662034028698354e-01
- 4.642596629125201e-02
- 4.602451354215933e-02
- 4.268299343823448e-02
-
- 23 2.786647083767628e-06
- -1.671982381159767e-06
- 1.671982381159767e-01
- 1.360832067083017e-01
- 1.086486529677349e-01
- 9.028520856351136e-02
- 6.680053659714552e-02
-
- 24 3.386647083767628e-06
- -1.666708289319382e-06
- 1.666708289319382e-01
- 1.065961137297528e-01
- 2.019933895320813e-01
- 1.409501492585539e-01
- 8.111767734595639e-02
-
- 25 3.986647083767628e-06
- -1.636239392733020e-06
- 1.636239392733020e-01
- 8.119800486443494e-02
- 3.221985296170131e-01
- 1.879090396235947e-01
- 8.016274379356519e-02
-
- 26 4.586647083767628e-06
- -1.592958303223056e-06
- 1.592958303223056e-01
- 6.070506604262340e-02
- 4.601454482849191e-01
- 2.224246188905672e-01
- 6.424612709287120e-02
-
- 27 5.186647083767628e-06
- -1.543623379945047e-06
- 1.543623379945047e-01
- 4.479678708109880e-02
- 6.043239691667113e-01
- 2.392201482511665e-01
- 3.780590619181199e-02
-
- 28 5.786647083767628e-06
- -1.491907702045976e-06
- 1.491907702045976e-01
- 3.275072893014933e-02
- 7.431846280885604e-01
- 2.368781117682013e-01
- 6.852359091660139e-03
-
- 29 6.386647083767628e-06
- -1.439792435778303e-06
- 1.439792435778303e-01
- 2.378169349328440e-02
- 8.668590173262367e-01
- 2.171478488724319e-01
- -2.306848179644787e-02
-
- 30 6.986647083767627e-06
- -1.388331481824472e-06
- 1.388331481824472e-01
- 1.718228907053183e-02
- 9.681191419274604e-01
- 1.838077897422596e-01
- -4.796365600793339e-02
-
- 31 7.586647083767627e-06
- -1.338071075679957e-06
- 1.338071075679957e-01
- 1.236766007539650e-02
- 1.042671016972224e+00
- 1.415321327946899e-01
- -6.566996968152291e-02
-
- 32 8.186647083767628e-06
- -1.289280023988654e-06
- 1.289280023988654e-01
- 8.876981250123693e-03
- 1.088969089643156e+00
- 9.499362660424981e-02
- -7.562842047016014e-02
-
- 33 8.786647083767628e-06
- -1.242076017350638e-06
- 1.242076017350638e-01
- 6.357881031810623e-03
- 1.107746658963011e+00
- 4.827333239307662e-02
- -7.843915471504923e-02
-
- 34 9.386647083767627e-06
- -1.196494910292573e-06
- 1.196494910292573e-01
- 4.546231107047477e-03
- 1.101424879765328e+00
- 4.554708620525528e-03
- -7.538805789638806e-02
-
- 35 9.986647083767627e-06
- -1.152528702498193e-06
- 1.152528702498193e-01
- 3.246760457659020e-03
- 1.073516666992439e+00
- -3.396212210108883e-02
- -6.804675773661231e-02
-
- 36 1.058664708376763e-05
- -1.110146344414089e-06
- 1.110146344414089e-01
- 2.316517021036618e-03
- 1.028097189336683e+00
- -6.599874853245306e-02
- -5.798336364268095e-02
-
- 37 1.118664708376763e-05
- -1.069305118106022e-06
- 1.069305118106022e-01
- 1.651595638606269e-03
- 9.693765313679967e-01
- -9.105384005235785e-02
- -4.658294889089031e-02
-
- 38 1.178664708376763e-05
- -1.029956847083499e-06
- 1.029956847083499e-01
- 1.176870150147819e-03
- 9.013842949659325e-01
- -1.092246920089114e-01
- -3.495841677136451e-02
-
- 39 1.238664708376763e-05
- -9.920512695316036e-07
- 9.920512695316036e-02
- 8.382359024086052e-04
- 8.277595463975323e-01
- -1.210283048726013e-01
- -2.392680475810324e-02
-
- 40 1.298664708376763e-05
- -9.555378560684139e-07
- 9.555378560684139e-02
- 5.968428720504958e-04
- 7.516306562496196e-01
- -1.272418012047138e-01
- -1.402749198168676e-02
-
- 41 1.358664708376763e-05
- -9.203667750735073e-07
- 9.203667750735073e-02
- 4.248571522307446e-04
- 6.755660933441945e-01
- -1.287717046055234e-01
- -5.563263673549205e-03
-
- 42 1.418664708376763e-05
- -8.864893913817122e-07
- 8.864893913817122e-02
- 3.023712418928277e-04
- 6.015772305806626e-01
- -1.265545538806776e-01
- 1.349584122132374e-03
-
- 43 1.478664708376763e-05
- -8.538585100266795e-07
- 8.538585100266795e-02
- 2.151653061229580e-04
- 5.311562150980697e-01
- -1.214869821570401e-01
- 6.731151169583247e-03
-
- 44 1.538664708376763e-05
- -8.224284811671143e-07
- 8.224284811671143e-02
- 1.530922818134957e-04
- 4.653348807170765e-01
- -1.143810920904009e-01
- 1.069309216830403e-02
-
- 45 1.598664708376763e-05
- -7.921552298901439e-07
- 7.921552298901439e-02
- 1.089168937368159e-04
- 4.047538335014330e-01
- -1.059400761361653e-01
- 1.340149806837349e-02
-
- 46 1.658664708376763e-05
- -7.629962458090119e-07
- 7.629962458090119e-02
- 7.748311028005782e-05
- 3.497338006619688e-01
- -9.674904580077289e-02
- 1.504812104097493e-02
-
- 47 1.718664708376763e-05
- -7.349105515804137e-07
- 7.349105515804137e-02
- 5.511827138234112e-05
- 3.003438820737499e-01
- -8.727655437525563e-02
- 1.582944868848389e-02
-
- 48 1.778664708376763e-05
- -7.078586608014144e-07
- 7.078586608014144e-02
- 3.920723301882219e-05
- 2.564634021780585e-01
- -7.788305033150851e-02
- 1.593255151302921e-02
-
- 49 1.838664708376763e-05
- -6.818025309915621e-07
- 6.818025309915621e-02
- 2.788835904166550e-05
- 2.178356320976079e-01
- -6.883330624805746e-02
- 1.552640761946806e-02
-
- 50 1.898664708376763e-05
- -6.567055147578671e-07
- 6.567055147578671e-02
- 1.983668298288980e-05
- 1.841127879600602e-01
- -6.031062641991226e-02
- 1.475741378469763e-02
-
- 51 1.958664708376763e-05
- -6.325323108102860e-07
- 6.325323108102860e-02
- 1.410934680022455e-05
- 1.548924830204021e-01
- -5.243129198065891e-02
- 1.374792257730501e-02
-
- 52 2.018664708376763e-05
- -6.092489157117236e-07
- 6.092489157117236e-02
- 1.003548599883787e-05
- 1.297462940285003e-01
- -4.525823594528676e-02
- 1.259683280234207e-02
-
- 53 2.078664708376763e-05
- -5.868225768176519e-07
- 5.868225768176519e-02
- 7.137810396578103e-06
- 1.082413673531994e-01
- -3.881335316739105e-02
- 1.138146154193364e-02
-
- 54 2.138664708376763e-05
- -5.652217466261597e-07
- 5.652217466261597e-02
- 5.076773907606561e-06
- 8.995609902455238e-02
- -3.308815413377584e-02
- 1.016011499551744e-02
-
- 55 2.198664708376763e-05
- -5.444160386317303e-07
- 5.444160386317303e-02
- 3.610835627798982e-06
- 7.449092524094347e-02
- -2.805268412856604e-02
- 8.974939299801216e-03
-
- 56 2.258664708376763e-05
- -5.243761847070938e-07
- 5.243761847070938e-02
- 2.568179429164067e-06
- 6.147519509948855e-02
- -2.366276900703495e-02
- 7.854766820299426e-03
-
- 57 2.318664708376763e-05
- -5.050739940006428e-07
- 5.050739940006428e-02
- 1.826590813882191e-06
- 5.057099452538205e-02
- -1.986573288373220e-02
- 6.817778296350529e-03
-
- 58 2.378664708376763e-05
- -4.864823133176291e-07
- 4.864823133176291e-02
- 1.299139640224724e-06
- 4.147467043621487e-02
- -1.660477643856061e-02
- 5.873879622379742e-03
-
- 59 2.438664708376763e-05
- -4.685749889436973e-07
- 4.685749889436973e-02
- 9.239944550735617e-07
- 3.391668122982992e-02
- -1.382221958359773e-02
- 5.026748086095437e-03
-
- 60 2.498664708376763e-05
- -4.513268298648920e-07
- 4.513268298648920e-02
- 6.571766090561239e-07
- 2.766028273321528e-02
- -1.146180858163665e-02
- 4.275540948961647e-03
-
- 61 2.558664708376763e-05
- -4.347135723367043e-07
- 4.347135723367043e-02
- 4.674059248624993e-07
- 2.249945299326918e-02
- -9.470272664280982e-03
- 3.616283467240179e-03
-
- 62 2.618664708376763e-05
- -4.187118457546762e-07
- 4.187118457546762e-02
- 3.324343204742104e-07
- 1.825636722182948e-02
- -7.798294060677664e-03
- 3.042967402610447e-03
-
- 63 2.678664708376763e-05
- -4.032991397798472e-07
- 4.032991397798472e-02
- 2.364379187533634e-07
- 1.477865648663428e-02
- -6.401031800939865e-03
- 2.548397723334224e-03
-
- 64 2.738664708376763e-05
- -3.884537726735109e-07
- 3.884537726735109e-02
- 1.681620986362542e-07
- 1.193661985373132e-02
- -5.238316132556311e-03
- 2.124826894723649e-03
-
- 65 2.798664708376763e-05
- -3.741548607971242e-07
- 3.741548607971242e-02
- 1.196021236620889e-07
- 9.620508435261841e-03
- -4.274608378947787e-03
- 1.764414688058854e-03
-
- 66 2.858664708376763e-05
- -3.603822892346797e-07
- 3.603822892346797e-02
- 8.506472355827554e-08
- 7.737959636137384e-03
- -3.478801396862400e-03
- 1.459548113681114e-03
-
- 67 2.918664708376763e-05
- -3.471166834963309e-07
- 3.471166834963309e-02
- 6.050063999733899e-08
- 6.211629169891409e-03
- -2.823918805423725e-03
- 1.203051836793585e-03
-
- 68 2.978664708376763e-05
- -3.343393822635298e-07
- 3.343393822635298e-02
- 4.302990054439619e-08
- 4.977045512498984e-03
- -2.286756907007834e-03
- 9.883148941077680e-04
-
- 69 3.000000000000000e-05
- -3.299106422685328e-07
- 3.299106422685328e-02
- 3.815817763877542e-08
- 4.599101705030142e-03
- -2.120719628045261e-03
- 9.210101886687262e-04
-
diff --git a/test/defref.awk b/test/defref.awk
new file mode 100644
index 00000000..b4e8f107
--- /dev/null
+++ b/test/defref.awk
@@ -0,0 +1 @@
+BEGIN { foo() }
diff --git a/test/defref.ok b/test/defref.ok
new file mode 100644
index 00000000..f833c961
--- /dev/null
+++ b/test/defref.ok
@@ -0,0 +1,2 @@
+gawk: defref.awk:2: warning: function `foo' called but never defined
+gawk: defref.awk:1: fatal: function `foo' not defined
diff --git a/test/fflush.ok b/test/fflush.ok
new file mode 100644
index 00000000..4cf0df6c
--- /dev/null
+++ b/test/fflush.ok
@@ -0,0 +1,16 @@
+1st
+2nd
+1st
+2nd
+1st
+2nd
+1st
+2nd
+1st
+2nd
+1st
+2nd
+1st
+2nd
+1st
+2nd
diff --git a/test/fflush.sh b/test/fflush.sh
new file mode 100755
index 00000000..42d624c9
--- /dev/null
+++ b/test/fflush.sh
@@ -0,0 +1,16 @@
+#! /bin/sh
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat"}'
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat"}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");close("/dev/stdout");print "2nd"|"cat"}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat";close("cat")}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat";close("cat")}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"cat";close("cat")}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"sort"}'|cat
+
+../gawk 'BEGIN{print "1st";fflush("/dev/stdout");print "2nd"|"sort";close("sort")}'|cat
diff --git a/test/fieldwdth.good b/test/fieldwdth.ok
index 51b40081..51b40081 100644
--- a/test/fieldwdth.good
+++ b/test/fieldwdth.ok
diff --git a/test/fldchg.awk b/test/fldchg.awk
new file mode 100644
index 00000000..a8018f78
--- /dev/null
+++ b/test/fldchg.awk
@@ -0,0 +1,8 @@
+{
+# print "0:", $0
+ gsub("aa", "+")
+ print "1:", $0
+ $3 = "<" $3 ">"
+ print "2:", $0
+ print "2a:" "%" $1 "%" $2 "%" $3 "%" $4 "%" $5
+}
diff --git a/test/fldchg.in b/test/fldchg.in
new file mode 100644
index 00000000..f500c36a
--- /dev/null
+++ b/test/fldchg.in
@@ -0,0 +1 @@
+aa aab c d e f
diff --git a/test/fldchg.ok b/test/fldchg.ok
new file mode 100644
index 00000000..cc5032a8
--- /dev/null
+++ b/test/fldchg.ok
@@ -0,0 +1,3 @@
+1: + +b c d e f
+2: + +b <c> d e f
+2a:%+%+b%<c>%d%e
diff --git a/test/fontdata.txt b/test/fontdata.txt
deleted file mode 100644
index b2601237..00000000
--- a/test/fontdata.txt
+++ /dev/null
@@ -1,120 +0,0 @@
-@
-@ Data file for awk program genscrpt.awk which generates gulam
-@ script for creation of bitmap TeX fonts.
-@ Edit this file to your needs - each line starting with @ is ignored
-@ unless it is in a form '@ fonts' or '@ magstep'. The rest should be quite
-@ obvious.
-@
-@ basic fonts - all magsteps
-@ fonts
-cmb10
-cmbx10
-cmbx5
-cmbx7
-cmcsc10
-cmex10
-cmmi10
-cmmi5
-cmmi7
-cmr10
-cmr5
-cmr7
-cmsl10
-cmss10
-cmssbx10
-cmsy10
-cmsy5
-cmsy7
-cmti10
-cmtt10
-@ magstep
-0 0.5 1 2 3 4 5
-@ other fonts only in magsteps 0, 0.5 and 1
-@ fonts
-cmbsy10
-cmbx12
-cmbx6
-cmbx7
-cmbx8
-cmbx9
-cmbxsl10
-cmbxti10
-cmdunh10
-cmff10
-cmfi10
-cmfib8
-cmitt10
-cmmi12
-cmmi6
-cmmi8
-cmmi9
-cmmib10
-cmr12
-cmr17
-cmr6
-cmr8
-cmr9
-cmsl12
-cmsl8
-cmsl9
-cmsltt10
-cmss12
-cmss17
-cmss8
-cmss9
-cmssdc10
-cmssi10
-cmssi12
-cmssi17
-cmssi8
-cmssi9
-cmssq8
-cmssqi8
-cmsy6
-cmsy8
-cmsy9
-cmtcsc10
-cmtex10
-cmtex8
-cmtex9
-cmti12
-cmti7
-cmti8
-cmti9
-cmtt12
-cmtt8
-cmtt9
-cmu10
-cmvtt10
-@ magstep
-0 0.5 1
-@ specials
-@ fonts
-logo10
-@ magstep
-0 0.5 1
-@ fonts
-cminch
-@ magstep
-0
-@ LaTeX fonts
-lasy10
-lasy5
-lasy7
-lasyb10
-@ magstep
-0 0.5 1 2 3 4 5
-@ These fonts should not use cmbase
-@ fonts
-circle10
-circlew10
-line10
-linew10
-@ magstep
-0 0.5 1 2 3 4 5
-@ fonts
-lasy6
-lasy8
-lasy9
-@ magstep
-0 0.5 1
diff --git a/test/fsbs.good b/test/fsbs.ok
index 8d04f961..8d04f961 100644
--- a/test/fsbs.good
+++ b/test/fsbs.ok
diff --git a/test/fsrs.good b/test/fsrs.ok
index 7dafd658..7dafd658 100644
--- a/test/fsrs.good
+++ b/test/fsrs.ok
diff --git a/test/fstabplus b/test/fstabplus.awk
index 748a44f4..748a44f4 100644
--- a/test/fstabplus
+++ b/test/fstabplus.awk
diff --git a/test/fstabplus.good b/test/fstabplus.ok
index 8d04f961..8d04f961 100644
--- a/test/fstabplus.good
+++ b/test/fstabplus.ok
diff --git a/test/gensub.awk b/test/gensub.awk
new file mode 100644
index 00000000..33a2a5e4
--- /dev/null
+++ b/test/gensub.awk
@@ -0,0 +1,6 @@
+BEGIN { a = "this is a test of gawk"
+ b = gensub(/(this).*(test).*(gawk)/, "3 = <\\3>, 2 = <\\2>, 1 = <\\1>", 1, a)
+ print b
+}
+NR == 1 { print gensub(/b/, "BB", 2) }
+NR == 2 { print gensub(/c/, "CC", "global") }
diff --git a/test/gensub.in b/test/gensub.in
new file mode 100644
index 00000000..96c9fafc
--- /dev/null
+++ b/test/gensub.in
@@ -0,0 +1,2 @@
+a b c a b c a b c
+a b c a b c a b c
diff --git a/test/gensub.ok b/test/gensub.ok
new file mode 100644
index 00000000..c909cd0d
--- /dev/null
+++ b/test/gensub.ok
@@ -0,0 +1,3 @@
+3 = <gawk>, 2 = <test>, 1 = <this>
+a b c a BB c a b c
+a b CC a b CC a b CC
diff --git a/test/getline.good b/test/getline.ok
index 9b7f2b90..9b7f2b90 100644
--- a/test/getline.good
+++ b/test/getline.ok
diff --git a/test/gnureops.awk b/test/gnureops.awk
new file mode 100644
index 00000000..15b9b841
--- /dev/null
+++ b/test/gnureops.awk
@@ -0,0 +1,45 @@
+# test the gnu regexp ops
+
+BEGIN {
+ if ("a rat is here" ~ /\yrat/) print "test 1 ok (\\y)"
+ else print "test 1 failed (\\y)"
+ if ("a rat is here" ~ /rat\y/) print "test 2 ok (\\y)"
+ else print "test 2 failed (\\y)"
+ if ("what a brat" !~ /\yrat/) print "test 3 ok (\\y)"
+ else print "test 3 failed (\\y)"
+
+ if ("in the crate" ~ /\Brat/) print "test 4 ok (\\B)"
+ else print "test 4 failed (\\B)"
+ if ("a rat" !~ /\Brat/) print "test 5 ok (\\B)"
+ else print "test 5 failed (\\B)"
+
+ if ("a word" ~ /\<word/) print "test 6 ok (\\<)"
+ else print "test 6 failed (\\<)"
+ if ("foreword" !~ /\<word/) print "test 7 ok (\\<)"
+ else print "test 7 failed (\\<)"
+
+ if ("a word" ~ /word\>/) print "test 8 ok (\\>)"
+ else print "test 8 failed (\\\\>)"
+ if ("wordy" !~ /word\>/) print "test 9 ok (\\>)"
+ else print "test 9 failed (\\>)"
+
+ if ("a" ~ /\w/) print "test 10 ok (\\w)"
+ else print "test 10 failed (\\\\w)"
+ if ("+" !~ /\w/) print "test 11 ok (\\w)"
+ else print "test 11 failed (\\w)"
+
+ if ("a" !~ /\W/) print "test 12 ok (\\W)"
+ else print "test 12 failed (\\W)"
+ if ("+" ~ /\W/) print "test 13 ok (\\W)"
+ else print "test 13 failed (\\W)"
+
+ if ("a" ~ /\`a/) print "test 14 ok (\\`)"
+ else print "test 14 failed (\\`)"
+ if ("b" !~ /\`a/) print "test 15 ok (\\`)"
+ else print "test 15 failed (\\`)"
+
+ if ("a" ~ /a\'/) print "test 16 ok (\\')"
+ else print "test 16 failed (\\')"
+ if ("b" !~ /a\'/) print "test 17 ok (\\')"
+ else print "test 17 failed (\\')"
+}
diff --git a/test/gnureops.ok b/test/gnureops.ok
new file mode 100644
index 00000000..0fb5f504
--- /dev/null
+++ b/test/gnureops.ok
@@ -0,0 +1,17 @@
+test 1 ok (\y)
+test 2 ok (\y)
+test 3 ok (\y)
+test 4 ok (\B)
+test 5 ok (\B)
+test 6 ok (\<)
+test 7 ok (\<)
+test 8 ok (\>)
+test 9 ok (\>)
+test 10 ok (\w)
+test 11 ok (\w)
+test 12 ok (\W)
+test 13 ok (\W)
+test 14 ok (\`)
+test 15 ok (\`)
+test 16 ok (\')
+test 17 ok (\')
diff --git a/test/header.awk b/test/header.awk
deleted file mode 100644
index 2066c829..00000000
--- a/test/header.awk
+++ /dev/null
@@ -1,5 +0,0 @@
-BEGIN{
- "date" | getline cur_time
- close ("date")
- print "This line printed on", cur_time
-}
diff --git a/test/igncfs.good b/test/igncfs.ok
index 41df9a4c..41df9a4c 100644
--- a/test/igncfs.good
+++ b/test/igncfs.ok
diff --git a/test/ignrcase.good b/test/ignrcase.ok
index d66e95ca..d66e95ca 100644
--- a/test/ignrcase.good
+++ b/test/ignrcase.ok
diff --git a/test/include.awk b/test/include.awk
deleted file mode 100644
index a506a813..00000000
--- a/test/include.awk
+++ /dev/null
@@ -1,13 +0,0 @@
-# input file should have lines which start with "@incl" followed by
-# a name of a file to include
-{
- if ((NF == 2) && ($1 == "@incl")) {
- print " -- included file -- ", $2
- while ((getline line < $2) > 0)
- print line
- close ($2)
- printf "\t***\n"
- } else {
- print
- }
-}
diff --git a/test/inftest.good b/test/inftest.ok
index 83a93d01..83a93d01 100644
--- a/test/inftest.good
+++ b/test/inftest.ok
diff --git a/test/intprec.awk b/test/intprec.awk
new file mode 100644
index 00000000..978e9eac
--- /dev/null
+++ b/test/intprec.awk
@@ -0,0 +1 @@
+BEGIN { printf "%.10d:%.10x\n", 5, 14 }
diff --git a/test/intprec.ok b/test/intprec.ok
new file mode 100644
index 00000000..8783fac1
--- /dev/null
+++ b/test/intprec.ok
@@ -0,0 +1 @@
+0000000005:000000000e
diff --git a/test/lastnpages b/test/lastnpages
deleted file mode 100644
index 0acb7738..00000000
--- a/test/lastnpages
+++ /dev/null
@@ -1,47 +0,0 @@
-From nstn.ns.ca!news.cs.indiana.edu!news.nd.edu!spool.mu.edu!uunet!elroy.jpl.nasa.gov!swrinde!zaphod.mps.ohio-state.edu!uakari.primate.wisc.edu!dali.cs.montana.edu!milton!uw-beaver!fluke!ssc-vax!brennan Mon May 6 23:41:40 ADT 1991
-Article: 26492 of comp.unix.questions
-Path: cs.dal.ca!nstn.ns.ca!news.cs.indiana.edu!news.nd.edu!spool.mu.edu!uunet!elroy.jpl.nasa.gov!swrinde!zaphod.mps.ohio-state.edu!uakari.primate.wisc.edu!dali.cs.montana.edu!milton!uw-beaver!fluke!ssc-vax!brennan
-From: brennan@ssc-vax.UUCP (Michael D Brennan)
-Newsgroups: comp.unix.questions
-Subject: Re: How to print last <n> pages of a file
-Message-ID: <3948@ssc-bee.ssc-vax.UUCP>
-Date: 6 May 91 15:42:00 GMT
-Article-I.D.: ssc-bee.3948
-Organization: Boeing Aerospace & Electronics, Seattle WA
-Lines: 33
-
-
-The following shell & (new) awk program prints the last n pages.
-
-If you get more than 65 lines to a page, the program that inserts
-the ^L's should be fixed.
-
--------------------------------------------------------------
-#!/bin/sh
-# usage: lastpages -- prints 1 page reads stdin
-# lastpages n -- prints n pages reads stdin
-# lastpages n files -- prints n pages, reads file list
-
-program='BEGIN{RS = ORS = "\f" }
-
-
-{ page[NR] = $0
- if ( NR > numpages ) delete page[NR-numpages]
-}
-
-END {
- i = NR - numpages + 1
- if ( i <= 0 ) i = 1
-
- while( i <= NR ) print page[i++]
-}'
-
-
-case $# in
-0) awk "$program" numpages=1 - ;;
-1) awk "$program" numpages=$1 - ;;
-*) pages=$1 ; shift
- awk "$program" numpages=$pages $* ;;
-esac
-
-
diff --git a/test/litoct.awk b/test/litoct.awk
new file mode 100644
index 00000000..5cfc1284
--- /dev/null
+++ b/test/litoct.awk
@@ -0,0 +1 @@
+{ if (/a\52b/) print "match" ; else print "no match" }
diff --git a/test/litoct.ok b/test/litoct.ok
new file mode 100644
index 00000000..4c0be978
--- /dev/null
+++ b/test/litoct.ok
@@ -0,0 +1 @@
+no match
diff --git a/test/longwrds.good b/test/longwrds.ok
index 01faa847..01faa847 100644
--- a/test/longwrds.good
+++ b/test/longwrds.ok
diff --git a/test/math.awk b/test/math.awk
new file mode 100644
index 00000000..90a01dd8
--- /dev/null
+++ b/test/math.awk
@@ -0,0 +1,10 @@
+BEGIN {
+ pi = 3.1415927
+ printf "cos(%f) = %f\n", pi/4, cos(pi/4)
+ printf "sin(%f) = %f\n", pi/4, sin(pi/4)
+ e = exp(1)
+ printf "e = %f\n", e
+ printf "log(e) = %f\n", log(e)
+ printf "sqrt(pi ^ 2) = %f\n", sqrt(pi ^ 2)
+ printf "atan2(1, 1) = %f\n", atan2(1, 1)
+}
diff --git a/test/math.ok b/test/math.ok
new file mode 100644
index 00000000..a396a5b3
--- /dev/null
+++ b/test/math.ok
@@ -0,0 +1,6 @@
+cos(0.785398) = 0.707107
+sin(0.785398) = 0.707107
+e = 2.718282
+log(e) = 1.000000
+sqrt(pi ^ 2) = 3.141593
+atan2(1, 1) = 0.785398
diff --git a/test/negexp.good b/test/negexp.ok
index 6e6566ce..6e6566ce 100644
--- a/test/negexp.good
+++ b/test/negexp.ok
diff --git a/test/nfset.good b/test/nfset.ok
index 3ba48aec..3ba48aec 100644
--- a/test/nfset.good
+++ b/test/nfset.ok
diff --git a/test/noeffect.awk b/test/noeffect.awk
new file mode 100644
index 00000000..b375a4c2
--- /dev/null
+++ b/test/noeffect.awk
@@ -0,0 +1,4 @@
+BEGIN {
+ s == "hello, world";
+ print s
+}
diff --git a/test/noeffect.ok b/test/noeffect.ok
new file mode 100644
index 00000000..b820ddf8
--- /dev/null
+++ b/test/noeffect.ok
@@ -0,0 +1,2 @@
+gawk: noeffect.awk:3: warning: statement may have no effect
+
diff --git a/test/nofmtch.awk b/test/nofmtch.awk
new file mode 100644
index 00000000..2ea22492
--- /dev/null
+++ b/test/nofmtch.awk
@@ -0,0 +1 @@
+BEGIN { printf "%3\n" }
diff --git a/test/nofmtch.ok b/test/nofmtch.ok
new file mode 100644
index 00000000..e6f3846f
--- /dev/null
+++ b/test/nofmtch.ok
@@ -0,0 +1,2 @@
+gawk: nofmtch.awk:1: warning: printf format specifier does not have control letter
+%3
diff --git a/test/nonl.good b/test/nonl.ok
index 24bd9b78..24bd9b78 100644
--- a/test/nonl.good
+++ b/test/nonl.ok
diff --git a/test/numfunc.awk b/test/numfunc.awk
deleted file mode 100644
index de1d7a4d..00000000
--- a/test/numfunc.awk
+++ /dev/null
@@ -1,19 +0,0 @@
-BEGIN {
- y = 8
- x = 1
- while (x < 256) {
- print "arctan", y/x, atan2(y , x)
- x += x
- }
- print ""
- pi8 = atan2(1, 1) / 2
- arg = 0
- for (i = 0; i <= 8; i++) {
- print "cos sin", arg, cos(arg), sin(arg)
- arg += pi8
- }
- print ""
- for (i = -5; i<= 5; i++) {
- print "exp log", i, exp(i), log(exp(i))
- }
-}
diff --git a/test/numsubstr.awk b/test/numsubstr.awk
new file mode 100644
index 00000000..7a30993b
--- /dev/null
+++ b/test/numsubstr.awk
@@ -0,0 +1 @@
+{ print substr(1000+$1, 2) }
diff --git a/test/numsubstr.in b/test/numsubstr.in
new file mode 100644
index 00000000..ac65c364
--- /dev/null
+++ b/test/numsubstr.in
@@ -0,0 +1,3 @@
+5000
+10000
+5000
diff --git a/test/numsubstr.ok b/test/numsubstr.ok
new file mode 100644
index 00000000..86ec13ca
--- /dev/null
+++ b/test/numsubstr.ok
@@ -0,0 +1,3 @@
+000
+1000
+000
diff --git a/test/out1.good b/test/out1.ok
index f54b2b4d..f54b2b4d 100644
--- a/test/out1.good
+++ b/test/out1.ok
diff --git a/test/out2.good b/test/out2.ok
index 66b7d2f7..66b7d2f7 100644
--- a/test/out2.good
+++ b/test/out2.ok
diff --git a/test/out3.good b/test/out3.ok
index 7eb822ff..7eb822ff 100644
--- a/test/out3.good
+++ b/test/out3.ok
diff --git a/test/paramdup.good b/test/paramdup.ok
index 0308cc8c..0308cc8c 100644
--- a/test/paramdup.good
+++ b/test/paramdup.ok
diff --git a/test/pcntplus.awk b/test/pcntplus.awk
new file mode 100644
index 00000000..13999ac4
--- /dev/null
+++ b/test/pcntplus.awk
@@ -0,0 +1 @@
+BEGIN { printf "%+d %d\n", 3, 4 }
diff --git a/test/pcntplus.ok b/test/pcntplus.ok
new file mode 100644
index 00000000..b7902695
--- /dev/null
+++ b/test/pcntplus.ok
@@ -0,0 +1 @@
++3 4
diff --git a/test/plus-minus b/test/plus-minus
deleted file mode 100644
index 9fec4bff..00000000
--- a/test/plus-minus
+++ /dev/null
@@ -1,8 +0,0 @@
-{
- if ($1 == "-")
- print "minus"
- if ($1 == "+")
- print "plus"
- if (($1 != "-") && ($1 != "+"))
- print "something else"
-}
diff --git a/test/posix b/test/posix.awk
index 79474f30..79474f30 100755..100644
--- a/test/posix
+++ b/test/posix.awk
diff --git a/test/posix.good b/test/posix.ok
index 100b1505..100b1505 100644
--- a/test/posix.good
+++ b/test/posix.ok
diff --git a/test/poundbang.good b/test/poundbang.ok
index 143e28dd..143e28dd 100644
--- a/test/poundbang.good
+++ b/test/poundbang.ok
diff --git a/test/prmarscl.awk b/test/prmarscl.awk
new file mode 100644
index 00000000..3caf3d9c
--- /dev/null
+++ b/test/prmarscl.awk
@@ -0,0 +1,6 @@
+function test(a)
+{
+ print a[1]
+}
+
+BEGIN { j = 4; test(j) }
diff --git a/test/prmarscl.ok b/test/prmarscl.ok
new file mode 100644
index 00000000..b42cee67
--- /dev/null
+++ b/test/prmarscl.ok
@@ -0,0 +1 @@
+gawk: prmarscl.awk:4: fatal: attempt to use scalar parameter 1 as an array
diff --git a/test/prmreuse.awk b/test/prmreuse.awk
new file mode 100644
index 00000000..37e06f59
--- /dev/null
+++ b/test/prmreuse.awk
@@ -0,0 +1,14 @@
+# from Pat Rankin, rankin@eql.caltech.edu
+
+BEGIN { dummy(1); legit(); exit }
+
+function dummy(arg)
+{
+ return arg
+}
+
+function legit( scratch)
+{
+ split("1 2 3", scratch)
+ return ""
+}
diff --git a/config/sunos41-glibc b/test/prmreuse.ok
index e69de29b..e69de29b 100644
--- a/config/sunos41-glibc
+++ b/test/prmreuse.ok
diff --git a/test/reparse.good b/test/reparse.ok
index 6bdfacfa..6bdfacfa 100644
--- a/test/reparse.good
+++ b/test/reparse.ok
diff --git a/test/resplit.ok b/test/resplit.ok
new file mode 100644
index 00000000..61780798
--- /dev/null
+++ b/test/resplit.ok
@@ -0,0 +1 @@
+b
diff --git a/test/reverse.awk b/test/reverse.awk
deleted file mode 100644
index c6b2e299..00000000
--- a/test/reverse.awk
+++ /dev/null
@@ -1,13 +0,0 @@
-#this program creates palindromic output - slightly modified from Gawk Manual
-{
- rev($0, length)
-}
-
-function rev(str, len) {
- if (len == 0) {
- print " ", $0
- return
- }
- printf "%c", substr(str, len, 1)
- rev(str, len - 1)
-}
diff --git a/test/rs.data b/test/rs.in
index edef835e..edef835e 100644
--- a/test/rs.data
+++ b/test/rs.in
diff --git a/test/rs.good b/test/rs.ok
index 9dd6bd39..9dd6bd39 100644
--- a/test/rs.good
+++ b/test/rs.ok
diff --git a/test/rswhite.awk b/test/rswhite.awk
new file mode 100644
index 00000000..00487656
--- /dev/null
+++ b/test/rswhite.awk
@@ -0,0 +1,2 @@
+BEGIN { RS = "" }
+{ printf("<%s>\n", $0) }
diff --git a/test/rswhite.in b/test/rswhite.in
new file mode 100644
index 00000000..39f77564
--- /dev/null
+++ b/test/rswhite.in
@@ -0,0 +1,2 @@
+ a b
+c d
diff --git a/test/rswhite.ok b/test/rswhite.ok
new file mode 100644
index 00000000..a029e47f
--- /dev/null
+++ b/test/rswhite.ok
@@ -0,0 +1,2 @@
+< a b
+c d>
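The rswhite test exercises paragraph mode: setting RS to the empty string makes blank lines the record separator, so the two input lines form a single record, embedded newlines survive, and the leading blank before "a" is preserved in the expected output. A hedged variation (not part of the suite) that also numbers the records:

    BEGIN { RS = "" }                          # paragraph mode: blank lines separate records
    { printf("record %d: <%s>\n", NR, $0) }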
diff --git a/test/sclforin.awk b/test/sclforin.awk
new file mode 100644
index 00000000..335e8540
--- /dev/null
+++ b/test/sclforin.awk
@@ -0,0 +1 @@
+BEGIN { j = 4; for (i in j) print j[i] }
diff --git a/test/sclforin.ok b/test/sclforin.ok
new file mode 100644
index 00000000..d87fa61f
--- /dev/null
+++ b/test/sclforin.ok
@@ -0,0 +1 @@
+gawk: sclforin.awk:1: fatal: attempt to use scalar as array
diff --git a/test/sclifin.awk b/test/sclifin.awk
new file mode 100644
index 00000000..64f5d0d3
--- /dev/null
+++ b/test/sclifin.awk
@@ -0,0 +1,7 @@
+BEGIN {
+ j = 4
+ if ("foo" in j)
+ print "ouch"
+ else
+ print "ok"
+}
diff --git a/test/sclifin.ok b/test/sclifin.ok
new file mode 100644
index 00000000..717f8368
--- /dev/null
+++ b/test/sclifin.ok
@@ -0,0 +1 @@
+gawk: sclifin.awk:7: fatal: attempt to use scalar as array
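The prmarscl, sclforin, and sclifin tests all verify that gawk rejects a variable already used as a scalar when it is later treated as an array, whether indexed inside a function, iterated with for-in, or tested with the `in' operator. A small counterpart sketch (mine) where the variable is an array from the start, so all three uses are legal:

    BEGIN {
        a["foo"] = 1               # a is an array, not a scalar
        for (i in a) print i       # iterate over its indices
        if ("foo" in a) print "ok" # membership test
    }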
diff --git a/test/splitargv.good b/test/splitargv.ok
index 10886ef2..10886ef2 100644
--- a/test/splitargv.good
+++ b/test/splitargv.ok
diff --git a/test/sqrt.awk b/test/sqrt.awk
deleted file mode 100644
index c9d8f512..00000000
--- a/test/sqrt.awk
+++ /dev/null
@@ -1,4 +0,0 @@
-BEGIN {
- for (i = 0; i <= 25; i++)
- printf "gawk sez -- square root of %2d is %15.12f\n", i, sqrt(i)
-}
diff --git a/test/strftime.ok b/test/strftime.ok
new file mode 100644
index 00000000..52715bb9
--- /dev/null
+++ b/test/strftime.ok
@@ -0,0 +1 @@
+Thu Jan 11 09:35:20 EST 1996
diff --git a/test/data b/test/swaplns.in
index 71fb1627..71fb1627 100644
--- a/test/data
+++ b/test/swaplns.in
diff --git a/test/swaplns.good b/test/swaplns.ok
index d38b7caa..d38b7caa 100644
--- a/test/swaplns.good
+++ b/test/swaplns.ok
diff --git a/test/up_down.awk b/test/up_down.awk
deleted file mode 100644
index 32ab847c..00000000
--- a/test/up_down.awk
+++ /dev/null
@@ -1,15 +0,0 @@
-{
- lim = split ($0, line)
- out = ""
- if (lim > 0) {
- i = 0
- while (i < lim) {
- i++
- if (i % 2)
- out = out sprintf("%s ", toupper(line[i]))
- else
- out = out sprintf("%s ", tolower(line[i]))
- }
- }
- print out
-}
diff --git a/test/zap_cpp.awk b/test/zap_cpp.awk
deleted file mode 100644
index 99a5a1f4..00000000
--- a/test/zap_cpp.awk
+++ /dev/null
@@ -1,13 +0,0 @@
-# this will remove (comment out) all preprocessor traces from
-# cpp produced files:
-# run this awk program as follows
-# awk -f zap_cpp.awk <file>
-# end redirect output where you want it to
-NF > 0 {
- if ($1 ~ /^#/)
- print "/*", $0, "*/"
- else
- print
-}
-
-
diff --git a/version.c b/version.c
index 89b6cc05..62729551 100644
--- a/version.c
+++ b/version.c
@@ -1,4 +1,4 @@
-char *version_string = "@(#)Gnu Awk (gawk) 2.15";
+char *version_string = "@(#)Gnu Awk (gawk) 3.0";
/* 1.02 fixed /= += *= etc to return the new Left Hand Side instead
of the Right Hand Side */
@@ -45,3 +45,6 @@ char *version_string = "@(#)Gnu Awk (gawk) 2.15";
GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files.
`delete array'. OS/2 port added. */
+/* 3.0 RS as regexp, RT variable, FS = "", fflush builtin, posix
+ regexps, IGNORECASE applies everywhere, autoconf, source
+ code cleanup. See the NEWS file. */
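The 3.0 note above lists RS as a regexp and the new RT variable among the user-visible changes. A hypothetical demonstration of those two features together (not drawn from the test suite):

    # Records end at sentence punctuation; RT holds the text that
    # actually matched RS for each record.
    BEGIN { RS = "[.!?] *" }
    { printf("record %d: <%s> (ended by <%s>)\n", NR, $0, RT) }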
diff --git a/vms/ChangeLog b/vms/ChangeLog
new file mode 100644
index 00000000..570c031b
--- /dev/null
+++ b/vms/ChangeLog
@@ -0,0 +1,3 @@
+Wed Jan 10 22:58:55 1996 Arnold D. Robbins <arnold@skeeve.atl.ga.us>
+
+ * ChangeLog created.
diff --git a/vms/descrip.mms b/vms/descrip.mms
index 6c7910b9..6d01aa75 100644
--- a/vms/descrip.mms
+++ b/vms/descrip.mms
@@ -1,4 +1,4 @@
-# Descrip.MMS -- Makefile for building GNU Awk on VMS with VAXC and MMS.
+# Descrip.MMS -- Makefile for building GNU awk on VMS.
#
# usage:
# $ MMS /Description=[.vms]Descrip.MMS gawk
@@ -31,8 +31,9 @@
# specific post-processor on gawk.dvi in order to get printable data.
#
-# location of the VMS-specific files, relative to the 'main' directory
+# location of various source files, relative to the 'main' directory
VMSDIR = [.vms]
+DOCDIR = [.doc]
MAKEFILE = $(VMSDIR)Descrip.MMS
# debugging &c !'ccflags' is an escape to allow external compile flags
@@ -80,19 +81,21 @@ HELPLIB = sys$help:helplib.hlb
######## nothing below this line should need to be changed ########
#
+ECHO = write sys$output
+NOOP = continue
+
# ALLOCA
ALLOCA = alloca.obj
# object files
-AWKOBJS = main.obj,eval.obj,builtin.obj,msg.obj,iop.obj,io.obj,\
- field.obj,array.obj,node.obj,version.obj,missing.obj,re.obj,\
- getopt.obj,getopt1.obj
+AWKOBJS = array.obj,builtin.obj,eval.obj,field.obj,gawkmisc.obj,\
+ io.obj,main.obj,missing.obj,msg.obj,node.obj,re.obj,version.obj
ALLOBJS = $(AWKOBJS),awktab.obj
# GNUOBJS
# GNU stuff that gawk uses as library routines.
-GNUOBJS = regex.obj,dfa.obj,$(ALLOCA)
+GNUOBJS = getopt.obj,getopt1.obj,regex.obj,dfa.obj,$(ALLOCA)
# VMSOBJS
# VMS specific stuff
@@ -101,19 +104,37 @@ VMSCODE = vms_misc.obj,vms_popen.obj,vms_fwrite.obj,vms_args.obj,\
VMSCMD = gawk_cmd.obj # built from .cld file
VMSOBJS = $(VMSCODE),$(VMSCMD)
-VMSSRCS = $(VMSDIR)vms_misc.c,$(VMSDIR)vms_popen.c,$(VMSDIR)vms_fwrite.c,\
- $(VMSDIR)vms_args.c,$(VMSDIR)vms_gawk.c,$(VMSDIR)vms_cli.c
-VMSHDRS = $(VMSDIR)vms.h,$(VMSDIR)fcntl.h,$(VMSDIR)varargs.h,$(VMSDIR)unixlib.h
+# source and documentation files
+SRC = array.c,builtin.c,eval.c,field.c,gawkmisc.c,io.c,main.c,\
+ missing.c,msg.c,node.c,re.c,version.c
+
+ALLSRC= $(SRC),awktab.c
+
+AWKSRC= awk.h,awk.y,$(ALLSRC),patchlevel.h,protos.h
+
+GNUSRC = alloca.c,dfa.c,dfa.h,regex.c,regex.h,getopt.h,getopt.c,getopt1.c
+
+VMSSRCS = $(VMSDIR)gawkmisc.vms,$(VMSDIR)vms_misc.c,$(VMSDIR)vms_popen.c,\
+ $(VMSDIR)vms_fwrite.c,$(VMSDIR)vms_args.c,$(VMSDIR)vms_gawk.c,\
+ $(VMSDIR)vms_cli.c
+VMSHDRS = $(VMSDIR)redirect.h,$(VMSDIR)vms.h,$(VMSDIR)fcntl.h,\
+ $(VMSDIR)varargs.h,$(VMSDIR)unixlib.h
VMSOTHR = $(VMSDIR)Descrip.MMS,$(VMSDIR)vmsbuild.com,$(VMSDIR)version.com,\
$(VMSDIR)gawk.hlp
+DOCS= $(DOCDIR)gawk.1,$(DOCDIR)gawk.texi,$(DOCDIR)texinfo.tex
+
# Release of gawk
-REL=2.15
-PATCHLVL=6
+REL=3.0
+PATCHLVL=0
+
+# generic target
+all : gawk
+ $(NOOP)
# dummy target to allow building "gawk" in addition to explicit "gawk.exe"
gawk : gawk.exe
- write sys$output " GAWK "
+ $(ECHO) " GAWK "
# rules to build gawk
gawk.exe : $(ALLOBJS) $(GNUOBJS) $(VMSOBJS) gawk.opt
@@ -140,12 +161,13 @@ vms_gawk.obj : $(VMSDIR)vms_gawk.c
vms_cli.obj : $(VMSDIR)vms_cli.c
$(VMSCODE) : awk.h config.h $(VMSDIR)vms.h
-dfa.obj : awk.h config.h dfa.h
-regex.obj : awk.h config.h regex.h
+gawkmisc.obj : gawkmisc.c $(VMSDIR)gawkmisc.vms
+
+$(ALLOBJS) : awk.h dfa.h regex.h config.h
getopt.obj : getopt.h
+getopt1.obj : getopt.h
main.obj : patchlevel.h
awktab.obj : awk.h awktab.c
-$(AWKOBJS) : awk.h config.h
# bison or yacc required
awktab.c : awk.y # foo.y :: yacc => y[_]tab.c, bison => foo_tab.c
@@ -158,7 +180,7 @@ awktab.c : awk.y # foo.y :: yacc => y[_]tab.c, bison => foo_tab.c
@- if f$search("y_tab.c") .nes."" then rename/new_vers y_tab.c $@
@- if f$search("awk_tab.c").nes."" then rename/new_vers awk_tab.c $@
-config.h : [.config]vms-conf.h
+config.h : $(VMSDIR)vms-conf.h
copy $< $@
# Alloca - C simulation
diff --git a/vms/gawkmisc.vms b/vms/gawkmisc.vms
new file mode 100644
index 00000000..9d58b51e
--- /dev/null
+++ b/vms/gawkmisc.vms
@@ -0,0 +1,121 @@
+/*
+ * gawkmisc.vms --- miscellaneous gawk routines that are OS specific.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991-1995 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+char quote = '\'';
+char *defpath = DEFPATH;
+char envsep = ':';
+
+/* gawk_name --- pull out the "gawk" part from how the OS called us */
+
+char *
+gawk_name(filespec)
+const char *filespec;
+{
+ char *p, *q;
+
+ /* "device:[root.][directory.subdir]GAWK.EXE;n" -> "GAWK" */
+ p = strrchr(filespec, ']'); /* directory punctuation */
+ q = strrchr(filespec, '>'); /* alternate <international> punct */
+
+ if (p == NULL || q > p)
+ p = q;
+ p = strdup(p == NULL ? filespec : (p + 1));
+ if ((q = strrchr(p, '.')) != NULL)
+ *q = '\0'; /* strip .typ;vers */
+
+ return p;
+}
+
+/* os_arg_fixup --- fixup the command line */
+
+void
+os_arg_fixup(argcp, argvp)
+int *argcp;
+char ***argvp;
+{
+ (void) vms_arg_fixup(argcp, argvp);
+}
+
+/* os_devopen --- open special per-OS devices */
+
+int
+os_devopen(name, flag)
+const char *name;
+int flag;
+{
+ return vms_devopen(name, flag);
+}
+
+/* optimal_bufsize --- determine optimal buffer size */
+
+int
+optimal_bufsize(fd, stb)
+int fd;
+struct stat *stb;
+{
+
+ /* force all members to zero in case OS doesn't use all of them. */
+ memset(stb, '\0', sizeof(struct stat));
+
+ /*
+ * These values correspond with the RMS multi-block count used by
+ * vms_open() in vms/vms_misc.c.
+ */
+ if (isatty(fd) > 0)
+ return BUFSIZ;
+ else if (fstat(fd, stb) < 0)
+ return 8*512; /* conservative in case of DECnet access */
+ else
+ return 32*512;
+}
+
+/* ispath --- return true if path has directory components */
+
+int
+ispath(file)
+const char *file;
+{
+ for (; *file; file++) {
+ switch (*file) {
+ case ':':
+ case ']':
+ case '>':
+ case '/':
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* isdirpunct --- return true if char is a directory separator */
+
+int
+isdirpunct(c)
+int c;
+{
+ return (strchr(":]>/", c) != NULL);
+}
+
+
diff --git a/vms/posix-cc.sh b/vms/posix-cc.sh
new file mode 100755
index 00000000..6ac70990
--- /dev/null
+++ b/vms/posix-cc.sh
@@ -0,0 +1,16 @@
+# The VMS POSIX `c89' command writes any/all diagnostic info to stdout
+# rather than stderr, confusing configure tests which capture error output.
+#
+# Also, the VMS linker issues a warning for any undefined symbol, but that
+# does not inhibit creation of the final executable file, again confusing
+# configure. As an added complication, there's not enough control of the
+# linker to put the map file with chosen name into the current directory.
+#
+if [ -f ~/_posix-cc.map ] ; then rm -f ~/_posix-cc.map* ; fi
+c89 -Wc,nowarn -Wl,nodebug -Wl,map=_posix-cc.map $* ; x=$?
+if [ -f ~/_posix-cc.map ] ; then
+ if [ -n "`fgrep LINK-W-USEUNDEF ~/_posix-cc.map`" ] ; then x=1 ; fi
+ rm -f ~/_posix-cc.map*
+fi
+if [ $x -ne 0 ] ; then echo "c89 reports failure" 1>&2 && exit 1 ; fi
+exit 0
diff --git a/vms/redirect.h b/vms/redirect.h
new file mode 100644
index 00000000..74fe622e
--- /dev/null
+++ b/vms/redirect.h
@@ -0,0 +1,78 @@
+/*
+ * redirect.h --- definitions for functions that are OS specific.
+ */
+
+/*
+ * Copyright (C) 1986, 88, 89, 91-93, 1995 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* This file is included by custom.h for VMS-POSIX, or first
+ by config.h (vms-conf.h) then again by awk.h for normal VMS. */
+
+#if defined(VMS_POSIX) || defined(IN_CONFIG_H)
+
+#define DEFAULT_FILETYPE ".awk"
+
+/* some macros to redirect some non-VMS-specific code */
+#define getopt gnu_getopt
+#define opterr gnu_opterr
+#define optarg gnu_optarg
+#define optind gnu_optind
+#define optopt gnu_optopt
+#define regcomp gnu_regcomp
+#define regexec gnu_regexec
+#define regfree gnu_regfree
+#define regerror gnu_regerror
+
+#else /* awk.h, not POSIX */
+
+/* some macros to redirect to code in vms/vms_misc.c */
+#define exit vms_exit
+#define open vms_open
+#define strerror vms_strerror
+#define strdup vms_strdup
+extern void exit P((int));
+extern int open P((const char *,int,...));
+extern char *strerror P((int));
+extern char *strdup P((const char *str));
+extern int vms_devopen P((const char *,int));
+# ifndef NO_TTY_FWRITE
+#define fwrite tty_fwrite
+#define fclose tty_fclose
+extern size_t fwrite P((const void *,size_t,size_t,FILE *));
+extern int fclose P((FILE *));
+# endif
+extern FILE *popen P((const char *,const char *));
+extern int pclose P((FILE *));
+extern void vms_arg_fixup P((int *,char ***));
+/* some things not in STDC_HEADERS */
+extern size_t gnu_strftime P((char *,size_t,const char *,const struct tm *));
+extern int unlink P((const char *));
+extern int getopt P((int,char **,char *));
+extern int isatty P((int));
+#ifndef fileno
+extern int fileno P((FILE *));
+#endif
+extern int close(), dup(), dup2(), fstat(), read(), stat();
+extern int getpgrp P((void));
+
+#endif /* not VMS_POSIX and not IN_CONFIG_H */
+
+/*vms/redirect.h*/
diff --git a/vms/vms-conf.h b/vms/vms-conf.h
new file mode 100644
index 00000000..d1593e6a
--- /dev/null
+++ b/vms/vms-conf.h
@@ -0,0 +1,168 @@
+/*
+ * config.h -- configuration definitions for gawk.
+ *
+ * For VMS (assumes V4.6 or later; tested on V5.5-2)
+ */
+
+/*
+ * Copyright (C) 1991, 1992, 1995 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+/* Define if using alloca.c. */
+#define C_ALLOCA
+#define STACK_DIRECTION (-1)
+#define REGEX_MALLOC /* use malloc instead of alloca in regex.c */
+
+/* Define as the return type of signal handlers (int or void). */
+#define RETSIGTYPE void
+
+#define SPRINTF_RET int
+
+/* Define if the `getpgrp' function takes no argument. */
+#define GETPGRP_VOID 1
+
+#define HAVE_STRING_H 1 /* the <string.h> header file */
+
+/* Define if you have the memcmp function. */
+#define HAVE_MEMCMP 1
+/* Define if you have the memcpy function. */
+#define HAVE_MEMCPY 1
+/* Define if you have the memset function. */
+#define HAVE_MEMSET 1
+
+/* Define if you have the strchr function. */
+#define HAVE_STRCHR 1
+
+/* Define if you have the strerror function. */
+#define HAVE_STRERROR 1
+
+/* Define if you have the strtod function. */
+#define HAVE_STRTOD 1
+
+/* Define if you have the system function. */
+#define HAVE_SYSTEM 1
+
+/* Define if you have the fmod function. */
+#define HAVE_FMOD 1
+
+/* Define if you have the tzset function. */
+/* [Fake it in vms/vms_misc.c since missing/tzset.c won't compile.] */
+#define HAVE_TZSET 1
+#define HAVE_TZNAME 1
+
+#define STDC_HEADERS 1
+
+#define HAVE_VPRINTF 1
+
+
+/*******************************/
+/* Gawk configuration options. */
+/*******************************/
+
+/*
+ * DEFPATH
+ * VMS: "/AWK_LIBRARY" => "AWK_LIBRARY:"
+ * The default search path for the -f option of gawk. It is used
+ * if the AWKPATH environment variable is undefined.
+ *
+ * Note: OK even if no AWK_LIBRARY logical name has been defined.
+ */
+
+#define DEFPATH ".,/AWK_LIBRARY"
+#define ENVSEP ','
+
+/*
+ * Extended source file access.
+ */
+#define DEFAULT_FILETYPE ".awk"
+
+/*
+ * Pipe handling.
+ */
+#define PIPES_SIMULATED 1
+
+/*
+ * %g format in VAXCRTL is broken (chooses %e format when should use %f).
+ */
+#define GFMT_WORKAROUND 1
+
+/*
+ * VAX C
+ *
+ * As of V3.2, VAX C is not yet ANSI-compliant. But it's close enough
+ * for GAWK's purposes. Comment this out for VAX C V2.4 and earlier.
+ * Value of 0 should mean "not ANSI-C", but GAWK uses def/not-def tests.
+ * YYDEBUG definition is needed for combination of VAX C V2.x and Bison.
+ */
+#if defined(VAXC) && !defined(__STDC__)
+#define __STDC__ 0
+#define NO_TOKEN_PASTING
+#ifndef __DECC /* DEC C does not support #pragma builtins even in VAXC mode */
+#define VAXC_BUILTINS
+#endif
+/* #define YYDEBUG 0 */
+#endif
+
+/*
+ * DEC C
+ *
+ * Digital's ANSI compiler.
+ */
+#ifdef __DECC
+ /* DEC C implies DECC$SHR, which doesn't have the %g problem of VAXCRTL */
+#undef GFMT_WORKAROUND
+#endif
+
+/*
+ * GNU C
+ *
+ * Versions of GCC (actually GAS) earlier than 1.38 don't produce the
+ * right code for ``extern const'' constructs, and other usages of
+ * const might not be right either. The old set of include files from
+ * the gcc-vms distribution did not contain prototypes, and this could
+ * provoke some const-related compiler warnings. If you've got an old
+ * version of gcc for VMS, define 'const' out of existence, and by all
+ * means obtain the most recent version!
+ *
+ * Note: old versions of GCC should also avoid defining STDC_HEADERS,
+ * because most of the ANSI-C required header files are missing.
+ */
+#ifdef __GNUC__
+/* #define const */
+/* #undef STDC_HEADERS */
+#ifndef STDC_HEADERS
+#define alloca __builtin_alloca
+#define environ $$PsectAttributes_NOSHR$$environ /* awful GAS kludge */
+#endif
+#undef REGEX_MALLOC /* use true alloca() in regex.c */
+#endif
+
+#ifndef HAVE_STRFTIME
+/*
+ * Always use the version of strftime() in missing/strftime.c instead of
+ * the [as yet undocumented/unsupported] one in VAXCRTL. Renaming it here
+ * guarantees that it won't clash with the library routine.
+ */
+#define strftime gnu_strftime
+#endif
+
+#define IN_CONFIG_H
+#include "vms/redirect.h"
+#undef IN_CONFIG_H
diff --git a/vms/vms_args.c b/vms/vms_args.c
index a32e5d0f..945d7bf6 100644
--- a/vms/vms_args.c
+++ b/vms/vms_args.c
@@ -7,7 +7,7 @@
* Copyright (C) 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -20,8 +20,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*
diff --git a/vms/vms_fwrite.c b/vms/vms_fwrite.c
index 7ca8f5c4..ab9bbbd0 100644
--- a/vms/vms_fwrite.c
+++ b/vms/vms_fwrite.c
@@ -6,7 +6,7 @@
* Copyright (C) 1991-1995 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,8 +19,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#include "awk.h" /* really "../awk.h" */
diff --git a/vms/vms_gawk.c b/vms/vms_gawk.c
index 69c91134..f65b9cbd 100644
--- a/vms/vms_gawk.c
+++ b/vms/vms_gawk.c
@@ -6,7 +6,7 @@
* Copyright (C) 1991-1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,8 +19,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
/*
diff --git a/vms/vms_misc.c b/vms/vms_misc.c
index 31c24ed7..748b3b94 100644
--- a/vms/vms_misc.c
+++ b/vms/vms_misc.c
@@ -6,7 +6,7 @@
* Copyright (C) 1991-1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,8 +19,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#define creat creat_dummy /* one of gcc-vms's headers has bad prototype */
@@ -173,7 +173,7 @@ vms_devopen( const char *name, int mode )
*/
/* these are global for use by missing/strftime.c */
char *tzname[2] = { "local", "" };
-int daylight = 0;
+int daylight = 0, timezone = 0, altzone = 0;
/* dummy to satisfy linker */
void tzset()
@@ -189,6 +189,16 @@ int getpgrp()
return 0;
}
+#ifndef __GNUC__
+# ifdef bcopy
+# undef bcopy
+# endif
+void bcopy( const char *src, char *dst, int len )
+{
+ (void) memcpy(dst, src, len);
+}
+#endif /*!__GNUC__*/
+
/*----------------------------------------------------------------------*/
#ifdef NO_VMS_ARGS /* real code is in "vms/vms_args.c" */
void vms_arg_fixup( int *argc, char ***argv ) { return; } /* dummy */
diff --git a/vms/vms_popen.c b/vms/vms_popen.c
index 5c9c1a05..dcd8425b 100644
--- a/vms/vms_popen.c
+++ b/vms/vms_popen.c
@@ -6,7 +6,7 @@
* Copyright (C) 1991-1993 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
+ * AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,8 +19,8 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*/
#ifndef NO_VMS_PIPES
diff --git a/vms/vmsbuild.com b/vms/vmsbuild.com
index 50d2b5a0..40a9307c 100644
--- a/vms/vmsbuild.com
+++ b/vms/vmsbuild.com
@@ -3,9 +3,10 @@ $! revised, Mar'90
$! gawk 2.13 revised, Jun'91
$! gawk 2.14 revised, Sep'92
$! gawk 2.15 revised, Oct'93
+$! gawk 3.0 revised, Dec'95
$!
-$ REL = "2.15" !release version number
-$ PATCHLVL = "6"
+$ REL = "3.0" !release version number
+$ PATCHLVL = "0"
$!
$! [ remove "/optimize=noinline" for VAX C V2.x or DEC C ]
$! [ add "/standard=VAXC" for DEC C and "/g_float" for Alpha ]
@@ -27,7 +28,7 @@ $! uncomment next two lines for GNU C
$ ! cc := gcc/Include=([],[.vms])/Define="(""GAWK"",""HAVE_CONFIG_H"")"
$ ! libs = "gnu_cc:[000000]gcclib.olb/Library,sys$library:vaxcrtl.olb/Library"
$!
-$ if f$search("config.h").eqs."" then copy [.config]vms-conf.h []config.h
+$ if f$search("config.h").eqs."" then copy [.vms]vms-conf.h []config.h
$ if f$search("awktab.c").nes."" then goto awktab_ok
$ write sys$output " You must process `awk.y' with ""yacc"" or ""bison"""
$ if f$search("awk_tab.c").nes."" then - !bison was run manually
@@ -36,21 +37,21 @@ $ if f$search("ytab.c").nes."" .or. f$search("y_tab.c").nes."" then - !yacc
write sys$output " or else rename `ytab.c' or `y_tab.c' to `awktab.c'."
$ exit
$awktab_ok:
-$ cc main.c
-$ cc eval.c
+$ cc array.c
$ cc builtin.c
-$ cc msg.c
-$ cc iop.c
-$ cc io.c
+$ cc eval.c
$ cc field.c
-$ cc array.c
-$ cc node.c
-$ cc version.c
+$ cc gawkmisc.c
+$ cc io.c
+$ cc main.c
$ cc missing.c
+$ cc msg.c
+$ cc node.c
$ cc re.c
+$ cc version.c
+$ cc awktab.c
$ cc getopt.c
$ cc getopt1.c
-$ cc awktab.c
$ cc regex.c
$ cc dfa.c
$ cc/define=("STACK_DIRECTION=(-1)","exit=vms_exit") alloca.c
@@ -64,11 +65,11 @@ $ set_command/object=[]gawk_cmd.obj [.vms]gawk.cld
$!
$ create gawk.opt
! GAWK -- Gnu AWK
-main.obj,eval.obj,builtin.obj,msg.obj,iop.obj,io.obj
-field.obj,array.obj,node.obj,version.obj,missing.obj
-re.obj,getopt.obj,getopt1.obj,awktab.obj,regex.obj,dfa.obj,[]alloca.obj
-[]vms_misc.obj,vms_popen.obj,vms_fwrite.obj
-[]vms_args.obj,vms_gawk.obj,vms_cli.obj,gawk_cmd.obj
+array.obj,builtin.obj,eval.obj,field.obj,gawkmisc.obj
+io.obj,main.obj,missing.obj,msg.obj,node.obj,re.obj,version.obj,awktab.obj
+getopt.obj,getopt1.obj,regex.obj,dfa.obj,alloca.obj
+[]vms_misc.obj,vms_popen.obj,vms_fwrite.obj,vms_args.obj
+[]vms_gawk.obj,vms_cli.obj,gawk_cmd.obj
psect_attr=environ,noshr !extern [noshare] char **
stack=48 !preallocate more pages (default is 20)
iosegment=128 !ditto (default is 32)