summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2010-07-16 11:58:26 +0300
committerArnold D. Robbins <arnold@skeeve.com>2010-07-16 11:58:26 +0300
commit765c7494b3dac62207e6cd57fb839997e237f292 (patch)
treef7da12ffdb85d9f82671cb3122775b2ce73f7ad9
parentcce5115e21db1702e0617afdca36633e7e2c9eae (diff)
downloadgawk-765c7494b3dac62207e6cd57fb839997e237f292.tar.gz
Moving to 2.13.2.
-rw-r--r--ACKNOWLEDGMENT23
-rw-r--r--CHANGES827
-rw-r--r--FUTURES104
-rw-r--r--LIMITATIONS16
-rw-r--r--Makefile262
-rw-r--r--Makefile-dist255
-rw-r--r--PORTS17
-rw-r--r--PROBLEMS13
-rw-r--r--README148
-rw-r--r--README.VMS83
-rw-r--r--README.dos9
-rw-r--r--README.reconstituted7
-rw-r--r--README.rs600021
-rw-r--r--README.ultrix5
-rw-r--r--alloca.c31
-rw-r--r--alloca.s68
-rw-r--r--array.c153
-rw-r--r--atari/Makefile.st172
-rw-r--r--atari/mkconf.g18
-rw-r--r--atari/mkscrpt.sed15
-rw-r--r--atari/stack.c6
-rw-r--r--atari/system.c112
-rw-r--r--atari/textrd.c30
-rw-r--r--atari/tmpnam.c47
-rw-r--r--awk.h648
-rw-r--r--awk.tab.c2802
-rw-r--r--awk.y1373
-rw-r--r--builtin.c780
-rw-r--r--config.h287
-rw-r--r--config.h-dist285
-rw-r--r--config/apollo6
-rw-r--r--config/atari9
-rw-r--r--config/bsd4216
-rw-r--r--config/bsd4316
-rw-r--r--config/bsd43r3
-rw-r--r--config/bsd43t14
-rw-r--r--config/cray9
-rw-r--r--config/cray2-507
-rw-r--r--config/cray2-606
-rw-r--r--config/cray605
-rw-r--r--config/interactive2.29
-rw-r--r--config/msc609
-rw-r--r--config/msdos9
-rw-r--r--config/news6
-rw-r--r--config/next206
-rw-r--r--config/rs60006
-rw-r--r--config/sequent16
-rw-r--r--config/sgi5
-rw-r--r--config/sgi334
-rw-r--r--config/sgi33.cc5
-rw-r--r--config/sunos38
-rw-r--r--config/sunos407
-rw-r--r--config/sunos414
-rw-r--r--config/sysv26
-rw-r--r--config/sysv36
-rw-r--r--config/sysv44
-rw-r--r--config/ultrix316
-rw-r--r--config/ultrix402
-rw-r--r--config/ultrix413
-rw-r--r--config/vms-conf.h307
-rwxr-xr-xconfigure32
-rw-r--r--debug.c561
-rw-r--r--dfa.c2309
-rw-r--r--dfa.h539
-rw-r--r--eval.c816
-rw-r--r--field.c668
-rw-r--r--foo.sh50
-rw-r--r--gawk.1797
-rw-r--r--gawk.texinfo30
-rw-r--r--io.c723
-rw-r--r--iop.c237
-rw-r--r--main.c499
-rw-r--r--missing.c85
-rw-r--r--missing/dup2.c (renamed from missing.d/dup2.c)0
-rw-r--r--missing/gcvt.c (renamed from missing.d/gcvt.c)0
-rw-r--r--missing/getopt.c (renamed from missing.d/getopt.c)0
-rw-r--r--missing/memcmp.c (renamed from missing.d/memcmp.c)0
-rw-r--r--missing/memcpy.c (renamed from missing.d/memcpy.c)0
-rw-r--r--missing/memset.c (renamed from missing.d/memset.c)0
-rw-r--r--missing/random.c (renamed from missing.d/random.c)0
-rw-r--r--missing/strcase.c (renamed from missing.d/strcase.c)0
-rw-r--r--missing/strchr.c (renamed from missing.d/strchr.c)0
-rw-r--r--missing/strerror.c (renamed from missing.d/strerror.c)0
-rw-r--r--missing/strtod.c (renamed from missing.d/strtod.c)0
-rw-r--r--missing/tmpnam.c (renamed from missing.d/tmpnam.c)0
-rw-r--r--missing/vprintf.c (renamed from missing.d/vprintf.c)0
-rwxr-xr-xmkconf32
-rw-r--r--msg.c14
-rwxr-xr-xmungeconf20
-rw-r--r--node.c346
-rw-r--r--patchlevel.h2
-rw-r--r--pc/Makefile.pc (renamed from pc.d/Makefile.pc)0
-rw-r--r--pc/config.h287
-rw-r--r--pc/make.bat55
-rw-r--r--pc/names.lnk17
-rw-r--r--pc/popen.c (renamed from pc.d/popen.c)0
-rw-r--r--pc/popen.h (renamed from pc.d/popen.h)0
-rw-r--r--protos.h113
-rw-r--r--re.c144
-rw-r--r--regex.c2440
-rw-r--r--regex.h418
-rw-r--r--version.c (renamed from version.sh)24
-rw-r--r--vms/descrip.mms200
-rw-r--r--vms/fcntl.h10
-rw-r--r--vms/gawk.cld46
-rw-r--r--vms/gawk.hlp1156
-rw-r--r--vms/unixlib.h24
-rw-r--r--vms/varargs.h38
-rw-r--r--vms/vms.h69
-rw-r--r--vms/vms_args.c398
-rw-r--r--vms/vms_cli.c88
-rw-r--r--vms/vms_fwrite.c209
-rw-r--r--vms/vms_gawk.c245
-rw-r--r--vms/vms_misc.c159
-rw-r--r--vms/vms_popen.c168
-rw-r--r--vms/vmsbuild.com70
116 files changed, 18556 insertions, 4718 deletions
diff --git a/ACKNOWLEDGMENT b/ACKNOWLEDGMENT
new file mode 100644
index 00000000..276c0133
--- /dev/null
+++ b/ACKNOWLEDGMENT
@@ -0,0 +1,23 @@
+The current developers of Gawk would like to thank and acknowledge the
+many people who have contributed to the development through bug reports
+and fixes and suggestions. Unfortunately, we have not been organized
+enough to keep track of all the names -- for that we apologize.
+
+Another group of people have assisted even more by porting Gawk to new
+platforms and providing a great deal of feedback. They are (for
+2.12):
+
+ Hal Peterson <hrp@pecan.cray.com> (Cray)
+ Pat Rankin <gawk.rankin@EQL.Caltech.Edu> (VMS)
+ Michal Jaegermann <NTOMCZAK@vm.ucs.UAlberta.CA> (Atari, NeXT, DEC 3100)
+ Mike Lijewski <mjlx@eagle.cnsf.cornell.edu> (IBM RS6000)
+
+Last, but far from least, we would like to thank Brian Kernighan who
+has helped to clear up many dark corners of the language and provided a
+restraining touch when we have been overly tempted by "feeping
+creaturism".
+
+Support for MSC 5.1 under MS-DOS was supplied for 2.11 by Kent
+Williams, who can be reached at williams@umaxc.weeg.uiowa.edu. It
+relies heavily on the earlier work done for 2.10 by Conrad Kwok and
+Scott Garfinkle.
diff --git a/CHANGES b/CHANGES
index 4a113af4..0d7396ca 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,830 @@
+Changes from 2.13.1 to 2.13.2
+-----------------------------
+
+Toward conformity with GNU standards, configure is a link to mkconf, the latter
+ to disappear in the next major release.
+
+Update to config/bsd43.
+
+Added config/apollo, config/msc60, config/cray2-50, config/interactive2.2
+
+sgi33.cc added for compilation using cc rather than gcc.
+
+Ultrix41 now propagates to config.h properly -- as part of a general
+ mechanism in configure for kludges -- #define anything from a config file
+ just gets tacked onto the end of config.h -- to be used sparingly.
+
+Got rid of an unnecessary and troublesome declaration of vprintf().
+
+Small improvement in locality of error messages.
+
+Try to diagnose use of array as scalar and vice versa -- to be improved in
+ the future.
+
+Fix for last bug fix for Cray division code--sigh.
+
+More changes to test suite to explicitly use sh. Also get rid of
+ a few generated files.
+
+Fixed off-by-one bug in string concatenation code.
+
+Fix for use of array that is passed in from a previous function parameter.
+ Addition to test suite for above.
+
+A number of changes associated with changing NF and access to fields
+ beyond the end of the current record.
+
+Change to missing/memcmp.c to avoid seg. fault on zero length input.
+
+Updates to test suite (including some inadvertently left out of the last patch)
+ to invoke sh explicitly (rather than rely on #!/bin/sh) and remove some
+ junk files. test/chem/good updated to correspond to bug fixes.
+
+Changes from 2.13.0 to 2.13.1
+-----------------------------
+
+More configs and PORTS.
+
+Fixed bug wherein a simple division produced an erroneous FPE, caused by
+ the Cray division workaround -- that code is now #ifdef'd only for
+ Cray *and* fixed.
+
+Fixed bug in modulus implementation -- it was very close to the above
+ code, so I noticed it.
+
+Fixed portability problem with limits.h in missing.c
+
+Fixed portability problem with tzname and daylight -- define TZNAME_MISSING
+ if strftime() is missing and tzname is also.
+
+Better support for Latin-1 character set.
+
+Fixed portability problem in test Makefile.
+
+Updated PROBLEMS file.
+
+=============================== gawk-2.13 released =========================
+Changes from 2.12.42 to 2.12.43
+-------------------------------
+
+Typo in awk.y
+
+Fixed up strftime.3 and added doc. for %V.
+
+Changes from 2.12.41 to 2.12.42
+-------------------------------
+
+Fixed bug in devopen() -- if you had write permission in /dev,
+ it would just create /dev/stdout etc.!!
+
+Final (?) VMS update.
+
+Make NeXT use GFMT_WORKAROUND
+
+Fixed bug in sub_common() for substitute on zero-length match. Improved the
+ code a bit while I was at it.
+
+Fixed grammar so that $i++ parses as ($i)++
+
+Put support/* back in the distribution (didn't I already do this?!)
+
+Changes from 2.12.40 to 2.12.41
+-------------------------------
+
+VMS workaround for broken %g format.
+
+Changes from 2.12.39 to 2.12.40
+-------------------------------
+
+Minor man page update.
+
+Fixed latent bug in redirect().
+
+Changes from 2.12.38 to 2.12.39
+-------------------------------
+
+Updates to test suite -- remove dependence on changing gawk.1 man page.
+
+Changes from 2.12.37 to 2.12.38
+-------------------------------
+
+Fixed bug in use of *= without whitespace following.
+
+VMS update.
+
+Updates to man page.
+
+Option handling updates in main.c
+
+test/manyfiles redone and added to bigtest.
+
+Fixed latent (on Sun) bug in handling of save_fs.
+
+Changes from 2.12.36 to 2.12.37
+-------------------------------
+
+Update REL in Makefile-dist. Incorporate test suite into main distribution.
+
+Minor fix in regtest.
+
+Changes from 2.12.35 to 2.12.36
+-------------------------------
+
+Release takes on dual personality -- 2.12.36 and 2.13.0 -- any further
+ patches before public release won't count for 2.13, although they will for
+ 2.12 -- be careful to avoid confusion! patchlevel.h will be the last thing
+ to change.
+
+Cray updates to deal with arithmetic problems.
+
+Minor test suite updates.
+
+Fixed latent bug in parser (freeing memory).
+
+Changes from 2.12.34 to 2.12.35
+-------------------------------
+
+VMS updates.
+
+Flush stdout at top of err() and stderr at bottom.
+
+Fixed bug in eval_condition() -- it wasn't testing for MAYBE_NUM and
+ doing the force_number().
+
+Included the missing manyfiles.awk and a new test to catch the above bug which
+ I am amazed wasn't already caught by the test suite -- it's pretty basic.
+
+Changes from 2.12.33 to 2.12.34
+-------------------------------
+
+Atari updates -- including bug fix.
+
+More VMS updates -- also nuke vms/version.com.
+
+Fixed bug in handling of large numbers of redirections -- it was probably never
+ tested before (blush!).
+
+Minor rearrangement of code in r_force_number().
+
+Made chem and regtest tests a bit more portable (Ultrix again).
+
+Added another test -- manyfiles -- not invoked under any other test -- very Unix
+ specific.
+
+Rough beginning of LIMITATIONS file -- need my AWK book to complete it.
+
+Changes from 2.12.32 to 2.12.33
+-------------------------------
+
+Expunge debug.? from various files.
+
+Remove vestiges of Floor and Ceil kludge.
+
+Special case integer division -- mainly for Cray, but maybe someone else
+ will benefit.
+
+Workaround for iop_close closing an output pipe descriptor on Cray --
+ not conditional since I think it may fix a bug on SGI as well and I don't
+ think it can hurt elsewhere.
+
+Fixed memory leak in assoc_lookup().
+
+Small cleanup in test suite.
+
+Changes from 2.12.31 to 2.12.32
+-------------------------------
+
+Nuked debug.c and debugging flag -- there are better ways.
+
+Nuked version.sh and version.c in subdirectories.
+
+Fixed bug in handling of IGNORECASE.
+
+Fixed bug when FIELDWIDTHS was set via -v option.
+
+Fixed (obscure) bug when $0 is assigned a numerical value.
+
+Fixed so that escape sequences in command-line assignments work (as it already
+ said in the comment).
+
+Added a few cases to test suite.
+
+Moved support/* back into distribution.
+
+VMS updates.
+
+Changes from 2.12.30 to 2.12.31
+-------------------------------
+
+Cosmetic manual page changes.
+
+Updated sunos3 config.
+
+Small changes in test suite including renaming files over 14 chars. in length.
+
+Changes from 2.12.29 to 2.12.30
+-------------------------------
+
+Bug fix for many string concatenations in a row.
+
+Changes from 2.12.28 to 2.12.29
+-------------------------------
+
+Minor cleanup in awk.y
+
+Minor VMS update.
+
+Minor atari update.
+
+Changes from 2.12.27 to 2.12.28
+-------------------------------
+
+Got rid of the debugging goop in eval.c -- there are better ways.
+
+Sequent port.
+
+VMS changes left out of the last patch -- sigh! config/vms.h renamed
+ to config/vms-conf.h.
+
+Fixed missing/tzset.c
+
+Removed use of gcvt() and GCVT_MISSING -- turns out it was no faster than
+ sprintf("%g") and caused all sorts of portability headaches.
+
+Tuned get_field() -- it was unnecessarily parsing the whole record on reference
+ to $0.
+
+Tuned interpret() a bit in the rule_node loop.
+
+In r_force_number(), worked around bug in Uglix strtod() and got rid of
+ ugly do{}while(0) at Michal's urging.
+
+Replaced do_deref() and deref with unref(node) -- much cleaner and a bit faster.
+
+Got rid of assign_number() -- contrary to comment, it was no faster than
+ just making a new node and freeing the old one.
+
+Replaced make_number() and tmp_number() with macros that call mk_number().
+
+Changed freenode() and newnode() into macros -- the latter is getnode()
+ which calls more_nodes() as necessary.
+
+Changes from 2.12.26 to 2.12.27
+-------------------------------
+
+Completion of Cray 2 port (includes a kludge for floor() and ceil()
+ that may go or be changed -- I think that it may just be working around
+ a bug in chem that is being tweaked on the Cray).
+
+More VMS updates.
+
+Moved kludge over yacc's insertion of malloc and realloc declarations
+ from protos.h to the Makefile.
+
+Added a lisp interpreter in awk to the test suite. (Invoked under
+ bigtest.)
+
+Cleanup in r_force_number() -- I had never gotten around to a thorough
+ profile of the cache code and it turns out to be not worth it.
+
+Performance boost -- do lazy force_number()'ing for fields etc. i.e.
+ flag them (MAYBE_NUM) and call force_number only as necessary.
+
+Changes from 2.12.25 to 2.12.26
+-------------------------------
+
+Rework of regexp stuff so that dynamic regexps have reasonable
+ performance -- string used for compiled regexp is stored and
+ compared to new string -- if same, no recompilation is necessary.
+ Also, very dynamic regexps cause dfa-based searching to be turned
+ off.
+
+Code in dev_open() is back to returning fileno(std*) rather than
+ dup()ing it. This will be documented. Sorry for the run-around
+ on this.
+
+Minor atari updates.
+
+Minor vms update.
+
+Missing file from MSDOS port.
+
+Added warning (under lint) if third arg. of [g]sub is a constant and
+ handle it properly in the code (i.e. return how many matches).
+
+Changes from 2.12.24 to 2.12.25
+-------------------------------
+
+MSDOS port.
+
+Non-consequential changes to regexp variables in preparation for
+ a more serious change to fix a serious performance problem.
+
+Changes from 2.12.23 to 2.12.24
+-------------------------------
+
+Fixed bug in output flushing introduced a few patches back. This caused
+ serious performance losses.
+
+Changes from 2.12.22 to 2.12.23
+-------------------------------
+
+Accidentally left config/cray2-60 out of last patch.
+
+Added some missing dependencies to Makefile.
+
+Cleaned up mkconf a bit; made yacc the default parser (no alloca needed,
+ right?); added rs6000 hook for signed characters.
+
+Made regex.c with NO_ALLOCA undefined work.
+
+Fixed bug in dfa.c for systems where free(NULL) bombs.
+
+Deleted a few cant_happen()'s that *really* can't happen.
+
+Changes from 2.12.21 to 2.12.22
+-------------------------------
+
+Added to config stuff the ability to choose YACC rather than bison.
+
+Fixed CHAR_UNSIGNED in config.h-dist.
+
+Second arg. of strtod() is char ** rather than const char **.
+
+stackb is now initially malloc()'ed since it may be realloc()'ed.
+
+VMS updates.
+
+Added SIZE_T_MISSING to config stuff and a default typedef to awk.h.
+ (Maybe it is not needed on any current systems??)
+
+re_compile_pattern()'s size is now size_t unconditionally.
+
+Changes from 2.12.20 to 2.12.21
+-------------------------------
+
+Corrected missing/gcvt.c.
+
+Got rid of use of dup2() and thus DUP_MISSING.
+
+Updated config/sgi33.
+
+Turned on (and fixed) in cmp_nodes() the behaviour that I *hope* will be in
+ POSIX 1003.2 for relational comparisons.
+
+Small updates to test suite.
+
+Changes from 2.12.19 to 2.12.20
+-------------------------------
+
+Sloppy, sloppy, sloppy!! I didn't even try to compile the last two
+ patches. This one fixes goofs in regex.c.
+
+Changes from 2.12.18 to 2.12.19
+-------------------------------
+
+Cleanup of last patch.
+
+Changes from 2.12.17 to 2.12.18
+-------------------------------
+
+Makefile renamed to Makefile-dist.
+
+Added alloca() configuration to mkconf. (A bit kludgey.) Just
+ add a single line containing ALLOCA_PW, ALLOCA_S or ALLOCA_C
+ to the appropriate config file to have Makefile-dist edited
+ accordingly.
+
+Reorganized output flushing to correspond with new semantics of
+ devopen() on "/dev/std*" etc.
+
+Fixed rest of last goof!!
+
+Save and restore errno in do_pathopen().
+
+Miscellaneous atari updates.
+
+Get rid of the trailing comma in the NODETYPE definition (Cray
+ compiler won't take it).
+
+Try to make the use of `const' consistent since Cray compiler is
+ fussy about that. See the changes to `basename' and `myname'.
+
+It turns out that, according to section 3.8.3 (Macro Replacement)
+ of the ANSI Standard: ``If there are sequences of preprocessing
+ tokens within the list of arguments that would otherwise act as
+ preprocessing directives, the behavior is undefined.'' That means
+ that you cannot count on the behavior of the declaration of
+ re_compile_pattern in awk.h, and indeed the Cray compiler chokes on it.
+
+Replaced alloca with malloc/realloc/free in regex.c. It was much simpler
+ than expected. (Inside NO_ALLOCA for now -- by default no alloca.)
+
+Added a configuration file, config/cray60, for Unicos-6.0.
+
+Changes from 2.12.16 to 2.12.17
+-------------------------------
+
+Ooops. Goofed signal use in last patch.
+
+Changes from 2.12.15 to 2.12.16
+-------------------------------
+
+RENAMED *_dir to just * (e.g. missing_dir).
+
+Numerous VMS changes.
+
+Proper inclusion of atari and vms files.
+
+Added experimental (ifdef'd out) RELAXED_CONTINUATION and DEFAULT_FILETYPE
+ -- please comment on these!
+
+Moved pathopen() to io.c (sigh).
+
+Put local directory ahead in default AWKPATH.
+
+Added facility in mkconf to echo comments on stdout: lines beginning
+ with "#echo " will have the remainder of the line echoed when mkconf is run.
+ Any lines starting with "#" will otherwise be treated as comments. The
+ intent is to be able to say:
+ "#echo Make sure you uncomment alloca.c in the Makefile"
+ or the like.
+
+Prototype fix for V.4
+
+Fixed version_string to not print leading @(#).
+
+Fixed FIELDWIDTHS to work with strict (turned out to be easy).
+
+Fixed conf for V.2.
+
+Changed semantics of /dev/fd/n to be like on real /dev/fd.
+
+Several configuration and updates in the makefile.
+
+Updated manpage.
+
+Include tzset.c and system.c from missing_dir that were accidentally left out of
+ the last patch.
+
+Fixed bug in cmdline variable assignment -- arg was getting freed(!) in
+ call to variable.
+
+Backed out of parse-time constant folding for now, until I can figure out
+ how to do it right.
+
+Fixed devopen() so that getline <"-" works.
+
+Changes from 2.12.14 to 2.12.15
+-------------------------------
+
+Changed config/* to a condensed form that can be used with mkconf to generate
+ a config.h from config.h-dist -- much easier to maintain. Please check
+ carefully against what you had before for a particular system and report
+ any problems. vms.h remains separate since the stuff at the bottom
+ didn't quite fit the mkconf model -- hopefully cleared up later.
+
+Fixed bug in grammar -- didn't allow function definition to be separated from
+ other rules by a semi-colon.
+
+VMS fix to #includes in missing.c -- should we just be including awk.h?
+
+Updated README for texinfo.tex version.
+
+Updating of copyright in all .[chy] files.
+
+Added but commented out Michal's fix to strftime.
+
+Added tzset() emulation based on Rick Adams' code. Added TZSET_MISSING to
+ config.h-dist.
+
+Added strftime.3 man page for missing_dir
+
+More posix: func, **, **= don't work in -W posix
+
+More lint: ^, ^= not in old awk
+
+gawk.1: removed ref to -DNO_DEV_FD, other minor updating.
+
+Style change: pushbak becomes pushback() in yylex().
+
+Changes from 2.12.13 to 2.12.14
+-------------------------------
+
+Better (?) organization of awk.h -- attempt to keep all system dependencies
+ near the top and move some of the non-general things out of the config.h
+ files.
+
+Change to handling of SYSTEM_MISSING.
+
+Small change to ultrix config.
+
+Do "/dev/fd/*" etc. checking at runtime.
+
+First pass at VMS port.
+
+Improvements to error handling (when lexeme spans buffers).
+
+Fixed backslash handling -- why didn't I notice this sooner?
+
+Added programs from book to test suite and new target "bigtest" to Makefile.
+
+Changes from 2.12.12 to 2.12.13
+-------------------------------
+
+Recognize OFS and ORS specially so that OFS = 9 works without efficiency hit.
+ Took advantage of opportunity to tune do_print*() for about 10% win on a
+ print with 5 args (i.e. small but significant).
+
+Somewhat pervasive changes to reconcile CONVFMT vs. OFMT.
+
+Better initialization of builtin vars.
+
+Make config/* consistent wrt STRTOL_MISSING.
+
+Small portability improvement to alloca.s
+
+Improvements to lint code in awk.y
+
+Replaced strtol() with a better one by Chris Torek.
+
+Changes from 2.12.11 to 2.12.12
+-------------------------------
+
+Added PORTS file to record successful ports.
+
+Added #define const to nothing if not STDC and added const to strtod() header.
+
+Added * to printf capabilities and partially implemented ' ' and '+' (has an
+ effect for %d only, silently ignored for other formats). I'm afraid that's
+ as far as I want to go before I look at a complete replacement for
+ do_sprintf().
+
+Added warning for /regexp/ on LHS of MATCHOP.
+
+Changes from 2.12.10 to 2.12.11
+-------------------------------
+
+Small Makefile improvements.
+
+Some remaining nits from the NeXT port.
+
+Got rid of bcopy() define in awk.h -- not needed anymore (??)
+
+Changed private in builtin.c -- it is special on Sequent.
+
+Added subset implementation of strtol() and STRTOL_MISSING.
+
+A little bit of cleanup in debug.c, dfa.c.
+
+Changes from 2.12.9 to 2.12.10
+------------------------------
+
+Redid compatibility checking and checking for # of args.
+
+Removed all references to variables[] from outside awk.y, in preparation
+ for a more abstract interface to the symbol table.
+
+Got rid of a remaining use of bcopy() in regex.c.
+
+Changes from 2.12.8 to 2.12.9
+-----------------------------
+
+Portability improvements for atari, next and decstation.
+
+Bug fix in substr() -- wasn't handling 3rd arg. of -1 properly.
+
+Manpage updates.
+
+Moved support from src release to doc release.
+
+Updated FUTURES file.
+
+Added some "lint" warnings.
+
+Changes from 2.12.7 to 2.12.8
+-----------------------------
+
+Changed time() to systime().
+
+Changed warning() in snode() to fatal().
+
+strftime() now defaults second arg. to current time.
+
+Changes from 2.12.6 to 2.12.7
+-----------------------------
+
+Fixed bug in sub_common() involving inadequate allocation of a buffer.
+
+Added some missing files to the Makefile.
+
+Changes from 2.12.5 to 2.12.6
+-----------------------------
+
+Fixed bug wherein non-redirected getline could call iop_close() just
+ prior to a call from do_input().
+
+Fixed bug in handling of /dev/stdout and /dev/stderr.
+
+Changes from 2.12.4 to 2.12.5
+-----------------------------
+
+Updated README and support directory.
+
+Changes from 2.12.3 to 2.12.4
+-----------------------------
+
+Updated CHANGES and TODO (should have been done in previous 2 patches).
+
+Changes from 2.12.2 to 2.12.3
+-----------------------------
+
+Brought regex.c and alloca.s into line with current FSF versions.
+
+Changes from 2.12.1 to 2.12.2
+-----------------------------
+
+Portability improvements; mostly moving system prototypes out of awk.h
+
+Introduction of strftime.
+
+Use of CONVFMT.
+
+Changes from 2.12 to 2.12.1
+-----------------------------
+
+Consolidated treatment of command-line assignments (thus correcting the
+-v treatment).
+
+Rationalized builtin-variable handling into a table-driven process, thus
+simplifying variable() and eliminating spc_var().
+
+Fixed bug in handling of command-line source that ended in a newline.
+
+Simplified install() and lookup().
+
+Did away with double-mallocing of identifiers and now free second and later
+instances of a name, after the first gets installed into the symbol table.
+
+Treat IGNORECASE specially, simplifying a lot of code, and allowing
+checking against strict conformance only on setting it, rather than on each
+pattern match.
+
+Fixed regexp matching when IGNORECASE is non-zero (broken when dfa.c was
+added).
+
+Fixed bug where $0 was not being marked as valid, even after it was rebuilt.
+This caused mangling of $0.
+
+
+Changes from 2.11.1 to 2.12
+-----------------------------
+
+Makefile:
+
+Portability improvements in Makefile.
+Move configuration stuff into config.h
+
+FSF files:
+
+Synchronized alloca.[cs] and regex.[ch] with FSF.
+
+array.c:
+
+Rationalized hash routines into one with a different algorithm.
+delete() now works if the array is a local variable.
+Changed interface of assoc_next() and avoided dereferencing past the end of the
+ array.
+
+awk.h:
+
+Merged non-prototype and prototype declarations in awk.h.
+Expanded tree_eval #define to short-circuit more calls of r_tree_eval().
+
+awk.y:
+
+Delinted some of the code in the grammar.
+Fixed and improved some of the error message printing.
+Changed to accommodate unlimited length source lines.
+Line continuation now works as advertised.
+Source lines can be arbitrarily long.
+Refined grammar hacks so that /= assignment works. Regular expressions
+ starting with /= are recognized at the beginning of a line, after && or ||
+ and after ~ or !~. More contexts can be added if necessary.
+Fixed IGNORECASE (multiple scans for backslash).
+Condensed expression_lists in array references.
+Detect and warn for correct # args in builtin functions -- call most of them
+ with a fixed number (i.e. fill in defaults at parse-time rather than at
+ run-time).
+Load ENVIRON only if it is referenced (detected at parse-time).
+Treat NF, FS, RS, NR, FNR specially at parse time, to improve run time.
+Fold constant expressions at parse time.
+Do make_regexp() on third arg. of split() at parse time if it is a constant.
+
+builtin.c:
+
+srand() returns 0 the first time called.
+Replaced alloca() with malloc() in do_sprintf().
+Fixed setting of RSTART and RLENGTH in do_match().
+Got rid of get_{one,two,three} and allowance for variable # of args. at
+ run-time -- this is now done at parse-time.
+Fixed latent bug in [g]sub whereby changes to $0 would never get made.
+Rewrote much of sub_common() for simplicity and performance.
+Added ctime() and time() builtin functions (unless -DSTRICT). ctime() returns
+ a time string like the C function, given the number of seconds since the epoch
+ and time() returns the current time in seconds.
+do_sprintf() now checks for mismatch between format string and number of
+ arguments supplied.
+
+dfa.c
+
+This is borrowed (almost unmodified) from GNU grep to provide faster searches.
+
+eval.c
+
+Node_var, Node_var_array and Node_param_list handled from macro rather
+ than in r_tree_eval().
+Changed cmp_nodes() to not do a force_number() -- this, combined with a
+ force_number() on ARGV[] and ENVIRON[] brings it into line with other awks
+Greatly simplified cmp_nodes().
+Separated out Node_NF, Node_FS, Node_RS, Node_NR and Node_FNR in get_lhs().
+All adjacent string concatenations now done at once.
+
+field.c
+
+Added support for FIELDWIDTHS.
+Fixed bug in get_field() whereby changes to a field were not always
+ properly reflected in $0.
+Reordered tests in parse_field() so that reference off the end of the buffer
+ doesn't happen.
+set_FS() now sets *parse_field i.e. routine to call depending on type of FS.
+It also does make_regexp() for FS if needed. get_field() passes FS_regexp
+ to re_parse_field(), as does do_split().
+Changes to set_field() and set_record() to avoid malloc'ing and free'ing the
+ field nodes repeatedly. The fields now just point into $0 unless they are
+ assigned to another variable or changed. force_number() on the field is
+ *only* done when the field is needed.
+
+gawk.1
+
+Fixed troff formatting problem on .TP lines.
+
+io.c
+
+Moved some code out into iop.c.
+Output from pipes and system() calls is properly synchronized.
+Status from pipe close properly returned.
+Bug in getline with no redirect fixed.
+
+iop.c
+
+This file contains a totally revamped get_a_record and associated code.
+
+main.c
+
+Command line programs no longer use a temporary file.
+Therefore, tmpnam() no longer required.
+Deprecated -a and -e options -- they will go away in the next release,
+ but for now they cause a warning.
+Moved -C, -V, -c options to -W ala posix.
+Added -W posix option: throw out \x
+Added -W lint option.
+
+
+node.c
+
+force_number() now allows pure numerics to have leading whitespace.
+Added make_string facility to optimize case of adding an already malloc'd
+ string.
+Cleaned up and simplified do_deref().
+Fixed bug in handling of stref==255 in do_deref().
+
+re.c
+
+contains the interface to regexp code
+
+Changes from 2.11.1 to FSF version of same
+------------------------------------------
+Thu Jan 4 14:19:30 1990 Jim Kingdon (kingdon at albert)
+
+ * Makefile (YACC): Add -y to bison part.
+
+ * missing.c: Add #include <stdio.h>.
+
+Sun Dec 24 16:16:05 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * Makefile: Add (commented out) default defines for Sony News.
+
+ * awk.h: Move declaration of vprintf so it will compile when
+ -DVPRINTF_MISSING is defined.
+
+Mon Nov 13 18:54:08 1989 Robert J. Chassell (bob at apple-gunkies.ai.mit.edu)
+
+ * gawk.texinfo: changed @-commands that are not part of the
+ standard, currently released texinfmt.el to those that are.
+ Otherwise, only people with the as-yet unreleased makeinfo.c can
+ format this file.
+
Changes from 2.11beta to 2.11.1 (production)
--------------------------------------------
diff --git a/FUTURES b/FUTURES
index e3fccf4d..9f906ff2 100644
--- a/FUTURES
+++ b/FUTURES
@@ -1,22 +1,106 @@
-This file lists future projects and enhancments for gawk.
+This file lists future projects and enhancements for gawk. Items are listed
+in roughly the order they will be done for a given release. This file is
+mainly for use by the developers to help keep themselves on track; please
+don't bug us too much about schedules or what all this really means.
-Synchronize alloca.[cs] and regex.[ch] with the latest versions at GNU.
-(this will likely be done as a patch to 2.11.)
+Still to go in 2.13
+===================
-Convert yylex() to allow arbitrary-length program lines.
+4/18/91: MOSTLY DONE: Make printf fully compatible with the ANSI C spec.
-Allow OFMT to be other than a floating point format.
+4/18/91: DONE: Supply a warning for: /foo/ ~ exp, /foo/ !~ exp
-Make printf fully compatible with the ANSI C spec.
+4/19/91: DONE: Fix CONVFMT and OFMT interaction
-Make it faster and smaller.
+3/19/91: DONE: move -C, -V, -c options to -W ala posix
+
+3/19/91: DONE: -W posix option, throw out \x
+
+3/19/91: DONE: clean up ifdef stuff, go to config.h
+
+3/19/91: DONE: -W lint option
+
+4/5/91: DONE: change from time and ctime to sysclock and strftime
+
+4/21/91: DONE: make /dev/fd checking happen at run time
+
+4/14/91: DONE: man page fix up
+
+4/17/91: DONE: Table driven built-in subroutine argument count checking
+
+Update regex.h flags for AWK and feed back to FSF
+
+Feedback alloca.s changes to FSF
+
+Brief manual clean up
+
+4/18/91: DONE: Provide a list of systems
+ a) where gawk has been ported
+ b) where it has run the test suite
+
+For 2.14
+========
+
+more lint: check for use of builtin vars. only in new awk.
+
+Extensible hashing and on-disk storage of awk arrays
+
+"restart" keyword
+
+"nextfile" keyword
+
+Add |&
+
+Warn or fatal if identifier used as both variable and array
+
+Split() with null string as third arg to split up strings
Allow RS to be a regexp.
-Read in environment only if necessary. (Is this all that big a deal?)
+RECLEN variable for fixed length records
-Use faster regex algorithms.
+Make awk '/foo/' files... run like egrep
-Create a gawk-to-C translator?
+Extensive manual cleanup:
+
+ Use of texinfo 2.0 features
+
+ Variables string or numeric or both
+
+ Lots more examples
+
+ Add an error messages section to the manual
+
+ A section on where gawk is bounded
+ regex
+ i/o
+ sun fp conversions
+
+Do a reference card
+
+? Have strftime() pay attention to the value of ENVIRON["TZ"]
+
+In 2.15 or later
+================
+
+Allow OFMT to be other than a floating point format.
+
+SFIO based printf and other i/o
+
+Allow redefining of builtin functions?
+
+Make regex + dfa less dependent on gawk header file includes
+
+A general sub function edsub(line, pat, sub, global-flag)
+
+Add lint checking everywhere
+
+Make it faster and smaller.
+
+Create a gawk-to-C translator? (or C++??)
Create a gawk compiler?
+
+Do an optimization pass over parse tree?
+
+Provide awk profiling and debugging.
diff --git a/LIMITATIONS b/LIMITATIONS
new file mode 100644
index 00000000..8184c87a
--- /dev/null
+++ b/LIMITATIONS
@@ -0,0 +1,16 @@
+This file describes limits of gawk on a Unix system (although it
+is variable even then). Non-Unix systems may have other limits.
+All cases below where MAX_SHORT is specified will be increased
+to MAX_INT in a future release.
+
+# of fields in a record: MAX_INT
+Length of input record: MAX_SHORT
+Length of output record: unlimited
+Size of a field: MAX_SHORT
+Size of a printf string: MAX_SHORT
+Size of a literal string: MAX_SHORT
+Characters in a character class: 2^(# of bits per byte)
+# of file redirections: unlimited
+# of pipe redirections: min(# of processes per user, # of open files)
+double-precision floating point
+Length of source line: unlimited
diff --git a/Makefile b/Makefile
index 30788c1d..2a7d6ab6 100644
--- a/Makefile
+++ b/Makefile
@@ -19,118 +19,96 @@
# along with GAWK; see the file COPYING. If not, write to
# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
-# User tunable macros
+# User tunable macros -- CHANGE THESE IN Makefile-dist RATHER THAN IN
+# Makefile, OR configure WILL OVERWRITE YOUR CHANGES
+
+DESTDIR=
+BINDIR= /usr/local/bin
+MANDIR= /usr/man/manl
+MANEXT= l
# CFLAGS: options to the C compiler
#
# -O optimize
# -g include dbx/sdb info
-# -gg include gdb debugging info; only for GCC (deprecated)
# -pg include new (gmon) profiling info
-# -p include old style profiling info (System V)
#
-# To port GAWK, examine and adjust the following flags carefully.
-# In addition, you will have to look at alloca below.
-# The intent (eventual) is to not penalize the most-standard-conforming
-# systems with a lot of #define's.
+# The provided "configure" is used to turn a config file (samples in
+# the "config" directory) into commands to edit config.h-dist into
+# a suitable config.h and to edit Makefile-dist into Makefile.
+# To port GAWK, create an appropriate config file using the ones in
+# the config directory as examples and using the comments in config.h-dist
+# as a guide.
#
-# -DBCOPY_MISSING - bcopy() et al. are missing; will replace
-# with a #define'd memcpy() et al. -- use at
-# your own risk (should really use a memmove())
-# -DSPRINTF_INT - sprintf() returns int (most USG systems)
-# -DBLKSIZE_MISSING - st_blksize missing from stat() structure
-# (most USG systems)
-# -DBSDSTDIO - has a BSD internally-compatible stdio
-# -DDOPRNT_MISSING - lacks doprnt() routine
-# -DDUP2_MISSING - lacks dup2() system call (S5Rn, n < 4)
-# -DGCVT_MISSING - lacks gcvt() routine
-# -DGETOPT_MISSING - lacks getopt() routine
-# -DMEMCMP_MISSING - lacks memcmp() routine
-# -DMEMCPY_MISSING - lacks memcpy() routine
-# -DMEMSET_MISSING - lacks memset() routine
-# -DRANDOM_MISSING - lacks random() routine
-# -DSTRCASE_MISSING - lacks strcasecmp() routine
-# -DSTRCHR_MISSING - lacks strchr() and strrchr() routines
-# -DSTRERROR_MISSING - lacks (ANSI C) strerror() routine
-# -DSTRTOD_MISSING - lacks strtod() routine
-# -DTMPNAM_MISSING - lacks or deficient tmpnam() routine
-# -DVPRINTF_MISSING - lacks vprintf and associated routines
-# -DSIGTYPE=int - signal routines return int (default void)
-
-# Sun running SunOS 4.x
-MISSING = -DSTRERROR_MISSING -DSTRCASE_MISSING
-
-# SGI Personal Iris (Sys V derived)
-# MISSING = -DSPRINTF_INT -DBLKSIZE_MISSING -DSTRERROR_MISSING -DRANDOM_MISSING
-
-# VAX running Ultrix 3.x
-# MISSING = -DSTRERROR_MISSING
-
-# A generic 4.2 BSD machine
-# (eliminate GETOPT_MISSING for 4.3 release)
-# (eliminate STRCASE_MISSING and TMPNAM_MISSING for Tahoe release)
-# MISSING = -DBSDSTDIO -DMEMCMP_MISSING -DMEMCPY_MISSING -DMEMSET_MISSING \
-# -DSTRERROR_MISSING -DSTRTOD_MISSING -DVPRINTF_MISSING \
-# -DSTRCASE_MISSING -DTMPNAM_MISSING \
-# -DGETOPT_MISSING -DSTRCHR_MISSING -DSIGTYPE=int
-
-# On Amdahl UTS, a SysVr2-derived system
-# MISSING = -DBCOPY_MISSING -DSPRINTF_INT -DRANDOM_MISSING -DSTRERROR_MISSING \
-# -DSTRCASE_MISSING -DDUP2_MISSING # -DBLKSIZE_MISSING ??????
-
-# Comment out the next line if you don't have gcc.
-# Also choose just one of -g and -O.
-CC= gcc
-
-OPTIMIZE= -O -g
+
+CC= cc
+
+OPTIMIZE= -g #-O -fstrength-reduce
PROFILE= #-pg
-DEBUG= #-DDEBUG #-DMEMDEBUG #-DFUNC_TRACE #-DMPROF
-DEBUGGER= #-g -Bstatic
+DEBUG= #-DMALLOCDEBUG #-DMEMDEBUG #-DDEBUG #-DFUNC_TRACE #-DMPROF
+LINKSTATIC= #-Bstatic
WARN= #-W -Wunused -Wimplicit -Wreturn-type -Wcomment # for gcc only
-# Parser to use on grammar -- if you don't have bison use the first one
-#PARSER = yacc
-PARSER = bison
+# Parser to use on grammar - any one of the following will work
+PARSER = yacc
+#PARSER = byacc
+#PARSER = bison -y
+
+# Set LIBS to any libraries that are machine specific
+LIBS =
+
+# Cray 2 running Unicos 5.0.7
+##MAKE_LIBNET## LIBS = -lnet
+
+##MAKE_NeXT## FLAGS = -DGFMT_WORKAROUND
-# ALLOCA
+# Systems with alloca in /lib/libPW.a
+##MAKE_ALLOCA_PW## LIBS = -lPW
+
+# ALLOCA - only needed if you use bison
# Set equal to alloca.o if your system is S5 and you don't have
# alloca. Uncomment one of the rules below to make alloca.o from
# either alloca.s or alloca.c.
-ALLOCA= #alloca.o
-
-#
-# With the exception of the alloca rule referred to above, you shouldn't
-# need to customize this file below this point.
+# This should have already been done automatically by configure.
#
+# Some systems have alloca in libPW.a, so LIBS=-lPW may work, too.
+##MAKE_ALLOCA_C## ALLOCA= alloca.o
+##MAKE_ALLOCA_S## ALLOCA= alloca.o
-FLAGS= $(MISSING) $(DEBUG)
-CFLAGS= $(FLAGS) $(DEBUGGER) $(PROFILE) $(OPTIMIZE) $(WARN)
+FLAGS=
+##MAKE_RS6000## FLAGS = -qchars=signed
+
+# HP/Apollo running cc version 6.7 or earlier
+##MAKE_Apollo## FLAGS = -U__STDC__ -A run,sys5.3
+##MAKE_Apollo## LIBS = -A sys,any
+
+CFLAGS= $(FLAGS) $(DEBUG) $(LINKSTATIC) $(PROFILE) $(OPTIMIZE) $(WARN)
# object files
-AWKOBJS = main.o eval.o builtin.o msg.o debug.o io.o field.o array.o node.o \
- version.o missing.o
+AWKOBJS = main.o eval.o builtin.o msg.o iop.o io.o field.o array.o \
+ node.o version.o missing.o re.o
ALLOBJS = $(AWKOBJS) awk.tab.o
# GNUOBJS
# GNU stuff that gawk uses as library routines.
-GNUOBJS= regex.o $(ALLOCA)
+GNUOBJS= regex.o dfa.o $(ALLOCA)
# source and documentation files
-SRC = main.c eval.c builtin.c msg.c \
- debug.c io.c field.c array.c node.c missing.c
+SRC = main.c eval.c builtin.c msg.c version.c \
+ iop.c io.c field.c array.c node.c missing.c re.c
ALLSRC= $(SRC) awk.tab.c
-AWKSRC= awk.h awk.y $(ALLSRC) version.sh patchlevel.h
+AWKSRC= awk.h awk.y $(ALLSRC) patchlevel.h protos.h config.h-dist
-GNUSRC = alloca.c alloca.s regex.c regex.h
+GNUSRC = alloca.c alloca.s dfa.c dfa.h regex.c regex.h
-COPIES = missing.d/dup2.c missing.d/gcvt.c missing.d/getopt.c \
- missing.d/memcmp.c missing.d/memcpy.c missing.d/memset.c \
- missing.d/random.c missing.d/strcase.c missing.d/strchr.c \
- missing.d/strerror.c missing.d/strtod.c missing.d/tmpnam.c \
- missing.d/vprintf.c
+COPIES = missing/getopt.c missing/system.c missing/tzset.c \
+ missing/memcmp.c missing/memcpy.c missing/memset.c \
+ missing/random.c missing/strcase.c missing/strchr.c \
+ missing/strerror.c missing/strtod.c missing/vprintf.c \
+ missing/strftime.c missing/strftime.3 missing/strtol.c
SUPPORT = support/texindex.c support/texinfo.tex
@@ -141,22 +119,28 @@ INFOFILES= gawk-info gawk-info-1 gawk-info-2 gawk-info-3 gawk-info-4 \
gawk.fns gawk.ky gawk.kys gawk.pg gawk.pgs gawk.toc \
gawk.tp gawk.tps gawk.vr gawk.vrs
-MISC = CHANGES COPYING FUTURES Makefile PROBLEMS README
+MISC = CHANGES COPYING FUTURES Makefile-dist PROBLEMS README* PORTS mkconf \
+ mungeconf configure ACKNOWLEDGMENT LIMITATIONS
-PCSTUFF= pc.d/Makefile.pc pc.d/popen.c pc.d/popen.h
+OTHERS= pc/* atari/* vms/*
ALLDOC= gawk.dvi $(INFOFILES)
-ALLFILES= $(AWKSRC) $(GNUSRC) $(COPIES) $(MISC) $(DOCS) $(ALLDOC) $(PCSTUFF) $(SUPPORT)
+ALLFILES= $(AWKSRC) $(GNUSRC) $(COPIES) $(MISC) $(DOCS) $(ALLDOC) $(OTHERS) \
+ $(SUPPORT)
# Release of gawk. There can be no leading or trailing white space here!
-REL=2.11
+REL=2.13
# rules to build gawk
-gawk: $(ALLOBJS) $(GNUOBJS)
- $(CC) -o gawk $(CFLAGS) $(ALLOBJS) $(GNUOBJS) -lm
+gawk: $(ALLOBJS) $(GNUOBJS) $(REOBJS)
+ $(CC) -o gawk $(CFLAGS) $(ALLOBJS) $(GNUOBJS) $(REOBJS) -lm $(LIBS)
+
+$(AWKOBJS): awk.h config.h
-$(AWKOBJS): awk.h
+dfa.o: awk.h config.h dfa.h
+
+regex.o: awk.h config.h regex.h
main.o: patchlevel.h
@@ -164,23 +148,34 @@ awk.tab.o: awk.h awk.tab.c
awk.tab.c: awk.y
$(PARSER) -v awk.y
- -mv -f y.tab.c awk.tab.c
+ sed '/^extern char .malloc(), .realloc();$$/d' y.tab.c >awk.tab.c
+ rm y.tab.c
-version.c: version.sh
- sh version.sh $(REL) > version.c
+config.h: config.h-dist
+ @echo You must provide a config.h!
+ @echo Run \"./configure\" to build it for known systems
+ @echo or copy config.h-dist to config.h and edit it.; exit 1
-# Alloca: uncomment this if your system (notably System V boxen)
-# does not have alloca in /lib/libc.a
-#
-#alloca.o: alloca.s
-# /lib/cpp < alloca.s | sed '/^#/d' > t.s
-# as t.s -o alloca.o
-# rm t.s
+install: gawk
+ install -s gawk $(DESTDIR)$(BINDIR)
+ install -c gawk.1 $(DESTDIR)$(MANDIR)/gawk.$(MANEXT)
+# ALLOCA: uncomment this if your system (notably System V boxen)
+# does not have alloca in /lib/libc.a or /lib/libPW.a
+#
# If your machine is not supported by the assembly version of alloca.s,
-# use the C version instead. This uses the default rules to make alloca.o.
+# use the C version which follows instead. It uses the default rules to
+# make alloca.o.
#
-#alloca.o: alloca.c
+# One of these rules should have already been selected by running configure.
+
+
+##MAKE_ALLOCA_S## alloca.o: alloca.s
+##MAKE_ALLOCA_S## /lib/cpp < alloca.s | sed '/^#/d' > t.s
+##MAKE_ALLOCA_S## as t.s -o alloca.o
+##MAKE_ALLOCA_S## rm t.s
+
+##MAKE_ALLOCA_C## alloca.o: alloca.c
# auxiliary rules for release maintenance
lint: $(ALLSRC)
@@ -190,7 +185,10 @@ xref:
cxref -c $(FLAGS) $(ALLSRC) | grep -v ' /' >xref
clean:
- rm -f gawk *.o core awk.output awk.tab.c gmon.out make.out version.c
+ rm -f *.o core awk.output gmon.out make.out y.output
+
+cleaner: clean
+ rm -f gawk awk.tab.c
clobber: clean
rm -f $(ALLDOC) gawk.log
@@ -203,31 +201,55 @@ gawk.dvi: gawk.texinfo
$(INFOFILES): gawk.texinfo
makeinfo gawk.texinfo
-srcrelease: $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $(COPIES) $(PCSTUFF) $(SUPPORT)
+gawk-test-$(REL).tar.Z::
+ -rm -f gawk-test-$(REL).tar.Z
+ tar -cf - test | compress >gawk-test-$(REL).tar.Z
+
+dist: $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $(COPIES) $(SUPPORT)
+ configure msdos
+ mv config.h pc
+ -rm -rf gawk-$(REL) gawk-$(REL).*.tar.Z
-mkdir gawk-$(REL)
cp -p $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) gawk-$(REL)
- -mkdir gawk-$(REL)/missing.d
- cp -p $(COPIES) gawk-$(REL)/missing.d
- -mkdir gawk-$(REL)/pc.d
- cp -p $(PCSTUFF) gawk-$(REL)/pc.d
+ -mkdir gawk-$(REL)/missing
+ cp -p $(COPIES) gawk-$(REL)/missing
+ -mkdir gawk-$(REL)/atari
+ cp -p atari/* gawk-$(REL)/atari
+ -mkdir gawk-$(REL)/pc
+ cp -p pc/* gawk-$(REL)/pc
+ -mkdir gawk-$(REL)/vms
+ cp -p vms/* gawk-$(REL)/vms
+ -mkdir gawk-$(REL)/config
+ cp -p config/* gawk-$(REL)/config
-mkdir gawk-$(REL)/support
- cp -p $(SUPPORT) gawk-$(REL)/support
- tar -cf - gawk-$(REL) | compress > gawk-$(REL).tar.Z
-
-docrelease: $(ALLDOC)
- -mkdir gawk-$(REL)-doc
- cp -p $(INFOFILES) gawk.dvi gawk-$(REL)-doc
- nroff -man gawk.1 > gawk-$(REL)-doc/gawk.1.pr
+ cp -p support/* gawk-$(REL)/support
+ ln -s ../test gawk-$(REL)
+ tar -cfh - gawk-$(REL) | compress > gawk-$(REL).`gawk '{print $$3}' patchlevel.h`.tar.Z
+
+gawk-doc-$(REL).tar.Z: $(ALLDOC)
+ -rm -rf gawk-doc-$(REL) gawk-doc-$(REL).tar.Z
+ -mkdir gawk-doc-$(REL)
+ cp -p $(INFOFILES) gawk.dvi gawk-doc-$(REL)
+ nroff -man gawk.1 > gawk-doc-$(REL)/gawk.1.pr
tar -cf - gawk-$(REL)-doc | compress > gawk-doc-$(REL).tar.Z
-psrelease: docrelease
- -mkdir gawk-postscript
- dvi2ps gawk.dvi > gawk-postscript/gawk.postscript
- psroff -t -man gawk.1 > gawk-postscript/gawk.1.ps
- tar -cf - gawk-postscript | compress > gawk.postscript.tar.Z
+gawk-ps-$(REL).tar.Z: gawk-ps-$(REL).tar.Z
+ -rm -rf gawk-ps-$(REL) gawk-ps-$(REL).tar.Z
+ -mkdir gawk-ps-$(REL)
+ dvips -o !cat gawk.dvi > gawk-ps-$(REL)/gawk.postscript
+ pstroff -man gawk.1 > gawk-ps-$(REL)/gawk.1.ps
+ tar -cf - gawk-ps-$(REL) | compress > gawk-ps-$(REL).tar.Z
-release: srcrelease docrelease psrelease
- rm -fr gawk-postscript gawk-$(REL) gawk-$(REL)-doc
+release: gawk-src-$(REL).tar.Z gawk-doc-$(REL).tar.Z gawk-ps-$(REL).tar.Z \
+ gawk-test-$(REL).tar.Z
diff:
for i in RCS/*; do rcsdiff -c -b $$i > `basename $$i ,v`.diff; done
+
+test::
+ make gawk
+ cd test; make -k
+
+bigtest::
+ make gawk
+ cd test; make -k bigtest
diff --git a/Makefile-dist b/Makefile-dist
new file mode 100644
index 00000000..2a7d6ab6
--- /dev/null
+++ b/Makefile-dist
@@ -0,0 +1,255 @@
+# Makefile for GNU Awk.
+#
+# Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 1, or (at your option)
+# any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GAWK; see the file COPYING. If not, write to
+# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# User tunable macros -- CHANGE THESE IN Makefile-dist RATHER THAN IN
+# Makefile, OR configure WILL OVERWRITE YOUR CHANGES
+
+DESTDIR=
+BINDIR= /usr/local/bin
+MANDIR= /usr/man/manl
+MANEXT= l
+
+# CFLAGS: options to the C compiler
+#
+# -O optimize
+# -g include dbx/sdb info
+# -pg include new (gmon) profiling info
+#
+# The provided "configure" is used to turn a config file (samples in
+# the "config" directory) into commands to edit config.h-dist into
+# a suitable config.h and to edit Makefile-dist into Makefile.
+# To port GAWK, create an appropriate config file using the ones in
+# the config directory as examples and using the comments in config.h-dist
+# as a guide.
+#
+
+CC= cc
+
+OPTIMIZE= -g #-O -fstrength-reduce
+PROFILE= #-pg
+DEBUG= #-DMALLOCDEBUG #-DMEMDEBUG #-DDEBUG #-DFUNC_TRACE #-DMPROF
+LINKSTATIC= #-Bstatic
+WARN= #-W -Wunused -Wimplicit -Wreturn-type -Wcomment # for gcc only
+
+# Parser to use on grammar - any one of the following will work
+PARSER = yacc
+#PARSER = byacc
+#PARSER = bison -y
+
+# Set LIBS to any libraries that are machine specific
+LIBS =
+
+# Cray 2 running Unicos 5.0.7
+##MAKE_LIBNET## LIBS = -lnet
+
+##MAKE_NeXT## FLAGS = -DGFMT_WORKAROUND
+
+# Systems with alloca in /lib/libPW.a
+##MAKE_ALLOCA_PW## LIBS = -lPW
+
+# ALLOCA - only needed if you use bison
+# Set equal to alloca.o if your system is S5 and you don't have
+# alloca. Uncomment one of the rules below to make alloca.o from
+# either alloca.s or alloca.c.
+# This should have already been done automatically by configure.
+#
+# Some systems have alloca in libPW.a, so LIBS=-lPW may work, too.
+##MAKE_ALLOCA_C## ALLOCA= alloca.o
+##MAKE_ALLOCA_S## ALLOCA= alloca.o
+
+FLAGS=
+##MAKE_RS6000## FLAGS = -qchars=signed
+
+# HP/Apollo running cc version 6.7 or earlier
+##MAKE_Apollo## FLAGS = -U__STDC__ -A run,sys5.3
+##MAKE_Apollo## LIBS = -A sys,any
+
+CFLAGS= $(FLAGS) $(DEBUG) $(LINKSTATIC) $(PROFILE) $(OPTIMIZE) $(WARN)
+
+# object files
+AWKOBJS = main.o eval.o builtin.o msg.o iop.o io.o field.o array.o \
+ node.o version.o missing.o re.o
+
+ALLOBJS = $(AWKOBJS) awk.tab.o
+
+# GNUOBJS
+# GNU stuff that gawk uses as library routines.
+GNUOBJS= regex.o dfa.o $(ALLOCA)
+
+# source and documentation files
+SRC = main.c eval.c builtin.c msg.c version.c \
+ iop.c io.c field.c array.c node.c missing.c re.c
+
+ALLSRC= $(SRC) awk.tab.c
+
+AWKSRC= awk.h awk.y $(ALLSRC) patchlevel.h protos.h config.h-dist
+
+GNUSRC = alloca.c alloca.s dfa.c dfa.h regex.c regex.h
+
+COPIES = missing/getopt.c missing/system.c missing/tzset.c \
+ missing/memcmp.c missing/memcpy.c missing/memset.c \
+ missing/random.c missing/strcase.c missing/strchr.c \
+ missing/strerror.c missing/strtod.c missing/vprintf.c \
+ missing/strftime.c missing/strftime.3 missing/strtol.c
+
+SUPPORT = support/texindex.c support/texinfo.tex
+
+DOCS= gawk.1 gawk.texinfo
+
+INFOFILES= gawk-info gawk-info-1 gawk-info-2 gawk-info-3 gawk-info-4 \
+ gawk-info-5 gawk-info-6 gawk.aux gawk.cp gawk.cps gawk.fn \
+ gawk.fns gawk.ky gawk.kys gawk.pg gawk.pgs gawk.toc \
+ gawk.tp gawk.tps gawk.vr gawk.vrs
+
+MISC = CHANGES COPYING FUTURES Makefile-dist PROBLEMS README* PORTS mkconf \
+ mungeconf configure ACKNOWLEDGMENT LIMITATIONS
+
+OTHERS= pc/* atari/* vms/*
+
+ALLDOC= gawk.dvi $(INFOFILES)
+
+ALLFILES= $(AWKSRC) $(GNUSRC) $(COPIES) $(MISC) $(DOCS) $(ALLDOC) $(OTHERS) \
+ $(SUPPORT)
+
+# Release of gawk. There can be no leading or trailing white space here!
+REL=2.13
+
+# rules to build gawk
+gawk: $(ALLOBJS) $(GNUOBJS) $(REOBJS)
+ $(CC) -o gawk $(CFLAGS) $(ALLOBJS) $(GNUOBJS) $(REOBJS) -lm $(LIBS)
+
+$(AWKOBJS): awk.h config.h
+
+dfa.o: awk.h config.h dfa.h
+
+regex.o: awk.h config.h regex.h
+
+main.o: patchlevel.h
+
+awk.tab.o: awk.h awk.tab.c
+
+awk.tab.c: awk.y
+ $(PARSER) -v awk.y
+ sed '/^extern char .malloc(), .realloc();$$/d' y.tab.c >awk.tab.c
+ rm y.tab.c
+
+config.h: config.h-dist
+ @echo You must provide a config.h!
+ @echo Run \"./configure\" to build it for known systems
+ @echo or copy config.h-dist to config.h and edit it.; exit 1
+
+install: gawk
+ install -s gawk $(DESTDIR)$(BINDIR)
+ install -c gawk.1 $(DESTDIR)$(MANDIR)/gawk.$(MANEXT)
+
+# ALLOCA: uncomment this if your system (notably System V boxen)
+# does not have alloca in /lib/libc.a or /lib/libPW.a
+#
+# If your machine is not supported by the assembly version of alloca.s,
+# use the C version which follows instead. It uses the default rules to
+# make alloca.o.
+#
+# One of these rules should have already been selected by running configure.
+
+
+##MAKE_ALLOCA_S## alloca.o: alloca.s
+##MAKE_ALLOCA_S## /lib/cpp < alloca.s | sed '/^#/d' > t.s
+##MAKE_ALLOCA_S## as t.s -o alloca.o
+##MAKE_ALLOCA_S## rm t.s
+
+##MAKE_ALLOCA_C## alloca.o: alloca.c
+
+# auxiliary rules for release maintenance
+lint: $(ALLSRC)
+ lint -hcbax $(FLAGS) $(ALLSRC)
+
+xref:
+ cxref -c $(FLAGS) $(ALLSRC) | grep -v ' /' >xref
+
+clean:
+ rm -f *.o core awk.output gmon.out make.out y.output
+
+cleaner: clean
+ rm -f gawk awk.tab.c
+
+clobber: clean
+ rm -f $(ALLDOC) gawk.log
+
+gawk.dvi: gawk.texinfo
+ tex gawk.texinfo ; texindex gawk.??
+ tex gawk.texinfo ; texindex gawk.??
+ tex gawk.texinfo
+
+$(INFOFILES): gawk.texinfo
+ makeinfo gawk.texinfo
+
+gawk-test-$(REL).tar.Z::
+ -rm -f gawk-test-$(REL).tar.Z
+ tar -cf - test | compress >gawk-test-$(REL).tar.Z
+
+dist: $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) $(COPIES) $(SUPPORT)
+ configure msdos
+ mv config.h pc
+ -rm -rf gawk-$(REL) gawk-$(REL).*.tar.Z
+ -mkdir gawk-$(REL)
+ cp -p $(AWKSRC) $(GNUSRC) $(DOCS) $(MISC) gawk-$(REL)
+ -mkdir gawk-$(REL)/missing
+ cp -p $(COPIES) gawk-$(REL)/missing
+ -mkdir gawk-$(REL)/atari
+ cp -p atari/* gawk-$(REL)/atari
+ -mkdir gawk-$(REL)/pc
+ cp -p pc/* gawk-$(REL)/pc
+ -mkdir gawk-$(REL)/vms
+ cp -p vms/* gawk-$(REL)/vms
+ -mkdir gawk-$(REL)/config
+ cp -p config/* gawk-$(REL)/config
+ -mkdir gawk-$(REL)/support
+ cp -p support/* gawk-$(REL)/support
+ ln -s ../test gawk-$(REL)
+ tar -cfh - gawk-$(REL) | compress > gawk-$(REL).`gawk '{print $$3}' patchlevel.h`.tar.Z
+
+gawk-doc-$(REL).tar.Z: $(ALLDOC)
+ -rm -rf gawk-doc-$(REL) gawk-doc-$(REL).tar.Z
+ -mkdir gawk-doc-$(REL)
+ cp -p $(INFOFILES) gawk.dvi gawk-doc-$(REL)
+ nroff -man gawk.1 > gawk-doc-$(REL)/gawk.1.pr
+ tar -cf - gawk-$(REL)-doc | compress > gawk-doc-$(REL).tar.Z
+
+gawk-ps-$(REL).tar.Z: gawk-ps-$(REL).tar.Z
+ -rm -rf gawk-ps-$(REL) gawk-ps-$(REL).tar.Z
+ -mkdir gawk-ps-$(REL)
+ dvips -o !cat gawk.dvi > gawk-ps-$(REL)/gawk.postscript
+ pstroff -man gawk.1 > gawk-ps-$(REL)/gawk.1.ps
+ tar -cf - gawk-ps-$(REL) | compress > gawk-ps-$(REL).tar.Z
+
+release: gawk-src-$(REL).tar.Z gawk-doc-$(REL).tar.Z gawk-ps-$(REL).tar.Z \
+ gawk-test-$(REL).tar.Z
+
+diff:
+ for i in RCS/*; do rcsdiff -c -b $$i > `basename $$i ,v`.diff; done
+
+test::
+ make gawk
+ cd test; make -k
+
+bigtest::
+ make gawk
+ cd test; make -k bigtest
diff --git a/PORTS b/PORTS
new file mode 100644
index 00000000..b7d1123f
--- /dev/null
+++ b/PORTS
@@ -0,0 +1,17 @@
+This version of gawk has been successfully compiled and run "make test"
+on the following:
+
+Sun 4/490 running 4.1
+NeXT running 2.0
+DECstation 3100 running Ultrix 4.0 or Ultrix 3.1 (different config)
+AtariST (16-bit ints, gcc compiler, byacc, running under TOS)
+ESIX V.3.2 Rev D (== System V Release 3.2), the 386. compiler was gcc + bison
+IBM RS/6000 (see README.rs6000)
+486 running SVR4, using cc and bison
+SGI running IRIX 3.3 using gcc (fails with cc)
+Sequent Balance running Dynix V3.1
+Cray Y-MP8 running Unicos 6.0.11
+Cray 2 running Unicos 6.1 (modulo trailing zeroes in chem)
+VMS 5.x (should also work on 4.6 and 4.7)
+MSDOS - Microsoft C 5.1, compiles and runs very simple testing
+CLOSE: 4.3reno
diff --git a/PROBLEMS b/PROBLEMS
index 07efe657..f36aa501 100644
--- a/PROBLEMS
+++ b/PROBLEMS
@@ -1,13 +1,8 @@
-This is a list of known problems in gawk 2.11.
+This is a list of known problems in gawk 2.13.
Hopefully they will all be fixed in the next major release of gawk.
-Please keep in mind that this is still beta software and the code
-is still undergoing significant evolution.
+Please keep in mind that the code is still undergoing significant evolution.
-1. The debugging code does not print redirection info.
+1. Gawk's error messages are not in GNU standard format (not emacs parseable).
-2. The scanner needs work.
-
-3. Gawk's printf doesn't yet match the latest nawk's.
-
-Arnold Robbins
+2. Gawk's printf is probably still not POSIX compliant.
diff --git a/README b/README
index 7632ddff..c3bfc0dd 100644
--- a/README
+++ b/README
@@ -1,72 +1,99 @@
+**** NOTE: The texinfo manual is being edited and will be restored
+**** to the distribution shortly after the release.
+
README:
-This is GNU Awk 2.11. It should be upwardly compatible with the
-System V Release 4 awk.
+This is GNU Awk 2.13. It should be upwardly compatible with the
+System V Release 4 awk. It is almost completely compliant with draft 11
+of POSIX 1003.2.
+
+This release is essentially a bug fix and tuning release.
-This release is essentially a bug fix release. The files have been
-renamed and code moved around to organize things by function. Gawk should
-also be somewhat faster now. More care has been given towards portability
-across different Unix systems. See the installation instructions, below.
+See the installation instructions, below.
Known problems are given in the PROBLEMS file. Work to be done is
-described briefly in the FUTURES file.
-
-The gawk.texinfo included in this release has been revised; it should
-be in sync with what the code does. The man page should also be accurate,
-but no promises there.
-
-CHANGES FROM 2.10
-
-User visible changes:
- Compatibility mode is now obtained via new -c option.
- The new ANSI C \a and \x escapes are now a standard part of gawk
- as Unix nawk has picked them up.
- The new tolower() and toupper() functions are also standard.
- A new undocumented option, -nostalgia, has been added.
- Command line options have changed somewhat from 2.10.
- -v is now -V
- -V is now -C
- new -v for doing variable assignments before the BEGIN block.
- new -c for compatibility mode.
- new -a for awk style regexps (default)
- new -e for egrep style regexps, per the POSIX draft spec.
- Some more formats have been added to printf, ala nawk and ANSI C.
-
-Other changes (the hard stuff):
- All known bugs fixed.
- Still more memory leaks plugged.
- Lots of changes to improve performance and portability.
-
-PC users, you've been saved!
- As of patchlevel 1, we are now supplying MS-DOS "support." Said
- support was generously provided by Kent Williams, who is now
- the contact person for it. See below for his address.
+described briefly in the FUTURES file. Verified ports are listed in
+the PORTS file. Please read the LIMITATIONS and ACKNOWLEDGMENT files.
+
+The gawk.texinfo included in this release is out of date with respect to
+the code. An updated manual will be released soon after the code.
+It can be changed into an info file (included) with
+makeinfo or with texinfo-format-buffer in emacs without changes.
+
+The man page is up to date.
+If you do not have nroff or troff, you can use `awf' included in
+the test suite to format the manual page with only a few small problems.
+
+Summary of Changes from 2.11.1
+
+Configuration is via a config file which is used by the "mkconf" script
+to create Makefile and config.h. Sample configuration files for various systems
+are included in the config directory.
+
+Non-POSIX options are arguments to -W; all non-compliant options are
+deprecated.
+
+New option "-W lint" to do extra checking. The coverage
+will expand a bit in future releases.
+
+Numeric to string conversion is done via the builtin variable CONVFMT
+rather than OFMT, in conformance with the POSIX draft standard. It is
+initialized with the same value as OFMT, so the vast majority of programs
+should see no change in behaviour.
+
+Awk program source no longer has any line length limits.
+
+New builtin functions systime() and strftime() provided.
+
+Error messages improved.
+
+FIELDWIDTHS variable gives a space-separated list of numbers specifying the
+widths of input fields, to accommodate fixed-format input.
+
+Numerous bug fixes and portability improvements.
+
+Performance is about 50% better than 2.11.1, although the improvements
+are uneven.
+
+The code has been extensively tested with test coverage monitored.
+A test suite is now included. This will be expanded in future releases.
INSTALLATION:
-The Makefile will need some tailoring. Currently it is set up for
-a Sun running SunOS 4.x and gcc. The changes to make in the Makefile are
-commented and should be obvious. Starting with 2.11, our intent has been
-to make the code conform to standards (ANSI, POSIX, SVID, in that order)
-whenever possible, and to not penalize standard conforming systems.
-We have included substitute versions of routines not universally available.
-Simply add the appropriate define for the missing feature(s) on your system.
+The Makefile may need some tailoring. The only changes necessary should
+be to change installation targets or to change compiler flags.
+The changes to make in the Makefile are commented and should be obvious.
-If you have 4.2 or 4.3 BSD, you should add -DTMPNAM_MISSING since the
-version of tmpnam on these systems won't accept a NULL pointer.
-This does not apply to 4.3-tahoe or the S5R[23] systems I have access to.
-You need this if gawk core dumps on something simple like 'BEGIN {print "hi"}'.
+All other changes should be made in a config file. Samples for
+various systems are included in the config directory. Starting with
+2.11, our intent has been to make the code conform to standards (ANSI,
+POSIX, SVID, in that order) whenever possible, and to not penalize
+standard conforming systems. We have included substitute versions of
+routines not universally available. Simply add the appropriate define
+for the missing feature(s) on your system.
If you have neither bison nor yacc, use the awk.tab.c file here. It was
generated with bison, and should have no AT&T code in it. (Note that
modifying awk.y without bison or yacc will be difficult, at best. You might
want to get a copy of bison from the FSF too.)
-If you have an MS-DOS system, use the stuff in pc.d.
+If no config file is included for your system, start by copying one
+for a similar system. One way of determining the defines needed is to
+try to load gawk with nothing defined and see what routines are
+unresolved by the loader. This should give you a good idea of how to
+proceed. We would like to receive a copy of any new config files.
+
+If you have an MS-DOS system, use the stuff in the pc directory.
+For an Atari there is an atari directory and similarly one for VMS.
+
+After successful compilation, do 'make test' to run a small test suite.
+There should be no output from the 'cmp' invocations. If there is, please
+investigate and report the problem. More extensive testing can be invoked
+with 'make bigtest'. There are many interesting programs in the test suite!
PRINTING THE MANUAL
-The 'support' directory contains texinfo.tex 2.1, which will be necessary
+The 'support' directory contains texinfo.tex 2.40, which will be necessary
for printing the manual, and the texindex.c program from the emacs distribution
which is also necessary. See the makefile for the steps needed to get a
DVI file from the manual.
@@ -89,23 +116,8 @@ UUCP {uunet utai watmath}!dalcs!david
INTERNET david@cs.dal.ca
Arnold Robbins
-1315 Kittredge Court, N.E.
-Atlanta, GA, 30329-3539, USA
+1736 Reindeer Drive
+Atlanta, GA, 30329, USA
INTERNET: arnold@skeeve.atl.ga.us
UUCP: { gatech, emory, emoryu1 }!skeeve!arnold
-
-If you can't contact either of us, try Jay Fenlason, hack@prep.ai.mit.edu
-AKA mit-eddie!prep!hack. During odd hours he can sometimes be reached at
-(617) 253-8975, which is an MIT phone in the middle of the corridor, so don't
-be suprised if someone wierd answers, or if the person on the other end has
-never heard of him. (Direct them to the microvax about 10 feet to their left.)
-
-MS-DOS SUPPORT
-
-Support for MSC 5.1 was supplied for 2.11 by Kent Williams, who can be
-reached at williams@umaxc.weeg.uiowa.edu. It relies heavily on the
-earlier work done for 2.10 by Conrad Kwok and Scott Garfinkle. Bug
-reports on the MS-DOS version should go to Kent. Of course, if it's
-a generic bug, we want to hear about it too, but if it isn't reproducible
-under Unix, we won't be as interested.
diff --git a/README.VMS b/README.VMS
new file mode 100644
index 00000000..bbe9fa43
--- /dev/null
+++ b/README.VMS
@@ -0,0 +1,83 @@
+
+Compiling GAWK on VMS:
+
+ There's a DCL command procedure that will issue all the necessary
+CC and LINK commands, and there's also a Makefile for use with the MMS
+utility. From the source directory, use either
+ |$ @[.VMS]VMSBUILD.COM
+or
+ |$ MMS/DESCRIPTION=[.VMS]DESCRIP.MMS GAWK
+
+VAX C V3.x -- use either vmsbuild.com or descrip.mms as is. These use
+ CC/OPTIMIZE=NOLINE, which is essential for version 3.0.
+VAX C V2.x -- (version 2.3 or 2.4; older ones won't work); edit either
+ vmsbuild.com or descrip.mms according to the comments in them.
+ For vmsbuild.com, this just entails removing two '!' delimiters.
+ Also edit config.h (which is a copy of file [.config]vms-conf.h)
+ and comment out or delete the two lines ``#define __STDC__ 0''
+ and ``#define VAXC_BUILTINS'' near the end.
+GNU C -- edit vmsbuild.com or descrip.mms; the changes are different
+ from those for VAX C V2.x, but equally straightforward. No
+ changes to config.h should be needed.
+
+ Tested under VMS V5.3 and V5.4-2 using VAX C V3.2, V3.1, and V2.3
+and also GNU C V1.39. Should work without modifications for VMS V4.6
+and up.
+
+
+Installing GAWK on VMS:
+
+ All that's needed is a 'foreign' command, which is a DCL symbol
+whose value begins with a dollar sign.
+ |$ GAWK :== $device:[directory]GAWK
+(Substitute the actual location of gawk.exe for 'device:[directory]'.)
+That symbol should be placed in the user's login.com or in the system-
+wide sylogin.com procedure so that it will be defined every time the
+user logs on.
+
+ Optionally, the help entry can be loaded into a VMS help library.
+ |$ LIBRARY/HELP SYS$HELP:HELPLIB [.VMS]GAWK.HLP
+(You may want to substitute a site-specific help library rather than
+the standard VMS library 'HELPLIB'.) After loading the help text,
+ |$ HELP GAWK
+will provide information about both the gawk implementation and the
+awk programming language.
+
+ The logical name AWK_LIBRARY can designate a default location
+for awk program files. For the '-f' option, if the specified filename
+has no device or directory path information in it, Gawk will look in
+the current directory first, then in the directory specified by the
+translation of AWK_LIBRARY if the file wasn't found. If the file
+still isn't found, then ".awk" will be appended and the file access
+will be re-tried. If AWK_LIBRARY is not defined, that portion of the
+file search will fail benignly.
+
+
+Running GAWK on VMS:
+
+ Command line parsing and quoting conventions are significantly
+different on VMS, so examples in _The_GAWK_Manual_ or the awk book
+often need minor changes. They *are* minor though, and all the awk
+programs should run correctly.
+
+ Here are a couple of trivial tests:
+ |$ gawk -- "BEGIN {print ""Hello, World!""}"
+ |$ gawk -"W" version !could also be -"W version" or "-W version"
+Note that upper- and mixed-case text must be quoted.
+
+ The VMS port of Gawk includes a DCL-style interface in addition
+to the original shell-style interface. See the help entry for details.
+One side-effect of dual command line parsing is that if there's only a
+single parameter (as in the quoted string program above), the command
+becomes ambiguous. To work-around this, the normally optional "--"
+flag is required to force shell rather than DCL parsing. If any other
+dash-type options (or multiple parameters such as data files to be
+processed) are present, there is no ambiguity and "--" can be omitted.
+
+ The logical name AWKPATH can be used to override the default
+search path of "SYS$DISK:[],AWK_LIBRARY:" when looking for awk program
+files specified by the '-f' option. The format of AWKPATH is a comma-
+separated list of directory specifications. When defining it, the
+value should be quoted so that it retains a single translation, not a
+multi-translation RMS searchlist.
+
diff --git a/README.dos b/README.dos
new file mode 100644
index 00000000..0d81261f
--- /dev/null
+++ b/README.dos
@@ -0,0 +1,9 @@
+Gawk has been compiled and (very) minimally tested under MS-DOS using
+MSC 5.1. To compile under DOS, move the stuff in the pc directory to
+the directory with the rest of the gawk sources, and run make.bat.
+The Makefile.pc file is obsolete and should probably go away.
+
+You will have to put CRs at the end of each line in make.bat.
+
+You will have to copy the Microsoft library routine setargv.obj to
+the gawk directory for the link to work
diff --git a/README.reconstituted b/README.reconstituted
deleted file mode 100644
index a74993a5..00000000
--- a/README.reconstituted
+++ /dev/null
@@ -1,7 +0,0 @@
-Mon Jun 7 14:06:13 IDT 2004
-============================
-
-This version of gawk was found on the Sprite Walnut Creek CD-ROM
-image obtained via announcement on the TUHS list. It's a version
-I did not have. The Sprite CD rearranged things somewhat, but I
-think I've put it all back.
diff --git a/README.rs6000 b/README.rs6000
new file mode 100644
index 00000000..3ec26c81
--- /dev/null
+++ b/README.rs6000
@@ -0,0 +1,21 @@
+Date: Fri, 26 Apr 1991 18:01:04 -0300
+From: mjlx@eagle.cnsf.cornell.edu (Mike Lijewski)
+To: arnold@audiofax.com
+Cc: david@cs.dal.ca
+Subject: testing 2.12 on a machine with unsigned chars
+
+I chose to use the alloca which you supply. The RS/6000 has a builtin
+alloca which is accessible using a `#pragma alloca', but I chose not
+to use it. Initially, I tried to use it by conditionally compiling it,
+similar to the way alloca.h is included on sparcs. But this has
+some problems. Firstly, the RS/6000 compiler complains about the
+placement of the #pragma, something to the intent that the pragma must
+precede all C code. This would be easy enough to fix by conditionally
+including the #pragma elsewhere in the relevant files. A more
+difficult problem is that the awk.tab.c generated by bison uses
+alloca. To fix this the right way, bison would have to be modified to
+output the appropriate conditionally compilable code as it does now
+for sparcs. If you think it is worth while to use the builtin alloca,
+I would be happy to get it working, except for the bison problem, and
+send you the diffs. The FSF might also be interested in "fixing"
+bison to use the builtin alloca on the RS/6000.
diff --git a/README.ultrix b/README.ultrix
new file mode 100644
index 00000000..8a0ce552
--- /dev/null
+++ b/README.ultrix
@@ -0,0 +1,5 @@
+When compiling on DECstation running Ultrix 4.0 a command 'cc -c -O
+regex.c' is causing an infinite loop in an optimizer. Other sources
+compile fine with -O flag. If you are going to use this flag either
+add a special rule to Makefile for a compilation of regex.c, or issue
+'cc -c regex.c' before hitting 'make'.
diff --git a/alloca.c b/alloca.c
index d825b4b6..c29fbda3 100644
--- a/alloca.c
+++ b/alloca.c
@@ -42,10 +42,10 @@ you
lose
-- must know STACK_DIRECTION at compile-time
#endif /* STACK_DIRECTION undefined */
-#endif /* static */
-#endif /* emacs */
+#endif static
+#endif emacs
-#ifdef X3J11
+#ifdef __STDC__
typedef void *pointer; /* generic pointer type */
#else
typedef char *pointer; /* generic pointer type */
@@ -189,17 +189,16 @@ alloca (size) /* returns pointer to storage */
}
}
-pointer xmalloc(n)
-unsigned int n;
-{
- extern pointer malloc();
- pointer cp;
- static char mesg[] = "xmalloc: no memory!\n";
-
- cp = malloc(n);
- if (! cp) {
- write (2, mesg, sizeof(mesg) - 1);
- exit(1);
- }
- return cp;
+pointer
+xmalloc(n)
+int n;
+{
+ char *malloc();
+ char *p = malloc(n);
+
+ if (p)
+ return (p);
+
+ write(2, "fatal: out of memory!\n", 22);
+ exit(1);
}
diff --git a/alloca.s b/alloca.s
index 808b021e..08affc58 100644
--- a/alloca.s
+++ b/alloca.s
@@ -25,29 +25,34 @@ and this notice must be preserved on all copies. */
So replace whatever system-provided alloca there may be
on all 68000 systems. */
+#define NOT_C_CODE
/* #include "config.h" */
#ifndef HAVE_ALLOCA /* define this to use system's alloca */
#ifndef hp9000s300
+#ifndef m68k
#ifndef mc68k
#ifndef m68000
#ifndef WICAT
+#ifndef ns32000
#ifndef ns16000
#ifndef sequent
-#ifndef pyr
+#ifndef pyramid
#ifndef ATT3B5
#ifndef XENIX
you
lose!!
#endif /* XENIX */
#endif /* ATT3B5 */
-#endif /* pyr */
+#endif /* pyramid */
#endif /* sequent */
#endif /* ns16000 */
+#endif /* ns32000 */
#endif /* WICAT */
#endif /* m68000 */
#endif /* mc68k */
+#endif /* m68k */
#endif /* hp9000s300 */
@@ -79,7 +84,7 @@ PROBE equ -128 ; safety buffer for C compiler scratch
*/
text
set PROBE,-128 # safety for C frame temporaries
- set MAXREG,10 # d2-d7, a2-a5 may have been saved
+ set MAXREG,22 # d2-d7, a2-a5, fp2-fp7 may have been saved
global _alloca
_alloca:
mov.l (%sp)+,%a0 # return addess
@@ -91,20 +96,51 @@ _alloca:
sub.l &MAXREG*4,%d1 # space for saving registers
mov.l %d1,%sp # save new value of sp
tst.b PROBE(%sp) # create pages (sigh)
+ mov.l %a2,%d1 # save reg a2
+ mov.l %sp,%a2
move.w &MAXREG-1,%d0
copy_regs_loop: /* save caller's saved registers */
- mov.l (%a1)+,(%sp)+
+ mov.l (%a1)+,(%a2)+
dbra %d0,copy_regs_loop
- mov.l %sp,%d0 # return value
- mov.l %d1,%sp
+ mov.l %a2,%d0 # return value
+ mov.l %d1,%a2 # restore a2
add.l &-4,%sp # adjust tos
jmp (%a0) # rts
#endif /* new hp assembler */
#else
-#ifdef mc68k /* SGS assembler totally different */
+#if defined(m68k) || defined(mc68k) /* SGS assembler totally different */
file "alloca.s"
global alloca
alloca:
+#ifdef MOTOROLA_DELTA
+/* slightly modified version of alloca to motorola sysV/68 pcc - based
+ compiler.
+ this compiler saves used registers relative to %sp instead of %fp.
+ alright, just make new copy of saved register set whenever we allocate
+ new space from stack..
+ this is true at least until SVR3V5.1 . bug has been reported to Motorola. */
+ set MAXREG,10 # max no of registers to save (d2-d7, a2-a5)
+ mov.l (%sp)+,%a1 # pop return addr from top of stack
+ mov.l (%sp)+,%d0 # pop size in bytes from top of stack
+ mov.l %sp,%a0 # save stack pointer for register copy
+ addq.l &3,%d0 # round size up to long word
+ andi.l &-4,%d0 # mask out lower two bits of size
+ mov.l %sp,%d1 # compute new value of sp to d1
+ sub.l %d0,%d1 # pseudo-allocate by moving stack pointer
+ sub.l &MAXREG*4,%d1 # allocate more space for saved regs.
+ mov.l %d1,%sp # actual allocation.
+ move.w &MAXREG-1,%d0 # d0 counts saved regs.
+ mov.l %a2,%d1 # preserve a2.
+ mov.l %sp,%a2 # make pointer to new reg save area.
+copy_regs_loop: # copy stuff from old save area.
+ mov.l (%a0)+,(%a2)+ # save saved register
+ dbra %d0,copy_regs_loop
+ mov.l %a2,%a0 # now a2 is start of allocated space.
+ mov.l %a2,%d0 # return it in both a0 and d0 to play safe.
+ mov.l %d1,%a2 # restore a2.
+ subq.l &4,%sp # new top of stack
+ jmp (%a1) # far below normal return
+#else /* not MOTOROLA_DELTA */
mov.l (%sp)+,%a1 # pop return addr from top of stack
mov.l (%sp)+,%d0 # pop size in bytes from top of stack
add.l &R%1,%d0 # round size up to long word
@@ -118,8 +154,9 @@ alloca:
set S%1,64 # safety factor for C compiler scratch
set R%1,3+S%1 # add to size for rounding
set P%1,-132 # probe this far below current top of stack
+#endif /* not MOTOROLA_DELTA */
-#else /* not mc68k */
+#else /* not m68k && not mc68k */
#ifdef m68000
@@ -181,10 +218,10 @@ alloca:
#endif /* not WICAT */
#endif /* m68000 */
-#endif /* not mc68k */
+#endif /* not m68k */
#endif /* not hp9000s300 */
-#ifdef ns16000
+#if defined (ns16000) || defined (ns32000)
.text
.align 2
@@ -201,9 +238,14 @@ alloca:
#define IM
#define REGISTER(x) x
#else
+#ifdef NS5 /* ns SysV assembler */
+#define IM $
+#define REGISTER(x) x
+#else
#define IM $
#define REGISTER(x) 0(x)
#endif
+#endif
/*
* The ns16000 is a little more difficult, need to copy regs.
@@ -231,9 +273,9 @@ alloca:
movmd 0(r2),4(sp),IM/**/4 /* copy regs */
movmd 0x10(r2),0x14(sp),IM/**/4
jump REGISTER(r1) /* funky return */
-#endif /* ns16000 */
+#endif /* ns16000 or ns32000 */
-#ifdef pyr
+#ifdef pyramid
.globl _alloca
@@ -254,7 +296,7 @@ __longjmp: jump _longjmp
__setjmp: jump _setjmp
#endif
-#endif /* pyr */
+#endif /* pyramid */
#ifdef ATT3B5
diff --git a/array.c b/array.c
index 1bc0fe04..c6b1a0d6 100644
--- a/array.c
+++ b/array.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -25,20 +25,13 @@
#include "awk.h"
-#ifdef DONTDEF
-int primes[] = {31, 61, 127, 257, 509, 1021, 2053, 4099, 8191, 16381};
-#endif
-
-#define ASSOC_HASHSIZE 127
-#define STIR_BITS(n) ((n) << 5 | (((n) >> 27) & 0x1f))
-#define HASHSTEP(old, c) ((old << 1) + c)
-#define MAKE_POS(v) (v & ~0x80000000) /* make number positive */
+static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1));
NODE *
concat_exp(tree)
-NODE *tree;
+register NODE *tree;
{
- NODE *r;
+ register NODE *r;
char *str;
char *s;
unsigned len;
@@ -53,7 +46,7 @@ NODE *tree;
return r;
subseplen = SUBSEP_node->lnode->stlen;
subsep = SUBSEP_node->lnode->stptr;
- len = r->stlen + subseplen + 1;
+ len = r->stlen + subseplen + 2;
emalloc(str, char *, len, "concat_exp");
memcpy(str, r->stptr, r->stlen+1);
s = str + r->stlen;
@@ -76,8 +69,8 @@ NODE *tree;
free_temp(r);
tree = tree->rnode;
}
- r = tmp_string(str, s - str);
- free(str);
+ r = make_str_node(str, s - str, ALREADY_MALLOCED);
+ r->flags |= TEMP;
return r;
}
@@ -91,13 +84,11 @@ NODE *symbol;
if (symbol->var_array == 0)
return;
- for (i = 0; i < ASSOC_HASHSIZE; i++) {
+ for (i = 0; i < HASHSIZE; i++) {
for (bucket = symbol->var_array[i]; bucket; bucket = next) {
next = bucket->ahnext;
- deref = bucket->ahname;
- do_deref();
- deref = bucket->ahvalue;
- do_deref();
+ unref(bucket->ahname);
+ unref(bucket->ahvalue);
freenode(bucket);
}
symbol->var_array[i] = 0;
@@ -105,37 +96,58 @@ NODE *symbol;
}
/*
- * calculate the hash function of the string subs, also returning in *typtr
- * the type (string or number)
+ * calculate the hash function of the string in subs
*/
-static int
-hash_calc(subs)
-NODE *subs;
+unsigned int
+hash(s, len)
+register char *s;
+register int len;
{
- register int hash1 = 0, i;
-
- subs = force_string(subs);
- for (i = 0; i < subs->stlen; i++)
- hash1 = HASHSTEP(hash1, subs->stptr[i]);
+ register unsigned int h = 0, g;
- hash1 = MAKE_POS(STIR_BITS((int) hash1)) % ASSOC_HASHSIZE;
- return (hash1);
+ while (len--) {
+ h = (h << 4) + *s++;
+ g = (h & 0xf0000000);
+ if (g) {
+ h = h ^ (g >> 24);
+ h = h ^ g;
+ }
+ }
+ if (h < HASHSIZE)
+ return h;
+ else
+ return h%HASHSIZE;
}
/*
- * locate symbol[subs], given hash of subs and type
+ * locate symbol[subs]
*/
static NODE * /* NULL if not found */
assoc_find(symbol, subs, hash1)
-NODE *symbol, *subs;
+NODE *symbol;
+register NODE *subs;
int hash1;
{
register NODE *bucket;
+ int chained = 0;
for (bucket = symbol->var_array[hash1]; bucket; bucket = bucket->ahnext) {
- if (cmp_nodes(bucket->ahname, subs))
- continue;
- return bucket;
+ if (cmp_nodes(bucket->ahname, subs) == 0) {
+ if (chained) { /* move found to front of chain */
+ register NODE *this, *prev;
+ for (prev = this = symbol->var_array[hash1];
+ this; prev = this, this = this->ahnext) {
+ if (this == bucket) {
+ prev->ahnext = this->ahnext;
+ this->ahnext = symbol->var_array[hash1];
+ symbol->var_array[hash1] = this;
+ }
+ }
+ }
+ return bucket;
+ }
+ if (bucket)
+ chained = 1;
}
return NULL;
}
@@ -153,8 +165,8 @@ NODE *symbol, *subs;
symbol = stack_ptr[symbol->param_cnt];
if (symbol->var_array == 0)
return 0;
- subs = concat_exp(subs);
- hash1 = hash_calc(subs);
+ subs = concat_exp(subs); /* concat_exp returns a string node */
+ hash1 = hash(subs->stptr, subs->stlen);
if (assoc_find(symbol, subs, hash1) == NULL) {
free_temp(subs);
return 0;
@@ -175,17 +187,19 @@ NODE **
assoc_lookup(symbol, subs)
NODE *symbol, *subs;
{
- register int hash1, i;
+ register int hash1;
register NODE *bucket;
- hash1 = hash_calc(subs);
+ (void) force_string(subs);
+ hash1 = hash(subs->stptr, subs->stlen);
if (symbol->var_array == 0) { /* this table really should grow
* dynamically */
- emalloc(symbol->var_array, NODE **, (sizeof(NODE *) *
- ASSOC_HASHSIZE), "assoc_lookup");
- for (i = 0; i < ASSOC_HASHSIZE; i++)
- symbol->var_array[i] = 0;
+ unsigned size;
+
+ size = sizeof(NODE *) * HASHSIZE;
+ emalloc(symbol->var_array, NODE **, size, "assoc_lookup");
+ memset((char *)symbol->var_array, 0, size);
symbol->type = Node_var_array;
} else {
bucket = assoc_find(symbol, subs, hash1);
@@ -194,8 +208,10 @@ NODE *symbol, *subs;
return &(bucket->ahvalue);
}
}
- bucket = newnode(Node_ahash);
+ getnode(bucket);
+ bucket->type = Node_ahash;
bucket->ahname = dupnode(subs);
+ free_temp(subs);
bucket->ahvalue = Nnull_string;
bucket->ahnext = symbol->var_array[hash1];
symbol->var_array[hash1] = bucket;
@@ -210,10 +226,12 @@ NODE *symbol, *tree;
register NODE *bucket, *last;
NODE *subs;
+ if (symbol->type == Node_param_list)
+ symbol = stack_ptr[symbol->param_cnt];
if (symbol->var_array == 0)
return;
- subs = concat_exp(tree);
- hash1 = hash_calc(subs);
+ subs = concat_exp(tree); /* concat_exp returns string node */
+ hash1 = hash(subs->stptr, subs->stlen);
last = NULL;
for (bucket = symbol->var_array[hash1]; bucket; last = bucket, bucket = bucket->ahnext)
@@ -226,40 +244,41 @@ NODE *symbol, *tree;
last->ahnext = bucket->ahnext;
else
symbol->var_array[hash1] = bucket->ahnext;
- deref = bucket->ahname;
- do_deref();
- deref = bucket->ahvalue;
- do_deref();
+ unref(bucket->ahname);
+ unref(bucket->ahvalue);
freenode(bucket);
}
-struct search *
-assoc_scan(symbol)
+void
+assoc_scan(symbol, lookat)
NODE *symbol;
+struct search *lookat;
{
- struct search *lookat;
-
- if (!symbol->var_array)
- return 0;
- emalloc(lookat, struct search *, sizeof(struct search), "assoc_scan");
- lookat->numleft = ASSOC_HASHSIZE;
+ if (!symbol->var_array) {
+ lookat->retval = NULL;
+ return;
+ }
lookat->arr_ptr = symbol->var_array;
+ lookat->arr_end = lookat->arr_ptr + HASHSIZE; /* added */
lookat->bucket = symbol->var_array[0];
- return assoc_next(lookat);
+ assoc_next(lookat);
}
-struct search *
+void
assoc_next(lookat)
struct search *lookat;
{
- for (; lookat->numleft; lookat->numleft--) {
- while (lookat->bucket != 0) {
+ while (lookat->arr_ptr < lookat->arr_end) {
+ if (lookat->bucket != 0) {
lookat->retval = lookat->bucket->ahname;
lookat->bucket = lookat->bucket->ahnext;
- return lookat;
+ return;
}
- lookat->bucket = *++(lookat->arr_ptr);
+ lookat->arr_ptr++;
+ if (lookat->arr_ptr < lookat->arr_end)
+ lookat->bucket = *(lookat->arr_ptr);
+ else
+ lookat->retval = NULL;
}
- free((char *) lookat);
- return 0;
+ return;
}
diff --git a/atari/Makefile.st b/atari/Makefile.st
new file mode 100644
index 00000000..36bf0ae3
--- /dev/null
+++ b/atari/Makefile.st
@@ -0,0 +1,172 @@
+# Makefile for GNU Awk - ST version
+#
+# This is a subset of the full Makefile cut down for Atari ST
+# gcc compiler is assumed
+# It is known to work with gulam shell.
+# It may need some customization depending on your setup!
+# Replace 'Makefile' in the source directory with it.
+#
+# You need sed.ttp for an automatic creation of config.h file!
+# Check gulam script mkconf.g in atari directory.
+# In a pinch you may create one by checking config/atari file and
+# editing config.h-dist by hand.
+#
+# Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Progamming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 1, or (at your option)
+# any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GAWK; see the file COPYING. If not, write to
+# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# User tunable macros
+
+DESTDIR=
+BINDIR= c:\bin
+MANDIR= c:\man\manl
+MANEXT= l
+RM=rm -f
+
+# CFLAGS: options to the C compiler
+#
+# -O optimize
+# -mshort use integers which are 16-bits wide (a "correct" size on ST)
+#
+# Set LIBS to any libraries that are machine specific
+
+
+# Comment out the next line if you don't have gcc.
+# Also choose just one of -g and -O.
+CC= gcc
+# if cross-compiler
+#CC= cgcc
+LIBS = -lpml16
+
+OPTIMIZE= -O -fstrength-reduce -fcombine-regs -fomit-frame-pointer
+#OPTIMIZE= -O
+AFLAGS = -G -x -mshort # -Wall
+DEBUG= #-DMALLOCDEBUG #-DMEMDEBUG #-DDEBUG #-DFUNC_TRACE #-DMPROF
+WARN= #-W -Wunused -Wimplicit -Wreturn-type -Wcomment # for gcc only
+
+# Parser to use on grammar -- if you don't have bison use the first one
+#PARSER = yacc
+PARSER = byacc
+#PARSER = bison -y
+
+#
+# With the exception of names of parser output files there should be no
+# need to customize this file below this point.
+#
+
+FLAGS= $(DEBUG) $(AFLAGS)
+CFLAGS= $(FLAGS) $(LINKSTATIC) $(PROFILE) $(OPTIMIZE) $(WARN)
+
+# object files
+AWKOBJS = main.o eval.o builtin.o msg.o iop.o io.o field.o \
+ array.o node.o missing.o re.o version.o
+
+# basename of parser output
+#POUTPUT = awk.tab
+POUTPUT = awk_tab
+
+ALLOBJS = $(AWKOBJS) $(POUTPUT).o
+
+# GNUOBJS
+# GNU stuff that gawk uses as library routines.
+GNUOBJS= regex.o dfa.o $(ALLOCA)
+
+# source and documentation files
+SRC = main.c eval.c builtin.c msg.c \
+ iop.c io.c field.c array.c node.c missing.c re.c version.c
+
+ALLSRC= $(SRC) $(POUTPUT).c
+#ALLSRC= $(SRC) awk_tab.c
+
+AWKSRC= awk.h awk.y $(ALLSRC) patchlevel.h protos.h config.h-dist
+
+GNUSRC = alloca.c alloca.s dfa.c dfa.h regex.c regex.h
+
+COPIES = missing/getopt.c missing/system.c missing/tzset.c \
+ missing/memcmp.c missing/memcpy.c missing/memset.c \
+ missing/random.c missing/strcase.c missing/strchr.c \
+ missing/strerror.c missing/strtod.c missing/vprintf.c \
+ missing/strftime.c missing/strftime.3 missing/strtol.c
+
+SUPPORT = support/texindex.c support/texinfo.tex
+
+DOCS= gawk.1 gawk.texinfo
+
+#INFOFILES= gawk-info gawk-info-1 gawk-info-2 gawk-info-3 gawk-info-4 \
+# gawk-info-5 gawk-info-6 gawk.aux gawk.cp gawk.cps gawk.fn \
+# gawk.fns gawk.ky gawk.kys gawk.pg gawk.pgs gawk.toc \
+# gawk.tp gawk.tps gawk.vr gawk.vrs
+
+ALLDOC= gawk.dvi $(INFOFILES)
+
+# rules to build gawk
+gawk: $(ALLOBJS) $(GNUOBJS) $(REOBJS)
+ $(CC) -o gawk.ttp $(CFLAGS) $(ALLOBJS) $(GNUOBJS) $(REOBJS) $(LIBS)
+ toglclr gawk.ttp
+
+$(AWKOBJS): awk.h config.h
+
+dfa.o: awk.h config.h dfa.h
+regex.o: awk.h config.h regex.h
+main.o: patchlevel.h
+
+$(POUTPUT).o: awk.h $(POUTPUT).c
+
+$(POUTPUT).c: awk.y awk.h
+ $(PARSER) awk.y
+ sed '/^extern char .malloc(), .realloc();$$/d' ytab.c > $(POUTPUT).c
+ rm ytab.c
+
+config.h: config.h-dist
+ pushd atari
+ mkconf.g
+ popd
+
+# One of these rules should have already been selected by running mkconf.
+
+##MAKE_ALLOCA_S## alloca.o: alloca.s
+##MAKE_ALLOCA_S## /lib/cpp < alloca.s | sed '/^#/d' > t.s
+##MAKE_ALLOCA_S## as t.s -o alloca.o
+##MAKE_ALLOCA_S## rm t.s
+##MAKE_ALLOCA_C## alloca.o: alloca.c
+install: gawk
+ cp gawk.ttp $(DESTDIR)$(BINDIR)
+ cp gawk.1 $(DESTDIR)$(MANDIR)/gawk.$(MANEXT)
+
+clean:
+ $(RM) gawk.ttp *.o core awk.output $(POUTPUT).c gmon.out make.out \
+ y.output
+
+clobber: clean
+ $(RM) $(ALLDOC) gawk.log
+
+gawk.dvi: gawk.texinfo
+ tex gawk.texinfo ; texindex gawk.??
+ tex gawk.texinfo ; texindex gawk.??
+ tex gawk.texinfo
+
+#$(INFOFILES): gawk.texinfo
+# makeinfo gawk.texinfo
+
+# Many tests in test directory depend on having Bourne shell around.
+# Before using this target edit test Makefile and test scripts adapting
+# them to your local conditions.
+test: gawk
+ pushd test
+ make
+ popd
diff --git a/atari/mkconf.g b/atari/mkconf.g
new file mode 100644
index 00000000..b7db88a9
--- /dev/null
+++ b/atari/mkconf.g
@@ -0,0 +1,18 @@
+#
+# gulam script to produce configuration file for Atari ST;
+# performs the same job as mkconf, but only for this specific configuration;
+# it is assumed that it is located in a subdirectory .\atari
+#
+if { -e ..\config\atari }
+ sed -n -f mkscrpt.sed ..\config\atari > sedscr
+ sed -f sedscr ..\config.h-d > config.h
+ sed -n '/^#echo./s///p' ..\config\atari
+ rm sedscr
+ mv config.h ..
+ef
+ echo "'..\config\atari' was lost somewhere"
+ echo "Either construct one based on the examples in the config directory,"
+ echo "or, in source directory, copy config.h-dist to config.h and edit it."
+ exit 1
+endif
+exit 0
diff --git a/atari/mkscrpt.sed b/atari/mkscrpt.sed
new file mode 100644
index 00000000..949d9c59
--- /dev/null
+++ b/atari/mkscrpt.sed
@@ -0,0 +1,15 @@
+# there is no automatic editing of Makefile for Atari right now
+# but lines starting with "MAKE_" string are processed for consistency
+# with other configuration files and in case they are needed
+# in the future
+:start
+ /^MAKE_/d
+ /^[^#]/s/.*/s~__SYSTEM__~&~/p
+ t cont
+ n
+ b start
+:cont
+ n
+ /^MAKE_/d
+ /^[^#]/s:^\([^ ]*\)[ ].*:s~^/\\* #define[ ]*\1.*~#define &~:p
+b cont
diff --git a/atari/stack.c b/atari/stack.c
new file mode 100644
index 00000000..c57880da
--- /dev/null
+++ b/atari/stack.c
@@ -0,0 +1,6 @@
+/*
+ * This value indicates an amount of reserved memory for a stack
+ * in executables - see source of a startup code for details.
+ * It can be changed without recompilation with fixstk.ttp utility.
+ */
+long _stksize = 2L; /* keep half of memory */
diff --git a/atari/system.c b/atari/system.c
new file mode 100644
index 00000000..26943bb4
--- /dev/null
+++ b/atari/system.c
@@ -0,0 +1,112 @@
+/*
+ * function system() - slightly modified from sources dLibs 1.2
+ * - a freely distributable C library for Atari ST.
+ * Authors: Dale Schumacher and John Stanley, I believe.
+ * Changes for gcc compiler and gnulib.olb - Michal Jaegermann
+ */
+
+#include <osbind.h>
+#include <stdio.h>
+#include <string.h>
+#include <basepage.h>
+#ifdef __GNUC__
+#include <process.h>
+#define ERROR 2
+#endif
+
+/* #define DEBUG */
+#ifdef DEBUG
+#define _COOKIE(x) puts(x);putchar('\n')
+#endif
+
+/* Split cmdln in place on whitespace; store tokens in argv, NULL-terminated. */
+void static
+parse_args(char *cmdln, register char **argv)
+{
+ register char *p;
+ static char delim[] = " \t\r\n";
+
+ for(p = strtok(cmdln, delim); p; p = strtok(NULL, delim))
+ *argv++ = p;
+ /* system() hands this vector to spawnvp()/forkvpe(); mark its end */
+ *argv = NULL;
+}
+
+#ifdef __GNUC__
+/* this is used by assembler statement to keep a copy of registers */
+static volatile long savearea[16];
+#endif
+
+int
+system(const char *command)
+{
+ register char *p;
+ register int (*shell)();
+#ifndef __GNUC__
+ char rv[2];
+#endif
+ char cmdln[1024];
+ char *args[64];
+ char *getenv();
+
+ if(!command)
+ return(ERROR);
+
+ /* get _shell_p value */
+ p = (char *) Super(0L); /* supervisor mode */
+ shell = (int (*)()) *((long *) 0x4F6L);
+ (void) Super(p); /* restore user mode */
+
+ /* validate _shell_p */
+ if((shell) && /* Shell available. */
+ (((long) shell) < ((long) _base)) && /* Reasonable shell pointer. */
+ (strncmp((char *)shell, "PATH", 4))) /* Not corrupted */
+ {
+#ifdef __GNUC__
+ int ret;
+#endif
+ /* execute the command */
+#ifdef DEBUG
+_COOKIE("system: using _shell_p");
+printf("'shell' got value 0x%08lx\n", (long)shell);
+#endif
+/* a bit of paranoia caused by some misbehaving programs */
+#ifdef __GNUC__
+asm("moveml d1-d7/a0-a7,_savearea");
+ ret = (*shell)(command);
+asm("moveml _savearea,d1-d7/a0-a7");
+ return (ret);
+#else
+ return ((*shell)(command));
+#endif
+ }
+
+ strcpy(cmdln, command); /* copy the command line for parsing */
+
+ if((p = getenv("SHELL")) && (*p)) /* SHELL= variable? */
+ {
+ args[0] = p;
+ parse_args(cmdln, (args + 1));
+#ifdef DEBUG
+_COOKIE("system: executing SHELL");
+_COOKIE(p);
+#endif
+ }
+ else /* attempt to find first token as a program on the path */
+ {
+ parse_args(cmdln, args);
+ p = args[0];
+#ifdef DEBUG
+_COOKIE("system: directly executing program");
+_COOKIE(p);
+#endif
+ }
+
+#ifdef __GNUC__
+ return(spawnvp(0, p, args));
+#else /* original from dLibs */
+ forkvpe(p, args, NULL);
+ wait(rv);
+ return((rv[1] == 0) ? rv[0] : rv[1]);
+#endif
+}
diff --git a/atari/textrd.c b/atari/textrd.c
new file mode 100644
index 00000000..07ed0905
--- /dev/null
+++ b/atari/textrd.c
@@ -0,0 +1,30 @@
+/*
+ * From gnulib Atari ST sources - with a fix for a bug causing
+ * a premature EOF when only characters read are CR's.
+ * Written by Eric R. Smith and placed in the public domain.
+ * Fix - Michal Jaegermann, June 1991.
+ */
+#include <stdio.h>
+#include <unistd.h>
+int
+_text_read(fd, buf, nbytes)
+ int fd;
+ char *buf;
+ int nbytes;
+{
+ char *to, *from;
+ int r;
+ do {
+ r = read(fd, buf, nbytes);
+ if (r <= 0) /* if EOF or read error - return */
+ return r;
+ to = from = buf;
+ do {
+ if (*from == '\r')
+ from++;
+ else
+ *to++ = *from++;
+ } while (--r);
+ } while (buf == to); /* only '\r's? - try to read next nbytes */
+ return (to - buf);
+}
diff --git a/atari/tmpnam.c b/atari/tmpnam.c
new file mode 100644
index 00000000..bd68c4c0
--- /dev/null
+++ b/atari/tmpnam.c
@@ -0,0 +1,47 @@
+/* tmpnam.c : return a temporary file name */
+/* written by Eric R. Smith and placed in the public domain */
+/**
+ * - modified for gawk needs - pattern /$$XXXXXX from the original
+ * code creates names which are hard to remove when something
+ * goes wrong
+ * - returned name can be passed outside via system(); other programs
+ * may not dig '/' as a path separator
+ * - somewhat more frugal in memory use
+ * (mj - October 1990)
+ **/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+extern char * getenv(const char *);
+extern char * mktemp(char *);
+extern char * strcpy(char *, const char *);
+extern char * strcat(char *, const char *);
+extern size_t strlen(const char *s);
+static char pattern[] = "\\gwkXXXXX";
+
+char *tmpnam(buf)
+ char *buf;
+{
+ char *tmpdir;
+
+ if (!(tmpdir = getenv("TEMP")) && !(tmpdir = getenv("TMPDIR")))
+ tmpdir = ".";
+
+ if (!buf) {
+ size_t blen;
+
+ blen = strlen (tmpdir) + sizeof(pattern);
+ if (NULL == (buf = malloc(blen)))
+ return NULL;
+ }
+ (void) strcat(strcpy(buf, tmpdir), pattern);
+ return(mktemp(buf));
+}
+
+/* used by gawk_popen() */
+char *tempnam(path, base)
+const char *path, *base; /* ignored */
+{
+ return tmpnam(NULL);
+}
diff --git a/awk.h b/awk.h
index d154da26..778e4765 100644
--- a/awk.h
+++ b/awk.h
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -28,136 +28,142 @@
#include <ctype.h>
#include <setjmp.h>
#include <varargs.h>
+#include <time.h>
+#include <errno.h>
+#include <signal.h>
+
+/* ----------------- System dependencies (with more includes) -----------*/
+
+#ifndef VAXC
#include <sys/types.h>
#include <sys/stat.h>
-#include <errno.h>
+#else /* VMS w/ Digital's "VAX C" compiler */
+#include <types.h>
+#include <stat.h>
+#include <file.h> /* avoid <fcntl.h> in io.c */
+#endif /*VAXC*/
-#include "regex.h"
+#include "config.h"
+
+#ifdef __STDC__
+#define P(s) s
+#define MALLOC_ARG_T size_t
+#else
+#define P(s) ()
+#define MALLOC_ARG_T unsigned
+#define volatile
+#define const
+#endif
+
+#ifndef SIGTYPE
+#define SIGTYPE void
+#endif
+
+#ifdef SIZE_T_MISSING
+typedef unsigned int size_t;
+#endif
+
+#ifndef SZTC
+#define SZTC
+#define INTC
+#endif
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#include <string.h>
+#ifdef NeXT
+#include <libc.h>
+#undef atof
+#else
+#if defined(atarist) || defined(VMS)
+#include <unixlib.h>
+#else
+#include <unistd.h>
+#endif /* atarist || VMS */
+#endif /* NeXT */
+#else /* STDC_HEADERS */
+#include "protos.h"
+#endif /* STDC_HEADERS */
+
+#if defined(ultrix) && !defined(Ultrix41)
+extern char * getenv P((char *name));
+extern double atof P((char *s));
+#endif
-/* ------------------- System Functions, Variables, etc ------------------- */
-/* nasty nasty SunOS-ism */
#ifdef sparc
+/* nasty nasty SunOS-ism */
#include <alloca.h>
#ifdef lint
extern char *alloca();
#endif
-#else
+#else /* not sparc */
+#if (!defined(atarist)) && (!defined(NeXT)) && (!defined(alloca))
extern char *alloca();
-#endif
-#ifdef SPRINTF_INT
-extern int sprintf();
-#else /* not USG */
+#endif /* !atarist && !NeXT && !alloca */
+#endif /* sparc */
+
+#ifdef HAVE_UNDERSCORE_SETJMP
/* nasty nasty berkelixm */
#define setjmp _setjmp
#define longjmp _longjmp
-
-extern char *sprintf();
#endif
+
/*
* if you don't have vprintf, but you are BSD, the version defined in
- * vprintf.c should do the trick. Otherwise, use this and cross your fingers.
+ * vprintf.c should do the trick. Otherwise, try this and cross your fingers.
*/
#if defined(VPRINTF_MISSING) && !defined(DOPRNT_MISSING) && !defined(BSDSTDIO)
#define vfprintf(fp,fmt,arg) _doprnt((fmt), (arg), (fp))
#endif
-#ifdef __STDC__
-extern void *malloc(unsigned), *realloc(void *, unsigned);
-extern void free(char *);
-extern char *getenv(char *);
-
-extern char *strcpy(char *, char *), *strcat(char *, char *), *strncpy(char *, char *, int);
-extern int strcmp(char *, char *);
-extern int strncmp(char *, char *, int);
-extern int strncasecmp(char *, char *, int);
-extern char *strerror(int);
-extern char *strchr(char *, int);
-extern int strlen(char *);
-extern char *memcpy(char *, char *, int);
-extern int memcmp(char *, char *, int);
-extern char *memset(char *, int, int);
-
-/* extern int fprintf(FILE *, char *, ...); */
-extern int fprintf();
-extern int vfprintf();
-#ifndef MSDOS
-extern int fwrite(char *, int, int, FILE *);
-#endif
-extern int fflush(FILE *);
-extern int fclose(FILE *);
-extern int pclose(FILE *);
-#ifndef MSDOS
-extern int fputs(char *, FILE *);
+#ifdef VMS
+/* some macros to redirect to code in vms/vms_misc.c */
+#define exit vms_exit
+#define strerror vms_strerror
+#define strdup vms_strdup
+extern void exit P((int));
+extern char *strerror P((int));
+extern char *strdup P((const char *str));
+# ifndef NO_TTY_FWRITE
+#define fwrite tty_fwrite
+#define fclose tty_fclose
+extern size_t fwrite P((const void *,size_t,size_t,FILE *));
+extern int fclose P((FILE *));
+# endif
+extern void vms_arg_fixup P((int *,char ***));
+#endif /*VMS*/
+
+#if !defined(_MSC_VER) && !defined(errno)
+extern int errno; /* only when <errno.h> did not already supply errno as a macro */
#endif
-extern void abort();
-extern int isatty(int);
-extern void exit(int);
-extern int system(char *);
-extern int sscanf(/* char *, char *, ... */);
-
-extern double atof(char *);
-extern int fstat(int, struct stat *);
-extern off_t lseek(int, off_t, int);
-extern int fseek(FILE *, long, int);
-extern int close(int);
-extern int open();
-extern int pipe(int *);
-extern int dup2(int, int);
-#ifndef MSDOS
-extern int unlink(char *);
+
+#define GNU_REGEX
+#ifdef GNU_REGEX
+#include "regex.h"
+#include "dfa.h"
+typedef struct Regexp {
+ struct re_pattern_buffer pat;
+ struct re_registers regs;
+ struct regexp dfareg;
+ int dfa;
+} Regexp;
+#define RESTART(rp,s) (rp)->regs.start[0]
+#define REEND(rp,s) (rp)->regs.end[0]
+#else /* GNU_REGEX */
+#endif /* GNU_REGEX */
+
+#ifdef atarist
+#define read _text_read /* we do not want all these CR's to mess our input */
+extern int _text_read (int, char *, int);
#endif
-extern int fork();
-extern int execl(/* char *, char *, ... */);
-extern int read(int, char *, int);
-extern int wait(int *);
-extern void _exit(int);
-#else
-extern void _exit();
-extern int wait();
-extern int read();
-extern int execl();
-extern int fork();
-extern int unlink();
-extern int dup2();
-extern int pipe();
-extern int open();
-extern int close();
-extern int fseek();
-extern off_t lseek();
-extern int fstat();
-extern void exit();
-extern int system();
-extern int isatty();
-extern void abort();
-extern int fputs();
-extern int fclose();
-extern int pclose();
-extern int fflush();
-extern int fwrite();
-extern int fprintf();
-extern int vfprintf();
-extern int sscanf();
-extern char *malloc(), *realloc();
-extern void free();
-extern char *getenv();
-
-extern int strcmp();
-extern int strncmp();
-extern int strncasecmp();
-extern int strlen();
-extern char *strcpy(), *strcat(), *strncpy();
-extern char *memset();
-extern int memcmp();
-extern char *memcpy();
-extern char *strerror();
-extern char *strchr();
-
-extern double atof();
+
+#ifndef DEFPATH
+#define DEFPATH ".:/usr/local/lib/awk:/usr/lib/awk"
#endif
-#ifndef MSDOS
-extern int errno;
-#endif /* MSDOS */
+#ifndef ENVSEP
+#define ENVSEP ':'
+#endif
/* ------------------ Constants, Structures, Typedefs ------------------ */
#define AWKNUM double
@@ -230,7 +236,7 @@ typedef enum {
Node_K_printf, /* lnode is exp_list, rnode is redirect */
Node_K_next, /* no subs */
Node_K_exit, /* subnode is return value, or NULL */
- Node_K_do, /* lnode is conditional, rnode stuff to run */
+/*50*/ Node_K_do, /* lnode is conditional, rnode stuff to run */
Node_K_return,
Node_K_delete,
Node_K_getline,
@@ -244,7 +250,7 @@ typedef enum {
Node_redirect_input, /* subnode is where to redirect */
/* Variables */
- Node_var, /* rnode is value, lnode is array stuff */
+/*60*/ Node_var, /* rnode is value, lnode is array stuff */
Node_var_array, /* array is ptr to elements, asize num of
* eles */
Node_val, /* node is a value - type in flags */
@@ -270,8 +276,19 @@ typedef enum {
Node_cond_exp, /* lnode is conditonal, rnode is if_branches */
Node_regex,
- Node_hashnode,
+/*70*/ Node_hashnode,
Node_ahash,
+ Node_NF,
+ Node_NR,
+ Node_FNR,
+ Node_FS,
+ Node_RS,
+ Node_FIELDWIDTHS,
+ Node_IGNORECASE,
+ Node_OFS,
+ Node_ORS,
+ Node_OFMT,
+ Node_CONVFMT
} NODETYPE;
/*
@@ -284,20 +301,24 @@ typedef struct exp_node {
union {
struct exp_node *lptr;
char *param_name;
- char *retext;
- struct exp_node *nextnode;
} l;
union {
struct exp_node *rptr;
struct exp_node *(*pptr) ();
- struct re_pattern_buffer *preg;
+ Regexp *preg;
struct for_loop_header *hd;
struct exp_node **av;
int r_ent; /* range entered */
} r;
- char *name;
+ union {
+ char *name;
+ struct exp_node *extra;
+ } x;
short number;
- unsigned char recase;
+ unsigned char reflags;
+# define CASE 1
+# define CONST 2
+# define FS_DFLT 4
} nodep;
struct {
AWKNUM fltnum; /* this is here for optimal packing of
@@ -306,6 +327,7 @@ typedef struct exp_node {
char *sp;
short slen;
unsigned char sref;
+ char idx;
} val;
struct {
struct exp_node *next;
@@ -327,21 +349,24 @@ typedef struct exp_node {
#define ahvalue sub.ahash.value
} sub;
NODETYPE type;
- unsigned char flags;
+ unsigned short flags;
# define MEM 0x7
# define MALLOC 1 /* can be free'd */
# define TEMP 2 /* should be free'd */
# define PERM 4 /* can't be free'd */
# define VAL 0x18
-# define NUM 8 /* numeric value is valid */
-# define STR 16 /* string value is valid */
-# define NUMERIC 32 /* entire field is numeric */
+# define NUM 8 /* numeric value is current */
+# define STR 16 /* string value is current */
+# define NUMERIC 32 /* entire string is numeric */
+# define NUMBER 64 /* assigned as number */
+# define STRING 128 /* assigned as string */
+# define MAYBE_NUM 256
} NODE;
#define lnode sub.nodep.l.lptr
-#define nextp sub.nodep.l.nextnode
+#define nextp sub.nodep.l.lptr
#define rnode sub.nodep.r.rptr
-#define source_file sub.nodep.name
+#define source_file sub.nodep.x.name
#define source_line sub.nodep.number
#define param_cnt sub.nodep.number
#define param sub.nodep.l.param_name
@@ -349,10 +374,11 @@ typedef struct exp_node {
#define subnode lnode
#define proc sub.nodep.r.pptr
-#define reexp lnode
-#define rereg sub.nodep.r.preg
-#define re_case sub.nodep.recase
-#define re_text sub.nodep.l.retext
+#define re_reg sub.nodep.r.preg
+#define re_flags sub.nodep.reflags
+#define re_text lnode
+#define re_exp sub.nodep.x.extra
+#define re_cnt sub.nodep.number
#define forsub lnode
#define forloop rnode->sub.nodep.r.hd
@@ -360,7 +386,7 @@ typedef struct exp_node {
#define stptr sub.val.sp
#define stlen sub.val.slen
#define stref sub.val.sref
-#define valstat flags
+#define stfmt sub.val.idx
#define numbr sub.val.fltnum
@@ -370,7 +396,11 @@ typedef struct exp_node {
#define condpair lnode
#define triggered sub.nodep.r.r_ent
-#define HASHSIZE 101
+#ifdef DONTDEF
+int primes[] = {31, 61, 127, 257, 509, 1021, 2053, 4099, 8191, 16381};
+#endif
+/* a quick profile suggests that the following is a good value */
+#define HASHSIZE 127
typedef struct for_loop_header {
NODE *init;
@@ -380,8 +410,8 @@ typedef struct for_loop_header {
/* for "for(iggy in foo) {" */
struct search {
- int numleft;
NODE **arr_ptr;
+ NODE **arr_end;
NODE *bucket;
NODE *retval;
};
@@ -391,31 +421,34 @@ typedef struct iobuf {
int fd;
char *buf;
char *off;
- int size; /* this will be determined by an fstat() call */
+ char *end;
+ size_t size; /* this will be determined by an fstat() call */
int cnt;
char *secbuf;
- int secsiz;
+ size_t secsiz;
int flag;
# define IOP_IS_TTY 1
} IOBUF;
+typedef void (*Func_ptr)();
+
/*
* structure used to dynamically maintain a linked-list of open files/pipes
*/
struct redirect {
- int flag;
+ unsigned int flag;
# define RED_FILE 1
# define RED_PIPE 2
# define RED_READ 4
# define RED_WRITE 8
# define RED_APPEND 16
# define RED_NOBUF 32
+# define RED_USED 64
char *value;
FILE *fp;
IOBUF *iop;
int pid;
int status;
- long offset; /* used for dynamic management of open files */
struct redirect *prev;
struct redirect *next;
};
@@ -428,7 +461,7 @@ struct redirect {
/* Return means return from a function call; leave value in ret_node */
#define TAG_RETURN 3
-#ifdef MSDOS
+#if defined(MSDOS) || (defined(atarist)) && (defined(__MSHORT__))
#define HUGE 0x7fff
#else
#define HUGE 0x7fffffff
@@ -436,156 +469,299 @@ struct redirect {
/* -------------------------- External variables -------------------------- */
/* gawk builtin variables */
+extern int NF;
+extern int NR;
+extern int FNR;
+extern int IGNORECASE;
+extern char *FS;
+extern char *RS;
+extern char *OFS;
+extern int OFSlen;
+extern char *ORS;
+extern int ORSlen;
+extern char *OFMT;
+extern char *CONVFMT;
+extern int CONVFMTidx;
+extern int OFMTidx;
extern NODE *FS_node, *NF_node, *RS_node, *NR_node;
extern NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node;
+extern NODE *CONVFMT_node;
extern NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node;
extern NODE *IGNORECASE_node;
+extern NODE *FIELDWIDTHS_node;
extern NODE **stack_ptr;
extern NODE *Nnull_string;
-extern NODE *deref;
extern NODE **fields_arr;
extern int sourceline;
extern char *source;
extern NODE *expression_value;
-extern NODE *variables[];
-
extern NODE *_t; /* used as temporary in tree_eval */
-extern char *myname;
+extern const char *myname;
-extern int node0_valid;
-extern int field_num;
+extern NODE *nextfree;
+extern int field0_valid;
extern int strict;
+extern int do_posix;
+extern int do_lint;
/* ------------------------- Pseudo-functions ------------------------- */
+
#define is_identchar(c) (isalnum(c) || (c) == '_')
-#define free_temp(n) if ((n)->flags&TEMP) { deref = (n); do_deref(); } else
+#ifndef MPROF
+#define getnode(n) if (nextfree) n = nextfree, nextfree = nextfree->nextp;\
+ else n = more_nodes()
+#define freenode(n) ((n)->nextp = nextfree, nextfree = (n))
+#else
+#define getnode(n) emalloc(n, NODE *, sizeof(NODE), "getnode")
+#define freenode(n) free(n)
+#endif
+
+#ifdef DEBUG
+#define tree_eval(t) r_tree_eval(t)
+#else
#define tree_eval(t) (_t = (t),(_t) == NULL ? Nnull_string : \
- ((_t)->type == Node_val ? (_t) : r_tree_eval((_t))))
-#define make_string(s,l) make_str_node((s),(l),0)
+ ((_t)->type == Node_val ? (_t) : \
+ ((_t)->type == Node_var ? (_t)->var_value : \
+ ((_t)->type == Node_param_list ? \
+ (stack_ptr[(_t)->param_cnt])->var_value : \
+ r_tree_eval((_t))))))
+#endif
+
+#define make_number(x) mk_number((x), (MALLOC|NUM|NUMERIC|NUMBER))
+#define tmp_number(x) mk_number((x), (MALLOC|TEMP|NUM|NUMERIC|NUMBER))
+
+#define free_temp(n) if ((n)->flags&TEMP) { unref(n); } else
+#define make_string(s,l) make_str_node((s), SZTC (l),0)
+#define SCAN 1
+#define ALREADY_MALLOCED 2
#define cant_happen() fatal("line %d, file: %s; bailing out", \
- __LINE__, __FILE__);
+ __LINE__, basename(__FILE__));
#ifdef MEMDEBUG
-#define memmsg(x,y,z,zz) fprintf(stderr, "malloc: %s: %s: %d %0x\n", z, x, y, zz)
-#define free(s) fprintf(stderr, "free: s: %0x\n", s), do_free(s)
+#define memmsg(X,Y,Z,ZZ) /* ZZ is a pointer: cast it to match %lx, as free() below does */ \
+	fprintf(stdout, "malloc: %s: %s: %ld 0x%08lx\n", Z, X, (long)(Y), (long)(ZZ))
+#if defined(__STDC__) && !defined(NO_TOKEN_PASTING)
+#define free(s) fprintf(stdout, "free: %s: 0x%08lx\n", #s, (long)s), do_free(s)
#else
-#define memmsg(x,y,z,zz)
+#define free(s) fprintf(stdout, "free: s: 0x%08lx\n", (long)s), do_free(s)
#endif
+#else /* MEMDEBUG */
+#define memmsg(x,y,z,zz)
+#endif /* MEMDEBUG */
-#define emalloc(var,ty,x,str) if ((var = (ty) malloc((unsigned)(x))) == NULL)\
+#if defined(__STDC__) && !defined(NO_TOKEN_PASTING)
+#define emalloc(var,ty,x,str) if ((var=(ty)malloc((MALLOC_ARG_T)(x)))==NULL)\
+ fatal("%s: %s: can't allocate memory (%s)",\
+ (str), #var, strerror(errno));\
+ else memmsg(#var, x, str, var)
+#define erealloc(var,ty,x,str) if((var=(ty)realloc((char *)var,\
+ (MALLOC_ARG_T)(x)))==NULL)\
+ fatal("%s: %s: can't allocate memory (%s)",\
+ (str), #var, strerror(errno));\
+ else memmsg("re:" #var, x, str, var)
+#else /* __STDC__ */
+#define emalloc(var,ty,x,str) if ((var=(ty)malloc((MALLOC_ARG_T)(x)))==NULL)\
fatal("%s: %s: can't allocate memory (%s)",\
- (str), "var", strerror(errno)); else\
- memmsg("var", x, str, var)
+ (str), "var", strerror(errno));\
+ else memmsg("var", x, str, var)
#define erealloc(var,ty,x,str) if((var=(ty)realloc((char *)var,\
- (unsigned)(x)))==NULL)\
+ (MALLOC_ARG_T)(x)))==NULL)\
fatal("%s: %s: can't allocate memory (%s)",\
- (str), "var", strerror(errno)); else\
- memmsg("re: var", x, str, var)
+ (str), "var", strerror(errno));\
+ else memmsg("re: var", x, str, var)
+#endif /* __STDC__ */
+
#ifdef DEBUG
#define force_number r_force_number
#define force_string r_force_string
-#else
+#else /* not DEBUG */
#ifdef lint
extern AWKNUM force_number();
#endif
#ifdef MSDOS
extern double _msc51bug;
#define force_number(n) (_msc51bug=(_t = (n),(_t->flags & NUM) ? _t->numbr : r_force_number(_t)))
-#else
+#else /* not MSDOS */
#define force_number(n) (_t = (n),(_t->flags & NUM) ? _t->numbr : r_force_number(_t))
-#endif
+#endif /* MSDOS */
#define force_string(s) (_t = (s),(_t->flags & STR) ? _t : r_force_string(_t))
-#endif
+#endif /* not DEBUG */
#define STREQ(a,b) (*(a) == *(b) && strcmp((a), (b)) == 0)
-#define STREQN(a,b,n) ((n) && *(a) == *(b) && strncmp((a), (b), (n)) == 0)
-
-#define WHOLELINE (node0_valid ? fields_arr[0] : *get_field(0,0))
+#define STREQN(a,b,n) ((n)&& *(a)== *(b) && strncmp((a), (b), SZTC (n)) == 0)
/* ------------- Function prototypes or defs (as appropriate) ------------- */
-#ifdef __STDC__
-extern int parse_escape(char **);
-extern int devopen(char *, char *);
-extern struct re_pattern_buffer *make_regexp(NODE *, int);
-extern struct re_pattern_buffer *mk_re_parse(char *, int);
-extern NODE *variable(char *);
-extern NODE *install(NODE **, char *, NODE *);
-extern NODE *lookup(NODE **, char *);
-extern NODE *make_name(char *, NODETYPE);
-extern int interpret(NODE *);
-extern NODE *r_tree_eval(NODE *);
-extern void assign_number(NODE **, double);
-extern int cmp_nodes(NODE *, NODE *);
-extern struct redirect *redirect(NODE *, int *);
-extern int flush_io(void);
-extern void print_simple(NODE *, FILE *);
-/* extern void warning(char *,...); */
-extern void warning();
-/* extern void fatal(char *,...); */
-extern void fatal();
-extern void set_record(char *, int);
-extern NODE **get_field(int, int);
-extern NODE **get_lhs(NODE *, int);
-extern void do_deref(void );
-extern struct search *assoc_scan(NODE *);
-extern struct search *assoc_next(struct search *);
-extern NODE **assoc_lookup(NODE *, NODE *);
-extern double r_force_number(NODE *);
-extern NODE *r_force_string(NODE *);
-extern NODE *newnode(NODETYPE);
-extern NODE *dupnode(NODE *);
-extern NODE *make_number(double);
-extern NODE *tmp_number(double);
-extern NODE *make_str_node(char *, int, int);
-extern NODE *tmp_string(char *, int);
-extern char *re_compile_pattern(char *, int, struct re_pattern_buffer *);
-extern int re_search(struct re_pattern_buffer *, char *, int, int, int, struct re_registers *);
-extern void freenode(NODE *);
-#else
-extern int parse_escape();
-extern void freenode();
-extern int devopen();
-extern struct re_pattern_buffer *make_regexp();
-extern struct re_pattern_buffer *mk_re_parse();
-extern NODE *variable();
-extern NODE *install();
-extern NODE *lookup();
-extern int interpret();
-extern NODE *r_tree_eval();
-extern void assign_number();
-extern int cmp_nodes();
-extern struct redirect *redirect();
-extern int flush_io();
-extern void print_simple();
-extern void warning();
-extern void fatal();
-extern void set_record();
-extern NODE **get_field();
-extern NODE **get_lhs();
-extern void do_deref();
-extern struct search *assoc_scan();
-extern struct search *assoc_next();
-extern NODE **assoc_lookup();
-extern double r_force_number();
-extern NODE *r_force_string();
-extern NODE *newnode();
-extern NODE *dupnode();
-extern NODE *make_number();
-extern NODE *tmp_number();
-extern NODE *make_str_node();
-extern NODE *tmp_string();
-extern char *re_compile_pattern();
-extern int re_search();
+extern void set_NF();
+extern void set_FIELDWIDTHS();
+extern void set_NR();
+extern void set_FNR();
+extern void set_FS();
+extern void set_RS();
+extern void set_IGNORECASE();
+extern void set_OFMT();
+extern void set_CONVFMT();
+extern void set_OFS();
+extern void set_ORS();
+
+/* array.c */
+extern NODE *concat_exp P((NODE *tree));
+extern void assoc_clear P((NODE *symbol));
+extern unsigned int hash P((char *s, int len));
+extern int in_array P((NODE *symbol, NODE *subs));
+extern NODE **assoc_lookup P((NODE *symbol, NODE *subs));
+extern void do_delete P((NODE *symbol, NODE *tree));
+extern void assoc_scan P((NODE *symbol, struct search *lookat));
+extern void assoc_next P((struct search *lookat));
+/* awk.tab.c */
+extern char *tokexpand P((void));
+extern char nextc P((void));
+extern NODE *node P((NODE *left, NODETYPE op, NODE *right));
+extern NODE *install P((char *name, NODE *value));
+extern NODE *lookup P((char *name));
+extern NODE *variable P((char *name, int can_free));
+extern int yyparse P((void));
+/* builtin.c */
+extern NODE *do_exp P((NODE *tree));
+extern NODE *do_index P((NODE *tree));
+extern NODE *do_int P((NODE *tree));
+extern NODE *do_length P((NODE *tree));
+extern NODE *do_log P((NODE *tree));
+extern NODE *do_sprintf P((NODE *tree));
+extern void do_printf P((NODE *tree));
+extern void print_simple P((NODE *tree, FILE *fp));
+extern NODE *do_sqrt P((NODE *tree));
+extern NODE *do_substr P((NODE *tree));
+extern NODE *do_strftime P((NODE *tree));
+extern NODE *do_systime P((NODE *tree));
+extern NODE *do_system P((NODE *tree));
+extern void do_print P((NODE *tree));
+extern NODE *do_tolower P((NODE *tree));
+extern NODE *do_toupper P((NODE *tree));
+extern NODE *do_atan2 P((NODE *tree));
+extern NODE *do_sin P((NODE *tree));
+extern NODE *do_cos P((NODE *tree));
+extern NODE *do_rand P((NODE *tree));
+extern NODE *do_srand P((NODE *tree));
+extern NODE *do_match P((NODE *tree));
+extern NODE *do_gsub P((NODE *tree));
+extern NODE *do_sub P((NODE *tree));
+/* debug.c */
+extern int ptree P((NODE *n));
+extern NODE *pt P((void));
+extern int print_parse_tree P((NODE *ptr));
+extern int dump_vars P((void));
+extern int dump_fields P((void));
+extern int print_debug P((char *str, void * n));
+extern int print_a_node P((NODE *ptr));
+extern int print_maybe_semi P((NODE *ptr));
+extern int deal_with_curls P((NODE *ptr));
+extern NODE *do_prvars P((void));
+extern NODE *do_bp P((void));
+extern void do_free P((char *s));
+/* dfa.c */
+extern void regsyntax P((int bits, int fold));
+extern void regparse P((const char *s, size_t len, struct regexp *r));
+extern void reganalyze P((struct regexp *r, int searchflag));
+extern void regstate P((int s, struct regexp *r, int trans[]));
+extern char *regexecute P((struct regexp *r, char *begin,
+ char *end, int newline, int *count, int *backref));
+extern void reginit P((struct regexp *r));
+extern void regcompile P((const char *s, size_t len,
+ struct regexp *r, int searchflag));
+extern void regfree P((struct regexp *r));
+/* eval.c */
+extern int interpret P((NODE *tree));
+extern NODE *r_tree_eval P((NODE *tree));
+extern int cmp_nodes P((NODE *t1, NODE *t2));
+extern NODE **get_lhs P((NODE *ptr, Func_ptr *assign));
+extern void set_IGNORECASE P((void));
+/* field.c */
+extern void init_fields P((void));
+extern void set_record P((char *buf, int cnt, int freeold));
+extern void reset_record P((void));
+extern void set_NF P((void));
+extern NODE **get_field P((int num, Func_ptr *assign));
+extern NODE *do_split P((NODE *tree));
+extern void set_FS P((void));
+extern void set_RS P((void));
+extern void set_FIELDWIDTHS P((void));
+/* io.c */
+extern void set_FNR P((void));
+extern void set_NR P((void));
+extern void do_input P((void));
+extern struct redirect *redirect P((NODE *tree, int *errflg));
+extern NODE *do_close P((NODE *tree));
+extern int flush_io P((void));
+extern int close_io P((void));
+extern int devopen P((char *name, char *mode));
+extern int pathopen P((char *file));
+extern NODE *do_getline P((NODE *tree));
+/* iop.c */
+extern int optimal_bufsize P((int fd));
+extern IOBUF *iop_alloc P((int fd));
+extern int get_a_record P((char **out, IOBUF *iop, int rs));
+/* main.c */
+extern int main P((int argc, char **argv));
+extern Regexp *mk_re_parse P((char *s, int ignorecase));
+extern void load_environ P((void));
+extern char *arg_assign P((char *arg));
+extern SIGTYPE catchsig P((int sig, int code));
+extern const char *basename P((const char *));
+/* msg.c */
+#if 0 /* old varargs.h stuff */
+extern void msg P((int va_alist));
+extern void warning P((int va_alist));
+extern void fatal P((int va_alist));
#endif
-
-#if !defined(__STDC__) || __STDC__ <= 0
-#define volatile
+void msg ();
+void warning ();
+void fatal ();
+/* node.c */
+extern AWKNUM r_force_number P((NODE *n));
+extern NODE *r_force_string P((NODE *s));
+extern NODE *dupnode P((NODE *n));
+extern NODE *mk_number P((AWKNUM x, unsigned int flags));
+extern NODE *make_str_node P((char *s, size_t len, int scan ));
+extern NODE *tmp_string P((char *s, size_t len ));
+extern NODE *more_nodes P((void));
+#ifdef DEBUG
+extern void freenode P((NODE *it));
+#endif
+extern void unref P((NODE *tmp));
+extern int parse_escape P((char **string_ptr));
+/* re.c */
+extern Regexp *make_regexp P((NODE *s, int ignorecase, int dfa));
+extern int research P((Regexp *rp, char *str, int len, int need_start));
+extern void refree P((Regexp *rp));
+extern void regerror P((const char *s));
+extern Regexp *re_update P((NODE *t));
+/* regex.c */
+extern int re_set_syntax P((int syntax));
+extern char *re_compile_pattern P((char *pattern,
+ size_t size,
+ struct re_pattern_buffer *bufp ));
+
+extern int re_search P((struct re_pattern_buffer *pbufp,
+ char *string,
+ int size,
+ int startpos,
+ int range,
+ struct re_registers *regs ));
+extern void re_compile_fastmap P((struct re_pattern_buffer *bufp));
+/* strcase.c */
+extern int strcasecmp P((const char *s1, const char *s2));
+extern int strncasecmp P((const char *s1, const char *s2, register size_t n));
+
+#ifdef atarist
+/* atari/tmpnam.c */
+extern char *tmpnam P((char *buf));
+extern char *tempnam P((const char *path, const char *base));
#endif
/* Figure out what '\a' really is. */
@@ -603,8 +779,4 @@ extern int re_search();
# endif
#endif
-#ifndef SIGTYPE
-#define SIGTYPE void
-#endif
-
extern char casetable[]; /* for case-independent regexp matching */
diff --git a/awk.tab.c b/awk.tab.c
new file mode 100644
index 00000000..c61d2553
--- /dev/null
+++ b/awk.tab.c
@@ -0,0 +1,2802 @@
+
+# line 27 "awk.y"
+#ifdef DEBUG
+#define YYDEBUG 12
+#endif
+
+#define YYMAXDEPTH 300
+
+#include "awk.h"
+
+static void yyerror (); /* va_alist */
+static char *get_src_buf P((void));
+static int yylex P((void));
+static NODE *node_common P((NODETYPE op));
+static NODE *snode P((NODE *subn, NODETYPE op, int index));
+static NODE *mkrangenode P((NODE *cpair));
+static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
+static NODE *append_right P((NODE *list, NODE *new));
+static void func_install P((NODE *params, NODE *def));
+static void pop_var P((NODE *np, int freeit));
+static void pop_params P((NODE *params));
+static NODE *make_param P((char *name));
+static NODE *mk_rexp P((NODE *exp));
+
+/*
+ * State shared between the grammar actions and the lexer (yylex,
+ * get_src_buf, yyerror).
+ */
+/*
+ * want_assign: yylex sets it after `$' and after a NAME, clears it after
+ * `~', `!~', `&&' and `||', and consults it when it sees `/' to decide
+ * whether a following `=' makes the token `/='.
+ */
+static int want_assign; /* lexical scanning kludge */
+/* want_regexp: when nonzero, yylex scans a regexp body and clears the flag */
+static int want_regexp; /* lexical scanning kludge */
+static int can_return; /* lexical scanning kludge */
+static int io_allowed = 1; /* lexical scanning kludge */
+static char *lexptr; /* pointer to next char during parsing */
+/* one past the last valid character of the current buffer (see nextc) */
+static char *lexend;
+/* lowest address pushback() will retreat to; also bounds line searches */
+static char *lexptr_begin; /* keep track of where we were for error msgs */
+static char *lexeme; /* beginning of lexeme for debugging */
+/* start of the line shown by yyerror; yylex resets it to NULL per token */
+static char *thisline = NULL;
+#define YYDEBUG_LEXER_TEXT (lexeme)
+/* running index that make_param stores in each parameter's param_cnt */
+static int param_counter;
+/* token buffer: tokstart is its base, token the next free slot, and */
+/* tokend its end; tokexpand() grows it */
+static char *tokstart = NULL;
+static char *token = NULL;
+static char *tokend;
+
+NODE *variables[HASHSIZE];
+
+extern char *source;
+extern int sourceline;
+extern char *cmdline_src;
+extern char **srcfiles;
+extern int errcount;
+extern NODE *begin_block;
+extern NODE *end_block;
+
+# line 75 "awk.y"
+typedef union {
+ long lval;
+ AWKNUM fval;
+ NODE *nodeval;
+ NODETYPE nodetypeval;
+ char *sval;
+ NODE *(*ptrval)();
+} YYSTYPE;
+# define FUNC_CALL 257
+# define NAME 258
+# define REGEXP 259
+# define ERROR 260
+# define YNUMBER 261
+# define YSTRING 262
+# define RELOP 263
+# define APPEND_OP 264
+# define ASSIGNOP 265
+# define MATCHOP 266
+# define NEWLINE 267
+# define CONCAT_OP 268
+# define LEX_BEGIN 269
+# define LEX_END 270
+# define LEX_IF 271
+# define LEX_ELSE 272
+# define LEX_RETURN 273
+# define LEX_DELETE 274
+# define LEX_WHILE 275
+# define LEX_DO 276
+# define LEX_FOR 277
+# define LEX_BREAK 278
+# define LEX_CONTINUE 279
+# define LEX_PRINT 280
+# define LEX_PRINTF 281
+# define LEX_NEXT 282
+# define LEX_EXIT 283
+# define LEX_FUNCTION 284
+# define LEX_GETLINE 285
+# define LEX_IN 286
+# define LEX_AND 287
+# define LEX_OR 288
+# define INCREMENT 289
+# define DECREMENT 290
+# define LEX_BUILTIN 291
+# define LEX_LENGTH 292
+# define UNARY 293
+#define yyclearin yychar = -1
+#define yyerrok yyerrflag = 0
+extern int yychar;
+extern int yyerrflag;
+#ifndef YYMAXDEPTH
+#define YYMAXDEPTH 150
+#endif
+YYSTYPE yylval, yyval;
+# define YYERRCODE 256
+
+# line 709 "awk.y"
+
+
+/*
+ * One entry of the keyword/builtin table (tokentab).  yylex looks an
+ * identifier up by `operator'; snode uses `flags' and `ptr' when it
+ * builds the call node for a builtin.
+ */
+struct token {
+ char *operator; /* text to match */
+ NODETYPE value; /* node type */
+ int class; /* lexical class */
+ unsigned flags; /* # of args. allowed and compatibility */
+# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination) */
+# define A(n) (1<<(n)) /* bit meaning "n arguments are acceptable" */
+# define VERSION 0xFF00 /* old awk is zero */
+# define NOT_OLD 0x0100 /* feature not in old awk */
+# define NOT_POSIX 0x0200 /* feature not in POSIX */
+# define GAWK 0x0400 /* gawk extension */
+ NODE *(*ptr) (); /* function that implements this keyword */
+};
+
+extern NODE
+ *do_exp(), *do_getline(), *do_index(), *do_length(),
+ *do_sqrt(), *do_log(), *do_sprintf(), *do_substr(),
+ *do_split(), *do_system(), *do_int(), *do_close(),
+ *do_atan2(), *do_sin(), *do_cos(), *do_rand(),
+ *do_srand(), *do_match(), *do_tolower(), *do_toupper(),
+ *do_sub(), *do_gsub(), *do_strftime(), *do_systime();
+
+/*
+ * Tokentab is sorted in ascending ASCII order, so it can be binary searched.
+ * (Upper-case letters sort before lower-case ones, hence BEGIN and END
+ * come first.)  Keep it sorted when adding entries: yylex relies on it.
+ *
+ * Columns: keyword text, node type, lexical class returned by yylex,
+ * flags (version bits or'ed with the A(n) argument counts), implementing
+ * function.  For LEX_BUILTIN and LEX_LENGTH entries yylex hands the table
+ * index to the grammar in yylval.lval.  An entry whose ARGS bits are all
+ * zero (e.g. sprintf) gets no argument-count check in snode.
+ */
+
+static struct token tokentab[] = {
+{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0},
+{"END", Node_illegal, LEX_END, 0, 0},
+{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
+{"break", Node_K_break, LEX_BREAK, 0, 0},
+{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_close},
+{"continue", Node_K_continue, LEX_CONTINUE, 0, 0},
+{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
+{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0},
+{"do", Node_K_do, LEX_DO, NOT_OLD, 0},
+{"else", Node_illegal, LEX_ELSE, 0, 0},
+{"exit", Node_K_exit, LEX_EXIT, 0, 0},
+{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
+{"for", Node_K_for, LEX_FOR, 0, 0},
+{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
+{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
+{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
+{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
+{"if", Node_K_if, LEX_IF, 0, 0},
+{"in", Node_illegal, LEX_IN, 0, 0},
+{"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
+{"int", Node_builtin, LEX_BUILTIN, A(1), do_int},
+{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length},
+{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
+{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match},
+{"next", Node_K_next, LEX_NEXT, 0, 0},
+{"print", Node_K_print, LEX_PRINT, 0, 0},
+{"printf", Node_K_printf, LEX_PRINTF, 0, 0},
+{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
+{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0},
+{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
+{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split},
+{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
+{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
+{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
+{"strftime", Node_builtin, LEX_BUILTIN, GAWK|A(1)|A(2), do_strftime},
+{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
+{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
+{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
+{"systime", Node_builtin, LEX_BUILTIN, GAWK|A(0), do_systime},
+{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
+{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
+{"while", Node_K_while, LEX_WHILE, 0, 0},
+};
+
+/* VARARGS0 */
+/*
+ * yyerror --- report a syntax error and terminate.
+ *
+ * Arguments are a printf-style format followed by its values (old
+ * <varargs.h> convention).  Prints "syntax error" via msg(), then the
+ * offending source line, a caret under the start of the current lexeme,
+ * and the formatted message, all on stderr; then exits with status 2.
+ * Bumps errcount.  Never returns.
+ */
+static void
+yyerror(va_alist)
+va_dcl
+{
+	va_list args;
+	char *mesg;
+	register char *ptr, *beg;
+	char *scan;
+
+	errcount++;
+	/* Find the current line in the input file */
+	if (lexptr && lexeme) {
+		if (!thisline) {
+			beg = lexeme;
+			/*
+			 * If the offending token is the newline itself, the
+			 * line to show is the one it terminates.  Without
+			 * this step back, thisline would land one past
+			 * lexeme and the precision below would be negative,
+			 * which makes fprintf print up to the next NUL.
+			 */
+			if (*beg == '\n' && beg != lexptr_begin)
+				--beg;
+			while (beg != lexptr_begin && *beg != '\n')
+				--beg;
+			/* step over the newline that ends the previous line */
+			if (*beg == '\n' && beg != lexeme)
+				beg++;
+			thisline = beg;
+		}
+		/* NL isn't guaranteed */
+		ptr = lexeme;
+		while (ptr < lexend && *ptr && *ptr != '\n')
+			ptr++;
+		/* thisline may have been set by get_src_buf; never go negative */
+		if (ptr < thisline)
+			ptr = thisline;
+	} else {
+		thisline = "(END OF FILE)";
+		ptr = thisline + 13;
+	}
+	msg("syntax error");
+	fprintf(stderr, "%.*s\n", (int) (ptr - thisline), thisline);
+	if (lexptr && lexeme) {
+		/* indent the caret, keeping tabs so the columns line up */
+		for (scan = thisline; scan < lexeme; scan++)
+			putc(*scan == '\t' ? '\t' : ' ', stderr);
+		putc('^', stderr);
+		putc(' ', stderr);
+	}
+	va_start(args);
+	mesg = va_arg(args, char *);
+	vfprintf(stderr, mesg, args);
+	va_end(args);
+	putc('\n', stderr);
+	exit(2);
+}
+
+/*
+ * get_src_buf --- make the next piece of program text available.
+ *
+ * Called (through nextc) when the current buffer is exhausted.  On
+ * success lexptr/lexend delimit the new text and a non-NULL pointer is
+ * returned; at end of all input lexptr and lexptr_begin are set to NULL
+ * and NULL is returned.  It is safe to call again after end of input.
+ *
+ * Command-line source (cmdline_src): handed out whole on the first call;
+ * if it does not end in a newline, a second call supplies a private copy
+ * of the last line with '\n' appended.
+ *
+ * Source files (srcfiles[]): read in optimal_bufsize() chunks into a
+ * buffer that is over-allocated by SLOP bytes at the front; the tail of
+ * the line being scanned is kept there so that lexeme and thisline stay
+ * usable for error messages across a refill.
+ */
+static char *
+get_src_buf()
+{
+	static int samefile = 0;
+	static int nextfile = 0;
+	static char *buf = NULL;
+	static int fd;
+	int n;
+	register char *scan;
+	static int len = 0;
+	static int did_newline = 0;
+#	define	SLOP	128	/* enough space to hold most source lines */
+
+	if (cmdline_src) {
+		if (len == 0) {
+			len = strlen(cmdline_src);
+			if (len == 0)
+				cmdline_src = NULL;
+			sourceline = 1;
+			lexptr = lexptr_begin = cmdline_src;
+			lexend = lexptr + len;
+		} else if (lexptr != NULL && !did_newline && *(lexptr-1) != '\n') {
+			/*
+			 * The following goop is to ensure that the source
+			 * ends with a newline and that the entire current
+			 * line is available for error messages.
+			 * (The lexptr test keeps a call made after end of
+			 * input from looking at lexptr[-1] through NULL.)
+			 */
+			int offset;
+
+			did_newline = 1;
+			offset = lexptr - lexeme;
+			for (scan = lexeme; scan > lexptr_begin; scan--)
+				if (*scan == '\n') {
+					scan++;
+					break;
+				}
+			len = lexptr - scan;
+			emalloc(buf, char *, len+1, "get_src_buf");
+			memcpy(buf, scan, len);
+			thisline = buf;
+			lexptr = buf + len;
+			*lexptr = '\n';
+			lexeme = lexptr - offset;
+			lexptr_begin = buf;
+			lexend = lexptr + 1;
+		} else
+			lexptr = lexptr_begin = NULL;
+		return lexptr;
+	}
+	if (!samefile) {
+		source = srcfiles[nextfile];
+		if (source == NULL) {
+			/* forget the buffer so a repeated call can't free it twice */
+			if (buf) {
+				free(buf);
+				buf = NULL;
+			}
+			return lexptr = lexptr_begin = NULL;
+		}
+		fd = pathopen(source);
+		if (fd == -1)
+			fatal("can't open source file \"%s\" for reading (%s)",
+				source, strerror(errno));
+		len = optimal_bufsize(fd);
+		if (buf)
+			free(buf);
+		emalloc(buf, char *, len + SLOP, "get_src_buf");
+		lexptr_begin = buf + SLOP;
+		samefile = 1;
+		sourceline = 1;
+	} else {
+		/*
+		 * Here, we retain the current source line (up to length SLOP)
+		 * in the beginning of the buffer that was overallocated above.
+		 * When the line is longer than SLOP only its tail is kept;
+		 * the read size `len' must not be touched.
+		 */
+		int offset;
+		int linelen;
+		int keep;
+		register char *dst;
+
+		offset = lexptr - lexeme;
+		for (scan = lexeme; scan > lexptr_begin; scan--)
+			if (*scan == '\n') {
+				scan++;
+				break;
+			}
+		linelen = lexptr - scan;
+		if (linelen > SLOP)
+			linelen = SLOP;
+		if (offset > SLOP)
+			offset = SLOP;
+		/* keep enough bytes for both thisline and lexeme to be valid */
+		keep = (offset > linelen) ? offset : linelen;
+		/*
+		 * Copy the last `keep' bytes down into the slop area.  The
+		 * destination always lies below the source, so a forward
+		 * byte copy is correct even if the two regions overlap.
+		 */
+		dst = buf + SLOP - keep;
+		for (scan = lexptr - keep; scan < lexptr; )
+			*dst++ = *scan++;
+		thisline = buf + SLOP - linelen;
+		lexeme = buf + SLOP - offset;
+		lexptr_begin = buf + SLOP - keep;
+	}
+	n = read(fd, buf + SLOP, len);
+	if (n == -1)
+		fatal("can't read sourcefile \"%s\" (%s)",
+			source, strerror(errno));
+	if (n == 0) {
+		/* this file is done: release it and move on to the next one */
+		(void) close(fd);
+		samefile = 0;
+		nextfile++;
+		return get_src_buf();
+	}
+	lexptr = buf + SLOP;
+	lexend = lexptr + n;
+	return buf;
+}
+
+/* append one character to the token buffer, growing the buffer when full */
+#define tokadd(x) (*token++ = (x), token == tokend ? tokexpand() : token)
+
+/*
+ * tokexpand --- double the token buffer (allocating it on first use),
+ * keep `token' at the same offset from the start, and return it.
+ */
+char *
+tokexpand()
+{
+	static int toksize = 60;
+	int used = token - tokstart;
+
+	toksize += toksize;
+	if (tokstart == NULL) {
+		emalloc(tokstart, char *, toksize, "tokexpand");
+	} else {
+		erealloc(tokstart, char *, toksize, "tokexpand");
+	}
+	token = tokstart + used;
+	tokend = tokstart + toksize;
+	return token;
+}
+
+/*
+ * nextc --- fetch the next source character, refilling the buffer through
+ * get_src_buf() when it runs dry; yields '\0' at end of input.
+ * A real function when DEBUG is defined, a macro otherwise.
+ */
+#ifdef DEBUG
+char
+nextc()
+{
+	if (lexptr == NULL || lexptr >= lexend) {
+		if (!get_src_buf())
+			return '\0';
+	}
+	return *lexptr++;
+}
+#else
+#define nextc()	((lexptr && lexptr < lexend) \
+			? *lexptr++ \
+			: (get_src_buf() ? *lexptr++ : '\0'))
+#endif
+/* pushback --- step back one character, never below lexptr_begin */
+#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr)
+
+/*
+ * Read the input and turn it into tokens.
+ *
+ * Returns the token code for the parser (0 at end of input) and leaves
+ * the token's value, if any, in yylval.  When want_regexp is set the
+ * text up to the closing `/' is returned as a REGEXP.  Identifiers are
+ * looked up in tokentab; anything not found there is a NAME, or a
+ * FUNC_CALL when it is immediately followed by `('.  The string handed
+ * back for a NAME/FUNC_CALL is malloc'ed and becomes the caller's.
+ * Lexical errors are reported through yyerror(), which does not return.
+ */
+
+static int
+yylex()
+{
+	register int c;
+	int seen_e = 0;		/* These are for numbers */
+	int seen_point = 0;
+	int esc_seen;		/* for literal strings */
+	int low, mid, high;
+	static int did_newline = 0;
+	char *tokkey;
+
+	if (!nextc())
+		return 0;
+	pushback();
+	lexeme = lexptr;
+	thisline = NULL;
+	if (want_regexp) {
+		int in_brack = 0;
+
+		want_regexp = 0;
+		token = tokstart;
+		/*
+		 * Loop until the closing `/'.  End of input is handled by
+		 * the '\0' case below; testing for it in the loop condition
+		 * would make that case unreachable and let an unterminated
+		 * regexp at end of file slip through unreported.
+		 */
+		for (;;) {
+			c = nextc();
+			switch (c) {
+			case '[':
+				in_brack = 1;
+				break;
+			case ']':
+				in_brack = 0;
+				break;
+			case '\\':
+				if ((c = nextc()) == '\0') {
+					yyerror("unterminated regexp ends with \\ at end of file");
+				} else if (c == '\n') {
+					sourceline++;
+					continue;
+				} else
+					tokadd('\\');
+				break;
+			case '/':	/* end of the regexp */
+				if (in_brack)
+					break;
+
+				pushback();
+				tokadd('\0');
+				yylval.sval = tokstart;
+				return REGEXP;
+			case '\n':
+				pushback();
+				yyerror("unterminated regexp");
+			case '\0':
+				yyerror("unterminated regexp at end of file");
+			}
+			tokadd(c);
+		}
+	}
+retry:
+	while ((c = nextc()) == ' ' || c == '\t')
+		;
+
+	lexeme = lexptr-1;
+	thisline = NULL;
+	token = tokstart;
+	yylval.nodetypeval = Node_illegal;
+
+	switch (c) {
+	case 0:
+		return 0;
+
+	case '\n':
+		sourceline++;
+		return NEWLINE;
+
+	case '#':		/* it's a comment */
+		while ((c = nextc()) != '\n') {
+			if (c == '\0')
+				return 0;
+		}
+		sourceline++;
+		return NEWLINE;
+
+	case '\\':
+#ifdef RELAXED_CONTINUATION
+		if (!strict) {	/* strip trailing white-space and/or comment */
+			while ((c = nextc()) == ' ' || c == '\t') continue;
+			if (c == '#')
+				while ((c = nextc()) != '\n') if (!c) break;
+			pushback();
+		}
+#endif /*RELAXED_CONTINUATION*/
+		if (nextc() == '\n') {
+			sourceline++;
+			goto retry;
+		} else
+			yyerror("inappropriate use of backslash");
+		break;
+
+	case '$':
+		want_assign = 1;
+		return '$';
+
+	case ')':
+	case ']':
+	case '(':
+	case '[':
+	case ';':
+	case ':':
+	case '?':
+	case '{':
+	case ',':
+		return c;
+
+	case '*':
+		if ((c = nextc()) == '=') {
+			yylval.nodetypeval = Node_assign_times;
+			return ASSIGNOP;
+		} else if (do_posix) {
+			pushback();
+			return '*';
+		} else if (c == '*') {
+			/* make ** and **= aliases for ^ and ^= */
+			static int did_warn_op = 0, did_warn_assgn = 0;
+
+			if (nextc() == '=') {
+				if (do_lint && ! did_warn_assgn) {
+					did_warn_assgn = 1;
+					warning("**= is not allowed by POSIX");
+				}
+				yylval.nodetypeval = Node_assign_exp;
+				return ASSIGNOP;
+			} else {
+				pushback();
+				if (do_lint && ! did_warn_op) {
+					did_warn_op = 1;
+					warning("** is not allowed by POSIX");
+				}
+				return '^';
+			}
+		}
+		pushback();
+		return '*';
+
+	case '/':
+		if (want_assign) {
+			if (nextc() == '=') {
+				yylval.nodetypeval = Node_assign_quotient;
+				return ASSIGNOP;
+			}
+			pushback();
+		}
+		return '/';
+
+	case '%':
+		if (nextc() == '=') {
+			yylval.nodetypeval = Node_assign_mod;
+			return ASSIGNOP;
+		}
+		pushback();
+		return '%';
+
+	case '^':
+	{
+		static int did_warn_op = 0, did_warn_assgn = 0;
+
+		if (nextc() == '=') {
+
+			if (do_lint && ! did_warn_assgn) {
+				did_warn_assgn = 1;
+				warning("operator `^=' is not supported in old awk");
+			}
+			yylval.nodetypeval = Node_assign_exp;
+			return ASSIGNOP;
+		}
+		pushback();
+		if (do_lint && ! did_warn_op) {
+			did_warn_op = 1;
+			warning("operator `^' is not supported in old awk");
+		}
+		return '^';
+	}
+
+	case '+':
+		if ((c = nextc()) == '=') {
+			yylval.nodetypeval = Node_assign_plus;
+			return ASSIGNOP;
+		}
+		if (c == '+')
+			return INCREMENT;
+		pushback();
+		return '+';
+
+	case '!':
+		if ((c = nextc()) == '=') {
+			yylval.nodetypeval = Node_notequal;
+			return RELOP;
+		}
+		if (c == '~') {
+			yylval.nodetypeval = Node_nomatch;
+			want_assign = 0;
+			return MATCHOP;
+		}
+		pushback();
+		return '!';
+
+	case '<':
+		if (nextc() == '=') {
+			yylval.nodetypeval = Node_leq;
+			return RELOP;
+		}
+		yylval.nodetypeval = Node_less;
+		pushback();
+		return '<';
+
+	case '=':
+		if (nextc() == '=') {
+			yylval.nodetypeval = Node_equal;
+			return RELOP;
+		}
+		yylval.nodetypeval = Node_assign;
+		pushback();
+		return ASSIGNOP;
+
+	case '>':
+		if ((c = nextc()) == '=') {
+			yylval.nodetypeval = Node_geq;
+			return RELOP;
+		} else if (c == '>') {
+			yylval.nodetypeval = Node_redirect_append;
+			return APPEND_OP;
+		}
+		yylval.nodetypeval = Node_greater;
+		pushback();
+		return '>';
+
+	case '~':
+		yylval.nodetypeval = Node_match;
+		want_assign = 0;
+		return MATCHOP;
+
+	case '}':
+		/*
+		 * Added did newline stuff.  Easier than
+		 * hacking the grammar
+		 */
+		if (did_newline) {
+			did_newline = 0;
+			return c;
+		}
+		did_newline++;
+		--lexptr;	/* pick up } next time */
+		return NEWLINE;
+
+	case '"':
+		esc_seen = 0;
+		while ((c = nextc()) != '"') {
+			if (c == '\n') {
+				pushback();
+				yyerror("unterminated string");
+			}
+			if (c == '\\') {
+				c = nextc();
+				if (c == '\n') {
+					sourceline++;
+					continue;
+				}
+				esc_seen = 1;
+				tokadd('\\');
+			}
+			if (c == '\0') {
+				pushback();
+				yyerror("unterminated string");
+			}
+			tokadd(c);
+		}
+		yylval.nodeval = make_str_node(tokstart,
+					token - tokstart, esc_seen ? SCAN : 0);
+		yylval.nodeval->flags |= PERM;
+		return YSTRING;
+
+	case '-':
+		if ((c = nextc()) == '=') {
+			yylval.nodetypeval = Node_assign_minus;
+			return ASSIGNOP;
+		}
+		if (c == '-')
+			return DECREMENT;
+		pushback();
+		return '-';
+
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+	case '.':
+		/* It's a number */
+		for (;;) {
+			int gotnumber = 0;
+
+			tokadd(c);
+			switch (c) {
+			case '.':
+				if (seen_point) {
+					gotnumber++;
+					break;
+				}
+				++seen_point;
+				break;
+			case 'e':
+			case 'E':
+				if (seen_e) {
+					gotnumber++;
+					break;
+				}
+				++seen_e;
+				if ((c = nextc()) == '-' || c == '+')
+					tokadd(c);
+				else
+					pushback();
+				break;
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				break;
+			default:
+				gotnumber++;
+			}
+			if (gotnumber)
+				break;
+			c = nextc();
+		}
+		pushback();
+		yylval.nodeval = make_number(atof(tokstart));
+		yylval.nodeval->flags |= PERM;
+		return YNUMBER;
+
+	case '&':
+		if ((c = nextc()) == '&') {
+			yylval.nodetypeval = Node_and;
+			/* skip white space, newlines and comments after && */
+			for (;;) {
+				c = nextc();
+				if (c == '\0')
+					break;
+				if (c == '#') {
+					while ((c = nextc()) != '\n' && c != '\0')
+						;
+					if (c == '\0')
+						break;
+				}
+				if (c == '\n')
+					sourceline++;
+				if (! isspace(c)) {
+					pushback();
+					break;
+				}
+			}
+			want_assign = 0;
+			return LEX_AND;
+		}
+		pushback();
+		return '&';
+
+	case '|':
+		if ((c = nextc()) == '|') {
+			yylval.nodetypeval = Node_or;
+			/* skip white space, newlines and comments after || */
+			for (;;) {
+				c = nextc();
+				if (c == '\0')
+					break;
+				if (c == '#') {
+					while ((c = nextc()) != '\n' && c != '\0')
+						;
+					if (c == '\0')
+						break;
+				}
+				if (c == '\n')
+					sourceline++;
+				if (! isspace(c)) {
+					pushback();
+					break;
+				}
+			}
+			want_assign = 0;
+			return LEX_OR;
+		}
+		pushback();
+		return '|';
+	}
+
+	if (c != '_' && ! isalpha(c))
+		yyerror("Invalid char '%c' in expression\n", c);
+
+	/* it's some type of name-type-thing.  Find its length */
+	token = tokstart;
+	while (is_identchar(c)) {
+		tokadd(c);
+		c = nextc();
+	}
+	/* from here on c holds the character that follows the identifier */
+	tokadd('\0');
+	emalloc(tokkey, char *, token - tokstart, "yylex");
+	memcpy(tokkey, tokstart, token - tokstart);
+	pushback();
+
+	/* See if it is a special token.  */
+	low = 0;
+	high = (sizeof (tokentab) / sizeof (tokentab[0])) - 1;
+	while (low <= high) {
+		int i, first;
+
+		mid = (low + high) / 2;
+		/* compare first characters before paying for strcmp */
+		first = *tokstart - tokentab[mid].operator[0];
+		i = first ? first : strcmp (tokstart, tokentab[mid].operator);
+
+		if (i < 0) {		/* token < mid */
+			high = mid - 1;
+		} else if (i > 0) {	/* token > mid */
+			low = mid + 1;
+		} else {
+			if (do_lint) {
+				if (tokentab[mid].flags & GAWK)
+					warning("%s() is a gawk extension",
+						tokentab[mid].operator);
+				if (tokentab[mid].flags & NOT_POSIX)
+					warning("POSIX does not allow %s",
+						tokentab[mid].operator);
+				if (tokentab[mid].flags & NOT_OLD)
+					warning("%s is not supported in old awk",
+						tokentab[mid].operator);
+			}
+			/* disabled keyword: treat it as an ordinary name */
+			if ((strict && (tokentab[mid].flags & GAWK))
+			    || (do_posix && (tokentab[mid].flags & NOT_POSIX)))
+				break;
+			if (tokentab[mid].class == LEX_BUILTIN
+			    || tokentab[mid].class == LEX_LENGTH
+			   )
+				yylval.lval = mid;
+			else
+				yylval.nodetypeval = tokentab[mid].value;
+
+			/* a keyword doesn't need the private copy of its text */
+			free(tokkey);
+			return tokentab[mid].class;
+		}
+	}
+
+	yylval.sval = tokkey;
+	/*
+	 * Use the lookahead already in c rather than *lexptr: at end of
+	 * input lexptr is NULL, and at the end of a buffer it points past
+	 * the valid text.
+	 */
+	if (c == '(')
+		return FUNC_CALL;
+	else {
+		want_assign = 1;
+		return NAME;
+	}
+}
+
+/*
+ * node_common --- allocate a node of type op, mark it MALLOC and stamp
+ * it with the current source file and line.
+ */
+static NODE *
+node_common(op)
+NODETYPE op;
+{
+	register NODE *np;
+	int at_newline;
+
+	getnode(np);
+	np->type = op;
+	np->flags = MALLOC;
+	/* if lookahead is NL, lineno is 1 too high */
+	at_newline = (lexeme && *lexeme == '\n');
+	np->source_line = at_newline ? sourceline - 1 : sourceline;
+	np->source_file = source;
+	return np;
+}
+
+/*
+ * node --- build a node of type op whose lnode and rnode are the given
+ * left and right subtrees.
+ */
+NODE *
+node(left, op, right)
+NODE *left, *right;
+NODETYPE op;
+{
+	register NODE *np = node_common(op);
+
+	np->rnode = right;
+	np->lnode = left;
+	return np;
+}
+
+/*
+ * snode --- build the call node for the builtin at tokentab[index].
+ * subn is the argument list.  The argument count is checked against the
+ * table entry (fatal on mismatch) and defaults are filled in for length,
+ * sub/gsub and split; pattern arguments of match, sub, gsub and split
+ * are wrapped in regex nodes.
+ */
+static NODE *
+snode(subn, op, index)
+NODETYPE op;
+int index;
+NODE *subn;
+{
+	register NODE *r;
+	register NODE *n;
+	int nexp;
+	int args_allowed;
+
+	r = node_common(op);
+
+	/* count the arguments; anything beyond three is reported as four */
+	nexp = 0;
+	n = subn;
+	while (n != NULL) {
+		if (++nexp > 3)
+			break;
+		n = n->rnode;
+	}
+
+	/* a zero mask means the builtin takes any number of arguments */
+	args_allowed = tokentab[index].flags & ARGS;
+	if (args_allowed != 0 && (args_allowed & A(nexp)) == 0)
+		fatal("%s() cannot have %d argument%c",
+			tokentab[index].operator, nexp, nexp == 1 ? ' ' : 's');
+
+	r->proc = tokentab[index].ptr;
+
+	/* special case processing for a few builtins */
+	if (r->proc == do_length && nexp == 0) {
+		/* length with no argument means length($0) */
+		subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL),
+			    Node_expression_list,
+			    (NODE *) NULL);
+	} else if (r->proc == do_match) {
+		NODE *second = subn->rnode;
+
+		if (second->lnode->type != Node_regex)
+			second->lnode = mk_rexp(second->lnode);
+	} else if (r->proc == do_sub || r->proc == do_gsub) {
+		if (subn->lnode->type != Node_regex)
+			subn->lnode = mk_rexp(subn->lnode);
+		if (nexp == 2) {
+			/* the target defaults to $0 */
+			append_right(subn,
+				node(node(make_number(0.0), Node_field_spec, (NODE *) NULL),
+				     Node_expression_list,
+				     (NODE *) NULL));
+		} else if (do_lint && subn->rnode->rnode->lnode->type == Node_val)
+			warning("string literal as last arg of substitute");
+	} else if (r->proc == do_split) {
+		NODE *third;
+
+		/* the separator defaults to FS */
+		if (nexp == 2)
+			append_right(subn,
+				node(FS_node, Node_expression_list, (NODE *) NULL));
+		third = subn->rnode->rnode;
+		n = third->lnode;
+		if (n->type != Node_regex)
+			third->lnode = mk_rexp(n);
+		if (nexp == 2)
+			third->lnode->re_flags |= FS_DFLT;
+	}
+
+	r->subnode = subn;
+	return r;
+}
+
+/*
+ * mkrangenode --- build a Node_line_range node for the condition pair
+ * cpair.  Unlike a plain node( foo, Node_line_range, 0) call, this also
+ * clears the `triggered' word.
+ */
+static NODE *
+mkrangenode(cpair)
+NODE *cpair;
+{
+	register NODE *range;
+
+	getnode(range);
+	range->triggered = 0;
+	range->condpair = cpair;
+	range->type = Node_line_range;
+	return range;
+}
+
+/*
+ * make_for_loop --- package the three clauses of a for statement in a
+ * freshly allocated FOR_LOOP_HEADER and return a Node_illegal node that
+ * points at it.
+ */
+static NODE *
+make_for_loop(init, cond, incr)
+NODE *init, *cond, *incr;
+{
+	register FOR_LOOP_HEADER *hdr;
+	NODE *holder;
+
+	emalloc(hdr, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop");
+	getnode(holder);
+	hdr->init = init;
+	hdr->cond = cond;
+	hdr->incr = incr;
+	holder->type = Node_illegal;
+	holder->sub.nodep.r.hd = hdr;
+	return holder;
+}
+
+/*
+ * install --- enter name in the symbol table with the given value and
+ * return the value.  The new entry goes at the head of its hash chain,
+ * so it hides any earlier entry of the same name; callers that care
+ * about redefinition must check with lookup() first.  The name string
+ * is stored by reference, not copied.
+ */
+NODE *
+install(name, value)
+char *name;
+NODE *value;
+{
+	register NODE *entry;
+	register int namelen, slot;
+
+	namelen = strlen(name);
+	slot = hash(name, namelen);
+	getnode(entry);
+	entry->type = Node_hashnode;
+	entry->hname = name;
+	entry->hlength = namelen;
+	entry->hvalue = value;
+	/* push on the front of the chain */
+	entry->hnext = variables[slot];
+	variables[slot] = entry;
+	return entry->hvalue;
+}
+
+/*
+ * lookup --- return the value most recently installed under name,
+ * or NULL if the name is not in the symbol table.
+ */
+NODE *
+lookup(name)
+char *name;
+{
+	register NODE *entry;
+	register int namelen;
+
+	namelen = strlen(name);
+	for (entry = variables[hash(name, namelen)]; entry != NULL;
+	     entry = entry->hnext) {
+		if (entry->hlength != namelen)
+			continue;
+		if (STREQN(entry->hname, name, namelen))
+			return entry->hvalue;
+	}
+	return NULL;
+}
+
+/*
+ * append_right --- hang new on the end of list's rnode chain and return
+ * list.  Walking the chain every time would be quadratic, so the list
+ * head and tail from the previous call are remembered and reused when
+ * the same list is extended again.
+ */
+static NODE *
+append_right(list, new)
+NODE *list, *new;
+{
+	static NODE *savefront = NULL, *savetail = NULL;
+	register NODE *last;
+
+	if (savefront == list)
+		last = savetail;	/* same list as last time */
+	else {
+		savefront = list;
+		for (last = list; last->rnode != NULL; last = last->rnode)
+			continue;
+	}
+	last->rnode = new;
+	savetail = new;
+	return list;
+}
+
+/*
+ * func_install --- finish a function definition.  params heads the list
+ * built for the definition: its first node carries the function name and
+ * the remaining nodes the formal parameters.  All of them are removed
+ * from the symbol table, and the function is then installed under its
+ * name with def as its body.  Defining a name that is still present in
+ * the table is fatal.
+ */
+static void
+func_install(params, def)
+NODE *params;
+NODE *def;
+{
+	/* formals first (their names are freed), then the function's own entry */
+	pop_params(params->rnode);
+	pop_var(params, 0);
+
+	if (lookup(params->param) == NULL)
+		(void) install(params->param, node(params, Node_func, def));
+	else
+		fatal("function name `%s' previously defined", params->param);
+}
+
+/*
+ * pop_var --- unlink and free the most recent symbol table entry for
+ * np->param.  When freeit is nonzero the name string is freed as well.
+ * Does nothing if the name is not in the table.
+ */
+static void
+pop_var(np, freeit)
+NODE *np;
+int freeit;
+{
+	register NODE **link, *cur;
+	register int namelen;
+	char *name = np->param;
+
+	namelen = strlen(name);
+	/* link always addresses the pointer that leads to cur */
+	link = &(variables[hash(name, namelen)]);
+	while ((cur = *link) != NULL) {
+		if (cur->hlength == namelen && STREQN(cur->hname, name, namelen)) {
+			*link = cur->hnext;
+			freenode(cur);
+			if (freeit)
+				free(np->param);
+			return;
+		}
+		link = &(cur->hnext);
+	}
+}
+
+/*
+ * pop_params --- remove every parameter on the list from the symbol
+ * table, freeing the name strings too.
+ */
+static void
+pop_params(params)
+NODE *params;
+{
+	register NODE *np = params;
+
+	while (np != NULL) {
+		pop_var(np, 1);
+		np = np->rnode;
+	}
+}
+
+/*
+ * make_param --- create a Node_param_list node for name, give it the
+ * next parameter number, and install it in the symbol table.
+ */
+static NODE *
+make_param(name)
+char *name;
+{
+	NODE *parm;
+
+	getnode(parm);
+	parm->type = Node_param_list;
+	parm->param = name;
+	parm->param_cnt = param_counter++;
+	parm->rnode = NULL;
+	return install(name, parm);
+}
+
+/*
+ * variable --- name points to a variable name; make sure it's in the
+ * symbol table and return its node.  A name that is not there yet is
+ * installed with the null string as its value.  If the name was already
+ * present and can_free is nonzero, the name string is freed.  The first
+ * reference to ENVIRON triggers load_environ().
+ */
+NODE *
+variable(name, can_free)
+char *name;
+int can_free;
+{
+	register NODE *found;
+	static int env_loaded = 0;
+
+	if (!env_loaded && STREQ(name, "ENVIRON")) {
+		load_environ();
+		env_loaded = 1;
+	}
+
+	found = lookup(name);
+	if (found != NULL) {
+		if (can_free)
+			free(name);
+		return found;
+	}
+	return install(name, node(Nnull_string, Node_var, (NODE *) NULL));
+}
+
+/*
+ * mk_rexp --- return exp itself if it is already a regex node; otherwise
+ * wrap it in a new Node_regex node with no compiled pattern yet.
+ */
+static NODE *
+mk_rexp(exp)
+NODE *exp;
+{
+	NODE *rx;
+
+	if (exp->type == Node_regex)
+		return exp;
+
+	getnode(rx);
+	rx->type = Node_regex;
+	rx->re_exp = exp;
+	rx->re_reg = NULL;
+	rx->re_text = NULL;
+	rx->re_cnt = 1;
+	rx->re_flags = 0;
+	return rx;
+}
+int yyexca[] ={
+-1, 1,
+ 0, -1,
+ -2, 0,
+-1, 5,
+ 0, 59,
+ -2, 0,
+-1, 77,
+ 264, 76,
+ 267, 76,
+ 62, 76,
+ 124, 76,
+ 59, 76,
+ -2, 0,
+-1, 111,
+ 41, 84,
+ -2, 0,
+-1, 112,
+ 41, 84,
+ -2, 0,
+-1, 113,
+ 41, 84,
+ -2, 0,
+-1, 126,
+ 266, 0,
+ -2, 99,
+-1, 128,
+ 263, 0,
+ 60, 0,
+ 62, 0,
+ 124, 0,
+ -2, 103,
+-1, 129,
+ 263, 0,
+ 60, 0,
+ 62, 0,
+ 124, 0,
+ -2, 104,
+-1, 130,
+ 263, 0,
+ 60, 0,
+ 62, 0,
+ 124, 0,
+ -2, 105,
+-1, 149,
+ 264, 77,
+ 267, 77,
+ 62, 77,
+ 124, 77,
+ 59, 77,
+ -2, 0,
+-1, 186,
+ 41, 85,
+ -2, 0,
+-1, 220,
+ 41, 67,
+ -2, 0,
+-1, 248,
+ 266, 0,
+ -2, 116,
+-1, 250,
+ 263, 0,
+ -2, 118,
+-1, 258,
+ 41, 68,
+ -2, 0,
+ };
+# define YYNPROD 158
+# define YYLAST 1843
+int yyact[]={
+
+ 62, 212, 20, 13, 107, 24, 13, 87, 225, 17,
+ 88, 89, 36, 123, 35, 82, 25, 291, 91, 235,
+ 45, 45, 4, 37, 168, 88, 89, 286, 45, 285,
+ 264, 88, 89, 200, 260, 24, 259, 261, 52, 184,
+ 166, 165, 249, 161, 63, 127, 199, 22, 100, 186,
+ 158, 173, 82, 122, 65, 124, 125, 126, 63, 128,
+ 129, 130, 131, 20, 218, 82, 24, 90, 107, 171,
+ 17, 82, 63, 36, 45, 35, 64, 25, 63, 174,
+ 159, 136, 63, 93, 22, 171, 228, 63, 263, 45,
+ 220, 185, 22, 270, 68, 202, 163, 144, 142, 175,
+ 113, 103, 112, 111, 6, 252, 101, 227, 183, 167,
+ 39, 102, 183, 183, 183, 160, 26, 157, 110, 133,
+ 20, 86, 82, 24, 11, 140, 97, 17, 121, 24,
+ 36, 46, 35, 98, 25, 48, 36, 41, 35, 164,
+ 25, 108, 82, 194, 82, 159, 45, 141, 91, 77,
+ 257, 71, 148, 22, 149, 68, 258, 182, 100, 10,
+ 27, 159, 5, 1, 219, 50, 118, 12, 221, 222,
+ 224, 120, 20, 0, 203, 24, 189, 0, 97, 17,
+ 0, 24, 36, 0, 35, 98, 25, 0, 36, 0,
+ 35, 0, 233, 0, 0, 197, 193, 198, 45, 236,
+ 0, 240, 241, 242, 187, 188, 0, 190, 0, 0,
+ 22, 217, 0, 0, 0, 192, 0, 0, 0, 183,
+ 0, 0, 0, 0, 0, 135, 30, 23, 4, 4,
+ 33, 34, 0, 0, 0, 201, 0, 213, 265, 0,
+ 83, 0, 80, 81, 72, 73, 74, 75, 76, 84,
+ 85, 78, 79, 0, 18, 0, 169, 23, 31, 32,
+ 28, 29, 22, 82, 276, 253, 20, 159, 255, 24,
+ 169, 232, 82, 17, 0, 0, 36, 0, 35, 159,
+ 25, 0, 0, 273, 169, 267, 67, 30, 23, 82,
+ 274, 33, 34, 59, 169, 60, 82, 82, 82, 204,
+ 0, 83, 19, 80, 81, 72, 73, 74, 75, 76,
+ 84, 85, 78, 79, 0, 18, 0, 279, 0, 31,
+ 32, 28, 29, 95, 0, 0, 0, 0, 0, 289,
+ 103, 0, 0, 0, 0, 101, 104, 0, 105, 0,
+ 102, 0, 0, 67, 30, 23, 0, 0, 33, 34,
+ 30, 23, 0, 0, 33, 34, 0, 53, 83, 0,
+ 80, 81, 72, 73, 74, 75, 76, 84, 85, 78,
+ 79, 0, 18, 0, 0, 0, 31, 32, 28, 29,
+ 154, 0, 31, 32, 28, 29, 0, 100, 0, 0,
+ 0, 0, 0, 0, 0, 211, 30, 23, 0, 0,
+ 33, 34, 30, 23, 0, 0, 33, 34, 0, 0,
+ 83, 0, 80, 81, 72, 73, 74, 75, 76, 84,
+ 85, 78, 79, 16, 18, 0, 0, 0, 31, 32,
+ 28, 29, 0, 0, 31, 32, 28, 29, 0, 0,
+ 0, 0, 94, 0, 99, 0, 0, 0, 109, 0,
+ 0, 0, 0, 154, 20, 114, 115, 24, 214, 99,
+ 99, 17, 0, 0, 36, 0, 35, 0, 25, 0,
+ 0, 0, 0, 150, 0, 0, 0, 0, 0, 0,
+ 45, 59, 0, 60, 61, 0, 0, 0, 0, 0,
+ 20, 23, 0, 24, 33, 34, 58, 17, 171, 0,
+ 36, 152, 35, 0, 25, 0, 154, 154, 154, 154,
+ 154, 0, 154, 154, 154, 0, 0, 59, 0, 60,
+ 61, 99, 31, 32, 99, 99, 99, 99, 99, 99,
+ 20, 0, 0, 24, 0, 0, 0, 17, 0, 0,
+ 36, 0, 35, 0, 25, 53, 154, 94, 154, 154,
+ 154, 154, 0, 154, 154, 154, 45, 59, 0, 60,
+ 61, 0, 0, 0, 0, 0, 20, 0, 0, 24,
+ 0, 154, 154, 17, 152, 0, 36, 94, 35, 99,
+ 25, 53, 0, 0, 154, 0, 0, 0, 0, 0,
+ 0, 191, 0, 59, 0, 60, 61, 0, 99, 243,
+ 245, 246, 247, 248, 0, 250, 251, 70, 0, 0,
+ 14, 0, 20, 14, 0, 24, 0, 0, 14, 17,
+ 51, 53, 36, 63, 35, 0, 25, 152, 152, 152,
+ 152, 152, 0, 152, 152, 152, 0, 0, 0, 59,
+ 0, 60, 61, 0, 0, 0, 0, 0, 269, 14,
+ 0, 0, 0, 0, 14, 0, 0, 53, 0, 0,
+ 0, 0, 0, 0, 281, 0, 0, 152, 0, 152,
+ 152, 152, 152, 0, 152, 152, 152, 0, 30, 23,
+ 0, 0, 33, 34, 58, 0, 0, 56, 4, 0,
+ 0, 0, 152, 152, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 53, 0, 152, 18, 57, 54, 55,
+ 31, 32, 28, 29, 30, 23, 0, 0, 33, 34,
+ 58, 0, 0, 56, 0, 0, 155, 0, 0, 24,
+ 0, 0, 0, 98, 0, 0, 36, 0, 35, 0,
+ 25, 0, 18, 57, 54, 55, 31, 32, 28, 29,
+ 0, 268, 0, 0, 30, 23, 210, 0, 33, 34,
+ 58, 0, 155, 56, 0, 24, 0, 0, 0, 98,
+ 0, 0, 36, 0, 35, 0, 25, 0, 0, 0,
+ 0, 0, 18, 57, 54, 55, 31, 32, 28, 29,
+ 30, 23, 210, 0, 33, 34, 58, 0, 0, 56,
+ 20, 0, 0, 24, 0, 0, 0, 17, 0, 0,
+ 36, 0, 35, 0, 25, 0, 0, 0, 18, 57,
+ 54, 55, 31, 32, 28, 29, 0, 59, 0, 60,
+ 61, 0, 0, 0, 0, 0, 30, 23, 21, 0,
+ 33, 34, 58, 0, 0, 56, 0, 0, 155, 0,
+ 0, 24, 0, 0, 0, 98, 0, 0, 36, 96,
+ 35, 0, 25, 0, 18, 57, 54, 55, 31, 32,
+ 28, 29, 0, 20, 116, 117, 24, 0, 0, 0,
+ 17, 0, 0, 36, 0, 35, 0, 25, 0, 0,
+ 0, 53, 0, 0, 0, 0, 0, 0, 0, 0,
+ 59, 0, 60, 0, 0, 0, 0, 0, 0, 0,
+ 155, 0, 0, 24, 0, 0, 156, 98, 20, 0,
+ 36, 24, 35, 0, 25, 17, 0, 0, 36, 0,
+ 35, 0, 25, 0, 0, 0, 96, 0, 0, 176,
+ 177, 178, 179, 180, 181, 59, 0, 60, 0, 0,
+ 30, 23, 0, 0, 33, 34, 209, 0, 0, 207,
+ 0, 0, 0, 20, 53, 0, 24, 0, 0, 0,
+ 17, 0, 0, 36, 0, 35, 0, 25, 153, 208,
+ 205, 206, 31, 32, 28, 29, 30, 23, 0, 156,
+ 33, 34, 209, 20, 96, 207, 24, 0, 0, 0,
+ 17, 0, 0, 36, 0, 35, 0, 25, 0, 53,
+ 0, 0, 0, 226, 153, 208, 205, 206, 31, 32,
+ 28, 29, 0, 0, 30, 23, 0, 0, 33, 34,
+ 58, 0, 0, 56, 0, 0, 0, 0, 0, 0,
+ 0, 0, 156, 156, 156, 156, 156, 0, 156, 156,
+ 156, 0, 18, 57, 54, 55, 31, 32, 28, 29,
+ 20, 0, 0, 24, 0, 0, 0, 17, 0, 0,
+ 36, 0, 35, 23, 25, 0, 33, 34, 209, 0,
+ 0, 0, 156, 22, 156, 156, 156, 156, 0, 156,
+ 156, 156, 0, 0, 0, 0, 0, 30, 23, 0,
+ 0, 33, 34, 58, 31, 32, 56, 156, 156, 155,
+ 0, 0, 24, 0, 0, 0, 98, 0, 0, 36,
+ 156, 35, 0, 25, 0, 18, 57, 54, 0, 31,
+ 32, 28, 29, 0, 0, 23, 0, 0, 33, 34,
+ 0, 0, 30, 23, 0, 0, 33, 34, 58, 155,
+ 22, 56, 24, 0, 0, 0, 98, 0, 0, 36,
+ 0, 35, 2, 25, 0, 0, 31, 32, 38, 0,
+ 18, 57, 0, 0, 31, 32, 28, 29, 0, 0,
+ 0, 0, 0, 0, 0, 106, 20, 0, 23, 24,
+ 0, 33, 34, 17, 0, 0, 36, 0, 35, 0,
+ 25, 0, 0, 0, 0, 0, 0, 119, 0, 0,
+ 0, 0, 45, 0, 0, 0, 40, 30, 23, 31,
+ 32, 33, 34, 0, 0, 0, 132, 4, 0, 8,
+ 9, 138, 139, 0, 0, 0, 143, 0, 0, 0,
+ 0, 0, 0, 0, 15, 18, 0, 0, 0, 31,
+ 32, 28, 29, 20, 0, 0, 24, 0, 0, 0,
+ 17, 155, 0, 36, 24, 35, 0, 25, 98, 0,
+ 0, 36, 0, 35, 0, 25, 0, 0, 0, 0,
+ 0, 66, 0, 7, 30, 23, 0, 0, 33, 34,
+ 0, 0, 0, 0, 0, 0, 8, 9, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 15, 18, 0, 0, 0, 31, 32, 28, 29,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 30, 23, 172, 0, 33, 34, 209,
+ 20, 0, 207, 24, 0, 0, 134, 17, 0, 0,
+ 36, 0, 35, 0, 25, 0, 0, 0, 0, 0,
+ 0, 153, 208, 205, 0, 31, 32, 28, 29, 0,
+ 0, 0, 0, 30, 23, 0, 0, 33, 34, 209,
+ 155, 0, 207, 24, 0, 0, 0, 147, 0, 0,
+ 36, 0, 35, 0, 25, 0, 0, 262, 0, 0,
+ 0, 153, 208, 134, 0, 31, 32, 28, 29, 0,
+ 30, 23, 0, 0, 33, 34, 0, 20, 0, 271,
+ 24, 0, 0, 134, 17, 195, 170, 36, 0, 35,
+ 0, 25, 0, 0, 0, 284, 0, 0, 18, 0,
+ 288, 0, 31, 32, 28, 29, 0, 0, 0, 0,
+ 292, 0, 0, 295, 296, 155, 0, 297, 24, 0,
+ 42, 0, 98, 0, 0, 36, 0, 35, 0, 25,
+ 47, 49, 0, 0, 0, 0, 92, 30, 23, 0,
+ 0, 33, 34, 0, 244, 30, 23, 0, 0, 33,
+ 34, 0, 20, 0, 0, 24, 0, 0, 0, 17,
+ 0, 0, 36, 0, 35, 18, 25, 0, 0, 31,
+ 32, 28, 29, 153, 0, 0, 0, 31, 32, 28,
+ 29, 229, 0, 230, 231, 0, 0, 0, 0, 0,
+ 234, 0, 0, 0, 238, 0, 145, 146, 0, 0,
+ 0, 0, 0, 162, 275, 0, 0, 69, 0, 0,
+ 0, 0, 0, 283, 0, 256, 44, 44, 44, 0,
+ 0, 0, 0, 223, 30, 23, 0, 0, 33, 34,
+ 293, 3, 0, 0, 0, 0, 0, 298, 299, 300,
+ 43, 43, 43, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 18, 272, 0, 0, 31, 32, 28, 29,
+ 277, 0, 0, 151, 30, 23, 0, 0, 33, 34,
+ 0, 0, 287, 137, 0, 290, 0, 0, 215, 216,
+ 0, 0, 0, 44, 44, 294, 0, 0, 0, 0,
+ 44, 0, 153, 0, 0, 0, 31, 32, 28, 29,
+ 0, 30, 23, 0, 0, 33, 34, 43, 43, 0,
+ 0, 0, 0, 0, 43, 0, 0, 0, 0, 0,
+ 239, 0, 0, 0, 0, 0, 0, 0, 0, 18,
+ 0, 0, 0, 31, 32, 28, 29, 0, 254, 30,
+ 23, 137, 0, 33, 34, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 44, 44, 153, 0, 0,
+ 0, 31, 32, 28, 29, 0, 30, 196, 0, 0,
+ 33, 34, 0, 0, 0, 0, 0, 0, 280, 43,
+ 43, 282, 0, 0, 0, 0, 0, 137, 0, 0,
+ 0, 0, 0, 0, 18, 237, 0, 44, 31, 32,
+ 28, 29, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 44, 0, 0, 0, 0,
+ 0, 43, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 266, 0, 0, 0, 43,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 278, 0, 44, 0, 0, 44, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 43,
+ 0, 0, 43 };
+int yypact[]={
+/*
+** yypact[state]: base offset into yyact[] for the state's actions.
+** yyparse adds the current token number (or YYERRCODE during error
+** recovery) to this value and uses the sum as the yyact[] index.
+** An entry <= YYFLAG (-1000) sends the parser straight to the
+** state's default action in yydef[] without reading a token.
+*/
+ -245, -1000, 1027, -244, -1000, 960, -1000, -1000, -38, -38,
+ -39, -1000, -76, 579, 30, -1000, -258, 1220, -1, -1000,
+ 93, 293, -245, -23, 145, -1000, -1000, -1000, 63, 62,
+ 60, -1, -1, -1000, -1000, 145, 145, -1000, -1000, -1000,
+ -1000, -76, -1000, -244, -245, -1000, -76, -1000, -1000, -1000,
+ -1000, 87, 1384, -272, 1384, 1384, 1384, -213, 1384, 1384,
+ 1384, 1384, 930, -245, -31, 15, -1000, -1000, -245, -245,
+ 30, -1000, 58, -245, 57, -38, -38, 1347, -1000, 1384,
+ -1000, -215, 421, 56, -1000, -1000, -217, -1000, -1000, -1000,
+ 28, 457, -1000, 19, -1000, -1000, -46, 145, 1384, -279,
+ 145, 145, 145, 145, 145, 145, -1000, 1220, -1000, -1000,
+ -220, 1220, 1220, 1220, -1000, -1000, -46, -46, -1000, -1000,
+ -1000, -31, 767, -1, 885, 840, 233, -1000, 930, 930,
+ 930, 533, -1000, 15, -1000, -1000, -1000, -1000, -1000, -1000,
+ -1000, -31, 1384, 139, 1459, -1000, -1000, 1220, -29, 43,
+ 729, -1000, -264, -1, -1000, 93, 293, -38, -38, 767,
+ 1384, -27, -1000, 1384, 50, -1000, -1000, 1384, 1307, 1384,
+ -278, -1000, -1000, -1000, 145, 457, -46, -46, -46, -46,
+ 64, 64, 14, 767, 39, 44, 38, 44, 44, 15,
+ -1000, 1384, -1000, -1000, 457, -256, -87, 15, 28, -38,
+ 1384, 1384, 1384, 1228, 1422, 1422, 1422, 1422, -216, 1422,
+ 1422, 877, -1000, 19, -1000, -1000, -1000, -38, 1220, 457,
+ -222, 767, 767, -1000, 767, -221, 293, -1000, -1000, -1000,
+ -1000, -1000, -1000, 767, -245, 48, -228, 1153, -29, -1000,
+ 767, 767, 767, 729, -1000, 729, 1116, 1076, 815, -1000,
+ 877, 693, 1422, -1000, -1000, 0, -245, 44, 34, -1000,
+ -1000, -1000, 139, 1384, 44, 497, 1384, -38, 1422, 729,
+ -38, 139, -245, -229, -1000, -1000, 457, -245, 1384, 44,
+ -1000, 729, -1000, -255, -1000, -1000, -1000, -245, 139, 44,
+ -245, -245, -1000, -1000, -245, 139, 139, 139, -1000, -1000,
+ -1000 };
+int yypgo[]={
+/*
+** yypgo[symbol]: base offset into yyact[] for the goto taken after a
+** reduction.  yyparse indexes it with yyr1[production], adds the state
+** uncovered by the pop plus one, and looks that slot up in yyact[];
+** if the slot is out of range or fails the yychk[] test it falls back
+** to yyact[ yypgo[symbol] ].
+*/
+ 0, 167, 165, 395, 0, 163, 162, 104, 838, 116,
+ 160, 159, 124, 423, 156, 154, 152, 49, 91, 76,
+ 1281, 151, 150, 50, 83, 302, 51, 46, 149, 139,
+ 1162, 137, 131, 1460, 121, 1335, 607, 54, 81, 24,
+ 118, 1571, 1547, 117, 115, 109, 105 };
+int yyr1[]={
+/*
+** yyr1[production]: number of the symbol on the left-hand side of the
+** production (entries line up one-for-one with yyreds[]).  After a
+** reduction yyparse uses this value to pick the yypgo[] entry and
+** compares its negation against yychk[].  Slot 0 is unused padding.
+*/
+ 0, 5, 6, 6, 6, 6, 31, 7, 32, 7,
+ 7, 7, 7, 7, 7, 7, 29, 29, 34, 1,
+ 2, 11, 11, 40, 25, 12, 12, 19, 19, 19,
+ 19, 33, 33, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 43, 20, 20, 44,
+ 20, 20, 20, 28, 28, 21, 21, 41, 41, 30,
+ 30, 26, 26, 27, 27, 27, 27, 22, 22, 14,
+ 14, 14, 14, 14, 23, 23, 16, 16, 15, 15,
+ 15, 15, 15, 15, 18, 18, 17, 17, 17, 17,
+ 17, 17, 45, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 46,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 8, 8, 8, 8, 8, 8, 8, 8,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
+ 9, 9, 10, 10, 10, 24, 24, 13, 13, 13,
+ 13, 36, 37, 35, 38, 38, 42, 39 };
+int yyr2[]={
+/*
+** yyr2[production]: length of the production doubled, with the low
+** bit used as a flag.  yyparse shifts the value right by one to get
+** the number of stack entries to pop; when the low bit is clear the
+** production has no user action and the action switch is skipped.
+** YYBACKUP also reads it to check that the production has length 1.
+*/
+ 0, 7, 3, 5, 3, 5, 1, 7, 1, 7,
+ 5, 5, 5, 3, 5, 5, 3, 3, 1, 15,
+ 9, 3, 7, 1, 9, 9, 7, 3, 5, 3,
+ 5, 2, 4, 5, 5, 7, 3, 13, 17, 17,
+ 21, 19, 5, 5, 13, 9, 1, 7, 7, 1,
+ 9, 13, 5, 3, 3, 13, 19, 3, 4, 0,
+ 2, 1, 5, 1, 5, 5, 5, 1, 3, 3,
+ 7, 3, 5, 7, 1, 3, 1, 3, 3, 7,
+ 3, 5, 7, 7, 1, 3, 3, 7, 3, 5,
+ 7, 7, 1, 9, 11, 9, 7, 7, 7, 7,
+ 3, 5, 7, 7, 7, 7, 11, 3, 5, 1,
+ 9, 7, 7, 7, 3, 5, 7, 7, 7, 11,
+ 3, 5, 2, 2, 7, 7, 7, 7, 7, 7,
+ 5, 7, 9, 9, 3, 9, 5, 5, 3, 3,
+ 5, 5, 5, 5, 2, 1, 3, 3, 9, 5,
+ 5, 4, 5, 3, 0, 2, 3, 5 };
+int yychk[]={
+/*
+** yychk[state]: the symbol that leads into the state, used to validate
+** entries fetched from yyact[].  yyparse accepts a shift only when this
+** equals the lookahead token (yychar), and accepts a goto only when it
+** equals minus the yyr1[] value of the production just reduced; error
+** recovery looks for a state whose entry equals YYERRCODE.
+*/
+ -1000, -5, -30, -41, 267, -6, -7, 256, 269, 270,
+ -11, -12, -1, -4, -36, 284, -13, 40, 285, -25,
+ 33, -8, 123, 258, 36, 47, -9, -10, 291, 292,
+ 257, 289, 290, 261, 262, 45, 43, 267, -30, -7,
+ 256, -31, -33, -41, -42, 59, -32, -33, -12, -33,
+ -2, -36, -39, 124, 287, 288, 266, 286, 263, 60,
+ 62, 63, -4, 44, -19, -37, -20, 256, 125, -42,
+ -36, -21, 275, 276, 277, 278, 279, -28, 282, 283,
+ 273, 274, -4, 271, 280, 281, -34, 265, 289, 290,
+ -17, -4, 256, -24, -13, -25, -8, 33, 40, -13,
+ 94, 42, 47, 37, 43, 45, -30, 91, -9, -13,
+ -40, 40, 40, 40, -13, -13, -8, -8, -12, -30,
+ -12, -19, -4, 285, -4, -4, -4, 258, -4, -4,
+ -4, -4, -30, -37, -20, 256, -38, -42, -30, -30,
+ -37, -19, 40, -30, 40, -33, -33, 40, -16, -15,
+ -3, 256, -13, 285, -25, 33, -8, -43, -23, -4,
+ -44, 258, -33, 40, -29, 258, 257, -45, -39, 256,
+ -35, 41, -35, -26, 60, -4, -8, -8, -8, -8,
+ -8, -8, -17, -4, 259, -18, -17, -18, -18, -37,
+ -24, 58, -38, -37, -4, -20, 258, -23, -17, -27,
+ 62, 264, 124, -39, 256, 287, 288, 266, 286, 263,
+ 63, -3, 265, -24, -25, -33, -33, -23, 91, -4,
+ 40, -4, -4, 256, -4, 286, -8, 93, 47, -35,
+ -35, -35, -38, -4, -35, 275, 286, -42, -35, -33,
+ -4, -4, -4, -3, 256, -3, -3, -3, -3, 258,
+ -3, -3, -46, -26, -33, -17, -35, -22, -14, 258,
+ 256, 258, -30, 40, 258, -4, -42, -27, 58, -3,
+ 93, -30, -35, -39, 256, -20, -4, -35, -42, -23,
+ -33, -3, -33, -20, -30, 258, 256, -35, -30, -23,
+ -35, 272, -30, -20, -35, -30, -30, -30, -20, -20,
+ -20 };
+int yydef[]={
+/*
+** yydef[state]: default action taken when no shift applies in the
+** state.  A positive entry is the production number to reduce by,
+** 0 means a syntax error, and -2 tells yyparse to search the yyexca[]
+** exception table for an entry matching this state and the lookahead.
+*/
+ 59, -2, 0, 60, 57, -2, 2, 4, 6, 8,
+ 0, 13, 0, 21, 0, 18, 144, 0, 145, 100,
+ 0, 107, 59, 147, 0, 23, 122, 123, 0, 134,
+ 0, 0, 0, 138, 139, 0, 0, 58, 1, 3,
+ 5, 0, 10, 31, 59, 156, 0, 11, 12, 14,
+ 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 108, 59, 0, 154, 27, 29, 59, 59,
+ 0, 36, 0, 59, 0, 0, 0, -2, 46, 74,
+ 49, 0, 0, 0, 53, 54, 0, 92, 142, 143,
+ 0, 86, 88, 61, 146, 101, 130, 0, 0, 144,
+ 0, 0, 0, 0, 0, 0, 151, 0, 149, 150,
+ 0, -2, -2, -2, 136, 137, 140, 141, 7, 32,
+ 9, 0, 22, 145, 97, 98, -2, 102, -2, -2,
+ -2, 0, 157, 154, 28, 30, 26, 155, 152, 33,
+ 34, 0, 0, 0, 74, 42, 43, 0, 63, -2,
+ 78, 80, 144, 145, 114, 0, 120, 0, 0, 75,
+ 74, 0, 52, 0, 0, 16, 17, 0, 0, 89,
+ 0, 153, 131, 96, 0, 0, 124, 125, 126, 127,
+ 128, 129, 0, 86, 0, 0, -2, 0, 0, 154,
+ 95, 0, 25, 35, 0, 0, 147, 0, 0, 0,
+ 0, 0, 0, 0, 81, 0, 0, 0, 0, 0,
+ 0, 121, 109, 61, 115, 47, 48, 0, 0, 0,
+ -2, 93, 87, 91, 90, 0, 62, 148, 24, 132,
+ 133, 135, 20, 106, 59, 0, 0, 0, 63, 45,
+ 64, 65, 66, 79, 83, 82, 111, 112, -2, 117,
+ -2, 0, 0, 113, 50, 0, 59, 0, -2, 69,
+ 71, 94, 0, 0, 0, 0, 74, 0, 0, 110,
+ 0, 0, 59, 0, 72, 37, 0, 59, 74, 0,
+ 44, 119, 51, 55, 19, 70, 73, 59, 0, 0,
+ 59, 59, 38, 39, 59, 0, 0, 0, 41, 56,
+ 40 };
+typedef struct { char *t_name; int t_val; } yytoktype; /* token name / token number pair, used by the yytoks[] debug table */
+#ifndef YYDEBUG
+# define YYDEBUG 0 /* don't allow debugging; this also pre-empts the "#ifndef YYDEBUG" default of 1 in the yaccpar section below */
+#endif
+
+#if YYDEBUG
+
+yytoktype yytoks[] = /* token names for the yydebug traces in yyparse; searched linearly by t_val */
+{
+ "FUNC_CALL", 257,
+ "NAME", 258,
+ "REGEXP", 259,
+ "ERROR", 260,
+ "YNUMBER", 261,
+ "YSTRING", 262,
+ "RELOP", 263,
+ "APPEND_OP", 264,
+ "ASSIGNOP", 265,
+ "MATCHOP", 266,
+ "NEWLINE", 267,
+ "CONCAT_OP", 268,
+ "LEX_BEGIN", 269,
+ "LEX_END", 270,
+ "LEX_IF", 271,
+ "LEX_ELSE", 272,
+ "LEX_RETURN", 273,
+ "LEX_DELETE", 274,
+ "LEX_WHILE", 275,
+ "LEX_DO", 276,
+ "LEX_FOR", 277,
+ "LEX_BREAK", 278,
+ "LEX_CONTINUE", 279,
+ "LEX_PRINT", 280,
+ "LEX_PRINTF", 281,
+ "LEX_NEXT", 282,
+ "LEX_EXIT", 283,
+ "LEX_FUNCTION", 284,
+ "LEX_GETLINE", 285,
+ "LEX_IN", 286,
+ "LEX_AND", 287,
+ "LEX_OR", 288,
+ "INCREMENT", 289,
+ "DECREMENT", 290,
+ "LEX_BUILTIN", 291,
+ "LEX_LENGTH", 292,
+ "?", 63,
+ ":", 58,
+ "<", 60,
+ ">", 62,
+ "|", 124,
+ "+", 43,
+ "-", 45,
+ "*", 42,
+ "/", 47,
+ "%", 37,
+ "!", 33,
+ "UNARY", 293,
+ "^", 94,
+ "$", 36,
+ "(", 40,
+ ")", 41,
+ "-unknown-", -1 /* ends search: the negative t_val stops the lookup loops, and this name is what gets printed for a token not listed above */
+};
+
+char * yyreds[] = /* text of each grammar production, indexed by production number; printed by the "Reduce by" yydebug trace */
+{
+ "-no such reduction-", /* slot 0: a yydef[] value of 0 means syntax error, so there is no production 0 */
+ "start : opt_nls program opt_nls",
+ "program : rule",
+ "program : program rule",
+ "program : error",
+ "program : program error",
+ "rule : LEX_BEGIN",
+ "rule : LEX_BEGIN action",
+ "rule : LEX_END",
+ "rule : LEX_END action",
+ "rule : LEX_BEGIN statement_term",
+ "rule : LEX_END statement_term",
+ "rule : pattern action",
+ "rule : action",
+ "rule : pattern statement_term",
+ "rule : function_prologue function_body",
+ "func_name : NAME",
+ "func_name : FUNC_CALL",
+ "function_prologue : LEX_FUNCTION",
+ "function_prologue : LEX_FUNCTION func_name '(' opt_param_list r_paren opt_nls",
+ "function_body : l_brace statements r_brace opt_semi",
+ "pattern : exp",
+ "pattern : exp comma exp",
+ "regexp : '/'",
+ "regexp : '/' REGEXP '/'",
+ "action : l_brace statements r_brace opt_semi",
+ "action : l_brace r_brace opt_semi",
+ "statements : statement",
+ "statements : statements statement",
+ "statements : error",
+ "statements : statements error",
+ "statement_term : nls",
+ "statement_term : semi opt_nls",
+ "statement : semi opt_nls",
+ "statement : l_brace r_brace",
+ "statement : l_brace statements r_brace",
+ "statement : if_statement",
+ "statement : LEX_WHILE '(' exp r_paren opt_nls statement",
+ "statement : LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls",
+ "statement : LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement",
+ "statement : LEX_FOR '(' opt_exp semi exp semi opt_exp r_paren opt_nls statement",
+ "statement : LEX_FOR '(' opt_exp semi semi opt_exp r_paren opt_nls statement",
+ "statement : LEX_BREAK statement_term",
+ "statement : LEX_CONTINUE statement_term",
+ "statement : print '(' expression_list r_paren output_redir statement_term",
+ "statement : print opt_rexpression_list output_redir statement_term",
+ "statement : LEX_NEXT",
+ "statement : LEX_NEXT statement_term",
+ "statement : LEX_EXIT opt_exp statement_term",
+ "statement : LEX_RETURN",
+ "statement : LEX_RETURN opt_exp statement_term",
+ "statement : LEX_DELETE NAME '[' expression_list ']' statement_term",
+ "statement : exp statement_term",
+ "print : LEX_PRINT",
+ "print : LEX_PRINTF",
+ "if_statement : LEX_IF '(' exp r_paren opt_nls statement",
+ "if_statement : LEX_IF '(' exp r_paren opt_nls statement LEX_ELSE opt_nls statement",
+ "nls : NEWLINE",
+ "nls : nls NEWLINE",
+ "opt_nls : /* empty */",
+ "opt_nls : nls",
+ "input_redir : /* empty */",
+ "input_redir : '<' simp_exp",
+ "output_redir : /* empty */",
+ "output_redir : '>' exp",
+ "output_redir : APPEND_OP exp",
+ "output_redir : '|' exp",
+ "opt_param_list : /* empty */",
+ "opt_param_list : param_list",
+ "param_list : NAME",
+ "param_list : param_list comma NAME",
+ "param_list : error",
+ "param_list : param_list error",
+ "param_list : param_list comma error",
+ "opt_exp : /* empty */",
+ "opt_exp : exp",
+ "opt_rexpression_list : /* empty */",
+ "opt_rexpression_list : rexpression_list",
+ "rexpression_list : rexp",
+ "rexpression_list : rexpression_list comma rexp",
+ "rexpression_list : error",
+ "rexpression_list : rexpression_list error",
+ "rexpression_list : rexpression_list error rexp",
+ "rexpression_list : rexpression_list comma error",
+ "opt_expression_list : /* empty */",
+ "opt_expression_list : expression_list",
+ "expression_list : exp",
+ "expression_list : expression_list comma exp",
+ "expression_list : error",
+ "expression_list : expression_list error",
+ "expression_list : expression_list error exp",
+ "expression_list : expression_list comma error",
+ "exp : variable ASSIGNOP",
+ "exp : variable ASSIGNOP exp",
+ "exp : '(' expression_list r_paren LEX_IN NAME",
+ "exp : exp '|' LEX_GETLINE opt_variable",
+ "exp : LEX_GETLINE opt_variable input_redir",
+ "exp : exp LEX_AND exp",
+ "exp : exp LEX_OR exp",
+ "exp : exp MATCHOP exp",
+ "exp : regexp",
+ "exp : '!' regexp",
+ "exp : exp LEX_IN NAME",
+ "exp : exp RELOP exp",
+ "exp : exp '<' exp",
+ "exp : exp '>' exp",
+ "exp : exp '?' exp ':' exp",
+ "exp : simp_exp",
+ "exp : exp exp",
+ "rexp : variable ASSIGNOP",
+ "rexp : variable ASSIGNOP rexp",
+ "rexp : rexp LEX_AND rexp",
+ "rexp : rexp LEX_OR rexp",
+ "rexp : LEX_GETLINE opt_variable input_redir",
+ "rexp : regexp",
+ "rexp : '!' regexp",
+ "rexp : rexp MATCHOP rexp",
+ "rexp : rexp LEX_IN NAME",
+ "rexp : rexp RELOP rexp",
+ "rexp : rexp '?' rexp ':' rexp",
+ "rexp : simp_exp",
+ "rexp : rexp rexp",
+ "simp_exp : non_post_simp_exp",
+ "simp_exp : post_inc_dec_exp",
+ "simp_exp : simp_exp '^' simp_exp",
+ "simp_exp : simp_exp '*' simp_exp",
+ "simp_exp : simp_exp '/' simp_exp",
+ "simp_exp : simp_exp '%' simp_exp",
+ "simp_exp : simp_exp '+' simp_exp",
+ "simp_exp : simp_exp '-' simp_exp",
+ "non_post_simp_exp : '!' simp_exp",
+ "non_post_simp_exp : '(' exp r_paren",
+ "non_post_simp_exp : LEX_BUILTIN '(' opt_expression_list r_paren",
+ "non_post_simp_exp : LEX_LENGTH '(' opt_expression_list r_paren",
+ "non_post_simp_exp : LEX_LENGTH",
+ "non_post_simp_exp : FUNC_CALL '(' opt_expression_list r_paren",
+ "non_post_simp_exp : INCREMENT variable",
+ "non_post_simp_exp : DECREMENT variable",
+ "non_post_simp_exp : YNUMBER",
+ "non_post_simp_exp : YSTRING",
+ "non_post_simp_exp : '-' simp_exp",
+ "non_post_simp_exp : '+' simp_exp",
+ "post_inc_dec_exp : variable INCREMENT",
+ "post_inc_dec_exp : variable DECREMENT",
+ "post_inc_dec_exp : variable",
+ "opt_variable : /* empty */",
+ "opt_variable : variable",
+ "variable : NAME",
+ "variable : NAME '[' expression_list ']'",
+ "variable : '$' non_post_simp_exp",
+ "variable : '$' variable",
+ "l_brace : '{' opt_nls",
+ "r_brace : '}' opt_nls",
+ "r_paren : ')'",
+ "opt_semi : /* empty */",
+ "opt_semi : semi",
+ "semi : ';'",
+ "comma : ',' opt_nls",
+};
+#endif /* YYDEBUG */
+#line 1 "/usr/lib/yaccpar"
+/* @(#)yaccpar 1.10 89/04/04 SMI; from S5R3 1.10 */
+
+/*
+** Skeleton parser driver for yacc output
+*/
+
+/*
+** yacc user known macros and defines
+**
+** YYERROR       jump to yyerrlab inside yyparse to start error recovery
+** YYACCEPT      free both parser stacks (yys, yyv) and return 0 from yyparse
+** YYABORT       free both parser stacks (yys, yyv) and return 1 from yyparse
+** YYBACKUP      install newtoken/newvalue as the lookahead (yychar/yylval),
+**               reload yystate from the top of the state stack and resume
+**               at yynewstate; if a lookahead token is already held, or
+**               the production recorded in yytmp is not of length 1, it
+**               reports "syntax error - cannot backup" and goes to yyerrlab
+** YYRECOVERING  1 while yyerrflag is non-zero, otherwise 0
+**
+** YYACCEPT, YYABORT and YYBACKUP expand to bare { } blocks, so a use
+** written as "YYACCEPT;" must not be directly followed by an else.
+*/
+#define YYERROR goto yyerrlab
+#define YYACCEPT { free(yys); free(yyv); return(0); }
+#define YYABORT { free(yys); free(yyv); return(1); }
+#define YYBACKUP( newtoken, newvalue )\
+{\
+ if ( yychar >= 0 || ( yyr2[ yytmp ] >> 1 ) != 1 )\
+ {\
+ yyerror( "syntax error - cannot backup" );\
+ goto yyerrlab;\
+ }\
+ yychar = newtoken;\
+ yystate = *yyps;\
+ yylval = newvalue;\
+ goto yynewstate;\
+}
+#define YYRECOVERING() (!!yyerrflag)
+#ifndef YYDEBUG /* YYDEBUG was already given a default of 0 before yytoks[], so this branch is not taken in this file */
+# define YYDEBUG 1 /* make debugging available */
+#endif
+
+/*
+** user known globals
+*/
+int yydebug; /* set to 1 to get debugging; only tested in code compiled under #if YYDEBUG */
+
+/*
+** driver internal defines
+*/
+#define YYFLAG (-1000) /* yypact[] entries <= YYFLAG mark states that go straight to their default action */
+
+/*
+** static variables used by the parser
+*/
+static YYSTYPE *yyv; /* value stack; malloc'd in yyparse, freed by YYACCEPT/YYABORT */
+static int *yys; /* state stack; malloc'd in yyparse, freed by YYACCEPT/YYABORT */
+
+static YYSTYPE *yypv; /* top of value stack */
+static int *yyps; /* top of state stack */
+
+static int yystate; /* current state */
+static int yytmp; /* extra var (lasts between blocks): production number for the action switch, also read by YYBACKUP */
+
+int yynerrs; /* number of errors */
+
+int yyerrflag; /* error recovery flag: set to 3 when recovery starts, decremented on each successful shift */
+int yychar; /* current input token number; -1 = no token held, 0 = end of input */
+
+
+/*
+** yyparse - return 0 if worked, 1 if syntax error not recovered from
+*/
+int
+yyparse()
+{
+ register YYSTYPE *yypvt; /* top of value stack for $vars */
+ unsigned yymaxdepth = YYMAXDEPTH;
+
+ /*
+ ** Initialize externals - yyparse may be called more than once
+ */
+ yyv = (YYSTYPE*)malloc(yymaxdepth*sizeof(YYSTYPE));
+ yys = (int*)malloc(yymaxdepth*sizeof(int));
+ if (!yyv || !yys)
+ {
+ yyerror( "out of memory" );
+ return(1);
+ }
+ yypv = &yyv[-1];
+ yyps = &yys[-1];
+ yystate = 0;
+ yytmp = 0;
+ yynerrs = 0;
+ yyerrflag = 0;
+ yychar = -1;
+
+ goto yystack;
+ {
+ register YYSTYPE *yy_pv; /* top of value stack */
+ register int *yy_ps; /* top of state stack */
+ register int yy_state; /* current state */
+ register int yy_n; /* internal state number info */
+
+ /*
+ ** get globals into registers.
+ ** branch to here only if YYBACKUP was called.
+ */
+ yynewstate:
+ yy_pv = yypv;
+ yy_ps = yyps;
+ yy_state = yystate;
+ goto yy_newstate;
+
+ /*
+ ** get globals into registers.
+ ** either we just started, or we just finished a reduction
+ */
+ yystack:
+ yy_pv = yypv;
+ yy_ps = yyps;
+ yy_state = yystate;
+
+ /*
+ ** top of for (;;) loop while no reductions done
+ */
+ yy_stack:
+ /*
+ ** put a state and value onto the stacks
+ */
+#if YYDEBUG
+ /*
+ ** if debugging, look up token value in list of value vs.
+ ** name pairs. 0 and negative (-1) are special values.
+ ** Note: linear search is used since time is not a real
+ ** consideration while debugging.
+ */
+ if ( yydebug )
+ {
+ register int yy_i;
+
+ (void)printf( "State %d, token ", yy_state );
+ if ( yychar == 0 )
+ (void)printf( "end-of-file\n" );
+ else if ( yychar < 0 )
+ (void)printf( "-none-\n" );
+ else
+ {
+ for ( yy_i = 0; yytoks[yy_i].t_val >= 0;
+ yy_i++ )
+ {
+ if ( yytoks[yy_i].t_val == yychar )
+ break;
+ }
+ (void)printf( "%s\n", yytoks[yy_i].t_name );
+ }
+ }
+#endif /* YYDEBUG */
+ if ( ++yy_ps >= &yys[ yymaxdepth ] ) /* room on stack? */
+ {
+ /*
+ ** reallocate and recover. Note that pointers
+ ** have to be reset, or bad things will happen
+ */
+ int yyps_index = (yy_ps - yys);
+ int yypv_index = (yy_pv - yyv);
+ int yypvt_index = (yypvt - yyv);
+ yymaxdepth += YYMAXDEPTH;
+ yyv = (YYSTYPE*)realloc((char*)yyv,
+ yymaxdepth * sizeof(YYSTYPE));
+ yys = (int*)realloc((char*)yys,
+ yymaxdepth * sizeof(int));
+ if (!yyv || !yys)
+ {
+ yyerror( "yacc stack overflow" );
+ return(1);
+ }
+ yy_ps = yys + yyps_index;
+ yy_pv = yyv + yypv_index;
+ yypvt = yyv + yypvt_index;
+ }
+ *yy_ps = yy_state;
+ *++yy_pv = yyval;
+
+ /*
+ ** we have a new state - find out what to do
+ */
+ yy_newstate:
+ if ( ( yy_n = yypact[ yy_state ] ) <= YYFLAG )
+ goto yydefault; /* simple state */
+#if YYDEBUG
+ /*
+ ** if debugging, need to mark whether new token grabbed
+ */
+ yytmp = yychar < 0;
+#endif
+ if ( ( yychar < 0 ) && ( ( yychar = yylex() ) < 0 ) )
+ yychar = 0; /* reached EOF */
+#if YYDEBUG
+ if ( yydebug && yytmp )
+ {
+ register int yy_i;
+
+ (void)printf( "Received token " );
+ if ( yychar == 0 )
+ (void)printf( "end-of-file\n" );
+ else if ( yychar < 0 )
+ (void)printf( "-none-\n" );
+ else
+ {
+ for ( yy_i = 0; yytoks[yy_i].t_val >= 0;
+ yy_i++ )
+ {
+ if ( yytoks[yy_i].t_val == yychar )
+ break;
+ }
+ (void)printf( "%s\n", yytoks[yy_i].t_name );
+ }
+ }
+#endif /* YYDEBUG */
+ if ( ( ( yy_n += yychar ) < 0 ) || ( yy_n >= YYLAST ) )
+ goto yydefault;
+ if ( yychk[ yy_n = yyact[ yy_n ] ] == yychar ) /*valid shift*/
+ {
+ yychar = -1;
+ yyval = yylval;
+ yy_state = yy_n;
+ if ( yyerrflag > 0 )
+ yyerrflag--;
+ goto yy_stack;
+ }
+
+ yydefault:
+ if ( ( yy_n = yydef[ yy_state ] ) == -2 )
+ {
+#if YYDEBUG
+ yytmp = yychar < 0;
+#endif
+ if ( ( yychar < 0 ) && ( ( yychar = yylex() ) < 0 ) )
+ yychar = 0; /* reached EOF */
+#if YYDEBUG
+ if ( yydebug && yytmp )
+ {
+ register int yy_i;
+
+ (void)printf( "Received token " );
+ if ( yychar == 0 )
+ (void)printf( "end-of-file\n" );
+ else if ( yychar < 0 )
+ (void)printf( "-none-\n" );
+ else
+ {
+ for ( yy_i = 0;
+ yytoks[yy_i].t_val >= 0;
+ yy_i++ )
+ {
+ if ( yytoks[yy_i].t_val
+ == yychar )
+ {
+ break;
+ }
+ }
+ (void)printf( "%s\n", yytoks[yy_i].t_name );
+ }
+ }
+#endif /* YYDEBUG */
+ /*
+ ** look through exception table
+ */
+ {
+ register int *yyxi = yyexca;
+
+ while ( ( *yyxi != -1 ) ||
+ ( yyxi[1] != yy_state ) )
+ {
+ yyxi += 2;
+ }
+ while ( ( *(yyxi += 2) >= 0 ) &&
+ ( *yyxi != yychar ) )
+ ;
+ if ( ( yy_n = yyxi[1] ) < 0 )
+ YYACCEPT;
+ }
+ }
+
+ /*
+ ** check for syntax error
+ */
+ if ( yy_n == 0 ) /* have an error */
+ {
+ /* no worry about speed here! */
+ switch ( yyerrflag )
+ {
+ case 0: /* new error */
+ yyerror( "syntax error" );
+ goto skip_init;
+ yyerrlab:
+ /*
+ ** get globals into registers.
+ ** we have a user generated syntax type error
+ */
+ yy_pv = yypv;
+ yy_ps = yyps;
+ yy_state = yystate;
+ yynerrs++;
+ skip_init:
+ case 1:
+ case 2: /* incompletely recovered error */
+ /* try again... */
+ yyerrflag = 3;
+ /*
+ ** find state where "error" is a legal
+ ** shift action
+ */
+ while ( yy_ps >= yys )
+ {
+ yy_n = yypact[ *yy_ps ] + YYERRCODE;
+ if ( yy_n >= 0 && yy_n < YYLAST &&
+ yychk[yyact[yy_n]] == YYERRCODE) {
+ /*
+ ** simulate shift of "error"
+ */
+ yy_state = yyact[ yy_n ];
+ goto yy_stack;
+ }
+ /*
+ ** current state has no shift on
+ ** "error", pop stack
+ */
+#if YYDEBUG
+# define _POP_ "Error recovery pops state %d, uncovers state %d\n"
+ if ( yydebug )
+ (void)printf( _POP_, *yy_ps,
+ yy_ps[-1] );
+# undef _POP_
+#endif
+ yy_ps--;
+ yy_pv--;
+ }
+ /*
+ ** there is no state on stack with "error" as
+ ** a valid shift. give up.
+ */
+ YYABORT;
+ case 3: /* no shift yet; eat a token */
+#if YYDEBUG
+ /*
+ ** if debugging, look up token in list of
+ ** pairs. 0 and negative shouldn't occur,
+ ** but since timing doesn't matter when
+ ** debugging, it doesn't hurt to leave the
+ ** tests here.
+ */
+ if ( yydebug )
+ {
+ register int yy_i;
+
+ (void)printf( "Error recovery discards " );
+ if ( yychar == 0 )
+ (void)printf( "token end-of-file\n" );
+ else if ( yychar < 0 )
+ (void)printf( "token -none-\n" );
+ else
+ {
+ for ( yy_i = 0;
+ yytoks[yy_i].t_val >= 0;
+ yy_i++ )
+ {
+ if ( yytoks[yy_i].t_val
+ == yychar )
+ {
+ break;
+ }
+ }
+ (void)printf( "token %s\n",
+ yytoks[yy_i].t_name );
+ }
+ }
+#endif /* YYDEBUG */
+ if ( yychar == 0 ) /* reached EOF. quit */
+ YYABORT;
+ yychar = -1;
+ goto yy_newstate;
+ }
+ }/* end if ( yy_n == 0 ) */
+ /*
+ ** reduction by production yy_n
+ ** put stack tops, etc. so things right after switch
+ */
+#if YYDEBUG
+ /*
+ ** if debugging, print the string that is the user's
+ ** specification of the reduction which is just about
+ ** to be done.
+ */
+ if ( yydebug )
+ (void)printf( "Reduce by (%d) \"%s\"\n",
+ yy_n, yyreds[ yy_n ] );
+#endif
+ yytmp = yy_n; /* value to switch over */
+ yypvt = yy_pv; /* $vars top of value stack */
+ /*
+ ** Look in goto table for next state
+ ** Sorry about using yy_state here as temporary
+ ** register variable, but why not, if it works...
+ ** If yyr2[ yy_n ] doesn't have the low order bit
+ ** set, then there is no action to be done for
+ ** this reduction. So, no saving & unsaving of
+ ** registers done. The only difference between the
+ ** code just after the if and the body of the if is
+ ** the goto yy_stack in the body. This way the test
+ ** can be made before the choice of what to do is needed.
+ */
+ {
+ /* length of production doubled with extra bit */
+ register int yy_len = yyr2[ yy_n ];
+
+ if ( !( yy_len & 01 ) )
+ {
+ yy_len >>= 1;
+ yyval = ( yy_pv -= yy_len )[1]; /* $$ = $1 */
+ yy_state = yypgo[ yy_n = yyr1[ yy_n ] ] +
+ *( yy_ps -= yy_len ) + 1;
+ if ( yy_state >= YYLAST ||
+ yychk[ yy_state =
+ yyact[ yy_state ] ] != -yy_n )
+ {
+ yy_state = yyact[ yypgo[ yy_n ] ];
+ }
+ goto yy_stack;
+ }
+ yy_len >>= 1;
+ yyval = ( yy_pv -= yy_len )[1]; /* $$ = $1 */
+ yy_state = yypgo[ yy_n = yyr1[ yy_n ] ] +
+ *( yy_ps -= yy_len ) + 1;
+ if ( yy_state >= YYLAST ||
+ yychk[ yy_state = yyact[ yy_state ] ] != -yy_n )
+ {
+ yy_state = yyact[ yypgo[ yy_n ] ];
+ }
+ }
+ /* save until reenter driver code */
+ yystate = yy_state;
+ yyps = yy_ps;
+ yypv = yy_pv;
+ }
+ /*
+ ** code supplied by user is placed in this switch
+ */
+ switch( yytmp )
+ {
+
+case 1:
+# line 135 "awk.y"
+{ expression_value = yypvt[-1].nodeval; } break;
+case 2:
+# line 140 "awk.y"
+{
+ if (yypvt[-0].nodeval != NULL)
+ yyval.nodeval = yypvt[-0].nodeval;
+ else
+ yyval.nodeval = NULL;
+ yyerrok;
+ } break;
+case 3:
+# line 149 "awk.y"
+{
+ if (yypvt[-0].nodeval == NULL)
+ yyval.nodeval = yypvt[-1].nodeval;
+ else if (yypvt[-1].nodeval == NULL)
+ yyval.nodeval = yypvt[-0].nodeval;
+ else {
+ if (yypvt[-1].nodeval->type != Node_rule_list)
+ yypvt[-1].nodeval = node(yypvt[-1].nodeval, Node_rule_list,
+ (NODE*)NULL);
+ yyval.nodeval = append_right (yypvt[-1].nodeval,
+ node(yypvt[-0].nodeval, Node_rule_list,(NODE *) NULL));
+ }
+ yyerrok;
+ } break;
+case 4:
+# line 163 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 5:
+# line 164 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 6:
+# line 168 "awk.y"
+{ io_allowed = 0; } break;
+case 7:
+# line 170 "awk.y"
+{
+ if (begin_block) {
+ if (begin_block->type != Node_rule_list)
+ begin_block = node(begin_block, Node_rule_list,
+ (NODE *)NULL);
+ (void) append_right (begin_block, node(
+ node((NODE *)NULL, Node_rule_node, yypvt[-0].nodeval),
+ Node_rule_list, (NODE *)NULL) );
+ } else
+ begin_block = node((NODE *)NULL, Node_rule_node, yypvt[-0].nodeval);
+ yyval.nodeval = NULL;
+ io_allowed = 1;
+ yyerrok;
+ } break;
+case 8:
+# line 184 "awk.y"
+{ io_allowed = 0; } break;
+case 9:
+# line 186 "awk.y"
+{
+ if (end_block) {
+ if (end_block->type != Node_rule_list)
+ end_block = node(end_block, Node_rule_list,
+ (NODE *)NULL);
+ (void) append_right (end_block, node(
+ node((NODE *)NULL, Node_rule_node, yypvt[-0].nodeval),
+ Node_rule_list, (NODE *)NULL));
+ } else
+ end_block = node((NODE *)NULL, Node_rule_node, yypvt[-0].nodeval);
+ yyval.nodeval = NULL;
+ io_allowed = 1;
+ yyerrok;
+ } break;
+case 10:
+# line 201 "awk.y"
+{
+ warning("BEGIN blocks must have an action part");
+ errcount++;
+ yyerrok;
+ } break;
+case 11:
+# line 207 "awk.y"
+{
+ warning("END blocks must have an action part");
+ errcount++;
+ yyerrok;
+ } break;
+case 12:
+# line 213 "awk.y"
+{ yyval.nodeval = node (yypvt[-1].nodeval, Node_rule_node, yypvt[-0].nodeval); yyerrok; } break;
+case 13:
+# line 215 "awk.y"
+{ yyval.nodeval = node ((NODE *)NULL, Node_rule_node, yypvt[-0].nodeval); yyerrok; } break;
+case 14:
+# line 217 "awk.y"
+{
+ yyval.nodeval = node (yypvt[-1].nodeval,
+ Node_rule_node,
+ node(node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL),
+ Node_K_print,
+ (NODE *) NULL));
+ yyerrok;
+ } break;
+case 15:
+# line 230 "awk.y"
+{
+ func_install(yypvt[-1].nodeval, yypvt[-0].nodeval);
+ yyval.nodeval = NULL;
+ yyerrok;
+ } break;
+case 16:
+# line 239 "awk.y"
+{ yyval.sval = yypvt[-0].sval; } break;
+case 17:
+# line 241 "awk.y"
+{ yyval.sval = yypvt[-0].sval; } break;
+case 18:
+# line 246 "awk.y"
+{
+ param_counter = 0;
+ } break;
+case 19:
+# line 250 "awk.y"
+{
+ yyval.nodeval = append_right(make_param(yypvt[-4].sval), yypvt[-2].nodeval);
+ can_return = 1;
+ } break;
+case 20:
+# line 258 "awk.y"
+{
+ yyval.nodeval = yypvt[-2].nodeval;
+ can_return = 0;
+ } break;
+case 21:
+# line 267 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 22:
+# line 269 "awk.y"
+{ yyval.nodeval = mkrangenode ( node(yypvt[-2].nodeval, Node_cond_pair, yypvt[-0].nodeval) ); } break;
+case 23:
+# line 278 "awk.y"
+{ ++want_regexp; } break;
+case 24:
+# line 280 "awk.y"
+{
+ NODE *n;
+
+ getnode(n);
+ n->type = Node_regex;
+ n->re_exp = make_string(yypvt[-1].sval, strlen(yypvt[-1].sval));
+ n->re_reg = mk_re_parse(yypvt[-1].sval, 0);
+ n->re_text = NULL;
+ n->re_flags = CONST;
+ n->re_cnt = 1;
+ yyval.nodeval = n;
+ } break;
+case 25:
+# line 296 "awk.y"
+{ yyval.nodeval = yypvt[-2].nodeval ; } break;
+case 26:
+# line 298 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 27:
+# line 303 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 28:
+# line 305 "awk.y"
+{
+ if (yypvt[-1].nodeval == NULL || yypvt[-1].nodeval->type != Node_statement_list)
+ yypvt[-1].nodeval = node(yypvt[-1].nodeval, Node_statement_list,(NODE *)NULL);
+ yyval.nodeval = append_right(yypvt[-1].nodeval,
+ node( yypvt[-0].nodeval, Node_statement_list, (NODE *)NULL));
+ yyerrok;
+ } break;
+case 29:
+# line 313 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 30:
+# line 315 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 33:
+# line 325 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 34:
+# line 327 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 35:
+# line 329 "awk.y"
+{ yyval.nodeval = yypvt[-1].nodeval; } break;
+case 36:
+# line 331 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 37:
+# line 333 "awk.y"
+{ yyval.nodeval = node (yypvt[-3].nodeval, Node_K_while, yypvt[-0].nodeval); } break;
+case 38:
+# line 335 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_K_do, yypvt[-5].nodeval); } break;
+case 39:
+# line 337 "awk.y"
+{
+ yyval.nodeval = node (yypvt[-0].nodeval, Node_K_arrayfor, make_for_loop(variable(yypvt[-5].sval,1),
+ (NODE *)NULL, variable(yypvt[-3].sval,1)));
+ } break;
+case 40:
+# line 342 "awk.y"
+{
+ yyval.nodeval = node(yypvt[-0].nodeval, Node_K_for, (NODE *)make_for_loop(yypvt[-7].nodeval, yypvt[-5].nodeval, yypvt[-3].nodeval));
+ } break;
+case 41:
+# line 346 "awk.y"
+{
+ yyval.nodeval = node (yypvt[-0].nodeval, Node_K_for,
+ (NODE *)make_for_loop(yypvt[-6].nodeval, (NODE *)NULL, yypvt[-3].nodeval));
+ } break;
+case 42:
+# line 352 "awk.y"
+{ yyval.nodeval = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); } break;
+case 43:
+# line 355 "awk.y"
+{ yyval.nodeval = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); } break;
+case 44:
+# line 357 "awk.y"
+{ yyval.nodeval = node (yypvt[-3].nodeval, yypvt[-5].nodetypeval, yypvt[-1].nodeval); } break;
+case 45:
+# line 359 "awk.y"
+{
+ if (yypvt[-3].nodetypeval == Node_K_print && yypvt[-2].nodeval == NULL)
+ yypvt[-2].nodeval = node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL);
+
+ yyval.nodeval = node (yypvt[-2].nodeval, yypvt[-3].nodetypeval, yypvt[-1].nodeval);
+ } break;
+case 46:
+# line 370 "awk.y"
+{ if (! io_allowed) yyerror("next used in BEGIN or END action"); } break;
+case 47:
+# line 372 "awk.y"
+{ yyval.nodeval = node ((NODE *)NULL, Node_K_next, (NODE *)NULL); } break;
+case 48:
+# line 374 "awk.y"
+{ yyval.nodeval = node (yypvt[-1].nodeval, Node_K_exit, (NODE *)NULL); } break;
+case 49:
+# line 376 "awk.y"
+{ if (! can_return) yyerror("return used outside function context"); } break;
+case 50:
+# line 378 "awk.y"
+{ yyval.nodeval = node (yypvt[-1].nodeval, Node_K_return, (NODE *)NULL); } break;
+case 51:
+# line 380 "awk.y"
+{ yyval.nodeval = node (variable(yypvt[-4].sval,1), Node_K_delete, yypvt[-2].nodeval); } break;
+case 52:
+# line 382 "awk.y"
+{ yyval.nodeval = yypvt[-1].nodeval; } break;
+case 53:
+# line 387 "awk.y"
+{ yyval.nodetypeval = yypvt[-0].nodetypeval; } break;
+case 54:
+# line 389 "awk.y"
+{ yyval.nodetypeval = yypvt[-0].nodetypeval; } break;
+case 55:
+# line 394 "awk.y"
+{
+ yyval.nodeval = node(yypvt[-3].nodeval, Node_K_if,
+ node(yypvt[-0].nodeval, Node_if_branches, (NODE *)NULL));
+ } break;
+case 56:
+# line 400 "awk.y"
+{ yyval.nodeval = node (yypvt[-6].nodeval, Node_K_if,
+ node (yypvt[-3].nodeval, Node_if_branches, yypvt[-0].nodeval)); } break;
+case 57:
+# line 406 "awk.y"
+{ want_assign = 0; } break;
+case 61:
+# line 417 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 62:
+# line 419 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_input, (NODE *)NULL); } break;
+case 63:
+# line 424 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 64:
+# line 426 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_output, (NODE *)NULL); } break;
+case 65:
+# line 428 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_append, (NODE *)NULL); } break;
+case 66:
+# line 430 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_redirect_pipe, (NODE *)NULL); } break;
+case 67:
+# line 435 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 68:
+# line 437 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 69:
+# line 442 "awk.y"
+{ yyval.nodeval = make_param(yypvt[-0].sval); } break;
+case 70:
+# line 444 "awk.y"
+{ yyval.nodeval = append_right(yypvt[-2].nodeval, make_param(yypvt[-0].sval)); yyerrok; } break;
+case 71:
+# line 446 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 72:
+# line 448 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 73:
+# line 450 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 74:
+# line 456 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 75:
+# line 458 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 76:
+# line 463 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 77:
+# line 465 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 78:
+# line 470 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_expression_list, (NODE *)NULL); } break;
+case 79:
+# line 472 "awk.y"
+{
+ yyval.nodeval = append_right(yypvt[-2].nodeval,
+ node( yypvt[-0].nodeval, Node_expression_list, (NODE *)NULL));
+ yyerrok;
+ } break;
+case 80:
+# line 478 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 81:
+# line 480 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 82:
+# line 482 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 83:
+# line 484 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 84:
+# line 489 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 85:
+# line 491 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 86:
+# line 496 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_expression_list, (NODE *)NULL); } break;
+case 87:
+# line 498 "awk.y"
+{
+ yyval.nodeval = append_right(yypvt[-2].nodeval,
+ node( yypvt[-0].nodeval, Node_expression_list, (NODE *)NULL));
+ yyerrok;
+ } break;
+case 88:
+# line 504 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 89:
+# line 506 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 90:
+# line 508 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 91:
+# line 510 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 92:
+# line 515 "awk.y"
+{ want_assign = 0; } break;
+case 93:
+# line 517 "awk.y"
+{ yyval.nodeval = node (yypvt[-3].nodeval, yypvt[-2].nodetypeval, yypvt[-0].nodeval); } break;
+case 94:
+# line 519 "awk.y"
+{ yyval.nodeval = node (variable(yypvt[-0].sval,1), Node_in_array, yypvt[-3].nodeval); } break;
+case 95:
+# line 521 "awk.y"
+{
+ yyval.nodeval = node (yypvt[-0].nodeval, Node_K_getline,
+ node (yypvt[-3].nodeval, Node_redirect_pipein, (NODE *)NULL));
+ } break;
+case 96:
+# line 526 "awk.y"
+{
+ if (do_lint && ! io_allowed && yypvt[-0].nodeval == NULL)
+ warning("non-redirected getline undefined inside BEGIN or END action");
+ yyval.nodeval = node (yypvt[-1].nodeval, Node_K_getline, yypvt[-0].nodeval);
+ } break;
+case 97:
+# line 532 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_and, yypvt[-0].nodeval); } break;
+case 98:
+# line 534 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_or, yypvt[-0].nodeval); } break;
+case 99:
+# line 536 "awk.y"
+{
+ if (yypvt[-2].nodeval->type == Node_regex)
+ warning("Regular expression on left of MATCH operator.");
+ yyval.nodeval = node (yypvt[-2].nodeval, yypvt[-1].nodetypeval, mk_rexp(yypvt[-0].nodeval));
+ } break;
+case 100:
+# line 542 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 101:
+# line 544 "awk.y"
+{
+ yyval.nodeval = node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_nomatch,
+ yypvt[-0].nodeval);
+ } break;
+case 102:
+# line 552 "awk.y"
+{ yyval.nodeval = node (variable(yypvt[-0].sval,1), Node_in_array, yypvt[-2].nodeval); } break;
+case 103:
+# line 554 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, yypvt[-1].nodetypeval, yypvt[-0].nodeval); } break;
+case 104:
+# line 556 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_less, yypvt[-0].nodeval); } break;
+case 105:
+# line 558 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_greater, yypvt[-0].nodeval); } break;
+case 106:
+# line 560 "awk.y"
+{ yyval.nodeval = node(yypvt[-4].nodeval, Node_cond_exp, node(yypvt[-2].nodeval, Node_if_branches, yypvt[-0].nodeval));} break;
+case 107:
+# line 562 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 108:
+# line 564 "awk.y"
+{ yyval.nodeval = node (yypvt[-1].nodeval, Node_concat, yypvt[-0].nodeval); } break;
+case 109:
+# line 569 "awk.y"
+{ want_assign = 0; } break;
+case 110:
+# line 571 "awk.y"
+{ yyval.nodeval = node (yypvt[-3].nodeval, yypvt[-2].nodetypeval, yypvt[-0].nodeval); } break;
+case 111:
+# line 573 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_and, yypvt[-0].nodeval); } break;
+case 112:
+# line 575 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_or, yypvt[-0].nodeval); } break;
+case 113:
+# line 577 "awk.y"
+{
+ if (do_lint && ! io_allowed && yypvt[-0].nodeval == NULL)
+ warning("non-redirected getline undefined inside BEGIN or END action");
+ yyval.nodeval = node (yypvt[-1].nodeval, Node_K_getline, yypvt[-0].nodeval);
+ } break;
+case 114:
+# line 583 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 115:
+# line 585 "awk.y"
+{ yyval.nodeval = node((NODE *) NULL, Node_nomatch, yypvt[-0].nodeval); } break;
+case 116:
+# line 587 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, yypvt[-1].nodetypeval, mk_rexp(yypvt[-0].nodeval)); } break;
+case 117:
+# line 589 "awk.y"
+{ yyval.nodeval = node (variable(yypvt[-0].sval,1), Node_in_array, yypvt[-2].nodeval); } break;
+case 118:
+# line 591 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, yypvt[-1].nodetypeval, yypvt[-0].nodeval); } break;
+case 119:
+# line 593 "awk.y"
+{ yyval.nodeval = node(yypvt[-4].nodeval, Node_cond_exp, node(yypvt[-2].nodeval, Node_if_branches, yypvt[-0].nodeval));} break;
+case 120:
+# line 595 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 121:
+# line 597 "awk.y"
+{ yyval.nodeval = node (yypvt[-1].nodeval, Node_concat, yypvt[-0].nodeval); } break;
+case 124:
+# line 605 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_exp, yypvt[-0].nodeval); } break;
+case 125:
+# line 607 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_times, yypvt[-0].nodeval); } break;
+case 126:
+# line 609 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_quotient, yypvt[-0].nodeval); } break;
+case 127:
+# line 611 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_mod, yypvt[-0].nodeval); } break;
+case 128:
+# line 613 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_plus, yypvt[-0].nodeval); } break;
+case 129:
+# line 615 "awk.y"
+{ yyval.nodeval = node (yypvt[-2].nodeval, Node_minus, yypvt[-0].nodeval); } break;
+case 130:
+# line 620 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_not,(NODE *) NULL); } break;
+case 131:
+# line 622 "awk.y"
+{ yyval.nodeval = yypvt[-1].nodeval; } break;
+case 132:
+# line 624 "awk.y"
+{ yyval.nodeval = snode (yypvt[-1].nodeval, Node_builtin, (int) yypvt[-3].lval); } break;
+case 133:
+# line 626 "awk.y"
+{ yyval.nodeval = snode (yypvt[-1].nodeval, Node_builtin, (int) yypvt[-3].lval); } break;
+case 134:
+# line 628 "awk.y"
+{ yyval.nodeval = snode ((NODE *)NULL, Node_builtin, (int) yypvt[-0].lval); } break;
+case 135:
+# line 630 "awk.y"
+{
+ yyval.nodeval = node (yypvt[-1].nodeval, Node_func_call, make_string(yypvt[-3].sval, strlen(yypvt[-3].sval)));
+ } break;
+case 136:
+# line 634 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_preincrement, (NODE *)NULL); } break;
+case 137:
+# line 636 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_predecrement, (NODE *)NULL); } break;
+case 138:
+# line 638 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 139:
+# line 640 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 140:
+# line 643 "awk.y"
+{ if (yypvt[-0].nodeval->type == Node_val) {
+ yypvt[-0].nodeval->numbr = -(force_number(yypvt[-0].nodeval));
+ yyval.nodeval = yypvt[-0].nodeval;
+ } else
+ yyval.nodeval = node (yypvt[-0].nodeval, Node_unary_minus, (NODE *)NULL);
+ } break;
+case 141:
+# line 650 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 142:
+# line 655 "awk.y"
+{ yyval.nodeval = node (yypvt[-1].nodeval, Node_postincrement, (NODE *)NULL); } break;
+case 143:
+# line 657 "awk.y"
+{ yyval.nodeval = node (yypvt[-1].nodeval, Node_postdecrement, (NODE *)NULL); } break;
+case 145:
+# line 663 "awk.y"
+{ yyval.nodeval = NULL; } break;
+case 146:
+# line 665 "awk.y"
+{ yyval.nodeval = yypvt[-0].nodeval; } break;
+case 147:
+# line 670 "awk.y"
+{ yyval.nodeval = variable(yypvt[-0].sval,1); } break;
+case 148:
+# line 672 "awk.y"
+{
+ if (yypvt[-1].nodeval->rnode == NULL) {
+ yyval.nodeval = node (variable(yypvt[-3].sval,1), Node_subscript, yypvt[-1].nodeval->lnode);
+ freenode(yypvt[-1].nodeval);
+ } else
+ yyval.nodeval = node (variable(yypvt[-3].sval,1), Node_subscript, yypvt[-1].nodeval);
+ } break;
+case 149:
+# line 680 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_field_spec, (NODE *)NULL); } break;
+case 150:
+# line 682 "awk.y"
+{ yyval.nodeval = node (yypvt[-0].nodeval, Node_field_spec, (NODE *)NULL); } break;
+case 152:
+# line 690 "awk.y"
+{ yyerrok; } break;
+case 153:
+# line 694 "awk.y"
+{ yyerrok; } break;
+case 156:
+# line 703 "awk.y"
+{ yyerrok; want_assign = 0; } break;
+case 157:
+# line 706 "awk.y"
+{ yyerrok; } break;
+ }
+ goto yystack; /* reset registers in driver code */
+}
diff --git a/awk.y b/awk.y
index bcaf492f..37fa96f5 100644
--- a/awk.y
+++ b/awk.y
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -28,47 +28,45 @@
#define YYDEBUG 12
#endif
+#define YYMAXDEPTH 300
+
#include "awk.h"
-/*
- * This line is necessary since the Bison parser skeleton uses bcopy.
- * Systems without memcpy should use -DMEMCPY_MISSING, per the Makefile.
- * It should not hurt anything if Yacc is being used instead of Bison.
- */
-#define bcopy(s,d,n) memcpy((d),(s),(n))
-
-extern void msg();
-extern struct re_pattern_buffer *mk_re_parse();
-
-NODE *node();
-NODE *lookup();
-NODE *install();
-
-static NODE *snode();
-static NODE *mkrangenode();
-static FILE *pathopen();
-static NODE *make_for_loop();
-static NODE *append_right();
-static void func_install();
-static NODE *make_param();
-static int hashf();
-static void pop_params();
-static void pop_var();
-static int yylex ();
-static void yyerror();
+static void yyerror (); /* va_alist */
+static char *get_src_buf P((void));
+static int yylex P((void));
+static NODE *node_common P((NODETYPE op));
+static NODE *snode P((NODE *subn, NODETYPE op, int index));
+static NODE *mkrangenode P((NODE *cpair));
+static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
+static NODE *append_right P((NODE *list, NODE *new));
+static void func_install P((NODE *params, NODE *def));
+static void pop_var P((NODE *np, int freeit));
+static void pop_params P((NODE *params));
+static NODE *make_param P((char *name));
+static NODE *mk_rexp P((NODE *exp));
-static int want_regexp; /* lexical scanning kludge */
static int want_assign; /* lexical scanning kludge */
+static int want_regexp; /* lexical scanning kludge */
static int can_return; /* lexical scanning kludge */
static int io_allowed = 1; /* lexical scanning kludge */
-static int lineno = 1; /* for error msgs */
static char *lexptr; /* pointer to next char during parsing */
+static char *lexend;
static char *lexptr_begin; /* keep track of where we were for error msgs */
-static int curinfile = -1; /* index into sourcefiles[] */
+static char *lexeme; /* beginning of lexeme for debugging */
+static char *thisline = NULL;
+#define YYDEBUG_LEXER_TEXT (lexeme)
static int param_counter;
+static char *tokstart = NULL;
+static char *token = NULL;
+static char *tokend;
NODE *variables[HASHSIZE];
+extern char *source;
+extern int sourceline;
+extern char *cmdline_src;
+extern char **srcfiles;
extern int errcount;
extern NODE *begin_block;
extern NODE *end_block;
@@ -85,6 +83,7 @@ extern NODE *end_block;
%type <nodeval> function_prologue function_body
%type <nodeval> rexp exp start program rule simp_exp
+%type <nodeval> non_post_simp_exp post_inc_dec_exp
%type <nodeval> pattern
%type <nodeval> action variable param_list
%type <nodeval> rexpression_list opt_rexpression_list
@@ -92,12 +91,12 @@ extern NODE *end_block;
%type <nodeval> statements statement if_statement opt_param_list
%type <nodeval> opt_exp opt_variable regexp
%type <nodeval> input_redir output_redir
-%type <nodetypeval> r_paren comma nls opt_nls print
-
+%type <nodetypeval> print
%type <sval> func_name
+
%token <sval> FUNC_CALL NAME REGEXP
%token <lval> ERROR
-%token <nodeval> NUMBER YSTRING
+%token <nodeval> YNUMBER YSTRING
%token <nodetypeval> RELOP APPEND_OP
%token <nodetypeval> ASSIGNOP MATCHOP NEWLINE CONCAT_OP
%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
@@ -106,7 +105,7 @@ extern NODE *end_block;
%token <nodetypeval> LEX_GETLINE
%token <nodetypeval> LEX_IN
%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
-%token <ptrval> LEX_BUILTIN LEX_LENGTH
+%token <lval> LEX_BUILTIN LEX_LENGTH
/* these are just yylval numbers */
@@ -121,7 +120,7 @@ extern NODE *end_block;
%nonassoc MATCHOP
%nonassoc RELOP '<' '>' '|' APPEND_OP
%left CONCAT_OP
-%left YSTRING NUMBER
+%left YSTRING YNUMBER
%left '+' '-'
%left '*' '/' '%'
%right '!' UNARY
@@ -129,7 +128,6 @@ extern NODE *end_block;
%left INCREMENT DECREMENT
%left '$'
%left '(' ')'
-
%%
start
@@ -174,7 +172,7 @@ rule
if (begin_block->type != Node_rule_list)
begin_block = node(begin_block, Node_rule_list,
(NODE *)NULL);
- append_right (begin_block, node(
+ (void) append_right (begin_block, node(
node((NODE *)NULL, Node_rule_node, $3),
Node_rule_list, (NODE *)NULL) );
} else
@@ -190,7 +188,7 @@ rule
if (end_block->type != Node_rule_list)
end_block = node(end_block, Node_rule_list,
(NODE *)NULL);
- append_right (end_block, node(
+ (void) append_right (end_block, node(
node((NODE *)NULL, Node_rule_node, $3),
Node_rule_list, (NODE *)NULL));
} else
@@ -201,13 +199,13 @@ rule
}
| LEX_BEGIN statement_term
{
- msg ("error near line %d: BEGIN blocks must have an action part", lineno);
+ warning("BEGIN blocks must have an action part");
errcount++;
yyerrok;
}
| LEX_END statement_term
{
- msg ("error near line %d: END blocks must have an action part", lineno);
+ warning("END blocks must have an action part");
errcount++;
yyerrok;
}
@@ -216,7 +214,18 @@ rule
| action
{ $$ = node ((NODE *)NULL, Node_rule_node, $1); yyerrok; }
| pattern statement_term
- { if($1) $$ = node ($1, Node_rule_node, (NODE *)NULL); yyerrok; }
+ {
+ $$ = node ($1,
+ Node_rule_node,
+ node(node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL),
+ Node_K_print,
+ (NODE *) NULL));
+ yyerrok;
+ }
| function_prologue function_body
{
func_install($1, $2);
@@ -245,7 +254,7 @@ function_prologue
;
function_body
- : l_brace statements r_brace
+ : l_brace statements r_brace opt_semi
{
$$ = $2;
can_return = 0;
@@ -267,24 +276,26 @@ regexp
*/
: '/'
{ ++want_regexp; }
- REGEXP '/'
+ REGEXP '/'
{
- want_regexp = 0;
- $$ = node((NODE *)NULL,Node_regex,(NODE *)mk_re_parse($3, 0));
- $$ -> re_case = 0;
- emalloc ($$ -> re_text, char *, strlen($3)+1, "regexp");
- strcpy ($$ -> re_text, $3);
+ NODE *n;
+
+ getnode(n);
+ n->type = Node_regex;
+ n->re_exp = make_string($3, strlen($3));
+ n->re_reg = mk_re_parse($3, 0);
+ n->re_text = NULL;
+ n->re_flags = CONST;
+ n->re_cnt = 1;
+ $$ = n;
}
;
action
- : l_brace r_brace opt_semi
- {
- /* empty actions are different from missing actions */
- $$ = node ((NODE *) NULL, Node_illegal, (NODE *) NULL);
- }
- | l_brace statements r_brace opt_semi
+ : l_brace statements r_brace opt_semi
{ $$ = $2 ; }
+ | l_brace r_brace opt_semi
+ { $$ = NULL; }
;
statements
@@ -306,12 +317,9 @@ statements
statement_term
: nls
- { $<nodetypeval>$ = Node_illegal; }
| semi opt_nls
- { $<nodetypeval>$ = Node_illegal; }
;
-
statement
: semi opt_nls
{ $$ = NULL; }
@@ -327,8 +335,8 @@ statement
{ $$ = node ($6, Node_K_do, $3); }
| LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
{
- $$ = node ($8, Node_K_arrayfor, make_for_loop(variable($3),
- (NODE *)NULL, variable($5)));
+ $$ = node ($8, Node_K_arrayfor, make_for_loop(variable($3,1),
+ (NODE *)NULL, variable($5,1)));
}
| LEX_FOR '(' opt_exp semi exp semi opt_exp r_paren opt_nls statement
{
@@ -348,7 +356,16 @@ statement
| print '(' expression_list r_paren output_redir statement_term
{ $$ = node ($3, $1, $5); }
| print opt_rexpression_list output_redir statement_term
- { $$ = node ($2, $1, $3); }
+ {
+ if ($1 == Node_K_print && $2 == NULL)
+ $2 = node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL);
+
+ $$ = node ($2, $1, $3);
+ }
| LEX_NEXT
{ if (! io_allowed) yyerror("next used in BEGIN or END action"); }
statement_term
@@ -360,7 +377,7 @@ statement
opt_exp statement_term
{ $$ = node ($3, Node_K_return, (NODE *)NULL); }
| LEX_DELETE NAME '[' expression_list ']' statement_term
- { $$ = node (variable($2), Node_K_delete, $4); }
+ { $$ = node (variable($2,1), Node_K_delete, $4); }
| exp statement_term
{ $$ = $1; }
;
@@ -386,16 +403,13 @@ if_statement
nls
: NEWLINE
- { $<nodetypeval>$ = NULL; }
+ { want_assign = 0; }
| nls NEWLINE
- { $<nodetypeval>$ = NULL; }
;
opt_nls
: /* empty */
- { $<nodetypeval>$ = NULL; }
| nls
- { $<nodetypeval>$ = NULL; }
;
input_redir
@@ -497,12 +511,12 @@ expression_list
;
/* Expressions, not including the comma operator. */
-exp : variable ASSIGNOP
+exp : variable ASSIGNOP
{ want_assign = 0; }
- exp
+ exp
{ $$ = node ($1, $2, $4); }
| '(' expression_list r_paren LEX_IN NAME
- { $$ = node (variable($5), Node_in_array, $2); }
+ { $$ = node (variable($5,1), Node_in_array, $2); }
| exp '|' LEX_GETLINE opt_variable
{
$$ = node ($4, Node_K_getline,
@@ -510,11 +524,8 @@ exp : variable ASSIGNOP
}
| LEX_GETLINE opt_variable input_redir
{
- /* "too painful to do right" */
- /*
- if (! io_allowed && $3 == NULL)
- yyerror("non-redirected getline illegal inside BEGIN or END action");
- */
+ if (do_lint && ! io_allowed && $3 == NULL)
+ warning("non-redirected getline undefined inside BEGIN or END action");
$$ = node ($2, Node_K_getline, $3);
}
| exp LEX_AND exp
@@ -522,13 +533,23 @@ exp : variable ASSIGNOP
| exp LEX_OR exp
{ $$ = node ($1, Node_or, $3); }
| exp MATCHOP exp
- { $$ = node ($1, $2, $3); }
+ {
+ if ($1->type == Node_regex)
+ warning("Regular expression on left of MATCH operator.");
+ $$ = node ($1, $2, mk_rexp($3));
+ }
| regexp
{ $$ = $1; }
| '!' regexp %prec UNARY
- { $$ = node((NODE *) NULL, Node_nomatch, $2); }
+ {
+ $$ = node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_nomatch,
+ $2);
+ }
| exp LEX_IN NAME
- { $$ = node (variable($3), Node_in_array, $1); }
+ { $$ = node (variable($3,1), Node_in_array, $1); }
| exp RELOP exp
{ $$ = node ($1, $2, $3); }
| exp '<' exp
@@ -544,9 +565,9 @@ exp : variable ASSIGNOP
;
rexp
- : variable ASSIGNOP
+ : variable ASSIGNOP
{ want_assign = 0; }
- rexp
+ rexp
{ $$ = node ($1, $2, $4); }
| rexp LEX_AND rexp
{ $$ = node ($1, Node_and, $3); }
@@ -554,11 +575,8 @@ rexp
{ $$ = node ($1, Node_or, $3); }
| LEX_GETLINE opt_variable input_redir
{
- /* "too painful to do right" */
- /*
- if (! io_allowed && $3 == NULL)
- yyerror("non-redirected getline illegal inside BEGIN or END action");
- */
+ if (do_lint && ! io_allowed && $3 == NULL)
+ warning("non-redirected getline undefined inside BEGIN or END action");
$$ = node ($2, Node_K_getline, $3);
}
| regexp
@@ -566,9 +584,9 @@ rexp
| '!' regexp %prec UNARY
{ $$ = node((NODE *) NULL, Node_nomatch, $2); }
| rexp MATCHOP rexp
- { $$ = node ($1, $2, $3); }
+ { $$ = node ($1, $2, mk_rexp($3)); }
| rexp LEX_IN NAME
- { $$ = node (variable($3), Node_in_array, $1); }
+ { $$ = node (variable($3,1), Node_in_array, $1); }
| rexp RELOP rexp
{ $$ = node ($1, $2, $3); }
| rexp '?' rexp ':' rexp
@@ -580,16 +598,34 @@ rexp
;
simp_exp
+ : non_post_simp_exp
+ | post_inc_dec_exp
+ /* Binary operators in order of decreasing precedence. */
+ | simp_exp '^' simp_exp
+ { $$ = node ($1, Node_exp, $3); }
+ | simp_exp '*' simp_exp
+ { $$ = node ($1, Node_times, $3); }
+ | simp_exp '/' simp_exp
+ { $$ = node ($1, Node_quotient, $3); }
+ | simp_exp '%' simp_exp
+ { $$ = node ($1, Node_mod, $3); }
+ | simp_exp '+' simp_exp
+ { $$ = node ($1, Node_plus, $3); }
+ | simp_exp '-' simp_exp
+ { $$ = node ($1, Node_minus, $3); }
+ ;
+
+non_post_simp_exp
: '!' simp_exp %prec UNARY
{ $$ = node ($2, Node_not,(NODE *) NULL); }
| '(' exp r_paren
{ $$ = $2; }
| LEX_BUILTIN '(' opt_expression_list r_paren
- { $$ = snode ($3, Node_builtin, $1); }
+ { $$ = snode ($3, Node_builtin, (int) $1); }
| LEX_LENGTH '(' opt_expression_list r_paren
- { $$ = snode ($3, Node_builtin, $1); }
+ { $$ = snode ($3, Node_builtin, (int) $1); }
| LEX_LENGTH
- { $$ = snode ((NODE *)NULL, Node_builtin, $1); }
+ { $$ = snode ((NODE *)NULL, Node_builtin, (int) $1); }
| FUNC_CALL '(' opt_expression_list r_paren
{
$$ = node ($3, Node_func_call, make_string($1, strlen($1)));
@@ -598,36 +634,30 @@ simp_exp
{ $$ = node ($2, Node_preincrement, (NODE *)NULL); }
| DECREMENT variable
{ $$ = node ($2, Node_predecrement, (NODE *)NULL); }
- | variable INCREMENT
- { $$ = node ($1, Node_postincrement, (NODE *)NULL); }
- | variable DECREMENT
- { $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
- | variable
- { $$ = $1; }
- | NUMBER
+ | YNUMBER
{ $$ = $1; }
| YSTRING
{ $$ = $1; }
- /* Binary operators in order of decreasing precedence. */
- | simp_exp '^' simp_exp
- { $$ = node ($1, Node_exp, $3); }
- | simp_exp '*' simp_exp
- { $$ = node ($1, Node_times, $3); }
- | simp_exp '/' simp_exp
- { $$ = node ($1, Node_quotient, $3); }
- | simp_exp '%' simp_exp
- { $$ = node ($1, Node_mod, $3); }
- | simp_exp '+' simp_exp
- { $$ = node ($1, Node_plus, $3); }
- | simp_exp '-' simp_exp
- { $$ = node ($1, Node_minus, $3); }
| '-' simp_exp %prec UNARY
- { $$ = node ($2, Node_unary_minus, (NODE *)NULL); }
+ { if ($2->type == Node_val) {
+ $2->numbr = -(force_number($2));
+ $$ = $2;
+ } else
+ $$ = node ($2, Node_unary_minus, (NODE *)NULL);
+ }
| '+' simp_exp %prec UNARY
{ $$ = $2; }
;
+post_inc_dec_exp
+ : variable INCREMENT
+ { $$ = node ($1, Node_postincrement, (NODE *)NULL); }
+ | variable DECREMENT
+ { $$ = node ($1, Node_postdecrement, (NODE *)NULL); }
+ | variable
+ ;
+
opt_variable
: /* empty */
{ $$ = NULL; }
@@ -637,11 +667,19 @@ opt_variable
variable
: NAME
- { want_assign = 1; $$ = variable ($1); }
+ { $$ = variable($1,1); }
| NAME '[' expression_list ']'
- { want_assign = 1; $$ = node (variable($1), Node_subscript, $3); }
- | '$' simp_exp
- { want_assign = 1; $$ = node ($2, Node_field_spec, (NODE *)NULL); }
+ {
+ if ($3->rnode == NULL) {
+ $$ = node (variable($1,1), Node_subscript, $3->lnode);
+ freenode($3);
+ } else
+ $$ = node (variable($1,1), Node_subscript, $3);
+ }
+ | '$' non_post_simp_exp
+ { $$ = node ($2, Node_field_spec, (NODE *)NULL); }
+ | '$' variable
+ { $$ = node ($2, Node_field_spec, (NODE *)NULL); }
;
l_brace
@@ -653,7 +691,7 @@ r_brace
;
r_paren
- : ')' { $<nodetypeval>$ = Node_illegal; yyerrok; }
+ : ')' { yyerrok; }
;
opt_semi
@@ -662,10 +700,10 @@ opt_semi
;
semi
- : ';' { yyerrok; }
+ : ';' { yyerrok; want_assign = 0; }
;
-comma : ',' opt_nls { $<nodetypeval>$ = Node_illegal; yyerrok; }
+comma : ',' opt_nls { yyerrok; }
;
%%
@@ -674,7 +712,13 @@ struct token {
char *operator; /* text to match */
NODETYPE value; /* node type */
int class; /* lexical class */
- short nostrict; /* ignore if in strict compatibility mode */
+ unsigned flags; /* # of args. allowed and compatibility */
+# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination) */
+# define A(n) (1<<(n))
+# define VERSION 0xFF00 /* old awk is zero */
+# define NOT_OLD 0x0100 /* feature not in old awk */
+# define NOT_POSIX 0x0200 /* feature not in POSIX */
+# define GAWK 0x0400 /* gawk extension */
NODE *(*ptr) (); /* function that implements this keyword */
};
@@ -684,66 +728,55 @@ extern NODE
*do_split(), *do_system(), *do_int(), *do_close(),
*do_atan2(), *do_sin(), *do_cos(), *do_rand(),
*do_srand(), *do_match(), *do_tolower(), *do_toupper(),
- *do_sub(), *do_gsub();
-
-/* Special functions for debugging */
-#ifdef DEBUG
-NODE *do_prvars(), *do_bp();
-#endif
+ *do_sub(), *do_gsub(), *do_strftime(), *do_systime();
/* Tokentab is sorted ascii ascending order, so it can be binary searched. */
static struct token tokentab[] = {
- { "BEGIN", Node_illegal, LEX_BEGIN, 0, 0 },
- { "END", Node_illegal, LEX_END, 0, 0 },
- { "atan2", Node_builtin, LEX_BUILTIN, 0, do_atan2 },
-#ifdef DEBUG
- { "bp", Node_builtin, LEX_BUILTIN, 0, do_bp },
-#endif
- { "break", Node_K_break, LEX_BREAK, 0, 0 },
- { "close", Node_builtin, LEX_BUILTIN, 0, do_close },
- { "continue", Node_K_continue, LEX_CONTINUE, 0, 0 },
- { "cos", Node_builtin, LEX_BUILTIN, 0, do_cos },
- { "delete", Node_K_delete, LEX_DELETE, 0, 0 },
- { "do", Node_K_do, LEX_DO, 0, 0 },
- { "else", Node_illegal, LEX_ELSE, 0, 0 },
- { "exit", Node_K_exit, LEX_EXIT, 0, 0 },
- { "exp", Node_builtin, LEX_BUILTIN, 0, do_exp },
- { "for", Node_K_for, LEX_FOR, 0, 0 },
- { "func", Node_K_function, LEX_FUNCTION, 0, 0 },
- { "function", Node_K_function, LEX_FUNCTION, 0, 0 },
- { "getline", Node_K_getline, LEX_GETLINE, 0, 0 },
- { "gsub", Node_builtin, LEX_BUILTIN, 0, do_gsub },
- { "if", Node_K_if, LEX_IF, 0, 0 },
- { "in", Node_illegal, LEX_IN, 0, 0 },
- { "index", Node_builtin, LEX_BUILTIN, 0, do_index },
- { "int", Node_builtin, LEX_BUILTIN, 0, do_int },
- { "length", Node_builtin, LEX_LENGTH, 0, do_length },
- { "log", Node_builtin, LEX_BUILTIN, 0, do_log },
- { "match", Node_builtin, LEX_BUILTIN, 0, do_match },
- { "next", Node_K_next, LEX_NEXT, 0, 0 },
- { "print", Node_K_print, LEX_PRINT, 0, 0 },
- { "printf", Node_K_printf, LEX_PRINTF, 0, 0 },
-#ifdef DEBUG
- { "prvars", Node_builtin, LEX_BUILTIN, 0, do_prvars },
-#endif
- { "rand", Node_builtin, LEX_BUILTIN, 0, do_rand },
- { "return", Node_K_return, LEX_RETURN, 0, 0 },
- { "sin", Node_builtin, LEX_BUILTIN, 0, do_sin },
- { "split", Node_builtin, LEX_BUILTIN, 0, do_split },
- { "sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf },
- { "sqrt", Node_builtin, LEX_BUILTIN, 0, do_sqrt },
- { "srand", Node_builtin, LEX_BUILTIN, 0, do_srand },
- { "sub", Node_builtin, LEX_BUILTIN, 0, do_sub },
- { "substr", Node_builtin, LEX_BUILTIN, 0, do_substr },
- { "system", Node_builtin, LEX_BUILTIN, 0, do_system },
- { "tolower", Node_builtin, LEX_BUILTIN, 0, do_tolower },
- { "toupper", Node_builtin, LEX_BUILTIN, 0, do_toupper },
- { "while", Node_K_while, LEX_WHILE, 0, 0 },
+{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0},
+{"END", Node_illegal, LEX_END, 0, 0},
+{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
+{"break", Node_K_break, LEX_BREAK, 0, 0},
+{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_close},
+{"continue", Node_K_continue, LEX_CONTINUE, 0, 0},
+{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
+{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0},
+{"do", Node_K_do, LEX_DO, NOT_OLD, 0},
+{"else", Node_illegal, LEX_ELSE, 0, 0},
+{"exit", Node_K_exit, LEX_EXIT, 0, 0},
+{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
+{"for", Node_K_for, LEX_FOR, 0, 0},
+{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
+{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
+{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
+{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
+{"if", Node_K_if, LEX_IF, 0, 0},
+{"in", Node_illegal, LEX_IN, 0, 0},
+{"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
+{"int", Node_builtin, LEX_BUILTIN, A(1), do_int},
+{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length},
+{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
+{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match},
+{"next", Node_K_next, LEX_NEXT, 0, 0},
+{"print", Node_K_print, LEX_PRINT, 0, 0},
+{"printf", Node_K_printf, LEX_PRINTF, 0, 0},
+{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
+{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0},
+{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
+{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split},
+{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
+{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
+{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
+{"strftime", Node_builtin, LEX_BUILTIN, GAWK|A(1)|A(2), do_strftime},
+{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
+{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
+{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
+{"systime", Node_builtin, LEX_BUILTIN, GAWK|A(0), do_systime},
+{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
+{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
+{"while", Node_K_while, LEX_WHILE, 0, 0},
};
-static char *token_start;
-
/* VARARGS0 */
static void
yyerror(va_alist)
@@ -756,193 +789,207 @@ va_dcl
errcount++;
/* Find the current line in the input file */
- if (! lexptr) {
- beg = "(END OF FILE)";
- ptr = beg + 13;
- } else {
- if (*lexptr == '\n' && lexptr != lexptr_begin)
- --lexptr;
- for (beg = lexptr; beg != lexptr_begin && *beg != '\n'; --beg)
- ;
+ if (lexptr) {
+ if (!thisline) {
+ for (beg = lexeme; beg != lexptr_begin && *beg != '\n'; --beg)
+ ;
+ if (*beg == '\n')
+ beg++;
+ thisline = beg;
+ }
/* NL isn't guaranteed */
- for (ptr = lexptr; *ptr && *ptr != '\n'; ptr++)
- ;
- if (beg != lexptr_begin)
- beg++;
+ ptr = lexeme;
+ while (ptr < lexend && *ptr && *ptr != '\n')
+ ptr++;
+ } else {
+ thisline = "(END OF FILE)";
+ ptr = thisline + 13;
+ }
+ msg("syntax error");
+ fprintf(stderr, "%.*s\n", (int) (ptr - thisline), thisline);
+ if (lexptr) {
+ scan = thisline;
+ while (scan < lexeme)
+ if (*scan++ == '\t')
+ putc('\t', stderr);
+ else
+ putc(' ', stderr);
+ putc('^', stderr);
+ putc(' ', stderr);
}
- msg("syntax error near line %d:\n%.*s", lineno, ptr - beg, beg);
- scan = beg;
- while (scan < token_start)
- if (*scan++ == '\t')
- putc('\t', stderr);
- else
- putc(' ', stderr);
- putc('^', stderr);
- putc(' ', stderr);
va_start(args);
mesg = va_arg(args, char *);
vfprintf(stderr, mesg, args);
va_end(args);
putc('\n', stderr);
- exit(1);
+ exit(2);
}
-/*
- * Parse a C escape sequence. STRING_PTR points to a variable containing a
- * pointer to the string to parse. That pointer is updated past the
- * characters we use. The value of the escape sequence is returned.
- *
- * A negative value means the sequence \ newline was seen, which is supposed to
- * be equivalent to nothing at all.
- *
- * If \ is followed by a null character, we return a negative value and leave
- * the string pointer pointing at the null character.
- *
- * If \ is followed by 000, we return 0 and leave the string pointer after the
- * zeros. A value of 0 does not mean end of string.
- */
-
-int
-parse_escape(string_ptr)
-char **string_ptr;
+static char * /* refills the lexer's source buffer; returns NULL once all source is consumed */
+get_src_buf()
{
- register int c = *(*string_ptr)++;
- register int i;
- register int count;
-
- switch (c) {
- case 'a':
- return BELL;
- case 'b':
- return '\b';
- case 'f':
- return '\f';
- case 'n':
- return '\n';
- case 'r':
- return '\r';
- case 't':
- return '\t';
- case 'v':
- return '\v';
- case '\n':
- return -2;
- case 0:
- (*string_ptr)--;
- return -1;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- i = c - '0';
- count = 0;
- while (++count < 3) {
- if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
- i *= 8;
- i += c - '0';
- } else {
- (*string_ptr)--;
- break;
- }
+ static int samefile = 0;
+ static int nextfile = 0;
+ static char *buf = NULL;
+ static int fd;
+ int n;
+ register char *scan;
+ static int len = 0;
+ static int did_newline = 0;
+# define SLOP 128 /* enough space to hold most source lines */
+
+ if (cmdline_src) {
+ if (len == 0) {
+ len = strlen(cmdline_src);
+ if (len == 0)
+ cmdline_src = NULL;
+ sourceline = 1;
+ lexptr = lexptr_begin = cmdline_src;
+ lexend = lexptr + len;
+ } else if (!did_newline && *(lexptr-1) != '\n') {
+ /*
+ * The following goop is to ensure that the source
+ * ends with a newline and that the entire current
+ * line is available for error messages.
+ */
+ int offset;
+
+ did_newline = 1;
+ offset = lexptr - lexeme;
+ for (scan = lexeme; scan > lexptr_begin; scan--)
+ if (*scan == '\n') {
+ scan++;
+ break;
+ }
+ len = lexptr - scan;
+ emalloc(buf, char *, len+1, "get_src_buf");
+ memcpy(buf, scan, len);
+ thisline = buf;
+ lexptr = buf + len;
+ *lexptr = '\n';
+ lexeme = lexptr - offset;
+ lexptr_begin = buf;
+ lexend = lexptr + 1;
+ } else
+ lexptr = lexptr_begin = NULL;
+ return lexptr;
+ }
+ if (!samefile) {
+ source = srcfiles[nextfile];
+ if (source == NULL) {
+ if (buf)
+ free(buf);
+ return lexptr = lexptr_begin = NULL;
}
- return i;
- case 'x':
- i = 0;
- while (1) {
- if (isxdigit((c = *(*string_ptr)++))) {
- if (isdigit(c))
- i += c - '0';
- else if (isupper(c))
- i += c - 'A' + 10;
- else
- i += c - 'a' + 10;
- } else {
- (*string_ptr)--;
+ fd = pathopen(source);
+ if (fd == -1)
+ fatal("can't open source file \"%s\" for reading (%s)",
+ source, strerror(errno));
+ len = optimal_bufsize(fd);
+ if (buf)
+ free(buf);
+ emalloc(buf, char *, len + SLOP, "get_src_buf");
+ lexptr_begin = buf + SLOP;
+ samefile = 1;
+ sourceline = 1;
+ } else {
+ /*
+ * Here, we retain the current source line (up to length SLOP)
+ * in the beginning of the buffer that was overallocated above
+ */
+ int offset;
+ int linelen;
+
+ offset = lexptr - lexeme;
+ for (scan = lexeme; scan > lexptr_begin; scan--)
+ if (*scan == '\n') {
+ scan++;
break;
}
- }
- return i;
- default:
- return c;
+ linelen = lexptr - scan;
+ if (linelen > SLOP)
+ linelen = SLOP; /* clamp the saved line, not the read size, so thisline stays inside buf */
+ thisline = buf + SLOP - linelen;
+ memcpy(thisline, scan, linelen);
+ lexeme = buf + SLOP - offset;
+ lexptr_begin = thisline;
+ }
+ n = read(fd, buf + SLOP, len);
+ if (n == -1)
+ fatal("can't read sourcefile \"%s\" (%s)",
+ source, strerror(errno));
+ if (n == 0) {
+ samefile = 0;
+ nextfile++; /* NOTE(review): fd is not closed before advancing to the next file -- confirm who owns it */
+ return get_src_buf();
}
+ lexptr = buf + SLOP;
+ lexend = lexptr + n;
+ return buf;
}
+#define tokadd(x) (*token++ = (x), token == tokend ? tokexpand() : token)
+
+char * /* grows the token buffer; returns the updated write pointer `token` */
+tokexpand()
+{
+ static int toksize = 60; /* doubled before every allocation, so the first buffer is 120 bytes */
+ int tokoffset;
+
+ tokoffset = token - tokstart; /* remember the write position relative to the buffer start */
+ toksize *= 2;
+ if (tokstart)
+ erealloc(tokstart, char *, toksize, "tokexpand"); /* presumably may relocate the buffer -- hence the offset */
+ else
+ emalloc(tokstart, char *, toksize, "tokexpand"); /* first call: nothing allocated yet */
+ tokend = tokstart + toksize;
+ token = tokstart + tokoffset; /* re-anchor the write pointer in the new buffer */
+ return token;
+}
+
+#ifdef DEBUG /* DEBUG builds get nextc() as a real function; otherwise the same logic is a macro */
+char
+nextc() {
+ if (lexptr && lexptr < lexend) /* characters remain in the current buffer */
+ return *lexptr++;
+ if (get_src_buf()) /* buffer exhausted: try to load more source text */
+ return *lexptr++;
+ return '\0'; /* no more source: callers treat '\0' as end of input */
+}
+#else
+#define nextc() ((lexptr && lexptr < lexend) ? \
+ *lexptr++ : \
+ (get_src_buf() ? *lexptr++ : '\0') \
+ )
+#endif
+#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr) /* un-read one char, never before lexptr_begin */
+
/*
- * Read the input and turn it into tokens. Input is now read from a file
- * instead of from malloc'ed memory. The main program takes a program
- * passed as a command line argument and writes it to a temp file. Otherwise
- * the file name is made available in an external variable.
+ * Read the input and turn it into tokens.
*/
static int
yylex()
{
register int c;
- register int namelen;
- register char *tokstart;
- char *tokkey;
- static did_newline = 0; /* the grammar insists that actions end
- * with newlines. This was easier than
- * hacking the grammar. */
int seen_e = 0; /* These are for numbers */
int seen_point = 0;
- int esc_seen;
- extern char **sourcefile;
- extern int tempsource, numfiles;
- static int file_opened = 0;
- static FILE *fin;
- static char cbuf[BUFSIZ];
+ int esc_seen; /* for literal strings */
int low, mid, high;
-#ifdef DEBUG
- extern int debugging;
-#endif
-
- if (! file_opened) {
- file_opened = 1;
-#ifdef DEBUG
- if (debugging) {
- int i;
-
- for (i = 0; i <= numfiles; i++)
- fprintf (stderr, "sourcefile[%d] = %s\n", i,
- sourcefile[i]);
- }
-#endif
- nextfile:
- if ((fin = pathopen (sourcefile[++curinfile])) == NULL)
- fatal("cannot open `%s' for reading (%s)",
- sourcefile[curinfile],
- strerror(errno));
- *(lexptr = cbuf) = '\0';
- /*
- * immediately unlink the tempfile so that it will
- * go away cleanly if we bomb.
- */
- if (tempsource && curinfile == 0)
- (void) unlink (sourcefile[curinfile]);
- }
-
-retry:
- if (! *lexptr)
- if (fgets (cbuf, sizeof cbuf, fin) == NULL) {
- if (fin != NULL)
- fclose (fin); /* be neat and clean */
- if (curinfile < numfiles)
- goto nextfile;
- return 0;
- } else
- lexptr = lexptr_begin = cbuf;
+ static int did_newline = 0;
+ char *tokkey;
+ if (!nextc())
+ return 0;
+ pushback();
+ lexeme = lexptr;
+ thisline = NULL;
if (want_regexp) {
int in_brack = 0;
want_regexp = 0;
- token_start = tokstart = lexptr;
- while (c = *lexptr++) {
+ token = tokstart;
+ while (c = nextc()) {
switch (c) {
case '[':
in_brack = 1;
@@ -951,189 +998,212 @@ retry:
in_brack = 0;
break;
case '\\':
- if (*lexptr++ == '\0') {
- yyerror("unterminated regexp ends with \\");
- return ERROR;
- } else if (lexptr[-1] == '\n')
- goto retry;
+ if ((c = nextc()) == '\0') {
+ yyerror("unterminated regexp ends with \\ at end of file");
+ } else if (c == '\n') {
+ sourceline++;
+ continue;
+ } else
+ tokadd('\\');
break;
case '/': /* end of the regexp */
if (in_brack)
break;
- lexptr--;
+ pushback();
+ tokadd('\0');
yylval.sval = tokstart;
return REGEXP;
case '\n':
- lineno++;
- case '\0':
- lexptr--; /* so error messages work */
+ pushback();
yyerror("unterminated regexp");
- return ERROR;
+ case '\0':
+ yyerror("unterminated regexp at end of file");
}
+ tokadd(c);
}
}
+retry:
+ while ((c = nextc()) == ' ' || c == '\t')
+ ;
- if (*lexptr == '\n') {
- lexptr++;
- lineno++;
- return NEWLINE;
- }
-
- while (*lexptr == ' ' || *lexptr == '\t')
- lexptr++;
-
- token_start = tokstart = lexptr;
+ lexeme = lexptr-1;
+ thisline = NULL;
+ token = tokstart;
+ yylval.nodetypeval = Node_illegal;
- switch (c = *lexptr++) {
+ switch (c) {
case 0:
return 0;
case '\n':
- lineno++;
+ sourceline++;
return NEWLINE;
case '#': /* it's a comment */
- while (*lexptr != '\n' && *lexptr != '\0')
- lexptr++;
- goto retry;
+ while ((c = nextc()) != '\n') {
+ if (c == '\0')
+ return 0;
+ }
+ sourceline++;
+ return NEWLINE;
case '\\':
- if (*lexptr == '\n') {
- lineno++;
- lexptr++;
+#ifdef RELAXED_CONTINUATION
+ if (!strict) { /* strip trailing white-space and/or comment */
+ while ((c = nextc()) == ' ' || c == '\t') continue;
+ if (c == '#')
+ while ((c = nextc()) != '\n') if (!c) break;
+ pushback();
+ }
+#endif /*RELAXED_CONTINUATION*/
+ if (nextc() == '\n') {
+ sourceline++;
goto retry;
} else
- break;
+ yyerror("inappropriate use of backslash");
+ break;
+
+ case '$':
+ want_assign = 1;
+ return '$';
+
case ')':
case ']':
case '(':
case '[':
- case '$':
case ';':
case ':':
case '?':
-
- /*
- * set node type to ILLEGAL because the action should set it
- * to the right thing
- */
- yylval.nodetypeval = Node_illegal;
- return c;
-
case '{':
case ',':
- yylval.nodetypeval = Node_illegal;
return c;
case '*':
- if (*lexptr == '=') {
+ if ((c = nextc()) == '=') {
yylval.nodetypeval = Node_assign_times;
- lexptr++;
return ASSIGNOP;
- } else if (*lexptr == '*') { /* make ** and **= aliases
- * for ^ and ^= */
- if (lexptr[1] == '=') {
+ } else if (do_posix) {
+ pushback();
+ return '*';
+ } else if (c == '*') {
+ /* make ** and **= aliases for ^ and ^= */
+ static int did_warn_op = 0, did_warn_assgn = 0;
+
+ if (nextc() == '=') {
+ if (do_lint && ! did_warn_assgn) {
+ did_warn_assgn = 1;
+ warning("**= is not allowed by POSIX");
+ }
yylval.nodetypeval = Node_assign_exp;
- lexptr += 2;
return ASSIGNOP;
} else {
- yylval.nodetypeval = Node_illegal;
- lexptr++;
+ pushback();
+ if (do_lint && ! did_warn_op) {
+ did_warn_op = 1;
+ warning("** is not allowed by POSIX");
+ }
return '^';
}
}
- yylval.nodetypeval = Node_illegal;
- return c;
+ pushback();
+ return '*';
case '/':
- if (want_assign && *lexptr == '=') {
- yylval.nodetypeval = Node_assign_quotient;
- lexptr++;
- return ASSIGNOP;
+ if (want_assign) {
+ if (nextc() == '=') {
+ yylval.nodetypeval = Node_assign_quotient;
+ return ASSIGNOP;
+ }
+ pushback();
}
- yylval.nodetypeval = Node_illegal;
- return c;
+ return '/';
case '%':
- if (*lexptr == '=') {
+ if (nextc() == '=') {
yylval.nodetypeval = Node_assign_mod;
- lexptr++;
return ASSIGNOP;
}
- yylval.nodetypeval = Node_illegal;
- return c;
+ pushback();
+ return '%';
case '^':
- if (*lexptr == '=') {
+ {
+ static int did_warn_op = 0, did_warn_assgn = 0;
+
+ if (nextc() == '=') {
+
+ if (do_lint && ! did_warn_assgn) {
+ did_warn_assgn = 1;
+ warning("operator `^=' is not supported in old awk");
+ }
yylval.nodetypeval = Node_assign_exp;
- lexptr++;
return ASSIGNOP;
}
- yylval.nodetypeval = Node_illegal;
- return c;
+ pushback();
+ if (do_lint && ! did_warn_op) {
+ did_warn_op = 1;
+ warning("operator `^' is not supported in old awk");
+ }
+ return '^';
+ }
case '+':
- if (*lexptr == '=') {
+ if ((c = nextc()) == '=') {
yylval.nodetypeval = Node_assign_plus;
- lexptr++;
return ASSIGNOP;
}
- if (*lexptr == '+') {
- yylval.nodetypeval = Node_illegal;
- lexptr++;
+ if (c == '+')
return INCREMENT;
- }
- yylval.nodetypeval = Node_illegal;
- return c;
+ pushback();
+ return '+';
case '!':
- if (*lexptr == '=') {
+ if ((c = nextc()) == '=') {
yylval.nodetypeval = Node_notequal;
- lexptr++;
return RELOP;
}
- if (*lexptr == '~') {
+ if (c == '~') {
yylval.nodetypeval = Node_nomatch;
- lexptr++;
+ want_assign = 0;
return MATCHOP;
}
- yylval.nodetypeval = Node_illegal;
- return c;
+ pushback();
+ return '!';
case '<':
- if (*lexptr == '=') {
+ if (nextc() == '=') {
yylval.nodetypeval = Node_leq;
- lexptr++;
return RELOP;
}
yylval.nodetypeval = Node_less;
- return c;
+ pushback();
+ return '<';
case '=':
- if (*lexptr == '=') {
+ if (nextc() == '=') {
yylval.nodetypeval = Node_equal;
- lexptr++;
return RELOP;
}
yylval.nodetypeval = Node_assign;
+ pushback();
return ASSIGNOP;
case '>':
- if (*lexptr == '=') {
+ if ((c = nextc()) == '=') {
yylval.nodetypeval = Node_geq;
- lexptr++;
return RELOP;
- } else if (*lexptr == '>') {
+ } else if (c == '>') {
yylval.nodetypeval = Node_redirect_append;
- lexptr++;
return APPEND_OP;
}
yylval.nodetypeval = Node_greater;
- return c;
+ pushback();
+ return '>';
case '~':
yylval.nodetypeval = Node_match;
+ want_assign = 0;
return MATCHOP;
case '}':
@@ -1146,46 +1216,45 @@ retry:
return c;
}
did_newline++;
- --lexptr;
+ --lexptr; /* pick up } next time */
return NEWLINE;
case '"':
esc_seen = 0;
- while (*lexptr != '\0') {
- switch (*lexptr++) {
- case '\\':
+ while ((c = nextc()) != '"') {
+ if (c == '\n') {
+ pushback();
+ yyerror("unterminated string");
+ }
+ if (c == '\\') {
+ c = nextc();
+ if (c == '\n') {
+ sourceline++;
+ continue;
+ }
esc_seen = 1;
- if (*lexptr == '\n')
- yyerror("newline in string");
- if (*lexptr++ != '\0')
- break;
- /* fall through */
- case '\n':
- lexptr--;
+ tokadd('\\');
+ }
+ if (c == '\0') {
+ pushback();
yyerror("unterminated string");
- return ERROR;
- case '"':
- yylval.nodeval = make_str_node(tokstart + 1,
- lexptr-tokstart-2, esc_seen);
- yylval.nodeval->flags |= PERM;
- return YSTRING;
}
+ tokadd(c);
}
- return ERROR;
+ yylval.nodeval = make_str_node(tokstart,
+ token - tokstart, esc_seen ? SCAN : 0);
+ yylval.nodeval->flags |= PERM;
+ return YSTRING;
case '-':
- if (*lexptr == '=') {
+ if ((c = nextc()) == '=') {
yylval.nodetypeval = Node_assign_minus;
- lexptr++;
return ASSIGNOP;
}
- if (*lexptr == '-') {
- yylval.nodetypeval = Node_illegal;
- lexptr++;
+ if (c == '-')
return DECREMENT;
- }
- yylval.nodetypeval = Node_illegal;
- return c;
+ pushback();
+ return '-';
case '0':
case '1':
@@ -1199,21 +1268,29 @@ retry:
case '9':
case '.':
/* It's a number */
- for (namelen = 0; (c = tokstart[namelen]) != '\0'; namelen++) {
+ for (;;) {
+ int gotnumber = 0;
+
+ tokadd(c);
switch (c) {
case '.':
- if (seen_point)
- goto got_number;
+ if (seen_point) {
+ gotnumber++;
+ break;
+ }
++seen_point;
break;
case 'e':
case 'E':
- if (seen_e)
- goto got_number;
+ if (seen_e) {
+ gotnumber++;
+ break;
+ }
++seen_e;
- if (tokstart[namelen + 1] == '-' ||
- tokstart[namelen + 1] == '+')
- namelen++;
+ if ((c = nextc()) == '-' || c == '+')
+ tokadd(c);
+ else
+ pushback();
break;
case '0':
case '1':
@@ -1227,67 +1304,83 @@ retry:
case '9':
break;
default:
- goto got_number;
+ gotnumber++;
}
+ if (gotnumber)
+ break;
+ c = nextc();
}
-
-got_number:
- lexptr = tokstart + namelen;
- /*
- yylval.nodeval = make_string(tokstart, namelen);
- (void) force_number(yylval.nodeval);
- */
+ pushback();
yylval.nodeval = make_number(atof(tokstart));
yylval.nodeval->flags |= PERM;
- return NUMBER;
+ return YNUMBER;
case '&':
- if (*lexptr == '&') {
+ if ((c = nextc()) == '&') {
yylval.nodetypeval = Node_and;
- while (c = *++lexptr) {
- if (c == '#')
- while ((c = *++lexptr) != '\n'
- && c != '\0')
+ for (;;) {
+ c = nextc();
+ if (c == '\0')
+ break;
+ if (c == '#') {
+ while ((c = nextc()) != '\n' && c != '\0')
;
+ if (c == '\0')
+ break;
+ }
if (c == '\n')
- lineno++;
- else if (! isspace(c))
+ sourceline++;
+ if (! isspace(c)) {
+ pushback();
break;
+ }
}
+ want_assign = 0;
return LEX_AND;
}
- return ERROR;
+ pushback();
+ return '&';
case '|':
- if (*lexptr == '|') {
+ if ((c = nextc()) == '|') {
yylval.nodetypeval = Node_or;
- while (c = *++lexptr) {
- if (c == '#')
- while ((c = *++lexptr) != '\n'
- && c != '\0')
+ for (;;) {
+ c = nextc();
+ if (c == '\0')
+ break;
+ if (c == '#') {
+ while ((c = nextc()) != '\n' && c != '\0')
;
+ if (c == '\0')
+ break;
+ }
if (c == '\n')
- lineno++;
- else if (! isspace(c))
+ sourceline++;
+ if (! isspace(c)) {
+ pushback();
break;
+ }
}
+ want_assign = 0;
return LEX_OR;
}
- yylval.nodetypeval = Node_illegal;
- return c;
+ pushback();
+ return '|';
}
- if (c != '_' && ! isalpha(c)) {
+ if (c != '_' && ! isalpha(c))
yyerror("Invalid char '%c' in expression\n", c);
- return ERROR;
- }
/* it's some type of name-type-thing. Find its length */
- for (namelen = 0; is_identchar(tokstart[namelen]); namelen++)
- /* null */ ;
- emalloc(tokkey, char *, namelen+1, "yylex");
- memcpy(tokkey, tokstart, namelen);
- tokkey[namelen] = '\0';
+ token = tokstart;
+ while (is_identchar(c)) {
+ tokadd(c);
+ c = nextc();
+ }
+ tokadd('\0');
+ emalloc(tokkey, char *, token - tokstart, "yylex");
+ memcpy(tokkey, tokstart, token - tokstart);
+ pushback();
/* See if it is a special token. */
low = 0;
@@ -1297,115 +1390,45 @@ got_number:
mid = (low + high) / 2;
c = *tokstart - tokentab[mid].operator[0];
- i = c ? c : strcmp (tokkey, tokentab[mid].operator);
+ i = c ? c : strcmp (tokstart, tokentab[mid].operator);
if (i < 0) { /* token < mid */
high = mid - 1;
} else if (i > 0) { /* token > mid */
low = mid + 1;
} else {
- lexptr = tokstart + namelen;
- if (strict && tokentab[mid].nostrict)
+ if (do_lint) {
+ if (tokentab[mid].flags & GAWK)
+ warning("%s() is a gawk extension",
+ tokentab[mid].operator);
+ if (tokentab[mid].flags & NOT_POSIX)
+ warning("POSIX does not allow %s",
+ tokentab[mid].operator);
+ if (tokentab[mid].flags & NOT_OLD)
+ warning("%s is not supported in old awk",
+ tokentab[mid].operator);
+ }
+ if ((strict && (tokentab[mid].flags & GAWK))
+ || (do_posix && (tokentab[mid].flags & NOT_POSIX)))
break;
if (tokentab[mid].class == LEX_BUILTIN
- || tokentab[mid].class == LEX_LENGTH)
- yylval.ptrval = tokentab[mid].ptr;
+ || tokentab[mid].class == LEX_LENGTH
+ )
+ yylval.lval = mid;
else
yylval.nodetypeval = tokentab[mid].value;
+
return tokentab[mid].class;
}
}
- /* It's a name. See how long it is. */
yylval.sval = tokkey;
- lexptr = tokstart + namelen;
if (*lexptr == '(')
return FUNC_CALL;
- else
+ else {
+ want_assign = 1;
return NAME;
-}
-
-#ifndef DEFPATH
-#ifdef MSDOS
-#define DEFPATH "."
-#define ENVSEP ';'
-#else
-#define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk"
-#define ENVSEP ':'
-#endif
-#endif
-
-static FILE *
-pathopen (file)
-char *file;
-{
- static char *savepath = DEFPATH;
- static int first = 1;
- char *awkpath, *cp;
- char trypath[BUFSIZ];
- FILE *fp;
-#ifdef DEBUG
- extern int debugging;
-#endif
- int fd;
-
- if (strcmp (file, "-") == 0)
- return (stdin);
-
- if (strict)
- return (fopen (file, "r"));
-
- if (first) {
- first = 0;
- if ((awkpath = getenv ("AWKPATH")) != NULL && *awkpath)
- savepath = awkpath; /* used for restarting */
}
- awkpath = savepath;
-
- /* some kind of path name, no search */
-#ifndef MSDOS
- if (strchr (file, '/') != NULL)
-#else
- if (strchr (file, '/') != NULL || strchr (file, '\\') != NULL
- || strchr (file, ':') != NULL)
-#endif
- return ( (fd = devopen (file, "r")) >= 0 ?
- fdopen(fd, "r") :
- NULL);
-
- do {
- trypath[0] = '\0';
- /* this should take into account limits on size of trypath */
- for (cp = trypath; *awkpath && *awkpath != ENVSEP; )
- *cp++ = *awkpath++;
-
- if (cp != trypath) { /* nun-null element in path */
- *cp++ = '/';
- strcpy (cp, file);
- } else
- strcpy (trypath, file);
-#ifdef DEBUG
- if (debugging)
- fprintf(stderr, "trying: %s\n", trypath);
-#endif
- if ((fd = devopen (trypath, "r")) >= 0
- && (fp = fdopen(fd, "r")) != NULL)
- return (fp);
-
- /* no luck, keep going */
- if(*awkpath == ENVSEP && awkpath[1] != '\0')
- awkpath++; /* skip colon */
- } while (*awkpath);
-#ifdef MSDOS
- /*
- * Under DOS (and probably elsewhere) you might have one of the awk
- * paths defined, WITHOUT the current working directory in it.
- * Therefore you should try to open the file in the current directory.
- */
- return ( (fd = devopen(file, "r")) >= 0 ? fdopen(fd, "r") : NULL);
-#else
- return (NULL);
-#endif
}
static NODE *
@@ -1413,22 +1436,21 @@ node_common(op)
NODETYPE op;
{
register NODE *r;
- extern int numfiles;
- extern int tempsource;
- extern char **sourcefile;
-
- r = newnode(op);
- r->source_line = lineno;
- if (numfiles > -1 && ! tempsource)
- r->source_file = sourcefile[curinfile];
+
+ getnode(r);
+ r->type = op;
+ r->flags = MALLOC;
+ /* if lookahead is NL, lineno is 1 too high */
+ if (lexeme && *lexeme == '\n')
+ r->source_line = sourceline - 1;
else
- r->source_file = NULL;
+ r->source_line = sourceline;
+ r->source_file = source;
return r;
}
/*
* This allocates a node with defined lnode and rnode.
- * This should only be used by yyparse+co while reading in the program
*/
NODE *
node(left, op, right)
@@ -1444,20 +1466,68 @@ NODETYPE op;
}
/*
- * This allocates a node with defined subnode and proc
- * Otherwise like node()
+ * This allocates a node with defined subnode and proc for builtin functions
+ * Checks for arg. count and supplies defaults where possible.
*/
static NODE *
-snode(subn, op, procp)
+snode(subn, op, index)
NODETYPE op;
-NODE *(*procp) ();
+int index;
NODE *subn;
{
register NODE *r;
+ register NODE *n;
+ int nexp = 0;
+ int args_allowed;
r = node_common(op);
+
+ /* traverse expression list to see how many args. given */
+ for (n= subn; n; n= n->rnode) {
+ nexp++;
+ if (nexp > 3)
+ break; /* nexp never exceeds 4, however long the list is */
+ }
+
+ /* check against how many args. are allowed for this builtin */
+ /* a zero mask skips the check entirely */
+ /* NOTE(review): the singular form of the message ends in a blank ("argument ") */
+ args_allowed = tokentab[index].flags & ARGS;
+ if (args_allowed && !(args_allowed & A(nexp)))
+ fatal("%s() cannot have %d argument%c",
+ tokentab[index].operator, nexp, nexp == 1 ? ' ' : 's');
+
+ r->proc = tokentab[index].ptr;
+
+ /* special case processing for a few builtins */
+ if (nexp == 0 && r->proc == do_length) {
+ /* no argument: substitute a list holding a field-spec node built on the number 0 (presumably $0 -- confirm) */
+ subn = node(node(make_number(0.0),Node_field_spec,(NODE *)NULL),
+ Node_expression_list,
+ (NODE *) NULL);
+ } else if (r->proc == do_match) {
+ /* second argument is wrapped by mk_rexp() unless it is already a Node_regex */
+ if (subn->rnode->lnode->type != Node_regex)
+ subn->rnode->lnode = mk_rexp(subn->rnode->lnode);
+ } else if (r->proc == do_sub || r->proc == do_gsub) {
+ /* first argument becomes a regex node; with two arguments a field-spec node built on 0 is appended as the third */
+ if (subn->lnode->type != Node_regex)
+ subn->lnode = mk_rexp(subn->lnode);
+ if (nexp == 2)
+ append_right(subn, node(node(make_number(0.0),
+ Node_field_spec,
+ (NODE *) NULL),
+ Node_expression_list,
+ (NODE *) NULL));
+ else if (do_lint && subn->rnode->rnode->lnode->type == Node_val)
+ warning("string literal as last arg of substitute");
+ } else if (r->proc == do_split) {
+ /* two arguments: FS_node is appended as the third, and the regex node made from it is flagged FS_DFLT */
+ if (nexp == 2)
+ append_right(subn,
+ node(FS_node, Node_expression_list, (NODE *) NULL));
+ n = subn->rnode->rnode->lnode;
+ if (n->type != Node_regex)
+ subn->rnode->rnode->lnode = mk_rexp(n);
+ if (nexp == 2)
+ subn->rnode->rnode->lnode->re_flags |= FS_DFLT;
+ }
+
r->subnode = subn;
- r->proc = procp;
return r;
}
@@ -1473,7 +1543,8 @@ NODE *cpair;
{
register NODE *r;
- r = newnode(Node_line_range);
+ getnode(r);
+ r->type = Node_line_range;
r->condpair = cpair;
r->triggered = 0;
return r;
@@ -1488,7 +1559,8 @@ NODE *init, *cond, *incr;
NODE *n;
emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop");
- n = newnode(Node_illegal);
+ getnode(n);
+ n->type = Node_illegal;
r->init = init;
r->cond = cond;
r->incr = incr;
@@ -1497,55 +1569,39 @@ NODE *init, *cond, *incr;
}
/*
- * Install a name in the hash table specified, even if it is already there.
- * Name stops with first non alphanumeric. Caller must check against
- * redefinition if that is desired.
+ * Install a name in the symbol table, even if it is already there.
+ * Caller must check against redefinition if that is desired.
*/
NODE *
-install(table, name, value)
-NODE **table;
+install(name, value)
char *name;
NODE *value;
{
register NODE *hp;
register int len, bucket;
- register char *p;
-
- len = 0;
- p = name;
- while (is_identchar(*p))
- p++;
- len = p - name;
-
- hp = newnode(Node_hashnode);
- bucket = hashf(name, len, HASHSIZE);
- hp->hnext = table[bucket];
- table[bucket] = hp;
+
+ len = strlen(name);
+ bucket = hash(name, len);
+ getnode(hp);
+ hp->type = Node_hashnode;
+ /* push on the front of the bucket chain, ahead of any older entry */
+ hp->hnext = variables[bucket];
+ variables[bucket] = hp;
hp->hlength = len;
hp->hvalue = value;
- emalloc(hp->hname, char *, len + 1, "install");
- memcpy(hp->hname, name, len);
- hp->hname[len] = '\0';
+ /* the pointer is stored, not copied: name must stay valid while the entry exists */
+ hp->hname = name;
return hp->hvalue;
}
-/*
- * find the most recent hash node for name name (ending with first
- * non-identifier char) installed by install
- */
+/* find the most recent hash node for name installed by install */
NODE *
-lookup(table, name)
-NODE **table;
+lookup(name)
char *name;
{
- register char *bp;
register NODE *bucket;
register int len;
- for (bp = name; is_identchar(*bp); bp++)
- ;
- len = bp - name;
- bucket = table[hashf(name, len, HASHSIZE)];
+ len = strlen(name);
+ bucket = variables[hash(name, len)];
while (bucket) {
if (bucket->hlength == len && STREQN(bucket->hname, name, len))
return bucket->hvalue;
@@ -1554,27 +1610,6 @@ char *name;
return NULL;
}
-#define HASHSTEP(old, c) ((old << 1) + c)
-#define MAKE_POS(v) (v & ~0x80000000) /* make number positive */
-
-/*
- * return hash function on name.
- */
-static int
-hashf(name, len, hashsize)
-register char *name;
-register int len;
-int hashsize;
-{
- register int r = 0;
-
- while (len--)
- r = HASHSTEP(r, *name++);
-
- r = MAKE_POS(r) % hashsize;
- return r;
-}
-
/*
* Add new to the rightmost branch of LIST. This uses n^2 time, so we make
* a simple attempt at optimizing it.
@@ -1582,7 +1617,6 @@ int hashsize;
static NODE *
append_right(list, new)
NODE *list, *new;
-
{
register NODE *oldlist;
static NODE *savefront = NULL, *savetail = NULL;
@@ -1613,12 +1647,11 @@ NODE *def;
pop_params(params->rnode);
pop_var(params, 0);
- r = lookup(variables, params->param);
+ r = lookup(params->param);
if (r != NULL) {
fatal("function name `%s' previously defined", params->param);
} else
- (void) install(variables, params->param,
- node(params, Node_func, def));
+ (void) install(params->param, node(params, Node_func, def));
}
static void
@@ -1626,21 +1659,17 @@ pop_var(np, freeit)
NODE *np;
int freeit;
{
- register char *bp;
register NODE *bucket, **save;
register int len;
char *name;
name = np->param;
- for (bp = name; is_identchar(*bp); bp++)
- ;
- len = bp - name;
- save = &(variables[hashf(name, len, HASHSIZE)]);
+ len = strlen(name);
+ save = &(variables[hash(name, len)]);
for (bucket = *save; bucket; bucket = bucket->hnext) {
if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
*save = bucket->hnext;
freenode(bucket);
- free(bucket->hname);
if (freeit)
free(np->param);
return;
@@ -1665,22 +1694,50 @@ char *name;
{
NODE *r;
- r = newnode(Node_param_list);
- r->param = name;
+ getnode(r);
+ r->type = Node_param_list;
r->rnode = NULL;
+ r->param = name;
r->param_cnt = param_counter++;
- return (install(variables, name, r));
+ return (install(name, r));
}
/* Name points to a variable name. Make sure its in the symbol table */
NODE *
-variable(name)
+variable(name, can_free)
char *name;
+int can_free;
{
register NODE *r;
+ static int env_loaded = 0;
- if ((r = lookup(variables, name)) == NULL)
- r = install(variables, name,
- node(Nnull_string, Node_var, (NODE *) NULL));
+ /* run load_environ() once, on the first request for "ENVIRON" */
+ if (!env_loaded && STREQ(name, "ENVIRON")) {
+ load_environ();
+ env_loaded = 1;
+ }
+ /* unknown name: install a Node_var whose initial value is Nnull_string */
+ if ((r = lookup(name)) == NULL)
+ r = install(name, node(Nnull_string, Node_var, (NODE *) NULL));
+ /* known name: install() is not called, so name is released here when the caller allows it */
+ else if (can_free)
+ free(name);
return r;
}
+
+/*
+ * Return exp unchanged when it is already a Node_regex; otherwise wrap it
+ * in a newly obtained Node_regex node whose text and compiled form are
+ * still unset.
+ */
+static NODE *
+mk_rexp(exp)
+NODE *exp;
+{
+	NODE *wrapper;
+
+	if (exp->type == Node_regex)
+		return exp;
+
+	getnode(wrapper);
+	wrapper->type = Node_regex;
+	wrapper->re_exp = exp;
+	wrapper->re_text = NULL;
+	wrapper->re_reg = NULL;
+	wrapper->re_flags = 0;
+	wrapper->re_cnt = 1;
+	return wrapper;
+}
diff --git a/builtin.c b/builtin.c
index cfa86e2e..9465ba1f 100644
--- a/builtin.c
+++ b/builtin.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -25,16 +25,39 @@
#include "awk.h"
-extern void srandom();
-extern char *initstate();
-extern char *setstate();
-extern long random();
+#ifndef atarist
+extern void srandom P((int seed));
+#endif
+extern char *initstate P((unsigned seed, char *state, int n));
+extern char *setstate P((char *state));
+extern long random P((void));
extern NODE **fields_arr;
+extern int output_is_tty;
+
+static NODE *sub_common P((NODE *tree, int global));
+
+#ifdef GFMT_WORKAROUND
+char *gfmt P((double g, int prec, char *buf));
+#endif
-static void get_one();
-static void get_two();
-static int get_three();
+#ifdef _CRAY
+/* Work around a problem in conversion of doubles to exact integers. */
+#include <float.h>
+#define Floor(n) floor((n) * (1.0 + DBL_EPSILON))
+#define Ceil(n) ceil((n) * (1.0 + DBL_EPSILON))
+
+/* Force the standard C compiler to use the library math functions. */
+extern double exp(double);
+double (*Exp)() = exp;
+#define exp(x) (*Exp)(x)
+extern double log(double);
+double (*Log)() = log;
+#define log(x) (*Log)(x)
+#else
+#define Floor(n) floor(n)
+#define Ceil(n) ceil(n)
+#endif
/* Builtin functions */
NODE *
@@ -43,9 +66,11 @@ NODE *tree;
{
NODE *tmp;
double d, res;
+#ifndef exp
double exp();
+#endif
- get_one(tree, &tmp);
+ tmp= tree_eval(tree->lnode);
d = force_number(tmp);
free_temp(tmp);
errno = 0;
@@ -65,7 +90,8 @@ NODE *tree;
long ret;
- get_two(tree, &s1, &s2);
+ s1 = tree_eval(tree->lnode);
+ s2 = tree_eval(tree->rnode->lnode);
force_string(s1);
force_string(s2);
p1 = s1->stptr;
@@ -73,8 +99,10 @@ NODE *tree;
l1 = s1->stlen;
l2 = s2->stlen;
ret = 0;
- if (! strict && IGNORECASE_node->var_value->numbr != 0.0) {
+ if (IGNORECASE) {
while (l1) {
+ if (l2 > l1)
+ break;
if (casetable[*p1] == casetable[*p2]
&& strncasecmp(p1, p2, l2) == 0) {
ret = 1 + s1->stlen - l1;
@@ -85,6 +113,8 @@ NODE *tree;
}
} else {
while (l1) {
+ if (l2 > l1)
+ break;
if (STREQN(p1, p2, l2)) {
ret = 1 + s1->stlen - l1;
break;
@@ -104,10 +134,15 @@ NODE *tree;
{
NODE *tmp;
double floor();
+ double ceil();
double d;
- get_one(tree, &tmp);
- d = floor((double)force_number(tmp));
+ tmp = tree_eval(tree->lnode);
+ d = force_number(tmp);
+ if (d >= 0)
+ d = Floor(d);
+ else
+ d = Ceil(d);
free_temp(tmp);
return tmp_number((AWKNUM) d);
}
@@ -119,7 +154,7 @@ NODE *tree;
NODE *tmp;
int len;
- get_one(tree, &tmp);
+ tmp = tree_eval(tree->lnode);
len = force_string(tmp)->stlen;
free_temp(tmp);
return tmp_number((AWKNUM) len);
@@ -130,10 +165,12 @@ do_log(tree)
NODE *tree;
{
NODE *tmp;
+#ifndef log
double log();
+#endif
double d, arg;
- get_one(tree, &tmp);
+ tmp = tree_eval(tree->lnode);
arg = (double) force_number(tmp);
if (arg < 0.0)
warning("log called with negative argument %g", arg);
@@ -142,22 +179,16 @@ NODE *tree;
return tmp_number((AWKNUM) d);
}
-/*
- * Note that the output buffer cannot be static because sprintf may get
- * called recursively by force_string. Hence the wasteful alloca calls
- */
-
/* %e and %f formats are not properly implemented. Someone should fix them */
+/* Actually, this whole thing should be reimplemented. */
+
NODE *
do_sprintf(tree)
NODE *tree;
{
#define bchunk(s,l) if(l) {\
while((l)>ofre) {\
- char *tmp;\
- tmp=(char *)alloca(osiz*2);\
- memcpy(tmp,obuf,olen);\
- obuf=tmp;\
+ erealloc(obuf, char *, osiz*2, "do_sprintf");\
ofre+=osiz;\
osiz*=2;\
}\
@@ -168,10 +199,7 @@ NODE *tree;
/* Is there space for something L big in the buffer? */
#define chksize(l) if((l)>ofre) {\
- char *tmp;\
- tmp=(char *)alloca(osiz*2);\
- memcpy(tmp,obuf,olen);\
- obuf=tmp;\
+ erealloc(obuf, char *, osiz*2, "do_sprintf");\
ofre+=osiz;\
osiz*=2;\
}
@@ -181,13 +209,15 @@ NODE *tree;
* return "" (Null string)
*/
#define parse_next_arg() {\
- if(!carg) arg= Nnull_string;\
+ if(!carg) { toofew = 1; break; }\
else {\
- get_one(carg,&arg);\
+ arg=tree_eval(carg->lnode);\
carg=carg->rnode;\
}\
}
+ NODE *r;
+ int toofew = 0;
char *obuf;
int osiz, ofre, olen;
static char chbuf[] = "0123456789abcdef";
@@ -199,8 +229,8 @@ NODE *tree;
long fw, prec, lj, alt, big;
long *cur;
long val;
-#ifdef sun386 /* Can't cast unsigned (int/long) from ptr->value */
- long tmp_uval; /* on 386i 4.0.1 C compiler -- it just hangs */
+#ifdef sun386 /* Can't cast unsigned (int/long) from ptr->value */
+ long tmp_uval; /* on 386i 4.0.1 C compiler -- it just hangs */
#endif
unsigned long uval;
int sgn;
@@ -212,14 +242,15 @@ NODE *tree;
double tmpval;
char *pr_str;
int ucasehex = 0;
- extern char *gcvt();
+ char signchar = 0;
+ int len;
- obuf = (char *) alloca(120);
+ emalloc(obuf, char *, 120, "do_sprintf");
osiz = 120;
ofre = osiz;
olen = 0;
- get_one(tree, &sfmt);
+ sfmt = tree_eval(tree->lnode);
sfmt = force_string(sfmt);
carg = tree->rnode;
for (s0 = s1 = sfmt->stptr, n0 = sfmt->stlen; n0-- > 0;) {
@@ -267,10 +298,17 @@ retry:
*cur = *cur * 10 + *s1++ - '0';
}
goto retry;
-#ifdef not_yet
+ case '*':
+ if (cur == 0)
+ goto lose;
+ parse_next_arg();
+ *cur = force_number(arg);
+ free_temp(arg);
+ goto retry;
case ' ': /* print ' ' or '-' */
case '+': /* print '+' or '-' */
-#endif
+ signchar = *(s1-1);
+ goto retry;
case '-':
if (lj || fill != sp)
goto lose;
@@ -351,11 +389,13 @@ retry:
} while (val);
if (sgn)
*--cp = '-';
+ else if (signchar)
+ *--cp = signchar;
if (prec > fw)
fw = prec;
prec = cend - cp;
if (fw > prec && !lj) {
- if (fill != sp && *cp == '-') {
+ if (fill != sp && (*cp == '-' || signchar)) {
bchunk(cp, 1);
cp++;
prec--;
@@ -425,44 +465,52 @@ retry:
parse_next_arg();
tmpval = force_number(arg);
free_temp(arg);
- if (prec == 0)
- prec = 13;
- (void) gcvt(tmpval, (int) prec, cpbuf);
- prec = strlen(cpbuf);
+ chksize(fw + prec + 9); /* 9==slop */
+
cp = cpbuf;
- if (fw > prec && !lj) {
- if (fill != sp && *cp == '-') {
- bchunk(cp, 1);
- cp++;
- prec--;
- } /* Deal with .5 as 0.5 */
- if (fill == sp && *cp == '.') {
- --fw;
- while (--fw >= prec) {
- bchunk(fill, 1);
- }
- bchunk("0", 1);
- } else
- while (fw-- > prec)
- bchunk(fill, 1);
- } else {/* Turn .5 into 0.5 */
- /* FOO */
- if (*cp == '.' && fill == sp) {
- bchunk("0", 1);
- --fw;
- }
+ *cp++ = '%';
+ if (lj)
+ *cp++ = '-';
+ if (fill != sp)
+ *cp++ = '0';
+#ifndef GFMT_WORKAROUND
+ if (cur != &fw) {
+ (void) strcpy(cp, "*.*g");
+ (void) sprintf(obuf + olen, cpbuf, (int) fw, (int) prec, (double) tmpval);
+ } else {
+ (void) strcpy(cp, "*g");
+ (void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
}
- bchunk(cp, (int) prec);
- if (fw > prec)
- while (fw-- > prec)
- bchunk(fill, 1);
+#else /* GFMT_WORKAROUND */
+ {
+ char *gptr, gbuf[120];
+#define DEFAULT_G_PRECISION 6
+ if (fw + prec + 9 > sizeof gbuf) { /* 9==slop */
+ emalloc(gptr, char *, fw+prec+9, "do_sprintf(gfmt)");
+ } else
+ gptr = gbuf;
+ (void) gfmt((double) tmpval, cur != &fw ?
+ (int) prec : DEFAULT_G_PRECISION, gptr);
+ *cp++ = '*', *cp++ = 's', *cp = '\0';
+ (void) sprintf(obuf + olen, cpbuf, (int) fw, gptr);
+ if (fill != sp && *gptr == ' ') {
+ char *p = gptr;
+ do { *p++ = '0'; } while (*p == ' ');
+ }
+ if (gptr != gbuf) free(gptr);
+ }
+#endif /* GFMT_WORKAROUND */
+ len = strlen(obuf + olen);
+ ofre -= len;
+ olen += len;
s0 = s1;
break;
+
case 'f':
parse_next_arg();
tmpval = force_number(arg);
free_temp(arg);
- chksize(fw + prec + 5); /* 5==slop */
+ chksize(fw + prec + 9); /* 9==slop */
cp = cpbuf;
*cp++ = '%';
@@ -477,15 +525,16 @@ retry:
(void) strcpy(cp, "*f");
(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
}
- ofre -= strlen(obuf + olen);
- olen += strlen(obuf + olen); /* There may be nulls */
+ len = strlen(obuf + olen);
+ ofre -= len;
+ olen += len;
s0 = s1;
break;
case 'e':
parse_next_arg();
tmpval = force_number(arg);
free_temp(arg);
- chksize(fw + prec + 5); /* 5==slop */
+ chksize(fw + prec + 9); /* 9==slop */
cp = cpbuf;
*cp++ = '%';
if (lj)
@@ -499,8 +548,9 @@ retry:
(void) strcpy(cp, "*e");
(void) sprintf(obuf + olen, cpbuf, (int) fw, (double) tmpval);
}
- ofre -= strlen(obuf + olen);
- olen += strlen(obuf + olen); /* There may be nulls */
+ len = strlen(obuf + olen);
+ ofre -= len;
+ olen += len;
s0 = s1;
break;
@@ -508,29 +558,51 @@ retry:
lose:
break;
}
+ if (toofew)
+ fatal("%s\n\t%s\n\t%*s%s",
+ "not enough arguments to satisfy format string",
+ sfmt->stptr, s1 - sfmt->stptr - 2, "",
+ "^ ran out for this one"
+ );
}
+ if (carg != NULL)
+ warning("too many arguments supplied for format string");
bchunk(s0, s1 - s0);
free_temp(sfmt);
- return tmp_string(obuf, olen);
+ r = make_str_node(obuf, olen, ALREADY_MALLOCED);
+ r->flags |= TEMP;
+ return r;
}
void
do_printf(tree)
-NODE *tree;
+register NODE *tree;
{
struct redirect *rp = NULL;
- register FILE *fp = stdout;
- int errflg = 0; /* not used, sigh */
+ register FILE *fp;
if (tree->rnode) {
+ int errflg; /* not used, sigh */
+
rp = redirect(tree->rnode, &errflg);
- if (rp)
+ if (rp) {
fp = rp->fp;
- }
- if (fp)
- print_simple(do_sprintf(tree->lnode), fp);
- if (rp && (rp->flag & RED_NOBUF))
+ if (!fp)
+ return; /* redirect has no stream: nothing is formatted or written */
+ } else
+ return; /* redirect() returned null: likewise give up silently */
+ } else
+ fp = stdout;
+ tree = do_sprintf(tree->lnode); /* tree is reused to hold the formatted string node */
+ (void) fwrite(tree->stptr, sizeof(char), tree->stlen, fp); /* writes stlen bytes; the result is ignored */
+ free_temp(tree);
+ /* flush at once when stdout has output_is_tty set, or the redirect carries RED_NOBUF */
+ /* NOTE(review): ferror() is consulted only on this flushing path */
+ if ((fp == stdout && output_is_tty) || (rp && (rp->flag & RED_NOBUF))) {
fflush(fp);
+ if (ferror(fp)) {
+ warning("error writing output: %s", strerror(errno));
+ clearerr(fp);
+ }
+ }
}
NODE *
@@ -538,16 +610,15 @@ do_sqrt(tree)
NODE *tree;
{
NODE *tmp;
- double sqrt();
- double d, arg;
+ double arg;
+ extern double sqrt();
- get_one(tree, &tmp);
+ tmp = tree_eval(tree->lnode);
arg = (double) force_number(tmp);
+ free_temp(tmp);
if (arg < 0.0)
warning("sqrt called with negative argument %g", arg);
- d = sqrt(arg);
- free_temp(tmp);
- return tmp_number((AWKNUM) d);
+ return tmp_number((AWKNUM) sqrt(arg));
}
NODE *
@@ -558,86 +629,146 @@ NODE *tree;
NODE *r;
register int indx, length;
- t1 = t2 = t3 = NULL;
- length = -1;
- if (get_three(tree, &t1, &t2, &t3) == 3)
+ t1 = tree_eval(tree->lnode);
+ t2 = tree_eval(tree->rnode->lnode);
+ if (tree->rnode->rnode == NULL) /* third arg. missing */
+ length = t1->stlen;
+ else {
+ t3 = tree_eval(tree->rnode->rnode->lnode);
length = (int) force_number(t3);
+ free_temp(t3);
+ }
indx = (int) force_number(t2) - 1;
+ free_temp(t2);
t1 = force_string(t1);
- if (length == -1)
- length = t1->stlen;
if (indx < 0)
indx = 0;
if (indx >= t1->stlen || length <= 0) {
- if (t3)
- free_temp(t3);
- free_temp(t2);
free_temp(t1);
return Nnull_string;
}
if (indx + length > t1->stlen)
length = t1->stlen - indx;
- if (t3)
- free_temp(t3);
- free_temp(t2);
r = tmp_string(t1->stptr + indx, length);
free_temp(t1);
return r;
}
NODE *
+do_strftime(tree)
+NODE *tree;
+{
+ NODE *t1, *t2;
+ struct tm *tm;
+ long clock;
+ char buf[100];
+ int ret;
+
+ t1 = force_string(tree_eval(tree->lnode));
+
+ if (tree->rnode == NULL) /* second arg. missing, default */
+ (void) time(&clock);
+ else {
+ t2 = tree_eval(tree->rnode->lnode);
+ clock = (long) force_number(t2);
+ free_temp(t2);
+ }
+ tm = localtime(&clock);
+
+ ret = strftime(buf, 100, t1->stptr, tm);
+
+ return tmp_string(buf, ret);
+}
+
+NODE *
+do_systime(tree)
+NODE *tree;
+{
+ long clock;
+
+ (void) time(&clock);
+ return tmp_number((AWKNUM) clock);
+}
+
+NODE *
do_system(tree)
NODE *tree;
{
-#if defined(unix) || defined(MSDOS) /* || defined(gnu) */
NODE *tmp;
int ret;
(void) flush_io (); /* so output is synchronous with gawk's */
- get_one(tree, &tmp);
+ tmp = tree_eval(tree->lnode);
ret = system(force_string(tmp)->stptr);
ret = (ret >> 8) & 0xff;
free_temp(tmp);
return tmp_number((AWKNUM) ret);
-#else
- fatal("the \"system\" function is not supported.");
- /* NOTREACHED */
-#endif
}
void
do_print(tree)
register NODE *tree;
{
+ register NODE *t1;
struct redirect *rp = NULL;
- register FILE *fp = stdout;
- int errflg = 0; /* not used, sigh */
+ register FILE *fp;
+ register char *s;
if (tree->rnode) {
+ int errflg; /* not used, sigh */
+
rp = redirect(tree->rnode, &errflg);
- if (rp)
+ if (rp) {
fp = rp->fp;
- }
- if (!fp)
- return;
+ if (!fp)
+ return;
+ } else
+ return;
+ } else
+ fp = stdout;
tree = tree->lnode;
- if (!tree)
- tree = WHOLELINE;
- if (tree->type != Node_expression_list) {
- if (!(tree->flags & STR))
- cant_happen();
- print_simple(tree, fp);
- } else {
- while (tree) {
- print_simple(force_string(tree_eval(tree->lnode)), fp);
- tree = tree->rnode;
- if (tree)
- print_simple(OFS_node->var_value, fp);
+ while (tree) {
+ t1 = tree_eval(tree->lnode);
+ if (t1->flags & NUMBER) {
+ if (OFMTidx == CONVFMTidx)
+ (void) force_string(t1);
+ else {
+ char buf[100];
+
+ sprintf(buf, OFMT, t1->numbr);
+ t1 = tmp_string(buf, strlen(buf));
+ }
+ }
+ (void) fwrite(t1->stptr, sizeof(char), t1->stlen, fp);
+ free_temp(t1);
+ tree = tree->rnode;
+ if (tree) {
+ s = OFS;
+#if (!defined(VMS)) || defined(NO_TTY_FWRITE)
+ while (*s)
+ putc(*s++, fp);
+#else
+ if (OFSlen)
+ fwrite(s, sizeof(char), OFSlen, fp);
+#endif /* VMS && !NO_TTY_FWRITE */
}
}
- print_simple(ORS_node->var_value, fp);
- if (rp && (rp->flag & RED_NOBUF))
+ s = ORS;
+#if (!defined(VMS)) || defined(NO_TTY_FWRITE)
+ while (*s)
+ putc(*s++, fp);
+ if ((fp == stdout && output_is_tty) || (rp && (rp->flag & RED_NOBUF))) {
+#else
+ if (ORSlen)
+ fwrite(s, sizeof(char), ORSlen, fp);
+ if ((rp && (rp->flag & RED_NOBUF))) {
+#endif /* VMS && !NO_TTY_FWRITE */
fflush(fp);
+ if (ferror(fp)) {
+ warning("error writing output: %s", strerror(errno));
+ clearerr(fp);
+ }
+ }
}
NODE *
@@ -647,7 +778,7 @@ NODE *tree;
NODE *t1, *t2;
register char *cp, *cp2;
- get_one(tree, &t1);
+ t1 = tree_eval(tree->lnode);
t1 = force_string(t1);
t2 = tmp_string(t1->stptr, t1->stlen);
for (cp = t2->stptr, cp2 = t2->stptr + t2->stlen; cp < cp2; cp++)
@@ -664,7 +795,7 @@ NODE *tree;
NODE *t1, *t2;
register char *cp;
- get_one(tree, &t1);
+ t1 = tree_eval(tree->lnode);
t1 = force_string(t1);
t2 = tmp_string(t1->stptr, t1->stlen);
for (cp = t2->stptr; cp < t2->stptr + t2->stlen; cp++)
@@ -674,88 +805,6 @@ NODE *tree;
return t2;
}
-/*
- * Get the arguments to functions. No function cares if you give it too many
- * args (they're ignored). Only a few fuctions complain about being given
- * too few args. The rest have defaults.
- */
-
-static void
-get_one(tree, res)
-NODE *tree, **res;
-{
- if (!tree) {
- *res = WHOLELINE;
- return;
- }
- *res = tree_eval(tree->lnode);
-}
-
-static void
-get_two(tree, res1, res2)
-NODE *tree, **res1, **res2;
-{
- if (!tree) {
- *res1 = WHOLELINE;
- return;
- }
- *res1 = tree_eval(tree->lnode);
- if (!tree->rnode)
- return;
- tree = tree->rnode;
- *res2 = tree_eval(tree->lnode);
-}
-
-static int
-get_three(tree, res1, res2, res3)
-NODE *tree, **res1, **res2, **res3;
-{
- if (!tree) {
- *res1 = WHOLELINE;
- return 0;
- }
- *res1 = tree_eval(tree->lnode);
- if (!tree->rnode)
- return 1;
- tree = tree->rnode;
- *res2 = tree_eval(tree->lnode);
- if (!tree->rnode)
- return 2;
- tree = tree->rnode;
- *res3 = tree_eval(tree->lnode);
- return 3;
-}
-
-int
-a_get_three(tree, res1, res2, res3)
-NODE *tree, **res1, **res2, **res3;
-{
- if (!tree) {
- *res1 = WHOLELINE;
- return 0;
- }
- *res1 = tree_eval(tree->lnode);
- if (!tree->rnode)
- return 1;
- tree = tree->rnode;
- *res2 = tree->lnode;
- if (!tree->rnode)
- return 2;
- tree = tree->rnode;
- *res3 = tree_eval(tree->lnode);
- return 3;
-}
-
-void
-print_simple(tree, fp)
-NODE *tree;
-FILE *fp;
-{
- if (fwrite(tree->stptr, sizeof(char), tree->stlen, fp) != tree->stlen)
- warning("fwrite: %s", strerror(errno));
- free_temp(tree);
-}
-
NODE *
do_atan2(tree)
NODE *tree;
@@ -764,7 +813,8 @@ NODE *tree;
extern double atan2();
double d1, d2;
- get_two(tree, &t1, &t2);
+ t1 = tree_eval(tree->lnode);
+ t2 = tree_eval(tree->rnode->lnode);
d1 = force_number(t1);
d2 = force_number(t2);
free_temp(t1);
@@ -780,7 +830,7 @@ NODE *tree;
extern double sin();
double d;
- get_one(tree, &tmp);
+ tmp = tree_eval(tree->lnode);
d = sin((double)force_number(tmp));
free_temp(tmp);
return tmp_number((AWKNUM) d);
@@ -794,7 +844,7 @@ NODE *tree;
extern double cos();
double d;
- get_one(tree, &tmp);
+ tmp = tree_eval(tree->lnode);
d = cos((double)force_number(tmp));
free_temp(tmp);
return tmp_number((AWKNUM) d);
@@ -823,9 +873,8 @@ do_srand(tree)
NODE *tree;
{
NODE *tmp;
- static long save_seed = 1;
+ static long save_seed = 0;
long ret = save_seed; /* SVR4 awk srand returns previous seed */
- extern long time();
if (firstrand)
(void) initstate((unsigned) 1, state, sizeof state);
@@ -833,9 +882,9 @@ NODE *tree;
(void) setstate(state);
if (!tree)
- srandom((int) (save_seed = time((long *) 0)));
+ srandom((int) (save_seed = (long) time((long *) 0)));
else {
- get_one(tree, &tmp);
+ tmp = tree_eval(tree->lnode);
srandom((int) (save_seed = (long) force_number(tmp)));
free_temp(tmp);
}
@@ -849,54 +898,25 @@ NODE *tree;
{
NODE *t1;
int rstart;
- struct re_registers reregs;
- struct re_pattern_buffer *rp;
- int need_to_free = 0;
+ AWKNUM rlength;
+ Regexp *rp;
t1 = force_string(tree_eval(tree->lnode));
- tree = tree->rnode;
- if (tree == NULL || tree->lnode == NULL)
- fatal("match called with only one argument");
- tree = tree->lnode;
- if (tree->type == Node_regex) {
- rp = tree->rereg;
- if (!strict && ((IGNORECASE_node->var_value->numbr != 0)
- ^ (tree->re_case != 0))) {
- /* recompile since case sensitivity differs */
- rp = tree->rereg =
- mk_re_parse(tree->re_text,
- (IGNORECASE_node->var_value->numbr != 0));
- tree->re_case =
- (IGNORECASE_node->var_value->numbr != 0);
- }
- } else {
- need_to_free = 1;
- rp = make_regexp(force_string(tree_eval(tree)),
- (IGNORECASE_node->var_value->numbr != 0));
- if (rp == NULL)
- cant_happen();
- }
- rstart = re_search(rp, t1->stptr, t1->stlen, 0, t1->stlen, &reregs);
- free_temp(t1);
- if (rstart >= 0) {
+ tree = tree->rnode->lnode;
+ rp = re_update(tree);
+ rstart = research(rp, t1->stptr, t1->stlen, 1);
+ if (rstart >= 0) { /* match succeded */
rstart++; /* 1-based indexing */
- /* RSTART set to rstart below */
- RLENGTH_node->var_value->numbr =
- (AWKNUM) (reregs.end[0] - reregs.start[0]);
- } else {
- /*
- * Match failed. Set RSTART to 0, RLENGTH to -1.
- * Return the value of RSTART.
- */
- rstart = 0; /* used as return value */
- RLENGTH_node->var_value->numbr = -1.0;
- }
- RSTART_node->var_value->numbr = (AWKNUM) rstart;
- if (need_to_free) {
- free(rp->buffer);
- free(rp->fastmap);
- free((char *) rp);
+ rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr);
+ } else { /* match failed */
+ rstart = 0;
+ rlength = -1.0;
}
+ free_temp(t1);
+ unref(RSTART_node->var_value);
+ RSTART_node->var_value = make_number((AWKNUM) rstart);
+ unref(RLENGTH_node->var_value);
+ RLENGTH_node->var_value = make_number(rlength);
return tmp_number((AWKNUM) rstart);
}
@@ -905,137 +925,150 @@ sub_common(tree, global)
NODE *tree;
int global;
{
- register int len;
register char *scan;
register char *bp, *cp;
- int search_start = 0;
- int match_length;
- int matches = 0;
char *buf;
- struct re_pattern_buffer *rp;
+ int buflen;
+ register char *matchend;
+ register int len;
+ char *matchstart;
+ char *text;
+ int textlen;
+ char *repl;
+ char *replend;
+ int repllen;
+ int sofar;
+ int ampersands;
+ int inplace = 0;
+ int matches = 0;
+ Regexp *rp;
NODE *s; /* subst. pattern */
NODE *t; /* string to make sub. in; $0 if none given */
- struct re_registers reregs;
- unsigned int saveflags;
NODE *tmp;
- NODE **lhs;
- char *lastbuf;
- int need_to_free = 0;
+ NODE **lhs = &tree; /* value not used -- just different from NULL */
+ int priv = 0;
+ Func_ptr after_assign = NULL;
- if (tree == NULL)
- fatal("sub or gsub called with 0 arguments");
tmp = tree->lnode;
- if (tmp->type == Node_regex) {
- rp = tmp->rereg;
- if (! strict && ((IGNORECASE_node->var_value->numbr != 0)
- ^ (tmp->re_case != 0))) {
- /* recompile since case sensitivity differs */
- rp = tmp->rereg =
- mk_re_parse(tmp->re_text,
- (IGNORECASE_node->var_value->numbr != 0));
- tmp->re_case = (IGNORECASE_node->var_value->numbr != 0);
- }
- } else {
- need_to_free = 1;
- rp = make_regexp(force_string(tree_eval(tmp)),
- (IGNORECASE_node->var_value->numbr != 0));
- if (rp == NULL)
- cant_happen();
- }
+ rp = re_update(tmp);
+
tree = tree->rnode;
- if (tree == NULL)
- fatal("sub or gsub called with only 1 argument");
- s = force_string(tree_eval(tree->lnode));
+ s = tree->lnode;
+
tree = tree->rnode;
- deref = 0;
- field_num = -1;
- if (tree == NULL) {
- t = node0_valid ? fields_arr[0] : *get_field(0, 0);
- lhs = &fields_arr[0];
- field_num = 0;
- deref = t;
- } else {
- t = tree->lnode;
- lhs = get_lhs(t, 1);
- t = force_string(tree_eval(t));
- }
+ tmp = tree->lnode;
+ if (tmp->type == Node_val)
+ lhs = NULL;
+ t = force_string(tree_eval(tmp));
+
+ /* do the search early to avoid work on non-match */
+ if (research(rp, t->stptr, t->stlen, 1) == -1)
+ return tmp_number((AWKNUM) 0);
+
+ if (lhs != NULL)
+ lhs = get_lhs(tmp, &after_assign);
+ t->flags |= STRING;
/*
* create a private copy of the string
*/
if (t->stref > 1 || (t->flags & PERM)) {
+ unsigned int saveflags;
+
saveflags = t->flags;
t->flags &= ~MALLOC;
tmp = dupnode(t);
t->flags = saveflags;
- do_deref();
t = tmp;
- if (lhs)
- *lhs = tmp;
+ priv = 1;
+ }
+ text = t->stptr;
+ textlen = t->stlen;
+
+ s = force_string(tree_eval(s));
+ repl = s->stptr;
+ replend = repl + s->stlen;
+ repllen = replend - repl;
+ if (repllen == 0) { /* replacement is null string */
+ buflen = textlen;
+ buf = text; /* so do subs. in place */
+ inplace = 1;
+ } else {
+ buflen = textlen * 2; /* initial guess -- adjusted later */
+ emalloc(buf, char *, buflen, "do_sub");
+ }
+ ampersands = 0;
+ for (scan = repl; scan < replend; scan++) {
+ if (*scan == '&') {
+ repllen--;
+ ampersands++;
+ } else if (*scan == '\\' && *(scan+1) == '&')
+ repllen--;
}
- lastbuf = t->stptr;
- do {
- if (re_search(rp, t->stptr, t->stlen, search_start,
- t->stlen-search_start, &reregs) == -1
- || reregs.start[0] == reregs.end[0])
- break;
- matches++;
- /*
- * first, make a pass through the sub. pattern, to calculate
- * the length of the string after substitution
- */
- match_length = reregs.end[0] - reregs.start[0];
- len = t->stlen - match_length;
- for (scan = s->stptr; scan < s->stptr + s->stlen; scan++)
- if (*scan == '&')
- len += match_length;
- else if (*scan == '\\' && *(scan+1) == '&') {
- scan++;
- len++;
- } else
- len++;
- emalloc(buf, char *, len + 1, "do_sub");
- bp = buf;
+ bp = buf;
+ for (;;) {
+ matches++;
+ matchstart = text + RESTART(rp, t->stptr);
+ matchend = text + REEND(rp, t->stptr);
/*
- * now, create the result, copying in parts of the original
+ * create the result, copying in parts of the original
* string
*/
- for (scan = t->stptr; scan < t->stptr + reregs.start[0]; scan++)
+ len = matchstart - text + repllen
+ + ampersands * (matchend - matchstart);
+ sofar = bp - buf;
+ while (buflen - sofar - len - 1 < 0) {
+ buflen *= 2;
+ erealloc(buf, char *, buflen, "do_sub");
+ bp = buf + sofar;
+ }
+ for (scan = text; scan < matchstart; scan++)
*bp++ = *scan;
- for (scan = s->stptr; scan < s->stptr + s->stlen; scan++)
+ for (scan = repl; scan < replend; scan++)
if (*scan == '&')
- for (cp = t->stptr + reregs.start[0];
- cp < t->stptr + reregs.end[0]; cp++)
+ for (cp = matchstart; cp < matchend; cp++)
*bp++ = *cp;
else if (*scan == '\\' && *(scan+1) == '&') {
scan++;
*bp++ = *scan;
} else
*bp++ = *scan;
- search_start = bp - buf;
- for (scan = t->stptr + reregs.end[0];
- scan < t->stptr + t->stlen; scan++)
- *bp++ = *scan;
- *bp = '\0';
- free(lastbuf);
- t->stptr = buf;
- lastbuf = buf;
- t->stlen = len;
- } while (global && search_start < t->stlen);
+ if (global && matchstart == matchend) {
+ *bp++ = *text;
+ matchend++;
+ }
+ textlen = text + textlen - matchend;
+ text = matchend;
+ if (!global || research(rp, text, textlen, 1) == -1)
+ break;
+ }
+ sofar = bp - buf;
+ if (!inplace && buflen - sofar - textlen - 1) {
+ buflen = sofar + textlen + 2;
+ erealloc(buf, char *, buflen, "do_sub");
+ bp = buf + sofar;
+ }
+ for (scan = matchend; scan < text + textlen; scan++)
+ *bp++ = *scan;
+ textlen = bp - buf;
+ if (inplace)
+ erealloc(buf, char *, textlen + 2, "do_sub");
+ else
+ free(t->stptr);
+ t->stptr = buf;
+ t->stlen = textlen;
free_temp(s);
- if (need_to_free) {
- free(rp->buffer);
- free(rp->fastmap);
- free((char *) rp);
- }
- if (matches > 0) {
- if (field_num == 0)
- set_record(fields_arr[0]->stptr, fields_arr[0]->stlen);
+ if (matches > 0 && lhs) {
+ if (priv) {
+ unref(*lhs);
+ *lhs = t;
+ }
+ if (after_assign)
+ (*after_assign)();
t->flags &= ~(NUM|NUMERIC);
}
- field_num = -1;
return tmp_number((AWKNUM) matches);
}
@@ -1053,3 +1086,42 @@ NODE *tree;
return sub_common(tree, 0);
}
+#ifdef GFMT_WORKAROUND
+ /*
+ * printf's %g format [can't rely on gcvt()]
+ * caveat: don't use as argument to *printf()!
+ */
+char *
+gfmt(g, prec, buf)
+double g; /* value to format */
+int prec; /* indicates desired significant digits, not decimal places */
+char *buf; /* return buffer; assumed big enough to hold result */
+{
+ if (g == 0.0) {
+ (void) strcpy(buf, "0"); /* easy special case */
+ } else {
+ register char *d, *e, *p;
+
+ /* start with 'e' format (it'll provide nice exponent) */
+ if (prec < 1) prec = 1; /* at least 1 significant digit */
+ (void) sprintf(buf, "%.*e", prec - 1, g);
+ if ((e = strchr(buf, 'e')) != 0) { /* find exponent */
+ int exp = atoi(e+1); /* fetch exponent */
+ if (exp >= -4 && exp < prec) { /* per K&R2, B1.2 */
+ /* switch to 'f' format and re-do */
+ prec -= (exp + 1); /* decimal precision */
+ (void) sprintf(buf, "%.*f", prec, g);
+ e = buf + strlen(buf);
+ }
+ if ((d = strchr(buf, '.')) != 0) {
+ /* remove trailing zeroes and decimal point */
+ for (p = e; p > d && *--p == '0'; ) continue;
+ if (*p == '.') --p;
+ if (++p < e) /* copy exponent and NUL */
+ while ((*p++ = *e++) != '\0') continue;
+ }
+ }
+ }
+ return buf;
+}
+#endif /* GFMT_WORKAROUND */
diff --git a/config.h b/config.h
new file mode 100644
index 00000000..72406f83
--- /dev/null
+++ b/config.h
@@ -0,0 +1,287 @@
+/*
+ * config.h -- configuration definitions for gawk.
+ *
+ * Sun running SunOS 4.1
+ */
+
+/*
+ * Copyright (C) 1991, the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file isolates configuration dependencies for gnu awk.
+ * You should know something about your system, perhaps by having
+ * a manual handy, when you edit this file. You should copy config.h-dist
+ * to config.h, and edit config.h. Do not modify config.h-dist, so that
+ * it will be easy to apply any patches that may be distributed.
+ *
+ * The general idea is that systems conforming to the various standards
+ * should need to do the least amount of changing. Defining the various
+ * items in this file usually means that your system is missing that
+ * particular feature.
+ *
+ * The order of preference in standard conformance is ANSI C, POSIX,
+ * and the SVID.
+ *
+ * If you have no clue as to what's going on with your system, try
+ * compiling gawk without editing this file and see what shows up
+ * missing in the link stage. From there, you can probably figure out
+ * which defines to turn on.
+ */
+
+/**************************/
+/* Miscellaneous features */
+/**************************/
+
+/*
+ * BLKSIZE_MISSING
+ *
+ * Check your /usr/include/sys/stat.h file. If the stat structure
+ * does not have a member named st_blksize, define this. (This will
+ * most likely be the case on most System V systems prior to V.4.)
+ */
+/* #define BLKSIZE_MISSING 1 */
+
+/*
+ * SIGTYPE
+ *
+ * The return type of the routines passed to the signal function.
+ * Modern systems use `void', older systems use `int'.
+ * If left undefined, it will default to void.
+ */
+/* #define SIGTYPE int */
+
+/*
+ * SIZE_T_MISSING
+ *
+ * If your system has no typedef for size_t, define this to get a default
+ */
+/* #define SIZE_T_MISSING 1 */
+
+/*
+ * CHAR_UNSIGNED
+ *
+ * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
+ * then define this for use in regex.c
+ */
+/* #define CHAR_UNSIGNED 1 */
+
+/*
+ * HAVE_UNDERSCORE_SETJMP
+ *
+ * Check in your /usr/include/setjmp.h file. If there are routines
+ * there named _setjmp and _longjmp, then you should define this.
+ * Typically only systems derived from Berkeley Unix have this.
+ */
+#define HAVE_UNDERSCORE_SETJMP 1
+
+/***********************************************/
+/* Missing library subroutines or system calls */
+/***********************************************/
+
+/*
+ * GETOPT_MISSING
+ *
+ * Define this if your library does not have the getopt(3) library
+ * routine for parsing command line arguments.
+ */
+/* #define GETOPT_MISSING 1 */
+
+/*
+ * MEMCMP_MISSING
+ * MEMCPY_MISSING
+ * MEMSET_MISSING
+ *
+ * These three routines are for manipulating blocks of memory. Most
+ * likely they will either all three be present or all three be missing,
+ * so they're grouped together.
+ */
+/* #define MEMCMP_MISSING 1 */
+/* #define MEMCPY_MISSING 1 */
+/* #define MEMSET_MISSING 1 */
+
+/*
+ * RANDOM_MISSING
+ *
+ * Your system does not have the random(3) suite of random number
+ * generating routines. These are different than the old rand(3)
+ * routines!
+ */
+/* #define RANDOM_MISSING 1 */
+
+/*
+ * STRCASE_MISSING
+ *
+ * Your system does not have the strcasecmp() and strncasecmp()
+ * routines that originated in Berkeley Unix.
+ */
+/* #define STRCASE_MISSING 1 */
+
+/*
+ * STRCHR_MISSING
+ *
+ * Your system does not have the strchr() and strrchr() functions.
+ */
+/* #define STRCHR_MISSING 1 */
+
+/*
+ * STRERROR_MISSING
+ *
+ * Your system lacks the ANSI C strerror() routine for returning the
+ * strings associated with errno values.
+ */
+#define STRERROR_MISSING 1
+
+/*
+ * STRTOD_MISSING
+ *
+ * Your system does not have the strtod() routine for converting
+ * strings to double precision floating point values.
+ */
+/* #define STRTOD_MISSING 1 */
+
+/*
+ * STRTOL_MISSING
+ *
+ * Your system does not have the strtol() routine for converting
+ * strings to long integers.
+ */
+/* #define STRTOL_MISSING 1 */
+
+/*
+ * STRFTIME_MISSING
+ *
+ * Your system lacks the ANSI C strftime() routine for formatting
+ * broken down time values.
+ */
+/* #define STRFTIME_MISSING 1 */
+
+/*
+ * TZSET_MISSING
+ *
+ * If you have a 4.2 BSD vintage system, then the strftime() routine
+ * supplied in the missing directory won't be enough, because it relies on the
+ * tzset() routine from System V / Posix. Fortunately, there is an
+ * emulation for tzset() too that should do the trick. If you don't
+ * have tzset(), define this.
+ */
+/* #define TZSET_MISSING 1 */
+
+/*
+ * TZNAME_MISSING
+ *
+ * Some systems do not support the external variables tzname and daylight.
+ * If this is the case *and* strftime() is missing, define this.
+ */
+/* #define TZNAME_MISSING 1 */
+
+/*
+ * STDC_HEADERS
+ *
+ * If your system does have ANSI compliant header files that
+ * provide prototypes for library routines, then define this.
+ */
+/* #define STDC_HEADERS 1 */
+
+/*
+ * NO_TOKEN_PASTING
+ *
+ * If your compiler defines __STDC__ but does not support token
+ * pasting (tok##tok), then define this.
+ */
+/* #define NO_TOKEN_PASTING 1 */
+
+/*****************************************************************/
+/* Stuff related to the Standard I/O Library. */
+/*****************************************************************/
+/* Much of this is (still, unfortunately) black magic in nature. */
+/* You may have to use some or all of these together to get gawk */
+/* to work correctly. */
+/*****************************************************************/
+
+/*
+ * NON_STD_SPRINTF
+ *
+ * Look in your /usr/include/stdio.h file. If the return type of the
+ * sprintf() function is NOT `int', define this.
+ */
+#define NON_STD_SPRINTF 1
+
+/*
+ * VPRINTF_MISSING
+ *
+ * Define this if your system lacks vprintf() and the other routines
+ * that go with it.
+ */
+/* #define VPRINTF_MISSING 1 */
+
+/*
+ * BSDSTDIO
+ *
+ * Define this if your standard i/o library is internally compatible
+ * with the one shipped with Berkeley Unix systems (4.n, n <= 3-reno).
+ * If you've defined VPRINTF_MISSING, you probably will need this too.
+ */
+/* #define BSDSTDIO 1 */
+
+/*
+ * DOPRNT_MISSING
+ *
+ * Define this if your standard i/o library does not have the _doprnt()
+ * routine. This is used in an attempt to simulate the vfprintf()
+ * routine.
+ */
+/* #define DOPRNT_MISSING 1 */
+
+/*
+ * Casts from size_t to int and back. These will become unnecessary
+ * at some point in the future, but for now are required where the
+ * two types are a different representation.
+ */
+/* #define SZTC */
+/* #define INTC */
+
+/*
+ * SYSTEM_MISSING
+ *
+ * Define this if your library does not provide a system function
+ * or you are not entirely happy with it and would rather use
+ * a provided replacement (atari only).
+ */
+/* #define SYSTEM_MISSING 1 */
+
+
+/*******************************/
+/* Gawk configuration options. */
+/*******************************/
+
+/*
+ * DEFPATH
+ *
+ * The default search path for the -f option of gawk. It is used
+ * if the AWKPATH environment variable is undefined. The default
+ * definition is provided here. Most likely you should not change
+ * this.
+ */
+
+/* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
+/* #define ENVSEP ':' */
+
+/* anything that follows is for system-specific short-term kludges */
diff --git a/config.h-dist b/config.h-dist
new file mode 100644
index 00000000..d2b63c06
--- /dev/null
+++ b/config.h-dist
@@ -0,0 +1,285 @@
+/*
+ * config.h -- configuration definitions for gawk.
+ *
+ * __SYSTEM__
+ */
+
+/*
+ * Copyright (C) 1991, the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file isolates configuration dependencies for gnu awk.
+ * You should know something about your system, perhaps by having
+ * a manual handy, when you edit this file. You should copy config.h-dist
+ * to config.h, and edit config.h. Do not modify config.h-dist, so that
+ * it will be easy to apply any patches that may be distributed.
+ *
+ * The general idea is that systems conforming to the various standards
+ * should need to do the least amount of changing. Defining the various
+ * items in this file usually means that your system is missing that
+ * particular feature.
+ *
+ * The order of preference in standard conformance is ANSI C, POSIX,
+ * and the SVID.
+ *
+ * If you have no clue as to what's going on with your system, try
+ * compiling gawk without editing this file and see what shows up
+ * missing in the link stage. From there, you can probably figure out
+ * which defines to turn on.
+ */
+
+/**************************/
+/* Miscellaneous features */
+/**************************/
+
+/*
+ * BLKSIZE_MISSING
+ *
+ * Check your /usr/include/sys/stat.h file. If the stat structure
+ * does not have a member named st_blksize, define this. (This will
+ * most likely be the case on most System V systems prior to V.4.)
+ */
+/* #define BLKSIZE_MISSING 1 */
+
+/*
+ * SIGTYPE
+ *
+ * The return type of the routines passed to the signal function.
+ * Modern systems use `void', older systems use `int'.
+ * If left undefined, it will default to void.
+ */
+/* #define SIGTYPE int */
+
+/*
+ * SIZE_T_MISSING
+ *
+ * If your system has no typedef for size_t, define this to get a default
+ */
+/* #define SIZE_T_MISSING 1 */
+
+/*
+ * CHAR_UNSIGNED
+ *
+ * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
+ * then define this for use in regex.c
+ */
+/* #define CHAR_UNSIGNED 1 */
+
+/*
+ * HAVE_UNDERSCORE_SETJMP
+ *
+ * Check in your /usr/include/setjmp.h file. If there are routines
+ * there named _setjmp and _longjmp, then you should define this.
+ * Typically only systems derived from Berkeley Unix have this.
+ */
+/* #define HAVE_UNDERSCORE_SETJMP 1 */
+
+/***********************************************/
+/* Missing library subroutines or system calls */
+/***********************************************/
+
+/*
+ * GETOPT_MISSING
+ *
+ * Define this if your library does not have the getopt(3) library
+ * routine for parsing command line arguments.
+ */
+/* #define GETOPT_MISSING 1 */
+
+/*
+ * MEMCMP_MISSING
+ * MEMCPY_MISSING
+ * MEMSET_MISSING
+ *
+ * These three routines are for manipulating blocks of memory. Most
+ * likely they will either all three be present or all three be missing,
+ * so they're grouped together.
+ */
+/* #define MEMCMP_MISSING 1 */
+/* #define MEMCPY_MISSING 1 */
+/* #define MEMSET_MISSING 1 */
+
+/*
+ * RANDOM_MISSING
+ *
+ * Your system does not have the random(3) suite of random number
+ * generating routines. These are different than the old rand(3)
+ * routines!
+ */
+/* #define RANDOM_MISSING 1 */
+
+/*
+ * STRCASE_MISSING
+ *
+ * Your system does not have the strcasecmp() and strncasecmp()
+ * routines that originated in Berkeley Unix.
+ */
+/* #define STRCASE_MISSING 1 */
+
+/*
+ * STRCHR_MISSING
+ *
+ * Your system does not have the strchr() and strrchr() functions.
+ */
+/* #define STRCHR_MISSING 1 */
+
+/*
+ * STRERROR_MISSING
+ *
+ * Your system lacks the ANSI C strerror() routine for returning the
+ * strings associated with errno values.
+ */
+/* #define STRERROR_MISSING 1 */
+
+/*
+ * STRTOD_MISSING
+ *
+ * Your system does not have the strtod() routine for converting
+ * strings to double precision floating point values.
+ */
+/* #define STRTOD_MISSING 1 */
+
+/*
+ * STRTOL_MISSING
+ *
+ * Your system does not have the strtol() routine for converting
+ * strings to long integers.
+ */
+/* #define STRTOL_MISSING 1 */
+
+/*
+ * STRFTIME_MISSING
+ *
+ * Your system lacks the ANSI C strftime() routine for formatting
+ * broken down time values.
+ */
+/* #define STRFTIME_MISSING 1 */
+
+/*
+ * TZSET_MISSING
+ *
+ * If you have a 4.2 BSD vintage system, then the strftime() routine
+ * supplied in the missing directory won't be enough, because it relies on the
+ * tzset() routine from System V / Posix. Fortunately, there is an
+ * emulation for tzset() too that should do the trick. If you don't
+ * have tzset(), define this.
+ */
+/* #define TZSET_MISSING 1 */
+
+/*
+ * TZNAME_MISSING
+ *
+ * Some systems do not support the external variables tzname and daylight.
+ * If this is the case *and* strftime() is missing, define this.
+ */
+/* #define TZNAME_MISSING 1 */
+
+/*
+ * STDC_HEADERS
+ *
+ * If your system does have ANSI compliant header files that
+ * provide prototypes for library routines, then define this.
+ */
+/* #define STDC_HEADERS 1 */
+
+/*
+ * NO_TOKEN_PASTING
+ *
+ * If your compiler defines __STDC__ but does not support token
+ * pasting (tok##tok), then define this.
+ */
+/* #define NO_TOKEN_PASTING 1 */
+
+/*****************************************************************/
+/* Stuff related to the Standard I/O Library. */
+/*****************************************************************/
+/* Much of this is (still, unfortunately) black magic in nature. */
+/* You may have to use some or all of these together to get gawk */
+/* to work correctly. */
+/*****************************************************************/
+
+/*
+ * NON_STD_SPRINTF
+ *
+ * Look in your /usr/include/stdio.h file. If the return type of the
+ * sprintf() function is NOT `int', define this.
+ */
+/* #define NON_STD_SPRINTF 1 */
+
+/*
+ * VPRINTF_MISSING
+ *
+ * Define this if your system lacks vprintf() and the other routines
+ * that go with it.
+ */
+/* #define VPRINTF_MISSING 1 */
+
+/*
+ * BSDSTDIO
+ *
+ * Define this if your standard i/o library is internally compatible
+ * with the one shipped with Berkeley Unix systems (4.n, n <= 3-reno).
+ * If you've defined VPRINTF_MISSING, you probably will need this too.
+ */
+/* #define BSDSTDIO 1 */
+
+/*
+ * DOPRNT_MISSING
+ *
+ * Define this if your standard i/o library does not have the _doprnt()
+ * routine. This is used in an attempt to simulate the vfprintf()
+ * routine.
+ */
+/* #define DOPRNT_MISSING 1 */
+
+/*
+ * Casts from size_t to int and back. These will become unnecessary
+ * at some point in the future, but for now are required where the
+ * two types are a different representation.
+ */
+/* #define SZTC */
+/* #define INTC */
+
+/*
+ * SYSTEM_MISSING
+ *
+ * Define this if your library does not provide a system function
+ * or you are not entirely happy with it and would rather use
+ * a provided replacement (atari only).
+ */
+/* #define SYSTEM_MISSING 1 */
+
+
+/*******************************/
+/* Gawk configuration options. */
+/*******************************/
+
+/*
+ * DEFPATH
+ *
+ * The default search path for the -f option of gawk. It is used
+ * if the AWKPATH environment variable is undefined. The default
+ * definition is provided here. Most likely you should not change
+ * this.
+ */
+
+/* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
+/* #define ENVSEP ':' */
diff --git a/config/apollo b/config/apollo
new file mode 100644
index 00000000..c1660e02
--- /dev/null
+++ b/config/apollo
@@ -0,0 +1,6 @@
+HP/Apollo workstations, running Domain/OS 10.x, with cc 6.7
+STRCASE_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+NO_TOKEN_PASTING 1
+MAKE_Apollo
diff --git a/config/atari b/config/atari
new file mode 100644
index 00000000..36bf23f6
--- /dev/null
+++ b/config/atari
@@ -0,0 +1,9 @@
+Atari ST under TOS with gcc compiler
+BLKSIZE_MISSING 1 /* Not really - but it may work better that way */
+STRCASE_MISSING 1
+STDC_HEADERS 1
+SYSTEM_MISSING 1
+DEFPATH ".,c:\\\\lib\\\\awk,c:\\\\gnu\\\\lib\\\\awk"
+ENVSEP ','
+SZTC (size_t)
+INTC (int)
diff --git a/config/bsd42 b/config/bsd42
new file mode 100644
index 00000000..720cd0f1
--- /dev/null
+++ b/config/bsd42
@@ -0,0 +1,16 @@
+For generic 4.2 BSD machine.
+SIGTYPE int
+HAVE_UNDERSCORE_SETJMP 1
+GETOPT_MISSING 1
+MEMCMP_MISSING 1
+MEMCPY_MISSING 1
+MEMSET_MISSING 1
+STRCASE_MISSING 1
+STRCHR_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+STRTOD_MISSING 1
+STRTOL_MISSING 1
+NON_STD_SPRINTF 1
+VPRINTF_MISSING 1
+BSDSTDIO 1
diff --git a/config/bsd43 b/config/bsd43
new file mode 100644
index 00000000..c48601f7
--- /dev/null
+++ b/config/bsd43
@@ -0,0 +1,16 @@
+For generic 4.3 BSD machine.
+SIGTYPE int
+HAVE_UNDERSCORE_SETJMP 1
+MEMCMP_MISSING 1
+MEMCPY_MISSING 1
+MEMSET_MISSING 1
+STRCASE_MISSING 1
+STRCHR_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+STRTOD_MISSING 1
+STRTOL_MISSING 1
+NON_STD_SPRINTF 1
+VPRINTF_MISSING 1
+BSDSTDIO 1
+TZNAME_MISSING 1
diff --git a/config/bsd43r b/config/bsd43r
new file mode 100644
index 00000000..e1ea95a2
--- /dev/null
+++ b/config/bsd43r
@@ -0,0 +1,3 @@
+For generic 4.3-Reno BSD machine.
+HAVE_UNDERSCORE_SETJMP 1
+STRTOD_MISSING 1
diff --git a/config/bsd43t b/config/bsd43t
new file mode 100644
index 00000000..d0bdcf6b
--- /dev/null
+++ b/config/bsd43t
@@ -0,0 +1,14 @@
+For generic 4.3-Tahoe BSD machine.
+SIGTYPE int
+HAVE_UNDERSCORE_SETJMP 1
+MEMCMP_MISSING 1
+MEMCPY_MISSING 1
+MEMSET_MISSING 1
+STRCHR_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+STRTOD_MISSING 1
+STRTOL_MISSING 1
+NON_STD_SPRINTF 1
+VPRINTF_MISSING 1
+BSDSTDIO 1
diff --git a/config/cray b/config/cray
new file mode 100644
index 00000000..fab18998
--- /dev/null
+++ b/config/cray
@@ -0,0 +1,9 @@
+Cray 2 running Unicos 5.0.7
+BLKSIZE_MISSING 1
+SIGTYPE void
+HAVE_UNDERSCORE_SETJMP 1
+RANDOM_MISSING 1
+STRCASE_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+STDC_HEADERS 1
diff --git a/config/cray2-50 b/config/cray2-50
new file mode 100644
index 00000000..744a97eb
--- /dev/null
+++ b/config/cray2-50
@@ -0,0 +1,7 @@
+Cray CRAY-2 system running Unicos 5.0 or 5.x?
+BLKSIZE_MISSING 1
+SIGTYPE void
+RANDOM_MISSING 1
+STDC_HEADERS 1
+CHAR_UNSIGNED 1
+STRCASE_MISSING 1
diff --git a/config/cray2-60 b/config/cray2-60
new file mode 100644
index 00000000..6330ba7f
--- /dev/null
+++ b/config/cray2-60
@@ -0,0 +1,6 @@
+Cray Research CRAY-2 system running Unicos 6.0 or 6.1
+BLKSIZE_MISSING 1
+SIGTYPE void
+RANDOM_MISSING 1
+STDC_HEADERS 1
+CHAR_UNSIGNED 1
diff --git a/config/cray60 b/config/cray60
new file mode 100644
index 00000000..043f5dd7
--- /dev/null
+++ b/config/cray60
@@ -0,0 +1,5 @@
+Cray Research system running Unicos 6.0
+SIGTYPE void
+RANDOM_MISSING 1
+STDC_HEADERS 1
+CHAR_UNSIGNED 1
diff --git a/config/interactive2.2 b/config/interactive2.2
new file mode 100644
index 00000000..24d5ec18
--- /dev/null
+++ b/config/interactive2.2
@@ -0,0 +1,9 @@
+Interactive Unix 2.2
+BLKSIZE_MISSING 1
+RANDOM_MISSING 1
+STRCASE_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+STDC_HEADERS 1
+_POSIX_SOURCE 1
+POSIX 1
diff --git a/config/msc60 b/config/msc60
new file mode 100644
index 00000000..d9909659
--- /dev/null
+++ b/config/msc60
@@ -0,0 +1,9 @@
+MS-DOS systems using MSC 6.0
+BLKSIZE_MISSING 1
+SIZE_T_MISSING 1
+GCVT_MISSING 1
+GETOPT_MISSING 1
+RANDOM_MISSING 1
+STRCASE_MISSING 1
+#STRFTIME_MISSING 1
+STRTOL_MISSING 1
diff --git a/config/msdos b/config/msdos
new file mode 100644
index 00000000..cb2d4c67
--- /dev/null
+++ b/config/msdos
@@ -0,0 +1,9 @@
+MS-DOS systems using MSC 5.1
+BLKSIZE_MISSING 1
+SIZE_T_MISSING 1
+GCVT_MISSING 1
+GETOPT_MISSING 1
+RANDOM_MISSING 1
+STRCASE_MISSING 1
+STRFTIME_MISSING 1
+STRTOL_MISSING 1
diff --git a/config/news b/config/news
new file mode 100644
index 00000000..dbcc354e
--- /dev/null
+++ b/config/news
@@ -0,0 +1,6 @@
+Sony News
+HAVE_UNDERSCORE_SETJMP 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+STRTOD_MISSING 1
+NON_STD_SPRINTF 1
diff --git a/config/next20 b/config/next20
new file mode 100644
index 00000000..6151e496
--- /dev/null
+++ b/config/next20
@@ -0,0 +1,6 @@
+NeXT running 2.0
+STRTOD_MISSING 1 /* NeXT strtod() is buggy */
+STDC_HEADERS 1
+SZTC (size_t)
+INTC (int)
+MAKE_NeXT
diff --git a/config/rs6000 b/config/rs6000
new file mode 100644
index 00000000..ec389a3b
--- /dev/null
+++ b/config/rs6000
@@ -0,0 +1,6 @@
+For IBM RS/6000 systems.
+RANDOM_MISSING 1
+STDC_HEADERS 1
+CHAR_UNSIGNED 1
+MAKE_ALLOCA_C
+MAKE_RS6000
diff --git a/config/sequent b/config/sequent
new file mode 100644
index 00000000..48d2821a
--- /dev/null
+++ b/config/sequent
@@ -0,0 +1,16 @@
+For Sequent machines (4.3 BSD-like configuration).
+SIGTYPE int
+HAVE_UNDERSCORE_SETJMP 1
+MEMCMP_MISSING 1
+MEMCPY_MISSING 1
+MEMSET_MISSING 1
+STRCASE_MISSING 1
+STRCHR_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+STRTOD_MISSING 1
+STRTOL_MISSING 1
+NON_STD_SPRINTF 1
+VPRINTF_MISSING 1
+BSDSTDIO 1
+TZSET_MISSING 1
diff --git a/config/sgi b/config/sgi
new file mode 100644
index 00000000..7886bb5f
--- /dev/null
+++ b/config/sgi
@@ -0,0 +1,5 @@
+SGI Personal Iris (Sys V derived)
+BLKSIZE_MISSING 1
+RANDOM_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
diff --git a/config/sgi33 b/config/sgi33
new file mode 100644
index 00000000..633f8e53
--- /dev/null
+++ b/config/sgi33
@@ -0,0 +1,4 @@
+SGI Personal Iris (Sys V derived) (this works with gcc)
+BLKSIZE_MISSING 1
+STDC_HEADERS 1
+MAKE_ALLOCA_C
diff --git a/config/sgi33.cc b/config/sgi33.cc
new file mode 100644
index 00000000..2798db8a
--- /dev/null
+++ b/config/sgi33.cc
@@ -0,0 +1,5 @@
+SGI Personal Iris (Sys V derived) (this works with cc)
+BLKSIZE_MISSING 1
+STDC_HEADERS 1
+CHAR_UNSIGNED 1
+MAKE_ALLOCA_C
diff --git a/config/sunos3 b/config/sunos3
new file mode 100644
index 00000000..be09e0d8
--- /dev/null
+++ b/config/sunos3
@@ -0,0 +1,8 @@
+Sun running SunOS 3.x
+SIGTYPE int
+HAVE_UNDERSCORE_SETJMP 1
+STRCASE_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+NON_STD_SPRINTF 1
+TZSET_MISSING 1
diff --git a/config/sunos40 b/config/sunos40
new file mode 100644
index 00000000..c3e8bdc2
--- /dev/null
+++ b/config/sunos40
@@ -0,0 +1,7 @@
+Sun running SunOS 4.0.x
+HAVE_UNDERSCORE_SETJMP 1
+STRCASE_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+TZNAME_MISSING 1
+NON_STD_SPRINTF 1
diff --git a/config/sunos41 b/config/sunos41
new file mode 100644
index 00000000..c26040dc
--- /dev/null
+++ b/config/sunos41
@@ -0,0 +1,4 @@
+Sun running SunOS 4.1
+HAVE_UNDERSCORE_SETJMP 1
+STRERROR_MISSING 1
+NON_STD_SPRINTF 1
diff --git a/config/sysv2 b/config/sysv2
new file mode 100644
index 00000000..0239639c
--- /dev/null
+++ b/config/sysv2
@@ -0,0 +1,6 @@
+System V.2 Systems, Amdahl UTS, ATT UnixPCs
+BLKSIZE_MISSING 1
+RANDOM_MISSING 1
+STRCASE_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
diff --git a/config/sysv3 b/config/sysv3
new file mode 100644
index 00000000..30e19d16
--- /dev/null
+++ b/config/sysv3
@@ -0,0 +1,6 @@
+System V.3 Systems (generic)
+BLKSIZE_MISSING 1
+RANDOM_MISSING 1
+STRCASE_MISSING 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
diff --git a/config/sysv4 b/config/sysv4
new file mode 100644
index 00000000..dfaf5ce4
--- /dev/null
+++ b/config/sysv4
@@ -0,0 +1,4 @@
+System V.4 Systems (generic)
+RANDOM_MISSING 1
+STRCASE_MISSING 1
+STDC_HEADERS 1
diff --git a/config/ultrix31 b/config/ultrix31
new file mode 100644
index 00000000..912c80cd
--- /dev/null
+++ b/config/ultrix31
@@ -0,0 +1,6 @@
+DECstation or VAX running Ultrix 3.x
+HAVE_UNDERSCORE_SETJMP 1
+STRERROR_MISSING 1
+STRFTIME_MISSING 1
+NON_STD_SPRINTF 1
+TZSET_MISSING 1
diff --git a/config/ultrix40 b/config/ultrix40
new file mode 100644
index 00000000..2c0b70db
--- /dev/null
+++ b/config/ultrix40
@@ -0,0 +1,2 @@
+DECstation running Ultrix 4.0 (4.x?)
+STDC_HEADERS 1
diff --git a/config/ultrix41 b/config/ultrix41
new file mode 100644
index 00000000..84348df8
--- /dev/null
+++ b/config/ultrix41
@@ -0,0 +1,3 @@
+DECstation running Ultrix 4.1 (and 4.2??)
+STDC_HEADERS 1
+#define Ultrix41 1
diff --git a/config/vms-conf.h b/config/vms-conf.h
new file mode 100644
index 00000000..d68f0a91
--- /dev/null
+++ b/config/vms-conf.h
@@ -0,0 +1,307 @@
+/*
+ * config.h -- configuration definitions for gawk.
+ *
+ * For VMS (assumes V4.6 or later; tested on V5.3 and V5.4)
+ */
+
+/*
+ * Copyright (C) 1991, the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/**************************/
+/* Miscellaneous features */
+/**************************/
+
+/*
+ * BLKSIZE_MISSING
+ * VMS: missing--not applicable
+ * Check your /usr/include/sys/stat.h file. If the stat structure
+ * does not have a member named st_blksize, define this. (This will
+ * most likely be the case on most System V systems prior to V.4.)
+ */
+#define BLKSIZE_MISSING 1
+
+/*
+ * SIGTYPE
+ * VMS: either should work; void is 'correct'
+ * The return type of the routines passed to the signal function.
+ * Modern systems use `void', older systems use `int'.
+ * If left undefined, it will default to void.
+ */
+#define SIGTYPE void
+
+/*
+ * SIZE_T_MISSING
+ * VMS: <stddef.h> via <stdlib.h> (VAX C V2.3 & up); <sys/types.h> (GNU C)
+ * If your system has no typedef for size_t, define this to get a default
+ */
+/* #define SIZE_T_MISSING 1 */
+
+/*
+ * CHAR_UNSIGNED
+ * VMS: well behaved, either signed or unsigned (signed by default)
+ * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
+ * then define this for use in regex.c
+ */
+/* #define CHAR_UNSIGNED 1 */
+
+/*
+ * HAVE_UNDERSCORE_SETJMP
+ * VMS: not present
+ * Check in your /usr/include/setjmp.h file. If there are routines
+ * there named _setjmp and _longjmp, then you should define this.
+ * Typically only systems derived from Berkeley Unix have this.
+ */
+/* #define HAVE_UNDERSCORE_SETJMP 1 */
+
+/***********************************************/
+/* Missing library subroutines or system calls */
+/***********************************************/
+
+/*
+ * GETOPT_MISSING
+ * VMS: missing
+ * Define this if your library does not have the getopt(3) library
+ * routine for parsing command line arguments.
+ */
+#define GETOPT_MISSING 1
+
+/*
+ * MEMCMP_MISSING
+ * MEMCPY_MISSING
+ * MEMSET_MISSING
+ * VMS: <string.h> (introduced V4.6)
+ * These three routines are for manipulating blocks of memory. Most
+ * likely they will either all three be present or all three be missing,
+ * so they're grouped together.
+ */
+/* #define MEMCMP_MISSING 1 */
+/* #define MEMCPY_MISSING 1 */
+/* #define MEMSET_MISSING 1 */
+
+/*
+ * RANDOM_MISSING
+ * VMS: missing (as of V5.4)
+ * Your system does not have the random(3) suite of random number
+ * generating routines. These are different than the old rand(3)
+ * routines!
+ */
+#define RANDOM_MISSING 1
+
+/*
+ * STRCASE_MISSING
+ * VMS: missing
+ * Your system does not have the strcasecmp() and strncasecmp()
+ * routines that originated in Berkeley Unix.
+ */
+#define STRCASE_MISSING 1
+
+/*
+ * STRCHR_MISSING
+ * VMS: <string.h>
+ * Your system does not have the strchr() and strrchr() functions.
+ */
+/* #define STRCHR_MISSING 1 */
+
+/*
+ * STRERROR_MISSING
+ * VMS: <stdlib.h> (introduced V4.6)
+ * Your system lacks the ANSI C strerror() routine for returning the
+ * strings associated with errno values.
+ */
+/* #define STRERROR_MISSING 1 */
+
+/*
+ * STRFTIME_MISSING
+ * VMS: missing (as of V5.4)
+ * Your system lacks the ANSI C strftime() routine for formatting
+ * broken down time values.
+ */
+#define STRFTIME_MISSING 1
+
+/*
+ * STRTOD_MISSING
+ * VMS: <stdlib.h> (introduced V4.6)
+ * Your system does not have the strtod() routine for converting
+ * strings to double precision floating point values.
+ */
+/* #define STRTOD_MISSING 1 */
+
+/*
+ * STRTOL_MISSING
+ * VMS: <stdlib.h> (introduced V4.6)
+ * Your system does not have the strtol() routine for converting
+ * strings to long integers.
+ */
+/* #define STRTOL_MISSING 1 */
+
+/*
+ * TZSET_MISSING
+ * VMS: missing, but can't use missing/tzset.c [no timezone support]
+ * If you have a 4.2 BSD vintage system, then the strftime() routine
+ * supplied in the missing directory won't be enough, because it relies on the
+ * tzset() routine from System V / Posix. Fortunately, there is an
+ * emulation for tzset() too that should do the trick. If you don't
+ * have tzset(), define this.
+ */
+/* #define TZSET_MISSING 1 */
+
+/*
+ * STDC_HEADERS
+ * VMS: close enough (as of V4.6, VAX C V2.3) [GCC, see below]
+ * If your system does have ANSI compliant header files that
+ * provide prototypes for library routines, then define this.
+ */
+#define STDC_HEADERS 1
+
+/*
+ * NO_TOKEN_PASTING
+ * VMS: compiler specific--see below
+ * If your compiler defines __STDC__ but does not support token
+ * pasting (tok##tok), then define this.
+ */
+/* #define NO_TOKEN_PASTING 1 */
+
+/*****************************************************************/
+/* Stuff related to the Standard I/O Library. */
+/*****************************************************************/
+/* Much of this is (still, unfortunately) black magic in nature. */
+/* You may have to use some or all of these together to get gawk */
+/* to work correctly. */
+/*****************************************************************/
+
+/*
+ * NON_STD_SPRINTF
+ * VMS: ok
+ * Look in your /usr/include/stdio.h file. If the return type of the
+ * sprintf() function is NOT `int', define this.
+ */
+/* #define NON_STD_SPRINTF 1 */
+
+/*
+ * VPRINTF_MISSING
+ * VMS: ok (introduced V4.6)
+ * Define this if your system lacks vprintf() and the other routines
+ * that go with it.
+ */
+/* #define VPRINTF_MISSING 1 */
+
+/*
+ * BSDSTDIO
+ * VMS: forgot it
+ * Define this if your standard i/o library is internally compatible
+ * with the one shipped with Berkeley Unix systems (4.n, n <= 3-reno).
+ * If you've defined VPRINTF_MISSING, you probably will need this too.
+ */
+/* #define BSDSTDIO 1 */
+
+/*
+ * DOPRNT_MISSING
+ * VMS: missing--doesn't matter
+ * Define this if your standard i/o library does not have the _doprnt()
+ * routine. This is used in an attempt to simulate the vfprintf()
+ * routine.
+ */
+/* #define DOPRNT_MISSING 1 */
+
+/*
+ * Casts from size_t to int and back. These will become unnecessary
+ * at some point in the future, but for now are required where the
+ * two types have different representations.
+ */
+/* #define SZTC */
+/* #define INTC */
+
+/*
+ * SYSTEM_MISSING
+ * VMS: ok (introduced V4.6)
+ * Define this if your library does not provide a system function
+ * or you are not entirely happy with it and would rather use
+ * a provided replacement (atari only).
+ */
+/* #define SYSTEM_MISSING 1 */
+
+
+/*******************************/
+/* Gawk configuration options. */
+/*******************************/
+
+/*
+ * DEFPATH
+ * VMS: "/AWK_LIBRARY" => "AWK_LIBRARY:"
+ * The default search path for the -f option of gawk. It is used
+ * if the AWKPATH environment variable is undefined.
+ *
+ * Note: OK even if no AWK_LIBRARY logical name has been defined.
+ */
+
+#define DEFPATH ".,/AWK_LIBRARY"
+#define ENVSEP ','
+
+/*
+ * Extended source file access.
+ */
+#define DEFAULT_FILETYPE ".awk"
+
+/*
+ * Pipe handling.
+ */
+#define PIPES_SIMULATED 1
+
+/*
+ * %g format in VAXCRTL is broken (chooses %e format when should use %f).
+ */
+#define GFMT_WORKAROUND 1
+
+/*
+ * VAX C
+ *
+ * As of V3.2, VAX C is not yet ANSI-compliant. But it's close enough
+ * for GAWK's purposes. Comment this out for VAX C V2.4 and earlier.
+ * Value of 0 should mean "not ANSI-C", but GAWK uses def/not-def tests.
+ * YYDEBUG definition is needed for combination of VAX C V2.x and Bison.
+ */
+#if defined(VAXC) && !defined(__STDC__)
+#define __STDC__ 0
+#define NO_TOKEN_PASTING
+#define VAXC_BUILTINS
+/* #define YYDEBUG 0 */
+#endif
+
+/*
+ * GNU C
+ *
+ * Versions of GCC (actually GAS) earlier than 1.38 don't produce the
+ * right code for ``extern const'' constructs, and other usages of
+ * const might not be right either. The old set of include files from
+ * the gcc-vms distribution did not contain prototypes, and this could
+ * provoke some const-related compiler warnings. If you've got an old
+ * version of gcc for VMS, define 'const' out of existence, and by all
+ * means obtain the most recent version!
+ *
+ * Note: old versions of GCC should also avoid defining STDC_HEADERS,
+ * because most of the ANSI-C required header files are missing.
+ */
+#ifdef __GNUC__
+#define const
+#undef STDC_HEADERS
+#define alloca __builtin_alloca
+#define environ $$PsectAttributes_NOSHR$$environ /* awful GAS kludge */
+#endif
diff --git a/configure b/configure
new file mode 100755
index 00000000..2ff6dbff
--- /dev/null
+++ b/configure
@@ -0,0 +1,32 @@
+#! /bin/sh
+#
+# configure -- produce a config.h from a known configuration
+
+case "$#" in
+1) ;;
+*) echo "Usage: $0 system_type" >&2
+ echo "Known systems: `cd config; echo ;ls -C`" >&2
+ exit 2
+ ;;
+esac
+
+if [ -f config/$1 ]; then
+ sh ./mungeconf config/$1 config.h-dist >config.h
+
+ # echo #echo lines to stdout
+ sed -n '/^#echo /s///p' config/$1
+
+ sed -n '/^MAKE_.*/s//s,^##&## ,,/p' config/$1 >sedscr
+ if [ -s sedscr ]
+ then
+ sed -f sedscr Makefile-dist >Makefile
+ else
+ cp Makefile-dist Makefile
+ fi
+ rm -f sedscr
+else
+ echo "\`$1' is not a known configuration."
+ echo "Either construct one based on the examples in the config directory,"
+ echo "or copy config.h-dist to config.h and edit it."
+ exit 1
+fi
diff --git a/debug.c b/debug.c
deleted file mode 100644
index a444eb5e..00000000
--- a/debug.c
+++ /dev/null
@@ -1,561 +0,0 @@
-/*
- * debug.c -- Various debugging routines
- */
-
-/*
- * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
- *
- * This file is part of GAWK, the GNU implementation of the
- * AWK Progamming Language.
- *
- * GAWK is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 1, or (at your option)
- * any later version.
- *
- * GAWK is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GAWK; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "awk.h"
-
-#ifdef DEBUG
-
-extern NODE **fields_arr;
-
-
-/* This is all debugging stuff. Ignore it and maybe it'll go away. */
-
-/*
- * Some of it could be turned into a really cute trace command, if anyone
- * wants to.
- */
-char *nnames[] = {
- "illegal", "times", "quotient", "mod", "plus",
- "minus", "cond_pair", "subscript", "concat", "exp",
- /* 10 */
- "preincrement", "predecrement", "postincrement", "postdecrement",
- "unary_minus",
- "field_spec", "assign", "assign_times", "assign_quotient", "assign_mod",
- /* 20 */
- "assign_plus", "assign_minus", "assign_exp", "and", "or",
- "equal", "notequal", "less", "greater", "leq",
- /* 30 */
- "geq", "match", "nomatch", "not", "rule_list",
- "rule_node", "statement_list", "if_branches", "expression_list",
- "param_list",
- /* 40 */
- "K_if", "K_while", "K_for", "K_arrayfor", "K_break",
- "K_continue", "K_print", "K_printf", "K_next", "K_exit",
- /* 50 */
- "K_do", "K_return", "K_delete", "K_getline", "K_function",
- "redirect_output", "redirect_append", "redirect_pipe",
- "redirect_pipein", "redirect_input",
- /* 60 */
- "var", "var_array", "val", "builtin", "line_range",
- "in_array", "func", "func_call", "cond_exp", "regex",
- /* 70 */
- "hashnode", "ahash"
-};
-
-ptree(n)
-NODE *n;
-{
- print_parse_tree(n);
-}
-
-pt()
-{
- long x;
-
- (void) scanf("%x", &x);
- printf("0x%x\n", x);
- print_parse_tree((NODE *) x);
- fflush(stdout);
-}
-
-static depth = 0;
-
-print_parse_tree(ptr)
-NODE *ptr;
-{
- if (!ptr) {
- printf("NULL\n");
- return;
- }
- if ((int) (ptr->type) < 0 || (int) (ptr->type) > sizeof(nnames) / sizeof(nnames[0])) {
- printf("(0x%x Type %d??)\n", ptr, ptr->type);
- return;
- }
- printf("(%d)%*s", depth, depth, "");
- switch ((int) ptr->type) {
- case (int) Node_val:
- printf("(0x%x Value ", ptr);
- if (ptr->flags&STR)
- printf("str: \"%.*s\" ", ptr->stlen, ptr->stptr);
- if (ptr->flags&NUM)
- printf("num: %g", ptr->numbr);
- printf(")\n");
- return;
- case (int) Node_var_array:
- {
- struct search *l;
-
- printf("(0x%x Array)\n", ptr);
- for (l = assoc_scan(ptr); l; l = assoc_next(l)) {
- printf("\tindex: ");
- print_parse_tree(l->retval);
- printf("\tvalue: ");
- print_parse_tree(*assoc_lookup(ptr, l->retval));
- printf("\n");
- }
- return;
- }
- case Node_param_list:
- printf("(0x%x Local variable %s)\n", ptr, ptr->param);
- if (ptr->rnode)
- print_parse_tree(ptr->rnode);
- return;
- case Node_regex:
- printf("(0x%x Regular expression %s\n", ptr, ptr->re_text);
- return;
- }
- if (ptr->lnode)
- printf("0x%x = left<--", ptr->lnode);
- printf("(0x%x %s.%d)", ptr, nnames[(int) (ptr->type)], ptr->type);
- if (ptr->rnode)
- printf("-->right = 0x%x", ptr->rnode);
- printf("\n");
- depth++;
- if (ptr->lnode)
- print_parse_tree(ptr->lnode);
- switch ((int) ptr->type) {
- case (int) Node_line_range:
- case (int) Node_match:
- case (int) Node_nomatch:
- break;
- case (int) Node_builtin:
- printf("Builtin: %d\n", ptr->proc);
- break;
- case (int) Node_K_for:
- case (int) Node_K_arrayfor:
- printf("(%s:)\n", nnames[(int) (ptr->type)]);
- print_parse_tree(ptr->forloop->init);
- printf("looping:\n");
- print_parse_tree(ptr->forloop->cond);
- printf("doing:\n");
- print_parse_tree(ptr->forloop->incr);
- break;
- default:
- if (ptr->rnode)
- print_parse_tree(ptr->rnode);
- break;
- }
- --depth;
-}
-
-
-/*
- * print out all the variables in the world
- */
-
-dump_vars()
-{
- register int n;
- register NODE *buc;
-
-#ifdef notdef
- printf("Fields:");
- dump_fields();
-#endif
- printf("Vars:\n");
- for (n = 0; n < HASHSIZE; n++) {
- for (buc = variables[n]; buc; buc = buc->hnext) {
- printf("'%.*s': ", buc->hlength, buc->hname);
- print_parse_tree(buc->hvalue);
- }
- }
- printf("End\n");
-}
-
-#ifdef notdef
-dump_fields()
-{
- register NODE **p;
- register int n;
-
- printf("%d fields\n", f_arr_siz);
- for (n = 0, p = &fields_arr[0]; n < f_arr_siz; n++, p++) {
- printf("$%d is '", n);
- print_simple(*p, stdout);
- printf("'\n");
- }
-}
-#endif
-
-/* VARARGS1 */
-print_debug(str, n)
-char *str;
-{
- extern int debugging;
-
- if (debugging)
- printf("%s:0x%x\n", str, n);
-}
-
-int indent = 0;
-
-print_a_node(ptr)
-NODE *ptr;
-{
- NODE *p1;
- char *str, *str2;
- int n;
- NODE *buc;
-
- if (!ptr)
- return; /* don't print null ptrs */
- switch (ptr->type) {
- case Node_val:
- if (ptr->flags&NUM)
- printf("%g", ptr->numbr);
- else
- printf("\"%.*s\"", ptr->stlen, ptr->stptr);
- return;
- case Node_times:
- str = "*";
- goto pr_twoop;
- case Node_quotient:
- str = "/";
- goto pr_twoop;
- case Node_mod:
- str = "%";
- goto pr_twoop;
- case Node_plus:
- str = "+";
- goto pr_twoop;
- case Node_minus:
- str = "-";
- goto pr_twoop;
- case Node_exp:
- str = "^";
- goto pr_twoop;
- case Node_concat:
- str = " ";
- goto pr_twoop;
- case Node_assign:
- str = "=";
- goto pr_twoop;
- case Node_assign_times:
- str = "*=";
- goto pr_twoop;
- case Node_assign_quotient:
- str = "/=";
- goto pr_twoop;
- case Node_assign_mod:
- str = "%=";
- goto pr_twoop;
- case Node_assign_plus:
- str = "+=";
- goto pr_twoop;
- case Node_assign_minus:
- str = "-=";
- goto pr_twoop;
- case Node_assign_exp:
- str = "^=";
- goto pr_twoop;
- case Node_and:
- str = "&&";
- goto pr_twoop;
- case Node_or:
- str = "||";
- goto pr_twoop;
- case Node_equal:
- str = "==";
- goto pr_twoop;
- case Node_notequal:
- str = "!=";
- goto pr_twoop;
- case Node_less:
- str = "<";
- goto pr_twoop;
- case Node_greater:
- str = ">";
- goto pr_twoop;
- case Node_leq:
- str = "<=";
- goto pr_twoop;
- case Node_geq:
- str = ">=";
- goto pr_twoop;
-
-pr_twoop:
- print_a_node(ptr->lnode);
- printf("%s", str);
- print_a_node(ptr->rnode);
- return;
-
- case Node_not:
- str = "!";
- str2 = "";
- goto pr_oneop;
- case Node_field_spec:
- str = "$(";
- str2 = ")";
- goto pr_oneop;
- case Node_postincrement:
- str = "";
- str2 = "++";
- goto pr_oneop;
- case Node_postdecrement:
- str = "";
- str2 = "--";
- goto pr_oneop;
- case Node_preincrement:
- str = "++";
- str2 = "";
- goto pr_oneop;
- case Node_predecrement:
- str = "--";
- str2 = "";
- goto pr_oneop;
-pr_oneop:
- printf(str);
- print_a_node(ptr->subnode);
- printf(str2);
- return;
-
- case Node_expression_list:
- print_a_node(ptr->lnode);
- if (ptr->rnode) {
- printf(",");
- print_a_node(ptr->rnode);
- }
- return;
-
- case Node_var:
- for (n = 0; n < HASHSIZE; n++) {
- for (buc = variables[n]; buc; buc = buc->hnext) {
- if (buc->hvalue == ptr) {
- printf("%.*s", buc->hlength, buc->hname);
- n = HASHSIZE;
- break;
- }
- }
- }
- return;
- case Node_subscript:
- print_a_node(ptr->lnode);
- printf("[");
- print_a_node(ptr->rnode);
- printf("]");
- return;
- case Node_builtin:
- printf("some_builtin(");
- print_a_node(ptr->subnode);
- printf(")");
- return;
-
- case Node_statement_list:
- printf("{\n");
- indent++;
- for (n = indent; n; --n)
- printf(" ");
- while (ptr) {
- print_maybe_semi(ptr->lnode);
- if (ptr->rnode)
- for (n = indent; n; --n)
- printf(" ");
- ptr = ptr->rnode;
- }
- --indent;
- for (n = indent; n; --n)
- printf(" ");
- printf("}\n");
- for (n = indent; n; --n)
- printf(" ");
- return;
-
- case Node_K_if:
- printf("if(");
- print_a_node(ptr->lnode);
- printf(") ");
- ptr = ptr->rnode;
- if (ptr->lnode->type == Node_statement_list) {
- printf("{\n");
- indent++;
- for (p1 = ptr->lnode; p1; p1 = p1->rnode) {
- for (n = indent; n; --n)
- printf(" ");
- print_maybe_semi(p1->lnode);
- }
- --indent;
- for (n = indent; n; --n)
- printf(" ");
- if (ptr->rnode) {
- printf("} else ");
- } else {
- printf("}\n");
- return;
- }
- } else {
- print_maybe_semi(ptr->lnode);
- if (ptr->rnode) {
- for (n = indent; n; --n)
- printf(" ");
- printf("else ");
- } else
- return;
- }
- if (!ptr->rnode)
- return;
- deal_with_curls(ptr->rnode);
- return;
-
- case Node_K_while:
- printf("while(");
- print_a_node(ptr->lnode);
- printf(") ");
- deal_with_curls(ptr->rnode);
- return;
-
- case Node_K_do:
- printf("do ");
- deal_with_curls(ptr->rnode);
- printf("while(");
- print_a_node(ptr->lnode);
- printf(") ");
- return;
-
- case Node_K_for:
- printf("for(");
- print_a_node(ptr->forloop->init);
- printf(";");
- print_a_node(ptr->forloop->cond);
- printf(";");
- print_a_node(ptr->forloop->incr);
- printf(") ");
- deal_with_curls(ptr->forsub);
- return;
- case Node_K_arrayfor:
- printf("for(");
- print_a_node(ptr->forloop->init);
- printf(" in ");
- print_a_node(ptr->forloop->incr);
- printf(") ");
- deal_with_curls(ptr->forsub);
- return;
-
- case Node_K_printf:
- printf("printf(");
- print_a_node(ptr->lnode);
- printf(")");
- return;
- case Node_K_print:
- printf("print(");
- print_a_node(ptr->lnode);
- printf(")");
- return;
- case Node_K_next:
- printf("next");
- return;
- case Node_K_break:
- printf("break");
- return;
- case Node_K_delete:
- printf("delete ");
- print_a_node(ptr->lnode);
- return;
- case Node_func:
- printf("function %s (", ptr->lnode->param);
- if (ptr->lnode->rnode)
- print_a_node(ptr->lnode->rnode);
- printf(")\n");
- print_a_node(ptr->rnode);
- return;
- case Node_param_list:
- printf("%s", ptr->param);
- if (ptr->rnode) {
- printf(", ");
- print_a_node(ptr->rnode);
- }
- return;
- default:
- print_parse_tree(ptr);
- return;
- }
-}
-
-print_maybe_semi(ptr)
-NODE *ptr;
-{
- print_a_node(ptr);
- switch (ptr->type) {
- case Node_K_if:
- case Node_K_for:
- case Node_K_arrayfor:
- case Node_statement_list:
- break;
- default:
- printf(";\n");
- break;
- }
-}
-
-deal_with_curls(ptr)
-NODE *ptr;
-{
- int n;
-
- if (ptr->type == Node_statement_list) {
- printf("{\n");
- indent++;
- while (ptr) {
- for (n = indent; n; --n)
- printf(" ");
- print_maybe_semi(ptr->lnode);
- ptr = ptr->rnode;
- }
- --indent;
- for (n = indent; n; --n)
- printf(" ");
- printf("}\n");
- } else {
- print_maybe_semi(ptr);
- }
-}
-
-NODE *
-do_prvars()
-{
- dump_vars();
- return Nnull_string;
-}
-
-NODE *
-do_bp()
-{
- return Nnull_string;
-}
-
-#endif
-
-#ifdef MEMDEBUG
-
-#undef free
-extern void free();
-
-void
-do_free(s)
-char *s;
-{
- free(s);
-}
-
-#endif
diff --git a/dfa.c b/dfa.c
new file mode 100644
index 00000000..b33ef8e7
--- /dev/null
+++ b/dfa.c
@@ -0,0 +1,2309 @@
+/* dfa.c - deterministic extended regexp routines for GNU
+ Copyright (C) 1988 Free Software Foundation, Inc.
+ Written June, 1988 by Mike Haertel
+ Modified July, 1988 by Arthur David Olson
+ to assist BMG speedups
+
+ NO WARRANTY
+
+ BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
+NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
+WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
+RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
+WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
+AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
+DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
+CORRECTION.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
+STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
+WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
+LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
+OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
+DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
+A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
+PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
+
+ GENERAL PUBLIC LICENSE TO COPY
+
+ 1. You may copy and distribute verbatim copies of this source file
+as you receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy a valid copyright notice "Copyright
+ (C) 1988 Free Software Foundation, Inc."; and include following the
+copyright notice a verbatim copy of the above disclaimer of warranty
+and of this License. You may charge a distribution fee for the
+physical act of transferring a copy.
+
+ 2. You may modify your copy or copies of this source file or
+any portion of it, and copy and distribute such modifications under
+the terms of Paragraph 1 above, provided that you also do the following:
+
+ a) cause the modified files to carry prominent notices stating
+ that you changed the files and the date of any change; and
+
+ b) cause the whole of any work that you distribute or publish,
+ that in whole or in part contains or is a derivative of this
+ program or any part thereof, to be licensed at no charge to all
+ third parties on terms identical to those contained in this
+ License Agreement (except that you may choose to grant more extensive
+ warranty protection to some or all third parties, at your option).
+
+ c) You may charge a distribution fee for the physical act of
+ transferring a copy, and you may at your option offer warranty
+ protection in exchange for a fee.
+
+Mere aggregation of another unrelated program with this program (or its
+derivative) on a volume of a storage or distribution medium does not bring
+the other program under the scope of these terms.
+
+ 3. You may copy and distribute this program or any portion of it in
+compiled, executable or object code form under the terms of Paragraphs
+1 and 2 above provided that you do the following:
+
+ a) accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ b) accompany it with a written offer, valid for at least three
+ years, to give any third party free (except for a nominal
+ shipping charge) a complete machine-readable copy of the
+ corresponding source code, to be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ c) accompany it with the information you received as to where the
+ corresponding source code may be obtained. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form alone.)
+
+For an executable file, complete source code means all the source code for
+all modules it contains; but, as a special exception, it need not include
+source code for modules which are standard libraries that accompany the
+operating system on which the executable file runs.
+
+ 4. You may not copy, sublicense, distribute or transfer this program
+except as expressly provided under this License Agreement. Any attempt
+otherwise to copy, sublicense, distribute or transfer this program is void and
+your rights to use the program under this License agreement shall be
+automatically terminated. However, parties who have received computer
+software programs from you with this License Agreement will not have
+their licenses terminated so long as such parties remain in full compliance.
+
+ 5. If you wish to incorporate parts of this program into other free
+programs whose distribution conditions are different, write to the Free
+Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
+worked out a simple rule that can be stated here, but we will often permit
+this. We will be guided by the two goals of preserving the free status of
+all derivatives our free software and of promoting the sharing and reuse of
+software.
+
+
+In other words, you are welcome to use, share and improve this program.
+You are forbidden to forbid anyone else to use, share and improve
+what you give them. Help stamp out software-hoarding! */
+
+#include "awk.h"
+#include <assert.h>
+
+#ifdef setbit /* surprise - setbit and clrbit are macros on NeXT */
+#undef setbit
+#endif
+#ifdef clrbit
+#undef clrbit
+#endif
+
+#ifdef __STDC__
+typedef void *ptr_t;
+#else
+typedef char *ptr_t;
+#endif
+
+typedef struct {
+ char ** in;
+ char * left;
+ char * right;
+ char * is;
+} must;
+
+static ptr_t xcalloc P((int n, size_t s));
+static ptr_t xmalloc P((size_t n));
+static ptr_t xrealloc P((ptr_t p, size_t n));
+static int tstbit P((int b, _charset c));
+static void setbit P((int b, _charset c));
+static void clrbit P((int b, _charset c));
+static void copyset P((const _charset src, _charset dst));
+static void zeroset P((_charset s));
+static void notset P((_charset s));
+static int equal P((const _charset s1, const _charset s2));
+static int charset_index P((const _charset s));
+static _token lex P((void));
+static void addtok P((_token t));
+static void atom P((void));
+static void closure P((void));
+static void branch P((void));
+static void regexp P((void));
+static void copy P((const _position_set *src, _position_set *dst));
+static void insert P((_position p, _position_set *s));
+static void merge P((_position_set *s1, _position_set *s2, _position_set *m));
+static void delete P((_position p, _position_set *s));
+static int state_index P((struct regexp *r, _position_set *s,
+ int newline, int letter));
+static void epsclosure P((_position_set *s, struct regexp *r));
+static void build_state P((int s, struct regexp *r));
+static void build_state_zero P((struct regexp *r));
+static char *icatalloc P((char *old, const char *new));
+static char *icpyalloc P((const char *string));
+static char *istrstr P((char *lookin, char *lookfor));
+static void ifree P((char *cp));
+static void freelist P((char **cpp));
+static char **enlist P((char **cpp, char *new, size_t len));
+static char **comsubs P((char *left, char *right));
+static char **addlists P((char **old, char **new));
+static char **inboth P((char **left, char **right));
+static void resetmust P((must *mp));
+static void regmust P((struct regexp *r));
+
+#undef P
+
+static ptr_t
+xcalloc(n, s)
+ int n;
+ size_t s;
+{
+ ptr_t r = calloc(n, s);
+
+ if (NULL == r)
+ regerror("Memory exhausted"); /* regerror does not return */
+ return r;
+}
+
+static ptr_t
+xmalloc(n)
+ size_t n;
+{
+ ptr_t r = malloc(n);
+
+ assert(n != 0);
+ if (NULL == r)
+ regerror("Memory exhausted");
+ return r;
+}
+
+static ptr_t
+xrealloc(p, n)
+ ptr_t p;
+ size_t n;
+{
+ ptr_t r = realloc(p, n);
+
+ assert(n != 0);
+ if (NULL == r)
+ regerror("Memory exhausted");
+ return r;
+}
+
+#define CALLOC(p, t, n) ((p) = (t *) xcalloc((n), sizeof (t)))
+#undef MALLOC
+#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
+#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))
+
+/* Reallocate an array of type t if nalloc is too small for index. */
+#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \
+ if ((index) >= (nalloc)) \
+ { \
+ while ((index) >= (nalloc)) \
+ (nalloc) *= 2; \
+ REALLOC(p, t, nalloc); \
+ }
+
+/* Stuff pertaining to charsets. */
+
+static int
+tstbit(b, c)
+ int b;
+ _charset c;
+{
+ return c[b / INTBITS] & 1 << b % INTBITS;
+}
+
+static void
+setbit(b, c)
+ int b;
+ _charset c;
+{
+ c[b / INTBITS] |= 1 << b % INTBITS;
+}
+
+static void
+clrbit(b, c)
+ int b;
+ _charset c;
+{
+ c[b / INTBITS] &= ~(1 << b % INTBITS);
+}
+
+static void
+copyset(src, dst)
+ const _charset src;
+ _charset dst;
+{
+ int i;
+
+ for (i = 0; i < _CHARSET_INTS; ++i)
+ dst[i] = src[i];
+}
+
+static void
+zeroset(s)
+ _charset s;
+{
+ int i;
+
+ for (i = 0; i < _CHARSET_INTS; ++i)
+ s[i] = 0;
+}
+
+static void
+notset(s)
+ _charset s;
+{
+ int i;
+
+ for (i = 0; i < _CHARSET_INTS; ++i)
+ s[i] = ~s[i];
+}
+
+static int
+equal(s1, s2)
+ const _charset s1;
+ const _charset s2;
+{
+ int i;
+
+ for (i = 0; i < _CHARSET_INTS; ++i)
+ if (s1[i] != s2[i])
+ return 0;
+ return 1;
+}
+
+/* A pointer to the current regexp is kept here during parsing. */
+static struct regexp *reg;
+
+/* Find the index of charset s in reg->charsets, or allocate a new charset. */
+static int
+charset_index(s)
+ const _charset s;
+{
+ int i;
+
+ for (i = 0; i < reg->cindex; ++i) /* linear search of the sets stored so far */
+ if (equal(s, reg->charsets[i]))
+ return i;
+ REALLOC_IF_NECESSARY(reg->charsets, _charset, reg->calloc, reg->cindex); /* no match: i == reg->cindex here */
+ ++reg->cindex;
+ copyset(s, reg->charsets[i]); /* store a copy of s in the new last slot */
+ return i;
+}
+
+/* Syntax bits controlling the behavior of the lexical analyzer. */
+static syntax_bits, syntax_bits_set;
+
+/* Flag for case-folding letters into sets. */
+static case_fold;
+
+/* Entry point to set syntax options. */
+void
+regsyntax(bits, fold)
+ int bits;
+ int fold;
+{
+ syntax_bits_set = 1;
+ syntax_bits = bits;
+ case_fold = fold;
+}
+
+/* Lexical analyzer. */
+static const char *lexstart; /* Pointer to beginning of input string. */
+static const char *lexptr; /* Pointer to next input character. */
+static lexleft; /* Number of characters remaining. */
+static caret_allowed; /* True if backward context allows ^
+ (meaningful only if RE_CONTEXT_INDEP_OPS
+ is turned off). */
+static closure_allowed; /* True if backward context allows closures
+ (meaningful only if RE_CONTEXT_INDEP_OPS
+ is turned off). */
+
+/* Note that characters become unsigned here. */
+#define FETCH(c, eoferr) \
+ { \
+ if (! lexleft) \
+ if (eoferr != NULL) \
+ regerror(eoferr); \
+ else \
+ return _END; \
+ (c) = (unsigned char) *lexptr++; \
+ --lexleft; \
+ }
+
+static _token
+lex()
+{
+ _token c, c2;
+ int invert;
+ _charset cset;
+
+ FETCH(c, (char *) 0);
+ switch (c)
+ {
+ case '^':
+ if (! (syntax_bits & RE_CONTEXT_INDEP_OPS)
+ && (!caret_allowed ||
+ (syntax_bits & RE_TIGHT_VBAR) && lexptr - 1 != lexstart))
+ goto normal_char;
+ caret_allowed = 0;
+ return syntax_bits & RE_TIGHT_VBAR ? _ALLBEGLINE : _BEGLINE;
+
+ case '$':
+ if (syntax_bits & RE_CONTEXT_INDEP_OPS || !lexleft
+ || (! (syntax_bits & RE_TIGHT_VBAR)
+ && ((syntax_bits & RE_NO_BK_PARENS
+ ? lexleft > 0 && *lexptr == ')'
+ : lexleft > 1 && *lexptr == '\\' && lexptr[1] == ')')
+ || (syntax_bits & RE_NO_BK_VBAR
+ ? lexleft > 0 && *lexptr == '|'
+ : lexleft > 1 && *lexptr == '\\' && lexptr[1] == '|'))))
+ return syntax_bits & RE_TIGHT_VBAR ? _ALLENDLINE : _ENDLINE;
+ goto normal_char;
+
+ case '\\':
+ FETCH(c, "Unfinished \\ quote");
+ switch (c)
+ {
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ caret_allowed = 0;
+ closure_allowed = 1;
+ return _BACKREF;
+
+ case '<':
+ caret_allowed = 0;
+ return _BEGWORD;
+
+ case '>':
+ caret_allowed = 0;
+ return _ENDWORD;
+
+ case 'b':
+ caret_allowed = 0;
+ return _LIMWORD;
+
+ case 'B':
+ caret_allowed = 0;
+ return _NOTLIMWORD;
+
+ case 'w':
+ case 'W':
+ zeroset(cset);
+ for (c2 = 0; c2 < _NOTCHAR; ++c2)
+ if (ISALNUM(c2))
+ setbit(c2, cset);
+ if (c == 'W')
+ notset(cset);
+ caret_allowed = 0;
+ closure_allowed = 1;
+ return _SET + charset_index(cset);
+
+ case '?':
+ if (syntax_bits & RE_BK_PLUS_QM)
+ goto qmark;
+ goto normal_char;
+
+ case '+':
+ if (syntax_bits & RE_BK_PLUS_QM)
+ goto plus;
+ goto normal_char;
+
+ case '|':
+ if (! (syntax_bits & RE_NO_BK_VBAR))
+ goto or;
+ goto normal_char;
+
+ case '(':
+ if (! (syntax_bits & RE_NO_BK_PARENS))
+ goto lparen;
+ goto normal_char;
+
+ case ')':
+ if (! (syntax_bits & RE_NO_BK_PARENS))
+ goto rparen;
+ goto normal_char;
+
+ default:
+ goto normal_char;
+ }
+
+ case '?':
+ if (syntax_bits & RE_BK_PLUS_QM)
+ goto normal_char;
+ qmark:
+ if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed)
+ goto normal_char;
+ return _QMARK;
+
+ case '*':
+ if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed)
+ goto normal_char;
+ return _STAR;
+
+ case '+':
+ if (syntax_bits & RE_BK_PLUS_QM)
+ goto normal_char;
+ plus:
+ if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed)
+ goto normal_char;
+ return _PLUS;
+
+ case '|':
+ if (! (syntax_bits & RE_NO_BK_VBAR))
+ goto normal_char;
+ or:
+ caret_allowed = 1;
+ closure_allowed = 0;
+ return _OR;
+
+ case '\n':
+ if (! (syntax_bits & RE_NEWLINE_OR))
+ goto normal_char;
+ goto or;
+
+ case '(':
+ if (! (syntax_bits & RE_NO_BK_PARENS))
+ goto normal_char;
+ lparen:
+ caret_allowed = 1;
+ closure_allowed = 0;
+ return _LPAREN;
+
+ case ')':
+ if (! (syntax_bits & RE_NO_BK_PARENS))
+ goto normal_char;
+ rparen:
+ caret_allowed = 0;
+ closure_allowed = 1;
+ return _RPAREN;
+
+ case '.':
+ zeroset(cset);
+ notset(cset);
+ clrbit('\n', cset);
+ caret_allowed = 0;
+ closure_allowed = 1;
+ return _SET + charset_index(cset);
+
+ case '[':
+ zeroset(cset);
+ FETCH(c, "Unbalanced [");
+ if (c == '^')
+ {
+ FETCH(c, "Unbalanced [");
+ invert = 1;
+ }
+ else
+ invert = 0;
+ do
+ {
+ FETCH(c2, "Unbalanced [");
+ if ((syntax_bits & RE_AWK_CLASS_HACK) && c == '\\')
+ {
+ c = c2;
+ FETCH(c2, "Unbalanced [");
+ }
+ if (c2 == '-')
+ {
+ FETCH(c2, "Unbalanced [");
+ if (c2 == ']' && (syntax_bits & RE_AWK_CLASS_HACK))
+ {
+ setbit(c, cset);
+ setbit('-', cset);
+ break;
+ }
+ while (c <= c2)
+ setbit(c++, cset);
+ FETCH(c, "Unbalanced [");
+ }
+ else
+ {
+ setbit(c, cset);
+ c = c2;
+ }
+ }
+ while (c != ']');
+ if (invert)
+ notset(cset);
+ caret_allowed = 0;
+ closure_allowed = 1;
+ return _SET + charset_index(cset);
+
+ default:
+ normal_char:
+ caret_allowed = 0;
+ closure_allowed = 1;
+ if (case_fold && ISALPHA(c))
+ {
+ zeroset(cset);
+ if (isupper(c))
+ c = tolower(c);
+ setbit(c, cset);
+ setbit(toupper(c), cset);
+ return _SET + charset_index(cset);
+ }
+ return c;
+ }
+}
+
+/* Recursive descent parser for regular expressions. */
+
+static _token tok; /* Lookahead token. */
+static depth; /* Current depth of a hypothetical stack
+ holding deferred productions. This is
+ used to determine the depth that will be
+ required of the real stack later on in
+ reganalyze(). */
+
+/* Add the given token to the parse tree, maintaining the depth count and
+ updating the maximum depth if necessary. */
+static void
+addtok(t)
+ _token t;
+{
+ REALLOC_IF_NECESSARY(reg->tokens, _token, reg->talloc, reg->tindex);
+ reg->tokens[reg->tindex++] = t; /* append t to the postfix token array */
+
+ switch (t)
+ {
+ case _QMARK:
+ case _STAR:
+ case _PLUS:
+ break; /* these operators leave depth unchanged */
+
+ case _CAT:
+ case _OR:
+ --depth; /* these operators reduce depth by one */
+ break;
+
+ default:
+ ++reg->nleaves; /* any other token is counted as a leaf, then ... */
+ case _EMPTY: /* ... falls through on purpose: _EMPTY raises depth but is not counted in nleaves */
+ ++depth;
+ break;
+ }
+ if (depth > reg->depth)
+ reg->depth = depth; /* reg->depth records the largest depth seen */
+}
+
+/* The grammar understood by the parser is as follows.
+
+ start:
+ regexp
+ _ALLBEGLINE regexp
+ regexp _ALLENDLINE
+ _ALLBEGLINE regexp _ALLENDLINE
+
+ regexp:
+ regexp _OR branch
+ branch
+
+ branch:
+ branch closure
+ closure
+
+ closure:
+ closure _QMARK
+ closure _STAR
+ closure _PLUS
+ atom
+
+ atom:
+ <normal character>
+ _SET
+ _BACKREF
+ _BEGLINE
+ _ENDLINE
+ _BEGWORD
+ _ENDWORD
+ _LIMWORD
+ _NOTLIMWORD
+ <empty>
+
+ The parser builds a parse tree in postfix form in an array of tokens. */
+
+#ifdef __STDC__
+static void regexp(void);
+#else
+static void regexp();
+#endif
+
+static void
+atom()
+{
+ if (tok >= 0 && tok < _NOTCHAR || tok >= _SET || tok == _BACKREF
+ || tok == _BEGLINE || tok == _ENDLINE || tok == _BEGWORD
+ || tok == _ENDWORD || tok == _LIMWORD || tok == _NOTLIMWORD)
+ {
+ addtok(tok);
+ tok = lex();
+ }
+ else if (tok == _LPAREN)
+ {
+ tok = lex();
+ regexp();
+ if (tok != _RPAREN)
+ regerror("Unbalanced (");
+ tok = lex();
+ }
+ else
+ addtok(_EMPTY);
+}
+
+static void
+closure()
+{
+ atom();
+ while (tok == _QMARK || tok == _STAR || tok == _PLUS)
+ {
+ addtok(tok);
+ tok = lex();
+ }
+}
+
+static void
+branch()
+{
+ closure();
+ while (tok != _RPAREN && tok != _OR && tok != _ALLENDLINE && tok >= 0)
+ {
+ closure();
+ addtok(_CAT);
+ }
+}
+
+static void
+regexp()
+{
+ branch();
+ while (tok == _OR)
+ {
+ tok = lex();
+ branch();
+ addtok(_OR);
+ }
+}
+
+/* Main entry point for the parser. S is a string to be parsed, len is the
+ length of the string, so s can include NUL characters. R is a pointer to
+ the struct regexp to parse into. */
+void
+regparse(s, len, r)
+ const char *s;
+ size_t len;
+ struct regexp *r;
+{
+ reg = r; /* file-scope pointer used by addtok() and charset_index() */
+ lexstart = lexptr = s;
+ lexleft = len;
+ caret_allowed = 1; /* lexer context flags for the start of a pattern */
+ closure_allowed = 0;
+
+ if (! syntax_bits_set) /* set only by regsyntax(), so it must have been called first */
+ regerror("No syntax specified");
+
+ tok = lex();
+ depth = r->depth; /* continue from the depth already recorded in r */
+
+ if (tok == _ALLBEGLINE)
+ {
+ addtok(_BEGLINE); /* emitted as: _BEGLINE <regexp> _CAT */
+ tok = lex();
+ regexp();
+ addtok(_CAT);
+ }
+ else
+ regexp();
+
+ if (tok == _ALLENDLINE)
+ {
+ addtok(_ENDLINE); /* emitted as: <what precedes> _ENDLINE _CAT */
+ addtok(_CAT);
+ tok = lex();
+ }
+
+ if (tok != _END) /* anything left over at this point is reported as a stray ')' */
+ regerror("Unbalanced )");
+
+ addtok(_END - r->nregexps); /* end marker differs per regexp: _END, _END - 1, ... */
+ addtok(_CAT);
+
+ if (r->nregexps) /* not the first regexp parsed into r: join it to the earlier ones */
+ addtok(_OR);
+
+ ++r->nregexps;
+}
+
+/* Some primitives for operating on sets of positions. */
+
+/* Copy one set to another; the destination must be large enough. */
+static void
+copy(src, dst)
+ const _position_set *src;
+ _position_set *dst;
+{
+ int i;
+
+ for (i = 0; i < src->nelem; ++i)
+ dst->elems[i] = src->elems[i];
+ dst->nelem = src->nelem;
+}
+
+/* Insert a position in a set. Position sets are maintained sorted in
+ decreasing order of index. If a position already exists in the set with
+ the same index then their constraints are logically or'd together.
+ S->elems must point to an array large enough to hold the resulting set. */
+static void
+insert(p, s)
+ _position p;
+ _position_set *s;
+{
+ int i;
+ _position t1, t2;
+
+ for (i = 0; i < s->nelem && p.index < s->elems[i].index; ++i) /* skip elements whose index is larger than p's */
+ ;
+ if (i < s->nelem && p.index == s->elems[i].index)
+ s->elems[i].constraint |= p.constraint; /* already present: only combine constraints */
+ else
+ {
+ t1 = p;
+ ++s->nelem;
+ while (i < s->nelem) /* put p in slot i and shift the tail up by one */
+ {
+ t2 = s->elems[i];
+ s->elems[i++] = t1;
+ t1 = t2;
+ }
+ }
+}
+
+/* Merge two sets of positions into a third. The result is exactly as if
+ the positions of both sets were inserted into an initially empty set. */
+static void
+merge(s1, s2, m)
+ _position_set *s1;
+ _position_set *s2;
+ _position_set *m;
+{
+ int i = 0, j = 0;
+
+ m->nelem = 0;
+ while (i < s1->nelem && j < s2->nelem)
+ if (s1->elems[i].index > s2->elems[j].index) /* the larger index is emitted first */
+ m->elems[m->nelem++] = s1->elems[i++];
+ else if (s1->elems[i].index < s2->elems[j].index)
+ m->elems[m->nelem++] = s2->elems[j++];
+ else
+ {
+ m->elems[m->nelem] = s1->elems[i++]; /* same index in both: keep one copy ... */
+ m->elems[m->nelem++].constraint |= s2->elems[j++].constraint; /* ... with both constraints or'd */
+ }
+ while (i < s1->nelem) /* copy whatever is left of s1 */
+ m->elems[m->nelem++] = s1->elems[i++];
+ while (j < s2->nelem) /* copy whatever is left of s2 */
+ m->elems[m->nelem++] = s2->elems[j++];
+}
+
+/* Delete the position whose index equals p.index from set s, if present. */
+static void
+delete(p, s)
+ _position p;
+ _position_set *s;
+{
+ int i;
+
+ for (i = 0; i < s->nelem; ++i) /* match on index only; p.constraint is not compared */
+ if (p.index == s->elems[i].index)
+ break;
+ if (i < s->nelem) /* found: shrink the set and close the gap */
+ for (--s->nelem; i < s->nelem; ++i)
+ s->elems[i] = s->elems[i + 1];
+}
+
+/* Find the index of the state corresponding to the given position set with
+ the given preceding context, or create a new state if there is no such
+ state. Newline and letter tell whether we got here on a newline or
+ letter, respectively. */
+static int
+state_index(r, s, newline, letter)
+ struct regexp *r;
+ _position_set *s;
+ int newline;
+ int letter;
+{
+ int hash = 0;
+ int constraint;
+ int i, j;
+
+ newline = newline ? 1 : 0;
+ letter = letter ? 1 : 0;
+
+ for (i = 0; i < s->nelem; ++i)
+ hash ^= s->elems[i].index + s->elems[i].constraint;
+
+ /* Try to find a state that exactly matches the proposed one. */
+ for (i = 0; i < r->sindex; ++i)
+ {
+ if (hash != r->states[i].hash || s->nelem != r->states[i].elems.nelem
+ || newline != r->states[i].newline || letter != r->states[i].letter)
+ continue;
+ for (j = 0; j < s->nelem; ++j)
+ if (s->elems[j].constraint
+ != r->states[i].elems.elems[j].constraint
+ || s->elems[j].index != r->states[i].elems.elems[j].index)
+ break;
+ if (j == s->nelem)
+ return i;
+ }
+
+ /* We'll have to create a new state. */
+ REALLOC_IF_NECESSARY(r->states, _dfa_state, r->salloc, r->sindex);
+ r->states[i].hash = hash;
+ MALLOC(r->states[i].elems.elems, _position, s->nelem);
+ copy(s, &r->states[i].elems);
+ r->states[i].newline = newline;
+ r->states[i].letter = letter;
+ r->states[i].backref = 0;
+ r->states[i].constraint = 0;
+ r->states[i].first_end = 0;
+ for (j = 0; j < s->nelem; ++j)
+ if (r->tokens[s->elems[j].index] < 0)
+ {
+ constraint = s->elems[j].constraint;
+ if (_SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0)
+ || _SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1)
+ || _SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0)
+ || _SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1))
+ r->states[i].constraint |= constraint;
+ if (! r->states[i].first_end)
+ r->states[i].first_end = r->tokens[s->elems[j].index];
+ }
+ else if (r->tokens[s->elems[j].index] == _BACKREF)
+ {
+ r->states[i].constraint = _NO_CONSTRAINT;
+ r->states[i].backref = 1;
+ }
+
+ ++r->sindex;
+
+ return i;
+}
+
+/* Find the epsilon closure of a set of positions. If any position of the set
+ contains a symbol that matches the empty string in some context, replace
+ that position with the elements of its follow labeled with an appropriate
+ constraint. Repeat exhaustively until no funny positions are left.
+ S->elems must be large enough to hold the result. */
+static void
+epsclosure(s, r)
+ _position_set *s;
+ struct regexp *r;
+{
+ int i, j;
+ int *visited; /* visited[k] is nonzero once token k has been expanded */
+ _position p, old;
+
+ MALLOC(visited, int, r->tindex);
+ for (i = 0; i < r->tindex; ++i)
+ visited[i] = 0;
+
+ for (i = 0; i < s->nelem; ++i)
+ if (r->tokens[s->elems[i].index] >= _NOTCHAR /* not an ordinary character, ... */
+ && r->tokens[s->elems[i].index] != _BACKREF /* ... not a backreference, ... */
+ && r->tokens[s->elems[i].index] < _SET) /* ... and not a character set */
+ {
+ old = s->elems[i];
+ p.constraint = old.constraint;
+ delete(s->elems[i], s);
+ if (visited[old.index])
+ {
+ --i; /* element i was removed; look again at whatever moved into slot i */
+ continue;
+ }
+ visited[old.index] = 1;
+ switch (r->tokens[old.index]) /* narrow the constraint by the one belonging to this token */
+ {
+ case _BEGLINE:
+ p.constraint &= _BEGLINE_CONSTRAINT;
+ break;
+ case _ENDLINE:
+ p.constraint &= _ENDLINE_CONSTRAINT;
+ break;
+ case _BEGWORD:
+ p.constraint &= _BEGWORD_CONSTRAINT;
+ break;
+ case _ENDWORD:
+ p.constraint &= _ENDWORD_CONSTRAINT;
+ break;
+ case _LIMWORD:
+ p.constraint &= _LIMWORD_CONSTRAINT; /* was _ENDWORD_CONSTRAINT, which made \b act like \> */
+ break;
+ case _NOTLIMWORD:
+ p.constraint &= _NOTLIMWORD_CONSTRAINT;
+ break;
+ default:
+ break;
+ }
+ for (j = 0; j < r->follows[old.index].nelem; ++j) /* replace the position by its follow set */
+ {
+ p.index = r->follows[old.index].elems[j].index;
+ insert(p, s);
+ }
+ /* Force rescan to start at the beginning. */
+ i = -1;
+ }
+
+ free(visited);
+}
+
+/* Perform bottom-up analysis on the parse tree, computing various functions.
+ Note that at this point, we're pretending constructs like \< are real
+ characters rather than constraints on what can follow them.
+
+ Nullable: A node is nullable if it is at the root of a regexp that can
+ match the empty string.
+ * _EMPTY leaves are nullable.
+ * No other leaf is nullable.
+ * A _QMARK or _STAR node is nullable.
+ * A _PLUS node is nullable if its argument is nullable.
+ * A _CAT node is nullable if both its arguments are nullable.
+ * An _OR node is nullable if either argument is nullable.
+
+ Firstpos: The firstpos of a node is the set of positions (nonempty leaves)
+ that could correspond to the first character of a string matching the
+ regexp rooted at the given node.
+ * _EMPTY leaves have empty firstpos.
+ * The firstpos of a nonempty leaf is that leaf itself.
+ * The firstpos of a _QMARK, _STAR, or _PLUS node is the firstpos of its
+ argument.
+ * The firstpos of a _CAT node is the firstpos of the left argument, union
+ the firstpos of the right if the left argument is nullable.
+ * The firstpos of an _OR node is the union of firstpos of each argument.
+
+ Lastpos: The lastpos of a node is the set of positions that could
+ correspond to the last character of a string matching the regexp at
+ the given node.
+ * _EMPTY leaves have empty lastpos.
+ * The lastpos of a nonempty leaf is that leaf itself.
+ * The lastpos of a _QMARK, _STAR, or _PLUS node is the lastpos of its
+ argument.
+ * The lastpos of a _CAT node is the lastpos of its right argument, union
+ the lastpos of the left if the right argument is nullable.
+ * The lastpos of an _OR node is the union of the lastpos of each argument.
+
+ Follow: The follow of a position is the set of positions that could
+ correspond to the character following a character matching the node in
+ a string matching the regexp. At this point we consider special symbols
+ that match the empty string in some context to be just normal characters.
+ Later, if we find that a special symbol is in a follow set, we will
+ replace it with the elements of its follow, labeled with an appropriate
+ constraint.
+ * Every node in the firstpos of the argument of a _STAR or _PLUS node is in
+ the follow of every node in the lastpos.
+ * Every node in the firstpos of the second argument of a _CAT node is in
+ the follow of every node in the lastpos of the first argument.
+
+ Because of the postfix representation of the parse tree, the depth-first
+ analysis is conveniently done by a linear scan with the aid of a stack.
+ Sets are stored as arrays of the elements, obeying a stack-like allocation
+ scheme; the number of elements in each set deeper in the stack can be
+ used to determine the address of a particular set's array. */
+void
+reganalyze(r, searchflag)
+ struct regexp *r;
+ int searchflag;
+{
+ int *nullable; /* Nullable stack. */
+ int *nfirstpos; /* Element count stack for firstpos sets. */
+ _position *firstpos; /* Array where firstpos elements are stored. */
+ int *nlastpos; /* Element count stack for lastpos sets. */
+ _position *lastpos; /* Array where lastpos elements are stored. */
+ int *nalloc; /* Sizes of arrays allocated to follow sets. */
+ _position_set tmp; /* Temporary set for merging sets. */
+ _position_set merged; /* Result of merging sets. */
+ int wants_newline; /* True if some position wants newline info. */
+ int *o_nullable;
+ int *o_nfirst, *o_nlast;
+ _position *o_firstpos, *o_lastpos;
+ int i, j;
+ _position *pos;
+
+ r->searchflag = searchflag;
+
+ MALLOC(nullable, int, r->depth);
+ o_nullable = nullable;
+ MALLOC(nfirstpos, int, r->depth);
+ o_nfirst = nfirstpos;
+ MALLOC(firstpos, _position, r->nleaves);
+ o_firstpos = firstpos, firstpos += r->nleaves;
+ MALLOC(nlastpos, int, r->depth);
+ o_nlast = nlastpos;
+ MALLOC(lastpos, _position, r->nleaves);
+ o_lastpos = lastpos, lastpos += r->nleaves;
+ MALLOC(nalloc, int, r->tindex);
+ for (i = 0; i < r->tindex; ++i)
+ nalloc[i] = 0;
+ MALLOC(merged.elems, _position, r->nleaves);
+
+ CALLOC(r->follows, _position_set, r->tindex);
+
+ for (i = 0; i < r->tindex; ++i)
+ switch (r->tokens[i])
+ {
+ case _EMPTY:
+ /* The empty set is nullable. */
+ *nullable++ = 1;
+
+ /* The firstpos and lastpos of the empty leaf are both empty. */
+ *nfirstpos++ = *nlastpos++ = 0;
+ break;
+
+ case _STAR:
+ case _PLUS:
+ /* Every element in the firstpos of the argument is in the follow
+ of every element in the lastpos. */
+ tmp.nelem = nfirstpos[-1];
+ tmp.elems = firstpos;
+ pos = lastpos;
+ for (j = 0; j < nlastpos[-1]; ++j)
+ {
+ merge(&tmp, &r->follows[pos[j].index], &merged);
+ REALLOC_IF_NECESSARY(r->follows[pos[j].index].elems, _position,
+ nalloc[pos[j].index], merged.nelem - 1);
+ copy(&merged, &r->follows[pos[j].index]);
+ }
+
+ case _QMARK:
+ /* A _QMARK or _STAR node is automatically nullable. */
+ if (r->tokens[i] != _PLUS)
+ nullable[-1] = 1;
+ break;
+
+ case _CAT:
+ /* Every element in the firstpos of the second argument is in the
+ follow of every element in the lastpos of the first argument. */
+ tmp.nelem = nfirstpos[-1];
+ tmp.elems = firstpos;
+ pos = lastpos + nlastpos[-1];
+ for (j = 0; j < nlastpos[-2]; ++j)
+ {
+ merge(&tmp, &r->follows[pos[j].index], &merged);
+ REALLOC_IF_NECESSARY(r->follows[pos[j].index].elems, _position,
+ nalloc[pos[j].index], merged.nelem - 1);
+ copy(&merged, &r->follows[pos[j].index]);
+ }
+
+ /* The firstpos of a _CAT node is the firstpos of the first argument,
+ union that of the second argument if the first is nullable. */
+ if (nullable[-2])
+ nfirstpos[-2] += nfirstpos[-1];
+ else
+ firstpos += nfirstpos[-1];
+ --nfirstpos;
+
+ /* The lastpos of a _CAT node is the lastpos of the second argument,
+ union that of the first argument if the second is nullable. */
+ if (nullable[-1])
+ nlastpos[-2] += nlastpos[-1];
+ else
+ {
+ pos = lastpos + nlastpos[-2];
+ for (j = nlastpos[-1] - 1; j >= 0; --j)
+ pos[j] = lastpos[j];
+ lastpos += nlastpos[-2];
+ nlastpos[-2] = nlastpos[-1];
+ }
+ --nlastpos;
+
+ /* A _CAT node is nullable if both arguments are nullable. */
+ nullable[-2] = nullable[-1] && nullable[-2];
+ --nullable;
+ break;
+
+ case _OR:
+ /* The firstpos is the union of the firstpos of each argument. */
+ nfirstpos[-2] += nfirstpos[-1];
+ --nfirstpos;
+
+ /* The lastpos is the union of the lastpos of each argument. */
+ nlastpos[-2] += nlastpos[-1];
+ --nlastpos;
+
+ /* An _OR node is nullable if either argument is nullable. */
+ nullable[-2] = nullable[-1] || nullable[-2];
+ --nullable;
+ break;
+
+ default:
+ /* Anything else is a nonempty position. (Note that special
+ constructs like \< are treated as nonempty strings here;
+ an "epsilon closure" effectively makes them nullable later.
+ Backreferences have to get a real position so we can detect
+ transitions on them later. But they are nullable.) */
+ *nullable++ = r->tokens[i] == _BACKREF;
+
+ /* This position is in its own firstpos and lastpos. */
+ *nfirstpos++ = *nlastpos++ = 1;
+ --firstpos, --lastpos;
+ firstpos->index = lastpos->index = i;
+ firstpos->constraint = lastpos->constraint = _NO_CONSTRAINT;
+
+ /* Allocate the follow set for this position. */
+ nalloc[i] = 1;
+ MALLOC(r->follows[i].elems, _position, nalloc[i]);
+ break;
+ }
+
+ /* For each follow set that is the follow set of a real position, replace
+ it with its epsilon closure. */
+ for (i = 0; i < r->tindex; ++i)
+ if (r->tokens[i] < _NOTCHAR || r->tokens[i] == _BACKREF
+ || r->tokens[i] >= _SET)
+ {
+ copy(&r->follows[i], &merged);
+ epsclosure(&merged, r);
+ if (r->follows[i].nelem < merged.nelem)
+ REALLOC(r->follows[i].elems, _position, merged.nelem);
+ copy(&merged, &r->follows[i]);
+ }
+
+ /* Get the epsilon closure of the firstpos of the regexp. The result will
+ be the set of positions of state 0. */
+ merged.nelem = 0;
+ for (i = 0; i < nfirstpos[-1]; ++i)
+ insert(firstpos[i], &merged);
+ epsclosure(&merged, r);
+
+ /* Check if any of the positions of state 0 will want newline context. */
+ wants_newline = 0;
+ for (i = 0; i < merged.nelem; ++i)
+ if (_PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint))
+ wants_newline = 1;
+
+ /* Build the initial state. */
+ r->salloc = 1;
+ r->sindex = 0;
+ MALLOC(r->states, _dfa_state, r->salloc);
+ state_index(r, &merged, wants_newline, 0);
+
+ free(o_nullable);
+ free(o_nfirst);
+ free(o_firstpos);
+ free(o_nlast);
+ free(o_lastpos);
+ free(nalloc);
+ free(merged.elems);
+}
+
+/* Find, for each character, the transition out of state s of r, and store
+ it in the appropriate slot of trans.
+
+ We divide the positions of s into groups (positions can appear in more
+ than one group). Each group is labeled with a set of characters that
+ every position in the group matches (taking into account, if necessary,
+ preceding context information of s). For each group, find the union
+ of its elements' follows. This set is the set of positions of the
+ new state. For each character in the group's label, set the transition
+ on this character to be to a state corresponding to the set's positions,
+ and its associated backward context information, if necessary.
+
+ If we are building a searching matcher, we include the positions of state
+ 0 in every state.
+
+ The collection of groups is constructed by building an equivalence-class
+ partition of the positions of s.
+
+ For each position, find the set of characters C that it matches. Eliminate
+ any characters from C that fail on grounds of backward context.
+
+ Search through the groups, looking for a group whose label L has nonempty
+ intersection with C. If L - C is nonempty, create a new group labeled
+ L - C and having the same positions as the current group, and set L to
+ the intersection of L and C. Insert the position in this group, set
+ C = C - L, and resume scanning.
+
+ If after comparing with every group there are characters remaining in C,
+ create a new group labeled with the characters of C and insert this
+ position in that group. */
+void
+regstate(s, r, trans)
+ int s; /* Index into r->states of the state being expanded. */
+ struct regexp *r; /* Regexp that owns the states and follow sets. */
+ int trans[]; /* Output table; slots 0 .. _NOTCHAR-1 are written. */
+{
+ _position_set grps[_NOTCHAR]; /* As many as will ever be needed. */
+ _charset labels[_NOTCHAR]; /* Labels corresponding to the groups. */
+ int ngrps = 0; /* Number of groups actually used. */
+ _position pos; /* Current position being considered. */
+ _charset matches; /* Set of matching characters. */
+ int matchesf; /* True if matches is nonempty. */
+ _charset intersect; /* Intersection with some label set. */
+ int intersectf; /* True if intersect is nonempty. */
+ _charset leftovers; /* Stuff in the label that didn't match. */
+ int leftoversf; /* True if leftovers is nonempty. */
+ static _charset letters; /* Set of characters considered letters. */
+ static _charset newline; /* Set containing only the newline character. */
+ _position_set follows; /* Union of the follows of some group. */
+ _position_set tmp; /* Scratch set; allocated and freed below but not
+ otherwise used in this function. */
+ int state; /* New state. */
+ int wants_newline; /* New state wants to know newline context. */
+ int state_newline; /* New state on a newline transition. */
+ int wants_letter; /* New state wants to know letter context. */
+ int state_letter; /* New state on a letter transition. */
+ static initialized; /* Flag for static initialization. */
+ int i, j, k;
+
+ /* Initialize the letters and newline sets on the first call only. */
+ if (! initialized)
+ {
+ initialized = 1;
+ for (i = 0; i < _NOTCHAR; ++i)
+ if (ISALNUM(i))
+ setbit(i, letters);
+ setbit('\n', newline);
+ }
+
+ zeroset(matches);
+
+ /* Pass 1: partition the positions of state s into groups labeled with
+ disjoint character sets. */
+ for (i = 0; i < r->states[s].elems.nelem; ++i)
+ {
+ pos = r->states[s].elems.elems[i];
+ /* A plain character token matches itself; a token >= _SET matches the
+ corresponding entry of r->charsets; anything else matches no
+ character and is skipped. */
+ if (r->tokens[pos.index] >= 0 && r->tokens[pos.index] < _NOTCHAR)
+ setbit(r->tokens[pos.index], matches);
+ else if (r->tokens[pos.index] >= _SET)
+ copyset(r->charsets[r->tokens[pos.index] - _SET], matches);
+ else
+ continue;
+
+ /* Some characters may need to be eliminated from matches because
+ they fail in the current context. */
+ /* NOTE(review): 0xff is presumably the value of _NO_CONSTRAINT;
+ confirm against dfa.h. */
+ if (pos.constraint != 0xff)
+ {
+ if (! _MATCHES_NEWLINE_CONTEXT(pos.constraint,
+ r->states[s].newline, 1))
+ clrbit('\n', matches);
+ /* Anding with the newline-only set drops every non-newline. */
+ if (! _MATCHES_NEWLINE_CONTEXT(pos.constraint,
+ r->states[s].newline, 0))
+ for (j = 0; j < _CHARSET_INTS; ++j)
+ matches[j] &= newline[j];
+ if (! _MATCHES_LETTER_CONTEXT(pos.constraint,
+ r->states[s].letter, 1))
+ for (j = 0; j < _CHARSET_INTS; ++j)
+ matches[j] &= ~letters[j];
+ if (! _MATCHES_LETTER_CONTEXT(pos.constraint,
+ r->states[s].letter, 0))
+ for (j = 0; j < _CHARSET_INTS; ++j)
+ matches[j] &= letters[j];
+
+ /* If there are no characters left, there's no point in going on. */
+ for (j = 0; j < _CHARSET_INTS && !matches[j]; ++j)
+ ;
+ if (j == _CHARSET_INTS)
+ continue;
+ }
+
+ for (j = 0; j < ngrps; ++j)
+ {
+ /* If matches contains a single character only, and the current
+ group's label doesn't contain that character, go on to the
+ next group. */
+ if (r->tokens[pos.index] >= 0 && r->tokens[pos.index] < _NOTCHAR
+ && !tstbit(r->tokens[pos.index], labels[j]))
+ continue;
+
+ /* Check if this group's label has a nonempty intersection with
+ matches. */
+ intersectf = 0;
+ for (k = 0; k < _CHARSET_INTS; ++k)
+ (intersect[k] = matches[k] & labels[j][k]) ? intersectf = 1 : 0;
+ if (! intersectf)
+ continue;
+
+ /* It does; now find the set differences both ways. */
+ leftoversf = matchesf = 0;
+ for (k = 0; k < _CHARSET_INTS; ++k)
+ {
+ /* Even an optimizing compiler can't know this for sure. */
+ int match = matches[k], label = labels[j][k];
+
+ (leftovers[k] = ~match & label) ? leftoversf = 1 : 0;
+ (matches[k] = match & ~label) ? matchesf = 1 : 0;
+ }
+
+ /* If there were leftovers, create a new group labeled with them. */
+ if (leftoversf)
+ {
+ copyset(leftovers, labels[ngrps]);
+ copyset(intersect, labels[j]);
+ MALLOC(grps[ngrps].elems, _position, r->nleaves);
+ copy(&grps[j], &grps[ngrps]);
+ ++ngrps;
+ }
+
+ /* Put the position in the current group. Note that there is no
+ reason to call insert() here. */
+ grps[j].elems[grps[j].nelem++] = pos;
+
+ /* If every character matching the current position has been
+ accounted for, we're done. */
+ if (! matchesf)
+ break;
+ }
+
+ /* If we've passed the last group, and there are still characters
+ unaccounted for, then we'll have to create a new group. */
+ if (j == ngrps)
+ {
+ copyset(matches, labels[ngrps]);
+ zeroset(matches);
+ MALLOC(grps[ngrps].elems, _position, r->nleaves);
+ grps[ngrps].nelem = 1;
+ grps[ngrps].elems[0] = pos;
+ ++ngrps;
+ }
+ }
+
+ MALLOC(follows.elems, _position, r->nleaves);
+ MALLOC(tmp.elems, _position, r->nleaves);
+
+ /* If we are a searching matcher, the default transition is to a state
+ containing the positions of state 0, otherwise the default transition
+ is to fail miserably. */
+ if (r->searchflag)
+ {
+ wants_newline = 0;
+ wants_letter = 0;
+ for (i = 0; i < r->states[0].elems.nelem; ++i)
+ {
+ if (_PREV_NEWLINE_DEPENDENT(r->states[0].elems.elems[i].constraint))
+ wants_newline = 1;
+ if (_PREV_LETTER_DEPENDENT(r->states[0].elems.elems[i].constraint))
+ wants_letter = 1;
+ }
+ copy(&r->states[0].elems, &follows);
+ state = state_index(r, &follows, 0, 0);
+ if (wants_newline)
+ state_newline = state_index(r, &follows, 1, 0);
+ else
+ state_newline = state;
+ if (wants_letter)
+ state_letter = state_index(r, &follows, 0, 1);
+ else
+ state_letter = state;
+ for (i = 0; i < _NOTCHAR; ++i)
+ if (i == '\n')
+ trans[i] = state_newline;
+ else if (ISALNUM(i))
+ trans[i] = state_letter;
+ else
+ trans[i] = state;
+ }
+ else
+ for (i = 0; i < _NOTCHAR; ++i)
+ trans[i] = -1;
+
+ /* Pass 2: for each group, compute the target state(s) and overwrite the
+ default transition for every character in the group's label. */
+ for (i = 0; i < ngrps; ++i)
+ {
+ follows.nelem = 0;
+
+ /* Find the union of the follows of the positions of the group.
+ This is a hideously inefficient loop. Fix it someday. */
+ for (j = 0; j < grps[i].nelem; ++j)
+ for (k = 0; k < r->follows[grps[i].elems[j].index].nelem; ++k)
+ insert(r->follows[grps[i].elems[j].index].elems[k], &follows);
+
+ /* If we are building a searching matcher, throw in the positions
+ of state 0 as well. */
+ if (r->searchflag)
+ for (j = 0; j < r->states[0].elems.nelem; ++j)
+ insert(r->states[0].elems.elems[j], &follows);
+
+ /* Find out if the new state will want any context information. */
+ wants_newline = 0;
+ if (tstbit('\n', labels[i]))
+ for (j = 0; j < follows.nelem; ++j)
+ if (_PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
+ wants_newline = 1;
+
+ /* Letter context only matters if the label contains a letter. */
+ wants_letter = 0;
+ for (j = 0; j < _CHARSET_INTS; ++j)
+ if (labels[i][j] & letters[j])
+ break;
+ if (j < _CHARSET_INTS)
+ for (j = 0; j < follows.nelem; ++j)
+ if (_PREV_LETTER_DEPENDENT(follows.elems[j].constraint))
+ wants_letter = 1;
+
+ /* Find the state(s) corresponding to the union of the follows. */
+ state = state_index(r, &follows, 0, 0);
+ if (wants_newline)
+ state_newline = state_index(r, &follows, 1, 0);
+ else
+ state_newline = state;
+ if (wants_letter)
+ state_letter = state_index(r, &follows, 0, 1);
+ else
+ state_letter = state;
+
+ /* Set the transitions for each character in the current label. */
+ /* NOTE(review): 1 << k is a signed shift; if INTBITS is the full width
+ of int, the k == INTBITS - 1 case shifts into the sign bit. */
+ for (j = 0; j < _CHARSET_INTS; ++j)
+ for (k = 0; k < INTBITS; ++k)
+ if (labels[i][j] & 1 << k)
+ {
+ int c = j * INTBITS + k;
+
+ if (c == '\n')
+ trans[c] = state_newline;
+ else if (ISALNUM(c))
+ trans[c] = state_letter;
+ else if (c < _NOTCHAR)
+ trans[c] = state;
+ }
+ }
+
+ /* Release the per-call scratch storage. */
+ for (i = 0; i < ngrps; ++i)
+ free(grps[i].elems);
+ free(follows.elems);
+ free(tmp.elems);
+}
+
+/* Some routines for manipulating a compiled regexp's transition tables.
+ Each state may or may not have a transition table; if it does, and it
+ is a non-accepting state, then r->trans[state] points to its table.
+ If it is an accepting state then r->fails[state] points to its table.
+ If it has no table at all, then r->trans[state] is NULL.
+ TODO: Improve this comment, get rid of the unnecessary redundancy. */
+
+/* Build the transition table for state S of R and install it in r->trans[s]
+   (non-accepting state) or r->fails[s] (accepting state).  Also records the
+   state's success bits in r->success[s] and its newline transition in
+   r->newlines[s].  The per-state arrays are grown as needed so that every
+   state number mentioned in the new table has a slot.  */
+static void
+build_state(s, r)
+     int s;
+     struct regexp *r;
+{
+  int *table;			/* Transition table under construction. */
+  int n;
+
+  /* Bound the number of tables alive at once (1024 is arbitrary): once the
+     limit is hit, throw every table away.  Frequently used ones are
+     rebuilt quickly; rarely used ones simply stay gone.  */
+  if (r->trcount >= 1024)
+    {
+      for (n = 0; n < r->tralloc; ++n)
+        {
+          if (r->trans[n] != NULL)
+            {
+              free((ptr_t) r->trans[n]);
+              r->trans[n] = NULL;
+            }
+          else if (r->fails[n] != NULL)
+            {
+              free((ptr_t) r->fails[n]);
+              r->fails[n] = NULL;
+            }
+        }
+      r->trcount = 0;
+    }
+  r->trcount += 1;
+
+  /* Success bits: 4 = accepts before a newline, 2 = accepts before a
+     letter, 1 = accepts before any other character.  */
+  r->success[s] = 0;
+  if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 1, r->states[s].letter, 0,
+                         s, *r))
+    r->success[s] |= 4;
+  if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 0, r->states[s].letter, 1,
+                         s, *r))
+    r->success[s] |= 2;
+  if (ACCEPTS_IN_CONTEXT(r->states[s].newline, 0, r->states[s].letter, 0,
+                         s, *r))
+    r->success[s] |= 1;
+
+  MALLOC(table, int, _NOTCHAR);
+  regstate(s, r, table);
+
+  /* Make sure the per-state arrays can be indexed by the largest state
+     number that appears in the new table.  */
+  for (n = 0; n < _NOTCHAR; ++n)
+    {
+      int fresh;		/* First slot that needs clearing. */
+
+      if (table[n] < r->tralloc)
+        continue;
+
+      fresh = r->tralloc;
+      do
+        r->tralloc *= 2;
+      while (table[n] >= r->tralloc);
+
+      REALLOC(r->realtrans, int *, r->tralloc + 1);
+      r->trans = r->realtrans + 1;
+      REALLOC(r->fails, int *, r->tralloc);
+      REALLOC(r->success, int, r->tralloc);
+      REALLOC(r->newlines, int, r->tralloc);
+      for (; fresh < r->tralloc; ++fresh)
+        {
+          r->trans[fresh] = NULL;
+          r->fails[fresh] = NULL;
+        }
+    }
+
+  /* The newline transition is kept aside so that the table entry itself
+     can act as a sentinel.  */
+  r->newlines[s] = table['\n'];
+  table['\n'] = -1;
+
+  if (ACCEPTING(s, *r))
+    r->fails[s] = table;
+  else
+    r->trans[s] = table;
+}
+
+/* Allocate the per-state table arrays of r with room for a single state,
+ then build the transition table for state 0. */
+static void
+build_state_zero(r)
+ struct regexp *r;
+{
+ r->tralloc = 1;
+ r->trcount = 0;
+ /* realtrans has one extra leading slot; trans points just past it, so
+ trans[-1] is a valid element. */
+ CALLOC(r->realtrans, int *, r->tralloc + 1);
+ r->trans = r->realtrans + 1;
+ CALLOC(r->fails, int *, r->tralloc);
+ MALLOC(r->success, int, r->tralloc);
+ MALLOC(r->newlines, int, r->tralloc);
+ build_state(0, r);
+}
+
+/* Search through a buffer looking for a match to the given struct regexp.
+ Find the first occurrence of a string matching the regexp in the buffer,
+ and the shortest possible version thereof. Return a pointer to the first
+ character after the match, or NULL if none is found. Begin points to
+ the beginning of the buffer, and end points to the first character after
+ its end. We store a newline in *end to act as a sentinel and do not
+ restore the byte that was there, so end had better point somewhere valid
+ and writable. Newline is a flag indicating whether to allow newlines to
+ be in the matching string. If count is non-NULL it points to a place
+ we're supposed to increment every time we see a newline. Finally, if
+ backref is non-NULL it points to a place where, when a match is returned,
+ we store a 1 if backreferencing happened and the match needs to be
+ verified by a backtracking matcher, and a 0 otherwise. Nothing is
+ stored in *backref when NULL is returned. */
+char *
+regexecute(r, begin, end, newline, count, backref)
+ struct regexp *r;
+ char *begin;
+ char *end;
+ int newline;
+ int *count;
+ int *backref;
+{
+ register s, s1, tmp; /* Current state. */
+ register unsigned char *p; /* Current input character. */
+ register **trans, *t; /* Copy of r->trans so it can be optimized
+ into a register. */
+ static sbit[_NOTCHAR]; /* Table for anding with r->success. */
+ static sbit_init;
+
+ /* One-time setup: classify each character as newline (4), alphanumeric
+ (2) or other (1), to be anded with r->success[]. */
+ if (! sbit_init)
+ {
+ int i;
+
+ sbit_init = 1;
+ for (i = 0; i < _NOTCHAR; ++i)
+ if (i == '\n')
+ sbit[i] = 4;
+ else if (ISALNUM(i))
+ sbit[i] = 2;
+ else
+ sbit[i] = 1;
+ }
+
+ /* The transition tables are created on first use. */
+ if (! r->tralloc)
+ build_state_zero(r);
+
+ s = 0;
+ p = (unsigned char *) begin;
+ trans = r->trans;
+ *end = '\n';
+
+ for (;;)
+ {
+ /* The dreaded inner loop. */
+ /* It is unrolled by two, alternating between s and s1 as the current
+ state, and runs for as long as the current state has a table in
+ trans[]. */
+ if (t = trans[s])
+ do
+ {
+ s1 = t[*p++];
+ if (! (t = trans[s1]))
+ goto last_was_s;
+ s = t[*p++];
+ }
+ while (t = trans[s]);
+ goto last_was_s1;
+ last_was_s:
+ /* Swap so that s is the newest state and s1 the one before it. */
+ tmp = s, s = s1, s1 = tmp;
+ last_was_s1:
+
+ /* s has a table in fails[] rather than trans[]. */
+ if (s >= 0 && p <= (unsigned char *) end && r->fails[s])
+ {
+ if (r->success[s] & sbit[*p])
+ {
+ /* The else below pairs with the inner if. */
+ if (backref)
+ if (r->states[s].backref)
+ *backref = 1;
+ else
+ *backref = 0;
+ return (char *) p;
+ }
+
+ s1 = s;
+ s = r->fails[s][*p++];
+ continue;
+ }
+
+ /* If the previous character was a newline, count it. */
+ if (count && (char *) p <= end && p[-1] == '\n')
+ ++*count;
+
+ /* Check if we've run off the end of the buffer. */
+ if ((char *) p >= end)
+ return NULL;
+
+ /* A non-negative state with no table yet: build it and retry.
+ r->trans is reloaded because build_state may reassign it. */
+ if (s >= 0)
+ {
+ build_state(s, r);
+ trans = r->trans;
+ continue;
+ }
+
+ /* s is negative here. If the character just consumed was a newline
+ and newlines are allowed, follow the newline transition saved for
+ the previous state. */
+ if (p[-1] == '\n' && newline)
+ {
+ s = r->newlines[s1];
+ continue;
+ }
+
+ /* Otherwise start over from state 0. */
+ s = 0;
+ }
+}
+
+/* Initialize the components of a regexp that the other routines don't
+   initialize for themselves.
+
+   R must point to writable storage for a struct regexp.  Whatever it held
+   before is overwritten without being freed.  The charset and token
+   arrays start with room for one element each.  The transition-table
+   members are set to "not built": regfree() tests those pointers against
+   NULL, so they must not be left holding garbage when regexecute() is
+   never called on this regexp.  */
+void
+reginit(r)
+     struct regexp *r;
+{
+  r->calloc = 1;
+  MALLOC(r->charsets, _charset, r->calloc);
+  r->cindex = 0;
+
+  r->talloc = 1;
+  MALLOC(r->tokens, _token, r->talloc);
+  r->tindex = r->depth = r->nleaves = r->nregexps = 0;
+
+  r->searchflag = 0;
+
+  /* Transition tables are allocated lazily by the first regexecute().  */
+  r->tralloc = 0;
+  r->trcount = 0;
+  r->realtrans = NULL;
+  r->trans = NULL;
+  r->fails = NULL;
+  r->success = NULL;
+  r->newlines = NULL;
+}
+
+/* Release what one regparse()/reganalyze() pass attached to R (charsets,
+   tokens, states and follow sets) so that R can be handed to reginit()
+   again.  The transition tables are deliberately not touched: they are
+   only created by regexecute(), which has not run at this point.  */
+static void
+regdiscard(r)
+     struct regexp *r;
+{
+  int i;
+
+  free((ptr_t) r->charsets);
+  free((ptr_t) r->tokens);
+  for (i = 0; i < r->sindex; ++i)
+    free((ptr_t) r->states[i].elems.elems);
+  free((ptr_t) r->states);
+  for (i = 0; i < r->tindex; ++i)
+    if (r->follows[i].elems)
+      free((ptr_t) r->follows[i].elems);
+  free((ptr_t) r->follows);
+}
+
+/* Parse and analyze a single string of the given length.
+
+   S/LEN is the pattern text, R the regexp to fill in, and SEARCHFLAG is
+   passed through to reganalyze().  When case folding is on, the pattern is
+   first compiled from a lower-cased copy with folding switched off, purely
+   so that regmust() can extract the must-have string; that intermediate
+   result is then released and the real pattern is compiled with folding
+   back on.  */
+void
+regcompile(s, len, r, searchflag)
+     const char *s;
+     size_t len;
+     struct regexp *r;
+     int searchflag;
+{
+  if (case_fold)	/* dummy folding in service of regmust() */
+    {
+      char *copy;
+      size_t i;
+
+      /* malloc(0) is allowed to return NULL; ask for at least one byte so
+         that an empty pattern is not reported as memory exhaustion.  */
+      copy = malloc(len ? len : 1);
+      if (!copy)
+        regerror("out of memory");
+
+      /* This is a complete kludge and could potentially break
+         \<letter> escapes . . . */
+      case_fold = 0;
+      for (i = 0; i < len; ++i)
+        if (ISUPPER(s[i]))
+          copy[i] = tolower(s[i]);
+        else
+          copy[i] = s[i];
+
+      reginit(r);
+      r->mustn = 0;
+      r->must[0] = '\0';
+      regparse(copy, len, r);
+      free(copy);
+      regmust(r);
+      reganalyze(r, searchflag);
+      case_fold = 1;
+
+      /* The first pass was only needed for r->must; free its allocations
+         before reginit() overwrites the pointers to them.  */
+      regdiscard(r);
+
+      reginit(r);
+      regparse(s, len, r);
+      reganalyze(r, searchflag);
+    }
+  else
+    {
+      reginit(r);
+      regparse(s, len, r);
+      regmust(r);
+      reganalyze(r, searchflag);
+    }
+}
+
+/* Free the storage held by the components of a regexp.
+
+   R must have been set up by reginit() and analyzed, so that its charset,
+   token, state and follow-set members are valid.  The transition tables
+   exist only if regexecute() has run; r->tralloc is nonzero exactly in
+   that case, so it is used as the guard instead of reading table pointers
+   that may never have been assigned.  The struct itself is not freed.  */
+void
+regfree(r)
+     struct regexp *r;
+{
+  int i;
+
+  free((ptr_t) r->charsets);
+  free((ptr_t) r->tokens);
+  for (i = 0; i < r->sindex; ++i)
+    free((ptr_t) r->states[i].elems.elems);
+  free((ptr_t) r->states);
+  for (i = 0; i < r->tindex; ++i)
+    if (r->follows[i].elems)
+      free((ptr_t) r->follows[i].elems);
+  free((ptr_t) r->follows);
+
+  if (r->tralloc)
+    {
+      /* A state has its table in either trans[] or fails[], not both.  */
+      for (i = 0; i < r->tralloc; ++i)
+        if (r->trans[i])
+          free((ptr_t) r->trans[i]);
+        else if (r->fails[i])
+          free((ptr_t) r->fails[i]);
+      free((ptr_t) r->realtrans);
+      free((ptr_t) r->fails);
+      /* success[] is allocated alongside newlines[] and was previously
+         never released.  */
+      free((ptr_t) r->success);
+      free((ptr_t) r->newlines);
+    }
+}
+
+/*
+Having found the postfix representation of the regular expression,
+try to find a long sequence of characters that must appear in any line
+containing the r.e.
+Finding a "longest" sequence is beyond the scope here;
+we take an easy way out and hope for the best.
+(Take "(ab|a)b"--please.)
+
+We do a bottom-up calculation of sequences of characters that must appear
+in matches of r.e.'s represented by trees rooted at the nodes of the postfix
+representation:
+ sequences that must appear at the left of the match ("left")
+ sequences that must appear at the right of the match ("right")
+ lists of sequences that must appear somewhere in the match ("in")
+ sequences that must constitute the match ("is")
+When we get to the root of the tree, we use one of the longest of its
+calculated "in" sequences as our answer. The sequence we find is returned in
+r->must (where "r" is the single argument passed to "regmust");
+the length of the sequence is returned in r->mustn.
+
+The sequences calculated for the various types of node (in pseudo ANSI c)
+are shown below. "p" is the operand of unary operators (and the left-hand
+operand of binary operators); "q" is the right-hand operand of binary operators.
+"ZERO" means "a zero-length sequence" below.
+
+Type    left               right              is                 in
+----    ----               -----              --                 --
+char c  # c                # c                # c                # c
+
+SET     ZERO               ZERO               ZERO               ZERO
+
+STAR    ZERO               ZERO               ZERO               ZERO
+
+QMARK   ZERO               ZERO               ZERO               ZERO
+
+PLUS    p->left            p->right           ZERO               p->in
+
+CAT     (p->is==ZERO)?     (q->is==ZERO)?     (p->is!=ZERO &&    p->in plus
+        p->left :          q->right :         q->is!=ZERO) ?     q->in plus
+        p->is##q->left     p->right##q->is    p->is##q->is :     p->right##q->left
+                                              ZERO
+
+OR      longest common     longest common     (do p->is and      substrings common to
+        leading            trailing           q->is have same    p->in and q->in
+        (sub)sequence      (sub)sequence      length and
+        of p->left         of p->right        content) ?
+        and q->left        and q->right       p->is : NULL
+
+If there's anything else we recognize in the tree, all four sequences get set
+to zero-length sequences. If there's something we don't recognize in the tree,
+we just return a zero-length sequence.
+
+Break ties in favor of infrequent letters (choosing 'zzz' in preference to
+'aaa')?
+
+And. . .is it here or someplace that we might ponder "optimizations" such as
+ egrep 'psi|epsilon' -> egrep 'psi'
+ egrep 'pepsi|epsilon' -> egrep 'epsi'
+ (Yes, we now find "epsi" as a "string
+ that must occur", but we might also
+ simplify the *entire* r.e. being sought
+)
+ grep '[c]' -> grep 'c'
+ grep '(ab|a)b' -> grep 'ab'
+ grep 'ab*' -> grep 'a'
+ grep 'a*b' -> grep 'b'
+There are several issues:
+ Is optimization easy (enough)?
+
+ Does optimization actually accomplish anything,
+ or is the automaton you get from "psi|epsilon" (for example)
+ the same as the one you get from "psi" (for example)?
+
+ Are optimizable r.e.'s likely to be used in real-life situations
+ (something like 'ab*' is probably unlikely; something like
+ 'psi|epsilon' is likelier)?
+*/
+
+/* Append the string NEW to the malloc'd string OLD, growing OLD as needed.
+
+   OLD may be NULL, in which case a fresh buffer is allocated.  NEW may be
+   NULL or empty; a non-NULL OLD is then returned untouched.  The result is
+   always NUL-terminated.  Returns the (possibly moved) buffer, or NULL if
+   memory could not be obtained.  On failure OLD has been freed, so callers
+   must replace their pointer with the return value, as every caller in
+   view does.  */
+static char *
+icatalloc(old, new)
+     char *old;
+     const char *new;
+{
+  register char *result;
+  register size_t oldsize, newsize;
+
+  newsize = (new == NULL) ? 0 : strlen(new);
+  if (old == NULL)
+    {
+      oldsize = 0;
+      result = (char *) malloc(newsize + 1);
+      if (result == NULL)
+        return NULL;
+    }
+  else
+    {
+      if (newsize == 0)
+        return old;
+      oldsize = strlen(old);
+      result = (char *) realloc((void *) old, oldsize + newsize + 1);
+      if (result == NULL)
+        {
+          /* realloc leaves OLD allocated on failure; the caller is about to
+             lose its only pointer to it, so release it here.  */
+          free(old);
+          return NULL;
+        }
+    }
+  if (new != NULL)
+    (void) strcpy(result + oldsize, new);
+  else
+    result[oldsize] = '\0';	/* fresh buffer, nothing to copy */
+  return result;
+}
+
+/* Return the result of icatalloc() called with a NULL old buffer and
+ string, i.e. a newly allocated copy of string, or NULL if icatalloc()
+ returns NULL. */
+static char *
+icpyalloc(string)
+const char * string;
+{
+ return icatalloc((char *) NULL, string);
+}
+
+/* Return a pointer to the first place in LOOKIN where LOOKFOR occurs, or
+   NULL if it does not occur.  Only positions holding a character are
+   tried, so an empty LOOKIN never matches, not even an empty LOOKFOR.  */
+static char *
+istrstr(lookin, lookfor)
+     char *lookin;
+     register char *lookfor;
+{
+  register int n = strlen(lookfor);
+  register char *at = lookin;
+
+  while (*at != '\0')
+    {
+      if (strncmp(at, lookfor, n) == 0)
+        return at;
+      at++;
+    }
+  return NULL;
+}
+
+/* free() that tolerates a NULL pointer.  */
+static void
+ifree(cp)
+     char *cp;
+{
+  if (cp == NULL)
+    return;
+  free(cp);
+}
+
+/* Free every string in the NULL-terminated list CPP and set each slot to
+   NULL.  The array itself is not freed.  A NULL list is ignored.  */
+static void
+freelist(cpp)
+     register char **cpp;
+{
+  register char **slot;
+
+  if (cpp == NULL)
+    return;
+  for (slot = cpp; *slot != NULL; ++slot)
+    {
+      free(*slot);
+      *slot = NULL;
+    }
+}
+
+/* Add the first LEN characters of NEW to the NULL-terminated list CPP of
+   malloc'd strings, keeping the list free of strings that are substrings
+   of other members.
+
+   If some member already contains the new string, the list is returned
+   unchanged.  Otherwise every member contained in the new string is
+   dropped and the new string is appended.  LEN must not exceed
+   strlen(NEW).
+
+   Returns the (possibly moved) list.  Returns NULL if CPP is NULL or if
+   memory runs out; in the out-of-memory case the whole list, array
+   included, has been freed, so callers must replace their pointer with
+   the return value, as the callers in view do.  */
+static char **
+enlist(cpp, new, len)
+     register char **cpp;
+     register char *new;
+#ifdef __STDC__
+     size_t len;
+#else
+     int len;
+#endif
+{
+  register int i, j;
+  register char **grown;
+
+  if (cpp == NULL)
+    return NULL;
+  if ((new = icpyalloc(new)) == NULL)
+    {
+      freelist(cpp);
+      free((char *) cpp);
+      return NULL;
+    }
+  new[len] = '\0';
+  /*
+  ** Is there already something in the list that's new (or longer)?
+  */
+  for (i = 0; cpp[i] != NULL; ++i)
+    if (istrstr(cpp[i], new) != NULL)
+      {
+        free(new);
+        return cpp;
+      }
+  /*
+  ** Eliminate any obsoleted strings.  i is the number of live entries.
+  ** A removed entry is replaced by the last live one, and the vacated
+  ** slot is cleared so that no stale (possibly already freed) pointer is
+  ** ever examined or freed a second time.
+  */
+  j = 0;
+  while (j < i)
+    if (istrstr(new, cpp[j]) == NULL)
+      ++j;
+    else
+      {
+        free(cpp[j]);
+        cpp[j] = cpp[--i];
+        cpp[i] = NULL;
+      }
+  /*
+  ** Add the new string.
+  */
+  grown = (char **) realloc((char *) cpp, (i + 2) * sizeof *cpp);
+  if (grown == NULL)
+    {
+      /* realloc left cpp intact; nobody else will free it.  */
+      free(new);
+      freelist(cpp);
+      free((char *) cpp);
+      return NULL;
+    }
+  grown[i] = new;
+  grown[i + 1] = NULL;
+  return grown;
+}
+
+/*
+** Build an allocated list of the distinct substrings that LEFT and RIGHT
+** have in common.  For each starting position in LEFT, the longest run
+** that also appears in RIGHT is handed to enlist().
+** The result is an empty list if nothing is shared.  NULL is returned if
+** either argument is NULL, or if malloc() or enlist() fails.
+*/
+
+static char **
+comsubs(left, right)
+     char *left;
+     char *right;
+{
+  register char **list;
+  register char *lp;
+  register char *rp;
+  register int n, best;
+
+  if (left == NULL || right == NULL)
+    return NULL;
+  list = (char **) malloc(sizeof *list);
+  if (list == NULL)
+    return NULL;
+  list[0] = NULL;
+  for (lp = left; *lp != '\0'; ++lp)
+    {
+      /* Longest match starting at lp, over every occurrence of *lp
+         in right.  */
+      best = 0;
+      for (rp = strchr(right, *lp); rp != NULL; rp = strchr(rp + 1, *lp))
+        {
+          n = 1;
+          while (lp[n] != '\0' && lp[n] == rp[n])
+            ++n;
+          if (best < n)
+            best = n;
+        }
+      if (best != 0)
+        {
+#ifdef __STDC__
+          list = enlist(list, lp, (size_t) best);
+#else
+          list = enlist(list, lp, best);
+#endif
+          if (list == NULL)
+            break;
+        }
+    }
+  return list;
+}
+
+/* Merge every string of the list NEW into the list OLD via enlist().
+   Returns the resulting list, or NULL if either argument is NULL or an
+   enlist() call returns NULL.  NEW is left untouched.  */
+static char **
+addlists(old, new)
+     char **old;
+     char **new;
+{
+  register char **item;
+
+  if (old == NULL || new == NULL)
+    return NULL;
+  for (item = new; old != NULL && *item != NULL; ++item)
+    old = enlist(old, *item, strlen(*item));
+  return old;
+}
+
+/*
+** Given two lists of substrings,
+** return a new allocated list giving substrings common to both:
+** the common substrings of every (left, right) pair, merged together.
+** Neither input list is modified.
+** Return NULL if either list is NULL or memory runs out; nothing built so
+** far is leaked in that case.
+*/
+
+static char **
+inboth(left, right)
+     char **left;
+     char **right;
+{
+  register char **both;
+  register char **temp;
+  register int lnum, rnum;
+
+  if (left == NULL || right == NULL)
+    return NULL;
+  both = (char **) malloc(sizeof *both);
+  if (both == NULL)
+    return NULL;
+  both[0] = NULL;
+  for (lnum = 0; left[lnum] != NULL; ++lnum)
+    {
+      for (rnum = 0; right[rnum] != NULL; ++rnum)
+        {
+          temp = comsubs(left[lnum], right[rnum]);
+          if (temp == NULL)
+            {
+              /* freelist() releases only the strings; the array has to
+                 be freed as well.  */
+              freelist(both);
+              free((char *) both);
+              return NULL;
+            }
+          both = addlists(both, temp);
+          /* addlists() copied what it needed; drop the scratch list,
+             strings and array alike.  */
+          freelist(temp);
+          free((char *) temp);
+          if (both == NULL)
+            return NULL;
+        }
+    }
+  return both;
+}
+
+/*
+typedef struct {
+ char ** in;
+ char * left;
+ char * right;
+ char * is;
+} must;
+ */
+/* Return *MP to the "nothing known" state: release the strings of its
+   "in" list and make left, right and is empty.  The buffers themselves
+   are kept for reuse.  */
+static void
+resetmust(mp)
+     register must *mp;
+{
+  freelist(mp->in);
+  mp->is[0] = '\0';
+  mp->right[0] = '\0';
+  mp->left[0] = '\0';
+}
+
+/* Work out a literal string that must appear in anything R matches and
+   store it in r->must, with its length in r->mustn.
+
+   The postfix token array r->tokens is evaluated bottom-up on a stack of
+   "must" records (left / right / is / in), as laid out in the long comment
+   above.  At the _END token, the longest string in the root's "in" list is
+   the answer.  If anything unexpected turns up or memory runs out, the
+   answer is the empty string; if even the stack cannot be allocated,
+   r->must is left empty and the function returns at once.  The result is
+   truncated to MUST_MAX - 1 characters.  */
+static void
+regmust(r)
+     register struct regexp *r;
+{
+  register must *musts;		/* Base of the evaluation stack. */
+  register must *mp;		/* Next free stack slot. */
+  register char *result;	/* Best string found; never owned. */
+  register int ri;
+  register int i;
+  register _token t;
+  static must must0;		/* All-zero template. */
+
+  r->mustn = 0;
+  r->must[0] = '\0';
+  /* Must be set before the first "goto done": the allocation loop below
+     can jump there, and "done" reads result.  */
+  result = "";
+  musts = (must *) malloc((r->tindex + 1) * sizeof *musts);
+  if (musts == NULL)
+    return;
+  mp = musts;
+  for (i = 0; i <= r->tindex; ++i)
+    mp[i] = must0;
+  for (i = 0; i <= r->tindex; ++i)
+    {
+      mp[i].in = (char **) malloc(sizeof *mp[i].in);
+      /* Terminate the list straight away so that the cleanup at "done"
+         never walks an uninitialized array if a later malloc fails.  */
+      if (mp[i].in != NULL)
+        mp[i].in[0] = NULL;
+      mp[i].left = malloc(2);
+      mp[i].right = malloc(2);
+      mp[i].is = malloc(2);
+      if (mp[i].in == NULL || mp[i].left == NULL
+          || mp[i].right == NULL || mp[i].is == NULL)
+        goto done;
+      mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
+    }
+  for (ri = 0; ri < r->tindex; ++ri)
+    {
+      switch (t = r->tokens[ri])
+        {
+        case _ALLBEGLINE:
+        case _ALLENDLINE:
+        case _LPAREN:
+        case _RPAREN:
+          goto done;		/* "cannot happen" */
+        case _EMPTY:
+        case _BEGLINE:
+        case _ENDLINE:
+        case _BEGWORD:
+        case _ENDWORD:
+        case _LIMWORD:
+        case _NOTLIMWORD:
+        case _BACKREF:
+          resetmust(mp);
+          break;
+        case _STAR:
+        case _QMARK:
+          if (mp <= musts)
+            goto done;		/* "cannot happen" */
+          --mp;
+          resetmust(mp);
+          break;
+        case _OR:
+          if (mp < &musts[2])
+            goto done;		/* "cannot happen" */
+          {
+            register char **new;
+            register must *lmp;
+            register must *rmp;
+            register int j, ln, rn, n;
+
+            rmp = --mp;
+            lmp = --mp;
+            /* Guaranteed to be.  Unlikely, but. . . */
+            if (strcmp(lmp->is, rmp->is) != 0)
+              lmp->is[0] = '\0';
+            /* Left side--easy: keep the common prefix. */
+            i = 0;
+            while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i])
+              ++i;
+            lmp->left[i] = '\0';
+            /* Right side: keep the common suffix, moved to the front. */
+            ln = strlen(lmp->right);
+            rn = strlen(rmp->right);
+            n = ln;
+            if (n > rn)
+              n = rn;
+            for (i = 0; i < n; ++i)
+              if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1])
+                break;
+            for (j = 0; j < i; ++j)
+              lmp->right[j] = lmp->right[(ln - i) + j];
+            lmp->right[j] = '\0';
+            new = inboth(lmp->in, rmp->in);
+            if (new == NULL)
+              goto done;
+            freelist(lmp->in);
+            free((char *) lmp->in);
+            lmp->in = new;
+          }
+          break;
+        case _PLUS:
+          if (mp <= musts)
+            goto done;		/* "cannot happen" */
+          --mp;
+          mp->is[0] = '\0';
+          break;
+        case _END:
+          if (mp != &musts[1])
+            goto done;		/* "cannot happen" */
+          for (i = 0; musts[0].in[i] != NULL; ++i)
+            if (strlen(musts[0].in[i]) > strlen(result))
+              result = musts[0].in[i];
+          goto done;
+        case _CAT:
+          if (mp < &musts[2])
+            goto done;		/* "cannot happen" */
+          {
+            register must *lmp;
+            register must *rmp;
+
+            rmp = --mp;
+            lmp = --mp;
+            /*
+            ** In.  Everything in left, plus everything in
+            ** right, plus catenation of
+            ** left's right and right's left.
+            */
+            lmp->in = addlists(lmp->in, rmp->in);
+            if (lmp->in == NULL)
+              goto done;
+            if (lmp->right[0] != '\0' && rmp->left[0] != '\0')
+              {
+                register char *tp;
+
+                tp = icpyalloc(lmp->right);
+                if (tp == NULL)
+                  goto done;
+                tp = icatalloc(tp, rmp->left);
+                if (tp == NULL)
+                  goto done;
+                lmp->in = enlist(lmp->in, tp, strlen(tp));
+                free(tp);
+                if (lmp->in == NULL)
+                  goto done;
+              }
+            /* Left-hand */
+            if (lmp->is[0] != '\0')
+              {
+                lmp->left = icatalloc(lmp->left, rmp->left);
+                if (lmp->left == NULL)
+                  goto done;
+              }
+            /* Right-hand */
+            if (rmp->is[0] == '\0')
+              lmp->right[0] = '\0';
+            lmp->right = icatalloc(lmp->right, rmp->right);
+            if (lmp->right == NULL)
+              goto done;
+            /* Guaranteed to be */
+            if (lmp->is[0] != '\0' && rmp->is[0] != '\0')
+              {
+                lmp->is = icatalloc(lmp->is, rmp->is);
+                if (lmp->is == NULL)
+                  goto done;
+              }
+          }
+          break;
+        default:
+          if (t < _END)
+            {
+              /* "cannot happen" */
+              goto done;
+            }
+          else if (t == '\0')
+            {
+              /* not on *my* shift */
+              goto done;
+            }
+          else if (t >= _SET)
+            {
+              /* easy enough */
+              resetmust(mp);
+            }
+          else
+            {
+              /* plain character */
+              resetmust(mp);
+              mp->is[0] = mp->left[0] = mp->right[0] = t;
+              mp->is[1] = mp->left[1] = mp->right[1] = '\0';
+              mp->in = enlist(mp->in, mp->is, 1);
+              if (mp->in == NULL)
+                goto done;
+            }
+          break;
+        }
+      ++mp;
+    }
+done:
+  /* Copy the answer out before the stack that owns it is torn down.  */
+  (void) strncpy(r->must, result, MUST_MAX - 1);
+  r->must[MUST_MAX - 1] = '\0';
+  r->mustn = strlen(r->must);
+  mp = musts;
+  for (i = 0; i <= r->tindex; ++i)
+    {
+      freelist(mp[i].in);
+      ifree((char *) mp[i].in);
+      ifree(mp[i].left);
+      ifree(mp[i].right);
+      ifree(mp[i].is);
+    }
+  free((char *) mp);
+}
diff --git a/dfa.h b/dfa.h
new file mode 100644
index 00000000..69a0651a
--- /dev/null
+++ b/dfa.h
@@ -0,0 +1,539 @@
+/* dfa.h - declarations for GNU deterministic regexp compiler
+ Copyright (C) 1988 Free Software Foundation, Inc.
+ Written June, 1988 by Mike Haertel
+
+ NO WARRANTY
+
+ BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
+NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
+WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
+RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
+WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
+AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
+DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
+CORRECTION.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
+STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
+WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
+LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
+OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
+DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
+A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
+PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
+
+ GENERAL PUBLIC LICENSE TO COPY
+
+ 1. You may copy and distribute verbatim copies of this source file
+as you receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy a valid copyright notice "Copyright
+ (C) 1988 Free Software Foundation, Inc."; and include following the
+copyright notice a verbatim copy of the above disclaimer of warranty
+and of this License. You may charge a distribution fee for the
+physical act of transferring a copy.
+
+ 2. You may modify your copy or copies of this source file or
+any portion of it, and copy and distribute such modifications under
+the terms of Paragraph 1 above, provided that you also do the following:
+
+ a) cause the modified files to carry prominent notices stating
+ that you changed the files and the date of any change; and
+
+ b) cause the whole of any work that you distribute or publish,
+ that in whole or in part contains or is a derivative of this
+ program or any part thereof, to be licensed at no charge to all
+ third parties on terms identical to those contained in this
+ License Agreement (except that you may choose to grant more extensive
+ warranty protection to some or all third parties, at your option).
+
+ c) You may charge a distribution fee for the physical act of
+ transferring a copy, and you may at your option offer warranty
+ protection in exchange for a fee.
+
+Mere aggregation of another unrelated program with this program (or its
+derivative) on a volume of a storage or distribution medium does not bring
+the other program under the scope of these terms.
+
+ 3. You may copy and distribute this program or any portion of it in
+compiled, executable or object code form under the terms of Paragraphs
+1 and 2 above provided that you do the following:
+
+ a) accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ b) accompany it with a written offer, valid for at least three
+ years, to give any third party free (except for a nominal
+ shipping charge) a complete machine-readable copy of the
+ corresponding source code, to be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ c) accompany it with the information you received as to where the
+ corresponding source code may be obtained. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form alone.)
+
+For an executable file, complete source code means all the source code for
+all modules it contains; but, as a special exception, it need not include
+source code for modules which are standard libraries that accompany the
+operating system on which the executable file runs.
+
+ 4. You may not copy, sublicense, distribute or transfer this program
+except as expressly provided under this License Agreement. Any attempt
+otherwise to copy, sublicense, distribute or transfer this program is void and
+your rights to use the program under this License agreement shall be
+automatically terminated. However, parties who have received computer
+software programs from you with this License Agreement will not have
+their licenses terminated so long as such parties remain in full compliance.
+
+ 5. If you wish to incorporate parts of this program into other free
+programs whose distribution conditions are different, write to the Free
+Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
+worked out a simple rule that can be stated here, but we will often permit
+this. We will be guided by the two goals of preserving the free status of
+all derivatives of our free software and of promoting the sharing and reuse of
+software.
+
+
+In other words, you are welcome to use, share and improve this program.
+You are forbidden to forbid anyone else to use, share and improve
+what you give them. Help stamp out software-hoarding! */
+
+#ifdef __STDC__
+
+#ifdef SOMEDAY
+#define ISALNUM(c) isalnum(c)
+#define ISALPHA(c) isalpha(c)
+#define ISUPPER(c) isupper(c)
+#else
+#define ISALNUM(c) (isascii(c) && isalnum(c))
+#define ISALPHA(c) (isascii(c) && isalpha(c))
+#define ISUPPER(c) (isascii(c) && isupper(c))
+#endif
+
+#else /* ! __STDC__ */
+
+#define const
+
+#define ISALNUM(c) (isascii(c) && isalnum(c))
+#define ISALPHA(c) (isascii(c) && isalpha(c))
+#define ISUPPER(c) (isascii(c) && isupper(c))
+
+#endif /* ! __STDC__ */
+
+/* 1 means plain parentheses serve as grouping, and backslash
+ parentheses are needed for literal searching.
+ 0 means backslash-parentheses are grouping, and plain parentheses
+ are for literal searching. */
+#define RE_NO_BK_PARENS 1
+
+/* 1 means plain | serves as the "or"-operator, and \| is a literal.
+ 0 means \| serves as the "or"-operator, and | is a literal. */
+#define RE_NO_BK_VBAR (1 << 1)
+
+/* 0 means plain + or ? serves as an operator, and \+, \? are literals.
+ 1 means \+, \? are operators and plain +, ? are literals. */
+#define RE_BK_PLUS_QM (1 << 2)
+
+/* 1 means | binds tighter than ^ or $.
+ 0 means the contrary. */
+#define RE_TIGHT_VBAR (1 << 3)
+
+/* 1 means treat \n as an _OR operator
+ 0 means treat it as a normal character */
+#define RE_NEWLINE_OR (1 << 4)
+
+/* 0 means that special characters (such as *, ^, and $) always have
+ their special meaning regardless of the surrounding context.
+ 1 means that special characters may act as normal characters in some
+ contexts. Specifically, this applies to:
+ ^ - only special at the beginning, or after ( or |
+ $ - only special at the end, or before ) or |
+ *, +, ? - only special when not after the beginning, (, or | */
+#define RE_CONTEXT_INDEP_OPS (1 << 5)
+
+/* 1 means that \ in a character class escapes the next character (typically
+ a hyphen). It also is overloaded to mean that hyphen at the end of the range
+ is allowable and means that the hyphen is to be taken literally. */
+#define RE_AWK_CLASS_HACK (1 << 6)
+
+/* Now define combinations of bits for the standard possibilities. */
+#ifdef notdef
+#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
+#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK | RE_NEWLINE_OR)
+#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
+#define RE_SYNTAX_EMACS 0
+#endif
+
+/* The NULL pointer. */
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* Number of bits in an unsigned char. */
+#define CHARBITS 8
+
+/* First integer value that is greater than any character code. */
+#define _NOTCHAR (1 << CHARBITS)
+
+/* INTBITS need not be exact, just a lower bound. */
+#define INTBITS (CHARBITS * sizeof (int))
+
+/* Number of ints required to hold a bit for every character. */
+#define _CHARSET_INTS ((_NOTCHAR + INTBITS - 1) / INTBITS)
+
+/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
+typedef int _charset[_CHARSET_INTS];
+
+/* The regexp is parsed into an array of tokens in postfix form. Some tokens
+ are operators and others are terminal symbols. Most (but not all) of these
+ codes are returned by the lexical analyzer. */
+#ifdef __STDC__
+
+typedef enum
+{
+ _END = -1, /* _END is a terminal symbol that matches the
+ end of input; any value of _END or less in
+ the parse tree is such a symbol. Accepting
+ states of the DFA are those that would have
+ a transition on _END. */
+
+ /* Ordinary character values are terminal symbols that match themselves. */
+
+ _EMPTY = _NOTCHAR, /* _EMPTY is a terminal symbol that matches
+ the empty string. */
+
+ _BACKREF, /* _BACKREF is generated by \<digit>; it
+ is not completely handled. If the scanner
+ detects a transition on backref, it returns
+ a kind of "semi-success" indicating that
+ the match will have to be verified with
+ a backtracking matcher. */
+
+ _BEGLINE, /* _BEGLINE is a terminal symbol that matches
+ the empty string if it is at the beginning
+ of a line. */
+
+ _ALLBEGLINE, /* _ALLBEGLINE is a terminal symbol that
+ matches the empty string if it is at the
+ beginning of a line; _ALLBEGLINE applies
+ to the entire regexp and can only occur
+ as the first token thereof. _ALLBEGLINE
+ never appears in the parse tree; a _BEGLINE
+ is prepended with _CAT to the entire
+ regexp instead. */
+
+ _ENDLINE, /* _ENDLINE is a terminal symbol that matches
+ the empty string if it is at the end of
+ a line. */
+
+ _ALLENDLINE, /* _ALLENDLINE is to _ENDLINE as _ALLBEGLINE
+ is to _BEGLINE. */
+
+ _BEGWORD, /* _BEGWORD is a terminal symbol that matches
+ the empty string if it is at the beginning
+ of a word. */
+
+ _ENDWORD, /* _ENDWORD is a terminal symbol that matches
+ the empty string if it is at the end of
+ a word. */
+
+ _LIMWORD, /* _LIMWORD is a terminal symbol that matches
+ the empty string if it is at the beginning
+ or the end of a word. */
+
+ _NOTLIMWORD, /* _NOTLIMWORD is a terminal symbol that
+ matches the empty string if it is not at
+ the beginning or end of a word. */
+
+ _QMARK, /* _QMARK is an operator of one argument that
+ matches zero or one occurrences of its
+ argument. */
+
+ _STAR, /* _STAR is an operator of one argument that
+ matches the Kleene closure (zero or more
+ occurrences) of its argument. */
+
+ _PLUS, /* _PLUS is an operator of one argument that
+ matches the positive closure (one or more
+ occurrences) of its argument. */
+
+ _CAT, /* _CAT is an operator of two arguments that
+ matches the concatenation of its
+ arguments. _CAT is never returned by the
+ lexical analyzer. */
+
+ _OR, /* _OR is an operator of two arguments that
+ matches either of its arguments. */
+
+ _LPAREN, /* _LPAREN never appears in the parse tree,
+ it is only a lexeme. */
+
+ _RPAREN, /* _RPAREN never appears in the parse tree. */
+
+ _SET /* _SET (and any value greater) is a
+ terminal symbol that matches any of a
+ class of characters. */
+} _token;
+
+#else /* ! __STDC__ */
+
+typedef short _token;
+
+#define _END -1
+#define _EMPTY _NOTCHAR
+#define _BACKREF (_EMPTY + 1)
+#define _BEGLINE (_EMPTY + 2)
+#define _ALLBEGLINE (_EMPTY + 3)
+#define _ENDLINE (_EMPTY + 4)
+#define _ALLENDLINE (_EMPTY + 5)
+#define _BEGWORD (_EMPTY + 6)
+#define _ENDWORD (_EMPTY + 7)
+#define _LIMWORD (_EMPTY + 8)
+#define _NOTLIMWORD (_EMPTY + 9)
+#define _QMARK (_EMPTY + 10)
+#define _STAR (_EMPTY + 11)
+#define _PLUS (_EMPTY + 12)
+#define _CAT (_EMPTY + 13)
+#define _OR (_EMPTY + 14)
+#define _LPAREN (_EMPTY + 15)
+#define _RPAREN (_EMPTY + 16)
+#define _SET (_EMPTY + 17)
+
+#endif /* ! __STDC__ */
+
+/* Sets are stored in an array in the compiled regexp; the index of the
+ array corresponding to a given set token is given by _SET_INDEX(t). */
+#define _SET_INDEX(t) ((t) - _SET)
+
+/* Sometimes characters can only be matched depending on the surrounding
+ context. Such context decisions depend on what the previous character
+ was, and the value of the current (lookahead) character. Context
+ dependent constraints are encoded as 8 bit integers. Each bit that
+ is set indicates that the constraint succeeds in the corresponding
+ context.
+
+ bit 7 - previous and current are newlines
+ bit 6 - previous was newline, current isn't
+ bit 5 - previous wasn't newline, current is
+ bit 4 - neither previous nor current is a newline
+ bit 3 - previous and current are word-constituents
+ bit 2 - previous was word-constituent, current isn't
+ bit 1 - previous wasn't word-constituent, current is
+ bit 0 - neither previous nor current is word-constituent
+
+ Word-constituent characters are those that satisfy isalnum().
+
+ The macro _SUCCEEDS_IN_CONTEXT determines whether a given constraint
+ succeeds in a particular context. Prevn is true if the previous character
+ was a newline, currn is true if the lookahead character is a newline.
+ Prevl and currl similarly depend upon whether the previous and current
+ characters are word-constituent letters. */
+#define _MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
+ ((constraint) & 1 << ((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4)
+#define _MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
+ ((constraint) & 1 << ((prevl) ? 2 : 0) + ((currl) ? 1 : 0))
+#define _SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
+ (_MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
+ && _MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
+
+/* The following macros give information about what a constraint depends on. */
+#define _PREV_NEWLINE_DEPENDENT(constraint) \
+ (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
+#define _PREV_LETTER_DEPENDENT(constraint) \
+ (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
+
+/* Tokens that match the empty string subject to some constraint actually
+ work by applying that constraint to determine what may follow them,
+ taking into account what has gone before. The following values are
+ the constraints corresponding to the special tokens previously defined. */
+#define _NO_CONSTRAINT 0xff
+#define _BEGLINE_CONSTRAINT 0xcf
+#define _ENDLINE_CONSTRAINT 0xaf
+#define _BEGWORD_CONSTRAINT 0xf2
+#define _ENDWORD_CONSTRAINT 0xf4
+#define _LIMWORD_CONSTRAINT 0xf6
+#define _NOTLIMWORD_CONSTRAINT 0xf9
+
+/* States of the recognizer correspond to sets of positions in the parse
+ tree, together with the constraints under which they may be matched.
+ So a position is encoded as an index into the parse tree together with
+ a constraint. */
+typedef struct
+{
+ unsigned index; /* Index into the parse array. */
+ unsigned constraint; /* Constraint for matching this position. */
+} _position;
+
+/* Sets of positions are stored as arrays. */
+typedef struct
+{
+ _position *elems; /* Elements of this position set. */
+ int nelem; /* Number of elements in this set. */
+} _position_set;
+
+/* A state of the regexp consists of a set of positions, some flags,
+ and the token value of the lowest-numbered position of the state that
+ contains an _END token. */
+typedef struct
+{
+ int hash; /* Hash of the positions of this state. */
+ _position_set elems; /* Positions this state could match. */
+ char newline; /* True if previous state matched newline. */
+ char letter; /* True if previous state matched a letter. */
+ char backref; /* True if this state matches a \<digit>. */
+ unsigned char constraint; /* Constraint for this state to accept. */
+ int first_end; /* Token value of the first _END in elems. */
+} _dfa_state;
+
+/* If an r.e. is at most MUST_MAX characters long, we look for a string which
+ must appear in it; whatever's found is dropped into the struct reg. */
+
+#define MUST_MAX 50
+
+/* A compiled regular expression. */
+struct regexp
+{
+ /* Stuff built by the scanner. */
+ _charset *charsets; /* Array of character sets for _SET tokens. */
+ int cindex; /* Index for adding new charsets. */
+ int calloc; /* Number of charsets currently allocated. */
+
+ /* Stuff built by the parser. */
+ _token *tokens; /* Postfix parse array. */
+ int tindex; /* Index for adding new tokens. */
+ int talloc; /* Number of tokens currently allocated. */
+ int depth; /* Depth required of an evaluation stack
+ used for depth-first traversal of the
+ parse tree. */
+ int nleaves; /* Number of leaves on the parse tree. */
+ int nregexps; /* Count of parallel regexps being built
+ with regparse(). */
+
+ /* Stuff owned by the state builder. */
+ _dfa_state *states; /* States of the regexp. */
+ int sindex; /* Index for adding new states. */
+ int salloc; /* Number of states currently allocated. */
+
+ /* Stuff built by the structure analyzer. */
+ _position_set *follows; /* Array of follow sets, indexed by position
+ index. The follow of a position is the set
+ of positions containing characters that
+ could conceivably follow a character
+ matching the given position in a string
+ matching the regexp. Allocated to the
+ maximum possible position index. */
+ int searchflag; /* True if we are supposed to build a searching
+ as opposed to an exact matcher. A searching
+ matcher finds the first and shortest string
+ matching a regexp anywhere in the buffer,
+ whereas an exact matcher finds the longest
+ string matching, but anchored to the
+ beginning of the buffer. */
+
+ /* Stuff owned by the executor. */
+ int tralloc; /* Number of transition tables that have
+ slots so far. */
+ int trcount; /* Number of transition tables that have
+ actually been built. */
+ int **trans; /* Transition tables for states that can
+ never accept. If the transitions for a
+ state have not yet been computed, or the
+ state could possibly accept, its entry in
+ this table is NULL. */
+ int **realtrans; /* Trans always points to realtrans + 1; this
+ is so trans[-1] can contain NULL. */
+ int **fails; /* Transition tables after failing to accept
+ on a state that potentially could do so. */
+ int *success; /* Table of acceptance conditions used in
+ regexecute and computed in build_state. */
+ int *newlines; /* Transitions on newlines. The entry for a
+ newline in any transition table is always
+ -1 so we can count lines without wasting
+ too many cycles. The transition for a
+ newline is stored separately and handled
+ as a special case. Newline is also used
+ as a sentinel at the end of the buffer. */
+ char must[MUST_MAX];
+ int mustn;
+};
+
+/* Some macros for user access to regexp internals. */
+
+/* ACCEPTING returns true if s could possibly be an accepting state of r. */
+#define ACCEPTING(s, r) ((r).states[s].constraint)
+
+/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
+ specified context. */
+#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, reg) \
+ _SUCCEEDS_IN_CONTEXT((reg).states[state].constraint, \
+ prevn, currn, prevl, currl)
+
+/* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
+ regexps that a given state could accept. Parallel regexps are numbered
+ starting at 1. */
+#define FIRST_MATCHING_REGEXP(state, reg) (-(reg).states[state].first_end)
+
+/* Entry points. */
+
+#ifdef __STDC__
+
+/* Regsyntax() takes two arguments; the first sets the syntax bits described
+ earlier in this file, and the second sets the case-folding flag. */
+extern void regsyntax(int, int);
+
+/* Compile the given string of the given length into the given struct regexp.
+ Final argument is a flag specifying whether to build a searching or an
+ exact matcher. */
+extern void regcompile(const char *, size_t, struct regexp *, int);
+
+/* Execute the given struct regexp on the buffer of characters. The
+ first char * points to the beginning, and the second points to the
+ first character after the end of the buffer, which must be a writable
+ place so a sentinel end-of-buffer marker can be stored there. The
+ third-to-last argument is a flag telling whether to allow newlines to
+ be part of a string matching the regexp. The next-to-last argument,
+ if non-NULL, points to a place to increment every time we see a
+ newline. The final argument, if non-NULL, points to a flag that will
+ be set if further examination by a backtracking matcher is needed in
+ order to verify backreferencing; otherwise the flag will be cleared.
+ Returns NULL if no match is found, or a pointer to the first
+ character after the first & shortest matching string in the buffer. */
+extern char *regexecute(struct regexp *, char *, char *, int, int *, int *);
+
+/* Free the storage held by the components of a struct regexp. */
+extern void regfree(struct regexp *);
+
+/* Entry points for people who know what they're doing. */
+
+/* Initialize the components of a struct regexp. */
+extern void reginit(struct regexp *);
+
+/* Incrementally parse a string of given length into a struct regexp. */
+extern void regparse(const char *, size_t, struct regexp *);
+
+/* Analyze a parsed regexp; second argument tells whether to build a searching
+ or an exact matcher. */
+extern void reganalyze(struct regexp *, int);
+
+/* Compute, for each possible character, the transitions out of a given
+ state, storing them in an array of integers. */
+extern void regstate(int, struct regexp *, int []);
+
+/* Error handling. */
+
+/* Regerror() is called by the regexp routines whenever an error occurs. It
+ takes a single argument, a NUL-terminated string describing the error.
+ The default regerror() prints the error message to stderr and exits.
+ The user can provide a different regerror() if so desired. */
+extern void regerror(const char *);
+
+#else /* ! __STDC__ */
+extern void regsyntax(), regcompile(), regfree(), reginit(), regparse();
+extern void reganalyze(), regstate(), regerror();
+extern char *regexecute();
+#endif
diff --git a/eval.c b/eval.c
index c88e7763..a467315d 100644
--- a/eval.c
+++ b/eval.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -25,33 +25,23 @@
#include "awk.h"
-extern void do_print();
-extern void do_printf();
-extern NODE *do_match();
-extern NODE *do_sub();
-extern NODE *do_getline();
-extern NODE *concat_exp();
-extern int in_array();
-extern void do_delete();
-extern double pow();
-
-static int eval_condition();
-static NODE *op_assign();
-static NODE *func_call();
-static NODE *match_op();
+extern double pow P((double x, double y));
+extern double modf P((double x, double *yp));
+
+static int eval_condition P((NODE *tree));
+static NODE *op_assign P((NODE *tree));
+static NODE *func_call P((NODE *name, NODE *arg_list));
+static NODE *match_op P((NODE *tree));
NODE *_t; /* used as a temporary in macros */
#ifdef MSDOS
double _msc51bug; /* to get around a bug in MSC 5.1 */
#endif
NODE *ret_node;
-
-/* More of that debugging stuff */
-#ifdef DEBUG
-#define DBG_P(X) print_debug X
-#else
-#define DBG_P(X)
-#endif
+int OFSlen;
+int ORSlen;
+int OFMTidx;
+int CONVFMTidx;
/* Macros and variables to save and restore function and loop bindings */
/*
@@ -135,7 +125,7 @@ char casetable[] = {
*/
int
interpret(tree)
-NODE *tree;
+register NODE *tree;
{
volatile jmp_buf loop_tag_stack; /* shallow binding stack for loop_tag */
static jmp_buf rule_tag;/* tag the rule currently being run, for NEXT
@@ -143,53 +133,29 @@ NODE *tree;
* there are no nested rules */
register NODE *t = NULL;/* temporary */
volatile NODE **lhs; /* lhs == Left Hand Side for assigns, etc */
- volatile struct search *l; /* For array_for */
volatile NODE *stable_tree;
+ int traverse = 1; /* True => loop thru tree (Node_rule_list) */
if (tree == NULL)
return 1;
sourceline = tree->source_line;
source = tree->source_file;
switch (tree->type) {
+ case Node_rule_node:
+ traverse = 0; /* False => one for-loop iteration only */
+ /* FALL THROUGH */
case Node_rule_list:
for (t = tree; t != NULL; t = t->rnode) {
- tree = t->lnode;
- /* FALL THROUGH */
- case Node_rule_node:
+ if (traverse)
+ tree = t->lnode;
sourceline = tree->source_line;
source = tree->source_file;
switch (setjmp(rule_tag)) {
case 0: /* normal non-jump */
/* test pattern, if any */
- if (tree->lnode == NULL
- || eval_condition(tree->lnode)) {
- DBG_P(("Found a rule", tree->rnode));
- if (tree->rnode == NULL) {
- /*
- * special case: pattern with
- * no action is equivalent to
- * an action of {print}
- */
- NODE printnode;
-
- printnode.type = Node_K_print;
- printnode.lnode = NULL;
- printnode.rnode = NULL;
- do_print(&printnode);
- } else if (tree->rnode->type == Node_illegal) {
- /*
- * An empty statement
- * (``{ }'') is different
- * from a missing statement.
- * A missing statement is
- * equal to ``{ print }'' as
- * above, but an empty
- * statement is as in C, do
- * nothing.
- */
- } else
- (void) interpret(tree->rnode);
- }
+ if (tree->lnode == NULL ||
+ eval_condition(tree->lnode))
+ (void) interpret(tree->rnode);
break;
case TAG_CONTINUE: /* NEXT statement */
return 1;
@@ -198,25 +164,20 @@ NODE *tree;
default:
cant_happen();
}
- if (t == NULL)
- break;
+ if (!traverse) /* case Node_rule_node */
+ break; /* don't loop */
}
break;
case Node_statement_list:
- for (t = tree; t != NULL; t = t->rnode) {
- DBG_P(("Statements", t->lnode));
+ for (t = tree; t != NULL; t = t->rnode)
(void) interpret(t->lnode);
- }
break;
case Node_K_if:
- DBG_P(("IF", tree->lnode));
if (eval_condition(tree->lnode)) {
- DBG_P(("True", tree->rnode->lnode));
(void) interpret(tree->rnode->lnode);
} else {
- DBG_P(("False", tree->rnode->rnode));
(void) interpret(tree->rnode->rnode);
}
break;
@@ -224,12 +185,10 @@ NODE *tree;
case Node_K_while:
PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
- DBG_P(("WHILE", tree->lnode));
stable_tree = tree;
while (eval_condition(stable_tree->lnode)) {
switch (setjmp(loop_tag)) {
case 0: /* normal non-jump */
- DBG_P(("DO", stable_tree->rnode));
(void) interpret(stable_tree->rnode);
break;
case TAG_CONTINUE: /* continue statement */
@@ -250,7 +209,6 @@ NODE *tree;
do {
switch (setjmp(loop_tag)) {
case 0: /* normal non-jump */
- DBG_P(("DO", stable_tree->rnode));
(void) interpret(stable_tree->rnode);
break;
case TAG_CONTINUE: /* continue statement */
@@ -261,25 +219,20 @@ NODE *tree;
default:
cant_happen();
}
- DBG_P(("WHILE", stable_tree->lnode));
} while (eval_condition(stable_tree->lnode));
RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
break;
case Node_K_for:
PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
- DBG_P(("FOR", tree->forloop->init));
(void) interpret(tree->forloop->init);
- DBG_P(("FOR.WHILE", tree->forloop->cond));
stable_tree = tree;
while (eval_condition(stable_tree->forloop->cond)) {
switch (setjmp(loop_tag)) {
case 0: /* normal non-jump */
- DBG_P(("FOR.DO", stable_tree->lnode));
(void) interpret(stable_tree->lnode);
/* fall through */
case TAG_CONTINUE: /* continue statement */
- DBG_P(("FOR.INCR", stable_tree->forloop->incr));
(void) interpret(stable_tree->forloop->incr);
break;
case TAG_BREAK: /* break statement */
@@ -293,68 +246,67 @@ NODE *tree;
break;
case Node_K_arrayfor:
+ {
+ volatile struct search l; /* For array_for */
+ Func_ptr after_assign = NULL;
+
#define hakvar forloop->init
#define arrvar forloop->incr
PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
- DBG_P(("AFOR.VAR", tree->hakvar));
- lhs = (volatile NODE **) get_lhs(tree->hakvar, 1);
+ lhs = (volatile NODE **) get_lhs(tree->hakvar, &after_assign);
t = tree->arrvar;
if (t->type == Node_param_list)
t = stack_ptr[t->param_cnt];
stable_tree = tree;
- for (l = assoc_scan(t); l; l = assoc_next((struct search *)l)) {
- deref = *((NODE **) lhs);
- do_deref();
- *lhs = dupnode(l->retval);
- if (field_num == 0)
- set_record(fields_arr[0]->stptr,
- fields_arr[0]->stlen);
- DBG_P(("AFOR.NEXTIS", *lhs));
+ for (assoc_scan(t, (struct search *)&l);
+ l.retval;
+ assoc_next((struct search *)&l)) {
+ unref(*((NODE **) lhs));
+ *lhs = dupnode(l.retval);
+ if (after_assign)
+ (*after_assign)();
switch (setjmp(loop_tag)) {
case 0:
- DBG_P(("AFOR.DO", stable_tree->lnode));
(void) interpret(stable_tree->lnode);
case TAG_CONTINUE:
break;
case TAG_BREAK:
RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
- field_num = -1;
return 1;
default:
cant_happen();
}
}
- field_num = -1;
RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
break;
+ }
case Node_K_break:
- DBG_P(("BREAK", NULL));
if (loop_tag_valid == 0)
fatal("unexpected break");
longjmp(loop_tag, TAG_BREAK);
break;
case Node_K_continue:
- DBG_P(("CONTINUE", NULL));
if (loop_tag_valid == 0)
fatal("unexpected continue");
longjmp(loop_tag, TAG_CONTINUE);
break;
case Node_K_print:
- DBG_P(("PRINT", tree));
do_print(tree);
break;
case Node_K_printf:
- DBG_P(("PRINTF", tree));
do_printf(tree);
break;
+ case Node_K_delete:
+ do_delete(tree->lnode, tree->rnode);
+ break;
+
case Node_K_next:
- DBG_P(("NEXT", NULL));
longjmp(rule_tag, TAG_CONTINUE);
break;
@@ -366,7 +318,6 @@ NODE *tree;
* any are executed." This implies that the rest of the rules
* are not done. So we immediately break out of the main loop.
*/
- DBG_P(("EXIT", NULL));
exiting = 1;
if (tree) {
t = tree_eval(tree->lnode);
@@ -377,7 +328,6 @@ NODE *tree;
break;
case Node_K_return:
- DBG_P(("RETURN", NULL));
t = tree_eval(tree->lnode);
ret_node = dupnode(t);
free_temp(t);
@@ -389,7 +339,6 @@ NODE *tree;
* Appears to be an expression statement. Throw away the
* value.
*/
- DBG_P(("E", NULL));
t = tree_eval(tree);
free_temp(t);
break;
@@ -397,92 +346,91 @@ NODE *tree;
return 1;
}
-/* evaluate a subtree, allocating strings on a temporary stack. */
+/* evaluate a subtree */
NODE *
r_tree_eval(tree)
-NODE *tree;
+register NODE *tree;
{
register NODE *r, *t1, *t2; /* return value & temporary subtrees */
- int i;
register NODE **lhs;
- int di;
- AWKNUM x, x2;
+ register int di;
+ AWKNUM x, x1, x2;
long lx;
- extern NODE **fields_arr;
+#ifdef CRAY
+ long lx2;
+#endif
- source = tree->source_file;
- sourceline = tree->source_line;
+#ifdef DEBUG
+ if (tree == NULL)
+ return Nnull_string;
+ if (tree->type == Node_val) {
+ if (tree->stref <= 0) cant_happen();
+ return tree;
+ }
+ if (tree->type == Node_var) {
+ if (tree->var_value->stref <= 0) cant_happen();
+ return tree->var_value;
+ }
+ if (tree->type == Node_param_list)
+ return (stack_ptr[(_t)->param_cnt])->var_value;
+#endif
switch (tree->type) {
case Node_and:
- DBG_P(("AND", tree));
return tmp_number((AWKNUM) (eval_condition(tree->lnode)
&& eval_condition(tree->rnode)));
case Node_or:
- DBG_P(("OR", tree));
return tmp_number((AWKNUM) (eval_condition(tree->lnode)
|| eval_condition(tree->rnode)));
case Node_not:
- DBG_P(("NOT", tree));
return tmp_number((AWKNUM) ! eval_condition(tree->lnode));
/* Builtins */
case Node_builtin:
- DBG_P(("builtin", tree));
return ((*tree->proc) (tree->subnode));
case Node_K_getline:
- DBG_P(("GETLINE", tree));
return (do_getline(tree));
case Node_in_array:
- DBG_P(("IN_ARRAY", tree));
return tmp_number((AWKNUM) in_array(tree->lnode, tree->rnode));
case Node_func_call:
- DBG_P(("func_call", tree));
return func_call(tree->rnode, tree->lnode);
- case Node_K_delete:
- DBG_P(("DELETE", tree));
- do_delete(tree->lnode, tree->rnode);
- return Nnull_string;
-
/* unary operations */
-
- case Node_var:
- case Node_var_array:
- case Node_param_list:
- case Node_subscript:
+ case Node_NR:
+ case Node_FNR:
+ case Node_NF:
+ case Node_FIELDWIDTHS:
+ case Node_FS:
+ case Node_RS:
case Node_field_spec:
- DBG_P(("var_type ref", tree));
- lhs = get_lhs(tree, 0);
- field_num = -1;
- deref = 0;
+ case Node_subscript:
+ case Node_IGNORECASE:
+ case Node_OFS:
+ case Node_ORS:
+ case Node_OFMT:
+ case Node_CONVFMT:
+ lhs = get_lhs(tree, (Func_ptr *)0);
return *lhs;
case Node_unary_minus:
- DBG_P(("UMINUS", tree));
t1 = tree_eval(tree->subnode);
x = -force_number(t1);
free_temp(t1);
return tmp_number(x);
case Node_cond_exp:
- DBG_P(("?:", tree));
- if (eval_condition(tree->lnode)) {
- DBG_P(("True", tree->rnode->lnode));
+ if (eval_condition(tree->lnode))
return tree_eval(tree->rnode->lnode);
- }
- DBG_P(("False", tree->rnode->rnode));
return tree_eval(tree->rnode->rnode);
case Node_match:
case Node_nomatch:
case Node_regex:
- DBG_P(("[no]match_op", tree));
return match_op(tree);
case Node_func:
@@ -490,18 +438,55 @@ NODE *tree;
tree->lnode->param,
"or used in other expression context");
- /* assignments */
+ /* assignments */
case Node_assign:
- DBG_P(("ASSIGN", tree));
+ {
+ Func_ptr after_assign = NULL;
+
r = tree_eval(tree->rnode);
- lhs = get_lhs(tree->lnode, 1);
+ lhs = get_lhs(tree->lnode, &after_assign);
+ unref(*lhs);
*lhs = dupnode(r);
free_temp(r);
- do_deref();
- if (field_num == 0)
- set_record(fields_arr[0]->stptr, fields_arr[0]->stlen);
- field_num = -1;
+ if (after_assign)
+ (*after_assign)();
return *lhs;
+ }
+
+ case Node_concat:
+ {
+#define STACKSIZE 10
+ NODE *stack[STACKSIZE];
+ register NODE **sp;
+ register int len;
+ char *str;
+ register char *dest;
+
+ sp = stack;
+ len = 0;
+ while (tree->type == Node_concat) {
+ *sp = force_string(tree_eval(tree->lnode));
+ tree = tree->rnode;
+ len += (*sp)->stlen;
+ if (++sp == &stack[STACKSIZE-2]) /* one more and NULL */
+ break;
+ }
+ *sp = force_string(tree_eval(tree));
+ len += (*sp)->stlen;
+ *++sp = NULL;
+ emalloc(str, char *, len+2, "tree_eval");
+ dest = str;
+ sp = stack;
+ while (*sp) {
+ memcpy(dest, (*sp)->stptr, (*sp)->stlen);
+ dest += (*sp)->stlen;
+ free_temp(*sp);
+ sp++;
+ }
+ r = make_str_node(str, len, ALREADY_MALLOCED);
+ r->flags |= TEMP;
+ }
+ return r;
/* other assignment types are easier because they are numeric */
case Node_preincrement:
@@ -524,22 +509,6 @@ NODE *tree;
t2 = tree_eval(tree->rnode);
switch (tree->type) {
- case Node_concat:
- DBG_P(("CONCAT", tree));
- t1 = force_string(t1);
- t2 = force_string(t2);
-
- r = newnode(Node_val);
- r->flags |= (STR|TEMP);
- r->stlen = t1->stlen + t2->stlen;
- r->stref = 1;
- emalloc(r->stptr, char *, r->stlen + 1, "tree_eval");
- memcpy(r->stptr, t1->stptr, t1->stlen);
- memcpy(r->stptr + t1->stlen, t2->stptr, t2->stlen + 1);
- free_temp(t1);
- free_temp(t2);
- return r;
-
case Node_geq:
case Node_leq:
case Node_greater:
@@ -551,22 +520,16 @@ NODE *tree;
free_temp(t2);
switch (tree->type) {
case Node_equal:
- DBG_P(("EQUAL", tree));
return tmp_number((AWKNUM) (di == 0));
case Node_notequal:
- DBG_P(("NOT_EQUAL", tree));
return tmp_number((AWKNUM) (di != 0));
case Node_less:
- DBG_P(("LESS_THAN", tree));
return tmp_number((AWKNUM) (di < 0));
case Node_greater:
- DBG_P(("GREATER_THAN", tree));
return tmp_number((AWKNUM) (di > 0));
case Node_leq:
- DBG_P(("LESS_THAN_EQUAL", tree));
return tmp_number((AWKNUM) (di <= 0));
case Node_geq:
- DBG_P(("GREATER_THAN_EQUAL", tree));
return tmp_number((AWKNUM) (di >= 0));
default:
cant_happen();
@@ -576,74 +539,57 @@ NODE *tree;
break; /* handled below */
}
- (void) force_number(t1);
- (void) force_number(t2);
-
+ x1 = force_number(t1);
+ free_temp(t1);
+ x2 = force_number(t2);
+ free_temp(t2);
switch (tree->type) {
case Node_exp:
- DBG_P(("EXPONENT", tree));
- if ((lx = t2->numbr) == t2->numbr) { /* integer exponent */
+ if ((lx = x2) == x2) { /* integer exponent */
if (lx == 0)
x = 1;
else if (lx == 1)
- x = t1->numbr;
+ x = x1;
else {
/* doing it this way should be more precise */
- for (x = x2 = t1->numbr; --lx; )
- x *= x2;
+ for (x = x1; --lx; )
+ x *= x1;
}
} else
- x = pow((double) t1->numbr, (double) t2->numbr);
- free_temp(t1);
- free_temp(t2);
+ x = pow((double) x1, (double) x2);
return tmp_number(x);
case Node_times:
- DBG_P(("MULT", tree));
- x = t1->numbr * t2->numbr;
- free_temp(t1);
- free_temp(t2);
- return tmp_number(x);
+ return tmp_number(x1 * x2);
case Node_quotient:
- DBG_P(("DIVIDE", tree));
- x = t2->numbr;
- free_temp(t2);
- if (x == (AWKNUM) 0)
+ if (x2 == 0)
fatal("division by zero attempted");
- /* NOTREACHED */
- else {
- x = t1->numbr / x;
- free_temp(t1);
- return tmp_number(x);
- }
+#ifdef _CRAY
+ /*
+ * special case for integer division, put in for Cray
+ */
+ lx2 = x2;
+ if (lx2 == 0)
+ return tmp_number(x1 / x2);
+ lx = (long) x1 / lx2;
+ if (lx * x2 == x1)
+ return tmp_number((AWKNUM) lx);
+ else
+#endif
+ return tmp_number(x1 / x2);
case Node_mod:
- DBG_P(("MODULUS", tree));
- x = t2->numbr;
- free_temp(t2);
- if (x == (AWKNUM) 0)
+ if (x2 == 0)
fatal("division by zero attempted in mod");
- /* NOTREACHED */
- lx = t1->numbr / x; /* assignment to long truncates */
- x2 = lx * x;
- x = t1->numbr - x2;
- free_temp(t1);
- return tmp_number(x);
+ (void) modf(x1 / x2, &x);
+ return tmp_number(x1 - x * x2);
case Node_plus:
- DBG_P(("PLUS", tree));
- x = t1->numbr + t2->numbr;
- free_temp(t1);
- free_temp(t2);
- return tmp_number(x);
+ return tmp_number(x1 + x2);
case Node_minus:
- DBG_P(("MINUS", tree));
- x = t1->numbr - t2->numbr;
- free_temp(t1);
- free_temp(t2);
- return tmp_number(x);
+ return tmp_number(x1 - x2);
default:
fatal("illegal type (%d) in tree_eval", tree->type);
@@ -651,48 +597,13 @@ NODE *tree;
return 0;
}
-/*
- * This makes numeric operations slightly more efficient. Just change the
- * value of a numeric node, if possible
- */
-void
-assign_number(ptr, value)
-NODE **ptr;
-AWKNUM value;
-{
- extern NODE *deref;
- register NODE *n = *ptr;
-
-#ifdef DEBUG
- if (n->type != Node_val)
- cant_happen();
-#endif
- if (n == Nnull_string) {
- *ptr = make_number(value);
- deref = 0;
- return;
- }
- if (n->stref > 1) {
- *ptr = make_number(value);
- return;
- }
- if ((n->flags & STR) && (n->flags & (MALLOC|TEMP)))
- free(n->stptr);
- n->numbr = value;
- n->flags |= (NUM|NUMERIC);
- n->flags &= ~STR;
- n->stref = 0;
- deref = 0;
-}
-
-
/* Is TREE true or false? Returns 0==false, non-zero==true */
static int
eval_condition(tree)
-NODE *tree;
+register NODE *tree;
{
register NODE *t1;
- int ret;
+ register int ret;
if (tree == NULL) /* Null trees are the easiest kinds */
return 1;
@@ -727,6 +638,8 @@ NODE *tree;
*/
t1 = tree_eval(tree);
+ if (t1->flags & MAYBE_NUM)
+ (void) force_number(t1);
if (t1->flags & NUMERIC)
ret = t1->numbr != 0.0;
else
@@ -735,79 +648,84 @@ NODE *tree;
return ret;
}
+/*
+ * compare two nodes, returning negative, 0, positive
+ */
int
cmp_nodes(t1, t2)
-NODE *t1, *t2;
+register NODE *t1, *t2;
{
- AWKNUM d;
- AWKNUM d1;
- AWKNUM d2;
- int ret;
- int len1, len2;
+ AWKNUM diff;
+ register int ret;
+ register int len1, len2;
+ int donum;
if (t1 == t2)
return 0;
- d1 = force_number(t1);
- d2 = force_number(t2);
+ if (t1->flags & MAYBE_NUM)
+ (void) force_number(t1);
+ if (t2->flags & MAYBE_NUM)
+ (void) force_number(t2);
+#ifdef maybe
if ((t1->flags & NUMERIC) && (t2->flags & NUMERIC)) {
- d = d1 - d2;
- if (d == 0.0) /* from profiling, this is most common */
- return 0;
- if (d > 0.0)
- return 1;
- return -1;
+#else
+ donum = 0;
+ if ((t1->flags & NUMBER)) {
+ (void) force_number(t2);
+ if (t2->flags & NUMERIC)
+ donum = 1;
+ } else if ((t2->flags & NUMBER)) {
+ (void) force_number(t1);
+ if (t1->flags & NUMERIC)
+ donum = 1;
+ }
+ if (donum) {
+#endif
+ diff = t1->numbr - t2->numbr;
+ if (diff == 0) return 0;
+ else if (diff < 0) return -1;
+ else return 1;
}
- t1 = force_string(t1);
- t2 = force_string(t2);
+ (void) force_string(t1);
+ (void) force_string(t2);
len1 = t1->stlen;
len2 = t2->stlen;
- if (len1 == 0) {
- if (len2 == 0)
- return 0;
- else
- return -1;
- } else if (len2 == 0)
- return 1;
+ if (len1 == 0 || len2 == 0)
+ return len1 - len2;
ret = memcmp(t1->stptr, t2->stptr, len1 <= len2 ? len1 : len2);
- if (ret == 0 && len1 != len2)
- return len1 < len2 ? -1: 1;
- return ret;
+ return ret == 0 ? len1-len2 : ret;
}
static NODE *
op_assign(tree)
-NODE *tree;
+register NODE *tree;
{
AWKNUM rval, lval;
NODE **lhs;
AWKNUM t1, t2;
long ltemp;
NODE *tmp;
+ Func_ptr after_assign = NULL;
- lhs = get_lhs(tree->lnode, 1);
+ lhs = get_lhs(tree->lnode, &after_assign);
lval = force_number(*lhs);
+ unref(*lhs);
switch(tree->type) {
case Node_preincrement:
case Node_predecrement:
- DBG_P(("+-X", tree));
- assign_number(lhs,
- lval + (tree->type == Node_preincrement ? 1.0 : -1.0));
- do_deref();
- if (field_num == 0)
- set_record(fields_arr[0]->stptr, fields_arr[0]->stlen);
- field_num = -1;
+ *lhs = make_number(lval +
+ (tree->type == Node_preincrement ? 1.0 : -1.0));
+ if (after_assign)
+ (*after_assign)();
return *lhs;
case Node_postincrement:
case Node_postdecrement:
- DBG_P(("X+-", tree));
- assign_number(lhs,
- lval + (tree->type == Node_postincrement ? 1.0 : -1.0));
- do_deref();
- if (field_num == 0)
- set_record(fields_arr[0]->stptr, fields_arr[0]->stlen);
- field_num = -1;
+ *lhs = make_number(lval +
+ (tree->type == Node_postincrement ? 1.0 : -1.0));
+ if (after_assign)
+ (*after_assign)();
return tmp_number(lval);
default:
break; /* handled below */
@@ -818,60 +736,65 @@ NODE *tree;
free_temp(tmp);
switch(tree->type) {
case Node_assign_exp:
- DBG_P(("ASSIGN_exp", tree));
if ((ltemp = rval) == rval) { /* integer exponent */
if (ltemp == 0)
- assign_number(lhs, (AWKNUM) 1);
+ *lhs = make_number((AWKNUM) 1);
else if (ltemp == 1)
- assign_number(lhs, lval);
+ *lhs = make_number(lval);
else {
/* doing it this way should be more precise */
for (t1 = t2 = lval; --ltemp; )
t1 *= t2;
- assign_number(lhs, t1);
+ *lhs = make_number(t1);
}
} else
- assign_number(lhs, (AWKNUM) pow((double) lval, (double) rval));
+ *lhs = make_number((AWKNUM) pow((double) lval, (double) rval));
break;
case Node_assign_times:
- DBG_P(("ASSIGN_times", tree));
- assign_number(lhs, lval * rval);
+ *lhs = make_number(lval * rval);
break;
case Node_assign_quotient:
- DBG_P(("ASSIGN_quotient", tree));
if (rval == (AWKNUM) 0)
fatal("division by zero attempted in /=");
- assign_number(lhs, lval / rval);
+#ifdef _CRAY
+ /*
+ * special case for integer division, put in for Cray
+ */
+ ltemp = rval;
+ if (ltemp == 0) {
+ *lhs = make_number(lval / rval);
+ break;
+ }
+ ltemp = (long) lval / ltemp;
+ if (ltemp * lval == rval)
+ *lhs = make_number((AWKNUM) ltemp);
+ else
+#endif
+ *lhs = make_number(lval / rval);
break;
case Node_assign_mod:
- DBG_P(("ASSIGN_mod", tree));
if (rval == (AWKNUM) 0)
fatal("division by zero attempted in %=");
- ltemp = lval / rval; /* assignment to long truncates */
- t1 = ltemp * rval;
- t2 = lval - t1;
- assign_number(lhs, t2);
+ (void) modf(lval / rval, &t1);
+ t2 = lval - rval * t1;
+ *lhs = make_number(t2);
break;
case Node_assign_plus:
- DBG_P(("ASSIGN_plus", tree));
- assign_number(lhs, lval + rval);
+ *lhs = make_number(lval + rval);
break;
case Node_assign_minus:
- DBG_P(("ASSIGN_minus", tree));
- assign_number(lhs, lval - rval);
+ *lhs = make_number(lval - rval);
break;
default:
cant_happen();
}
- do_deref();
- if (field_num == 0)
- set_record(fields_arr[0]->stptr, fields_arr[0]->stlen);
- field_num = -1;
+ if (after_assign)
+ (*after_assign)();
return *lhs;
}
@@ -888,21 +811,22 @@ NODE *arg_list; /* Node_expression_list of calling args. */
volatile jmp_buf loop_tag_stack;
volatile int save_loop_tag_valid = 0;
volatile NODE **save_stack, *save_ret_node;
- NODE **local_stack, **sp;
+ NODE **local_stack = NULL, **sp;
int count;
extern NODE *ret_node;
/*
* retrieve function definition node
*/
- f = lookup(variables, name->stptr);
+ f = lookup(name->stptr);
if (!f || f->type != Node_func)
fatal("function `%s' not defined", name->stptr);
#ifdef FUNC_TRACE
fprintf(stderr, "function %s called\n", name->stptr);
#endif
count = f->lnode->param_cnt;
- emalloc(local_stack, NODE **, count * sizeof(NODE *), "func_call");
+ if (count)
+ emalloc(local_stack, NODE **, count*sizeof(NODE *), "func_call");
sp = local_stack;
/*
@@ -910,7 +834,8 @@ NODE *arg_list; /* Node_expression_list of calling args. */
*/
for (argp = arg_list; count && argp != NULL; argp = argp->rnode) {
arg = argp->lnode;
- r = newnode(Node_var);
+ getnode(r);
+ r->type = Node_var;
/*
* call by reference for arrays; see below also
*/
@@ -935,7 +860,8 @@ NODE *arg_list; /* Node_expression_list of calling args. */
* add remaining params. on stack with null value
*/
while (count-- > 0) {
- r = newnode(Node_var);
+ getnode(r);
+ r->type = Node_var;
r->lnode = Nnull_string;
r->rnode = (NODE *) NULL;
*sp++ = r;
@@ -982,23 +908,24 @@ NODE *arg_list; /* Node_expression_list of calling args. */
count = f->lnode->param_cnt;
for (argp = arg_list; count > 0 && argp != NULL; argp = argp->rnode) {
arg = argp->lnode;
+ if (arg->type == Node_param_list)
+ arg = stack_ptr[arg->param_cnt];
n = *sp++;
if (arg->type == Node_var && n->type == Node_var_array) {
arg->var_array = n->var_array;
arg->type = Node_var_array;
}
- deref = n->lnode;
- do_deref();
+ unref(n->lnode);
freenode(n);
count--;
}
while (count-- > 0) {
n = *sp++;
- deref = n->lnode;
- do_deref();
+ unref(n->lnode);
freenode(n);
}
- free((char *) local_stack);
+ if (local_stack)
+ free((char *) local_stack);
/* Restore the loop_tag stuff if necessary. */
if (save_loop_tag_valid) {
@@ -1020,120 +947,237 @@ NODE *arg_list; /* Node_expression_list of calling args. */
NODE **
get_lhs(ptr, assign)
-NODE *ptr;
-int assign; /* this is being called for the LHS of an assign. */
+register NODE *ptr;
+Func_ptr *assign;
{
- register NODE **aptr;
- NODE *n;
+ register NODE **aptr = NULL;
+ register NODE *n;
-#ifdef DEBUG
- if (ptr == NULL)
- cant_happen();
-#endif
- deref = NULL;
- field_num = -1;
switch (ptr->type) {
- case Node_var:
case Node_var_array:
- if (ptr == NF_node && (int) NF_node->var_value->numbr == -1)
- (void) get_field(HUGE-1, assign); /* parse record */
- deref = ptr->var_value;
+ fatal("attempt to use an array in a scalar context");
+ case Node_var:
+ aptr = &(ptr->var_value);
#ifdef DEBUG
- if (deref->type != Node_val)
- cant_happen();
- if (deref->flags == 0)
+ if (ptr->var_value->stref <= 0)
cant_happen();
#endif
- return &(ptr->var_value);
+ break;
+
+ case Node_FIELDWIDTHS:
+ aptr = &(FIELDWIDTHS_node->var_value);
+ if (assign)
+ *assign = set_FIELDWIDTHS;
+ break;
+
+ case Node_RS:
+ aptr = &(RS_node->var_value);
+ if (assign)
+ *assign = set_RS;
+ break;
+
+ case Node_FS:
+ aptr = &(FS_node->var_value);
+ if (assign)
+ *assign = set_FS;
+ break;
+
+ case Node_FNR:
+ unref(FNR_node->var_value);
+ FNR_node->var_value = make_number((AWKNUM) FNR);
+ aptr = &(FNR_node->var_value);
+ if (assign)
+ *assign = set_FNR;
+ break;
+
+ case Node_NR:
+ unref(NR_node->var_value);
+ NR_node->var_value = make_number((AWKNUM) NR);
+ aptr = &(NR_node->var_value);
+ if (assign)
+ *assign = set_NR;
+ break;
+
+ case Node_NF:
+ if (NF == -1)
+ (void) get_field(HUGE-1, assign); /* parse record */
+ unref(NF_node->var_value);
+ NF_node->var_value = make_number((AWKNUM) NF);
+ aptr = &(NF_node->var_value);
+ if (assign)
+ *assign = set_NF;
+ break;
+
+ case Node_IGNORECASE:
+ unref(IGNORECASE_node->var_value);
+ IGNORECASE_node->var_value = make_number((AWKNUM) IGNORECASE);
+ aptr = &(IGNORECASE_node->var_value);
+ if (assign)
+ *assign = set_IGNORECASE;
+ break;
+
+ case Node_OFMT:
+ aptr = &(OFMT_node->var_value);
+ if (assign)
+ *assign = set_OFMT;
+ break;
+
+ case Node_CONVFMT:
+ aptr = &(CONVFMT_node->var_value);
+ if (assign)
+ *assign = set_CONVFMT;
+ break;
+
+ case Node_ORS:
+ aptr = &(ORS_node->var_value);
+ if (assign)
+ *assign = set_ORS;
+ break;
+
+ case Node_OFS:
+ aptr = &(OFS_node->var_value);
+ if (assign)
+ *assign = set_OFS;
+ break;
case Node_param_list:
- n = stack_ptr[ptr->param_cnt];
- deref = n->var_value;
-#ifdef DEBUG
- if (deref->type != Node_val)
- cant_happen();
- if (deref->flags == 0)
- cant_happen();
-#endif
- return &(n->var_value);
+ aptr = &(stack_ptr[ptr->param_cnt]->var_value);
+ break;
case Node_field_spec:
+ {
+ int field_num;
+
n = tree_eval(ptr->lnode);
field_num = (int) force_number(n);
free_temp(n);
if (field_num < 0)
fatal("attempt to access field %d", field_num);
+ if (field_num == 0 && field0_valid) { /* short circuit */
+ aptr = &fields_arr[0];
+ if (assign)
+ *assign = reset_record;
+ break;
+ }
aptr = get_field(field_num, assign);
- deref = *aptr;
- return aptr;
-
+ break;
+ }
case Node_subscript:
n = ptr->lnode;
if (n->type == Node_param_list)
n = stack_ptr[n->param_cnt];
aptr = assoc_lookup(n, concat_exp(ptr->rnode));
- deref = *aptr;
-#ifdef DEBUG
- if (deref->type != Node_val)
- cant_happen();
- if (deref->flags == 0)
- cant_happen();
-#endif
- return aptr;
+ break;
+
case Node_func:
fatal ("`%s' is a function, assignment is not allowed",
ptr->lnode->param);
default:
cant_happen();
}
- return 0;
+ return aptr;
}
static NODE *
match_op(tree)
-NODE *tree;
+register NODE *tree;
{
- NODE *t1;
- struct re_pattern_buffer *rp;
+ register NODE *t1;
+ register Regexp *rp;
int i;
int match = 1;
if (tree->type == Node_nomatch)
match = 0;
if (tree->type == Node_regex)
- t1 = WHOLELINE;
+ t1 = *get_field(0, (Func_ptr *) 0);
else {
- if (tree->lnode)
- t1 = force_string(tree_eval(tree->lnode));
- else
- t1 = WHOLELINE;
+ t1 = force_string(tree_eval(tree->lnode));
tree = tree->rnode;
}
- if (tree->type == Node_regex) {
- rp = tree->rereg;
- if (!strict && ((IGNORECASE_node->var_value->numbr != 0)
- ^ (tree->re_case != 0))) {
- /* recompile since case sensitivity differs */
- rp = tree->rereg =
- mk_re_parse(tree->re_text,
- (IGNORECASE_node->var_value->numbr != 0));
- tree->re_case =
- (IGNORECASE_node->var_value->numbr != 0);
- }
- } else {
- rp = make_regexp(force_string(tree_eval(tree)),
- (IGNORECASE_node->var_value->numbr != 0));
- if (rp == NULL)
- cant_happen();
- }
- i = re_search(rp, t1->stptr, t1->stlen, 0, t1->stlen,
- (struct re_registers *) NULL);
+ rp = re_update(tree);
+ i = research(rp, t1->stptr, t1->stlen, 0);
i = (i == -1) ^ (match == 1);
free_temp(t1);
- if (tree->type != Node_regex) {
- free(rp->buffer);
- free(rp->fastmap);
- free((char *) rp);
- }
return tmp_number((AWKNUM) i);
}
+
+void
+set_IGNORECASE()
+{
+ static int warned = 0; /* makes sure the warning below is printed only once */
+ /* Installed by get_lhs() as the post-assignment hook for Node_IGNORECASE. */
+ if ((do_lint || strict) && ! warned) {
+ warned = 1;
+ warning("IGNORECASE not supported in compatibility mode");
+ }
+ IGNORECASE = (force_number(IGNORECASE_node->var_value) != 0.0); /* cache as 0/1 */
+}
+
+void
+set_OFS() /* post-assignment hook for Node_OFS: refresh the cached OFS/OFSlen */
+{
+ OFS = force_string(OFS_node->var_value)->stptr; /* points into the node's own buffer */
+ OFSlen = OFS_node->var_value->stlen;
+ OFS[OFSlen] = '\0'; /* NUL-terminate in place so OFS can be used as a C string */
+}
+
+void
+set_ORS() /* post-assignment hook for Node_ORS: refresh the cached ORS/ORSlen */
+{
+ ORS = force_string(ORS_node->var_value)->stptr; /* points into the node's own buffer */
+ ORSlen = ORS_node->var_value->stlen;
+ ORS[ORSlen] = '\0'; /* NUL-terminate in place so ORS can be used as a C string */
+}
+
+static NODE **fmt_list = NULL;
+
+static int
+fmt_ok(n)
+NODE *n; /* format node to validate; not examined yet */
+{
+ /* to be done later: placeholder, every format is currently accepted */
+ return 1;
+}
+
+static int
+fmt_index(n)
+NODE *n; /* node holding the format string to look up */
+{
+ register int ix = 0;
+ static int fmt_num = 4; /* slots currently allocated in fmt_list */
+ static int fmt_hiwater = 0; /* slots of fmt_list currently in use */
+ /* Returns n's index in fmt_list; a format not seen before is appended. */
+ if (fmt_list == NULL)
+ emalloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index");
+ (void) force_string(n);
+ while (ix < fmt_hiwater) {
+ if (cmp_nodes(fmt_list[ix], n) == 0)
+ return ix;
+ ix++;
+ }
+ /* not found: NUL-terminate the text, then store a copy in the next free slot */
+ n->stptr[n->stlen] = '\0';
+ if (!fmt_ok(n))
+ warning("bad FMT specification");
+ if (fmt_hiwater >= fmt_num) { /* table full: double it, keeping the old entries */
+ fmt_num *= 2;
+ erealloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index");
+ }
+ fmt_list[fmt_hiwater] = dupnode(n);
+ return fmt_hiwater++;
+}
+
+void
+set_OFMT() /* post-assignment hook for Node_OFMT */
+{
+ OFMTidx = fmt_index(OFMT_node->var_value); /* slot of this format in fmt_list */
+ OFMT = fmt_list[OFMTidx]->stptr; /* use the text of the copy kept in fmt_list */
+}
+
+void
+set_CONVFMT() /* post-assignment hook for Node_CONVFMT */
+{
+ CONVFMTidx = fmt_index(CONVFMT_node->var_value); /* slot of this format in fmt_list */
+ CONVFMT = fmt_list[CONVFMTidx]->stptr; /* use the text of the copy kept in fmt_list */
+}
diff --git a/field.c b/field.c
index f457f427..10210a50 100644
--- a/field.c
+++ b/field.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -25,44 +25,69 @@
#include "awk.h"
-extern void assoc_clear();
-extern int a_get_three();
-extern int get_rs();
-
-static char *get_fs();
-static int re_split();
-static int parse_fields();
-static void set_element();
-
-char *line_buf = NULL; /* holds current input line */
+static int (*parse_field) P((int, char **, int, char *,
+ Regexp *, void (*)(), NODE *));
+static void rebuild_record P((void));
+static int re_parse_field P((int, char **, int, char *,
+ Regexp *, void (*)(), NODE *));
+static int def_parse_field P((int, char **, int, char *,
+ Regexp *, void (*)(), NODE *));
+static int sc_parse_field P((int, char **, int, char *,
+ Regexp *, void (*)(), NODE *));
+static int fw_parse_field P((int, char **, int, char *,
+ Regexp *, void (*)(), NODE *));
+static void set_element P((int, char *, int, NODE *));
+
+static Regexp *FS_regexp = NULL;
static char *parse_extent; /* marks where to restart parse of record */
static int parse_high_water=0; /* field number that we have parsed so far */
-static char f_empty[] = "";
-static char *save_fs = " "; /* save current value of FS when line is read,
+static int nf_high_water = 0; /* size of fields_arr */
+static char f_empty[] = "\0";
+static int resave_fs;
+static NODE *save_FS;
+static char *save_fs; /* save current value of FS when line is read,
* to be used in deferred parsing
*/
-
NODE **fields_arr; /* array of pointers to the field nodes */
-NODE node0; /* node for $0 which never gets free'd */
-int node0_valid = 1; /* $(>0) has not been changed yet */
+int field0_valid = 1; /* $(>0) has not been changed yet */
+NODE *field0;
+static NODE **nodes; /* permanent repository of field nodes */
+static int *FIELDWIDTHS = NULL;
void
init_fields()
{
emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
- node0.type = Node_val;
- node0.stref = 0;
- node0.stptr = "";
- node0.flags = (STR|PERM); /* never free buf */
- fields_arr[0] = &node0;
+ emalloc(nodes, NODE **, sizeof(NODE *), "init_fields");
+ emalloc(field0, NODE *, sizeof(NODE), "init_fields");
+ field0->type = Node_val;
+ field0->stref = 0;
+ field0->stptr = "";
+ field0->flags = (STRING|STR|PERM); /* never free buf */
+ fields_arr[0] = field0;
+ save_FS = dupnode(FS_node->var_value);
+ save_fs = save_FS->stptr;
}
-/*
- * Danger! Must only be called for fields we know have just been blanked, or
- * fields we know don't exist yet.
- */
+static void
+grow_fields_arr(num)
+int num; /* highest field index that must become addressable */
+{
+ register int t;
+ register NODE *n;
+ /* Resize fields_arr and nodes to num+1 slots and create a node for each new index. */
+ erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field");
+ erealloc(nodes, NODE **, (num+1) * sizeof(NODE *), "set_field");
+ for (t = nf_high_water+1; t <= num; t++) { /* only indexes beyond the old high-water mark */
+ getnode(n);
+ n->type = Node_val; /* only the type is set here; set_field() fills in stptr/stlen/flags */
+ nodes[t] = n;
+ fields_arr[t] = nodes[t];
+ }
+ nf_high_water = num;
+}
/*ARGSUSED*/
static void
@@ -72,21 +97,16 @@ char *str;
int len;
NODE *dummy; /* not used -- just to make interface same as set_element */
{
- NODE *n;
- int t;
- static int nf_high_water = 0;
-
- if (num > nf_high_water) {
- erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field");
- nf_high_water = num;
- }
- /* fill in fields that don't exist */
- for (t = parse_high_water + 1; t < num; t++)
- fields_arr[t] = Nnull_string;
- n = make_string(str, len);
- (void) force_number(n);
+ register NODE *n;
+ register int t;
+
+ if (num > nf_high_water)
+ grow_fields_arr(num);
+ n = nodes[num];
+ n->stptr = str;
+ n->stlen = len;
+ n->flags = (PERM|STR|STRING|MAYBE_NUM);
fields_arr[num] = n;
- parse_high_water = num;
}
/* Someone assigned a value to $(something). Fix up $0 to be right */
@@ -104,17 +124,17 @@ rebuild_record()
tlen = 0;
ofs = force_string(OFS_node->var_value);
ofslen = ofs->stlen;
- ptr = &fields_arr[parse_high_water];
+ ptr = &fields_arr[NF];
while (ptr > &fields_arr[0]) {
tmp = force_string(*ptr);
tlen += tmp->stlen;
ptr--;
}
- tlen += (parse_high_water - 1) * ofslen;
- emalloc(ops, char *, tlen + 1, "fix_fields");
+ tlen += (NF - 1) * ofslen;
+ emalloc(ops, char *, tlen + 2, "fix_fields");
cops = ops;
ops[0] = '\0';
- for (ptr = &fields_arr[1]; ptr <= &fields_arr[parse_high_water]; ptr++) {
+ for (ptr = &fields_arr[1]; ptr <= &fields_arr[NF]; ptr++) {
tmp = *ptr;
if (tmp->stlen == 1)
*cops++ = tmp->stptr[0];
@@ -122,7 +142,7 @@ rebuild_record()
memcpy(cops, tmp->stptr, tmp->stlen);
cops += tmp->stlen;
}
- if (ptr != &fields_arr[parse_high_water]) {
+ if (ptr != &fields_arr[NF]) {
if (ofslen == 1)
*cops++ = ofs->stptr[0];
else if (ofslen != 0) {
@@ -131,11 +151,10 @@ rebuild_record()
}
}
}
- tmp = make_string(ops, tlen);
- free(ops);
- deref = fields_arr[0];
- do_deref();
+ tmp = make_str_node(ops, tlen, ALREADY_MALLOCED);
+ unref(fields_arr[0]);
fields_arr[0] = tmp;
+ field0_valid = 1;
}
/*
@@ -143,36 +162,231 @@ rebuild_record()
* or to NF. At that point, parse only as much as necessary.
*/
void
-set_record(buf, cnt)
+set_record(buf, cnt, freeold)
char *buf;
int cnt;
+int freeold;
{
register int i;
- assign_number(&NF_node->var_value, (AWKNUM)-1);
+ NF = -1;
for (i = 1; i <= parse_high_water; i++) {
- deref = fields_arr[i];
- do_deref();
+ unref(fields_arr[i]);
}
parse_high_water = 0;
- node0_valid = 1;
- if (buf == line_buf) {
- deref = fields_arr[0];
- do_deref();
- save_fs = get_fs();
- node0.type = Node_val;
- node0.stptr = buf;
- node0.stlen = cnt;
- node0.stref = 1;
- node0.flags = (STR|PERM); /* never free buf */
- fields_arr[0] = &node0;
+ if (freeold) {
+ unref(fields_arr[0]);
+ if (resave_fs) {
+ resave_fs = 0;
+ unref(save_FS);
+ save_FS = dupnode(FS_node->var_value);
+ save_fs = save_FS->stptr;
+ }
+ field0->stptr = buf;
+ field0->stlen = cnt;
+ field0->stref = 1;
+ field0->flags = (STRING|STR|PERM|MAYBE_NUM);
+ fields_arr[0] = field0;
+ }
+ fields_arr[0]->flags |= MAYBE_NUM;
+ field0_valid = 1;
+}
+
+void
+reset_record() /* hook run after $0 is assigned: reset the field-parsing state */
+{
+ (void) force_string(fields_arr[0]); /* make sure stptr/stlen are valid before use */
+ set_record(fields_arr[0]->stptr, fields_arr[0]->stlen, 0); /* freeold == 0: keep the $0 node */
+}
+
+void
+set_NF() /* post-assignment hook for Node_NF */
+{
+ NF = (int) force_number(NF_node->var_value); /* cache the assigned value as an int */
+ field0_valid = 0; /* get_field(0, ...) calls rebuild_record() while this is 0 */
+}
+
+/*
+ * this is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is a regular
+ * expression -- either user-defined or because RS=="" and FS==" "
+ */
+static int
+re_parse_field(up_to, buf, len, fs, rp, set, n)
+int up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len; /* number of bytes available at *buf */
+register char *fs; /* not referenced by this variant */
+Regexp *rp; /* compiled field-separator regexp handed to research() */
+void (*set) (); /* routine to set the value of the parsed field */
+NODE *n; /* passed through unchanged to (*set)() */
+{
+ register char *scan = *buf;
+ register int nf = parse_high_water;
+ register char *field; /* start of the field currently being collected */
+ register char *end = scan + len;
+ char *cp;
+ /* Returns the number of the last field set; *buf is advanced to the resume point. */
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+ /* RS is empty and FS is a single space: skip leading newlines, blanks and tabs */
+ cp = FS_node->var_value->stptr;
+ if (*RS == 0 && *cp == ' ' && *(cp+1) == '\0') {
+ while (scan < end
+ && (*scan == '\n' || *scan == ' ' || *scan == '\t'))
+ scan++;
+ }
+ field = scan;
+ while (scan < end
+ && research(rp, scan, (int)(end - scan), 1) != -1
+ && nf < up_to) {
+ if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */
+ scan++; /* step over one character; field keeps its old start */
+ if (scan == end) {
+ (*set)(++nf, field, (int)(scan - field), n);
+ up_to = nf;
+ break;
+ }
+ continue;
+ }
+ (*set)(++nf, field, (int)(scan - field) + RESTART(rp, scan), n); /* offset is relative to scan, not field */
+ scan += REEND(rp, scan);
+ field = scan;
+ }
+ if (nf != up_to && *RS != 0 && scan < end) { /* text after the last separator */
+ (*set)(++nf, scan, (int)(end - scan), n);
+ scan = end;
+ }
+ *buf = scan;
+ return (nf);
+}
+
+/*
+ * this is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is a single space
+ * character.
+ */
+static int
+def_parse_field(up_to, buf, len, fs, rp, set, n)
+int up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len; /* number of bytes available at *buf */
+register char *fs; /* not referenced by this variant */
+Regexp *rp; /* not referenced by this variant */
+void (*set) (); /* routine to set the value of the parsed field */
+NODE *n; /* passed through unchanged to (*set)() */
+{
+ register char *scan = *buf;
+ register int nf = parse_high_water;
+ register char *field;
+ register char *end = scan + len;
+ /* Returns the number of the last field set; *buf is advanced to the resume point. */
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+ /* NOTE(review): the store below writes one byte past the len bytes given; assumes the caller's buffer has room - confirm */
+ *end = ' '; /* sentinel character */
+ for (; nf < up_to; scan++) {
+ /*
+ * special case: fs is single space, strip leading whitespace
+ */
+ while (scan < end && (*scan == ' ' || *scan == '\t'))
+ scan++;
+ if (scan >= end)
+ break;
+ field = scan;
+ while (*scan != ' ' && *scan != '\t') /* no bounds test: the sentinel stops the scan */
+ scan++;
+ (*set)(++nf, field, (int)(scan - field), n);
+ if (scan == end)
+ break;
+ }
+ *buf = scan;
+ return nf;
+}
+
+/*
+ * this is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for when FS is a single character
+ * other than space.
+ */
+static int
+sc_parse_field(up_to, buf, len, fs, rp, set, n)
+int up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len; /* number of bytes available at *buf */
+register char *fs; /* only its first character is used, as the separator */
+Regexp *rp; /* not referenced by this variant */
+void (*set) (); /* routine to set the value of the parsed field */
+NODE *n; /* passed through unchanged to (*set)() */
+{
+ register char *scan = *buf;
+ register char fschar = *fs;
+ register int nf = parse_high_water;
+ register char *field;
+ register char *end = scan + len;
+ /* Returns the number of the last field set; *buf is advanced to the resume point. */
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+ *end = fschar; /* sentinel character; NOTE(review): written one byte past the len bytes given - confirm the buffer has room */
+ for (; nf < up_to; scan++) { /* the scan++ steps over the separator just found */
+ field = scan;
+ while (*scan++ != fschar) /* no bounds test: the sentinel stops the scan */
+ ;
+ scan--; /* back up onto the separator (or the sentinel) */
+ (*set)(++nf, field, (int)(scan - field), n);
+ if (scan == end)
+ break;
+ }
+ *buf = scan;
+ return nf;
+}
+
+/*
+ * this is called both from get_field() and from do_split()
+ * via (*parse_field)(). This variation is for fixed-width fields, when
+ * the FIELDWIDTHS array supplies the width of each field.
+ */
+static int
+fw_parse_field(up_to, buf, len, fs, rp, set, n)
+int up_to; /* parse only up to this field number */
+char **buf; /* on input: string to parse; on output: point to start next */
+int len; /* bytes available at *buf; reused below as the current field width */
+register char *fs; /* not referenced by this variant */
+Regexp *rp; /* not referenced by this variant */
+void (*set) (); /* routine to set the value of the parsed field */
+NODE *n; /* passed through unchanged to (*set)() */
+{
+ register char *scan = *buf;
+ register int nf = parse_high_water;
+ register char *end = scan + len;
+ /* Returns the number of the last field set; *buf is advanced to the resume point. */
+ if (up_to == HUGE)
+ nf = 0;
+ if (len == 0)
+ return nf;
+ for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) { /* a width of -1 stops the loop */
+ if (len > end - scan) /* clamp the width to what is left of the input */
+ len = end - scan;
+ (*set)(++nf, scan, len, n);
+ scan += len;
+ }
+ if (len == -1) /* the -1 width was reached: resume point is the end of the input */
+ *buf = end;
+ else
+ *buf = scan;
+ return nf;
+}
NODE **
get_field(num, assign)
-int num;
-int assign; /* this field is on the LHS of an assign */
+register int num;
+Func_ptr *assign; /* this field is on the LHS of an assign */
{
int n;
@@ -180,40 +394,52 @@ int assign; /* this field is on the LHS of an assign */
* if requesting whole line but some other field has been altered,
* then the whole line must be rebuilt
*/
- if (num == 0 && (node0_valid == 0 || assign)) {
- /* first, parse remainder of input record */
- if (NF_node->var_value->numbr == -1) {
- if (parse_high_water == 0)
- parse_extent = node0.stptr;
- n = parse_fields(HUGE-1, &parse_extent,
- node0.stlen - (parse_extent - node0.stptr),
- save_fs, set_field, (NODE *)NULL);
- assign_number(&NF_node->var_value, (AWKNUM)n);
- }
- if (node0_valid == 0)
+ if (num == 0) {
+ if (!field0_valid) {
+ /* first, parse remainder of input record */
+ if (NF == -1) {
+ NF = (*parse_field)(HUGE-1, &parse_extent,
+ fields_arr[0]->stlen -
+ (parse_extent - fields_arr[0]->stptr),
+ save_fs, FS_regexp, set_field,
+ (NODE *)NULL);
+ parse_high_water = NF;
+ }
rebuild_record();
+ }
+ if (assign)
+ *assign = reset_record;
return &fields_arr[0];
}
- if (num > 0 && assign)
- node0_valid = 0;
+
+ /* assert(num > 0); */
+
+ if (assign)
+ field0_valid = 0;
if (num <= parse_high_water) /* we have already parsed this field */
return &fields_arr[num];
- if (parse_high_water == 0 && num > 0) /* starting at the beginning */
+ if (parse_high_water == 0) /* starting at the beginning */
parse_extent = fields_arr[0]->stptr;
/*
* parse up to num fields, calling set_field() for each, and saving
* in parse_extent the point where the parse left off
*/
- n = parse_fields(num, &parse_extent,
+ n = (*parse_field)(num, &parse_extent,
fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr),
- save_fs, set_field, (NODE *)NULL);
+ save_fs, FS_regexp, set_field, (NODE *)NULL);
+ parse_high_water = n;
if (num == HUGE-1)
num = n;
- if (n < num) { /* requested field number beyond end of record;
- * set_field will just extend the number of fields,
- * with empty fields
- */
- set_field(num, f_empty, 0, (NODE *) NULL);
+ if (n < num) { /* requested field number beyond end of record; */
+ register int i;
+
+ if (num > nf_high_water)
+ grow_fields_arr(num);
+
+ /* fill in fields that don't exist */
+ for (i = n + 1; i <= num; i++)
+ fields_arr[i] = Nnull_string;
+
/*
* if this field is onthe LHS of an assignment, then we want to
* set NF to this value, below
@@ -229,196 +455,160 @@ int assign; /* this field is on the LHS of an assign */
* only gets set if the field is assigned to -- in this case n has
* been set to num above
*/
- if (*parse_extent == '\0')
- assign_number(&NF_node->var_value, (AWKNUM)n);
+ if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen)
+ NF = n;
return &fields_arr[num];
}
-/*
- * this is called both from get_field() and from do_split()
- */
-static int
-parse_fields(up_to, buf, len, fs, set, n)
-int up_to; /* parse only up to this field number */
-char **buf; /* on input: string to parse; on output: point to start next */
+static void
+set_element(num, s, len, n)
+int num;
+char *s;
int len;
-register char *fs;
-void (*set) (); /* routine to set the value of the parsed field */
NODE *n;
{
- char *s = *buf;
- register char *field;
- register char *scan;
- register char *end = s + len;
- int NF = parse_high_water;
- char rs = get_rs();
+ register NODE *it;
-
- if (up_to == HUGE)
- NF = 0;
- if (*fs && *(fs + 1) != '\0') { /* fs is a regexp */
- struct re_registers reregs;
-
- scan = s;
- if (rs == 0 && STREQ(FS_node->var_value->stptr, " ")) {
- while ((*scan == '\n' || *scan == ' ' || *scan == '\t')
- && scan < end)
- scan++;
- }
- s = scan;
- while (scan < end
- && re_split(scan, (int)(end - scan), fs, &reregs) != -1
- && NF < up_to) {
- if (reregs.end[0] == 0) { /* null match */
- scan++;
- if (scan == end) {
- (*set)(++NF, s, scan - s, n);
- up_to = NF;
- break;
- }
- continue;
- }
- (*set)(++NF, s, scan - s + reregs.start[0], n);
- scan += reregs.end[0];
- s = scan;
- }
- if (NF != up_to && scan <= end) {
- if (!(rs == 0 && scan == end)) {
- (*set)(++NF, scan, (int)(end - scan), n);
- scan = end;
- }
- }
- *buf = scan;
- return (NF);
- }
- for (scan = s; scan < end && NF < up_to; scan++) {
- /*
- * special case: fs is single space, strip leading
- * whitespace
- */
- if (*fs == ' ') {
- while ((*scan == ' ' || *scan == '\t') && scan < end)
- scan++;
- if (scan >= end)
- break;
- }
- field = scan;
- if (*fs == ' ')
- while (*scan != ' ' && *scan != '\t' && scan < end)
- scan++;
- else {
- while (*scan != *fs && scan < end)
- scan++;
- if (rs && scan == end-1 && *scan == *fs) {
- (*set)(++NF, field, (int)(scan - field), n);
- field = scan;
- }
- }
- (*set)(++NF, field, (int)(scan - field), n);
- if (scan == end)
- break;
- }
- *buf = scan;
- return NF;
-}
-
-static int
-re_split(buf, len, fs, reregsp)
-char *buf, *fs;
-int len;
-struct re_registers *reregsp;
-{
- typedef struct re_pattern_buffer RPAT;
- static RPAT *rp;
- static char *last_fs = NULL;
-
- if ((last_fs != NULL && !STREQ(fs, last_fs))
- || (rp && ! strict && ((IGNORECASE_node->var_value->numbr != 0)
- ^ (rp->translate != NULL))))
- {
- /* fs has changed or IGNORECASE has changed */
- free(rp->buffer);
- free(rp->fastmap);
- free((char *) rp);
- free(last_fs);
- last_fs = NULL;
- }
- if (last_fs == NULL) { /* first time */
- emalloc(rp, RPAT *, sizeof(RPAT), "re_split");
- memset((char *) rp, 0, sizeof(RPAT));
- emalloc(rp->buffer, char *, 8, "re_split");
- rp->allocated = 8;
- emalloc(rp->fastmap, char *, 256, "re_split");
- emalloc(last_fs, char *, strlen(fs) + 1, "re_split");
- (void) strcpy(last_fs, fs);
- if (! strict && IGNORECASE_node->var_value->numbr != 0.0)
- rp->translate = casetable;
- else
- rp->translate = NULL;
- if (re_compile_pattern(fs, strlen(fs), rp) != NULL)
- fatal("illegal regular expression for FS: `%s'", fs);
- }
- return re_search(rp, buf, len, 0, len, reregsp);
+ it = make_string(s, len);
+ it->flags |= MAYBE_NUM;
+ *assoc_lookup(n, tmp_number((AWKNUM) (num))) = it;
}
NODE *
do_split(tree)
NODE *tree;
{
- NODE *t1, *t2, *t3;
- register char *splitc;
+ NODE *t1, *t2, *t3, *tmp;
+ register char *splitc = "";
char *s;
- NODE *n;
+ int (*parseit)();
+ Regexp *rp = NULL;
- if (a_get_three(tree, &t1, &t2, &t3) < 3)
- splitc = get_fs();
- else
- splitc = force_string(t3)->stptr;
+ t1 = tree_eval(tree->lnode);
+ t2 = tree->rnode->lnode;
+ t3 = tree->rnode->rnode->lnode;
+
+ (void) force_string(t1);
- n = t2;
if (t2->type == Node_param_list)
- n = stack_ptr[t2->param_cnt];
- if (n->type != Node_var && n->type != Node_var_array)
+ t2 = stack_ptr[t2->param_cnt];
+ if (t2->type != Node_var && t2->type != Node_var_array)
fatal("second argument of split is not a variable");
- assoc_clear(n);
-
- tree = force_string(t1);
+ assoc_clear(t2);
+
+ if (t3->re_flags & FS_DFLT) {
+ parseit = parse_field;
+ splitc = FS;
+ rp = FS_regexp;
+ } else {
+ tmp = force_string(tree_eval(t3->re_exp));
+ if (tmp->stlen == 1) {
+ if (tmp->stptr[0] == ' ') {
+ parseit = def_parse_field;
+ } else {
+ parseit = sc_parse_field;
+ splitc = tmp->stptr;
+ }
+ } else {
+ parseit = re_parse_field;
+ rp = re_update(t3);
+ }
+ free_temp(tmp);
+ }
- s = tree->stptr;
- return tmp_number((AWKNUM)
- parse_fields(HUGE, &s, tree->stlen, splitc, set_element, n));
+ s = t1->stptr;
+ tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, t1->stlen,
+ splitc, rp, set_element, t2));
+ free_temp(t1);
+ return tmp;
}
-static char *
-get_fs()
+void
+set_FS()
{
register NODE *tmp;
static char buf[10];
+ if (FS_regexp) {
+ refree(FS_regexp);
+ FS_regexp = NULL;
+ }
+ parse_field = def_parse_field;
tmp = force_string(FS_node->var_value);
- if (get_rs() == 0) {
+ FS = tmp->stptr;
+ if (*RS == 0) {
+ parse_field = re_parse_field;
+ FS = buf;
if (tmp->stlen == 1) {
if (tmp->stptr[0] == ' ')
(void) strcpy(buf, "[ \n]+");
- else
+ else if (tmp->stptr[0] != '\n')
sprintf(buf, "[%c\n]", tmp->stptr[0]);
+ else {
+ parse_field = sc_parse_field;
+ FS = tmp->stptr;
+ }
} else if (tmp->stlen == 0) {
buf[0] = '\n';
buf[1] = '\0';
+ parse_field = sc_parse_field;
} else
- return tmp->stptr;
- return buf;
+ FS = tmp->stptr;
+ } else {
+ if (tmp->stlen > 1)
+ parse_field = re_parse_field;
+ else if (*FS != ' ' && tmp->stlen == 1)
+ parse_field = sc_parse_field;
}
- return tmp->stptr;
+ if (parse_field == re_parse_field) {
+ tmp = tmp_string(FS, strlen(FS));
+ FS_regexp = make_regexp(tmp, 0, 1);
+ free_temp(tmp);
+ } else
+ FS_regexp = NULL;
+ resave_fs = 1;
}
-static void
-set_element(num, s, len, n)
-int num;
-char *s;
-int len;
-NODE *n;
+void
+set_RS()
{
- *assoc_lookup(n, tmp_number((AWKNUM) (num))) = make_string(s, len);
+ (void) force_string(RS_node->var_value);
+ RS = RS_node->var_value->stptr;
+ set_FS();
+}
+
+void
+set_FIELDWIDTHS()
+{
+ register char *scan;
+ char *end;
+ register int i;
+ static int fw_alloc = 1;
+ static int warned = 0;
+
+ if (do_lint && ! warned) {
+ warned = 1;
+ warning("use of FIELDWIDTHS is a gawk extension");
+ }
+ if (strict) /* quick and dirty, does the trick */
+ return;
+
+ parse_field = fw_parse_field;
+ scan = force_string(FIELDWIDTHS_node->var_value)->stptr;
+ end = scan + 1;
+ if (FIELDWIDTHS == NULL)
+ emalloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
+ FIELDWIDTHS[0] = 0;
+ for (i = 1; ; i++) {
+ if (i >= fw_alloc) {
+ fw_alloc *= 2;
+ erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS");
+ }
+ FIELDWIDTHS[i] = (int) strtol(scan, &end, 10);
+ if (end == scan)
+ break;
+ scan = end;
+ }
+ FIELDWIDTHS[i] = -1;
}
diff --git a/foo.sh b/foo.sh
new file mode 100644
index 00000000..1506a112
--- /dev/null
+++ b/foo.sh
@@ -0,0 +1,50 @@
+git add CHANGES
+git add FUTURES
+git add Makefile
+git add PROBLEMS
+git add README
+git add alloca.c
+git add alloca.s
+git add array.c
+git add awk.h
+git add awk.y
+git add builtin.c
+git add eval.c
+git add field.c
+git add gawk.1
+git add gawk.texinfo
+git add io.c
+git add main.c
+git add missing.c
+git add msg.c
+git add node.c
+git add patchlevel.h
+git add regex.c
+git add regex.h
+git add ACKNOWLEDGMENT
+git add LIMITATIONS
+git add Makefile-dist
+git add PORTS
+git add README.VMS
+git add README.dos
+git add README.rs6000
+git add README.ultrix
+git add atari
+git add awk.tab.c
+git add config.h
+git add config.h-dist
+git add config
+git add configure
+git add dfa.c
+git add dfa.h
+git add foo.sh
+git add iop.c
+git add mkconf
+git add mungeconf
+git add pc/config.h
+git add pc/make.bat
+git add pc/names.lnk
+git add protos.h
+git add re.c
+git add version.c
+git add vms
diff --git a/gawk.1 b/gawk.1
index 5472d20a..204b5bc7 100644
--- a/gawk.1
+++ b/gawk.1
@@ -1,25 +1,14 @@
-.TH GAWK 1 "August 24 1989" "Free Software Foundation"
+.ds PX \s-1POSIX\s+1
+.ds UX \s-1UNIX\s+1
+.ds AN \s-1ANSI\s+1
+.TH GAWK 1 "Jun 5 1991" "Free Software Foundation" "Utility Commands"
.SH NAME
gawk \- pattern scanning and processing language
.SH SYNOPSIS
.B gawk
-.ig
-[
-.B \-d
-] [
-.B \-D
-]
-..
[
-.B \-a
-] [
-.B \-e
-] [
-.B \-c
-] [
-.B \-C
-] [
-.B \-V
+.B \-W
+.I gawk-options
] [
.BI \-F\^ fs
] [
@@ -33,23 +22,9 @@ gawk \- pattern scanning and processing language
] file .\^.\^.
.br
.B gawk
-.ig
-[
-.B \-d
-] [
-.B \-D
-]
-..
[
-.B \-a
-] [
-.B \-e
-] [
-.B \-c
-] [
-.B \-C
-] [
-.B \-V
+.B \-W
+.I gawk-options
] [
.BI \-F\^ fs
] [
@@ -63,13 +38,17 @@ file .\^.\^.
.SH DESCRIPTION
.I Gawk
is the GNU Project's implementation of the AWK programming language.
-It conforms to the definition and description of the language in
+It conforms to the definition of the language in
+the \*(PX 1003.2 Command Language And Utilities Standard
+(draft 11).
+This version in turn is based on the description in
.IR "The AWK Programming Language" ,
by Aho, Kernighan, and Weinberger,
with the additional features defined in the System V Release 4 version
-of \s-1UNIX\s+1
-.IR awk ,
-and some GNU-specific extensions.
+of \*(UX
+.IR awk .
+.I Gawk
+also provides some GNU-specific extensions.
.PP
The command line consists of options to
.I gawk
@@ -81,6 +60,7 @@ available in the
and
.B ARGV
pre-defined AWK variables.
+.SH OPTIONS
.PP
.I Gawk
accepts the following options, which should be available on any implementation
@@ -116,42 +96,75 @@ options may be used.
Signal the end of options. This is useful to allow further arguments to the
AWK program itself to start with a ``\-''.
This is mainly for consistency with the argument parsing convention used
-by most other System V programs.
+by most other \*(PX programs.
+.PP
+Following the \*(PX standard,
+.IR gawk -specific
+options are supplied via arguments to the
+.B \-W
+option. Multiple
+.B \-W
+options may be supplied, or multiple arguments may be supplied together
+if they are separated by commas, or enclosed in quotes and separated
+by white space.
+Case is ignored in arguments to the
+.B \-W
+option.
.PP
-The following options are specific to the GNU implementation.
-.TP
-.B \-a
-Use AWK style regular expressions as described in the book.
-This is the current default, but may not be when the POSIX P1003.2
-standard is finalized.
-It is orthogonal to
-.BR \-c .
-.TP
-.B \-e
-Use
-.IR egrep (1)
-style regular expressions as described in POSIX standard.
-This may become the default when the POSIX P1003.2
-standard is finalized.
-It is orthogonal to
-.BR \-c .
-.TP
-.B \-c
+The
+.B \-W
+option accepts the following arguments:
+.TP \w'\fBcopyright\fR'u+1n
+.B compat
Run in
.I compatibility
mode. In compatibility mode,
.I gawk
-behaves identically to \s-1UNIX\s+1
+behaves identically to \*(UX
.IR awk ;
none of the GNU-specific extensions are recognized.
.TP
-.B \-C
+.PD 0
+.B copyleft
+.TP
+.PD
+.B copyright
Print the short version of the GNU copyright information message on
the error output.
-This option may disappear in a future version of
-.IR gawk .
.TP
-.B \-V
+.B lint
+Provide warnings about constructs that are
+dubious or non-portable to other AWK implementations.
+.TP
+.B posix
+This turns on
+.I compatibility
+mode, with the following additional restrictions:
+.RS
+.TP \w'\(bu'u+1n
+\(bu
+.B \ex
+escape sequences are not recognized.
+.TP
+\(bu
+The synonym
+.B func
+for the keyword
+.B function
+is not recognized.
+.TP
+\(bu
+The operators
+.B **
+and
+.B **=
+cannot be used in place of
+.B ^
+and
+.BR ^= .
+.RE
+.TP
+.B version
Print version information for this particular copy of
.I gawk
on the error output.
@@ -160,10 +173,9 @@ This is useful mainly for knowing if the current copy of
on your system
is up to date with respect to whatever the Free Software Foundation
is distributing.
-This option may disappear in a future version of
-.IR gawk .
.PP
Any other options are flagged as illegal, but are otherwise ignored.
+.SH AWK PROGRAM EXECUTION
.PP
An AWK program consists of a sequence of pattern-action statements
and optional function definitions.
@@ -207,7 +219,14 @@ If a file name given to the
option contains a ``/'' character, no path search is performed.
.PP
.I Gawk
-compiles the program into an internal form,
+executes AWK programs in the following order.
+First,
+.I gawk
+compiles the program into an internal form.
+Next, all variable assignments specified via the
+.B \-v
+option are performed. Then,
+.I gawk
executes the code in the
.B BEGIN
block(s) (if any),
@@ -219,17 +238,27 @@ If there are no files named on the command line,
.I gawk
reads the standard input.
.PP
-If a ``file'' named on the command line has the form
+If a filename on the command line has the form
.IB var = val
it is treated as a variable assignment. The variable
.I var
will be assigned the value
.IR val .
-This is most useful for dynamically assigning values to the variables
+(This happens after any
+.B BEGIN
+block(s) have been run.)
+Command line variable assignment
+is most useful for dynamically assigning values to the variables
AWK uses to control how input is broken into fields and records. It
is also useful for controlling state if multiple passes are needed over
a single data file.
.PP
+If the value of a particular element of
+.B ARGV
+is empty (\fB""\fR),
+.I gawk
+skips over it.
+.PP
For each line in the input,
.I gawk
tests to see if it matches any
@@ -238,12 +267,20 @@ in the AWK program.
For each pattern that the line matches, the associated
.I action
is executed.
+The patterns are tested in the order they occur in the program.
+.PP
+Finally, after all the input is exhausted,
+.I gawk
+executes the code in the
+.B END
+block(s) (if any).
.SH VARIABLES AND FIELDS
AWK variables are dynamic; they come into existence when they are
first used. Their values are either floating-point numbers or strings,
+or both,
depending upon how they are used. AWK also has one dimension
arrays; multiply dimensioned arrays may be simulated.
-There are several pre-defined variables that AWK sets as a program
+Several pre-defined variables are set as a program
runs; these will be described as needed and summarized below.
.SS Fields
.PP
@@ -270,6 +307,20 @@ Note that the value of
.B FS
is a regular expression.
.PP
+If the
+.B FIELDWIDTHS
+variable is set to a space separated list of numbers, each field is
+expected to have fixed width, and
+.I gawk
+will split up the record using the specified widths. The value of
+.B FS
+is ignored.
+Assigning a new value to
+.B FS
+overrides the use of
+.BR FIELDWIDTHS ,
+and restores the default behaviour.
+.PP
Each field in the input line may be referenced by its position,
.BR $1 ,
.BR $2 ,
@@ -292,7 +343,7 @@ The variable
is set to the total number of fields in the input line.
.PP
References to non-existent fields (i.e. fields after
-.BR $NF ),
+.BR $NF )
produce the null-string. However, assigning to a non-existent field
(e.g.,
.BR "$(NF+2) = 5" )
@@ -307,22 +358,24 @@ to be recomputed, with the fields being separated by the value of
.PP
AWK's built-in variables are:
.PP
-.RS
-.TP \l'\fBIGNORECASE\fR'
+.TP \w'\fBFIELDWIDTHS\fR'u+1n
.B ARGC
-the number of command line arguments (does not include options to
+The number of command line arguments (does not include options to
.IR gawk ,
or the program source).
-.TP \l'\fBIGNORECASE\fR'
+.TP
.B ARGV
-array of command line arguments. The array is indexed from
+Array of command line arguments. The array is indexed from
0 to
.B ARGC
\- 1.
Dynamically changing the contents of
.B ARGV
can control the files used for data.
-.TP \l'\fBIGNORECASE\fR'
+.TP
+.B CONVFMT
+The conversion format for numbers, \fB"%.6g"\fR, by default.
+.TP
.B ENVIRON
An array containing the values of the current environment.
The array is indexed by the environment variables, each element being
@@ -331,23 +384,36 @@ the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be
Changing this array does not affect the environment seen by programs which
.I gawk
spawns via redirection or the
-.B system
+.B system()
function.
(This may change in a future version of
.IR gawk .)
-.TP \l'\fBIGNORECASE\fR'
+.\" but don't hold your breath...
+.TP
+.B FIELDWIDTHS
+A white-space separated list of field widths.  When set,
+.I gawk
+parses the input into fields of fixed width, instead of using the
+value of the
+.B FS
+variable as the field separator.
+The fixed field width facility is still experimental; expect the
+semantics to change as
+.I gawk
+evolves over time.
+.TP
.B FILENAME
-the name of the current input file.
+The name of the current input file.
If no files are specified on the command line, the value of
.B FILENAME
is ``\-''.
-.TP \l'\fBIGNORECASE\fR'
+.TP
.B FNR
-the input record number in the current input file.
-.TP \l'\fBIGNORECASE\fR'
+The input record number in the current input file.
+.TP
.B FS
-the input field separator, a blank by default.
-.TP \l'\fBIGNORECASE\fR'
+The input field separator, a blank by default.
+.TP
.B IGNORECASE
Controls the case-sensitivity of all regular expression operations. If
.B IGNORECASE
@@ -376,29 +442,30 @@ and \fB"AB"\fP.
As with all AWK variables, the initial value of
.B IGNORECASE
is zero, so all regular expression operations are normally case-sensitive.
-.TP \l'\fBIGNORECASE\fR'
+.TP
.B NF
-the number of fields in the current input record.
-.TP \l'\fBIGNORECASE\fR'
+The number of fields in the current input record.
+.TP
.B NR
-the total number of input records seen so far.
-.TP \l'\fBIGNORECASE\fR'
+The total number of input records seen so far.
+.TP
.B OFMT
-the output format for numbers,
-.B %.6g
-by default.
-.TP \l'\fBIGNORECASE\fR'
+The output format for numbers, \fB"%.6g"\fR, by default.
+.TP
.B OFS
-the output field separator, a blank by default.
-.TP \l'\fBIGNORECASE\fR'
+The output field separator, a blank by default.
+.TP
.B ORS
-the output record separator, by default a newline.
-.TP \l'\fBIGNORECASE\fR'
+The output record separator, by default a newline.
+.TP
.B RS
-the input record separator, by default a newline.
+The input record separator, by default a newline.
.B RS
is exceptional in that only the first character of its string
-value is used for separating records. If
+value is used for separating records.
+(This will probably change in a future release of
+.IR gawk .)
+If
.B RS
is set to the null string, then records are separated by
blank lines.
@@ -408,21 +475,20 @@ is set to the null string, then the newline character always acts as
a field separator, in addition to whatever value
.B FS
may have.
-.TP \l'\fBIGNORECASE\fR'
+.TP
.B RSTART
-the index of the first character matched by
+The index of the first character matched by
.BR match() ;
0 if no match.
-.TP \l'\fBIGNORECASE\fR'
+.TP
.B RLENGTH
-the length of the string matched by
+The length of the string matched by
.BR match() ;
\-1 if no match.
-.TP \l'\fBIGNORECASE\fR'
+.TP
.B SUBSEP
-the character used to separate multiple subscripts in array
+The character used to separate multiple subscripts in array
elements, by default \fB"\e034"\fR.
-.RE
.SS Arrays
.PP
Arrays are subscripted with an expression between square brackets
@@ -480,7 +546,7 @@ loop to iterate over all the elements of an array.
An element may be deleted from an array using the
.B delete
statement.
-.SS Variable Typing
+.SS Variable Typing And Conversion
.PP
Variables and fields
may be (floating point) numbers, or strings, or both. How the
@@ -491,9 +557,46 @@ it will be treated as a string.
To force a variable to be treated as a number, add 0 to it; to force it
to be treated as a string, concatenate it with the null string.
.PP
-The AWK language defines comparisons as being done numerically if
-possible, otherwise one or both operands are converted to strings and
-a string comparison is performed.
+When a string must be converted to a number, the conversion is accomplished
+using
+.IR atof (3).
+A number is converted to a string by using the value of
+.B CONVFMT
+as a format string for
+.IR sprintf (3),
+with the numeric value of the variable as the argument.
+However, even though all numbers in AWK are floating-point,
+integral values are
+.I always
+converted as integers. Thus, given
+.PP
+.RS
+.ft B
+.nf
+CONVFMT = "%2.2f"
+a = 12
+b = a ""
+.fi
+.ft R
+.RE
+.PP
+the variable
+.B b
+has a value of \fB"12"\fR and not \fB"12.00"\fR.
+.PP
+.I Gawk
+performs comparisons as follows:
+If two variables are numeric, they are compared numerically.
+If one value is numeric and the other has a string value that is a
+``numeric string,'' then comparisons are also done numerically.
+Otherwise, the numeric value is converted to a string and a string
+comparison is performed.
+Two strings are compared, of course, as strings.
+According to the \*(PX standard (draft 11), when two strings are both
+numeric strings, a numeric comparison is performed. However, this is
+clearly incorrect, and
+.I gawk
+does not do this.
.PP
Uninitialized variables have the numeric value 0 and the string value ""
(the null, or empty, string).
@@ -547,7 +650,7 @@ AWK patterns may be one of the following:
.IB pattern " ? " pattern " : " pattern
.BI ( pattern )
.BI ! " pattern"
-.IB pattern1 ", " pattern2"
+.IB pattern1 ", " pattern2
.fi
.RE
.PP
@@ -607,73 +710,72 @@ then the pattern used for testing is the second pattern, otherwise it is
the third. Only one of the second and third patterns is evaluated.
.PP
The
-.IB pattern1 ", " pattern2"
+.IB pattern1 ", " pattern2
form of an expression is called a range pattern.
-It matches all input lines starting with a line that matches
+It matches all input records starting with a record that matches
.IR pattern1 ,
-and continuing until a line that matches
+and continuing until a record that matches
.IR pattern2 ,
inclusive. It does not combine with any other sort of pattern expression.
.SS Regular Expressions
Regular expressions are the extended kind found in
.IR egrep .
They are composed of characters as follows:
-.RS
-.TP \l'[^abc...]'
+.TP \w'[^abc...]'u+1n
.I c
matches the non-metacharacter
.IR c .
-.TP \l'[^abc...]'
+.TP
.I \ec
matches the literal character
.IR c .
-.TP \l'[^abc...]'
+.TP
.B .
matches any character except newline.
-.TP \l'[^abc...]'
+.TP
.B ^
matches the beginning of a line or a string.
-.TP \l'[^abc...]'
+.TP
.B $
matches the end of a line or a string.
-.TP \l'[^abc...]'
+.TP
.BI [ abc... ]
character class, matches any of the characters
.IR abc... .
-.TP \l'[^abc...]'
+.TP
.BI [^ abc... ]
negated character class, matches any character except
.I abc...
and newline.
-.TP \l'[^abc...]'
+.TP
.IB r1 | r2
alternation: matches either
.I r1
or
.IR r2 .
-.TP \l'[^abc...]'
+.TP
.I r1r2
concatenation: matches
.IR r1 ,
and then
.IR r2 .
-.TP \l'[^abc...]'
+.TP
.IB r +
matches one or more
.IR r 's.
-.TP \l'[^abc...]'
+.TP
.IB r *
matches zero or more
.IR r 's.
-.TP \l'[^abc...]'
+.TP
.IB r ?
matches zero or one
.IR r 's.
-.TP \l'[^abc...]'
+.TP
.BI ( r )
grouping: matches
.IR r .
-.RE
+.PP
The escape sequences that are valid in string constants (see below)
are also legal in regular expressions.
.SS Actions
@@ -689,13 +791,16 @@ available are patterned after those in C.
.PP
The operators in AWK, in order of increasing precedence, are
.PP
-.RS
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B "= += \-= *= /= %= ^="
+.TP "\w'\fB*= /= %= ^=\fR'u+1n"
+.PD 0
+.B "= += \-="
+.TP
+.PD
+.B "*= /= %= ^="
Assignment. Both absolute assignment
.BI ( var " = " value )
and operator-assignment (the other forms) are supported.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
+.TP
.B ?:
The C conditional expression. This has the form
.IB expr1 " ? " expr2 " : " expr3\c
@@ -710,41 +815,60 @@ Only one of
and
.I expr3
is evaluated.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
+.TP
.B ||
-logical OR.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
+Logical OR.
+.TP
.B &&
-logical AND.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
+Logical AND.
+.TP
.B "~ !~"
-regular expression match, negated match.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
-.B "< <= > >= != =="
-the regular relational operators.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
+Regular expression match, negated match.
+.B NOTE:
+Do not use a constant regular expression
+.RB ( /foo/ )
+on the left-hand side of a
+.B ~
+or
+.BR !~ .
+Only use one on the right-hand side. The expression
+.BI "/foo/ ~ " exp
+has the same meaning as \fB(($0 ~ /foo/) ~ \fIexp\fB)\fR.
+This is usually
+.I not
+what was intended.
+.TP
+.PD 0
+.B "< >"
+.TP
+.PD 0
+.B "<= >="
+.TP
+.PD
+.B "!= =="
+The regular relational operators.
+.TP
.I blank
-string concatenation.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
+String concatenation.
+.TP
.B "+ \-"
-addition and subtraction.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
+Addition and subtraction.
+.TP
.B "* / %"
-multiplication, division, and modulus.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
+Multiplication, division, and modulus.
+.TP
.B "+ \- !"
-unary plus, unary minus, and logical negation.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
+Unary plus, unary minus, and logical negation.
+.TP
.B ^
-exponentiation (\fB**\fR may also be used, and \fB**=\fR for
+Exponentiation (\fB**\fR may also be used, and \fB**=\fR for
the assignment operator).
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
+.TP
.B "++ \-\^\-"
-increment and decrement, both prefix and postfix.
-.TP \l'\fB= += \-= *= /= %= ^=\fR'
+Increment and decrement, both prefix and postfix.
+.TP
.B $
-field reference.
-.RE
+Field reference.
.SS Control Statements
.PP
The control statements are
@@ -768,71 +892,68 @@ as follows:
.PP
The input/output statements are as follows:
.PP
-.RS
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+.TP "\w'\fBprintf \fIfmt, expr-list\fR'u+1n"
.BI close( filename )
-close file (or pipe, see below).
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+Close file (or pipe, see below).
+.TP
.B getline
-set
+Set
.B $0
from next input record; set
.BR NF ,
.BR NR ,
.BR FNR .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+.TP
.BI "getline <" file
-set
+Set
.B $0
from next record of
.IR file ;
set
.BR NF .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+.TP
.BI getline " var"
-set
+Set
.I var
from next input record; set
.BR NF ,
.BR FNR .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+.TP
.BI getline " var" " <" file
-set
+Set
.I var
from next record of
.IR file .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+.TP
.B next
Stop processing the current input record. The next input record
is read and processing starts over with the first pattern in the
AWK program. If the end of the input data is reached, the
.B END
block(s), if any, are executed.
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+.TP
.B print
-prints the current record.
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+Prints the current record.
+.TP
.BI print " expr-list"
-prints expressions.
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+Prints expressions.
+.TP
.BI print " expr-list" " >" file
-prints expressions on
+Prints expressions on
.IR file .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+.TP
.BI printf " fmt, expr-list"
-format and print.
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+Format and print.
+.TP
.BI printf " fmt, expr-list" " >" file
-format and print on
+Format and print on
.IR file .
-.TP \l'\fBprintf \fIfmt, expr-list\fR'
+.TP
.BI system( cmd-line )
-execute the command
+Execute the command
.IR cmd-line ,
and return the exit status.
-(This may not be available on
-systems besides \s-1UNIX\s+1 and \s-1GNU\s+1.)
-.RE
+(This may not be available on non-\*(PX systems.)
.PP
Other input/output redirections are also allowed. For
.B print
@@ -854,14 +975,14 @@ will return 0 on end of file, and \-1 on an error.
.PP
The AWK versions of the
.B printf
-and
-.B sprintf
+statement and
+.B sprintf()
+function
(see below)
-functions accept the following conversion specification formats:
-.RS
+accept the following conversion specification formats:
.TP
.B %c
-An ASCII character.
+An \s-1ASCII\s+1 character.
If the argument used for
.B %c
is numeric, it is treated as a character and printed.
@@ -911,12 +1032,10 @@ instead of
A single
.B %
character; no argument is converted.
-.RE
.PP
There are optional, additional parameters that may lie between the
.B %
and the control letter:
-.RS
.TP
.B \-
The expression should be left-justified within its field.
@@ -929,18 +1048,25 @@ Otherwise it is padded with blanks.
.BI . prec
A number indicating the maximum width of strings or digits to the right
of the decimal point.
-.RE
.PP
The dynamic
.I width
and
.I prec
-capabilities of the C library
+capabilities of the \*(AN C
+.B printf()
+routines are supported.
+A
+.B *
+in place of either the
+.B width
+or
+.B prec
+specifications will cause their values to be taken from
+the argument list to
.B printf
-routines are not supported.
-However, they may be simulated by using
-the AWK concatenation operation to build up
-a format specification dynamically.
+or
+.BR sprintf() .
.SS Special File Names
.PP
When doing I/O redirection from either
@@ -956,8 +1082,7 @@ recognizes certain special filenames internally. These filenames
allow access to open file descriptors inherited from
.IR gawk 's
parent process (usually the shell). The filenames are:
-.RS
-.TP
+.TP \w'\fB/dev/fd/\^\fIn\fR'u+1n
.B /dev/stdin
The standard input.
.TP
@@ -970,7 +1095,6 @@ The standard error output.
.BI /dev/fd/\^ n
The file denoted by the open file descriptor
.IR n .
-.RE
.PP
These are particularly useful for error messages. For example:
.PP
@@ -993,34 +1117,33 @@ These file names may also be used on the command line to name data files.
.PP
AWK has the following pre-defined arithmetic functions:
.PP
-.RS
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
+.TP \w'\fBsrand(\^\fIexpr\^\fB)\fR'u+1n
.BI atan2( y , " x" )
returns the arctangent of
.I y/x
in radians.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
+.TP
.BI cos( expr )
returns the cosine in radians.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
+.TP
.BI exp( expr )
the exponential function.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
+.TP
.BI int( expr )
truncates to integer.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
+.TP
.BI log( expr )
the natural logarithm function.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
+.TP
.B rand()
returns a random number between 0 and 1.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
+.TP
.BI sin( expr )
returns the sine in radians.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
+.TP
.BI sqrt( expr )
the square root function.
-.TP \l'\fBsrand(\fIexpr\fB)\fR'
+.TP
.BI srand( expr )
use
.I expr
@@ -1029,13 +1152,11 @@ as a new seed for the random number generator. If no
is provided, the time of day will be used.
The return value is the previous seed for the random
number generator.
-.RE
.SS String Functions
.PP
AWK has the following pre-defined string functions:
.PP
-.RS
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
+.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n"
\fBgsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR
for each substring matching the regular expression
.I r
@@ -1048,7 +1169,7 @@ If
.I t
is not supplied, use
.BR $0 .
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
+.TP
.BI index( s , " t" )
returns the index of the string
.I t
@@ -1057,11 +1178,16 @@ in the string
or 0 if
.I t
is not present.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
+.TP
.BI length( s )
returns the length of the string
-.IR s .
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
+.IR s ,
+or the length of
+.B $0
+if
+.I s
+is not supplied.
+.TP
.BI match( s , " r" )
returns the position in
.I s
@@ -1073,7 +1199,7 @@ is not present, and sets the values of
.B RSTART
and
.BR RLENGTH .
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
+.TP
\fBsplit(\fIs\fB, \fIa\fB, \fIr\fB)\fR
splits the string
.I s
@@ -1086,19 +1212,19 @@ and returns the number of fields. If
is omitted,
.B FS
is used instead.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
+.TP
.BI sprintf( fmt , " expr-list" )
prints
.I expr-list
according to
.IR fmt ,
and returns the resulting string.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
+.TP
\fBsub(\fIr\fB, \fIs\fB, \fIt\fB)\fR
-this is just like
-.BR gsub ,
+just like
+.BR gsub() ,
but only the first matching substring is replaced.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
+.TP
\fBsubstr(\fIs\fB, \fIi\fB, \fIn\fB)\fR
returns the
.IR n -character
@@ -1111,7 +1237,7 @@ If
is omitted, the rest of
.I s
is used.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
+.TP
.BI tolower( str )
returns a copy of the string
.IR str ,
@@ -1119,7 +1245,7 @@ with all the upper-case characters in
.I str
translated to their corresponding lower-case counterparts.
Non-alphabetic characters are left unchanged.
-.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR'
+.TP
.BI toupper( str )
returns a copy of the string
.IR str ,
@@ -1127,7 +1253,43 @@ with all the lower-case characters in
.I str
translated to their corresponding upper-case counterparts.
Non-alphabetic characters are left unchanged.
-.RE
+.SS Time Functions
+.PP
+Since one of the primary uses of AWK programs is processing log files
+that contain time stamp information,
+.I gawk
+provides the following two functions for obtaining time stamps and
+formatting them.
+.PP
+.TP "\w'\fBsystime()\fR'u+1n"
+.B systime()
+returns the current time of day as the number of seconds since the Epoch
+(Midnight UTC, January 1, 1970 on \*(PX systems).
+.TP
+\fBstrftime(\fIformat\fB, \fItimestamp\fB)\fR
+formats
+.I timestamp
+according to the specification in
+.IR format .
+The
+.I timestamp
+should be of the same form as returned by
+.BR systime() .
+If
+.I timestamp
+is missing, the current time of day is used.
+See the specification for the
+.B strftime()
+function in \*(AN C for the format conversions that are
+guaranteed to be available.
+A public-domain version of
+.IR strftime (3)
+and a man page for it are shipped with
+.IR gawk ;
+if that version was used to build
+.IR gawk ,
+then all of the conversions described in that man page are available to
+.IR gawk .
.SS String Constants
.PP
String constants in AWK are sequences of characters enclosed
@@ -1135,49 +1297,47 @@ between double quotes (\fB"\fR). Within strings, certain
.I "escape sequences"
are recognized, as in C. These are:
.PP
-.RS
-.TP \l'\fB\e\fIddd\fR'
+.TP \w'\fB\e\^\fIddd\fR'u+1n
.B \e\e
A literal backslash.
-.TP \l'\fB\e\fIddd\fR'
+.TP
.B \ea
-The ``alert'' character; usually the ASCII BEL character.
-.TP \l'\fB\e\fIddd\fR'
+The ``alert'' character; usually the \s-1ASCII\s+1 \s-1BEL\s+1 character.
+.TP
.B \eb
backspace.
-.TP \l'\fB\e\fIddd\fR'
+.TP
.B \ef
form-feed.
-.TP \l'\fB\e\fIddd\fR'
+.TP
.B \en
new line.
-.TP \l'\fB\e\fIddd\fR'
+.TP
.B \er
carriage return.
-.TP \l'\fB\e\fIddd\fR'
+.TP
.B \et
horizontal tab.
-.TP \l'\fB\e\fIddd\fR'
+.TP
.B \ev
vertical tab.
-.TP \l'\fB\e\fIddd\fR'
+.TP
.BI \ex "\^hex digits"
The character represented by the string of hexadecimal digits following
the
.BR \ex .
-As in ANSI C, all following hexadecimal digits are considered part of
+As in \*(AN C, all following hexadecimal digits are considered part of
the escape sequence.
(This feature should tell us something about language design by committee.)
-E.g., "\ex1B" is the ASCII ESC (escape) character.
-.TP \l'\fB\e\fIddd\fR'
+E.g., "\ex1B" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
+.TP
.BI \e ddd
The character represented by the 1-, 2-, or 3-digit sequence of octal
-digits. E.g. "\e033" is the ASCII ESC (escape) character.
-.TP \l'\fB\e\fIddd\fR'
+digits. E.g. "\e033" is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character.
+.TP
.BI \e c
The literal character
.IR c\^ .
-.RE
.PP
The escape sequences may also be used inside constant regular expressions
(e.g.,
@@ -1196,7 +1356,7 @@ call are used to instantiate the formal parameters declared in the function.
Arrays are passed by reference, other variables are passed by value.
.PP
Since functions were not originally part of the AWK language, the provision
-for local variables is rather clumsy: they are declared as extra parameters
+for local variables is rather clumsy: They are declared as extra parameters
in the parameter list. The convention is to separate local variables from
real parameters by extra spaces in the parameter list. For example:
.PP
@@ -1261,11 +1421,12 @@ Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger,
Addison-Wesley, 1988. ISBN 0-201-07981-X.
.PP
.IR "The GAWK Manual" ,
-published by the Free Software Foundation, 1989.
-.SH SYSTEM V RELEASE 4 COMPATIBILITY
+published by the Free Software Foundation, 1991.
+.SH POSIX COMPATIBILITY
A primary goal for
.I gawk
-is compatibility with the latest version of \s-1UNIX\s+1
+is compatibility with the \*(PX standard, as well as with the
+latest version of \*(UX
.IR awk .
To this end,
.I gawk
@@ -1273,7 +1434,7 @@ incorporates the following user visible
features which are not described in the AWK book,
but are part of
.I awk
-in System V Release 4.
+in System V Release 4, and are in the \*(PX standard.
.PP
The
.B \-v
@@ -1292,6 +1453,11 @@ When
.I awk
was changed to match its documentation, this option was added to
accomodate applications that depended upon the old behaviour.
+(This feature was agreed upon by both the AT&T and GNU developers.)
+.PP
+The
+.B \-W
+option for implementation specific features is from the \*(PX standard.
.PP
When processing arguments,
.I gawk
@@ -1300,8 +1466,9 @@ arguments, and warns about, but otherwise ignores, undefined options.
.PP
The AWK book does not define the return value of
.BR srand() .
-The System V Release 4 version of \s-1UNIX\s+1
+The System V Release 4 version of \*(UX
.I awk
+(and the \*(PX standard)
has it return the seed it was using, to allow keeping track
of random number sequences. Therefore
.B srand()
@@ -1312,79 +1479,80 @@ also returns its current seed.
Other new features are:
The use of multiple
.B \-f
-options; the
+options (from MKS
+.IR awk );
+the
.B ENVIRON
array; the
.BR \ea ,
and
-.BR \ev ,
-.B \ex
-escape sequences; the
-.B tolower
+.BR \ev
+escape sequences (done originally in
+.I gawk
+and fed back into AT&T's); the
+.B tolower()
and
-.B toupper
-built-in functions; and the ANSI C conversion specifications in
-.BR printf .
+.B toupper()
+built-in functions (from AT&T); and the \*(AN C conversion specifications in
+.B printf
+(done first in AT&T's version).
.SH GNU EXTENSIONS
.I Gawk
-has some extensions to System V
+has some extensions to \*(PX
.IR awk .
They are described in this section. All the extensions described here
-can be disabled by compiling
-.I gawk
-with
-.BR \-DSTRICT ,
-or by invoking
+can be disabled by
+invoking
.I gawk
with the
-.B \-c
+.B "\-W compat"
option.
-If the underlying operating system supports the
-.B /dev/fd
-directory and corresponding files, then
-.I gawk
-can be compiled with
-.B \-DNO_DEV_FD
-to disable the special filename processing.
.PP
The following features of
.I gawk
are not available in
-System V
+\*(PX
.IR awk .
.RS
-.TP \l'\(bu'
+.TP \w'\(bu'u+1n
+\(bu
+The
+.B \ex
+escape sequence.
+.TP
+\(bu
+The
+.B systime()
+and
+.B strftime()
+functions.
+.TP
\(bu
The special file names available for I/O redirection are not recognized.
-.TP \l'\(bu'
+.TP
\(bu
The
.B IGNORECASE
variable and its side-effects are not available.
-.TP \l'\(bu'
+.TP
+\(bu
+The
+.B FIELDWIDTHS
+variable and fixed width field splitting.
+.TP
\(bu
No path search is performed for files named via the
.B \-f
option. Therefore the
.B AWKPATH
environment variable is not special.
-.TP \l'\(bu'
-\(bu
-The
-.BR \-a ,
-.BR \-e ,
-.BR \-c ,
-.BR \-C ,
-and
-.B \-V
-command line options.
.RE
.PP
The AWK book does not define the return value of the
-.B close
+.B close()
function.
.IR Gawk\^ 's
-.B close
+.B close()
returns the value from
.IR fclose (3),
or
@@ -1394,7 +1562,7 @@ when closing a file or pipe, respectively.
When
.I gawk
is invoked with the
-.B \-c
+.B "\-W compat"
option,
if the
.I fs
@@ -1406,15 +1574,13 @@ will be set to the tab character.
Since this is a rather ugly special case, it is not the default behavior.
.ig
.PP
-The rest of the features described in this section may change at some time in
-the future, or may go away entirely.
-You should not write programs that depend upon them.
-.PP
-.I Gawk
-accepts the following additional options:
+If
+.I gawk
+was compiled for debugging, it will
+accept the following additional options:
.TP
-.B \-D
-Turn on general debugging and turn on
+.B "\-W parsedebug"
+Turn on
.IR yacc (1)
or
.IR bison (1)
@@ -1423,26 +1589,37 @@ This option should only be of interest to the
.I gawk
maintainers, and may not even be compiled into
.IR gawk .
-.TP
-.B \-d
-Turn on general debugging and print the
-.I gawk
-internal tree as the program is executed.
-This option should only be of interest to the
-.I gawk
-maintainers, and may not even be compiled into
-.IR gawk .
..
.SH BUGS
The
.B \-F
option is not necessary given the command line variable assignment feature;
it remains only for backwards compatibility.
+.SH VERSION INFORMATION
+This man page documents
+.IR gawk ,
+version 2.13.
+.PP
+For the 2.13 version of
+.IR gawk ,
+the
+.BR \-c ,
+.BR \-V ,
+.BR \-C ,
+.ig
+.BR \-D ,
+..
+.BR \-a ,
+and
+.B \-e
+options of the 2.11 version are recognized. However,
+.I gawk
+will print a warning message,
+and these options will go away in the 2.14 version.
.PP
-There are now too many options.
-Fortunately, most of them are rarely needed.
+The 2.12 version was a development version that was not officially released.
.SH AUTHORS
-The original version of \s-1UNIX\s+1
+The original version of \*(UX
.I awk
was designed and implemented by Alfred Aho,
Peter Weinberger, and Brian Kernighan of AT&T Bell Labs. Brian Kernighan
@@ -1453,12 +1630,12 @@ of the Free Software Foundation, wrote
.IR gawk ,
to be compatible with the original version of
.I awk
-distributed in Seventh Edition \s-1UNIX\s+1.
+distributed in Seventh Edition \*(UX.
John Woods contributed a number of bug fixes.
David Trueman of Dalhousie University, with contributions
-from Arnold Robbins at Emory University, made
+from Arnold Robbins at Emory University and AudioFAX, made
.I gawk
-compatible with the new version of \s-1UNIX\s+1
+compatible with the new version of \*(UX
.IR awk .
.SH ACKNOWLEDGEMENTS
Brian Kernighan of Bell Labs
diff --git a/gawk.texinfo b/gawk.texinfo
index 84ba0da5..fda27951 100644
--- a/gawk.texinfo
+++ b/gawk.texinfo
@@ -3,8 +3,15 @@
@setfilename gawk-info
@settitle The GAWK Manual
@c %**end of header (This is for running Texinfo on a region.)
+
+@ifinfo
+@synindex fn cp
+@synindex vr cp
+@end ifinfo
+@iftex
@syncodeindex fn cp
@syncodeindex vr cp
+@end iftex
@iftex
@finalout
@@ -54,7 +61,7 @@ by the Foundation.
@center Paul H. Rubin
@center Richard Stallman
@sp 2
-@center Edition 0.11 Beta
+@center Edition 0.12 Beta
@sp 2
@center October 1989
@@ -66,7 +73,7 @@ by the Foundation.
Copyright @copyright{} 1989 Free Software Foundation, Inc.
@sp 2
-This is Edition 0.11 Beta of @cite{The GAWK Manual}, @*
+This is Edition 0.12 Beta of @cite{The GAWK Manual}, @*
for the 2.11.1 version of the GNU implementation @*
of AWK.
@@ -3512,9 +3519,9 @@ awk '! /foo/' BBS-list
@end example
Note that boolean patterns are a special case of expression patterns
-(@pxref{Expression Patterns}); they are expressions that use the boolean
-operators. For complete information on the boolean operators, see
-@ref{Boolean Ops}.
+(@pxref{Expression Patterns}); they are expressions that use the
+boolean operators. @xref{Boolean Ops}, for complete information on
+the boolean operators.
The subpatterns of a boolean pattern can be constant regular
expressions, comparisons, or any other @code{gawk} expressions. Range
@@ -3657,7 +3664,7 @@ action for these rules since there is no current record when they run.
An @code{awk} @dfn{program} or @dfn{script} consists of a series of
@dfn{rules} and function definitions, interspersed. (Functions are
-described later; see @ref{User-defined}.)
+described later. @xref{User-defined}.)
A rule contains a pattern and an @dfn{action}, either of which may be
omitted. The purpose of the action is to tell @code{awk} what to do
@@ -5500,7 +5507,7 @@ function @code{length}.
@example
# Record a 1 for each word that is used at least once.
@{
- for (i = 0; i < NF; i++)
+ for (i = 1; i <= NF; i++)
used[$i] = 1
@}
@@ -5857,6 +5864,15 @@ numbers that are truly unpredictable.
The return value of @code{srand} is the previous seed. This makes it
easy to keep track of the seeds for use in consistently reproducing
sequences of random numbers.
+
+@item time()
+The function @code{time} (not in all versions of @code{awk}) returns the
+current time in seconds since January 1, 1970.
+
+@item ctime(@var{then})
+The function @code{ctime} (not in all versions of @code{awk}) takes a numeric
+argument in seconds and returns a string representing the corresponding date,
+suitable for printing or further processing.
@end table
@node String Functions, I/O Functions, Numeric Functions, Built-in
diff --git a/io.c b/io.c
index a7ef2d61..02852f15 100644
--- a/io.c
+++ b/io.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -24,43 +24,45 @@
*/
#include "awk.h"
+
#ifndef O_RDONLY
#include <fcntl.h>
#endif
-#include <signal.h>
-extern FILE *popen();
+#ifndef atarist
+#define INVALID_HANDLE (-1)
+#else
+#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1)
+#endif
-static void do_file();
-static IOBUF *nextfile();
-static int get_a_record();
-static int iop_close();
-static IOBUF *iop_alloc();
-static void close_one();
-static int close_redir();
-static IOBUF *gawk_popen();
-static int gawk_pclose();
+static IOBUF *nextfile P((void));
+static int inrec P((IOBUF *iop, int getline_redirect));
+static int iop_close P((IOBUF *iop));
+struct redirect *redirect P((NODE *tree, int *errflg));
+static void close_one P((void));
+static int close_redir P((struct redirect *rp));
+#if (!defined(MSDOS)) && (!defined(atarist))
+static int wait_any P((int interesting));
+#endif
+static IOBUF *gawk_popen P((char *cmd, struct redirect *rp));
+static int gawk_pclose P((struct redirect *rp));
+static int do_pathopen P((char *file));
static struct redirect *red_head = NULL;
-static int getline_redirect = 0; /* "getline <file" being executed */
+static IOBUF *curfile = NULL;
-extern char *line_buf;
extern int output_is_tty;
extern NODE *ARGC_node;
extern NODE *ARGV_node;
extern NODE **fields_arr;
-int field_num;
-
static IOBUF *
nextfile()
{
static int i = 1;
static int files = 0;
- static IOBUF *curfile = NULL;
char *arg;
- char *cp;
- int fd = -1;
+ int fd = INVALID_HANDLE;
if (curfile != NULL && curfile->cnt != EOF)
return curfile;
@@ -68,27 +70,18 @@ nextfile()
arg = (*assoc_lookup(ARGV_node, tmp_number((AWKNUM) i)))->stptr;
if (*arg == '\0')
continue;
- cp = strchr(arg, '=');
- if (cp != NULL) {
- *cp++ = '\0';
- variable(arg)->var_value = make_string(cp, strlen(cp));
- *--cp = '='; /* restore original text of ARGV */
- } else {
+ if (!arg_assign(arg)) {
files++;
- if (STREQ(arg, "-"))
- fd = 0;
- else
- fd = devopen(arg, "r");
- if (fd == -1)
+ fd = devopen(arg, "r");
+ if (fd == INVALID_HANDLE)
fatal("cannot open file `%s' for reading (%s)",
arg, strerror(errno));
/* NOTREACHED */
/* This is a kludge. */
- deref = FILENAME_node->var_value;
- do_deref();
+ unref(FILENAME_node->var_value);
FILENAME_node->var_value =
make_string(arg, strlen(arg));
- FNR_node->var_value->numbr = 0.0;
+ FNR = 0;
i++;
break;
}
@@ -100,129 +93,88 @@ nextfile()
/* FNR is init'ed to 0 */
fd = 0;
}
- if (fd == -1)
+ if (fd == INVALID_HANDLE)
return NULL;
return curfile = iop_alloc(fd);
}
-static IOBUF *
-iop_alloc(fd)
-int fd;
-{
- IOBUF *iop;
- struct stat stb;
-
- /*
- * System V doesn't have the file system block size in the
- * stat structure. So we have to make some sort of reasonable
- * guess. We use stdio's BUFSIZ, since that is what it was
- * meant for in the first place.
- */
-#ifdef BLKSIZE_MISSING
-#define DEFBLKSIZE BUFSIZ
-#else
-#define DEFBLKSIZE (stb.st_blksize ? stb.st_blksize : BUFSIZ)
-#endif
-
- if (fd == -1)
- return NULL;
- emalloc(iop, IOBUF *, sizeof(IOBUF), "nextfile");
- iop->flag = 0;
- if (isatty(fd)) {
- iop->flag |= IOP_IS_TTY;
- iop->size = BUFSIZ;
- } else if (fstat(fd, &stb) == -1)
- fatal("can't stat fd %d (%s)", fd, strerror(errno));
- else if (lseek(fd, 0L, 0) == -1)
- iop->size = DEFBLKSIZE;
- else
- iop->size = (stb.st_size < DEFBLKSIZE ?
- stb.st_size+1 : DEFBLKSIZE);
- errno = 0;
- iop->fd = fd;
- emalloc(iop->buf, char *, iop->size, "nextfile");
- iop->off = iop->buf;
- iop->cnt = 0;
- iop->secsiz = iop->size < BUFSIZ ? iop->size : BUFSIZ;
- emalloc(iop->secbuf, char *, iop->secsiz, "nextfile");
- return iop;
-}
-
void
-do_input()
+set_FNR()
{
- IOBUF *iop;
- extern int exiting;
-
- while ((iop = nextfile()) != NULL) {
- do_file(iop);
- if (exiting)
- break;
- }
+ FNR = (int) FNR_node->var_value->numbr;
}
-static int
-iop_close(iop)
-IOBUF *iop;
+void
+set_NR()
{
- int ret;
-
- ret = close(iop->fd);
- if (ret == -1)
- warning("close of fd %d failed (%s)", iop->fd, strerror(errno));
- free(iop->buf);
- free(iop->secbuf);
- free((char *)iop);
- return ret == -1 ? 1 : 0;
+ NR = (int) NR_node->var_value->numbr;
}
/*
* This reads in a record from the input file
*/
static int
-inrec(iop)
+inrec(iop, getline_redirect)
IOBUF *iop;
+int getline_redirect;
{
- int cnt;
+ char *begin;
+ register int cnt;
int retval = 0;
- cnt = get_a_record(&line_buf, iop);
+ cnt = get_a_record(&begin, iop, *RS);
if (cnt == EOF) {
cnt = 0;
retval = 1;
- } else {
- if (!getline_redirect) {
- assign_number(&NR_node->var_value,
- NR_node->var_value->numbr + 1.0);
- assign_number(&FNR_node->var_value,
- FNR_node->var_value->numbr + 1.0);
- }
+ } else if (!getline_redirect) {
+ NR += 1;
+ FNR += 1;
}
- set_record(line_buf, cnt);
+ set_record(begin, cnt, 1);
return retval;
}
-static void
-do_file(iop)
+static int
+iop_close(iop)
IOBUF *iop;
{
- /* This is where it spends all its time. The infamous MAIN LOOP */
- if (inrec(iop) == 0)
- while (interpret(expression_value) && inrec(iop) == 0)
- ;
- (void) iop_close(iop);
+ int ret;
+
+ if (iop == NULL)
+ return 0;
+ errno = 0;
+
+ /* Work around bug in UNICOS popen, but it shouldn't hurt elsewhere */
+ if (iop->fd < 3)
+ ret = 0;
+ else
+ ret = close(iop->fd);
+ if (ret == -1)
+ warning("close of fd %d failed (%s)", iop->fd, strerror(errno));
+ free(iop->buf);
+ free(iop->secbuf);
+ if (iop == curfile)
+ curfile = NULL; /* kludge -- gotta do better */
+ free((char *)iop);
+ return ret == -1 ? 1 : 0;
}
-int
-get_rs()
+void
+do_input()
{
- register NODE *tmp;
+ IOBUF *iop;
+ extern int exiting;
- tmp = force_string(RS_node->var_value);
- if (tmp->stlen == 0)
- return 0;
- return *(tmp->stptr);
+ while ((iop = nextfile()) != NULL) {
+ if (inrec(iop, 0) == 0)
+ while (interpret(expression_value) && inrec(iop, 0) == 0)
+ ;
+ (void) iop_close(iop);
+ iop = NULL;
+ if (exiting)
+ break;
+ }
}
/* Redirection for printf and print commands */
@@ -263,7 +215,8 @@ int *errflg;
tmp = force_string(tree_eval(tree->subnode));
str = tmp->stptr;
for (rp = red_head; rp != NULL; rp = rp->next)
- if (STREQ(rp->value, str)
+ if (strlen(rp->value) == tmp->stlen
+ && STREQN(rp->value, str, tmp->stlen)
&& ((rp->flag & ~RED_NOBUF) == tflag
|| (outflag
&& (rp->flag & (RED_FILE|RED_WRITE)) == outflag)))
@@ -272,12 +225,14 @@ int *errflg;
emalloc(rp, struct redirect *, sizeof(struct redirect),
"redirect");
emalloc(str, char *, tmp->stlen+1, "redirect");
- memcpy(str, tmp->stptr, tmp->stlen+1);
+ memcpy(str, tmp->stptr, tmp->stlen);
+ str[tmp->stlen] = '\0';
rp->value = str;
rp->flag = tflag;
- rp->offset = 0;
rp->fp = NULL;
rp->iop = NULL;
+ rp->pid = 0; /* unlikely that we're worried about init */
+ rp->status = 0;
/* maintain list in most-recently-used first order */
if (red_head)
red_head->prev = rp;
@@ -291,6 +246,8 @@ int *errflg;
switch (tree->type) {
case Node_redirect_output:
mode = "w";
+ if (rp->flag & RED_USED)
+ mode = "a";
break;
case Node_redirect_append:
mode = "a";
@@ -316,15 +273,26 @@ int *errflg;
}
if (mode != NULL) {
fd = devopen(str, mode);
- if (fd != -1) {
- rp->fp = fdopen(fd, mode);
+ if (fd > INVALID_HANDLE) {
+ if (fd == fileno(stdin))
+ rp->fp = stdin;
+ else if (fd == fileno(stdout))
+ rp->fp = stdout;
+ else if (fd == fileno(stderr))
+ rp->fp = stderr;
+ else
+ rp->fp = fdopen(fd, mode);
if (isatty(fd))
rp->flag |= RED_NOBUF;
}
}
if (rp->fp == NULL && rp->iop == NULL) {
/* too many files open -- close one and try again */
+#ifdef atarist
+ if (errno == EMFILE)
+#else
if (errno == ENFILE || errno == EMFILE)
+#endif
close_one();
else {
/*
@@ -341,15 +309,13 @@ int *errflg;
|| tree->type == Node_redirect_append)
fatal("can't redirect %s `%s' (%s)",
direction, str, strerror(errno));
- else
+ else {
+ free_temp(tmp);
return NULL;
+ }
}
}
}
- if (rp->offset != 0) /* this file was previously open */
- if (fseek(rp->fp, rp->offset, 0) == -1)
- fatal("can't seek to %ld on `%s' (%s)",
- rp->offset, str, strerror(errno));
free_temp(tmp);
return rp;
}
@@ -366,7 +332,8 @@ close_one()
/* now work back up through the list */
for (rp = rplast; rp != NULL; rp = rp->prev)
if (rp->fp && (rp->flag & RED_FILE)) {
- rp->offset = ftell(rp->fp);
+ rp->flag |= RED_USED;
+ errno = 0;
if (fclose(rp->fp))
warning("close of \"%s\" failed (%s).",
rp->value, strerror(errno));
@@ -387,14 +354,17 @@ NODE *tree;
tmp = force_string(tree_eval(tree->subnode));
for (rp = red_head; rp != NULL; rp = rp->next) {
- if (STREQ(rp->value, tmp->stptr))
+ if (strlen(rp->value) == tmp->stlen
+ && STREQN(rp->value, tmp->stptr, tmp->stlen))
break;
}
free_temp(tmp);
if (rp == NULL) /* no match */
return tmp_number((AWKNUM) 0.0);
fflush(stdout); /* synchronize regular output */
- return tmp_number((AWKNUM)close_redir(rp));
+ tmp = tmp_number((AWKNUM)close_redir(rp));
+ rp = NULL;
+ return tmp;
}
static int
@@ -403,6 +373,9 @@ register struct redirect *rp;
{
int status = 0;
+ if (rp == NULL)
+ return 0;
+ errno = 0;
if ((rp->flag & (RED_PIPE|RED_WRITE)) == (RED_PIPE|RED_WRITE))
status = pclose(rp->fp);
else if (rp->fp)
@@ -410,9 +383,10 @@ register struct redirect *rp;
else if (rp->iop) {
if (rp->flag & RED_PIPE)
status = gawk_pclose(rp);
- else
+ else {
status = iop_close(rp->iop);
-
+ rp->iop = NULL;
+ }
}
/* SVR4 awk checks and warns about status of close */
if (status)
@@ -449,13 +423,15 @@ flush_io ()
}
for (rp = red_head; rp != NULL; rp = rp->next)
/* flush both files and pipes, what the heck */
- if ((rp->flag & RED_WRITE) && rp->fp != NULL)
+ if ((rp->flag & RED_WRITE) && rp->fp != NULL) {
+ errno = 0;
if (fflush(rp->fp)) {
warning("%s flush of \"%s\" failed (%s).",
(rp->flag & RED_PIPE) ? "pipe" :
"file", rp->value, strerror(errno));
status++;
}
+ }
return status;
}
@@ -463,23 +439,29 @@ int
close_io ()
{
register struct redirect *rp;
+ register struct redirect *next;
int status = 0;
- for (rp = red_head; rp != NULL; rp = rp->next)
+ for (rp = red_head; rp != NULL; rp = next) {
+ next = rp->next;
if (close_redir(rp))
status++;
+ rp = NULL;
+ }
return status;
}
/* devopen --- handle /dev/std{in,out,err}, /dev/fd/N, regular files */
+
int
devopen (name, mode)
char *name, *mode;
{
- int openfd = -1;
+ int openfd = INVALID_HANDLE;
FILE *fdopen ();
- char *cp;
+ char *cp, *ptr;
int flag = 0;
+ struct stat buf;
switch(mode[0]) {
case 'r':
@@ -497,37 +479,95 @@ char *name, *mode;
cant_happen();
}
-#if defined(STRICT) || defined(NO_DEV_FD)
- return (open (name, flag, 0666));
-#else
- if (strict)
- return (open (name, flag, 0666));
+#ifdef VMS
+ if ((openfd = vms_devopen(name)) >= 0)
+ return openfd;
+# define strcmp strcasecmp /* VMS filenames are not case sensitive; */
+# define strncmp strncasecmp /* strncmp() is used by STREQN() below. */
+#endif /*VMS*/
- if (!STREQN (name, "/dev/", 5))
- return (open (name, flag, 0666));
- else
+ if (STREQ(name, "-"))
+ openfd = fileno(stdin);
+ else if (STREQN(name, "/dev/", 5) && stat(name, &buf) == -1) {
cp = name + 5;
- /* XXX - first three tests ignore mode */
- if (STREQ(cp, "stdin"))
- return (0);
- else if (STREQ(cp, "stdout"))
- return (1);
- else if (STREQ(cp, "stderr"))
- return (2);
- else if (STREQN(cp, "fd/", 3)) {
- cp += 3;
- if (sscanf (cp, "%d", & openfd) == 1 && openfd >= 0)
- /* got something */
- return openfd;
- else
- return -1;
- } else
- return (open (name, flag, 0666));
+ /* XXX - first three tests ignore mode */
+ if (STREQ(cp, "stdin") && (flag & O_RDONLY))
+ openfd = fileno(stdin);
+ else if (STREQ(cp, "stdout") && (flag & O_WRONLY))
+ openfd = fileno(stdout);
+ else if (STREQ(cp, "stderr") && (flag & O_WRONLY))
+ openfd = fileno(stderr);
+ else if (STREQN(cp, "fd/", 3)) {
+ cp += 3;
+ openfd = strtol(cp, &ptr, 10);
+ if (openfd <= INVALID_HANDLE || ptr == cp)
+ openfd = INVALID_HANDLE;
+#ifdef VMS
+ } else if (STREQ(cp, "null")) {
+ name = "NL:"; /* "/dev/null" => "NL:" */
+ } else if (STREQ(cp, "tty")) {
+ name = "TT:"; /* "/dev/tty" => "TT:" */
+# undef strcmp
+# undef strncmp
+#endif /*VMS*/
+ }
+ }
+
+ if (openfd != INVALID_HANDLE)
+ return openfd;
+ else
+ return open(name, flag, 0666);
+}
+
+#if defined(MSDOS) || defined(atarist)
+#define PIPES_SIMULATED
#endif
+
+#ifndef PIPES_SIMULATED
+ /* real pipes */
+static int
+wait_any(interesting)
+int interesting; /* pid of interest, if any */
+{
+ SIGTYPE (*hstat)(), (*istat)(), (*qstat)();
+ int pid;
+ int status = 0;
+ struct redirect *redp;
+ extern int errno;
+
+ hstat = signal(SIGHUP, SIG_IGN);
+ istat = signal(SIGINT, SIG_IGN);
+ qstat = signal(SIGQUIT, SIG_IGN);
+ for (;;) {
+ pid = wait(&status);
+ if (interesting && pid == interesting) {
+ break;
+ } else if (pid != -1) {
+ for (redp = red_head; redp != NULL; redp = redp->next)
+ if (pid == redp->pid) {
+ redp->pid = -1;
+ redp->status = status;
+ if (redp->fp) {
+ pclose(redp->fp);
+ redp->fp = 0;
+ }
+ if (redp->iop) {
+ (void) iop_close(redp->iop);
+ redp->iop = 0;
+ }
+ break;
+ }
+ }
+ if (pid == -1 && errno == ECHILD)
+ break;
+ }
+ signal(SIGHUP, hstat);
+ signal(SIGINT, istat);
+ signal(SIGQUIT, qstat);
+ return(status);
}
-#ifndef MSDOS
static IOBUF *
gawk_popen(cmd, rp)
char *cmd;
@@ -536,21 +576,25 @@ struct redirect *rp;
int p[2];
register int pid;
- rp->pid = -1;
- rp->iop = NULL;
+ (void) wait_any(0); /* wait for outstanding processes */
if (pipe(p) < 0)
- return NULL;
+ fatal("cannot open pipe \"%s\" (%s)", cmd, strerror(errno));
if ((pid = fork()) == 0) {
- close(p[0]);
- dup2(p[1], 1);
- close(p[1]);
+ if (close(1) == -1)
+ fatal("close of stdout in child failed (%s)",
+ strerror(errno));
+ if (dup(p[1]) != 1)
+ fatal("dup of pipe failed (%s)", strerror(errno));
+ if (close(p[0]) == -1 || close(p[1]) == -1)
+ fatal("close of pipe failed (%s)", strerror(errno));
execl("/bin/sh", "sh", "-c", cmd, 0);
_exit(127);
}
if (pid == -1)
- return NULL;
+ fatal("cannot fork for \"%s\" (%s)", cmd, strerror(errno));
rp->pid = pid;
- close(p[1]);
+ if (close(p[1]) == -1)
+ fatal("close of pipe failed (%s)", strerror(errno));
return (rp->iop = iop_alloc(p[0]));
}
@@ -558,40 +602,47 @@ static int
gawk_pclose(rp)
struct redirect *rp;
{
- SIGTYPE (*hstat)(), (*istat)(), (*qstat)();
- int pid;
- int status;
- struct redirect *redp;
+ (void) iop_close(rp->iop);
+ rp->iop = NULL;
- iop_close(rp->iop);
+ /* process previously found, return stored status */
if (rp->pid == -1)
- return rp->status;
- hstat = signal(SIGHUP, SIG_IGN);
- istat = signal(SIGINT, SIG_IGN);
- qstat = signal(SIGQUIT, SIG_IGN);
- for (;;) {
- pid = wait(&status);
- if (pid == -1 && errno == ECHILD)
- break;
- else if (pid == rp->pid) {
- rp->pid = -1;
- rp->status = status;
- break;
- } else {
- for (redp = red_head; redp != NULL; redp = redp->next)
- if (pid == redp->pid) {
- redp->pid = -1;
- redp->status = status;
- break;
- }
- }
- }
- signal(SIGHUP, hstat);
- signal(SIGINT, istat);
- signal(SIGQUIT, qstat);
- return(rp->status);
+ return (rp->status >> 8) & 0xFF;
+ rp->status = wait_any(rp->pid);
+ rp->pid = -1;
+ return (rp->status >> 8) & 0xFF;
}
-#else
+
+#else /* PIPES_SIMULATED */
+ /* use temporary file rather than pipe */
+
+#ifdef VMS
+static IOBUF *
+gawk_popen(cmd, rp)
+char *cmd;
+struct redirect *rp;
+{
+ FILE *current;
+
+ if ((current = popen(cmd, "r")) == NULL)
+ return NULL;
+ return (rp->iop = iop_alloc(fileno(current)));
+}
+
+static int
+gawk_pclose(rp)
+struct redirect *rp;
+{
+ int rval, aval, fd = rp->iop->fd;
+ FILE *kludge = fdopen(fd, "r"); /* pclose needs FILE* w/ right fileno */
+
+ rp->iop->fd = dup(fd); /* kludge to allow close() + pclose() */
+ rval = iop_close(rp->iop);
+ aval = pclose(kludge);
+ return (rval < 0 ? rval : aval);
+}
+#else /* VMS */
+
static
struct {
char *command;
@@ -613,7 +664,7 @@ struct redirect *rp;
return NULL;
sprintf(cmdbuf,"%s > %s", cmd, name);
system(cmdbuf);
- if ((current = open(name,O_RDONLY)) == -1)
+ if ((current = open(name,O_RDONLY)) == INVALID_HANDLE)
return NULL;
pipes[current].name = name;
pipes[current].command = strdup(cmd);
@@ -628,6 +679,7 @@ struct redirect *rp;
int rval;
rval = iop_close(rp->iop);
+ rp->iop = NULL;
/* check for an open file */
if (pipes[cur].name == NULL)
@@ -638,134 +690,20 @@ struct redirect *rp;
free(pipes[cur].command);
return rval;
}
-#endif
+#endif /* VMS */
-#define DO_END_OF_BUF len = bp - iop->off;\
- used = last - start;\
- while (len + used > iop->secsiz) {\
- iop->secsiz *= 2;\
- erealloc(iop->secbuf,char *,iop->secsiz,"get");\
- }\
- last = iop->secbuf + used;\
- start = iop->secbuf;\
- memcpy(last, iop->off, len);\
- last += len;\
- iop->cnt = read(iop->fd, iop->buf, iop->size);\
- if (iop->cnt < 0)\
- return iop->cnt;\
- end_data = iop->buf + iop->cnt;\
- iop->off = bp = iop->buf;
-
-#define DO_END_OF_DATA iop->cnt = read(iop->fd, end_data, end_buf - end_data);\
- if (iop->cnt < 0)\
- return iop->cnt;\
- end_data += iop->cnt;\
- if (iop->cnt == 0)\
- break;\
- iop->cnt = end_data - iop->buf;
-
-static int
-get_a_record(res, iop)
-char **res;
-IOBUF *iop;
-{
- register char *end_data;
- register char *end_buf;
- char *start;
- register char *bp;
- register char *last;
- int len, used;
- register char rs = get_rs();
-
- if (iop->cnt < 0)
- return iop->cnt;
- if ((iop->flag & IOP_IS_TTY) && output_is_tty)
- fflush(stdout);
- end_data = iop->buf + iop->cnt;
- if (iop->off >= end_data) {
- iop->cnt = read(iop->fd, iop->buf, iop->size);
- if (iop->cnt <= 0)
- return iop->cnt = EOF;
- end_data = iop->buf + iop->cnt;
- iop->off = iop->buf;
- }
- last = start = bp = iop->off;
- end_buf = iop->buf + iop->size;
- if (rs == 0) {
- while (!(*bp == '\n' && bp != iop->buf && bp[-1] == '\n')) {
- if (++bp == end_buf) {
- DO_END_OF_BUF
- }
- if (bp == end_data) {
- DO_END_OF_DATA
- }
- }
- if (*bp == '\n' && bp != iop->off && bp[-1] == '\n') {
- int tmp = 0;
-
- /* allow for more than two newlines */
- while (*bp == '\n') {
- tmp++;
- if (++bp == end_buf) {
- DO_END_OF_BUF
- }
- if (bp == end_data) {
- DO_END_OF_DATA
- }
- }
- iop->off = bp;
- bp -= 1 + tmp;
- } else if (bp != iop->buf && bp[-1] != '\n') {
- warning("record not terminated");
- iop->off = bp + 2;
- } else {
- bp--;
- iop->off = bp + 2;
- }
- } else {
- while (*bp++ != rs) {
- if (bp == end_buf) {
- DO_END_OF_BUF
- }
- if (bp == end_data) {
- DO_END_OF_DATA
- }
- }
- if (*--bp != rs) {
- warning("record not terminated");
- bp++;
- }
- iop->off = bp + 1;
- }
- if (start == iop->secbuf) {
- len = bp - iop->buf;
- if (len > 0) {
- used = last - start;
- while (len + used > iop->secsiz) {
- iop->secsiz *= 2;
- erealloc(iop->secbuf,char *,iop->secsiz,"get2");
- }
- last = iop->secbuf + used;
- start = iop->secbuf;
- memcpy(last, iop->buf, len);
- last += len;
- }
- } else
- last = bp;
- *last = '\0';
- *res = start;
- return last - start;
-}
+#endif /* PIPES_SIMULATED */
NODE *
do_getline(tree)
NODE *tree;
{
- struct redirect *rp;
+ struct redirect *rp = NULL;
IOBUF *iop;
int cnt;
NODE **lhs;
int redir_error = 0;
+ int getline_redirect = 0;
if (tree->rnode == NULL) { /* no redirection */
iop = nextfile();
@@ -779,33 +717,136 @@ NODE *tree;
getline_redirect++;
}
if (tree->lnode == NULL) { /* no optional var. -- read in $0 */
- if (inrec(iop) != 0) {
- getline_redirect = 0;
+ if (inrec(iop, getline_redirect) != 0)
return tmp_number((AWKNUM) 0.0);
- }
} else { /* read in a named variable */
char *s = NULL;
+ Func_ptr after_assign = NULL;
- lhs = get_lhs(tree->lnode, 1);
- cnt = get_a_record(&s, iop);
+ lhs = get_lhs(tree->lnode, &after_assign);
+ cnt = get_a_record(&s, iop, *RS);
if (!getline_redirect) {
- assign_number(&NR_node->var_value,
- NR_node->var_value->numbr + 1.0);
- assign_number(&FNR_node->var_value,
- FNR_node->var_value->numbr + 1.0);
+ NR += 1;
+ FNR += 1;
}
if (cnt == EOF) {
- getline_redirect = 0;
- free(s);
+ if (rp) {
+ (void) iop_close(iop);
+ rp->iop = NULL;
+ }
return tmp_number((AWKNUM) 0.0);
}
+ unref(*lhs);
*lhs = make_string(s, strlen(s));
- do_deref();
/* we may have to regenerate $0 here! */
- if (field_num == 0)
- set_record(fields_arr[0]->stptr, fields_arr[0]->stlen);
- field_num = -1;
+ if (after_assign)
+ (*after_assign)();
}
- getline_redirect = 0;
return tmp_number((AWKNUM) 1.0);
}
+
+/*
+ * pathopen --- open an awk source file via do_pathopen().
+ *
+ * When DEFAULT_FILETYPE is configured and we are not in strict mode, a
+ * failed open is retried once with DEFAULT_FILETYPE appended to the name.
+ * If the retry fails too, errno (and vaxc$errno on VMS) are put back to
+ * the values left by the first attempt.  Returns whatever handle
+ * do_pathopen() produced last.
+ */
+int
+pathopen (file)
+char *file;
+{
+	int handle;
+
+	handle = do_pathopen(file);
+
+#ifdef DEFAULT_FILETYPE
+	/* nothing more to do in strict mode or once the open succeeded */
+	if (strict || handle > INVALID_HANDLE)
+		return handle;
+
+	{
+		char *with_suffix;
+		int saved_errno = errno;
+#ifdef VMS
+		int saved_vaxc_errno = vaxc$errno;
+#endif
+
+		/* build "file" + DEFAULT_FILETYPE and try once more */
+		emalloc(with_suffix, char *, strlen(file) +
+			sizeof(DEFAULT_FILETYPE) + 1, "pathopen");
+		strcpy(with_suffix, file);
+		strcat(with_suffix, DEFAULT_FILETYPE);
+		handle = do_pathopen(with_suffix);
+		free(with_suffix);
+
+		/* report the error from the name the user actually gave */
+		if (handle <= INVALID_HANDLE) {
+			errno = saved_errno;
+#ifdef VMS
+			vaxc$errno = saved_vaxc_errno;
+#endif
+		}
+	}
+#endif /*DEFAULT_FILETYPE*/
+
+	return handle;
+}
+
+/*
+ * do_pathopen --- locate and open an awk source file for reading.
+ *
+ * "-" yields descriptor 0 without opening anything.  In strict mode the
+ * name is handed straight to open().  A name containing directory
+ * punctuation is opened as given, with no search.  Otherwise each element
+ * of the search path (AWKPATH from the environment if set and non-empty,
+ * else DEFPATH) is tried in order; an empty element means "the name as
+ * given".  If nothing in the path works, the name is tried as given.
+ *
+ * Returns the descriptor from devopen()/open(); a negative value means
+ * the file could not be opened.
+ */
+static int
+do_pathopen (file)
+char *file;
+{
+	static char *savepath = DEFPATH;	/* defined in config.h */
+	static int first = 1;
+	char *awkpath, *cp;
+	char trypath[BUFSIZ];
+	int fd;
+	int filelen;
+
+	if (STREQ(file, "-"))
+		return (0);
+
+	if (strict)
+		return (open (file, 0));
+
+	if (first) {
+		first = 0;
+		if ((awkpath = getenv ("AWKPATH")) != NULL && *awkpath)
+			savepath = awkpath;	/* used for restarting */
+	}
+	awkpath = savepath;
+
+	/* some kind of path name, no search */
+#ifdef VMS	/* (strchr not equal implies either or both not NULL) */
+	if (strchr(file, ':') != strchr(file, ']')
+	    || strchr(file, '>') != strchr(file, '/'))
+#else /*!VMS*/
+#ifdef MSDOS
+	if (strchr(file, '/') != strchr(file, '\\')
+	    || strchr(file, ':') != NULL)
+#else
+	if (strchr(file, '/') != NULL)
+#endif /*MSDOS*/
+#endif /*VMS*/
+		return (devopen (file, "r"));
+
+	filelen = strlen(file);
+	do {
+		char *elem = awkpath;	/* start of this path element */
+		int elemlen;
+
+		/* find the end of the element before copying anything */
+		while (*awkpath && *awkpath != ENVSEP)
+			awkpath++;
+		elemlen = awkpath - elem;
+
+		/*
+		 * element + '/' + file + NUL must fit in trypath; an
+		 * element that cannot fit is skipped rather than being
+		 * allowed to overrun the buffer.
+		 */
+		if (elemlen + 1 + filelen + 1 <= (int) sizeof(trypath)) {
+			for (cp = trypath; elem < awkpath; )
+				*cp++ = *elem++;
+
+			if (cp != trypath) {	/* non-null element in path */
+				/* add directory punctuation only if needed */
+#ifdef VMS
+				if (strchr(":]>/", *(cp-1)) == NULL)
+#else
+#ifdef MSDOS
+				if (strchr(":\\/", *(cp-1)) == NULL)
+#else
+				if (*(cp-1) != '/')
+#endif
+#endif
+					*cp++ = '/';
+				/* append filename */
+				strcpy (cp, file);
+			} else
+				strcpy (trypath, file);
+			if ((fd = devopen (trypath, "r")) >= 0)
+				return (fd);
+		}
+
+		/*
+		 * No luck, keep going.  Always step over the separator:
+		 * leaving a trailing one in place would keep *awkpath
+		 * non-zero forever and the loop would never terminate.
+		 * The empty element after a trailing separator is covered
+		 * by the final devopen() below.
+		 */
+		if (*awkpath == ENVSEP)
+			awkpath++;
+	} while (*awkpath);
+	/*
+	 * You might have one of the awk
+	 * paths defined, WITHOUT the current working directory in it.
+	 * Therefore try to open the file in the current directory.
+	 */
+	return (devopen(file, "r"));
+}
diff --git a/iop.c b/iop.c
new file mode 100644
index 00000000..dae43f42
--- /dev/null
+++ b/iop.c
@@ -0,0 +1,237 @@
+/*
+ * iop.c - do i/o related things.
+ */
+
+/*
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h"
+
+#ifndef atarist
+#define INVALID_HANDLE (-1)
+#else
+#include <stddef.h>
+#include <fcntl.h>
+#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1)
+#endif /* atarist */
+
+
+#ifdef TEST
+int bufsize = 8192;
+#endif
+
+/*
+ * optimal_bufsize --- choose an input buffer size for descriptor fd.
+ *
+ * VMS builds always use BUFSIZ.  TEST builds return the global bufsize.
+ * Otherwise: BUFSIZ for a tty (and for fd 0 on the Atari ST), the
+ * default block size when the descriptor cannot be seeked, and for a
+ * seekable descriptor the smaller of the default block size and the
+ * file's size.  A size that is not positive is ignored.
+ * Calls fatal() if fstat() fails.
+ */
+int
+optimal_bufsize(fd)
+int fd;
+{
+#ifdef VMS
+/* don't even bother trying [fstat() fails across DECnet] */
+	return BUFSIZ;
+#else
+	struct stat stb;
+
+	/*
+	 * System V doesn't have the file system block size in the
+	 * stat structure. So we have to make some sort of reasonable
+	 * guess. We use stdio's BUFSIZ, since that is what it was
+	 * meant for in the first place.
+	 */
+#ifdef BLKSIZE_MISSING
+#define DEFBLKSIZE BUFSIZ
+#else
+#define DEFBLKSIZE (stb.st_blksize ? stb.st_blksize : BUFSIZ)
+#endif
+
+#ifdef TEST
+	return bufsize;
+#endif
+#ifndef atarist
+	if (isatty(fd))
+#else
+	/*
+	 * On ST redirected stdin does not have a name attached
+	 * (this could be hard to do too) and fstat would fail
+	 */
+	if (0 == fd || isatty(fd))
+#endif /*atarist */
+		return BUFSIZ;
+	if (fstat(fd, &stb) == -1)
+		fatal("can't stat fd %d (%s)", fd, strerror(errno));
+	/* NOTE(review): this probe also moves the offset to 0 -- confirm intended */
+	if (lseek(fd, 0L, 0) == -1)
+		return DEFBLKSIZE;
+	/*
+	 * Only trust a positive size.  A reported size of 0 would give a
+	 * zero-length buffer, every read() would ask for 0 bytes, and the
+	 * input would look like immediate end-of-file.
+	 */
+	if (stb.st_size > 0 && stb.st_size < DEFBLKSIZE)
+		return stb.st_size;
+	return DEFBLKSIZE;
+#endif /*! VMS */
+}
+
+/*
+ * iop_alloc --- build an IOBUF around an already-open descriptor.
+ *
+ * Returns NULL when fd is INVALID_HANDLE.  Otherwise allocates the IOBUF,
+ * a primary buffer of optimal_bufsize(fd) bytes and a secondary buffer of
+ * at most BUFSIZ bytes; each gets two extra bytes beyond its nominal
+ * size.  The buffer starts out empty and cnt is set to -1.
+ */
+IOBUF *
+iop_alloc(fd)
+int fd;
+{
+	IOBUF *newbuf;
+
+	if (fd == INVALID_HANDLE)
+		return NULL;
+
+	emalloc(newbuf, IOBUF *, sizeof(IOBUF), "iop_alloc");
+	newbuf->fd = fd;
+	newbuf->flag = isatty(fd) ? IOP_IS_TTY : 0;
+	newbuf->size = optimal_bufsize(fd);
+	errno = 0;	/* cleared after the isatty()/optimal_bufsize() probes */
+
+	/* primary buffer: nominal size plus two spare bytes */
+	emalloc(newbuf->buf, char *, newbuf->size + 2, "iop_alloc");
+	newbuf->off = newbuf->buf;
+	newbuf->end = newbuf->buf;
+
+	/* secondary buffer starts no larger than BUFSIZ */
+	if (newbuf->size < BUFSIZ)
+		newbuf->secsiz = newbuf->size;
+	else
+		newbuf->secsiz = BUFSIZ;
+	emalloc(newbuf->secbuf, char *, newbuf->secsiz+2, "iop_alloc");
+
+	newbuf->cnt = -1;
+	return newbuf;
+}
+
+/*
+ * get_a_record --- read the next input record from iop.
+ *
+ * out	receives a pointer to the NUL-terminated record text, which lives
+ *	either in iop->buf or, for a record that crossed the end of the
+ *	input buffer, in iop->secbuf.
+ * rs	is the record separator character; 0 selects the mode in which a
+ *	record ends at two consecutive newlines.
+ *
+ * Returns the record length with the separator stripped, or EOF when no
+ * more input is available.  Calls fatal() if read() fails.
+ *
+ * The scan loops carry no bounds check: a copy of the separator is stored
+ * just past the valid data (and past the buffer end) as a sentinel, which
+ * is why the buffers are allocated with two spare bytes.
+ */
+int
+get_a_record(out, iop, rs)
+char **out;
+IOBUF *iop;
+register int rs;
+{
+	register char *bp = iop->off;
+	register char *end_data = iop->end;	/* end of current data read */
+	char *end_buf = iop->buf + iop->size;	/* end of input buffer */
+	char *start = iop->off;			/* beginning of record */
+	char *offset = iop->secbuf;		/* end of data in secbuf */
+	size_t size;
+
+	/* a previous read() already hit end of file */
+	if (iop->cnt == 0)
+		return EOF;
+
+	/* set up sentinels */
+	if (rs == 0) {
+		*end_data = *(end_data+1) = '\n';
+		*end_buf = *(end_buf+1) = '\n';
+	} else
+		*end_data = *end_buf = rs;
+
+	for (;;) {	/* break on end of record, read error or EOF */
+
+		if (bp == end_data) {
+			if (bp == end_buf) {	/* record spans buffer end */
+#ifdef atarist
+#define P_DIFF ptrdiff_t
+#else
+#define P_DIFF int
+#endif
+/*
+ * Append start..bp to the secondary buffer, growing it when needed.
+ * The new capacity is recorded in iop->secsiz and, as in iop_alloc(),
+ * two spare bytes are allocated so that the terminating NUL stored at
+ * the end of an unterminated record stays inside the allocation.
+ */
+#define COPY_TO_SECBUF { \
+		P_DIFF oldlen = offset - iop->secbuf; \
+		P_DIFF newlen = bp - start; \
+		\
+		if (iop->secsiz < oldlen + newlen) { \
+			iop->secsiz = oldlen + newlen; \
+			erealloc(iop->secbuf, char *, \
+				iop->secsiz + 2, "get_record"); \
+			offset = iop->secbuf + oldlen; \
+		} \
+		memcpy(offset, start, newlen); \
+		offset += newlen; \
+	}
+				COPY_TO_SECBUF
+				start = bp = iop->buf;
+				size = iop->size;
+			} else
+				size = end_buf - bp;
+			iop->cnt = read(iop->fd, bp, size);
+			if (iop->cnt == -1)
+				fatal("error reading input");
+			else if (iop->cnt == 0) {
+				break;
+			} else {
+				end_data = bp + iop->cnt;
+				/*
+				 * the previous chunk ended in a newline and
+				 * this one starts with one: the separator
+				 * straddled the buffer boundary
+				 */
+				if (rs == 0 && *bp == '\n'
+				    && offset > iop->secbuf
+				    && *(offset-1) == '\n') {
+					bp++;
+					break;
+				}
+				/* re-plant the sentinels after the new data */
+				if (rs == 0) {
+					*end_data = *(end_data+1) = '\n';
+					*end_buf = *(end_buf+1) = '\n';
+				} else
+					*end_data = rs;
+			}
+		}
+		if (rs == 0) {
+			for (;;) {
+				if (*bp++ == '\n' && *bp == '\n') {
+					bp++;
+					break;
+				}
+			}
+		} else
+			while (*bp++ != rs)
+				;
+		if (bp <= end_data)	/* end of record */
+			break;
+		/* only the sentinel matched: back up and read more */
+		bp = end_data;
+	}
+	/* nothing gathered at all and read() reported end of file */
+	if (offset == iop->secbuf && start == bp && iop->cnt == 0) {
+		*out = start;
+		return EOF;
+	}
+	iop->off = bp;
+	iop->end = end_data;
+	if (offset != iop->secbuf) {
+		/* record crossed a buffer boundary: finish it in secbuf */
+		if (start != bp)
+			COPY_TO_SECBUF
+		start = iop->secbuf;
+		bp = offset;
+	}
+	/* strip the trailing separator(s), if present, and terminate */
+	if (rs == 0) {
+		if (*--bp == '\n') {
+			*bp = '\0';
+			if (*--bp == '\n')
+				*bp = '\0';
+			else
+				bp++;
+		} else
+			bp++;
+	} else if (*--bp == rs)
+		;
+	else
+		bp++;
+	*bp = '\0';
+	*out = start;
+	return bp - start;
+}
+
+#ifdef TEST
+/*
+ * Stand-alone driver: copy standard input to standard output one record
+ * at a time (rs == 0), writing a newline after each record.  An optional
+ * first argument sets the buffer size that optimal_bufsize() returns.
+ */
+int
+main(argc, argv)
+int argc;
+char *argv[];
+{
+	IOBUF *iop;
+	char *out;
+	int cnt;
+
+	if (argc > 1)
+		bufsize = atoi(argv[1]);
+	iop = iop_alloc(0);
+	/*
+	 * Stop on EOF only: get_a_record() legitimately returns 0 for an
+	 * empty record, which must not end the run early.
+	 */
+	while ((cnt = get_a_record(&out, iop, 0)) != EOF) {
+		fwrite(out, 1, cnt, stdout);
+		fwrite("\n", 1, 1, stdout);
+	}
+	return 0;
+}
+#endif
diff --git a/main.c b/main.c
index cbdc5e2c..22d583dc 100644
--- a/main.c
+++ b/main.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -25,30 +25,36 @@
#include "awk.h"
#include "patchlevel.h"
-#include <signal.h>
-
-extern int yyparse();
-extern void do_input();
-extern int close_io();
-extern void init_fields();
-extern int getopt();
-extern int re_set_syntax();
-extern NODE *node();
-
-static void usage();
-static void set_fs();
-static void init_vars();
-static void init_args();
-static NODE *spc_var();
-static void pre_assign();
-static void copyleft();
+
+static void usage P((void));
+static void copyleft P((void));
+static void cmdline_fs P((char *str));
+static void init_args P((int argc0, int argc, char *argv0, char **argv));
+static void init_vars P((void));
+static void pre_assign P((char *v));
+SIGTYPE catchsig P((int sig, int code));
+static void gawk_option P((char *optstr));
+static void nostalgia P((void));
/* These nodes store all the special variables AWK uses */
NODE *FS_node, *NF_node, *RS_node, *NR_node;
NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node;
+NODE *CONVFMT_node;
NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node;
NODE *ENVIRON_node, *IGNORECASE_node;
NODE *ARGC_node, *ARGV_node;
+NODE *FIELDWIDTHS_node;
+
+int NF;
+int NR;
+int FNR;
+int IGNORECASE;
+char *FS;
+char *RS;
+char *OFS;
+char *ORS;
+char *OFMT;
+char *CONVFMT;
/*
* The parse tree and field nodes are stored here. Parse_end is a dummy item
@@ -60,7 +66,7 @@ int errcount = 0; /* error counter, used by yyerror() */
NODE *Nnull_string;
/* The name the program was invoked under, for error messages */
-char *myname;
+const char *myname;
/* A block of AWK code to be run before running the program */
NODE *begin_block = 0;
@@ -71,32 +77,35 @@ NODE *end_block = 0;
int exiting = 0; /* Was an "exit" statement executed? */
int exit_val = 0; /* optional exit value */
-#ifdef DEBUG
-/* non-zero means in debugging is enabled. Probably not very useful */
-int debugging = 0;
+#if defined(YYDEBUG) || defined(DEBUG)
extern int yydebug;
#endif
-int tempsource = 0; /* source is in a temp file */
-char **sourcefile = NULL; /* source file name(s) */
+char **srcfiles = NULL; /* source file name(s) */
int numfiles = -1; /* how many source files */
+char *cmdline_src = NULL; /* if prog is on command line */
int strict = 0; /* turn off gnu extensions */
+int do_posix = 0; /* turn off gnu extensions and \x */
+int do_lint = 0; /* provide warnings about questionable stuff */
int output_is_tty = 0; /* control flushing of output */
+extern char *version_string; /* current version, for printing */
+
NODE *expression_value;
/*
* for strict to work, legal options must be first
*
* Unfortunately, -a and -e are orthogonal to -c.
+ *
+ * Note that after 2.13, c,a,e,C,D, and V go away.
*/
-#define EXTENSIONS 8 /* where to clear */
#ifdef DEBUG
-char awk_opts[] = "F:f:v:caeCVdD";
+char awk_opts[] = "F:f:v:W:caeCVD";
#else
-char awk_opts[] = "F:f:v:caeCV";
+char awk_opts[] = "F:f:v:W:caeCV";
#endif
int
@@ -104,45 +113,38 @@ main(argc, argv)
int argc;
char **argv;
{
-#ifdef DEBUG
- /* Print out the parse tree. For debugging */
- register int dotree = 0;
-#endif
- extern char *version_string;
- FILE *fp;
int c;
- extern int opterr, optind;
+ extern int optind;
extern char *optarg;
- extern char *strrchr();
- extern char *tmpnam();
- extern SIGTYPE catchsig();
int i;
- int nostalgia;
-#ifdef somtime_in_the_future
- int regex_mode = RE_SYNTAX_POSIX_EGREP;
-#else
+ int do_nostalgia;
int regex_mode = RE_SYNTAX_AWK;
+
+ (void) signal(SIGFPE, (SIGTYPE (*) P((int))) catchsig);
+ (void) signal(SIGSEGV, (SIGTYPE (*) P((int))) catchsig);
+#ifdef VMS
+ (void) signal(SIGBUS, (SIGTYPE (*) P((int))) catchsig);
#endif
- (void) signal(SIGFPE, catchsig);
- (void) signal(SIGSEGV, catchsig);
+#ifndef VMS
+ myname = basename(argv[0]);
+#else /* VMS */
+ myname = strdup(basename(argv[0]));
+ argv[0] = (char *) myname; /* strip path [prior to getopt()] */
+ vms_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
+#endif
+ if (argc < 2)
+ usage();
+ /* remove sccs gunk */
if (strncmp(version_string, "@(#)", 4) == 0)
version_string += 4;
- myname = strrchr(argv[0], '/');
- if (myname == NULL)
- myname = argv[0];
- else
- myname++;
- if (argc < 2)
- usage();
-
/* initialize the null string */
Nnull_string = make_string("", 0);
Nnull_string->numbr = 0.0;
Nnull_string->type = Node_val;
- Nnull_string->flags = (PERM|STR|NUM|NUMERIC);
+ Nnull_string->flags = (PERM|STR|STRING|NUM|NUMERIC|NUMBER);
/* Set up the special variables */
@@ -153,55 +155,47 @@ char **argv;
init_vars();
/* worst case */
- emalloc(sourcefile, char **, argc * sizeof(char *), "main");
-
-
-#ifdef STRICT /* strict new awk compatibility */
- strict = 1;
- awk_opts[EXTENSIONS] = '\0';
-#endif
+ emalloc(srcfiles, char **, argc * sizeof(char *), "main");
+ srcfiles[0] = NULL;
-#ifndef STRICT
/* undocumented feature, inspired by nostalgia, and a T-shirt */
- nostalgia = 0;
+ do_nostalgia = 0;
for (i = 1; i < argc && argv[i][0] == '-'; i++) {
if (argv[i][1] == '-') /* -- */
break;
- else if (argv[i][1] == 'c') { /* compatibility mode */
- nostalgia = 0;
+ else if (argv[i][1] == 'c') { /* compat not in next release */
+ do_nostalgia = 0;
break;
} else if (STREQ(&argv[i][1], "nostalgia"))
- nostalgia = 1;
+ do_nostalgia = 1;
/* keep looping, in case -c after -nostalgia */
}
- if (nostalgia) {
- fprintf (stderr, "awk: bailing out near line 1\n");
- abort();
+ if (do_nostalgia) {
+ fprintf(stderr, "%s, %s\n",
+ "warning: option -nostalgia will go away in the next release",
+ "use -W nostalgia");
+ nostalgia();
+ /* NOTREACHED */
}
-#endif
-
+
while ((c = getopt (argc, argv, awk_opts)) != EOF) {
switch (c) {
#ifdef DEBUG
- case 'd':
- debugging++;
- dotree++;
- break;
-
case 'D':
- debugging++;
- yydebug = 2;
+ fprintf(stderr,
+"warning: option -D will go away in the next release, use -W parsedebug\n");
+ gawk_option("parsedebug");
break;
#endif
-#ifndef STRICT
case 'c':
- strict = 1;
+ fprintf(stderr,
+ "warning: option -c will go away in the next release, use -W compat\n");
+ gawk_option("compat");
break;
-#endif
case 'F':
- set_fs(optarg);
+ cmdline_fs(optarg);
break;
case 'f':
@@ -210,7 +204,7 @@ char **argv;
* this makes function libraries real easy.
* most of the magic is in the scanner.
*/
- sourcefile[++numfiles] = optarg;
+ srcfiles[++numfiles] = optarg;
break;
case 'v':
@@ -218,20 +212,29 @@ char **argv;
break;
case 'V':
- fprintf(stderr, "%s, patchlevel %d\n",
- version_string, PATCHLEVEL);
+ warning(
+ "option -V will go away in the next release, use -W version");
+ gawk_option("version");
break;
case 'C':
- copyleft();
+ warning(
+ "option -C will go away in the next release, use -W copyright");
+ gawk_option("copyright");
break;
case 'a': /* use old fashioned awk regexps */
- regex_mode = RE_SYNTAX_AWK;
+ warning("option -a will go away in the next release");
+ /*regex_mode = RE_SYNTAX_AWK;*/
break;
- case 'e': /* use egrep style regexps, per Posix */
- regex_mode = RE_SYNTAX_POSIX_EGREP;
+ case 'e': /* use Posix style regexps */
+ warning("option -e will go away in the next release");
+ /*regex_mode = RE_SYNTAX_POSIX_AWK;*/
+ break;
+
+ case 'W': /* gawk specific options */
+ gawk_option(optarg);
break;
case '?':
@@ -244,6 +247,7 @@ char **argv;
/* Tell the regex routines how they should work. . . */
(void) re_set_syntax(regex_mode);
+ regsyntax(regex_mode, 0);
#ifdef DEBUG
setbuf(stdout, (char *) NULL); /* make debugging easier */
@@ -253,41 +257,19 @@ char **argv;
/* No -f option, use next arg */
/* write to temp file and save sourcefile name */
if (numfiles == -1) {
- int i;
-
if (optind > argc - 1) /* no args left */
usage();
- numfiles++;
- i = strlen (argv[optind]);
- if (i == 0) { /* sanity check */
- fprintf(stderr, "%s: empty program text\n", myname);
- usage();
- /* NOTREACHED */
- }
- sourcefile[0] = tmpnam((char *) NULL);
- if ((fp = fopen (sourcefile[0], "w")) == NULL)
- fatal("could not save source prog in temp file (%s)",
- strerror(errno));
- if (fwrite (argv[optind], 1, i, fp) == 0)
- fatal(
- "could not write source program to temp file (%s)",
- strerror(errno));
- if (argv[optind][i-1] != '\n')
- putc ('\n', fp);
- (void) fclose (fp);
- tempsource++;
+ cmdline_src = argv[optind];
optind++;
}
- init_args(optind, argc, myname, argv);
+ srcfiles[++numfiles] = NULL;
+ init_args(optind, argc, (char *) myname, argv);
+ (void) tokexpand();
/* Read in the program */
if (yyparse() || errcount)
exit(1);
-#ifdef DEBUG
- if (dotree)
- print_parse_tree(expression_value);
-#endif
/* Set up the field variables */
init_fields();
@@ -299,9 +281,8 @@ char **argv;
(void) interpret(end_block);
if (close_io() != 0 && exit_val == 0)
exit_val = 1;
- exit(exit_val);
- /* NOTREACHED */
- return exit_val;
+ exit(exit_val); /* more portable */
+ return exit_val; /* to suppress warnings */
}
static void
@@ -309,43 +290,14 @@ usage()
{
char *opt1 = " -f progfile [--]";
char *opt2 = " [--] 'program'";
-#ifdef STRICT
- char *regops = " [-ae] [-F fs] [-v var=val]"
-#else
- char *regops = " [-aecCV] [-F fs] [-v var=val]";
-#endif
+ char *regops = " [-F fs] [-v var=val] [-W gawk-opts]";
fprintf(stderr, "usage: %s%s%s file ...\n %s%s%s file ...\n",
myname, regops, opt1, myname, regops, opt2);
exit(11);
}
-/* Generate compiled regular expressions */
-struct re_pattern_buffer *
-make_regexp(s, ignorecase)
-NODE *s;
-int ignorecase;
-{
- struct re_pattern_buffer *rp;
- char *err;
-
- emalloc(rp, struct re_pattern_buffer *, sizeof(*rp), "make_regexp");
- memset((char *) rp, 0, sizeof(*rp));
- emalloc(rp->buffer, char *, 16, "make_regexp");
- rp->allocated = 16;
- emalloc(rp->fastmap, char *, 256, "make_regexp");
-
- if (! strict && ignorecase)
- rp->translate = casetable;
- else
- rp->translate = NULL;
- if ((err = re_compile_pattern(s->stptr, s->stlen, rp)) != NULL)
- fatal("%s: /%s/", err, s->stptr);
- free_temp(s);
- return rp;
-}
-
-struct re_pattern_buffer *
+Regexp *
mk_re_parse(s, ignorecase)
char *s;
int ignorecase;
@@ -398,14 +350,12 @@ int ignorecase;
*dest++ = *src++;
}
}
- return make_regexp(tmp_string(s, dest-s), ignorecase);
+ return make_regexp(tmp_string(s, dest-s), ignorecase, 1);
}
static void
copyleft ()
{
- extern char *version_string;
- char *cp;
static char blurb[] =
"Copyright (C) 1989, Free Software Foundation.\n\
GNU Awk comes with ABSOLUTELY NO WARRANTY. This is free software, and\n\
@@ -416,18 +366,20 @@ You should have received a copy of the GNU General Public License along\n\
with this program; if not, write to the Free Software Foundation, Inc.,\n\
675 Mass Ave, Cambridge, MA 02139, USA.\n";
- fprintf (stderr, "%s, patchlevel %d\n", version_string, PATCHLEVEL);
+ fprintf(stderr, "%s, patchlevel %d\n", version_string, PATCHLEVEL);
fputs(blurb, stderr);
fflush(stderr);
}
static void
-set_fs(str)
+cmdline_fs(str)
char *str;
{
register NODE **tmp;
+ int len = strlen(str);
- tmp = get_lhs(FS_node, 0);
+ tmp = get_lhs(FS_node, (Func_ptr *) 0);
+ unref(*tmp);
/*
* Only if in full compatibility mode check for the stupid special
* case so -F\t works as documented in awk even though the shell
@@ -435,8 +387,8 @@ char *str;
*/
if (strict && str[0] == 't' && str[1] == '\0')
str[0] = '\t';
- *tmp = make_string(str, 1);
- do_deref();
+ *tmp = make_str_node(str, len, SCAN); /* do process escapes */
+ set_FS();
}
static void
@@ -448,43 +400,75 @@ char **argv;
int i, j;
NODE **aptr;
- ARGV_node = spc_var("ARGV", Nnull_string);
+ ARGV_node = install("ARGV", node(Nnull_string, Node_var, (NODE *)NULL));
aptr = assoc_lookup(ARGV_node, tmp_number(0.0));
*aptr = make_string(argv0, strlen(argv0));
+ (*aptr)->flags |= MAYBE_NUM;
for (i = argc0, j = 1; i < argc; i++) {
aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j));
*aptr = make_string(argv[i], strlen(argv[i]));
+ (*aptr)->flags |= MAYBE_NUM;
j++;
}
- ARGC_node = spc_var("ARGC", make_number((AWKNUM) j));
+ ARGC_node = install("ARGC",
+ node(make_number((AWKNUM) j), Node_var, (NODE *) NULL));
}
/*
* Set all the special variables to their initial values.
*/
+struct varinit { /* one row per built-in variable installed by init_vars() */
+ NODE **spec; /* global that receives the node returned by install() */
+ char *name; /* variable name passed to install(); 0 ends the table */
+ NODETYPE type; /* node type handed to node() */
+ char *strval; /* initial string value, or 0 to use numval instead */
+ AWKNUM numval; /* initial numeric value, used only when strval is 0 */
+ Func_ptr assign; /* if non-zero, called right after the variable is installed */
+};
+static struct varinit varinit[] = { /* walked by init_vars() until name == 0 */
+{&NF_node, "NF", Node_NF, 0, -1, set_NF },
+{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, 0 },
+{&NR_node, "NR", Node_NR, 0, 0, set_NR },
+{&FNR_node, "FNR", Node_FNR, 0, 0, set_FNR },
+{&FS_node, "FS", Node_FS, " ", 0, 0 },
+{&RS_node, "RS", Node_RS, "\n", 0, set_RS },
+{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, 0, 0, set_IGNORECASE },
+{&FILENAME_node, "FILENAME", Node_var, "-", 0, 0 },
+{&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS },
+{&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS },
+{&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT },
+{&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT },
+{&RLENGTH_node, "RLENGTH", Node_var, 0, 0, 0 },
+{&RSTART_node, "RSTART", Node_var, 0, 0, 0 },
+{&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, 0 },
+{0, 0, Node_illegal, 0, 0, 0 }, /* terminator: name == 0 stops the loop */
+};
+
static void
init_vars() /* install every variable listed in the varinit[] table */
{
+ register struct varinit *vp;
+
+ for (vp = varinit; vp->name; vp++) { /* a row with name == 0 ends the table */
+ *(vp->spec) = install(vp->name, /* store the installed node in the row's global */
+ node(vp->strval == 0 ? make_number(vp->numval) /* no string given: numeric initial value */
+ : make_string(vp->strval, strlen(vp->strval)),
+ vp->type, (NODE *) NULL));
+ if (vp->assign) /* optional per-variable routine */
+ (*(vp->assign))(); /* NOTE(review): presumably syncs the matching C global (NF, NR, ...) -- confirm */
+ }
+}
+
+void
+load_environ()
+{
extern char **environ;
- char *var, *val;
+ register char *var, *val;
NODE **aptr;
- int i;
+ register int i;
- FS_node = spc_var("FS", make_string(" ", 1));
- NF_node = spc_var("NF", make_number(-1.0));
- RS_node = spc_var("RS", make_string("\n", 1));
- NR_node = spc_var("NR", make_number(0.0));
- FNR_node = spc_var("FNR", make_number(0.0));
- FILENAME_node = spc_var("FILENAME", make_string("-", 1));
- OFS_node = spc_var("OFS", make_string(" ", 1));
- ORS_node = spc_var("ORS", make_string("\n", 1));
- OFMT_node = spc_var("OFMT", make_string("%.6g", 4));
- RLENGTH_node = spc_var("RLENGTH", make_number(0.0));
- RSTART_node = spc_var("RSTART", make_number(0.0));
- SUBSEP_node = spc_var("SUBSEP", make_string("\034", 1));
- IGNORECASE_node = spc_var("IGNORECASE", make_number(0.0));
-
- ENVIRON_node = spc_var("ENVIRON", Nnull_string);
+ ENVIRON_node = install("ENVIRON",
+ node(Nnull_string, Node_var, (NODE *) NULL));
for (i = 0; environ[i]; i++) {
static char nullstr[] = "";
@@ -496,6 +480,7 @@ init_vars()
val = nullstr;
aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen (var)));
*aptr = make_string(val, strlen (val));
+ (*aptr)->flags |= MAYBE_NUM;
/* restore '=' so that system() gets a valid environment */
if (val != nullstr)
@@ -503,30 +488,42 @@ init_vars()
}
}
-/* Create a special variable */
-static NODE *
-spc_var(name, value)
-char *name;
-NODE *value;
+/* Process a command-line assignment */ /* "var=value": returns a pointer to the '=' in arg, or NULL when arg has none */
+char *
+arg_assign(arg)
+char *arg;
{
- register NODE *r;
+ char *cp; /* position of '=' within arg */
+ Func_ptr after_assign = NULL; /* set by get_lhs(); called after the store if non-NULL */
+ NODE *var;
+ NODE *it; /* the new value node */
+ NODE **lhs;
- if ((r = lookup(variables, name)) == NULL)
- r = install(variables, name, node(value, Node_var, (NODE *) NULL));
- return r;
+ cp = strchr(arg, '='); /* split at the first '=' */
+ if (cp != NULL) {
+ *cp++ = '\0'; /* temporarily cut arg down to the name; cp now points at the value */
+ /*
+ * Recent versions of nawk expand escapes inside assignments.
+ * This makes sense, so we do it too.
+ */
+ it = make_str_node(cp, strlen(cp), SCAN);
+ it->flags |= MAYBE_NUM;
+ var = variable(arg, 0);
+ lhs = get_lhs(var, &after_assign);
+ unref(*lhs); /* release the old value before overwriting it */
+ *lhs = it;
+ if (after_assign)
+ (*after_assign)();
+ *--cp = '='; /* restore original text of ARGV */
+ }
+ return cp; /* NULL here means arg was not in var=value form */
}
static void
pre_assign(v)
char *v;
{
- char *cp;
-
- cp = strchr(v, '=');
- if (cp != NULL) {
- *cp++ = '\0';
- variable(v)->var_value = make_string(cp, strlen(cp));
- } else {
+ if (!arg_assign(v)) {
fprintf (stderr,
"%s: '%s' argument to -v not in 'var=value' form\n",
myname, v);
@@ -543,11 +540,133 @@ int sig, code;
#endif
if (sig == SIGFPE) {
fatal("floating point exception");
+#ifndef VMS
} else if (sig == SIGSEGV) {
msg("fatal error: segmentation fault");
+#else
+ } else if (sig == SIGSEGV || sig == SIGBUS) {
+ msg("fatal error: access violation");
+#endif
/* fatal won't abort() if not compiled for debugging */
abort();
} else
cant_happen();
/* NOTREACHED */
}
+
+/* gawk_option --- do gawk specific things */
+
+/*
+ * Handle the argument of -W: a list of option words, matched without
+ * regard to case and separated by blanks, tabs or commas.  Recognized
+ * words are version, copyright, copyleft, compat, nostalgia, posix, lint
+ * and, in DEBUG builds, parsedebug.  Anything else draws a one-character
+ * "unknown option" message on stderr and scanning continues.
+ *
+ * On a match cp is advanced to the LAST character of the word; the
+ * cp++ of the for loop then steps past it.
+ */
+static void
+gawk_option(optstr)
+char *optstr;
+{
+	char *cp;
+
+	for (cp = optstr; *cp; cp++) {
+		switch (*cp) {
+		case ' ':
+		case '\t':
+		case ',':
+			break;
+		case 'v':
+		case 'V':
+			/* print version */
+			if (strncasecmp(cp, "version", 7) != 0)
+				goto unknown;
+			else
+				cp += 6;
+			fprintf(stderr, "%s, patchlevel %d\n",
+				version_string, PATCHLEVEL);
+			break;
+		case 'c':
+		case 'C':
+			if (strncasecmp(cp, "copyright", 9) == 0) {
+				cp += 8;
+				copyleft();
+			} else if (strncasecmp(cp, "copyleft", 8) == 0) {
+				cp += 7;
+				copyleft();
+			} else if (strncasecmp(cp, "compat", 6) == 0) {
+				cp += 5;
+				strict = 1;
+			} else
+				goto unknown;
+			break;
+		case 'n':
+		case 'N':
+			if (strncasecmp(cp, "nostalgia", 9) != 0)
+				goto unknown;
+			nostalgia();	/* aborts, so cp need not advance */
+			break;
+		case 'p':
+		case 'P':
+#ifdef DEBUG
+			if (strncasecmp(cp, "parsedebug", 10) == 0) {
+				/*
+				 * length - 1, like every other word: adding
+				 * the full length would let the loop's cp++
+				 * step over the terminating NUL.
+				 */
+				cp += 9;
+				yydebug = 2;
+				break;
+			}
+#endif
+			if (strncasecmp(cp, "posix", 5) != 0)
+				goto unknown;
+			cp += 4;
+			do_posix = 1;
+			strict = 1;
+			break;
+		case 'l':
+		case 'L':
+			if (strncasecmp(cp, "lint", 4) != 0)
+				goto unknown;
+			cp += 3;
+			do_lint = 1;
+			break;
+		default:
+		unknown:
+			fprintf(stderr, "'%c' -- unknown option, ignored\n",
+				*cp);
+			break;
+		}
+	}
+}
+
+/* nostalgia --- print the famous error message and die */
+
+static void
+nostalgia()
+{
+	/* the message contains no conversions, so it is written verbatim */
+	fputs("awk: bailing out near line 1\n", stderr);
+	abort();
+}
+
+/*
+ * basename --- return the last component of a file specification.
+ *
+ * Non-VMS: the result points into filespec, just past the last '/'
+ * (or the last '/' or '\\', whichever comes later, on MSDOS and the
+ * Atari ST); filespec itself is returned when there is no separator.
+ *
+ * VMS: the text after the last ']' or '>' is copied into a static
+ * buffer and cut at its last '.', so the result is overwritten by the
+ * next call.
+ */
+const char *
+basename(filespec)
+const char *filespec;
+{
+#ifndef VMS	/* "path/name" -> "name" */
+	char *sep;
+
+	sep = strrchr(filespec, '/');
+#if defined(MSDOS) || defined(atarist)
+	{
+		char *backslash = strrchr(filespec, '\\');
+
+		/* use whichever separator occurs later in the string */
+		if (sep == NULL || backslash > sep)
+			sep = backslash;
+	}
+#endif
+
+	if (sep != NULL)
+		return (const char *)(sep + 1);
+	return filespec;
+
+#else	/* "device:[root.][directory.subdir]GAWK.EXE;n" -> "GAWK" */
+	static char buf[255+1];
+	char *bracket = strrchr(filespec, ']');	/* directory punctuation */
+	char *angle = strrchr(filespec, '>');	/* alternate <international> punct */
+	char *dot;
+
+	if (bracket == NULL || angle > bracket)
+		bracket = angle;
+	if (bracket == NULL)
+		(void) strcpy(buf, filespec);
+	else
+		(void) strcpy(buf, bracket + 1);
+
+	dot = strrchr(buf, '.');
+	if (dot != NULL)
+		*dot = '\0';	/* strip .type;version */
+
+	return (const char *) buf;
+
+#endif /*VMS*/
+}
diff --git a/missing.c b/missing.c
index 2f387fb5..daf16e60 100644
--- a/missing.c
+++ b/missing.c
@@ -1,60 +1,93 @@
-#ifdef MSDOS
-#define BCOPY_MISSING
-#define STRCASE_MISSING
-#define BLKSIZE_MISSING
-#define SPRINTF_INT
-#define RANDOM_MISSING
-#define GETOPT_MISSING
+/*
+ * Do all necessary includes here, so that we don't have to worry about
+ * overlapping includes in the files in missing.d.
+ */
+#include <stdio.h>
+#include <ctype.h>
+#include <errno.h>
+#ifndef VAXC
+#include <fcntl.h>
+#include <sys/types.h>
+#else /*VAXC (VMS)*/
+#include <file.h>
+#include <types.h>
#endif
+#include <varargs.h>
-#ifdef DUP2_MISSING
-#include "missing.d/dup2.c"
-#endif /* DUP2_MISSING */
+#include "config.h"
-#ifdef GCVT_MISSING
-#include "missing.d/gcvt.c"
-#endif /* GCVT_MISSING */
+#ifdef TZSET_MISSING
+#include <sys/time.h>
+#else
+#include <time.h>
+#endif
+
+#ifdef atarist
+/*
+ * this will work with gcc compiler - for other compilers you may
+ * have to replace path separators in this file into backslashes
+ */
+#include "atari/stack.c"
+#include "atari/tmpnam.c"
+#include "atari/textrd.c" /* gnulib bug fix */
+#endif /* atarist */
+
+#ifdef SYSTEM_MISSING
+#ifdef atarist
+#include "atari/system.c"
+#else
+#include "missing/system.c"
+#endif
+#endif /* SYSTEM_MISSING */
#ifdef GETOPT_MISSING
-#include "missing.d/getopt.c"
+#include "missing/getopt.c"
#endif /* GETOPT_MISSING */
#ifdef MEMCMP_MISSING
-#include "missing.d/memcmp.c"
+#include "missing/memcmp.c"
#endif /* MEMCMP_MISSING */
#ifdef MEMCPY_MISSING
-#include "missing.d/memcpy.c"
+#include "missing/memcpy.c"
#endif /* MEMCPY_MISSING */
#ifdef MEMSET_MISSING
-#include "missing.d/memset.c"
+#include "missing/memset.c"
#endif /* MEMSET_MISSING */
#ifdef RANDOM_MISSING
-#include "missing.d/random.c"
+#include "missing/random.c"
#endif /* RANDOM_MISSING */
#ifdef STRCASE_MISSING
-#include "missing.d/strcase.c"
+#include "missing/strcase.c"
#endif /* STRCASE_MISSING */
#ifdef STRCHR_MISSING
-#include "missing.d/strchr.c"
+#include "missing/strchr.c"
#endif /* STRCHR_MISSING */
#ifdef STRERROR_MISSING
-#include "missing.d/strerror.c"
+#include "missing/strerror.c"
#endif /* STRERROR_MISSING */
+#ifdef STRFTIME_MISSING
+#include "missing/strftime.c"
+#endif /* STRFTIME_MISSING */
+
#ifdef STRTOD_MISSING
-#include "missing.d/strtod.c"
+#include "missing/strtod.c"
#endif /* STRTOD_MISSING */
-#ifdef TMPNAM_MISSING
-#include "missing.d/tmpnam.c"
-#endif /* TMPNAM_MISSING */
+#ifdef STRTOL_MISSING
+#include "missing/strtol.c"
+#endif /* STRTOL_MISSING */
#if defined(VPRINTF_MISSING) && defined(BSDSTDIO)
-#include "missing.d/vprintf.c"
+#include "missing/vprintf.c"
#endif /* VPRINTF_MISSING && BSDSTDIO */
+
+#ifdef TZSET_MISSING
+#include "missing/tzset.c"
+#endif /* TZSET_MISSING */
diff --git a/missing.d/dup2.c b/missing/dup2.c
index 01068348..01068348 100644
--- a/missing.d/dup2.c
+++ b/missing/dup2.c
diff --git a/missing.d/gcvt.c b/missing/gcvt.c
index 1ebe41e1..1ebe41e1 100644
--- a/missing.d/gcvt.c
+++ b/missing/gcvt.c
diff --git a/missing.d/getopt.c b/missing/getopt.c
index d9e52945..d9e52945 100644
--- a/missing.d/getopt.c
+++ b/missing/getopt.c
diff --git a/missing.d/memcmp.c b/missing/memcmp.c
index e39c10ec..e39c10ec 100644
--- a/missing.d/memcmp.c
+++ b/missing/memcmp.c
diff --git a/missing.d/memcpy.c b/missing/memcpy.c
index 3c4accdf..3c4accdf 100644
--- a/missing.d/memcpy.c
+++ b/missing/memcpy.c
diff --git a/missing.d/memset.c b/missing/memset.c
index 120bdcb4..120bdcb4 100644
--- a/missing.d/memset.c
+++ b/missing/memset.c
diff --git a/missing.d/random.c b/missing/random.c
index 3708fe90..3708fe90 100644
--- a/missing.d/random.c
+++ b/missing/random.c
diff --git a/missing.d/strcase.c b/missing/strcase.c
index d8fa674a..d8fa674a 100644
--- a/missing.d/strcase.c
+++ b/missing/strcase.c
diff --git a/missing.d/strchr.c b/missing/strchr.c
index 234ac883..234ac883 100644
--- a/missing.d/strchr.c
+++ b/missing/strchr.c
diff --git a/missing.d/strerror.c b/missing/strerror.c
index e9c20804..e9c20804 100644
--- a/missing.d/strerror.c
+++ b/missing/strerror.c
diff --git a/missing.d/strtod.c b/missing/strtod.c
index 79350a1e..79350a1e 100644
--- a/missing.d/strtod.c
+++ b/missing/strtod.c
diff --git a/missing.d/tmpnam.c b/missing/tmpnam.c
index 8f49859a..8f49859a 100644
--- a/missing.d/tmpnam.c
+++ b/missing/tmpnam.c
diff --git a/missing.d/vprintf.c b/missing/vprintf.c
index dea4ca86..dea4ca86 100644
--- a/missing.d/vprintf.c
+++ b/missing/vprintf.c
diff --git a/mkconf b/mkconf
new file mode 100755
index 00000000..2ff6dbff
--- /dev/null
+++ b/mkconf
@@ -0,0 +1,32 @@
+#! /bin/sh
+#
+# mkconf -- produce a config.h and Makefile from a known configuration
+
+case "$#" in
+1) ;; # exactly one argument (the system type) is required
+*) echo "Usage: $0 system_type" >&2
+ echo "Known systems: `cd config; echo ;ls -C`" >&2 # lists the file names found in config/
+ exit 2 # usage error
+ ;;
+esac
+
+if [ -f config/$1 ]; then
+ sh ./mungeconf config/$1 config.h-dist >config.h # config.h is mungeconf's stdout
+
+ # echo #echo lines to stdout
+ sed -n '/^#echo /s///p' config/$1
+
+ sed -n '/^MAKE_.*/s//s,^##&## ,,/p' config/$1 >sedscr # each line starting with MAKE_ becomes: s,^##<that line>## ,,
+ if [ -s sedscr ] # non-empty only if the config file had MAKE_ lines
+ then
+ sed -f sedscr Makefile-dist >Makefile # strip the matching ##MAKE_...## prefixes from Makefile-dist
+ else
+ cp Makefile-dist Makefile # no MAKE_ lines: Makefile-dist is used unchanged
+ fi
+ rm -f sedscr
+else
+ echo "\`$1' is not a known configuration."
+ echo "Either construct one based on the examples in the config directory,"
+ echo "or copy config.h-dist to config.h and edit it."
+ exit 1 # unknown system type
+fi
diff --git a/msg.c b/msg.c
index 6f1090fd..eb96c4d5 100644
--- a/msg.c
+++ b/msg.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -35,26 +35,26 @@ char *s;
char *msg;
va_list *argp;
{
- int line;
char *file;
+ (void) fflush(stdout);
(void) fprintf(stderr, "%s: %s ", myname, s);
vfprintf(stderr, msg, *argp);
(void) fprintf(stderr, "\n");
- line = (int) FNR_node->var_value->numbr;
- if (line) {
- (void) fprintf(stderr, " input line number %d", line);
+ if (FNR) {
+ (void) fprintf(stderr, " input line number %d", FNR);
file = FILENAME_node->var_value->stptr;
if (file && !STREQ(file, "-"))
(void) fprintf(stderr, ", file `%s'", file);
(void) fprintf(stderr, "\n");
}
if (sourceline) {
- (void) fprintf(stderr, " source line number %d", sourceline);
+ (void) fprintf(stderr, " source line number %d", sourceline);
if (source)
(void) fprintf(stderr, ", file `%s'", source);
(void) fprintf(stderr, "\n");
}
+ (void) fflush(stderr);
}
/*VARARGS0*/
@@ -100,5 +100,5 @@ va_dcl
#ifdef DEBUG
abort();
#endif
- exit(1);
+ exit(2);
}
diff --git a/mungeconf b/mungeconf
new file mode 100755
index 00000000..95170450
--- /dev/null
+++ b/mungeconf
@@ -0,0 +1,20 @@
+#! /bin/sh
+
+# stdout is normally config.h
+
+case $# in
+2) ;; # sysfile and distfile are both required
+*) echo "Usage: mungeconf sysfile distfile" >&2 ; exit 2 ;;
+esac
+
+sed '/^#/d; /^MAKE_*/d' $1 | # drop lines starting with # and lines starting with MAKE (the _* matches zero or more underscores)
+sed '1s:.*:s~__SYSTEM__~&~:
+2,$s:^\([^ ]*\)[ ].*:s~^/\\* #define[ ]*\1.*~#define &~:' >sedscr # line 1 fills in __SYSTEM__; each later NAME line rewrites the matching commented-out #define NAME line
+
+sed -f sedscr $2 # apply the generated substitutions to distfile; result goes to stdout
+
+echo
+echo '/* anything that follows is for system-specific short-term kludges */'
+grep '^#define' $1 # append sysfile's own #define lines verbatim (the first sed above removed them)
+
+rm -f sedscr # remove the temporary sed script
diff --git a/node.c b/node.c
index 971b8231..3bfc5e4a 100644
--- a/node.c
+++ b/node.c
@@ -3,7 +3,7 @@
*/
/*
- * Copyright (C) 1986, 1988, 1989 the Free Software Foundation, Inc.
+ * Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Progamming Language.
@@ -25,19 +25,15 @@
#include "awk.h"
-extern double strtod();
-
-/*
- * We can't dereference a variable until after we've given it its new value.
- * This variable points to the value we have to free up
- */
-NODE *deref;
-
AWKNUM
r_force_number(n)
-NODE *n;
+register NODE *n;
{
+ register char *cp;
+ register char *cpend;
+ char save;
char *ptr;
+ unsigned int newflags = NUMERIC;
#ifdef DEBUG
if (n == NULL)
@@ -49,22 +45,52 @@ NODE *n;
if (n->flags & NUM)
return n->numbr;
#endif
+
+ /* all the conditionals are an attempt to avoid the expensive strtod */
+
+ n->numbr = 0.0;
+ n->flags |= NUM;
+
if (n->stlen == 0)
- n->numbr = 0.0;
- else if (n->stlen == 1) {
- if (isdigit(n->stptr[0])) {
- n->numbr = n->stptr[0] - '0';
- n->flags |= NUMERIC;
- } else
- n->numbr = 0.0;
- } else {
- errno = 0;
- n->numbr = (AWKNUM) strtod(n->stptr, &ptr);
- /* the following >= should be ==, but for SunOS 3.5 strtod() */
- if (errno == 0 && ptr >= n->stptr + n->stlen)
- n->flags |= NUMERIC;
+ return 0.0;
+
+ cp = n->stptr;
+ if (isalpha(*cp))
+ return 0.0;
+
+ cpend = cp + n->stlen;
+ while (cp < cpend && isspace(*cp))
+ cp++;
+ if (cp == cpend || isalpha(*cp))
+ return 0.0;
+
+ if (n->flags & MAYBE_NUM) {
+ newflags |= NUMBER;
+ n->flags &= ~MAYBE_NUM;
}
- n->flags |= NUM;
+ if (cpend - cp == 1) {
+ if (isdigit(*cp)) {
+ n->numbr = *cp - '0';
+ n->flags |= newflags;
+ }
+ return n->numbr;
+ }
+
+ errno = 0;
+ save = *cpend;
+ *cpend = '\0';
+ n->numbr = (AWKNUM) strtod((const char *)cp, &ptr);
+
+ /* POSIX says trailing space is OK for NUMERIC */
+ while (isspace(*ptr))
+ ptr++;
+ *cpend = save;
+ /* the >= should be ==, but for SunOS 3.5 strtod() */
+ if (errno == 0 && ptr >= cpend)
+ n->flags |= newflags;
+ else
+ errno = 0;
+
return n->numbr;
}
@@ -89,12 +115,11 @@ static char *values[] = {
NODE *
r_force_string(s)
-NODE *s;
+register NODE *s;
{
char buf[128];
- char *fmt;
- long num;
- char *sp = buf;
+ register long num;
+ register char *sp = buf;
#ifdef DEBUG
if (s == NULL)
@@ -106,11 +131,8 @@ NODE *s;
if (!(s->flags & NUM))
cant_happen();
if (s->stref != 0)
- cant_happen();
+ ; /*cant_happen();*/
#endif
- s->flags |= STR;
- /* should check validity of user supplied OFMT */
- fmt = OFMT_node->var_value->stptr;
if ((num = s->numbr) == s->numbr) {
/* integral value */
if (num < NVAL && num >= 0) {
@@ -120,13 +142,16 @@ NODE *s;
(void) sprintf(sp, "%ld", num);
s->stlen = strlen(sp);
}
+ s->stfmt = -1;
} else {
- (void) sprintf(sp, fmt, s->numbr);
+ (void) sprintf(sp, CONVFMT, s->numbr);
s->stlen = strlen(sp);
+ s->stfmt = CONVFMTidx;
}
s->stref = 1;
- emalloc(s->stptr, char *, s->stlen + 1, "force_string");
+ emalloc(s->stptr, char *, s->stlen + 2, "force_string");
memcpy(s->stptr, sp, s->stlen+1);
+ s->flags |= STR;
return s;
}
@@ -150,13 +175,13 @@ NODE *n;
n->stref++;
return n;
}
- r = newnode(Node_illegal);
+ getnode(r);
*r = *n;
r->flags &= ~(PERM|TEMP);
r->flags |= MALLOC;
if (n->type == Node_val && (n->flags & STR)) {
r->stref = 1;
- emalloc(r->stptr, char *, r->stlen + 1, "dupnode");
+ emalloc(r->stptr, char *, r->stlen + 2, "dupnode");
memcpy(r->stptr, n->stptr, r->stlen+1);
}
return r;
@@ -164,56 +189,53 @@ NODE *n;
/* this allocates a node with defined numbr */
NODE *
-make_number(x)
+mk_number(x, flags)
AWKNUM x;
+unsigned int flags;
{
register NODE *r;
- r = newnode(Node_val);
+ getnode(r);
+ r->type = Node_val;
r->numbr = x;
- r->flags |= (NUM|NUMERIC);
- r->stref = 0;
- return r;
-}
-
-/*
- * This creates temporary nodes. They go away quite quickly, so don't use
- * them for anything important
- */
-NODE *
-tmp_number(x)
-AWKNUM x;
-{
- NODE *r;
-
- r = make_number(x);
- r->flags |= TEMP;
+ r->flags = flags;
+#ifdef DEBUG
+ r->stref = 1;
+ r->stptr = 0;
+ r->stlen = 0;
+#endif
return r;
}
/*
* Make a string node.
*/
-
NODE *
-make_str_node(s, len, scan)
+make_str_node(s, len, flags)
char *s;
-int len;
-int scan;
+size_t len;
+int flags;
{
register NODE *r;
- char *pf;
- register char *pt;
- register int c;
- register char *end;
-
- r = newnode(Node_val);
- emalloc(r->stptr, char *, len + 1, s);
- memcpy(r->stptr, s, len);
+
+ getnode(r);
+ r->type = Node_val;
+ r->flags = (STRING|STR|MALLOC);
+ if (flags & ALREADY_MALLOCED)
+ r->stptr = s;
+ else {
+ emalloc(r->stptr, char *, len + 2, s);
+ memcpy(r->stptr, s, len);
+ }
r->stptr[len] = '\0';
- end = &(r->stptr[len]);
- if (scan) { /* scan for escape sequences */
+ if (flags & SCAN) { /* scan for escape sequences */
+ char *pf;
+ register char *pt;
+ register int c;
+ register char *end;
+
+ end = &(r->stptr[len]);
for (pf = pt = r->stptr; pf < end;) {
c = *pf++;
if (c == '\\') {
@@ -231,16 +253,15 @@ int scan;
}
r->stlen = len;
r->stref = 1;
- r->flags |= (STR|MALLOC);
+ r->stfmt = -1;
return r;
}
-/* Read the warning under tmp_number */
NODE *
tmp_string(s, len)
char *s;
-int len;
+size_t len;
{
register NODE *r;
@@ -252,92 +273,157 @@ int len;
#define NODECHUNK 100
-static NODE *nextfree = NULL;
+NODE *nextfree = NULL;
NODE *
-newnode(ty)
-NODETYPE ty;
+more_nodes()
{
- NODE *it;
- NODE *np;
+ register NODE *np;
-#ifdef MPROF
- emalloc(it, NODE *, sizeof(NODE), "newnode");
-#else
- if (nextfree == NULL) {
- /* get more nodes and initialize list */
- emalloc(nextfree, NODE *, NODECHUNK * sizeof(NODE), "newnode");
- for (np = nextfree; np < &nextfree[NODECHUNK - 1]; np++)
- np->nextp = np + 1;
- np->nextp = NULL;
- }
- /* get head of freelist */
- it = nextfree;
+ /* get more nodes and initialize list */
+ emalloc(nextfree, NODE *, NODECHUNK * sizeof(NODE), "newnode");
+ for (np = nextfree; np < &nextfree[NODECHUNK - 1]; np++)
+ np->nextp = np + 1;
+ np->nextp = NULL;
+ np = nextfree;
nextfree = nextfree->nextp;
-#endif
- it->type = ty;
- it->flags = MALLOC;
-#ifdef MEMDEBUG
- fprintf(stderr, "node: new: %0x\n", it);
-#endif
- return it;
+ return np;
}
+#ifdef DEBUG
void
freenode(it)
NODE *it;
{
-#ifdef DEBUG
- NODE *nf;
-#endif
-#ifdef MEMDEBUG
- fprintf(stderr, "node: free: %0x\n", it);
-#endif
#ifdef MPROF
+ it->stref = 0;
free((char *) it);
#else
-#ifdef DEBUG
- for (nf = nextfree; nf; nf = nf->nextp)
- if (nf == it)
- fatal("attempt to free free node");
+#ifdef MALLOCDEBUG
+ memset(it, '\04', sizeof(*it));
#endif
/* add it to head of freelist */
it->nextp = nextfree;
nextfree = it;
#endif
}
-
-#ifdef DEBUG
-pf()
-{
- NODE *nf = nextfree;
- while (nf != NULL) {
- fprintf(stderr, "%0x ", nf);
- nf = nf->nextp;
- }
-}
#endif
void
-do_deref()
+unref(tmp)
+register NODE *tmp;
{
- if (deref == NULL)
+ if (tmp == NULL)
return;
- if (deref->flags & PERM) {
- deref = 0;
+ if (tmp->flags & PERM)
return;
- }
- if ((deref->flags & MALLOC) || (deref->flags & TEMP)) {
- deref->flags &= ~TEMP;
- if (deref->flags & STR) {
- if (deref->stref > 1 && deref->stref != 255) {
- deref->stref--;
- deref = 0;
+ if (tmp->flags & (MALLOC|TEMP)) {
+ tmp->flags &= ~TEMP;
+ if (tmp->flags & STR) {
+ if (tmp->stref > 1) {
+ if (tmp->stref != 255)
+ tmp->stref--;
return;
}
- free(deref->stptr);
+ free(tmp->stptr);
+ }
+ freenode(tmp);
+ }
+}
+
+/*
+ * Parse a C escape sequence. STRING_PTR points to a variable containing a
+ * pointer to the string to parse. That pointer is updated past the
+ * characters we use. The value of the escape sequence is returned.
+ *
+ * A negative value means the sequence \ newline was seen, which is supposed to
+ * be equivalent to nothing at all.
+ *
+ * If \ is followed by a null character, we return a negative value and leave
+ * the string pointer pointing at the null character.
+ *
+ * If \ is followed by 000, we return 0 and leave the string pointer after the
+ * zeros. A value of 0 does not mean end of string.
+ *
+ * Posix doesn't allow \x.
+ */
+
+int
+parse_escape(string_ptr)
+char **string_ptr;
+{
+ register int c = *(*string_ptr)++;
+ register int i;
+ register int count;
+
+ switch (c) {
+ case 'a':
+ return BELL;
+ case 'b':
+ return '\b';
+ case 'f':
+ return '\f';
+ case 'n':
+ return '\n';
+ case 'r':
+ return '\r';
+ case 't':
+ return '\t';
+ case 'v':
+ return '\v';
+ case '\n':
+ return -2;
+ case 0:
+ (*string_ptr)--;
+ return -1;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ i = c - '0';
+ count = 0;
+ while (++count < 3) {
+ if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
+ i *= 8;
+ i += c - '0';
+ } else {
+ (*string_ptr)--;
+ break;
+ }
+ }
+ return i;
+ case 'x':
+ if (do_lint) {
+ static int didwarn;
+
+ if (! didwarn) {
+ didwarn = 1;
+ warning("Posix does not allow \"\\x\" escapes");
+ }
+ }
+ if (do_posix)
+ return ('x');
+ i = 0;
+ while (1) {
+ if (isxdigit((c = *(*string_ptr)++))) {
+ i *= 16;
+ if (isdigit(c))
+ i += c - '0';
+ else if (isupper(c))
+ i += c - 'A' + 10;
+ else
+ i += c - 'a' + 10;
+ } else {
+ (*string_ptr)--;
+ break;
+ }
}
- freenode(deref);
+ return i;
+ default:
+ return c;
}
- deref = 0;
}
diff --git a/patchlevel.h b/patchlevel.h
index 131713a8..c6161a1f 100644
--- a/patchlevel.h
+++ b/patchlevel.h
@@ -1 +1 @@
-#define PATCHLEVEL 1
+#define PATCHLEVEL 2
diff --git a/pc.d/Makefile.pc b/pc/Makefile.pc
index 11aaa026..11aaa026 100644
--- a/pc.d/Makefile.pc
+++ b/pc/Makefile.pc
diff --git a/pc/config.h b/pc/config.h
new file mode 100644
index 00000000..4718a9cf
--- /dev/null
+++ b/pc/config.h
@@ -0,0 +1,287 @@
+/*
+ * config.h -- configuration definitions for gawk.
+ *
+ * MS-DOS systems using MSC 5.1
+ */
+
+/*
+ * Copyright (C) 1991, the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This file isolates configuration dependencies for gnu awk.
+ * You should know something about your system, perhaps by having
+ * a manual handy, when you edit this file. You should copy config.h-dist
+ * to config.h, and edit config.h. Do not modify config.h-dist, so that
+ * it will be easy to apply any patches that may be distributed.
+ *
+ * The general idea is that systems conforming to the various standards
+ * should need to do the least amount of changing. Defining the various
+ * items in this file usually means that your system is missing that
+ * particular feature.
+ *
+ * The order of preference in standard conformance is ANSI C, POSIX,
+ * and the SVID.
+ *
+ * If you have no clue as to what's going on with your system, try
+ * compiling gawk without editing this file and see what shows up
+ * missing in the link stage. From there, you can probably figure out
+ * which defines to turn on.
+ */
+
+/**************************/
+/* Miscellaneous features */
+/**************************/
+
+/*
+ * BLKSIZE_MISSING
+ *
+ * Check your /usr/include/sys/stat.h file. If the stat structure
+ * does not have a member named st_blksize, define this. (This will
+ * most likely be the case on most System V systems prior to V.4.)
+ */
+#define BLKSIZE_MISSING 1
+
+/*
+ * SIGTYPE
+ *
+ * The return type of the routines passed to the signal function.
+ * Modern systems use `void', older systems use `int'.
+ * If left undefined, it will default to void.
+ */
+/* #define SIGTYPE int */
+
+/*
+ * SIZE_T_MISSING
+ *
+ * If your system has no typedef for size_t, define this to get a default
+ */
+#define SIZE_T_MISSING 1
+
+/*
+ * CHAR_UNSIGNED
+ *
+ * If your machine uses unsigned characters (IBM RT and RS/6000 and others)
+ * then define this for use in regex.c
+ */
+/* #define CHAR_UNSIGNED 1 */
+
+/*
+ * HAVE_UNDERSCORE_SETJMP
+ *
+ * Check in your /usr/include/setjmp.h file. If there are routines
+ * there named _setjmp and _longjmp, then you should define this.
+ * Typically only systems derived from Berkeley Unix have this.
+ */
+/* #define HAVE_UNDERSCORE_SETJMP 1 */
+
+/***********************************************/
+/* Missing library subroutines or system calls */
+/***********************************************/
+
+/*
+ * GETOPT_MISSING
+ *
+ * Define this if your library does not have the getopt(3) library
+ * routine for parsing command line arguments.
+ */
+#define GETOPT_MISSING 1
+
+/*
+ * MEMCMP_MISSING
+ * MEMCPY_MISSING
+ * MEMSET_MISSING
+ *
+ * These three routines are for manipulating blocks of memory. Most
+ * likely they will either all three be present or all three be missing,
+ * so they're grouped together.
+ */
+/* #define MEMCMP_MISSING 1 */
+/* #define MEMCPY_MISSING 1 */
+/* #define MEMSET_MISSING 1 */
+
+/*
+ * RANDOM_MISSING
+ *
+ * Your system does not have the random(3) suite of random number
+ * generating routines. These are different than the old rand(3)
+ * routines!
+ */
+#define RANDOM_MISSING 1
+
+/*
+ * STRCASE_MISSING
+ *
+ * Your system does not have the strcasecmp() and strncasecmp()
+ * routines that originated in Berkeley Unix.
+ */
+#define STRCASE_MISSING 1
+
+/*
+ * STRCHR_MISSING
+ *
+ * Your system does not have the strchr() and strrchr() functions.
+ */
+/* #define STRCHR_MISSING 1 */
+
+/*
+ * STRERROR_MISSING
+ *
+ * Your system lacks the ANSI C strerror() routine for returning the
+ * strings associated with errno values.
+ */
+/* #define STRERROR_MISSING 1 */
+
+/*
+ * STRTOD_MISSING
+ *
+ * Your system does not have the strtod() routine for converting
+ * strings to double precision floating point values.
+ */
+/* #define STRTOD_MISSING 1 */
+
+/*
+ * STRTOL_MISSING
+ *
+ * Your system does not have the strtol() routine for converting
+ * strings to long integers.
+ */
+#define STRTOL_MISSING 1
+
+/*
+ * STRFTIME_MISSING
+ *
+ * Your system lacks the ANSI C strftime() routine for formatting
+ * broken down time values.
+ */
+#define STRFTIME_MISSING 1
+
+/*
+ * TZSET_MISSING
+ *
+ * If you have a 4.2 BSD vintage system, then the strftime() routine
+ * supplied in the missing directory won't be enough, because it relies on the
+ * tzset() routine from System V / Posix. Fortunately, there is an
+ * emulation for tzset() too that should do the trick. If you don't
+ * have tzset(), define this.
+ */
+/* #define TZSET_MISSING 1 */
+
+/*
+ * TZNAME_MISSING
+ *
+ * Some systems do not support the external variables tzname and daylight.
+ * If this is the case *and* strftime() is missing, define this.
+ */
+/* #define TZNAME_MISSING 1 */
+
+/*
+ * STDC_HEADERS
+ *
+ * If your system does have ANSI compliant header files that
+ * provide prototypes for library routines, then define this.
+ */
+/* #define STDC_HEADERS 1 */
+
+/*
+ * NO_TOKEN_PASTING
+ *
+ * If your compiler defines __STDC__ but does not support token
+ * pasting (tok##tok), then define this.
+ */
+/* #define NO_TOKEN_PASTING 1 */
+
+/*****************************************************************/
+/* Stuff related to the Standard I/O Library. */
+/*****************************************************************/
+/* Much of this is (still, unfortunately) black magic in nature. */
+/* You may have to use some or all of these together to get gawk */
+/* to work correctly. */
+/*****************************************************************/
+
+/*
+ * NON_STD_SPRINTF
+ *
+ * Look in your /usr/include/stdio.h file. If the return type of the
+ * sprintf() function is NOT `int', define this.
+ */
+/* #define NON_STD_SPRINTF 1 */
+
+/*
+ * VPRINTF_MISSING
+ *
+ * Define this if your system lacks vprintf() and the other routines
+ * that go with it.
+ */
+/* #define VPRINTF_MISSING 1 */
+
+/*
+ * BSDSTDIO
+ *
+ * Define this if your standard i/o library is internally compatible
+ * with the one shipped with Berkeley Unix systems (4.n, n <= 3-reno).
+ * If you've defined VPRINTF_MISSING, you probably will need this too.
+ */
+/* #define BSDSTDIO 1 */
+
+/*
+ * DOPRNT_MISSING
+ *
+ * Define this if your standard i/o library does not have the _doprnt()
+ * routine. This is used in an attempt to simulate the vfprintf()
+ * routine.
+ */
+/* #define DOPRNT_MISSING 1 */
+
+/*
+ * Casts from size_t to int and back. These will become unnecessary
+ * at some point in the future, but for now are required where the
+ * two types are a different representation.
+ */
+/* #define SZTC */
+/* #define INTC */
+
+/*
+ * SYSTEM_MISSING
+ *
+ * Define this if your library does not provide a system function
+ * or you are not entirely happy with it and would rather use
+ * a provided replacement (atari only).
+ */
+/* #define SYSTEM_MISSING 1 */
+
+
+/*******************************/
+/* Gawk configuration options. */
+/*******************************/
+
+/*
+ * DEFPATH
+ *
+ * The default search path for the -f option of gawk. It is used
+ * if the AWKPATH environment variable is undefined. The default
+ * definition is provided here. Most likely you should not change
+ * this.
+ */
+
+/* #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" */
+/* #define ENVSEP ':' */
+
+/* anything that follows is for system-specific short-term kludges */
diff --git a/pc/make.bat b/pc/make.bat
new file mode 100644
index 00000000..301fdb1e
--- /dev/null
+++ b/pc/make.bat
@@ -0,0 +1,55 @@
+REM Simple brute force command file for building gawk under msdos
+REM
+REM *** This has only been tested using MSC 5.1 ***
+REM
+REM Written by Arnold Robbins, May 1991
+REM Based on earlier makefile for dos
+REM
+REM Copyright (C) 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
+REM
+REM This file is part of GAWK, the GNU implementation of the
+REM AWK Programming Language.
+REM
+REM GAWK is free software; you can redistribute it and/or modify
+REM it under the terms of the GNU General Public License as published by
+REM the Free Software Foundation; either version 1, or (at your option)
+REM any later version.
+REM
+REM GAWK is distributed in the hope that it will be useful,
+REM but WITHOUT ANY WARRANTY; without even the implied warranty of
+REM MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+REM GNU General Public License for more details.
+REM
+REM You should have received a copy of the GNU General Public License
+REM along with GAWK; see the file COPYING. If not, write to
+REM the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+REM
+REM debug flags: DEBUG=#-DDEBUG #-DFUNC_TRACE -DMEMDEBUG
+REM DEBUGGER= #-Zi
+REM
+cl -c -AL -Oalt array.c
+cl -c -AL -Oalt awktab.c
+cl -c -AL -Oalt builtin.c
+cl -c -AL -Oalt dfa.c
+cl -c -AL -Oalt eval.c
+cl -c -AL -Oalt field.c
+cl -c -AL -Oalt io.c
+cl -c -AL -Oalt iop.c
+cl -c -AL -Oalt main.c
+cl -c -AL -Oalt missing.c
+cl -c -AL -Oalt msg.c
+cl -c -AL -Oalt node.c
+cl -c -AL -Oalt popen.c
+cl -c -AL -Oalt re.c
+cl -c -AL -Oalt version.c
+REM
+REM this kludge necessary because MSC 5.1 compiler bombs with -Oail (where
+REM -Ox == "-Oailt -Gs")
+REM
+REM You can ignore the warnings you will get
+cl -c -AL -Ot regex.c
+REM
+REM I'm not sure what this is for. It was commented out
+REM LINKFLAGS= /CO /NOE /NOI /st:0x1800
+REM
+link @names.lnk,gawk.exe /E /FAR /PAC /NOE /NOI /st:0x1800;
diff --git a/pc/names.lnk b/pc/names.lnk
new file mode 100644
index 00000000..5a42d0ef
--- /dev/null
+++ b/pc/names.lnk
@@ -0,0 +1,17 @@
+array.obj+
+awktab.obj+
+builtin.obj+
+dfa.obj+
+eval.obj+
+field.obj+
+io.obj+
+iop.obj+
+main.obj+
+missing.obj+
+msg.obj+
+node.obj+
+popen.obj+
+re.obj+
+version.obj+
+regex.obj+
+setargv.obj
diff --git a/pc.d/popen.c b/pc/popen.c
index 48952c22..48952c22 100644
--- a/pc.d/popen.c
+++ b/pc/popen.c
diff --git a/pc.d/popen.h b/pc/popen.h
index 55501324..55501324 100644
--- a/pc.d/popen.h
+++ b/pc/popen.h
diff --git a/protos.h b/protos.h
new file mode 100644
index 00000000..5f84915b
--- /dev/null
+++ b/protos.h
@@ -0,0 +1,113 @@
+/*
+ * protos.h -- function prototypes for when the headers don't have them.
+ */
+
+/*
+ * Copyright (C) 1991, the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifdef __STDC__
+#define aptr_t void * /* arbitrary pointer type */
+#else
+#define aptr_t char *
+#endif
+extern aptr_t malloc P((MALLOC_ARG_T));
+extern aptr_t realloc P((aptr_t, MALLOC_ARG_T));
+extern aptr_t calloc P((MALLOC_ARG_T, MALLOC_ARG_T));
+
+extern void free P((aptr_t));
+extern int getopt P((int argc, char **argv, char *optstring));
+extern char *getenv P((char *));
+
+extern char *strcpy P((char *, const char *));
+extern char *strcat P((char *, const char *));
+extern char *strncpy P((char *, const char *, int));
+extern int strcmp P((const char *, const char *));
+extern int strncmp P((const char *, const char *, int));
+#ifndef VMS
+extern char *strerror P((int));
+#else
+extern char *strerror(); /* extern char *strerror(int,...); */
+#endif
+extern char *strchr P((const char *, int));
+extern char *strrchr P((const char *, int));
+extern char *strstr P((const char *s1, const char *s2));
+extern int strlen P((const char *));
+extern long strtol P((const char *, char **, int));
+#ifndef _MSC_VER
+extern int strftime P((char *, int, const char *, const struct tm *));
+#endif
+extern time_t time P((time_t *));
+extern aptr_t memset P((aptr_t, int, size_t));
+extern aptr_t memcpy P((aptr_t, const aptr_t, size_t));
+extern aptr_t memmove P((aptr_t, const aptr_t, size_t));
+extern aptr_t memchr P((const aptr_t, int, size_t));
+extern int memcmp P((const aptr_t, const aptr_t, size_t));
+
+/* extern int fprintf P((FILE *, char *, ...)); */
+extern int fprintf P(());
+#ifndef MSDOS
+extern int fwrite P((const char *, int, int, FILE *));
+extern int fputs P((const char *, FILE *));
+extern int unlink P((const char *));
+#endif
+extern int fflush P((FILE *));
+extern int fclose P((FILE *));
+extern FILE *popen P((const char *, const char *));
+extern int pclose P((FILE *));
+extern void abort P(());
+extern int isatty P((int));
+extern void exit P((int));
+extern int system P((const char *));
+extern int sscanf P((/* char *, char *, ... */));
+#ifndef toupper
+extern int toupper P((int));
+#endif
+#ifndef tolower
+extern int tolower P((int));
+#endif
+
+extern double pow P((double x, double y));
+extern double atof P((char *));
+extern double strtod P((const char *, char **));
+extern int fstat P((int, struct stat *));
+extern off_t lseek P((int, off_t, int));
+extern int fseek P((FILE *, long, int));
+extern int close P((int));
+extern int open P(());
+extern int pipe P((int *));
+extern int dup P((int));
+extern int fork P(());
+extern int execl P((/* char *, char *, ... */));
+extern int read P((int, char *, int));
+extern int wait P((int *));
+extern void _exit P((int));
+
+#ifndef __STDC__
+extern long time P((long *));
+#endif
+
+#ifdef NON_STD_SPRINTF
+extern char *sprintf();
+#else
+extern int sprintf();
+#endif /* NON_STD_SPRINTF */
+
+#undef aptr_t
diff --git a/re.c b/re.c
new file mode 100644
index 00000000..3b05e6bf
--- /dev/null
+++ b/re.c
@@ -0,0 +1,144 @@
+/*
+ * re.c - compile regular expressions.
+ */
+
+/*
+ * Copyright (C) 1991 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h"
+
+/* make_regexp - compile the pattern text held in s into a newly allocated Regexp; a compile error is fatal */
+Regexp *
+make_regexp(s, ignorecase, dfa)
+NODE *s; /* pattern is read from s->stptr / s->stlen; s is handed to free_temp() before returning */
+int ignorecase; /* nonzero: install casetable as the pattern's translate table */
+int dfa; /* nonzero: also build the DFA form (skipped when ignorecase is set) */
+{
+ Regexp *rp;
+ char *err;
+
+ emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
+ memset((char *) rp, 0, sizeof(*rp)); /* start from an all-zero Regexp */
+ emalloc(rp->pat.buffer, char *, 16, "make_regexp"); /* initial 16-byte pattern buffer */
+ rp->pat.allocated = 16; /* must agree with the size allocated on the previous line */
+ emalloc(rp->pat.fastmap, char *, 256, "make_regexp"); /* presumably one fastmap entry per char value - confirm in regex.c */
+
+ if (ignorecase)
+ rp->pat.translate = casetable;
+ else
+ rp->pat.translate = NULL;
+ if ((err = re_compile_pattern(s->stptr, (size_t) s->stlen, &(rp->pat))) != NULL)
+ fatal("%s: /%s/", err, s->stptr); /* non-NULL return is an error message */
+ if (dfa && !ignorecase) { /* the DFA form is only built for case-sensitive patterns */
+ regcompile(s->stptr, s->stlen, &(rp->dfareg), 1);
+ rp->dfa = 1; /* tells research() and refree() that dfareg is in use */
+ } else
+ rp->dfa = 0;
+ free_temp(s);
+ return rp; /* caller releases it with refree() */
+}
+
+int
+research(rp, str, len, need_start) /* search str[0..len) with rp; returns re_search()'s result, 1 for a DFA-only hit, or -1 when the DFA finds nothing */
+Regexp *rp; /* compiled pattern from make_regexp() */
+register char *str; /* must be writable with str[len] and str[len+1] accessible: both are touched below when rp->dfa is set */
+register int len;
+int need_start; /* nonzero: always run re_search() and return its result, even after a DFA hit */
+{
+ int count;
+ int try_backref;
+ char save1;
+ char save2;
+ char *ret = &save2; /* any non-NULL value, so the non-DFA case falls through to re_search() */
+
+ if (rp->dfa) {
+ save1 = str[len];
+ str[len] = '\n'; /* temporarily end the text with a newline for regexecute() */
+ save2 = str[len+1]; /* saved and restored below - presumably regexecute() may write here; confirm in dfa.c */
+ ret = regexecute(&(rp->dfareg), str, str+len+1, 0, &count,
+ &try_backref);
+ str[len] = save1; /* put the caller's bytes back */
+ str[len+1] = save2;
+ }
+ if (ret) {
+ if (need_start || rp->dfa == 0)
+ return re_search(&(rp->pat), str, len, 0, len, &(rp->regs));
+ else
+ return 1; /* DFA hit and no position requested: skip re_search() */
+ } else
+ return -1; /* regexecute() returned NULL */
+}
+
+void
+refree(rp) /* release a Regexp built by make_regexp() together with the buffers it owns */
+Regexp *rp; /* dereferenced unconditionally: must not be NULL */
+{
+ free(rp->pat.buffer);
+ free(rp->pat.fastmap);
+ if (rp->dfa) /* dfareg is only set up when make_regexp() set rp->dfa */
+ regfree(&(rp->dfareg));
+ free(rp);
+}
+
+void
+regerror(s) /* report the error message s through fatal() */
+const char *s; /* message text, printed verbatim */
+{
+ fatal("%s", s); /* s is data, not a format string: any '%' in it must not be interpreted */
+}
+
+Regexp *
+re_update(t) /* return the compiled regexp for node t, recompiling when the pattern text or the IGNORECASE setting changed */
+NODE *t; /* uses t->re_flags, re_exp, re_text, re_reg and re_cnt */
+{
+ NODE *t1;
+
+# define CASE 1 /* bit of re_flags holding the IGNORECASE value of the last compile; NOTE(review): assumes IGNORECASE is 0 or 1 */
+ if ((t->re_flags & CASE) == IGNORECASE) { /* case setting unchanged since the last compile */
+ if (t->re_flags & CONST) /* CONST set: the cached regexp is returned without evaluating re_exp */
+ return t->re_reg;
+ t1 = force_string(tree_eval(t->re_exp)); /* current pattern text */
+ if (t->re_text) {
+ if (cmp_nodes(t->re_text, t1) == 0) { /* text unchanged: reuse the cached regexp */
+ free_temp(t1);
+ return t->re_reg;
+ }
+ unref(t->re_text); /* text changed: drop the old copy */
+ }
+ t->re_text = dupnode(t1);
+ free_temp(t1);
+ } /* NOTE(review): when the test above fails and re_text is already cached, that cached text is recompiled without re-evaluating re_exp */
+ if (t->re_reg)
+ refree(t->re_reg); /* discard the previous compilation */
+ if (t->re_cnt) /* counter only advances once it is nonzero ... */
+ t->re_cnt++;
+ if (t->re_cnt > 10) /* ... and drops to 0 past 10; it is passed below as make_regexp()'s dfa argument */
+ t->re_cnt = 0;
+ if (!t->re_text) { /* no text cached yet: only reachable when the block above was skipped */
+ t1 = force_string(tree_eval(t->re_exp));
+ t->re_text = dupnode(t1);
+ free_temp(t1);
+ }
+ t->re_reg = make_regexp(t->re_text, IGNORECASE, t->re_cnt);
+ t->re_flags &= ~CASE; /* record the IGNORECASE value this compile used */
+ t->re_flags |= IGNORECASE;
+ return t->re_reg;
+}
diff --git a/regex.c b/regex.c
index a04a4398..e59a169a 100644
--- a/regex.c
+++ b/regex.c
@@ -1,177 +1,108 @@
-/* Extended regular expression matching and search.
- Copyright (C) 1985 Free Software Foundation, Inc.
-
- NO WARRANTY
-
- BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
-NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
-WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
-RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
-WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
-BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
-FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
-AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
-DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
-CORRECTION.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
-STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
-WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
-LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
-OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
-DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
-A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
-PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
-
- GENERAL PUBLIC LICENSE TO COPY
-
- 1. You may copy and distribute verbatim copies of this source file
-as you receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy a valid copyright notice "Copyright
-(C) 1985 Free Software Foundation, Inc."; and include following the
-copyright notice a verbatim copy of the above disclaimer of warranty
-and of this License. You may charge a distribution fee for the
-physical act of transferring a copy.
-
- 2. You may modify your copy or copies of this source file or
-any portion of it, and copy and distribute such modifications under
-the terms of Paragraph 1 above, provided that you also do the following:
-
- a) cause the modified files to carry prominent notices stating
- that you changed the files and the date of any change; and
-
- b) cause the whole of any work that you distribute or publish,
- that in whole or in part contains or is a derivative of this
- program or any part thereof, to be licensed at no charge to all
- third parties on terms identical to those contained in this
- License Agreement (except that you may choose to grant more extensive
- warranty protection to some or all third parties, at your option).
-
- c) You may charge a distribution fee for the physical act of
- transferring a copy, and you may at your option offer warranty
- protection in exchange for a fee.
-
-Mere aggregation of another unrelated program with this program (or its
-derivative) on a volume of a storage or distribution medium does not bring
-the other program under the scope of these terms.
-
- 3. You may copy and distribute this program (or a portion or derivative
-of it, under Paragraph 2) in object code or executable form under the terms
-of Paragraphs 1 and 2 above provided that you also do one of the following:
-
- a) accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of
- Paragraphs 1 and 2 above; or,
-
- b) accompany it with a written offer, valid for at least three
- years, to give any third party free (except for a nominal
- shipping charge) a complete machine-readable copy of the
- corresponding source code, to be distributed under the terms of
- Paragraphs 1 and 2 above; or,
-
- c) accompany it with the information you received as to where the
- corresponding source code may be obtained. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form alone.)
-
-For an executable file, complete source code means all the source code for
-all modules it contains; but, as a special exception, it need not include
-source code for modules which are standard libraries that accompany the
-operating system on which the executable file runs.
-
- 4. You may not copy, sublicense, distribute or transfer this program
-except as expressly provided under this License Agreement. Any attempt
-otherwise to copy, sublicense, distribute or transfer this program is void and
-your rights to use the program under this License agreement shall be
-automatically terminated. However, parties who have received computer
-software programs from you with this License Agreement will not have
-their licenses terminated so long as such parties remain in full compliance.
-
- 5. If you wish to incorporate parts of this program into other free
-programs whose distribution conditions are different, write to the Free
-Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
-worked out a simple rule that can be stated here, but we will often permit
-this. We will be guided by the two goals of preserving the free status of
-all derivatives of our free software and of promoting the sharing and reuse of
-software.
-
-
-In other words, you are welcome to use, share and improve this program.
-You are forbidden to forbid anyone else to use, share and improve
-what you give them. Help stamp out software-hoarding! */
-
-#ifdef MSDOS
-#include <malloc.h>
-static void init_syntax_once(void );
-extern int re_set_syntax(int syntax);
-extern char *re_compile_pattern(char *pattern,int size,struct re_pattern_buffer *bufp);
-static int store_jump(char *from,char opcode,char *to);
-static int insert_jump(char op,char *from,char *to,char *current_end);
-extern void re_compile_fastmap(struct re_pattern_buffer *bufp);
-extern int re_search(struct re_pattern_buffer *pbufp,char *string,int size,int startpos,int range,struct re_registers *regs);
-extern int re_search_2(struct re_pattern_buffer *pbufp,char *string1,int size1,char *string2,int size2,int startpos,int range,struct re_registers *regs,int mstop);
-extern int re_match(struct re_pattern_buffer *pbufp,char *string,int size,int pos,struct re_registers *regs);
-extern int re_match_2(struct re_pattern_buffer *pbufp,unsigned char *string1,int size1,unsigned char *string2,int size2,int pos,struct re_registers *regs,int mstop);
-static int bcmp_translate(unsigned char *s1,unsigned char *s2,int len,unsigned char *translate);
-extern char *re_comp(char *s);
-extern int re_exec(char *s);
-#endif
+/* Extended regular expression matching and search library.
+ Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 1, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+
+/* To test, compile with -Dtest. This Dtestable feature turns this into
+ a self-contained program which reads a pattern, describes how it
+ compiles, then reads a string and searches for it.
+
+ On the other hand, if you compile with both -Dtest and -Dcanned you
+ can run some tests we've already thought of. */
-/* To test, compile with -Dtest.
- This Dtestable feature turns this into a self-contained program
- which reads a pattern, describes how it compiles,
- then reads a string and searches for it. */
#ifdef emacs
/* The `emacs' switch turns on certain special matching commands
- that make sense only in emacs. */
+ that make sense only in emacs. */
-#include "config.h"
#include "lisp.h"
#include "buffer.h"
#include "syntax.h"
-#else /* not emacs */
+/* We write fatal error messages on standard error. */
+#include <stdio.h>
-#ifdef BCOPY_MISSING
-#define bcopy(s,d,n) memcpy((d),(s),(n))
-#define bcmp(s1,s2,n) memcmp((s1),(s2),(n))
-#define bzero(s,n) memset((s),0,(n))
-#else
-void bcopy();
-int bcmp();
-void bzero();
-#endif
+/* isalpha(3) etc. are used for the character classes. */
+#include <ctype.h>
+
+#else /* not emacs */
+
+#include "awk.h"
+#define NO_ALLOCA /* try it out for now */
+#ifndef NO_ALLOCA
/* Make alloca work the best possible way. */
#ifdef __GNUC__
+#ifndef atarist
+#ifndef alloca
#define alloca __builtin_alloca
+#endif
+#endif /* atarist */
#else
#ifdef sparc
#include <alloca.h>
+#else
+char *alloca ();
#endif
-#endif
-
-/*
- * Define the syntax stuff, so we can do the \<...\> things.
- */
-
-#ifndef Sword /* must be non-zero in some of the tests below... */
+#endif /* __GNUC__ */
+
+#define FREE_AND_RETURN_VOID(stackb) return
+#define FREE_AND_RETURN(stackb,val) return(val)
+#define DOUBLE_STACK(stackx,stackb,len) \
+ (stackx = (unsigned char **) alloca (2 * len \
+ * sizeof (unsigned char *)),\
+ /* Only copy what is in use. */ \
+ (unsigned char **) memcpy (stackx, stackb, len * sizeof (char *)))
+#else /* NO_ALLOCA defined */
+#define FREE_AND_RETURN_VOID(stackb) free(stackb);return
+#define FREE_AND_RETURN(stackb,val) free(stackb);return(val)
+#define DOUBLE_STACK(stackx,stackb,len) \
+ (unsigned char **) realloc (stackb, 2 * len * sizeof (unsigned char *))
+#endif /* NO_ALLOCA */
+
+static void store_jump P((char *, int, char *));
+static void insert_jump P((int, char *, char *, char *));
+static void store_jump_n P((char *, int, char *, unsigned));
+static void insert_jump_n P((int, char *, char *, char *, unsigned));
+static void insert_op_2 P((int, char *, char *, int, int ));
+static int memcmp_translate P((unsigned char *, unsigned char *,
+ int, unsigned char *));
+
+
+/* Define the syntax stuff, so we can do the \<, \>, etc. */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+#ifndef Sword
#define Sword 1
#endif
#define SYNTAX(c) re_syntax_table[c]
+
#ifdef SYNTAX_TABLE
char *re_syntax_table;
-#else
+#else /* not SYNTAX_TABLE */
static char re_syntax_table[256];
+static void init_syntax_once P((void));
+
static void
init_syntax_once ()
@@ -182,7 +113,7 @@ init_syntax_once ()
if (done)
return;
- bzero (re_syntax_table, sizeof re_syntax_table);
+ memset (re_syntax_table, 0, sizeof re_syntax_table);
for (c = 'a'; c <= 'z'; c++)
re_syntax_table[c] = Sword;
@@ -192,42 +123,167 @@ init_syntax_once ()
for (c = '0'; c <= '9'; c++)
re_syntax_table[c] = Sword;
+
+ /* Add specific syntax for ISO Latin-1. */
+ for (c = 0300; c <= 0377; c++)
+ re_syntax_table[c] = Sword;
+ re_syntax_table[0327] = 0;
+ re_syntax_table[0367] = 0;
done = 1;
}
#endif /* SYNTAX_TABLE */
-#endif /* not emacs */
+#undef P
+#endif /* emacs */
+
+/* Sequents are missing isgraph. */
+#ifndef isgraph
+#define isgraph(c) (isprint((c)) && !isspace((c)))
+#endif
+
+/* Get the interface, including the syntax bits. */
#include "regex.h"
+
+/* These are the command codes that appear in compiled regular
+ expressions, one per byte. Some command codes are followed by
+ argument bytes. A command code can specify any interpretation
+ whatsoever for its arguments. Zero-bytes may appear in the compiled
+ regular expression.
+
+ The value of `exactn' is needed in search.c (search_buffer) in emacs.
+ So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+ `exactn' we use here must also be 1. */
+
+enum regexpcode
+ {
+ unused=0,
+ exactn=1, /* Followed by one byte giving n, then by n literal bytes. */
+ begline, /* Fail unless at beginning of line. */
+ endline, /* Fail unless at end of line. */
+ jump, /* Followed by two bytes giving relative address to jump to. */
+ on_failure_jump, /* Followed by two bytes giving relative address of
+ place to resume at in case of failure. */
+ finalize_jump, /* Throw away latest failure point and then jump to
+ address. */
+ maybe_finalize_jump, /* Like jump but finalize if safe to do so.
+ This is used to jump back to the beginning
+ of a repeat. If the command that follows
+ this jump is clearly incompatible with the
+ one at the beginning of the repeat, such that
+ we can be sure that there is no use backtracking
+ out of repetitions already completed,
+ then we finalize. */
+ dummy_failure_jump, /* Jump, and push a dummy failure point. This
+ failure point will be thrown away if an attempt
+ is made to use it for a failure. A + construct
+ makes this before the first repeat. Also
+ use it as an intermediary kind of jump when
+ compiling an or construct. */
+ succeed_n, /* Used like on_failure_jump except has to succeed n times;
+ then gets turned into an on_failure_jump. The relative
+ address following it is useless until then. The
+ address is followed by two bytes containing n. */
+ jump_n, /* Similar to jump, but jump n times only; also the relative
+ address following is in turn followed by yet two more bytes
+ containing n. */
+ set_number_at, /* Set the following relative location to the
+ subsequent number. */
+ anychar, /* Matches any (more or less) one character. */
+ charset, /* Matches any one char belonging to specified set.
+ First following byte is number of bitmap bytes.
+ Then come bytes for a bitmap saying which chars are in.
+ Bits in each byte are ordered low-bit-first.
+ A character is in the set if its bit is 1.
+ A character too large to have a bit in the map
+ is automatically not in the set. */
+ charset_not, /* Same parameters as charset, but match any character
+ that is not one of those specified. */
+ start_memory, /* Start remembering the text that is matched, for
+ storing in a memory register. Followed by one
+ byte containing the register number. Register numbers
+ must be in the range 0 through RE_NREGS. */
+ stop_memory, /* Stop remembering the text that is matched
+ and store it in a memory register. Followed by
+ one byte containing the register number. Register
+ numbers must be in the range 0 through RE_NREGS. */
+ duplicate, /* Match a duplicate of something remembered.
+ Followed by one byte containing the index of the memory
+ register. */
+ before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+ begbuf, /* Succeeds if at beginning of buffer. */
+ endbuf, /* Succeeds if at end of buffer. */
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound,/* Succeeds if not at a word boundary. */
+ syntaxspec, /* Matches any character whose syntax is specified.
+ Followed by a byte which contains a syntax code,
+ e.g., Sword. */
+ notsyntaxspec /* Matches any character whose syntax differs from
+ that specified. */
+ };
+
+
/* Number of failure points to allocate space for initially,
- when matching. If this number is exceeded, more space is allocated,
- so it is not a hard limit. */
+ when matching. If this number is exceeded, more space is allocated,
+ so it is not a hard limit. */
#ifndef NFAILURES
#define NFAILURES 80
-#endif /* NFAILURES */
-
-/* width of a byte in bits */
-
-#define BYTEWIDTH 8
+#endif
+#ifdef CHAR_UNSIGNED
+#define SIGN_EXTEND_CHAR(c) ((c)>(char)127?(c)-256:(c)) /* for IBM RT */
+#endif
#ifndef SIGN_EXTEND_CHAR
#define SIGN_EXTEND_CHAR(x) (x)
#endif
-
-static int obscure_syntax = 0;
+
-/* Specify the precise syntax of regexp for compilation.
- This provides for compatibility for various utilities
- which historically have different, incompatible syntaxes.
-
- The argument SYNTAX is a bit-mask containing the two bits
- RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
+/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+#define STORE_NUMBER(destination, number) \
+ { (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; }
+
+/* Same as STORE_NUMBER, except increment the destination pointer to
+ the byte after where the number is stored. Watch out that values for
+ DESTINATION such as p + 1 won't work, whereas p will. */
+#define STORE_NUMBER_AND_INCR(destination, number) \
+ { STORE_NUMBER(destination, number); \
+ (destination) += 2; }
+
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+ at SOURCE. */
+#define EXTRACT_NUMBER(destination, source) \
+ { (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; }
+
+/* Same as EXTRACT_NUMBER, except increment the pointer for source to
+ point to second byte of SOURCE. Note that SOURCE has to be a value
+ such as p, not, e.g., p + 1. */
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ { EXTRACT_NUMBER (destination, source); \
+ (source) += 2; }
+
+
+/* Specify the precise syntax of regexps for compilation. This provides
+ for compatibility for various utilities which historically have
+ different, incompatible syntaxes.
+
+ The argument SYNTAX is a bit-mask comprised of the various bits
+ defined in regex.h. */
int
re_set_syntax (syntax)
+ int syntax;
{
int ret;
@@ -235,91 +291,115 @@ re_set_syntax (syntax)
obscure_syntax = syntax;
return ret;
}
-
-/* re_compile_pattern takes a regular-expression string
- and converts it into a buffer full of byte commands for matching.
- PATTERN is the address of the pattern string
- SIZE is the length of it.
- BUFP is a struct re_pattern_buffer * which points to the info
- on where to store the byte commands.
- This structure contains a char * which points to the
- actual space, which should have been obtained with malloc.
- re_compile_pattern may use realloc to grow the buffer space.
+/* Set by re_set_syntax to the current regexp syntax to recognize. */
+int obscure_syntax = 0;
- The number of bytes of commands can be found out by looking in
- the struct re_pattern_buffer that bufp pointed to,
- after re_compile_pattern returns.
-*/
-#define PATPUSH(ch) (*b++ = (char) (ch))
+
+/* Macros for re_compile_pattern, which is found below these definitions. */
-#define PATFETCH(c) \
- {if (p == pend) goto end_of_pattern; \
- c = * (unsigned char *) p++; \
+#define CHAR_CLASS_MAX_LENGTH 6
+
+/* Fetch the next character in the uncompiled pattern, translating it if
+ necessary. */
+#define PATFETCH(c) \
+ {if (p == pend) goto end_of_pattern; \
+ c = * (unsigned char *) p++; \
if (translate) c = translate[c]; }
-#define PATFETCH_RAW(c) \
- {if (p == pend) goto end_of_pattern; \
+/* Fetch the next character in the uncompiled pattern, with no
+ translation. */
+#define PATFETCH_RAW(c) \
+ {if (p == pend) goto end_of_pattern; \
c = * (unsigned char *) p++; }
#define PATUNFETCH p--
-#ifdef MSDOS
-#define MaxAllocation (1<<14)
-#else
-#define MaxAllocation (1<<16)
-#endif
-#define EXTEND_BUFFER \
- { char *old_buffer = bufp->buffer; \
- if (bufp->allocated == MaxAllocation) goto too_big; \
- bufp->allocated *= 2; \
- if (bufp->allocated > MaxAllocation) bufp->allocated = MaxAllocation; \
- if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
- goto memory_exhausted; \
- c = bufp->buffer - old_buffer; \
- b += c; \
- if (fixup_jump) \
- fixup_jump += c; \
- if (laststart) \
- laststart += c; \
- begalt += c; \
- if (pending_exact) \
- pending_exact += c; \
+
+/* If the buffer isn't allocated when it comes in, use this. */
+#define INIT_BUF_SIZE 28
+
+/* Make sure we have at least N more bytes of space in buffer. */
+#define GET_BUFFER_SPACE(n) \
+ { \
+ while (b - bufp->buffer + (n) >= bufp->allocated) \
+ EXTEND_BUFFER; \
}
-#ifdef NEVER
-#define EXTEND_BUFFER \
- { unsigned b_off = b - bufp->buffer, \
- f_off, l_off, p_off, \
- beg_off = begalt - bufp->buffer; \
- if (fixup_jump) \
- f_off = fixup_jump - bufp->buffer; \
- if (laststart) \
- l_off = laststart - bufp->buffer; \
- if (pending_exact) \
- p_off = pending_exact - bufp->buffer; \
- if (bufp->allocated == MaxAllocation) goto too_big; \
- bufp->allocated *= 2; \
- if (bufp->allocated > MaxAllocation) bufp->allocated = MaxAllocation; \
- if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
- goto memory_exhausted; \
- b = bufp->buffer + b_off; \
- if (fixup_jump) \
- fixup_jump = bufp->buffer + f_off; \
- if (laststart) \
- laststart = bufp->buffer + l_off; \
- begalt = bufp->buffer + beg_off; \
- if (pending_exact) \
- pending_exact = bufp->buffer + p_off; \
+/* Make sure we have one more byte of buffer space and then add CH to it. */
+#define BUFPUSH(ch) \
+ { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (char) (ch); \
}
-#endif
-static int store_jump (), insert_jump ();
+
+/* Double the size of the buffer via reallocation (capped at 1 << 16
+ bytes) and reset the pointers that pointed into the old allocation to
+ point to the correct places in the new allocation. If the buffer is
+ already 1 << 16 bytes, go to too_big; if realloc fails, go to
+ memory_exhausted. */
+#define EXTEND_BUFFER \
+ { char *old_buffer = bufp->buffer; \
+ if (bufp->allocated == (1L<<16)) goto too_big; \
+ bufp->allocated *= 2; \
+ if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \
+ bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated); \
+ if (bufp->buffer == 0) \
+ goto memory_exhausted; \
+ b = (b - old_buffer) + bufp->buffer; \
+ if (fixup_jump) \
+ fixup_jump = (fixup_jump - old_buffer) + bufp->buffer; \
+ if (laststart) \
+ laststart = (laststart - old_buffer) + bufp->buffer; \
+ begalt = (begalt - old_buffer) + bufp->buffer; \
+ if (pending_exact) \
+ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
+ }
+
+/* Set the bit for character C in a character set list. */
+#define SET_LIST_BIT(c) (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
+
+/* Get the next unsigned number in the uncompiled pattern. */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (isdigit (c)) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+/* Subroutines for re_compile_pattern. */
+static void store_jump (), insert_jump (), store_jump_n (),
+ insert_jump_n (), insert_op_2 ();
+
+
+/* re_compile_pattern takes a regular-expression string
+ and converts it into a buffer full of byte commands for matching.
+
+ PATTERN is the address of the pattern string
+ SIZE is the length of it.
+ BUFP is a struct re_pattern_buffer * which points to the info
+ on where to store the byte commands.
+ This structure contains a char * which points to the
+ actual space, which should have been obtained with malloc.
+ re_compile_pattern may use realloc to grow the buffer space.
+
+ The number of bytes of commands can be found out by looking in
+ the `struct re_pattern_buffer' that bufp pointed to, after
+ re_compile_pattern returns. */
char *
re_compile_pattern (pattern, size, bufp)
char *pattern;
- int size;
+ size_t size;
struct re_pattern_buffer *bufp;
{
register char *b = bufp->buffer;
@@ -329,36 +409,46 @@ re_compile_pattern (pattern, size, bufp)
char *p1;
unsigned char *translate = (unsigned char *) bufp->translate;
- /* address of the count-byte of the most recently inserted "exactn" command.
- This makes it possible to tell whether a new exact-match character
- can be added to that command or requires a new "exactn" command. */
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell whether a new exact-match
+ character can be added to that command or requires a new `exactn'
+ command. */
char *pending_exact = 0;
- /* address of the place where a forward-jump should go
- to the end of the containing expression.
- Each alternative of an "or", except the last, ends with a forward-jump
- of this sort. */
+ /* Address of the place where a forward-jump should go to the end of
+ the containing expression. Each alternative of an `or', except the
+ last, ends with a forward-jump of this sort. */
char *fixup_jump = 0;
- /* address of start of the most recently finished expression.
- This tells postfix * where to find the start of its operand. */
+ /* Address of start of the most recently finished expression.
+ This tells postfix * where to find the start of its operand. */
char *laststart = 0;
- /* In processing a repeat, 1 means zero matches is allowed */
+ /* In processing a repeat, 1 means zero matches is allowed. */
char zero_times_ok;
- /* In processing a repeat, 1 means many matches is allowed */
+ /* In processing a repeat, 1 means many matches is allowed. */
char many_times_ok;
- /* address of beginning of regexp, or inside of last \( */
+ /* Address of beginning of regexp, or inside of last \(. */
char *begalt = b;
+ /* In processing an interval, at least this many matches must be made. */
+ int lower_bound;
+
+ /* In processing an interval, at most this many matches can be made. */
+ int upper_bound;
+
+ /* Place in pattern (i.e., the {) to which to go back if the interval
+ is invalid. */
+ char *beg_interval = 0;
+
/* Stack of information saved by \( and restored by \).
Four stack elements are pushed by each \(:
First, the value of b.
@@ -372,7 +462,8 @@ re_compile_pattern (pattern, size, bufp)
int *stackt;
/* Counts \('s as they are encountered. Remembered for the matching \),
- where it becomes the "register number" to put in the stop_memory command */
+ where it becomes the register number to put in the stop_memory
+ command. */
int regnum = 1;
@@ -380,94 +471,117 @@ re_compile_pattern (pattern, size, bufp)
#ifndef emacs
#ifndef SYNTAX_TABLE
- /*
- * Initialize the syntax table.
- */
+ /* Initialize the syntax table. */
init_syntax_once();
#endif
#endif
if (bufp->allocated == 0)
{
- bufp->allocated = 28;
+ bufp->allocated = INIT_BUF_SIZE;
if (bufp->buffer)
- /* EXTEND_BUFFER loses when bufp->allocated is 0 */
- bufp->buffer = (char *) realloc (bufp->buffer, 28);
+ /* EXTEND_BUFFER loses when bufp->allocated is 0. */
+ bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);
else
- /* Caller did not allocate a buffer. Do it for him */
- bufp->buffer = (char *) malloc (28);
+ /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = (char *) malloc (INIT_BUF_SIZE);
if (!bufp->buffer) goto memory_exhausted;
begalt = b = bufp->buffer;
}
while (p != pend)
{
- if (b - bufp->buffer > bufp->allocated - 10)
- /* Note that EXTEND_BUFFER clobbers c */
- EXTEND_BUFFER;
-
PATFETCH (c);
switch (c)
{
case '$':
- if (obscure_syntax & RE_TIGHT_VBAR)
- {
- if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
- goto normal_char;
- /* Make operand of last vbar end before this `$'. */
- if (fixup_jump)
- store_jump (fixup_jump, jump, b);
- fixup_jump = 0;
- PATPUSH (endline);
- break;
- }
-
- /* $ means succeed if at end of line, but only in special contexts.
- If randomly in the middle of a pattern, it is a normal character. */
- if (p == pend || *p == '\n'
- || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
- || (obscure_syntax & RE_NO_BK_PARENS
- ? *p == ')'
- : *p == '\\' && p[1] == ')')
- || (obscure_syntax & RE_NO_BK_VBAR
- ? *p == '|'
- : *p == '\\' && p[1] == '|'))
- {
- PATPUSH (endline);
- break;
- }
- goto normal_char;
-
+ {
+ char *p1 = p;
+ /* When testing what follows the $,
+ look past the \-constructs that don't consume anything. */
+ if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+ while (p1 != pend)
+ {
+ if (*p1 == '\\' && p1 + 1 != pend
+ && (p1[1] == '<' || p1[1] == '>'
+ || p1[1] == '`' || p1[1] == '\''
+#ifdef emacs
+ || p1[1] == '='
+#endif
+ || p1[1] == 'b' || p1[1] == 'B'))
+ p1 += 2;
+ else
+ break;
+ }
+ if (obscure_syntax & RE_TIGHT_VBAR)
+ {
+ if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend)
+ goto normal_char;
+ /* Make operand of last vbar end before this `$'. */
+ if (fixup_jump)
+ store_jump (fixup_jump, jump, b);
+ fixup_jump = 0;
+ BUFPUSH (endline);
+ break;
+ }
+ /* $ means succeed if at end of line, but only in special contexts.
+ If validly in the middle of a pattern, it is a normal character. */
+
+ if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend)
+ goto invalid_pattern;
+ if (p1 == pend || *p1 == '\n'
+ || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
+ || (obscure_syntax & RE_NO_BK_PARENS
+ ? *p1 == ')'
+ : *p1 == '\\' && p1[1] == ')')
+ || (obscure_syntax & RE_NO_BK_VBAR
+ ? *p1 == '|'
+ : *p1 == '\\' && p1[1] == '|'))
+ {
+ BUFPUSH (endline);
+ break;
+ }
+ goto normal_char;
+ }
case '^':
- /* ^ means succeed if at beg of line, but only if no preceding pattern. */
-
- if (laststart && p[-2] != '\n'
- && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+ /* ^ means succeed if at beg of line, but only if no preceding
+ pattern. */
+
+ if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart)
+ goto invalid_pattern;
+ if (laststart && p - 2 >= pattern && p[-2] != '\n'
+ && !(obscure_syntax & RE_CONTEXT_INDEP_OPS))
goto normal_char;
if (obscure_syntax & RE_TIGHT_VBAR)
{
if (p != pattern + 1
&& ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
goto normal_char;
- PATPUSH (begline);
+ BUFPUSH (begline);
begalt = b;
}
else
- PATPUSH (begline);
+ BUFPUSH (begline);
break;
case '+':
case '?':
- if (obscure_syntax & RE_BK_PLUS_QM)
+ if ((obscure_syntax & RE_BK_PLUS_QM)
+ || (obscure_syntax & RE_LIMITED_OPS))
goto normal_char;
handle_plus:
case '*':
/* If there is no previous pattern, char not special. */
- if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
- goto normal_char;
+ if (!laststart)
+ {
+ if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+ goto invalid_pattern;
+ else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
/* If there is a sequence of repetition chars,
- collapse it down to equivalent to just one. */
+ collapse it down to just one. */
zero_times_ok = 0;
many_times_ok = 0;
while (1)
@@ -504,83 +618,201 @@ re_compile_pattern (pattern, size, bufp)
/* Star, etc. applied to an empty pattern is equivalent
to an empty pattern. */
- if (!laststart)
+ if (!laststart)
break;
- /* Now we know whether 0 matches is allowed,
- and whether 2 or more matches is allowed. */
+ /* Now we know whether or not zero matches are allowed
+ and also whether or not two or more matches are allowed. */
if (many_times_ok)
{
- /* If more than one repetition is allowed,
- put in a backward jump at the end. */
+ /* If more than one repetition is allowed, put in at the
+ end a backward relative jump from b to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump). */
+ GET_BUFFER_SPACE (3);
store_jump (b, maybe_finalize_jump, laststart - 3);
- b += 3;
+ b += 3; /* Because store_jump put stuff here. */
}
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ GET_BUFFER_SPACE (3);
insert_jump (on_failure_jump, laststart, b + 3, b);
pending_exact = 0;
b += 3;
if (!zero_times_ok)
{
- /* At least one repetition required: insert before the loop
- a skip over the initial on-failure-jump instruction */
- insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
+ /* At least one repetition is required, so insert a
+ dummy-failure before the initial on-failure-jump
+ instruction of the loop. This effects a skip over that
+ instruction the first time we hit that loop. */
+ GET_BUFFER_SPACE (6);
+ insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
b += 3;
}
break;
case '.':
laststart = b;
- PATPUSH (anychar);
+ BUFPUSH (anychar);
break;
- case '[':
+ case '[':
+ if (p == pend)
+ goto invalid_pattern;
while (b - bufp->buffer
> bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
- /* Note that EXTEND_BUFFER clobbers c */
EXTEND_BUFFER;
laststart = b;
if (*p == '^')
- PATPUSH (charset_not), p++;
+ {
+ BUFPUSH (charset_not);
+ p++;
+ }
else
- PATPUSH (charset);
+ BUFPUSH (charset);
p1 = p;
- PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+ BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
/* Clear the whole map */
- bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
- /* Read in characters and ranges, setting map bits */
+ memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)
+ SET_LIST_BIT ('\n');
+
+
+ /* Read in characters and ranges, setting map bits. */
while (1)
{
- PATFETCH (c);
+ /* Don't translate while fetching, in case it's a range bound.
+ When we set the bit for the character, we translate it. */
+ PATFETCH_RAW (c);
- /* If awk, \ escapes characters inside [...]. */
+ /* If set, \ escapes characters when inside [...]. */
if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\')
{
PATFETCH(c1);
- b[c1 / BYTEWIDTH] |= 1 << (c1 % BYTEWIDTH);
+ SET_LIST_BIT (c1);
continue;
}
-
- if (c == ']' && p != p1 + 1) break;
- if (*p == '-' && p[1] != ']')
+ if (c == ']')
+ {
+ if (p == p1 + 1)
+ {
+ /* If this is an empty bracket expression. */
+ if ((obscure_syntax & RE_NO_EMPTY_BRACKETS)
+ && p == pend)
+ goto invalid_pattern;
+ }
+ else
+ /* Stop if this isn't merely a ] inside a bracket
+ expression, but rather the end of a bracket
+ expression. */
+ break;
+ }
+ /* Get a range. */
+ if (p[0] == '-' && p[1] != ']')
{
- PATFETCH (c1);
- PATFETCH (c1);
- while (c <= c1)
- b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
- }
+ PATFETCH (c1);
+ /* Don't translate the range bounds while fetching them. */
+ PATFETCH_RAW (c1);
+
+ if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1)
+ goto invalid_pattern;
+
+ if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END)
+ && c1 == '-' && *p != ']')
+ goto invalid_pattern;
+
+ while (c <= c1)
+ {
+ /* Translate each char that's in the range. */
+ if (translate)
+ SET_LIST_BIT (translate[c]);
+ else
+ SET_LIST_BIT (c);
+ c++;
+ }
+ }
+ else if ((obscure_syntax & RE_CHAR_CLASSES)
+ && c == '[' && p[0] == ':')
+ {
+ /* Longest valid character class word has six characters. */
+ char str[CHAR_CLASS_MAX_LENGTH];
+ PATFETCH (c);
+ c1 = 0;
+ /* If no ] at end. */
+ if (p == pend)
+ goto invalid_pattern;
+ while (1)
+ {
+ /* Don't translate the ``character class'' characters. */
+ PATFETCH_RAW (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+ if (p == pend
+ || c == ']' /* End of the bracket expression. */
+ || p[0] != ']'
+ || p + 1 == pend
+ || (strcmp (str, "alpha") != 0
+ && strcmp (str, "upper") != 0
+ && strcmp (str, "lower") != 0
+ && strcmp (str, "digit") != 0
+ && strcmp (str, "alnum") != 0
+ && strcmp (str, "xdigit") != 0
+ && strcmp (str, "space") != 0
+ && strcmp (str, "print") != 0
+ && strcmp (str, "punct") != 0
+ && strcmp (str, "graph") != 0
+ && strcmp (str, "cntrl") != 0))
+ {
+ /* Undo the ending character, the letters, and leave
+ the leading : and [ (but set bits for them). */
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ }
+ else
+ {
+ /* The ] at the end of the character class. */
+ PATFETCH (c);
+ if (c != ']')
+ goto invalid_pattern;
+ for (c = 0; c < (1 << BYTEWIDTH); c++)
+ {
+ if ((strcmp (str, "alpha") == 0 && isalpha (c))
+ || (strcmp (str, "upper") == 0 && isupper (c))
+ || (strcmp (str, "lower") == 0 && islower (c))
+ || (strcmp (str, "digit") == 0 && isdigit (c))
+ || (strcmp (str, "alnum") == 0 && isalnum (c))
+ || (strcmp (str, "xdigit") == 0 && isxdigit (c))
+ || (strcmp (str, "space") == 0 && isspace (c))
+ || (strcmp (str, "print") == 0 && isprint (c))
+ || (strcmp (str, "punct") == 0 && ispunct (c))
+ || (strcmp (str, "graph") == 0 && isgraph (c))
+ || (strcmp (str, "cntrl") == 0 && iscntrl (c)))
+ SET_LIST_BIT (c);
+ }
+ }
+ }
+ else if (translate)
+ SET_LIST_BIT (translate[c]);
else
- {
- b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
- }
+ SET_LIST_BIT (c);
}
- /* Discard any bitmap bytes that are all 0 at the end of the map.
- Decrement the map-length byte too. */
- while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
- b[-1]--;
- b += b[-1];
- break;
+
+ /* Discard any character set/class bitmap bytes that are all
+ 0 at the end of the map. Decrement the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+ break;
case '(':
if (! (obscure_syntax & RE_NO_BK_PARENS))
@@ -594,18 +826,28 @@ re_compile_pattern (pattern, size, bufp)
else
goto handle_close;
- case '\n':
+ case '\n':
if (! (obscure_syntax & RE_NEWLINE_OR))
goto normal_char;
else
goto handle_bar;
case '|':
- if (! (obscure_syntax & RE_NO_BK_VBAR))
+ if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+ && (! laststart || p == pend))
+ goto invalid_pattern;
+ else if (! (obscure_syntax & RE_NO_BK_VBAR))
goto normal_char;
else
goto handle_bar;
+ case '{':
+ if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ && (obscure_syntax & RE_INTERVALS)))
+ goto normal_char;
+ else
+ goto handle_interval;
+
case '\\':
if (p == pend) goto invalid_pattern;
PATFETCH_RAW (c);
@@ -616,12 +858,15 @@ re_compile_pattern (pattern, size, bufp)
goto normal_backsl;
handle_open:
if (stackp == stacke) goto nesting_too_deep;
+
+ /* Laststart should point to the start_memory that we are about
+ to push (unless the pattern has RE_NREGS or more ('s). */
+ *stackp++ = b - bufp->buffer;
if (regnum < RE_NREGS)
{
- PATPUSH (start_memory);
- PATPUSH (regnum);
+ BUFPUSH (start_memory);
+ BUFPUSH (regnum);
}
- *stackp++ = b - bufp->buffer;
*stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
*stackp++ = regnum++;
*stackp++ = begalt - bufp->buffer;
@@ -640,83 +885,242 @@ re_compile_pattern (pattern, size, bufp)
store_jump (fixup_jump, jump, b);
if (stackp[-1] < RE_NREGS)
{
- PATPUSH (stop_memory);
- PATPUSH (stackp[-1]);
+ BUFPUSH (stop_memory);
+ BUFPUSH (stackp[-1]);
}
stackp -= 2;
- fixup_jump = 0;
- if (*stackp)
- fixup_jump = *stackp + bufp->buffer - 1;
- laststart = *--stackp + bufp->buffer;
+ fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
+ laststart = *--stackp + bufp->buffer;
break;
case '|':
- if (obscure_syntax & RE_NO_BK_VBAR)
+ if ((obscure_syntax & RE_LIMITED_OPS)
+ || (obscure_syntax & RE_NO_BK_VBAR))
goto normal_backsl;
handle_bar:
- insert_jump (on_failure_jump, begalt, b + 6, b);
+ if (obscure_syntax & RE_LIMITED_OPS)
+ goto normal_char;
+ /* Insert before the previous alternative a jump which
+ jumps to this alternative if the former fails. */
+ GET_BUFFER_SPACE (6);
+ insert_jump (on_failure_jump, begalt, b + 6, b);
pending_exact = 0;
b += 3;
- if (fixup_jump)
+ /* The alternative before the previous alternative has a
+ jump after it which gets executed if it gets matched.
+ Adjust that jump so it will jump to the previous
+ alternative's analogous jump (put in below, which in
+ turn will jump to the next (if any) alternative's such
+ jump, etc.). The last such jump jumps to the correct
+ final destination. */
+ if (fixup_jump)
store_jump (fixup_jump, jump, b);
- fixup_jump = b;
- b += 3;
- laststart = 0;
+
+ /* Leave space for a jump after previous alternative---to be
+ filled in later. */
+ fixup_jump = b;
+ b += 3;
+
+ laststart = 0;
begalt = b;
break;
+ case '{':
+ if (! (obscure_syntax & RE_INTERVALS)
+ /* Let \{ be a literal. */
+ || ((obscure_syntax & RE_INTERVALS)
+ && (obscure_syntax & RE_NO_BK_CURLY_BRACES))
+ /* If it's the string "\{". */
+ || (p - 2 == pattern && p == pend))
+ goto normal_backsl;
+ handle_interval:
+ beg_interval = p - 1; /* The {. */
+ /* If there is no previous pattern, this isn't an interval. */
+ if (!laststart)
+ {
+ if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+ goto invalid_pattern;
+ else
+ goto normal_backsl;
+ }
+ /* It also isn't an interval if not preceded by an re
+ matching a single character or subexpression, or if
+ the current type of intervals can't handle back
+ references and the previous thing is a back reference. */
+ if (! (*laststart == anychar
+ || *laststart == charset
+ || *laststart == charset_not
+ || *laststart == start_memory
+ || (*laststart == exactn && laststart[1] == 1)
+ || (! (obscure_syntax & RE_NO_BK_REFS)
+ && *laststart == duplicate)))
+ {
+ if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ goto normal_char;
+
+ /* Posix extended syntax is handled in previous
+ statement; this is for Posix basic syntax. */
+ if (obscure_syntax & RE_INTERVALS)
+ goto invalid_pattern;
+
+ goto normal_backsl;
+ }
+ lower_bound = -1; /* So can see if are set. */
+ upper_bound = -1;
+ GET_UNSIGNED_NUMBER (lower_bound);
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if (upper_bound < 0)
+ upper_bound = RE_DUP_MAX;
+ }
+ if (upper_bound < 0)
+ upper_bound = lower_bound;
+ if (! (obscure_syntax & RE_NO_BK_CURLY_BRACES))
+ {
+ if (c != '\\')
+ goto invalid_pattern;
+ PATFETCH (c);
+ }
+ if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound
+ || ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ && p != pend && *p == '{'))
+ {
+ if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ goto unfetch_interval;
+ else
+ goto invalid_pattern;
+ }
+
+ /* If upper_bound is zero, don't want to succeed at all;
+ jump from laststart to b + 3, which will be the end of
+ the buffer after this jump is inserted. */
+
+ if (upper_bound == 0)
+ {
+ GET_BUFFER_SPACE (3);
+ insert_jump (jump, laststart, b + 3, b);
+ b += 3;
+ }
+
+ /* Otherwise, after lower_bound number of succeeds, jump
+ to after the jump_n which will be inserted at the end
+ of the buffer, and insert that jump_n. */
+ else
+ { /* Set to 5 if only one repetition is allowed and
+ hence no jump_n is inserted at the current end of
+ the buffer; then only space for the succeed_n is
+ needed. Otherwise, need space for both the
+ succeed_n and the jump_n. */
+
+ unsigned slots_needed = upper_bound == 1 ? 5 : 10;
+
+ GET_BUFFER_SPACE (slots_needed);
+ /* Initialize the succeed_n to n, even though it will
+ be set by its attendant set_number_at, because
+ re_compile_fastmap will need to know it. Jump to
+ what the end of buffer will be after inserting
+ this succeed_n and possibly appending a jump_n. */
+ insert_jump_n (succeed_n, laststart, b + slots_needed,
+ b, lower_bound);
+ b += 5; /* Just increment for the succeed_n here. */
+
+ /* More than one repetition is allowed, so put in at
+ the end of the buffer a backward jump from b to the
+ succeed_n we put in above. By the time we've gotten
+ to this jump when matching, we'll have matched once
+ already, so jump back only upper_bound - 1 times. */
+
+ if (upper_bound > 1)
+ {
+ store_jump_n (b, jump_n, laststart, upper_bound - 1);
+ b += 5;
+ /* When hit this when matching, reset the
+ preceding jump_n's n to upper_bound - 1. */
+ BUFPUSH (set_number_at);
+ GET_BUFFER_SPACE (2);
+ STORE_NUMBER_AND_INCR (b, -5);
+ STORE_NUMBER_AND_INCR (b, upper_bound - 1);
+ }
+ /* When hit this when matching, set the succeed_n's n. */
+ GET_BUFFER_SPACE (5);
+ insert_op_2 (set_number_at, laststart, b, 5, lower_bound);
+ b += 5;
+ }
+ pending_exact = 0;
+ beg_interval = 0;
+ break;
+
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ if (beg_interval)
+ p = beg_interval;
+ else
+ {
+ fprintf (stderr,
+ "regex: no interval beginning to which to backtrack.\n");
+ exit (1);
+ }
+
+ beg_interval = 0;
+ PATFETCH (c); /* normal_char expects char in `c'. */
+ goto normal_char;
+ break;
+
#ifdef emacs
case '=':
- PATPUSH (at_dot);
+ BUFPUSH (at_dot);
break;
case 's':
laststart = b;
- PATPUSH (syntaxspec);
+ BUFPUSH (syntaxspec);
PATFETCH (c);
- PATPUSH (syntax_spec_code[c]);
+ BUFPUSH (syntax_spec_code[c]);
break;
case 'S':
laststart = b;
- PATPUSH (notsyntaxspec);
+ BUFPUSH (notsyntaxspec);
PATFETCH (c);
- PATPUSH (syntax_spec_code[c]);
+ BUFPUSH (syntax_spec_code[c]);
break;
#endif /* emacs */
case 'w':
laststart = b;
- PATPUSH (wordchar);
+ BUFPUSH (wordchar);
break;
case 'W':
laststart = b;
- PATPUSH (notwordchar);
+ BUFPUSH (notwordchar);
break;
case '<':
- PATPUSH (wordbeg);
+ BUFPUSH (wordbeg);
break;
case '>':
- PATPUSH (wordend);
+ BUFPUSH (wordend);
break;
case 'b':
- PATPUSH (wordbound);
+ BUFPUSH (wordbound);
break;
case 'B':
- PATPUSH (notwordbound);
+ BUFPUSH (notwordbound);
break;
case '`':
- PATPUSH (begbuf);
+ BUFPUSH (begbuf);
break;
case '\'':
- PATPUSH (endbuf);
+ BUFPUSH (endbuf);
break;
case '1':
@@ -728,23 +1132,34 @@ re_compile_pattern (pattern, size, bufp)
case '7':
case '8':
case '9':
- c1 = c - '0';
+ if (obscure_syntax & RE_NO_BK_REFS)
+ goto normal_char;
+ c1 = c - '0';
if (c1 >= regnum)
- goto normal_char;
- for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
+ {
+ if (obscure_syntax & RE_NO_EMPTY_BK_REF)
+ goto invalid_pattern;
+ else
+ goto normal_char;
+ }
+ /* Can't back reference to a subexpression if inside of it. */
+ for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
if (*stackt == c1)
goto normal_char;
laststart = b;
- PATPUSH (duplicate);
- PATPUSH (c1);
+ BUFPUSH (duplicate);
+ BUFPUSH (c1);
break;
case '+':
case '?':
if (obscure_syntax & RE_BK_PLUS_QM)
goto handle_plus;
+ else
+ goto normal_backsl;
+ break;
- default:
+ default:
normal_backsl:
/* You might think it would be useful for \ to mean
not to translate; but if we don't translate it
@@ -755,19 +1170,23 @@ re_compile_pattern (pattern, size, bufp)
break;
default:
- normal_char:
+ normal_char: /* Expects the character in `c'. */
if (!pending_exact || pending_exact + *pending_exact + 1 != b
|| *pending_exact == 0177 || *p == '*' || *p == '^'
|| ((obscure_syntax & RE_BK_PLUS_QM)
? *p == '\\' && (p[1] == '+' || p[1] == '?')
- : (*p == '+' || *p == '?')))
+ : (*p == '+' || *p == '?'))
+ || ((obscure_syntax & RE_INTERVALS)
+ && ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
+ ? *p == '{'
+ : (p[0] == '\\' && p[1] == '{'))))
{
laststart = b;
- PATPUSH (exactn);
+ BUFPUSH (exactn);
pending_exact = b;
- PATPUSH (0);
+ BUFPUSH (0);
}
- PATPUSH (c);
+ BUFPUSH (c);
(*pending_exact)++;
}
}
@@ -802,46 +1221,137 @@ re_compile_pattern (pattern, size, bufp)
return "Memory exhausted";
}
-/* Store where `from' points a jump operation to jump to where `to' points.
- `opcode' is the opcode to store. */
-static int
+/* Store a jump of the form <OPCODE> <relative address>.
+ Store in the location FROM a jump operation to jump to TO, encoded
+ as the relative offset TO - (FROM + 3). OPCODE is the opcode to store. */
+
+static void
store_jump (from, opcode, to)
char *from, *to;
+#ifndef MSDOS
char opcode;
+#else
+ int opcode;
+#endif /* MSDOS */
{
from[0] = opcode;
- from[1] = (to - (from + 3)) & 0377;
- from[2] = (to - (from + 3)) >> 8;
+ STORE_NUMBER(from + 1, to - (from + 3));
}
-/* Open up space at char FROM, and insert there a jump to TO.
- CURRENT_END gives te end of the storage no in use,
- so we know how much data to copy up.
- OP is the opcode of the jump to insert.
+
+/* Open up space before char FROM, and insert there a jump to TO.
+ CURRENT_END gives the end of the storage now in use, so we know
+ how much data to copy up. OP is the opcode of the jump to insert.
If you call this function, you must zero out pending_exact. */
-static int
+static void
insert_jump (op, from, to, current_end)
+#ifndef MSDOS
char op;
+#else
+ int op;
+#endif /* MSDOS */
char *from, *to, *current_end;
{
- register char *pto = current_end + 3;
- register char *pfrom = current_end;
- while (pfrom != from)
+ register char *pfrom = current_end; /* Copy from here... */
+ register char *pto = current_end + 3; /* ...to here. */
+
+ while (pfrom != from)
*--pto = *--pfrom;
store_jump (from, op, to);
}
+
+
+/* Store a jump of the form <opcode> <relative address> <n> .
+
+ Store in the location FROM a jump operation to jump to TO, encoded
+ as the relative offset TO - (FROM + 3). OPCODE is the opcode to
+ store, N is a number the jump uses, say, to decide how many times to jump.
+
+ If you call this function, you must zero out pending_exact. */
+
+static void
+store_jump_n (from, opcode, to, n)
+ char *from, *to;
+#ifndef MSDOS
+ char opcode;
+#else
+ int opcode;
+#endif /* MSDOS */
+ unsigned n;
+{
+ from[0] = opcode;
+ STORE_NUMBER (from + 1, to - (from + 3));
+ STORE_NUMBER (from + 3, n);
+}
+
+
+/* Similar to insert_jump, but handles a jump which needs an extra
+ number to handle minimum and maximum cases. Open up space at
+ location FROM, and insert there a jump to TO. CURRENT_END gives the
+ end of the storage in use, so we know how much data to copy up. OP is
+ the opcode of the jump to insert.
+
+ If you call this function, you must zero out pending_exact. */
+
+static void
+insert_jump_n (op, from, to, current_end, n)
+#ifndef MSDOS
+ char op;
+#else
+ int op;
+#endif /* MSDOS */
+ char *from, *to, *current_end;
+ unsigned n;
+{
+ register char *pfrom = current_end; /* Copy from here... */
+ register char *pto = current_end + 5; /* ...to here. */
+
+ while (pfrom != from)
+ *--pto = *--pfrom;
+ store_jump_n (from, op, to, n);
+}
+
+
+/* Open up space at location THERE, and insert operation OP followed by
+ NUM_1 and NUM_2. CURRENT_END gives the end of the storage in use, so
+ we know how much data to copy up.
+
+ If you call this function, you must zero out pending_exact. */
+
+static void
+insert_op_2 (op, there, current_end, num_1, num_2)
+#ifndef MSDOS
+ char op;
+#else
+ int op;
+#endif /* MSDOS */
+ char *there, *current_end;
+ int num_1, num_2;
+{
+ register char *pfrom = current_end; /* Copy from here... */
+ register char *pto = current_end + 5; /* ...to here. */
+
+ while (pfrom != there)
+ *--pto = *--pfrom;
+
+ there[0] = op;
+ STORE_NUMBER (there + 1, num_1);
+ STORE_NUMBER (there + 3, num_2);
+}
+
+
-/* Given a pattern, compute a fastmap from it.
- The fastmap records which of the (1 << BYTEWIDTH) possible characters
- can start a string that matches the pattern.
- This fastmap is used by re_search to skip quickly over totally implausible text.
+/* Given a pattern, compute a fastmap from it. The fastmap records
+ which of the (1 << BYTEWIDTH) possible characters can start a string
+ that matches the pattern. This fastmap is used by re_search to skip
+ quickly over totally implausible text.
- The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
- as bufp->fastmap.
- The other components of bufp describe the pattern to be used. */
+ The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+ area as bufp->fastmap.
+ The other components of bufp describe the pattern to be used. */
void
re_compile_fastmap (bufp)
@@ -854,16 +1364,26 @@ re_compile_fastmap (bufp)
register unsigned char *pend = pattern + size;
register int j, k;
unsigned char *translate = (unsigned char *) bufp->translate;
+ unsigned is_a_succeed_n;
+#ifndef NO_ALLOCA
unsigned char *stackb[NFAILURES];
unsigned char **stackp = stackb;
- bzero (fastmap, (1 << BYTEWIDTH));
+#else
+ unsigned char **stackb;
+ unsigned char **stackp;
+ stackb = (unsigned char **) malloc (NFAILURES * sizeof (unsigned char *));
+ stackp = stackb;
+
+#endif /* NO_ALLOCA */
+ memset (fastmap, 0, (1 << BYTEWIDTH));
bufp->fastmap_accurate = 1;
bufp->can_be_null = 0;
while (p)
{
+ is_a_succeed_n = 0;
if (p == pend)
{
bufp->can_be_null = 1;
@@ -892,51 +1412,70 @@ re_compile_fastmap (bufp)
case notwordbound:
case wordbeg:
case wordend:
- continue;
+ continue;
case endline:
if (translate)
fastmap[translate['\n']] = 1;
else
fastmap['\n'] = 1;
+
if (bufp->can_be_null != 1)
bufp->can_be_null = 2;
break;
- case finalize_jump:
+ case jump_n:
+ case finalize_jump:
case maybe_finalize_jump:
case jump:
case dummy_failure_jump:
- bufp->can_be_null = 1;
- j = *p++ & 0377;
- j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- p += j + 1; /* The 1 compensates for missing ++ above */
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
if (j > 0)
continue;
- /* Jump backward reached implies we just went through
+ /* Jump backward reached implies we just went through
the body of a loop and matched nothing.
Opcode jumped to should be an on_failure_jump.
Just treat it like an ordinary jump.
For a * loop, it has pushed its failure point already;
- if so, discard that as redundant. */
- if ((enum regexpcode) *p != on_failure_jump)
+ if so, discard that as redundant. */
+
+ if ((enum regexpcode) *p != on_failure_jump
+ && (enum regexpcode) *p != succeed_n)
continue;
- p++;
- j = *p++ & 0377;
- j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- p += j + 1; /* The 1 compensates for missing ++ above */
- if (stackp != stackb && *stackp == p)
- stackp--;
- continue;
+ p++;
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ p += j;
+ if (stackp != stackb && *stackp == p)
+ stackp--;
+ continue;
- case on_failure_jump:
- j = *p++ & 0377;
- j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- p++;
- *++stackp = p + j;
+ case on_failure_jump:
+ handle_on_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (j, p);
+ *++stackp = p + j;
+ if (is_a_succeed_n)
+ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
continue;
- case start_memory:
+ case succeed_n:
+ is_a_succeed_n = 1;
+ /* Get to the number of times to succeed. */
+ p += 2;
+ /* Increment p past the n for when k != 0. */
+ EXTRACT_NUMBER_AND_INCR (k, p);
+ if (k == 0)
+ {
+ p -= 4;
+ goto handle_on_failure_jump;
+ }
+ continue;
+
+ case set_number_at:
+ p += 4;
+ continue;
+
+ case start_memory:
case stop_memory:
p++;
continue;
@@ -949,7 +1488,9 @@ re_compile_fastmap (bufp)
if (j != '\n')
fastmap[j] = 1;
if (bufp->can_be_null)
- return;
+ {
+ FREE_AND_RETURN_VOID(stackb);
+ }
/* Don't return; check the alternative paths
so we can set can_be_null if appropriate. */
break;
@@ -980,7 +1521,7 @@ re_compile_fastmap (bufp)
if (SYNTAX (j) != (enum syntaxcode) k)
fastmap[j] = 1;
break;
-#endif /* emacs */
+#endif /* not emacs */
case charset:
for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
@@ -1012,17 +1553,22 @@ re_compile_fastmap (bufp)
break;
}
- /* Get here means we have successfully found the possible starting characters
- of one path of the pattern. We need not follow this path any farther.
- Instead, look at the next alternative remembered in the stack. */
- if (stackp != stackb)
+ /* Get here means we have successfully found the possible starting
+ characters of one path of the pattern. We need not follow this
+ path any farther. Instead, look at the next alternative
+ remembered in the stack. */
+ if (stackp != stackb)
p = *stackp--;
else
break;
}
+ FREE_AND_RETURN_VOID(stackb);
}
+
+
-/* Like re_search_2, below, but only one string is specified. */
+/* Like re_search_2, below, but only one string is specified, and
+ doesn't let you say where to stop matching. */
int
re_search (pbufp, string, size, startpos, range, regs)
@@ -1031,23 +1577,29 @@ re_search (pbufp, string, size, startpos, range, regs)
int size, startpos, range;
struct re_registers *regs;
{
- return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
+ return re_search_2 (pbufp, (char *) 0, 0, string, size, startpos, range,
+ regs, size);
}
-/* Like re_match_2 but tries first a match starting at index STARTPOS,
- then at STARTPOS + 1, and so on.
- RANGE is the number of places to try before giving up.
- If RANGE is negative, the starting positions tried are
- STARTPOS, STARTPOS - 1, etc.
- It is up to the caller to make sure that range is not so large
- as to take the starting position outside of the input strings.
-The value returned is the position at which the match was found,
- or -1 if no match was found,
- or -2 if error (such as failure stack overflow). */
+/* Using the compiled pattern in PBUFP->buffer, first tries to match the
+ virtual concatenation of STRING1 and STRING2, starting first at index
+ STARTPOS, then at STARTPOS + 1, and so on. RANGE is the number of
+ places to try before giving up. If RANGE is negative, it searches
+ backwards, i.e., the starting positions tried are STARTPOS, STARTPOS
+ - 1, etc. STRING1 and STRING2 are of SIZE1 and SIZE2, respectively.
+ In REGS, return the indices of the virtual concatenation of STRING1
+ and STRING2 that matched the entire PBUFP->buffer and its contained
+ subexpressions. Do not consider matching one past the index MSTOP in
+ the virtual concatenation of STRING1 and STRING2.
+
+ The value returned is the position in the strings at which the match
+ was found, or -1 if no match was found, or -2 if error (such as
+ failure stack overflow). */
int
-re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
+re_search_2 (pbufp, string1, size1, string2, size2, startpos, range,
+ regs, mstop)
struct re_pattern_buffer *pbufp;
char *string1, *string2;
int size1, size2;
@@ -1058,15 +1610,27 @@ re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop
{
register char *fastmap = pbufp->fastmap;
register unsigned char *translate = (unsigned char *) pbufp->translate;
- int total = size1 + size2;
+ int total_size = size1 + size2;
+ int endpos = startpos + range;
int val;
- /* Update the fastmap now if not correct already */
+ /* Check for out-of-range starting position. */
+ if (startpos < 0 || startpos > total_size)
+ return -1;
+
+ /* Fix up range if it would eventually take startpos outside of the
+ virtual concatenation of string1 and string2. */
+ if (endpos < -1)
+ range = -1 - startpos;
+ else if (endpos > total_size)
+ range = total_size - startpos;
+
+ /* Update the fastmap now if not correct already. */
if (fastmap && !pbufp->fastmap_accurate)
re_compile_fastmap (pbufp);
- /* Don't waste time in a long search for a pattern
- that says it is anchored. */
+ /* If the search isn't to be a backwards one, don't waste time in a
+ long search for a pattern that says it is anchored. */
if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
&& range > 0)
{
@@ -1077,16 +1641,16 @@ re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop
}
while (1)
- {
- /* If a fastmap is supplied, skip quickly over characters
- that cannot possibly be the start of a match.
- Note, however, that if the pattern can possibly match
- the null string, we must test it at each starting point
- so that we take the first null string we get. */
-
- if (fastmap && startpos < total && pbufp->can_be_null != 1)
+ {
+ /* If a fastmap is supplied, skip quickly over characters that
+ cannot possibly be the start of a match. Note, however, that
+ if the pattern can possibly match the null string, we must
+ test it at each starting point so that we take the first null
+ string we get. */
+
+ if (fastmap && startpos < total_size && pbufp->can_be_null != 1)
{
- if (range > 0)
+ if (range > 0) /* Searching forwards. */
{
register int lim = 0;
register unsigned char *p;
@@ -1097,55 +1661,64 @@ re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop
p = ((unsigned char *)
&(startpos >= size1 ? string2 - size1 : string1)[startpos]);
- if (translate)
- {
- while (range > lim && !fastmap[translate[*p++]])
+ while (range > lim && !fastmap[translate
+ ? translate[*p++]
+ : *p++])
range--;
- }
- else
- {
- while (range > lim && !fastmap[*p++])
- range--;
- }
startpos += irange - range;
}
- else
+ else /* Searching backwards. */
{
register unsigned char c;
- if (startpos >= size1)
+
+ if (string1 == 0 || startpos >= size1)
c = string2[startpos - size1];
- else
+ else
c = string1[startpos];
- c &= 0xff;
+
+ c &= 0xff;
if (translate ? !fastmap[translate[c]] : !fastmap[c])
goto advance;
}
}
- if (range >= 0 && startpos == total
+ if (range >= 0 && startpos == total_size
&& fastmap && pbufp->can_be_null == 0)
return -1;
- val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
- if (0 <= val)
- {
- if (val == -2)
- return -2;
- return startpos;
- }
+ val = re_match_2 (pbufp, string1, size1, string2, size2, startpos,
+ regs, mstop);
+ if (val >= 0)
+ return startpos;
+ if (val == -2)
+ return -2;
+#ifndef NO_ALLOCA
#ifdef C_ALLOCA
alloca (0);
#endif /* C_ALLOCA */
+#endif /* NO_ALLOCA */
advance:
- if (!range) break;
- if (range > 0) range--, startpos++; else range++, startpos--;
+ if (!range)
+ break;
+ else if (range > 0)
+ {
+ range--;
+ startpos++;
+ }
+ else
+ {
+ range++;
+ startpos--;
+ }
}
return -1;
}
+
+
-#ifndef emacs /* emacs never uses this */
+#ifndef emacs /* emacs never uses this. */
int
re_match (pbufp, string, size, pos, regs)
struct re_pattern_buffer *pbufp;
@@ -1153,82 +1726,295 @@ re_match (pbufp, string, size, pos, regs)
int size, pos;
struct re_registers *regs;
{
- return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
+ return re_match_2 (pbufp, (char *) 0, 0, string, size, pos, regs, size);
}
-#endif /* emacs */
+#endif /* not emacs */
+
-/* Maximum size of failure stack. Beyond this, overflow is an error. */
+/* The following are used for re_match_2, defined below: */
+/* Roughly the maximum number of failure points on the stack. Would be
+ exactly that if always pushed MAX_NUM_FAILURE_ITEMS each time we failed. */
+
int re_max_failures = 2000;
-static int bcmp_translate();
-/* Match the pattern described by PBUFP
- against data which is the virtual concatenation of STRING1 and STRING2.
- SIZE1 and SIZE2 are the sizes of the two data strings.
- Start the match at position POS.
- Do not consider matching past the position MSTOP.
+/* Routine used by re_match_2. */
+static int memcmp_translate ();
+
+
+/* Structure and accessing macros used in re_match_2: */
+
+struct register_info
+{
+ unsigned is_active : 1;
+ unsigned matched_something : 1;
+};
+
+#define IS_ACTIVE(R) ((R).is_active)
+#define MATCHED_SOMETHING(R) ((R).matched_something)
+
+
+/* Macros used by re_match_2: */
+
+
+/* I.e., regstart, regend, and reg_info. */
+
+#define NUM_REG_ITEMS 3
+
+/* We push at most this many things on the stack whenever we
+ fail. The `+ 2' refers to PATTERN_PLACE and STRING_PLACE, which are
+ arguments to the PUSH_FAILURE_POINT macro. */
+
+#define MAX_NUM_FAILURE_ITEMS (RE_NREGS * NUM_REG_ITEMS + 2)
+
+
+/* We push this many things on the stack whenever we fail. */
+
+#define NUM_FAILURE_ITEMS (last_used_reg * NUM_REG_ITEMS + 2)
+
+
+/* This pushes most of the information about the current state we will want
+ if we ever fail back to it. */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place) \
+ { \
+ short last_used_reg, this_reg; \
+ \
+ /* Find out how many registers are active or have been matched. \
+ (Aside from register zero, which is only set at the end.) */ \
+ for (last_used_reg = RE_NREGS - 1; last_used_reg > 0; last_used_reg--)\
+ if (regstart[last_used_reg] != (unsigned char *) -1) \
+ break; \
+ \
+ if (stacke - stackp < NUM_FAILURE_ITEMS) \
+ { \
+ unsigned char **stackx; \
+ unsigned int len = stacke - stackb; \
+ if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS) \
+ { \
+ FREE_AND_RETURN(stackb,(-2)); \
+ } \
+ \
+ /* Roughly double the size of the stack. */ \
+ stackx = DOUBLE_STACK(stackx,stackb,len); \
+ /* Rearrange the pointers. */ \
+ stackp = stackx + (stackp - stackb); \
+ stackb = stackx; \
+ stacke = stackb + 2 * len; \
+ } \
+ \
+ /* Now push the info for each of those registers. */ \
+ for (this_reg = 1; this_reg <= last_used_reg; this_reg++) \
+ { \
+ *stackp++ = regstart[this_reg]; \
+ *stackp++ = regend[this_reg]; \
+ *stackp++ = (unsigned char *) &reg_info[this_reg]; \
+ } \
+ \
+ /* Push how many registers we saved. */ \
+ *stackp++ = (unsigned char *) last_used_reg; \
+ \
+ *stackp++ = pattern_place; \
+ *stackp++ = string_place; \
+ }
+
+
+/* This pops what PUSH_FAILURE_POINT pushes. */
+
+#define POP_FAILURE_POINT() \
+ { \
+ int temp; \
+ stackp -= 2; /* Remove failure points. */ \
+ temp = (int) *--stackp; /* How many regs pushed. */ \
+ temp *= NUM_REG_ITEMS; /* How much to take off the stack. */ \
+ stackp -= temp; /* Remove the register info. */ \
+ }
+
+
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
+
+/* Is true if there is a first string and if PTR is pointing anywhere
+ inside it or just past the end. */
+
+#define IS_IN_FIRST_STRING(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* Call before fetching a character with *d. This switches over to
+ string2 if necessary. */
+
+#define PREFETCH \
+ while (d == dend) \
+ { \
+ /* end of string2 => fail. */ \
+ if (dend == end_match_2) \
+ goto fail; \
+ /* end of string1 => advance to string2. */ \
+ d = string2; \
+ dend = end_match_2; \
+ }
+
+
+/* Call this when we have matched something; it sets `matched' flags for the
+ registers corresponding to the subexpressions we are currently
+ inside. */
+#define SET_REGS_MATCHED \
+ { unsigned this_reg; \
+ for (this_reg = 0; this_reg < RE_NREGS; this_reg++) \
+ { \
+ if (IS_ACTIVE(reg_info[this_reg])) \
+ MATCHED_SOMETHING(reg_info[this_reg]) = 1; \
+ else \
+ MATCHED_SOMETHING(reg_info[this_reg]) = 0; \
+ } \
+ }
+
+/* Test if at very beginning or at very end of the virtual concatenation
+ of string1 and string2. If there is only one string, we've put it in
+ string2. */
+
+#define AT_STRINGS_BEG (d == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END (d == end2)
+
+#define AT_WORD_BOUNDARY \
+ (AT_STRINGS_BEG || AT_STRINGS_END || IS_A_LETTER (d - 1) != IS_A_LETTER (d))
+
+/* We have two special cases to check for:
+ 1) if we're past the end of string1, we have to look at the first
+ character in string2;
+ 2) if we're before the beginning of string2, we have to look at the
+ last character in string1; we assume there is a string1, so use
+ this in conjunction with AT_STRINGS_BEG. */
+#define IS_A_LETTER(d) \
+ (SYNTAX ((d) == end1 ? *string2 : (d) == string2 - 1 ? *(end1 - 1) : *(d))\
+ == Sword)
+
+
+/* Match the pattern described by PBUFP against the virtual
+ concatenation of STRING1 and STRING2, which are of SIZE1 and SIZE2,
+ respectively. Start the match at index POS in the virtual
+ concatenation of STRING1 and STRING2. In REGS, return the indices of
+ the virtual concatenation of STRING1 and STRING2 that matched the
+ entire PBUFP->buffer and its contained subexpressions. Do not
+ consider matching one past the index MSTOP in the virtual
+ concatenation of STRING1 and STRING2.
If pbufp->fastmap is nonzero, then it had better be up to date.
The reason that the data to match are specified as two components
- which are to be regarded as concatenated
- is so this function can be used directly on the contents of an Emacs buffer.
+ which are to be regarded as concatenated is so this function can be
+ used directly on the contents of an Emacs buffer.
- -1 is returned if there is no match. -2 is returned if there is
- an error (such as match stack overflow). Otherwise the value is the length
- of the substring which was matched. */
+ -1 is returned if there is no match. -2 is returned if there is an
+ error (such as match stack overflow). Otherwise the value is the
+ length of the substring which was matched. */
int
-re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
+re_match_2 (pbufp, string1_arg, size1, string2_arg, size2, pos, regs, mstop)
struct re_pattern_buffer *pbufp;
- unsigned char *string1, *string2;
+ char *string1_arg, *string2_arg;
int size1, size2;
int pos;
struct re_registers *regs;
int mstop;
{
register unsigned char *p = (unsigned char *) pbufp->buffer;
+
+ /* Pointer to beyond end of buffer. */
register unsigned char *pend = p + pbufp->used;
- /* End of first string */
- unsigned char *end1;
- /* End of second string */
- unsigned char *end2;
- /* Pointer just past last char to consider matching */
+
+ unsigned char *string1 = (unsigned char *) string1_arg;
+ unsigned char *string2 = (unsigned char *) string2_arg;
+ unsigned char *end1; /* Just past end of first string. */
+ unsigned char *end2; /* Just past end of second string. */
+
+ /* Pointers into string1 and string2, just past the last characters in
+ each to consider matching. */
unsigned char *end_match_1, *end_match_2;
+
register unsigned char *d, *dend;
- register int mcnt;
+ register int mcnt; /* Multipurpose. */
unsigned char *translate = (unsigned char *) pbufp->translate;
-
- /* Failure point stack. Each place that can handle a failure further down the line
- pushes a failure point on this stack. It consists of two char *'s.
- The first one pushed is where to resume scanning the pattern;
- the second pushed is where to resume scanning the strings.
- If the latter is zero, the failure point is a "dummy".
- If a failure happens and the innermost failure point is dormant,
- it discards that failure point and tries the next one. */
-
- unsigned char *initial_stack[2 * NFAILURES];
- unsigned char **stackb = initial_stack;
- unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
-
- /* Information on the "contents" of registers.
- These are pointers into the input strings; they record
- just what was matched (on this attempt) by some part of the pattern.
- The start_memory command stores the start of a register's contents
- and the stop_memory command stores the end.
-
- At that point, regstart[regnum] points to the first character in the register,
- regend[regnum] points to the first character beyond the end of the register,
- regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
- and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
-
+ unsigned is_a_jump_n = 0;
+
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ regstart, regend, and reg_info for all registers corresponding to the
+ subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where to
+ resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is a
+ ``dummy''; if a failure happens and the failure point is a dummy, it
+ gets discarded and the next one is tried. */
+
+#ifndef NO_ALLOCA
+ unsigned char *initial_stack[MAX_NUM_FAILURE_ITEMS * NFAILURES];
+#endif
+ unsigned char **stackb;
+ unsigned char **stackp;
+ unsigned char **stacke;
+
+
+ /* Information on the contents of registers. These are pointers into
+ the input strings; they record just what was matched (on this
+ attempt) by a subexpression part of the pattern, that is, the
+ regnum-th regstart pointer points to where in the input we began
+ matching and the regnum-th regend points to right after where we
+ stopped matching the regnum-th subexpression. (The zeroth register
+ keeps track of what the whole pattern matches.) */
+
unsigned char *regstart[RE_NREGS];
unsigned char *regend[RE_NREGS];
- unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
+
+ /* The is_active field of reg_info helps us keep track of which (possibly
+ nested) subexpressions we are currently in. The matched_something
+ field of reg_info[reg_num] helps us tell whether or not we have
+ matched any of the pattern so far this time through the reg_num-th
+ subexpression. These two fields get reset each time through any
+ loop their register is in. */
+
+ struct register_info reg_info[RE_NREGS];
+
+
+ /* The following record the register info as found in the above
+ variables when we find a match better than any we've seen before.
+ This happens as we backtrack through the failure points, which in
+ turn happens only if we have not yet matched the entire string. */
+
+ unsigned best_regs_set = 0;
+ unsigned char *best_regstart[RE_NREGS];
+ unsigned char *best_regend[RE_NREGS];
+
+ /* Initialize the stack. */
+#ifdef NO_ALLOCA
+ stackb = (unsigned char **) malloc (MAX_NUM_FAILURE_ITEMS * NFAILURES * sizeof (char *));
+#else
+ stackb = initial_stack;
+#endif
+ stackp = stackb;
+ stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES];
+
+#ifdef DEBUG_REGEX
+ fprintf (stderr, "Entering re_match_2(%s%s)\n", string1_arg, string2_arg);
+#endif
+
+ /* Initialize subexpression text positions to -1 to mark ones that no
+ \( or ( and \) or ) has been seen for. Also set all registers to
+ inactive and mark them as not having matched anything or ever
+ failed. */
+ for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
+ {
+ regstart[mcnt] = regend[mcnt] = (unsigned char *) -1;
+ IS_ACTIVE (reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ }
+
+ if (regs)
+ for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
/* Set up pointers to ends of strings.
Don't allow the second string to be empty unless both are empty. */
- if (!size2)
+ if (size2 == 0)
{
string2 = string1;
size2 = size1;
@@ -1238,7 +2024,7 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
end1 = string1 + size1;
end2 = string2 + size2;
- /* Compute where to stop matching, within the two strings */
+ /* Compute where to stop matching, within the two strings. */
if (mstop <= size1)
{
end_match_1 = string1 + mstop;
@@ -1250,50 +2036,87 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
end_match_2 = string2 + mstop - size1;
}
- /* Initialize \) text positions to -1
- to mark ones that no \( or \) has been seen for. */
-
- for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
- regend[mcnt] = (unsigned char *) -1;
-
- /* `p' scans through the pattern as `d' scans through the data.
- `dend' is the end of the input string that `d' points within.
- `d' is advanced into the following input string whenever necessary,
- but this happens before fetching;
- therefore, at the beginning of the loop,
- `d' can be pointing at the end of a string,
- but it cannot equal string2. */
+ /* `p' scans through the pattern as `d' scans through the data. `dend'
+ is the end of the input string that `d' points within. `d' is
+ advanced into the following input string whenever necessary, but
+ this happens before fetching; therefore, at the beginning of the
+ loop, `d' can be pointing at the end of a string, but it cannot
+ equal string2. */
- if (pos <= size1)
+ if (size1 != 0 && pos <= size1)
d = string1 + pos, dend = end_match_1;
else
d = string2 + pos - size1, dend = end_match_2;
-/* Write PREFETCH; just before fetching a character with *d. */
-#define PREFETCH \
- while (d == dend) \
- { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
- d = string2; /* end of string1 => advance to string2. */ \
- dend = end_match_2; }
- /* This loop loops over pattern commands.
- It exits by returning from the function if match is complete,
- or it drops through if match fails at this starting point in the input data. */
+ /* This loops over pattern commands. It exits by returning from the
+ function if match is complete, or it drops through if match fails
+ at this starting point in the input data. */
while (1)
{
+#ifdef DEBUG_REGEX
+ fprintf (stderr,
+ "regex loop(%d): matching 0x%02d\n",
+ p - (unsigned char *) pbufp->buffer,
+ *p);
+#endif
+ is_a_jump_n = 0;
+ /* End of pattern means we might have succeeded. */
if (p == pend)
- /* End of pattern means we have succeeded! */
{
- /* If caller wants register contents data back, convert it to indices */
+ /* If not end of string, try backtracking. Otherwise done. */
+ if (d != end_match_2)
+ {
+ if (stackp != stackb)
+ {
+ /* More failure points to try. */
+
+ unsigned in_same_string =
+ IS_IN_FIRST_STRING (best_regend[0])
+ == MATCHING_IN_FIRST_STRING;
+
+ /* If exceeds best match so far, save it. */
+ if (! best_regs_set
+ || (in_same_string && d > best_regend[0])
+ || (! in_same_string && ! MATCHING_IN_FIRST_STRING))
+ {
+ best_regs_set = 1;
+ best_regend[0] = d; /* Never use regstart[0]. */
+
+ for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
+ {
+ best_regstart[mcnt] = regstart[mcnt];
+ best_regend[mcnt] = regend[mcnt];
+ }
+ }
+ goto fail;
+ }
+ /* If no failure points, don't restore garbage. */
+ else if (best_regs_set)
+ {
+ restore_best_regs:
+ /* Restore best match. */
+ d = best_regend[0];
+
+ for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
+ {
+ regstart[mcnt] = best_regstart[mcnt];
+ regend[mcnt] = best_regend[mcnt];
+ }
+ }
+ }
+
+ /* If caller wants register contents data back, convert it
+ to indices. */
if (regs)
{
- regs->start[0] = pos;
- if (dend == end_match_1)
- regs->end[0] = d - string1;
- else
- regs->end[0] = d - string2 + size1;
- for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
+ regs->start[0] = pos;
+ if (MATCHING_IN_FIRST_STRING)
+ regs->end[0] = d - string1;
+ else
+ regs->end[0] = d - string2 + size1;
+ for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
{
if (regend[mcnt] == (unsigned char *) -1)
{
@@ -1301,23 +2124,24 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
regs->end[mcnt] = -1;
continue;
}
- if (regstart_seg1[mcnt])
+ if (IS_IN_FIRST_STRING (regstart[mcnt]))
regs->start[mcnt] = regstart[mcnt] - string1;
else
regs->start[mcnt] = regstart[mcnt] - string2 + size1;
- if (regend_seg1[mcnt])
+
+ if (IS_IN_FIRST_STRING (regend[mcnt]))
regs->end[mcnt] = regend[mcnt] - string1;
else
regs->end[mcnt] = regend[mcnt] - string2 + size1;
}
}
- if (dend == end_match_1)
- return (d - string1 - pos);
- else
- return d - string2 + size1 - pos;
- }
+ FREE_AND_RETURN(stackb,
+ (d - pos - (MATCHING_IN_FIRST_STRING ?
+ string1 :
+ string2 - size1)));
+ }
- /* Otherwise match next pattern command */
+ /* Otherwise match next pattern command. */
#ifdef SWITCH_ENUM_BUG
switch ((int) ((enum regexpcode) *p++))
#else
@@ -1325,33 +2149,80 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
#endif
{
- /* \( is represented by a start_memory, \) by a stop_memory.
- Both of those commands contain a "register number" argument.
- The text matched within the \( and \) is recorded under that number.
- Then, \<digit> turns into a `duplicate' command which
- is followed by the numeric value of <digit> as the register number. */
-
+ /* \( [or `(', as appropriate] is represented by start_memory,
+ \) by stop_memory. Both of those commands are followed by
+ a register number in the next byte. The text matched
+ within the \( and \) is recorded under that number. */
case start_memory:
- regstart[*p] = d;
- regstart_seg1[*p++] = (dend == end_match_1);
- break;
+ regstart[*p] = d;
+ IS_ACTIVE (reg_info[*p]) = 1;
+ MATCHED_SOMETHING (reg_info[*p]) = 0;
+ p++;
+ break;
case stop_memory:
- regend[*p] = d;
- regend_seg1[*p++] = (dend == end_match_1);
- break;
-
- case duplicate:
+ regend[*p] = d;
+ IS_ACTIVE (reg_info[*p]) = 0;
+
+ /* If just failed to match something this time around with a sub-
+ expression that's in a loop, try to force exit from the loop. */
+ if ((! MATCHED_SOMETHING (reg_info[*p])
+ || (enum regexpcode) p[-3] == start_memory)
+ && (p + 1) != pend)
+ {
+ register unsigned char *p2 = p + 1;
+ mcnt = 0;
+ switch (*p2++)
+ {
+ case jump_n:
+ is_a_jump_n = 1;
+ case finalize_jump:
+ case maybe_finalize_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p2);
+ if (is_a_jump_n)
+ p2 += 2;
+ break;
+ }
+ p2 += mcnt;
+
+ /* If the next operation is a jump backwards in the pattern
+ to an on_failure_jump, exit from the loop by forcing a
+ failure after pushing on the stack the on_failure_jump's
+ jump in the pattern, and d. */
+ if (mcnt < 0 && (enum regexpcode) *p2++ == on_failure_jump)
+ {
+ EXTRACT_NUMBER_AND_INCR (mcnt, p2);
+ PUSH_FAILURE_POINT (p2 + mcnt, d);
+ goto fail;
+ }
+ }
+ p++;
+ break;
+
+ /* \<digit> has been turned into a `duplicate' command which is
+ followed by the numeric value of <digit> as the register number. */
+ case duplicate:
{
int regno = *p++; /* Get which register to match against */
register unsigned char *d2, *dend2;
- d2 = regstart[regno];
- dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
+ /* Where in input to try to start matching. */
+ d2 = regstart[regno];
+
+ /* Where to stop matching; if both the place to start and
+ the place to stop matching are in the same string, then
+ set to the place to stop, otherwise, for now have to use
+ the end of the first string. */
+
+ dend2 = ((IS_IN_FIRST_STRING (regstart[regno])
+ == IS_IN_FIRST_STRING (regend[regno]))
? regend[regno] : end_match_1);
while (1)
{
- /* Advance to next segment in register contents, if necessary */
+ /* If necessary, advance to next segment in register
+ contents. */
while (d2 == dend2)
{
if (dend2 == end_match_2) break;
@@ -1361,15 +2232,22 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
/* At end of register contents => success */
if (d2 == dend2) break;
- /* Advance to next segment in data being matched, if necessary */
+ /* If necessary, advance to next segment in data. */
PREFETCH;
- /* mcnt gets # consecutive chars to compare */
+ /* How many characters left in this segment to match. */
mcnt = dend - d;
- if (mcnt > dend2 - d2)
+
+ /* Want how many consecutive characters we can match in
+ one shot, so, if necessary, adjust the count. */
+ if (mcnt > dend2 - d2)
mcnt = dend2 - d2;
- /* Compare that many; failure if mismatch, else skip them. */
- if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
+
+ /* Compare that many; failure if mismatch, else move
+ past them. */
+ if (translate
+ ? memcmp_translate (d, d2, mcnt, translate)
+ : memcmp ((char *)d, (char *)d2, mcnt))
goto fail;
d += mcnt, d2 += mcnt;
}
@@ -1377,27 +2255,28 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
break;
case anychar:
- /* fetch a data character */
- PREFETCH;
- /* Match anything but a newline. */
- if ((translate ? translate[*d++] : *d++) == '\n')
+ PREFETCH; /* Fetch a data character. */
+ /* Match anything but a newline, maybe even a null. */
+ if ((translate ? translate[*d] : *d) == '\n'
+ || ((obscure_syntax & RE_DOT_NOT_NULL)
+ && (translate ? translate[*d] : *d) == '\000'))
goto fail;
+ SET_REGS_MATCHED;
+ d++;
break;
case charset:
case charset_not:
{
- /* Nonzero for charset_not */
- int not = 0;
+ int not = 0; /* Nonzero for charset_not. */
register int c;
if (*(p - 1) == (unsigned char) charset_not)
not = 1;
- /* fetch a data character */
- PREFETCH;
+ PREFETCH; /* Fetch a data character. */
if (translate)
- c = translate [*d];
+ c = translate[*d];
else
c = *d;
@@ -1408,72 +2287,61 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
p += 1 + *p;
if (!not) goto fail;
- d++;
+ SET_REGS_MATCHED;
+ d++;
break;
}
case begline:
- if (d == string1 || d[-1] == '\n')
- break;
- goto fail;
-
+ if ((size1 != 0 && d == string1)
+ || (size1 == 0 && size2 != 0 && d == string2)
+ || (d && d[-1] == '\n')
+ || (size1 == 0 && size2 == 0))
+ break;
+ else
+ goto fail;
+
case endline:
if (d == end2
|| (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
break;
goto fail;
- /* "or" constructs ("|") are handled by starting each alternative
- with an on_failure_jump that points to the start of the next alternative.
- Each alternative except the last ends with a jump to the joining point.
- (Actually, each jump except for the last one really jumps
- to the following jump, because tensioning the jumps is a hassle.) */
+ /* `or' constructs are handled by starting each alternative with
+ an on_failure_jump that points to the start of the next
+ alternative. Each alternative except the last ends with a
+ jump to the joining point. (Actually, each jump except for
+ the last one really jumps to the following jump, because
+ tensioning the jumps is a hassle.) */
/* The start of a stupid repeat has an on_failure_jump that points
- past the end of the repeat text.
- This makes a failure point so that, on failure to match a repetition,
- matching restarts past as many repetitions have been found
- with no way to fail and look for another one. */
+ past the end of the repeat text. This makes a failure point so
+ that on failure to match a repetition, matching restarts past
+ as many repetitions as have been found, with no way to fail and
+ look for another one. */
/* A smart repeat is similar but loops back to the on_failure_jump
- so that each repetition makes another failure point. */
+ so that each repetition makes another failure point. */
case on_failure_jump:
- if (stackp == stacke)
- {
- unsigned char **stackx;
- if (stacke - stackb > re_max_failures * 2)
- return -2;
- stackx = (unsigned char **) alloca (2 * (stacke - stackb)
- * sizeof (char *));
- bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
- stackp = stackx + (stackp - stackb);
- stacke = stackx + 2 * (stacke - stackb);
- stackb = stackx;
- }
- mcnt = *p++ & 0377;
- mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- p++;
- *stackp++ = mcnt + p;
- *stackp++ = d;
- break;
-
- /* The end of a smart repeat has an maybe_finalize_jump back.
- Change it either to a finalize_jump or an ordinary jump. */
+ on_failure:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ PUSH_FAILURE_POINT (p + mcnt, d);
+ break;
+ /* The end of a smart repeat has a maybe_finalize_jump back.
+ Change it either to a finalize_jump or an ordinary jump. */
case maybe_finalize_jump:
- mcnt = *p++ & 0377;
- mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- p++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
{
register unsigned char *p2 = p;
- /* Compare what follows with the begining of the repeat.
+ /* Compare what follows with the beginning of the repeat.
If we can establish that there is nothing that they would
- both match, we can change to finalize_jump */
- while (p2 != pend
+ both match, we can change to finalize_jump. */
+ while (p2 + 1 != pend
&& (*p2 == (unsigned char) stop_memory
|| *p2 == (unsigned char) start_memory))
- p2++;
+ p2 += 2; /* Skip over reg number. */
if (p2 == pend)
p[-3] = (unsigned char) finalize_jump;
else if (*p2 == (unsigned char) exactn
@@ -1482,7 +2350,7 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
register unsigned char *p1 = p + mcnt;
/* p1[0] ... p1[2] are an on_failure_jump.
- Examine what follows that */
+ Examine what follows that. */
if (p1[3] == (unsigned char) exactn && p1[5] != c)
p[-3] = (unsigned char) finalize_jump;
else if (p1[3] == (unsigned char) charset
@@ -1492,108 +2360,139 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
if (c < p1[4] * BYTEWIDTH
&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
not = !not;
- /* not is 1 if c would match */
- /* That means it is not safe to finalize */
+ /* `not' is 1 if c would match. */
+ /* That means it is not safe to finalize. */
if (!not)
p[-3] = (unsigned char) finalize_jump;
}
}
}
- p -= 2;
+ p -= 2; /* Point at relative address again. */
if (p[-1] != (unsigned char) finalize_jump)
{
- p[-1] = (unsigned char) jump;
+ p[-1] = (unsigned char) jump;
goto nofinalize;
}
-
- /* The end of a stupid repeat has a finalize-jump
- back to the start, where another failure point will be made
- which will point after all the repetitions found so far. */
-
- case finalize_jump:
- stackp -= 2;
-
- case jump:
+ /* Note fall through. */
+
+ /* The end of a stupid repeat has a finalize_jump back to the
+ start, where another failure point will be made which will
+ point to after all the repetitions found so far. */
+
+ /* Take off failure points put on by matching on_failure_jump
+ because didn't fail. Also remove the register information
+ put on by the on_failure_jump. */
+ case finalize_jump:
+ POP_FAILURE_POINT ();
+ /* Note fall through. */
+
+ /* Jump without taking off any failure points. */
+ case jump:
nofinalize:
- mcnt = *p++ & 0377;
- mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
- p += mcnt + 1; /* The 1 compensates for missing ++ above */
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p += mcnt;
break;
- case dummy_failure_jump:
- if (stackp == stacke)
- {
- unsigned char **stackx
- = (unsigned char **) alloca (2 * (stacke - stackb)
- * sizeof (char *));
- bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
- stackp = stackx + (stackp - stackb);
- stacke = stackx + 2 * (stacke - stackb);
- stackb = stackx;
+ case dummy_failure_jump:
+ /* Normally, the on_failure_jump pushes a failure point, which
+ then gets popped at finalize_jump. We will end up at
+ finalize_jump, also, and with a pattern of, say, `a+', we
+ are skipping over the on_failure_jump, so we have to push
+ something meaningless for finalize_jump to pop. */
+ PUSH_FAILURE_POINT (0, 0);
+ goto nofinalize;
+
+
+ /* Have to succeed matching what follows at least n times. Then
+ just handle like an on_failure_jump. */
+ case succeed_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ /* Originally, this is how many times we HAVE to succeed. */
+ if (mcnt)
+ {
+ mcnt--;
+ p += 2;
+ STORE_NUMBER_AND_INCR (p, mcnt);
+ }
+ else if (mcnt == 0)
+ {
+ p[2] = unused;
+ p[3] = unused;
+ goto on_failure;
+ }
+ else
+ {
+ fprintf (stderr, "regex: the succeed_n's n is not set.\n");
+ exit (1);
}
- *stackp++ = 0;
- *stackp++ = 0;
- goto nofinalize;
-
- case wordbound:
- if (d == string1 /* Points to first char */
- || d == end2 /* Points to end */
- || (d == end1 && size2 == 0)) /* Points to end */
- break;
- if ((SYNTAX (d[-1]) == Sword)
- != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
+ break;
+
+ case jump_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ /* Originally, this is how many times we CAN jump. */
+ if (mcnt)
+ {
+ mcnt--;
+ STORE_NUMBER(p + 2, mcnt);
+ goto nofinalize; /* Do the jump without taking off
+ any failure points. */
+ }
+ /* If we don't have to jump any more, skip over the rest of the command. */
+ else
+ p += 4;
+ break;
+
+ case set_number_at:
+ {
+ register unsigned char *p1;
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p1 = p + mcnt;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ STORE_NUMBER (p1, mcnt);
+ break;
+ }
+
+ /* Ignore these. Used to ignore the n of succeed_n's which
+ currently have n == 0. */
+ case unused:
+ break;
+
+ case wordbound:
+ if (AT_WORD_BOUNDARY)
break;
goto fail;
case notwordbound:
- if (d == string1 /* Points to first char */
- || d == end2 /* Points to end */
- || (d == end1 && size2 == 0)) /* Points to end */
- goto fail;
- if ((SYNTAX (d[-1]) == Sword)
- != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
+ if (AT_WORD_BOUNDARY)
goto fail;
break;
case wordbeg:
- if (d == end2 /* Points to end */
- || (d == end1 && size2 == 0) /* Points to end */
- || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
- goto fail;
- if (d == string1 /* Points to first char */
- || SYNTAX (d[-1]) != Sword) /* prev char not letter */
+ if (IS_A_LETTER (d) && (!IS_A_LETTER (d - 1) || AT_STRINGS_BEG))
break;
goto fail;
case wordend:
- if (d == string1 /* Points to first char */
- || SYNTAX (d[-1]) != Sword) /* prev char not letter */
- goto fail;
- if (d == end2 /* Points to end */
- || (d == end1 && size2 == 0) /* Points to end */
- || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
+ /* Have to check if AT_STRINGS_BEG before looking at d - 1. */
+ if (!AT_STRINGS_BEG && IS_A_LETTER (d - 1)
+ && (!IS_A_LETTER (d) || AT_STRINGS_END))
break;
goto fail;
#ifdef emacs
case before_dot:
- if (((d - string2 <= (unsigned) size2)
- ? d - bf_p2 : d - bf_p1)
- <= point)
+ if (PTR_CHAR_POS (d) >= point)
goto fail;
break;
case at_dot:
- if (((d - string2 <= (unsigned) size2)
- ? d - bf_p2 : d - bf_p1)
- == point)
+ if (PTR_CHAR_POS (d) != point)
goto fail;
break;
case after_dot:
- if (((d - string2 <= (unsigned) size2)
- ? d - bf_p2 : d - bf_p1)
- >= point)
+ if (PTR_CHAR_POS (d) <= point)
goto fail;
break;
@@ -1606,6 +2505,7 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
matchsyntax:
PREFETCH;
if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
+ SET_REGS_MATCHED;
break;
case notwordchar:
@@ -1617,34 +2517,44 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
matchnotsyntax:
PREFETCH;
if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
- break;
-#else
+ SET_REGS_MATCHED;
+ break;
+
+#else /* not emacs */
+
case wordchar:
PREFETCH;
- if (SYNTAX (*d++) == 0) goto fail;
+ if (!IS_A_LETTER (d))
+ goto fail;
+ SET_REGS_MATCHED;
break;
case notwordchar:
PREFETCH;
- if (SYNTAX (*d++) != 0) goto fail;
+ if (IS_A_LETTER (d))
+ goto fail;
+ SET_REGS_MATCHED;
break;
+
#endif /* not emacs */
case begbuf:
- if (d == string1) /* Note, d cannot equal string2 */
- break; /* unless string1 == string2. */
- goto fail;
+ if (AT_STRINGS_BEG)
+ break;
+ goto fail;
- case endbuf:
- if (d == end2 || (d == end1 && size2 == 0))
+ case endbuf:
+ if (AT_STRINGS_END)
break;
goto fail;
case exactn:
/* Match the next few pattern characters exactly.
- mcnt is how many characters to match. */
+ mcnt is how many characters to match. */
mcnt = *p++;
- if (translate)
+ /* This is written out as an if-else so we don't waste time
+ testing `translate' inside the loop. */
+ if (translate)
{
do
{
@@ -1662,32 +2572,63 @@ re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
}
while (--mcnt);
}
- break;
+ SET_REGS_MATCHED;
+ break;
}
- continue; /* Successfully matched one pattern command; keep matching */
+ continue; /* Successfully executed one pattern command; keep going. */
- /* Jump here if any matching operation fails. */
+ /* Jump here if any matching operation fails. */
fail:
if (stackp != stackb)
/* A restart point is known. Restart there and pop it. */
{
+ short last_used_reg, this_reg;
+
+ /* If this failure point is from a dummy_failure_point, just
+ skip it. */
if (!stackp[-2])
- { /* If innermost failure point is dormant, flush it and keep looking */
- stackp -= 2;
- goto fail;
- }
- d = *--stackp;
+ {
+ POP_FAILURE_POINT ();
+ goto fail;
+ }
+
+ d = *--stackp;
p = *--stackp;
- if (d >= string1 && d <= end1)
+ if (d >= string1 && d <= end1)
dend = end_match_1;
+ /* Restore register info. */
+ last_used_reg = (short) *--stackp;
+
+ /* Make the ones that weren't saved -1 or 0 again. */
+ for (this_reg = RE_NREGS - 1; this_reg > last_used_reg; this_reg--)
+ {
+ regend[this_reg] = (unsigned char *) -1;
+ regstart[this_reg] = (unsigned char *) -1;
+ IS_ACTIVE (reg_info[this_reg]) = 0;
+ MATCHED_SOMETHING (reg_info[this_reg]) = 0;
+ }
+
+ /* And restore the rest from the stack. */
+ for ( ; this_reg > 0; this_reg--)
+ {
+ reg_info[this_reg] = *(struct register_info *) *--stackp;
+ regend[this_reg] = *--stackp;
+ regstart[this_reg] = *--stackp;
+ }
}
- else break; /* Matching at this starting point really fails! */
+ else
+ break; /* Matching at this starting point really fails. */
}
- return -1; /* Failure to match */
+
+ if (best_regs_set)
+ goto restore_best_regs;
+
+ FREE_AND_RETURN(stackb,(-1)); /* Failure to match. */
}
+
static int
-bcmp_translate (s1, s2, len, translate)
+memcmp_translate (s1, s2, len, translate)
unsigned char *s1, *s2;
register int len;
unsigned char *translate;
@@ -1700,8 +2641,10 @@ bcmp_translate (s1, s2, len, translate)
}
return 0;
}
+
+
-/* Entry points compatible with bsd4.2 regex library */
+/* Entry points compatible with 4.2 BSD regex library. */
#ifndef emacs
@@ -1734,18 +2677,24 @@ re_exec (s)
char *s;
{
int len = strlen (s);
- return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
+ return 0 <= re_search (&re_comp_buf, s, len, 0, len,
+ (struct re_registers *) 0);
}
+#endif /* not emacs */
+
-#endif /* emacs */
#ifdef test
+#ifdef atarist
+long _stksize = 2L; /* reserve memory for stack */
+#endif
#include <stdio.h>
-/* Indexed by a character, gives the upper case equivalent of the character */
+/* Indexed by a character, gives the upper case equivalent of the
+ character. */
-static char upcase[0400] =
+char upcase[0400] =
{ 000, 001, 002, 003, 004, 005, 006, 007,
010, 011, 012, 013, 014, 015, 016, 017,
020, 021, 022, 023, 024, 025, 026, 027,
@@ -1780,6 +2729,36 @@ static char upcase[0400] =
0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
};
+#ifdef canned
+
+#include "tests.h"
+
+typedef enum { extended_test, basic_test } test_type;
+
+/* Use this to run the tests we've thought of. */
+
+void
+main ()
+{
+ test_type t = extended_test;
+
+ if (t == basic_test)
+ {
+ printf ("Running basic tests:\n\n");
+ test_posix_basic ();
+ }
+ else if (t == extended_test)
+ {
+ printf ("Running extended tests:\n\n");
+ test_posix_extended ();
+ }
+}
+
+#else /* not canned */
+
+/* Use this to run interactive tests. */
+
+void
main (argc, argv)
int argc;
char **argv;
@@ -1828,6 +2807,9 @@ main (argc, argv)
}
}
+#endif
+
+
#ifdef NOTDEF
print_buf (bufp)
struct re_pattern_buffer *bufp;
@@ -1852,12 +2834,12 @@ print_buf (bufp)
printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
}
-#endif
+#endif /* NOTDEF */
printchar (c)
char c;
{
- if (c < 041 || c >= 0177)
+ if (c < 040 || c >= 0177)
{
putchar ('\\');
putchar (((c >> 6) & 3) + '0');
@@ -1868,4 +2850,10 @@ printchar (c)
putchar (c);
}
+error (string)
+ char *string;
+{
+ puts (string);
+ exit (1);
+}
#endif /* test */
diff --git a/regex.h b/regex.h
index 7ad5da24..145b6d13 100644
--- a/regex.h
+++ b/regex.h
@@ -1,107 +1,28 @@
/* Definitions for data structures callers pass the regex library.
- Copyright (C) 1985 Free Software Foundation, Inc.
-
- NO WARRANTY
-
- BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
-NO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
-WHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
-RICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
-WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
-BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
-FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
-AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
-DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
-CORRECTION.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
-STALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
-WHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
-LIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
-OTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
-DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
-A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
-PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
-
- GENERAL PUBLIC LICENSE TO COPY
-
- 1. You may copy and distribute verbatim copies of this source file
-as you receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy a valid copyright notice "Copyright
-(C) 1985 Free Software Foundation, Inc."; and include following the
-copyright notice a verbatim copy of the above disclaimer of warranty
-and of this License. You may charge a distribution fee for the
-physical act of transferring a copy.
-
- 2. You may modify your copy or copies of this source file or
-any portion of it, and copy and distribute such modifications under
-the terms of Paragraph 1 above, provided that you also do the following:
-
- a) cause the modified files to carry prominent notices stating
- that you changed the files and the date of any change; and
-
- b) cause the whole of any work that you distribute or publish,
- that in whole or in part contains or is a derivative of this
- program or any part thereof, to be licensed at no charge to all
- third parties on terms identical to those contained in this
- License Agreement (except that you may choose to grant more extensive
- warranty protection to some or all third parties, at your option).
-
- c) You may charge a distribution fee for the physical act of
- transferring a copy, and you may at your option offer warranty
- protection in exchange for a fee.
-
-Mere aggregation of another unrelated program with this program (or its
-derivative) on a volume of a storage or distribution medium does not bring
-the other program under the scope of these terms.
-
- 3. You may copy and distribute this program (or a portion or derivative
-of it, under Paragraph 2) in object code or executable form under the terms
-of Paragraphs 1 and 2 above provided that you also do one of the following:
-
- a) accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of
- Paragraphs 1 and 2 above; or,
-
- b) accompany it with a written offer, valid for at least three
- years, to give any third party free (except for a nominal
- shipping charge) a complete machine-readable copy of the
- corresponding source code, to be distributed under the terms of
- Paragraphs 1 and 2 above; or,
-
- c) accompany it with the information you received as to where the
- corresponding source code may be obtained. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form alone.)
-
-For an executable file, complete source code means all the source code for
-all modules it contains; but, as a special exception, it need not include
-source code for modules which are standard libraries that accompany the
-operating system on which the executable file runs.
-
- 4. You may not copy, sublicense, distribute or transfer this program
-except as expressly provided under this License Agreement. Any attempt
-otherwise to copy, sublicense, distribute or transfer this program is void and
-your rights to use the program under this License agreement shall be
-automatically terminated. However, parties who have received computer
-software programs from you with this License Agreement will not have
-their licenses terminated so long as such parties remain in full compliance.
-
- 5. If you wish to incorporate parts of this program into other free
-programs whose distribution conditions are different, write to the Free
-Software Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
-worked out a simple rule that can be stated here, but we will often permit
-this. We will be guided by the two goals of preserving the free status of
-all derivatives of our free software and of promoting the sharing and reuse of
-software.
-
-
-In other words, you are welcome to use, share and improve this program.
-You are forbidden to forbid anyone else to use, share and improve
-what you give them. Help stamp out software-hoarding! */
+ Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 1, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+
+#ifdef __GNUC__
+ #pragma once
+#endif
+
+#ifndef __REGEXP_LIBRARY
+#define __REGEXP_LIBRARY
/* Define number of parens for which we record the beginnings and ends.
This affects how much space the `struct re_registers' type takes up. */
@@ -109,70 +30,158 @@ what you give them. Help stamp out software-hoarding! */
#define RE_NREGS 10
#endif
-/* These bits are used in the obscure_syntax variable to choose among
+#define BYTEWIDTH 8
+
+
+/* Maximum number of duplicates an interval can allow. */
+#define RE_DUP_MAX ((1 << 15) - 1)
+
+
+/* This defines the various regexp syntaxes. */
+extern int obscure_syntax;
+
+
+/* The following bits are used in the obscure_syntax variable to choose among
alternative regexp syntaxes. */
-/* 1 means plain parentheses serve as grouping, and backslash
+/* If this bit is set, plain parentheses serve as grouping, and backslash
parentheses are needed for literal searching.
- 0 means backslash-parentheses are grouping, and plain parentheses
+ If not set, backslash-parentheses are grouping, and plain parentheses
are for literal searching. */
-#define RE_NO_BK_PARENS 1
-
-/* 1 means plain | serves as the "or"-operator, and \| is a literal.
- 0 means \| serves as the "or"-operator, and | is a literal. */
-#define RE_NO_BK_VBAR 2
-
-/* 0 means plain + or ? serves as an operator, and \+, \? are literals.
- 1 means \+, \? are operators and plain +, ? are literals. */
-#define RE_BK_PLUS_QM 4
-
-/* 1 means | binds tighter than ^ or $.
- 0 means the contrary. */
-#define RE_TIGHT_VBAR 8
-
-/* 1 means treat \n as an _OR operator
- 0 means treat it as a normal character */
-#define RE_NEWLINE_OR 16
-
-/* 0 means that a special characters (such as *, ^, and $) always have
- their special meaning regardless of the surrounding context.
- 1 means that special characters may act as normal characters in some
- contexts. Specifically, this applies to:
- ^ - only special at the beginning, or after ( or |
- $ - only special at the end, or before ) or |
- *, +, ? - only special when not after the beginning, (, or | */
-#define RE_CONTEXT_INDEP_OPS 32
-
-/* 0 means that \ before anything inside [ and ] is taken as a real \.
- 1 means that such a \ escapes the following character This is a
- special case for AWK. */
-#define RE_AWK_CLASS_HACK 64
-
-/* Now define combinations of bits for the standard possibilities. */
-#define RE_SYNTAX_POSIX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+#define RE_NO_BK_PARENS 1
+
+/* If this bit is set, plain | serves as the `or'-operator, and \| is a
+ literal.
+ If not set, \| serves as the `or'-operator, and | is a literal. */
+#define RE_NO_BK_VBAR (1 << 1)
+
+/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are
+ literals.
+ If set, \+, \? are operators and plain +, ? are literals. */
+#define RE_BK_PLUS_QM (1 << 2)
+
+/* If this bit is set, | binds tighter than ^ or $.
+ If not set, the contrary. */
+#define RE_TIGHT_VBAR (1 << 3)
+
+/* If this bit is set, then treat newline as an OR operator.
+ If not set, treat it as a normal character. */
+#define RE_NEWLINE_OR (1 << 4)
+
+/* If this bit is set, then special characters may act as normal
+ characters in some contexts. Specifically, this applies to:
+ ^ -- only special at the beginning, or after ( or |;
+ $ -- only special at the end, or before ) or |;
+ *, +, ? -- only special when not after the beginning, (, or |.
+ If this bit is not set, special characters (such as *, ^, and $)
+ always have their special meaning regardless of the surrounding
+ context. */
+#define RE_CONTEXT_INDEP_OPS (1 << 5)
+
+/* If this bit is not set, then \ before anything inside [ and ] is taken as
+ a real \.
+ If set, then such a \ escapes the following character. This is a
+ special case for awk. */
+#define RE_AWK_CLASS_HACK (1 << 6)
+
+/* If this bit is set, then \{ and \} or { and } serve as interval operators.
+ If not set, then \{ and \} and { and } are treated as literals. */
+#define RE_INTERVALS (1 << 7)
+
+/* If this bit is not set, then \{ and \} serve as interval operators and
+ { and } are literals.
+ If set, then { and } serve as interval operators and \{ and \} are
+ literals. */
+#define RE_NO_BK_CURLY_BRACES (1 << 8)
+
+/* If this bit is set, then character classes are supported; they are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (1 << 9)
+
+/* If this bit is set, then the dot re doesn't match a null byte.
+ If not set, it does. */
+#define RE_DOT_NOT_NULL (1 << 10)
+
+/* If this bit is set, then [^...] doesn't match a newline.
+ If not set, it does. */
+#define RE_HAT_NOT_NEWLINE (1 << 11)
+
+/* If this bit is set, back references are not recognized.
+ If not set, they are. */
+#define RE_NO_BK_REFS (1 << 12)
+
+/* If this bit is set, back references must refer to a preceding
+ subexpression. If not set, a back reference to a nonexistent
+ subexpression is treated as literal characters. */
+#define RE_NO_EMPTY_BK_REF (1 << 13)
+
+/* If this bit is set, bracket expressions can't be empty.
+ If it is not set, they can be empty. */
+#define RE_NO_EMPTY_BRACKETS (1 << 14)
+
+/* If this bit is set, then *, +, ? and { cannot be first in an re or
+ immediately after a |, or a (. Furthermore, a | cannot be first or
+ last in an re, or immediately follow another | or a (. Also, a ^
+ cannot appear in a nonleading position and a $ cannot appear in a
+ nontrailing position (outside of bracket expressions, that is). */
+#define RE_CONTEXTUAL_INVALID_OPS (1 << 15)
+
+/* If this bit is set, then +, ? and | aren't recognized as operators.
+ If it's not, they are. */
+#define RE_LIMITED_OPS (1 << 16)
+
+/* If this bit is set, then an ending range point has to collate higher
+ or equal to the starting range point.
+ If it's not set, then when the ending range point collates lower
+ than the starting range point, the range is just considered empty. */
+#define RE_NO_EMPTY_RANGES (1 << 17)
+
+/* If this bit is set, then a hyphen (-) can't be an ending range point.
+ If it isn't, then it can. */
+#define RE_NO_HYPHEN_RANGE_END (1 << 18)
+
+
+/* Define combinations of bits for the standard possibilities. */
+#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
| RE_CONTEXT_INDEP_OPS)
-#define RE_SYNTAX_AWK (RE_SYNTAX_POSIX_EGREP | RE_AWK_CLASS_HACK)
-#define RE_SYNTAX_EGREP (RE_SYNTAX_POSIX_EGREP | RE_NEWLINE_OR)
+#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_AWK_CLASS_HACK)
+#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR)
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS 0
-
-/* This data structure is used to represent a compiled pattern. */
+#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM \
+ | RE_CHAR_CLASSES | RE_DOT_NOT_NULL \
+ | RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF \
+ | RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS \
+ | RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END)
+
+#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES \
+ | RE_NO_BK_VBAR | RE_NO_BK_PARENS \
+ | RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES \
+ | RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \
+ | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES \
+ | RE_NO_HYPHEN_RANGE_END)
+
+
+/* This data structure is used to represent a compiled pattern. */
struct re_pattern_buffer
{
- char *buffer; /* Space holding the compiled pattern commands. */
- int allocated; /* Size of space that buffer points to */
- int used; /* Length of portion of buffer actually occupied */
- char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
+ char *buffer; /* Space holding the compiled pattern commands. */
+ long allocated; /* Size of space that `buffer' points to. */
+ long used; /* Length of portion of buffer actually occupied */
+ char *fastmap; /* Pointer to fastmap, if any, or zero if none. */
/* re_search uses the fastmap, if there is one,
- to skip quickly over totally implausible characters */
- char *translate; /* Translate table to apply to all characters before comparing.
- Or zero for no translation.
- The translation is applied to a pattern when it is compiled
- and to data when it is matched. */
+ to skip over totally implausible characters. */
+ char *translate; /* Translate table to apply to all characters before
+ comparing, or zero for no translation.
+ The translation is applied to a pattern when it is
+ compiled and to data when it is matched. */
char fastmap_accurate;
/* Set to zero when a new pattern is stored,
- set to one when the fastmap is updated from it. */
+ set to one when the fastmap is updated from it. */
char can_be_null; /* Set to one by compiling fastmap
if this pattern might match the null string.
It does not necessarily match the null string
@@ -182,14 +191,21 @@ struct re_pattern_buffer
listed in the fastmap. */
};
-/* Structure to store "register" contents data in.
+
+/* search.c (search_buffer) needs this one value. It is defined both in
+ regex.c and here. */
+#define RE_EXACTN_VALUE 1
+
+
+/* Structure to store register contents data in.
Pass the address of such a structure as an argument to re_match, etc.,
if you want this information back.
- start[i] and end[i] record the string matched by \( ... \) grouping i,
- for i from 1 to RE_NREGS - 1.
- start[0] and end[0] record the entire string matched. */
+ For i from 1 to RE_NREGS - 1, start[i] records the starting index in
+ the string of where the ith subexpression matched, and end[i] records
+ one after the ending index. start[0] and end[0] are analogous, for
+ the entire pattern. */
struct re_registers
{
@@ -197,78 +213,44 @@ struct re_registers
int end[RE_NREGS];
};
-/* These are the command codes that appear in compiled regular expressions, one per byte.
- Some command codes are followed by argument bytes.
- A command code can specify any interpretation whatever for its arguments.
- Zero-bytes may appear in the compiled regular expression. */
-enum regexpcode
- {
- unused,
- exactn, /* followed by one byte giving n, and then by n literal bytes */
- begline, /* fails unless at beginning of line */
- endline, /* fails unless at end of line */
- jump, /* followed by two bytes giving relative address to jump to */
- on_failure_jump, /* followed by two bytes giving relative address of place
- to resume at in case of failure. */
- finalize_jump, /* Throw away latest failure point and then jump to address. */
- maybe_finalize_jump, /* Like jump but finalize if safe to do so.
- This is used to jump back to the beginning
- of a repeat. If the command that follows
- this jump is clearly incompatible with the
- one at the beginning of the repeat, such that
- we can be sure that there is no use backtracking
- out of repetitions already completed,
- then we finalize. */
- dummy_failure_jump, /* jump, and push a dummy failure point.
- This failure point will be thrown away
- if an attempt is made to use it for a failure.
- A + construct makes this before the first repeat. */
- anychar, /* matches any one character */
- charset, /* matches any one char belonging to specified set.
- First following byte is # bitmap bytes.
- Then come bytes for a bit-map saying which chars are in.
- Bits in each byte are ordered low-bit-first.
- A character is in the set if its bit is 1.
- A character too large to have a bit in the map
- is automatically not in the set */
- charset_not, /* similar but match any character that is NOT one of those specified */
- start_memory, /* starts remembering the text that is matched
- and stores it in a memory register.
- followed by one byte containing the register number.
- Register numbers must be in the range 0 through NREGS. */
- stop_memory, /* stops remembering the text that is matched
- and stores it in a memory register.
- followed by one byte containing the register number.
- Register numbers must be in the range 0 through NREGS. */
- duplicate, /* match a duplicate of something remembered.
- Followed by one byte containing the index of the memory register. */
- before_dot, /* Succeeds if before dot */
- at_dot, /* Succeeds if at dot */
- after_dot, /* Succeeds if after dot */
- begbuf, /* Succeeds if at beginning of buffer */
- endbuf, /* Succeeds if at end of buffer */
- wordchar, /* Matches any word-constituent character */
- notwordchar, /* Matches any char that is not a word-constituent */
- wordbeg, /* Succeeds if at word beginning */
- wordend, /* Succeeds if at word end */
- wordbound, /* Succeeds if at a word boundary */
- notwordbound, /* Succeeds if not at a word boundary */
- syntaxspec, /* Matches any character whose syntax is specified.
- followed by a byte which contains a syntax code, Sword or such like */
- notsyntaxspec /* Matches any character whose syntax differs from the specified. */
- };
+#ifdef __STDC__
+
+extern char *re_compile_pattern (char *, size_t, struct re_pattern_buffer *);
+/* Is this really advertised? */
+extern void re_compile_fastmap (struct re_pattern_buffer *);
+extern int re_search (struct re_pattern_buffer *, char*, int, int, int,
+ struct re_registers *);
+extern int re_search_2 (struct re_pattern_buffer *, char *, int,
+ char *, int, int, int,
+ struct re_registers *, int);
+extern int re_match (struct re_pattern_buffer *, char *, int, int,
+ struct re_registers *);
+extern int re_match_2 (struct re_pattern_buffer *, char *, int,
+ char *, int, int, struct re_registers *, int);
+
+/* 4.2 bsd compatibility. */
+extern char *re_comp (char *);
+extern int re_exec (char *);
+
+#else /* !__STDC__ */
+
extern char *re_compile_pattern ();
/* Is this really advertised? */
extern void re_compile_fastmap ();
extern int re_search (), re_search_2 ();
extern int re_match (), re_match_2 ();
-/* 4.2 bsd compatibility (yuck) */
+/* 4.2 bsd compatibility. */
extern char *re_comp ();
extern int re_exec ();
+#endif /* __STDC__ */
+
+
#ifdef SYNTAX_TABLE
extern char *re_syntax_table;
#endif
+
+#endif /* !__REGEXP_LIBRARY */
diff --git a/version.sh b/version.c
index 77f10bdf..046fbf22 100644
--- a/version.sh
+++ b/version.c
@@ -1,17 +1,4 @@
-#! /bin/sh
-
-# version.sh --- create version.c
-
-if [ "x$1" = "x" ]
-then
- echo you must specify a release number on the command line
- exit 1
-fi
-
-RELEASE="$1"
-
-cat << EOF
-char *version_string = "@(#)Gnu Awk (gawk) ${RELEASE}";
+char *version_string = "@(#)Gnu Awk (gawk) 2.13";
/* 1.02 fixed /= += *= etc to return the new Left Hand Side instead
of the Right Hand Side */
@@ -45,5 +32,10 @@ char *version_string = "@(#)Gnu Awk (gawk) ${RELEASE}";
/* 2.11 Bug fix release to 2.10. Lots of changes for portability,
speed, and configurability. */
-EOF
-exit 0
+
+/* 2.12 Lots of changes for portability, speed, and configurability.
+ Several bugs fixed. POSIX compliance. Removal of last set
+ of hard-wired limits. Atari and VMS ports added. */
+
+/* 2.13 Public release of 2.12 */
+
diff --git a/vms/descrip.mms b/vms/descrip.mms
new file mode 100644
index 00000000..040d458a
--- /dev/null
+++ b/vms/descrip.mms
@@ -0,0 +1,200 @@
+# Descrip.MMS -- Makefile for building GNU Awk on VMS with VAXC and MMS.
+#
+# usage:
+# $ MMS /Description=[.vms]Descrip.MMS gawk
+#
+# gawk.exe :
+# You'll need to modify this Makefile to use gcc or vaxc v2.x rather
+# than vaxc v3.x. Change the CFLAGS macro definition (move '#' from
+# beginning of 2nd alternative to beginning of 1st), and also perhaps
+# enable the following ".first" rule and its associated action. For
+# GNU C, change the LIBS macro definition.
+#
+# awk_tab.c :
+# If you have DEC/Shell, change the PARSER and PARSERINIT macros to use
+# yacc rather than bison. If you have neither yacc nor bison, you'll
+# have to make sure that the distributed version of "awk.tab.c" is
+# named "awk_tab.c" and that its modification date is later than the
+# date of "awk.y", so that MMS won't try to build that target. If you
+# use bison and it is already defined system-wide, comment out the
+# PARSERINIT definition.
+#
+# install.help :
+# You can make the target 'install.help' to load the VMS help text
+# into a help library. Modify the HELPLIB macro if you don't want
+# to put entry into the regular VMS library. (If you use an alternate
+# help library, it must already exist; this target won't create it.)
+#
+# gawk.dvi :
+# If you have TeX, you can make the target 'gawk.dvi' to process
+# _The_GAWK_Manual_ from gawk.texinfo. You'll need to use a device
+# specific post-processor on gawk.dvi in order to get printable data.
+#
+
+# location of the VMS-specific files, relative to the 'main' directory
+VMSDIR = [.vms]
+MAKEFILE = $(VMSDIR)Descrip.MMS
+
+# debugging &c !'ccflags' is an escape to allow external compile flags
+#CCFLAGS = /noOpt/Debug
+
+# work within the main directory, even when handling files in [.vms]
+# note: use 2nd variant for either VAX C V2.x or for GNU C
+CFLAGS = /Include=[]/Object=[]/Opt=noInline $(CCFLAGS)
+#CFLAGS = /Include=([],$(VMSDIR))/Object=[] $(CCFLAGS)
+
+# uncomment this for GNU C
+#CC = gcc
+
+# uncomment these two lines for GNU C _if_ it's not installed system-wide
+#.first !compiler init, needed if there's no system-wide setup
+# set command gnu_cc:[000000]gcc
+
+# uncomment these three lines for VAX C V2.x
+#.first !compiler init, find all #include files
+# define/nolog vaxc$library sys$library:,sys$disk:$(VMSDIR)
+# define/nolog c$library [],$(VMSDIR)
+#!(it appears that if vaxc$library is defined, then the /Include
+#! qualifier is ignored, making a c$library definition essential)
+
+# run-time libraries; use the 2nd one for GNU C
+LIBS = sys$share:vaxcrtl.exe/Shareable
+#LIBS = gnu_cc:[000000]gcclib.olb/Library,sys$library:vaxcrtl.olb/Library
+
+PARSER = bison
+PARSERINIT = set command gnu_bison:[000000]bison
+#PARSER = yacc
+#PARSERINIT = yacc := $shell$exe:yacc
+
+# this is used for optional target 'install.help'
+HELPLIB = sys$help:helplib.hlb
+#HELPLIB = sys$help:local.hlb
+
+#
+######## nothing below this line should need to be changed ########
+#
+
+# ALLOCA
+ALLOCA = alloca.obj
+
+# object files
+AWKOBJS = main.obj,eval.obj,builtin.obj,msg.obj,iop.obj,io.obj,\
+ field.obj,array.obj,node.obj,version.obj,missing.obj,re.obj
+
+ALLOBJS = $(AWKOBJS),awk_tab.obj
+
+# GNUOBJS
+# GNU stuff that gawk uses as library routines.
+GNUOBJS = regex.obj,dfa.obj,$(ALLOCA)
+
+# VMSOBJS
+# VMS specific stuff
+VMSCODE = vms_misc.obj,vms_popen.obj,vms_fwrite.obj,vms_args.obj,\
+ vms_gawk.obj,vms_cli.obj
+VMSCMD = gawk_cmd.obj # built from .cld file
+VMSOBJS = $(VMSCODE),$(VMSCMD)
+
+VMSSRCS = $(VMSDIR)vms_misc.c,$(VMSDIR)vms_popen.c,$(VMSDIR)vms_fwrite.c,\
+ $(VMSDIR)vms_args.c,$(VMSDIR)vms_gawk.c,$(VMSDIR)vms_cli.c
+VMSHDRS = $(VMSDIR)vms.h,$(VMSDIR)fcntl.h,$(VMSDIR)varargs.h,$(VMSDIR)unixlib.h
+VMSOTHR = $(VMSDIR)Descrip.MMS,$(VMSDIR)vmsbuild.com,$(VMSDIR)version.com,\
+ $(VMSDIR)gawk.hlp
+
+# Release of gawk
+REL=2.13
+PATCHLVL=2
+
+# dummy target to allow building "gawk" in addition to explicit "gawk.exe"
+gawk : gawk.exe
+ write sys$output " GAWK "
+
+# rules to build gawk
+gawk.exe : $(ALLOBJS) $(GNUOBJS) $(VMSOBJS) gawk.opt
+ $(LINK) $(LINKFLAGS) gawk.opt/options
+
+gawk.opt : $(MAKEFILE) # create linker options file
+ open/write opt gawk.opt ! ~ 'cat <<close >gawk.opt'
+ write opt "! GAWK -- Gnu AWK"
+ @ write opt "$(ALLOBJS)"
+ @ write opt "$(GNUOBJS)"
+ @ write opt "$(VMSOBJS)"
+ @ write opt "$(LIBS)"
+ @ write opt "psect_attr=environ,noshr !extern [noshare] char **"
+ @ write opt "stack=50 !preallocate more pages (default is 20)"
+ write opt "identification=""V$(REL).$(PATCHLVL)"""
+ close opt
+
+$(AWKOBJS) : awk.h config.h
+$(VMSCODE) : awk.h config.h $(VMSDIR)vms.h
+vms_misc.obj : $(VMSDIR)vms_misc.c
+vms_popen.obj : $(VMSDIR)vms_popen.c
+vms_fwrite.obj : $(VMSDIR)vms_fwrite.c
+vms_args.obj : $(VMSDIR)vms_args.c
+vms_gawk.obj : $(VMSDIR)vms_gawk.c
+vms_cli.obj : $(VMSDIR)vms_cli.c
+dfa.obj : awk.h config.h dfa.h
+regex.obj : awk.h config.h regex.h
+main.obj : patchlevel.h
+awk_tab.obj : awk.h awk_tab.c
+
+# bison or yacc required
+awk_tab.c : awk.y # foo.y :: yacc => y_tab.c, bison => foo_tab.c
+ @- if f$search("y_tab.c").nes."" then delete y_tab.c;*
+ - $(PARSERINIT)
+ $(PARSER) $(YFLAGS) $<
+ @- if f$search("y_tab.c").nes."" then rename/new_vers y_tab.c $@ !yacc
+
+##version.c : version.sh $(MAKEFILE)
+## @$(VMSDIR)version.com "$(REL)"
+
+config.h : [.config]vms-conf.h
+ copy $< $@
+
+# Alloca - C simulation
+alloca.obj : alloca.c
+ $(CC) $(CFLAGS) /define=("STACK_DIRECTION=(-1)","exit=vms_exit") $<
+
+$(VMSCMD) : $(VMSDIR)gawk.cld
+ set command/object=$@ $(CLDFLAGS) $<
+
+# special target for loading the help text ($(VMSDIR)gawk.hlp) into a VMS help library
+install.help : $(VMSDIR)gawk.hlp
+ library/help $(HELPLIB) $< /log
+
+# miscellaneous other targets
+tidy :
+ - if f$search("*.*;-1").nes."" then purge
+ - if f$search("[.*]*.*;-1").nes."" then purge [.*]
+
+clean :
+ - delete *.obj;*,gawk.opt;*
+
+spotless : clean tidy
+ - delete gawk.dvi;*,gawk.exe;*,[.support]texindex.exe;*
+
+#
+# build gawk.dvi from within the 'support' subdirectory
+#
+gawk.dvi : [.support]texindex.exe gawk.texinfo
+ @ set default [.support]
+ @ write sys$output " Warnings from TeX are expected during the first pass"
+ TeX [-]gawk.texinfo
+ mcr []texindex gawk.cp gawk.fn gawk.ky gawk.pg gawk.tp gawk.vr
+ @ write sys$output " Second pass"
+ TeX [-]gawk.texinfo
+ mcr []texindex gawk.cp gawk.fn gawk.ky gawk.pg gawk.tp gawk.vr
+ @ write sys$output " Third (final) pass"
+ TeX [-]gawk.texinfo
+ -@ purge
+ -@ delete gawk.lis;,.aux;,gawk.%%;,.cps;,.fns;,.kys;,.pgs;,.toc;,.tps;,.vrs;
+ @ rename/new_vers gawk.dvi [-]*.*
+ @ set default [-]
+
+[.support]texindex.exe : [.support]texindex.c
+ @ set default [.support]
+ $(CC) /noOpt/noList/Define=("lines=tlines") texindex.c
+ $(LINK) /noMap texindex.obj,sys$library:vaxcrtl.olb/Lib
+ -@ delete texindex.obj;*
+ @ set default [-]
+
+#eof
diff --git a/vms/fcntl.h b/vms/fcntl.h
new file mode 100644
index 00000000..d975db7a
--- /dev/null
+++ b/vms/fcntl.h
@@ -0,0 +1,10 @@
+/* "fcntl.h" -- constants for BSD-style I/O routines (ala VAX C's <file.h>) */
+#define O_RDONLY 0  /* the three access-mode values occupy 0..2 */
+#define O_WRONLY 1
+#define O_RDWR 2
+#define O_NDELAY 4  /* O_NDELAY and O_NOWAIT are aliases: both are defined as 4 */
+#define O_NOWAIT 4
+#define O_APPEND 8
+#define O_CREAT 0x0200  /* the remaining flags are distinct single bits */
+#define O_TRUNC 0x0400
+#define O_EXCL 0x0800
diff --git a/vms/gawk.cld b/vms/gawk.cld
new file mode 100644
index 00000000..23d4c28a
--- /dev/null
+++ b/vms/gawk.cld
@@ -0,0 +1,46 @@
+! Gawk.Cld -- command definition for GAWK
+! Pat Rankin, Nov'89
+! [ revised for 2.12, May'91 ]
+ module Gawk_Cmd
+define verb GAWK
+ synonym AWK
+! image gawk !usage $ DEFINE GAWK disk:[directory]GAWK
+ parameter p1, value(required,list), label=gawk_p1, prompt="data file(s)"
+ qualifier input, value(required,list,type=$infile), label=progfile
+ qualifier commands, value(required), label=program
+ qualifier field_separator, value(required), label=field_sep
+ qualifier reg_expr, value(type=reg_expr_keywords)
+ qualifier variables, value(required,list)
+ qualifier copyright
+ qualifier version
+ qualifier lint
+ qualifier posix
+ qualifier usage
+ qualifier strict, negatable
+ qualifier debug, negatable
+ qualifier output, value(type=$outfile,default="SYS$OUTPUT")
+ disallow progfile and program !or not progfile and not program
+define type reg_expr_keywords
+ keyword awk
+ keyword egrep, default !synonym for 'posix'
+ keyword posix !equivalent to 'egrep'
+!
+! p1 = data file list (possibly including 'var=value' constructs)
+!note: parameter required; use 'sys$input:' to read data from 'stdin'
+! /input = program source file ('-f progfile')
+! /commands = program source text ('program')
+!note: either input or commands, but not both; if neither, usage message given
+! /field_separator = character(s) delimiting record fields; default is "[ \t]"
+! /reg_expr = type of regular expressions: awk or posix (posix == egrep)
+!note: by default, use awk style; /reg_expr (w/o value), use egrep style
+! /variables = list of 'var=value' items for assignment prior to BEGIN
+! /posix = force POSIX compatibility mode operation
+! /strict = force compatibility mode operation (UN*X SYS V, Release 4)
+! /output = destination for print,printf (default is sys$output: ie, 'stdout')
+! /lint = scan the awk program for possible problems and warn about them
+! /debug = debugging mode
+!note: compilation options determine whether debug mode is valid
+! /usage = display 'usage' reminder [describing this VMS command syntax]
+! /version = show program version
+! /copyright = show abbreviated edition of FSF's copyright notice
+!
diff --git a/vms/gawk.hlp b/vms/gawk.hlp
new file mode 100644
index 00000000..68892393
--- /dev/null
+++ b/vms/gawk.hlp
@@ -0,0 +1,1156 @@
+! Gawk.Hlp
+! Pat Rankin, Jun'90
+! revised, Jun'91
+! Online help for GAWK.
+!
+1 GAWK
+ GAWK is GNU awk, the Free Software Foundation's implementation of
+ the awk programming language. awk is an interpretive language which
+ can handle many data-reformatting jobs with just a few lines of code.
+ It has powerful string manipulation and pattern matching capabilities
+ built in. This version should be compatible with POSIX 1003.2 awk.
+
+ The VMS version of GAWK supports both the original UN*X-style command
+ interface and a DCL interface. The only setup requirement for GAWK
+ is to define it as a 'foreign' command: a DCL symbol with a value
+ which begins with '$'.
+ $ GAWK :== $disk:[directory]GAWK
+2 GNU_syntax
+ GAWK's UN*X-style interface uses the 'dash' convention for specifying
+ options and uses spaces to separate multiple arguments.
+
+ There are two main alternatives, depending on how the awk program is
+ to be passed to GAWK. Both alternatives share most options.
+
+ Usage: $ gawk [-W opts] [-F fs] [-v var=val] -f progfile [--] file ...
+ or $ gawk [-W opts] [-F fs] [-v var=val] [--] "program" file ...
+
+ The options are case-sensitive. On VMS, the DCL command interpreter
+ converts unquoted text into uppercase before passing it to the running
+ program. However, GAWK is written in 'C' and the C Run-Time Library
+ (VAXCRTL) converts unquoted text into *lowercase*. Therefore, the
+ -Fval and -W options must be enclosed in quotes.
+3 options
+ -f file use the specified file as the awk program source; if more
+ than one instance of -f is used, each file will be read
+ in succession
+ -Fstring define a value for the FS variable (field separator)
+ -v var=val assign a value of 'val' to the variable 'var'
+ -W 'options' additional gawk-specific options; multiple values may
+ be separated by commas, or by spaces if they're quoted,
+ or multiple occurrences of -W may be used.
+ -W compat use awk "compatibility mode" to disable GAWK extensions
+ and get the behavior of UN*X awk.
+ -W copyright [or -W copyleft] display an abbreviated version of
+ the GNU copyright information
+ -W lint warn about suspect or non-portable awk program code
+ -W posix compatibility mode with additional restrictions
+ -W version display program version number
+ -- don't check further arguments for leading dash
+3 program_text
+ If the '-f file' option is not used on the command line, then the
+ first "non-dash" argument is assumed to be a string of text containing
+ the awk source program. Here is a complete sample program:
+ $ gawk -- "BEGIN {print ""\nHello, World!\n""}"
+ This program would print a blank line (based on first "\n"), followed
+ by a line reading "Hello, World!", followed by another blank line
+ (since awk's 'print' statement includes trailing 'newline').
+
+ On VMS, to include a quote character inside of a quoted string, two
+ successive quotes ("") must be used.
+3 data_files
+ After all dash-options are examined, and after the program text if
+ there were no occurrences of the -f option, remaining (space separated)
+ command line arguments are considered to be data files for the awk
+ program to process. If any of these actually contains an equals sign
+ (=), then it is interpreted as a variable assignment instead of a data
+ file. The syntax is 'variable_name=value'. For example, the command
+ $ gawk -f myprog.awk infile.one flag=2 start=0 infile.two
+ would read file 'infile.one' for the program in 'myprog.awk', then it
+ would set 'flag' to 2 and 'start' to 0, and finally it would read file
+ 'infile.two' for the program. Note that in a case like this, the two
+ assignments actually occur after the first file has been processed,
+ not at program startup when the command line is first scanned.
+3 IO_redirection
+ The command parsing in the VMS implementation of GAWK does some
+ emulation of a UN*X-style shell, where certain characters on the
+ command line have special meaning. In particular, the symbols '<',
+ '>', '|', '*', and '?' receive special handling before the main part
+ of the program has a chance to see them. The symbols '<' and '>'
+ perform some file manipulation from the command line:
+
+ <ifile open file 'ifile' (readonly) as 'stdin' [SYS$INPUT]
+ >nfile create 'nfile' at 'stdout' [SYS$OUTPUT], in stream-lf format
+ >>ofile append to 'ofile' for 'stdout'; create it if necessary
+ >&efile point 'stderr' [SYS$ERROR] at 'efile', but don't open it yet
+ >$vfile create 'vfile' as 'stdout', using RMS attributes appropriate
+ for a standard text file (variable length records with
+ implied carriage control)
+ 2>&1 route error messages into the regular output stream
+ 1>&2 send output data to the error destination
+ <<sentinel error; reading stdin until 'sentinel' not supported
+ <-, >- error; closure of stdin or stdout from cmd line not supported
+ >>$vfile incorrect; would be interpreted as file "$vfile" in stream-lf
+ format rather than as file "vfile" in RMS 'text' format
+ | error; command line pipes not supported
+3 wildcard_expansion
+ The command parsing in the VMS implementation of GAWK does some
+ emulation of a UN*X-style shell, where certain characters on the
+ command line have special meaning. In particular, the symbols '<',
+ '>', '*', '%', and '?' receive special handling before the main part
+ of the program has a chance to see them. The symbols '*', '%' and '?'
+ are used as wildcards in filenames. '*' and '%' have their usual VMS
+ meanings of multiple character and single character wildcards,
+ respectively, and '?' is also treated as a single character wildcard.
+
+ When a command line argument that should be a filename contains any
+ of the wildcard characters, a directory lookup is attempted for files
+ which match the specified pattern. If one or more matching files are
+ found, those filenames are put into the command line in place of the
+ original pattern. If no matching files are found, the original
+ pattern is left in place.
+2 DCL_syntax
+ GAWK's DCL-style interface is more or less a standard DCL command, with
+ one required parameter. Multiple values--when present--are separated
+ by commas.
+
+ There are two main alternatives, depending on how the awk program is
+ to be passed to GAWK. Both alternatives share most options.
+
+ Usage: GAWK /COMMANDS="awk program text" data_file[,data_file,...]
+ or GAWK /INPUT=awk_file data_file[,"Var=value",data_file,...]
+ ( or GAWK /INPUT=(awk_file1,awk_file2,...) data_file[,...] )
+3 Parameter
+ data_file[,datafile,...] (data_file data_file ...)
+ data_file[,"Var=value",...,data_file,...] (data_file Var=value &c)
+
+ Data file(s) for the awk program to process. If any of these
+ actually contains an equals sign (=), then it is interpreted as
+ a variable assignment instead of a data file. The syntax is
+ "variable_name=value". Quotes are required for non-file parameters.
+
+ For example, the command
+ $ gawk/input=myprog.awk infile.one,"flag=2","start=0",infile.two
+ would read file 'infile.one' for the program in 'myprog.awk', then it
+ would set 'flag' to 2 and 'start' to 0, and finally it would read file
+ 'infile.two' for the program. Note that in a case like this, the two
+ assignments actually occur after the first file has been processed,
+ not at program startup when the command line is first scanned.
+
+ Wildcard file lookups are attempted on data file specifications. See
+ subtopic 'GAWK GNU_syntax wildcard_expansion' for details.
+
+ At least one data_file parameter value is required. An exception is
+ made if /usage, /version, or /copyright is specified *and* if GAWK is
+ defined as a 'foreign' command rather than a 'native' DCL command.
+3 Qualifiers
+/COMMANDS
+ /COMMANDS="awk program text" (-- "awk program text")
+
+ For short programs, it is possible to include the complete program
+ on the command line. The quotes are required. Here is a complete
+ sample program:
+ $ gawk/commands="BEGIN {print ""\nHello, World!\n""}" NL:
+ This program would print a blank line (based on first "\n"), followed
+ by a line reading "Hello, World!", followed by another blank line
+ (since awk's 'print' statement includes trailing 'newline').
+
+ To include a quote character inside of a quoted string, two
+ successive quotes ("") must be used.
+
+ Either /COMMANDS or /INPUT (but not both) must be supplied.
+/INPUT
+ /INPUT=(awk_file1,awk_file2) (-f awk_file1 -f awk_file2)
+
+ Used to specify one or more files containing the source code of
+ the awk program. If more than one file is used, separate them
+ with commas and enclose the list in parentheses.
+
+ Multiple source files are processed in order as if they had been
+ concatenated together.
+
+ Either /INPUT or /COMMANDS (but not both) must be supplied.
+/FIELD_SEPARATOR
+ /FIELD_SEPARATOR="FS_value" (-F"FS_value")
+
+ Assign a value to the built in variable FS (field separator).
+/VARIABLES
+ /VARIABLES=("Var1=val1","Var2=val2",...) (-v Var1=val1 -v Var2=val2)
+
+ Assign value(s) to the specified variable(s).
+/REG_EXPR
+ /REG_EXPR={AWK | EGREP | POSIX} (-a vs -e options [obsolete])
+
+ Specify regular expression syntax.
+
+ /REG_EXPR=AWK use the original awk syntax for regular expressions
+ /REG_EXPR=EGREP use the egrep syntax for regular expressions
+ /REG_EXPR=POSIX equivalent to /REG_EXPR=EGREP
+
+ If /REG_EXPR is omitted, then /REG_EXPR=AWK is the default. However,
+ if /REG_EXPR is included but its value is omitted, EGREP is used.
+
+ This qualifier is obsolete and has no effect.
+/STRICT
+ /[NO]STRICT (-"W compat" option)
+
+ Use strict awk compatibility mode (/strict) and suppress GAWK
+ extensions. The default is /NOSTRICT.
+/POSIX
+ /[NO]POSIX (-"W posix" option)
+
+ Use POSIX compatibility mode (/posix) and suppress GAWK extensions.
+ The default is /NOPOSIX. Slightly more restrictive than /strict.
+/LINT
+ /[NO]LINT (-"W lint" option)
+
+ Check the awk program carefully for potential problems that might
+ be encountered if it were to be used with other awk implementations,
+ and print warnings for anything found. The default is /NOLINT.
+/VERSION
+ /VERSION (-"W version" option)
+
+ Print GAWK's version number.
+/COPYRIGHT
+ /COPYRIGHT (-"W copyright" or -"W copyleft" option)
+
+ Print a brief version of GAWK's copyright notice.
+/USAGE
+ /USAGE (no corresponding GNU_syntax option)
+
+ Print a compact summary of the command line options.
+
+ After the 'usage' message is printed, GAWK terminates regardless
+ of any other command line options.
+/OUTPUT
+ /OUTPUT=out_file (>$out_file)
+
+ Write program output into 'out_file'. The default is SYS$OUTPUT.
+2 awk_language
+ An awk program consists of one or more pattern-action pairs, sometimes
+ referred to as "rules". For each record of an input (data) file, the
+ rules are checked sequentially. Any pattern which matches the input
+ record triggers that rule's action. Actions are instructions which
+ resemble statements in the 'C' programming language. Patterns come
+ in several varieties, including field comparisons, regular expression
+ matching, and special cases defined by reserved keywords.
+
+ All awk keywords and variables are case-sensitive. Text matching is
+ also sensitive to character case unless the builtin variable IGNORECASE
+ is set to a non-zero value.
+3 rules
+ The syntax for a pattern-action 'rule' is simply
+ PATTERN { ACTION }
+ where the braces ({}) are required punctuation for the action.
+ Semicolons (;) or 'newlines' (ie, having the text on a separate line)
+ delimit multiple rules and also multiple actions within a given rule.
+ Either the pattern or the action may be omitted; an empty pattern
+ matches every record of the input file; a missing action (not an empty
+ action inside of braces), is an implicit request to print the current
+ record; an empty action (ie, {}) is legal but not very useful.
+3 patterns
+ There are several types of patterns available for awk rules.
+
+ expression an 'expression' is something to be evaluated (perhaps
+ a comparison or function call) which will
+ be considered true if non-zero (for numeric
+ results) or if non-null (for strings)
+ /regular_expression/ slashes (/) delimit a regular expression
+ which is used as a pattern
+ pattern1, pattern2 a pair of patterns separated by a comma (,),
+ which causes a range of records to trigger
+ the associated action; the records which
+ match the patterns are included in the range
+ <null> an omitted pattern (in this text, the string '<null>'
+ is displayed, but in an awk program, it
+ would really be blank) matches every record
+ BEGIN keyword for specifying a rule to be executed prior to
+ reading the 1st record of the 1st input file
+ END keyword for specifying a rule to be executed after
+ handling the last input record of last file
+4 examples
+ Some example patterns (mostly with the corresponding actions omitted)
+
+ NF > 0 # comparison expression: matches non-null records
+ $0 # implied comparison: also matches non-null records
+ $2 > 1000 && sum <= 999999 # slightly more elaborate expression
+ /x/ # regular expression matching any record with an 'x' in it
+ /^ / # reg-expr matching records beginning with a space
+ $1 == "start", $NF == "stop" # range pattern for input in which
+ some data lines begin with 'start' and/or end with
+ 'stop' in order to collect groups of records
+ { sum += $1 } # null pattern: its action (add field #1 to
+ variable 'sum') would be executed for every record
+ BEGIN { sum = 0 } # keyword 'BEGIN': perform this action before
+ reading the input file (note: initialization to 0 is
+ unnecessary in awk)
+ END { print "total =", sum } # keyword 'END': perform this
+ action after the last input record has been processed
+3 actions
+ An 'action' is something to do when a given record has matched the
+ corresponding pattern in a rule. In general, actions resemble 'C'
+ statements and expressions. The action in a rule must be enclosed
+ in braces ({}).
+
+ Each action can contain more than one statement or expression to be
+ executed, provided that they're separated by semicolons (;) and/or
+ on separate lines.
+
+ An omitted action is equivalent to
+ { print $0 }
+ which prints the current record.
+3 operators
+ Relational operators
+ == compare for equality
+ != compare for inequality
+ <, <=, >, >= numerical or lexical comparison (less than, less or
+ equal, greater than, greater or equal, respectively)
+ ~ match against a regular expression
+ !~ match against a regular expression, but accept failed matches
+ instead of successful ones
+ Arithmetic operators
+ + addition
+ - subtraction
+ * multiplication
+ / division
+ % remainder
+ ^, ** exponentiation ('**' is a synonym for '^', unless POSIX
+ compatibility is specified, in which case it's invalid)
+ Boolean operators (aka Logical operators)
+ a value is considered false if it's 0 or a null string,
+ it is true otherwise; the result of a boolean operation
+ (and also of a comparison operation) will be 0 when false
+ or 1 when true
+ || or [expression (a || b) is true if either a is true or b
+ is true or both a and b are true; it is false otherwise]
+ && and [expression (a && b) is true if both a and b are true;
+ it is false otherwise]
+ ! not [expression (!a) is true if a is false, false otherwise]
+ in array membership; the keyword 'in' tests whether the value
+ on the left represents a current subscript in the array
+ named on the right
+ Conditional operator
+ ? : the conditional operator takes three operands; the first is
+ an expression to evaluate, the second is the expression to
+ use if the first was true, the third is the expression to
+ use if it was false [simple example (a < b ? b : a) gives
+ the maximum of a and b]
+ Assignment operators
+ = store the value on the right into the variable or array slot
+ on the left [expression (a = b) stores the value of b in a]
+ +=, -=, *=, /=, %=, ^=, **= perform the indicated arithmetic
+ operation using the current value of the variable or array
+ element of the left side and the expression on the right
+ side, then store the result in the left side
+ ++ increment by 1 [expression (++a) gets the current value of
+ a and adds 1 to it, stores that back in a, and returns the
+ new value; expression (a++) gets the current value of a,
+ adds 1 to it, stores that back in a, but returns the
+ original value of a]
+ -- decrement by 1 (analogous to increment)
+ String operators
+ there is no explicit operator for string concatenation;
+ two values and/or variables side-by-side are implicitly
+ concatenated into a string (numeric values are first
+ converted into their string equivalents)
+ Conversion between numeric and string values
+ there is no explicit operator for conversion; adding 0
+ to a string will force it to be converted to a number
+ (the numeric value will be 0 if the string does not
+ represent a decimal or floating point number); the
+ reverse, converting a number into a string, is done by
+ concatenating a null string ("") to it [the expression
+ (5.75 "") evaluates to "5.75"]
+ Field 'operator'
+ $ prefixing a number or variable with a dollar sign ($)
+ causes the appropriate record field to be returned [($2)
+ gives the second field of the record, ($NF) gives the
+ last field (since the builtin variable NF is set to the
+ number of fields in the current record)]
+ Array subscript operator
+ , multi-dimensional arrays are simulated by using comma (,)
+ separated array indices; the actual index is generated
+ by replacing commas with the value of builtin SUBSEP,
+ then concatenating the expression into a string index
+ [comma is also used to separate arguments in function
+ calls and user-defined function definitions]
+ [comma is *also* used to indicate a range pattern in an
+ awk rule]
+ Escape 'operator'
+ \ In quoted character strings, the backslash (\) character
+ causes the following character to be interpreted in a
+ special manner [string "one\ntwo" has an embedded newline
+ character (linefeed on VMS, but treated as if it were both
+ carriage-return and linefeed); string "\033[" has an ASCII
+ 'escape' character (which has octal value 033) followed by
+ a 'left-bracket' character]
+ Backslash is also used in regular expressions
+ Redirection operators
+ < Read-from -- valid with 'getline'
+ > Write-to (create new file) -- valid with 'print' and 'printf'
+ >> Append-to (create file if it doesn't already exist)
+ | Pipe-from/to -- valid with 'getline', 'print', and 'printf'
+4 precedence
+ Operator precedence, listed from highest to lowest. Assignment,
+ conditional, and exponentiation operators group from right to left;
+ all others group from left to right. Parentheses may be used to
+ override the normal order.
+
+ field ($)
+ increment (++), decrement (--)
+ exponentiation (^, **)
+ unary plus (+), unary minus (-), boolean not (!)
+ multiplication (*), division (/), remainder (%)
+ addition (+), subtraction (-)
+ concatenation (no special symbol; implied by context)
+ relational (==, !=, <, >=, etc), and redirection (<, >, >>, |)
+ Relational and redirection operators have the same precedence
+ and use similar symbols; context distinguishes between them
+ matching (~, !~)
+ array membership ('in')
+ boolean and (&&)
+ boolean or (||)
+ conditional (? :)
+ assignment (=, +=, etc)
+4 escaped_characters
+ Inside of a quoted string, the backslash (\) character gives special
+ meaning to the character(s) after it. Special character letters
+ are case sensitive.
+ \\ results in one backslash in the string
+ \a is an 'alert' (<ctrl/G>, the ASCII <bell> character)
+ \b is a backspace (BS, <ctrl/H>)
+ \f is a form feed (FF, <ctrl/L>)
+ \n 'newline' (<ctrl/J>) [line feed treated as CR+LF]
+ \r carriage return (CR, <ctrl/M>) [re-positions at the
+ beginning of the current line]
+ \t tab (HT, <ctrl/I>)
+ \v vertical tab (VT, <ctrl/K>)
+ \### is an arbitrary character, where '###' represents 1 to 3
+ octal (ie, 0 thru 7) digits
+ \x## is an alternate arbitrary character, where '##' represents
+ 1 or more hexadecimal (ie, 0 thru 9 and/or A thru F and/or
+ a thru f) digits; if more than two digits follow, the
+ result is undefined; not recognized if POSIX compatibility
+ mode is specified.
+3 statements
+ A statement refers to a unit of instruction found in the action
+ part of an awk rule, and also found in the definition of a function.
+ The distinction between action, statement, and expression usually
+ won't matter to an awk programmer.
+
+ Compound statements consist of multiple statements separated by
+ semicolons or newlines and enclosed within braces ({}). They are
+ sometimes referred to as 'blocks'.
+4 expressions
+ An expression such as 'a = 10' or 'n += i++' is a valid statement.
+
+ Function invocations such as 'reformat_field($3)' are also valid
+ statements.
+4 if-then-else
+ A conditional statement in awk uses the same syntax as for the 'C'
+ programming language: the 'if' keyword, followed by an expression
+ in parentheses, followed by a statement--or block of statements
+ enclosed within braces ({})--which will be executed if the expression
+ is true but skipped if it's false. This can optionally be followed
+ by the 'else' keyword and another statement--or block of statements--
+ which will be executed if (and only if) the expression was false.
+5 examples
+ Simple example showing a statement used to control how many numbers
+ are printed on a given line.
+ if ( ++i <= 10 ) #check whether this would be the 11th
+ printf(" %5d", k) #print on current line if not
+ else {
+ printf("\n %5d", k) #print on next line if so
+ i = 1 #and reset the counter
+ }
+ Another example ('next' is described under 'action-controls')
+ if ($1 > $2) { print "rejected"; next } else diff = $2 - $1
+4 loops
+ Three types of loop statements are available in awk. Each uses
+ the same syntax as 'C'. The simplest of the three is the 'while'
+ statement. It consists of the 'while' keyword, followed by an
+ expression enclosed within parentheses, followed by a statement--or
+ block of statements in braces ({})--which will be executed if the
+ expression evaluates to true. The expression is evaluated before
+ attempting to execute the statement; if it's true, the statement is
+ executed (the entire block of statements if there is a block) and
+ then the expression is re-evaluated.
+
+ The second type of loop is the do-while loop. It consists of the
+ 'do' keyword, followed by a statement (usually a block of statements
+ enclosed within braces), followed by the 'while' keyword, followed
+ by a test expression enclosed within parentheses. The statement--or
+ block--is always executed at least once. Then the test expression
+ is evaluated, and the statement(s) re-executed if the result was
+ true (followed by re-evaluation of the test, and so on).
+
+ The most complex of the three loops is the 'for' statement, and it
+ has a second variant that is not found in 'C'. The ordinary for-loop
+ consists of the 'for' keyword, followed by three semicolon-separated
+ expressions enclosed within parentheses, followed by a statement or
+ brace-enclosed block of statements. The first of the three
+ expressions is an initialization clause; it is done before starting
+ the loop. The second expression is used as a test, just like the
+ expression in a while-loop. It is checked before attempting to
+ execute the statement block, and then re-checked after each execution
+ (if any) of the block. The third expression is an 'increment' clause;
+ it is evaluated after an execution of the statement block and before
+ re-evaluation of the test (2nd) expression. Normally, the increment
+ clause will change a variable used in the test clause, in such a
+ fashion that the test clause will eventually evaluate to false and
+ cause the loop to finish.
+
+ Note to 'C' programmers: the comma (,) operator commonly used in
+ 'C' for-loop expressions is not valid in awk.
+
+ The awk-specific variant of the for-loop is used for processing
+ arrays. Its syntax is 'for' keyword, followed by variable_name 'in'
+ array_name (where 'var in array' is enclosed in parentheses),
+ followed by a statement (or block). Each valid subscript value for
+ the array in question is successively placed--in no particular
+ order--into the specified 'index' variable.
+5 while_example
+ # strip fields from the input record until there's nothing left
+ while (NF > 0) {
+ $1 = "" #this causes $0 to be reconstructed
+ print
+ }
+5 do_while_example
+ # This is a variation of the while_example; it gives a slightly
+ # different display due to the order of operation.
+ # echo input record until all fields have been stripped
+ do {
+ print #output $0
+ $1 = "" #this causes $0 to be reconstructed
+ } while (NF > 0)
+5 for_example
+ # print the ASCII alphabet (in lowercase)
+ for ( letter = 97; letter <= 122; letter++ ) printf("%c\n", letter)
+
+ # display contents of builtin environment array
+ for (itm in ENVIRON)
+ print itm, ENVIRON[itm]
+4 loop-controls
+ There are two special statements--both from 'C'--for changing the
+ behavior of loop execution. The 'continue' statement is useful in
+ a compound (block) statement; when executed, it effectively skips
+ the rest of the block so that the increment-expression (only for
+ for-loops) and loop-termination expression can be re-evaluated.
+
+ The 'break' statement, when executed, effectively skips the rest
+ of the block and also treats the test expression as if it were
+ false (instead of actually re-evaluating it). In this case, the
+ increment-expression of a for-loop is also skipped.
+
+ Both 'break' and 'continue' are only allowed within a loop ('for',
+ 'while', or 'do-while'), and in nested loops they only apply to the
+ innermost loop.
+4 action-controls
+ There are two special statements for controlling statement execution.
+ The 'next' statement, when executed, causes the rest of the current
+ action and all further pattern-action rules to be skipped, so that
+ the next input record will be immediately processed. This is useful
+ if any early action knows that the current record will fail all the
+ remaining patterns; skipping those rules will reduce processing time.
+
+ The 'exit' statement causes GAWK execution to terminate. All open
+ files are closed, and no further processing is done. The END rule,
+ if any, is executed. 'exit' takes an optional numeric value as an
+ argument which is used as an exit status value, so that some sort
+ of indication of why execution has stopped can be passed on to the
+ user's environment.
+4 other_statements
+ The delete statement is used to remove an element from an array.
+ The syntax is 'delete' keyword followed by array name, followed
+ by index value enclosed in square brackets ([]).
+
+ The return statement is used in user-defined functions. The syntax
+ is the keyword 'return' optionally followed by a string or numeric
+ expression.
+
+ See also subtopic 'functions IO_functions' for a description of
+ 'print', 'printf', and 'getline'.
+3 fields
+ When an input record is read, it is automatically split into fields
+ based on the current values of FS (builtin variable defining field
+ separator expression) and RS (builtin variable defining record
+ separator character). The default value of FS is an expression
+ which matches one or more spaces and tabs; the default for RS is
+ newline. If the FIELDWIDTHS variable is set to a space separated
+ list of numbers (as in ``FIELDWIDTHS = "2 3 2"'') then the input
+ is treated as if it had fixed-width fields of the indicated sizes
+ and the FS value will be ignored.
+
+ The field prefix operator ($), is used to reference a particular
+ field. For example, $3 designates the third field of the current
+ record. The entire record can be referenced via $0 (and it holds
+ the actual input record, not the values of $1, $2, ... concatenated
+ together, so multiple spaces--when present--remain intact, unless
+ a new value gets assigned).
+
+ The builtin variable NF holds the number of fields in the current
+ record. $NF is therefore the value of the last field. Attempts to
+ access fields beyond NF result in null values (if a record contained
+ 3 fields, the value of $5 would be "").
+
+ Assigning a new value to $0 causes all the other field values (and NF)
+ to be re-evaluated. Changing a specific field, causes $0 to receive
+ a new value, but the other existing fields remain unchanged.
+
+ For efficiency, gawk only performs field splitting at the first time
+ a specific field (or NF) is actually needed.
+3 variables
+ Variables in awk can hold both numeric and string values and do not
+ have to be pre-declared. In fact, there is no way to explicitly
+ declare them at all. Variable names consist of a leading letter
+ (either upper or lower case, which are distinct from each other)
+ or underscore (_) character followed by any number of letters,
+ digits, or underscores.
+
+ When a variable that didn't previously exist is referenced, it is
+ created and given a null value. A null value is treated as 0 when
+ used as a number, and is a string of zero characters in length if
+ used as a string.
+4 builtin_variables
+ GAWK maintains several 'built-in' variables. All have default values;
+ some are updated automatically. All the builtins have uppercase-only
+ names.
+
+ These builtin variables control how awk behaves
+ FS input field separator; default is a single space, which is
+ treated as if it were a regular expression for matching
+ one or more spaces and/or tabs; a value of " " also has a
+ second special-case side-effect of causing leading blanks
+ to be ignored instead of producing a null first field;
+ initial value can be specified on the command line with
+ the -F option (or /field_separator); the value can be a
+ regular expression
+ RS input record separator; default value is a newline ("\n");
+ only a single character is allowed [no regular expressions
+ or multi-character strings; expected to be remedied in a
+ future release of gawk]
+ OFS output field separator; value to place between variables in
+ a 'print' statement; default is one space; can be arbitrary
+ string
+ ORS output record separator; value to implicitly terminate 'print'
+ statement with; default is newline ("\n"); can be arbitrary
+ string
+ OFMT default output format used for printing numbers; default
+ value is "%.6g"
+ CONVFMT conversion format used for number-to-string conversions;
+ default value is also "%.6g", like OFMT
+ SUBSEP subscript separator for array indices; used when an array
+ subscript is specified as a comma separated list of values:
+ the comma is replaced by SUBSEP and the resulting index
+ is a concatenation of the values and SUBSEP(s); default
+ value is "\034"; value may be arbitrary string
+ IGNORECASE regular expression matching flag; if true (non-zero)
+ matching ignores differences between upper and lower case
+ letters; affects the '~' and '!~' operators, the 'index',
+ 'match', 'split', 'sub', and 'gsub' functions, and the
+ field splitting based on FS; default value is false (0);
+ has no effect if GAWK is in strict compatibility mode (via
+ the -"W compat" option or /strict)
+ FIELDWIDTHS space or tab separated list of width sizes; takes
+ precedence over FS when set, but is cleared if FS has a
+ value assigned to it; [note: the current implementation
+ of fixed-field input is considered experimental and is
+ expected to evolve over time]
+
+ These builtin variables provide useful information
+ NF number of fields in the current record
+ NR record number (accumulated over all files when more than one
+ input file is processed by the same program)
+ FNR current record number of the current input file; reset to 0
+ each time an input file is completed
+ RSTART starting position of substring matched by last invocation
+ of the 'match' function; set to 0 if a match fails and at
+ the start of each input record
+ RLENGTH length of substring matched by the last invocation of the
+ 'match' function; set to -1 if a match fails
+ FILENAME name of the input file currently being processed; the
+ special name "-" is used to represent the standard input
+ ENVIRON array of miscellaneous user environment values; the VMS
+ implementation of GAWK provides values for ["USER"] (the
+ username), ["PATH"] (current default directory), ["HOME"]
+ (the user's login directory), and ["TERM"] (terminal type
+ if available) [all info provided by VAXCRTL's environ]
+ ARGC number of elements in the ARGV array, counting [0] which is
+ the program name (ie, "gawk")
+ ARGV array of command-line arguments (in [0] to [ARGC-1]); the
+ program name (ie, "gawk") is held in ARGV[0]; command line
+ parameters (data files and "var=value" expressions, but not
+ program options or the awk program text string if present)
+ are stored in ARGV[1] through ARGV[ARGC-1]; the awk program
+ can change values of ARGC and ARGV[] during execution in
+ order to alter which files are processed or which between-
+ file assignments are made
+4 arrays
+ awk supports associative arrays to collect data into tables. Array
+ elements can be either numeric or string, as can the indices used to
+ access them. Each array must have a unique name, but a given array
+ can hold both string and numeric elements at the same time. Arrays
+ are one-dimensional only, but multi-dimensional arrays can be
+ simulated using comma (,) separated indices, whereby a single index
+ value gets created by replacing commas with SUBSEP and concatenating
+ the resulting expression into a single string.
+
+ Referencing an array element is done with the expression
+ Array[Index]
+ where 'Array' represents the array's name and 'Index' represents a
+ value or expression used for a subscript. If the requested array
+ element did not exist, it will be created and assigned an initial
+ null value. To check whether an element exists without creating it,
+ use the 'in' boolean operator.
+ Index in Array
+ would check 'Array' for element 'Index' and return 1 if it existed
+ or 0 otherwise. To remove an element from an array, use the 'delete'
+ statement
+ delete Array[Index]
+ Note: there is no way to delete an ordinary variable or an entire
+ array; 'delete' only works on a specific array element.
+
+ To process all elements of an array (in succession) when their
+ subscripts might be unknown, use the 'in' variant of the for-loop
+ for (Index in Array) { ... }
+3 functions
+ awk supports both built-in and user-defined functions. A function
+ may be considered a 'black-box' which accepts zero or more input
+ parameters, performs some calculations or other manipulations based
+ on them, and returns a single result.
+
+ The syntax for calling a function consists of the function name
+ immediately followed by an open paren (left parenthesis '('),
+ optionally followed by white space (spaces and/or tabs), followed
+ by an appropriate argument value (number, string, variable, array
+ reference, or expression involving the above and/or nested function
+ call), optionally followed by more white space. That is followed by
+ either a closing paren (right parenthesis, ')'), or by a comma (,)
+ and another argument and so on until finally a closing paren.
+
+ The parentheses are required punctuation, except for the 'print' and
+ 'printf' builtin IO functions, where they're optional, and for the
+ builtin IO function 'getline', where they're not allowed. Some
+ functions support optional [trailing] arguments which can be simply
+ omitted (along with the corresponding comma if applicable).
+4 numeric_functions
+ Builtin numeric functions
+ int(n) returns the value of 'n' with any fraction truncated
+ [truncation of negative values is towards 0]
+ sqrt(n) the square root of n
+ exp(n) the exponential of n ('e' raised to the 'n'th power)
+ log(n) natural logarithm of n
+ sin(n) sine of n (in radians)
+ cos(n) cosine of n
+ atan2(m,n) arctangent of m/n (radians)
+ rand() random number in the range 0 to 1 (exclusive)
+ srand(s) sets the random number 'seed' to s, so that a sequence
+ of 'random' numbers can be repeated; returns the
+ previous seed value; srand() [argument omitted] sets
+ the seed to an 'unpredictable' value (based on date
+ and time, for instance, so should be unrepeatable)
+4 string_functions
+ Builtin string functions
+ index(s,t) search string s for substring t; result is 1-based
+ offset of t within s, or 0 if not found
+ length(s) returns the length of string s; 'length' without
+ parenthesized argument returns length of $0
+ match(s,r) search string s for regular expression r; the offset
+ of the longest, left-most substring which matches
+ is returned, or 0 if no match was found; the builtin
+ variables RSTART and RLENGTH are also set [RSTART to
+ the return value and RLENGTH to the size of the
+ matching substring, or to -1 if no match was found]
+ split(s,a,f) break string s into components based on field
+ separator f and store them in array a (into elements
+ [1], [2], and so on); the last argument is optional,
+ if omitted, the value of FS is used; the return value
+ is the number of components found
+ sprintf(f,e,...) format expression(s) e using format string f and
+ return the result as a string; formatting is similar
+ to the printf function
+ sub(r,t,s) search string target s for regular expression r, and
+ if a match is found, replace the matching text with
+ substring t, then store the result back in s; if s
+ is omitted, use $0 for the string; the result is
+ either 1 if a match+substitution was made, or 0
+ otherwise; if substring t contains the character
+ '&', the text which matched the regular expression
+ is used instead of '&' [to suppress this feature
+ of '&', 'quote' it with a backslash (\); since this
+ will be inside a quoted string which will receive
+ 'backslash' processing before being passed to sub(),
+ *two* consecutive backslashes will be needed "\\&"]
+ gsub(r,t,s) similar to sub(), but gsub() replaces all nonoverlapping
+ substrings instead of just the first, and the return
+ value is the number of substitutions made
+ substr(s,p,l) extract a substring l characters long starting at
+ offset p in string s; l is optional, if omitted then
+ the remainder of the string (p thru end) is returned
+ tolower(s) return a copy of string s in which every uppercase
+ letter has been converted into lowercase
+ toupper(s) analogous to tolower(); convert lowercase to uppercase
+4 time_functions
+ Builtin time functions
+ systime() return the current time of day as the number of seconds
+ since some reference point; on VMS the reference point
+ is January 1, 1970, at 12 AM local time (not UTC)
+ strftime(f,t) format time value t using format f; if t is omitted,
+ the default is systime()
+5 time_formats
+ Formatting directives similar to the 'printf' & 'sprintf' functions
+ (each is introduced in the format string by preceding it with a
+ percent sign (%)); the directive is substituted by the corresponding
+ value
+ a abbreviated weekday name (Sun,Mon,Tue,Wed,Thu,Fri,Sat)
+ A full weekday name
+ b abbreviated month name (Jan,Feb,...)
+ B full month name
+ c date and time (Unix-style "aaa bbb dd HH:MM:SS YYYY" format)
+ C century prefix (19 or 20) [not century number, ie 20th]
+ d day of month as two digit decimal number (01-31)
+ D date in mm/dd/yy format
+ e day of month with leading space instead of leading 0 ( 1-31)
+ E ignored; following format character used
+ H hour (24 hour clock) as two digit number (00-23)
+ I hour (12 hour clock) as two digit number (01-12)
+ j day of year as three digit number (001-366)
+ m month as two digit number (01-12)
+ M minute as two digit number (00-59)
+ n 'newline' (ie, treat %n as \n)
+ O ignored; following format character used
+ p AM/PM designation for 12 hour clock
+ r time in AM/PM format ("II:MM:SS p")
+ R time without seconds ("HH:MM")
+ S second as two digit number (00-59)
+ t tab (ie, treat %t as \t)
+ T time ("HH:MM:SS")
+ U week of year (00-53) [first Sunday is first day of week 1]
+ V date (VMS-style "dd-bbb-YYYY" with 'bbb' forced to uppercase)
+ w weekday as decimal digit (0 [Sunday] through 6 [Saturday])
+ W week of year (00-53) [first _Monday_ is first day of week 1]
+ x date ("aaa bbb dd YYYY")
+ X time ("HH:MM:SS")
+ y year without century (00-99)
+ Y year with century (19yy-20yy)
+ Z time zone name (always "local" for VMS)
+ % literal percent sign (%)
+4 IO_functions
+ Builtin I/O functions
+ print x,... print the values of one or more expressions; if none
+ are listed, $0 is used; parentheses are optional;
+ when multiple values are printed, the current value
+ of builtin OFS (default is 1 space) is used to
+ separate them; the print line is implicitly
+ terminated with the current value of ORS (default
+ is newline); print does not have a return value
+ printf(f,x,...) print the values of one or more expressions, using
+ the specified format string; null strings are used
+ to supply missing values (if any); no between field
+ or trailing newline characters are printed, they
+ should be specified within the format string; the
+ argument-enclosing parentheses are optional;
+ printf does not have a return value
+ getline v read a record into variable v; if v is omitted, $0 is
+ used (and NF, NR, and FNR are updated); if v is
+ specified, then field-splitting won't be performed;
+ note: parentheses around the argument are *not*
+ allowed; return value is 1 for successful read, 0
+ if end of file is encountered, or -1 if some sort
+ of error occurred; [see 'redirection' for several
+ variants]
+ close(s) close a file or pipe specified by the string s; the
+ string used should have the same value as the one
+ used in a getline or print/printf redirection
+ system(s) pass string s to be executed by the operating system;
+ the command string is executed in a subprocess
+5 redirection
+ Both getline and print/printf support variant forms which use
+ redirection and pipes.
+
+ To read from a file (instead of from the primary input file), use
+ getline var < "file"
+ or getline < "file" (read into $0)
+ where the string "file" represents either an actual file name (in
+ quotes) or a variable which contains a file name string value or an
+ expression which evaluates to a string filename.
+
+ To create a pipe executing some command and read the result into
+ a variable (or into $0), use
+ "command" | getline var
+ or "command" | getline (read into $0)
+ where "command" is a literal string containing an operating system
+ command or a variable with a string value representing such a
+ command.
+
+ To output into a file other than the primary output, use
+ print x,... > "file" (or >> "file")
+ or printf(f,x,...) > "file" (or >> "file")
+ similar to the 'getline' example above. '>>' causes output to be
+ appended to an existing file if it exists, or create the file if
+ it doesn't already exist. '>' always creates a new file. The
+ alternate redirection method of '>$' (for RMS text file attributes)
+ is *only* available on the command line, not with 'print' or
+ 'printf' in the current release.
+
+ To output an error message, use 'print' or 'printf' and redirect
+ the output to file "/dev/stderr" (or equivalently to "SYS$ERROR:"
+ on VMS). 'stderr' will normally be the user's terminal, even if
+ ordinary output is being redirected into a file.
+
+ To feed awk output into another command, use
+ print x,... | "command" (similarly for 'printf')
+ similar to the second 'getline' example. In this case, output
+ from awk will be passed as input to the specified operating system
+ command. The command must be capable of reading input from 'stdin'
+ ("SYS$INPUT:" on VMS) in order to receive data in this manner.
+
+ The 'close' function operates on the "file" or "command" argument
+ specified here (either a literal string or a variable or expression
+ resulting in a string value). It completely closes the file or
+ pipe so that further references to the same file or command string
+ would re-open that file or command at the beginning. Closing a
+ pipe or redirection also releases some file-oriented resources.
+
+ Note: the VMS implementation of GAWK uses temporary files to
+ simulate pipes, so a command must finish before 'getline' can get
+ any input from it, and 'close' must be called for an output pipe
+ before any data can be passed to the specified command.
+5 formats
+ Formatting characters used by the 'printf' and 'sprintf' functions
+ (each is introduced in the format string by preceding it with a
+ percent sign (%))
+ % include a literal percent sign (%) in the result
+ c format the next argument as a single ASCII character
+ (argument should be numeric in the range 0 to 255)
+ s format the next argument as a string (numeric arguments are
+ converted into strings on demand)
+ d decimal number (ie, integer value in base 10)
+ i integer (equivalent to decimal)
+ o octal number (integer in base 8)
+ x hexadecimal number (integer in base 16) [lowercase]
+ X hexadecimal number [digits 'A' thru 'F' in uppercase]
+ f floating point number (digits, decimal point, fraction digits)
+ e exponential (scientific notation) number (digit, decimal
+ point, fraction digits, letter 'e', sign '+' or '-',
+ exponent digits)
+ g 'fractional' number in either 'e' or 'f' format, whichever
+ produces shorter result
+
+ Three optional modifiers can be placed between the initiating
+ percent sign and the format character (doesn't apply to %%).
+ - left justify (only matters when width specifier is present)
+ NN width ['NN' represents 1 or more decimal digits]; actually
+ minimum width to use, longer items will not be truncated; a
+ leading 0 will cause right-justified numbers to be padded on
+ the left with zeroes instead of spaces when they're aligned
+ .MM precision [decimal point followed by 1 or more digits]; used
+ as maximum width for strings (causing truncation if they're
+ actually longer) or as number of fraction digits for 'f' or
+ 'e' numeric formats, or number of significant digits for 'g'
+ numeric format
+4 user_defined_functions
+ User-defined functions may be created as needed to simplify awk
+ programs or to collect commonly used code into one place. The
+ general syntax of a user-defined function is the 'function' keyword
+ followed by unique function name, followed by a comma-separated
+ parameter list enclosed in parentheses, followed by statement(s)
+ enclosed within braces ({}). A 'return' statement is customary
+ but is not required.
+ function FuncName(arg1,arg2) {
+ # arbitrary statements
+ return (arg1 + arg2) / 2
+ }
+ If a function does not use 'return' to specify an output value, the
+ result received by the caller will be unpredictable.
+
+ Functions may be placed in an awk program before, between, or after
+ the pattern-action rules. The abbreviation 'func' may be used in
+ place of 'function', unless POSIX compatibility mode is in effect.
+3 regular_expressions
+ A regular expression is a shorthand way of specifying a 'wildcard'
+ type of string comparison. Regular expression matching is very
+ fundamental to awk's operation.
+
+ Meta symbols
+ ^ matches beginning of line or beginning of string; note that
+ embedded newlines ('\n') create multi-line strings, so
+ beginning of line is not necessarily beginning of string
+ $ matches end of line or end of string
+ . any single character (except newline)
+ [ ] set of characters; [ABC] matches either 'A' or 'B' or 'C'; a
+ dash (other than first or last of the set) denotes a range
+ of characters: [A-Z] matches any upper case letter; if the
+ first character of the set is '^', then the sense of match
+ is reversed: [^0-9] matches any non-digit; several
+ characters need to be quoted with backslash (\) if they
+ occur in a set: '\', ']', '-', and '^'
+ | alternation (similar to boolean 'or'); match either of two
+ patterns [for example "^start|stop$" matches leading 'start'
+ or trailing 'stop']
+ ( ) grouping, alter normal precedence [for example, "^(start|stop)$"
+ matches lines reading either 'start' or 'stop']
+ * repeated matching; when placed after a pattern, indicates that
+ the pattern should match any number of times [for example,
+ "[a-z][0-9]*" matches a lower case letter followed by zero or
+ more digits]
+ + repeated matching; when placed after a pattern, indicates that
+ the pattern should match one or more times ["[0-9]+" matches
+ any non-empty sequence of digits]
+ ? optional matching; indicates that the pattern can match zero or
+ one times ["[a-z][0-9]?" matches lower case letter alone or
+ followed by a single digit]
+ \ quote; prevent the character which follows from having special
+ meaning
+
+ A regular expression which matches a string or line will match against
+ the first (left-most) substring which meets the pattern and include
+ the longest sequence of characters which still meets that pattern.
+3 comments
+ Comments in awk programs are introduced with '#'. Anything after
+ '#' on a line is ignored by GAWK. It's a good idea to include an
+ explanation of what an awk program is doing and also who wrote it
+ and when.
+3 further_information
+ For complete documentation on GAWK, see "The_GAWK_Manual" from FSF.
+ Source text for it is present in the file GAWK.TEXINFO. A postscript
+ version is available via anonymous FTP from host prep.ai.mit.edu in
+ directory pub/gnu/.
+
+ For additional documentation on awk--above and beyond that provided in
+ The_GAWK_Manual--see "The_AWK_Programming_Language" by Aho, Weinberger,
+ and Kernighan (2nd edition, 1988), published by Addison-Wesley. It is
+ both a reference on the awk language and a tutorial on awk's use, with
+ many sample programs.
+3 authors
+ The awk programming language was originally created by Alfred V. Aho,
+ Peter J. Weinberger, and Brian W. Kernighan in 1977. The language
+ was revised and enhanced in a new version which was released in 1985.
+
+ GAWK, the GNU implementation of awk, was written in 1986 by Paul Rubin
+ and Jay Fenlason, with advice from Richard Stallman, and with
+ contributions from John Woods. In 1988 and 1989, David Trueman and
+ Arnold Robbins revised GAWK for compatibility with the newer awk.
+
+ GAWK version 2.11.1 was ported to VMS by Pat Rankin in November, 1989,
+ with further revisions in the Spring of 1990. The VMS port was
+ incorporated into the official GNU distribution of version 2.13 in
+ Spring 1991. (Version 2.12 was never publicly released.)
+2 release_notes
+ GAWK 2.13 tested under VMS V5.3 and V5.4-2, May, 1991; compatible with
+ VMS versions V4.6 and later. Current source code compatible with DEC's
+ VAXC v3.x and v2.4 or v2.3; also compiles successfully with GNUC (GNU's
+ gcc).
+3 AWK_LIBRARY
+ GAWK uses a built in search path when looking for a program file
+ specified by the -f option (or the /input qualifier) when that file
+ name does not include a device and/or directory. GAWK will first
+ look in the current default directory, then if the file wasn't found
+ it will look in the directory specified by the translation of logical
+ name "AWK_LIBRARY".
+3 known_problems
+ There are several known problems with GAWK running on VMS. Some can
+ be ignored, others require work-arounds.
+4 command_line_parsing
+ The command
+ gawk "program text"
+ will pass the first phase of DCL parsing (the single required
+ parameter is present), then it will give an error that a required
+ element (either /input=awk_file or /commands="program text") is
+ missing. If what was intended (as is most likely) is to pass the
+ program text to the UN*X-style command interface, the following
+ variation is required
+ gawk -- "program text"
+ The presence of "--", which is normally optional, will inhibit the
+ attempt to use DCL parsing (as will any '-' option or redirection).
+4 file_formats
+ If a file having the RMS attribute "Fortran carriage control" is
+ read as input, it will generate an empty first record if the first
+ actual record begins with a space (leading space becomes a newline).
+ Also, the last record of the file will give a "record not terminated"
+ warning. Both of these minor problems are due to the way that the
+ C Run-Time Library (VAXCRTL) converts record attributes.
+
+ Another poor feature without a work-around is that there's no way to
+ specify "append if possible, create with RMS text attributes if not"
+ with the current command line I/O redirection. '>>$' isn't supported.
+4 RS_peculiarities
+ Changing the record separator to something other than newline ('\n')
+ will produce anomalous results for ordinary files. For example,
+ using RS = "\f" and FS = "\n" with the following input
+ |rec 1, line 1
+ |rec 1, line 2
+ |^L (form feed)
+ |rec 2, line 1
+ |rec 2, line 2
+ |^L (form feed)
+ |rec 3, line 1
+ |rec 3, line 2
+ |(end of file)
+ will produce two fields for record 1, but three fields each for
+ records 2 and 3. This is because the form-feed record delimiter is
+ on its own line, so awk sees a newline after it. Since newline is
+ now a field separator, records 2 and 3 will have null first fields.
+ The following awk code will work-around this problem by inserting
+ a null first field in the first record, so that all records can be
+ handled the same by subsequent processing.
+ # fixup for first record (RS != "\n")
+ FNR == 1 { if ( $0 == "" ) #leading separator
+ next #skip its null record
+ else #otherwise,
+ $0 = FS $0 #realign fields
+ }
+ There is a second problem with this same example. It will always
+ trigger a "record not terminated" warning when it reaches the end of
+ file. In the sample shown, there is no final separator; however, if
+ a trailing form-feed were present, it would produce a spurious final
+ record with two null fields. This occurs because the I/O system
+ sees an implicit newline at the end of the last record, so awk sees
+ a pair of null fields separated by that newline. The following code
+ fragment will fix that provided there are no null records (in this
+ case, that would be two consecutive lines containing just form-feeds).
+ # fixup for last record (RS != "\n")
+ $0 == FS { next } #drop spurious final record
+ Note that the "record not terminated" warning will persist.
+4 cmd_inconsistency
+ The DCL qualifier /OUTPUT is internally equivalent to '>$' output
+ redirection, but the qualifier /INPUT corresponds to the -f option
+ rather than to '<' input redirection.
+4 exit
+ The exit statement can optionally pass a final status value to the
+ operating system. GAWK expects a UN*X-style value instead of a
+ VMS status value, so 0 indicates success and non-zero indicates
+ failure. The final exit status will be 1 (VMS success) if 0 is
+ used, or even (VMS non-success) if non-zero is used.
+3 changes
+ Changes between version 2.13 and 2.11.1: (2.12 was not released)
+
+ General
+ CONVFMT and FIELDWIDTHS builtin control variables added
+ systime() and strftime() date/time functions added
+ 'lint' and 'posix' run-time options added
+ '-W' command line option syntax supersedes '-c', '-C', and '-V'
+ '-a' and '-e' regular expression options made obsolete
+ Various bug fixes and efficiency improvements
+ More platforms supported ('officially' including VMS)
+
+ VMS-specific
+ %g printf format fixed
+ Handling of '\' on command line modified; no longer necessary to
+ double it up
+ Problem redirecting stderr (>&efile) at same time as stdin (<ifile)
+ or stdout (>ofile) has been fixed
+ ``2>&1'' and ``1>&2'' redirection constructs added
+3 license
+ GAWK is covered by the "GNU General Public License", the gist of which
+ is that if you supply this software to a third party, you are expressly
+ forbidden to prevent them from supplying it to a fourth party, and if
+ you supply binaries you must make the source code available to them
+ at no additional cost. Any revisions or modified versions are also
+ covered by the same license. There is no warranty, express or implied,
+ for this software. It is provided "as is."
+
+ [Disclaimer: This is just an informal summary with no legal basis;
+ refer to the actual GNU General Public License for specific details.]
+!2 examples
+!
diff --git a/vms/unixlib.h b/vms/unixlib.h
new file mode 100644
index 00000000..17d99706
--- /dev/null
+++ b/vms/unixlib.h
@@ -0,0 +1,24 @@
+/* "unixlib.h" -- limited substitute for VAX C V3.x's <unixlib.h>,
+ * for use with VAX C V2.x and/or GNU C when building gawk.
+ */
+
+
+/* declare the global environ[] array */
+#ifdef VAXC
+extern char noshare **environ;
+#else
+# ifdef __GNUC__
+# define environ $$PsectAttributes_NOSHR$$environ
+# endif
+extern char **environ;
+#endif
+
+/* miscellaneous Unix emulation routines available in VAXCRTL */
+char *getenv(), *getcwd();
+
+char *ecvt(), *fcvt(), *gcvt();
+
+int getpid(), getppid();
+
+unsigned getgid(), getuid(), getegid(), geteuid();
+int setgid(), setuid(); /* no-ops */
diff --git a/vms/varargs.h b/vms/varargs.h
new file mode 100644
index 00000000..ce66e7d5
--- /dev/null
+++ b/vms/varargs.h
@@ -0,0 +1,38 @@
+/* "varargs.h" -- old style variable argument list manipulation (for VAX) */
+#ifndef __GNUC__
+
+ /* Use the system's macros with the system's compiler. */
+#include <varargs.h>
+
+#else /*__GNUC__*/
+
+# if defined(__VAX__) || defined(__vax__) || defined(VAX) || defined(vax)
+ /* These macros implement traditional (non-ANSI) varargs for GNU C on VAX */
+
+# if !defined(_VA_LIST) && !defined(_VA_LIST_)
+# define _VA_LIST
+# define _VA_LIST_
+typedef char *va_list;
+# endif
+
+# define va_alist _varargs
+# define va_dcl int va_alist;
+# define va_start(AP) AP = (va_list) &va_alist
+# define va_end(AP)
+
+# define _va_rounded_size(TYPE) \
+ (((sizeof (TYPE) + sizeof (int) - 1) / sizeof (int)) * sizeof (int))
+
+# define va_arg(AP,TYPE) \
+ (AP += _va_rounded_size(TYPE), \
+ *((TYPE *) (AP - _va_rounded_size(TYPE))))
+
+# if defined(__VMS__) || defined(__vms__) || defined(VMS) || defined(vms)
+ /* VAX C compatibility macros */
+# define va_count(CNT) vaxc$va_count(&CNT) /* rtl routine */
+# define va_start_1(AP,OFFSET) AP = (va_list) (&va_alist + (OFFSET))
+# endif /* VMS */
+
+# endif /* VAX */
+
+#endif /*__GNUC__*/
diff --git a/vms/vms.h b/vms/vms.h
new file mode 100644
index 00000000..6491a1f5
--- /dev/null
+++ b/vms/vms.h
@@ -0,0 +1,69 @@
+/*
+ * vms.h - miscellaneous definitions for use with VMS system services.
+ * Pat Rankin, Nov'89
+ */
+
+#if 0
+#include <iodef.h>
+#else
+#define IO$_WRITEVBLK 48 /* write virtual block */
+#define IO$V_CANCTRLO 6 /* cancel <ctrl/O> (ie, resume tty output) */
+#define IO$M_CANCTRLO (1 << IO$V_CANCTRLO)
+#endif
+
+#if 0
+#include <clidef.h>
+#include <cliverbdef.h>
+#include <fscndef.h>
+#else
+#define CLI$K_GETCMD 1
+#define CLI$K_VERB_MCR 33
+#define CLI$K_VERB_RUN 36
+#define FSCN$_FILESPEC 1
+#endif
+
+#if 0
+#include <climsgdef.h>
+#else
+#define CLI$_RUNUSED 0x00030000 /* value returned by $CLI for "RUN" */
+#define CLI$_SYNTAX 0x000310FC /* error signalled by CLI$DCL_PARSE */
+#define CLI$_INSFPRM 0x00038048 /* insufficient parameters */
+#define CLI$_VALREQ 0x00038150 /* missing required value */
+#define CLI$_CONFLICT 0x00038258 /* conflicting qualifiers */
+#define CLI$_NOOPTPRS 0x00038840 /* no option present */
+#endif
+
+#if !defined(_TYPES_) || !defined(__GNUC__)
+typedef unsigned long u_long;
+typedef unsigned short u_short;
+#endif
+typedef struct _dsc { int len; char *adr; } Dsc; /* limited string descriptor */
+
+#define vmswork(sts) ((sts)&1)
+#define vmsfail(sts) (!vmswork(sts))
+#define CondVal(sts) ((sts)&0x0FFFFFF8) /* strip severity & msg inhibit */
+#define Descrip(strdsc,strbuf) Dsc strdsc = {sizeof strbuf - 1, strbuf}
+
+extern int shell$is_shell P((void));
+extern u_long LIB$FIND_FILE P((const Dsc *, Dsc *, void *, ...));
+extern u_long LIB$FIND_FILE_END P((void *));
+#ifndef NO_TTY_FWRITE
+extern u_long LIB$GET_EF P((long *));
+extern u_long SYS$ASSIGN P((const Dsc *, short *, long, const Dsc *));
+extern u_long SYS$DASSGN P((short));
+extern u_long SYS$QIO P((long, short, long, void *, const void *, long,
+ const char *, int, int, u_long, int, int));
+extern u_long SYS$SYNCH P((long, void *));
+#endif /* !NO_TTY_FWRITE */
+
+extern void v_add_arg P((int, const char *));
+extern void vms_exit P((int));
+extern char *vms_strerror P((int));
+extern char *vms_strdup P((const char *));
+extern int vms_devopen P((const char *));
+extern int vms_execute P((const char *, const char *, const char *));
+extern int vms_gawk P((void));
+extern u_long Cli_Present P((const char *));
+extern u_long Cli_Get_Value P((const char *, char *, int));
+extern u_long Cli_Parse_Command P((const void *, const char *));
+
diff --git a/vms/vms_args.c b/vms/vms_args.c
new file mode 100644
index 00000000..b6736ff3
--- /dev/null
+++ b/vms/vms_args.c
@@ -0,0 +1,398 @@
+/*
+ * vms_args.c -- command line parsing, to emulate shell i/o redirection.
+ * [ Escape sequence parsing now suppressed. ]
+ */
+
+/*
+ * Copyright (C) 1991 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * [.vms]vms_arg_fixup - emulate shell's command line processing: handle
+ * stdio redirection, backslash escape sequences, and file wildcard
+ * expansion. Should be called immediately upon image startup.
+ *
+ * Pat Rankin, Nov'89
+ * rankin@eql.Caltech.EDU
+ *
+ * <ifile - open 'ifile' (readonly) as 'stdin'
+ * >nfile - create 'nfile' as 'stdout' (stream-lf format)
+ * >>ofile - append to 'ofile' for 'stdout'; create it if necessary
+ * >&efile - point 'stderr' (SYS$ERROR) at 'efile', but don't open
+ * >$vfile - create 'vfile' as 'stdout', using rms attributes
+ * appropriate for a standard text file (variable length
+ * records with implied carriage control)
+ * 2>&1 - special case: direct error messages into output file
+ * 1>&2 - special case: direct output data to error destination
+ * <<sentinel - error; reading stdin until 'sentinel' not supported
+ * <-, >- - error: stdin/stdout closure not implemented
+ * | anything - error; pipes not implemented
+ * & <end-of-line> - error; background execution not implemented
+ *
+ * any\Xany - convert 'X' as appropriate; \000 will not work as
+ * intended since subsequent processing will misinterpret
+ *
+ * any*any - perform wildcard directory lookup to find file(s)
+ * any%any - " " ('%' is vms wildcard for '?' [ie, /./])
+ * any?any - treat like 'any%any' unless no files match
+ * *, %, ? - if no file(s) match, leave original value in arg list
+ *
+ *
+ * Notes: a redirection operator can have optional white space between it
+ * and its filename; the operator itself *must* be preceded by white
+ * space so that it starts a separate argument. '<' is ambiguous
+ * since "<dir>file" is a valid VMS file specification; leading '<' is
+ * assumed to be stdin--use "\<dir>file" to override. '>$' is local
+ * kludge to force stdout to be created with text file RMS attributes
+ * instead of stream format; file sharing is disabled for stdout
+ * regardless. Multiple instances of stdin or stdout or stderr are
+ * treated as fatal errors rather than using the first or last. If a
+ * wildcard file specification is detected, it is expanded into a list
+ * of filenames which match; if there are no matches, the original
+ * file-spec is left in the argument list rather than having it expand
+ * into thin air. No attempt is made to identify and make $(var)
+ * environment substitutions--must draw the line somewhere!
+ */
+
+#include "awk.h" /* really "../awk.h" */
+#include "vms.h"
+
+ void v_add_arg(int, const char *); /* global: also called from vms_gawk.c */
+static char *skipblanks(const char *);
+static void vms_expand_wildcards(const char *);
+static u_long vms_define(const char *, const char *);
+static char *t_strstr(const char *, const char *);
+#define strstr t_strstr /* strstr() missing from vaxcrtl for V4.x */
+
+static int v_argc, v_argz = 0; /* v_argc: index of last arg stored; v_argz: slots allocated in v_argv[] */
+static char **v_argv; /* the argument vector being built; grown on demand by v_add_arg() */
+
+/* vms_arg_fixup() - scan argv[] for i/o redirection and wildcards and also */
+/* rebuild it with those removed or expanded; results go back via *pargc, *pargv */
+void
+vms_arg_fixup( int *pargc, char ***pargv )
+{
+ char *f_in, *f_out, *f_err,
+ *out_mode, *rms_opt1, *rms_opt2;
+ char **argv = *pargv;
+ int i, argc = *pargc;
+ int err_to_out_redirect = 0, out_to_err_redirect = 0;
+
+#ifndef NO_CHECK_SHELL
+ if (shell$is_shell())
+ return; /* don't do anything if we're running DECshell */
+#endif
+#ifndef NO_DCL_CMD
+ for (i = 1; i < argc ; i++) /* check for dash or other non-VMS args */
+ if (strchr("->\\|", *argv[i])) break; /* found => (i < argc) */
+ if (i >= argc && (v_argc = vms_gawk()) > 0) { /* vms_gawk => dcl_parse */
+ /* if we successfully parsed the command, replace original argv[] */
+ argc = v_argc, argv = v_argv;
+ v_argz = v_argc = 0, v_argv = NULL;
+ }
+#endif
+ v_add_arg(v_argc = 0, basename(argv[0])); /* store arg #0 (image name) */
+
+ f_in = f_out = f_err = NULL; /* stdio setup (no filenames yet) */
+ out_mode = "w"; /* default access for stdout */
+ rms_opt1 = rms_opt2 = "ctx=stm"; /* ("context = stream") == no-opt */
+
+ for (i = 1; i < argc; i++) {
+ char *p, *fn;
+ int is_arg;
+
+ is_arg = 0; /* current arg does not begin with dash */
+ p = argv[i]; /* current arg */
+ switch (*p) {
+ case '<': /* stdin */
+ /*[should try to determine whether this is really a directory
+ spec using <>; for now, force user to quote them with '\<']*/
+ if ( f_in ) {
+ fatal("multiple specification of '<' for stdin");
+ } else if (*++p == '<') { /* '<<' is not supported */
+ fatal("'<<' not available for stdin");
+ } else {
+ p = skipblanks(p);
+ fn = (*p ? p : argv[++i]); /* use next arg if necessary */
+ if (i >= argc || *fn == '-')
+ fatal("invalid i/o redirection, null filespec after '<'");
+ else
+ f_in = fn; /* save filename for stdin */
+ }
+ break;
+ case '>': { /* stdout or stderr */
+ /*[vms-specific kludge '>$' added to force stdout to be created
+ as record-oriented text file instead of in stream-lf format]*/
+ int is_out = 1; /* assume stdout */
+ if (*++p == '>') /* '>>' => append */
+ out_mode = "a", p++;
+ else if (*p == '&') /* '>&' => stderr */
+ is_out = 0, p++;
+ else if (*p == '$') /* '>$' => kludge for record format */
+ rms_opt1 = "rfm=var", rms_opt2 = "rat=cr", p++;
+ else /* '>' => create */
+ ; /* use default values initialized prior to loop */
+ p = skipblanks(p);
+ fn = (*p ? p : argv[++i]); /* use next arg if necessary */
+ if (i >= argc || *fn == '-') {
+ fatal("invalid i/o redirection, null filespec after '>'");
+ } else if (is_out) {
+ if (out_to_err_redirect)
+ fatal("conflicting specifications for stdout");
+ else if (f_out)
+ fatal("multiple specification of '>' for stdout");
+ else
+ f_out = fn; /* save filename for stdout */
+ } else {
+ if (err_to_out_redirect)
+ fatal("conflicting specifications for stderr");
+ else if (f_err)
+ fatal("multiple specification of '>&' for stderr");
+ else
+ f_err = fn; /* save filename for stderr */
+ }
+ } break;
+ case '2': /* check for the ``2>&1'' special case */
+ if (strcmp(p, "2>&1") != 0)
+ goto ordinary_arg;
+ else if (f_err || out_to_err_redirect)
+ fatal("conflicting specifications for stderr");
+ else {
+ err_to_out_redirect = 1;
+ f_err = "SYS$OUTPUT:";
+ } break;
+ case '1': /* check for the ``1>&2'' special case */
+ if (strcmp(p, "1>&2") != 0)
+ goto ordinary_arg;
+ else if (f_out || err_to_out_redirect)
+ fatal("conflicting specifications for stdout");
+ else {
+ out_to_err_redirect = 1;
+ f_out = "SYS$ERROR:";
+ } break;
+ case '|': /* pipe */
+ /* command pipelines are not supported */
+ fatal("command pipes not available ('|' encountered)");
+ break;
+ case '&': /* background */
+ /*[we could probably spawn or fork ourself--maybe someday]*/
+ if (*(p+1) == '\0' && i == argc - 1) {
+ fatal("background tasks not available ('&' encountered)");
+ break;
+ } else /* fall through */
+ ; /*NOBREAK*/
+ case '-': /* argument */
+ is_arg = 1; /*(=> skip wildcard check)*/
+ default: /* other (filespec assumed) */
+ordinary_arg:
+ /* process escape sequences or expand wildcards */
+ v_add_arg(++v_argc, p); /* include this arg */
+ p = strchr(p, '\\'); /* look for backslash */
+ if (p != NULL) { /* does it have escape sequence(s)? */
+#if 0 /* disable escape parsing; it's now done elsewhere within gawk */
+ register int c;
+ char *q = v_argv[v_argc] + (p - argv[i]);
+ do {
+ c = *p++;
+ if (c == '\\')
+ c = parse_escape(&p);
+ *q++ = (c >= 0 ? (char)c : '\\');
+ } while (*p != '\0');
+ *q = '\0';
+#endif /*0*/
+ } else if (!is_arg && strchr(v_argv[v_argc], '=') == NULL) {
+ vms_expand_wildcards(v_argv[v_argc]);
+ }
+ break;
+ } /* end switch */
+ } /* loop */
+
+ /*
+ * Now process any/all I/O options encountered above.
+ */
+
+ /* must do stderr first, or vaxcrtl init might not see it */
+ /*[ catch 22: we'll also redirect errors encountered doing <in or >out ]*/
+ if (f_err) { /* define logical name but don't open file */
+ int len = sizeof "SYS$OUTPUT" - 1; /* length of the name, not of f_err, so f_err[len] is the char after it */
+ if (strncasecmp(f_err, "SYS$OUTPUT", len) == 0
+ && (f_err[len] == ':' || f_err[len] == '\0'))
+ err_to_out_redirect = 1;
+ else
+ vms_define("SYS$ERROR", f_err);
+ }
+ /* do stdin before stdout, so if we bomb we won't create empty output file */
+ if (f_in) { /* [re]open file and define logical name */
+ stdin = freopen(f_in, "r", stdin, "mbf=2");
+ if (stdin != NULL)
+ vms_define("SYS$INPUT", f_in);
+ else
+ fatal("<%s (%s)", f_in, strerror(errno));
+ }
+ if (f_out) { /* disallow file sharing to reduce overhead */
+ stdout = freopen(f_out, out_mode, stdout,
+ rms_opt1, rms_opt2, "shr=nil", "mbf=2"); /*VAXCRTL*/
+ if (stdout != NULL) {
+#ifdef crtl_bug /* eof sometimes doesn't get set properly for stm_lf file */
+# define BIGBUF 8*BUFSIZ /* maximum record size: 4096 instead of 512 */
+ setvbuf(stdout, malloc(BIGBUF), _IOFBF, BIGBUF);
+#endif
+ vms_define("SYS$OUTPUT", f_out);
+ } else
+ fatal(">%s%s (%s)", (*out_mode == 'a' ? ">" : ""),
+ f_out, strerror(errno));
+ }
+ if (err_to_out_redirect) { /* special case for ``2>&1'' construct */
+ fclose(stderr);
+ dup(1, 2); /* make file 2 (stderr) share file 1 (stdout) */
+ stderr = stdout;
+ vms_define("SYS$ERROR", "SYS$OUTPUT:");
+ } else if (out_to_err_redirect) { /* ``1>&2'' */
+ fclose(stdout);
+ dup(2, 1); /* make file 1 (stdout) share file 2 (stderr) */
+ stdout = stderr;
+ vms_define("SYS$OUTPUT", "SYS$ERROR:");
+ }
+
+#ifndef NO_DCL_CMD
+ /* if we replaced argv[] with our own, we can release it now */
+ if (argv != *pargv)
+ free((void *)argv), argv = NULL;
+#endif
+ *pargc = ++v_argc; /* increment to account for argv[0] */
+ *pargv = v_argv;
+ return;
+}
+
+/* vms_expand_wildcards() - check a string for wildcard punctuation; */
+/* if it has any, attempt a directory lookup */
+/* and store resulting name(s) in argv array, starting at v_argv[v_argc] */
+static void
+vms_expand_wildcards( const char *prospective_filespec )
+{
+ char *p, spec_buf[255+1], res_buf[255+1], *strstr(); /* 'strstr' is #defined to t_strstr in this file */
+ Dsc spec, result;
+ void *context;
+ register int len = strlen(prospective_filespec);
+
+ if (len >= sizeof spec_buf)
+ return; /* can't be valid--or at least we can't handle it */
+ strcpy(spec_buf, prospective_filespec); /* copy the arg */
+ p = strchr(spec_buf, '?');
+ if (p != NULL) /* change '?' single-char wildcard to '%' */
+ do *p++ = '%', p = strchr(p, '?'); /* rewrites every '?' in the private copy only */
+ while (p != NULL);
+ else if (strchr(spec_buf, '*') == strchr(spec_buf, '%') /* => both NULL */
+ && strstr(spec_buf, "...") == NULL)
+ return; /* no wildcards present; don't attempt file lookup */
+ spec.len = len, spec.adr = spec_buf; /* search pattern descriptor */
+ result.len = sizeof res_buf - 1, result.adr = res_buf; /* leaves room for the '\0' added below */
+
+ /* The filespec is already in v_argv[v_argc]; if we fail to match anything,
+ we'll just leave it there (unlike most shells, where it would evaporate).
+ */
+ len = -1; /* overload 'len' with flag value */
+ context = NULL; /* init */
+ while (vmswork(LIB$FIND_FILE(&spec, &result, &context))) { /* one iteration per matching file */
+ for (len = sizeof(res_buf)-1; len > 0 && res_buf[len-1] == ' '; len--) ;
+ res_buf[len] = '\0'; /* terminate after discarding trailing blanks */
+ v_add_arg(v_argc++, strdup(res_buf)); /* store result; the first match replaces the original pattern */
+ }
+ (void)LIB$FIND_FILE_END(&context);
+ if (len >= 0) /* (still -1 => never entered loop) */
+ --v_argc; /* undo final post-increment */
+ return;
+}
+
+/* v_add_arg() - store string pointer 'val' (not a copy) in v_argv[idx]; expand array if necessary */
+void
+v_add_arg( int idx, const char *val )
+{
+#ifdef DEBUG_VMS
+ fprintf(stderr, "v_add_arg: v_argv[%d] ", idx);
+#endif
+ if (idx + 1 >= v_argz) { /* 'v_argz' is the current size of v_argv[]; keep slot idx+1 available too */
+ int old_size = v_argz;
+
+ v_argz = idx + 10; /* increment by arbitrary amount */
+ if (old_size == 0)
+ v_argv = (char **)malloc((unsigned)(v_argz * sizeof(char *)));
+ else
+ v_argv = (char **)realloc((char *)v_argv,
+ (unsigned)(v_argz * sizeof(char *)));
+ if (v_argv == NULL) { /* error */
+ fatal("%s: %s: can't allocate memory (%s)", "vms_args",
+ "v_argv", strerror(errno));
+ } else {
+ memmsg((old_size == 0 ? "v_argv" : "re: v_argv"), v_argz,
+ "vms_args", v_argv);
+ while (old_size < v_argz) v_argv[old_size++] = NULL; /* new slots start out NULL */
+ }
+ }
+ v_argv[idx] = (char *)val;
+#ifdef DEBUG_VMS
+ fprintf(stderr, "= \"%s\"\n", val);
+#endif
+}
+
+/* skipblanks() - step past leading spaces and tabs; a null 'ptr' is handed back unchanged */
+static char *
+skipblanks( const char *ptr )
+{
+ if (!ptr)
+ return (char *)ptr;
+ for ( ; *ptr == ' ' || *ptr == '\t'; ptr++) ; /* advance to first non-blank (or the NUL) */
+ return (char *)ptr;
+}
+
+/* vms_define() - assign a value to a logical name [define/process/user_mode] */
+static u_long
+vms_define( const char *log_name, const char *trans_val )
+{
+ Dsc log_dsc, trn_dsc; /* descriptors for the name and its translation */
+# define LOG_PROCESS_TABLE 2 /* <obsolete> */
+# define LOG_USERMODE 3 /* PSL$C_USER */
+ extern u_long SYS$CRELOG(); /* <superseded by $CRELNM> */
+
+ /* avoid "define SYS$OUTPUT sys$output:" for redundant ">sys$output:" */
+ if (strncasecmp(log_name, trans_val, strlen(log_name)) == 0) /* case-blind prefix match on log_name */
+ return 0; /* NOTE(review): 0 fails vmswork(); callers in this file ignore the result */
+
+ log_dsc.len = strlen(log_dsc.adr = (char *)log_name);
+ trn_dsc.len = strlen(trn_dsc.adr = (char *)trans_val);
+ return SYS$CRELOG(LOG_PROCESS_TABLE, &log_dsc, &trn_dsc, LOG_USERMODE);
+}
+
+/* t_strstr -- local strstr() stand-in; returns first occurrence of 'sub' within 'str', or 0 */
+static char *t_strstr ( const char *str, const char *sub )
+{
+ register const char *start, *hay, *pat;
+
+ if (*sub == '\0') return (char *)str; /* an empty pattern matches at the very start */
+
+ for (start = str; *start != '\0'; start++) { /* try every starting position in turn */
+ hay = start, pat = sub;
+ while (*hay == *pat) { /* 'pat' is never at its NUL here, so 'hay' cannot overrun */
+ hay++, pat++;
+ if (*pat == '\0') return (char *)start; /* whole pattern consumed */
+ }
+ }
+ return (char *)0; /* no match anywhere */
+}
diff --git a/vms/vms_cli.c b/vms/vms_cli.c
new file mode 100644
index 00000000..e4e33404
--- /dev/null
+++ b/vms/vms_cli.c
@@ -0,0 +1,88 @@
+/*
+ * vms_cli.c - command line interface routines.
+ * Pat Rankin, Nov'89
+ * Routines called from vms_gawk.c for DCL parsing.
+ */
+
+#define P(foo) ()
+#include "config.h" /* in case we want to suppress 'const' &c */
+#include "vms.h"
+
+extern u_long CLI$PRESENT(const Dsc *);
+extern u_long CLI$GET_VALUE(const Dsc *, Dsc *, short *);
+extern u_long CLI$DCL_PARSE(const Dsc *, const void *, ...);
+extern u_long SYS$CLI(void *, ...);
+extern u_long SYS$FILESCAN(const Dsc *, void *, long *);
+extern void *LIB$ESTABLISH(u_long (*handler)(void *, void *));
+extern u_long LIB$SIG_TO_RET(void *, void *); /* condition handler */
+
+/* Cli_Present() - call CLI$PRESENT to determine whether a parameter or */
+/* qualifier is present on the [already parsed] command line; returns its status */
+u_long
+Cli_Present( const char *item )
+{
+ Dsc item_dsc; /* descriptor wrapping the NUL-terminated 'item' name */
+ (void)LIB$ESTABLISH(LIB$SIG_TO_RET); /* install LIB$SIG_TO_RET as condition handler (declared above) */
+
+ item_dsc.len = strlen(item_dsc.adr = (char *)item);
+ return CLI$PRESENT(&item_dsc);
+}
+
+/* Cli_Get_Value() - call CLI$GET_VALUE to retrieve the value of a */
+/* parameter or qualifier from the command line into 'result' (NUL-terminated) */
+u_long
+Cli_Get_Value( const char *item, char *result, int size )
+{
+ Dsc item_dsc, res_dsc;
+ u_long sts;
+ short len = 0; /* stays 0 unless CLI$GET_VALUE stores a length, giving an empty string */
+ (void)LIB$ESTABLISH(LIB$SIG_TO_RET);
+
+ item_dsc.len = strlen(item_dsc.adr = (char *)item);
+ res_dsc.len = size, res_dsc.adr = result;
+ sts = CLI$GET_VALUE(&item_dsc, &res_dsc, &len);
+ result[len] = '\0'; /* NOTE(review): if len can equal 'size' this writes result[size]; confirm callers leave room */
+ return sts;
+}
+
+/* Cli_Parse_Command() - use the $CLI system service (undocumented) to */
+/* retrieve the actual command line (which might be */
+/* "run prog" or "mcr prog [params]") and then call */
+/* CLI$DCL_PARSE to parse it using specified tables */
+u_long
+Cli_Parse_Command( const void *cmd_tables, const char *cmd_verb )
+{
+ struct { short len, code; void *adr; } fscn[2]; /* item list for SYS$FILESCAN; fscn[1] stays zeroed */
+ struct { char rqtype, rqindx, rqflags, rqstat; unsigned :32;
+ Dsc rdesc; unsigned :32; unsigned :32; unsigned :32; } cmd; /* $CLI request block; rdesc receives the command text */
+ u_long sts;
+ int ltmp; /* length of "<verb> " prefix */
+ char longbuf[2600]; /* holds the verb plus the retrieved command line */
+ (void)LIB$ESTABLISH(LIB$SIG_TO_RET);
+
+ memset(&cmd, 0, sizeof cmd);
+ cmd.rqtype = CLI$K_GETCMD; /* command line minus the verb */
+ sts = SYS$CLI( &cmd, (void *)0, (void *)0); /* get actual command line */
+
+ if (vmswork(sts)) { /* ok => cli available & verb wasn't "RUN" */
+ /* invoked via symbol => have command line (which might be empty) */
+ /* [might also be invoked via mcr or dcl; that's ok] */
+ if (cmd.rqstat == CLI$K_VERB_MCR) {
+ /* need to strip image name from MCR invocation */
+ memset(fscn, 0, sizeof fscn);
+ fscn[0].code = FSCN$_FILESPEC; /* full file specification */
+ (void)SYS$FILESCAN( &cmd.rdesc, fscn, (long *)0);
+ cmd.rdesc.len -= fscn[0].len; /* shrink size */
+ cmd.rdesc.adr += fscn[0].len; /* advance ptr */
+ }
+ /* prepend verb and then parse the command line */
+ strcat(strcpy(longbuf, cmd_verb), " "), ltmp = strlen(longbuf);
+ if (cmd.rdesc.len + ltmp > sizeof longbuf) /* clamp so the copy below fits in longbuf */
+ cmd.rdesc.len = sizeof longbuf - ltmp;
+ strncpy(&longbuf[ltmp], cmd.rdesc.adr, cmd.rdesc.len); /* no NUL added; length travels in the descriptor */
+ cmd.rdesc.len += ltmp, cmd.rdesc.adr = longbuf;
+ sts = CLI$DCL_PARSE( &cmd.rdesc, cmd_tables);
+ }
+
+ return sts; /* status from SYS$CLI, or from CLI$DCL_PARSE when that was reached */
+}
diff --git a/vms/vms_fwrite.c b/vms/vms_fwrite.c
new file mode 100644
index 00000000..c0282c14
--- /dev/null
+++ b/vms/vms_fwrite.c
@@ -0,0 +1,209 @@
+/*
+ * vms_fwrite.c - augmentation for the fwrite() function.
+ */
+
+/*
+ * Copyright (C) 1991 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h" /* really "../awk.h" */
+
+#ifndef NO_TTY_FWRITE
+#include "vms.h"
+#include <stdio.h>
+#include <errno.h>
+
+#ifdef VAXC_BUILTINS
+#pragma builtins /* VAXC V3.0 & up */
+# define find_c(s,n,c) ((n) - _LOCC((c),(n),(s)))
+#else /*VAXC_BUILTINS*/
+static int find_c( const char *s, int n, char c ) { /* offset of the first 'c' within s[0..n-1] */
+ register const char *hit = (const char *)memchr(s, c, n);
+ if (hit) return hit - s; else return n; /* 0..n-1 when found, otherwise n */
+}
+#endif /*VAXC_BUILTINS*/
+#define is_stdout(file_no) ((file_no) == 1) /* fileno(stdout) */
+#define is_stderr(file_no) ((file_no) == 2) /* fileno(stderr) */
+
+#define PREFIX_CR (0x8D << 16) /* leading carriage return */
+#define POSTFIX_CR (0x8D << 24) /* trailing carriage return (=> lf/cr) */
+
+static short channel[_NFILE] = {0}; /* per file number: 0 = not yet checked, -1 = don't use $QIO, >0 = assigned channel */
+static FILE *prev_file = 0; /* last FILE seen by tty_fwrite(); reset by tty_fclose() */
+static int prev_file_num; /* fileno() of prev_file, cached to avoid repeating the call */
+
+ /*
+ * VAXCRTL's fwrite() seems to flush after every character when
+ * writing to a terminal. This routine is a limited functionality
+ * substitute that is *much* faster. However, calls to fwrite()
+ * should not be mixed with other stdio calls to the same file
+ * unless fflush() is always called first. Also, this routine
+ * will not detect that a freopen() call has finished with the
+ * original terminal; tty_fclose() should be used to close a file.
+ */
+#ifdef fwrite
+# undef fwrite
+#endif
+/* tty_fwrite() - performance hack for fwrite() to a terminal; same arguments and result convention as fwrite() */
+size_t
+tty_fwrite( const void *buf, size_t size, size_t number, FILE *file )
+{
+ static long evfn = -1;
+ short chan;
+ int file_num, result;
+
+ if (!file || !*file)
+ return 0 * (errno = EBADF); /* kludge alert! */
+ else if (file == prev_file)
+ file_num = prev_file_num;
+ else /* note: VAXCRTL's fileno() is a function, not just a macro */
+ prev_file_num = file_num = fileno(file), prev_file = file;
+
+ chan = (file_num >= 0 && file_num < _NFILE) ? channel[file_num] : -1; /* never index outside channel[] */
+ if (chan == 0) { /* if not initialized, need to assign a channel */
+ if (isatty(file_num) > 0) { /* isatty: 1=yes, 0=no, -1=problem */
+ Dsc device;
+ char devnam[255+1];
+ fgetname(file, devnam); /* get 'file's name */
+ device.len = strlen(device.adr = devnam); /* create descriptor */
+ if (vmswork(SYS$ASSIGN(&device, &chan, 0, (Dsc *)0))) {
+ /* get an event flag; use #0 if problem */
+ if (evfn == -1 && vmsfail(LIB$GET_EF(&evfn))) evfn = 0;
+ } else chan = 0; /* $ASSIGN failed */
+ }
+ /* store channel for later use; -1 => don't repeat failed init attempt */
+ channel[file_num] = (chan > 0 ? chan : -1);
+ }
+ if (chan > 0) { /* chan > 0 iff 'file' is a terminal */
+ struct _iosbw { u_short status, count; u_long rt_kludge; } iosb;
+ register u_long sts = 1;
+ register char *pt = (char *)buf;
+ register int offset, pos, count = size * number;
+ u_long cc_fmt, io_func = IO$_WRITEVBLK;
+ int extra = 0;
+ result = 0;
+ if (is_stderr(file_num)) /* if it's SYS$ERROR (stderr)... */
+ io_func |= IO$M_CANCTRLO; /* cancel ^O (resume tty output) */
+ while (count > 0) {
+ /* special handling for line-feeds to make them be 'newlines' */
+ offset = 0;
+ if (*pt == '\n') { /* got at least one leading line-feed */
+ cc_fmt = PREFIX_CR, extra++; /* precede 1st LF with a CR */
+ do offset++;
+ while (offset < count && *(pt + offset) == '\n');
+ } else
+ cc_fmt = 0;
+ /* look for another line-feed; if found, break line there */
+ pos = offset + find_c(pt + offset, count - offset, '\n');
+ if (pos >= BUFSIZ) pos = BUFSIZ - 1; /* throttle quota usage */
+ else if (pos < count) pos++, cc_fmt |= POSTFIX_CR, extra++;
+ /* wait for previous write, if any, to complete */
+ if (pt > (char *)buf) {
+ sts = SYS$SYNCH(evfn, &iosb);
+ if (vmswork(sts)) sts = iosb.status, result += iosb.count;
+ if (vmsfail(sts)) break;
+ }
+ /* queue an asynchronous write */
+ sts = SYS$QIO(evfn, chan, io_func, &iosb, (u_long (*)())0, 0,
+ pt, pos, 0, cc_fmt, 0, 0);
+ if (vmsfail(sts)) break; /*(should never happen)*/
+ pt += pos, count -= pos;
+ }
+ /* wait for last write to complete */
+ if (pt > (char *)buf && vmswork(sts)) {
+ sts = SYS$SYNCH(evfn, &iosb);
+ if (vmswork(sts)) sts = iosb.status, result += iosb.count;
+ }
+ if (vmsfail(sts)) errno = EVMSERR, vaxc$errno = sts;
+ else if (iosb.rt_kludge == 0) result = number + extra;
+ result -= extra; /* subtract the additional carriage-returns */
+ } else { /* use stdio */
+ /* Note: we assume that we're writing text, not binary data.
+ For stream format files, 'size' and 'number' are effectively
+ interchangeable, and fwrite works fine. However, for record
+ format files, 'size' governs the maximum record length, so
+ fwrite(string, size(char), strlen(string), file)
+ will produce a sequence of 1-byte records, which is hardly
+ what we want in this (assumed) situation. Line-feeds ('\n')
+ are converted into newlines (ie, record separators) by the
+ run-time library, but strings that don't end with a newline
+ still become separate records. The simplest work around
+ is just to use fputs() instead of fwrite(); unfortunately,
+ we have to provide special treatment for NULs ('\0's).
+ At present, only stdout might be in record format (via
+ >$'filename' redirection on the command line).
+ */
+ if (size != 1 || number == 0) { /* not used by GAWK; also avoids dividing by size 0 and reading buf[-1] */
+ result = fwrite((void *)buf, size, number, file);
+ } else if (*((char *)buf + number - 1) == '\n' || !is_stdout(file_num)) {
+ result = fwrite((void *)buf, number, size, file);
+ result = result * number / size; /*(same as 'result = number')*/
+ } else {
+#ifdef NO_ALLOCA /* '?:' binds tighter than ','; the inner parens make buffer reuse actually happen */
+# define alloca(n) ((n) <= abuf_siz ? abuf : \
+ ((abuf_siz > 0 ? (free(abuf), 0) : 0), \
+ (abuf = malloc(abuf_siz = (n)+20))))
+ static void *abuf = 0;
+ static size_t abuf_siz = 0;
+#endif /*NO_ALLOCA*/
+ register char *pt = (char *)buf;
+ register int pos, count = number;
+ if (pt[count] != '\0') { /*(out of bounds, but relatively safe)*/
+ pt = (char *)alloca(count + 1);
+ memcpy(pt, buf, count), pt[count] = '\0';
+ /* if exiting this block undoes the alloca(), we're hosed :-( */
+ }
+ result = 0;
+ while (count > 0) {
+ pos = find_c(pt, count, '\0');
+ if (fputs(pt, file) < 0) break;
+ if (pos < count) {
+ if (fputc('\0', file) < 0) break;
+ pos++; /* 0..n-1 -> 1..n */
+ }
+ result += pos, pt += pos, count -= pos;
+ }
+ }
+ }
+ return result;
+}
+#define fwrite(b,s,n,f) tty_fwrite((b),(s),(n),(f))
+
+#ifdef fclose
+# undef fclose
+#endif
+/* tty_fclose() - keep tty_fwrite() up to date when closing a file; returns fclose()'s result */
+int
+tty_fclose( FILE *file )
+{
+ if (file && *file) { /* note: VAXCRTL stdio has extra level of indirection */
+ int file_num = fileno(file);
+ short chan = (file_num >= 0 && file_num < _NFILE) ? channel[file_num] : -1;
+ if (chan > 0)
+ (void)SYS$DASSGN(chan); /* deassign the channel (ie, close) */
+ if (file_num >= 0 && file_num < _NFILE) /* skip a failed fileno() as well as large numbers */
+ channel[file_num] = 0; /* clear stale info */
+ }
+ prev_file = 0; /* force tty_fwrite() to reset */
+ return fclose(file);
+}
+#define fclose(f) tty_fclose(f)
+
+#endif /*!NO_TTY_FWRITE*/
diff --git a/vms/vms_gawk.c b/vms/vms_gawk.c
new file mode 100644
index 00000000..57abff7e
--- /dev/null
+++ b/vms/vms_gawk.c
@@ -0,0 +1,245 @@
+/*
+ * vms_gawk.c -- parse GAWK command line using DCL syntax
+ */
+
+/*
+ * Copyright (C) 1991 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * vms_gawk.c - routines to parse the command line as a native DCL command
+ * rather than as a foreign command string.
+ * Pat Rankin, Nov'89
+ * [ revised for 2.12, May'91 ]
+ */
+
+#include "awk.h"
+#include "vms.h"
+#define COMMAND_NAME "GAWK" /* verb name & for 'usage' message(s) */
+#define USAGE_PROG_RQRD 1
+#define USAGE_FILE_RQRD 2
+#define USAGE_BAD_COMBO 3
+#define USAGE_RUN_CMD 4
+#define STS$M_INHIB_MSG 0x10000000
+
+#define Present(arg) vmswork(Cli_Present(arg))
+#define Get_Value(arg,buf,siz) vmswork(Cli_Get_Value(arg,buf,siz))
+
+extern void gawk_cmd(); /* created with $ SET COMMAND/OBJECT */
+static int vms_usage(int);
+
+#define ARG_SIZ 250 /* capacity of the value part of a generated argument */
+union arg_w_prefix { /* structure used to simplify prepending of "-" */
+ char value[3+ARG_SIZ+1]; /* whole argument viewed as one string: prefix + buf + '\0' */
+ struct {
+ char prefix[3]; /* for "-? " */
+ char buf[ARG_SIZ]; /* value text, filled in just after the prefix */
+ char suffix[1]; /* room for '\0' */
+ } arg;
+};
+
+#define chk_option(qualifier,optname) /* if present, append optname to the comma list in buf.arg.buf */ \
+ do { if (Present(qualifier)) \
+ strcat(strcat(buf.arg.buf, W_cnt++ ? "," : ""), optname); } while (0)
+
+
+/* vms_gawk() - parse GAWK command line using DCL and convert it into the */
+/* appropriate "-arg" values for compatibility with GNU code; returns new argc, or 0 */
+int
+vms_gawk()
+{
+ u_long sts;
+ union arg_w_prefix buf;
+ char misc_args[10], *misc_argp;
+ int argc, W_cnt;
+
+ /* check "GAWK_P1"--it's required; its presence will tip us off */
+ sts = Cli_Present("GAWK_P1");
+ if (CondVal(sts) == CondVal(CLI$_SYNTAX)) {
+ /* syntax error indicates that we weren't invoked as a native DCL
+ command, so we'll now attempt to generate a command from the
+ foreign command string and parse that.
+ */
+ sts = Cli_Parse_Command(gawk_cmd, COMMAND_NAME);
+ if (vmswork(sts))
+ sts = Cli_Present("GAWK_P1");
+ }
+ if (vmswork(sts)) /* command parsed successfully */
+ v_add_arg(argc = 0, COMMAND_NAME); /* save "GAWK" as argv[0] */
+ else if (CondVal(sts) == CondVal(CLI$_INSFPRM))
+ return vms_usage(USAGE_FILE_RQRD); /* insufficient parameters */
+ else if (CondVal(sts) == CondVal(CLI$_CONFLICT))
+ return vms_usage(USAGE_BAD_COMBO); /* conflicting qualifiers (/input+/command) */
+ else if (CondVal(sts) == CondVal(CLI$_RUNUSED))
+ return vms_usage(USAGE_RUN_CMD); /* RUN GAWK won't work (no command line) */
+ else
+ return 0; /* forced to rely on original parsing */
+
+ if (Present("USAGE")) /* give usage message and quit */
+ return vms_usage(0);
+ else if (! (Present("PROGRAM") || Present("PROGFILE")) )
+ return vms_usage(USAGE_PROG_RQRD); /* missing required option */
+
+ misc_argp = misc_args;
+ *misc_argp++ = '-'; /* now points at &misc_args[1] */
+ if (Present("REG_EXPR")) {
+ if (Present("REG_EXPR.AWK")) /* /reg_exp=awk -> -a */
+ *misc_argp++ = 'a';
+ else if (Present("REG_EXPR.EGREP") /* /reg_exp=egrep -> -e */
+ || Present("REG_EXPR.POSIX")) /* /reg_exp=posix -> -e */
+ *misc_argp++ = 'e';
+ }
+#if 0 /* gawk 2.11.1 */
+ if (Present("STRICT")) /* /strict -> -c */
+ *misc_argp++ = 'c';
+ if (Present("COPYRIGHT")) /* /copyright -> -C */
+ *misc_argp++ = 'C';
+ if (Present("VERSION")) /* /version -> -V */
+ *misc_argp++ = 'V';
+#else /* gawk 2.12 */
+ W_cnt = 0, buf.arg.buf[0] = '\0';
+ strncpy(buf.arg.prefix, "-W ", 3);
+ chk_option("LINT","lint");
+ chk_option("POSIX","posix");
+ chk_option("STRICT","compat");
+ chk_option("COPYRIGHT","copyright");
+ chk_option("VERSION","version");
+ if (W_cnt > 0) /* got something */
+ v_add_arg(++argc, strdup(buf.value));
+#endif /*0*/
+#ifdef DEBUG
+ if (Present("DEBUG")) {
+#if 0
+ int both = Present("DEBUG.ALL");
+ if (both || Present("DEBUG.EXECUTION"))
+ *misc_argp++ = 'd';
+ if (both || Present("DEBUG.PARSE"))
+#endif
+ *misc_argp++ = 'D';
+ }
+#endif
+ *misc_argp = '\0'; /* terminate misc_args[] */
+ if (misc_argp > &misc_args[1]) /* got something */
+ v_add_arg(++argc, strdup(misc_args)); /* store a copy; misc_args[] is gone once we return */
+
+ if (Present("FIELD_SEP")) { /* field separator */
+ strncpy(buf.arg.prefix, "-F ", 3);
+ if (Get_Value("FIELD_SEP", buf.arg.buf, sizeof buf.arg.buf))
+ v_add_arg(++argc, strdup(buf.value));
+ }
+ if (Present("VARIABLES")) { /* variables to init prior to BEGIN */
+ strncpy(buf.arg.prefix, "-v ", 3);
+ while (Get_Value("VARIABLES", buf.arg.buf, sizeof buf.arg.buf))
+ v_add_arg(++argc, strdup(buf.value));
+ }
+ if (Present("PROGFILE")) { /* program files, /input=file -> -f file */
+ strncpy(buf.arg.prefix, "-f ", 3);
+ while (Get_Value("PROGFILE", buf.arg.buf, sizeof buf.arg.buf))
+ v_add_arg(++argc, strdup(buf.value));
+ v_add_arg(++argc, "--");
+ } else if (Present("PROGRAM")) { /* program text, /program -> 'text' */
+ v_add_arg(++argc, "--");
+ if (Get_Value("PROGRAM", buf.value, sizeof buf.value))
+ v_add_arg(++argc, strdup(buf.value));
+ }
+
+ /* we know that "GAWK_P1" is present [data files and/or 'var=value'] */
+ while (Get_Value("GAWK_P1", buf.value, sizeof buf.value))
+ v_add_arg(++argc, strdup(buf.value));
+
+ if (Present("OUTPUT")) { /* let other parser treat this as 'stdout' */
+ strncpy(buf.arg.prefix, ">$ ", 3);
+ if (Get_Value("OUTPUT", buf.arg.buf, sizeof buf.arg.buf))
+ v_add_arg(++argc, strdup(buf.value));
+ }
+
+ return ++argc; /*(increment to account for arg[0])*/
+}
+
+/* vms_usage() - display one or more messages and then terminate (one case returns an argc instead) */
+static int /* note: doesn't return anything; allows 'return vms_usage()' */
+vms_usage( int complaint )
+{
+static char
+ *usage_txt = "\n\
+usage: %s /COMMANDS=\"awk program text\" data_file[,data_file,...] \n\
+ or %s /INPUT=awk_file data_file[,\"Var=value\",data_file,...] \n\
+ or %s /INPUT=(awk_file1,awk_file2,...) data_file[,...] \n\
+", *options_txt = "\n\
+options: /FIELD_SEPARATOR=\"FS_value\" \n\
+ - /VARIABLES=(\"Var1=value1\",\"Var2=value2\",...) \n\
+ - /REG_EXPR= AWK or EGREP or POSIX \n\
+ - /LINT /POSIX /[NO]STRICT /VERSION /COPYRIGHT /USAGE \n\
+ - /OUTPUT=out_file \n\
+", *no_prog = "missing required element: /COMMANDS or /INPUT",
+ *no_file = "missing required element: data_file \n\
+ (use \"SYS$INPUT:\" to read data lines from the terminal)",
+ *bad_combo = "invalid combination of qualifiers \n\
+ (/INPUT=awk_file and /COMMANDS=\"awk program\" are mutually exclusive)",
+ *run_used = "\"RUN\" was used; required command components missing";
+int status, argc;
+
+ fflush(stdout);
+ switch (complaint) {
+ case USAGE_PROG_RQRD:
+ fprintf(stderr, "\n%%%s-W-%s, %s \n", COMMAND_NAME, "PROG_RQRD", no_prog);
+ status = CLI$_VALREQ | STS$M_INHIB_MSG;
+ break;
+ case USAGE_FILE_RQRD:
+ if (Present("USAGE")) {
+ status = 1; /* clean exit */
+ } else if (Present("COPYRIGHT") || Present("VERSION")) {
+ v_add_arg(argc=0, COMMAND_NAME); /* save "GAWK" as argv[0] */
+#if 0
+ v_add_arg(++argc, Present("COPYRIGHT") ? "-C" : "-V");
+#else
+ v_add_arg(++argc, "-W");
+ v_add_arg(++argc, Present("COPYRIGHT") ? "copyright" : "version");
+#endif
+ v_add_arg(++argc, "{}"); /* kludge to suppress 'usage' */
+ v_add_arg(++argc, "NL:"); /* dummy input for kludge */
+ return ++argc; /* count argv[0] too */
+ } else {
+ fprintf(stderr, "\n%%%s-W-%s, %s \n", COMMAND_NAME, "FILE_RQRD", no_file);
+ status = CLI$_INSFPRM | STS$M_INHIB_MSG;
+ }
+ break;
+ case USAGE_BAD_COMBO:
+ fprintf(stderr, "\n%%%s-W-%s, %s \n", COMMAND_NAME, "BAD_COMBO", bad_combo);
+ status = CLI$_CONFLICT | STS$M_INHIB_MSG;
+ break;
+ case USAGE_RUN_CMD:
+ fprintf(stderr, "\n%%%s-W-%s, %s \n", COMMAND_NAME, "RUN_CMD", run_used);
+ status = CLI$_NOOPTPRS | STS$M_INHIB_MSG;
+ break;
+ default:
+ status = 1;
+ break;
+ }
+ fprintf(stderr, usage_txt, COMMAND_NAME, COMMAND_NAME, COMMAND_NAME);
+ fputs(options_txt, stderr); /* plain text, so don't route it through a format string */
+ fflush(stderr);
+
+ errno = EVMSERR;
+ vaxc$errno = status;
+ _exit(status);
+ /* NOTREACHED */
+ return 0;
+}
diff --git a/vms/vms_misc.c b/vms/vms_misc.c
new file mode 100644
index 00000000..8c7aee6a
--- /dev/null
+++ b/vms/vms_misc.c
@@ -0,0 +1,159 @@
+/*
+ * vms_misc.c -- substitute code for missing/different run-time library routines.
+ */
+
+/*
+ * Copyright (C) 1991 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "awk.h" /* really "../awk.h" */
+#include <ssdef.h>
+#include <stsdef.h>
+
+ /*
+ * VMS uses a completely different status scheme (odd => success,
+ * even => failure), so we'll trap calls to exit() and alter the
+ * exit status code. [VAXC can't handle this as a macro.]
+ */
+#ifdef exit
+# undef exit
+#endif
+/*
+ * vms_exit() -- translate a Unix-style exit code into a VMS status
+ * and terminate the program with it.
+ *
+ * final_status == 0 exits with SS$_NORMAL; any other value exits with
+ * SS$_ABORT or'ed with STS$M_INHIB_MSG.
+ *
+ * The parameter is intentionally not called 'errno': where <errno.h>
+ * provides errno as a macro, a parameter of that name cannot be declared.
+ */
+void
+vms_exit( int final_status )
+{
+    exit(final_status == 0 ? SS$_NORMAL : (SS$_ABORT | STS$M_INHIB_MSG));
+}
+#define exit(v) vms_exit(v)
+
+ /*
+ * In VMS's VAXCRTL, strerror() takes an optional second argument.
+ * #define strerror(errnum) strerror(errnum,vaxc$errno)
+ * is all that's needed, but VAXC can't handle that (gcc can).
+ * [The 2nd arg is used iff errnum == EVMSERR.]
+ */
+#ifdef strerror
+# undef strerror
+#endif
+/*
+ * vms_strerror() -- return the message text for an error number.
+ * For errnum == EVMSERR the current vaxc$errno value is passed to
+ * strerror() as a second argument; any other errnum is passed alone.
+ */
+char *
+vms_strerror( int errnum )
+{
+    /* declared without a prototype so it can be called with 1 or 2 args */
+    extern char *strerror( /* int, ... */ );
+
+    if (errnum == EVMSERR)
+        return strerror(EVMSERR, vaxc$errno);
+    return strerror(errnum);
+}
+# define strerror(v) vms_strerror(v)
+
+ /*
+ * Miscellaneous utility routine, not part of the run-time library.
+ */
+/* vms_strdup() -- hand back a newly allocated duplicate of a string */
+char *
+vms_strdup( const char *str )
+{
+    char *result;
+    int size = strlen(str) + 1;     /* text plus terminating '\0' */
+
+    emalloc(result, char *, size, "strdup");
+    strcpy(result, str);
+    return result;
+}
+
+ /*
+ * VAXCRTL does not contain unlink(). This replacement has limited
+ * functionality which is sufficient for GAWK's needs. It works as
+ * desired even when we have the file open.
+ */
+/*
+ * unlink(file) -- delete a file (ignore soft links)
+ *
+ * When file_spec has no version number (no ';' in it), ";0" is appended
+ * before the name is passed to delete().  Returns the result of delete(),
+ * or -1 with errno set to EINVAL when file_spec is NULL or too long to
+ * fit in the local buffer together with the appended version number.
+ */
+int
+unlink( const char *file_spec ) {
+    char tmp[255+1];	/*(should use alloca(len+2+1)) */
+    extern int delete(const char *);
+
+    /* reject names that would overflow tmp[]: need room for ";0" and '\0' */
+    if (file_spec == NULL || strlen(file_spec) > sizeof tmp - (2 + 1)) {
+        errno = EINVAL;
+        return -1;
+    }
+    strcpy(tmp, file_spec);		/* copy file name */
+    if (strchr(tmp, ';') == NULL)
+        strcat(tmp, ";0");		/* append version number */
+    return delete(tmp);
+}
+
+ /*
+ * Check for attempt to (re-)open known file.
+ */
+/*
+ * vms_devopen() -- recognize "SYS$INPUT", "SYS$OUTPUT" and "SYS$ERROR"
+ * (compared without regard to case, trailing colon optional) and return
+ * the file number of the matching standard stream.  Every other name,
+ * including one with extra text after the device name, yields -1.
+ */
+int
+vms_devopen( const char *name )
+{
+    const char *rest;
+    FILE *file;
+
+    if (strncasecmp(name, "SYS$", 4) != 0)
+        return -1;
+    rest = name + 4;		/* skip "SYS$" */
+
+    if (strncasecmp(rest, "INPUT", 5) == 0) {
+        file = stdin;
+        rest += 5;
+    } else if (strncasecmp(rest, "OUTPUT", 6) == 0) {
+        file = stdout;
+        rest += 6;
+    } else if (strncasecmp(rest, "ERROR", 5) == 0) {
+        file = stderr;
+        rest += 5;
+    } else
+        return -1;
+
+    if (*rest == ':')
+        rest++;			/* treat trailing colon as optional */
+
+    /* note: VAXCRTL stdio has extra level of indirection (*file) */
+    if (!file || !*file || *rest != '\0')
+        return -1;
+    return fileno(file);
+}
+
+ /*
+ * VMS has no timezone support.
+ */
+/* these are global for use by missing/strftime.c */
+/* tzname[]: two zone-name strings, fixed as "local" and the empty string */
+char *tzname[2] = { "local", "" };
+/* daylight: initialized to zero and never changed in this file */
+int daylight = 0;
+
+/* dummy to satisfy linker */
+/* tzset() takes no action; it returns immediately */
+void tzset()
+{
+ return;
+}
+
+#ifndef __GNUC__
+# ifdef bcopy
+# undef bcopy
+# endif
+/*
+ * bcopy() -- replacement compiled only when __GNUC__ is not defined.
+ * Hands (len, src, dst) to the OTS$MOVE3 run-time routine and ignores
+ * whatever that routine returns.
+ */
+void bcopy( char *src, char *dst, int len )
+{
+ (void) OTS$MOVE3(len, src, dst);
+}
+#endif /*!__GNUC__*/
+
+/*----------------------------------------------------------------------*/
+#ifdef NO_VMS_ARGS /* real code is in "vms/vms_args.c" */
+/* vms_arg_fixup() -- dummy version: argc and argv are left untouched */
+void
+vms_arg_fixup( int *argc, char ***argv )
+{
+    /* nothing to do */
+}
+#endif
+
+#ifdef NO_VMS_PIPES /* real code is in "vms/vms_popen.c" */
+FILE *popen( const char *command, const char *mode ) {
+ fatal(" Cannot open pipe `%s' (not implemented)", command);
+ return NULL;
+}
+int pclose( FILE *current ) {
+ fatal(" Cannot close pipe #%d (not implemented)", fileno(current));
+ return -1;
+}
+int fork( void ) {
+ fatal(" Cannot fork process (not implemented)");
+ return -1;
+}
+#endif /*NO_VMS_PIPES*/
diff --git a/vms/vms_popen.c b/vms/vms_popen.c
new file mode 100644
index 00000000..f0eaa037
--- /dev/null
+++ b/vms/vms_popen.c
@@ -0,0 +1,168 @@
+/*
+ * [.vms]vms_popen.c -- substitute routines for missing pipe calls.
+ */
+
+/*
+ * Copyright (C) 1991 the Free Software Foundation, Inc.
+ *
+ * This file is part of GAWK, the GNU implementation of the
+ * AWK Programming Language.
+ *
+ * GAWK is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 1, or (at your option)
+ * any later version.
+ *
+ * GAWK is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GAWK; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef NO_VMS_PIPES
+
+#include "awk.h" /* really "../awk.h" */
+#include <stdio.h>
+
+#ifndef PIPES_SIMULATED
+
+FILE *
+popen( const char *command, const char *mode )
+{
+ fatal(" Cannot open pipe `%s' (not implemented)", command);
+ /* NOT REACHED */
+ return 0;
+}
+
+int
+pclose( FILE *current )
+{
+ fatal(" Internal error ('pclose' not implemented)");
+ /* NOT REACHED */
+ return -1;
+}
+
+int
+fork()
+{
+ fatal(" Internal error ('fork' not implemented)");
+ /* NOT REACHED */
+ return -1;
+}
+
+#else PIPES_SIMULATED
+ /*
+ * Simulate pipes using temporary files; hope that the user
+ * doesn't expect pipe i/o to be interleaved with other i/o ;-}.
+ *
+ * This is essentially the same as the MSDOS version. The
+ * difference is that redirection is handled using LIB$SPAWN
+ * rather than constructing a command for system() which uses
+ * '<' or '>'.
+ */
+#include "vms.h"
+#include <errno.h>
+
+/* state of a simulated pipe: slot not in use, open for input, or open for output */
+typedef enum { unopened = 0, reading, writing } pipemode;
+/* table of simulated pipes; popen() and pclose() index it by fileno() of the stream */
+static
+struct {
+ char *command; /* copy of the command string passed to popen() */
+ char *name; /* name of the temporary file standing in for the pipe */
+ pipemode pmode; /* unopened, reading or writing */
+} pipes[_NFILE];
+
+/*
+ * popen() -- simulated pipe built on a temporary file.
+ *
+ * mode must be exactly "r" or "w"; anything else returns NULL.
+ * For "r" the command is executed immediately with its output directed
+ * to the temporary file, which is then opened for reading.  For "w" only
+ * the temporary file is opened here; pclose() runs the command later
+ * with that file as its input.
+ *
+ * Returns the stream for the temporary file, or NULL if the mode is
+ * invalid, no temporary name could be made, the file could not be
+ * opened, or its file number has no slot in pipes[].
+ */
+FILE *
+popen( const char *command, const char *mode )
+{
+    FILE *current;
+    char *tmpl, *name, *mktemp();
+    int cur, strcmp();
+    pipemode curmode;
+
+    if (strcmp(mode, "r") == 0)
+        curmode = reading;
+    else if (strcmp(mode, "w") == 0)
+        curmode = writing;
+    else
+        return NULL;
+
+    /* make a name for the temporary file; the template pointer is kept
+       separately so it can be released if mktemp() fails */
+    if ((tmpl = strdup("sys$scratch:pipe_XXXX.tmp")) == 0)
+        return NULL;
+    if ((name = mktemp(tmpl)) == 0) {
+        free(tmpl);
+        return NULL;
+    }
+
+    if (curmode == reading) {
+        /* an input pipe reads a temporary file created by the command */
+        vms_execute(command, (char *)0, name);	/* 'command >tempfile' */
+    }
+    if ((current = fopen(name, mode)) == NULL) {
+        free(name);
+        return NULL;
+    }
+    cur = fileno(current);
+    if (cur < 0 || cur >= _NFILE) {
+        /* no slot in pipes[] for this file number: undo and fail */
+        fclose(current);
+        unlink(name);
+        free(name);
+        return NULL;
+    }
+    pipes[cur].name = name;
+    pipes[cur].pmode = curmode;
+    pipes[cur].command = strdup(command);
+    return current;
+}
+
+/*
+ * pclose() -- finish a simulated pipe opened by popen().
+ *
+ * The temporary file is closed.  For an output pipe the saved command is
+ * then executed with the temporary file as its input.  Afterwards the
+ * temporary file is deleted and the pipes[] slot is released.
+ *
+ * Returns -1 if the stream does not correspond to an open simulated
+ * pipe.  Otherwise returns the fclose() result for an input pipe; for an
+ * output pipe returns the vms_execute() result unless fclose() already
+ * failed, in which case the fclose() failure is reported.
+ */
+int
+pclose( FILE *current )
+{
+    int rval, sts, cur = fileno(current);
+
+    /* guard the table index before looking at the slot */
+    if (cur < 0 || cur >= _NFILE || pipes[cur].pmode == unopened)
+        return -1;	/* should never happen */
+
+    rval = fclose(current);	/* close temp file; if reading, we're done */
+    if (pipes[cur].pmode == writing) {
+        /* an output pipe feeds the temporary file to the other program */
+        sts = vms_execute(pipes[cur].command, pipes[cur].name, (char *)0);
+        if (rval == 0)		/* don't let success here hide a failed close */
+            rval = sts;
+    }
+    /* clean up */
+    unlink(pipes[cur].name);	/* get rid of the temporary file */
+    pipes[cur].pmode = unopened;
+    free(pipes[cur].name), pipes[cur].name = 0;
+    free(pipes[cur].command), pipes[cur].command = 0;
+    return rval;
+}
+
+ /*
+  * vms_execute() -- run a command through LIB$SPAWN.
+  *
+  * command is the command text; input and output, when not null, are
+  * passed along as the second and third LIB$SPAWN arguments (a null
+  * pointer is passed in place of whichever one is absent).
+  * Returns 0 when neither the spawn status nor the completion status
+  * is a failure; otherwise stores the failing status in vaxc$errno,
+  * sets errno to EVMSERR and returns -1.
+  * [With more work it could truly simulate a pipe using mailboxes.]
+  */
+int
+vms_execute( const char *command, const char *input, const char *output )
+{
+    Dsc cmd, in, out;
+    Dsc *in_p = 0, *out_p = 0;
+    u_long sts, cmpltn_sts, LIB$SPAWN();
+
+    cmd.adr = (char *)command;
+    cmd.len = strlen(cmd.adr);
+
+    if (input) {
+        in.adr = (char *)input;
+        in.len = strlen(in.adr);
+        in_p = &in;
+    }
+    if (output) {
+        out.adr = (char *)output;
+        out.len = strlen(out.adr);
+        out_p = &out;
+    }
+
+    sts = LIB$SPAWN(&cmd, in_p, out_p, (long *)0,
+                    (Dsc *)0, (u_long *)0, &cmpltn_sts);
+
+    /* the spawn worked but the command itself failed: report the latter */
+    if (vmswork(sts) && vmsfail(cmpltn_sts))
+        sts = cmpltn_sts;
+
+    if (!vmsfail(sts))
+        return 0;
+
+    errno = EVMSERR;
+    vaxc$errno = sts;
+    return -1;
+}
+
+#endif /* PIPES_SIMULATED */
+
+#endif /*!NO_VMS_PIPES*/
diff --git a/vms/vmsbuild.com b/vms/vmsbuild.com
new file mode 100644
index 00000000..e823eb1b
--- /dev/null
+++ b/vms/vmsbuild.com
@@ -0,0 +1,70 @@
+$! vmsbuild.com -- Commands to build GAWK Pat Rankin, Dec'89
+$! revised, Mar'90
+$! gawk 2.13 revised, Jun'91
+$!
+$! REL and PATCHLVL are combined into the "identification=" line that
+$! this procedure appends to the linker options file.
+$ REL = "2.13" !release version number
+$ PATCHLVL = "2"
+$!
+$! The cc and link symbols get the defaults below only when they are
+$! not already defined as strings.
+$! [ remove "/optimize=noinline" for VAX C V2.x ]
+$ if f$type(cc) .nes."STRING" then cc := cc/nolist/optimize=noinline
+$ if f$type(link).nes."STRING" then link := link/nomap
+$!
+$ cc := 'cc'/include=[]
+$ libs = "sys$share:vaxcrtl.exe/Shareable"
+$
+$! uncomment the next two lines for VAX C V2.x
+$ ! define vaxc$library sys$library:,sys$disk:[.vms]
+$ ! define c$library [],[.vms]
+$!
+$! uncomment next two lines for GNU C
+$ ! cc := gcc/include=([],[.vms]) !use GNU C rather than VAX C
+$ ! libs = "gnu_cc:[000000]gcclib.olb/Library,sys$library:vaxcrtl.olb/Library"
+$!
+$! Copy the VMS configuration header into place when config.h is absent,
+$! then make sure awk_tab.c exists; if it does not, print instructions
+$! (with a rename hint for each leftover file that is found) and exit.
+$ if f$search("config.h") .eqs."" then copy [.config]vms-conf.h []config.h
+$ if f$search("awk_tab.c").nes."" then goto awk_tab_ok
+$ write sys$output " You must process `awk.y' with ""yacc"" or ""bison"""
+$ if f$search("awk.tab_c").nes."" then - !unpacked with poor 'tar' reader
+ write sys$output " or else rename `awk.tab_c' to `awk_tab.c'."
+$ if f$search("y_tab.c").nes."" then - !yacc was run manually
+ write sys$output " or else rename `y_tab.c' to `awk_tab.c'."
+$ exit
+$awk_tab_ok:
+$! Compile each source module with the cc symbol set up earlier.
+$ cc main.c
+$ cc eval.c
+$ cc builtin.c
+$ cc msg.c
+$ cc iop.c
+$ cc io.c
+$ cc field.c
+$ cc array.c
+$ cc node.c
+$ cc version.c
+$ cc missing.c
+$ cc awk_tab.c
+$ cc regex.c
+$ cc re.c
+$ cc dfa.c
+$! alloca is compiled with STACK_DIRECTION and exit defined on the command line
+$ cc/define=("STACK_DIRECTION=(-1)","exit=vms_exit") alloca
+$! VMS-specific modules from the [.vms] subdirectory
+$ cc [.vms]vms_misc.c
+$ cc [.vms]vms_popen.c
+$ cc [.vms]vms_fwrite.c
+$ cc [.vms]vms_args.c
+$ cc [.vms]vms_gawk.c
+$ cc [.vms]vms_cli.c
+$! turn the command definition file into an object module for the link
+$ set command/object=[]gawk_cmd.obj [.vms]gawk.cld
+$!
+$! Build the linker options file: "create" writes the lines that follow
+$! it (up to the next "$" line) into gawk.opt; the library list and the
+$! identification string are appended afterwards.
+$ create gawk.opt
+! GAWK -- Gnu AWK
+main.obj,eval.obj,builtin.obj,msg.obj,iop.obj,io.obj
+field.obj,array.obj,node.obj,version.obj,missing.obj,awk_tab.obj
+regex.obj,re.obj,dfa.obj,[]alloca.obj
+[]vms_misc.obj,vms_popen.obj,vms_fwrite.obj
+[]vms_args.obj,vms_gawk.obj,vms_cli.obj,gawk_cmd.obj
+psect_attr=environ,noshr !extern [noshare] char **
+stack=50 !preallocate more pages (default is 20)
+$ open/append Fopt gawk.opt
+$ write Fopt libs
+$ write Fopt "identification=""V''REL'.''PATCHLVL'"""
+$ close Fopt
+$!
+$! link gawk.exe using the options file built above
+$ link/exe=gawk.exe gawk.opt/options