diff options
-rw-r--r-- | Indent.opts | 1 | ||||
-rw-r--r-- | Makefile | 75 | ||||
-rw-r--r-- | PROBLEMS | 26 | ||||
-rw-r--r-- | README | 42 | ||||
-rw-r--r-- | README.1.01 | 25 | ||||
-rw-r--r-- | alloca.c | 191 | ||||
-rw-r--r-- | alloca.s | 15 | ||||
-rw-r--r-- | awk.h | 377 | ||||
-rw-r--r-- | awk.tab.c | 1696 | ||||
-rw-r--r-- | awk.y | 1053 | ||||
-rw-r--r-- | awk1.c | 555 | ||||
-rw-r--r-- | awk2.c | 235 | ||||
-rw-r--r-- | awk3.c | 195 | ||||
-rw-r--r-- | awk4.c | 103 | ||||
-rw-r--r-- | awk5.c | 60 | ||||
-rw-r--r-- | awk6.c | 29 | ||||
-rw-r--r-- | awk7.c | 165 | ||||
-rw-r--r-- | awk8.c | 65 | ||||
-rw-r--r-- | awk9.c | 49 | ||||
-rw-r--r-- | awka.c | 44 | ||||
-rw-r--r-- | copyleft.c | 10 | ||||
-rw-r--r-- | do_free.c | 5 | ||||
-rw-r--r-- | gawk.1 | 280 | ||||
-rw-r--r-- | getopt.c | 90 | ||||
-rw-r--r-- | obstack.h | 0 | ||||
-rw-r--r-- | regex.c | 10 | ||||
-rw-r--r-- | regex.h | 8 | ||||
-rw-r--r-- | version.c | 13 |
28 files changed, 2350 insertions, 3067 deletions
diff --git a/Indent.opts b/Indent.opts new file mode 100644 index 00000000..4d87ecee --- /dev/null +++ b/Indent.opts @@ -0,0 +1 @@ +-bap -bad -bbb -nip -di1 @@ -22,39 +22,36 @@ # CFLAGS: options to the C compiler # -# -I. so includes of <obstack.h> work. mandatory. (fix?) # -O optimize # -g include dbx/sdb info # -gg include gdb debugging info; only for GCC # -pg include new (gmon) profiling info # -p include old style profiling info (System V) # -# -Bstatic - For SunOS 4.0, don't use dynamic linking -# -DUSG - for System V boxen. -# -DSTRICT - remove anything not in Unix awk. Off by default. -# -DDEBUG - include debugging code and options -# -DVPRINTF - system has vprintf and associated routines -# -DBSD - system needs version of vprintf et al. defined in awk5.c -# (this is the only use at present, so don't define it if you -# *have* vprintf et al. in your library) +# -DNOVPRINTF - system has no vprintf and associated routines +# -DHASDOPRNT - system needs version of vprintf et al. defined in awk5.c +# and has a BSD compatable doprnt() +# -DNOMEMCMP - system lacks memcmp() +# -DUSG - system is generic-ish System V. # -INCLUDE= #-I. -OPTIMIZE= -O -DEBUG= #-DDEBUG -DEBUGGER= -g +OPTIMIZE=-O +DEBUG=#-DDEBUG #-DFUNC_TRACE -DMEMDEBUG +DEBUGGER=#-g PROFILE=#-pg -SUNOS=# -Bstatic -SYSV=# -DVPRINTF -BSD=-DBSD - -FLAGS= $(INCLUDE) $(OPTIMIZE) $(SYSV) $(DEBUG) $(BSD) -CFLAGS = $(FLAGS) $(DEBUGGER) $(SUNOS) $(PROFILE) +SYSV= +BSD=#-DHASDOPRNT +MEMCMP=#-DNOMEMCMP +VPRINTF=#-DNOVPRINTF + +FLAGS= $(OPTIMIZE) $(SYSV) $(DEBUG) $(BSD) $(MEMCMP) $(VPRINTF) +CFLAGS= $(FLAGS) $(DEBUGGER) $(PROFILE) +LDFLAGS= #-Bstatic SRC = awk1.c awk2.c awk3.c awk4.c awk5.c \ - awk6.c awk7.c awk8.c awk9.c regex.c version.c #obstack.c + awk6.c awk7.c awk8.c awk9.c regex.c version.c do_free.c awka.c AWKOBJS = awk1.o awk2.o awk3.o awk4.o awk5.o awk6.o awk7.o awk8.o awk9.o \ - version.o + version.o do_free.o awka.o ALLOBJS = $(AWKOBJS) awk.tab.o # Parser to use on grammar -- if you don't have bison use the first one @@ -66,24 +63,32 @@ PARSER = bison # alloca. Uncomment the rule below to actually make alloca.o. S5OBJS= +# GETOPT +# Set equal to getopt.o if you have a generic BSD system. The +# generic BSD getopt is reported to not work with gawk. The +# gnu getopt is supplied in getopt.c +GETOPT= + # LIBOBJS # Stuff that awk uses as library routines, but not in /lib/libc.a. -LIBOBJS= regex.o $(S5OBJS) #obstack.o +LIBOBJS= regex.o $(S5OBJS) $(GETOPT) # DOCS # Documentation for users +# +# Someday: +#DOCS=gawk.1 gawk.texinfo DOCS= gawk.1 # We don't distribute shar files, but they're useful for mailing. UPDATES = Makefile awk.h awk.y \ - $(SRC) regex.h #obstack.h + $(SRC) regex.h -SHARS = $(DOCS) COPYING README.1.01 README PROBLEMS \ - $(UPDATES) awk.tab.c\ - alloca.s +SHARS = $(DOCS) COPYING README PROBLEMS $(UPDATES) awk.tab.c \ + alloca.s alloca.c getopt.c gawk: $(ALLOBJS) $(LIBOBJS) - $(CC) -o gawk $(CFLAGS) $(ALLOBJS) $(LIBOBJS) -lm + $(CC) -o gawk $(CFLAGS) $(ALLOBJS) $(LIBOBJS) -lm $(LDFLAGS) $(AWKOBJS): awk.h @@ -92,17 +97,6 @@ awk.tab.o: awk.h awk.tab.c awk.tab.c: awk.y $(PARSER) -v awk.y -mv -f y.tab.c awk.tab.c -# -if [ $(PARSER) = "yacc" ] ; \ -# then \ -# if cmp -s y.tab.h awk.tab.h ; \ -# then : ; \ -# else \ -# cp y.tab.h awk.tab.h ; \ -# grep '^#.*define' awk.tab.h | \ -# sed 's/^# define \([^ ]*\) [^ ]*$$/ "\1",/' >y.tok.h ; \ -# mv y.tab.c awk.tab.c; \ -# fi; \ -# fi # Alloca: uncomment this if your system (notably System V boxen) # does not have alloca in /lib/libc.a @@ -112,6 +106,11 @@ awk.tab.c: awk.y # as t.s -o alloca.o # rm t.s +# If your machine is not supported by the assembly version of alloca.s, +# use the C version instead. This uses the default rules to make alloca.o. +# +#alloca.o: alloca.c + lint: $(SRC) lint -h $(FLAGS) $(SRC) awk.tab.c @@ -1,26 +1,22 @@ -This is a list of known problems in the current version of gawk. +This is a list of known problems in gawk 2.03. Hopefully they will all be fixed in the next major release of gawk. Please keep in mind that this is still beta software and the code is still undergoing significant evolution. -1. Memory Management. Gawk has memory leaks. This version (12/23/88) is - better than earlier versions, but still not wonderful. +1. %g does not truncate non-significant zeros. + gawk 'BEGIN { printf "%g\n", 1 }' should print "1", not "1.000000". -2. Gawk reportedly does not work well with the BSD getopt. The getopt from - gnu grep is reported to work fine. +2. %d doesn't quite work either: + echo 1 | gawk '{ printf "%0.2d\n", $1 }' should print "01", not "1". -3. The % operator truncates to integer. This will be fixed. +3. The debugging code does not print redirection info. -4. \ inside [] in regexps doesn't work like the book says they should. - This will also be fixed. +4. The scanner needs an overhaul. -5. %g does not seem to truncate non-significant zeros. - -6. No gawk.texinfo; this is being worked on. - -7. MS-DOS support. This version does not have it, although support for - MSC 5.1 was recently contributed and will be included in the next - major release. +5. MS-DOS support. Since 2.03 is a major change over 2.02, we are chucking + the ms-dos diffs for 2.02. However, the people who contributed it are + working on a pre-release of 2.03, so this entry may go away for the + release. Arnold Robbins @@ -1,22 +1,32 @@ README: -This is GNU Awk 2.00 Beta. It should be functionally equivalent to the +This is GNU Awk 2.03 Beta. It should be upwardly compatible with the System V Release 4 awk. (Yes, you read that right.) -**** N O T I C E ****: Although the functionality is reasonably stable -and we think it relatively bug-free, the code itself is not stable and -the next release may look quite different. In particular, the memory -management is in a state of flux and will be greatly changed (and -improved) in the next release. - -Some additional features are under design/discussion with Randall -Howard at MKS and Brian Kernighan at Bell Labs. Although they are -documented, they are subject to future change. +This represents a major improvement over the previous release. In +particular, most known bugs have been fixed, and the memory leaks have +been plugged. However, the code continues to undergo steady evolution. +Known problems are given in the PROBLEMS file. The gawk.1 man page is concise but (to the best of our knowledge) complete. -A gawk.texinfo is in the works. A preliminary draft exists, but has numerous -technical errors which are being fixed. It also will have to be reorganized. -Don't look for it for a while. However, the AWK book will do quite well. +A gawk.texinfo is real close to ready. It may even be included in this +distribution. If so, it likely does not document all the new features +in 2.03 not in 2.02; that will be rectified. For the moment, the man +page should be considered authoritative when it conflicts with the +gawk.texinfo file. + +INSTALLATION: + +The Makefile will need some tailoring. Currently it is set up for +a Sun running SunOS 4.x. The changes to make in the Makefile are +commented and should be obvious. + +If you have neither bison nor yacc, use the awk.tab.c file here. It was +generated with bison, and should have no AT+T code in it. (Note that +modifying awk.y without bison or yacc will be difficult, at best. You might +want to get a copy of bison from the FSF too.) + +BUG REPORTS AND FIXES: Please coordinate changes through David Trueman and/or Arnold Robbins. @@ -35,3 +45,9 @@ Emory University, Atlanta, GA, 30322, USA DOMAIN: arnold@emoryu1.cc.emory.edu UUCP: { gatech, mtxinu }!emoryu1!arnold BITNET: arnold@emoryu1 + +If you can't contact either of us, try Jay Fenlason, hack@prep.ai.mit.edu +AKA mit-eddie!prep!hack. During odd hours he can sometimes be reached at +(617) 253-8975, which is an MIT phone in the middle of the corridor, so don't +be suprised if someone wierd answers, or if the person on the other end has +never heard of him. (Direct them to the microvax about 10 feet to their left.) diff --git a/README.1.01 b/README.1.01 deleted file mode 100644 index e61441bb..00000000 --- a/README.1.01 +++ /dev/null @@ -1,25 +0,0 @@ -This is the Beta-test distribution of gawk. (Probably around version 1.01 -or so.) - -Please send all -bug-reports, comments, cries for help, etc, to hack@prep.ai.mit.edu -AKA mit-eddie!prep!hack During odd hours I can sometimes be reached at -(617) 253-8975, which is an MIT phone in the middle of the corridor, so don't -be suprised if someone wierd answers, or if the person on the other end has -never heard of me. (Direct them to the microvax about 10feet to their left.) - -Gawk requires some berkeleyisms, like alloca(), bcopy(), index(), etc. I -believe we have a portable version of alloca() (part of GNUemacs), and -probably the other stuff as well. Send me mail if you need anything. - -For real speed, you should change the Makefile to compile -O -DFAST and -disable the debugger. (-DFAST replaces some function calls with macros, and -disables a lot of debugging stuff.) - -If you don't have bison, modify the makefile to call yacc instead -(The proper commands should be already in the makefile; just un-comment them.) -If you have neither bison nor yacc, use the awk.tab.c file here. It was -generated with bison, and should have no AT+T code in it. (Note that -modifying awk.y without bison or yacc will be difficult, at best. You might -want to get a copy of bison from us too.) - diff --git a/alloca.c b/alloca.c new file mode 100644 index 00000000..cfe98f92 --- /dev/null +++ b/alloca.c @@ -0,0 +1,191 @@ +/* + alloca -- (mostly) portable public-domain implementation -- D A Gwyn + + last edit: 86/05/30 rms + include config.h, since on VMS it renames some symbols. + Use xmalloc instead of malloc. + + This implementation of the PWB library alloca() function, + which is used to allocate space off the run-time stack so + that it is automatically reclaimed upon procedure exit, + was inspired by discussions with J. Q. Johnson of Cornell. + + It should work under any C implementation that uses an + actual procedure stack (as opposed to a linked list of + frames). There are some preprocessor constants that can + be defined when compiling for your specific system, for + improved efficiency; however, the defaults should be okay. + + The general concept of this implementation is to keep + track of all alloca()-allocated blocks, and reclaim any + that are found to be deeper in the stack than the current + invocation. This heuristic does not reclaim storage as + soon as it becomes invalid, but it will do so eventually. + + As a special case, alloca(0) reclaims storage without + allocating any. It is a good idea to use alloca(0) in + your main control loop, etc. to force garbage collection. +*/ +#ifndef lint +static char SCCSid[] = "@(#)alloca.c 1.1"; /* for the "what" utility */ +#endif + +#ifdef emacs +#include "config.h" +#ifdef static +/* actually, only want this if static is defined as "" + -- this is for usg, in which emacs must undefine static + in order to make unexec workable + */ +#ifndef STACK_DIRECTION +you +lose +-- must know STACK_DIRECTION at compile-time +#endif /* STACK_DIRECTION undefined */ +#endif static +#endif emacs + +#ifdef X3J11 +typedef void *pointer; /* generic pointer type */ +#else +typedef char *pointer; /* generic pointer type */ +#endif + +#define NULL 0 /* null pointer constant */ + +extern void free(); +extern pointer xmalloc(); + +/* + Define STACK_DIRECTION if you know the direction of stack + growth for your system; otherwise it will be automatically + deduced at run-time. + + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown +*/ + +#ifndef STACK_DIRECTION +#define STACK_DIRECTION 0 /* direction unknown */ +#endif + +#if STACK_DIRECTION != 0 + +#define STACK_DIR STACK_DIRECTION /* known at compile-time */ + +#else /* STACK_DIRECTION == 0; need run-time code */ + +static int stack_dir; /* 1 or -1 once known */ +#define STACK_DIR stack_dir + +static void +find_stack_direction (/* void */) +{ + static char *addr = NULL; /* address of first + `dummy', once known */ + auto char dummy; /* to get stack address */ + + if (addr == NULL) + { /* initial entry */ + addr = &dummy; + + find_stack_direction (); /* recurse once */ + } + else /* second entry */ + if (&dummy > addr) + stack_dir = 1; /* stack grew upward */ + else + stack_dir = -1; /* stack grew downward */ +} + +#endif /* STACK_DIRECTION == 0 */ + +/* + An "alloca header" is used to: + (a) chain together all alloca()ed blocks; + (b) keep track of stack depth. + + It is very important that sizeof(header) agree with malloc() + alignment chunk size. The following default should work okay. +*/ + +#ifndef ALIGN_SIZE +#define ALIGN_SIZE sizeof(double) +#endif + +typedef union hdr +{ + char align[ALIGN_SIZE]; /* to force sizeof(header) */ + struct + { + union hdr *next; /* for chaining headers */ + char *deep; /* for stack depth measure */ + } h; +} header; + +/* + alloca( size ) returns a pointer to at least `size' bytes of + storage which will be automatically reclaimed upon exit from + the procedure that called alloca(). Originally, this space + was supposed to be taken from the current stack frame of the + caller, but that method cannot be made to work for some + implementations of C, for example under Gould's UTX/32. +*/ + +static header *last_alloca_header = NULL; /* -> last alloca header */ + +pointer +alloca (size) /* returns pointer to storage */ + unsigned size; /* # bytes to allocate */ +{ + auto char probe; /* probes stack depth: */ + register char *depth = &probe; + +#if STACK_DIRECTION == 0 + if (STACK_DIR == 0) /* unknown growth direction */ + find_stack_direction (); +#endif + + /* Reclaim garbage, defined as all alloca()ed storage that + was allocated from deeper in the stack than currently. */ + + { + register header *hp; /* traverses linked list */ + + for (hp = last_alloca_header; hp != NULL;) + if (STACK_DIR > 0 && hp->h.deep > depth + || STACK_DIR < 0 && hp->h.deep < depth) + { + register header *np = hp->h.next; + + free ((pointer) hp); /* collect garbage */ + + hp = np; /* -> next header */ + } + else + break; /* rest are not deeper */ + + last_alloca_header = hp; /* -> last valid storage */ + } + + if (size == 0) + return NULL; /* no allocation required */ + + /* Allocate combined header + user data storage. */ + + { + register pointer new = xmalloc (sizeof (header) + size); + /* address of header */ + + ((header *)new)->h.next = last_alloca_header; + ((header *)new)->h.deep = depth; + + last_alloca_header = (header *)new; + + /* User storage begins just after header. */ + + return (pointer)((char *)new + sizeof(header)); + } +} + @@ -26,29 +26,28 @@ and this notice must be preserved on all copies. */ on all 68000 systems. */ /* #include "config.h" */ -#define m68k #ifndef HAVE_ALLOCA /* define this to use system's alloca */ #ifndef hp9000s300 -#ifndef m68k +#ifndef mc68k #ifndef m68000 #ifndef WICAT #ifndef ns16000 #ifndef sequent -#ifndef pyramid +#ifndef pyr #ifndef ATT3B5 #ifndef XENIX you lose!! #endif /* XENIX */ #endif /* ATT3B5 */ -#endif /* pyramid */ +#endif /* pyr */ #endif /* sequent */ #endif /* ns16000 */ #endif /* WICAT */ #endif /* m68000 */ -#endif /* m68k */ +#endif /* mc68k */ #endif /* hp9000s300 */ @@ -102,7 +101,7 @@ copy_regs_loop: /* save caller's saved registers */ jmp (%a0) # rts #endif /* new hp assembler */ #else -#ifdef m68k /* SGS assembler totally different */ +#ifdef mc68k /* SGS assembler totally different */ file "alloca.s" global alloca alloca: @@ -120,7 +119,7 @@ alloca: set R%1,3+S%1 # add to size for rounding set P%1,-132 # probe this far below current top of stack -#else /* not m68k */ +#else /* not mc68k */ #ifdef m68000 @@ -182,7 +181,7 @@ alloca: #endif /* not WICAT */ #endif /* m68000 */ -#endif /* not m68k */ +#endif /* not mc68k */ #endif /* not hp9000s300 */ #ifdef ns16000 @@ -5,6 +5,27 @@ * 1986 * * $Log: awk.h,v $ + * Revision 1.35 89/03/24 15:56:35 david + * merge HASHNODE and AHASH into NODE + * + * Revision 1.34 89/03/22 21:01:54 david + * support for new newnode(); delete obsolete member in struct search + * + * Revision 1.33 89/03/21 19:25:06 david + * bring some prototypes up to date + * + * Revision 1.32 89/03/21 11:10:44 david + * major cleanup + * rearrange NODE structure for space efficiency + * add MEMDEBUG stuff for finding memory leaks + * add STREQN define + * + * Revision 1.31 89/03/15 21:53:55 david + * changes from Arnold: case-insensitive matching, BELL, delete obstack, cleanup + * + * Revision 1.30 89/03/15 21:28:32 david + * add free_result to free return from tree_eval + * * Revision 1.29 88/12/15 12:52:10 david * casetable made static elsewhere * @@ -132,8 +153,7 @@ * from sharing it farther. Help stamp out software hoarding! */ -#define AWKNUM double - +/* ------------------------------ Includes ------------------------------ */ #include <stdio.h> #include <ctype.h> #include <setjmp.h> @@ -143,19 +163,48 @@ #include <errno.h> #include "regex.h" -#define is_identchar(c) (isalnum(c) || (c) == '_') -#ifdef notdef -#define free do_free /* for debugging */ -#define malloc do_malloc /* for debugging */ +/* ------------------- System Functions, Variables, etc ------------------- */ +/* nasty nasty SunOS-ism */ +#ifdef sparc +#include <alloca.h> +#else +extern char *alloca(); +#endif +#ifdef USG +extern int sprintf(); +#define index strchr +#define rindex strrchr +#define bcmp memcmp +#define bcopy(s,d,l) memcpy((d),(s),(l)) +#define bzero(p,l) memset((p),0,(l)) +/* nasty nasty berkelixm */ +#define _setjmp setjmp +#define _longjmp longjmp +#else /* not USG */ +extern char *sprintf(); +#endif +/* + * if you don't have vprintf, but you are BSD, the version defined in + * awk5.c should do the trick. Otherwise, use this and cross your fingers. + */ +#if !defined(VPRINTF) && !defined(BSD) +#define vfprintf(fp,fmt,arg) _doprnt((fmt), (arg), (fp)) #endif -#include "obstack.h" -#define obstack_chunk_alloc safe_malloc -#define obstack_chunk_free free -char *malloc(), *realloc(); -char *safe_malloc(); -void free(); +extern char *malloc(), *realloc(); +extern void free(); + +extern char *strcpy(), *strcat(); +extern char *index(); + +extern double atof(); + +extern int errno; +extern char *sys_errlist[]; + +/* ------------------ Constants, Structures, Typedefs ------------------ */ +#define AWKNUM double typedef enum { /* illegal entry == 0 */ @@ -167,8 +216,7 @@ typedef enum { Node_mod, /* 3 */ Node_plus, /* 4 */ Node_minus, /* 5 */ - Node_cond_pair, /* 6: conditional pair (see Node_line_range) - * jfw */ + Node_cond_pair, /* 6: conditional pair (see Node_line_range) */ Node_subscript, /* 7 */ Node_concat, /* 8 */ @@ -263,7 +311,6 @@ typedef enum { * pattern: conditional ',' conditional ; lnode of Node_line_range * is the two conditionals (Node_cond_pair), other word (rnode place) * is a flag indicating whether or not this range has been entered. - * (jfw@eddie.mit.edu) */ Node_line_range, /* 64 */ @@ -293,42 +340,59 @@ typedef enum { * status - to replace Node_string, Node_num, * Node_temp_str and Node_str_num */ - Node_case_match, /* 79 case independant regexp match */ - Node_case_nomatch, /* 80 case independant regexp no match */ + Node_hashnode, + Node_ahash, } NODETYPE; typedef struct exp_node { - NODETYPE type; union { struct { union { struct exp_node *lptr; char *param_name; + char *retext; + struct exp_node *nextnode; } l; union { struct exp_node *rptr; struct exp_node *(*pptr) (); struct re_pattern_buffer *preg; struct for_loop_header *hd; - struct ahash **av; - int r_ent; /* range entered (jfw) */ + struct exp_node **av; + int r_ent; /* range entered */ } r; - int number; char *name; + short number; + unsigned char recase; } nodep; struct { - struct exp_node **ap; - int as; - } ar; - struct { - char *sp; AWKNUM fltnum; /* this is here for optimal packing of * the structure on many machines */ + char *sp; short slen; unsigned char sref; } val; + struct { + struct exp_node *next; + char *name; + int length; + struct exp_node *value; + } hash; +#define hnext sub.hash.next +#define hname sub.hash.name +#define hlength sub.hash.length +#define hvalue sub.hash.value + struct { + struct exp_node *next; + struct exp_node *name; + struct exp_node *value; + } ahash; +#define ahnext sub.ahash.next +#define ahname sub.ahash.name +#define ahvalue sub.ahash.value } sub; + NODETYPE type; unsigned char flags; # define MEM 0x7 # define MALLOC 1 /* can be free'd */ @@ -340,6 +404,7 @@ typedef struct exp_node { } NODE; #define lnode sub.nodep.l.lptr +#define nextp sub.nodep.l.nextnode #define rnode sub.nodep.r.rptr #define varname sub.nodep.name #define source_file sub.nodep.name @@ -352,13 +417,12 @@ typedef struct exp_node { #define reexp lnode #define rereg sub.nodep.r.preg +#define re_case sub.nodep.recase +#define re_text sub.nodep.l.retext #define forsub lnode #define forloop rnode->sub.nodep.r.hd -#define array sub.ar.ap -#define arrsiz sub.ar.as - #define stptr sub.val.sp #define stlen sub.val.slen #define stref sub.val.sref @@ -372,149 +436,94 @@ typedef struct exp_node { #define condpair lnode #define triggered sub.nodep.r.r_ent -NODE *newnode(), *dupnode(); -NODE *node(), *snode(), *make_number(), *make_string(), *make_name(); -NODE *make_param(); -NODE *mkrangenode(); /* to remove the temptation to use - * sub.nodep.r.rptr as a boolean flag, or to - * call node() with a 0 and hope that it will - * store correctly as an int. (jfw) */ -NODE *tmp_string(), *tmp_number(); -NODE *variable(), *append_right(); +#define HASHSIZE 101 -NODE *r_tree_eval(); -NODE **get_lhs(); +typedef struct for_loop_header { + NODE *init; + NODE *cond; + NODE *incr; +} FOR_LOOP_HEADER; -struct re_pattern_buffer *make_regexp(); +/* for "for(iggy in foo) {" */ +struct search { + int numleft; + NODE **arr_ptr; + NODE *bucket; + NODE *retval; +}; -extern NODE **stack_ptr; -extern NODE *Nnull_string; +/* longjmp return codes, must be nonzero */ +/* Continue means either for loop/while continue, or next input record */ +#define TAG_CONTINUE 1 +/* Break means either for/while break, or stop reading input */ +#define TAG_BREAK 2 +/* Return means return from a function call; leave value in ret_node */ +#define TAG_RETURN 3 + +#define HUGE 0x7fffffff + +/* -------------------------- External variables -------------------------- */ +/* gawk builtin variables */ extern NODE *FS_node, *NF_node, *RS_node, *NR_node; extern NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node; extern NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node; +extern NODE *IGNORECASE_node; -extern struct obstack other_stack; +extern NODE **stack_ptr; +extern NODE *Nnull_string; extern NODE *deref; extern NODE **fields_arr; extern int sourceline; extern char *source; +extern NODE *expression_value; -#ifdef USG -int sprintf(); -#else -char *sprintf(); -#endif -char *strcpy(), *strcat(); - -double atof(); -AWKNUM r_force_number(); -NODE *r_force_string(); - - -NODE *expression_value; - -#define HASHSIZE 101 - -typedef struct hashnode HASHNODE; -struct hashnode { - HASHNODE *next; - char *name; - int length; - NODE *value; -} *variables[HASHSIZE]; - - -typedef struct ahash AHASH; -struct ahash { - AHASH *next; - NODE *name, *symbol, *value; -}; +extern NODE *variables[]; +extern NODE *_t; /* used as temporary in tree_eval */ +extern NODE *_result; /* Ditto */ -typedef struct for_loop_header { - NODE *init; - NODE *cond; - NODE *incr; -} FOR_LOOP_HEADER; +extern NODE *nextfree; +extern NODE *lastfree; -NODE *make_for_loop(); +extern char *myname; -/* for "for(iggy in foo) {" */ -struct search { - int numleft; - AHASH **arr_ptr; - AHASH *bucket; - NODE *symbol; - NODE *retval; -}; +extern int node0_valid; +extern int field_num; +extern int strict; -struct search *assoc_scan(), *assoc_next(); +/* ------------------------- Pseudo-functions ------------------------- */ +#define is_identchar(c) (isalnum(c) || (c) == '_') -extern NODE *_t; /* used as temporary in following macro */ -extern NODE *_result; #define tree_eval(t) (_result = (_t = (t),(_t) == NULL ? Nnull_string : \ ((_t)->type == Node_val ? (_t) : r_tree_eval((_t))))) #define free_temp(n) if ((n)->flags&TEMP) { deref = (n); do_deref(); } else #define free_result() if (_result) free_temp(_result); else -#ifdef USG -#define index strchr -#define rindex strrchr -#define bcmp memcmp -/* nasty nasty berkelixm */ -#define _setjmp setjmp -#define _longjmp longjmp -#endif - -char *index(); - -/* longjmp return codes, must be nonzero */ -/* Continue means either for loop/while continue, or next input record */ -#define TAG_CONTINUE 1 -/* Break means either for/while break, or stop reading input */ -#define TAG_BREAK 2 -/* Return means return from a function call; leave value in ret_node */ -#define TAG_RETURN 3 - /* * the loop_tag_valid variable allows continue/break-out-of-context to be - * caught and diagnosed (jfw) + * caught and diagnosed */ #define PUSH_BINDING(stack, x, val) (bcopy ((char *)(x), (char *)(stack), sizeof (jmp_buf)), val++) #define RESTORE_BINDING(stack, x, val) (bcopy ((char *)(stack), (char *)(x), sizeof (jmp_buf)), val--) -/* nasty nasty SunOS-ism */ -#ifdef sparc -#include <alloca.h> -#endif - -extern char *myname; -void msg(); -void warning(); -void illegal_type(); -void fatal(); - #define cant_happen() fatal("line %d, file: %s; bailing out", \ __LINE__, __FILE__); - -/* - * if you don't have vprintf, but you are BSD, the version defined in - * awk5.c should do the trick. Otherwise, use this and cross your fingers. - */ -#if !defined(VPRINTF) && !defined(BSD) -#define vfprintf(fp,fmt,arg) _doprnt((fmt), (arg), (fp)) +#ifdef MEMDEBUG +#define memmsg(x,y,z,zz) fprintf(stderr, "malloc: %s: %s: %d %0x\n", z, x, y, zz) +#define free(s) fprintf(stderr, "free: s: %0x\n", s), do_free(s) +#else +#define memmsg(x,y,z,zz) #endif -extern int errno; -extern char *sys_errlist[]; - #define emalloc(var,ty,x,str) if ((var = (ty) malloc((unsigned)(x))) == NULL)\ fatal("%s: %s: can't allocate memory (%s)",\ - (str), "var", sys_errlist[errno]); else + (str), "var", sys_errlist[errno]); else\ + memmsg("var", x, str, var) #define erealloc(var,ty,x,str) if((var=(ty)realloc(var,(unsigned)(x)))==NULL)\ fatal("%s: %s: can't allocate memory (%s)",\ - (str), "var", sys_errlist[errno]); else + (str), "var", sys_errlist[errno]); else\ + memmsg("re: var", x, str, var) #ifdef DEBUG #define force_number r_force_number #define force_string r_force_string @@ -524,11 +533,99 @@ extern char *sys_errlist[]; #endif #define STREQ(a,b) (*(a) == *(b) && strcmp((a), (b)) == 0) -#define HUGE 0x7fffffff +#define STREQN(a,b,n) ((n) && *(a) == *(b) && strncmp((a), (b), (n)) == 0) + +#define WHOLELINE (node0_valid ? fields_arr[0] : *get_field(0, 0)) + +/* ------------- Function prototypes or defs (as appropriate) ------------- */ +#ifdef __STDC__ +extern struct re_pattern_buffer *make_regexp(NODE *, int); +extern struct re_pattern_buffer *mk_re_parse(char *, int); +extern NODE *variable(char *); +extern NODE *spc_var(char *, NODE *); +extern NODE *install(NODE **, char *, NODE *); +extern NODE *lookup(NODE **, char *); +extern NODE *make_name(char *, NODETYPE); +extern FILE *nextfile(void); +extern int interpret(NODE *); +extern NODE *r_tree_eval(NODE *); +extern void assign_number(NODE **, double); +extern int cmp_nodes(NODE *, NODE *); +extern char *get_fs(void); +extern FILE *redirect(NODE *, int *); +extern int flush_io(void); +extern void print_simple(NODE *, FILE *); +/* extern void warning(char *,...); */ +/* extern void fatal(char *,...); */ +extern void set_record(char *, int); +extern NODE **get_field(int, int); +extern NODE **get_lhs(NODE *, int); +extern void do_deref(void ); +extern struct search *assoc_scan(NODE *); +extern struct search *assoc_next(struct search *); +extern NODE **assoc_lookup(NODE *, NODE *); +extern double r_force_number(NODE *); +extern NODE *r_force_string(NODE *); +extern NODE *newnode(NODETYPE); +extern NODE *dupnode(NODE *); +extern NODE *make_number(double); +extern NODE *tmp_number(double); +extern NODE *make_string(char *, int); +extern NODE *tmp_string(char *, int); +extern char *re_compile_pattern(char *, int, struct re_pattern_buffer *); +extern int re_search(struct re_pattern_buffer *, char *, int, int, int, struct re_registers *); -extern int node0_valid; -extern int field_num; -extern NODE **get_field(); -#define WHOLELINE (node0_valid ? fields_arr[0] : *get_field(0)) +#else +extern struct re_pattern_buffer *make_regexp(); +extern struct re_pattern_buffer *mk_re_parse(); +extern NODE *variable(); +extern NODE *spc_var(); +extern NODE *install(); +extern NODE *lookup(); +extern NODE *make_name(); +extern FILE *nextfile(); +extern int interpret(); +extern NODE *r_tree_eval(); +extern void assign_number(); +extern int cmp_nodes(); +extern char *get_fs(); +extern FILE *redirect(); +extern int flush_io(); +extern void print_simple(); +extern void warning(); +extern void fatal(); +extern void set_record(); +extern NODE **get_field(); +extern NODE **get_lhs(); +extern void do_deref(); +extern struct search *assoc_scan(); +extern struct search *assoc_next(); +extern NODE **assoc_lookup(); +extern double r_force_number(); +extern NODE *r_force_string(); +extern NODE *newnode(); +extern NODE *dupnode(); +extern NODE *make_number(); +extern NODE *tmp_number(); +extern NODE *make_string(); +extern NODE *tmp_string(); +extern char *re_compile_pattern(); +extern int re_search(); +#endif -extern int strict; +/* Figure out what '\a' really is. */ +#ifdef __STDC__ +#define BELL '\a' /* sure makes life easy, don't it? */ +#else +# if 'z' - 'a' == 25 /* ascii */ +# if 'a' != 97 /* machine is dumb enough to use mark parity */ +# define BELL '\207' +# else +# define BELL '\07' +# endif +# else +# define BELL '\057' +# endif +#endif + +extern char casetable[]; /* for case-independent regexp matching */ diff --git a/awk.tab.c b/awk.tab.c deleted file mode 100644 index f0f43df4..00000000 --- a/awk.tab.c +++ /dev/null @@ -1,1696 +0,0 @@ - -/* A Bison parser, made from awk.y */ - -#define NAME 258 -#define REGEXP 259 -#define YSTRING 260 -#define ERROR 261 -#define INCDEC 262 -#define NUMBER 263 -#define ASSIGNOP 264 -#define RELOP 265 -#define MATCHOP 266 -#define NEWLINE 267 -#define REDIRECT_OP 268 -#define CONCAT_OP 269 -#define LEX_BEGIN 270 -#define LEX_END 271 -#define LEX_IF 272 -#define LEX_ELSE 273 -#define LEX_WHILE 274 -#define LEX_FOR 275 -#define LEX_BREAK 276 -#define LEX_CONTINUE 277 -#define LEX_PRINT 278 -#define LEX_PRINTF 279 -#define LEX_NEXT 280 -#define LEX_EXIT 281 -#define LEX_IN 282 -#define LEX_AND 283 -#define LEX_OR 284 -#define INCREMENT 285 -#define DECREMENT 286 -#define LEX_BUILTIN 287 -#define UNARY 288 - -#line 27 "awk.y" - -#define YYDEBUG 12 - -#include <stdio.h> -#include "awk.h" - - static int yylex (); - - - /* - * The following variable is used for a very sickening thing. - * The awk language uses white space as the string concatenation - * operator, but having a white space token that would have to appear - * everywhere in all the grammar rules would be unbearable. - * It turns out we can return CONCAT_OP exactly when there really - * is one, just from knowing what kinds of other tokens it can appear - * between (namely, constants, variables, or close parentheses). - * This is because concatenation has the lowest priority of all - * operators. want_concat_token is used to remember that something - * that could be the left side of a concat has just been returned. - * - * If anyone knows a cleaner way to do this (don't look at the Un*x - * code to find one, though), please suggest it. - */ - static int want_concat_token; - - /* Two more horrible kludges. The same comment applies to these two too */ - static int want_regexp; /* lexical scanning kludge */ - static int want_redirect; /* similarly */ - int lineno = 1; /* JF for error msgs */ - -/* During parsing of a gawk program, the pointer to the next character - is in this variable. */ - char *lexptr; /* JF moved it up here */ - char *lexptr_begin; /* JF for error msgs */ - -#line 64 "awk.y" -typedef union { - long lval; - AWKNUM fval; - NODE *nodeval; - NODETYPE nodetypeval; - char *sval; - NODE *(*ptrval)(); -} YYSTYPE; - -#ifndef YYLTYPE -typedef - struct yyltype - { - int timestamp; - int first_line; - int first_column; - int last_line; - int last_column; - char *text; - } - yyltype; - -#define YYLTYPE yyltype -#endif - -#define YYACCEPT return(0) -#define YYABORT return(1) -#define YYERROR return(1) -#include <stdio.h> - -#ifndef __STDC__ -#define const -#endif - - - -#define YYFINAL 200 -#define YYFLAG -32768 -#define YYNTBASE 49 - -#define YYTRANSLATE(x) (yytranslate[x]) - -static const char yytranslate[] = { 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 40, 2, 2, 48, 37, 2, 2, 41, - 42, 35, 33, 39, 34, 2, 36, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 45, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 46, 2, 47, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 43, 2, 44, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 38 -}; - -static const short yyrline[] = { 0, - 105, 110, 112, 117, 122, 124, 126, 131, 134, 136, - 138, 140, 142, 151, 153, 158, 160, 164, 166, 171, - 173, 178, 180, 182, 186, 189, 193, 196, 197, 198, - 199, 201, 204, 206, 208, 210, 212, 214, 217, 220, - 222, 228, 230, 236, 239, 244, 246, 248, 250, 255, - 259, 265, 267, 271, 276, 282, 284, 288, 291, 293, - 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, - 319, 323, 325, 327, 329, 331, 334, 336, 340, 342, - 344, 346, 348, 350, 352, 354, 356, 358, 360, 364, - 366, 368, 370, 372, 375, 379, 382, 384 -}; - -static const char * yytname[] = { 0, -"error","$illegal.","NAME","REGEXP","YSTRING","ERROR","INCDEC","NUMBER","ASSIGNOP","RELOP", -"MATCHOP","NEWLINE","REDIRECT_OP","CONCAT_OP","LEX_BEGIN","LEX_END","LEX_IF","LEX_ELSE","LEX_WHILE","LEX_FOR", -"LEX_BREAK","LEX_CONTINUE","LEX_PRINT","LEX_PRINTF","LEX_NEXT","LEX_EXIT","LEX_IN","LEX_AND","LEX_OR","INCREMENT", -"DECREMENT","LEX_BUILTIN","'+'","'-'","'*'","'/'","'%'","UNARY","','","'!'", -"'('","')'","'{'","'}'","';'","'['","']'","'$'","start" -}; - -static const short yyr1[] = { 0, - 49, 50, 50, 51, 52, 52, 52, 53, 53, 53, - 53, 53, 53, 54, 53, 55, 53, 53, 53, 56, - 56, 57, 57, 57, 58, 58, 59, 59, 59, 59, - 59, 60, 60, 60, 60, 60, 60, 60, 60, 61, - 60, 62, 60, 63, 60, 60, 60, 60, 60, 64, - 64, 65, 65, 66, 66, 67, 67, 68, 68, 68, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, - 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, - 70, 70, 70, 70, 70, 71, 71, 71 -}; - -static const short yyr2[] = { 0, - 2, 1, 2, 4, 0, 1, 3, 1, 1, 2, - 3, 3, 3, 0, 4, 0, 6, 3, 1, 0, - 4, 0, 1, 2, 2, 2, 0, 1, 1, 2, - 2, 5, 1, 6, 10, 9, 9, 2, 2, 0, - 5, 0, 5, 0, 7, 2, 2, 5, 2, 6, - 9, 0, 2, 0, 2, 0, 1, 0, 1, 3, - 4, 1, 3, 2, 2, 2, 2, 2, 1, 1, - 1, 3, 3, 3, 3, 3, 3, 3, 4, 1, - 3, 2, 2, 2, 2, 2, 1, 1, 1, 3, - 3, 3, 3, 3, 3, 1, 4, 2 -}; - -static const short yydefact[] = { 52, - 5, 96, 71, 70, 53, 8, 9, 0, 0, 62, - 0, 14, 0, 0, 0, 5, 2, 20, 6, 19, - 69, 0, 65, 66, 58, 0, 64, 0, 10, 0, - 19, 89, 88, 0, 0, 80, 0, 0, 98, 87, - 3, 27, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 67, 68, 0, 0, 59, - 0, 0, 13, 63, 83, 84, 58, 82, 0, 0, - 0, 0, 0, 0, 0, 85, 86, 29, 28, 22, - 52, 11, 12, 7, 18, 16, 77, 75, 76, 72, - 73, 74, 78, 97, 0, 61, 15, 0, 81, 95, - 93, 94, 90, 91, 92, 31, 30, 0, 0, 0, - 0, 0, 40, 42, 0, 0, 27, 0, 23, 33, - 0, 4, 0, 60, 79, 0, 0, 56, 52, 52, - 38, 39, 58, 58, 58, 46, 0, 47, 22, 21, - 24, 49, 0, 0, 0, 96, 0, 57, 25, 26, - 54, 0, 54, 0, 0, 17, 27, 27, 0, 0, - 0, 0, 44, 0, 0, 27, 0, 0, 0, 56, - 0, 55, 41, 54, 43, 48, 32, 50, 34, 0, - 0, 56, 0, 27, 27, 27, 0, 45, 0, 0, - 0, 27, 51, 37, 36, 0, 35, 0, 0, 0 -}; - -static const short yydefgoto[] = { 198, - 16, 17, 18, 19, 28, 123, 43, 118, 131, 80, - 119, 133, 135, 174, 120, 1, 162, 147, 59, 121, - 39, 21 -}; - -static const short yypact[] = {-32768, - 262, -29,-32768,-32768,-32768,-32768,-32768, 11, 11, 6, - 313,-32768, 326, 326, 345, 142,-32768, 17, 243, 399, - 31, 313,-32768,-32768, 313, 313,-32768, 61,-32768, 35, - 129,-32768,-32768, 11, 11, 39, 313, 313,-32768, 157, --32768, 99, 81, 326, 326, 326, 313, 64, 313, 313, - 313, 313, 313, 313, 313,-32768,-32768, 72, -20, 254, - 87, 105,-32768,-32768,-32768,-32768, 313,-32768, 369, 313, - 313, 313, 313, 313, 313,-32768,-32768,-32768,-32768, 172, --32768,-32768, 107, 89, 254,-32768, 336, 164, 164,-32768, --32768,-32768, 254,-32768, 313,-32768,-32768, 46,-32768, 336, - 164, 164,-32768,-32768,-32768,-32768,-32768, 111, 114, 115, - -6, -6,-32768, 120, -6, 71, 99, 202,-32768,-32768, - -7, 156, 166, 254,-32768, 326, 326, 360,-32768,-32768, --32768,-32768, 313, 313, 313,-32768, 313,-32768, 172,-32768, --32768,-32768, 143, 109, 125, -5, 140, 254, 156, 156, - 3, 53, 3, 381, 232,-32768, 99, 99, 181, 292, - 313, -6,-32768, -6, -6, 99, 172, 172, 206, 313, - -25, 254,-32768, 198,-32768,-32768, 132, 194,-32768, 174, - 175, 313, -6, 99, 99, 99, 176,-32768, 172, 172, - 172, 99,-32768,-32768,-32768, 172,-32768, 214, 229,-32768 -}; - -static const short yypgoto[] = {-32768, --32768, 215,-32768, -12,-32768,-32768,-32768, 91, -34, -82, - -100,-32768,-32768,-32768,-32768, -73, -95, -159, -36, -1, --32768, 304 -}; - - -#define YYLAST 436 - - -static const short yytable[] = { 20, - 29, 30, 44, 45, 129, 129, 49, 122, 159, 27, - 181, 20, 31, 2, 20, 161, 22, 141, 95, 182, - 58, 96, 187, 60, 61, 50, 51, 52, 53, 54, - 98, 82, 83, 84, 139, 68, 69, 130, 130, 55, - 22, 95, 20, 20, 20, 85, 25, 87, 88, 89, - 90, 91, 92, 93, 141, 149, 150, 164, 15, 42, - 56, 57, 44, 45, 62, 60, 178, 179, 100, 101, - 102, 103, 104, 105, 167, 168, 63, 132, 183, 67, - 136, 138, 129, 177, 95, 49, 142, 125, 193, 194, - 195, 95, 81, 124, 163, 197, 151, 152, 153, 86, - 49, 189, 190, 191, 50, 51, 52, 53, 54, 196, - 78, 137, 79, 144, 145, 130, 44, 45, 94, 50, - 51, 52, 53, 54, 20, 20, 148, 173, 64, 175, - 176, 60, 60, 60, 44, 154, 44, 45, 47, 48, - 97, -1, 49, 106, 2, 107, 3, 171, 188, 4, - 157, 126, 44, 45, 127, 128, 6, 7, 20, 172, - 134, 50, 51, 52, 53, 54, 158, 5, 148, 143, - 64, 8, 9, 10, 2, 11, 3, 12, 156, 4, - 148, 13, 14, 106, 160, 107, 76, 77, 108, 15, - 109, 110, 111, 112, 113, 114, 115, 116, 52, 53, - 54, 8, 9, 10, 2, 11, 3, 169, 180, 4, - 161, 184, 26, 199, 117, 185, 186, 192, 108, 15, - 109, 110, 111, 112, 113, 114, 115, 116, 200, 155, - 41, 8, 9, 10, 2, 11, 3, 0, 0, 4, - 0, 0, 26, 0, 117, 140, 0, 0, 108, 15, - 109, 110, 111, 112, 113, 114, 115, 116, 0, 0, - 0, 8, 9, 10, 2, 11, 3, 49, 0, 4, - 44, 45, 26, 5, 117, 166, 6, 7, 0, 15, - 0, 46, 0, 0, 0, 0, 50, 51, 52, 53, - 54, 8, 9, 10, 2, 11, 3, 12, 0, 4, - 0, 13, 14, 0, 0, 0, 6, 7, 0, 15, - 0, 23, 24, 0, 0, 2, 0, 3, 40, 0, - 4, 8, 9, 10, 0, 11, 0, 12, 2, 0, - 3, 13, 14, 4, 0, 0, 170, 65, 66, 15, - 6, 7, 8, 9, 10, 0, 11, 2, 0, 32, - 0, 0, 33, 26, 0, 8, 9, 10, 0, 11, - 15, 12, 146, 0, 3, 13, 14, 4, 50, 51, - 52, 53, 54, 15, 34, 35, 36, 0, 37, 0, - 0, 0, 49, 0, 0, 38, 0, 0, 0, 8, - 9, 10, 15, 11, 49, 0, 0, 0, 0, 0, - 26, 50, 51, 52, 53, 54, 0, 15, 47, 48, - 99, 0, 49, 50, 51, 52, 53, 54, 0, 0, - 0, 0, 165, 0, 0, 0, 0, 0, 0, 0, - 0, 50, 51, 52, 53, 54 -}; - -static const short yycheck[] = { 1, - 13, 14, 28, 29, 12, 12, 14, 81, 14, 11, - 170, 13, 14, 3, 16, 13, 46, 118, 39, 45, - 22, 42, 182, 25, 26, 33, 34, 35, 36, 37, - 67, 44, 45, 46, 117, 37, 38, 45, 45, 9, - 46, 39, 44, 45, 46, 47, 41, 49, 50, 51, - 52, 53, 54, 55, 155, 129, 130, 153, 48, 43, - 30, 31, 28, 29, 4, 67, 167, 168, 70, 71, - 72, 73, 74, 75, 157, 158, 42, 112, 174, 41, - 115, 116, 12, 166, 39, 14, 121, 42, 189, 190, - 191, 39, 12, 95, 42, 196, 133, 134, 135, 36, - 14, 184, 185, 186, 33, 34, 35, 36, 37, 192, - 12, 41, 14, 126, 127, 45, 28, 29, 47, 33, - 34, 35, 36, 37, 126, 127, 128, 162, 42, 164, - 165, 133, 134, 135, 28, 137, 28, 29, 10, 11, - 36, 0, 14, 12, 3, 14, 5, 160, 183, 8, - 42, 41, 28, 29, 41, 41, 15, 16, 160, 161, - 41, 33, 34, 35, 36, 37, 42, 12, 170, 4, - 42, 30, 31, 32, 3, 34, 5, 36, 36, 8, - 182, 40, 41, 12, 45, 14, 30, 31, 17, 48, - 19, 20, 21, 22, 23, 24, 25, 26, 35, 36, - 37, 30, 31, 32, 3, 34, 5, 27, 3, 8, - 13, 18, 41, 0, 43, 42, 42, 42, 17, 48, - 19, 20, 21, 22, 23, 24, 25, 26, 0, 139, - 16, 30, 31, 32, 3, 34, 5, -1, -1, 8, - -1, -1, 41, -1, 43, 44, -1, -1, 17, 48, - 19, 20, 21, 22, 23, 24, 25, 26, -1, -1, - -1, 30, 31, 32, 3, 34, 5, 14, -1, 8, - 28, 29, 41, 12, 43, 44, 15, 16, -1, 48, - -1, 39, -1, -1, -1, -1, 33, 34, 35, 36, - 37, 30, 31, 32, 3, 34, 5, 36, -1, 8, - -1, 40, 41, -1, -1, -1, 15, 16, -1, 48, - -1, 8, 9, -1, -1, 3, -1, 5, 15, -1, - 8, 30, 31, 32, -1, 34, -1, 36, 3, -1, - 5, 40, 41, 8, -1, -1, 45, 34, 35, 48, - 15, 16, 30, 31, 32, -1, 34, 3, -1, 5, - -1, -1, 8, 41, -1, 30, 31, 32, -1, 34, - 48, 36, 3, -1, 5, 40, 41, 8, 33, 34, - 35, 36, 37, 48, 30, 31, 32, -1, 34, -1, - -1, -1, 14, -1, -1, 41, -1, -1, -1, 30, - 31, 32, 48, 34, 14, -1, -1, -1, -1, -1, - 41, 33, 34, 35, 36, 37, -1, 48, 10, 11, - 42, -1, 14, 33, 34, 35, 36, 37, -1, -1, - -1, -1, 42, -1, -1, -1, -1, -1, -1, -1, - -1, 33, 34, 35, 36, 37 -}; -#define YYPURE 1 - -#line 2 "bison.simple" - -/* Skeleton output parser for bison, - copyright (C) 1984 Bob Corbett and Richard Stallman - - Permission is granted to anyone to make or distribute verbatim copies of this program - provided that the copyright notice and this permission notice are preserved; - and provided that the recipient is not asked to waive or limit his right to - redistribute copies as permitted by this permission notice; - and provided that anyone possessing an executable copy - is granted access to copy the source code, in machine-readable form, - in some reasonable manner. - - Permission is granted to distribute derived works or enhanced versions of - this program under the above conditions with the additional condition - that the entire derivative or enhanced work - must be covered by a permission notice identical to this one. - - Anything distributed as part of a package containing portions derived - from this program, which cannot in current practice perform its function usefully - in the absense of what was derived directly from this program, - is to be considered as forming, together with the latter, - a single work derived from this program, - which must be entirely covered by a permission notice identical to this one - in order for distribution of the package to be permitted. - - In other words, you are welcome to use, share and improve this program. - You are forbidden to forbid anyone else to use, share and improve - what you give them. Help stamp out software-hoarding! */ - -/* This is the parser code that is written into each bison parser - when the %semantic_parser declaration is not specified in the grammar. - It was written by Richard Stallman by simplifying the hairy parser - used when %semantic_parser is specified. */ - -/* Note: there must be only one dollar sign in this file. - It is replaced by the list of actions, each action - as one case of the switch. */ - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY -2 -#define YYEOF 0 -#define YYFAIL goto yyerrlab; - -#define YYTERROR 1 - -#ifndef YYIMPURE -#define YYLEX yylex() -#endif - -#ifndef YYPURE -#define YYLEX yylex(&yylval, &yylloc) -#endif - -/* If nonreentrant, generate the variables here */ - -#ifndef YYIMPURE - -int yychar; /* the lookahead symbol */ -YYSTYPE yylval; /* the semantic value of the */ - /* lookahead symbol */ - -YYLTYPE yylloc; /* location data for the lookahead */ - /* symbol */ - -int yydebug = 0; /* nonzero means print parse trace */ - -#endif /* YYIMPURE */ - - -/* YYMAXDEPTH indicates the initial size of the parser's stacks */ - -#ifndef YYMAXDEPTH -#define YYMAXDEPTH 200 -#endif - -/* YYMAXLIMIT is the maximum size the stacks can grow to - (effective only if the built-in stack extension method is used). */ - -#ifndef YYMAXLIMIT -#define YYMAXLIMIT 10000 -#endif - - -#line 87 "bison.simple" -int -yyparse() -{ - register int yystate; - register int yyn; - register short *yyssp; - register YYSTYPE *yyvsp; - YYLTYPE *yylsp; - int yyerrstatus; /* number of tokens to shift before error messages enabled */ - int yychar1; /* lookahead token as an internal (translated) token number */ - - short yyssa[YYMAXDEPTH]; /* the state stack */ - YYSTYPE yyvsa[YYMAXDEPTH]; /* the semantic value stack */ - YYLTYPE yylsa[YYMAXDEPTH]; /* the location stack */ - - short *yyss = yyssa; /* refer to the stacks thru separate pointers */ - YYSTYPE *yyvs = yyvsa; /* to allow yyoverflow to reallocate them elsewhere */ - YYLTYPE *yyls = yylsa; - - int yymaxdepth = YYMAXDEPTH; - -#ifndef YYPURE - - int yychar; - YYSTYPE yylval; - YYLTYPE yylloc; - - extern int yydebug; - -#endif - - - YYSTYPE yyval; /* the variable used to return */ - /* semantic values from the action */ - /* routines */ - - int yylen; - - if (yydebug) - fprintf(stderr, "Starting parse\n"); - - yystate = 0; - yyerrstatus = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - - /* Initialize stack pointers. - Waste one element of value and location stack - so that they stay on the same level as the state stack. */ - - yyssp = yyss - 1; - yyvsp = yyvs; - yylsp = yyls; - -/* Push a new state, which is found in yystate . */ -/* In all cases, when you get here, the value and location stacks - have just been pushed. so pushing a state here evens the stacks. */ -yynewstate: - - *++yyssp = yystate; - - if (yyssp >= yyss + yymaxdepth - 1) - { - /* Give user a chance to reallocate the stack */ - /* Use copies of these so that the &'s don't force the real ones into memory. */ - YYSTYPE *yyvs1 = yyvs; - YYLTYPE *yyls1 = yyls; - short *yyss1 = yyss; - - /* Get the current used size of the three stacks, in elements. */ - int size = yyssp - yyss + 1; - -#ifdef yyoverflow - /* Each stack pointer address is followed by the size of - the data in use in that stack, in bytes. */ - yyoverflow("parser stack overflow", - &yyss1, size * sizeof (*yyssp), - &yyvs1, size * sizeof (*yyvsp), - &yyls1, size * sizeof (*yylsp), - &yymaxdepth); - - yyss = yyss1; yyvs = yyvs1; yyls = yyls1; -#else /* no yyoverflow */ - /* Extend the stack our own way. */ - if (yymaxdepth >= YYMAXLIMIT) - yyerror("parser stack overflow"); - yymaxdepth *= 2; - if (yymaxdepth > YYMAXLIMIT) - yymaxdepth = YYMAXLIMIT; - yyss = (short *) alloca (yymaxdepth * sizeof (*yyssp)); - bcopy ((char *)yyss1, (char *)yyss, size * sizeof (*yyssp)); - yyls = (YYLTYPE *) alloca (yymaxdepth * sizeof (*yylsp)); - bcopy ((char *)yyls1, (char *)yyls, size * sizeof (*yylsp)); - yyvs = (YYSTYPE *) alloca (yymaxdepth * sizeof (*yyvsp)); - bcopy ((char *)yyvs1, (char *)yyvs, size * sizeof (*yyvsp)); -#endif /* no yyoverflow */ - - yyssp = yyss + size - 1; - yylsp = yyls + size - 1; - yyvsp = yyvs + size - 1; - - if (yydebug) - fprintf(stderr, "Stack size increased to %d\n", yymaxdepth); - - if (yyssp >= yyss + yymaxdepth - 1) - YYERROR; - } - - if (yydebug) - fprintf(stderr, "Entering state %d\n", yystate); - -/* Do appropriate processing given the current state. */ -/* Read a lookahead token if we need one and don't already have one. */ -yyresume: - - /* First try to decide what to do without reference to lookahead token. */ - - yyn = yypact[yystate]; - if (yyn == YYFLAG) - goto yydefault; - - /* Not known => get a lookahead token if don't already have one. */ - - /* yychar is either YYEMPTY or YYEOF - or a valid token in external form. */ - - if (yychar == YYEMPTY) - { - yychar = YYLEX; - } - - /* Convert token to internal form (in yychar1) for indexing tables with */ - - if (yychar <= 0) /* This means end of input. */ - { - yychar1 = 0; - yychar = YYEOF; /* Don't call YYLEX any more */ - - if (yydebug) - fprintf(stderr, "Now at end of input.\n"); - } - else - { - yychar1 = YYTRANSLATE(yychar); - - if (yydebug) - fprintf(stderr, "Parsing next token; it is %d (%s)\n", yychar, yytname[yychar1]); - } - - yyn += yychar1; - if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1) - goto yydefault; - - yyn = yytable[yyn]; - - /* yyn is what to do for this token type in this state. - Negative => reduce, -yyn is rule number. - Positive => shift, yyn is new state. - New state is final state => don't bother to shift, - just return success. - 0, or most negative number => error. */ - - if (yyn < 0) - { - if (yyn == YYFLAG) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } - else if (yyn == 0) - goto yyerrlab; - - if (yyn == YYFINAL) - YYACCEPT; - - /* Shift the lookahead token. */ - - if (yydebug) - fprintf(stderr, "Shifting token %d (%s), ", yychar, yytname[yychar1]); - - /* Discard the token being shifted unless it is eof. */ - if (yychar != YYEOF) - yychar = YYEMPTY; - - *++yyvsp = yylval; - *++yylsp = yylloc; - - /* count tokens shifted since error; after three, turn off error status. */ - if (yyerrstatus) yyerrstatus--; - - yystate = yyn; - goto yynewstate; - -/* Do the default action for the current state. */ -yydefault: - - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - -/* Do a reduction. yyn is the number of a rule to reduce with. */ -yyreduce: - yylen = yyr2[yyn]; - yyval = yyvsp[1-yylen]; /* implement default value of the action */ - - if (yydebug) - { - if (yylen == 1) - fprintf (stderr, "Reducing 1 value via line %d, ", - yyrline[yyn]); - else - fprintf (stderr, "Reducing %d values via line %d, ", - yylen, yyrline[yyn]); - } - - - switch (yyn) { - -case 1: -#line 106 "awk.y" -{ expression_value = yyvsp[0].nodeval; ; - break;} -case 2: -#line 111 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_rule_list,(NODE *) NULL); ; - break;} -case 3: -#line 114 "awk.y" -{ yyval.nodeval = append_right (yyvsp[-1].nodeval, node(yyvsp[0].nodeval, Node_rule_list,(NODE *) NULL)); ; - break;} -case 4: -#line 118 "awk.y" -{ yyval.nodeval = node (yyvsp[-3].nodeval, Node_rule_node, yyvsp[-2].nodeval); ; - break;} -case 5: -#line 123 "awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 6: -#line 125 "awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 7: -#line 127 "awk.y" -{ yyval.nodeval = mkrangenode ( node(yyvsp[-2].nodeval, Node_cond_pair, yyvsp[0].nodeval) ); ; - break;} -case 8: -#line 133 "awk.y" -{ yyval.nodeval = node ((NODE *)NULL, Node_K_BEGIN,(NODE *) NULL); ; - break;} -case 9: -#line 135 "awk.y" -{ yyval.nodeval = node ((NODE *)NULL, Node_K_END,(NODE *) NULL); ; - break;} -case 10: -#line 137 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_not,(NODE *) NULL); ; - break;} -case 11: -#line 139 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_and, yyvsp[0].nodeval); ; - break;} -case 12: -#line 141 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_or, yyvsp[0].nodeval); ; - break;} -case 13: -#line 143 "awk.y" -{ - yyval.nodeval = yyvsp[-1].nodeval; - want_concat_token = 0; - ; - break;} -case 14: -#line 152 "awk.y" -{ ++want_regexp; ; - break;} -case 15: -#line 154 "awk.y" -{ want_regexp = 0; - yyval.nodeval = node (node (make_number ((AWKNUM)0), Node_field_spec, (NODE *)NULL), - Node_match, (NODE *)make_regexp (yyvsp[-1].sval)); - ; - break;} -case 16: -#line 159 "awk.y" -{ ++want_regexp; ; - break;} -case 17: -#line 161 "awk.y" -{ want_regexp = 0; - yyval.nodeval = node (yyvsp[-5].nodeval, yyvsp[-4].nodetypeval, (NODE *)make_regexp(yyvsp[-1].sval)); - ; - break;} -case 18: -#line 165 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, yyvsp[0].nodeval); ; - break;} -case 19: -#line 167 "awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 20: -#line 172 "awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 21: -#line 174 "awk.y" -{ yyval.nodeval = yyvsp[-1].nodeval; ; - break;} -case 22: -#line 179 "awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 23: -#line 181 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_statement_list, (NODE *)NULL); ; - break;} -case 24: -#line 183 "awk.y" -{ yyval.nodeval = append_right(yyvsp[-1].nodeval, node( yyvsp[0].nodeval, Node_statement_list, (NODE *)NULL)); ; - break;} -case 25: -#line 188 "awk.y" -{ yyval.nodetypeval = Node_illegal; ; - break;} -case 26: -#line 190 "awk.y" -{ yyval.nodetypeval = Node_illegal; ; - break;} -case 27: -#line 195 "awk.y" -{ yyval.nodetypeval = Node_illegal; ; - break;} -case 32: -#line 203 "awk.y" -{ yyval.nodeval = yyvsp[-2].nodeval; ; - break;} -case 33: -#line 205 "awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 34: -#line 207 "awk.y" -{ yyval.nodeval = node (yyvsp[-3].nodeval, Node_K_while, yyvsp[0].nodeval); ; - break;} -case 35: -#line 209 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_K_for, (NODE *)make_for_loop (yyvsp[-7].nodeval, yyvsp[-5].nodeval, yyvsp[-3].nodeval)); ; - break;} -case 36: -#line 211 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_K_for, (NODE *)make_for_loop (yyvsp[-6].nodeval, (NODE *)NULL, yyvsp[-3].nodeval)); ; - break;} -case 37: -#line 213 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_K_arrayfor, (NODE *)make_for_loop(variable(yyvsp[-6].sval), (NODE *)NULL, variable(yyvsp[-3].sval))); ; - break;} -case 38: -#line 216 "awk.y" -{ yyval.nodeval = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); ; - break;} -case 39: -#line 219 "awk.y" -{ yyval.nodeval = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); ; - break;} -case 40: -#line 221 "awk.y" -{ ++want_redirect; ; - break;} -case 41: -#line 223 "awk.y" -{ - want_redirect = 0; - /* $4->lnode = NULL; */ - yyval.nodeval = node (yyvsp[-2].nodeval, Node_K_print, yyvsp[-1].nodeval); - ; - break;} -case 42: -#line 229 "awk.y" -{ ++want_redirect; ; - break;} -case 43: -#line 231 "awk.y" -{ - want_redirect = 0; - /* $4->lnode = NULL; */ - yyval.nodeval = node (yyvsp[-2].nodeval, Node_K_printf, yyvsp[-1].nodeval); - ; - break;} -case 44: -#line 237 "awk.y" -{ ++want_redirect; - want_concat_token = 0; ; - break;} -case 45: -#line 240 "awk.y" -{ - want_redirect = 0; - yyval.nodeval = node (yyvsp[-4].nodeval, Node_K_printf, yyvsp[-1].nodeval); - ; - break;} -case 46: -#line 245 "awk.y" -{ yyval.nodeval = node ((NODE *)NULL, Node_K_next, (NODE *)NULL); ; - break;} -case 47: -#line 247 "awk.y" -{ yyval.nodeval = node ((NODE *)NULL, Node_K_exit, (NODE *)NULL); ; - break;} -case 48: -#line 249 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_K_exit, (NODE *)NULL); ; - break;} -case 49: -#line 251 "awk.y" -{ yyval.nodeval = yyvsp[-1].nodeval; ; - break;} -case 50: -#line 257 "awk.y" -{ yyval.nodeval = node (yyvsp[-3].nodeval, Node_K_if, - node (yyvsp[0].nodeval, Node_if_branches, (NODE *)NULL)); ; - break;} -case 51: -#line 261 "awk.y" -{ yyval.nodeval = node (yyvsp[-6].nodeval, Node_K_if, - node (yyvsp[-3].nodeval, Node_if_branches, yyvsp[0].nodeval)); ; - break;} -case 53: -#line 268 "awk.y" -{ yyval.nodetypeval = Node_illegal; ; - break;} -case 54: -#line 273 "awk.y" -{ yyval.nodeval = NULL; /* node (NULL, Node_redirect_nil, NULL); */ ; - break;} -case 55: -#line 277 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, yyvsp[-1].nodetypeval, (NODE *)NULL); ; - break;} -case 56: -#line 283 "awk.y" -{ yyval.nodeval = NULL; /* node(NULL, Node_builtin, NULL); */ ; - break;} -case 57: -#line 285 "awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 58: -#line 290 "awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 59: -#line 292 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_expression_list, (NODE *)NULL); ; - break;} -case 60: -#line 294 "awk.y" -{ yyval.nodeval = append_right(yyvsp[-2].nodeval, node( yyvsp[0].nodeval, Node_expression_list, (NODE *)NULL)); ; - break;} -case 61: -#line 300 "awk.y" -{ yyval.nodeval = snode (yyvsp[-1].nodeval, Node_builtin, yyvsp[-3].ptrval); ; - break;} -case 62: -#line 302 "awk.y" -{ yyval.nodeval = snode ((NODE *)NULL, Node_builtin, yyvsp[0].ptrval); ; - break;} -case 63: -#line 304 "awk.y" -{ yyval.nodeval = yyvsp[-1].nodeval; ; - break;} -case 64: -#line 306 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_unary_minus, (NODE *)NULL); ; - break;} -case 65: -#line 308 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_preincrement, (NODE *)NULL); ; - break;} -case 66: -#line 310 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_predecrement, (NODE *)NULL); ; - break;} -case 67: -#line 312 "awk.y" -{ yyval.nodeval = node (yyvsp[-1].nodeval, Node_postincrement, (NODE *)NULL); ; - break;} -case 68: -#line 314 "awk.y" -{ yyval.nodeval = node (yyvsp[-1].nodeval, Node_postdecrement, (NODE *)NULL); ; - break;} -case 69: -#line 316 "awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 70: -#line 318 "awk.y" -{ yyval.nodeval = make_number (yyvsp[0].fval); ; - break;} -case 71: -#line 320 "awk.y" -{ yyval.nodeval = make_string (yyvsp[0].sval, -1); ; - break;} -case 72: -#line 324 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_times, yyvsp[0].nodeval); ; - break;} -case 73: -#line 326 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_quotient, yyvsp[0].nodeval); ; - break;} -case 74: -#line 328 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_mod, yyvsp[0].nodeval); ; - break;} -case 75: -#line 330 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_plus, yyvsp[0].nodeval); ; - break;} -case 76: -#line 332 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_minus, yyvsp[0].nodeval); ; - break;} -case 77: -#line 335 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_concat, yyvsp[0].nodeval); ; - break;} -case 78: -#line 337 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, yyvsp[0].nodeval); ; - break;} -case 79: -#line 341 "awk.y" -{ yyval.nodeval = snode (yyvsp[-1].nodeval, Node_builtin, yyvsp[-3].ptrval); ; - break;} -case 80: -#line 343 "awk.y" -{ yyval.nodeval = snode ((NODE *)NULL, Node_builtin, yyvsp[0].ptrval); ; - break;} -case 81: -#line 345 "awk.y" -{ yyval.nodeval = yyvsp[-1].nodeval; ; - break;} -case 82: -#line 347 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_unary_minus, (NODE *)NULL); ; - break;} -case 83: -#line 349 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_preincrement, (NODE *)NULL); ; - break;} -case 84: -#line 351 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_predecrement, (NODE *)NULL); ; - break;} -case 85: -#line 353 "awk.y" -{ yyval.nodeval = node (yyvsp[-1].nodeval, Node_postincrement, (NODE *)NULL); ; - break;} -case 86: -#line 355 "awk.y" -{ yyval.nodeval = node (yyvsp[-1].nodeval, Node_postdecrement, (NODE *)NULL); ; - break;} -case 87: -#line 357 "awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 88: -#line 359 "awk.y" -{ yyval.nodeval = make_number (yyvsp[0].fval); ; - break;} -case 89: -#line 361 "awk.y" -{ yyval.nodeval = make_string (yyvsp[0].sval, -1); ; - break;} -case 90: -#line 365 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_times, yyvsp[0].nodeval); ; - break;} -case 91: -#line 367 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_quotient, yyvsp[0].nodeval); ; - break;} -case 92: -#line 369 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_mod, yyvsp[0].nodeval); ; - break;} -case 93: -#line 371 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_plus, yyvsp[0].nodeval); ; - break;} -case 94: -#line 373 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_minus, yyvsp[0].nodeval); ; - break;} -case 95: -#line 376 "awk.y" -{ yyval.nodeval = node (yyvsp[-2].nodeval, Node_concat, yyvsp[0].nodeval); ; - break;} -case 96: -#line 381 "awk.y" -{ yyval.nodeval = variable (yyvsp[0].sval); ; - break;} -case 97: -#line 383 "awk.y" -{ yyval.nodeval = node (variable(yyvsp[-3].sval), Node_subscript, yyvsp[-1].nodeval); ; - break;} -case 98: -#line 385 "awk.y" -{ yyval.nodeval = node (yyvsp[0].nodeval, Node_field_spec, (NODE *)NULL); ; - break;} -} - /* the action file gets copied in in place of this dollarsign */ -#line 303 "bison.simple" - - yyvsp -= yylen; - yylsp -= yylen; - yyssp -= yylen; - - if (yydebug) - { - short *ssp1 = yyss - 1; - fprintf (stderr, "state stack now", yyssp-yyss); - while (ssp1 != yyssp) - fprintf (stderr, " %d", *++ssp1); - fprintf (stderr, "\n"); - } - - *++yyvsp = yyval; - - yylsp++; - if (yylen == 0) - { - yylsp->first_line = yylloc.first_line; - yylsp->first_column = yylloc.first_column; - yylsp->last_line = (yylsp-1)->last_line; - yylsp->last_column = (yylsp-1)->last_column; - yylsp->text = 0; - } - else - { - yylsp->last_line = (yylsp+yylen-1)->last_line; - yylsp->last_column = (yylsp+yylen-1)->last_column; - } - - /* Now "shift" the result of the reduction. - Determine what state that goes to, - based on the state we popped back to - and the rule number reduced by. */ - - yyn = yyr1[yyn]; - - yystate = yypgoto[yyn - YYNTBASE] + *yyssp; - if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTBASE]; - - goto yynewstate; - -yyerrlab: /* here on detecting error */ - - if (! yyerrstatus) - /* If not already recovering from an error, report this error. */ - { - yyerror("parse error"); - } - - if (yyerrstatus == 3) - { - /* if just tried and failed to reuse lookahead token after an error, discard it. */ - - /* return failure if at end of input */ - if (yychar == YYEOF) - YYERROR; - - if (yydebug) - fprintf(stderr, "Discarding token %d (%s).\n", yychar, yytname[yychar1]); - - yychar = YYEMPTY; - } - - /* Else will try to reuse lookahead token - after shifting the error token. */ - - yyerrstatus = 3; /* Each real token shifted decrements this */ - - goto yyerrhandle; - -yyerrdefault: /* current state does not do anything special for the error token. */ - -#if 0 - /* This is wrong; only states that explicitly want error tokens - should shift them. */ - yyn = yydefact[yystate]; /* If its default is to accept any token, ok. Otherwise pop it.*/ - if (yyn) goto yydefault; -#endif - -yyerrpop: /* pop the current state because it cannot handle the error token */ - - if (yyssp == yyss) YYERROR; - yyvsp--; - yylsp--; - yystate = *--yyssp; - - if (yydebug) - { - short *ssp1 = yyss - 1; - fprintf (stderr, "Error: state stack now", yyssp-yyss); - while (ssp1 != yyssp) - fprintf (stderr, " %d", *++ssp1); - fprintf (stderr, "\n"); - } - -yyerrhandle: - - yyn = yypact[yystate]; - if (yyn == YYFLAG) - goto yyerrdefault; - - yyn += YYTERROR; - if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR) - goto yyerrdefault; - - yyn = yytable[yyn]; - if (yyn < 0) - { - if (yyn == YYFLAG) - goto yyerrpop; - yyn = -yyn; - goto yyreduce; - } - else if (yyn == 0) - goto yyerrpop; - - if (yyn == YYFINAL) - YYACCEPT; - - if (yydebug) - fprintf(stderr, "Shifting error token, "); - - *++yyvsp = yylval; - *++yylsp = yylloc; - - yystate = yyn; - goto yynewstate; -} -#line 388 "awk.y" - - - -struct token { - char *operator; - NODETYPE value; - int class; - NODE *(*ptr)(); -}; - -#define NULL 0 - -NODE *do_exp(), *do_getline(), *do_index(), *do_length(), - *do_sqrt(), *do_log(), *do_sprintf(), *do_substr(), - *do_split(), *do_int(); - - /* Special functions for debugging */ -#ifndef FAST -NODE *do_prvars(), *do_bp(); -#endif - -/* Tokentab is sorted ascii ascending order, so it can be binary searched. */ -/* (later. Right now its just sort of linear search (SLOW!!) */ - -static struct token tokentab[] = { - {"BEGIN", Node_illegal, LEX_BEGIN, 0}, - {"END", Node_illegal, LEX_END, 0}, -#ifndef FAST - {"bp", Node_builtin, LEX_BUILTIN, do_bp}, -#endif - {"break", Node_K_break, LEX_BREAK, 0}, - {"continue", Node_K_continue, LEX_CONTINUE, 0}, - {"else", Node_illegal, LEX_ELSE, 0}, - {"exit", Node_K_exit, LEX_EXIT, 0}, - {"exp", Node_builtin, LEX_BUILTIN, do_exp}, - {"for", Node_K_for, LEX_FOR, 0}, - {"getline", Node_builtin, LEX_BUILTIN, do_getline}, - {"if", Node_K_if, LEX_IF, 0}, - {"in", Node_illegal, LEX_IN, 0}, - {"index", Node_builtin, LEX_BUILTIN, do_index}, - {"int", Node_builtin, LEX_BUILTIN, do_int}, - {"length", Node_builtin, LEX_BUILTIN, do_length}, - {"log", Node_builtin, LEX_BUILTIN, do_log}, - {"next", Node_K_next, LEX_NEXT, 0}, - {"print", Node_K_print, LEX_PRINT, 0}, - {"printf", Node_K_printf, LEX_PRINTF, 0}, -#ifndef FAST - {"prvars", Node_builtin, LEX_BUILTIN, do_prvars}, -#endif - {"split", Node_builtin, LEX_BUILTIN, do_split}, - {"sprintf", Node_builtin, LEX_BUILTIN, do_sprintf}, - {"sqrt", Node_builtin, LEX_BUILTIN, do_sqrt}, - {"substr", Node_builtin, LEX_BUILTIN, do_substr}, - {"while", Node_K_while, LEX_WHILE, 0}, - {NULL, Node_illegal, ERROR, 0} -}; - -/* Read one token, getting characters through lexptr. */ - -static int -yylex () -{ - register int c; - register int namelen; - register char *tokstart; - register struct token *toktab; - double atof(); /* JF know what happens if you forget this? */ - - - static did_newline = 0; /* JF the grammar insists that actions end - with newlines. This was easier than hacking - the grammar. */ - int do_concat; - - int seen_e = 0; /* These are for numbers */ - int seen_point = 0; - - retry: - - if(!lexptr) - return 0; - - if (want_regexp) { - want_regexp = 0; - /* there is a potential bug if a regexp is followed by an equal sign: - "/foo/=bar" would result in assign_quotient being returned as the - next token. Nothing is done about it since it is not valid awk, - but maybe something should be done anyway. */ - - tokstart = lexptr; - while (c = *lexptr++) { - switch (c) { - case '\\': - if (*lexptr++ == '\0') { - yyerror ("unterminated regexp ends with \\"); - return ERROR; - } - break; - case '/': /* end of the regexp */ - lexptr--; - yylval.sval = tokstart; - return REGEXP; - case '\n': - case '\0': - yyerror ("unterminated regexp"); - return ERROR; - } - } - } - do_concat=want_concat_token; - want_concat_token=0; - - if(*lexptr=='\0') { - lexptr=0; - return NEWLINE; - } - - /* if lexptr is at white space between two terminal tokens or parens, - it is a concatenation operator. */ - if(do_concat && (*lexptr==' ' || *lexptr=='\t')) { - while (*lexptr == ' ' || *lexptr == '\t') - lexptr++; - if (isalnum(*lexptr) || *lexptr == '\"' || *lexptr == '(' - || *lexptr == '.' || *lexptr == '$') /* the '.' is for decimal pt */ - return CONCAT_OP; - } - - while (*lexptr == ' ' || *lexptr == '\t') - lexptr++; - - tokstart = lexptr; /* JF */ - - switch (c = *lexptr++) { - case 0: - return 0; - - case '\n': - lineno++; - return NEWLINE; - - case '#': /* it's a comment */ - while (*lexptr != '\n' && *lexptr != '\0') - lexptr++; - goto retry; - - case '\\': - if(*lexptr=='\n') { - lexptr++; - goto retry; - } else break; - case ')': - case ']': - ++want_concat_token; - /* fall through */ - case '(': /* JF these were above, but I don't see why they should turn on concat. . . &*/ - case '[': - - case '{': - case ',': /* JF */ - case '$': - case ';': - /* set node type to ILLEGAL because the action should set it to - the right thing */ - yylval.nodetypeval = Node_illegal; - return c; - - case '*': - if(*lexptr=='=') { - yylval.nodetypeval=Node_assign_times; - lexptr++; - return ASSIGNOP; - } - yylval.nodetypeval=Node_illegal; - return c; - - case '/': - if(*lexptr=='=') { - yylval.nodetypeval=Node_assign_quotient; - lexptr++; - return ASSIGNOP; - } - yylval.nodetypeval=Node_illegal; - return c; - - case '%': - if(*lexptr=='=') { - yylval.nodetypeval=Node_assign_mod; - lexptr++; - return ASSIGNOP; - } - yylval.nodetypeval=Node_illegal; - return c; - - case '+': - if(*lexptr=='=') { - yylval.nodetypeval=Node_assign_plus; - lexptr++; - return ASSIGNOP; - } - if(*lexptr=='+') { - yylval.nodetypeval=Node_illegal; - lexptr++; - return INCREMENT; - } - yylval.nodetypeval=Node_illegal; - return c; - - case '!': - if(*lexptr=='=') { - yylval.nodetypeval=Node_notequal; - lexptr++; - return RELOP; - } - if(*lexptr=='~') { - yylval.nodetypeval=Node_nomatch; - lexptr++; - return MATCHOP; - } - yylval.nodetypeval=Node_illegal; - return c; - - case '<': - if(*lexptr=='=') { - yylval.nodetypeval=Node_leq; - lexptr++; - return RELOP; - } - yylval.nodetypeval=Node_less; - return RELOP; - - case '=': - if(*lexptr=='=') { - yylval.nodetypeval=Node_equal; - lexptr++; - return RELOP; - } - yylval.nodetypeval=Node_assign; - return ASSIGNOP; - - case '>': - if(want_redirect) { - if (*lexptr == '>') { - yylval.nodetypeval = Node_redirect_append; - lexptr++; - } else - yylval.nodetypeval = Node_redirect_output; - return REDIRECT_OP; - } - if(*lexptr=='=') { - yylval.nodetypeval=Node_geq; - lexptr++; - return RELOP; - } - yylval.nodetypeval=Node_greater; - return RELOP; - - case '~': - yylval.nodetypeval=Node_match; - return MATCHOP; - - case '}': /* JF added did newline stuff. Easier than hacking the grammar */ - if(did_newline) { - did_newline=0; - return c; - } - did_newline++; - --lexptr; - return NEWLINE; - - case '"': - while (*lexptr != '\0') { - switch (*lexptr++) { - case '\\': - if (*lexptr++ != '\0') - break; - /* fall through */ - case '\n': - yyerror ("unterminated string"); - return ERROR; - case '\"': - yylval.sval = tokstart + 1; /* JF Skip the doublequote */ - ++want_concat_token; - return YSTRING; - } - } - return ERROR; /* JF this was one level up, wrong? */ - - case '-': - if(*lexptr=='=') { - yylval.nodetypeval=Node_assign_minus; - lexptr++; - return ASSIGNOP; - } - if(*lexptr=='-') { - yylval.nodetypeval=Node_illegal; - lexptr++; - return DECREMENT; - } - /* JF I think space tab comma and newline are the legal places for - a UMINUS. Have I missed any? */ - if((!isdigit(*lexptr) && *lexptr!='.') || (lexptr>lexptr_begin+1 && - !index(" \t,\n",lexptr[-2]))) { - /* set node type to ILLEGAL because the action should set it to - the right thing */ - yylval.nodetypeval = Node_illegal; - return c; - } - /* FALL through into number code */ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case '.': - /* It's a number */ - if(c=='-') namelen=1; - else namelen=0; - for (; (c = tokstart[namelen]) != '\0'; namelen++) { - switch (c) { - case '.': - if (seen_point) - goto got_number; - ++seen_point; - break; - case 'e': - case 'E': - if (seen_e) - goto got_number; - ++seen_e; - if (tokstart[namelen+1] == '-' || tokstart[namelen+1] == '+') - namelen++; - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - break; - default: - goto got_number; - } - } - -got_number: - lexptr = tokstart + namelen; - yylval.fval = atof(tokstart); - ++want_concat_token; - return NUMBER; - - case '&': - if(*lexptr=='&') { - yylval.nodetypeval=Node_and; - lexptr++; - return LEX_AND; - } - return ERROR; - - case '|': - if(want_redirect) { - lexptr++; - yylval.nodetypeval = Node_redirect_pipe; - return REDIRECT_OP; - } - if(*lexptr=='|') { - yylval.nodetypeval=Node_or; - lexptr++; - return LEX_OR; - } - return ERROR; - } - - if (!isalpha(c)) { - yyerror ("Invalid char '%c' in expression\n", c); - return ERROR; - } - - /* its some type of name-type-thing. Find its length */ - for (namelen = 0; is_identchar(tokstart[namelen]); namelen++) - ; - - - /* See if it is a special token. */ - for (toktab = tokentab; toktab->operator != NULL; toktab++) { - if(*tokstart==toktab->operator[0] && - !strncmp(tokstart,toktab->operator,namelen) && - toktab->operator[namelen]=='\0') { - lexptr=tokstart+namelen; - if(toktab->class == LEX_BUILTIN) - yylval.ptrval = toktab->ptr; - else - yylval.nodetypeval = toktab->value; - return toktab->class; - } - } - - /* It's a name. See how long it is. */ - yylval.sval = tokstart; - lexptr = tokstart+namelen; - ++want_concat_token; - return NAME; -} - -/*VARARGS1*/ -yyerror (mesg,a1,a2,a3,a4,a5,a6,a7,a8) - char *mesg; -{ - register char *ptr,*beg; - - /* Find the current line in the input file */ - if(!lexptr) { - beg="(END OF FILE)"; - ptr=beg+13; - } else { - if (*lexptr == '\n' && lexptr!=lexptr_begin) - --lexptr; - for (beg = lexptr;beg!=lexptr_begin && *beg != '\n';--beg) - ; - for (ptr = lexptr;*ptr && *ptr != '\n';ptr++) /*jfw: NL isn't guaranteed*/ - ; - if(beg!=lexptr_begin) - beg++; - } - fprintf (stderr, "Error near line %d, '%.*s'\n",lineno, ptr-beg, beg); - /* figure out line number, etc. later */ - fprintf (stderr, mesg, a1, a2, a3, a4, a5, a6, a7, a8); - fprintf (stderr,"\n"); - exit (1); -} - -/* Parse a C escape sequence. STRING_PTR points to a variable - containing a pointer to the string to parse. That pointer - is updated past the characters we use. The value of the - escape sequence is returned. - - A negative value means the sequence \ newline was seen, - which is supposed to be equivalent to nothing at all. - - If \ is followed by a null character, we return a negative - value and leave the string pointer pointing at the null character. - - If \ is followed by 000, we return 0 and leave the string pointer - after the zeros. A value of 0 does not mean end of string. */ - -static int -parse_escape (string_ptr) - char **string_ptr; -{ - register int c = *(*string_ptr)++; - switch (c) - { - case 'a': - return '\a'; - case 'b': - return '\b'; - case 'e': - return 033; - case 'f': - return '\f'; - case 'n': - return '\n'; - case 'r': - return '\r'; - case 't': - return '\t'; - case 'v': - return '\v'; - case '\n': - return -2; - case 0: - (*string_ptr)--; - return 0; - case '^': - c = *(*string_ptr)++; - if (c == '\\') - c = parse_escape (string_ptr); - if (c == '?') - return 0177; - return (c & 0200) | (c & 037); - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - { - register int i = c - '0'; - register int count = 0; - while (++count < 3) - { - if ((c = *(*string_ptr)++) >= '0' && c <= '7') - { - i *= 8; - i += c - '0'; - } - else - { - (*string_ptr)--; - break; - } - } - return i; - } - default: - return c; - } -} @@ -4,6 +4,38 @@ * Written by Paul Rubin, August 1986 * * $Log: awk.y,v $ + * Revision 1.30 89/03/24 15:52:15 david + * add getline production to rexp + * merge HASHNODE with NODE + * + * Revision 1.29 89/03/21 11:57:49 david + * substantial cleanup and code movement from awk1.c + * this and previous two changes represent a major reworking of the grammar + * to fix a number of bugs; two general problems were in I/O redirection + * specifications and in the handling of whitespace -- the general strategies + * in fixing these problems were to define some more specific grammatical + * elements (e.g. simp_exp and rexp) and use these in particular places; + * also got rid of want_concat and want_redirect kludges + * + * Revision 1.28 89/03/15 21:58:01 david + * more grammar changes (explanation to come) plus changes from Arnold: + * new case stuff added and old removed + * tolower and toupper added + * fix vararg stuff + * add new escape sequences + * fix bug in reporting unterminated regexps + * fix to allow -f - + * /dev/fd/N etc special files added + * + * Revision 1.27 89/03/02 21:10:09 david + * intermediate step in major revision -- description later + * + * Revision 1.26 89/01/18 20:39:58 david + * allow regexp && regexp as pattern and get rid of remaining reduce/reduce conflicts + * + * Revision 1.25 89/01/04 21:53:21 david + * purge obstack remnants + * * Revision 1.24 88/12/15 12:52:58 david * changes from Jay to get rid of some reduce/reduce conflicts - some remain * @@ -145,41 +177,36 @@ anyone else from sharing it farther. Help stamp out software hoarding! #include "awk.h" -static int yylex (); +extern void msg(); +extern struct re_pattern_buffer *mk_re_parse(); -/* - * The following variable is used for a very sickening thing. - * The awk language uses white space as the string concatenation - * operator, but having a white space token that would have to appear - * everywhere in all the grammar rules would be unbearable. - * It turns out we can return CONCAT_OP exactly when there really - * is one, just from knowing what kinds of other tokens it can appear - * between (namely, constants, variables, or close parentheses). - * This is because concatenation has the lowest priority of all - * operators. want_concat_token is used to remember that something - * that could be the left side of a concat has just been returned. - * - * If anyone knows a cleaner way to do this (don't look at the Un*x - * code to find one, though), please suggest it. - */ -static int want_concat_token; +NODE *node(); +NODE *lookup(); +NODE *install(); + +static NODE *snode(); +static NODE *mkrangenode(); +static FILE *pathopen(); +static NODE *make_for_loop(); +static NODE *append_right(); +static void func_install(); +static NODE *make_param(); +static int hashf(); +static void pop_params(); +static void pop_var(); +static int yylex (); +static void yyerror(); -/* Two more horrible kludges. The same comment applies to these two too */ static int want_regexp; /* lexical scanning kludge */ -static int want_redirect; /* similarly */ -int lineno = 1; /* for error msgs */ - -/* During parsing of a gawk program, the pointer to the next character - is in this variable. */ -char *lexptr; /* moved it up here */ -char *lexptr_begin; /* for error msgs */ -char *func_def; +static int lineno = 1; /* for error msgs */ +static char *lexptr; /* pointer to next char during parsing */ +static char *lexptr_begin; /* keep track of where we were for error msgs */ +static int curinfile = -1; /* index into sourcefiles[] */ + extern int errcount; extern NODE *begin_block; extern NODE *end_block; -extern struct re_pattern_buffer *mk_re_parse(); extern int param_counter; -struct re_pattern_buffer *rp; %} %union { @@ -192,16 +219,22 @@ struct re_pattern_buffer *rp; } %type <nodeval> function_prologue function_body -%type <nodeval> exp sub_exp start program rule pattern expression_list -%type <nodeval> action variable redirection param_list opt_expression_list +%type <nodeval> rexp exp start program rule simp_exp +%type <nodeval> simp_pattern pattern +%type <nodeval> action variable param_list +%type <nodeval> rexpression_list opt_rexpression_list +%type <nodeval> expression_list opt_expression_list %type <nodeval> statements statement if_statement opt_param_list -%type <nodeval> opt_exp opt_variable regexp -%type <nodetypeval> whitespace r_paren +%type <nodeval> opt_exp opt_variable regexp p_regexp +%type <nodeval> input_redir output_redir +%type <nodetypeval> r_paren comma nls opt_nls print -%token <sval> NAME REGEXP YSTRING +%type <sval> func_name +%token <sval> FUNC_CALL NAME REGEXP YSTRING %token <lval> ERROR INCDEC %token <fval> NUMBER -%token <nodetypeval> ASSIGNOP RELOP MATCHOP NEWLINE REDIRECT_OP CONCAT_OP +%token <nodetypeval> RELOP APPEND_OP +%token <nodetypeval> ASSIGNOP MATCHOP NEWLINE CONCAT_OP %token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE %token <nodetypeval> LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE %token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION @@ -217,14 +250,19 @@ struct re_pattern_buffer *rp; %right '?' ':' %left LEX_OR %left LEX_AND -%left LEX_IN +%left LEX_GETLINE +%left NUMBER +%left FUNC_CALL LEX_SUB LEX_BUILTIN LEX_MATCH %nonassoc MATCHOP -%nonassoc RELOP -%nonassoc REDIRECT_OP +%nonassoc RELOP '<' '>' '|' APPEND_OP +%left NAME +%nonassoc LEX_IN +%left YSTRING +%left '(' ')' %left CONCAT_OP %left '+' '-' %left '*' '/' '%' -%right UNARY +%right '!' UNARY %right '^' %left INCREMENT DECREMENT %left '$' @@ -232,7 +270,7 @@ struct re_pattern_buffer *rp; %% start - : opt_newlines program + : opt_nls program { expression_value = $2; } ; @@ -246,7 +284,7 @@ program yyerrok; } | program rule - /* cons the rule onto the tail of list */ + /* add the rule to the tail of list */ { if ($2 == NULL) $$ = $1; @@ -300,6 +338,8 @@ rule } | pattern action { $$ = node ($1, Node_rule_node, $2); yyerrok; } + | action + { $$ = node ((NODE *)NULL, Node_rule_node, $1); yyerrok; } | pattern statement_term { if($1) $$ = node ($1, Node_rule_node, (NODE *)NULL); yyerrok; } | function_prologue function_body @@ -309,65 +349,81 @@ rule yyerrok; } ; + +func_name + : NAME + | FUNC_CALL + ; function_prologue : LEX_FUNCTION { param_counter = 0; } - NAME whitespace '(' opt_param_list r_paren whitespace + func_name '(' opt_param_list r_paren { - $$ = append_right(make_param($3), $6); + $$ = append_right(make_param($3), $5); } ; function_body - : l_brace statements r_brace statement_term + : l_brace statements r_brace + { $$ = $2; } + ; + + +simp_pattern + : exp + | p_regexp + | p_regexp LEX_AND simp_pattern + { $$ = node ($1, Node_and, $3); } + | p_regexp LEX_OR simp_pattern + { $$ = node ($1, Node_or, $3); } + | '!' p_regexp %prec UNARY + { $$ = node ($2, Node_not,(NODE *) NULL); } + | '(' p_regexp r_paren { $$ = $2; } ; pattern - : /* empty */ - { $$ = NULL; } - | sub_exp - { $$ = $1; } - | regexp + : simp_pattern + | simp_pattern comma simp_pattern + { $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); } + ; + +p_regexp + : regexp { $$ = node( node(make_number((AWKNUM)0),Node_field_spec,(NODE*)NULL), Node_match, $1); } - | pattern LEX_AND pattern - { $$ = node ($1, Node_and, $3); } - | pattern LEX_OR pattern - { $$ = node ($1, Node_or, $3); } - | '!' pattern %prec UNARY - { $$ = node ($2, Node_not,(NODE *) NULL); } - | '(' pattern r_paren - { $$ = $2; } - | pattern ',' pattern - { $$ = mkrangenode ( node($1, Node_cond_pair, $3) ); } ; regexp - /* In this rule, want_regexp tells yylex that the next thing - is a regexp so it should read up to the closing slash. */ + /* + * In this rule, want_regexp tells yylex that the next thing + * is a regexp so it should read up to the closing slash. + */ : '/' { ++want_regexp; } REGEXP '/' - { want_regexp = 0; - rp = mk_re_parse($3); - $$ = node((NODE *)NULL, Node_regex, (NODE *)rp); + { + want_regexp = 0; + $$ = node((NODE *)NULL,Node_regex,(NODE *)mk_re_parse($3, 0)); + $$ -> re_case = 0; + emalloc ($$ -> re_text, char *, strlen($3)+1, "regexp"); + strcpy ($$ -> re_text, $3); } ; action - : l_brace r_brace + : l_brace r_brace opt_semi { /* empty actions are different from missing actions */ $$ = node ((NODE *) NULL, Node_illegal, (NODE *) NULL); } - | l_brace statements r_brace + | l_brace statements r_brace opt_semi { $$ = $2 ; } ; @@ -386,68 +442,48 @@ statements ; statement_term - : NEWLINE opt_newlines - { $<nodetypeval>$ = Node_illegal; want_redirect = 0; } - | semi_colon opt_newlines - { $<nodetypeval>$ = Node_illegal; want_redirect = 0; } - ; - -whitespace - : /* blank */ - { $<nodetypeval>$ = Node_illegal; } - | CONCAT_OP - { $<nodetypeval>$ = Node_illegal; } - | NEWLINE + : nls { $<nodetypeval>$ = Node_illegal; } - | whitespace CONCAT_OP - { $<nodetypeval>$ = Node_illegal; } - | whitespace NEWLINE + | semi opt_nls { $<nodetypeval>$ = Node_illegal; } ; + statement - : semi_colon opt_newlines + : semi opt_nls { $$ = NULL; } - | l_brace statements r_brace whitespace + | l_brace statements r_brace { $$ = $2; } | if_statement { $$ = $1; } - | LEX_WHILE '(' exp r_paren whitespace statement + | LEX_WHILE '(' exp r_paren opt_nls statement { $$ = node ($3, Node_K_while, $6); } - | LEX_DO whitespace statement LEX_WHILE '(' exp r_paren whitespace + | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls { $$ = node ($6, Node_K_do, $3); } - | LEX_FOR '(' opt_exp semi_colon exp semi_colon opt_exp r_paren whitespace statement - { $$ = node ($10, Node_K_for, (NODE *)make_for_loop ($3, $5, $7)); } - | LEX_FOR '(' opt_exp semi_colon semi_colon opt_exp r_paren whitespace statement - { $$ = node ($9, Node_K_for, (NODE *)make_for_loop ($3, (NODE *)NULL, $6)); } - | LEX_FOR '(' NAME CONCAT_OP LEX_IN NAME r_paren whitespace statement - { - $$ = node ($9, Node_K_arrayfor, - make_for_loop(variable($3), - (NODE *)NULL, variable($6))); - } + | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement + { + $$ = node ($8, Node_K_arrayfor, make_for_loop(variable($3), + (NODE *)NULL, variable($5))); + } + | LEX_FOR '(' opt_exp semi exp semi opt_exp r_paren opt_nls statement + { + $$ = node($10, Node_K_for, (NODE *)make_for_loop($3, $5, $7)); + } + | LEX_FOR '(' opt_exp semi semi opt_exp r_paren opt_nls statement + { + $$ = node ($9, Node_K_for, + (NODE *)make_for_loop($3, (NODE *)NULL, $6)); + } | LEX_BREAK statement_term /* for break, maybe we'll have to remember where to break to */ { $$ = node ((NODE *)NULL, Node_K_break, (NODE *)NULL); } | LEX_CONTINUE statement_term /* similarly */ { $$ = node ((NODE *)NULL, Node_K_continue, (NODE *)NULL); } - | LEX_PRINT - { ++want_redirect; } - opt_expression_list redirection statement_term - { $$ = node ($3, Node_K_print, $4); } - | LEX_PRINT '(' opt_expression_list r_paren - { ++want_redirect; want_concat_token = 0; } - redirection statement_term - { $$ = node ($3, Node_K_print, $6); } - | LEX_PRINTF - { ++want_redirect; } - opt_expression_list redirection statement_term - { $$ = node ($3, Node_K_printf, $4); } - | LEX_PRINTF '(' opt_expression_list r_paren - { ++want_redirect; want_concat_token = 0; } - redirection statement_term - { $$ = node ($3, Node_K_printf, $6); } + | print '(' expression_list r_paren output_redir statement_term + { $$ = node ($3, $1, $5); } + | print opt_rexpression_list output_redir statement_term + { $$ = node ($2, $1, $3); } | LEX_NEXT statement_term { $$ = node ((NODE *)NULL, Node_K_next, (NODE *)NULL); } | LEX_EXIT opt_exp statement_term @@ -460,29 +496,53 @@ statement { $$ = $1; } ; +print + : LEX_PRINT + | LEX_PRINTF + ; + if_statement - : LEX_IF '(' exp r_paren whitespace statement - { $$ = node ($3, Node_K_if, - node ($6, Node_if_branches, (NODE *)NULL)); } - | LEX_IF '(' exp r_paren whitespace statement - LEX_ELSE whitespace statement + : LEX_IF '(' exp r_paren opt_nls statement + { + $$ = node($3, Node_K_if, + node($6, Node_if_branches, (NODE *)NULL)); + } + | LEX_IF '(' exp r_paren opt_nls statement + LEX_ELSE opt_nls statement { $$ = node ($3, Node_K_if, node ($6, Node_if_branches, $9)); } ; -opt_newlines +nls + : NEWLINE + { $$ = NULL; } + | nls NEWLINE + { $$ = NULL; } + ; + +opt_nls : /* empty */ - | opt_newlines NEWLINE - { $<nodetypeval>$ = Node_illegal; } + { $$ = NULL; } + | nls + { $$ = NULL; } ; -redirection +input_redir : /* empty */ - { want_redirect = 0; $$ = NULL; } - | REDIRECT_OP - { want_redirect = 0; } - exp - { $$ = node ($3, $1, (NODE *)NULL); } + { $$ = NULL; } + | '<' simp_exp + { $$ = node ($2, Node_redirect_input, (NODE *)NULL); } + ; + +output_redir + : /* empty */ + { $$ = NULL; } + | '>' simp_exp + { $$ = node ($2, Node_redirect_output, (NODE *)NULL); } + | APPEND_OP simp_exp + { $$ = node ($2, Node_redirect_append, (NODE *)NULL); } + | '|' simp_exp + { $$ = node ($2, Node_redirect_pipe, (NODE *)NULL); } ; opt_param_list @@ -494,27 +554,45 @@ opt_param_list param_list : NAME - { - $$ = make_param($1); - } - | param_list ',' NAME - { - $$ = append_right($1, make_param($3)); - yyerrok; - } + { $$ = make_param($1); } + | param_list comma NAME + { $$ = append_right($1, make_param($3)); yyerrok; } | error { $$ = NULL; } | param_list error - | param_list ',' error + | param_list comma error ; /* optional expression, as in for loop */ opt_exp : /* empty */ - { $$ = NULL; /* node(NULL, Node_builtin, NULL); */ } + { $$ = NULL; } | exp ; +opt_rexpression_list + : /* empty */ + { $$ = NULL; } + | rexpression_list + { $$ = $1; } + ; + +rexpression_list + : rexp + { $$ = node ($1, Node_expression_list, (NODE *)NULL); } + | rexpression_list comma rexp + { + $$ = append_right($1, + node( $3, Node_expression_list, (NODE *)NULL)); + yyerrok; + } + | error + { $$ = NULL; } + | rexpression_list error + | rexpression_list error rexp + | rexpression_list comma error + ; + opt_expression_list : /* empty */ { $$ = NULL; } @@ -525,7 +603,7 @@ opt_expression_list expression_list : exp { $$ = node ($1, Node_expression_list, (NODE *)NULL); } - | expression_list ',' exp + | expression_list comma exp { $$ = append_right($1, node( $3, Node_expression_list, (NODE *)NULL)); @@ -535,64 +613,93 @@ expression_list { $$ = NULL; } | expression_list error | expression_list error exp - | expression_list ',' error + | expression_list comma error ; /* Expressions, not including the comma operator. */ -exp : sub_exp - | exp LEX_AND whitespace exp - { $$ = node ($1, Node_and, $4); } - | exp LEX_OR whitespace exp - { $$ = node ($1, Node_or, $4); } - | '!' exp %prec UNARY - { $$ = node ($2, Node_not,(NODE *) NULL); } - | '(' exp r_paren - { $$ = $2; } - ; - -sub_exp : LEX_BUILTIN '(' opt_expression_list r_paren - { $$ = snode ($3, Node_builtin, $1); } - | LEX_BUILTIN - { $$ = snode ((NODE *)NULL, Node_builtin, $1); } - | exp MATCHOP regexp - { $$ = node ($1, $2, $3); } - | exp MATCHOP exp - { $$ = node ($1, $2, $3); } - | exp CONCAT_OP LEX_IN NAME - { $$ = node (variable($4), Node_in_array, $1); } - | '(' expression_list r_paren CONCAT_OP LEX_IN NAME - { $$ = node (variable($6), Node_in_array, $2); } - | LEX_SUB '(' regexp ',' expression_list r_paren - { $$ = node($5, $1, $3); } - | LEX_SUB '(' exp ',' expression_list r_paren - { $$ = node($5, $1, $3); } - | LEX_MATCH '(' exp ',' regexp r_paren - { $$ = node($3, $1, $5); } - | LEX_MATCH '(' exp ',' exp r_paren - { $$ = node($3, $1, $5); } - | LEX_GETLINE - {++want_redirect; } - opt_variable redirection - { - $$ = node ($3, Node_K_getline, $4); - } +exp : variable ASSIGNOP exp + { $$ = node ($1, $2, $3); } + | '(' expression_list r_paren LEX_IN NAME + { $$ = node (variable($5), Node_in_array, $2); } | exp '|' LEX_GETLINE opt_variable { $$ = node ($4, Node_K_getline, node ($1, Node_redirect_pipein, (NODE *)NULL)); } + | LEX_GETLINE opt_variable input_redir + { + $$ = node ($2, Node_K_getline, $3); + } + | exp LEX_AND exp + { $$ = node ($1, Node_and, $3); } + | exp LEX_OR exp + { $$ = node ($1, Node_or, $3); } + | exp MATCHOP regexp + { $$ = node ($1, $2, $3); } + | exp MATCHOP exp + { $$ = node ($1, $2, $3); } + | exp LEX_IN NAME + { $$ = node (variable($3), Node_in_array, $1); } | exp RELOP exp { $$ = node ($1, $2, $3); } + | exp '<' exp + { $$ = node ($1, Node_less, $3); } + | exp '>' exp + { $$ = node ($1, Node_greater, $3); } | exp '?' exp ':' exp - { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5)); } - | NAME '(' opt_expression_list r_paren + { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} + | exp exp %prec CONCAT_OP + { $$ = node ($1, Node_concat, $2); } + | simp_exp + ; + +rexp + : variable ASSIGNOP rexp + { $$ = node ($1, $2, $3); } + | rexp LEX_AND rexp + { $$ = node ($1, Node_and, $3); } + | rexp LEX_OR rexp + { $$ = node ($1, Node_or, $3); } + | LEX_GETLINE opt_variable input_redir { - $$ = node ($3, Node_func_call, make_string($1, strlen($1))); + $$ = node ($2, Node_K_getline, $3); } - | '-' exp %prec UNARY - { $$ = node ($2, Node_unary_minus, (NODE *)NULL); } - | '+' exp %prec UNARY + | rexp MATCHOP regexp + { $$ = node ($1, $2, $3); } + | rexp MATCHOP rexp + { $$ = node ($1, $2, $3); } + | rexp LEX_IN NAME + { $$ = node (variable($3), Node_in_array, $1); } + | rexp RELOP rexp + { $$ = node ($1, $2, $3); } + | rexp '?' rexp ':' rexp + { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} + | rexp rexp %prec CONCAT_OP + { $$ = node ($1, Node_concat, $2); } + | simp_exp + ; + +simp_exp + : '!' simp_exp %prec UNARY + { $$ = node ($2, Node_not,(NODE *) NULL); } + | '(' exp r_paren { $$ = $2; } + | LEX_BUILTIN '(' opt_expression_list r_paren + { $$ = snode ($3, Node_builtin, $1); } + | LEX_BUILTIN + { $$ = snode ((NODE *)NULL, Node_builtin, $1); } + | LEX_SUB '(' regexp comma expression_list r_paren + { $$ = node($5, $1, $3); } + | LEX_SUB '(' exp comma expression_list r_paren + { $$ = node($5, $1, $3); } + | LEX_MATCH '(' exp comma regexp r_paren + { $$ = node($3, $1, $5); } + | LEX_MATCH '(' exp comma exp r_paren + { $$ = node($3, $1, $5); } + | FUNC_CALL '(' opt_expression_list r_paren + { + $$ = node ($3, Node_func_call, make_string($1, strlen($1))); + } | INCREMENT variable { $$ = node ($2, Node_preincrement, (NODE *)NULL); } | DECREMENT variable @@ -608,24 +715,23 @@ sub_exp : LEX_BUILTIN '(' opt_expression_list r_paren | YSTRING { $$ = make_string ($1, -1); } -/* Binary operators in order of decreasing precedence. */ - | exp '^' exp + /* Binary operators in order of decreasing precedence. */ + | simp_exp '^' simp_exp { $$ = node ($1, Node_exp, $3); } - | exp '*' exp + | simp_exp '*' simp_exp { $$ = node ($1, Node_times, $3); } - | exp '/' exp + | simp_exp '/' simp_exp { $$ = node ($1, Node_quotient, $3); } - | exp '%' exp + | simp_exp '%' simp_exp { $$ = node ($1, Node_mod, $3); } - | exp '+' exp + | simp_exp '+' simp_exp { $$ = node ($1, Node_plus, $3); } - | exp '-' exp + | simp_exp '-' simp_exp { $$ = node ($1, Node_minus, $3); } - /* Empty operator. See yylex for disgusting details. */ - | exp CONCAT_OP exp - { $$ = node ($1, Node_concat, $3); } - | variable ASSIGNOP exp - { $$ = node ($1, $2, $3); } + | '-' simp_exp %prec UNARY + { $$ = node ($2, Node_unary_minus, (NODE *)NULL); } + | '+' simp_exp %prec UNARY + { $$ = $2; } ; opt_variable @@ -639,42 +745,52 @@ variable { $$ = variable ($1); } | NAME '[' expression_list ']' { $$ = node (variable($1), Node_subscript, $3); } - | '$' exp + | '$' simp_exp { $$ = node ($2, Node_field_spec, (NODE *)NULL); } ; l_brace - : '{' whitespace + : '{' opt_nls ; r_brace - : '}' { yyerrok; } + : '}' opt_nls { yyerrok; } ; r_paren - : ')' { $<nodetypeval>$ = Node_illegal; yyerrok; } + : ')' { $<nodetypeval>$ = Node_illegal; yyerrok; } ; -semi_colon +opt_semi + : /* empty */ + | semi + ; + +semi : ';' { yyerrok; } ; +comma : ',' opt_nls { $<nodetypeval>$ = Node_illegal; yyerrok; } + ; + %% struct token { - char *operator; - NODETYPE value; - int class; - NODE *(*ptr) (); + char *operator; /* text to match */ + NODETYPE value; /* node type */ + int class; /* lexical class */ + short nostrict; /* ignore if in strict compatibility mode */ + NODE *(*ptr) (); /* function that implements this keyword */ }; #define NULL 0 -NODE *do_exp(), *do_getline(), *do_index(), *do_length(), +extern NODE + *do_exp(), *do_getline(), *do_index(), *do_length(), *do_sqrt(), *do_log(), *do_sprintf(), *do_substr(), *do_split(), *do_system(), *do_int(), *do_close(), *do_atan2(), *do_sin(), *do_cos(), *do_rand(), - *do_srand(), *do_match(); + *do_srand(), *do_match(), *do_tolower(), *do_toupper(); /* Special functions for debugging */ #ifdef DEBUG @@ -684,53 +800,56 @@ NODE *do_prvars(), *do_bp(); /* Tokentab is sorted ascii ascending order, so it can be binary searched. */ static struct token tokentab[] = { - { "BEGIN", Node_illegal, LEX_BEGIN, 0 }, - { "END", Node_illegal, LEX_END, 0 }, - { "atan2", Node_builtin, LEX_BUILTIN, do_atan2 }, + { "BEGIN", Node_illegal, LEX_BEGIN, 0, 0 }, + { "END", Node_illegal, LEX_END, 0, 0 }, + { "atan2", Node_builtin, LEX_BUILTIN, 0, do_atan2 }, #ifdef DEBUG - { "bp", Node_builtin, LEX_BUILTIN, do_bp }, + { "bp", Node_builtin, LEX_BUILTIN, 0, do_bp }, #endif - { "break", Node_K_break, LEX_BREAK, 0 }, - { "close", Node_builtin, LEX_BUILTIN, do_close }, - { "continue", Node_K_continue, LEX_CONTINUE, 0 }, - { "cos", Node_builtin, LEX_BUILTIN, do_cos }, - { "delete", Node_K_delete, LEX_DELETE, 0 }, - { "do", Node_K_do, LEX_DO, 0 }, - { "else", Node_illegal, LEX_ELSE, 0 }, - { "exit", Node_K_exit, LEX_EXIT, 0 }, - { "exp", Node_builtin, LEX_BUILTIN, do_exp }, - { "for", Node_K_for, LEX_FOR, 0 }, - { "func", Node_K_function, LEX_FUNCTION, 0 }, - { "function", Node_K_function, LEX_FUNCTION, 0 }, - { "getline", Node_K_getline, LEX_GETLINE, 0 }, - { "gsub", Node_gsub, LEX_SUB, 0 }, - { "if", Node_K_if, LEX_IF, 0 }, - { "in", Node_illegal, LEX_IN, 0 }, - { "index", Node_builtin, LEX_BUILTIN, do_index }, - { "int", Node_builtin, LEX_BUILTIN, do_int }, - { "length", Node_builtin, LEX_BUILTIN, do_length }, - { "log", Node_builtin, LEX_BUILTIN, do_log }, - { "match", Node_K_match, LEX_MATCH, 0 }, - { "next", Node_K_next, LEX_NEXT, 0 }, - { "print", Node_K_print, LEX_PRINT, 0 }, - { "printf", Node_K_printf, LEX_PRINTF, 0 }, + { "break", Node_K_break, LEX_BREAK, 0, 0 }, + { "close", Node_builtin, LEX_BUILTIN, 0, do_close }, + { "continue", Node_K_continue, LEX_CONTINUE, 0, 0 }, + { "cos", Node_builtin, LEX_BUILTIN, 0, do_cos }, + { "delete", Node_K_delete, LEX_DELETE, 0, 0 }, + { "do", Node_K_do, LEX_DO, 0, 0 }, + { "else", Node_illegal, LEX_ELSE, 0, 0 }, + { "exit", Node_K_exit, LEX_EXIT, 0, 0 }, + { "exp", Node_builtin, LEX_BUILTIN, 0, do_exp }, + { "for", Node_K_for, LEX_FOR, 0, 0 }, + { "func", Node_K_function, LEX_FUNCTION, 0, 0 }, + { "function", Node_K_function, LEX_FUNCTION, 0, 0 }, + { "getline", Node_K_getline, LEX_GETLINE, 0, 0 }, + { "gsub", Node_gsub, LEX_SUB, 0, 0 }, + { "if", Node_K_if, LEX_IF, 0, 0 }, + { "in", Node_illegal, LEX_IN, 0, 0 }, + { "index", Node_builtin, LEX_BUILTIN, 0, do_index }, + { "int", Node_builtin, LEX_BUILTIN, 0, do_int }, + { "length", Node_builtin, LEX_BUILTIN, 0, do_length }, + { "log", Node_builtin, LEX_BUILTIN, 0, do_log }, + { "match", Node_K_match, LEX_MATCH, 0, 0 }, + { "next", Node_K_next, LEX_NEXT, 0, 0 }, + { "print", Node_K_print, LEX_PRINT, 0, 0 }, + { "printf", Node_K_printf, LEX_PRINTF, 0, 0 }, #ifdef DEBUG - { "prvars", Node_builtin, LEX_BUILTIN, do_prvars }, + { "prvars", Node_builtin, LEX_BUILTIN, 0, do_prvars }, #endif - { "rand", Node_builtin, LEX_BUILTIN, do_rand }, - { "return", Node_K_return, LEX_RETURN, 0 }, - { "sin", Node_builtin, LEX_BUILTIN, do_sin }, - { "split", Node_builtin, LEX_BUILTIN, do_split }, - { "sprintf", Node_builtin, LEX_BUILTIN, do_sprintf }, - { "sqrt", Node_builtin, LEX_BUILTIN, do_sqrt }, - { "srand", Node_builtin, LEX_BUILTIN, do_srand }, - { "sub", Node_sub, LEX_SUB, 0 }, - { "substr", Node_builtin, LEX_BUILTIN, do_substr }, - { "system", Node_builtin, LEX_BUILTIN, do_system }, - { "while", Node_K_while, LEX_WHILE, 0 }, + { "rand", Node_builtin, LEX_BUILTIN, 0, do_rand }, + { "return", Node_K_return, LEX_RETURN, 0, 0 }, + { "sin", Node_builtin, LEX_BUILTIN, 0, do_sin }, + { "split", Node_builtin, LEX_BUILTIN, 0, do_split }, + { "sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf }, + { "sqrt", Node_builtin, LEX_BUILTIN, 0, do_sqrt }, + { "srand", Node_builtin, LEX_BUILTIN, 0, do_srand }, + { "sub", Node_sub, LEX_SUB, 0, 0 }, + { "substr", Node_builtin, LEX_BUILTIN, 0, do_substr }, + { "system", Node_builtin, LEX_BUILTIN, 0, do_system }, + { "tolower", Node_builtin, LEX_BUILTIN, 1, do_tolower }, + { "toupper", Node_builtin, LEX_BUILTIN, 1, do_toupper }, + { "while", Node_K_while, LEX_WHILE, 0, 0 }, }; /* VARARGS0 */ +static void yyerror(va_alist) va_dcl { @@ -744,6 +863,9 @@ va_dcl errcount++; va_start(args); mesg = va_arg(args, char *); + if (! list) + a1 = va_arg(args, char *); + va_end(args); if (mesg || !list) { /* Find the current line in the input file */ if (!lexptr) { @@ -771,33 +893,26 @@ va_dcl putc(' ', stderr); if (mesg) { vfprintf(stderr, mesg, args); - va_end(args); putc('\n', stderr); exit(1); } else { - a1 = va_arg(args, char *); if (a1) { fputs("expecting: ", stderr); fputs(a1, stderr); list = 1; - va_end(args); return; } } - va_end(args); return; } - a1 = va_arg(args, char *); if (a1) { fputs(" or ", stderr); fputs(a1, stderr); - va_end(args); putc('\n', stderr); return; } putc('\n', stderr); list = 0; - va_end(args); } /* @@ -820,8 +935,14 @@ parse_escape(string_ptr) char **string_ptr; { register int c = *(*string_ptr)++; + register int i; switch (c) { + case 'a': + if (strict) + goto def; + else + return BELL; case 'b': return '\b'; case 'f': @@ -833,7 +954,10 @@ char **string_ptr; case 't': return '\t'; case 'v': - return '\v'; + if (strict) + goto def; + else + return '\v'; case '\n': return -2; case 0: @@ -862,7 +986,27 @@ char **string_ptr; } return i; } + case 'x': + if (strict) + goto def; + + i = 0; + while (1) { + if (isxdigit((c = *(*string_ptr)++))) { + if (isdigit(c)) + i += c - '0'; + else if (isupper(c)) + i += c - 'A' + 10; + else + i += c - 'a' + 10; + } else { + (*string_ptr)--; + break; + } + } + return i; default: + def: return c; } } @@ -874,8 +1018,6 @@ char **string_ptr; * the file name is made available in an external variable. */ -int curinfile = -1; - static int yylex() { @@ -884,16 +1026,13 @@ yylex() register char *tokstart; register struct token *tokptr; char *tokkey; - extern double atof(); /* know what happens if you forget this? */ static did_newline = 0; /* the grammar insists that actions end * with newlines. This was easier than * hacking the grammar. */ - int do_concat; int seen_e = 0; /* These are for numbers */ int seen_point = 0; extern char **sourcefile; extern int tempsource, numfiles; - extern FILE *pathopen(); static int file_opened = 0; static FILE *fin; static char cbuf[BUFSIZ]; @@ -964,13 +1103,12 @@ retry: case '\n': lineno++; case '\0': + lexptr--; /* so error messages work */ yyerror("unterminated regexp"); return ERROR; } } } - do_concat = want_concat_token; - want_concat_token = 0; if (*lexptr == '\n') { lexptr++; @@ -978,20 +1116,6 @@ retry: return NEWLINE; } - /* - * if lexptr is at white space between two terminal tokens or parens, - * it is a concatenation operator. - */ - if (do_concat && (*lexptr == ' ' || *lexptr == '\t')) { - while (*lexptr == ' ' || *lexptr == '\t') - lexptr++; - if (isalnum(*lexptr) || *lexptr == '_' || *lexptr == '\"' || - *lexptr == '(' || *lexptr == '.' || *lexptr == '$' || - (*lexptr == '+' && *(lexptr+1) == '+') || - (*lexptr == '-' && *(lexptr+1) == '-')) - /* the '.' is for decimal pt */ - return CONCAT_OP; - } while (*lexptr == ' ' || *lexptr == '\t') lexptr++; @@ -1014,14 +1138,11 @@ retry: if (*lexptr == '\n') { lineno++; lexptr++; - want_concat_token = do_concat; goto retry; } else break; case ')': case ']': - ++want_concat_token; - /* fall through */ case '(': case '[': case '$': @@ -1038,11 +1159,6 @@ retry: case '{': case ',': - while (isspace(*lexptr)) { - if (*lexptr == '\n') - lineno++; - lexptr++; - } yylval.nodetypeval = Node_illegal; return c; @@ -1115,10 +1231,6 @@ retry: } if (*lexptr == '~') { yylval.nodetypeval = Node_nomatch; - if (! strict && lexptr[1] == '~') { - yylval.nodetypeval = Node_case_nomatch; - lexptr++; - } lexptr++; return MATCHOP; } @@ -1126,17 +1238,13 @@ retry: return c; case '<': - if (want_redirect) { - yylval.nodetypeval = Node_redirect_input; - return REDIRECT_OP; - } if (*lexptr == '=') { yylval.nodetypeval = Node_leq; lexptr++; return RELOP; } yylval.nodetypeval = Node_less; - return RELOP; + return c; case '=': if (*lexptr == '=') { @@ -1148,28 +1256,20 @@ retry: return ASSIGNOP; case '>': - if (want_redirect) { - if (*lexptr == '>') { - yylval.nodetypeval = Node_redirect_append; - lexptr++; - } else - yylval.nodetypeval = Node_redirect_output; - return REDIRECT_OP; - } if (*lexptr == '=') { yylval.nodetypeval = Node_geq; lexptr++; return RELOP; + } else if (*lexptr == '>') { + yylval.nodetypeval = Node_redirect_append; + lexptr++; + return APPEND_OP; } yylval.nodetypeval = Node_greater; - return RELOP; + return c; case '~': yylval.nodetypeval = Node_match; - if (! strict && *lexptr == '~') { - yylval.nodetypeval = Node_case_match; - lexptr++; - } return MATCHOP; case '}': @@ -1198,7 +1298,6 @@ retry: case '\"': /* Skip the doublequote */ yylval.sval = tokstart + 1; - ++want_concat_token; return YSTRING; } } @@ -1260,7 +1359,8 @@ retry: if (seen_e) goto got_number; ++seen_e; - if (tokstart[namelen + 1] == '-' || tokstart[namelen + 1] == '+') + if (tokstart[namelen + 1] == '-' || + tokstart[namelen + 1] == '+') namelen++; break; case '0': @@ -1282,13 +1382,21 @@ retry: got_number: lexptr = tokstart + namelen; yylval.fval = atof(tokstart); - ++want_concat_token; return NUMBER; case '&': if (*lexptr == '&') { yylval.nodetypeval = Node_and; - lexptr++; + while (c = *++lexptr) { + if (c == '#') + while ((c = *++lexptr) != '\n' + && c != '\0') + ; + if (c == '\n') + lineno++; + else if (!isspace(c)) + break; + } return LEX_AND; } return ERROR; @@ -1296,11 +1404,17 @@ got_number: case '|': if (*lexptr == '|') { yylval.nodetypeval = Node_or; - lexptr++; + while (c = *++lexptr) { + if (c == '#') + while ((c = *++lexptr) != '\n' + && c != '\0') + ; + if (c == '\n') + lineno++; + else if (!isspace(c)) + break; + } return LEX_OR; - } else if (want_redirect) { - yylval.nodetypeval = Node_redirect_pipe; - return REDIRECT_OP; } else { yylval.nodetypeval = Node_illegal; return c; @@ -1338,12 +1452,12 @@ got_number: low = mid + 1; } else { lexptr = tokstart + namelen; + if (strict && tokentab[mid].nostrict) + break; if (tokentab[mid].class == LEX_BUILTIN) yylval.ptrval = tokentab[mid].ptr; else yylval.nodetypeval = tokentab[mid].value; - if (tokentab[mid].class == LEX_PRINT) - want_redirect++; return tokentab[mid].class; } } @@ -1351,27 +1465,32 @@ got_number: /* It's a name. See how long it is. */ yylval.sval = tokkey; lexptr = tokstart + namelen; - ++want_concat_token; - return NAME; + if (*lexptr == '(') + return FUNC_CALL; + else + return NAME; } #ifndef DEFPATH #define DEFPATH ".:/usr/lib/awk:/usr/local/lib/awk" #endif -FILE * +static FILE * pathopen (file) char *file; { static char defpath[] = DEFPATH; static char *savepath; static int first = 1; - extern char *getenv (); char *awkpath, *cp; char trypath[BUFSIZ]; - FILE *fp; + FILE *fp, *devopen(); + extern char *getenv (); extern int debugging; + if (strcmp (file, "-") == 0) + return (stdin); + if (strict) return (fopen (file, "r")); @@ -1384,7 +1503,7 @@ char *file; awkpath = savepath; if (index (file, '/') != NULL) /* some kind of path name, no search */ - return (fopen (file, "r")); + return (devopen (file, "r")); do { for (cp = trypath; *awkpath && *awkpath != ':'; ) @@ -1392,7 +1511,7 @@ char *file; *cp++ = '/'; *cp = '\0'; /* clear left over junk */ strcat (cp, file); - if ((fp = fopen (trypath, "r")) != NULL) + if ((fp = devopen (trypath, "r")) != NULL) return (fp); /* no luck, keep going */ @@ -1400,3 +1519,301 @@ char *file; } while (*awkpath); return (NULL); } + +/* devopen --- handle /dev/std{in,out,err}, /dev/fd/N, regular files */ + +FILE * +devopen (name, mode) +char *name, *mode; +{ + int openfd = -1; + FILE *fdopen (); + char *cp; + +#if defined(STRICT) || defined(NO_DEV_FD) + return (fopen (name, mode)); +#else + if (strict) + return (fopen (name, mode)); + + if (name[0] != '/' || !STREQN (name, "/dev/", 5)) + return (fopen (name, mode)); + else + cp = name + 5; + + /* XXX - first three tests ignore mode */ + if (STREQ(cp, "stdin")) + return (stdin); + else if (STREQ(cp, "stdout")) + return (stdout); + else if (STREQ(cp, "stderr")) + return (stderr); + else if (STREQN(cp, "fd/", 3)) { + if (sscanf (cp, "%d", & openfd) == 1 && openfd >= 0) + /* got something */ + return (fdopen (openfd, mode)); + else + return (NULL); + } else + return (fopen (name, mode)); +#endif +} + +static NODE * +node_common(op) +NODETYPE op; +{ + register NODE *r; + extern int numfiles; + extern int tempsource; + extern char **sourcefile; + + r = newnode(op); + r->source_line = lineno; + if (numfiles > 1 && !tempsource) + r->source_file = sourcefile[curinfile]; + else + r->source_file = NULL; + return r; +} + +/* + * This allocates a node with defined lnode and rnode. + * This should only be used by yyparse+co while reading in the program + */ +NODE * +node(left, op, right) +NODE *left, *right; +NODETYPE op; +{ + register NODE *r; + + r = node_common(op); + r->lnode = left; + r->rnode = right; + return r; +} + +/* + * This allocates a node with defined subnode and proc + * Otherwise like node() + */ +static NODE * +snode(subn, op, procp) +NODETYPE op; +NODE *(*procp) (); +NODE *subn; +{ + register NODE *r; + + r = node_common(op); + r->subnode = subn; + r->proc = procp; + return r; +} + +/* + * This allocates a Node_line_range node with defined condpair and + * zeroes the trigger word to avoid the temptation of assuming that calling + * 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'. + */ +/* Otherwise like node() */ +static NODE * +mkrangenode(cpair) +NODE *cpair; +{ + register NODE *r; + + r = newnode(Node_line_range); + r->condpair = cpair; + r->triggered = 0; + return r; +} + +/* Build a for loop */ +static NODE * +make_for_loop(init, cond, incr) +NODE *init, *cond, *incr; +{ + register FOR_LOOP_HEADER *r; + NODE *n; + + emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop"); + n = newnode(Node_illegal); + r->init = init; + r->cond = cond; + r->incr = incr; + n->sub.nodep.r.hd = r; + return n; +} + +/* + * Install a name in the hash table specified, even if it is already there. + * Name stops with first non alphanumeric. Caller must check against + * redefinition if that is desired. + */ +NODE * +install(table, name, value) +NODE **table; +char *name; +NODE *value; +{ + register NODE *hp; + register int i, len, bucket; + register char *p; + + len = 0; + p = name; + while (is_identchar(*p)) + p++; + len = p - name; + + hp = newnode(Node_hashnode); + bucket = hashf(name, len, HASHSIZE); + hp->hnext = table[bucket]; + table[bucket] = hp; + hp->hlength = len; + hp->hvalue = value; + emalloc(hp->hname, char *, len + 1, "install"); + bcopy(name, hp->hname, len); + hp->hname[len] = '\0'; + hp->hvalue->varname = hp->hname; + return hp->hvalue; +} + +/* + * find the most recent hash node for name name (ending with first + * non-identifier char) installed by install + */ +NODE * +lookup(table, name) +NODE **table; +char *name; +{ + register char *bp; + register NODE *bucket; + register int len; + + for (bp = name; is_identchar(*bp); bp++) + ; + len = bp - name; + bucket = table[hashf(name, len, HASHSIZE)]; + while (bucket) { + if (bucket->hlength == len && STREQN(bucket->hname, name, len)) + return bucket->hvalue; + bucket = bucket->hnext; + } + return NULL; +} + +#define HASHSTEP(old, c) ((old << 1) + c) +#define MAKE_POS(v) (v & ~0x80000000) /* make number positive */ + +/* + * return hash function on name. + */ +static int +hashf(name, len, hashsize) +register char *name; +register int len; +int hashsize; +{ + register int r = 0; + + while (len--) + r = HASHSTEP(r, *name++); + + r = MAKE_POS(r) % hashsize; + return r; +} + +/* + * Add new to the rightmost branch of LIST. This uses n^2 time, but doesn't + * get used enough to make optimizing worth it. . . + */ +/* You don't believe me? Profile it yourself! */ +static NODE * +append_right(list, new) +NODE *list, *new; +{ + register NODE *oldlist; + + oldlist = list; + while (list->rnode != NULL) + list = list->rnode; + list->rnode = new; + return oldlist; +} + +/* + * check if name is already installed; if so, it had better have Null value, + * in which case def is added as the value. Otherwise, install name with def + * as value. + */ +static void +func_install(params, def) +NODE *params; +NODE *def; +{ + NODE *r; + + pop_params(params->rnode); + pop_var(params, 0); + r = lookup(variables, params->param); + if (r != NULL) { + fatal("function name `%s' previously defined", params->param); + } else + (void) install(variables, params->param, + node(params, Node_func, def)); +} + +static void +pop_var(np, freeit) +NODE *np; +int freeit; +{ + register char *bp; + register NODE *bucket, **save; + register int len; + char *name; + + name = np->param; + for (bp = name; is_identchar(*bp); bp++) + ; + len = bp - name; + save = &(variables[hashf(name, len, HASHSIZE)]); + for (bucket = *save; bucket; bucket = bucket->hnext) { + if (len == bucket->hlength && STREQN(bucket->hname, name, len)) { + *save = bucket->hnext; + freenode(bucket); + free(bucket->hname); + if (freeit) + free(np->param); + return; + } + save = &(bucket->hnext); + } +} + +static void +pop_params(params) +NODE *params; +{ + register NODE *np; + + for (np = params; np != NULL; np = np->rnode) + pop_var(np, 1); +} + +static NODE * +make_param(name) +char *name; +{ + NODE *r; + + r = newnode(Node_param_list); + r->param = name; + r->rnode = NULL; + r->param_cnt = param_counter++; + return (install(variables, name, r)); +} @@ -1,4 +1,3 @@ - /* * awk1 -- Expression tree constructors and main program for gawk. * @@ -6,6 +5,20 @@ * 1986 * * $Log: awk1.c,v $ + * Revision 1.34 89/03/24 15:57:21 david + * hashnode changes to NODE + * + * Revision 1.33 89/03/21 10:58:26 david + * cleanup and movement of code to awk.y + * + * Revision 1.32 89/03/15 22:00:35 david + * old case stuff removed + * new case stuff added + * add new escape sequences + * + * Revision 1.31 89/03/15 21:31:10 david + * purge obstack stuff + * * Revision 1.30 88/12/15 12:56:18 david * changes from Jay to compile under gcc and fixing a bug in treatment of * input files @@ -148,6 +161,22 @@ #include "awk.h" +extern int yyparse(); +extern void init_vars(); +extern void set_fs(); +extern int close_io(); +extern void init_args(); +extern void init_fields(); +extern int inrec(); +extern int getopt(); +extern int re_set_syntax(); +extern NODE *node(); + +static void do_file(); +static void usage(); + +NODE *variables[HASHSIZE]; + /* * The parse tree and field nodes are stored here. Parse_end is a dummy item * used to free up unneeded fields without freeing the program being run @@ -159,7 +188,6 @@ int param_counter; NODE *Nnull_string; /* The special variable that contains the name of the current input file */ -extern NODE *FILENAME_node; extern NODE *ARGC_node; extern NODE *ARGV_node; @@ -187,10 +215,10 @@ int tempsource = 0; /* source is in a temp file */ char **sourcefile = NULL; /* source file name(s) */ int numfiles = -1; /* how many source files */ -int ignorecase = 0; /* global flag for ignoring case */ - int strict = 0; /* turn off gnu extensions */ +NODE *expression_value; + main(argc, argv) int argc; char **argv; @@ -201,25 +229,23 @@ char **argv; extern int yydebug; #endif - extern char *lexptr; - extern char *lexptr_begin; extern char *version_string; - extern FILE *nextfile(); - FILE *fp, *fopen(); + FILE *fp; static char template[] = "/tmp/gawk.XXXXX"; - char *mktemp (); int c; - extern int opterr, optind, getopt(); + extern int opterr, optind; extern char *optarg; - char *cp, *rindex(); + char *cp; + extern char *rindex(); + extern char *mktemp(); /* * for strict to work, legal options must be first */ -#define EXTENSIONS 4 /* where to clear */ +#define EXTENSIONS 5 /* where to clear */ #ifdef DEBUG - char *awk_opts = "F:f:ivdD"; + char *awk_opts = "F:f:vVdD"; #else - char *awk_opts = "F:f:iv"; + char *awk_opts = "F:f:vV"; #endif #ifdef DEBUG @@ -258,7 +284,7 @@ char **argv; cp++; else cp = myname; - if (strcmp (cp, "awk") == 0) + if (STREQ(cp, "awk")) strict = 1; #endif @@ -292,14 +318,14 @@ char **argv; sourcefile[++numfiles] = optarg; break; - case 'i': - ignorecase = 1; - break; - case 'v': fprintf(stderr, "%s", version_string); break; + case 'V': + copyleft(); + break; + case '?': default: /* getopt will print a message for us */ @@ -336,7 +362,6 @@ char **argv; init_args(optind, argc, myname, argv); /* Read in the program */ - lexptr_begin = lexptr; if (yyparse() || errcount) exit(1); @@ -367,19 +392,20 @@ char **argv; exit(exit_val); } +static void do_file(fp) FILE *fp; { input_file = fp; /* This is where it spends all its time. The infamous MAIN LOOP */ - if (inrec() == 0) { + if (inrec() == 0) while (interpret(expression_value) && inrec() == 0) ; - } if (fp != stdin) (void) fclose(fp); } +static void usage() { #ifdef STRICT @@ -395,84 +421,110 @@ usage() exit(11); } -NODE * -node_common(op) -NODETYPE op; +/* Generate compiled regular expressions */ +struct re_pattern_buffer * +make_regexp(s, ignorecase) +NODE *s; +int ignorecase; { - register NODE *r; - extern int lineno; - extern int numfiles; - extern int tempsource; - extern char **sourcefile; - extern int curinfile; - - emalloc(r, NODE *, sizeof(NODE), "node_common"); - r->type = op; - r->source_line = lineno; - if (numfiles > 1 && !tempsource) - r->source_file = sourcefile[curinfile]; + struct re_pattern_buffer *rp; + char *err; + + emalloc(rp, struct re_pattern_buffer *, sizeof(*rp), "make_regexp"); + bzero((char *) rp, sizeof(*rp)); + emalloc(rp->buffer, char *, 8, "make_regexp"); + rp->allocated = 8; + emalloc(rp->fastmap, char *, 256, "make_regexp"); + + if (! strict && ignorecase) + rp->translate = casetable; else - r->source_file = NULL; - return r; + rp->translate = NULL; + if ((err = re_compile_pattern(s->stptr, s->stlen, rp)) != NULL) + fatal("%s: /%s/", err, s->stptr); + free_temp(s); + return rp; } -/* - * This allocates a node with defined lnode and rnode. - * This should only be used by yyparse+co while reading in the program - */ -NODE * -node(left, op, right) -NODE *left, *right; -NODETYPE op; +FILE * +nextfile() { - register NODE *r; + static int i = 1; + static int files = 0; + char *arg; + char *cp; + FILE *fp; - r = node_common(op); - r->lnode = left; - r->rnode = right; - return r; + for (; i < (int) (ARGC_node->lnode->numbr); i++) { + arg = (*assoc_lookup(ARGV_node, tmp_number((AWKNUM) i)))->stptr; + if (*arg == '\0') + continue; + cp = index(arg, '='); + if (cp != NULL) { + *cp++ = '\0'; + variable(arg)->var_value = make_string(cp, strlen(cp)); + } else { + extern NODE *deref; + + files++; + if (STREQ(arg, "-")) + fp = stdin; + else + fp = fopen(arg, "r"); + if (fp == NULL) + fatal("cannot open file `%s' for reading (%s)", + arg, sys_errlist[errno]); + /* NOTREACHED */ + /* This is a kludge. */ + deref = FILENAME_node->var_value; + do_deref(); + FILENAME_node->var_value = + make_string(arg, strlen(arg)); + FNR_node->var_value->numbr = 0.0; + i++; + return fp; + } + } + if (files == 0) { + files++; + /* no args. -- use stdin */ + /* FILENAME is init'ed to "-" */ + /* FNR is init'ed to 0 */ + return stdin; + } + return NULL; } -/* - * This allocates a node with defined subnode and proc - * Otherwise like node() - */ +/* Name points to a variable name. Make sure its in the symbol table */ NODE * -snode(subn, op, procp) -NODETYPE op; -NODE *(*procp) (); -NODE *subn; +variable(name) +char *name; { register NODE *r; - r = node_common(op); - r->subnode = subn; - r->proc = procp; + if ((r = lookup(variables, name)) == NULL) + r = install(variables, name, + node(Nnull_string, Node_var, (NODE *) NULL)); return r; } -/* - * This allocates a Node_line_range node with defined condpair and - * zeroes the trigger word to avoid the temptation of assuming that calling - * 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'. - */ -/* Otherwise like node() */ +/* Create a special variable */ NODE * -mkrangenode(cpair) -NODE *cpair; +spc_var(name, value) +char *name; +NODE *value; { register NODE *r; - emalloc(r, NODE *, sizeof(NODE), "mkrangenode"); - r->type = Node_line_range; - r->condpair = cpair; - r->triggered = 0; + if ((r = lookup(variables, name)) == NULL) + r = install(variables, name, node(value, Node_var, (NODE *) NULL)); return r; } struct re_pattern_buffer * -mk_re_parse(s) +mk_re_parse(s, ignorecase) char *s; +int ignorecase; { register char *src, *dest; int c; @@ -481,6 +533,12 @@ char *s; if (*src == '\\') { c = *++src; switch (c) { + case 'a': + if (strict) + goto def; + else + *dest++ = BELL; + break; case 'b': *dest++ = '\b'; break; @@ -497,7 +555,30 @@ char *s; *dest++ = '\t'; break; case 'v': - *dest++ = '\v'; + if (strict) + goto def; + else + *dest++ = '\v'; + break; + case 'x': + if (strict) + goto def; + else { + register int i = 0; + register int c; + + while ((c = *++src)) { + if (! isxdigit(c)) + break; + if (isdigit(c)) + i += c - '0'; + else if (isupper(c)) + i += c - 'A' + 10; + else + i += c - 'a' + 10; + } + *dest++ = i; + } break; case '0': case '1': @@ -522,6 +603,7 @@ char *s; } break; default: + def: *dest++ = '\\'; *dest++ = c; break; @@ -531,310 +613,25 @@ char *s; else *dest++ = *src; } - return make_regexp(tmp_string(s, dest-s)); + return make_regexp(tmp_string(s, dest-s), ignorecase); } -/* Generate compiled regular expressions */ -struct re_pattern_buffer * -make_regexp(s) -NODE *s; +copyleft () { - struct re_pattern_buffer *rp; - char *err; - - emalloc(rp, struct re_pattern_buffer *, sizeof(*rp), "make_regexp"); - bzero((char *) rp, sizeof(*rp)); - emalloc(rp->buffer, char *, 8, "make_regexp"); - rp->allocated = 8; - emalloc(rp->fastmap, char *, 256, "make_regexp"); - - if ((err = re_compile_pattern(s->stptr, s->stlen, rp)) != NULL) - fatal("%s: /%s/", err, s->stptr); - free_temp(s); - return rp; -} - -/* Build a for loop */ -NODE * -make_for_loop(init, cond, incr) -NODE *init, *cond, *incr; -{ - register FOR_LOOP_HEADER *r; - NODE *n; - - emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop"); - emalloc(n, NODE *, sizeof(NODE), "make_for_loop"); - r->init = init; - r->cond = cond; - r->incr = incr; - n->type = Node_illegal; - n->sub.nodep.r.hd = r; - return n; -} - -/* Name points to a variable name. Make sure its in the symbol table */ -NODE * -variable(name) -char *name; -{ - register NODE *r; - NODE *lookup(), *install(), *make_name(); - - if ((r = lookup(variables, name)) == NULL) - r = install(variables, name, - node(Nnull_string, Node_var, (NODE *) NULL)); - return r; -} - -/* Create a special variable */ -NODE * -spc_var(name, value) -char *name; -NODE *value; -{ - register NODE *r; - NODE *lookup(), *install(); - - if ((r = lookup(variables, name)) == NULL) - r = install(variables, name, node(value, Node_var, (NODE *) NULL)); - return r; -} - - - -/* - * Install a name in the hash table specified, even if it is already there. - * Name stops with first non alphanumeric. Caller must check against - * redefinition if that is desired. - */ -NODE * -install(table, name, value) -HASHNODE **table; -char *name; -NODE *value; -{ - register HASHNODE *hp; - register int i, len, bucket; - register char *p; - - len = 0; - p = name; - while (is_identchar(*p)) - p++; - len = p - name; - - i = sizeof(HASHNODE) + len + 1; - emalloc(hp, HASHNODE *, i, "install"); - bucket = hashf(name, len, HASHSIZE); - hp->next = table[bucket]; - table[bucket] = hp; - hp->length = len; - hp->value = value; - hp->name = ((char *) hp) + sizeof(HASHNODE); - hp->length = len; - bcopy(name, hp->name, len); - hp->name[len] = '\0'; - hp->value->varname = hp->name; - return hp->value; -} - -/* - * find the most recent hash node for name name (ending with first - * non-identifier char) installed by install - */ -NODE * -lookup(table, name) -HASHNODE **table; -char *name; -{ - register char *bp; - register HASHNODE *bucket; - register int len; - - for (bp = name; is_identchar(*bp); bp++) - ; - len = bp - name; - bucket = table[hashf(name, len, HASHSIZE)]; - while (bucket) { - if (bucket->length == len && strncmp(bucket->name, name, len) == 0) - return bucket->value; - bucket = bucket->next; - } - return NULL; -} - -#define HASHSTEP(old, c) ((old << 1) + c) -#define MAKE_POS(v) (v & ~0x80000000) /* make number positive */ - -/* - * return hash function on name. - */ -int -hashf(name, len, hashsize) -register char *name; -register int len; -int hashsize; -{ - register int r = 0; - - while (len--) - r = HASHSTEP(r, *name++); - - r = MAKE_POS(r) % hashsize; - return r; -} - -/* - * Add new to the rightmost branch of LIST. This uses n^2 time, but doesn't - * get used enough to make optimizing worth it. . . - */ -/* You don't believe me? Profile it yourself! */ - -NODE * -append_right(list, new) -NODE *list, *new; -{ - register NODE *oldlist; - - oldlist = list; - while (list->rnode != NULL) - list = list->rnode; - list->rnode = new; - return oldlist; -} - -/* - * check if name is already installed; if so, it had better have Null value, - * in which case def is added as the value. Otherwise, install name with def - * as value. - */ -func_install(params, def) -NODE *params; -NODE *def; -{ - NODE *r; - NODE *lookup(); - - pop_params(params); - r = lookup(variables, params->param); - if (r != NULL) { - fatal("function name `%s' previously defined", params->param); - } else - (void) install(variables, params->param, - node(params, Node_func, def)); -} - -NODE * -pop_var(name) -char *name; -{ - register char *bp; - register HASHNODE *bucket, **save; - register int len; - - for (bp = name; is_identchar(*bp); bp++) - ; - len = bp - name; - save = &(variables[hashf(name, len, HASHSIZE)]); - bucket = *save; - while (bucket) { - if (strncmp(bucket->name, name, len) == 0) { - *save = bucket->next; - return bucket->value; - } - save = &(bucket->next); - bucket = bucket->next; - } - return NULL; -} - -pop_params(params) -NODE *params; -{ - register NODE *np; - - for (np = params; np != NULL; np = np->rnode) - pop_var(np->param); -} - -NODE * -make_name(name, type) -char *name; -NODETYPE type; -{ - register char *p; - register NODE *r; - register int len; - - p = name; - while (is_identchar(*p)) - p++; - len = p - name; - emalloc(r, NODE *, sizeof(NODE), "make_name"); - emalloc(r->param, char *, len + 1, "make_name"); - bcopy(name, r->param, len); - r->param[len] = '\0'; - r->rnode = NULL; - r->type = type; - return (install(variables, name, r)); -} - -NODE *make_param(name) -char *name; -{ - NODE *r; - - r = make_name(name, Node_param_list); - r->param_cnt = param_counter++; - return r; -} - -FILE * -nextfile() -{ - static int i = 1; - static int files = 0; - char *arg; + extern char *version_string; char *cp; - FILE *fp; - extern NODE **assoc_lookup(); - - for (; i < (int) (ARGC_node->lnode->numbr); i++) { - arg = (*assoc_lookup(ARGV_node, tmp_number((AWKNUM) i)))->stptr; - if (*arg == '\0') - continue; - cp = index(arg, '='); - if (cp != NULL) { - *cp++ = '\0'; - variable(arg)->var_value = make_string(cp, strlen(cp)); - } else { - extern NODE *deref; - - files++; - if (strcmp(arg, "-") == 0) - fp = stdin; - else - fp = fopen(arg, "r"); - if (fp == NULL) - fatal("cannot open file `%s' for reading (%s)", - arg, sys_errlist[errno]); - /* NOTREACHED */ - /* This is a kludge. */ - deref = FILENAME_node->var_value; - do_deref(); - FILENAME_node->var_value = - make_string(arg, strlen(arg)); - FNR_node->var_value->numbr = 0.0; - i++; - return fp; - } - } - if (files == 0) { - files++; - /* no args. -- use stdin */ - /* FILENAME is init'ed to "-" */ - /* FNR is init'ed to 0 */ - return stdin; - } - return NULL; + static char blurb[] = +".\nCopyright (C) 1989, Free Software Foundation.\n\ +GNU Awk comes with ABSOLUTELY NO WARRANTY. This is free software, and\n\ +you are welcome to distribute it under the terms of the GNU General\n\ +Public License, which covers both the warranty information and the\n\ +terms for redistribution.\n\ +\n\ +You should have received a copy of the GNU General Public License along\n\ +with this program; if not, write to the Free Software Foundation, Inc.,\n\ +675 Mass Ave, Cambridge, MA 02139, USA.\n"; + + for (cp = version_string; *cp && *cp != '\n'; cp++) + putc (*cp, stderr); + fprintf(stderr, blurb); } @@ -5,6 +5,30 @@ * 1986 * * $Log: awk2.c,v $ + * Revision 1.47 89/03/22 22:09:50 david + * a cleaner way to handle assignment to $n where n > 0 + * + * Revision 1.46 89/03/22 21:01:14 david + * delete some obsolete code + * + * Revision 1.45 89/03/21 19:25:37 david + * minor cleanup + * + * Revision 1.44 89/03/21 18:24:02 david + * bug fix in cmp_nodes: strings in which one was a prefix of the other compared equal + * + * Revision 1.43 89/03/21 10:55:55 david + * cleanup and fix of string comparison (0 length was wrong) + * + * Revision 1.42 89/03/15 22:01:17 david + * old case stuff removed + * new case stuff added + * fixed % operator + * strings with embedded \0 can now be compared + * + * Revision 1.41 89/03/15 21:32:50 david + * try to free up memory in as many places as possible + * * Revision 1.40 88/12/15 12:57:31 david * make casetable static * @@ -156,7 +180,7 @@ * * Revision 1.2 87/10/29 21:45:44 david added support for array membership * test, as in: if ("yes" in answers) ... this involved one more case: for - * Node_in_array and rearrangment of the code in assoc_lookup, so thatthe + * Node_in_array and rearrangment of the code in assoc_lookup, so that the * element can be located without being created * * Revision 1.1 87/10/27 15:23:28 david Initial revision @@ -183,17 +207,24 @@ #include "awk.h" +extern void do_print(); +extern NODE *do_printf(); +extern NODE *func_call(); +extern NODE *do_match(); +extern NODE *do_sub(); +extern NODE *do_getline(); +extern int in_array(); +extern void do_delete(); + +extern double pow(); + +static int eval_condition(); +static int is_a_number(); +static NODE *op_assign(); + NODE *_t; /* used as a temporary in macros */ NODE *_result; /* holds result of tree_eval, for possible freeing */ NODE *ret_node; -extern NODE *OFMT_node; - -/* - * BEGIN and END blocks need special handling, because we are handed them as - * raw Node_statement_lists, not as Node_rule_lists. - */ -extern NODE *begin_block, *end_block; -NODE *do_sprintf(); /* More of that debugging stuff */ #ifdef DEBUG @@ -202,7 +233,6 @@ NODE *do_sprintf(); #define DBG_P(X) #endif -NODE *func_call(); extern jmp_buf func_tag; /* @@ -212,9 +242,12 @@ extern jmp_buf func_tag; * entries, which may be overkill. Note also that if the system this * is compiled on doesn't use 7-bit ascii, casetable[] should not be * defined to the linker, so gawk should not load. + * + * Do NOT make this array static, it is used in several spots, not + * just in this file. */ #if 'a' == 97 /* it's ascii */ -static char casetable[] = { +char casetable[] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -261,7 +294,7 @@ static char casetable[] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', }; #else -/* You lose. You will need a translation table for your character set. */ +#include "You lose. You will need a translation table for your character set." #endif /* @@ -287,8 +320,6 @@ NODE *tree; extern NODE **fields_arr; extern int exiting, exit_val; - NODE *do_printf(); - extern NODE *lookup(); /* * clean up temporary strings created by evaluating expressions in @@ -343,21 +374,6 @@ NODE *tree; break; case Node_statement_list: - /* - * because BEGIN and END do not have Node_rule_list nature, - * yet can have exits and nexts, we special-case a setjmp of - * rule_tag here. - */ - if (tree == begin_block || tree == end_block) { - switch (_setjmp(rule_tag)) { - case TAG_CONTINUE: /* next */ - fatal("unexpected \"next\" in %s block", - tree == begin_block ? "BEGIN" : "END"); - return 1; - case TAG_BREAK: - return 0; - } - } for (t = tree; t != NULL; t = t->rnode) { DBG_P(("Statements", t->lnode)); (void) interpret(t->lnode); @@ -451,7 +467,7 @@ NODE *tree; #define arrvar forloop->incr PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); DBG_P(("AFOR.VAR", tree->hakvar)); - lhs = get_lhs(tree->hakvar); + lhs = get_lhs(tree->hakvar, 1); t = tree->arrvar; if (tree->arrvar->type == Node_param_list) t = stack_ptr[tree->arrvar->param_cnt]; @@ -462,13 +478,6 @@ NODE *tree; if (field_num == 0) set_record(fields_arr[0]->stptr, fields_arr[0]->stlen); - else if (field_num > 0) { - node0_valid = 0; - if (NF_node->var_value->numbr == -1 && - field_num > NF_node->var_value->numbr) - assign_number(&(NF_node->var_value), - (AWKNUM) field_num); - } DBG_P(("AFOR.NEXTIS", *lhs)); switch (_setjmp(loop_tag)) { case 0: @@ -505,7 +514,7 @@ NODE *tree; case Node_K_print: DBG_P(("PRINT", tree)); - (void) do_print(tree); + do_print(tree); break; case Node_K_printf: @@ -534,9 +543,6 @@ NODE *tree; _longjmp(rule_tag, TAG_BREAK); break; - case Node_K_function: - break; - case Node_K_return: DBG_P(("RETURN", NULL)); ret_node = dupnode(tree_eval(tree->lnode)); @@ -563,20 +569,14 @@ NODE * r_tree_eval(tree) NODE *tree; { - NODE *op_assign(); register NODE *r, *t1, *t2; /* return value & temporary subtrees */ int i; register NODE **lhs; int di; - AWKNUM x; - int samecase = 0; - extern int ignorecase; + AWKNUM x, x2; + long lx; struct re_pattern_buffer *rp; extern NODE **fields_arr; - extern NODE *do_getline(); - extern NODE *do_match(); - extern NODE *do_sub(); - extern double pow(); if (tree->type != Node_var) source = tree->source_file; @@ -635,7 +635,7 @@ NODE *tree; case Node_subscript: case Node_field_spec: DBG_P(("var_type ref", tree)); - lhs = get_lhs(tree); + lhs = get_lhs(tree, 0); field_num = -1; deref = 0; return *lhs; @@ -657,27 +657,29 @@ NODE *tree; } break; - case Node_case_match: - case Node_case_nomatch: - samecase = 1; - /* fall through */ case Node_match: case Node_nomatch: DBG_P(("ASSIGN_[no]match", tree)); t1 = force_string(tree_eval(tree->lnode)); - if (tree->rnode->type == Node_regex) + if (tree->rnode->type == Node_regex) { rp = tree->rnode->rereg; - else { - rp = make_regexp(force_string(tree_eval(tree->rnode))); + if (!strict && ((IGNORECASE_node->var_value->numbr != 0) + ^ (tree->rnode->re_case != 0))) { + /* recompile since case sensitivity differs */ + rp = tree->rnode->rereg = + mk_re_parse(tree->rnode->re_text, + (IGNORECASE_node->var_value->numbr != 0)); + tree->rnode->re_case = (IGNORECASE_node->var_value->numbr != 0); + } + } else { + rp = make_regexp(force_string(tree_eval(tree->rnode)), + (IGNORECASE_node->var_value->numbr != 0)); if (rp == NULL) cant_happen(); } - if (! strict && (ignorecase || samecase)) - rp->translate = casetable; i = re_search(rp, t1->stptr, t1->stlen, 0, t1->stlen, (struct re_registers *) NULL); - i = (i == -1) ^ (tree->type == Node_match || - tree->type == Node_case_match); + i = (i == -1) ^ (tree->type == Node_match); free_temp(t1); return tmp_number((AWKNUM) i); @@ -690,19 +692,12 @@ NODE *tree; case Node_assign: DBG_P(("ASSIGN", tree)); r = tree_eval(tree->rnode); - lhs = get_lhs(tree->lnode); + lhs = get_lhs(tree->lnode, 1); *lhs = dupnode(r); + do_deref(); if (field_num == 0) set_record(fields_arr[0]->stptr, fields_arr[0]->stlen); - else if (field_num > 0) { - node0_valid = 0; - if (NF_node->var_value->numbr == -1 && - field_num > NF_node->var_value->numbr) - assign_number(&(NF_node->var_value), - (AWKNUM) field_num); - } field_num = -1; - do_deref(); return *lhs; /* other assignment types are easier because they are numeric */ @@ -733,8 +728,7 @@ NODE *tree; t1 = force_string(t1); t2 = force_string(t2); - emalloc(r, NODE *, sizeof(NODE), "tree_eval"); - r->type = Node_val; + r = newnode(Node_val); r->flags = (STR|TEMP); r->stlen = t1->stlen + t2->stlen; r->stref = 1; @@ -817,7 +811,9 @@ NODE *tree; free_temp(t1); return tmp_number((AWKNUM) 0); } - x = ((int) t1->numbr) % ((int) x); + lx = t1->numbr / x; /* assignment to long truncates */ + x2 = lx * x; + x = t1->numbr - x2; free_temp(t1); return tmp_number(x); @@ -845,6 +841,7 @@ NODE *tree; * This makes numeric operations slightly more efficient. Just change the * value of a numeric node, if possible */ +void assign_number(ptr, value) NODE **ptr; AWKNUM value; @@ -873,24 +870,16 @@ AWKNUM value; /* Is TREE true or false? Returns 0==false, non-zero==true */ -int +static int eval_condition(tree) NODE *tree; { register NODE *t1; int ret; - extern double atof(); if (tree == NULL) /* Null trees are the easiest kinds */ return 1; - switch (tree->type) { - /* Maybe it's easy; check and see. */ - /* BEGIN and END are always false */ - case Node_K_BEGIN: - case Node_K_END: - return 0; - break; - + if (tree->type == Node_line_range) { /* * Node_line_range is kind of like Node_match, EXCEPT: the * lnode field (more properly, the condpair field) is a node @@ -904,7 +893,6 @@ NODE *tree; * able to begin and end on a single input record, so this * isn't an ELSE IF, as noted above. */ - case Node_line_range: if (!tree->triggered) if (!eval_condition(tree->condpair->lnode)) return 0; @@ -922,10 +910,6 @@ NODE *tree; */ t1 = tree_eval(tree); -#ifdef DEBUG - if (t1->type != Node_val) - cant_happen(); -#endif if (t1->flags & STR) ret = t1->stlen != 0; else @@ -992,6 +976,8 @@ cmp_nodes(t1, t2) NODE *t1, *t2; { AWKNUM d; + int ret; + int len1, len2; if (t1 == t2) return 0; @@ -1038,17 +1024,52 @@ NODE *t1, *t2; } strings: - return strcmp(t1->stptr, t2->stptr); + len1 = t1->stlen; + len2 = t2->stlen; + if (len1 == 0) { + if (len2 == 0) + return 0; + else + return -1; + } else if (len2 == 0) + return 1; + ret = memcmp(t1->stptr, t2->stptr, len1 <= len2 ? len1 : len2); + if (ret == 0 && len1 != len2) + return len1 < len2 ? -1: 1; + return ret; } -NODE * +#ifdef NOMEMCMP +/* + * memcmp --- compare strings. + * + * We use our own routine since it has to act like strcmp() for return + * value, and the BSD manual says bcmp() only returns zero/non-zero. + */ + +static int +memcmp (s1, s2, l) +register char *s1, *s2; +register int l; +{ + for (; l--; s1++, s2++) { + if (*s1 != *s2) + return (*s1 - *s2); + } + return (*--s1 - *--s2); +} +#endif + +static NODE * op_assign(tree) NODE *tree; { AWKNUM rval, lval; NODE **lhs; + AWKNUM t1, t2; + long ltemp; - lhs = get_lhs(tree->lnode); + lhs = get_lhs(tree->lnode, 1); lval = force_number(*lhs); switch(tree->type) { @@ -1057,17 +1078,10 @@ NODE *tree; DBG_P(("+-X", tree)); assign_number(lhs, lval + (tree->type == Node_preincrement ? 1.0 : -1.0)); + do_deref(); if (field_num == 0) set_record(fields_arr[0]->stptr, fields_arr[0]->stlen); - else if (field_num > 0) { - node0_valid = 0; - if (NF_node->var_value->numbr == -1 && - field_num > NF_node->var_value->numbr) - assign_number(&(NF_node->var_value), - (AWKNUM) field_num); - } field_num = -1; - do_deref(); return *lhs; break; @@ -1076,17 +1090,10 @@ NODE *tree; DBG_P(("X+-", tree)); assign_number(lhs, lval + (tree->type == Node_postincrement ? 1.0 : -1.0)); + do_deref(); if (field_num == 0) set_record(fields_arr[0]->stptr, fields_arr[0]->stlen); - else if (field_num > 0) { - node0_valid = 0; - if (NF_node->var_value->numbr == -1 && - field_num > NF_node->var_value->numbr) - assign_number(&(NF_node->var_value), - (AWKNUM) field_num); - } field_num = -1; - do_deref(); return tmp_number(lval); } @@ -1110,7 +1117,10 @@ NODE *tree; case Node_assign_mod: DBG_P(("ASSIGN_mod", tree)); - assign_number(lhs, (AWKNUM) (((int) lval) % ((int) rval))); + ltemp = lval / rval; /* assignment to long truncates */ + t1 = ltemp * rval; + t2 = lval - t1; + assign_number(lhs, t2); break; case Node_assign_plus: @@ -1123,17 +1133,10 @@ NODE *tree; assign_number(lhs, lval - rval); break; } + do_deref(); if (field_num == 0) set_record(fields_arr[0]->stptr, fields_arr[0]->stlen); - else if (field_num > 0) { - node0_valid = 0; - if (NF_node->var_value->numbr == -1 && - field_num > NF_node->var_value->numbr) - assign_number(&(NF_node->var_value), - (AWKNUM) field_num); - } field_num = -1; - do_deref(); return *lhs; } @@ -5,6 +5,23 @@ * December 1986 * * $Log: awk3.c,v $ + * Revision 1.38 89/03/22 22:10:20 david + * a cleaner way to handle assignment to $n where n > 0 + * + * Revision 1.37 89/03/21 10:54:21 david + * cleanup and fix of handling of precision in format string of printf call + * + * Revision 1.36 89/03/15 22:01:51 david + * ENVIRON fix from hack + * relegated -Ft to strict compatibility + * getline error return fix + * printf %c fix (only print 1 char of a string) + * tolower & toupper added + * /dev/fd/N etc special files added + * + * Revision 1.35 89/03/15 21:34:05 david + * try to free more memory + * * Revision 1.34 88/12/13 22:28:10 david * temporarily #ifdef out flush_io in redirect(); adjust atan2() for * force_number as a macro @@ -156,13 +173,35 @@ */ #include "awk.h" +extern int parse_fields(); +extern void assoc_clear(); +extern FILE *devopen(); + +#ifdef USG +extern long lrand48(); +extern void srand48(); +#else +extern void srandom(); +extern char *initstate(); +extern char *setstate(); +extern long random(); +#endif + +static void set_element(); +static void get_one(); +static void get_two(); +static int get_three(); +static int a_get_three(); +static void close_one(); +static int close_fp(); + +NODE *do_sprintf(); + /* These nodes store all the special variables AWK uses */ NODE *FS_node, *NF_node, *RS_node, *NR_node; NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node; NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node; -NODE *ENVIRON_node; - -FILE *redirect(); +NODE *ENVIRON_node, *IGNORECASE_node; /* * structure used to dynamically maintain a linked-list of open files/pipes @@ -187,14 +226,10 @@ struct redirect *red_head = NULL; */ init_vars() { - NODE *spc_var(); - NODE *do_sprintf(); extern char **environ; char *var, *val; NODE **aptr; int i; - extern NODE **assoc_lookup(); - extern NODE *tmp_string(); FS_node = spc_var("FS", make_string(" ", 1)); NF_node = spc_var("NF", make_number(-1.0)); @@ -208,17 +243,24 @@ init_vars() RLENGTH_node = spc_var("RLENGTH", make_number(0.0)); RSTART_node = spc_var("RSTART", make_number(0.0)); SUBSEP_node = spc_var("SUBSEP", make_string("\034", 1)); + IGNORECASE_node = spc_var("IGNORECASE", make_number(0.0)); ENVIRON_node = spc_var("ENVIRON", Nnull_string); for (i = 0; environ[i]; i++) { + static char nullstr[] = ""; + var = environ[i]; val = index(var, '='); if (val) *val++ = '\0'; else - val = ""; + val = nullstr; aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen (var))); *aptr = make_string(val, strlen (val)); + + /* restore '=' so that system() gets a valid environment */ + if (val != nullstr) + *--val = '='; } } @@ -234,7 +276,7 @@ get_ofmt() { register NODE *tmp; - tmp = *get_lhs(OFMT_node); + tmp = OFMT_node->var_value; if ((tmp->type != Node_string && tmp->type != Node_str_num) || tmp->stlen == 0) return "%.6g"; return tmp->stptr; @@ -257,11 +299,13 @@ char *str; { register NODE **tmp; - tmp = get_lhs(FS_node); - do_deref(); - /* stupid special case so -F\t works as documented in awk */ - /* even though the shell hands us -Ft. Bleah! */ - if (str[0] == 't' && str[1] == '\0') + tmp = get_lhs(FS_node, 0); + /* + * Only if in full compatibility mode check for the stupid special + * case so -F\t works as documented in awk even though the shell + * hands us -Ft. Bleah! + */ + if (strict && str[0] == 't' && str[1] == '\0') str[0] = '\t'; *tmp = make_string(str, 1); do_deref(); @@ -304,7 +348,7 @@ NODE *tree; l1 = s1->stlen; l2 = s2->stlen; while (l1) { - if (!strncmp(p1, p2, l2)) + if (STREQN(p1, p2, l2)) return tmp_number((AWKNUM) (1 + s1->stlen - l1)); l1--; p1++; @@ -350,21 +394,21 @@ do_printf(tree) NODE *tree; { register FILE *fp; - NODE *do_sprintf(); + int errflg = 0; /* not used, sigh */ - fp = redirect(tree->rnode); - print_simple(do_sprintf(tree->lnode), fp); + fp = redirect(tree->rnode, &errflg); + if (fp) + print_simple(do_sprintf(tree->lnode), fp); return Nnull_string; } +static void set_element(num, s, len, n) int num; char *s; int len; NODE *n; { - extern NODE **assoc_lookup(); - *assoc_lookup(n, tmp_number((AWKNUM) (num))) = make_string(s, len); } @@ -498,7 +542,8 @@ retry: case '0': if (fill != sp || lj) goto lose; - fill = "0"; /* FALL through */ + if (cur == &fw) + fill = "0"; /* FALL through */ case '1': case '2': case '3': @@ -545,7 +590,9 @@ retry: pr_str = cpbuf; goto dopr_string; } - if (!prec || prec > arg->stlen) + if (! prec) + prec = 1; + else if (prec > arg->stlen) prec = arg->stlen; pr_str = cpbuf; goto dopr_string; @@ -687,7 +734,7 @@ retry: *cp++ = '-'; if (fill != sp) *cp++ = '0'; - if (prec != 0) { + if (cur != &fw) { (void) strcpy(cp, "*.*f"); (void) sprintf(obuf + olen, cpbuf, fw, prec, (double) tmpval); } else { @@ -709,7 +756,7 @@ retry: *cp++ = '-'; if (fill != sp) *cp++ = '0'; - if (prec != 0) { + if (cur != &fw) { (void) strcpy(cp, "*.*e"); (void) sprintf(obuf + olen, cpbuf, fw, prec, (double) tmpval); } else { @@ -771,7 +818,6 @@ NODE *tree; { NODE *tmp; int ret; - extern int flush_io (); (void) flush_io (); /* so output is syncrhonous with gawk's */ get_one(tree, &tmp); @@ -781,12 +827,16 @@ NODE *tree; } /* The print command. Its name is historical */ +void do_print(tree) NODE *tree; { register FILE *fp; + int errflg = 0; /* not used, sigh */ - fp = redirect(tree->rnode); + fp = redirect(tree->rnode, &errflg); + if (! fp) + return; tree = tree->lnode; if (!tree) tree = WHOLELINE; @@ -805,12 +855,45 @@ NODE *tree; print_simple(ORS_node->var_value, fp); } +NODE * +do_tolower(tree) +NODE *tree; +{ + NODE *t1, *t2; + register char *cp, *cp2; + + get_one(tree, &t1); + t1 = force_string(t1); + t2 = tmp_string(t1->stptr, t1->stlen); + for (cp = t2->stptr, cp2 = t2->stptr + t2->stlen; cp < cp2; cp++) + if (isupper(*cp)) + *cp = tolower(*cp); + return t2; +} + +NODE * +do_toupper(tree) +NODE *tree; +{ + NODE *t1, *t2; + register char *cp; + + get_one(tree, &t1); + t1 = force_string(t1); + t2 = tmp_string(t1->stptr, t1->stlen); + for (cp = t2->stptr; cp < t2->stptr + t2->stlen; cp++) + if (islower(*cp)) + *cp = toupper(*cp); + return t2; +} + /* * Get the arguments to functions. No function cares if you give it too many * args (they're ignored). Only a few fuctions complain about being given - * too few args. The rest have defaults + * too few args. The rest have defaults. */ +static void get_one(tree, res) NODE *tree, **res; { @@ -821,6 +904,7 @@ NODE *tree, **res; *res = tree_eval(tree->lnode); } +static void get_two(tree, res1, res2) NODE *tree, **res1, **res2; { @@ -835,6 +919,7 @@ NODE *tree, **res1, **res2; *res2 = tree_eval(tree->lnode); } +static int get_three(tree, res1, res2, res3) NODE *tree, **res1, **res2, **res3; { @@ -854,6 +939,7 @@ NODE *tree, **res1, **res2, **res3; return 3; } +static int a_get_three(tree, res1, res2, res3) NODE *tree, **res1, **res2, **res3; { @@ -875,15 +961,14 @@ NODE *tree, **res1, **res2, **res3; /* Redirection for printf and print commands */ FILE * -redirect(tree) +redirect(tree, errflg) NODE *tree; +int *errflg; { register NODE *tmp; register struct redirect *rp; register char *str; register FILE *fp; - FILE *popen(); - FILE *fopen(); int tflag; char *direction = "to"; @@ -912,7 +997,7 @@ NODE *tree; tmp = force_string(tree_eval(tree->subnode)); str = tmp->stptr; for (rp = red_head; rp != NULL; rp = rp->next) - if (rp->flag == tflag && strcmp(rp->value, str) == 0) + if (rp->flag == tflag && STREQ(rp->value, str)) break; if (rp == NULL) { emalloc(rp, struct redirect *, sizeof(struct redirect), @@ -934,10 +1019,10 @@ NODE *tree; errno = 0; switch (tree->type) { case Node_redirect_output: - fp = rp->fp = fopen(str, "w"); + fp = rp->fp = devopen(str, "w"); break; case Node_redirect_append: - fp = rp->fp = fopen(str, "a"); + fp = rp->fp = devopen(str, "a"); break; case Node_redirect_pipe: fp = rp->fp = popen(str, "w"); @@ -948,16 +1033,31 @@ NODE *tree; break; case Node_redirect_input: direction = "from"; - fp = rp->fp = fopen(str, "r"); + fp = rp->fp = devopen(str, "r"); break; } if (fp == NULL) { /* too many files open -- close one and try again */ if (errno == ENFILE || errno == EMFILE) close_one(); - else /* some other reason for failure */ - fatal("can't redirect %s `%s'\n", direction, - str); + else { + /* + * Some other reason for failure. + * + * On redirection of input from a file, + * just return an error, so e.g. getline + * can return -1. For output to file, + * complain. The shell will complain on + * a bad command to a pipe. + */ + *errflg = 1; + if (tree->type == Node_redirect_output + || tree->type == Node_redirect_append) + fatal("can't redirect %s `%s'\n", + direction, str); + else + return NULL; + } } } if (rp->offset != 0) { /* this file was previously open */ @@ -971,6 +1071,7 @@ NODE *tree; return rp->fp; } +static void close_one() { register struct redirect *rp; @@ -1003,7 +1104,7 @@ NODE *tree; tmp = force_string(tree_eval(tree->subnode)); for (rp = red_head; rp != NULL; rp = rp->next) { - if (strcmp(rp->value, tmp->stptr) == 0) + if (STREQ(rp->value, tmp->stptr)) break; } free_temp(tmp); @@ -1012,7 +1113,7 @@ NODE *tree; return tmp_number((AWKNUM)close_fp(rp)); } -int +static int close_fp(rp) register struct redirect *rp; { @@ -1074,10 +1175,13 @@ close_io () return status; } +void print_simple(tree, fp) NODE *tree; FILE *fp; { + if (! fp) /* can't happen */ + return; if (fwrite(tree->stptr, sizeof(char), tree->stlen, fp) != tree->stlen) warning("fwrite: %s", sys_errlist[errno]); free_temp(tree); @@ -1122,8 +1226,6 @@ static int firstrand = 1; #ifndef USG static char state[256]; -extern char *initstate(); - #endif #define MAXLONG 2147483647 /* maximum value for long int */ @@ -1134,12 +1236,8 @@ do_rand(tree) NODE *tree; { #ifdef USG - extern long lrand48(); - return tmp_number((AWKNUM) lrand48() / MAXLONG); #else - extern long random(); - if (firstrand) { (void) initstate((unsigned) 1, state, sizeof state); srandom(1); @@ -1154,13 +1252,11 @@ do_srand(tree) NODE *tree; { NODE *tmp; - extern long time(); static long save_seed = 1; long ret = save_seed; /* SVR4 awk srand returns previous seed */ + extern long time(); #ifdef USG - extern void srand48(); - if (tree == NULL) srand48(save_seed = time((long *) 0)); else { @@ -1168,9 +1264,6 @@ NODE *tree; srand48(save_seed = (long) force_number(tmp)); } #else - extern srandom(); - extern char *setstate(); - if (firstrand) (void) initstate((unsigned) 1, state, sizeof state); else @@ -5,6 +5,33 @@ * Written by David Trueman, 1988 * * $Log: awk4.c,v $ + * Revision 1.36 89/03/22 22:10:23 david + * a cleaner way to handle assignment to $n where n > 0 + * + * Revision 1.35 89/03/22 21:05:24 david + * delete some obsolete code + * + * Revision 1.34 89/03/22 21:00:34 david + * replace some free()'s with freenode() + * + * Revision 1.33 89/03/21 19:26:01 david + * minor cleanup + * + * Revision 1.32 89/03/21 18:22:54 david + * some function tracing debugging code + * + * Revision 1.31 89/03/21 10:53:21 david + * cleanup + * + * Revision 1.30 89/03/15 22:22:03 david + * fix from hack: check for null function before using it in diagnostic + * + * Revision 1.29 89/03/15 22:02:24 david + * new case stuff added + * + * Revision 1.28 89/03/15 21:34:37 david + * free more memory and purge obstack stuff + * * Revision 1.27 88/12/14 10:53:49 david * malloc structures in func_call and free them on return * @@ -100,16 +127,17 @@ NODE *arg_list; /* Node_expression_list of calling args. */ NODE **sp; static int func_tag_valid = 0; int count; - extern NODE *lookup(), *install(); - extern NODE *pop_var(); extern NODE *ret_node; /* * retrieve function definition node */ f = lookup(variables, name->stptr); - if (f->type != Node_func) + if (!f || f->type != Node_func) fatal("function `%s' not defined", name->stptr); +#ifdef FUNC_TRACE + fprintf(stderr, "function %s called\n", name->stptr); +#endif /* * mark stack for variables allocated during life of function */ @@ -186,13 +214,13 @@ NODE *arg_list; /* Node_expression_list of calling args. */ } deref = n->lnode; do_deref(); - free((char *) n); + freenode(n); } while (count-- > 0) { n = *sp++; deref = n->lnode; do_deref(); - free((char *) n); + freenode(n); } free((char *) local_stack); return r; @@ -206,13 +234,21 @@ NODE *tree; int rstart; struct re_registers reregs; struct re_pattern_buffer *rp; - extern NODE *RSTART_node, *RLENGTH_node; t1 = force_string(tree_eval(tree->lnode)); - if (tree->rnode->type == Node_regex) + if (tree->rnode->type == Node_regex) { rp = tree->rnode->rereg; - else { - rp = make_regexp(force_string(tree_eval(tree->rnode))); + if (!strict && ((IGNORECASE_node->var_value->numbr != 0) + ^ (tree->rnode->re_case != 0))) { + /* recompile since case sensitivity differs */ + rp = tree->rnode->rereg = + mk_re_parse(tree->rnode->re_text, + (IGNORECASE_node->var_value->numbr != 0)); + tree->rnode->re_case = (IGNORECASE_node->var_value->numbr != 0); + } + } else { + rp = make_regexp(force_string(tree_eval(tree->rnode)), + (IGNORECASE_node->var_value->numbr != 0)); if (rp == NULL) cant_happen(); } @@ -258,10 +294,19 @@ NODE *tree; global = (tree->type == Node_gsub); - if (tree->rnode->type == Node_regex) + if (tree->rnode->type == Node_regex) { rp = tree->rnode->rereg; - else { - rp = make_regexp(force_string(tree_eval(tree->rnode))); + if (! strict && ((IGNORECASE_node->var_value->numbr != 0) + ^ (tree->rnode->re_case != 0))) { + /* recompile since case sensitivity differs */ + rp = tree->rnode->rereg = + mk_re_parse(tree->rnode->re_text, + (IGNORECASE_node->var_value->numbr != 0)); + tree->rnode->re_case = (IGNORECASE_node->var_value->numbr != 0); + } + } else { + rp = make_regexp(force_string(tree_eval(tree->rnode)), + (IGNORECASE_node->var_value->numbr != 0)); if (rp == NULL) cant_happen(); } @@ -269,6 +314,7 @@ NODE *tree; s = force_string(tree_eval(tree->lnode)); tree = tree->rnode; deref = 0; + field_num = -1; if (tree == NULL) { t = WHOLELINE; lhs = &fields_arr[0]; @@ -276,7 +322,7 @@ NODE *tree; deref = t; } else { t = tree->lnode; - lhs = get_lhs(t); + lhs = get_lhs(t, 1); t = force_string(tree_eval(t)); } /* @@ -346,19 +392,13 @@ NODE *tree; if (matches > 0) { if (field_num == 0) set_record(fields_arr[0]->stptr, fields_arr[0]->stlen); - else if (field_num > 0) { - node0_valid = 0; - if (NF_node->var_value->numbr == -1 && - field_num > NF_node->var_value->numbr) - assign_number(&(NF_node->var_value), - (AWKNUM) field_num); - } t->flags &= ~NUM; } field_num = -1; return tmp_number((AWKNUM) matches); } +void init_args(argc0, argc, argv0, argv) int argc0, argc; char *argv0; @@ -366,11 +406,6 @@ char **argv; { int i, j; NODE **aptr; - extern NODE **assoc_lookup(); - extern NODE *spc_var(); - extern NODE *make_string(); - extern NODE *make_number(); - extern NODE *tmp_number(); ARGV_node = spc_var("ARGV", Nnull_string); aptr = assoc_lookup(ARGV_node, tmp_number(0.0)); @@ -382,21 +417,3 @@ char **argv; } ARGC_node = spc_var("ARGC", make_number((AWKNUM) j)); } - -#ifdef USG -int -bcopy (src, dst, length) -register char *src, *dst; -register int length; -{ - (void) memcpy (dst, src, length); -} - -int -bzero (b, length) -register char *b; -register int length; -{ - (void) memset (b, '\0', length); -} -#endif @@ -1,11 +1,21 @@ /* * routines for error messages * - * Copyright (C) 1988 Free Software Foundation - * * $Log: awk5.c,v $ + * Revision 1.12 89/03/21 18:31:46 david + * changed defines for system without vprintf() + * + * Revision 1.11 89/03/21 10:52:53 david + * cleanup + * * Revision 1.10 88/12/08 11:00:07 david - * add $Log$ + * add $Log: awk5.c,v $ + * Revision 1.12 89/03/21 18:31:46 david + * changed defines for system without vprintf() + * + * Revision 1.11 89/03/21 10:52:53 david + * cleanup + * * */ @@ -32,23 +42,24 @@ int sourceline = 0; char *source = NULL; -err(s, argp) +/* VARARGS2 */ +static void +err(s, msg, argp) char *s; +char *msg; va_list *argp; { - char *fmt; int line; char *file; (void) fprintf(stderr, "%s: %s ", myname, s); - fmt = va_arg(*argp, char *); - vfprintf(stderr, fmt, *argp); + vfprintf(stderr, msg, *argp); (void) fprintf(stderr, "\n"); line = (int) FNR_node->var_value->numbr; if (line) (void) fprintf(stderr, " input line number %d", line); file = FILENAME_node->var_value->stptr; - if (file && strcmp(file, "-") != 0) + if (file && !STREQ(file, "-")) (void) fprintf(stderr, ", file `%s'", file); (void) fprintf(stderr, "\n"); if (sourceline) @@ -60,38 +71,40 @@ va_list *argp; /*VARARGS0*/ void -msg(va_alist) +msg(msg, va_alist) +char *msg; va_dcl { va_list args; va_start(args); - err("", &args); + err("", msg, &args); va_end(args); } /*VARARGS0*/ void -warning(va_alist) +warning(msg, va_alist) +char *msg; va_dcl { va_list args; va_start(args); - err("warning:", &args); + err("warning:", msg, &args); va_end(args); } /*VARARGS0*/ void -fatal(va_alist) +fatal(msg, va_alist) +char *msg; va_dcl { va_list args; - extern char *sourcefile; va_start(args); - err("fatal error:", &args); + err("fatal error:", msg, &args); va_end(args); #ifdef DEBUG abort(); @@ -99,20 +112,7 @@ va_dcl exit(1); } -char * -safe_malloc(size) -unsigned size; -{ - char *ret; - - ret = malloc(size); - if (ret == NULL) - fatal("safe_malloc: can't allocate memory (%s)", - sys_errlist[errno]); - return ret; -} - -#if defined(BSD) && !defined(VPRINTF) +#if defined(HASDOPRNT) && defined(NOVPRINTF) int vsprintf(str, fmt, ap) char *str, *fmt; @@ -122,7 +122,7 @@ vsprintf(str, fmt, ap) int len; f._flag = _IOWRT+_IOSTRG; - f._ptr = str; + f._ptr = (unsigned char *)str; f._cnt = 32767; len = _doprnt(fmt, ap, &f); *f._ptr = 0; @@ -5,6 +5,15 @@ * 1986 * * $Log: awk6.c,v $ + * Revision 1.11 89/03/24 15:58:04 david + * HASHNODE becomes NODE + * + * Revision 1.10 89/03/21 10:48:51 david + * minor cleanup + * + * Revision 1.9 89/03/15 22:06:37 david + * remove old case stuff + * * Revision 1.8 88/11/22 13:51:34 david * Arnold: changes for case-insensitive matching * @@ -144,7 +153,6 @@ NODE *ptr; case (int) Node_var_array: { struct search *l; - NODE **assoc_lookup(); printf("(0x%x Array)\n", ptr); for (l = assoc_scan(ptr); l; l = assoc_next(l)) { @@ -175,8 +183,6 @@ NODE *ptr; case (int) Node_line_range: case (int) Node_match: case (int) Node_nomatch: - case (int) Node_case_match: - case (int) Node_case_nomatch: break; case (int) Node_builtin: printf("Builtin: %d\n", ptr->proc); @@ -206,7 +212,7 @@ NODE *ptr; dump_vars() { register int n; - register HASHNODE *buc; + register NODE *buc; #ifdef notdef printf("Fields:"); @@ -214,10 +220,9 @@ dump_vars() #endif printf("Vars:\n"); for (n = 0; n < HASHSIZE; n++) { - for (buc = variables[n]; buc; buc = buc->next) { - printf("'%.*s': ", buc->length, buc->name); - print_parse_tree(buc->value); - /* print_parse_tree(buc->value); */ + for (buc = variables[n]; buc; buc = buc->hnext) { + printf("'%.*s': ", buc->hlength, buc->hname); + print_parse_tree(buc->hvalue); } } printf("End\n"); @@ -256,7 +261,7 @@ NODE *ptr; NODE *p1; char *str, *str2; int n; - HASHNODE *buc; + NODE *buc; if (!ptr) return; /* don't print null ptrs */ @@ -380,9 +385,9 @@ pr_oneop: case Node_var: for (n = 0; n < HASHSIZE; n++) { - for (buc = variables[n]; buc; buc = buc->next) { - if (buc->value == ptr) { - printf("%.*s", buc->length, buc->name); + for (buc = variables[n]; buc; buc = buc->hnext) { + if (buc->hvalue == ptr) { + printf("%.*s", buc->hlength, buc->hname); n = HASHSIZE; break; } @@ -1,8 +1,5 @@ /* * gawk - routines for dealing with record input and fields - * - * Copyright (C) 1988 Free Software Foundation - * */ /* @@ -25,23 +22,28 @@ #include "awk.h" +extern int get_rs(); +extern NODE *concat_exp(); + +static int re_split(); +static int get_a_record(); + static int getline_redirect = 0;/* "getline <file" being executed */ static char *line_buf = NULL; /* holds current input line */ static int line_alloc = 0; /* current allocation for line_buf */ +static char *parse_extent; /* marks where to restart parse of record */ +static int parse_high_water=0; /* field number that we have parsed so far */ +static char *save_fs = " "; /* save current value of FS when line is read, + * to be used in deferred parsing + */ int field_num; /* save number of field in get_lhs */ -char *field_begin; NODE **fields_arr; /* array of pointers to the field nodes */ NODE node0; /* node for $0 which never gets free'd */ int node0_valid = 1; /* $(>0) has not been changed yet */ char f_empty[] = ""; -int parse_high_water = 0; /* field number that we have parsed so far */ -char *parse_extent; /* marks where to restart parse of record */ -char *save_fs = " "; /* save current value of FS when line is read, - * to be used in deferred parsing - */ -static get_a_record(); +void init_fields() { emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields"); @@ -55,6 +57,8 @@ init_fields() * Danger! Must only be called for fields we know have just been blanked, or * fields we know don't exist yet. */ + +static void set_field(num, str, len, dummy) int num; char *str; @@ -63,8 +67,12 @@ NODE *dummy; /* not used -- just to make interface same as set_element */ { NODE *n; int t; + static int nf_high_water = 0; - erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field"); + if (num > nf_high_water) { + erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field"); + nf_high_water = num; + } /* fill in fields that don't exist */ for (t = parse_high_water + 1; t < num; t++) fields_arr[t] = Nnull_string; @@ -74,6 +82,7 @@ NODE *dummy; /* not used -- just to make interface same as set_element */ } /* Someone assigned a value to $(something). Fix up $0 to be right */ +static void rebuild_record() { register int tlen; @@ -82,12 +91,10 @@ rebuild_record() char *ops; register char *cops; register NODE **ptr, **maxp; - extern NODE *OFS_node; maxp = 0; tlen = 0; - ofs = force_string(*get_lhs(OFS_node)); - deref = 0; + ofs = force_string(OFS_node->var_value); ptr = &fields_arr[parse_high_water]; while (ptr > &fields_arr[0]) { tmp = force_string(*ptr); @@ -96,18 +103,21 @@ rebuild_record() maxp = ptr; ptr--; } - tlen += ((maxp - fields_arr) - 1) * ofs->stlen; - emalloc(ops, char *, tlen + 1, "fix_fields"); - cops = ops; - for (ptr = &fields_arr[1]; ptr <= maxp; ptr++) { - tmp = force_string(*ptr); - bcopy(tmp->stptr, cops, tmp->stlen); - cops += tmp->stlen; - if (ptr != maxp) { - bcopy(ofs->stptr, cops, ofs->stlen); - cops += ofs->stlen; + if (maxp) { + tlen += ((maxp - fields_arr) - 1) * ofs->stlen; + emalloc(ops, char *, tlen + 1, "fix_fields"); + cops = ops; + for (ptr = &fields_arr[1]; ptr <= maxp; ptr++) { + tmp = force_string(*ptr); + bcopy(tmp->stptr, cops, tmp->stlen); + cops += tmp->stlen; + if (ptr != maxp) { + bcopy(ofs->stptr, cops, ofs->stlen); + cops += ofs->stlen; + } } - } + } else + ops = ""; tmp = make_string(ops, tlen); deref = fields_arr[0]; do_deref(); @@ -145,13 +155,18 @@ inrec() * setup $0, but defer parsing rest of line until reference is made to $(>0) * or to NF. At that point, parse only as much as necessary. */ +void set_record(buf, cnt) char *buf; int cnt; { - char *get_fs(); + register int i; assign_number(&(NF_node->var_value), (AWKNUM) -1); + for (i = 1; i <= parse_high_water; i++) { + deref = fields_arr[i]; + do_deref(); + } parse_high_water = 0; node0_valid = 1; if (buf == line_buf) { @@ -168,8 +183,9 @@ int cnt; } NODE ** -get_field(num) +get_field(num, assign) int num; +int assign; /* this field is on the LHS of an assign */ { int n; @@ -179,13 +195,15 @@ int num; */ if (num == 0 && node0_valid == 0) { /* first, parse remainder of input record */ - (void) parse_fields(HUGE-1, &parse_extent, - fields_arr[0]->stlen - (parse_extent-fields_arr[0]->stptr), + n = parse_fields(HUGE-1, &parse_extent, + node0.stlen - (parse_extent - node0.stptr), save_fs, set_field, (NODE *)NULL); + assign_number(&(NF_node->var_value), (AWKNUM) n); rebuild_record(); - parse_high_water = 0; return &fields_arr[0]; } + if (num > 0 && assign) + node0_valid = 0; if (num <= parse_high_water) /* we have already parsed this field */ return &fields_arr[num]; if (parse_high_water == 0 && num > 0) /* starting at the beginning */ @@ -199,17 +217,25 @@ int num; save_fs, set_field, (NODE *)NULL); if (num == HUGE-1) num = n; - if (n < num) /* requested field number beyond end of record; + if (n < num) { /* requested field number beyond end of record; * set_field will just extend the number of fields, * with empty fields */ set_field(num, f_empty, 0, (NODE *) NULL); + /* + * if this field is onthe LHS of an assignment, then we want to + * set NF to this value, below + */ + if (assign) + n = num; + } /* * if we reached the end of the record, set NF to the number of fields - * actually parsed. Note that num might actually refer to a field that + * so far. Note that num might actually refer to a field that * is beyond the end of the record, but we won't set NF to that value at - * this point, since this may only be a reference to the field and NF - * only gets set if the field is assigned to + * this point, since this is only a reference to the field and NF + * only gets set if the field is assigned to -- in this case n has + * been set to num above */ if (*parse_extent == '\0') assign_number(&(NF_node->var_value), (AWKNUM) n); @@ -230,10 +256,9 @@ int (*set) (); /* routine to set the value of the parsed field */ NODE *n; { char *s = *buf; - char *field; - int field_len; - char *scan; - char *end = s + len; + register char *field; + register char *scan; + register char *end = s + len; int NF = parse_high_water; if (up_to == HUGE) @@ -265,25 +290,19 @@ NODE *n; if (scan >= end) break; } - field_len = 0; field = scan; if (*fs == ' ') - while (*scan != ' ' && *scan != '\t' && scan < end) { + while (*scan != ' ' && *scan != '\t' && scan < end) scan++; - field_len++; - } else { - while (*scan != *fs && scan < end) { + while (*scan != *fs && scan < end) scan++; - field_len++; - } if (scan == end-1 && *scan == *fs) { - (*set)(++NF, field, field_len, n); + (*set)(++NF, field, scan - field, n); field = scan; - field_len = 0; } } - (*set)(++NF, field, field_len, n); + (*set)(++NF, field, scan - field, n); if (scan == end) break; } @@ -291,7 +310,7 @@ NODE *n; return NF; } -int +static int re_split(buf, len, fs, reregs) char *buf, *fs; int len; @@ -301,7 +320,11 @@ struct re_registers *reregs; static RPAT *rp; static char *last_fs = NULL; - if (last_fs != NULL && strcmp(fs, last_fs) != 0) { /* fs has changed */ + if ((last_fs != NULL && !STREQ(fs, last_fs)) + || (rp && ! strict && ((IGNORECASE_node->var_value->numbr != 0) + ^ (rp->translate != NULL)))) + { + /* fs has changed or IGNORECASE has changed */ free(rp->buffer); free(rp->fastmap); free((char *) rp); @@ -316,6 +339,10 @@ struct re_registers *reregs; emalloc(rp->fastmap, char *, 256, "re_split"); emalloc(last_fs, char *, strlen(fs) + 1, "re_split"); (void) strcpy(last_fs, fs); + if (! strict && IGNORECASE_node->var_value->numbr != 0.0) + rp->translate = casetable; + else + rp->translate = NULL; if (re_compile_pattern(fs, strlen(fs), rp) != NULL) fatal("illegal regular expression for FS: `%s'", fs); } @@ -374,12 +401,10 @@ do_getline(tree) NODE *tree; { FILE *save_fp; - FILE *redirect(); int cnt; NODE **lhs; - extern NODE **get_lhs(); extern FILE *input_file; - extern FILE *nextfile(); + int redir_error = 0; if (tree->rnode == NULL && (input_file == NULL || feof(input_file))) { input_file = nextfile(); @@ -388,7 +413,9 @@ NODE *tree; } save_fp = input_file; if (tree->rnode != NULL) { /* with redirection */ - input_file = redirect(tree->rnode); + input_file = redirect(tree->rnode, & redir_error); + if (input_file == NULL && redir_error) + return tmp_number((AWKNUM) -1.0); getline_redirect++; } if (tree->lnode == NULL) { /* read in $0 */ @@ -401,7 +428,7 @@ NODE *tree; char *s = NULL; int n = 0; - lhs = get_lhs(tree->lnode); + lhs = get_lhs(tree->lnode, 1); cnt = get_a_record(&s, &n); if (!getline_redirect) { assign_number(&(NR_node->var_value), @@ -417,18 +444,11 @@ NODE *tree; } *lhs = make_string(s, strlen(s)); free(s); + do_deref(); /* we may have to regenerate $0 here! */ if (field_num == 0) set_record(fields_arr[0]->stptr, fields_arr[0]->stlen); - else if (field_num > 0) { - node0_valid = 0; - if (NF_node->var_value->numbr == -1 && - field_num > NF_node->var_value->numbr) - assign_number(&(NF_node->var_value), - (AWKNUM) field_num); - } field_num = -1; - do_deref(); } getline_redirect = 0; input_file = save_fp; @@ -447,13 +467,12 @@ NODE *deref; */ NODE ** -get_lhs(ptr) +get_lhs(ptr, assign) NODE *ptr; +int assign; /* this is being called for the LHS of an assign. */ { register NODE **aptr; NODE *n; - NODE **assoc_lookup(); - extern NODE *concat_exp(); #ifdef DEBUG if (ptr == NULL) @@ -465,7 +484,7 @@ NODE *ptr; case Node_var: case Node_var_array: if (ptr == NF_node && (int) NF_node->var_value->numbr == -1) - (void) get_field(HUGE-1); /* parse entire record */ + (void) get_field(HUGE-1, assign); /* parse record */ deref = ptr->var_value; #ifdef DEBUG if (deref->type != Node_val) @@ -492,7 +511,7 @@ NODE *ptr; free_result(); if (field_num < 0) fatal("attempt to access field %d", field_num); - aptr = get_field(field_num); + aptr = get_field(field_num, assign); deref = *aptr; return aptr; @@ -516,6 +535,7 @@ NODE *ptr; return 0; } +void do_deref() { if (deref == NULL) @@ -537,16 +557,13 @@ do_deref() if (deref->stref > 0 && deref->stref != 255) deref->stref--; if (deref->stref > 0) { + deref->flags &= ~TEMP; deref = 0; return; } - free((char *)(deref->stptr)); + free(deref->stptr); } - deref->stptr = NULL; - deref->numbr = -1111111.0; - deref->flags = 0; - deref->type = Node_illegal; - free((char *)deref); + freenode(deref); } deref = 0; } @@ -3,8 +3,13 @@ * other pointer) being dereferenced. SUBS is a number or string used as the * subscript. * - * Copyright (C) 1988 Free Software Foundation - * + * $Log: awk8.c,v $ + * Revision 1.7 89/03/24 15:59:22 david + * AHASH becomes NODE + * + * Revision 1.6 89/03/21 10:44:17 david + * minor cleanup + * */ /* @@ -41,13 +46,10 @@ concat_exp(tree) NODE *tree; { NODE *r; - NODE *n; char *s; - unsigned char save; unsigned len; int subseplen; char *subsep; - extern NODE *SUBSEP_node; if (tree->type != Node_expression_list) return force_string(tree_eval(tree)); @@ -81,18 +83,18 @@ assoc_clear(symbol) NODE *symbol; { int i; - AHASH *bucket, *next; + NODE *bucket, *next; if (symbol->var_array == 0) return; for (i = 0; i < ASSOC_HASHSIZE; i++) { for (bucket = symbol->var_array[i]; bucket; bucket = next) { - next = bucket->next; - deref = bucket->name; + next = bucket->ahnext; + deref = bucket->ahname; do_deref(); - deref = bucket->value; + deref = bucket->ahvalue; do_deref(); - free((char *) bucket); + freenode(bucket); } symbol->var_array[i] = 0; } @@ -119,15 +121,15 @@ NODE *subs; /* * locate symbol[subs], given hash of subs and type */ -static AHASH * /* NULL if not found */ +static NODE * /* NULL if not found */ assoc_find(symbol, subs, hash1) NODE *symbol, *subs; int hash1; { - register AHASH *bucket; + register NODE *bucket; - for (bucket = symbol->var_array[hash1]; bucket; bucket = bucket->next) { - if (cmp_nodes(bucket->name, subs)) + for (bucket = symbol->var_array[hash1]; bucket; bucket = bucket->ahnext) { + if (cmp_nodes(bucket->ahname, subs)) continue; return bucket; } @@ -167,13 +169,13 @@ assoc_lookup(symbol, subs) NODE *symbol, *subs; { register int hash1 = 0, i; - register AHASH *bucket; + register NODE *bucket; hash1 = hash_calc(subs); if (symbol->var_array == 0) { /* this table really should grow * dynamically */ - emalloc(symbol->var_array, AHASH **, (sizeof(AHASH *) * + emalloc(symbol->var_array, NODE **, (sizeof(NODE *) * ASSOC_HASHSIZE), "assoc_lookup"); for (i = 0; i < ASSOC_HASHSIZE; i++) symbol->var_array[i] = 0; @@ -182,23 +184,22 @@ NODE *symbol, *subs; bucket = assoc_find(symbol, subs, hash1); if (bucket != NULL) { free_temp(subs); - return &(bucket->value); + return &(bucket->ahvalue); } } - emalloc(bucket, AHASH *, sizeof(AHASH), "assoc_lookup"); - bucket->symbol = symbol; - bucket->name = dupnode(subs); - bucket->value = Nnull_string; - bucket->next = symbol->var_array[hash1]; + bucket = newnode(Node_ahash); + bucket->ahname = dupnode(subs); + bucket->ahvalue = Nnull_string; + bucket->ahnext = symbol->var_array[hash1]; symbol->var_array[hash1] = bucket; - return &(bucket->value); + return &(bucket->ahvalue); } do_delete(symbol, tree) NODE *symbol, *tree; { register int hash1 = 0; - register AHASH *bucket, *last; + register NODE *bucket, *last; NODE *subs; if (symbol->var_array == 0) @@ -207,21 +208,21 @@ NODE *symbol, *tree; hash1 = hash_calc(subs); last = NULL; - for (bucket = symbol->var_array[hash1]; bucket; last = bucket, bucket = bucket->next) - if (cmp_nodes(bucket->name, subs) == 0) + for (bucket = symbol->var_array[hash1]; bucket; last = bucket, bucket = bucket->ahnext) + if (cmp_nodes(bucket->ahname, subs) == 0) break; free_temp(subs); if (bucket == NULL) return; if (last) - last->next = bucket->next; + last->ahnext = bucket->ahnext; else symbol->var_array[hash1] = NULL; - deref = bucket->name; + deref = bucket->ahname; do_deref(); - deref = bucket->value; + deref = bucket->ahvalue; do_deref(); - free((char *) bucket); + freenode(bucket); } struct search * @@ -245,8 +246,8 @@ struct search *lookat; { for (; lookat->numleft; lookat->numleft--) { while (lookat->bucket != 0) { - lookat->retval = lookat->bucket->name; - lookat->bucket = lookat->bucket->next; + lookat->retval = lookat->bucket->ahname; + lookat->bucket = lookat->bucket->ahnext; return lookat; } lookat->bucket = *++(lookat->arr_ptr); @@ -1,8 +1,5 @@ /* * routines for node management - * - * Copyright (C) 1988 Free Software Foundation - * */ /* @@ -29,8 +26,6 @@ AWKNUM r_force_number(n) NODE *n; { - double atof(); - #ifdef DEBUG if (n == NULL) cant_happen(); @@ -75,7 +70,7 @@ NODE *s; s->stlen = strlen(buf); s->stref = 1; emalloc(s->stptr, char *, s->stlen + 1, "force_string"); - memcpy(s->stptr, buf, s->stlen+1); + bcopy(buf, s->stptr, s->stlen+1); return s; } @@ -83,6 +78,7 @@ NODE *s; * This allocates a new node of type ty. Note that this node will not go * away unless freed. */ +#ifdef notdef NODE * newnode(ty) NODETYPE ty; @@ -95,6 +91,13 @@ NODETYPE ty; return r; } +freenode(n) +NODE *n; +{ + free((char *)n); +} +#endif + /* * Duplicate a node. (For global strings, "duplicate" means crank up the * reference count.) This creates global nodes. . . @@ -115,7 +118,7 @@ NODE *n; n->stref++; return n; } - emalloc(r, NODE *, sizeof(NODE), "dupnode"); + r = newnode(Node_illegal); *r = *n; r->flags &= ~(PERM|TEMP); r->flags |= MALLOC; @@ -214,6 +217,12 @@ char *s; break; } break; + case 'a': + if (strict) + goto def; + else + c = BELL; + break; case 'b': c = '\b'; break; @@ -230,9 +239,31 @@ char *s; c = '\t'; break; case 'v': - c = '\v'; + if (strict) + goto def; + else + c = '\v'; + break; + case 'x': + if (strict) + goto def; + else { + register int i; + + c = 0; + while (*pf && isxdigit(*pf)) { + if (isdigit(*pf)) + c += *pf - '0'; + else if (isupper(*pf)) + c += *pf - 'A' + 10; + else + c += *pf - 'a' + 10; + pf++; + } + } break; default: + def: *pt++ = '\\'; break; } @@ -247,7 +278,7 @@ char *s; len = pt - s; } r = newnode(Node_val); - emalloc(r->stptr, char *, len + 1, "make_string"); + emalloc(r->stptr, char *, len + 1, s); r->stlen = len; r->stref = 1; bcopy(s, r->stptr, len); @@ -0,0 +1,44 @@ +/* + * some speciallized memory allocation routines + * + * $Log: awka.c,v $ + * Revision 1.1 89/03/22 21:04:00 david + * Initial revision + * + */ + +#include "awk.h" + +#define NODECHUNK 50 + +NODE *nextfree = NULL; +NODE *lastfree = NULL; + +NODE * +newnode(ty) +NODETYPE ty; +{ + NODE *it; + NODE *np; + + if (nextfree == lastfree) { + emalloc(nextfree, NODE *, NODECHUNK * sizeof(NODE), "newnode"); + for (np = nextfree; np < &nextfree[NODECHUNK - 1]; np++) + np->nextp = np + 1; + np->nextp = lastfree; + lastfree = np; + } + it = nextfree; + nextfree = nextfree->nextp; + it->type = ty; + it->flags = MALLOC; + return it; +} + +freenode(it) +NODE *it; +{ + lastfree->nextp = it; + it->nextp = NULL; + lastfree = it; +} diff --git a/copyleft.c b/copyleft.c new file mode 100644 index 00000000..474beeb1 --- /dev/null +++ b/copyleft.c @@ -0,0 +1,10 @@ +char blurb[] = "\ +%s, Copyright (C) 1989, Free Software Foundation.\n\ +GNU Awk comes with ABSOLUTELY NO WARRANTY. This is free software, and\n\ +you are welcome to distribute it under the terms of the GNU General\n\ +Public License, which covers both the warranty information and the\n\ +terms for redistribution.\n\ +\n\ +You should have received a copy of the GNU General Public License along\n\ +with this program; if not, write to the Free Software Foundation, Inc.,\n\ +675 Mass Ave, Cambridge, MA 02139, USA.\n"; diff --git a/do_free.c b/do_free.c new file mode 100644 index 00000000..38e9605c --- /dev/null +++ b/do_free.c @@ -0,0 +1,5 @@ +do_free(s) +char *s; +{ + free(s); +} @@ -9,9 +9,9 @@ gawk \- pattern scanning and processing language ] [ .B \-D ] [ -.B \-i -] [ .B \-v +] [ +.B \-V ] .. [ @@ -33,9 +33,9 @@ gawk \- pattern scanning and processing language ] [ .B \-D ] [ -.B \-i -] [ .B \-v +] [ +.B \-V ] .. [ @@ -76,13 +76,7 @@ Use for the input field separator (the value of the .B FS predefined -variable). For compatibility with \s-1UNIX\s+1 -.IR awk , -if -.I fs -is ``t'', then -.B FS -will be set to the tab character. +variable). .TP .BI \-f " program-file" Read the AWK program source from the file @@ -130,6 +124,17 @@ type your program, and end it with a .B ^D (control-d). .PP +The environment variable +.B AWKPATH +specifies a search path to use when finding source files named with +the +.B \-f +option. If this variable does not exist, the default path is +\fB".:/usr/lib/awk:/usr/local/lib/awk"\fR. +If a file name given to the +.B \-f +option contains a ``/'' character, no path search is performed. +.PP .I Gawk compiles the program into an internal form, and then proceeds to read @@ -184,6 +189,11 @@ In the special case that .B FS is a single blank, fields are separated by runs of blanks and/or tabs. +Note that the value of +.B IGNORECASE +(see below) will also affect how fields are split when +.B FS +is a regular expression. .PP Each field in the input line may be referenced by its position, .BR $1 , @@ -223,12 +233,12 @@ to be recomputed, with the fields being separated by the value of AWK's built-in variables are: .PP .RS -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B ARGC the number of command line arguments (does not include options to .IR gawk , or the program source). -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B ARGV array of command line arguments. The array is indexed from 0 to @@ -237,7 +247,7 @@ array of command line arguments. The array is indexed from Dynamically changing the contents of .B ARGV can control the files used for data. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B ENVIRON An array containing the values of the current environment. The array is indexed by the environment variables, each element being @@ -248,36 +258,64 @@ Changing this array does not affect the environment seen by programs which spawns via redirection or the .B system function. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B FILENAME the name of the current input file. If no files are specified on the command line, the value of .B FILENAME is ``\-''. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B FNR the input record number in the current input file. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B FS the input field separator, a blank by default. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' +.B IGNORECASE +Controls the case-sensitivity of all regular expression operations. If +.B IGNORECASE +has a non-zero value, then pattern matching in rules, +field splitting with +.BR FS , +regular expression +matching with +.B ~ +and +.BR !~ , +and the +.BR gsub() , +.BR match() , +.BR split() , +and +.B sub() +pre-defined functions will all ignore case when doing regular expression +operations. Thus, if +.B IGNORECASE +is not equal to zero, +.B /aB/ +matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP, +and \fB"AB"\fP. +As with all AWK variables, the initial value of +.B IGNORECASE +is zero, so all regular expression operations are normally case-sensitive. +.TP \l'\fBIGNORECASE\fR' .B NF the number of fields in the current input record. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B NR the total number of input records seen so far. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B OFMT the output format for numbers, .B %.6g by default. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B OFS the output field separator, a blank by default. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B ORS the output record separator, by default a newline. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B RS the input record separator, by default a newline. .B RS @@ -292,17 +330,17 @@ is set to the null string, then the newline character always acts as a field separator, in addition to whatever value .B FS may have. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B RSTART the index of the first character matched by .BR match() ; 0 if no match. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B RLENGTH the length of the string matched by .BR match() ; \-1 if no match. -.TP \l'\fBFILENAME\fR' +.TP \l'\fBIGNORECASE\fR' .B SUBSEP the character used to separate multiple subscripts in array elements, by default \fB"\e034"\fR. @@ -740,6 +778,11 @@ functions accept the following conversion specification formats: .TP .B %c An ASCII character. +If the argument used for +.B %c +is numeric, it is treated as a character and printed. +Otherwise, the argument is assumed to be a string, and the only first +character of that string is printed. .TP .B %d A decimal number (the integer part). @@ -803,6 +846,53 @@ However, they may be simulated by using the AWK concatenation operation to build up a format specification dynamically. .PP +When doing I/O redirection from either +.B print +or +.B printf +into a file, +or via +.B getline +from a file, +.I gawk +recognizes certain special filenames internally. These filenames +allow access to open file descriptors inherited from +.IR gawk 's +parent process (usually the shell). The filenames are: +.RS +.TP +.B /dev/stdin +The standard input. +.TP +.B /dev/stdout +The standard output. +.TP +.B /dev/stderr +The standard error output. +.TP +.BI /dev/fd/\^ n +The file denoted by the open file descriptor +.IR n . +.RE +.PP +These are particularly useful for error messages. For example: +.PP +.RS +.ft B +print "You blew it!" > "/dev/stderr" +.ft R +.RE +.PP +whereas you would otherwise have to use +.PP +.RS +.ft B +print "You blew it!" | "cat 1>&2" +.ft R +.RE +.PP +These file names may also be used on the command line to name data files. +.PP AWK has the following pre-defined arithmetic functions: .PP .RS @@ -922,6 +1012,22 @@ If is omitted, the rest of .I s is used. +.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' +.BI tolower( str ) +returns a copy of the string +.IR str , +with all the upper-case characters in +.I str +translated to their corresponding lower-case counterparts. +Non-alphabetic characters are left unchanged. +.TP \l'\fBsprintf(\fIfmt\fB, \fIexpr-list\fB)\fR' +.BI toupper( str ) +returns a copy of the string +.IR str , +with all the lower-case characters in +.I str +translated to their corresponding upper-case counterparts. +Non-alphabetic characters are left unchanged. .RE .PP String constants in AWK are sequences of characters enclosed @@ -931,6 +1037,9 @@ are recognized, as in C. These are: .PP .RS .TP \l'\fB\e\fIddd\fR' +.B \ea +The ``alert'' character; usually the ASCII BEL character. +.TP \l'\fB\e\fIddd\fR' .B \eb backspace. .TP \l'\fB\e\fIddd\fR' @@ -949,10 +1058,24 @@ horizontal tab. .B \ev vertical tab. .TP \l'\fB\e\fIddd\fR' +.BI \ex "\^hex digits" +The character represented by the string of hexadecimal digits following +the +.BR \ex . +As in ANSI C, all following hexadecimal digits are considered part of +the escape sequence. +(This feature should tell us something about language design by committee.) +E.g., "\ex1B" is the ASCII ESC (escape) character. +.TP \l'\fB\e\fIddd\fR' .BI \e ddd The character represented by the 1-, 2-, or 3-digit sequence of octal digits. E.g. "\e033" is the ASCII ESC (escape) character. .RE +.PP +The escape sequences may also be used inside constant regular expressions +(e.g., +.B "/[\ \et\ef\en\er\ev]/" +matches whitespace characters). .SH FUNCTIONS Functions in AWK are defined as follows: .PP @@ -1064,10 +1187,8 @@ array. .I Gawk has some extensions to System V .IR awk . -They are described in this section. -All features described in this section may change at some time in -the future, or may go away entirely. They can be disabled either by -compiling +They are described in this section. All the extensions described here +can be disabled by compiling .I gawk with .BR \-DSTRICT , @@ -1075,25 +1196,51 @@ or by invoking .I gawk with the name .IR awk . -You should not write programs that depend upon them. -.PP -The environment variable -.B AWKPATH -specifies a search path to use when finding source files named with -the -.B \-f -option. If this variable does not exist, the default path is -\fB".:/usr/lib/awk:/usr/local/lib/awk"\fR. -If a file name given to the -.B \-f -option contains a ``/'' character, no path search is performed. +If the underlying operating system supports the +.B /dev/fd +directory and corresponding files, then +.I gawk +can be compiled with +.B \-DNO_DEV_FD +to disable the special filename processing. .PP -Two new relational operators are defined, -.BR ~~ , +The following features of +.I gawk +are not available in +System V +.IR awk . +.RS +.TP \l'\(bu' +\(bu +The +.BR \ea , +.BR \ev , +or +.B \ex +escape sequences are not recognized. +.TP \l'\(bu' +\(bu +The special file names available for I/O redirection are not recognized. +.TP \l'\(bu' +\(bu +The +.B tolower and -.BR !~~ . -These perform case independent regular expression match and no-match -operations, respectively. +.B toupper +built-in string functions are not available. +.TP \l'\(bu' +\(bu +The +.B IGNORECASE +variable and its side-effects are not available. +.TP \l'\(bu' +\(bu +No path search is performed for files named via the +.B \-f +option. Therefore the +.B AWKPATH +environment variable is not special. +.RE .PP The AWK book does not define the return value of the .B close @@ -1106,8 +1253,25 @@ or .IR pclose (3), when closing a file or pipe, respectively. .PP +When +.I gawk +is invoked as +.IR awk , +if the +.I fs +argument to the +.B \-F +option is ``t'', then +.B FS +will be set to the tab character. +Since this is a rather ugly special case, it is not the default behavior. +.PP +The rest of the features described in this section may change at some time in +the future, or may go away entirely. +You should not write programs that depend upon them. +.PP .I Gawk -accepts the following additional arguments: +accepts the following additional options: .ig .TP .B \-D @@ -1131,18 +1295,6 @@ maintainers, and may not even be compiled into .IR gawk . .. .TP -.B \-i -Ignore case when doing regular expression operations. -This causes -.B ~ -and -.B !~ -to behave like the new operators -.B ~~ -and -.BR !~~ , -described above. -.TP .B \-v Print version information for this particular copy of .I gawk @@ -1152,6 +1304,9 @@ This is useful mainly for knowing if the current copy of on your system is up to date with respect to whatever the Free Software Foundation is distributing. +.TP +.B \-V +Print the GNU copyright information message on the error output. .SH BUGS The .B \-F @@ -1164,12 +1319,13 @@ was designed and implemented by Alfred Aho, Peter Weinberger, and Brian Kernighan of AT&T Bell Labs. Brian Kernighan continues to maintain and enhance it. .PP -Paul Rubin and Jay Fenlason, with John Woods, -all of the Free Software Foundation, wrote +Paul Rubin and Jay Fenlason, +of the Free Software Foundation, wrote .IR gawk , to be compatible with the original version of .I awk distributed in Seventh Edition \s-1UNIX\s+1. +John Woods contributed a number of bug fixes. David Trueman of Dalhousie University, with contributions from Arnold Robbins at Emory University, made .I gawk diff --git a/getopt.c b/getopt.c new file mode 100644 index 00000000..c5eeddae --- /dev/null +++ b/getopt.c @@ -0,0 +1,90 @@ +/* +** @(#)getopt.c 2.5 (smail) 9/15/87 +*/ + +/* + * Here's something you've all been waiting for: the AT&T public domain + * source for getopt(3). It is the code which was given out at the 1985 + * UNIFORUM conference in Dallas. I obtained it by electronic mail + * directly from AT&T. The people there assure me that it is indeed + * in the public domain. + * + * There is no manual page. That is because the one they gave out at + * UNIFORUM was slightly different from the current System V Release 2 + * manual page. The difference apparently involved a note about the + * famous rules 5 and 6, recommending using white space between an option + * and its first argument, and not grouping options that have arguments. + * Getopt itself is currently lenient about both of these things White + * space is allowed, but not mandatory, and the last option in a group can + * have an argument. That particular version of the man page evidently + * has no official existence, and my source at AT&T did not send a copy. + * The current SVR2 man page reflects the actual behavor of this getopt. + * However, I am not about to post a copy of anything licensed by AT&T. + */ + +/* This include is needed only to get "index" defined as "strchr" on Sys V. */ +#include "defs.h" + +/*LINTLIBRARY*/ +#define NULL 0 +#define EOF (-1) +#define ERR(s, c) if(opterr){\ + extern int write();\ + char errbuf[2];\ + errbuf[0] = c; errbuf[1] = '\n';\ + (void) write(2, argv[0], (unsigned)strlen(argv[0]));\ + (void) write(2, s, (unsigned)strlen(s));\ + (void) write(2, errbuf, 2);} + +extern char *index(); + +int opterr = 1; +int optind = 1; +int optopt; +char *optarg; + +int +getopt(argc, argv, opts) +int argc; +char **argv, *opts; +{ + static int sp = 1; + register int c; + register char *cp; + + if(sp == 1) + if(optind >= argc || + argv[optind][0] != '-' || argv[optind][1] == '\0') + return(EOF); + else if(strcmp(argv[optind], "--") == NULL) { + optind++; + return(EOF); + } + optopt = c = argv[optind][sp]; + if(c == ':' || (cp=index(opts, c)) == NULL) { + ERR(": illegal option -- ", c); + if(argv[optind][++sp] == '\0') { + optind++; + sp = 1; + } + return('?'); + } + if(*++cp == ':') { + if(argv[optind][sp+1] != '\0') + optarg = &argv[optind++][sp+1]; + else if(++optind >= argc) { + ERR(": option requires an argument -- ", c); + sp = 1; + return('?'); + } else + optarg = argv[optind++]; + sp = 1; + } else { + if(argv[optind][++sp] == '\0') { + sp = 1; + optind++; + } + optarg = NULL; + } + return(c); +} diff --git a/obstack.h b/obstack.h deleted file mode 100644 index e69de29b..00000000 --- a/obstack.h +++ /dev/null @@ -502,6 +502,16 @@ re_compile_pattern (pattern, size, bufp) while (1) { PATFETCH (c); + + /* If awk, \ escapes a ] when inside [...]. */ + if ((obscure_syntax & RE_AWK_CLASS_HACK) + && c == '\\' && *p == ']') + { + PATFETCH(c1); + b[c1 / BYTEWIDTH] |= 1 << (c1 % BYTEWIDTH); + continue; + } + if (c == ']' && p != p1 + 1) break; if (*p == '-' && p[1] != ']') { @@ -143,8 +143,14 @@ what you give them. Help stamp out software-hoarding! */ *, +, ? - only special when not after the beginning, (, or | */ #define RE_CONTEXT_INDEP_OPS 32 +/* 0 means that \ before a ] inside [ and ] is taken as a real \. + 1 means that such a \ escapes the following ]. This is a + special case for AWK. Other \ inside [ ] seem to work ok. */ +#define RE_AWK_CLASS_HACK 64 + /* Now define combinations of bits for the standard possibilities. */ -#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS) +#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_CONTEXT_INDEP_OPS | RE_AWK_CLASS_HACK) #define RE_SYNTAX_EGREP (RE_SYNTAX_AWK | RE_NEWLINE_OR) #define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR) #define RE_SYNTAX_EMACS 0 @@ -1,4 +1,4 @@ -char *version_string = "@(#)Gnu Awk (gawk) 2.02beta 23 Dec 1988\n" + 4; +char *version_string = "@(#)Gnu Awk (gawk) 2.03beta 23 Mar 1989\n" + 4; /* 1.02 fixed /= += *= etc to return the new Left Hand Side instead of the Right Hand Side */ @@ -16,10 +16,13 @@ char *version_string = "@(#)Gnu Awk (gawk) 2.02beta 23 Dec 1988\n" + 4; particular in memory management which is currently almost non-existent. */ - /* JF: Modified to compile under GCC, and fixed a few - bugs while I was at it. I hope I didn't add any more. - I modified parse.y to reduce the number of reduce/reduce - conflicts. There are still a few left. */ +/* 2.01 JF: Modified to compile under GCC, and fixed a few + bugs while I was at it. I hope I didn't add any more. + I modified parse.y to reduce the number of reduce/reduce + conflicts. There are still a few left. */ /* 2.02 Fixed JF's bugs; improved memory management, still needs lots of work. */ + +/* 2.03 Major grammar rework and lots of bug fixes from David, + a number of minor bug fixes and new features from Arnold. */ |