diff options
Diffstat (limited to 'main.c')
-rw-r--r-- | main.c | 1047 |
1 files changed, 630 insertions, 417 deletions
@@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-2011 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-2014 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. @@ -24,7 +24,7 @@ */ /* FIX THIS BEFORE EVERY RELEASE: */ -#define UPDATE_YEAR 2011 +#define UPDATE_YEAR 2014 #include "awk.h" #include "getopt.h" @@ -33,8 +33,20 @@ #include <mcheck.h> #endif +#ifdef HAVE_LIBSIGSEGV +#include <sigsegv.h> +#else +typedef void *stackoverflow_context_t; +/* the argument to this macro is purposely not used */ +#define sigsegv_install_handler(catchsegv) signal(SIGSEGV, catchsig) +/* define as 0 rather than empty so that (void) cast on it works */ +#define stackoverflow_install_handler(catchstackoverflow, extra_stack, STACK_SIZE) 0 +#endif + #define DEFAULT_PROFILE "awkprof.out" /* where to put profile */ #define DEFAULT_VARFILE "awkvars.out" /* where to put vars */ +#define DEFAULT_PREC 53 +#define DEFAULT_ROUNDMODE "N" /* round to nearest */ static const char *varfile = DEFAULT_VARFILE; const char *command_file = NULL; /* debugger commands */ @@ -55,19 +67,20 @@ static void nostalgia(void) ATTRIBUTE_NORETURN; static void version(void) ATTRIBUTE_NORETURN; static void init_fds(void); static void init_groupset(void); - static void save_argv(int, char **); +extern int debug_prog(INSTRUCTION *pc); /* debug.c */ +extern int init_debug(); /* debug.c */ + /* These nodes store all the special variables AWK uses */ NODE *ARGC_node, *ARGIND_node, *ARGV_node, *BINMODE_node, *CONVFMT_node; NODE *ENVIRON_node, *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node; NODE *FNR_node, *FPAT_node, *FS_node, *IGNORECASE_node, *LINT_node; NODE *NF_node, *NR_node, *OFMT_node, *OFS_node, *ORS_node, *PROCINFO_node; NODE *RLENGTH_node, *RSTART_node, *RS_node, *RT_node, *SUBSEP_node; +NODE *PREC_node, *ROUNDMODE_node; NODE *TEXTDOMAIN_node; -NODE *_r; /* used as temporary in stack macros */ - long NF; long NR; long FNR; @@ -81,7 +94,7 @@ char *TEXTDOMAIN; /* * CONVFMT is a convenience pointer for the current number to string format. * We must supply an initial value to avoid recursion problems of - * set_CONVFMT -> fmt_index -> r_force_string: gets NULL CONVFMT + * set_CONVFMT -> fmt_index -> force_string: gets NULL CONVFMT * Fun, fun, fun, fun. */ char *CONVFMT = "%.6g"; @@ -125,23 +138,27 @@ struct pre_assign { static struct pre_assign *preassigns = NULL; /* requested via -v or -F */ static long numassigns = -1; /* how many of them */ -static int disallow_var_assigns = FALSE; /* true for --exec */ +static bool disallow_var_assigns = false; /* true for --exec */ static void add_preassign(enum assign_type type, char *val); -int do_flags = FALSE; -int do_optimize = TRUE; /* apply default optimizations */ -static int do_nostalgia = FALSE; /* provide a blast from the past */ -static int do_binary = FALSE; /* hands off my data! */ +static void parse_args(int argc, char **argv); +static void set_locale_stuff(void); +static bool stopped_early = false; + +int do_flags = false; +bool do_optimize = false; /* apply default optimizations */ +static int do_nostalgia = false; /* provide a blast from the past */ +static int do_binary = false; /* hands off my data! */ +static int do_version = false; /* print version info */ +static const char *locale = ""; /* default value to setlocale */ -int use_lc_numeric = FALSE; /* obey locale for decimal point */ +int use_lc_numeric = false; /* obey locale for decimal point */ -#if MBS_SUPPORT int gawk_mb_cur_max; /* MB_CUR_MAX value, see comment in main() */ -#endif -FILE *output_fp; /* default output for debugger */ -int output_is_tty = FALSE; /* control flushing of output */ +FILE *output_fp; /* default gawk output, can be redirected in the debugger */ +bool output_is_tty = false; /* control flushing of output */ /* default format for strftime(), available via PROCINFO */ const char def_strftime_format[] = "%a %b %e %H:%M:%S %Z %Y"; @@ -153,60 +170,55 @@ GETGROUPS_T *groupset; /* current group set */ int ngroups; /* size of said set */ #endif -void (*lintfunc)(const char *mesg, ...) = warning; +void (*lintfunc)(const char *mesg, ...) = r_warning; -/* - * Note: reserve -D for future use, to merge dgawk into gawk. - * Note: reserve -l for future use, for xgawk's -l option. - */ +/* Sorted by long option name! */ static const struct option optab[] = { - { "traditional", no_argument, NULL, 'c' }, - { "lint", optional_argument, NULL, 'L' }, + { "assign", required_argument, NULL, 'v' }, + { "bignum", no_argument, NULL, 'M' }, + { "characters-as-bytes", no_argument, & do_binary, 'b' }, + { "copyright", no_argument, NULL, 'C' }, + { "debug", optional_argument, NULL, 'D' }, + { "dump-variables", optional_argument, NULL, 'd' }, + { "exec", required_argument, NULL, 'E' }, + { "field-separator", required_argument, NULL, 'F' }, + { "file", required_argument, NULL, 'f' }, + { "gen-pot", no_argument, NULL, 'g' }, + { "help", no_argument, NULL, 'h' }, + { "include", required_argument, NULL, 'i' }, + { "lint", optional_argument, NULL, 'L' }, { "lint-old", no_argument, NULL, 't' }, + { "load", required_argument, NULL, 'l' }, +#if defined(LOCALEDEBUG) + { "locale", required_argument, NULL, 'Z' }, +#endif + { "non-decimal-data", no_argument, NULL, 'n' }, + { "nostalgia", no_argument, & do_nostalgia, 1 }, { "optimize", no_argument, NULL, 'O' }, +#if defined(YYDEBUG) || defined(GAWKDEBUG) + { "parsedebug", no_argument, NULL, 'Y' }, +#endif { "posix", no_argument, NULL, 'P' }, - { "command", required_argument, NULL, 'R' }, - { "nostalgia", no_argument, & do_nostalgia, 1 }, - { "gen-pot", no_argument, NULL, 'g' }, - { "non-decimal-data", no_argument, NULL, 'n' }, - { "profile", optional_argument, NULL, 'p' }, - { "copyright", no_argument, NULL, 'C' }, - { "field-separator", required_argument, NULL, 'F' }, - { "file", required_argument, NULL, 'f' }, + { "pretty-print", optional_argument, NULL, 'o' }, + { "profile", optional_argument, NULL, 'p' }, { "re-interval", no_argument, NULL, 'r' }, - { "source", required_argument, NULL, 'e' }, - { "dump-variables", optional_argument, NULL, 'd' }, - { "assign", required_argument, NULL, 'v' }, - { "version", no_argument, NULL, 'V' }, - { "help", no_argument, NULL, 'h' }, - { "exec", required_argument, NULL, 'E' }, - { "use-lc-numeric", no_argument, & use_lc_numeric, 1 }, - { "characters-as-bytes", no_argument, & do_binary, 'b' }, { "sandbox", no_argument, NULL, 'S' }, -#if defined(YYDEBUG) || defined(GAWKDEBUG) - { "parsedebug", no_argument, NULL, 'Y' }, -#endif + { "source", required_argument, NULL, 'e' }, + { "traditional", no_argument, NULL, 'c' }, + { "use-lc-numeric", no_argument, & use_lc_numeric, 1 }, + { "version", no_argument, & do_version, 'V' }, { NULL, 0, NULL, '\0' } }; - /* main --- process args, parse program, run it, clean up */ int main(int argc, char **argv) { - /* - * The + on the front tells GNU getopt not to rearrange argv. - * Note: reserve -D for future use, to merge dgawk into gawk. - * Note: reserve -l for future use, for xgawk's -l option. - */ - const char *optlist = "+F:f:v:W;m:bcCd::e:E:gh:L:nNOp::PrR:StVY"; - int stopped_early = FALSE; - int old_optind; int i; - int c; - char *scan, *src; char *extra_stack; + int have_srcfile = 0; + SRCFILE *s; /* do these checks early */ if (getenv("TIDYMEM") != NULL) @@ -219,49 +231,11 @@ main(int argc, char **argv) #endif /* HAVE_MTRACE */ #endif /* HAVE_MCHECK_H */ -#if defined(LC_CTYPE) - setlocale(LC_CTYPE, ""); -#endif -#if defined(LC_COLLATE) - setlocale(LC_COLLATE, ""); -#endif -#if defined(LC_MESSAGES) - setlocale(LC_MESSAGES, ""); -#endif -#if defined(LC_NUMERIC) && defined(HAVE_LOCALE_H) - /* - * Force the issue here. According to POSIX 2001, decimal - * point is used for parsing source code and for command-line - * assignments and the locale value for processing input, - * number to string conversion, and printing output. - * - * 10/2005 --- see below also; we now only use the locale's - * decimal point if do_posix in effect. - * - * 9/2007: - * This is a mess. We need to get the locale's numeric info for - * the thousands separator for the %'d flag. - */ - setlocale(LC_NUMERIC, ""); - init_locale(& loc); - setlocale(LC_NUMERIC, "C"); -#endif -#if defined(LC_TIME) - setlocale(LC_TIME, ""); -#endif - -#if MBS_SUPPORT - /* - * In glibc, MB_CUR_MAX is actually a function. This value is - * tested *a lot* in many speed-critical places in gawk. Caching - * this value once makes a speed difference. - */ - gawk_mb_cur_max = MB_CUR_MAX; - /* Without MBS_SUPPORT, gawk_mb_cur_max is 1. */ + myname = gawk_name(argv[0]); + os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */ - /* init the cache for checking bytes if they're characters */ - init_btowc_cache(); -#endif + if (argc < 2) + usage(EXIT_FAILURE, stderr); (void) bindtextdomain(PACKAGE, LOCALEDIR); (void) textdomain(PACKAGE); @@ -270,6 +244,22 @@ main(int argc, char **argv) #ifdef SIGBUS (void) signal(SIGBUS, catchsig); #endif +#ifdef SIGPIPE + /* + * Ignore SIGPIPE so that writes to pipes that fail don't + * kill the process but instead return -1 and set errno. + * That lets us print a fatal message instead of dieing suddenly. + * + * Note that this requires ignoring EPIPE when writing and + * flushing stdout/stderr in other parts of the program. E.g., + * + * gawk 'BEGIN { print "hi" }' | exit + * + * should not give us "broken pipe" messages --- mainly because + * it did not do so in the past and people would complain. + */ + signal(SIGPIPE, SIG_IGN); +#endif (void) sigsegv_install_handler(catchsegv); #define STACK_SIZE (16*1024) @@ -277,11 +267,8 @@ main(int argc, char **argv) (void) stackoverflow_install_handler(catchstackoverflow, extra_stack, STACK_SIZE); #undef STACK_SIZE - myname = gawk_name(argv[0]); - os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */ - - if (argc < 2) - usage(EXIT_FAILURE, stderr); + /* initialize the null string */ + Nnull_string = make_string("", 0); /* Robustness: check that file descriptors 0, 1, 2 are open */ init_fds(); @@ -289,226 +276,27 @@ main(int argc, char **argv) /* init array handling. */ array_init(); - /* we do error messages ourselves on invalid options */ - opterr = FALSE; + /* init the symbol tables */ + init_symbol_table(); - /* copy argv before getopt gets to it; used to restart the debugger */ - save_argv(argc, argv); + output_fp = stdout; /* initialize global (main) execution context */ push_context(new_context()); - /* option processing. ready, set, go! */ - for (optopt = 0, old_optind = 1; - (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF; - optopt = 0, old_optind = optind) { - if (do_posix) - opterr = TRUE; - - switch (c) { - case 'F': - add_preassign(PRE_ASSIGN_FS, optarg); - break; - - case 'E': - disallow_var_assigns = TRUE; - /* fall through */ - case 'f': - /* - * Allow multiple -f options. - * This makes function libraries real easy. - * Most of the magic is in the scanner. - * - * The following is to allow for whitespace at the end - * of a #! /bin/gawk line in an executable file - */ - scan = optarg; - if (argv[optind-1] != optarg) - while (isspace((unsigned char) *scan)) - scan++; - src = (*scan == '\0' ? argv[optind++] : optarg); - (void) add_srcfile((src && src[0] == '-' && src[1] == '\0') ? - SRC_STDIN : SRC_FILE, - src, srcfiles, NULL, NULL); - - break; - - case 'v': - add_preassign(PRE_ASSIGN, optarg); - break; - - case 'm': - /* - * BWK awk extension. - * -mf nnn set # fields, gawk ignores - * -mr nnn set record length, ditto - * - * As of at least 10/2007, BWK awk also ignores it. - */ - if (do_lint) - lintwarn(_("`-m[fr]' option irrelevant in gawk")); - if (optarg[0] != 'r' && optarg[0] != 'f') - warning(_("-m option usage: `-m[fr] nnn'")); - break; - - case 'b': - do_binary = TRUE; - break; - - case 'c': - do_flags |= DO_TRADITIONAL; - break; - - case 'C': - copyleft(); - break; + parse_args(argc, argv); - case 'd': - do_flags |= DO_DUMP_VARS; - if (optarg != NULL && optarg[0] != '\0') - varfile = optarg; - break; - - case 'e': - if (optarg[0] == '\0') - warning(_("empty argument to `-e/--source' ignored")); - else - (void) add_srcfile(SRC_CMDLINE, optarg, srcfiles, NULL, NULL); - break; + set_locale_stuff(); - case 'g': - do_flags |= DO_INTL; - break; - - case 'h': - /* write usage to stdout, per GNU coding stds */ - usage(EXIT_SUCCESS, stdout); - break; - - case 'L': -#ifndef NO_LINT - do_flags |= DO_LINT_ALL; - if (optarg != NULL) { - if (strcmp(optarg, "fatal") == 0) - lintfunc = r_fatal; - else if (strcmp(optarg, "invalid") == 0) { - do_flags &= ~DO_LINT_ALL; - do_flags |= DO_LINT_INVALID; - } - } - break; - - case 't': - do_flags |= DO_LINT_OLD; - break; -#else - case 'L': - case 't': - break; -#endif - - case 'n': - do_flags |= DO_NON_DEC_DATA; - break; - - case 'N': - use_lc_numeric = TRUE; - break; - - case 'O': - do_optimize++; - break; - - case 'p': - do_flags |= DO_PROFILING; - if (optarg != NULL) - set_prof_file(optarg); - else - set_prof_file(DEFAULT_PROFILE); - break; - - case 'P': - do_flags |= DO_POSIX; - break; - - case 'r': - do_flags |= DO_INTERVALS; - break; - - case 'S': - do_flags |= DO_SANDBOX; - break; - - case 'V': - version(); - break; - - case 'W': /* gawk specific options - now in getopt_long */ - fprintf(stderr, _("%s: option `-W %s' unrecognized, ignored\n"), - argv[0], optarg); - break; - - case 0: - /* - * getopt_long found an option that sets a variable - * instead of returning a letter. Do nothing, just - * cycle around for the next one. - */ - break; - - case 'Y': - case 'R': -#if defined(YYDEBUG) || defined(GAWKDEBUG) - if (c == 'Y') { - yydebug = 2; - break; - } -#endif - if (c == 'R' && which_gawk == exe_debugging) { - if (optarg[0] != '\0') - command_file = optarg; - break; - } - /* if not debugging or dgawk, fall through */ + /* + * In glibc, MB_CUR_MAX is actually a function. This value is + * tested *a lot* in many speed-critical places in gawk. Caching + * this value once makes a speed difference. + */ + gawk_mb_cur_max = MB_CUR_MAX; - case '?': - default: - /* - * If not posix, an unrecognized option stops argument - * processing so that it can go into ARGV for the awk - * program to see. This makes use of ``#! /bin/gawk -f'' - * easier. - * - * However, it's never simple. If optopt is set, - * an option that requires an argument didn't get the - * argument. We care because if opterr is 0, then - * getopt_long won't print the error message for us. - */ - if (! do_posix - && (optopt == '\0' || strchr(optlist, optopt) == NULL)) { - /* - * can't just do optind--. In case of an - * option with >= 2 letters, getopt_long - * won't have incremented optind. - */ - optind = old_optind; - stopped_early = TRUE; - goto out; - } else if (optopt != '\0') { - /* Use POSIX required message format */ - fprintf(stderr, - _("%s: option requires an argument -- %c\n"), - myname, optopt); - usage(EXIT_FAILURE, stderr); - } - /* else - let getopt print error message for us */ - break; - } - if (c == 'E') /* --exec ends option processing */ - break; - } -out: + /* init the cache for checking bytes if they're characters */ + init_btowc_cache(); if (do_nostalgia) nostalgia(); @@ -522,7 +310,7 @@ out: } if (do_posix) { - use_lc_numeric = TRUE; + use_lc_numeric = true; if (do_traditional) /* both on command line */ warning(_("`--posix' overrides `--traditional'")); else @@ -541,29 +329,38 @@ out: if (do_lint && os_is_setuid()) warning(_("running %s setuid root may be a security problem"), myname); -#if MBS_SUPPORT if (do_binary) { if (do_posix) - warning(_("`--posix' overrides `--binary'")); + warning(_("`--posix' overrides `--characters-as-bytes'")); else gawk_mb_cur_max = 1; /* hands off my data! */ - } +#if defined(LC_ALL) + setlocale(LC_ALL, "C"); #endif + } - /* - * Force profiling if this is pgawk. - * Don't bother if the command line already set profiling up. - */ - if (! do_profiling) - init_profiling(& do_flags, DEFAULT_PROFILE); + if (do_debug) /* Need to register the debugger pre-exec hook before any other */ + init_debug(); + +#ifdef HAVE_MPFR + /* Set up MPFR defaults, and register pre-exec hook to process arithmetic opcodes */ + if (do_mpfr) + init_mpfr(DEFAULT_PREC, DEFAULT_ROUNDMODE); +#endif /* load group set */ init_groupset(); - /* initialize the null string */ - Nnull_string = make_string("", 0); - Nnull_string->numbr = 0.0; - Nnull_string->flags = (MALLOC|STRCUR|STRING|NUMCUR|NUMBER); +#ifdef HAVE_MPFR + if (do_mpfr) { + mpz_init(Nnull_string->mpg_i); + Nnull_string->flags = (MALLOC|STRCUR|STRING|MPZN|NUMCUR|NUMBER); + } else +#endif + { + Nnull_string->numbr = 0.0; + Nnull_string->flags = (MALLOC|STRCUR|STRING|NUMCUR|NUMBER); + } /* * Tell the regex routines how they should work. @@ -572,8 +369,6 @@ out: */ resetup(); - (void) grow_stack(); - /* Set up the special variables */ init_vars(); @@ -583,7 +378,7 @@ out: /* Now process the pre-assignments */ for (i = 0; i <= numassigns; i++) { if (preassigns[i].type == PRE_ASSIGN) - (void) arg_assign(preassigns[i].val, TRUE); + (void) arg_assign(preassigns[i].val, true); else /* PRE_ASSIGN_FS */ cmdline_fs(preassigns[i].val); efree(preassigns[i].val); @@ -592,10 +387,10 @@ out: if (preassigns != NULL) efree(preassigns); - if ((BINMODE & 1) != 0) + if ((BINMODE & BINMODE_INPUT) != 0) if (os_setbinmode(fileno(stdin), O_BINARY) == -1) fatal(_("can't set binary mode on stdin (%s)"), strerror(errno)); - if ((BINMODE & 2) != 0) { + if ((BINMODE & BINMODE_OUTPUT) != 0) { if (os_setbinmode(fileno(stdout), O_BINARY) == -1) fatal(_("can't set binary mode on stdout (%s)"), strerror(errno)); if (os_setbinmode(fileno(stderr), O_BINARY) == -1) @@ -606,15 +401,34 @@ out: setbuf(stdout, (char *) NULL); /* make debugging easier */ #endif if (os_isatty(fileno(stdout))) - output_is_tty = TRUE; + output_is_tty = true; + + /* initialize API before loading extension libraries */ + init_ext_api(); + + /* load extension libs */ + for (s = srcfiles->next; s != srcfiles; s = s->next) { + if (s->stype == SRC_EXTLIB) + load_ext(s->fullpath); + else if (s->stype != SRC_INC) + have_srcfile++; + } + + /* do version check after extensions are loaded to get extension info */ + if (do_version) + version(); + /* No -f or --source options, use next arg */ - if (srcfiles->next == srcfiles) { + if (! have_srcfile) { if (optind > argc - 1 || stopped_early) /* no args left or no program */ usage(EXIT_FAILURE, stderr); (void) add_srcfile(SRC_CMDLINE, argv[optind], srcfiles, NULL, NULL); optind++; } + /* Select the interpreter routine */ + init_interpret(); + init_args(optind, argc, do_posix ? argv[0] : myname, argv); @@ -628,19 +442,24 @@ out: setlocale(LC_NUMERIC, "C"); #endif /* Read in the program */ - if (parse_program(&code_block) != 0) + if (parse_program(& code_block) != 0) exit(EXIT_FAILURE); if (do_intl) exit(EXIT_SUCCESS); + install_builtins(); + if (do_lint) shadow_funcs(); if (do_lint && code_block->nexti->opcode == Op_atexit) lintwarn(_("no program text at all!")); - init_profiling_signals(); + load_symbols(); + + if (do_profile) + init_profiling_signals(); #if defined(LC_NUMERIC) /* @@ -659,13 +478,20 @@ out: * data using the local decimal point. */ if (use_lc_numeric) - setlocale(LC_NUMERIC, ""); + setlocale(LC_NUMERIC, locale); #endif + init_io(); output_fp = stdout; - interpret(code_block); - if (do_profiling) { + if (do_debug) + debug_prog(code_block); + else if (do_pretty_print && ! do_profile) + ; /* run pretty printer only. */ + else + interpret(code_block); + + if (do_pretty_print) { dump_prog(code_block); dump_funcs(); } @@ -673,6 +499,11 @@ out: if (do_dump_vars) dump_vars(varfile); +#ifdef HAVE_MPFR + if (do_mpfr) + cleanup_mpfr(); +#endif + if (do_tidy_mem) release_all_vars(); @@ -680,7 +511,7 @@ out: if (extra_stack) efree(extra_stack); - exit(exit_val); /* more portable */ + final_exit(exit_val); return exit_val; /* to suppress warnings */ } @@ -718,8 +549,7 @@ usage(int exitval, FILE *fp) /* Not factoring out common stuff makes it easier to translate. */ fprintf(fp, _("Usage: %s [POSIX or GNU style options] -f progfile [--] file ...\n"), myname); - if (which_gawk != exe_debugging) - fprintf(fp, _("Usage: %s [POSIX or GNU style options] [--] %cprogram%c file ...\n"), + fprintf(fp, _("Usage: %s [POSIX or GNU style options] [--] %cprogram%c file ...\n"), myname, quote, quote); /* GNU long options info. This is too many options. */ @@ -733,19 +563,22 @@ usage(int exitval, FILE *fp) fputs(_("\t-c\t\t\t--traditional\n"), fp); fputs(_("\t-C\t\t\t--copyright\n"), fp); fputs(_("\t-d[file]\t\t--dump-variables[=file]\n"), fp); + fputs(_("\t-D[file]\t\t--debug[=file]\n"), fp); fputs(_("\t-e 'program-text'\t--source='program-text'\n"), fp); fputs(_("\t-E file\t\t\t--exec=file\n"), fp); fputs(_("\t-g\t\t\t--gen-pot\n"), fp); fputs(_("\t-h\t\t\t--help\n"), fp); - fputs(_("\t-L [fatal]\t\t--lint[=fatal]\n"), fp); - fputs(_("\t-n\t\t\t--non-decimal-data\n"), fp); + fputs(_("\t-i includefile\t\t--include=includefile\n"), fp); + fputs(_("\t-l library\t\t--load=library\n"), fp); + fputs(_("\t-L[fatal|invalid]\t--lint[=fatal|invalid]\n"), fp); + fputs(_("\t-M\t\t\t--bignum\n"), fp); fputs(_("\t-N\t\t\t--use-lc-numeric\n"), fp); + fputs(_("\t-n\t\t\t--non-decimal-data\n"), fp); + fputs(_("\t-o[file]\t\t--pretty-print[=file]\n"), fp); fputs(_("\t-O\t\t\t--optimize\n"), fp); fputs(_("\t-p[file]\t\t--profile[=file]\n"), fp); fputs(_("\t-P\t\t\t--posix\n"), fp); fputs(_("\t-r\t\t\t--re-interval\n"), fp); - if (which_gawk == exe_debugging) - fputs(_("\t-R file\t\t\t--command=file\n"), fp); fputs(_("\t-S\t\t\t--sandbox\n"), fp); fputs(_("\t-t\t\t\t--lint-old\n"), fp); fputs(_("\t-V\t\t\t--version\n"), fp); @@ -776,8 +609,13 @@ By default it reads standard input and writes standard output.\n\n"), fp); fflush(fp); if (ferror(fp)) { - if (fp == stdout) - warning(_("error writing standard output (%s)"), strerror(errno)); + /* don't warn about stdout/stderr if EPIPE, but do error exit */ + if (errno != EPIPE) { + if (fp == stdout) + warning(_("error writing standard output (%s)"), strerror(errno)); + else if (fp == stderr) + warning(_("error writing standard error (%s)"), strerror(errno)); + } exit(EXIT_FAILURE); } @@ -814,7 +652,9 @@ along with this program. If not, see http://www.gnu.org/licenses/.\n"); fflush(stdout); if (ferror(stdout)) { - warning(_("error writing standard output (%s)"), strerror(errno)); + /* don't warn about stdout if EPIPE, but do error exit */ + if (errno != EPIPE) + warning(_("error writing standard output (%s)"), strerror(errno)); exit(EXIT_FAILURE); } @@ -893,40 +733,43 @@ struct varinit { AWKNUM numval; Func_ptr update; Func_ptr assign; - int do_assign; + bool do_assign; int flags; #define NO_INSTALL 0x01 #define NON_STANDARD 0x02 +#define NOT_OFF_LIMITS 0x04 /* may be accessed by extension function */ }; static const struct varinit varinit[] = { -{NULL, "ARGC", NULL, 0, NULL, NULL, FALSE, NO_INSTALL }, -{&ARGIND_node, "ARGIND", NULL, 0, NULL, NULL, FALSE, NON_STANDARD }, -{NULL, "ARGV", NULL, 0, NULL, NULL, FALSE, NO_INSTALL }, -{&BINMODE_node, "BINMODE", NULL, 0, NULL, set_BINMODE, FALSE, NON_STANDARD }, -{&CONVFMT_node, "CONVFMT", "%.6g", 0, NULL, set_CONVFMT,TRUE, 0 }, -{NULL, "ENVIRON", NULL, 0, NULL, NULL, FALSE, NO_INSTALL }, -{&ERRNO_node, "ERRNO", "", 0, NULL, NULL, FALSE, NON_STANDARD }, -{&FIELDWIDTHS_node, "FIELDWIDTHS", "", 0, NULL, set_FIELDWIDTHS, FALSE, NON_STANDARD }, -{&FILENAME_node, "FILENAME", "", 0, NULL, NULL, FALSE, 0 }, -{&FNR_node, "FNR", NULL, 0, update_FNR, set_FNR, TRUE, 0 }, -{&FS_node, "FS", " ", 0, NULL, set_FS, FALSE, 0 }, -{&FPAT_node, "FPAT", "[^[:space:]]+", 0, NULL, set_FPAT, FALSE, NON_STANDARD }, -{&IGNORECASE_node, "IGNORECASE", NULL, 0, NULL, set_IGNORECASE, FALSE, NON_STANDARD }, -{&LINT_node, "LINT", NULL, 0, NULL, set_LINT, FALSE, NON_STANDARD }, -{&NF_node, "NF", NULL, -1, update_NF, set_NF, FALSE, 0 }, -{&NR_node, "NR", NULL, 0, update_NR, set_NR, TRUE, 0 }, -{&OFMT_node, "OFMT", "%.6g", 0, NULL, set_OFMT, TRUE, 0 }, -{&OFS_node, "OFS", " ", 0, NULL, set_OFS, TRUE, 0 }, -{&ORS_node, "ORS", "\n", 0, NULL, set_ORS, TRUE, 0 }, -{NULL, "PROCINFO", NULL, 0, NULL, NULL, FALSE, NO_INSTALL | NON_STANDARD }, -{&RLENGTH_node, "RLENGTH", NULL, 0, NULL, NULL, FALSE, 0 }, -{&RS_node, "RS", "\n", 0, NULL, set_RS, TRUE, 0 }, -{&RSTART_node, "RSTART", NULL, 0, NULL, NULL, FALSE, 0 }, -{&RT_node, "RT", "", 0, NULL, NULL, FALSE, NON_STANDARD }, -{&SUBSEP_node, "SUBSEP", "\034", 0, NULL, set_SUBSEP, TRUE, 0 }, -{&TEXTDOMAIN_node, "TEXTDOMAIN", "messages", 0, NULL, set_TEXTDOMAIN, TRUE, NON_STANDARD }, -{0, NULL, NULL, 0, NULL, NULL, FALSE, 0 }, +{NULL, "ARGC", NULL, 0, NULL, NULL, false, NO_INSTALL }, +{&ARGIND_node, "ARGIND", NULL, 0, NULL, NULL, false, NON_STANDARD }, +{NULL, "ARGV", NULL, 0, NULL, NULL, false, NO_INSTALL }, +{&BINMODE_node, "BINMODE", NULL, 0, NULL, set_BINMODE, false, NON_STANDARD }, +{&CONVFMT_node, "CONVFMT", "%.6g", 0, NULL, set_CONVFMT,true, 0 }, +{NULL, "ENVIRON", NULL, 0, NULL, NULL, false, NO_INSTALL }, +{&ERRNO_node, "ERRNO", "", 0, NULL, NULL, false, NON_STANDARD }, +{&FIELDWIDTHS_node, "FIELDWIDTHS", "", 0, NULL, set_FIELDWIDTHS, false, NON_STANDARD }, +{&FILENAME_node, "FILENAME", "", 0, NULL, NULL, false, 0 }, +{&FNR_node, "FNR", NULL, 0, update_FNR, set_FNR, true, 0 }, +{&FS_node, "FS", " ", 0, NULL, set_FS, false, 0 }, +{&FPAT_node, "FPAT", "[^[:space:]]+", 0, NULL, set_FPAT, false, NON_STANDARD }, +{&IGNORECASE_node, "IGNORECASE", NULL, 0, NULL, set_IGNORECASE, false, NON_STANDARD }, +{&LINT_node, "LINT", NULL, 0, NULL, set_LINT, false, NON_STANDARD }, +{&PREC_node, "PREC", NULL, DEFAULT_PREC, NULL, set_PREC, false, NON_STANDARD}, +{&NF_node, "NF", NULL, -1, update_NF, set_NF, false, 0 }, +{&NR_node, "NR", NULL, 0, update_NR, set_NR, true, 0 }, +{&OFMT_node, "OFMT", "%.6g", 0, NULL, set_OFMT, true, 0 }, +{&OFS_node, "OFS", " ", 0, NULL, set_OFS, true, 0 }, +{&ORS_node, "ORS", "\n", 0, NULL, set_ORS, true, 0 }, +{NULL, "PROCINFO", NULL, 0, NULL, NULL, false, NO_INSTALL | NON_STANDARD | NOT_OFF_LIMITS }, +{&RLENGTH_node, "RLENGTH", NULL, 0, NULL, NULL, false, 0 }, +{&ROUNDMODE_node, "ROUNDMODE", DEFAULT_ROUNDMODE, 0, NULL, set_ROUNDMODE, false, NON_STANDARD }, +{&RS_node, "RS", "\n", 0, NULL, set_RS, true, 0 }, +{&RSTART_node, "RSTART", NULL, 0, NULL, NULL, false, 0 }, +{&RT_node, "RT", "", 0, NULL, NULL, false, NON_STANDARD }, +{&SUBSEP_node, "SUBSEP", "\034", 0, NULL, set_SUBSEP, true, 0 }, +{&TEXTDOMAIN_node, "TEXTDOMAIN", "messages", 0, NULL, set_TEXTDOMAIN, true, NON_STANDARD }, +{0, NULL, NULL, 0, NULL, NULL, false, 0 }, }; /* init_vars --- actually initialize everything in the symbol table */ @@ -941,18 +784,50 @@ init_vars() if ((vp->flags & NO_INSTALL) != 0) continue; n = *(vp->spec) = install_symbol(estrdup(vp->name, strlen(vp->name)), Node_var); - n->var_value = vp->strval == NULL ? make_number(vp->numval) - : make_string(vp->strval, strlen(vp->strval)); + if (vp->strval != NULL) + n->var_value = make_string(vp->strval, strlen(vp->strval)); + else + n->var_value = make_number(vp->numval); n->var_assign = (Func_ptr) vp->assign; n->var_update = (Func_ptr) vp->update; if (vp->do_assign) (*(vp->assign))(); } - /* Set up deferred variables (loaded only when accessed). */ + /* Load PROCINFO and ENVIRON */ if (! do_traditional) - register_deferred_variable("PROCINFO", load_procinfo); - register_deferred_variable("ENVIRON", load_environ); + load_procinfo(); + load_environ(); +} + +/* path_environ --- put path variable into environment if not already there */ + +static void +path_environ(const char *pname, const char *dflt) +{ + const char *val; + NODE **aptr; + NODE *tmp; + + tmp = make_string(pname, strlen(pname)); + /* + * On VMS, environ[] only holds a subset of what getenv() can + * find, so look AWKPATH up before resorting to default path. + */ + val = getenv(pname); + if (val == NULL || *val == '\0') + val = dflt; + aptr = assoc_lookup(ENVIRON_node, tmp); + /* + * If original value was the empty string, set it to + * the default value. + */ + if ((*aptr)->stlen == 0) { + unref(*aptr); + *aptr = make_string(val, strlen(val)); + } + + unref(tmp); } /* load_environ --- populate the ENVIRON array */ @@ -967,6 +842,12 @@ load_environ() NODE **aptr; int i; NODE *tmp; + static bool been_here = false; + + if (been_here) + return ENVIRON_node; + + been_here = true; ENVIRON_node = install_symbol(estrdup("ENVIRON", 7), Node_var_array); for (i = 0; environ[i] != NULL; i++) { @@ -990,23 +871,20 @@ load_environ() *--val = '='; } /* - * Put AWKPATH into ENVIRON if it's not there. + * Put AWKPATH and AWKLIBPATH into ENVIRON if not already there. * This allows querying it from within awk programs. + * + * October 2014: + * If their values are "", override with the default values; + * since 2.10 AWKPATH used default value if environment's + * value was "". */ - tmp = make_string("AWKPATH", 7); - if (! in_array(ENVIRON_node, tmp)) { - /* - * On VMS, environ[] only holds a subset of what getenv() can - * find, so look AWKPATH up before resorting to default path. - */ - val = getenv("AWKPATH"); - if (val == NULL) - val = defpath; - aptr = assoc_lookup(ENVIRON_node, tmp); - unref(*aptr); - *aptr = make_string(val, strlen(val)); - } - unref(tmp); + path_environ("AWKPATH", defpath); + path_environ("AWKLIBPATH", deflibpath); + + /* set up array functions */ + init_env_array(ENVIRON_node); + return ENVIRON_node; } @@ -1017,15 +895,37 @@ load_procinfo() { #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0 int i; +#endif +#if (defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0) || defined(HAVE_MPFR) char name[100]; #endif AWKNUM value; + static bool been_here = false; + + if (been_here) + return PROCINFO_node; + + been_here = true; PROCINFO_node = install_symbol(estrdup("PROCINFO", 8), Node_var_array); update_PROCINFO_str("version", VERSION); update_PROCINFO_str("strftime", def_strftime_format); +#ifdef HAVE_MPFR + sprintf(name, "GNU MPFR %s", mpfr_get_version()); + update_PROCINFO_str("mpfr_version", name); + sprintf(name, "GNU MP %s", gmp_version); + update_PROCINFO_str("gmp_version", name); + update_PROCINFO_num("prec_max", MPFR_PREC_MAX); + update_PROCINFO_num("prec_min", MPFR_PREC_MIN); +#endif + +#ifdef DYNAMIC + update_PROCINFO_num("api_major", GAWK_API_MAJOR_VERSION); + update_PROCINFO_num("api_minor", GAWK_API_MINOR_VERSION); +#endif + #ifdef GETPGRP_VOID #define getpgrp_arg() /* nothing */ #else @@ -1100,15 +1000,32 @@ is_std_var(const char *var) for (vp = varinit; vp->name != NULL; vp++) { if (strcmp(vp->name, var) == 0) { if ((do_traditional || do_posix) && (vp->flags & NON_STANDARD) != 0) - return FALSE; + return false; - return TRUE; + return true; } } - return FALSE; + return false; } +/* + * is_off_limits_var --- return true if a variable is off limits + * to extension functions + */ + +int +is_off_limits_var(const char *var) +{ + const struct varinit *vp; + + for (vp = varinit; vp->name != NULL; vp++) { + if (strcmp(vp->name, var) == 0) + return ((vp->flags & NOT_OFF_LIMITS) == 0); + } + + return false; +} /* get_spec_varname --- return the name of a special variable with the given assign or update routine. @@ -1132,23 +1049,23 @@ get_spec_varname(Func_ptr fptr) /* arg_assign --- process a command-line assignment */ int -arg_assign(char *arg, int initing) +arg_assign(char *arg, bool initing) { char *cp, *cp2; - int badvar; + bool badvar; NODE *var; NODE *it; NODE **lhs; long save_FNR; if (! initing && disallow_var_assigns) - return FALSE; /* --exec */ + return false; /* --exec */ cp = strchr(arg, '='); if (cp == NULL) { if (! initing) - return FALSE; /* This is file name, not assignment. */ + return false; /* This is file name, not assignment. */ fprintf(stderr, _("%s: `%s' argument to `-v' not in `var=value' form\n\n"), @@ -1165,13 +1082,13 @@ arg_assign(char *arg, int initing) FNR = 0; /* first check that the variable name has valid syntax */ - badvar = FALSE; - if (! isalpha((unsigned char) arg[0]) && arg[0] != '_') - badvar = TRUE; + badvar = false; + if (! is_alpha((unsigned char) arg[0]) && arg[0] != '_') + badvar = true; else for (cp2 = arg+1; *cp2; cp2++) - if (! isalnum((unsigned char) *cp2) && *cp2 != '_') { - badvar = TRUE; + if (! is_identchar((unsigned char) *cp2)) { + badvar = true; break; } @@ -1206,7 +1123,7 @@ arg_assign(char *arg, int initing) setlocale(LC_NUMERIC, "C"); (void) force_number(it); if (do_posix) - setlocale(LC_NUMERIC, ""); + setlocale(LC_NUMERIC, locale); #endif /* LC_NUMERIC */ /* @@ -1219,10 +1136,10 @@ arg_assign(char *arg, int initing) var = variable(0, cp2, Node_var); if (var == NULL) /* error */ - exit(EXIT_FATAL); + final_exit(EXIT_FATAL); if (var->type == Node_var && var->var_update) var->var_update(); - lhs = get_lhs(var, FALSE); + lhs = get_lhs(var, false); unref(*lhs); *lhs = it; /* check for set_FOO() routine */ @@ -1302,7 +1219,16 @@ nostalgia() static void version() { - printf("%s\n", version_string); + printf("%s", version_string); +#ifdef DYNAMIC + printf(", API: %d.%d", GAWK_API_MAJOR_VERSION, GAWK_API_MINOR_VERSION); +#endif +#ifdef HAVE_MPFR + printf(" (GNU MPFR %s, GNU MP %s)", mpfr_get_version(), gmp_version); +#endif + printf("\n"); + print_ext_versions(); + /* * Per GNU coding standards, print copyright info, * then exit successfully, do nothing else. @@ -1355,17 +1281,20 @@ init_groupset() */ ngroups = getgroups(0, NULL); #endif - if (ngroups == -1) - fatal(_("could not find groups: %s"), strerror(errno)); - else if (ngroups == 0) + /* If an error or no groups, just give up and get on with life. */ + if (ngroups <= 0) return; /* fill in groups */ emalloc(groupset, GETGROUPS_T *, ngroups * sizeof(GETGROUPS_T), "init_groupset"); ngroups = getgroups(ngroups, groupset); - if (ngroups == -1) - fatal(_("could not find groups: %s"), strerror(errno)); + /* same thing here, give up but keep going */ + if (ngroups == -1) { + efree(groupset); + ngroups = 0; + groupset = NULL; + } #endif } @@ -1429,6 +1358,8 @@ save_argv(int argc, char **argv) /* * update_global_values --- make sure the symbol table has correct values. * Called from the grammar before dumping values. + * + * Also called when accessing through SYMTAB, and from api_sym_lookup(). */ void @@ -1456,3 +1387,285 @@ getenv_long(const char *name) } return -1; } + +/* parse_args --- do the getopt_long thing */ + +static void +parse_args(int argc, char **argv) +{ + /* + * The + on the front tells GNU getopt not to rearrange argv. + */ + const char *optlist = "+F:f:v:W;bcCd::D::e:E:ghi:l:L:nNo::Op::MPrStVYZ:"; + int old_optind; + int c; + char *scan; + char *src; + + /* we do error messages ourselves on invalid options */ + opterr = false; + + /* copy argv before getopt gets to it; used to restart the debugger */ + save_argv(argc, argv); + + /* option processing. ready, set, go! */ + for (optopt = 0, old_optind = 1; + (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF; + optopt = 0, old_optind = optind) { + if (do_posix) + opterr = true; + + switch (c) { + case 'F': + add_preassign(PRE_ASSIGN_FS, optarg); + break; + + case 'E': + disallow_var_assigns = true; + /* fall through */ + case 'f': + /* + * Allow multiple -f options. + * This makes function libraries real easy. + * Most of the magic is in the scanner. + * + * The following is to allow for whitespace at the end + * of a #! /bin/gawk line in an executable file + */ + scan = optarg; + if (argv[optind-1] != optarg) + while (isspace((unsigned char) *scan)) + scan++; + src = (*scan == '\0' ? argv[optind++] : optarg); + (void) add_srcfile((src && src[0] == '-' && src[1] == '\0') ? + SRC_STDIN : SRC_FILE, + src, srcfiles, NULL, NULL); + + break; + + case 'v': + add_preassign(PRE_ASSIGN, optarg); + break; + + case 'b': + do_binary = true; + break; + + case 'c': + do_flags |= DO_TRADITIONAL; + break; + + case 'C': + copyleft(); + break; + + case 'd': + do_flags |= DO_DUMP_VARS; + if (optarg != NULL && optarg[0] != '\0') + varfile = optarg; + break; + + case 'D': + do_flags |= DO_DEBUG; + if (optarg != NULL && optarg[0] != '\0') + command_file = optarg; + break; + + case 'e': + if (optarg[0] == '\0') + warning(_("empty argument to `-e/--source' ignored")); + else + (void) add_srcfile(SRC_CMDLINE, optarg, srcfiles, NULL, NULL); + break; + + case 'g': + do_flags |= DO_INTL; + break; + + case 'h': + /* write usage to stdout, per GNU coding stds */ + usage(EXIT_SUCCESS, stdout); + break; + + case 'i': + (void) add_srcfile(SRC_INC, optarg, srcfiles, NULL, NULL); + break; + + case 'l': + (void) add_srcfile(SRC_EXTLIB, optarg, srcfiles, NULL, NULL); + break; + +#ifndef NO_LINT + case 'L': + do_flags |= DO_LINT_ALL; + if (optarg != NULL) { + if (strcmp(optarg, "fatal") == 0) + lintfunc = r_fatal; + else if (strcmp(optarg, "invalid") == 0) { + do_flags &= ~DO_LINT_ALL; + do_flags |= DO_LINT_INVALID; + } + } + break; + + case 't': + do_flags |= DO_LINT_OLD; + break; +#else + case 'L': + case 't': + break; +#endif + + case 'n': + do_flags |= DO_NON_DEC_DATA; + break; + + case 'N': + use_lc_numeric = true; + break; + + case 'O': + do_optimize = true; + break; + + case 'p': + do_flags |= DO_PROFILE; + /* fall through */ + case 'o': + do_flags |= DO_PRETTY_PRINT; + if (optarg != NULL) + set_prof_file(optarg); + else + set_prof_file(DEFAULT_PROFILE); + break; + + case 'M': +#ifdef HAVE_MPFR + do_flags |= DO_MPFR; +#else + warning(_("-M ignored: MPFR/GMP support not compiled in")); +#endif + break; + + case 'P': + do_flags |= DO_POSIX; + break; + + case 'r': + do_flags |= DO_INTERVALS; + break; + + case 'S': + do_flags |= DO_SANDBOX; + break; + + case 'V': + do_version = true; + break; + + case 'W': /* gawk specific options - now in getopt_long */ + fprintf(stderr, _("%s: option `-W %s' unrecognized, ignored\n"), + argv[0], optarg); + break; + + case 0: + /* + * getopt_long found an option that sets a variable + * instead of returning a letter. Do nothing, just + * cycle around for the next one. + */ + break; + + case 'Y': + case 'Z': +#if defined(YYDEBUG) || defined(GAWKDEBUG) + if (c == 'Y') { + yydebug = 2; + break; + } +#endif +#if defined(LOCALEDEBUG) + if (c == 'Z') { + locale = optarg; + break; + } +#endif + /* if not debugging, fall through */ + case '?': + default: + /* + * If not posix, an unrecognized option stops argument + * processing so that it can go into ARGV for the awk + * program to see. This makes use of ``#! /bin/gawk -f'' + * easier. + * + * However, it's never simple. If optopt is set, + * an option that requires an argument didn't get the + * argument. We care because if opterr is 0, then + * getopt_long won't print the error message for us. + */ + if (! do_posix + && (optopt == '\0' || strchr(optlist, optopt) == NULL)) { + /* + * can't just do optind--. In case of an + * option with >= 2 letters, getopt_long + * won't have incremented optind. + */ + optind = old_optind; + stopped_early = true; + goto out; + } else if (optopt != '\0') { + /* Use POSIX required message format */ + fprintf(stderr, + _("%s: option requires an argument -- %c\n"), + myname, optopt); + usage(EXIT_FAILURE, stderr); + } + /* else + let getopt print error message for us */ + break; + } + if (c == 'E') /* --exec ends option processing */ + break; + } +out: + return; +} + +/* set_locale_stuff --- setup the locale stuff */ + +static void +set_locale_stuff(void) +{ +#if defined(LC_CTYPE) + setlocale(LC_CTYPE, locale); +#endif +#if defined(LC_COLLATE) + setlocale(LC_COLLATE, locale); +#endif +#if defined(LC_MESSAGES) + setlocale(LC_MESSAGES, locale); +#endif +#if defined(LC_NUMERIC) && defined(HAVE_LOCALE_H) + /* + * Force the issue here. According to POSIX 2001, decimal + * point is used for parsing source code and for command-line + * assignments and the locale value for processing input, + * number to string conversion, and printing output. + * + * 10/2005 --- see below also; we now only use the locale's + * decimal point if do_posix in effect. + * + * 9/2007: + * This is a mess. We need to get the locale's numeric info for + * the thousands separator for the %'d flag. + */ + setlocale(LC_NUMERIC, locale); + init_locale(& loc); + setlocale(LC_NUMERIC, "C"); +#endif +#if defined(LC_TIME) + setlocale(LC_TIME, locale); +#endif +} |