diff options
Diffstat (limited to 'builtin.c')
-rw-r--r-- | builtin.c | 1077 |
1 files changed, 716 insertions, 361 deletions
@@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-2011 the Free Software Foundation, Inc. + * Copyright (C) 1986, 1988, 1989, 1991-2015 the Free Software Foundation, Inc. * * This file is part of GAWK, the GNU implementation of the * AWK Programming Language. @@ -73,7 +73,7 @@ extern void srandom(unsigned long seed); extern NODE **args_array; extern int max_args; extern NODE **fields_arr; -extern int output_is_tty; +extern bool output_is_tty; extern FILE *output_fp; @@ -83,7 +83,7 @@ s1 = POP(); \ do { if (s1->type == Node_var_array) { \ DEREF(s2); \ fatal(_("attempt to use array `%s' in a scalar context"), array_vname(s1)); \ -}} while (FALSE) +}} while (false) /* @@ -92,9 +92,6 @@ fatal(_("attempt to use array `%s' in a scalar context"), array_vname(s1)); \ */ #define GAWK_RANDOM_MAX 0x7fffffffL -static void efwrite(const void *ptr, size_t size, size_t count, FILE *fp, - const char *from, struct redirect *rp, int flush); - /* efwrite --- like fwrite, but with error checking */ static void @@ -104,21 +101,35 @@ efwrite(const void *ptr, FILE *fp, const char *from, struct redirect *rp, - int flush) + bool flush) { errno = 0; - if (fwrite(ptr, size, count, fp) != count) + if (rp != NULL) { + if (rp->output.gawk_fwrite(ptr, size, count, fp, rp->output.opaque) != count) + goto wrerror; + } else if (fwrite(ptr, size, count, fp) != count) goto wrerror; if (flush && ((fp == stdout && output_is_tty) - || (rp != NULL && (rp->flag & RED_NOBUF)))) { - fflush(fp); - if (ferror(fp)) - goto wrerror; + || (rp != NULL && (rp->flag & RED_NOBUF) != 0))) { + if (rp != NULL) { + rp->output.gawk_fflush(fp, rp->output.opaque); + if (rp->output.gawk_ferror(fp, rp->output.opaque)) + goto wrerror; + } else { + fflush(fp); + if (ferror(fp)) + goto wrerror; + } } return; wrerror: + /* die silently on EPIPE to stdout */ + if (fp == stdout && errno == EPIPE) + gawk_exit(EXIT_FATAL); + + /* otherwise die verbosely */ fatal(_("%s to \"%s\" failed (%s)"), from, rp ? rp->value : _("standard output"), errno ? strerror(errno) : _("reason unknown")); @@ -135,7 +146,7 @@ do_exp(int nargs) tmp = POP_SCALAR(); if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("exp: received non-numeric argument")); - d = force_number(tmp); + d = force_number(tmp)->numbr; DEREF(tmp); errno = 0; res = exp(d); @@ -156,9 +167,9 @@ static FILE * stdfile(const char *name, size_t len) { if (len == 11) { - if (STREQN(name, "/dev/stderr", 11)) + if (strncmp(name, "/dev/stderr", 11) == 0) return stderr; - else if (STREQN(name, "/dev/stdout", 11)) + else if (strncmp(name, "/dev/stdout", 11) == 0) return stdout; } @@ -176,29 +187,45 @@ do_fflush(int nargs) int status = 0; const char *file; - /* fflush() --- flush stdout */ + /* + * November, 2012. + * It turns out that circa 2002, when BWK + * added fflush() and fflush("") to his awk, he made both of + * them flush everything. + * + * Now, with our inside agent getting ready to try to get fflush() + * standardized in POSIX, we are going to make our awk consistent + * with his. This should not really affect anyone, as flushing + * everything also flushes stdout. + * + * So. Once upon a time: + * fflush() --- flush stdout + * fflush("") --- flush everything + * Now, both calls flush everything. + */ + + /* fflush() */ if (nargs == 0) { - if (output_fp != stdout) - (void) fflush(output_fp); - status = fflush(stdout); + status = flush_io(); return make_number((AWKNUM) status); } tmp = POP_STRING(); file = tmp->stptr; - /* fflush("") --- flush all */ + /* fflush("") */ if (tmp->stlen == 0) { status = flush_io(); DEREF(tmp); return make_number((AWKNUM) status); } + /* fflush("/some/path") */ rp = getredirect(tmp->stptr, tmp->stlen); status = -1; if (rp != NULL) { if ((rp->flag & (RED_WRITE|RED_APPEND)) == 0) { - if (rp->flag & RED_PIPE) + if ((rp->flag & RED_PIPE) != 0) warning(_("fflush: cannot flush: pipe `%s' opened for reading, not writing"), file); else @@ -207,9 +234,9 @@ do_fflush(int nargs) DEREF(tmp); return make_number((AWKNUM) status); } - fp = rp->fp; + fp = rp->output.fp; if (fp != NULL) - status = fflush(fp); + status = rp->output.gawk_fflush(fp, rp->output.opaque); } else if ((fp = stdfile(tmp->stptr, tmp->stlen)) != NULL) { status = fflush(fp); } else { @@ -220,7 +247,6 @@ do_fflush(int nargs) return make_number((AWKNUM) status); } -#if MBS_SUPPORT /* strncasecmpmbs --- like strncasecmp (multibyte string version) */ int @@ -300,14 +326,6 @@ index_multibyte_buffer(char* src, char* dest, int len) dest[idx] = mbclen; } } -#else -/* a dummy function */ -static void -index_multibyte_buffer(char* src ATTRIBUTE_UNUSED, char* dest ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) -{ - cant_happen(); -} -#endif /* do_index --- find index of a string */ @@ -318,15 +336,13 @@ do_index(int nargs) const char *p1, *p2; size_t l1, l2; long ret; -#if MBS_SUPPORT - int do_single_byte = FALSE; + bool do_single_byte = false; mbstate_t mbs1, mbs2; if (gawk_mb_cur_max > 1) { memset(& mbs1, 0, sizeof(mbstate_t)); memset(& mbs2, 0, sizeof(mbstate_t)); } -#endif POP_TWO_SCALARS(s1, s2); @@ -356,7 +372,6 @@ do_index(int nargs) goto out; } -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { s1 = force_wstring(s1); s2 = force_wstring(s2); @@ -367,14 +382,12 @@ do_index(int nargs) do_single_byte = ((s1->wstlen == 0 && s1->stlen > 0) || (s2->wstlen == 0 && s2->stlen > 0)); } -#endif /* IGNORECASE will already be false if posix */ if (IGNORECASE) { while (l1 > 0) { if (l2 > l1) break; -#if MBS_SUPPORT if (! do_single_byte && gawk_mb_cur_max > 1) { const wchar_t *pos; @@ -385,21 +398,18 @@ do_index(int nargs) ret = pos - s1->wstptr + 1; /* 1-based */ goto out; } else { -#endif - /* - * Could use tolower(*p1) == tolower(*p2) here. - * See discussion in eval.c as to why not. - */ - if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2] - && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) { - ret = 1 + s1->stlen - l1; - break; - } - l1--; - p1++; -#if MBS_SUPPORT + /* + * Could use tolower(*p1) == tolower(*p2) here. + * See discussion in eval.c as to why not. + */ + if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2] + && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) { + ret = 1 + s1->stlen - l1; + break; + } + l1--; + p1++; } -#endif } } else { while (l1 > 0) { @@ -410,7 +420,6 @@ do_index(int nargs) ret = 1 + s1->stlen - l1; break; } -#if MBS_SUPPORT if (! do_single_byte && gawk_mb_cur_max > 1) { const wchar_t *pos; @@ -424,10 +433,6 @@ do_index(int nargs) l1--; p1++; } -#else - l1--; - p1++; -#endif } } out: @@ -442,9 +447,9 @@ double double_to_int(double d) { if (d >= 0) - d = Floor(d); + d = floor(d); else - d = Ceil(d); + d = ceil(d); return d; } @@ -459,7 +464,7 @@ do_int(int nargs) tmp = POP_SCALAR(); if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("int: received non-numeric argument")); - d = force_number(tmp); + d = force_number(tmp)->numbr; d = double_to_int(d); DEREF(tmp); return make_number((AWKNUM) d); @@ -491,15 +496,27 @@ do_length(int nargs) tmp = POP(); if (tmp->type == Node_var_array) { - static short warned = FALSE; + static bool warned = false; + unsigned long size; if (do_posix) fatal(_("length: received array argument")); if (do_lint && ! warned) { - warned = TRUE; + warned = true; lintwarn(_("`length(array)' is a gawk extension")); } - return make_number((AWKNUM) tmp->table_size); + + /* + * Support for deferred loading of array elements requires that + * we use the array length interface even though it isn't + * necessary for the built-in array types. + * + * 1/2015: The deferred arrays are gone, but this is probably + * still a good idea. + */ + + size = assoc_length(tmp); + return make_number(size); } assert(tmp->type == Node_val); @@ -508,7 +525,6 @@ do_length(int nargs) lintwarn(_("length: received non-string argument")); tmp = force_string(tmp); -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { tmp = force_wstring(tmp); len = tmp->wstlen; @@ -519,7 +535,6 @@ do_length(int nargs) if (len == 0 && tmp->stlen > 0) len = tmp->stlen; } else -#endif len = tmp->stlen; DEREF(tmp); @@ -537,7 +552,7 @@ do_log(int nargs) tmp = POP_SCALAR(); if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("log: received non-numeric argument")); - arg = (double) force_number(tmp); + arg = force_number(tmp)->numbr; if (arg < 0.0) warning(_("log: received negative argument %g"), arg); d = log(arg); @@ -546,6 +561,42 @@ do_log(int nargs) } +#ifdef HAVE_MPFR + +/* + * mpz2mpfr --- convert an arbitrary-precision integer to a float + * without any loss of precision. The returned value is only + * good for temporary use. + */ + + +static mpfr_ptr +mpz2mpfr(mpz_ptr zi) +{ + size_t prec; + static mpfr_t mpfrval; + static bool inited = false; + int tval; + + /* estimate minimum precision for exact conversion */ + prec = mpz_sizeinbase(zi, 2); /* most significant 1 bit position starting at 1 */ + prec -= (size_t) mpz_scan1(zi, 0); /* least significant 1 bit index starting at 0 */ + if (prec < MPFR_PREC_MIN) + prec = MPFR_PREC_MIN; + else if (prec > MPFR_PREC_MAX) + prec = MPFR_PREC_MAX; + + if (! inited) { + mpfr_init2(mpfrval, prec); + inited = true; + } else + mpfr_set_prec(mpfrval, prec); + tval = mpfr_set_z(mpfrval, zi, ROUND_MODE); + IEEE_FMT(mpfrval, tval); + return mpfrval; +} +#endif + /* * format_tree() formats arguments of sprintf, * and accordingly to a fmt_string providing a format like in @@ -603,8 +654,8 @@ format_tree( size_t cur_arg = 0; NODE *r = NULL; - int i; - int toofew = FALSE; + int i, nc; + bool toofew = false; char *obuf, *obufout; size_t osiz, ofre; const char *chbuf; @@ -612,12 +663,12 @@ format_tree( int cs1; NODE *arg; long fw, prec, argnum; - int used_dollar; - int lj, alt, big_flag, bigbig_flag, small_flag, have_prec, need_format; + bool used_dollar; + bool lj, alt, big_flag, bigbig_flag, small_flag, have_prec, need_format; long *cur = NULL; uintmax_t uval; - int sgn; - int base = 0; + bool sgn; + int base; /* * Although this is an array, the elements serve two different * purposes. The first element is the general buffer meant @@ -635,14 +686,20 @@ format_tree( char *cend = &cpbufs[0].stackbuf[sizeof(cpbufs[0].stackbuf)]; char *cp; const char *fill; - AWKNUM tmpval; - char signchar = FALSE; + AWKNUM tmpval = 0.0; + char signchar = '\0'; size_t len; - int zero_flag = FALSE; - int quote_flag = FALSE; + bool zero_flag = false; + bool quote_flag = false; int ii, jj; char *chp; size_t copy_count, char_count; +#ifdef HAVE_MPFR + mpz_ptr zi; + mpfr_ptr mf; +#endif + enum { MP_NONE = 0, MP_INT_WITH_PREC = 1, MP_INT_WITHOUT_PREC, MP_FLOAT } fmt_type; + static const char sp[] = " "; static const char zero_string[] = "0"; static const char lchbuf[] = "0123456789abcdef"; @@ -705,7 +762,7 @@ format_tree( goto out; \ } else if (cur_arg >= num_args) { \ arg = 0; /* shutup the compiler */ \ - toofew = TRUE; \ + toofew = true; \ break; \ } else { \ arg = the_args[cur_arg]; \ @@ -713,8 +770,8 @@ format_tree( } \ } - need_format = FALSE; - used_dollar = FALSE; + need_format = false; + used_dollar = false; s0 = s1 = fmt_string; while (n0-- > 0) { @@ -722,18 +779,26 @@ format_tree( s1++; continue; } - need_format = TRUE; + need_format = true; bchunk(s0, s1 - s0); s0 = s1; cur = &fw; fw = 0; prec = 0; + base = 0; argnum = 0; - have_prec = FALSE; - signchar = FALSE; - zero_flag = FALSE; - quote_flag = FALSE; - lj = alt = big_flag = bigbig_flag = small_flag = FALSE; + base = 0; + have_prec = false; + signchar = '\0'; + zero_flag = false; + quote_flag = false; +#ifdef HAVE_MPFR + mf = NULL; + zi = NULL; +#endif + fmt_type = MP_NONE; + + lj = alt = big_flag = bigbig_flag = small_flag = false; fill = sp; cp = cend; chbuf = lchbuf; @@ -750,7 +815,7 @@ check_pos: break; /* reject as a valid format */ goto retry; case '%': - need_format = FALSE; + need_format = false; /* * 29 Oct. 2002: * The C99 standard pages 274 and 279 seem to imply that @@ -782,7 +847,7 @@ check_pos: * screws up floating point formatting. */ if (cur == & fw) - zero_flag = TRUE; + zero_flag = true; if (lj) goto retry; /* FALL through */ @@ -809,7 +874,7 @@ check_pos: *cur = *cur * 10 + *s1++ - '0'; } if (prec < 0) /* negative precision is discarded */ - have_prec = FALSE; + have_prec = false; if (cur == &prec) cur = NULL; if (n0 == 0) /* badly formatted control string */ @@ -824,7 +889,7 @@ check_pos: if (cur == &fw) { argnum = fw; fw = 0; - used_dollar = TRUE; + used_dollar = true; if (argnum <= 0) { msg(_("fatal: arg count with `$' must be > 0")); goto out; @@ -842,7 +907,10 @@ check_pos: case '*': if (cur == NULL) break; - if (! do_traditional && isdigit((unsigned char) *s1)) { + if (! do_traditional && used_dollar && ! isdigit((unsigned char) *s1)) { + fatal(_("fatal: must use `count$' on all formats or none")); + break; /* silence warnings */ + } else if (! do_traditional && isdigit((unsigned char) *s1)) { int val = 0; for (; n0 > 0 && *s1 && isdigit((unsigned char) *s1); s1++, n0--) { @@ -857,30 +925,31 @@ check_pos: n0--; } if (val >= num_args) { - toofew = TRUE; + toofew = true; break; } arg = the_args[val]; } else { parse_next_arg(); } - *cur = force_number(arg); + (void) force_number(arg); + *cur = get_number_si(arg); if (*cur < 0 && cur == &fw) { *cur = -*cur; lj++; } if (cur == &prec) { if (*cur >= 0) - have_prec = TRUE; + have_prec = true; else - have_prec = FALSE; + have_prec = false; cur = NULL; } goto retry; case ' ': /* print ' ' or '-' */ /* 'space' flag is ignored */ /* if '+' already present */ - if (signchar != FALSE) + if (signchar != false) goto check_pos; /* FALL THROUGH */ case '+': /* print '+' or '-' */ @@ -900,16 +969,14 @@ check_pos: if (cur != &fw) break; cur = ≺ - have_prec = TRUE; + have_prec = true; goto retry; case '#': - alt = TRUE; + alt = true; goto check_pos; case '\'': #if defined(HAVE_LOCALE_H) - /* allow quote_flag if there is a thousands separator. */ - if (loc.thousands_sep[0] != '\0') - quote_flag = TRUE; + quote_flag = true; goto check_pos; #else goto retry; @@ -918,62 +985,61 @@ check_pos: if (big_flag) break; else { - static short warned = FALSE; + static bool warned = false; if (do_lint && ! warned) { lintwarn(_("`l' is meaningless in awk formats; ignored")); - warned = TRUE; + warned = true; } if (do_posix) { msg(_("fatal: `l' is not permitted in POSIX awk formats")); goto out; } } - big_flag = TRUE; + big_flag = true; goto retry; case 'L': if (bigbig_flag) break; else { - static short warned = FALSE; + static bool warned = false; if (do_lint && ! warned) { lintwarn(_("`L' is meaningless in awk formats; ignored")); - warned = TRUE; + warned = true; } if (do_posix) { msg(_("fatal: `L' is not permitted in POSIX awk formats")); goto out; } } - bigbig_flag = TRUE; + bigbig_flag = true; goto retry; case 'h': if (small_flag) break; else { - static short warned = FALSE; + static bool warned = false; if (do_lint && ! warned) { lintwarn(_("`h' is meaningless in awk formats; ignored")); - warned = TRUE; + warned = true; } if (do_posix) { msg(_("fatal: `h' is not permitted in POSIX awk formats")); goto out; } } - small_flag = TRUE; + small_flag = true; goto retry; case 'c': - need_format = FALSE; + need_format = false; parse_next_arg(); /* user input that looks numeric is numeric */ if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM) (void) force_number(arg); - if (arg->flags & NUMBER) { - uval = (uintmax_t) arg->numbr; -#if MBS_SUPPORT + if ((arg->flags & NUMBER) != 0) { + uval = get_number_uj(arg); if (gawk_mb_cur_max > 1) { char buf[100]; wchar_t wc; @@ -981,13 +1047,29 @@ check_pos: size_t count; memset(& mbs, 0, sizeof(mbs)); + + /* handle systems with too small wchar_t */ + if (sizeof(wchar_t) < 4 && uval > 0xffff) { + if (do_lint) + lintwarn( + _("[s]printf: value %g is too big for %%c format"), + arg->numbr); + + goto out0; + } + wc = uval; count = wcrtomb(buf, wc, & mbs); if (count == 0 - || count == (size_t)-1 - || count == (size_t)-2) + || count == (size_t) -1) { + if (do_lint) + lintwarn( + _("[s]printf: value %g is not a valid wide character"), + arg->numbr); + goto out0; + } memcpy(cpbuf, buf, count); prec = count; @@ -998,11 +1080,7 @@ out0: ; /* else, fall through */ -#endif - if (do_lint && uval > 255) { - lintwarn("[s]printf: value %g is too big for %%c format", - arg->numbr); - } + cpbuf[0] = uval; prec = 1; cp = cpbuf; @@ -1015,7 +1093,7 @@ out0: * used to work? 6/2003.) */ cp = arg->stptr; -#if MBS_SUPPORT + prec = 1; /* * First character can be multiple bytes if * it's a multibyte character. Grr. @@ -1026,20 +1104,16 @@ out0: memset(& state, 0, sizeof(state)); count = mbrlen(cp, arg->stlen, & state); - if (count == 0 - || count == (size_t)-1 - || count == (size_t)-2) - goto out2; - prec = count; - goto pr_tail; + if (count != (size_t) -1 && count != (size_t) -2 && count > 0) { + prec = count; + /* may need to increase fw so that padding happens, see pr_tail code */ + if (fw > 0) + fw += count - 1; + } } -out2: - ; -#endif - prec = 1; goto pr_tail; case 's': - need_format = FALSE; + need_format = false; parse_next_arg(); arg = force_string(arg); if (fw == 0 && ! have_prec) @@ -1053,9 +1127,18 @@ out2: goto pr_tail; case 'd': case 'i': - need_format = FALSE; + need_format = false; parse_next_arg(); - tmpval = force_number(arg); + (void) force_number(arg); +#ifdef HAVE_MPFR + if (is_mpg_float(arg)) + goto mpf0; + else if (is_mpg_integer(arg)) + goto mpz0; + else +#endif + tmpval = arg->numbr; + /* * Check for Nan or Inf. */ @@ -1073,12 +1156,12 @@ out2: if (tmpval < 0) { tmpval = -tmpval; - sgn = TRUE; + sgn = true; } else { if (tmpval == -0.0) /* avoid printing -0 */ tmpval = 0.0; - sgn = FALSE; + sgn = false; } /* * Use snprintf return value to tell if there @@ -1102,6 +1185,9 @@ out2: } if (i < 1) goto out_of_range; +#if defined(HAVE_LOCALE_H) + quote_flag = (quote_flag && loc.thousands_sep[0] != 0); +#endif chp = &cpbufs[1].buf[i-1]; ii = jj = 0; do { @@ -1109,12 +1195,18 @@ out2: chp--; i--; #if defined(HAVE_LOCALE_H) if (quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) { - if (i) /* only add if more digits coming */ - PREPEND(loc.thousands_sep[0]); /* XXX - assumption it's one char */ + if (i) { /* only add if more digits coming */ + int k; + const char *ts = loc.thousands_sep; + + for (k = strlen(ts) - 1; k >= 0; k--) { + PREPEND(ts[k]); + } + } if (loc.grouping[ii+1] == 0) jj = 0; /* keep using current val in loc.grouping[ii] */ else if (loc.grouping[ii+1] == CHAR_MAX) - quote_flag = FALSE; + quote_flag = false; else { ii++; jj = 0; @@ -1164,9 +1256,80 @@ out2: base += 2; /* FALL THROUGH */ case 'o': base += 8; - need_format = FALSE; + need_format = false; parse_next_arg(); - tmpval = force_number(arg); + (void) force_number(arg); +#ifdef HAVE_MPFR + if (is_mpg_integer(arg)) { +mpz0: + zi = arg->mpg_i; + + if (cs1 != 'd' && cs1 != 'i') { + if (mpz_sgn(zi) <= 0) { + /* + * Negative value or 0 requires special handling. + * Unlike MPFR, GMP does not allow conversion + * to (u)intmax_t. So we first convert GMP type to + * a MPFR type. + */ + mf = mpz2mpfr(zi); + goto mpf1; + } + signchar = '\0'; /* Don't print '+' */ + } + + /* See comments above about when to fill with zeros */ + zero_flag = (! lj + && ((zero_flag && ! have_prec) + || (fw == 0 && have_prec))); + + fmt_type = have_prec ? MP_INT_WITH_PREC : MP_INT_WITHOUT_PREC; + goto fmt0; + + } else if (is_mpg_float(arg)) { +mpf0: + mf = arg->mpg_numbr; + if (! mpfr_number_p(mf)) { + /* inf or NaN */ + cs1 = 'g'; + fmt_type = MP_FLOAT; + goto fmt1; + } + + if (cs1 != 'd' && cs1 != 'i') { +mpf1: + /* + * The output of printf("%#.0x", 0) is 0 instead of 0x, hence <= in + * the comparison below. + */ + if (mpfr_sgn(mf) <= 0) { + if (! mpfr_fits_intmax_p(mf, ROUND_MODE)) { + /* -ve number is too large */ + cs1 = 'g'; + fmt_type = MP_FLOAT; + goto fmt1; + } + + tmpval = uval = (uintmax_t) mpfr_get_sj(mf, ROUND_MODE); + if (! alt && have_prec && prec == 0 && tmpval == 0) + goto pr_tail; /* printf("%.0x", 0) is no characters */ + goto int0; + } + signchar = '\0'; /* Don't print '+' */ + } + + /* See comments above about when to fill with zeros */ + zero_flag = (! lj + && ((zero_flag && ! have_prec) + || (fw == 0 && have_prec))); + + (void) mpfr_get_z(mpzval, mf, MPFR_RNDZ); /* convert to GMP integer */ + fmt_type = have_prec ? MP_INT_WITH_PREC : MP_INT_WITHOUT_PREC; + zi = mpzval; + goto fmt0; + } else +#endif + tmpval = arg->numbr; /* * ``The result of converting a zero value with a @@ -1185,14 +1348,19 @@ out2: if (tmpval < 0) { uval = (uintmax_t) (intmax_t) tmpval; - if ((AWKNUM)(intmax_t)uval != - double_to_int(tmpval)) + if ((AWKNUM)(intmax_t)uval != double_to_int(tmpval)) goto out_of_range; } else { uval = (uintmax_t) tmpval; if ((AWKNUM)uval != double_to_int(tmpval)) goto out_of_range; } +#ifdef HAVE_MPFR + int0: +#endif +#if defined(HAVE_LOCALE_H) + quote_flag = (quote_flag && loc.thousands_sep[0] != 0); +#endif /* * When to fill with zeroes is of course not simple. * First: No zero fill if left-justifying. @@ -1211,12 +1379,18 @@ out2: uval /= base; #if defined(HAVE_LOCALE_H) if (base == 10 && quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) { - if (uval) /* only add if more digits coming */ - PREPEND(loc.thousands_sep[0]); /* XXX --- assumption it's one char */ + if (uval) { /* only add if more digits coming */ + int k; + const char *ts = loc.thousands_sep; + + for (k = strlen(ts) - 1; k >= 0; k--) { + PREPEND(ts[k]); + } + } if (loc.grouping[ii+1] == 0) jj = 0; /* keep using current val in loc.grouping[ii] */ else if (loc.grouping[ii+1] == CHAR_MAX) - quote_flag = FALSE; + quote_flag = false; else { ii++; jj = 0; @@ -1257,9 +1431,14 @@ out2: copy_count = prec; if (fw == 0 && ! have_prec) ; - else if (gawk_mb_cur_max > 1 && (cs1 == 's' || cs1 == 'c')) { - assert(cp == arg->stptr || cp == cpbuf); - copy_count = mbc_byte_count(arg->stptr, prec); + else if (gawk_mb_cur_max > 1) { + if (cs1 == 's') { + assert(cp == arg->stptr || cp == cpbuf); + copy_count = mbc_byte_count(arg->stptr, prec); + } + /* prec was set by code for %c */ + /* else + copy_count = prec; */ } bchunk(cp, copy_count); while (fw > prec) { @@ -1275,7 +1454,7 @@ out2: lintwarn(_("[s]printf: value %g is out of range for `%%%c' format"), (double) tmpval, cs1); cs1 = 'g'; - goto format_float; + goto fmt1; case 'F': #if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1 @@ -1287,13 +1466,30 @@ out2: case 'e': case 'f': case 'E': - need_format = FALSE; + need_format = false; parse_next_arg(); - tmpval = force_number(arg); - format_float: + (void) force_number(arg); + + if (! is_mpg_number(arg)) + tmpval = arg->numbr; +#ifdef HAVE_MPFR + else if (is_mpg_float(arg)) { + mf = arg->mpg_numbr; + fmt_type = MP_FLOAT; + } else { + /* arbitrary-precision integer, convert to MPFR float */ + assert(mf == NULL); + mf = mpz2mpfr(arg->mpg_i); + fmt_type = MP_FLOAT; + } +#endif + fmt1: if (! have_prec) prec = DEFAULT_G_PRECISION; - chksize(fw + prec + 9); /* 9 == slop */ +#ifdef HAVE_MPFR + fmt0: +#endif + chksize(fw + prec + 11); /* 11 == slop */ cp = cpbuf; *cp++ = '%'; if (lj) @@ -1306,32 +1502,53 @@ out2: *cp++ = '0'; if (quote_flag) *cp++ = '\''; - strcpy(cp, "*.*"); - cp += 3; - *cp++ = cs1; - *cp = '\0'; + #if defined(LC_NUMERIC) if (quote_flag && ! use_lc_numeric) setlocale(LC_NUMERIC, ""); #endif - { - int n; - while ((n = snprintf(obufout, ofre, cpbuf, - (int) fw, (int) prec, - (double) tmpval)) >= ofre) - chksize(n) + + switch (fmt_type) { +#ifdef HAVE_MPFR + case MP_INT_WITH_PREC: + sprintf(cp, "*.*Z%c", cs1); + while ((nc = mpfr_snprintf(obufout, ofre, cpbuf, + (int) fw, (int) prec, zi)) >= ofre) + chksize(nc) + break; + case MP_INT_WITHOUT_PREC: + sprintf(cp, "*Z%c", cs1); + while ((nc = mpfr_snprintf(obufout, ofre, cpbuf, + (int) fw, zi)) >= ofre) + chksize(nc) + break; + case MP_FLOAT: + sprintf(cp, "*.*R*%c", cs1); + while ((nc = mpfr_snprintf(obufout, ofre, cpbuf, + (int) fw, (int) prec, ROUND_MODE, mf)) >= ofre) + chksize(nc) + break; +#endif + default: + sprintf(cp, "*.*%c", cs1); + while ((nc = snprintf(obufout, ofre, cpbuf, + (int) fw, (int) prec, + (double) tmpval)) >= ofre) + chksize(nc) } + #if defined(LC_NUMERIC) if (quote_flag && ! use_lc_numeric) setlocale(LC_NUMERIC, "C"); #endif + len = strlen(obufout); ofre -= len; obufout += len; s0 = s1; break; default: - if (do_lint && isalpha(cs1)) + if (do_lint && is_alpha(cs1)) lintwarn(_("ignoring unknown format specifier character `%c': no argument converted"), cs1); break; } @@ -1365,6 +1582,7 @@ out: if (obuf != NULL) efree(obuf); } + if (r == NULL) gawk_exit(EXIT_FATAL); return r; @@ -1379,7 +1597,7 @@ printf_common(int nargs) int i; NODE *r, *tmp; - assert(nargs <= max_args); + assert(nargs > 0 && nargs <= max_args); for (i = 1; i <= nargs; i++) { tmp = args_array[nargs - i] = POP(); if (tmp->type == Node_var_array) { @@ -1402,6 +1620,10 @@ NODE * do_sprintf(int nargs) { NODE *r; + + if (nargs == 0) + fatal(_("sprintf: no arguments")); + r = printf_common(nargs); if (r == NULL) gawk_exit(EXIT_FATAL); @@ -1443,9 +1665,11 @@ do_printf(int nargs, int redirtype) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp)); rp = redirect(redir_exp, redirtype, & errflg); if (rp != NULL) - fp = rp->fp; - } else + fp = rp->output.fp; + } else if (do_debug) /* only the debugger can change the default output */ fp = output_fp; + else + fp = stdout; tmp = printf_common(nargs); if (redir_exp != NULL) { @@ -1457,9 +1681,9 @@ do_printf(int nargs, int redirtype) DEREF(tmp); return; } - efwrite(tmp->stptr, sizeof(char), tmp->stlen, fp, "printf", rp, TRUE); + efwrite(tmp->stptr, sizeof(char), tmp->stlen, fp, "printf", rp, true); if (rp != NULL && (rp->flag & RED_TWOWAY) != 0) - fflush(rp->fp); + rp->output.gawk_fflush(rp->output.fp, rp->output.opaque); DEREF(tmp); } else gawk_exit(EXIT_FATAL); @@ -1476,7 +1700,7 @@ do_sqrt(int nargs) tmp = POP_SCALAR(); if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("sqrt: received non-numeric argument")); - arg = (double) force_number(tmp); + arg = (double) force_number(tmp)->numbr; DEREF(tmp); if (arg < 0.0) warning(_("sqrt: called with negative argument %g"), arg); @@ -1495,9 +1719,16 @@ do_substr(int nargs) double d_index = 0, d_length = 0; size_t src_len; - if (nargs == 3) - POP_NUMBER(d_length); - POP_NUMBER(d_index); + if (nargs == 3) { + t1 = POP_NUMBER(); + d_length = get_number_d(t1); + DEREF(t1); + } + + t1 = POP_NUMBER(); + d_index = get_number_d(t1); + DEREF(t1); + t1 = POP_STRING(); if (nargs == 3) { @@ -1507,7 +1738,14 @@ do_substr(int nargs) else if (do_lint == DO_LINT_INVALID && ! (d_length >= 0)) lintwarn(_("substr: length %g is not >= 0"), d_length); DEREF(t1); - return dupnode(Nnull_string); + /* + * Return explicit null string instead of doing + * dupnode(Nnull_string) so that if the result + * is checked with the combination of length() + * and lint, no error is reported about using + * an uninitialized value. Same thing later, too. + */ + return make_string("", 0); } if (do_lint) { if (double_to_int(d_length) != d_length) @@ -1546,13 +1784,11 @@ do_substr(int nargs) if (nargs == 2) { /* third arg. missing */ /* use remainder of string */ length = t1->stlen - indx; /* default to bytes */ -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { t1 = force_wstring(t1); if (t1->wstlen > 0) /* use length of wide char string if we have one */ length = t1->wstlen - indx; } -#endif d_length = length; /* set here in case used in diagnostics, below */ } @@ -1561,16 +1797,14 @@ do_substr(int nargs) if (do_lint && (do_lint == DO_LINT_ALL || ((indx | length) != 0))) lintwarn(_("substr: source string is zero length")); DEREF(t1); - return dupnode(Nnull_string); + return make_string("", 0); } /* get total len of input string, for following checks */ -#if MBS_SUPPORT if (gawk_mb_cur_max > 1) { t1 = force_wstring(t1); src_len = t1->wstlen; } else -#endif src_len = t1->stlen; if (indx >= src_len) { @@ -1578,7 +1812,7 @@ do_substr(int nargs) lintwarn(_("substr: start index %g is past end of string"), d_index); DEREF(t1); - return dupnode(Nnull_string); + return make_string("", 0); } if (length > src_len - indx) { if (do_lint) @@ -1588,7 +1822,6 @@ do_substr(int nargs) length = src_len - indx; } -#if MBS_SUPPORT /* force_wstring() already called */ if (gawk_mb_cur_max == 1 || t1->wstlen == t1->stlen) /* single byte case */ @@ -1618,9 +1851,6 @@ do_substr(int nargs) *cp = '\0'; r = make_str_node(substr, cp - substr, ALREADY_MALLOCED); } -#else - r = make_string(t1->stptr + indx, length); -#endif DEREF(t1); return r; @@ -1648,7 +1878,7 @@ do_strftime(int nargs) format = def_strftime_format; /* traditional date format */ formatlen = strlen(format); (void) time(& fclock); /* current time of day */ - do_gmt = FALSE; + do_gmt = false; if (PROCINFO_node != NULL) { sub = make_string("strftime", 8); @@ -1681,7 +1911,8 @@ do_strftime(int nargs) t2 = POP_SCALAR(); if (do_lint && (t2->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("strftime: received non-numeric second argument")); - clock_val = (long) force_number(t2); + (void) force_number(t2); + clock_val = get_number_si(t2); if (clock_val < 0) fatal(_("strftime: second argument less than 0 or too big for time_t")); fclock = (time_t) clock_val; @@ -1774,10 +2005,10 @@ do_mktime(int nargs) & hour, & minute, & second, & dst); - if (do_lint /* Ready? Set! Go: */ - && ( (second < 0 || second > 60) - || (minute < 0 || minute > 60) - || (hour < 0 || hour > 23) + if ( do_lint /* Ready? Set! Go: */ + && ( (second < 0 || second > 60) + || (minute < 0 || minute > 59) + || (hour < 0 || hour > 23) /* FIXME ISO 8601 allows 24 ? */ || (day < 1 || day > 31) || (month < 1 || month > 12) )) lintwarn(_("mktime: at least one of the values is out of the default range")); @@ -1832,7 +2063,7 @@ do_system(int nargs) ret = system(cmd); if (ret != -1) ret = WEXITSTATUS(ret); - if ((BINMODE & 1) != 0) + if ((BINMODE & BINMODE_INPUT) != 0) os_setbinmode(fileno(stdin), O_BINARY); cmd[tmp->stlen] = save; @@ -1861,9 +2092,11 @@ do_print(int nargs, int redirtype) fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp)); rp = redirect(redir_exp, redirtype, & errflg); if (rp != NULL) - fp = rp->fp; - } else + fp = rp->output.fp; + } else if (do_debug) /* only the debugger can change the default output */ fp = output_fp; + else + fp = stdout; for (i = 1; i <= nargs; i++) { tmp = args_array[i] = POP(); @@ -1893,18 +2126,18 @@ do_print(int nargs, int redirtype) } for (i = nargs; i > 0; i--) { - efwrite(args_array[i]->stptr, sizeof(char), args_array[i]->stlen, fp, "print", rp, FALSE); + efwrite(args_array[i]->stptr, sizeof(char), args_array[i]->stlen, fp, "print", rp, false); DEREF(args_array[i]); if (i != 1 && OFSlen > 0) efwrite(OFS, sizeof(char), (size_t) OFSlen, - fp, "print", rp, FALSE); + fp, "print", rp, false); } if (ORSlen > 0) - efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, TRUE); + efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, true); if (rp != NULL && (rp->flag & RED_TWOWAY) != 0) - fflush(rp->fp); + rp->output.gawk_fflush(rp->output.fp, rp->output.opaque); } /* do_print_rec --- special case printing of $0, for speed */ @@ -1923,7 +2156,7 @@ do_print_rec(int nargs, int redirtype) redir_exp = TOP(); rp = redirect(redir_exp, redirtype, & errflg); if (rp != NULL) - fp = rp->fp; + fp = rp->output.fp; DEREF(redir_exp); decr_sp(); } else @@ -1937,19 +2170,18 @@ do_print_rec(int nargs, int redirtype) f0 = fields_arr[0]; - if (do_lint && f0 == Nnull_string) + if (do_lint && (f0->flags & NULL_FIELD) != 0) lintwarn(_("reference to uninitialized field `$%d'"), 0); - efwrite(f0->stptr, sizeof(char), f0->stlen, fp, "print", rp, FALSE); + efwrite(f0->stptr, sizeof(char), f0->stlen, fp, "print", rp, false); if (ORSlen > 0) - efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, TRUE); + efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, true); if (rp != NULL && (rp->flag & RED_TWOWAY) != 0) - fflush(rp->fp); + rp->output.gawk_fflush(rp->output.fp, rp->output.opaque); } -#if MBS_SUPPORT /* is_wupper --- function version of iswupper for passing function pointers */ @@ -2014,7 +2246,6 @@ wide_tolower(wchar_t *wstr, size_t wlen) { wide_change_case(wstr, wlen, is_wupper, to_wlower); } -#endif /* do_tolower --- lower case a string */ @@ -2037,14 +2268,11 @@ do_tolower(int nargs) cp < cp2; cp++) if (isupper(*cp)) *cp = tolower(*cp); - } -#if MBS_SUPPORT - else { + } else { force_wstring(t2); wide_tolower(t2->wstptr, t2->wstlen); wstr2str(t2); } -#endif DEREF(t1); return t2; @@ -2071,14 +2299,11 @@ do_toupper(int nargs) cp < cp2; cp++) if (islower(*cp)) *cp = toupper(*cp); - } -#if MBS_SUPPORT - else { + } else { force_wstring(t2); wide_toupper(t2->wstptr, t2->wstlen); wstr2str(t2); } -#endif DEREF(t1); return t2; @@ -2099,8 +2324,8 @@ do_atan2(int nargs) if ((t2->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("atan2: received non-numeric second argument")); } - d1 = force_number(t1); - d2 = force_number(t2); + d1 = force_number(t1)->numbr; + d2 = force_number(t2)->numbr; DEREF(t1); DEREF(t2); return make_number((AWKNUM) atan2(d1, d2)); @@ -2117,7 +2342,7 @@ do_sin(int nargs) tmp = POP_SCALAR(); if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("sin: received non-numeric argument")); - d = sin((double) force_number(tmp)); + d = sin((double) force_number(tmp)->numbr); DEREF(tmp); return make_number((AWKNUM) d); } @@ -2133,14 +2358,14 @@ do_cos(int nargs) tmp = POP_SCALAR(); if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("cos: received non-numeric argument")); - d = cos((double) force_number(tmp)); + d = cos((double) force_number(tmp)->numbr); DEREF(tmp); return make_number((AWKNUM) d); } /* do_rand --- do the rand function */ -static int firstrand = TRUE; +static bool firstrand = true; /* Some systems require this array to be integer aligned. Sigh. */ #define SIZEOF_STATE 256 static uint32_t istate[SIZEOF_STATE/sizeof(uint32_t)]; @@ -2150,10 +2375,12 @@ static char *const state = (char *const) istate; NODE * do_rand(int nargs ATTRIBUTE_UNUSED) { + double tmprand; +#define RAND_DIVISOR ((double)GAWK_RANDOM_MAX+1.0) if (firstrand) { (void) initstate((unsigned) 1, state, SIZEOF_STATE); /* don't need to srandom(1), initstate() does it for us. */ - firstrand = FALSE; + firstrand = false; setstate(state); } /* @@ -2161,7 +2388,66 @@ do_rand(int nargs ATTRIBUTE_UNUSED) * * 0 <= n < 1 */ - return make_number((AWKNUM) (random() % GAWK_RANDOM_MAX) / GAWK_RANDOM_MAX); + /* + * Date: Wed, 28 Aug 2013 17:52:46 -0700 + * From: Bob Jewett <jewett@bill.scs.agilent.com> + * + * Call random() twice to fill in more bits in the value + * of the double. Also, there is a bug in random() such + * that when the values of successive values are combined + * like (rand1*rand2)^2, (rand3*rand4)^2, ... the + * resulting time series is not white noise. The + * following also seems to fix that bug. + * + * The add/subtract 0.5 keeps small bits from filling + * below 2^-53 in the double, not that anyone should be + * looking down there. + * + * Date: Wed, 25 Sep 2013 10:45:38 -0600 (MDT) + * From: "Nelson H. F. Beebe" <beebe@math.utah.edu> + * (4) The code is typical of many published fragments for converting + * from integer to floating-point, and I discuss the serious pitfalls + * in my book, because it leads to platform-dependent behavior at the + * end points of the interval [0,1] + * + * (5) the documentation in the gawk info node says + * + * `rand()' + * Return a random number. The values of `rand()' are uniformly + * distributed between zero and one. The value could be zero but is + * never one.(1) + * + * The division by RAND_DIVISOR may not guarantee that 1.0 is never + * returned: the programmer forgot the platform-dependent issue of + * rounding. + * + * For points 4 and 5, the safe way is a loop: + * + * double + * rand(void) // return value in [0.0, 1.0) + * { + * value = internal_rand(); + * + * while (value == 1.0) + * value = internal_rand(); + * + * return (value); + * } + */ + + do { + long d1, d2; + /* + * Do the calls in predictable order to avoid + * compiler differences in order of evaluation. + */ + d1 = random(); + d2 = random(); + tmprand = 0.5 + ( (d1/RAND_DIVISOR + d2) / RAND_DIVISOR ); + tmprand -= 0.5; + } while (tmprand == 1.0); + + return make_number((AWKNUM) tmprand); } /* do_srand --- seed the random number generator */ @@ -2176,7 +2462,7 @@ do_srand(int nargs) if (firstrand) { (void) initstate((unsigned) 1, state, SIZEOF_STATE); /* don't need to srandom(1), we're changing the seed below */ - firstrand = FALSE; + firstrand = false; (void) setstate(state); } @@ -2186,7 +2472,7 @@ do_srand(int nargs) tmp = POP_SCALAR(); if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("srand: received non-numeric argument")); - srandom((unsigned int) (save_seed = (long) force_number(tmp))); + srandom((unsigned int) (save_seed = (long) force_number(tmp)->numbr)); DEREF(tmp); } return make_number((AWKNUM) ret); @@ -2228,13 +2514,12 @@ do_match(int nargs) size_t *wc_indices = NULL; rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr); /* byte length */ -#if MBS_SUPPORT if (rlength > 0 && gawk_mb_cur_max > 1) { t1 = str2wstr(t1, & wc_indices); rlength = wc_indices[rstart + rlength - 1] - wc_indices[rstart] + 1; rstart = wc_indices[rstart]; } -#endif + rstart++; /* now it's 1-based indexing */ /* Build the array only if the caller wants the optional subpatterns */ @@ -2256,12 +2541,10 @@ do_match(int nargs) start = t1->stptr + s; subpat_start = s; subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s; -#if MBS_SUPPORT if (len > 0 && gawk_mb_cur_max > 1) { subpat_start = wc_indices[s]; subpat_len = wc_indices[s + len - 1] - subpat_start + 1; } -#endif it = make_string(start, len); it->flags |= MAYBE_NUM; /* user input */ @@ -2270,6 +2553,9 @@ do_match(int nargs) lhs = assoc_lookup(dest, sub); unref(*lhs); *lhs = it; + /* execute post-assignment routine if any */ + if (dest->astore != NULL) + (*dest->astore)(dest, sub); unref(sub); sprintf(buff, "%d", ii); @@ -2293,6 +2579,8 @@ do_match(int nargs) lhs = assoc_lookup(dest, sub); unref(*lhs); *lhs = it; + if (dest->astore != NULL) + (*dest->astore)(dest, sub); unref(sub); memcpy(buf, buff, ilen); @@ -2306,6 +2594,8 @@ do_match(int nargs) lhs = assoc_lookup(dest, sub); unref(*lhs); *lhs = it; + if (dest->astore != NULL) + (*dest->astore)(dest, sub); unref(sub); } } @@ -2337,7 +2627,7 @@ do_match(int nargs) * #! /usr/local/bin/mawk -f * * BEGIN { - * TRUE = 1; FALSE = 0 + * true = 1; false = 0 * print "--->", mygsub("abc", "b+", "FOO") * print "--->", mygsub("abc", "x*", "X") * print "--->", mygsub("abc", "b*", "X") @@ -2349,10 +2639,10 @@ do_match(int nargs) * function mygsub(str, regex, replace, origstr, newstr, eosflag, nonzeroflag) * { * origstr = str; - * eosflag = nonzeroflag = FALSE + * eosflag = nonzeroflag = false * while (match(str, regex)) { * if (RLENGTH > 0) { # easy case - * nonzeroflag = TRUE + * nonzeroflag = true * if (RSTART == 1) { # match at front of string * newstr = newstr replace * } else { @@ -2365,7 +2655,7 @@ do_match(int nargs) * # which we don't really want, so skip over it * newstr = newstr substr(str, 1, 1) * str = substr(str, 2) - * nonzeroflag = FALSE + * nonzeroflag = false * } else { * # 0-length match * if (RSTART == 1) { @@ -2379,7 +2669,7 @@ do_match(int nargs) * if (eosflag) * break * else - * eosflag = TRUE + * eosflag = true * } * if (length(str) > 0) * newstr = newstr str # rest of string @@ -2397,23 +2687,28 @@ do_match(int nargs) * 2001 standard: * * sub(ere, repl[, in ]) - * Substitute the string repl in place of the first instance of the extended regular - * expression ERE in string in and return the number of substitutions. An ampersand - * ('&') appearing in the string repl shall be replaced by the string from in that - * matches the ERE. An ampersand preceded with a backslash ('\') shall be - * interpreted as the literal ampersand character. An occurrence of two consecutive - * backslashes shall be interpreted as just a single literal backslash character. Any - * other occurrence of a backslash (for example, preceding any other character) shall - * be treated as a literal backslash character. Note that if repl is a string literal (the - * lexical token STRING; see Grammar (on page 170)), the handling of the - * ampersand character occurs after any lexical processing, including any lexical - * backslash escape sequence processing. If in is specified and it is not an lvalue (see - * Expressions in awk (on page 156)), the behavior is undefined. If in is omitted, awk - * shall use the current record ($0) in its place. + * Substitute the string repl in place of the first instance of the + * extended regular expression ERE in string in and return the number of + * substitutions. An ampersand ('&') appearing in the string repl shall + * be replaced by the string from in that matches the ERE. An ampersand + * preceded with a backslash ('\') shall be interpreted as the literal + * ampersand character. An occurrence of two consecutive backslashes shall + * be interpreted as just a single literal backslash character. Any other + * occurrence of a backslash (for example, preceding any other character) + * shall be treated as a literal backslash character. Note that if repl is a + * string literal (the lexical token STRING; see Grammar (on page 170)), the + * handling of the ampersand character occurs after any lexical processing, + * including any lexical backslash escape sequence processing. If in is + * specified and it is not an lvalue (see Expressions in awk (on page 156)), + * the behavior is undefined. If in is omitted, awk shall use the current + * record ($0) in its place. * - * 11/2010: The text in the 2008 standard is the same as just quoted. However, POSIX behavior - * is now the default. This can change the behavior of awk programs. The old behavior - * is not available. + * 11/2010: The text in the 2008 standard is the same as just quoted. + * However, POSIX behavior is now the default. This can change the behavior + * of awk programs. The old behavior is not available. + * + * 7/2011: Reverted backslash handling to what it used to be. It was in + * gawk for too long. Should have known better. */ /* @@ -2446,7 +2741,7 @@ do_sub(int nargs, unsigned int flags) long how_many = 1; /* one substitution for sub, also gensub default */ int global; long current; - int lastmatchnonzero; + bool lastmatchnonzero; char *mb_indices = NULL; if ((flags & GENSUB) != 0) { @@ -2463,15 +2758,18 @@ do_sub(int nargs, unsigned int flags) if (t1->stlen > 0 && (t1->stptr[0] == 'g' || t1->stptr[0] == 'G')) how_many = -1; else { - d = force_number(t1); - + (void) force_number(t1); + d = get_number_d(t1); if ((t1->flags & NUMCUR) != 0) goto set_how_many; + warning(_("gensub: third argument `%.*s' treated as 1"), + (int) t1->stlen, t1->stptr); how_many = 1; } } else { - d = force_number(t1); + (void) force_number(t1); + d = get_number_d(t1); set_how_many: if (d < 1) how_many = 1; @@ -2479,8 +2777,8 @@ set_how_many: how_many = d; else how_many = LONG_MAX; - if (d == 0) - warning(_("gensub: third argument of 0 treated as 1")); + if (d <= 0) + warning(_("gensub: third argument %g treated as 1"), d); } DEREF(t1); @@ -2518,14 +2816,11 @@ set_how_many: text = t->stptr; textlen = t->stlen; - buflen = textlen + 2; repl = s->stptr; replend = repl + s->stlen; repllen = replend - repl; - emalloc(buf, char *, buflen + 2, "do_sub"); - buf[buflen] = '\0'; - buf[buflen + 1] = '\0'; + ampersands = 0; /* @@ -2547,7 +2842,7 @@ set_how_many: repllen--; ampersands++; } else if (*scan == '\\') { - if (flags & GENSUB) { /* gensub, behave sanely */ + if ((flags & GENSUB) != 0) { /* gensub, behave sanely */ if (isdigit((unsigned char) scan[1])) { ampersands++; scan++; @@ -2583,7 +2878,14 @@ set_how_many: } } - lastmatchnonzero = FALSE; + lastmatchnonzero = false; + + /* guesstimate how much room to allocate; +2 forces > 0 */ + buflen = textlen + (ampersands + 1) * repllen + 2; + emalloc(buf, char *, buflen + 2, "do_sub"); + buf[buflen] = '\0'; + buf[buflen + 1] = '\0'; + bp = buf; for (current = 1;; current++) { matches++; @@ -2615,7 +2917,7 @@ set_how_many: if (matchstart == matchend && lastmatchnonzero && matchstart == text) { - lastmatchnonzero = FALSE; + lastmatchnonzero = false; matches--; goto empty; } @@ -2684,7 +2986,7 @@ set_how_many: } else *bp++ = *scan; if (matchstart != matchend) - lastmatchnonzero = TRUE; + lastmatchnonzero = true; } else { /* * don't want this match, skip over it by copying @@ -2725,13 +3027,16 @@ set_how_many: done: DEREF(s); - if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL) + if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL) { efree(buf); + buf = NULL; + } if (flags & GENSUB) { if (matches > 0) { /* return the result string */ DEREF(t); + assert(buf != NULL); return make_str_node(buf, textlen, ALREADY_MALLOCED); } @@ -2740,7 +3045,7 @@ done: } /* For a string literal, must not change the original string. */ - if (flags & LITERAL) + if ((flags & LITERAL) != 0) DEREF(t); else if (matches > 0) { unref(*lhs); @@ -2777,15 +3082,15 @@ do_lshift(int nargs) if ((s2->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("lshift: received non-numeric second argument")); } - val = force_number(s1); - shift = force_number(s2); + val = force_number(s1)->numbr; + shift = force_number(s2)->numbr; if (do_lint) { if (val < 0 || shift < 0) - lintwarn(_("lshift(%lf, %lf): negative values will give strange results"), val, shift); + lintwarn(_("lshift(%f, %f): negative values will give strange results"), val, shift); if (double_to_int(val) != val || double_to_int(shift) != shift) - lintwarn(_("lshift(%lf, %lf): fractional values will be truncated"), val, shift); + lintwarn(_("lshift(%f, %f): fractional values will be truncated"), val, shift); if (shift >= sizeof(uintmax_t) * CHAR_BIT) - lintwarn(_("lshift(%lf, %lf): too large shift value will give strange results"), val, shift); + lintwarn(_("lshift(%f, %f): too large shift value will give strange results"), val, shift); } DEREF(s1); @@ -2814,15 +3119,15 @@ do_rshift(int nargs) if ((s2->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("rshift: received non-numeric second argument")); } - val = force_number(s1); - shift = force_number(s2); + val = force_number(s1)->numbr; + shift = force_number(s2)->numbr; if (do_lint) { if (val < 0 || shift < 0) - lintwarn(_("rshift(%lf, %lf): negative values will give strange results"), val, shift); + lintwarn(_("rshift(%f, %f): negative values will give strange results"), val, shift); if (double_to_int(val) != val || double_to_int(shift) != shift) - lintwarn(_("rshift(%lf, %lf): fractional values will be truncated"), val, shift); + lintwarn(_("rshift(%f, %f): fractional values will be truncated"), val, shift); if (shift >= sizeof(uintmax_t) * CHAR_BIT) - lintwarn(_("rshift(%lf, %lf): too large shift value will give strange results"), val, shift); + lintwarn(_("rshift(%f, %f): too large shift value will give strange results"), val, shift); } DEREF(s1); @@ -2840,33 +3145,30 @@ do_rshift(int nargs) NODE * do_and(int nargs) { - NODE *s1, *s2; - uintmax_t uleft, uright, res; - AWKNUM left, right; + NODE *s1; + uintmax_t res, uval; + AWKNUM val; + int i; - POP_TWO_SCALARS(s1, s2); - if (do_lint) { - if ((s1->flags & (NUMCUR|NUMBER)) == 0) - lintwarn(_("and: received non-numeric first argument")); - if ((s2->flags & (NUMCUR|NUMBER)) == 0) - lintwarn(_("and: received non-numeric second argument")); - } - left = force_number(s1); - right = force_number(s2); - if (do_lint) { - if (left < 0 || right < 0) - lintwarn(_("and(%lf, %lf): negative values will give strange results"), left, right); - if (double_to_int(left) != left || double_to_int(right) != right) - lintwarn(_("and(%lf, %lf): fractional values will be truncated"), left, right); - } + res = ~0; /* start off with all ones */ + if (nargs < 2) + fatal(_("and: called with less than two arguments")); - DEREF(s1); - DEREF(s2); + for (i = 1; nargs > 0; nargs--, i++) { + s1 = POP_SCALAR(); + if (do_lint && (s1->flags & (NUMCUR|NUMBER)) == 0) + lintwarn(_("and: argument %d is non-numeric"), i); + + val = force_number(s1)->numbr; + if (do_lint && val < 0) + lintwarn(_("and: argument %d negative value %g will give strange results"), i, val); + + uval = (uintmax_t) val; + res &= uval; - uleft = (uintmax_t) left; - uright = (uintmax_t) right; + DEREF(s1); + } - res = uleft & uright; return make_integer(res); } @@ -2875,33 +3177,30 @@ do_and(int nargs) NODE * do_or(int nargs) { - NODE *s1, *s2; - uintmax_t uleft, uright, res; - AWKNUM left, right; + NODE *s1; + uintmax_t res, uval; + AWKNUM val; + int i; - POP_TWO_SCALARS(s1, s2); - if (do_lint) { - if ((s1->flags & (NUMCUR|NUMBER)) == 0) - lintwarn(_("or: received non-numeric first argument")); - if ((s2->flags & (NUMCUR|NUMBER)) == 0) - lintwarn(_("or: received non-numeric second argument")); - } - left = force_number(s1); - right = force_number(s2); - if (do_lint) { - if (left < 0 || right < 0) - lintwarn(_("or(%lf, %lf): negative values will give strange results"), left, right); - if (double_to_int(left) != left || double_to_int(right) != right) - lintwarn(_("or(%lf, %lf): fractional values will be truncated"), left, right); - } + res = 0; + if (nargs < 2) + fatal(_("or: called with less than two arguments")); - DEREF(s1); - DEREF(s2); + for (i = 1; nargs > 0; nargs--, i++) { + s1 = POP_SCALAR(); + if (do_lint && (s1->flags & (NUMCUR|NUMBER)) == 0) + lintwarn(_("or: argument %d is non-numeric"), i); + + val = force_number(s1)->numbr; + if (do_lint && val < 0) + lintwarn(_("or: argument %d negative value %g will give strange results"), i, val); - uleft = (uintmax_t) left; - uright = (uintmax_t) right; + uval = (uintmax_t) val; + res |= uval; + + DEREF(s1); + } - res = uleft | uright; return make_integer(res); } @@ -2910,36 +3209,33 @@ do_or(int nargs) NODE * do_xor(int nargs) { - NODE *s1, *s2; - uintmax_t uleft, uright, res; - AWKNUM left, right; + NODE *s1; + uintmax_t res, uval; + AWKNUM val; + int i; - POP_TWO_SCALARS(s1, s2); - left = force_number(s1); - right = force_number(s2); + if (nargs < 2) + fatal(_("xor: called with less than two arguments")); - if (do_lint) { - if ((s1->flags & (NUMCUR|NUMBER)) == 0) - lintwarn(_("xor: received non-numeric first argument")); - if ((s2->flags & (NUMCUR|NUMBER)) == 0) - lintwarn(_("xor: received non-numeric second argument")); - } - left = force_number(s1); - right = force_number(s2); - if (do_lint) { - if (left < 0 || right < 0) - lintwarn(_("xor(%lf, %lf): negative values will give strange results"), left, right); - if (double_to_int(left) != left || double_to_int(right) != right) - lintwarn(_("xor(%lf, %lf): fractional values will be truncated"), left, right); - } + res = 0; /* silence compiler warning */ + for (i = 1; nargs > 0; nargs--, i++) { + s1 = POP_SCALAR(); + if (do_lint && (s1->flags & (NUMCUR|NUMBER)) == 0) + lintwarn(_("xor: argument %d is non-numeric"), i); - DEREF(s1); - DEREF(s2); + val = force_number(s1)->numbr; + if (do_lint && val < 0) + lintwarn(_("xor: argument %d negative value %g will give strange results"), i, val); + + uval = (uintmax_t) val; + if (i == 1) + res = uval; + else + res ^= uval; - uleft = (uintmax_t) left; - uright = (uintmax_t) right; + DEREF(s1); + } - res = uleft ^ uright; return make_integer(res); } @@ -2955,16 +3251,14 @@ do_compl(int nargs) tmp = POP_SCALAR(); if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0) lintwarn(_("compl: received non-numeric argument")); - d = force_number(tmp); + d = force_number(tmp)->numbr; DEREF(tmp); if (do_lint) { - if ((tmp->flags & (NUMCUR|NUMBER)) == 0) - lintwarn(_("compl: received non-numeric argument")); if (d < 0) - lintwarn(_("compl(%lf): negative value will give strange results"), d); + lintwarn(_("compl(%f): negative value will give strange results"), d); if (double_to_int(d) != d) - lintwarn(_("compl(%lf): fractional value will be truncated"), d); + lintwarn(_("compl(%f): fractional value will be truncated"), d); } uval = (uintmax_t) d; @@ -2982,11 +3276,11 @@ do_strtonum(int nargs) tmp = POP_SCALAR(); if ((tmp->flags & (NUMBER|NUMCUR)) != 0) - d = (AWKNUM) force_number(tmp); - else if (isnondecimal(tmp->stptr, use_lc_numeric)) + d = (AWKNUM) force_number(tmp)->numbr; + else if (get_numbase(tmp->stptr, use_lc_numeric) != 10) d = nondec2awknum(tmp->stptr, tmp->stlen); else - d = (AWKNUM) force_number(tmp); + d = (AWKNUM) force_number(tmp)->numbr; DEREF(tmp); return make_number((AWKNUM) d); @@ -3236,7 +3530,10 @@ do_dcngettext(int nargs) } #endif - POP_NUMBER(d); /* third argument */ + t2 = POP_NUMBER(); /* third argument */ + d = get_number_d(t2); + DEREF(t2); + number = (unsigned long) double_to_int(d); t2 = POP_STRING(); /* second argument */ string2 = t2->stptr; @@ -3298,13 +3595,78 @@ do_bindtextdomain(int nargs) return make_string(the_result, strlen(the_result)); } +/* do_div --- do integer division, return quotient and remainder in dest array */ + +/* + * We define the semantics as: + * numerator = int(numerator) + * denominator = int(denonmator) + * quotient = int(numerator / denomator) + * remainder = int(numerator % denomator) + */ + +NODE * +do_div(int nargs) +{ + NODE *numerator, *denominator, *result; + double num, denom, quotient, remainder; + NODE *sub, **lhs; + + result = POP_PARAM(); + if (result->type != Node_var_array) + fatal(_("div: third argument is not an array")); + assoc_clear(result); + + denominator = POP_SCALAR(); + numerator = POP_SCALAR(); + + if (do_lint) { + if ((numerator->flags & (NUMCUR|NUMBER)) == 0) + lintwarn(_("div: received non-numeric first argument")); + if ((denominator->flags & (NUMCUR|NUMBER)) == 0) + lintwarn(_("div: received non-numeric second argument")); + } + + (void) force_number(numerator); + (void) force_number(denominator); + num = double_to_int(get_number_d(numerator)); + denom = double_to_int(get_number_d(denominator)); + + if (denom == 0.0) + fatal(_("div: division by zero attempted")); + + quotient = double_to_int(num / denom); + /* + * FIXME: This code is duplicated, factor it out to a + * separate function. + */ +#ifdef HAVE_FMOD + remainder = fmod(num, denom); +#else /* ! HAVE_FMOD */ + (void) modf(num / denom, & remainder); + remainder = num - remainder * denom; +#endif /* ! HAVE_FMOD */ + remainder = double_to_int(remainder); + + sub = make_string("quotient", 8); + lhs = assoc_lookup(result, sub); + unref(*lhs); + *lhs = make_number((AWKNUM) quotient); + + sub = make_string("remainder", 9); + lhs = assoc_lookup(result, sub); + unref(*lhs); + *lhs = make_number((AWKNUM) remainder); + + return make_number((AWKNUM) 0.0); +} + /* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */ static size_t mbc_byte_count(const char *ptr, size_t numchars) { -#if MBS_SUPPORT mbstate_t cur_state; size_t sum = 0; int mb_len; @@ -3325,9 +3687,6 @@ mbc_byte_count(const char *ptr, size_t numchars) } return sum; -#else - return numchars; -#endif } /* mbc_char_count --- return number of m.b. chars in string, up to numbytes bytes */ @@ -3335,7 +3694,6 @@ mbc_byte_count(const char *ptr, size_t numchars) static size_t mbc_char_count(const char *ptr, size_t numbytes) { -#if MBS_SUPPORT mbstate_t cur_state; size_t sum = 0; int mb_len; @@ -3358,7 +3716,4 @@ mbc_char_count(const char *ptr, size_t numbytes) } return sum; -#else - return numbytes; -#endif } |