summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2014-09-16 19:51:29 +0300
committerArnold D. Robbins <arnold@skeeve.com>2014-09-16 19:51:29 +0300
commitb219ee54781d97816b98b711396191c661925d29 (patch)
tree62c44c3ab74c95788d82d5caf6f40c057bbadd7b
parentea047491efb957a2a306a95faeb5fb859426c0d4 (diff)
parentdc510090126eb27d2e0514f42a4da4863873903d (diff)
downloadgawk-b219ee54781d97816b98b711396191c661925d29.tar.gz
Merge branch 'master' into num-handler
-rw-r--r--CMakeLists.txt108
-rw-r--r--ChangeLog408
-rw-r--r--Makefile.am7
-rw-r--r--Makefile.in7
-rw-r--r--NEWS45
-rw-r--r--README.git15
-rw-r--r--README_d/ChangeLog4
-rw-r--r--README_d/README.cmake95
-rw-r--r--TODO26
-rw-r--r--awk.h24
-rw-r--r--awkgram.c955
-rw-r--r--awkgram.y237
-rw-r--r--awklib/Makefile.am11
-rw-r--r--awklib/Makefile.in105
-rw-r--r--awklib/eg/lib/ctime.awk2
-rw-r--r--awklib/eg/lib/div.awk17
-rw-r--r--awklib/eg/lib/getopt.awk5
-rw-r--r--awklib/eg/lib/gettime.awk2
-rw-r--r--awklib/eg/lib/grcat.c2
-rw-r--r--awklib/eg/lib/groupawk.in3
-rw-r--r--awklib/eg/lib/pwcat.c2
-rw-r--r--awklib/eg/lib/strtonum.awk17
-rw-r--r--awklib/eg/prog/alarm.awk5
-rw-r--r--awklib/eg/prog/cut.awk11
-rw-r--r--awklib/eg/prog/egrep.awk7
-rw-r--r--awklib/eg/prog/extract.awk8
-rw-r--r--awklib/eg/prog/id.awk40
-rw-r--r--awklib/eg/prog/igawk.sh2
-rw-r--r--awklib/eg/prog/labels.awk3
-rw-r--r--awklib/eg/prog/split.awk5
-rw-r--r--awklib/eg/prog/tee.awk6
-rw-r--r--awklib/eg/prog/uniq.awk5
-rw-r--r--builtin.c62
-rw-r--r--cmake/Toolchain_clang.cmake19
-rw-r--r--cmake/Toolchain_generic.cmake21
-rw-r--r--cmake/Toolchain_mingw32.cmake23
-rw-r--r--cmake/Toolchain_s390.cmake20
-rw-r--r--cmake/auk.icobin0 -> 5190 bytes
-rwxr-xr-xcmake/basictest553
-rwxr-xr-xcmake/configure58
-rw-r--r--cmake/configure.cmake300
-rwxr-xr-xcmake/docmaker100
-rw-r--r--cmake/package.cmake54
-rw-r--r--command.c6
-rw-r--r--command.y6
-rw-r--r--configh.in6
-rwxr-xr-xconfigure12
-rw-r--r--configure.ac11
-rw-r--r--custom.h14
-rw-r--r--debug.c24
-rw-r--r--dfa.c1795
-rw-r--r--dfa.h26
-rw-r--r--doc/CMakeLists.txt95
-rw-r--r--doc/ChangeLog294
-rw-r--r--doc/Makefile.am16
-rw-r--r--doc/Makefile.in16
-rw-r--r--doc/array-elements.eps158
-rw-r--r--doc/array-elements.fig27
-rw-r--r--doc/array-elements.pdfbin0 -> 6796 bytes
-rw-r--r--doc/array-elements.pngbin0 -> 6091 bytes
-rw-r--r--doc/array-elements.txt4
-rw-r--r--doc/awkcard.in3
-rw-r--r--doc/gawk.135
-rw-r--r--doc/gawk.info8747
-rw-r--r--doc/gawk.texi8953
-rw-r--r--doc/gawktexi.in8774
-rw-r--r--double.c67
-rw-r--r--eval.c32
-rw-r--r--ext.c30
-rw-r--r--extension/CMakeLists.txt84
-rw-r--r--extension/ChangeLog14
-rw-r--r--extension/Makefile.am5
-rw-r--r--extension/Makefile.in5
-rw-r--r--field.c21
-rw-r--r--format.c26
-rw-r--r--gawkapi.h4
-rw-r--r--helpers/ChangeLog8
-rw-r--r--helpers/chlistref.awk31
-rw-r--r--helpers/testdfa.c31
-rw-r--r--interpret.h56
-rw-r--r--io.c254
-rw-r--r--main.c40
-rw-r--r--mpfr.c196
-rw-r--r--node.c4
-rw-r--r--pc/ChangeLog4
-rw-r--r--pc/Makefile.tst10
-rw-r--r--po/CMakeLists.txt133
-rw-r--r--po/id.po3182
-rw-r--r--po/it.po32
-rw-r--r--profile.c30
-rw-r--r--regcomp.c46
-rw-r--r--regex.h62
-rw-r--r--regex_internal.c9
-rw-r--r--replace.c2
-rw-r--r--symbol.c66
-rw-r--r--test/CMakeLists.txt90
-rw-r--r--test/ChangeLog89
-rw-r--r--test/Makefile.am72
-rw-r--r--test/Makefile.in91
-rw-r--r--test/Maketests19
-rw-r--r--test/badargs.ok4
-rw-r--r--test/dbugeval.in2
-rw-r--r--test/dbugeval.ok1
-rw-r--r--test/functab4.awk26
-rw-r--r--test/functab4.ok7
-rw-r--r--test/id.ok81
-rw-r--r--test/indirectcall2.awk11
-rw-r--r--test/indirectcall2.ok4
-rw-r--r--test/lintwarn.ok4
-rw-r--r--test/mpfrrem.awk6
-rw-r--r--test/mpfrrem.ok4
-rw-r--r--test/mpfrsqrt.awk82
-rw-r--r--test/mpfrsqrt.ok2
-rw-r--r--test/ofs1.ok6
-rw-r--r--test/printhuge.awk3
-rw-r--r--test/printhuge.ok1
-rw-r--r--test/profile2.ok4
-rw-r--r--test/regnul1.awk84
-rw-r--r--test/regnul1.ok8
-rw-r--r--test/regnul2.awk112
-rw-r--r--test/regnul2.ok27
-rw-r--r--test/rsgetline.awk23
-rw-r--r--test/rsgetline.in1
-rw-r--r--test/rsgetline.ok3
-rw-r--r--test/rsglstdin.ok3
-rw-r--r--vms/ChangeLog4
-rw-r--r--vms/gawk_alias_setup.com33
-rw-r--r--xalloc.h24
128 files changed, 23605 insertions, 14231 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000..154d2afb
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,108 @@
+#
+# CMakeLists.txt --- CMake input file for gawk
+#
+# Copyright (C) 2013
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+cmake_minimum_required (VERSION 2.6)
+project (gawk C)
+
+include(cmake/configure.cmake)
+
+set (EXTRA_LIBS "")
+
+if (${HAVE_MPFR})
+ set (EXTRA_LIBS ${EXTRA_LIBS} mpfr gmp)
+endif ()
+if (${HAVE_LIBREADLINE})
+ set (EXTRA_LIBS ${EXTRA_LIBS} readline)
+endif ()
+if (${DYNAMIC})
+ set (EXTRA_LIBS ${EXTRA_LIBS} ${CMAKE_DL_LIBS} )
+endif ()
+
+include_directories(${CMAKE_SOURCE_DIR})
+
+if(WIN32 OR "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
+ # This is enough to build with MinGW in a native Windows environment
+ # and also with a cross-compiler on OpenSuSE 12.2.
+ # On Ubuntu 12.04 patches to gawk's source code are needed:
+ # - insert #include <windows.h> at the top of awk.h
+ # - remove function execvp from pc/gawkmisc.pc
+ DefineConfigHValue(HAVE_SETENV 1)
+ DefineConfigHValue(HAVE_USLEEP 1)
+ DefineConfigHValue(STDC_HEADERS 1)
+ DefineConfigHValue(HAVE_STRINGIZE 1)
+ include_directories(${CMAKE_SOURCE_DIR}/missing_d)
+ DefineConfigHValue(HAVE_MKSTEMP 1)
+ set (EXTRA_LIBS ${EXTRA_LIBS} ws2_32)
+ # TODO: Eli Zaretskii reminded me that the generated
+ # settings in config.h should be the same as those in
+ # pc/config.h. With these settings and DYNAMIC=1
+ # it looks like functions in dynamic libs (extensions) can
+ # be invoked on Windows.
+ DefineConfigHValue(HAVE_GETSYSTEMTIMEASFILETIME 1)
+ set (GAWK_SOURCES ${GAWK_SOURCES} regex.c pc/getid.c pc/gawkmisc.pc pc/popen.c)
+ include_directories(${CMAKE_SOURCE_DIR}/pc)
+endif()
+
+set (GAWK_SOURCES ${GAWK_SOURCES}
+ array.c
+ builtin.c
+ cint_array.c
+ command.c
+ debug.c
+ dfa.c
+ eval.c
+ ext.c
+ field.c
+ floatcomp.c
+ gawkapi.c
+ gawkmisc.c
+ int_array.c
+ io.c
+ main.c
+ mpfr.c
+ msg.c
+ node.c
+ profile.c
+ random.c
+ re.c
+ replace.c
+ str_array.c
+ symbol.c
+ version.c
+)
+
+add_executable (gawk ${GAWK_SOURCES} ${BISON_awkgram_OUTPUTS})
+target_link_libraries (gawk m ${EXTRA_LIBS})
+install(PROGRAMS ${CMAKE_BINARY_DIR}/gawk${CMAKE_EXECUTABLE_SUFFIX} DESTINATION bin)
+
+# Beware: before building the extension, -DGAWK gets undefined.
+add_subdirectory(extension)
+enable_testing()
+add_subdirectory(test)
+add_subdirectory(doc)
+include(InstallRequiredSystemLibraries)
+set(CPACK_PACKAGING_INSTALL_PREFIX /usr)
+include(cmake/package.cmake)
diff --git a/ChangeLog b/ChangeLog
index 8681c408..ee54f79c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,411 @@
+2014-09-16 Arnold D. Robbins <arnold@skeeve.com>
+
+ * mpfr.c (cleanup_mpfr): Removed.
+ (do_mpfr_div): Merged in. Other changes to make things work.
+ * awkgram.y (tokentab): Need a new flag for "div" instead of
+ checking the builtin function.
+
+2014-09-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ Finish removing use of isalpha and isalnum.
+
+ * awk.h (is_alpha, is_alnum, is_identchar): Add declarations.
+ * awkgram.y (yylex): Use is_alpha.
+ (is_alpha, is_alnum): New functions.
+ (is_identchar): Use is_alnum.
+ * builtin.c (r_format_tree): Use is_alpha, is_alnum.
+ * command.y (yylex): Use is_alpha, is_identchar.
+ * ext.c (is_letter): Use is_alpha.
+ (is_identifier_char): Removed; replaced uses with is_identchar.
+ * main.c (arg_assign): Use is_alpha, is_alnum.
+ * node.c (r_force_number): Use is_alpha.
+
+2014-09-14 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkgram.y (is_identchar): Change from simple macro to function
+ since use of isalnum() let non-ASCII letters slip through into
+ identifiers.
+
+2014-09-07 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awk.h: Move libsigsegv stuff to ...
+ * main.c: here. Thanks to Yehezkel Bernat for motivating
+ the cleanup.
+ * symbol.c (make_symbol, install, install_symbol): Add const to
+ first parameter. Adjust decls and fix up uses.
+
+2014-09-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ Add builtin functions to FUNCTAB for consistency.
+
+ * awk.h (Node_builtin_func): New node type.
+ (install_builtins): Declare new function.
+ * awkgram.y [DEBUG_USE]: New flag value for debug functions; they
+ don't go into FUNCTAB.
+ (install_builtins): New function.
+ * eval.c (nodetypes): Add Node_builtin_func.
+ * interpret.h (r_interpret): Rework indirect calls of built-ins
+ since they're now in the symbol table.
+ * main.c (main): Call `install_builtins'.
+ * symbol.c (install): Adjust for Node_builtin_func.
+ (load_symbols): Ditto.
+
+2014-09-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * profile.c (pprint): Case Op_K_for: Improve printing of
+ empty for loop header.
+
+ Unrelated: Make indirect function calls work for built-in and
+ extension functions.
+
+ * awkgram.y (lookup_builtin): New function.
+ * awk.h (builtin_func_t): New typedef.
+ (lookup_builtin): Declare it.
+ * interpret.h (r_interpret): For indirect calls, add code to
+ find and call builtin functions, and call extension functions.
+
+2014-09-01 Arnold D. Robbins <arnold@skeeve.com>
+
+ * builtin.c (do_substr): Return "" instead of null string in case
+ result is passed to length() with --lint. Based on discussions in
+ comp.lang.awk.
+
+ Unrelated:
+
+ * interpret.h (r_interpret): For indirect function call, separate
+ error message if lookup returned NULL. Otherwise got a core dump.
+ Thanks to "Kenny McKormack" for the report in comp.lang.awk.
+
+2014-08-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * configure.ac: Add test for strcasecmp.
+ * regcomp.c: Remove special case code around use of strcasecmp().
+ * replace.c: Include missing/strncasecmp.c if either strcasecmp()
+ or strncasecmp() aren't available.
+
+2014-08-26 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regcomp.c, regex_internal.c: Sync with GLIBC. Why not.
+
+ Unrelated:
+
+ Remove support for MirBSD. It uglified the code too much
+ for no discernable gain.
+
+ * configure.ac: Remove check for MirBSD and define of
+ LIBC_IS_BORKED.
+ * dfa.c: Remove code depending on LIBC_IS_BORKED.
+ * main.c: Ditto.
+ * regcomp.c: Ditto.
+ * NEWS: Updated.
+
+2014-08-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regex.h: Remove underscores in names of parameters in function
+ declarations. Tweak names as needed.
+
+2014-08-20 Arnold D. Robbins <arnold@skeeve.com>
+
+ * node.c (parse_escape): Max of 2 digits after \x.
+
+2014-08-18 Arnold D. Robbins <arnold@skeeve.com>
+
+ * symbol.c: General formatting cleanup.
+
+2014-08-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * main.c (usage): Adjust whitespace for -L and add "invalid"
+ as a possible value for it. Report from Robert P. J. Day
+ <rpjday@crashcourse.ca>.
+
+2014-08-14 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (SUBDIRS): Put awklib after doc so that examples
+ get extracted when the doc changes.
+
+2014-08-13 Arnold D. Robbins <arnold@skeeve.com>
+
+ * builtin.c (do_sub): Move initial allocation of the replacement
+ string down towards code to do the replacement, with a (we hope)
+ better guesstimate of how much to initially allocate. The idea
+ is to avoid unnecessary realloc() calls by making a better guess
+ at how much to allocate. This came up in an email discussion
+ with Tom Dickey about mawk's gsub().
+
+2014-08-12 Juergen Kahrs <jkahrs@users.sourceforge.net>
+
+ * cmake/configure.cmake:
+ * cmake/package.cmake: Copyright update.
+ * README.cmake:
+ * README_d/README.cmake: Moved file.
+
+2014-08-12 Arnold D. Robbins <arnold@skeeve.com>
+
+ OFS being set should rebuild $0 using previous OFS if $0
+ needs to be rebuilt. Thanks to Mike Brennan for pointing this out.
+
+ * awk.h (rebuild_record): Declare.
+ * eval.c (set_OFS): If not being called from var_init(), check
+ if $0 needs rebuilding. If so, parse the record fully and rebuild it.
+ Make OFS point to a separate copy of the new OFS for next time, since
+ OFS_node->var_value->stptr was already updated at this point.
+ * field.c (rebuild_record): Is now extern instead of static.
+ Use OFS and OFSlen instead of the value of OFS_node.
+
+ Unrelated:
+
+ * Makefile.am (RM): Define for makes that don't have it,
+ such as on OpenBSD. Thanks to Jeremie Courreges-Anglas
+ <jca@wxcvbn.org> for the report.
+
+2014-08-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ Bug fix: For MPFR sqrt(), need to set precision of result to be
+ the same as that of the argument. Doesn't hurt other functions.
+ See test/mpfrsqrt.awk. Thanks to Katie Wasserman <katie@wass.net>
+ for the bug report.
+
+ * mpfr.c (do_mpfr_func): New function. Runs code for MPFR functions
+ while still enabling debugging. Add call here to mpfr_set_prec().
+ Original code from SPEC_MATH macro.
+ (SPEC_MATH): Change macro to call do_mpfr_func().
+
+ Next MPFR bug fix: The % operator gave strange results for negative
+ numerator. Thanks again to Katie Wasserman for the bug report.
+
+ * mpfr.c (mpg_mod): Use mpz_tdiv_qr() instead of mpz_mod(). From
+ the GMP doc, mpz_mod() should have worked; it's not clear why
+ it doesn't.
+
+2014-08-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * builtin.c (format_tree): Don't need to check return value of
+ wcrtomb for -2. Thanks to Eli Zaretskii for pointing this out.
+
+ Unrelated:
+
+ * gawkapi.h: Fix doc for API get_record - errcode needs to
+ be greater than zero.
+ * interpret.h (r_interpret): Move setting of ERRNO to here, from ...
+ * io.c (inrec): ... here. Makes the code cleaner.
+
+2014-08-03 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * awkgram.y (getfname): Match on either ptr or ptr2 so --profile
+ will work in -M (MPFR bignum) mode.
+
+2014-07-31 Arnold D. Robbins <arnold@skeeve.com>
+
+ * builtin.c (format_tree): Make %c handling more sane on Windows.
+ Rework the lint messages.
+
+ Unrelated:
+
+ * dfa.c: Sync with GNU grep. Mainly white space differences.
+
+ Unrelated:
+
+ * mpfr.c (cleanup_mpfr): New function to deallocate _mpf_t1
+ and _mpf_t2; removes some valgrind warnings.
+ * awk.h (cleanup_mpfr): Add declaration.
+ * main.c (main): Add call to `cleanup_mpfr'.
+
+ Fix memory leak:
+
+ * mpfr.c (do_mpfr_div): Add unref to denominator and numerator
+ to not leak memory. Thanks to Katie Wasserman <katie@wass.net>
+ for isolating the problem to that routine.
+
+2014-07-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * main.c (main): Add a warning message if -M is used and gawk was
+ compiled without MPFR/GMP.
+
+2014-07-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * main.c (usage): Put text for `-n' *after* text for `-m'.
+ Report from Robert P. J. Day <rpjday@crashcourse.ca>.
+
+ Fix problems with I/O errors reported by Assaf Gordon
+ <assafgordon@gmail.com>:
+
+ * io.c (inrec): Change type to bool to make calling easier. Add
+ check in non-EOF case for error, and if so, return false.
+ Update ERRNO in case there is an ENDFILE block.
+ * awk.h (inrec): Change type in declaration.
+ * interpret.h (r_interpret): Change call of inrec() to boolean
+ notation.
+
+2014-07-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ New `div()' function to do integer division and remainder;
+ mainly useful for use with GMP integers. Thanks to
+ Katie Wasserman <katie@wass.net> for the suggestion.
+
+ * awk.h (do_div, do_mpfr_div): Declare new functions.
+ * builtin.c (do_div): New function.
+ * mpfr.c (do_mpfr_div): New function.
+ * awkgram.y (tokentab): New entry.
+ (snode): Add check for do_div/do_mpfr_div to make 3rd arg
+ be an array.
+ * NEWS: Updated.
+ * TODO: Updated.
+
+2014-07-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkgram.y (check_for_bad): New routine to do the fatal message,
+ with smarter checking.
+ (nextc): Call it as appropriate.
+
+ * builtin.c (format_tree): Add check for bad returns from mbrlen
+ to avoid trying to malloc (size_t) -1 bytes. Thanks to
+ mail.green.fox@gmail.com for the bug report.
+
+2014-07-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkgram.y (nextc): Add bool check_for_bad parameter to check
+ for bad characters in the source program.
+ (yylex): Adjust calls.
+
+2014-06-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * main.c (main): The --pretty-print option no longer runs the
+ program. This removes the need for the GAWK_NO_PP_RUN environment var.
+ * NEWS: Updated.
+ * TODO: Updated.
+
+2014-06-22 Paul Eggert <eggert@penguin.cs.ucla.edu>
+
+ Bring in from GNULIB:
+
+ regex: fix memory leak in compiler
+ Fix by Andreas Schwab in:
+ https://sourceware.org/ml/libc-alpha/2014-06/msg00462.html
+ * lib/regcomp.c (parse_expression): Deallocate partially
+ constructed tree before returning error.
+
+2014-06-19 Arnold D. Robbins <arnold@skeeve.com>
+
+ * builtin.c (do_sub): Add more info to leading comment.
+ Add some whitespace in the code.
+
+2014-06-08 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.c: Sync with GNU grep.
+
+2014-06-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.c (mbs_to_wchar): Define a macro if not MBS.
+
+2014-05-29 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.c: Sync with GNU grep.
+
+2014-05-26 Arnold D. Robbins <arnold@skeeve.com>
+
+ * io.c (inetfile): Change return type to bool. Wrap code
+ with ifdef HAVE_SOCKETS so that it'll compile on DJGPP.
+
+2014-05-22 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ Allow any redirected getline inside BEGINFILE/ENDFILE.
+
+ * awkgram.y (LEX_GETLINE): Only require a redirection and not also
+ a variable if getline is in a BEGINFILE or ENDFILE rule.
+ * interpret.h (Op_K_getline_redir): Remove check and fatal error.
+
+2014-05-20 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.c (dfaexec): Minor sync with GNU grep.
+
+2014-05-14 Arnold D. Robbins <arnold@skeeve.com>
+
+ * custom.h (_GL_PURE): Move definition to here. Sigh.
+ * dfa.h, dfa.c: Sync with GNU grep. Sigh.
+
+ Unrelated:
+
+ * custom.h: Remove stuff for Ultrix 4.3. No one has such
+ systems anymore; this just got missed earlier.
+
+2014-05-11 Arnold D. Robbins <arnold@skeeve.com>
+
+ * debug.c (do_eval): Repair fix of 2014-05-09 and use
+ assoc_remove to take @eval out of the function table.
+ * symbol.c: Fix a comment. This file needs some work.
+
+2014-05-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * io.c (get_a_record): Finish TERMNEAREND handling in case
+ we don't have a regular file but aren't going to get more data.
+ Added some additional comments.
+
+2014-05-09 Arnold D. Robbins <arnold@skeeve.com>
+
+ * debug.c (do_eval): Don't free `f' which points into the context
+ that was previously freed. Bug reported by Jan Chaloupka
+ <jchaloup@redhat.com>. Apparently introduced with move to
+ SYMTAB and FUNCTAB, but only showed up on Fedora 20 and Ubuntu 14.04,
+ which have a newer glibc.
+ (do_eval): Fix a memory leak seen by valgrind on Fedora 20 and
+ Ubuntu 14.04: the new SRCFILE that is added wasn't released.
+
+ Unrelated:
+
+ * io.c (get_a_record): Handle return of TERMNEAREND when the
+ entire file has been read into the buffer and we're using a
+ regex for RS. Bug report by Grail Dane <grail69@hotmail.com>.
+
+2014-05-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * debug.c (debug_prog): Change check for GAWK_RESTART so that it
+ actually works. Bug fix: run command in debugger would start
+ over again but not actually start running the program.
+
+2014-04-25 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * io.c (two_way_open): In forked child, reset SIGPIPE to SIG_DFL.
+ Fixes problems with "broken pipe" errors from child processes,
+ restoring 4.1.0 and earlier behavior. Thanks to Daryl F
+ <wyatt@prairieturtle.ca> for the report.
+ (gawk_popen): Ditto.
+
+2014-04-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.h, dfa.c: Merge with GNU grep; lots of forward motion.
+
+2014-04-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ Update xalloc.h for pending merge with dfa.
+
+ * xalloc.h (xstrdup): Implement this.
+ (x2nrealloc): Incorporate changed logic from GNULIB.
+
+2014-04-20 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * io.c (struct inet_socket_info): Define new structure
+ for use in parsing special socket filenames.
+ (inetfile): Parse all components of the special socket filename
+ into the struct inet_socket_info. Returns true only if it is a
+ valid socket filename, unlike the previous version which checked
+ for the '/inet[46]?/' prefix only.
+ (redirect): Patch to use updated inetfile() function.
+ (devopen): Remove logic to parse socket filenames, since this has
+ been moved into the inetfile() function.
+ (two_way_open): Update args to inetfile().
+
+2014-04-20 Arnold D. Robbins <arnold@skeeve.com>
+
+ * builtin.c (do_rand): Make calls to random() in predictable
+ order to avoid order of evaluation differences amongst compilers.
+ Thanks to Anders Magnusson <ragge@ludd.ltu.se> (of the PCC team)
+ for the suggestion.
+
+2014-04-18 Arnold D. Robbins <arnold@skeeve.com>
+
+ * configure.ac: Change adding of -export-dynamic for GCC to be
+ -Wl,-export-dynamic, which then works for PCC also.
+
2014-04-11 Arnold D. Robbins <arnold@skeeve.com>
* io.c (closemaybesocket): Define if not defined, e.g. building
diff --git a/Makefile.am b/Makefile.am
index 7ee910a0..52ef5a47 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -61,10 +61,12 @@ EXTRA_DIST = \
# The order to do things in.
# Build explicitly in "." in order to build gawk first, so
# that `make check' without a prior `make' works.
+# Build in awklib after in doc, since we want to extract
+# sample files if doc/gawk.texi changed.
SUBDIRS = \
. \
- awklib \
doc \
+ awklib \
po \
extension \
test
@@ -147,6 +149,9 @@ CLEANFILES = core core.*
# We want hard links for install-exec-hook, below
LN= ln
+# For some make's, e.g. OpenBSD, that don't define this
+RM = rm -f
+
# First, add a link from gawk to gawk-X.Y.Z.
#
# For GNU systems where gawk is awk, add a link to awk.
diff --git a/Makefile.in b/Makefile.in
index 339cb840..4b5fa69f 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -468,10 +468,12 @@ EXTRA_DIST = \
# The order to do things in.
# Build explicitly in "." in order to build gawk first, so
# that `make check' without a prior `make' works.
+# Build in awklib after in doc, since we want to extract
+# sample files if doc/gawk.texi changed.
SUBDIRS = \
. \
- awklib \
doc \
+ awklib \
po \
extension \
test
@@ -546,6 +548,9 @@ CLEANFILES = core core.*
# We want hard links for install-exec-hook, below
LN = ln
+
+# For some make's, e.g. OpenBSD, that don't define this
+RM = rm -f
all: config.h
$(MAKE) $(AM_MAKEFLAGS) all-recursive
diff --git a/NEWS b/NEWS
index fab31af4..58c77e7a 100644
--- a/NEWS
+++ b/NEWS
@@ -4,7 +4,7 @@
are permitted in any medium without royalty provided the copyright
notice and this notice are preserved.
-Changes from 4.1.1 to 4.2.0
+Changes from 4.1.x to 4.2.0
---------------------------
1. If not in POSIX mode, changes to ENVIRON are reflected into
@@ -16,6 +16,49 @@ Changes from 4.1.1 to 4.2.0
get the same series of numbers each time you call rand() repeatedly,
but this will be a different series than previously.
+3. The --pretty-print option no longer runs the program too.
+
+4. The igawk script and igawk.1 man page are no longer installed by
+ `make install'. They have been obsolete since gawk 4.0.0.
+
+5. Gawk now has a `div()' function to perform integer division; this is
+ primarily useful for the -M option to avoid MPFR division when all
+ values involved are integers.
+
+6. Gawk can now be built with CMake. This is an alternative build
+ system for those who may want it; gawk is not going to switch off
+ use of the autotools anytime soon, if ever.
+
+7. Gawk now processes a maximum of two hexadecimal digits in \x
+ escape sequences inside strings.
+
+8. MirBSD is no longer supported.
+
+Changes from 4.1.1 to 4.1.2
+---------------------------
+
+1. The manual has been considerably improved.
+ - Thoroughly reviewed and updated.
+ - Out-of-date examples replaced.
+ - Chapter 15 on MPFR reworked.
+ - Summary sections added to all chapters.
+ - Exercises added in several chapters.
+
+2. The debugger's "restart" command now works again.
+
+3. Redirected getline is now allowed inside BEGINFILE/ENDFILE.
+
+4. A number of bugs have been fixed in the MPFR code.
+
+5. Indirect function calls now work for both built-in and
+ extension functions.
+
+6. In non-English locales, it was accidentally possible to use "letters"
+ besides those of the English alphabet in identifiers. This has
+ been fixed. (isalpha and isalnum are NOT our friends.)
+
+XX. A number of bugs have been fixed. See the ChangeLog.
+
Changes from 4.1.0 to 4.1.1
---------------------------
diff --git a/README.git b/README.git
index 7d0d260c..c947cc24 100644
--- a/README.git
+++ b/README.git
@@ -1,4 +1,4 @@
-Sat Dec 1 21:53:02 IST 2012
+Thu Apr 17 16:54:26 IDT 2014
============================
If you are reading this, you have retrieved the gawk code base via
@@ -350,3 +350,16 @@ has been pushed up to the Savannah repo or not.
If your branch is completely local to your machine, use `git rebase'.
Otherwise, use `git merge'.
+
+- How do I remove branches in my local repo that are no longer in the
+ remote repo?
+
+ Either
+ git fetch --prune
+ or
+ git remote prune origin
+
+ These remove the remote branches (i.e., origin/something)
+ that no longer exist on the remote.
+
+ (Thanks to Stepan Kasal for this answer.)
diff --git a/README_d/ChangeLog b/README_d/ChangeLog
index 6bc828d9..8c19cfdb 100644
--- a/README_d/ChangeLog
+++ b/README_d/ChangeLog
@@ -1,3 +1,7 @@
+2014-08-12 Juergen Kahrs <jkahrs@users.sourceforge.net>
+
+ * README.cmake: Moved file from top-level to here.
+
2014-04-08 Arnold D. Robbins <arnold@skeeve.com>
* 4.1.1: Release tar ball made.
diff --git a/README_d/README.cmake b/README_d/README.cmake
new file mode 100644
index 00000000..b291d1be
--- /dev/null
+++ b/README_d/README.cmake
@@ -0,0 +1,95 @@
+CMake is a build automation system
+ http://en.wikipedia.org/wiki/Cmake
+
+We try to use it as a replacement for the established GNU build system.
+This attempt is currently only experimental. If you wonder why anyone
+should do this, read
+
+ Why the KDE project switched to CMake -- and how
+ http://lwn.net/Articles/188693/
+ Escape from GNU Autohell!
+ http://www.shlomifish.org/open-source/anti/autohell
+
+- How can I get GNU Awk compiled with CMake as fast as possible ?
+ git clone git://git.savannah.gnu.org/gawk.git
+ cd gawk
+ git checkout cmake
+ mkdir build
+ cd build
+ cmake ..
+ make
+ ./gawk --version
+ make test
+Notice that this git-checkout allows you to read the source code,
+track the cmake branch and get updates. You will not be able to
+commit anything.
+
+- How can I use git to contribute source code ?
+You need an account at Savannah. Read this to understand the first steps:
+ http://savannah.gnu.org/maintenance/UsingGit
+ README.git
+Use your account there to register your public ssh key at Savannah.
+Then you are ready to checkout. Remember that (when cloning) you are
+setting up your own local repository and make sure you configure it
+properly.
+ git clone ssh://my_account_name@git.sv.gnu.org/srv/git/gawk.git
+ git config --global user.name "first-name last-name"
+ git config --global user.email First.Last@email.com
+ git config --global color.ui auto
+
+- What is the current status of the cmake branch ?
+It has just begun, pre-alpha, unclear if it will ever be taken up
+by the maintainer. We want to study if using CMake with such a
+basic tool like gawk is feasible and if it is easier to use than
+the GNU build system.
+
+- Where can I find a tutorial on CMake basics ?
+Use the "official tutorial":
+ http://www.cmake.org/cmake/help/cmake_tutorial.html
+
+- Where is the reference of all commands and variables ?
+Depending on the CMake version you use, select one of these:
+ http://www.cmake.org/cmake/help/v2.8.10/cmake.html
+
+- How can I cross-compile ?
+Proceed in the same way as explained above for native compilation,
+but use a different build directory. When using CMake, do this:
+ cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchain_mingw32.cmake ..
+Write a new Toolchain file for your cross-compiler and use it.
+
+- How can I build an installable file ?
+Use "make package". The exact kind of installable file depends on your
+operating system and defaults to TGZ.
+
+- Can I build an executable that runs on any Win32 platform ?
+Yes, there are two ways of doing this.
+In both cases you need a MinGW compiler and the NSIS package builder
+installed on the host that shall do the build.
+ http://sourceforge.net/projects/mingw
+ http://sourceforge.net/projects/nsis
+When installed properly, the NSIS tool can even build an installer file
+(a single .exe file that unpacks, registers and installs the gawk executable
+and several other files).
+1. way: native build on a Win32 platform
+ http://www.cmake.org/cmake/help/runningcmake.html
+ After clicking "Configure" select the MinGW option with the default native compiler
+ In the build directory, the command "mingw32-make" will build the gawk.exe
+ The command "mingw32-make package" will build the installer file
+2. way: build with cross-compiler on a Linux platform like Ubuntu 12.04 LTS
+ Proceed as described above for cross-compilers.
+ The command "make ; make package" will build gawk.exe and the installer file
+
+- How can I run test cases ?
+You can run all the test cases that are defined in test/Makefile.am.
+These test case scripts were not changed, but the way they are invoked has
+been adapted to CMake habits.
+See http://cmake.org/Wiki/CMake/Testing_With_CTest#Simple_Testing
+ cmake ..
+ make
+ make test # run all test cases
+ ctest -N # list all test cases but don't run them
+ ctest -R BASIC # run all test cases belonging to group BASIC
+ ctest -R MPFR # run all test cases belonging to group MPFR
+ ctest -E SHLIB.filefunc # run all tests, except the SHLIB.filefunc test case
+Remember that running test cases is possible only after a native build.
+
diff --git a/TODO b/TODO
index f7a10ba3..66c8cc91 100644
--- a/TODO
+++ b/TODO
@@ -1,4 +1,4 @@
-Thu Oct 24 22:11:44 IDT 2013
+Sun Aug 24 20:00:53 IDT 2014
============================
There were too many files tracking different thoughts and ideas for
@@ -33,21 +33,16 @@ Minor Cleanups and Code Improvements
Look at function order within files.
- regex.h - remove underscores in param names
-
Consider removing use of and/or need for the protos.h file.
Recheck if gnulib regex can be dropped in
- Fully synchronize whitespaces tests (for \s, \S in Unicode
+ Fully synchronize whitespace tests (for \s, \S in Unicode
environment) with those of GNU grep.
Minor New Features
------------------
- Add a div() function to do integer division result. Needed
- esp for MPFR with large ints.
-
Enhance extension/fork.c waitpid to allow the caller to specify
the options. And add an optional array argument to wait and
waitpid in which to return exit status information.
@@ -57,10 +52,6 @@ Minor New Features
Make it possible to put print/printf + redirections into
an expression.
- Have pretty printing not run the program and nuke the
- undocumented GAWK_NO_PP_RUN env var. Requires test suite
- adjustments.
-
? Add an optional base to strtonum, allowing 2-36.
? Optional third argument for index indicating where to start the
@@ -91,9 +82,6 @@ Major New Features
Consider a typeof() function that returns a string (scalar, array,
regexp).
- Fix the early chapters in the doc with more up-to-date examples.
- No-one uses Bulletin Board Systems anymore.
-
Add ability to do decimal arithmetic.
Rework management of array index storage. (Partially DONE.)
@@ -124,12 +112,12 @@ Things To Think About That May Never Happen
https://github.com/emeryberger/DieHard
Implement namespaces. Arnold suggested the following in an email:
- - Extend the definition of an 'identifier' to include "." as a valid character
- although an identifier can't start with it.
+ - Extend the definition of an 'identifier' to include "." as a valid
+ character although an identifier can't start with it.
- Extension libraries install functions and global variables with names
that have a "." in them: XML.parse(), XML.name, whatever.
- - Awk code can read/write such variables and call such functions, but they
- cannot define such functions
+ - Awk code can read/write such variables and call such functions,
+ but they cannot define such functions
function XML.foo() { .. } # error
or create a variable with such a name if it doesn't exist. This would
be a run-time error, not a parse-time error.
@@ -139,8 +127,10 @@ Things To Think About That May Never Happen
to be very important.
Include a sample rpm spec file in a new packaging subdirectory.
+ (Really needed?)
Patch lexer for @include and @load to make quotes optional.
+ (Really needed?)
? Have strftime() pay attention to the value of ENVIRON["TZ"]
diff --git a/awk.h b/awk.h
index 57f1b3a8..e1ec7b91 100644
--- a/awk.h
+++ b/awk.h
@@ -193,15 +193,6 @@ extern void *memset_ulong(void *dest, int val, unsigned long l);
#define memset memset_ulong
#endif
-#ifdef HAVE_LIBSIGSEGV
-#include <sigsegv.h>
-#else
-typedef void *stackoverflow_context_t;
-#define sigsegv_install_handler(catchsegv) signal(SIGSEGV, catchsig)
-/* define as 0 rather than empty so that (void) cast on it works */
-#define stackoverflow_install_handler(catchstackoverflow, extra_stack, STACK_SIZE) 0
-#endif
-
#if defined(__EMX__) || defined(__MINGW32__)
#include "nonposix.h"
#endif /* defined(__EMX__) || defined(__MINGW32__) */
@@ -292,6 +283,7 @@ typedef enum nodevals {
Node_func, /* lnode is param. list, rnode is body */
Node_ext_func, /* extension function, code_ptr is builtin code */
Node_old_ext_func, /* extension function, code_ptr is builtin code */
+ Node_builtin_func, /* built-in function, main use is for FUNCTAB */
Node_array_ref, /* array passed by ref as parameter */
Node_array_tree, /* Hashed array tree (HAT) */
@@ -1390,10 +1382,16 @@ extern NODE *stopme(int nargs);
extern void shadow_funcs(void);
extern int check_special(const char *name);
extern SRCFILE *add_srcfile(enum srctype stype, char *src, SRCFILE *curr, bool *already_included, int *errcode);
+extern void free_srcfile(SRCFILE *thisfile);
extern void register_deferred_variable(const char *name, NODE *(*load_func)(void));
extern int files_are_same(char *path, SRCFILE *src);
extern void valinfo(NODE *n, Func_print print_func, FILE *fp);
-
+typedef NODE *(*builtin_func_t)(int); /* function that implements a built-in */
+extern builtin_func_t lookup_builtin(const char *name);
+extern void install_builtins(void);
+extern bool is_alpha(int c);
+extern bool is_alnum(int c);
+extern bool is_identchar(int c);
/* builtin.c */
extern NODE *do_fflush(int nargs);
extern NODE *do_index(int nargs);
@@ -1415,6 +1413,7 @@ extern NODE *do_sub(int nargs, unsigned int flags);
extern NODE *do_dcgettext(int nargs);
extern NODE *do_dcngettext(int nargs);
extern NODE *do_bindtextdomain(int nargs);
+extern NODE *do_div(int nargs);
#if MBS_SUPPORT
extern int strncasecmpmbs(const unsigned char *,
const unsigned char *, size_t);
@@ -1469,6 +1468,7 @@ extern NODE *get_actual_argument(int, bool, bool);
extern void init_fields(void);
extern void set_record(const char *buf, int cnt);
extern void reset_record(void);
+extern void rebuild_record(void);
extern void set_NF(void);
extern void set_PREC(void);
extern void set_ROUNDMODE(void);
@@ -1530,7 +1530,7 @@ extern char *find_source(const char *src, struct stat *stb, int *errcode, int is
extern NODE *do_getline_redir(int intovar, enum redirval redirtype);
extern NODE *do_getline(int intovar, IOBUF *iop);
extern struct redirect *getredirect(const char *str, int len);
-extern int inrec(IOBUF *iop, int *errcode);
+extern bool inrec(IOBUF *iop, int *errcode);
extern int nextfile(IOBUF **curfile, bool skipping);
/* main.c */
@@ -1610,7 +1610,7 @@ extern void load_symbols();
extern void init_symbol_table();
extern NODE *symbol_table;
extern NODE *func_table;
-extern NODE *install_symbol(char *name, NODETYPE type);
+extern NODE *install_symbol(const char *name, NODETYPE type);
extern NODE *remove_symbol(NODE *r);
extern void destroy_symbol(NODE *r);
extern void release_symbols(NODE *symlist, int keep_globals);
diff --git a/awkgram.c b/awkgram.c
index fe560913..861ab0bf 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -194,9 +194,7 @@ extern double fmod(double x, double y);
#define YYSTYPE INSTRUCTION *
-#define is_identchar(c) (isalnum(c) || (c) == '_')
-
-#line 200 "awkgram.c" /* yacc.c:339 */
+#line 198 "awkgram.c" /* yacc.c:339 */
# ifndef YY_NULLPTR
# if defined __cplusplus && 201103L <= __cplusplus
@@ -350,7 +348,7 @@ int yyparse (void);
/* Copy the second part of user declarations. */
-#line 354 "awkgram.c" /* yacc.c:358 */
+#line 352 "awkgram.c" /* yacc.c:358 */
#ifdef short
# undef short
@@ -652,25 +650,25 @@ static const yytype_uint8 yytranslate[] =
/* YYRLINE[YYN] -- Source line where rule number YYN was defined. */
static const yytype_uint16 yyrline[] =
{
- 0, 199, 199, 201, 206, 207, 213, 225, 229, 240,
- 246, 251, 259, 267, 269, 274, 282, 284, 290, 291,
- 293, 319, 330, 341, 347, 356, 366, 368, 370, 376,
- 381, 382, 386, 405, 404, 438, 440, 445, 446, 459,
- 464, 465, 469, 471, 473, 480, 570, 612, 654, 767,
- 774, 781, 791, 800, 809, 818, 829, 845, 844, 868,
- 880, 880, 978, 978, 1011, 1041, 1047, 1048, 1054, 1055,
- 1062, 1067, 1079, 1093, 1095, 1103, 1108, 1110, 1118, 1120,
- 1129, 1130, 1138, 1143, 1143, 1154, 1158, 1166, 1167, 1170,
- 1172, 1177, 1178, 1187, 1188, 1193, 1198, 1204, 1206, 1208,
- 1215, 1216, 1222, 1223, 1228, 1230, 1235, 1237, 1245, 1250,
- 1259, 1266, 1268, 1270, 1286, 1296, 1303, 1305, 1310, 1312,
- 1314, 1322, 1324, 1329, 1331, 1336, 1338, 1340, 1390, 1392,
- 1394, 1396, 1398, 1400, 1402, 1404, 1427, 1432, 1437, 1462,
- 1468, 1470, 1472, 1474, 1476, 1478, 1483, 1487, 1519, 1521,
- 1527, 1533, 1546, 1547, 1548, 1553, 1558, 1562, 1566, 1581,
- 1593, 1598, 1634, 1652, 1653, 1659, 1660, 1665, 1667, 1674,
- 1691, 1708, 1710, 1717, 1722, 1730, 1740, 1752, 1761, 1765,
- 1769, 1773, 1777, 1781, 1784, 1786, 1790, 1794, 1798
+ 0, 197, 197, 199, 204, 205, 211, 223, 227, 238,
+ 244, 249, 257, 265, 267, 272, 280, 282, 288, 289,
+ 291, 317, 328, 339, 345, 354, 364, 366, 368, 374,
+ 379, 380, 384, 403, 402, 436, 438, 443, 444, 457,
+ 462, 463, 467, 469, 471, 478, 568, 610, 652, 765,
+ 772, 779, 789, 798, 807, 816, 827, 843, 842, 866,
+ 878, 878, 976, 976, 1009, 1039, 1045, 1046, 1052, 1053,
+ 1060, 1065, 1077, 1091, 1093, 1101, 1106, 1108, 1116, 1118,
+ 1127, 1128, 1136, 1141, 1141, 1152, 1156, 1164, 1165, 1168,
+ 1170, 1175, 1176, 1185, 1186, 1191, 1196, 1202, 1204, 1206,
+ 1213, 1214, 1220, 1221, 1226, 1228, 1233, 1235, 1243, 1248,
+ 1257, 1264, 1266, 1268, 1284, 1294, 1301, 1303, 1308, 1310,
+ 1312, 1320, 1322, 1327, 1329, 1334, 1336, 1338, 1388, 1390,
+ 1392, 1394, 1396, 1398, 1400, 1402, 1416, 1421, 1426, 1451,
+ 1457, 1459, 1461, 1463, 1465, 1467, 1472, 1476, 1508, 1510,
+ 1516, 1522, 1535, 1536, 1537, 1542, 1547, 1551, 1555, 1570,
+ 1582, 1587, 1623, 1641, 1642, 1648, 1649, 1654, 1656, 1663,
+ 1680, 1697, 1699, 1706, 1711, 1719, 1729, 1741, 1750, 1754,
+ 1758, 1762, 1766, 1770, 1773, 1775, 1779, 1783, 1787
};
#endif
@@ -1843,26 +1841,26 @@ yyreduce:
switch (yyn)
{
case 3:
-#line 202 "awkgram.y" /* yacc.c:1646 */
+#line 200 "awkgram.y" /* yacc.c:1646 */
{
rule = 0;
yyerrok;
}
-#line 1852 "awkgram.c" /* yacc.c:1646 */
+#line 1850 "awkgram.c" /* yacc.c:1646 */
break;
case 5:
-#line 208 "awkgram.y" /* yacc.c:1646 */
+#line 206 "awkgram.y" /* yacc.c:1646 */
{
next_sourcefile();
if (sourcefile == srcfiles)
process_deferred();
}
-#line 1862 "awkgram.c" /* yacc.c:1646 */
+#line 1860 "awkgram.c" /* yacc.c:1646 */
break;
case 6:
-#line 214 "awkgram.y" /* yacc.c:1646 */
+#line 212 "awkgram.y" /* yacc.c:1646 */
{
rule = 0;
/*
@@ -1871,19 +1869,19 @@ yyreduce:
*/
/* yyerrok; */
}
-#line 1875 "awkgram.c" /* yacc.c:1646 */
+#line 1873 "awkgram.c" /* yacc.c:1646 */
break;
case 7:
-#line 226 "awkgram.y" /* yacc.c:1646 */
+#line 224 "awkgram.y" /* yacc.c:1646 */
{
(void) append_rule((yyvsp[-1]), (yyvsp[0]));
}
-#line 1883 "awkgram.c" /* yacc.c:1646 */
+#line 1881 "awkgram.c" /* yacc.c:1646 */
break;
case 8:
-#line 230 "awkgram.y" /* yacc.c:1646 */
+#line 228 "awkgram.y" /* yacc.c:1646 */
{
if (rule != Rule) {
msg(_("%s blocks must have an action part"), ruletab[rule]);
@@ -1894,39 +1892,39 @@ yyreduce:
} else /* pattern rule with non-empty pattern */
(void) append_rule((yyvsp[-1]), NULL);
}
-#line 1898 "awkgram.c" /* yacc.c:1646 */
+#line 1896 "awkgram.c" /* yacc.c:1646 */
break;
case 9:
-#line 241 "awkgram.y" /* yacc.c:1646 */
+#line 239 "awkgram.y" /* yacc.c:1646 */
{
in_function = NULL;
(void) mk_function((yyvsp[-1]), (yyvsp[0]));
yyerrok;
}
-#line 1908 "awkgram.c" /* yacc.c:1646 */
+#line 1906 "awkgram.c" /* yacc.c:1646 */
break;
case 10:
-#line 247 "awkgram.y" /* yacc.c:1646 */
+#line 245 "awkgram.y" /* yacc.c:1646 */
{
want_source = false;
yyerrok;
}
-#line 1917 "awkgram.c" /* yacc.c:1646 */
+#line 1915 "awkgram.c" /* yacc.c:1646 */
break;
case 11:
-#line 252 "awkgram.y" /* yacc.c:1646 */
+#line 250 "awkgram.y" /* yacc.c:1646 */
{
want_source = false;
yyerrok;
}
-#line 1926 "awkgram.c" /* yacc.c:1646 */
+#line 1924 "awkgram.c" /* yacc.c:1646 */
break;
case 12:
-#line 260 "awkgram.y" /* yacc.c:1646 */
+#line 258 "awkgram.y" /* yacc.c:1646 */
{
if (include_source((yyvsp[0])) < 0)
YYABORT;
@@ -1934,23 +1932,23 @@ yyreduce:
bcfree((yyvsp[0]));
(yyval) = NULL;
}
-#line 1938 "awkgram.c" /* yacc.c:1646 */
+#line 1936 "awkgram.c" /* yacc.c:1646 */
break;
case 13:
-#line 268 "awkgram.y" /* yacc.c:1646 */
+#line 266 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 1944 "awkgram.c" /* yacc.c:1646 */
+#line 1942 "awkgram.c" /* yacc.c:1646 */
break;
case 14:
-#line 270 "awkgram.y" /* yacc.c:1646 */
+#line 268 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 1950 "awkgram.c" /* yacc.c:1646 */
+#line 1948 "awkgram.c" /* yacc.c:1646 */
break;
case 15:
-#line 275 "awkgram.y" /* yacc.c:1646 */
+#line 273 "awkgram.y" /* yacc.c:1646 */
{
if (load_library((yyvsp[0])) < 0)
YYABORT;
@@ -1958,35 +1956,35 @@ yyreduce:
bcfree((yyvsp[0]));
(yyval) = NULL;
}
-#line 1962 "awkgram.c" /* yacc.c:1646 */
+#line 1960 "awkgram.c" /* yacc.c:1646 */
break;
case 16:
-#line 283 "awkgram.y" /* yacc.c:1646 */
+#line 281 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 1968 "awkgram.c" /* yacc.c:1646 */
+#line 1966 "awkgram.c" /* yacc.c:1646 */
break;
case 17:
-#line 285 "awkgram.y" /* yacc.c:1646 */
+#line 283 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 1974 "awkgram.c" /* yacc.c:1646 */
+#line 1972 "awkgram.c" /* yacc.c:1646 */
break;
case 18:
-#line 290 "awkgram.y" /* yacc.c:1646 */
+#line 288 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; rule = Rule; }
-#line 1980 "awkgram.c" /* yacc.c:1646 */
+#line 1978 "awkgram.c" /* yacc.c:1646 */
break;
case 19:
-#line 292 "awkgram.y" /* yacc.c:1646 */
+#line 290 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); rule = Rule; }
-#line 1986 "awkgram.c" /* yacc.c:1646 */
+#line 1984 "awkgram.c" /* yacc.c:1646 */
break;
case 20:
-#line 294 "awkgram.y" /* yacc.c:1646 */
+#line 292 "awkgram.y" /* yacc.c:1646 */
{
INSTRUCTION *tp;
@@ -2012,11 +2010,11 @@ yyreduce:
(yyval) = list_append(list_merge((yyvsp[-3]), (yyvsp[0])), tp);
rule = Rule;
}
-#line 2016 "awkgram.c" /* yacc.c:1646 */
+#line 2014 "awkgram.c" /* yacc.c:1646 */
break;
case 21:
-#line 320 "awkgram.y" /* yacc.c:1646 */
+#line 318 "awkgram.y" /* yacc.c:1646 */
{
static int begin_seen = 0;
if (do_lint_old && ++begin_seen == 2)
@@ -2027,11 +2025,11 @@ yyreduce:
(yyvsp[0])->source_file = source;
(yyval) = (yyvsp[0]);
}
-#line 2031 "awkgram.c" /* yacc.c:1646 */
+#line 2029 "awkgram.c" /* yacc.c:1646 */
break;
case 22:
-#line 331 "awkgram.y" /* yacc.c:1646 */
+#line 329 "awkgram.y" /* yacc.c:1646 */
{
static int end_seen = 0;
if (do_lint_old && ++end_seen == 2)
@@ -2042,70 +2040,70 @@ yyreduce:
(yyvsp[0])->source_file = source;
(yyval) = (yyvsp[0]);
}
-#line 2046 "awkgram.c" /* yacc.c:1646 */
+#line 2044 "awkgram.c" /* yacc.c:1646 */
break;
case 23:
-#line 342 "awkgram.y" /* yacc.c:1646 */
+#line 340 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[0])->in_rule = rule = BEGINFILE;
(yyvsp[0])->source_file = source;
(yyval) = (yyvsp[0]);
}
-#line 2056 "awkgram.c" /* yacc.c:1646 */
+#line 2054 "awkgram.c" /* yacc.c:1646 */
break;
case 24:
-#line 348 "awkgram.y" /* yacc.c:1646 */
+#line 346 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[0])->in_rule = rule = ENDFILE;
(yyvsp[0])->source_file = source;
(yyval) = (yyvsp[0]);
}
-#line 2066 "awkgram.c" /* yacc.c:1646 */
+#line 2064 "awkgram.c" /* yacc.c:1646 */
break;
case 25:
-#line 357 "awkgram.y" /* yacc.c:1646 */
+#line 355 "awkgram.y" /* yacc.c:1646 */
{
if ((yyvsp[-3]) == NULL)
(yyval) = list_create(instruction(Op_no_op));
else
(yyval) = (yyvsp[-3]);
}
-#line 2077 "awkgram.c" /* yacc.c:1646 */
+#line 2075 "awkgram.c" /* yacc.c:1646 */
break;
case 26:
-#line 367 "awkgram.y" /* yacc.c:1646 */
+#line 365 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 2083 "awkgram.c" /* yacc.c:1646 */
+#line 2081 "awkgram.c" /* yacc.c:1646 */
break;
case 27:
-#line 369 "awkgram.y" /* yacc.c:1646 */
+#line 367 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 2089 "awkgram.c" /* yacc.c:1646 */
+#line 2087 "awkgram.c" /* yacc.c:1646 */
break;
case 28:
-#line 371 "awkgram.y" /* yacc.c:1646 */
+#line 369 "awkgram.y" /* yacc.c:1646 */
{
yyerror(_("`%s' is a built-in function, it cannot be redefined"),
tokstart);
YYABORT;
}
-#line 2099 "awkgram.c" /* yacc.c:1646 */
+#line 2097 "awkgram.c" /* yacc.c:1646 */
break;
case 29:
-#line 377 "awkgram.y" /* yacc.c:1646 */
+#line 375 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 2105 "awkgram.c" /* yacc.c:1646 */
+#line 2103 "awkgram.c" /* yacc.c:1646 */
break;
case 32:
-#line 387 "awkgram.y" /* yacc.c:1646 */
+#line 385 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[-5])->source_file = source;
if (install_function((yyvsp[-4])->lextok, (yyvsp[-5]), (yyvsp[-2])) < 0)
@@ -2116,17 +2114,17 @@ yyreduce:
/* $4 already free'd in install_function */
(yyval) = (yyvsp[-5]);
}
-#line 2120 "awkgram.c" /* yacc.c:1646 */
+#line 2118 "awkgram.c" /* yacc.c:1646 */
break;
case 33:
-#line 405 "awkgram.y" /* yacc.c:1646 */
+#line 403 "awkgram.y" /* yacc.c:1646 */
{ want_regexp = true; }
-#line 2126 "awkgram.c" /* yacc.c:1646 */
+#line 2124 "awkgram.c" /* yacc.c:1646 */
break;
case 34:
-#line 407 "awkgram.y" /* yacc.c:1646 */
+#line 405 "awkgram.y" /* yacc.c:1646 */
{
NODE *n, *exp;
char *re;
@@ -2155,23 +2153,23 @@ yyreduce:
(yyval)->opcode = Op_match_rec;
(yyval)->memory = n;
}
-#line 2159 "awkgram.c" /* yacc.c:1646 */
+#line 2157 "awkgram.c" /* yacc.c:1646 */
break;
case 35:
-#line 439 "awkgram.y" /* yacc.c:1646 */
+#line 437 "awkgram.y" /* yacc.c:1646 */
{ bcfree((yyvsp[0])); }
-#line 2165 "awkgram.c" /* yacc.c:1646 */
+#line 2163 "awkgram.c" /* yacc.c:1646 */
break;
case 37:
-#line 445 "awkgram.y" /* yacc.c:1646 */
+#line 443 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 2171 "awkgram.c" /* yacc.c:1646 */
+#line 2169 "awkgram.c" /* yacc.c:1646 */
break;
case 38:
-#line 447 "awkgram.y" /* yacc.c:1646 */
+#line 445 "awkgram.y" /* yacc.c:1646 */
{
if ((yyvsp[0]) == NULL)
(yyval) = (yyvsp[-1]);
@@ -2184,40 +2182,40 @@ yyreduce:
}
yyerrok;
}
-#line 2188 "awkgram.c" /* yacc.c:1646 */
+#line 2186 "awkgram.c" /* yacc.c:1646 */
break;
case 39:
-#line 460 "awkgram.y" /* yacc.c:1646 */
+#line 458 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 2194 "awkgram.c" /* yacc.c:1646 */
+#line 2192 "awkgram.c" /* yacc.c:1646 */
break;
case 42:
-#line 470 "awkgram.y" /* yacc.c:1646 */
+#line 468 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 2200 "awkgram.c" /* yacc.c:1646 */
+#line 2198 "awkgram.c" /* yacc.c:1646 */
break;
case 43:
-#line 472 "awkgram.y" /* yacc.c:1646 */
+#line 470 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[-1]); }
-#line 2206 "awkgram.c" /* yacc.c:1646 */
+#line 2204 "awkgram.c" /* yacc.c:1646 */
break;
case 44:
-#line 474 "awkgram.y" /* yacc.c:1646 */
+#line 472 "awkgram.y" /* yacc.c:1646 */
{
if (do_pretty_print)
(yyval) = list_prepend((yyvsp[0]), instruction(Op_exec_count));
else
(yyval) = (yyvsp[0]);
}
-#line 2217 "awkgram.c" /* yacc.c:1646 */
+#line 2215 "awkgram.c" /* yacc.c:1646 */
break;
case 45:
-#line 481 "awkgram.y" /* yacc.c:1646 */
+#line 479 "awkgram.y" /* yacc.c:1646 */
{
INSTRUCTION *dflt, *curr = NULL, *cexp, *cstmt;
INSTRUCTION *ip, *nextc, *tbreak;
@@ -2307,11 +2305,11 @@ yyreduce:
break_allowed--;
fix_break_continue(ip, tbreak, NULL);
}
-#line 2311 "awkgram.c" /* yacc.c:1646 */
+#line 2309 "awkgram.c" /* yacc.c:1646 */
break;
case 46:
-#line 571 "awkgram.y" /* yacc.c:1646 */
+#line 569 "awkgram.y" /* yacc.c:1646 */
{
/*
* -----------------
@@ -2353,11 +2351,11 @@ yyreduce:
continue_allowed--;
fix_break_continue(ip, tbreak, tcont);
}
-#line 2357 "awkgram.c" /* yacc.c:1646 */
+#line 2355 "awkgram.c" /* yacc.c:1646 */
break;
case 47:
-#line 613 "awkgram.y" /* yacc.c:1646 */
+#line 611 "awkgram.y" /* yacc.c:1646 */
{
/*
* -----------------
@@ -2399,11 +2397,11 @@ yyreduce:
} /* else
$1 and $4 are NULLs */
}
-#line 2403 "awkgram.c" /* yacc.c:1646 */
+#line 2401 "awkgram.c" /* yacc.c:1646 */
break;
case 48:
-#line 655 "awkgram.y" /* yacc.c:1646 */
+#line 653 "awkgram.y" /* yacc.c:1646 */
{
INSTRUCTION *ip;
char *var_name = (yyvsp[-5])->lextok;
@@ -2516,44 +2514,44 @@ regular_loop:
break_allowed--;
continue_allowed--;
}
-#line 2520 "awkgram.c" /* yacc.c:1646 */
+#line 2518 "awkgram.c" /* yacc.c:1646 */
break;
case 49:
-#line 768 "awkgram.y" /* yacc.c:1646 */
+#line 766 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = mk_for_loop((yyvsp[-11]), (yyvsp[-9]), (yyvsp[-6]), (yyvsp[-3]), (yyvsp[0]));
break_allowed--;
continue_allowed--;
}
-#line 2531 "awkgram.c" /* yacc.c:1646 */
+#line 2529 "awkgram.c" /* yacc.c:1646 */
break;
case 50:
-#line 775 "awkgram.y" /* yacc.c:1646 */
+#line 773 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = mk_for_loop((yyvsp[-10]), (yyvsp[-8]), (INSTRUCTION *) NULL, (yyvsp[-3]), (yyvsp[0]));
break_allowed--;
continue_allowed--;
}
-#line 2542 "awkgram.c" /* yacc.c:1646 */
+#line 2540 "awkgram.c" /* yacc.c:1646 */
break;
case 51:
-#line 782 "awkgram.y" /* yacc.c:1646 */
+#line 780 "awkgram.y" /* yacc.c:1646 */
{
if (do_pretty_print)
(yyval) = list_prepend((yyvsp[0]), instruction(Op_exec_count));
else
(yyval) = (yyvsp[0]);
}
-#line 2553 "awkgram.c" /* yacc.c:1646 */
+#line 2551 "awkgram.c" /* yacc.c:1646 */
break;
case 52:
-#line 792 "awkgram.y" /* yacc.c:1646 */
+#line 790 "awkgram.y" /* yacc.c:1646 */
{
if (! break_allowed)
error_ln((yyvsp[-1])->source_line,
@@ -2562,11 +2560,11 @@ regular_loop:
(yyval) = list_create((yyvsp[-1]));
}
-#line 2566 "awkgram.c" /* yacc.c:1646 */
+#line 2564 "awkgram.c" /* yacc.c:1646 */
break;
case 53:
-#line 801 "awkgram.y" /* yacc.c:1646 */
+#line 799 "awkgram.y" /* yacc.c:1646 */
{
if (! continue_allowed)
error_ln((yyvsp[-1])->source_line,
@@ -2575,11 +2573,11 @@ regular_loop:
(yyval) = list_create((yyvsp[-1]));
}
-#line 2579 "awkgram.c" /* yacc.c:1646 */
+#line 2577 "awkgram.c" /* yacc.c:1646 */
break;
case 54:
-#line 810 "awkgram.y" /* yacc.c:1646 */
+#line 808 "awkgram.y" /* yacc.c:1646 */
{
/* if inside function (rule = 0), resolve context at run-time */
if (rule && rule != Rule)
@@ -2588,11 +2586,11 @@ regular_loop:
(yyvsp[-1])->target_jmp = ip_rec;
(yyval) = list_create((yyvsp[-1]));
}
-#line 2592 "awkgram.c" /* yacc.c:1646 */
+#line 2590 "awkgram.c" /* yacc.c:1646 */
break;
case 55:
-#line 819 "awkgram.y" /* yacc.c:1646 */
+#line 817 "awkgram.y" /* yacc.c:1646 */
{
/* if inside function (rule = 0), resolve context at run-time */
if (rule == BEGIN || rule == END || rule == ENDFILE)
@@ -2603,11 +2601,11 @@ regular_loop:
(yyvsp[-1])->target_endfile = ip_endfile;
(yyval) = list_create((yyvsp[-1]));
}
-#line 2607 "awkgram.c" /* yacc.c:1646 */
+#line 2605 "awkgram.c" /* yacc.c:1646 */
break;
case 56:
-#line 830 "awkgram.y" /* yacc.c:1646 */
+#line 828 "awkgram.y" /* yacc.c:1646 */
{
/* Initialize the two possible jump targets, the actual target
* is resolved at run-time.
@@ -2622,20 +2620,20 @@ regular_loop:
} else
(yyval) = list_append((yyvsp[-1]), (yyvsp[-2]));
}
-#line 2626 "awkgram.c" /* yacc.c:1646 */
+#line 2624 "awkgram.c" /* yacc.c:1646 */
break;
case 57:
-#line 845 "awkgram.y" /* yacc.c:1646 */
+#line 843 "awkgram.y" /* yacc.c:1646 */
{
if (! in_function)
yyerror(_("`return' used outside function context"));
}
-#line 2635 "awkgram.c" /* yacc.c:1646 */
+#line 2633 "awkgram.c" /* yacc.c:1646 */
break;
case 58:
-#line 848 "awkgram.y" /* yacc.c:1646 */
+#line 846 "awkgram.y" /* yacc.c:1646 */
{
if ((yyvsp[-1]) == NULL) {
(yyval) = list_create((yyvsp[-3]));
@@ -2656,17 +2654,17 @@ regular_loop:
(yyval) = list_append((yyvsp[-1]), (yyvsp[-3]));
}
}
-#line 2660 "awkgram.c" /* yacc.c:1646 */
+#line 2658 "awkgram.c" /* yacc.c:1646 */
break;
case 60:
-#line 880 "awkgram.y" /* yacc.c:1646 */
+#line 878 "awkgram.y" /* yacc.c:1646 */
{ in_print = true; in_parens = 0; }
-#line 2666 "awkgram.c" /* yacc.c:1646 */
+#line 2664 "awkgram.c" /* yacc.c:1646 */
break;
case 61:
-#line 881 "awkgram.y" /* yacc.c:1646 */
+#line 879 "awkgram.y" /* yacc.c:1646 */
{
/*
* Optimization: plain `print' has no expression list, so $3 is null.
@@ -2763,17 +2761,17 @@ regular_print:
}
}
}
-#line 2767 "awkgram.c" /* yacc.c:1646 */
+#line 2765 "awkgram.c" /* yacc.c:1646 */
break;
case 62:
-#line 978 "awkgram.y" /* yacc.c:1646 */
+#line 976 "awkgram.y" /* yacc.c:1646 */
{ sub_counter = 0; }
-#line 2773 "awkgram.c" /* yacc.c:1646 */
+#line 2771 "awkgram.c" /* yacc.c:1646 */
break;
case 63:
-#line 979 "awkgram.y" /* yacc.c:1646 */
+#line 977 "awkgram.y" /* yacc.c:1646 */
{
char *arr = (yyvsp[-2])->lextok;
@@ -2806,11 +2804,11 @@ regular_print:
(yyval) = list_append(list_append((yyvsp[0]), (yyvsp[-2])), (yyvsp[-3]));
}
}
-#line 2810 "awkgram.c" /* yacc.c:1646 */
+#line 2808 "awkgram.c" /* yacc.c:1646 */
break;
case 64:
-#line 1016 "awkgram.y" /* yacc.c:1646 */
+#line 1014 "awkgram.y" /* yacc.c:1646 */
{
static bool warned = false;
char *arr = (yyvsp[-1])->lextok;
@@ -2836,52 +2834,52 @@ regular_print:
fatal(_("`delete' is not allowed with FUNCTAB"));
}
}
-#line 2840 "awkgram.c" /* yacc.c:1646 */
+#line 2838 "awkgram.c" /* yacc.c:1646 */
break;
case 65:
-#line 1042 "awkgram.y" /* yacc.c:1646 */
+#line 1040 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = optimize_assignment((yyvsp[0])); }
-#line 2846 "awkgram.c" /* yacc.c:1646 */
+#line 2844 "awkgram.c" /* yacc.c:1646 */
break;
case 66:
-#line 1047 "awkgram.y" /* yacc.c:1646 */
+#line 1045 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 2852 "awkgram.c" /* yacc.c:1646 */
+#line 2850 "awkgram.c" /* yacc.c:1646 */
break;
case 67:
-#line 1049 "awkgram.y" /* yacc.c:1646 */
+#line 1047 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 2858 "awkgram.c" /* yacc.c:1646 */
+#line 2856 "awkgram.c" /* yacc.c:1646 */
break;
case 68:
-#line 1054 "awkgram.y" /* yacc.c:1646 */
+#line 1052 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 2864 "awkgram.c" /* yacc.c:1646 */
+#line 2862 "awkgram.c" /* yacc.c:1646 */
break;
case 69:
-#line 1056 "awkgram.y" /* yacc.c:1646 */
+#line 1054 "awkgram.y" /* yacc.c:1646 */
{
if ((yyvsp[-1]) == NULL)
(yyval) = list_create((yyvsp[0]));
else
(yyval) = list_prepend((yyvsp[-1]), (yyvsp[0]));
}
-#line 2875 "awkgram.c" /* yacc.c:1646 */
+#line 2873 "awkgram.c" /* yacc.c:1646 */
break;
case 70:
-#line 1063 "awkgram.y" /* yacc.c:1646 */
+#line 1061 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 2881 "awkgram.c" /* yacc.c:1646 */
+#line 2879 "awkgram.c" /* yacc.c:1646 */
break;
case 71:
-#line 1068 "awkgram.y" /* yacc.c:1646 */
+#line 1066 "awkgram.y" /* yacc.c:1646 */
{
INSTRUCTION *casestmt = (yyvsp[0]);
if ((yyvsp[0]) == NULL)
@@ -2893,11 +2891,11 @@ regular_print:
bcfree((yyvsp[-2]));
(yyval) = (yyvsp[-4]);
}
-#line 2897 "awkgram.c" /* yacc.c:1646 */
+#line 2895 "awkgram.c" /* yacc.c:1646 */
break;
case 72:
-#line 1080 "awkgram.y" /* yacc.c:1646 */
+#line 1078 "awkgram.y" /* yacc.c:1646 */
{
INSTRUCTION *casestmt = (yyvsp[0]);
if ((yyvsp[0]) == NULL)
@@ -2908,17 +2906,17 @@ regular_print:
(yyvsp[-3])->case_stmt = casestmt;
(yyval) = (yyvsp[-3]);
}
-#line 2912 "awkgram.c" /* yacc.c:1646 */
+#line 2910 "awkgram.c" /* yacc.c:1646 */
break;
case 73:
-#line 1094 "awkgram.y" /* yacc.c:1646 */
+#line 1092 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 2918 "awkgram.c" /* yacc.c:1646 */
+#line 2916 "awkgram.c" /* yacc.c:1646 */
break;
case 74:
-#line 1096 "awkgram.y" /* yacc.c:1646 */
+#line 1094 "awkgram.y" /* yacc.c:1646 */
{
NODE *n = (yyvsp[0])->memory;
(void) force_number(n);
@@ -2926,71 +2924,71 @@ regular_print:
bcfree((yyvsp[-1]));
(yyval) = (yyvsp[0]);
}
-#line 2930 "awkgram.c" /* yacc.c:1646 */
+#line 2928 "awkgram.c" /* yacc.c:1646 */
break;
case 75:
-#line 1104 "awkgram.y" /* yacc.c:1646 */
+#line 1102 "awkgram.y" /* yacc.c:1646 */
{
bcfree((yyvsp[-1]));
(yyval) = (yyvsp[0]);
}
-#line 2939 "awkgram.c" /* yacc.c:1646 */
+#line 2937 "awkgram.c" /* yacc.c:1646 */
break;
case 76:
-#line 1109 "awkgram.y" /* yacc.c:1646 */
+#line 1107 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 2945 "awkgram.c" /* yacc.c:1646 */
+#line 2943 "awkgram.c" /* yacc.c:1646 */
break;
case 77:
-#line 1111 "awkgram.y" /* yacc.c:1646 */
+#line 1109 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[0])->opcode = Op_push_re;
(yyval) = (yyvsp[0]);
}
-#line 2954 "awkgram.c" /* yacc.c:1646 */
+#line 2952 "awkgram.c" /* yacc.c:1646 */
break;
case 78:
-#line 1119 "awkgram.y" /* yacc.c:1646 */
+#line 1117 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 2960 "awkgram.c" /* yacc.c:1646 */
+#line 2958 "awkgram.c" /* yacc.c:1646 */
break;
case 79:
-#line 1121 "awkgram.y" /* yacc.c:1646 */
+#line 1119 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 2966 "awkgram.c" /* yacc.c:1646 */
+#line 2964 "awkgram.c" /* yacc.c:1646 */
break;
case 81:
-#line 1131 "awkgram.y" /* yacc.c:1646 */
+#line 1129 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = (yyvsp[-1]);
}
-#line 2974 "awkgram.c" /* yacc.c:1646 */
+#line 2972 "awkgram.c" /* yacc.c:1646 */
break;
case 82:
-#line 1138 "awkgram.y" /* yacc.c:1646 */
+#line 1136 "awkgram.y" /* yacc.c:1646 */
{
in_print = false;
in_parens = 0;
(yyval) = NULL;
}
-#line 2984 "awkgram.c" /* yacc.c:1646 */
+#line 2982 "awkgram.c" /* yacc.c:1646 */
break;
case 83:
-#line 1143 "awkgram.y" /* yacc.c:1646 */
+#line 1141 "awkgram.y" /* yacc.c:1646 */
{ in_print = false; in_parens = 0; }
-#line 2990 "awkgram.c" /* yacc.c:1646 */
+#line 2988 "awkgram.c" /* yacc.c:1646 */
break;
case 84:
-#line 1144 "awkgram.y" /* yacc.c:1646 */
+#line 1142 "awkgram.y" /* yacc.c:1646 */
{
if ((yyvsp[-2])->redir_type == redirect_twoway
&& (yyvsp[0])->lasti->opcode == Op_K_getline_redir
@@ -2998,136 +2996,136 @@ regular_print:
yyerror(_("multistage two-way pipelines don't work"));
(yyval) = list_prepend((yyvsp[0]), (yyvsp[-2]));
}
-#line 3002 "awkgram.c" /* yacc.c:1646 */
+#line 3000 "awkgram.c" /* yacc.c:1646 */
break;
case 85:
-#line 1155 "awkgram.y" /* yacc.c:1646 */
+#line 1153 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = mk_condition((yyvsp[-3]), (yyvsp[-5]), (yyvsp[0]), NULL, NULL);
}
-#line 3010 "awkgram.c" /* yacc.c:1646 */
+#line 3008 "awkgram.c" /* yacc.c:1646 */
break;
case 86:
-#line 1160 "awkgram.y" /* yacc.c:1646 */
+#line 1158 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = mk_condition((yyvsp[-6]), (yyvsp[-8]), (yyvsp[-3]), (yyvsp[-2]), (yyvsp[0]));
}
-#line 3018 "awkgram.c" /* yacc.c:1646 */
+#line 3016 "awkgram.c" /* yacc.c:1646 */
break;
case 91:
-#line 1177 "awkgram.y" /* yacc.c:1646 */
+#line 1175 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 3024 "awkgram.c" /* yacc.c:1646 */
+#line 3022 "awkgram.c" /* yacc.c:1646 */
break;
case 92:
-#line 1179 "awkgram.y" /* yacc.c:1646 */
+#line 1177 "awkgram.y" /* yacc.c:1646 */
{
bcfree((yyvsp[-1]));
(yyval) = (yyvsp[0]);
}
-#line 3033 "awkgram.c" /* yacc.c:1646 */
+#line 3031 "awkgram.c" /* yacc.c:1646 */
break;
case 93:
-#line 1187 "awkgram.y" /* yacc.c:1646 */
+#line 1185 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 3039 "awkgram.c" /* yacc.c:1646 */
+#line 3037 "awkgram.c" /* yacc.c:1646 */
break;
case 94:
-#line 1189 "awkgram.y" /* yacc.c:1646 */
+#line 1187 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]) ; }
-#line 3045 "awkgram.c" /* yacc.c:1646 */
+#line 3043 "awkgram.c" /* yacc.c:1646 */
break;
case 95:
-#line 1194 "awkgram.y" /* yacc.c:1646 */
+#line 1192 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[0])->param_count = 0;
(yyval) = list_create((yyvsp[0]));
}
-#line 3054 "awkgram.c" /* yacc.c:1646 */
+#line 3052 "awkgram.c" /* yacc.c:1646 */
break;
case 96:
-#line 1199 "awkgram.y" /* yacc.c:1646 */
+#line 1197 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[0])->param_count = (yyvsp[-2])->lasti->param_count + 1;
(yyval) = list_append((yyvsp[-2]), (yyvsp[0]));
yyerrok;
}
-#line 3064 "awkgram.c" /* yacc.c:1646 */
+#line 3062 "awkgram.c" /* yacc.c:1646 */
break;
case 97:
-#line 1205 "awkgram.y" /* yacc.c:1646 */
+#line 1203 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 3070 "awkgram.c" /* yacc.c:1646 */
+#line 3068 "awkgram.c" /* yacc.c:1646 */
break;
case 98:
-#line 1207 "awkgram.y" /* yacc.c:1646 */
+#line 1205 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[-1]); }
-#line 3076 "awkgram.c" /* yacc.c:1646 */
+#line 3074 "awkgram.c" /* yacc.c:1646 */
break;
case 99:
-#line 1209 "awkgram.y" /* yacc.c:1646 */
+#line 1207 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[-2]); }
-#line 3082 "awkgram.c" /* yacc.c:1646 */
+#line 3080 "awkgram.c" /* yacc.c:1646 */
break;
case 100:
-#line 1215 "awkgram.y" /* yacc.c:1646 */
+#line 1213 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 3088 "awkgram.c" /* yacc.c:1646 */
+#line 3086 "awkgram.c" /* yacc.c:1646 */
break;
case 101:
-#line 1217 "awkgram.y" /* yacc.c:1646 */
+#line 1215 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3094 "awkgram.c" /* yacc.c:1646 */
+#line 3092 "awkgram.c" /* yacc.c:1646 */
break;
case 102:
-#line 1222 "awkgram.y" /* yacc.c:1646 */
+#line 1220 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 3100 "awkgram.c" /* yacc.c:1646 */
+#line 3098 "awkgram.c" /* yacc.c:1646 */
break;
case 103:
-#line 1224 "awkgram.y" /* yacc.c:1646 */
+#line 1222 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3106 "awkgram.c" /* yacc.c:1646 */
+#line 3104 "awkgram.c" /* yacc.c:1646 */
break;
case 104:
-#line 1229 "awkgram.y" /* yacc.c:1646 */
+#line 1227 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = mk_expression_list(NULL, (yyvsp[0])); }
-#line 3112 "awkgram.c" /* yacc.c:1646 */
+#line 3110 "awkgram.c" /* yacc.c:1646 */
break;
case 105:
-#line 1231 "awkgram.y" /* yacc.c:1646 */
+#line 1229 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = mk_expression_list((yyvsp[-2]), (yyvsp[0]));
yyerrok;
}
-#line 3121 "awkgram.c" /* yacc.c:1646 */
+#line 3119 "awkgram.c" /* yacc.c:1646 */
break;
case 106:
-#line 1236 "awkgram.y" /* yacc.c:1646 */
+#line 1234 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 3127 "awkgram.c" /* yacc.c:1646 */
+#line 3125 "awkgram.c" /* yacc.c:1646 */
break;
case 107:
-#line 1238 "awkgram.y" /* yacc.c:1646 */
+#line 1236 "awkgram.y" /* yacc.c:1646 */
{
/*
* Returning the expression list instead of NULL lets
@@ -3135,52 +3133,52 @@ regular_print:
*/
(yyval) = (yyvsp[-1]);
}
-#line 3139 "awkgram.c" /* yacc.c:1646 */
+#line 3137 "awkgram.c" /* yacc.c:1646 */
break;
case 108:
-#line 1246 "awkgram.y" /* yacc.c:1646 */
+#line 1244 "awkgram.y" /* yacc.c:1646 */
{
/* Ditto */
(yyval) = mk_expression_list((yyvsp[-2]), (yyvsp[0]));
}
-#line 3148 "awkgram.c" /* yacc.c:1646 */
+#line 3146 "awkgram.c" /* yacc.c:1646 */
break;
case 109:
-#line 1251 "awkgram.y" /* yacc.c:1646 */
+#line 1249 "awkgram.y" /* yacc.c:1646 */
{
/* Ditto */
(yyval) = (yyvsp[-2]);
}
-#line 3157 "awkgram.c" /* yacc.c:1646 */
+#line 3155 "awkgram.c" /* yacc.c:1646 */
break;
case 110:
-#line 1260 "awkgram.y" /* yacc.c:1646 */
+#line 1258 "awkgram.y" /* yacc.c:1646 */
{
if (do_lint && (yyvsp[0])->lasti->opcode == Op_match_rec)
lintwarn_ln((yyvsp[-1])->source_line,
_("regular expression on right of assignment"));
(yyval) = mk_assignment((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1]));
}
-#line 3168 "awkgram.c" /* yacc.c:1646 */
+#line 3166 "awkgram.c" /* yacc.c:1646 */
break;
case 111:
-#line 1267 "awkgram.y" /* yacc.c:1646 */
+#line 1265 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = mk_boolean((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); }
-#line 3174 "awkgram.c" /* yacc.c:1646 */
+#line 3172 "awkgram.c" /* yacc.c:1646 */
break;
case 112:
-#line 1269 "awkgram.y" /* yacc.c:1646 */
+#line 1267 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = mk_boolean((yyvsp[-2]), (yyvsp[0]), (yyvsp[-1])); }
-#line 3180 "awkgram.c" /* yacc.c:1646 */
+#line 3178 "awkgram.c" /* yacc.c:1646 */
break;
case 113:
-#line 1271 "awkgram.y" /* yacc.c:1646 */
+#line 1269 "awkgram.y" /* yacc.c:1646 */
{
if ((yyvsp[-2])->lasti->opcode == Op_match_rec)
warning_ln((yyvsp[-1])->source_line,
@@ -3196,11 +3194,11 @@ regular_print:
(yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1]));
}
}
-#line 3200 "awkgram.c" /* yacc.c:1646 */
+#line 3198 "awkgram.c" /* yacc.c:1646 */
break;
case 114:
-#line 1287 "awkgram.y" /* yacc.c:1646 */
+#line 1285 "awkgram.y" /* yacc.c:1646 */
{
if (do_lint_old)
warning_ln((yyvsp[-1])->source_line,
@@ -3210,91 +3208,91 @@ regular_print:
(yyvsp[-1])->expr_count = 1;
(yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1]));
}
-#line 3214 "awkgram.c" /* yacc.c:1646 */
+#line 3212 "awkgram.c" /* yacc.c:1646 */
break;
case 115:
-#line 1297 "awkgram.y" /* yacc.c:1646 */
+#line 1295 "awkgram.y" /* yacc.c:1646 */
{
if (do_lint && (yyvsp[0])->lasti->opcode == Op_match_rec)
lintwarn_ln((yyvsp[-1])->source_line,
_("regular expression on right of comparison"));
(yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1]));
}
-#line 3225 "awkgram.c" /* yacc.c:1646 */
+#line 3223 "awkgram.c" /* yacc.c:1646 */
break;
case 116:
-#line 1304 "awkgram.y" /* yacc.c:1646 */
+#line 1302 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = mk_condition((yyvsp[-4]), (yyvsp[-3]), (yyvsp[-2]), (yyvsp[-1]), (yyvsp[0])); }
-#line 3231 "awkgram.c" /* yacc.c:1646 */
+#line 3229 "awkgram.c" /* yacc.c:1646 */
break;
case 117:
-#line 1306 "awkgram.y" /* yacc.c:1646 */
+#line 1304 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3237 "awkgram.c" /* yacc.c:1646 */
+#line 3235 "awkgram.c" /* yacc.c:1646 */
break;
case 118:
-#line 1311 "awkgram.y" /* yacc.c:1646 */
+#line 1309 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3243 "awkgram.c" /* yacc.c:1646 */
+#line 3241 "awkgram.c" /* yacc.c:1646 */
break;
case 119:
-#line 1313 "awkgram.y" /* yacc.c:1646 */
+#line 1311 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3249 "awkgram.c" /* yacc.c:1646 */
+#line 3247 "awkgram.c" /* yacc.c:1646 */
break;
case 120:
-#line 1315 "awkgram.y" /* yacc.c:1646 */
+#line 1313 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[0])->opcode = Op_assign_quotient;
(yyval) = (yyvsp[0]);
}
-#line 3258 "awkgram.c" /* yacc.c:1646 */
+#line 3256 "awkgram.c" /* yacc.c:1646 */
break;
case 121:
-#line 1323 "awkgram.y" /* yacc.c:1646 */
+#line 1321 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3264 "awkgram.c" /* yacc.c:1646 */
+#line 3262 "awkgram.c" /* yacc.c:1646 */
break;
case 122:
-#line 1325 "awkgram.y" /* yacc.c:1646 */
+#line 1323 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3270 "awkgram.c" /* yacc.c:1646 */
+#line 3268 "awkgram.c" /* yacc.c:1646 */
break;
case 123:
-#line 1330 "awkgram.y" /* yacc.c:1646 */
+#line 1328 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3276 "awkgram.c" /* yacc.c:1646 */
+#line 3274 "awkgram.c" /* yacc.c:1646 */
break;
case 124:
-#line 1332 "awkgram.y" /* yacc.c:1646 */
+#line 1330 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3282 "awkgram.c" /* yacc.c:1646 */
+#line 3280 "awkgram.c" /* yacc.c:1646 */
break;
case 125:
-#line 1337 "awkgram.y" /* yacc.c:1646 */
+#line 1335 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3288 "awkgram.c" /* yacc.c:1646 */
+#line 3286 "awkgram.c" /* yacc.c:1646 */
break;
case 126:
-#line 1339 "awkgram.y" /* yacc.c:1646 */
+#line 1337 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3294 "awkgram.c" /* yacc.c:1646 */
+#line 3292 "awkgram.c" /* yacc.c:1646 */
break;
case 127:
-#line 1341 "awkgram.y" /* yacc.c:1646 */
+#line 1339 "awkgram.y" /* yacc.c:1646 */
{
int count = 2;
bool is_simple_var = false;
@@ -3341,92 +3339,83 @@ regular_print:
max_args = count;
}
}
-#line 3345 "awkgram.c" /* yacc.c:1646 */
+#line 3343 "awkgram.c" /* yacc.c:1646 */
break;
case 129:
-#line 1393 "awkgram.y" /* yacc.c:1646 */
+#line 1391 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3351 "awkgram.c" /* yacc.c:1646 */
+#line 3349 "awkgram.c" /* yacc.c:1646 */
break;
case 130:
-#line 1395 "awkgram.y" /* yacc.c:1646 */
+#line 1393 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3357 "awkgram.c" /* yacc.c:1646 */
+#line 3355 "awkgram.c" /* yacc.c:1646 */
break;
case 131:
-#line 1397 "awkgram.y" /* yacc.c:1646 */
+#line 1395 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3363 "awkgram.c" /* yacc.c:1646 */
+#line 3361 "awkgram.c" /* yacc.c:1646 */
break;
case 132:
-#line 1399 "awkgram.y" /* yacc.c:1646 */
+#line 1397 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3369 "awkgram.c" /* yacc.c:1646 */
+#line 3367 "awkgram.c" /* yacc.c:1646 */
break;
case 133:
-#line 1401 "awkgram.y" /* yacc.c:1646 */
+#line 1399 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3375 "awkgram.c" /* yacc.c:1646 */
+#line 3373 "awkgram.c" /* yacc.c:1646 */
break;
case 134:
-#line 1403 "awkgram.y" /* yacc.c:1646 */
+#line 1401 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3381 "awkgram.c" /* yacc.c:1646 */
+#line 3379 "awkgram.c" /* yacc.c:1646 */
break;
case 135:
-#line 1405 "awkgram.y" /* yacc.c:1646 */
+#line 1403 "awkgram.y" /* yacc.c:1646 */
{
/*
- * In BEGINFILE/ENDFILE, allow `getline var < file'
+ * In BEGINFILE/ENDFILE, allow `getline [var] < file'
*/
- if (rule == BEGINFILE || rule == ENDFILE) {
- if ((yyvsp[-1]) != NULL && (yyvsp[0]) != NULL)
- ; /* all ok */
- else {
- if ((yyvsp[-1]) != NULL)
- error_ln((yyvsp[-2])->source_line,
- _("`getline var' invalid inside `%s' rule"), ruletab[rule]);
- else
- error_ln((yyvsp[-2])->source_line,
- _("`getline' invalid inside `%s' rule"), ruletab[rule]);
- }
- }
+ if ((rule == BEGINFILE || rule == ENDFILE) && (yyvsp[0]) == NULL)
+ error_ln((yyvsp[-2])->source_line,
+ _("non-redirected `getline' invalid inside `%s' rule"), ruletab[rule]);
if (do_lint && rule == END && (yyvsp[0]) == NULL)
lintwarn_ln((yyvsp[-2])->source_line,
_("non-redirected `getline' undefined inside END action"));
(yyval) = mk_getline((yyvsp[-2]), (yyvsp[-1]), (yyvsp[0]), redirect_input);
}
-#line 3408 "awkgram.c" /* yacc.c:1646 */
+#line 3397 "awkgram.c" /* yacc.c:1646 */
break;
case 136:
-#line 1428 "awkgram.y" /* yacc.c:1646 */
+#line 1417 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[0])->opcode = Op_postincrement;
(yyval) = mk_assignment((yyvsp[-1]), NULL, (yyvsp[0]));
}
-#line 3417 "awkgram.c" /* yacc.c:1646 */
+#line 3406 "awkgram.c" /* yacc.c:1646 */
break;
case 137:
-#line 1433 "awkgram.y" /* yacc.c:1646 */
+#line 1422 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[0])->opcode = Op_postdecrement;
(yyval) = mk_assignment((yyvsp[-1]), NULL, (yyvsp[0]));
}
-#line 3426 "awkgram.c" /* yacc.c:1646 */
+#line 3415 "awkgram.c" /* yacc.c:1646 */
break;
case 138:
-#line 1438 "awkgram.y" /* yacc.c:1646 */
+#line 1427 "awkgram.y" /* yacc.c:1646 */
{
if (do_lint_old) {
warning_ln((yyvsp[-1])->source_line,
@@ -3446,64 +3435,64 @@ regular_print:
(yyval) = list_append(list_merge(t, (yyvsp[0])), (yyvsp[-1]));
}
}
-#line 3450 "awkgram.c" /* yacc.c:1646 */
+#line 3439 "awkgram.c" /* yacc.c:1646 */
break;
case 139:
-#line 1463 "awkgram.y" /* yacc.c:1646 */
+#line 1452 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = mk_getline((yyvsp[-1]), (yyvsp[0]), (yyvsp[-3]), (yyvsp[-2])->redir_type);
bcfree((yyvsp[-2]));
}
-#line 3459 "awkgram.c" /* yacc.c:1646 */
+#line 3448 "awkgram.c" /* yacc.c:1646 */
break;
case 140:
-#line 1469 "awkgram.y" /* yacc.c:1646 */
+#line 1458 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3465 "awkgram.c" /* yacc.c:1646 */
+#line 3454 "awkgram.c" /* yacc.c:1646 */
break;
case 141:
-#line 1471 "awkgram.y" /* yacc.c:1646 */
+#line 1460 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3471 "awkgram.c" /* yacc.c:1646 */
+#line 3460 "awkgram.c" /* yacc.c:1646 */
break;
case 142:
-#line 1473 "awkgram.y" /* yacc.c:1646 */
+#line 1462 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3477 "awkgram.c" /* yacc.c:1646 */
+#line 3466 "awkgram.c" /* yacc.c:1646 */
break;
case 143:
-#line 1475 "awkgram.y" /* yacc.c:1646 */
+#line 1464 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3483 "awkgram.c" /* yacc.c:1646 */
+#line 3472 "awkgram.c" /* yacc.c:1646 */
break;
case 144:
-#line 1477 "awkgram.y" /* yacc.c:1646 */
+#line 1466 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3489 "awkgram.c" /* yacc.c:1646 */
+#line 3478 "awkgram.c" /* yacc.c:1646 */
break;
case 145:
-#line 1479 "awkgram.y" /* yacc.c:1646 */
+#line 1468 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = list_append(list_merge((yyvsp[-2]), (yyvsp[0])), (yyvsp[-1])); }
-#line 3495 "awkgram.c" /* yacc.c:1646 */
+#line 3484 "awkgram.c" /* yacc.c:1646 */
break;
case 146:
-#line 1484 "awkgram.y" /* yacc.c:1646 */
+#line 1473 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = list_create((yyvsp[0]));
}
-#line 3503 "awkgram.c" /* yacc.c:1646 */
+#line 3492 "awkgram.c" /* yacc.c:1646 */
break;
case 147:
-#line 1488 "awkgram.y" /* yacc.c:1646 */
+#line 1477 "awkgram.y" /* yacc.c:1646 */
{
if ((yyvsp[0])->opcode == Op_match_rec) {
(yyvsp[0])->opcode = Op_nomatch;
@@ -3535,37 +3524,37 @@ regular_print:
}
}
}
-#line 3539 "awkgram.c" /* yacc.c:1646 */
+#line 3528 "awkgram.c" /* yacc.c:1646 */
break;
case 148:
-#line 1520 "awkgram.y" /* yacc.c:1646 */
+#line 1509 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[-1]); }
-#line 3545 "awkgram.c" /* yacc.c:1646 */
+#line 3534 "awkgram.c" /* yacc.c:1646 */
break;
case 149:
-#line 1522 "awkgram.y" /* yacc.c:1646 */
+#line 1511 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = snode((yyvsp[-1]), (yyvsp[-3]));
if ((yyval) == NULL)
YYABORT;
}
-#line 3555 "awkgram.c" /* yacc.c:1646 */
+#line 3544 "awkgram.c" /* yacc.c:1646 */
break;
case 150:
-#line 1528 "awkgram.y" /* yacc.c:1646 */
+#line 1517 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = snode((yyvsp[-1]), (yyvsp[-3]));
if ((yyval) == NULL)
YYABORT;
}
-#line 3565 "awkgram.c" /* yacc.c:1646 */
+#line 3554 "awkgram.c" /* yacc.c:1646 */
break;
case 151:
-#line 1534 "awkgram.y" /* yacc.c:1646 */
+#line 1523 "awkgram.y" /* yacc.c:1646 */
{
static bool warned = false;
@@ -3578,45 +3567,45 @@ regular_print:
if ((yyval) == NULL)
YYABORT;
}
-#line 3582 "awkgram.c" /* yacc.c:1646 */
+#line 3571 "awkgram.c" /* yacc.c:1646 */
break;
case 154:
-#line 1549 "awkgram.y" /* yacc.c:1646 */
+#line 1538 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[-1])->opcode = Op_preincrement;
(yyval) = mk_assignment((yyvsp[0]), NULL, (yyvsp[-1]));
}
-#line 3591 "awkgram.c" /* yacc.c:1646 */
+#line 3580 "awkgram.c" /* yacc.c:1646 */
break;
case 155:
-#line 1554 "awkgram.y" /* yacc.c:1646 */
+#line 1543 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[-1])->opcode = Op_predecrement;
(yyval) = mk_assignment((yyvsp[0]), NULL, (yyvsp[-1]));
}
-#line 3600 "awkgram.c" /* yacc.c:1646 */
+#line 3589 "awkgram.c" /* yacc.c:1646 */
break;
case 156:
-#line 1559 "awkgram.y" /* yacc.c:1646 */
+#line 1548 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = list_create((yyvsp[0]));
}
-#line 3608 "awkgram.c" /* yacc.c:1646 */
+#line 3597 "awkgram.c" /* yacc.c:1646 */
break;
case 157:
-#line 1563 "awkgram.y" /* yacc.c:1646 */
+#line 1552 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = list_create((yyvsp[0]));
}
-#line 3616 "awkgram.c" /* yacc.c:1646 */
+#line 3605 "awkgram.c" /* yacc.c:1646 */
break;
case 158:
-#line 1567 "awkgram.y" /* yacc.c:1646 */
+#line 1556 "awkgram.y" /* yacc.c:1646 */
{
if ((yyvsp[0])->lasti->opcode == Op_push_i
&& ((yyvsp[0])->lasti->memory->flags & (STRCUR|STRING)) == 0
@@ -3631,11 +3620,11 @@ regular_print:
(yyval) = list_append((yyvsp[0]), (yyvsp[-1]));
}
}
-#line 3635 "awkgram.c" /* yacc.c:1646 */
+#line 3624 "awkgram.c" /* yacc.c:1646 */
break;
case 159:
-#line 1582 "awkgram.y" /* yacc.c:1646 */
+#line 1571 "awkgram.y" /* yacc.c:1646 */
{
/*
* was: $$ = $2
@@ -3644,20 +3633,20 @@ regular_print:
(yyvsp[-1])->opcode = Op_unary_plus;
(yyval) = list_append((yyvsp[0]), (yyvsp[-1]));
}
-#line 3648 "awkgram.c" /* yacc.c:1646 */
+#line 3637 "awkgram.c" /* yacc.c:1646 */
break;
case 160:
-#line 1594 "awkgram.y" /* yacc.c:1646 */
+#line 1583 "awkgram.y" /* yacc.c:1646 */
{
func_use((yyvsp[0])->lasti->func_name, FUNC_USE);
(yyval) = (yyvsp[0]);
}
-#line 3657 "awkgram.c" /* yacc.c:1646 */
+#line 3646 "awkgram.c" /* yacc.c:1646 */
break;
case 161:
-#line 1599 "awkgram.y" /* yacc.c:1646 */
+#line 1588 "awkgram.y" /* yacc.c:1646 */
{
/* indirect function call */
INSTRUCTION *f, *t;
@@ -3690,11 +3679,11 @@ regular_print:
(yyval) = list_prepend((yyvsp[0]), t);
}
-#line 3694 "awkgram.c" /* yacc.c:1646 */
+#line 3683 "awkgram.c" /* yacc.c:1646 */
break;
case 162:
-#line 1635 "awkgram.y" /* yacc.c:1646 */
+#line 1624 "awkgram.y" /* yacc.c:1646 */
{
param_sanity((yyvsp[-1]));
(yyvsp[-3])->opcode = Op_func_call;
@@ -3708,49 +3697,49 @@ regular_print:
(yyval) = list_append(t, (yyvsp[-3]));
}
}
-#line 3712 "awkgram.c" /* yacc.c:1646 */
+#line 3701 "awkgram.c" /* yacc.c:1646 */
break;
case 163:
-#line 1652 "awkgram.y" /* yacc.c:1646 */
+#line 1641 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 3718 "awkgram.c" /* yacc.c:1646 */
+#line 3707 "awkgram.c" /* yacc.c:1646 */
break;
case 164:
-#line 1654 "awkgram.y" /* yacc.c:1646 */
+#line 1643 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3724 "awkgram.c" /* yacc.c:1646 */
+#line 3713 "awkgram.c" /* yacc.c:1646 */
break;
case 165:
-#line 1659 "awkgram.y" /* yacc.c:1646 */
+#line 1648 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 3730 "awkgram.c" /* yacc.c:1646 */
+#line 3719 "awkgram.c" /* yacc.c:1646 */
break;
case 166:
-#line 1661 "awkgram.y" /* yacc.c:1646 */
+#line 1650 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[-1]); }
-#line 3736 "awkgram.c" /* yacc.c:1646 */
+#line 3725 "awkgram.c" /* yacc.c:1646 */
break;
case 167:
-#line 1666 "awkgram.y" /* yacc.c:1646 */
+#line 1655 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3742 "awkgram.c" /* yacc.c:1646 */
+#line 3731 "awkgram.c" /* yacc.c:1646 */
break;
case 168:
-#line 1668 "awkgram.y" /* yacc.c:1646 */
+#line 1657 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = list_merge((yyvsp[-1]), (yyvsp[0]));
}
-#line 3750 "awkgram.c" /* yacc.c:1646 */
+#line 3739 "awkgram.c" /* yacc.c:1646 */
break;
case 169:
-#line 1675 "awkgram.y" /* yacc.c:1646 */
+#line 1664 "awkgram.y" /* yacc.c:1646 */
{
INSTRUCTION *ip = (yyvsp[0])->lasti;
int count = ip->sub_count; /* # of SUBSEP-seperated expressions */
@@ -3764,11 +3753,11 @@ regular_print:
sub_counter++; /* count # of dimensions */
(yyval) = (yyvsp[0]);
}
-#line 3768 "awkgram.c" /* yacc.c:1646 */
+#line 3757 "awkgram.c" /* yacc.c:1646 */
break;
case 170:
-#line 1692 "awkgram.y" /* yacc.c:1646 */
+#line 1681 "awkgram.y" /* yacc.c:1646 */
{
INSTRUCTION *t = (yyvsp[-1]);
if ((yyvsp[-1]) == NULL) {
@@ -3782,31 +3771,31 @@ regular_print:
(yyvsp[0])->sub_count = count_expressions(&t, false);
(yyval) = list_append(t, (yyvsp[0]));
}
-#line 3786 "awkgram.c" /* yacc.c:1646 */
+#line 3775 "awkgram.c" /* yacc.c:1646 */
break;
case 171:
-#line 1709 "awkgram.y" /* yacc.c:1646 */
+#line 1698 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); }
-#line 3792 "awkgram.c" /* yacc.c:1646 */
+#line 3781 "awkgram.c" /* yacc.c:1646 */
break;
case 172:
-#line 1711 "awkgram.y" /* yacc.c:1646 */
+#line 1700 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = list_merge((yyvsp[-1]), (yyvsp[0]));
}
-#line 3800 "awkgram.c" /* yacc.c:1646 */
+#line 3789 "awkgram.c" /* yacc.c:1646 */
break;
case 173:
-#line 1718 "awkgram.y" /* yacc.c:1646 */
+#line 1707 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[-1]); }
-#line 3806 "awkgram.c" /* yacc.c:1646 */
+#line 3795 "awkgram.c" /* yacc.c:1646 */
break;
case 174:
-#line 1723 "awkgram.y" /* yacc.c:1646 */
+#line 1712 "awkgram.y" /* yacc.c:1646 */
{
char *var_name = (yyvsp[0])->lextok;
@@ -3814,22 +3803,22 @@ regular_print:
(yyvsp[0])->memory = variable((yyvsp[0])->source_line, var_name, Node_var_new);
(yyval) = list_create((yyvsp[0]));
}
-#line 3818 "awkgram.c" /* yacc.c:1646 */
+#line 3807 "awkgram.c" /* yacc.c:1646 */
break;
case 175:
-#line 1731 "awkgram.y" /* yacc.c:1646 */
+#line 1720 "awkgram.y" /* yacc.c:1646 */
{
char *arr = (yyvsp[-1])->lextok;
(yyvsp[-1])->memory = variable((yyvsp[-1])->source_line, arr, Node_var_new);
(yyvsp[-1])->opcode = Op_push_array;
(yyval) = list_prepend((yyvsp[0]), (yyvsp[-1]));
}
-#line 3829 "awkgram.c" /* yacc.c:1646 */
+#line 3818 "awkgram.c" /* yacc.c:1646 */
break;
case 176:
-#line 1741 "awkgram.y" /* yacc.c:1646 */
+#line 1730 "awkgram.y" /* yacc.c:1646 */
{
INSTRUCTION *ip = (yyvsp[0])->nexti;
if (ip->opcode == Op_push
@@ -3841,73 +3830,73 @@ regular_print:
} else
(yyval) = (yyvsp[0]);
}
-#line 3845 "awkgram.c" /* yacc.c:1646 */
+#line 3834 "awkgram.c" /* yacc.c:1646 */
break;
case 177:
-#line 1753 "awkgram.y" /* yacc.c:1646 */
+#line 1742 "awkgram.y" /* yacc.c:1646 */
{
(yyval) = list_append((yyvsp[-1]), (yyvsp[-2]));
if ((yyvsp[0]) != NULL)
mk_assignment((yyvsp[-1]), NULL, (yyvsp[0]));
}
-#line 3855 "awkgram.c" /* yacc.c:1646 */
+#line 3844 "awkgram.c" /* yacc.c:1646 */
break;
case 178:
-#line 1762 "awkgram.y" /* yacc.c:1646 */
+#line 1751 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[0])->opcode = Op_postincrement;
}
-#line 3863 "awkgram.c" /* yacc.c:1646 */
+#line 3852 "awkgram.c" /* yacc.c:1646 */
break;
case 179:
-#line 1766 "awkgram.y" /* yacc.c:1646 */
+#line 1755 "awkgram.y" /* yacc.c:1646 */
{
(yyvsp[0])->opcode = Op_postdecrement;
}
-#line 3871 "awkgram.c" /* yacc.c:1646 */
+#line 3860 "awkgram.c" /* yacc.c:1646 */
break;
case 180:
-#line 1769 "awkgram.y" /* yacc.c:1646 */
+#line 1758 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = NULL; }
-#line 3877 "awkgram.c" /* yacc.c:1646 */
+#line 3866 "awkgram.c" /* yacc.c:1646 */
break;
case 182:
-#line 1777 "awkgram.y" /* yacc.c:1646 */
+#line 1766 "awkgram.y" /* yacc.c:1646 */
{ yyerrok; }
-#line 3883 "awkgram.c" /* yacc.c:1646 */
+#line 3872 "awkgram.c" /* yacc.c:1646 */
break;
case 183:
-#line 1781 "awkgram.y" /* yacc.c:1646 */
+#line 1770 "awkgram.y" /* yacc.c:1646 */
{ yyerrok; }
-#line 3889 "awkgram.c" /* yacc.c:1646 */
+#line 3878 "awkgram.c" /* yacc.c:1646 */
break;
case 186:
-#line 1790 "awkgram.y" /* yacc.c:1646 */
+#line 1779 "awkgram.y" /* yacc.c:1646 */
{ yyerrok; }
-#line 3895 "awkgram.c" /* yacc.c:1646 */
+#line 3884 "awkgram.c" /* yacc.c:1646 */
break;
case 187:
-#line 1794 "awkgram.y" /* yacc.c:1646 */
+#line 1783 "awkgram.y" /* yacc.c:1646 */
{ (yyval) = (yyvsp[0]); yyerrok; }
-#line 3901 "awkgram.c" /* yacc.c:1646 */
+#line 3890 "awkgram.c" /* yacc.c:1646 */
break;
case 188:
-#line 1798 "awkgram.y" /* yacc.c:1646 */
+#line 1787 "awkgram.y" /* yacc.c:1646 */
{ yyerrok; }
-#line 3907 "awkgram.c" /* yacc.c:1646 */
+#line 3896 "awkgram.c" /* yacc.c:1646 */
break;
-#line 3911 "awkgram.c" /* yacc.c:1646 */
+#line 3900 "awkgram.c" /* yacc.c:1646 */
default: break;
}
/* User semantic actions sometimes alter yychar, and that requires
@@ -4135,7 +4124,7 @@ yyreturn:
#endif
return yyresult;
}
-#line 1800 "awkgram.y" /* yacc.c:1906 */
+#line 1789 "awkgram.y" /* yacc.c:1906 */
struct token {
@@ -4151,6 +4140,8 @@ struct token {
# define GAWKX 0x0400 /* gawk extension */
# define BREAK 0x0800 /* break allowed inside */
# define CONTINUE 0x1000 /* continue allowed inside */
+# define DEBUG_USE 0x2000 /* for use by developers */
+# define ARG3_IS_ARR 0x4000 /* hack for div/do_mpfp_div */
NODE *(*ptr)(int); /* function that implements this keyword */
};
@@ -4183,7 +4174,7 @@ static struct token tokentab[] = {
{"END", Op_rule, LEX_END, 0, 0 },
{"ENDFILE", Op_rule, LEX_ENDFILE, GAWKX, 0 },
#ifdef ARRAYDEBUG
-{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_adump },
+{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|DEBUG_USE, do_adump },
#endif
{"and", Op_builtin, LEX_BUILTIN, GAWKX, 0 },
{"asort", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asort },
@@ -4200,6 +4191,7 @@ static struct token tokentab[] = {
{"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext },
{"default", Op_K_default, LEX_DEFAULT, GAWKX, 0 },
{"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0 },
+{"div", Op_builtin, LEX_BUILTIN, GAWKX|A(3)|ARG3_IS_ARR, do_div },
{"do", Op_K_do, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0 },
{"else", Op_K_else, LEX_ELSE, 0, 0 },
{"eval", Op_symbol, LEX_EVAL, 0, 0 },
@@ -4242,7 +4234,7 @@ static struct token tokentab[] = {
{"sqrt", Op_builtin, LEX_BUILTIN, A(1), 0 },
{"srand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), 0 },
#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
-{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0), stopme },
+{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|DEBUG_USE, stopme },
#endif
{"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime },
{"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), 0 },
@@ -4635,6 +4627,15 @@ parse_program(INSTRUCTION **pcode)
return (ret || errcount);
}
+/* free_srcfile --- free a SRCFILE struct
+ *
+ * Passes thisfile->src and then thisfile itself to efree(), in that
+ * order. thisfile is dereferenced unconditionally, so callers must
+ * hand in a valid pointer and must not use it afterwards.
+ */
+
+void
+free_srcfile(SRCFILE *thisfile)
+{
+ efree(thisfile->src); /* first: this still reads a member of thisfile */
+ efree(thisfile);
+}
+
/* do_add_srcfile --- add one item to srcfiles */
static SRCFILE *
@@ -5093,12 +5094,40 @@ tokexpand()
return tok;
}
+/* check_bad_char --- fatal if c isn't allowed in gawk source code */
+
+/*
+ * The error message was inspired by someone who decided to put
+ * a physical \0 byte into the source code to see what would
+ * happen and then filed a bug report about it. Sigh.
+ *
+ * The whitelisted control characters below return silently; any other
+ * character for which iscntrl() is true and isspace() is false ends
+ * the program through fatal().
+ *
+ * nextc() hands us a byte fetched through lexptr (presumably a plain
+ * `char'). Where `char' is signed, bytes >= 0x80 would arrive here as
+ * negative ints, which are not valid arguments for the <ctype.h>
+ * tests and would show up as a huge octal number in the message.
+ * So c is folded into the unsigned char range before it is used.
+ */
+
+static void
+check_bad_char(int c)
+{
+	/* normalize a possibly sign-extended byte to 0..UCHAR_MAX */
+	c = (unsigned char) c;
+
+	/* allow escapes. needed for autoconf. bleah. */
+	switch (c) {
+	case '\a':
+	case '\b':
+	case '\f':
+	case '\n':
+	case '\r':
+	case '\t':
+		return;
+	default:
+		break;
+	}
+
+	if (iscntrl(c) && ! isspace(c))
+		fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), c);
+}
+
/* nextc --- get the next input character */
#if MBS_SUPPORT
static int
-nextc(void)
+nextc(bool check_for_bad)
{
if (gawk_mb_cur_max > 1) {
again:
@@ -5149,14 +5178,19 @@ again:
0 : work_ring_idx + 1;
cur_char_ring[work_ring_idx] = 0;
}
+ if (check_for_bad)
+ check_bad_char(*lexptr);
return (int) (unsigned char) *lexptr++;
} else {
do {
if (lexeof)
return END_FILE;
- if (lexptr && lexptr < lexend)
- return ((int) (unsigned char) *lexptr++);
+ if (lexptr && lexptr < lexend) {
+ if (check_for_bad)
+ check_bad_char(*lexptr);
+ return ((int) (unsigned char) *lexptr++);
+ }
} while (get_src_buf());
return END_SRC;
}
@@ -5165,13 +5199,16 @@ again:
#else /* MBS_SUPPORT */
int
-nextc()
+nextc(bool check_for_bad)
{
do {
if (lexeof)
return END_FILE;
- if (lexptr && lexptr < lexend)
+ if (lexptr && lexptr < lexend) {
+ if (check_for_bad)
+ check_bad_char(*lexptr);
return ((int) (unsigned char) *lexptr++);
+ }
} while (get_src_buf());
return END_SRC;
}
@@ -5200,13 +5237,13 @@ allow_newline(void)
int c;
for (;;) {
- c = nextc();
+ c = nextc(true);
if (c == END_FILE) {
pushback();
break;
}
if (c == '#') {
- while ((c = nextc()) != '\n' && c != END_FILE)
+ while ((c = nextc(false)) != '\n' && c != END_FILE)
continue;
if (c == END_FILE) {
pushback();
@@ -5277,7 +5314,7 @@ yylex(void)
if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */
return 0;
- c = nextc();
+ c = nextc(true);
if (c == END_SRC)
return 0;
if (c == END_FILE)
@@ -5319,12 +5356,12 @@ yylex(void)
want_regexp = false;
tok = tokstart;
for (;;) {
- c = nextc();
+ c = nextc(true);
if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
case '[':
/* one day check for `.' and `=' too */
- if (nextc() == ':' || in_brack == 0)
+ if (nextc(true) == ':' || in_brack == 0)
in_brack++;
pushback();
break;
@@ -5338,7 +5375,7 @@ yylex(void)
in_brack--;
break;
case '\\':
- if ((c = nextc()) == END_FILE) {
+ if ((c = nextc(true)) == END_FILE) {
pushback();
yyerror(_("unterminated regexp ends with `\\' at end of file"));
goto end_regexp; /* kludge */
@@ -5358,7 +5395,7 @@ end_regexp:
yylval = GET_INSTRUCTION(Op_token);
yylval->lextok = estrdup(tokstart, tok - tokstart);
if (do_lint) {
- int peek = nextc();
+ int peek = nextc(true);
pushback();
if (peek == 'i' || peek == 's') {
@@ -5388,7 +5425,7 @@ end_regexp:
retry:
/* skipping \r is a hack, but windows is just too pervasive. sigh. */
- while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
+ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
continue;
lexeme = lexptr ? lexptr - 1 : lexptr;
@@ -5410,7 +5447,7 @@ retry:
return lasttok = NEWLINE;
case '#': /* it's a comment */
- while ((c = nextc()) != '\n') {
+ while ((c = nextc(false)) != '\n') {
if (c == END_FILE)
return lasttok = NEWLINE_EOF;
}
@@ -5430,7 +5467,7 @@ retry:
*/
if (! do_traditional) {
/* strip trailing white-space and/or comment */
- while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
+ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
continue;
if (c == '#') {
static bool warned = false;
@@ -5440,16 +5477,16 @@ retry:
lintwarn(
_("use of `\\ #...' line continuation is not portable"));
}
- while ((c = nextc()) != '\n')
+ while ((c = nextc(false)) != '\n')
if (c == END_FILE)
break;
}
pushback();
}
#endif /* RELAXED_CONTINUATION */
- c = nextc();
+ c = nextc(true);
if (c == '\r') /* allow MS-DOS files. bleah */
- c = nextc();
+ c = nextc(true);
if (c == '\n') {
sourceline++;
goto retry;
@@ -5488,7 +5525,7 @@ retry:
case '[':
return lasttok = c;
case ']':
- c = nextc();
+ c = nextc(true);
pushback();
if (c == '[') {
yylval = GET_INSTRUCTION(Op_sub_array);
@@ -5500,7 +5537,7 @@ retry:
return ']';
case '*':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_times);
return lasttok = ASSIGNOP;
} else if (do_posix) {
@@ -5511,7 +5548,7 @@ retry:
/* make ** and **= aliases for ^ and ^= */
static bool did_warn_op = false, did_warn_assgn = false;
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
if (! did_warn_assgn) {
did_warn_assgn = true;
if (do_lint)
@@ -5539,7 +5576,7 @@ retry:
return lasttok = '*';
case '/':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
pushback();
return lasttok = SLASH_BEFORE_EQUAL;
}
@@ -5548,7 +5585,7 @@ retry:
return lasttok = '/';
case '%':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_assign_mod);
return lasttok = ASSIGNOP;
}
@@ -5560,7 +5597,7 @@ retry:
{
static bool did_warn_op = false, did_warn_assgn = false;
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
if (do_lint_old && ! did_warn_assgn) {
did_warn_assgn = true;
warning(_("operator `^=' is not supported in old awk"));
@@ -5578,7 +5615,7 @@ retry:
}
case '+':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_plus);
return lasttok = ASSIGNOP;
}
@@ -5591,7 +5628,7 @@ retry:
return lasttok = '+';
case '!':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_notequal);
return lasttok = RELOP;
}
@@ -5604,7 +5641,7 @@ retry:
return lasttok = '!';
case '<':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_leq);
return lasttok = RELOP;
}
@@ -5613,7 +5650,7 @@ retry:
return lasttok = '<';
case '=':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_equal);
return lasttok = RELOP;
}
@@ -5622,7 +5659,7 @@ retry:
return lasttok = ASSIGN;
case '>':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_geq);
return lasttok = RELOP;
} else if (c == '>') {
@@ -5661,7 +5698,7 @@ retry:
case '"':
string:
esc_seen = false;
- while ((c = nextc()) != '"') {
+ while ((c = nextc(true)) != '"') {
if (c == '\n') {
pushback();
yyerror(_("unterminated string"));
@@ -5669,7 +5706,7 @@ retry:
}
if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
c == '\\') {
- c = nextc();
+ c = nextc(true);
if (c == '\n') {
sourceline++;
continue;
@@ -5703,7 +5740,7 @@ retry:
return lasttok = YSTRING;
case '-':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_minus);
return lasttok = ASSIGNOP;
}
@@ -5716,7 +5753,7 @@ retry:
return lasttok = '-';
case '.':
- c = nextc();
+ c = nextc(true);
pushback();
if (! isdigit(c))
return lasttok = '.';
@@ -5744,7 +5781,7 @@ retry:
if (do_traditional)
goto done;
if (tok == tokstart + 2) {
- int peek = nextc();
+ int peek = nextc(true);
if (isxdigit(peek)) {
inhex = true;
@@ -5772,8 +5809,8 @@ retry:
break;
}
seen_e = true;
- if ((c = nextc()) == '-' || c == '+') {
- int c2 = nextc();
+ if ((c = nextc(true)) == '-' || c == '+') {
+ int c2 = nextc(true);
if (isdigit(c2)) {
tokadd(c);
@@ -5820,7 +5857,7 @@ retry:
}
if (gotnumber)
break;
- c = nextc();
+ c = nextc(true);
}
pushback();
@@ -5844,7 +5881,7 @@ retry:
return lasttok = YNUMBER;
case '&':
- if ((c = nextc()) == '&') {
+ if ((c = nextc(true)) == '&') {
yylval = GET_INSTRUCTION(Op_and);
allow_newline();
return lasttok = LEX_AND;
@@ -5854,7 +5891,7 @@ retry:
return lasttok = '&';
case '|':
- if ((c = nextc()) == '|') {
+ if ((c = nextc(true)) == '|') {
yylval = GET_INSTRUCTION(Op_or);
allow_newline();
return lasttok = LEX_OR;
@@ -5875,7 +5912,7 @@ retry:
}
}
- if (c != '_' && ! isalpha(c)) {
+ if (c != '_' && ! is_alpha(c)) {
yyerror(_("invalid char '%c' in expression"), c);
return lasttok = LEX_EOF;
}
@@ -5895,7 +5932,7 @@ retry:
* occasions where the interactions are funny.
*/
if (! do_traditional && c == '_' && lasttok != '$') {
- if ((c = nextc()) == '"') {
+ if ((c = nextc(true)) == '"') {
intlstr = true;
goto string;
}
@@ -5907,7 +5944,7 @@ retry:
tok = tokstart;
while (c != END_FILE && is_identchar(c)) {
tokadd(c);
- c = nextc();
+ c = nextc(true);
}
tokadd('\0');
pushback();
@@ -6168,6 +6205,11 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
arg = subn->nexti;
if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
arg->nexti->opcode = Op_push_arg; /* argument may be array */
+ } else if ((tokentab[idx].flags & ARG3_IS_ARR) != 0) {
+ arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */
+ ip = arg->lasti;
+ if (ip->opcode == Op_push)
+ ip->opcode = Op_push_array;
} else if (r->builtin == do_match) {
static bool warned = false;
@@ -7813,3 +7855,80 @@ one_line_close(int fd)
}
+/*
+ * lookup_builtin --- find a builtin function or return NULL
+ *
+ * The name is looked up with check_special(). NULL is returned when
+ * that yields -1 or when the matching tokentab entry is not of class
+ * LEX_BUILTIN; otherwise the entry's `ptr' member is returned as is.
+ *
+ * NOTE(review): tokentab rows that carry a 0 function pointer would
+ * also produce NULL here for a genuine builtin --- confirm whether
+ * callers rely on ptr having been filled in beforehand.
+ */
+
+builtin_func_t
+lookup_builtin(const char *name)
+{
+ int mid = check_special(name);
+
+ if (mid == -1 || tokentab[mid].class != LEX_BUILTIN)
+ return NULL;
+
+ return tokentab[mid].ptr;
+}
+
+/*
+ * install_builtins --- add built-in functions to FUNCTAB
+ *
+ * Walks every entry of tokentab and, for each one whose class is
+ * LEX_BUILTIN and which is not flagged DEBUG_USE, calls
+ * install_symbol() with the entry's `operator' member and
+ * Node_builtin_func. The value returned by install_symbol() is ignored.
+ */
+
+void
+install_builtins(void)
+{
+ int i, j;
+
+ j = sizeof(tokentab) / sizeof(tokentab[0]);
+ for (i = 0; i < j; i++) {
+ if ( tokentab[i].class == LEX_BUILTIN
+ && (tokentab[i].flags & DEBUG_USE) == 0) {
+ (void) install_symbol(tokentab[i].operator, Node_builtin_func);
+ }
+ }
+}
+
+/*
+ * 9/2014: Gawk cannot use <ctype.h> isalpha or isalnum when
+ * parsing the program since that can let through non-English
+ * letters. So, we supply our own. !@#$%^&*()-ing locales!
+ */
+
+/*
+ * is_alpha --- return true if c is an English letter
+ *
+ * Returns true only for the 52 characters 'a'-'z' and 'A'-'Z', each
+ * matched by its own case label, and false for every other value of c.
+ * Building with I_DONT_KNOW_WHAT_IM_DOING defined replaces the whole
+ * test with <ctype.h> isalpha().
+ */
+
+bool
+is_alpha(int c)
+{
+#ifdef I_DONT_KNOW_WHAT_IM_DOING
+ return isalpha(c);
+#else /* ! I_DONT_KNOW_WHAT_IM_DOING */
+ switch (c) {
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+ return true;
+ }
+ return false;
+#endif /* ! I_DONT_KNOW_WHAT_IM_DOING */
+}
+
+/*
+ * is_alnum --- return true for alphanumeric, English only letters
+ *
+ * True when is_alpha(c) is true or when c lies between '0' and '9'
+ * inclusive; false otherwise.
+ */
+
+bool
+is_alnum(int c)
+{
+ /* digit test is good for EBCDIC too. so there. */
+ return (is_alpha(c) || ('0' <= c && c <= '9'));
+}
+
+
+/*
+ * is_identchar --- return true if c can be in an identifier
+ *
+ * That is: c is an underscore or satisfies is_alnum(c).
+ */
+
+bool
+is_identchar(int c)
+{
+ return (is_alnum(c) || c == '_');
+}
diff --git a/awkgram.y b/awkgram.y
index a634bc6b..eaa615a2 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -153,8 +153,6 @@ static inline INSTRUCTION *list_merge(INSTRUCTION *l1, INSTRUCTION *l2);
extern double fmod(double x, double y);
#define YYSTYPE INSTRUCTION *
-
-#define is_identchar(c) (isalnum(c) || (c) == '_')
%}
%token FUNC_CALL NAME REGEXP FILENAME
@@ -1404,21 +1402,12 @@ simp_exp
| LEX_GETLINE opt_variable input_redir
{
/*
- * In BEGINFILE/ENDFILE, allow `getline var < file'
+ * In BEGINFILE/ENDFILE, allow `getline [var] < file'
*/
- if (rule == BEGINFILE || rule == ENDFILE) {
- if ($2 != NULL && $3 != NULL)
- ; /* all ok */
- else {
- if ($2 != NULL)
- error_ln($1->source_line,
- _("`getline var' invalid inside `%s' rule"), ruletab[rule]);
- else
- error_ln($1->source_line,
- _("`getline' invalid inside `%s' rule"), ruletab[rule]);
- }
- }
+ if ((rule == BEGINFILE || rule == ENDFILE) && $3 == NULL)
+ error_ln($1->source_line,
+ _("non-redirected `getline' invalid inside `%s' rule"), ruletab[rule]);
if (do_lint && rule == END && $3 == NULL)
lintwarn_ln($1->source_line,
_("non-redirected `getline' undefined inside END action"));
@@ -1812,6 +1801,8 @@ struct token {
# define GAWKX 0x0400 /* gawk extension */
# define BREAK 0x0800 /* break allowed inside */
# define CONTINUE 0x1000 /* continue allowed inside */
+# define DEBUG_USE 0x2000 /* for use by developers */
+# define ARG3_IS_ARR 0x4000 /* hack for div/do_mpfp_div */
NODE *(*ptr)(int); /* function that implements this keyword */
};
@@ -1844,7 +1835,7 @@ static struct token tokentab[] = {
{"END", Op_rule, LEX_END, 0, 0 },
{"ENDFILE", Op_rule, LEX_ENDFILE, GAWKX, 0 },
#ifdef ARRAYDEBUG
-{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_adump },
+{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|DEBUG_USE, do_adump },
#endif
{"and", Op_builtin, LEX_BUILTIN, GAWKX, 0 },
{"asort", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asort },
@@ -1861,6 +1852,7 @@ static struct token tokentab[] = {
{"dcngettext", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext },
{"default", Op_K_default, LEX_DEFAULT, GAWKX, 0 },
{"delete", Op_K_delete, LEX_DELETE, NOT_OLD, 0 },
+{"div", Op_builtin, LEX_BUILTIN, GAWKX|A(3)|ARG3_IS_ARR, do_div },
{"do", Op_K_do, LEX_DO, NOT_OLD|BREAK|CONTINUE, 0 },
{"else", Op_K_else, LEX_ELSE, 0, 0 },
{"eval", Op_symbol, LEX_EVAL, 0, 0 },
@@ -1903,7 +1895,7 @@ static struct token tokentab[] = {
{"sqrt", Op_builtin, LEX_BUILTIN, A(1), 0 },
{"srand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), 0 },
#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
-{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0), stopme },
+{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|DEBUG_USE, stopme },
#endif
{"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime },
{"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), 0 },
@@ -2296,6 +2288,15 @@ parse_program(INSTRUCTION **pcode)
return (ret || errcount);
}
+/*
+ * free_srcfile --- free a SRCFILE struct
+ *
+ * Releases thisfile->src with efree() and then the struct itself.
+ * thisfile is dereferenced without a NULL check, and it must not be
+ * used by the caller once this returns.
+ */
+
+void
+free_srcfile(SRCFILE *thisfile)
+{
+ efree(thisfile->src);
+ efree(thisfile);
+}
+
/* do_add_srcfile --- add one item to srcfiles */
static SRCFILE *
@@ -2754,12 +2755,40 @@ tokexpand()
return tok;
}
+/* check_bad_char --- fatal if c isn't allowed in gawk source code */
+
+/*
+ * The error message was inspired by someone who decided to put
+ * a physical \0 byte into the source code to see what would
+ * happen and then filed a bug report about it. Sigh.
+ *
+ * The escapes \a, \b, \f, \n, \r and \t are accepted silently; any
+ * other control character that is not white space is a fatal error.
+ *
+ * The callers in this file hand over *lexptr directly. If that is a
+ * plain char and char is signed, bytes >= 0x80 arrive here negative.
+ * The <ctype.h> tests are only defined for values representable as
+ * unsigned char (or EOF), so c is reduced to that range before it is
+ * examined; this also makes the octal escape in the message show the
+ * real byte instead of a sign-extended value.
+ */
+
+static void
+check_bad_char(int c)
+{
+	c = (unsigned char) c;	/* keep <ctype.h> calls well defined */
+
+	/* allow escapes. needed for autoconf. bleah. */
+	switch (c) {
+	case '\a':
+	case '\b':
+	case '\f':
+	case '\n':
+	case '\r':
+	case '\t':
+		return;
+	default:
+		break;
+	}
+
+	if (iscntrl(c) && ! isspace(c))
+		fatal(_("PEBKAC error: invalid character '\\%03o' in source code"), c);
+}
+
/* nextc --- get the next input character */
#if MBS_SUPPORT
static int
-nextc(void)
+nextc(bool check_for_bad)
{
if (gawk_mb_cur_max > 1) {
again:
@@ -2810,14 +2839,19 @@ again:
0 : work_ring_idx + 1;
cur_char_ring[work_ring_idx] = 0;
}
+ if (check_for_bad)
+ check_bad_char(*lexptr);
return (int) (unsigned char) *lexptr++;
} else {
do {
if (lexeof)
return END_FILE;
- if (lexptr && lexptr < lexend)
- return ((int) (unsigned char) *lexptr++);
+ if (lexptr && lexptr < lexend) {
+ if (check_for_bad)
+ check_bad_char(*lexptr);
+ return ((int) (unsigned char) *lexptr++);
+ }
} while (get_src_buf());
return END_SRC;
}
@@ -2826,13 +2860,16 @@ again:
#else /* MBS_SUPPORT */
int
-nextc()
+nextc(bool check_for_bad)
{
do {
if (lexeof)
return END_FILE;
- if (lexptr && lexptr < lexend)
+ if (lexptr && lexptr < lexend) {
+ if (check_for_bad)
+ check_bad_char(*lexptr);
return ((int) (unsigned char) *lexptr++);
+ }
} while (get_src_buf());
return END_SRC;
}
@@ -2861,13 +2898,13 @@ allow_newline(void)
int c;
for (;;) {
- c = nextc();
+ c = nextc(true);
if (c == END_FILE) {
pushback();
break;
}
if (c == '#') {
- while ((c = nextc()) != '\n' && c != END_FILE)
+ while ((c = nextc(false)) != '\n' && c != END_FILE)
continue;
if (c == END_FILE) {
pushback();
@@ -2938,7 +2975,7 @@ yylex(void)
if (lasttok == LEX_EOF) /* error earlier in current source, must give up !! */
return 0;
- c = nextc();
+ c = nextc(true);
if (c == END_SRC)
return 0;
if (c == END_FILE)
@@ -2980,12 +3017,12 @@ yylex(void)
want_regexp = false;
tok = tokstart;
for (;;) {
- c = nextc();
+ c = nextc(true);
if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
case '[':
/* one day check for `.' and `=' too */
- if (nextc() == ':' || in_brack == 0)
+ if (nextc(true) == ':' || in_brack == 0)
in_brack++;
pushback();
break;
@@ -2999,7 +3036,7 @@ yylex(void)
in_brack--;
break;
case '\\':
- if ((c = nextc()) == END_FILE) {
+ if ((c = nextc(true)) == END_FILE) {
pushback();
yyerror(_("unterminated regexp ends with `\\' at end of file"));
goto end_regexp; /* kludge */
@@ -3019,7 +3056,7 @@ end_regexp:
yylval = GET_INSTRUCTION(Op_token);
yylval->lextok = estrdup(tokstart, tok - tokstart);
if (do_lint) {
- int peek = nextc();
+ int peek = nextc(true);
pushback();
if (peek == 'i' || peek == 's') {
@@ -3049,7 +3086,7 @@ end_regexp:
retry:
/* skipping \r is a hack, but windows is just too pervasive. sigh. */
- while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
+ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
continue;
lexeme = lexptr ? lexptr - 1 : lexptr;
@@ -3071,7 +3108,7 @@ retry:
return lasttok = NEWLINE;
case '#': /* it's a comment */
- while ((c = nextc()) != '\n') {
+ while ((c = nextc(false)) != '\n') {
if (c == END_FILE)
return lasttok = NEWLINE_EOF;
}
@@ -3091,7 +3128,7 @@ retry:
*/
if (! do_traditional) {
/* strip trailing white-space and/or comment */
- while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
+ while ((c = nextc(true)) == ' ' || c == '\t' || c == '\r')
continue;
if (c == '#') {
static bool warned = false;
@@ -3101,16 +3138,16 @@ retry:
lintwarn(
_("use of `\\ #...' line continuation is not portable"));
}
- while ((c = nextc()) != '\n')
+ while ((c = nextc(false)) != '\n')
if (c == END_FILE)
break;
}
pushback();
}
#endif /* RELAXED_CONTINUATION */
- c = nextc();
+ c = nextc(true);
if (c == '\r') /* allow MS-DOS files. bleah */
- c = nextc();
+ c = nextc(true);
if (c == '\n') {
sourceline++;
goto retry;
@@ -3149,7 +3186,7 @@ retry:
case '[':
return lasttok = c;
case ']':
- c = nextc();
+ c = nextc(true);
pushback();
if (c == '[') {
yylval = GET_INSTRUCTION(Op_sub_array);
@@ -3161,7 +3198,7 @@ retry:
return ']';
case '*':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_times);
return lasttok = ASSIGNOP;
} else if (do_posix) {
@@ -3172,7 +3209,7 @@ retry:
/* make ** and **= aliases for ^ and ^= */
static bool did_warn_op = false, did_warn_assgn = false;
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
if (! did_warn_assgn) {
did_warn_assgn = true;
if (do_lint)
@@ -3200,7 +3237,7 @@ retry:
return lasttok = '*';
case '/':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
pushback();
return lasttok = SLASH_BEFORE_EQUAL;
}
@@ -3209,7 +3246,7 @@ retry:
return lasttok = '/';
case '%':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_assign_mod);
return lasttok = ASSIGNOP;
}
@@ -3221,7 +3258,7 @@ retry:
{
static bool did_warn_op = false, did_warn_assgn = false;
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
if (do_lint_old && ! did_warn_assgn) {
did_warn_assgn = true;
warning(_("operator `^=' is not supported in old awk"));
@@ -3239,7 +3276,7 @@ retry:
}
case '+':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_plus);
return lasttok = ASSIGNOP;
}
@@ -3252,7 +3289,7 @@ retry:
return lasttok = '+';
case '!':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_notequal);
return lasttok = RELOP;
}
@@ -3265,7 +3302,7 @@ retry:
return lasttok = '!';
case '<':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_leq);
return lasttok = RELOP;
}
@@ -3274,7 +3311,7 @@ retry:
return lasttok = '<';
case '=':
- if (nextc() == '=') {
+ if (nextc(true) == '=') {
yylval = GET_INSTRUCTION(Op_equal);
return lasttok = RELOP;
}
@@ -3283,7 +3320,7 @@ retry:
return lasttok = ASSIGN;
case '>':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_geq);
return lasttok = RELOP;
} else if (c == '>') {
@@ -3322,7 +3359,7 @@ retry:
case '"':
string:
esc_seen = false;
- while ((c = nextc()) != '"') {
+ while ((c = nextc(true)) != '"') {
if (c == '\n') {
pushback();
yyerror(_("unterminated string"));
@@ -3330,7 +3367,7 @@ retry:
}
if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
c == '\\') {
- c = nextc();
+ c = nextc(true);
if (c == '\n') {
sourceline++;
continue;
@@ -3364,7 +3401,7 @@ retry:
return lasttok = YSTRING;
case '-':
- if ((c = nextc()) == '=') {
+ if ((c = nextc(true)) == '=') {
yylval = GET_INSTRUCTION(Op_assign_minus);
return lasttok = ASSIGNOP;
}
@@ -3377,7 +3414,7 @@ retry:
return lasttok = '-';
case '.':
- c = nextc();
+ c = nextc(true);
pushback();
if (! isdigit(c))
return lasttok = '.';
@@ -3405,7 +3442,7 @@ retry:
if (do_traditional)
goto done;
if (tok == tokstart + 2) {
- int peek = nextc();
+ int peek = nextc(true);
if (isxdigit(peek)) {
inhex = true;
@@ -3433,8 +3470,8 @@ retry:
break;
}
seen_e = true;
- if ((c = nextc()) == '-' || c == '+') {
- int c2 = nextc();
+ if ((c = nextc(true)) == '-' || c == '+') {
+ int c2 = nextc(true);
if (isdigit(c2)) {
tokadd(c);
@@ -3481,7 +3518,7 @@ retry:
}
if (gotnumber)
break;
- c = nextc();
+ c = nextc(true);
}
pushback();
@@ -3505,7 +3542,7 @@ retry:
return lasttok = YNUMBER;
case '&':
- if ((c = nextc()) == '&') {
+ if ((c = nextc(true)) == '&') {
yylval = GET_INSTRUCTION(Op_and);
allow_newline();
return lasttok = LEX_AND;
@@ -3515,7 +3552,7 @@ retry:
return lasttok = '&';
case '|':
- if ((c = nextc()) == '|') {
+ if ((c = nextc(true)) == '|') {
yylval = GET_INSTRUCTION(Op_or);
allow_newline();
return lasttok = LEX_OR;
@@ -3536,7 +3573,7 @@ retry:
}
}
- if (c != '_' && ! isalpha(c)) {
+ if (c != '_' && ! is_alpha(c)) {
yyerror(_("invalid char '%c' in expression"), c);
return lasttok = LEX_EOF;
}
@@ -3556,7 +3593,7 @@ retry:
* occasions where the interactions are funny.
*/
if (! do_traditional && c == '_' && lasttok != '$') {
- if ((c = nextc()) == '"') {
+ if ((c = nextc(true)) == '"') {
intlstr = true;
goto string;
}
@@ -3568,7 +3605,7 @@ retry:
tok = tokstart;
while (c != END_FILE && is_identchar(c)) {
tokadd(c);
- c = nextc();
+ c = nextc(true);
}
tokadd('\0');
pushback();
@@ -3829,6 +3866,11 @@ snode(INSTRUCTION *subn, INSTRUCTION *r)
arg = subn->nexti;
if (arg->nexti == arg->lasti && arg->nexti->opcode == Op_push)
arg->nexti->opcode = Op_push_arg; /* argument may be array */
+ } else if ((tokentab[idx].flags & ARG3_IS_ARR) != 0) {
+ arg = subn->nexti->lasti->nexti->lasti->nexti; /* 3rd arg list */
+ ip = arg->lasti;
+ if (ip->opcode == Op_push)
+ ip->opcode = Op_push_array;
} else if (r->builtin == do_match) {
static bool warned = false;
@@ -5474,3 +5516,80 @@ one_line_close(int fd)
}
+/*
+ * lookup_builtin --- find a builtin function or return NULL
+ *
+ * The name is looked up with check_special(). NULL is returned when
+ * that yields -1 or when the matching tokentab entry is not of class
+ * LEX_BUILTIN; otherwise the entry's `ptr' member is returned as is.
+ *
+ * NOTE(review): tokentab rows that carry a 0 function pointer (for
+ * example "and" and "sqrt") would also produce NULL here for a genuine
+ * builtin --- confirm whether callers rely on ptr having been filled
+ * in beforehand.
+ */
+
+builtin_func_t
+lookup_builtin(const char *name)
+{
+ int mid = check_special(name);
+
+ if (mid == -1 || tokentab[mid].class != LEX_BUILTIN)
+ return NULL;
+
+ return tokentab[mid].ptr;
+}
+
+/*
+ * install_builtins --- add built-in functions to FUNCTAB
+ *
+ * Walks every entry of tokentab and, for each one whose class is
+ * LEX_BUILTIN and which is not flagged DEBUG_USE (adump and stopme
+ * carry that flag), calls install_symbol() with the entry's `operator'
+ * member and Node_builtin_func. The value returned by install_symbol()
+ * is ignored.
+ */
+
+void
+install_builtins(void)
+{
+ int i, j;
+
+ j = sizeof(tokentab) / sizeof(tokentab[0]);
+ for (i = 0; i < j; i++) {
+ if ( tokentab[i].class == LEX_BUILTIN
+ && (tokentab[i].flags & DEBUG_USE) == 0) {
+ (void) install_symbol(tokentab[i].operator, Node_builtin_func);
+ }
+ }
+}
+
+/*
+ * 9/2014: Gawk cannot use <ctype.h> isalpha or isalnum when
+ * parsing the program since that can let through non-English
+ * letters. So, we supply our own. !@#$%^&*()-ing locales!
+ */
+
+/*
+ * is_alpha --- return true if c is an English letter
+ *
+ * Returns true only for the 52 characters 'a'-'z' and 'A'-'Z', each
+ * matched by its own case label, and false for every other value of c.
+ * Building with I_DONT_KNOW_WHAT_IM_DOING defined replaces the whole
+ * test with <ctype.h> isalpha().
+ */
+
+bool
+is_alpha(int c)
+{
+#ifdef I_DONT_KNOW_WHAT_IM_DOING
+ return isalpha(c);
+#else /* ! I_DONT_KNOW_WHAT_IM_DOING */
+ switch (c) {
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+ return true;
+ }
+ return false;
+#endif /* ! I_DONT_KNOW_WHAT_IM_DOING */
+}
+
+/*
+ * is_alnum --- return true for alphanumeric, English only letters
+ *
+ * True when is_alpha(c) is true or when c lies between '0' and '9'
+ * inclusive; false otherwise.
+ */
+
+bool
+is_alnum(int c)
+{
+ /* digit test is good for EBCDIC too. so there. */
+ return (is_alpha(c) || ('0' <= c && c <= '9'));
+}
+
+
+/*
+ * is_identchar --- return true if c can be in an identifier
+ *
+ * That is: c is an underscore or satisfies is_alnum(c).
+ */
+
+bool
+is_identchar(int c)
+{
+ return (is_alnum(c) || c == '_');
+}
diff --git a/awklib/Makefile.am b/awklib/Makefile.am
index 6ffbea81..87ee96b8 100644
--- a/awklib/Makefile.am
+++ b/awklib/Makefile.am
@@ -39,13 +39,12 @@ AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)
pkgdatadir = $(datadir)/awk
pkglibexecdir = $(libexecdir)/awk
-bin_SCRIPTS = igawk
pkglibexec_PROGRAMS = pwcat grcat
AUXAWK = passwd.awk group.awk
nodist_grcat_SOURCES = grcat.c
nodist_pwcat_SOURCES = pwcat.c
-all: $(srcdir)/stamp-eg $(AUXPROGS) igawk $(AUXAWK)
+all: $(srcdir)/stamp-eg $(AUXPROGS) $(AUXAWK)
install-exec-hook: $(AUXAWK)
$(mkinstalldirs) $(DESTDIR)$(pkgdatadir)
@@ -57,10 +56,9 @@ install-exec-hook: $(AUXAWK)
# pkglibexecdir and pkgdatadir are removed in the top level Makefile's uninstall
uninstall-local:
rm -fr $(DESTDIR)$(pkglibexecdir)/* $(DESTDIR)$(pkgdatadir)/*
- rm -f $(DESTDIR)$(bindir)/igawk
clean-local:
- rm -f $(AUXAWK) igawk *.exe
+ rm -f $(AUXAWK) *.exe
rm -fr eg.old
rm -fr grcat.dSYM pwcat.dSYM
@@ -72,7 +70,7 @@ $(srcdir)/stamp-eg: $(srcdir)/../doc/gawk.texi $(srcdir)/../doc/gawkinet.texi
@echo 'some makes are stupid and will not check a directory' > $(srcdir)/stamp-eg
@echo 'against a file, so this file is a place holder. gack.' >> $(srcdir)/stamp-eg
-$(srcdir)/eg/lib/pwcat.c $(srcdir)/eg/lib/grcat.c $(srcdir)/eg/prog/igawk.sh \
+$(srcdir)/eg/lib/pwcat.c $(srcdir)/eg/lib/grcat.c \
$(srcdir)/eg/lib/passwdawk.in $(srcdir)/eg/lib/groupawk.in: stamp-eg; @:
pwcat$(EXEEXT): $(srcdir)/eg/lib/pwcat.c
@@ -81,9 +79,6 @@ pwcat$(EXEEXT): $(srcdir)/eg/lib/pwcat.c
grcat$(EXEEXT): $(srcdir)/eg/lib/grcat.c
$(COMPILE) $(srcdir)/eg/lib/grcat.c $(LDFLAGS) -o $@
-igawk: $(srcdir)/eg/prog/igawk.sh
- cp $(srcdir)/eg/prog/igawk.sh $@ ; chmod 755 $@
-
passwd.awk: $(srcdir)/eg/lib/passwdawk.in
sed 's;/usr/local/libexec/awk;$(pkglibexecdir);' < $(srcdir)/eg/lib/passwdawk.in > passwd.awk
diff --git a/awklib/Makefile.in b/awklib/Makefile.in
index d32ae04a..38124fad 100644
--- a/awklib/Makefile.in
+++ b/awklib/Makefile.in
@@ -37,7 +37,6 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#
-
VPATH = @srcdir@
am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
am__make_running_with_option = \
@@ -122,7 +121,7 @@ mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
-am__installdirs = "$(DESTDIR)$(pkglibexecdir)" "$(DESTDIR)$(bindir)"
+am__installdirs = "$(DESTDIR)$(pkglibexecdir)"
PROGRAMS = $(pkglibexec_PROGRAMS)
nodist_grcat_OBJECTS = grcat.$(OBJEXT)
grcat_OBJECTS = $(nodist_grcat_OBJECTS)
@@ -130,34 +129,6 @@ grcat_LDADD = $(LDADD)
nodist_pwcat_OBJECTS = pwcat.$(OBJEXT)
pwcat_OBJECTS = $(nodist_pwcat_OBJECTS)
pwcat_LDADD = $(LDADD)
-am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
-am__vpath_adj = case $$p in \
- $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
- *) f=$$p;; \
- esac;
-am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
-am__install_max = 40
-am__nobase_strip_setup = \
- srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
-am__nobase_strip = \
- for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
-am__nobase_list = $(am__nobase_strip_setup); \
- for p in $$list; do echo "$$p $$p"; done | \
- sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
- $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
- if (++n[$$2] == $(am__install_max)) \
- { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
- END { for (dir in files) print dir, files[dir] }'
-am__base_list = \
- sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
- sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
-am__uninstall_files_from_dir = { \
- test -z "$$files" \
- || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
- || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
- $(am__cd) "$$dir" && rm -f $$files; }; \
- }
-SCRIPTS = $(bin_SCRIPTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
@@ -349,7 +320,6 @@ EXTRA_DIST = ChangeLog ChangeLog.0 extract.awk eg $(srcdir)/stamp-eg
# Get config.h from the build directory and custom.h from the source directory.
AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)
-bin_SCRIPTS = igawk
AUXAWK = passwd.awk group.awk
nodist_grcat_SOURCES = grcat.c
nodist_pwcat_SOURCES = pwcat.c
@@ -429,41 +399,6 @@ uninstall-pkglibexecPROGRAMS:
clean-pkglibexecPROGRAMS:
-test -z "$(pkglibexec_PROGRAMS)" || rm -f $(pkglibexec_PROGRAMS)
-install-binSCRIPTS: $(bin_SCRIPTS)
- @$(NORMAL_INSTALL)
- @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || list=; \
- if test -n "$$list"; then \
- echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
- $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
- fi; \
- for p in $$list; do \
- if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
- if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \
- done | \
- sed -e 'p;s,.*/,,;n' \
- -e 'h;s|.*|.|' \
- -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \
- $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \
- { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
- if ($$2 == $$4) { files[d] = files[d] " " $$1; \
- if (++n[d] == $(am__install_max)) { \
- print "f", d, files[d]; n[d] = 0; files[d] = "" } } \
- else { print "f", d "/" $$4, $$1 } } \
- END { for (d in files) print "f", d, files[d] }' | \
- while read type dir files; do \
- if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
- test -z "$$files" || { \
- echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \
- $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
- } \
- ; done
-
-uninstall-binSCRIPTS:
- @$(NORMAL_UNINSTALL)
- @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \
- files=`for p in $$list; do echo "$$p"; done | \
- sed -e 's,.*/,,;$(transform)'`; \
- dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@@ -572,9 +507,9 @@ distdir: $(DISTFILES)
done
check-am: all-am
check: check-am
-all-am: Makefile $(PROGRAMS) $(SCRIPTS)
+all-am: Makefile $(PROGRAMS)
installdirs:
- for dir in "$(DESTDIR)$(pkglibexecdir)" "$(DESTDIR)$(bindir)"; do \
+ for dir in "$(DESTDIR)$(pkglibexecdir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: install-am
@@ -636,7 +571,7 @@ install-dvi: install-dvi-am
install-dvi-am:
-install-exec-am: install-binSCRIPTS install-pkglibexecPROGRAMS
+install-exec-am: install-pkglibexecPROGRAMS
@$(NORMAL_INSTALL)
$(MAKE) $(AM_MAKEFLAGS) install-exec-hook
install-html: install-html-am
@@ -676,8 +611,7 @@ ps: ps-am
ps-am:
-uninstall-am: uninstall-binSCRIPTS uninstall-local \
- uninstall-pkglibexecPROGRAMS
+uninstall-am: uninstall-local uninstall-pkglibexecPROGRAMS
.MAKE: install-am install-exec-am install-strip
@@ -685,20 +619,19 @@ uninstall-am: uninstall-binSCRIPTS uninstall-local \
clean-local clean-pkglibexecPROGRAMS cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-tags distdir dvi dvi-am html html-am info info-am \
- install install-am install-binSCRIPTS install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-exec-hook install-html install-html-am \
- install-info install-info-am install-man install-pdf \
- install-pdf-am install-pkglibexecPROGRAMS install-ps \
- install-ps-am install-strip installcheck installcheck-am \
- installdirs maintainer-clean maintainer-clean-generic \
- mostlyclean mostlyclean-compile mostlyclean-generic pdf pdf-am \
- ps ps-am tags tags-am uninstall uninstall-am \
- uninstall-binSCRIPTS uninstall-local \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-exec-hook \
+ install-html install-html-am install-info install-info-am \
+ install-man install-pdf install-pdf-am \
+ install-pkglibexecPROGRAMS install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am uninstall-local \
uninstall-pkglibexecPROGRAMS
-all: $(srcdir)/stamp-eg $(AUXPROGS) igawk $(AUXAWK)
+all: $(srcdir)/stamp-eg $(AUXPROGS) $(AUXAWK)
install-exec-hook: $(AUXAWK)
$(mkinstalldirs) $(DESTDIR)$(pkgdatadir)
@@ -710,10 +643,9 @@ install-exec-hook: $(AUXAWK)
# pkglibexecdir and pkgdatadir are removed in the top level Makefile's uninstall
uninstall-local:
rm -fr $(DESTDIR)$(pkglibexecdir)/* $(DESTDIR)$(pkgdatadir)/*
- rm -f $(DESTDIR)$(bindir)/igawk
clean-local:
- rm -f $(AUXAWK) igawk *.exe
+ rm -f $(AUXAWK) *.exe
rm -fr eg.old
rm -fr grcat.dSYM pwcat.dSYM
@@ -725,7 +657,7 @@ $(srcdir)/stamp-eg: $(srcdir)/../doc/gawk.texi $(srcdir)/../doc/gawkinet.texi
@echo 'some makes are stupid and will not check a directory' > $(srcdir)/stamp-eg
@echo 'against a file, so this file is a place holder. gack.' >> $(srcdir)/stamp-eg
-$(srcdir)/eg/lib/pwcat.c $(srcdir)/eg/lib/grcat.c $(srcdir)/eg/prog/igawk.sh \
+$(srcdir)/eg/lib/pwcat.c $(srcdir)/eg/lib/grcat.c \
$(srcdir)/eg/lib/passwdawk.in $(srcdir)/eg/lib/groupawk.in: stamp-eg; @:
pwcat$(EXEEXT): $(srcdir)/eg/lib/pwcat.c
@@ -734,9 +666,6 @@ pwcat$(EXEEXT): $(srcdir)/eg/lib/pwcat.c
grcat$(EXEEXT): $(srcdir)/eg/lib/grcat.c
$(COMPILE) $(srcdir)/eg/lib/grcat.c $(LDFLAGS) -o $@
-igawk: $(srcdir)/eg/prog/igawk.sh
- cp $(srcdir)/eg/prog/igawk.sh $@ ; chmod 755 $@
-
passwd.awk: $(srcdir)/eg/lib/passwdawk.in
sed 's;/usr/local/libexec/awk;$(pkglibexecdir);' < $(srcdir)/eg/lib/passwdawk.in > passwd.awk
diff --git a/awklib/eg/lib/ctime.awk b/awklib/eg/lib/ctime.awk
index f37856c6..ca750370 100644
--- a/awklib/eg/lib/ctime.awk
+++ b/awklib/eg/lib/ctime.awk
@@ -4,7 +4,7 @@
function ctime(ts, format)
{
- format = "%a %b %e %H:%M:%S %Z %Y"
+ format = PROCINFO["strftime"]
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
diff --git a/awklib/eg/lib/div.awk b/awklib/eg/lib/div.awk
new file mode 100644
index 00000000..9d919288
--- /dev/null
+++ b/awklib/eg/lib/div.awk
@@ -0,0 +1,17 @@
+# div --- do integer division
+
+#
+# Arnold Robbins, arnold@skeeve.com, Public Domain
+# July, 2014
+
+function div(numerator, denominator, result, i)
+{	# the extra parameter i is never referenced in the body
+ split("", result)	# empty out the result array
+
+ numerator = int(numerator)	# both operands are truncated to integers first
+ denominator = int(denominator)
+ result["quotient"] = int(numerator / denominator)
+ result["remainder"] = int(numerator % denominator)
+
+ return 0.0	# fixed return value; the answers are left in result[]
+}
diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk
index 4283a7e1..6b1f4c50 100644
--- a/awklib/eg/lib/getopt.awk
+++ b/awklib/eg/lib/getopt.awk
@@ -38,8 +38,7 @@ function getopt(argc, argv, options, thisopt, i)
i = index(options, thisopt)
if (i == 0) {
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) {
Optind++
_opti = 0
@@ -70,7 +69,7 @@ BEGIN {
# test program
if (_getopt_test) {
while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, optarg = <%s>\n",
+ printf("c = <%c>, Optarg = <%s>\n",
_go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
diff --git a/awklib/eg/lib/gettime.awk b/awklib/eg/lib/gettime.awk
index 4cb56330..3da9c8ab 100644
--- a/awklib/eg/lib/gettime.awk
+++ b/awklib/eg/lib/gettime.awk
@@ -31,7 +31,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
+ ret = strftime(PROCINFO["strftime"], now)
# clear out target array
delete time
diff --git a/awklib/eg/lib/grcat.c b/awklib/eg/lib/grcat.c
index ff2913a1..7d6b6a74 100644
--- a/awklib/eg/lib/grcat.c
+++ b/awklib/eg/lib/grcat.c
@@ -1,7 +1,7 @@
/*
* grcat.c
*
- * Generate a printable version of the group database
+ * Generate a printable version of the group database.
*/
/*
* Arnold Robbins, arnold@skeeve.com, May 1993
diff --git a/awklib/eg/lib/groupawk.in b/awklib/eg/lib/groupawk.in
index 0917b923..9382bce8 100644
--- a/awklib/eg/lib/groupawk.in
+++ b/awklib/eg/lib/groupawk.in
@@ -5,8 +5,7 @@
# Revised October 2000
# Revised December 2010
-BEGIN \
-{
+BEGIN {
# Change to suit your system
_gr_awklib = "/usr/local/libexec/awk/"
}
diff --git a/awklib/eg/lib/pwcat.c b/awklib/eg/lib/pwcat.c
index 910e0329..934ef34e 100644
--- a/awklib/eg/lib/pwcat.c
+++ b/awklib/eg/lib/pwcat.c
@@ -1,7 +1,7 @@
/*
* pwcat.c
*
- * Generate a printable version of the password database
+ * Generate a printable version of the password database.
*/
/*
* Arnold Robbins, arnold@skeeve.com, May 1993
diff --git a/awklib/eg/lib/strtonum.awk b/awklib/eg/lib/strtonum.awk
index a56ab50c..f82c89c5 100644
--- a/awklib/eg/lib/strtonum.awk
+++ b/awklib/eg/lib/strtonum.awk
@@ -3,8 +3,9 @@
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# February, 2004
+# Revised June, 2014
-function mystrtonum(str, ret, chars, n, i, k, c)
+function mystrtonum(str, ret, n, i, k, c)
{
if (str ~ /^0[0-7]*$/) {
# octal
@@ -12,12 +13,13 @@ function mystrtonum(str, ret, chars, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
}
- } else if (str ~ /^0[xX][[:xdigit:]]+/) {
+ } else if (str ~ /^0[xX][[:xdigit:]]+$/) {
# hexadecimal
str = substr(str, 3) # lop off leading 0x
n = length(str)
@@ -25,10 +27,9 @@ function mystrtonum(str, ret, chars, n, i, k, c)
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
c = tolower(c)
- if ((k = index("0123456789", c)) > 0)
- k-- # adjust for 1-basing in awk
- else if ((k = index("abcdef", c)) > 0)
- k += 9
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("123456789abcdef", c)
ret = ret * 16 + k
}
diff --git a/awklib/eg/prog/alarm.awk b/awklib/eg/prog/alarm.awk
index 9bb1633c..59630ea8 100644
--- a/awklib/eg/prog/alarm.awk
+++ b/awklib/eg/prog/alarm.awk
@@ -8,8 +8,7 @@
# usage: alarm time [ "message" [ count [ delay ] ] ]
-BEGIN \
-{
+BEGIN {
# Initial argument sanity checking
usage1 = "usage: alarm time ['message' [count [delay]]]"
usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
@@ -71,7 +70,7 @@ BEGIN \
# how long to sleep for
naptime = target - current
if (naptime <= 0) {
- print "time is in the past!" > "/dev/stderr"
+ print "alarm: time is in the past!" > "/dev/stderr"
exit 1
}
# zzzzzz..... go away if interrupted
diff --git a/awklib/eg/prog/cut.awk b/awklib/eg/prog/cut.awk
index 1399411e..56e35e71 100644
--- a/awklib/eg/prog/cut.awk
+++ b/awklib/eg/prog/cut.awk
@@ -20,8 +20,7 @@ function usage( e1, e2)
print e2 > "/dev/stderr"
exit 1
}
-BEGIN \
-{
+BEGIN {
FS = "\t" # default
OFS = FS
while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) {
@@ -34,7 +33,7 @@ BEGIN \
OFS = ""
} else if (c == "d") {
if (length(Optarg) > 1) {
- printf("Using first character of %s" \
+ printf("cut: using first character of %s" \
" for delimiter\n", Optarg) > "/dev/stderr"
Optarg = substr(Optarg, 1, 1)
}
@@ -43,7 +42,7 @@ BEGIN \
if (FS == " ") # defeat awk semantics
FS = "[ ]"
} else if (c == "s")
- suppress++
+ suppress = 1
else
usage()
}
@@ -75,7 +74,7 @@ function set_fieldlist( n, m, i, j, k, f, g)
if (index(f[i], "-") != 0) { # a range
m = split(f[i], g, "-")
if (m != 2 || g[1] >= g[2]) {
- printf("bad field list: %s\n",
+ printf("cut: bad field list: %s\n",
f[i]) > "/dev/stderr"
exit 1
}
@@ -96,7 +95,7 @@ function set_charlist( field, i, j, f, g, n, m, t,
if (index(f[i], "-") != 0) { # range
m = split(f[i], g, "-")
if (m != 2 || g[1] >= g[2]) {
- printf("bad character list: %s\n",
+ printf("cut: bad character list: %s\n",
f[i]) > "/dev/stderr"
exit 1
}
diff --git a/awklib/eg/prog/egrep.awk b/awklib/eg/prog/egrep.awk
index 56d199c8..094bdea5 100644
--- a/awklib/eg/prog/egrep.awk
+++ b/awklib/eg/prog/egrep.awk
@@ -88,11 +88,8 @@ function endfile(file)
print
}
}
-END \
-{
- if (total == 0)
- exit 1
- exit 0
+END {
+ exit (total == 0)
}
function usage( e)
{
diff --git a/awklib/eg/prog/extract.awk b/awklib/eg/prog/extract.awk
index dc105728..12e30b54 100644
--- a/awklib/eg/prog/extract.awk
+++ b/awklib/eg/prog/extract.awk
@@ -10,7 +10,7 @@ BEGIN { IGNORECASE = 1 }
/^@c(omment)?[ \t]+system/ \
{
if (NF < 3) {
- e = (FILENAME ":" FNR)
+ e = ("extract: " FILENAME ":" FNR)
e = (e ": badly formed `system' line")
print e > "/dev/stderr"
next
@@ -19,7 +19,7 @@ BEGIN { IGNORECASE = 1 }
$2 = ""
stat = system($0)
if (stat != 0) {
- e = (FILENAME ":" FNR)
+ e = ("extract: " FILENAME ":" FNR)
e = (e ": warning: system returned " stat)
print e > "/dev/stderr"
}
@@ -27,7 +27,7 @@ BEGIN { IGNORECASE = 1 }
/^@c(omment)?[ \t]+file/ \
{
if (NF != 3) {
- e = (FILENAME ":" FNR ": badly formed `file' line")
+ e = ("extract: " FILENAME ":" FNR ": badly formed `file' line")
print e > "/dev/stderr"
next
}
@@ -65,7 +65,7 @@ BEGIN { IGNORECASE = 1 }
}
function unexpected_eof()
{
- printf("%s:%d: unexpected EOF or error\n",
+ printf("extract: %s:%d: unexpected EOF or error\n",
FILENAME, FNR) > "/dev/stderr"
exit 1
}
diff --git a/awklib/eg/prog/id.awk b/awklib/eg/prog/id.awk
index 8b60a245..992fa57c 100644
--- a/awklib/eg/prog/id.awk
+++ b/awklib/eg/prog/id.awk
@@ -5,13 +5,13 @@
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
# Revised February 1996
+# Revised May 2014
# output is:
# uid=12(foo) euid=34(bar) gid=3(baz) \
# egid=5(blat) groups=9(nine),2(two),1(one)
-BEGIN \
-{
+BEGIN {
uid = PROCINFO["uid"]
euid = PROCINFO["euid"]
gid = PROCINFO["gid"]
@@ -19,34 +19,26 @@ BEGIN \
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (euid != uid) {
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (egid != gid) {
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
}
for (i = 1; ("group" i) in PROCINFO; i++) {
@@ -55,13 +47,17 @@ BEGIN \
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
}
print ""
}
+
+function pr_first_field(str, a)
+{
+ split(str, a, ":")
+ printf("(%s)", a[1])
+}
diff --git a/awklib/eg/prog/igawk.sh b/awklib/eg/prog/igawk.sh
index 03d1c996..70edf606 100644
--- a/awklib/eg/prog/igawk.sh
+++ b/awklib/eg/prog/igawk.sh
@@ -115,7 +115,7 @@ BEGIN {
}
fpath = pathto($2)
if (fpath == "") {
- printf("igawk:%s:%d: cannot find %s\n",
+ printf("igawk: %s:%d: cannot find %s\n",
input[stackptr], FNR, $2) > "/dev/stderr"
continue
}
diff --git a/awklib/eg/prog/labels.awk b/awklib/eg/prog/labels.awk
index abf53c3b..3195809b 100644
--- a/awklib/eg/prog/labels.awk
+++ b/awklib/eg/prog/labels.awk
@@ -48,7 +48,6 @@ function printpage( i, j)
Count++
}
-END \
-{
+END {
printpage()
}
diff --git a/awklib/eg/prog/split.awk b/awklib/eg/prog/split.awk
index c907530b..bcc73ae6 100644
--- a/awklib/eg/prog/split.awk
+++ b/awklib/eg/prog/split.awk
@@ -4,8 +4,9 @@
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
+# Revised slightly, May 2014
-# usage: split [-num] [file] [outname]
+# usage: split [-count] [file] [outname]
BEGIN {
outfile = "x" # default
@@ -14,7 +15,7 @@ BEGIN {
usage()
i = 1
- if (ARGV[i] ~ /^-[[:digit:]]+$/) {
+ if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) {
count = -ARGV[i]
ARGV[i] = ""
i++
diff --git a/awklib/eg/prog/tee.awk b/awklib/eg/prog/tee.awk
index 639b9f80..fd9985f1 100644
--- a/awklib/eg/prog/tee.awk
+++ b/awklib/eg/prog/tee.awk
@@ -7,8 +7,7 @@
# May 1993
# Revised December 1995
-BEGIN \
-{
+BEGIN {
for (i = 1; i < ARGC; i++)
copy[i] = ARGV[i]
@@ -35,8 +34,7 @@ BEGIN \
print > copy[i]
print
}
-END \
-{
+END {
for (i in copy)
close(copy[i])
}
diff --git a/awklib/eg/prog/uniq.awk b/awklib/eg/prog/uniq.awk
index 990387ac..2a2cf63e 100644
--- a/awklib/eg/prog/uniq.awk
+++ b/awklib/eg/prog/uniq.awk
@@ -18,8 +18,7 @@ function usage( e)
# -n skip n fields
# +n skip n characters, skip fields first
-BEGIN \
-{
+BEGIN {
count = 1
outputfile = "/dev/stdout"
opts = "udc0:1:2:3:4:5:6:7:8:9:"
@@ -31,7 +30,7 @@ BEGIN \
else if (c == "c")
do_count++
else if (index("0123456789", c) != 0) {
- # getopt requires args to options
+ # getopt() requires args to options
# this messes us up for things like -5
if (Optarg ~ /^[[:digit:]]+$/)
fcount = (c Optarg) + 0
diff --git a/builtin.c b/builtin.c
index 7ddc22f7..4b3670a1 100644
--- a/builtin.c
+++ b/builtin.c
@@ -624,7 +624,14 @@ do_substr(int nargs)
lintwarn(_("substr: length %g is not >= 1"), d_length);
else if (do_lint == DO_LINT_INVALID && ! (d_length >= 0))
lintwarn(_("substr: length %g is not >= 0"), d_length);
- r = dupnode(Nnull_string);
+ /*
+ * Return explicit null string instead of doing
+ * dupnode(Nnull_string) so that if the result
+ * is checked with the combination of length()
+ * and lint, no error is reported about using
+ * an uninitialized value. Same thing later, too.
+ */
+ r = make_string("", 0);
goto finish;
}
if (do_lint) {
@@ -678,7 +685,7 @@ do_substr(int nargs)
/* substr("", 1, 0) produces a warning only if LINT_ALL */
if (do_lint && (do_lint == DO_LINT_ALL || ((indx | length) != 0)))
lintwarn(_("substr: source string is zero length"));
- r = dupnode(Nnull_string);
+ r = make_string("", 0);
goto finish;
}
@@ -695,7 +702,7 @@ do_substr(int nargs)
if (do_lint)
lintwarn(_("substr: start index %g is past end of string"),
d_index);
- r = dupnode(Nnull_string);
+ r = make_string("", 0);
goto finish;
}
if (length > src_len - indx) {
@@ -1421,23 +1428,28 @@ do_match(int nargs)
* 2001 standard:
*
* sub(ere, repl[, in ])
- * Substitute the string repl in place of the first instance of the extended regular
- * expression ERE in string in and return the number of substitutions. An ampersand
- * ('&') appearing in the string repl shall be replaced by the string from in that
- * matches the ERE. An ampersand preceded with a backslash ('\') shall be
- * interpreted as the literal ampersand character. An occurrence of two consecutive
- * backslashes shall be interpreted as just a single literal backslash character. Any
- * other occurrence of a backslash (for example, preceding any other character) shall
- * be treated as a literal backslash character. Note that if repl is a string literal (the
- * lexical token STRING; see Grammar (on page 170)), the handling of the
- * ampersand character occurs after any lexical processing, including any lexical
- * backslash escape sequence processing. If in is specified and it is not an lvalue (see
- * Expressions in awk (on page 156)), the behavior is undefined. If in is omitted, awk
- * shall use the current record ($0) in its place.
+ * Substitute the string repl in place of the first instance of the
+ * extended regular expression ERE in string in and return the number of
+ * substitutions. An ampersand ('&') appearing in the string repl shall
+ * be replaced by the string from in that matches the ERE. An ampersand
+ * preceded with a backslash ('\') shall be interpreted as the literal
+ * ampersand character. An occurrence of two consecutive backslashes shall
+ * be interpreted as just a single literal backslash character. Any other
+ * occurrence of a backslash (for example, preceding any other character)
+ * shall be treated as a literal backslash character. Note that if repl is a
+ * string literal (the lexical token STRING; see Grammar (on page 170)), the
+ * handling of the ampersand character occurs after any lexical processing,
+ * including any lexical backslash escape sequence processing. If in is
+ * specified and it is not an lvalue (see Expressions in awk (on page 156)),
+ * the behavior is undefined. If in is omitted, awk shall use the current
+ * record ($0) in its place.
+ *
+ * 11/2010: The text in the 2008 standard is the same as just quoted.
+ * However, POSIX behavior is now the default. This can change the behavior
+ * of awk programs. The old behavior is not available.
*
- * 11/2010: The text in the 2008 standard is the same as just quoted. However, POSIX behavior
- * is now the default. This can change the behavior of awk programs. The old behavior
- * is not available.
+ * 7/2011: Reverted backslash handling to what it used to be. It was in
+ * gawk for too long. Should have known better.
*/
/*
@@ -1543,14 +1555,11 @@ set_how_many:
text = t->stptr;
textlen = t->stlen;
- buflen = textlen + 2;
repl = s->stptr;
replend = repl + s->stlen;
repllen = replend - repl;
- emalloc(buf, char *, buflen + 2, "do_sub");
- buf[buflen] = '\0';
- buf[buflen + 1] = '\0';
+
ampersands = 0;
/*
@@ -1609,6 +1618,13 @@ set_how_many:
}
lastmatchnonzero = false;
+
+ /* guesstimate how much room to allocate; +2 forces > 0 */
+ buflen = textlen + (ampersands + 1) * repllen + 2;
+ emalloc(buf, char *, buflen + 2, "do_sub");
+ buf[buflen] = '\0';
+ buf[buflen + 1] = '\0';
+
bp = buf;
for (current = 1;; current++) {
matches++;
diff --git a/cmake/Toolchain_clang.cmake b/cmake/Toolchain_clang.cmake
new file mode 100644
index 00000000..89353570
--- /dev/null
+++ b/cmake/Toolchain_clang.cmake
@@ -0,0 +1,19 @@
+# http://www.cmake.org/Wiki/CmakeMingw
+# http://www.cmake.org/Wiki/CMake_Cross_Compiling#The_toolchain_file
+
+# the name of the target operating system
+SET(CMAKE_SYSTEM_NAME Generic)
+
+# which compilers to use for C and C++
+SET(CMAKE_C_COMPILER /usr/bin/clang)
+
+# where the target environment is located
+SET(CMAKE_FIND_ROOT_PATH /usr/lib64/clang/3.1)
+
+# adjust the default behaviour of the FIND_XXX() commands:
+# search headers and libraries in the target environment, search
+# programs in the host environment
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
diff --git a/cmake/Toolchain_generic.cmake b/cmake/Toolchain_generic.cmake
new file mode 100644
index 00000000..91ddc6e7
--- /dev/null
+++ b/cmake/Toolchain_generic.cmake
@@ -0,0 +1,21 @@
+# http://www.cmake.org/Wiki/CmakeMingw
+# http://www.cmake.org/Wiki/CMake_Cross_Compiling#The_toolchain_file
+
+# the name of the target operating system
+SET(CMAKE_SYSTEM_NAME Generic)
+
+# which compilers to use for C and C++
+# Settings for Ubuntu 12.04.1 LTS
+SET(CMAKE_C_COMPILER /usr/bin/gcc)
+
+# where the target environment is located
+# Settings for Ubuntu 12.04.1 LTS
+SET(CMAKE_FIND_ROOT_PATH /usr/)
+
+# adjust the default behaviour of the FIND_XXX() commands:
+# search headers and libraries in the target environment, search
+# programs in the host environment
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
diff --git a/cmake/Toolchain_mingw32.cmake b/cmake/Toolchain_mingw32.cmake
new file mode 100644
index 00000000..bb885f2f
--- /dev/null
+++ b/cmake/Toolchain_mingw32.cmake
@@ -0,0 +1,23 @@
+# http://www.cmake.org/Wiki/CmakeMingw
+# http://www.cmake.org/Wiki/CMake_Cross_Compiling#The_toolchain_file
+
+# the name of the target operating system
+SET(CMAKE_SYSTEM_NAME Windows)
+
+# which compilers to use for C and C++
+# Settings for Ubuntu 12.04.1 LTS
+SET(CMAKE_C_COMPILER /usr/bin/i686-w64-mingw32-gcc)
+SET(CMAKE_CXX_COMPILER /usr/bin/i686-w64-mingw32-g++)
+SET(CMAKE_RC_COMPILER /usr/bin/i686-w64-mingw32-windres)
+
+# where the target environment is located
+# Settings for Ubuntu 12.04.1 LTS
+SET(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32)
+
+# adjust the default behaviour of the FIND_XXX() commands:
+# search headers and libraries in the target environment, search
+# programs in the host environment
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
diff --git a/cmake/Toolchain_s390.cmake b/cmake/Toolchain_s390.cmake
new file mode 100644
index 00000000..e1cdcfff
--- /dev/null
+++ b/cmake/Toolchain_s390.cmake
@@ -0,0 +1,20 @@
+# http://www.cmake.org/Wiki/CmakeMingw
+# http://www.cmake.org/Wiki/CMake_Cross_Compiling#The_toolchain_file
+# http://wiki.debian.org/EmdebianToolchain#Get_the_binaries
+
+# the name of the target operating system
+SET(CMAKE_SYSTEM_NAME Generic)
+
+# which compilers to use for C and C++
+SET(CMAKE_C_COMPILER /usr/bin/s390-linux-gnu-gcc-4.4)
+
+# where the target environment is located
+SET(CMAKE_FIND_ROOT_PATH /usr/s390-linux-gnu/)
+
+# adjust the default behaviour of the FIND_XXX() commands:
+# search headers and libraries in the target environment, search
+# programs in the host environment
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
diff --git a/cmake/auk.ico b/cmake/auk.ico
new file mode 100644
index 00000000..795ef1d9
--- /dev/null
+++ b/cmake/auk.ico
Binary files differ
diff --git a/cmake/basictest b/cmake/basictest
new file mode 100755
index 00000000..210ed224
--- /dev/null
+++ b/cmake/basictest
@@ -0,0 +1,553 @@
+#!/bin/sh
+
+# Use this for debugging the test cases.
+# The resulting textual output will not destroy the test cases.
+set -x
+# After test case execution, the output can be found in
+# build/Testing/Temporary/LastTest.log
+
+export PATH=$PATH:/c/MinGW/msys/1.0/bin
+export GAWKEXE=$1
+export TESTCASE=$2
+TOPSRCDIR=$(dirname ${0})/..
+SRCDIR=${TOPSRCDIR}/test
+export AWKPATH=${SRCDIR}
+export AWKLIBPATH=$(dirname ${GAWKEXE})/extension/
+export LANG=C
+# Is this shell running in a native MinGW shell (MSYS) ?
+if test -n "$COMSPEC"; then
+ # Ignore all differences in white space.
+ COMPARE="diff -w"
+ PATH_SEPARATOR="\\"
+else
+ # This is a shell running in Unix environment.
+ COMPARE="cmp"
+ PATH_SEPARATOR="/"
+fi
+
+# This is the central function for executing a standard test case.
+# Many of the more specialized test cases rely on this function.
+function simple_test_case() {
+ local options=$1 # options passed to the gawk executable
+ local parameters=$2 # parameters passed to the test case script
+ cd ${SRCDIR}
+ if test -r ${TESTCASE}.in
+ # Any existing .in file will be redirected to standard input.
+ # The output redirection must be bound to the test script, otherwise
+ # the "set -x" logging would mix with the test case output.
+ then
+ ${pregawk} $GAWKEXE ${options} -f ${TESTCASE}.awk ${parameters} < ${TESTCASE}.in ${postgawk} > _${TESTCASE} 2>&1
+ else
+ ${pregawk} $GAWKEXE ${options} -f ${TESTCASE}.awk ${parameters} ${postgawk} > _${TESTCASE} 2>&1
+ fi || echo EXIT CODE: $? >> _${TESTCASE}
+ # Compare the expected (correct) output with the actual output.
+ ${COMPARE} ${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+ # If the comparison succeeds then remove the actual output.
+ # Else leave the actual output file untouched for later analysis.
+}
+
+# Each test case that cannot be handled in the "standard way" shall
+# be implemented as a function here.
+
+function lintold() { simple_test_case "--lint-old" "" ; }
+function defref() { simple_test_case "--lint" "" ; }
+# NOTE(review): fmtspcl is defined a second time further down in this script.
+# If the test dispatch runs after both definitions, the later definition
+# replaces this one and this line is dead code -- confirm and remove one.
+function fmtspcl() { simple_test_case "--lint" "" ; }
+function lintwarn() { simple_test_case "--lint" "" ; }
+function noeffect() { simple_test_case "--lint" "" ; }
+function nofmtch() { simple_test_case "--lint" "" ; }
+function shadow() { simple_test_case "--lint" "" ; }
+function uninit2() { simple_test_case "--lint" "" ; }
+function uninit3() { simple_test_case "--lint" "" ; }
+function uninit4() { simple_test_case "--lint" "" ; }
+function uninit5() { simple_test_case "--lint" "" ; }
+function uninitialized() { simple_test_case "--lint" "" ; }
+
+function regtest() {
+ echo 'Some of the output from regtest is very system specific, do not'
+ echo 'be distressed if your output differs from that distributed.'
+ echo 'Manual inspection is called for.'
+ AWK=$GAWKEXE ${SRCDIR}/regtest.sh
+}
+
+function compare() { simple_test_case "" "0 1" ; }
+
+function inftest() {
+ echo This test is very machine specific...
+ $GAWKEXE -f ${SRCDIR}/inftest.awk | sed "s/inf/Inf/g" >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function getline2() { simple_test_case "" "getline2.awk getline2.awk" ; }
+
+# Run awkpath.awk, located through AWKPATH, and compare its output with the
+# expected result. AWKPATH is set to ${SRCDIR}/lib for this one command only.
+function awkpath() {
+ # The former "$(PATH_SEPARATOR)" was Makefile syntax; in the shell it is a
+ # command substitution that fails and expands to nothing, so the effective
+ # value was already "${SRCDIR}/lib". Spell that out without the failing command.
+ AWKPATH="${SRCDIR}/lib" $GAWKEXE -f awkpath.awk >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function argtest() { simple_test_case "" "-x -y abc" ; }
+
+function badargs() {
+ $GAWKEXE -f 2>&1 | grep -v patchlevel >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function nonl() { simple_test_case "--lint" "/dev/null" ; }
+
+function poundbang() {
+# The original poundbang test case looks a bit non-deterministic.
+# This is a shortened version.
+ sed "s;/tmp/gawk;$GAWKEXE;" < ${SRCDIR}/poundbang.awk > ./_pbd.awk
+ chmod +x ./_pbd.awk
+ ./_pbd.awk ${SRCDIR}/poundbang.awk > _`basename ${TESTCASE}` ;
+ ${COMPARE} ${SRCDIR}/poundbang.awk _`basename ${TESTCASE}` && rm -f _`basename ${TESTCASE}` _pbd.awk
+}
+
+function messages() {
+ $GAWKEXE -f ${SRCDIR}/messages.awk >_out2 2>_out3
+ ${COMPARE} ${SRCDIR}/out1.ok _out1 && ${COMPARE} ${SRCDIR}/out2.ok _out2 && ${COMPARE} ${SRCDIR}/out3.ok _out3 && rm -f _out1 _out2 _out3
+}
+
+function argarray() {
+ case ${SRCDIR} in
+ .) : ;;
+ *) cp ${SRCDIR}/argarray.in . ;;
+ esac
+ TEST=test echo just a test | $GAWKEXE -f ${SRCDIR}/argarray.awk ./argarray.in - >_${TESTCASE}
+ case ${SRCDIR} in
+ .) : ;;
+ *) rm -f ./argarray.in ;;
+ esac
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+
+function localenl() {
+ ${SRCDIR}/${TESTCASE}.sh >_${TESTCASE} 2>/dev/null
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function mbprintf1() {
+ GAWKLOCALE=en_US.UTF-8 ; export GAWKLOCALE
+ LANG=en_US.UTF-8
+ $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >> _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function mbfw1() {
+ GAWKLOCALE=en_US.UTF-8 ; export GAWKLOCALE
+ LANG=en_US.UTF-8
+ $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >> _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function printfbad2() {
+ $GAWKEXE --lint -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in 2>&1 | sed "s;$SRCDIR/;;g" >_${TESTCASE} || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function beginfile1() {
+ AWKPATH=${SRCDIR} $GAWKEXE -f ${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.awk . ./no/such/file Makefile >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function beginfile2() {
+ # This differs from the original, the pwd part is new.
+ # The re-direction is now bound to the .sh file.
+ # This way the output of "set -x" is not written to the script's output file.
+ ( cd ${SRCDIR} && LC_ALL=C AWK="$GAWKEXE" ${SRCDIR}/${TESTCASE}.sh ${SRCDIR}/${TESTCASE}.in > `pwd`/_${TESTCASE} 2>&1 )
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok ${SRCDIR}/_${TESTCASE} && rm -f ${SRCDIR}/_${TESTCASE}
+}
+
+function dumpvars() {
+ AWKPATH=${SRCDIR} $GAWKEXE --dump-variables 1 < ${SRCDIR}/${TESTCASE}.in >/dev/null 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ mv awkvars.out _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function manyfiles() {
+ rm -rf junk
+ mkdir junk
+ $GAWKEXE 'BEGIN { for (i = 1; i <= 1030; i++) print i, i}' >_${TESTCASE}
+ $GAWKEXE -f ${SRCDIR}/manyfiles.awk _${TESTCASE} _${TESTCASE}
+ wc -l junk/* | $GAWKEXE '$1 != 2' | wc -l | sed "s/ *//g" > _${TESTCASE}
+ rm -rf junk
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function exitval1() {
+ $GAWKEXE -f ${SRCDIR}/exitval1.awk >_${TESTCASE} 2>&1; echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function fsspcoln() {
+ $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk 'FS=[ :]+' ${SRCDIR}/${TESTCASE}.in >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rsstart2() {
+ $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/rsstart1.in >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rsstart3() {
+ head ${SRCDIR}/rsstart1.in | $GAWKEXE -f ${SRCDIR}/rsstart2.awk >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function strftime() {
+ echo This test could fail on slow machines or on a minute boundary,
+ echo so if it does, double check the actual results:
+ GAWKLOCALE=C; export GAWKLOCALE
+ TZ=GMT0; export TZ
+ (LC_ALL=C date) | $GAWKEXE -v OUTPUT=_${TESTCASE} -f ${SRCDIR}/strftime.awk
+ ${COMPARE} strftime.ok _${TESTCASE} && rm -f _${TESTCASE} strftime.ok || exit 0
+}
+
+function inplace1() {
+ cp ${SRCDIR}/inplace.1.in _${TESTCASE}.1
+ cp ${SRCDIR}/inplace.2.in _${TESTCASE}.2
+ AWKPATH=${SRCDIR}/../awklib/eg/lib $GAWKEXE -i inplace 'BEGIN {print "before"} {gsub(/foo/, "bar"); print} END {print "after"}' _${TESTCASE}.1 - _${TESTCASE}.2 < ${SRCDIR}/inplace.in >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.1.ok _${TESTCASE}.1 && rm -f _${TESTCASE}.1
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.2.ok _${TESTCASE}.2 && rm -f _${TESTCASE}.2
+}
+
+function inplace2() {
+ cp ${SRCDIR}/inplace.1.in _${TESTCASE}.1
+ cp ${SRCDIR}/inplace.2.in _${TESTCASE}.2
+ AWKPATH=${SRCDIR}/../awklib/eg/lib $GAWKEXE -i inplace -v INPLACE_SUFFIX=.bak 'BEGIN {print "before"} {gsub(/foo/, "bar"); print} END {print "after"}' _${TESTCASE}.1 - _${TESTCASE}.2 < ${SRCDIR}/inplace.in >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.1.ok _${TESTCASE}.1 && rm -f _${TESTCASE}.1
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.1.bak.ok _${TESTCASE}.1.bak && rm -f _${TESTCASE}.1.bak
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.2.ok _${TESTCASE}.2 && rm -f _${TESTCASE}.2
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.2.bak.ok _${TESTCASE}.2.bak && rm -f _${TESTCASE}.2.bak
+}
+
+function inplace3() {
+ cp ${SRCDIR}/inplace.1.in _${TESTCASE}.1
+ cp ${SRCDIR}/inplace.2.in _${TESTCASE}.2
+ AWKPATH=${SRCDIR}/../awklib/eg/lib $GAWKEXE -i inplace -v INPLACE_SUFFIX=.bak 'BEGIN {print "before"} {gsub(/foo/, "bar"); print} END {print "after"}' _${TESTCASE}.1 - _${TESTCASE}.2 < ${SRCDIR}/inplace.in >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ AWKPATH=${SRCDIR}/../awklib/eg/lib $GAWKEXE -i inplace -v INPLACE_SUFFIX=.bak 'BEGIN {print "Before"} {gsub(/bar/, "foo"); print} END {print "After"}' _${TESTCASE}.1 - _${TESTCASE}.2 < ${SRCDIR}/inplace.in >>_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.1.ok _${TESTCASE}.1 && rm -f _${TESTCASE}.1
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.1.bak.ok _${TESTCASE}.1.bak && rm -f _${TESTCASE}.1.bak
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.2.ok _${TESTCASE}.2 && rm -f _${TESTCASE}.2
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.2.bak.ok _${TESTCASE}.2.bak && rm -f _${TESTCASE}.2.bak
+}
+
+function testext() {
+ $GAWKEXE ' /^(@load|BEGIN)/,/^}/' ${SRCDIR}/../extension/testext.c > testext.awk
+ $GAWKEXE -f ${TESTCASE}.awk > ${SRCDIR}/_${TESTCASE} 2>&1 || echo EXIT CODE: $? >> ${SRCDIR}/_${TESTCASE}
+ rm -f testext.awk
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok ${SRCDIR}/_${TESTCASE} && rm -f ${SRCDIR}/_${TESTCASE}
+}
+
+function readdir() {
+ if [ "`uname`" = Linux ] && [ "`stat -f . 2>/dev/null | awk 'NR == 2 { print $NF }'`" = nfs ]; then
+ echo This test may fail on GNU/Linux systems when run on an NFS filesystem.;
+ echo If it does, try rerunning on an ext'[234]' filesystem. ;
+ fi
+ $GAWKEXE -f ${TESTCASE}.awk ${SRCDIR}/.. > ${SRCDIR}/_${TESTCASE} 2>&1
+ ls -afli ${TOPSRCDIR} | sed 1d | $GAWKEXE -f ${SRCDIR}/readdir0.awk -v extout=${SRCDIR}/_${TESTCASE} > ${SRCDIR}/${TESTCASE}.ok
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok ${SRCDIR}/_${TESTCASE} && rm -f ${SRCDIR}/_${TESTCASE} ${SRCDIR}/${TESTCASE}.ok
+}
+
+function ordchr2() {
+ $GAWKEXE -l ordchr 'BEGIN {print chr(ord("z"))}' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function include2() {
+ AWKPATH=${SRCDIR} $GAWKEXE -i inclib 'BEGIN {print sandwich("a", "b", "c")}' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -i inclib -i inclib.awk 'BEGIN {print sandwich("a", "b", "c")}' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe2() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -f inclib -f inclib.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe3() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -f hello -f hello.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe4() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -f hello -i hello.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe5() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -i hello -f hello.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe6() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -i inchello -f hello.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe7() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -f hello -i inchello >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+# TODO: The compare operation passes even when there are diffs.
+function readfile() {
+ $GAWKEXE -l readfile 'BEGIN {printf "%s", readfile("Makefile")}' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} Makefile _${TESTCASE} && rm -f _${TESTCASE} || cp -p Makefile ${TESTCASE}.ok
+}
+
+function fts() {
+ if [ "`uname`" = IRIX ]; then \
+ echo This test may fail on IRIX systems when run on an NFS filesystem.; \
+ echo If it does, try rerunning on an xfs filesystem. ; \
+ fi
+ simple_test_case "" ""
+}
+
+function charasbytes() {
+ [ -z "$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
+ AWKPATH=${SRCDIR} $GAWKEXE -b -v BINMODE=2 -f ${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in | \
+ od -c -t x1 | sed -e 's/ */ /g' -e 's/ *$//' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function symtab6() {
+ $GAWKEXE -d__${TESTCASE} -f ${SRCDIR}/${TESTCASE}.awk
+ grep -v '^ENVIRON' __${TESTCASE} | grep -v '^PROCINFO' > _${TESTCASE} ; rm __${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function symtab8() {
+ $GAWKEXE -d__${TESTCASE} -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in >_${TESTCASE}
+ grep -v '^ENVIRON' __${TESTCASE} | grep -v '^PROCINFO' | grep -v '^FILENAME' >> _${TESTCASE} ; rm __${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function colonwarn() {
+ for i in 1 2 3 ; \
+ do $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk $i < ${SRCDIR}/${TESTCASE}.in ; \
+ done > _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function litoct() {
+ echo ab | $GAWKEXE --traditional -f ${SRCDIR}/litoct.awk >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function devfd() {
+ $GAWKEXE 1 /dev/fd/4 /dev/fd/5 4<${SRCDIR}/devfd.in4 5<${SRCDIR}/devfd.in5 >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >> _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function fflush() {
+ ${SRCDIR}/fflush.sh >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function mmap8k() {
+ $GAWKEXE '{ print }' ${SRCDIR}/mmap8k.in >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/mmap8k.in _${TESTCASE} && rm -f _${TESTCASE} || cp ${SRCDIR}/${TESTCASE}.in ${TESTCASE}.ok
+}
+
+function pid() {
+ AWKPATH=${SRCDIR} AWK=$GAWKEXE ${SHELL} ${SRCDIR}/pid.sh $$ > _${TESTCASE} ; :
+ ${COMPARE} ${SRCDIR}/pid.ok _`basename ${TESTCASE}` && rm -f _${TESTCASE}
+}
+
+function strftlng() {
+ TZ=UTC; export TZ; $GAWKEXE -f ${SRCDIR}/strftlng.awk >_${TESTCASE}
+ if ${COMPARE} ${SRCDIR}/strftlng.ok _${TESTCASE} >/dev/null 2>&1 ; then : ; else \
+ TZ=UTC0; export TZ; $GAWKEXE -f ${SRCDIR}/strftlng.awk >_${TESTCASE} ; \
+ fi
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function nors() {
+ echo A B C D E | tr -d '\12\15' | $GAWKEXE '{ print $NF }' - ${SRCDIR}/nors.in > _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function fmtspcl() {
+ $GAWKEXE -v "sd=${SRCDIR}" 'BEGIN {pnan = sprintf("%g",sqrt(-1)); nnan = sprintf("%g",-sqrt(-1)); pinf = sprintf("%g",-log(0)); ninf = sprintf("%g",log(0))} {sub(/positive_nan/,pnan); sub(/negative_nan/,nnan); sub(/positive_infinity/,pinf); sub(/negative_infinity/,ninf); sub(/fmtspcl/,(sd"/fmtspcl")); print}' < ${SRCDIR}/fmtspcl.tok > ${TESTCASE}.ok 2>/dev/null
+ $GAWKEXE $AWKFLAGS -f ${SRCDIR}/fmtspcl.awk --lint >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ if test -z "$AWKFLAGS" ; then
+ ${COMPARE} ${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+ else
+ ${COMPARE} ${SRCDIR}/${TESTCASE}-mpfr.ok _${TESTCASE} && rm -f _${TESTCASE}
+ fi
+}
+
+function pipeio2() { simple_test_case "-v SRCDIR=${SRCDIR}" "" ; }
+
+function arynocls() {
+ AWKPATH=${SRCDIR} $GAWKEXE -v INPUT=${SRCDIR}/arynocls.in -f arynocls.awk >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function inetechu() {
+ echo This test is for establishing UDP connections
+ $GAWKEXE 'BEGIN {print "" |& "/inet/udp/0/127.0.0.1/9"}'
+}
+
+function inetecht() {
+ echo This test is for establishing TCP connections
+ $GAWKEXE 'BEGIN {print "" |& "/inet/tcp/0/127.0.0.1/9"}'
+}
+
+function inetdayu() {
+ echo This test is for bidirectional UDP transmission
+ $GAWKEXE 'BEGIN { print "" |& "/inet/udp/0/127.0.0.1/13"; \
+ "/inet/udp/0/127.0.0.1/13" |& getline; print $0}'
+}
+
+function inetdayt() {
+ echo This test is for bidirectional TCP transmission
+ $GAWKEXE 'BEGIN { print "" |& "/inet/tcp/0/127.0.0.1/13"; \
+ "/inet/tcp/0/127.0.0.1/13" |& getline; print $0}'
+}
+
+function redfilnm() { simple_test_case "" "srcdir=${SRCDIR}" ; }
+
+function leaddig() { simple_test_case "-v x=2E" "" ; }
+function longwrds() { simple_test_case "-vSORT=sort" "" ; }
+
+function gsubtst3() {
+ $GAWKEXE --re-interval -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function space() {
+ $GAWKEXE -f ' ' ${SRCDIR}/space.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rsnulbig() {
+ # Suppose that block size for pipe is at most 128kB:
+ $GAWKEXE 'BEGIN { for (i = 1; i <= 128*64+1; i++) print "abcdefgh123456\n" }' 2>&1 | \
+ $GAWKEXE 'BEGIN { RS = ""; ORS = "\n\n" }; { print }' 2>&1 | \
+ $GAWKEXE '/^[^a]/; END{ print NR }' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rsnulbig2() {
+ $GAWKEXE 'BEGIN { ORS = ""; n = "\n"; for (i = 1; i <= 10; i++) n = (n n); \
+ for (i = 1; i <= 128; i++) print n; print "abc\n" }' 2>&1 | \
+ $GAWKEXE 'BEGIN { RS = ""; ORS = "\n\n" };{ print }' 2>&1 | \
+ $GAWKEXE '/^[^a]/; END { print NR }' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function printf0() { simple_test_case "--posix" "" ; }
+
+# profile1: pretty-print xref.awk, run both the original and the
+# pretty-printed program on dtdgport.awk and compare their outputs.
+function profile1() {
+ $GAWKEXE --pretty-print=ap-${TESTCASE}.out -f ${SRCDIR}/xref.awk ${SRCDIR}/dtdgport.awk > _${TESTCASE}.out1
+ $GAWKEXE -f ap-${TESTCASE}.out ${SRCDIR}/dtdgport.awk > _${TESTCASE}.out2 ; rm ap-${TESTCASE}.out
+ # On mismatch record the exit status. "$?" and "${SRCDIR}" replace the
+ # Makefile spellings "$$?" and "$(srcdir)", which mean something else in shell.
+ # NOTE(review): cp gets a single operand with stdout redirected, so
+ # ${TESTCASE}.ok is presumably left empty -- confirm the intent.
+ ${COMPARE} _${TESTCASE}.out1 _${TESTCASE}.out2 && rm _${TESTCASE}.out[12] || { echo EXIT CODE: $? >>_${TESTCASE} ; \
+ cp ${SRCDIR}/dtdgport.awk > ${TESTCASE}.ok ; }
+}
+
+function profile2() {
+ $GAWKEXE --profile=ap-${TESTCASE}.out -v sortcmd=sort -f ${SRCDIR}/xref.awk ${SRCDIR}/dtdgport.awk > /dev/null
+ sed 1,2d < ap-${TESTCASE}.out > _${TESTCASE}; rm ap-${TESTCASE}.out
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function profile3() {
+ $GAWKEXE --profile=ap-${TESTCASE}.out -f ${SRCDIR}/${TESTCASE}.awk > /dev/null
+ sed 1,2d < ap-${TESTCASE}.out > _${TESTCASE}; rm ap-${TESTCASE}.out
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function profile4() {
+ GAWK_NO_PP_RUN=1 $GAWKEXE --profile=ap-${TESTCASE}.out -f ${SRCDIR}/${TESTCASE}.awk > /dev/null
+ sed 1,2d < ap-${TESTCASE}.out > _${TESTCASE}; rm ap-${TESTCASE}.out
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function profile5() {
+ GAWK_NO_PP_RUN=1 $GAWKEXE --profile=ap-${TESTCASE}.out -f ${SRCDIR}/${TESTCASE}.awk > /dev/null
+ sed 1,2d < ap-${TESTCASE}.out > _${TESTCASE}; rm ap-${TESTCASE}.out
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function posix2008sub() {
+ $GAWKEXE --posix -f ${SRCDIR}/${TESTCASE}.awk > _${TESTCASE} 2>&1
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function next() {
+ LC_ALL=${GAWKLOCALE:-C} LANG=${GAWKLOCALE:-C} AWK="$GAWKEXE" ${SRCDIR}/${TESTCASE}.sh > _${TESTCASE} 2>&1
+ LC_ALL=C ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function exit() {
+ AWK="$GAWKEXE" ${SRCDIR}/${TESTCASE}.sh > _${TESTCASE} 2>&1
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function mpfrexprange() { simple_test_case "-M -vPREC=53 " "" ; }
+function mpfrrnd() { simple_test_case "-M -vPREC=53 " "" ; }
+function mpfrnr() { simple_test_case "-M -vPREC=113" "" ; }
+function mpfrbigint() { simple_test_case "-M " "" ; }
+
+function jarebug() {
+ ${SRCDIR}/${TESTCASE}.sh "$GAWKEXE" "${SRCDIR}/${TESTCASE}.awk" "${SRCDIR}/${TESTCASE}.in" "_${TESTCASE}"
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rtlen() {
+ ${SRCDIR}/${TESTCASE}.sh >_${TESTCASE} || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rtlen01() {
+ ${SRCDIR}/${TESTCASE}.sh >_${TESTCASE} || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rtlenmb() {
+ GAWKLOCALE=en_US.UTF-8 ; export GAWKLOCALE
+ ${SRCDIR}/rtlen.sh >_${TESTCASE} || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function nondec2() { simple_test_case "--non-decimal-data -v a=0x1" "" ; }
+
+function nofile() {
+ $GAWKEXE '{}' no/such/file >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function binmode1() {
+ $GAWKEXE -v BINMODE=3 'BEGIN { print BINMODE }' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function devfd1() {
+ $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk 4< ${SRCDIR}/devfd.in1 5< ${SRCDIR}/devfd.in2 >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function devfd2() {
+ # The program text is the '1' which will print each record. How compact can you get?
+ $GAWKEXE 1 /dev/fd/4 /dev/fd/5 4< ${SRCDIR}/devfd.in1 5< ${SRCDIR}/devfd.in2 >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+# Is this test case implemented as a function ?
+if [ "$( type -t $TESTCASE )" = "function" ]
+then
+ $TESTCASE
+else
+ # If no function exists, then treat the test case in standard way.
+ simple_test_case "" ""
+fi
+
diff --git a/cmake/configure b/cmake/configure
new file mode 100755
index 00000000..d375a81c
--- /dev/null
+++ b/cmake/configure
@@ -0,0 +1,58 @@
+#!/bin/sh
+# On 2013-05-14 Arnold wrote in an e-mail:
+
+# <QUOTE>
+# I think that using CMake would be more palatable if there is also a simple
+# configure wrapper that can be used by people who build distributions. This would
+# mean things like
+#
+# configure CC=XXXX # XXXX in { gcc, clang, tcc } or native platform cc
+# configure --prefix=/path/to/install
+#
+# And the few other current configure options like --with-whiny-user-strftime,
+# --disable-nls, etc. I don't know if we need all the standard configure options,
+# but I do want the ones I've added in configure.ac.
+# </QUOTE>
+
+
+# Anyone using this script still needs an out-of-source build directory.
+if [ -f CMakeLists.txt ] ; then
+ echo "Your current working directory contains a file CMakeLists.txt, indicating"
+ echo "that this is a source directory. Create a new directory elsewhere, change into"
+ echo "this empty directory and try again."
+ echo " mkdir build"
+ echo " cd build"
+ echo " ../$0"
+ exit 1
+fi
+
+# TODO: Evaluate all the options and translate the options into CMake variables.
+CC=$( which cc )
+PREFIX=""
+SRCDIR=".."
+WHINY=""
+
+# Translate the supported configure-style arguments into variables.
+# Only POSIX sh constructs are used because the script runs under /bin/sh;
+# the ${var:offset:length} substring syntax is a bash extension.
+for p in "$@"
+do
+ case "$p" in
+ CC=*) CC=${p#CC=} ;;
+ --prefix=*) PREFIX=-DCMAKE_INSTALL_PREFIX=${p#--prefix=} ;;
+ --srcdir=*) SRCDIR=${p#--srcdir=} ;;
+ --with-whiny-user-strftime*) WHINY=-DUSE_INCLUDED_STRFTIME=1 ;;
+ esac
+done
+CC=$( which $CC )
+
+rm -f Toolchain.cmake
+(
+ echo "set(CMAKE_C_COMPILER $CC)"
+ echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)"
+ echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)"
+ echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)"
+) > Toolchain.cmake
+
+if ! [ -f ${SRCDIR}/CMakeLists.txt ] ; then
+ echo "The source directory (${SRCDIR}) does not contain a file CMakeLists.txt."
+ exit 1
+fi
+
+cmake ${PREFIX} ${WHINY} -DCMAKE_TOOLCHAIN_FILE=Toolchain.cmake ${SRCDIR}
+
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
new file mode 100644
index 00000000..7dbe841c
--- /dev/null
+++ b/cmake/configure.cmake
@@ -0,0 +1,300 @@
+#
+# cmake/configure.cmake --- CMake input file for gawk
+#
+# Copyright (C) 2013-2014
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+option (USE_CONFIG_H "Generate a file config.h for inclusion into C source code" ON)
+if (USE_CONFIG_H)
+ file( WRITE config.h "/* all settings defined by CMake. */\n\n" )
+ ADD_DEFINITIONS (-D HAVE_CONFIG_H)
+ # Configure a header file to pass some of the CMake settings
+ # to the source code
+ # http://www.cmake.org/cmake/help/v2.8.8/cmake.html#command:configure_file
+ # CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/config.cmake.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h IMMEDIATE )
+else()
+ file( WRITE config.h "/* empty file, all settings defined by CMake. */" )
+endif()
+
+include(CheckIncludeFiles)
+include(CheckIncludeFile)
+include(CheckSymbolExists)
+include(CheckFunctionExists)
+include(CheckLibraryExists)
+include(CheckTypeSize)
+include(CheckStructHasMember)
+INCLUDE(CheckCSourceCompiles)
+include(CheckPrototypeDefinition)
+
+# DefineConfigH(feature)
+# If the variable named by `feature` is true, publish it to the C sources:
+# as a "#define" line appended to config.h when USE_CONFIG_H is enabled,
+# otherwise as a -D compiler definition.
+MACRO(DefineConfigH feature)
+# message(STATUS feature=${feature}=${${feature}})
+ if (${feature})
+ # Test the option by name so every true value (ON, 1, YES, TRUE) is
+ # accepted, matching the "if (USE_CONFIG_H)" test that creates config.h.
+ if (USE_CONFIG_H)
+ FILE( APPEND config.h "#define ${feature} ${${feature}}\n")
+ else()
+ ADD_DEFINITIONS (-D${feature}=${${feature}})
+ endif ()
+ endif ()
+ENDMACRO(DefineConfigH)
+
+# DefineConfigHValue(feature value)
+# Set the variable `feature` to `value`, then publish it via DefineConfigH.
+MACRO(DefineConfigHValue feature value)
+ set(${feature} ${value})
+ DefineConfigH(${feature})
+ENDMACRO(DefineConfigHValue)
+
+# DefineFunctionIfAvailable(func feature)
+# Run check_function_exists for `func`, storing the result in `feature`,
+# then publish `feature` via DefineConfigH.
+MACRO(DefineFunctionIfAvailable func feature)
+ check_function_exists("${func}" "${feature}")
+ DefineConfigH(${feature})
+ENDMACRO(DefineFunctionIfAvailable)
+
+# DefineHFileIfAvailable(hfile feature)
+# Run check_include_file for the header `hfile`, storing the result in
+# `feature`, then publish `feature` via DefineConfigH.
+MACRO(DefineHFileIfAvailable hfile feature)
+ check_include_file("${hfile}" "${feature}")
+ DefineConfigH(${feature})
+ENDMACRO(DefineHFileIfAvailable)
+
+# DefineTypeIfAvailable(type feature)
+# Run check_type_size for `type` with `feature` as the result variable,
+# then publish `feature` via DefineConfigH.
+MACRO(DefineTypeIfAvailable type feature)
+ check_type_size("${type}" "${feature}")
+ DefineConfigH(${feature})
+ENDMACRO(DefineTypeIfAvailable)
+
+# DefineSymbolIfAvailable(symbol hfile feature)
+# Run check_symbol_exists for `symbol` in header `hfile`, storing the
+# result in `feature`, then publish `feature` via DefineConfigH.
+MACRO(DefineSymbolIfAvailable symbol hfile feature)
+ check_symbol_exists("${symbol}" "${hfile}" "${feature}")
+ DefineConfigH(${feature})
+ENDMACRO(DefineSymbolIfAvailable)
+
+# DefineStructHasMemberIfAvailable(struct member hfile feature)
+# Run check_struct_has_member for `member` of `struct` as declared in
+# `hfile`, storing the result in `feature`, then publish it via DefineConfigH.
+MACRO(DefineStructHasMemberIfAvailable struct member hfile feature)
+ check_struct_has_member("${struct}" "${member}" "${hfile}" "${feature}")
+ DefineConfigH(${feature})
+ENDMACRO(DefineStructHasMemberIfAvailable)
+
+# DefineLibraryIfAvailable(lib func location feature)
+# Run check_library_exists for `func` in library `lib` (searched in
+# `location`), storing the result in `feature`, then publish it via
+# DefineConfigH.
+MACRO(DefineLibraryIfAvailable lib func location feature)
+ check_library_exists("${lib}" "${func}" "${location}" "${feature}")
+ DefineConfigH(${feature})
+ENDMACRO(DefineLibraryIfAvailable)
+
+# DefineIfSourceCompiles(source feature)
+# Run check_c_source_compiles on the C text `source`, storing the result
+# in `feature`, then publish `feature` via DefineConfigH.
+MACRO(DefineIfSourceCompiles source feature)
+ check_c_source_compiles( "${source}" "${feature}")
+ DefineConfigH(${feature})
+ENDMACRO(DefineIfSourceCompiles)
+
+# Extract the three-part gawk version number from the AC_INIT line of
+# configure.ac and split it into major, minor and bugfix components.
+FILE( READ configure.ac CONFIG_AUTOMAKE )
+STRING( REGEX MATCH "AC_INIT\\(\\[GNU Awk\\], ([0-9]+\\.[0-9]+\\.[0-9]+)" GAWK_AUTOMAKE_LINE_VERSION "${CONFIG_AUTOMAKE}")
+# Skip the leading non-digits explicitly: a greedy ".*" prefix would swallow
+# all but the last digit of a multi-digit major version (e.g. "10" -> "0").
+STRING( REGEX REPLACE "^[^0-9]*([0-9]+)\\.[0-9]+\\.[0-9]+.*" "\\1" GAWK_MAJOR_VERSION "${GAWK_AUTOMAKE_LINE_VERSION}")
+STRING( REGEX REPLACE "^[^0-9]*[0-9]+\\.([0-9]+)\\.[0-9]+.*" "\\1" GAWK_MINOR_VERSION "${GAWK_AUTOMAKE_LINE_VERSION}")
+STRING( REGEX REPLACE "^[^0-9]*[0-9]+\\.[0-9]+\\.([0-9]+).*" "\\1" GAWK_BUGFIX_VERSION "${GAWK_AUTOMAKE_LINE_VERSION}")
+
+# The definition of the symbol GAWK cannot be passed in config.h
+# because the extensions will fail to build.
+add_definitions(-DGAWK)
+DefineConfigHValue(_GL_ATTRIBUTE_PURE "__attribute__ ((__pure__))")
+DefineConfigHValue(GAWK_VERSION "${GAWK_MAJOR_VERSION}.${GAWK_MINOR_VERSION}.${GAWK_BUGFIX_VERSION}")
+DefineConfigHValue(VERSION \\"${GAWK_VERSION}\\")
+DefineConfigHValue(PACKAGE \\"gawk\\")
+DefineConfigHValue(PACKAGE_STRING \\"GNU Awk ${GAWK_VERSION}\\")
+DefineConfigHValue(PACKAGE_TARNAME \\"gawk\\")
+DefineConfigHValue(PACKAGE_URL \\"http://www.gnu.org/software/gawk/\\")
+DefineConfigHValue(PACKAGE_VERSION \\"${GAWK_VERSION}\\")
+DefineConfigHValue(DEFPATH \\"${CMAKE_BINARY_DIR}/awk\\")
+DefineConfigHValue(DEFLIBPATH \\"${CMAKE_BINARY_DIR}/lib\\")
+if (CMAKE_DL_LIBS)
+ message(STATUS "Found CMAKE_DL_LIBS:${CMAKE_DL_LIBS}")
+else()
+ message(STATUS "Found no CMAKE_DL_LIBS")
+endif()
+if (CMAKE_SHARED_LIBRARY_SUFFIX)
+ DefineConfigHValue(DYNAMIC 1)
+ STRING( REGEX REPLACE "^(\\.)([a-zA-Z0-9])" "\\2" SHLIBEXT "${CMAKE_SHARED_LIBRARY_SUFFIX}")
+ DefineConfigHValue(SHLIBEXT \\"${SHLIBEXT}\\")
+ message(STATUS "Found SHLIBEXT: ${SHLIBEXT}")
+else()
+ message(STATUS "Found no SHLIBEXT")
+endif()
+DefineTypeIfAvailable("unsigned int" SIZEOF_UNSIGNED_INT)
+DefineTypeIfAvailable("unsigned long" SIZEOF_UNSIGNED_LONG)
+#/* Define to 1 if *printf supports %F format */
+add_definitions(-D PRINTF_HAS_F_FORMAT)
+#/* Define as the return type of signal handlers (`int' or `void'). */
+add_definitions(-D RETSIGTYPE=void)
+#add_definitions(-D PIPES_SIMULATED)
+check_prototype_definition(getpgrp "pid_t getpgrp(void)" "NULL" "unistd.h" GETPGRP_VOID)
+DefineConfigH(GETPGRP_VOID)
+#add_definitions(-D YYPARSE_PARAM)
+
+DefineFunctionIfAvailable(snprintf HAVE_SNPRINTF)
+DefineFunctionIfAvailable(vprintf HAVE_VPRINTF)
+DefineHFileIfAvailable(sys/types.h HAVE_SYS_TYPES_H)
+DefineHFileIfAvailable(sys/stat.h HAVE_SYS_STAT_H)
+DefineHFileIfAvailable(string.h HAVE_STRING_H)
+DefineHFileIfAvailable(memory.h HAVE_MEMORY_H)
+DefineHFileIfAvailable(strings.h HAVE_STRINGS_H)
+DefineHFileIfAvailable(stdint.h HAVE_STDINT_H)
+DefineHFileIfAvailable(inttypes.h HAVE_INTTYPES_H)
+DefineHFileIfAvailable(stdlib.h HAVE_STDLIB_H)
+DefineHFileIfAvailable(unistd.h HAVE_UNISTD_H)
+FIND_PATH(INTL_INCLUDE_DIR libintl.h PATHS /usr/include /usr/local/include)
+FIND_LIBRARY(INTL_LIBRARIES intl c PATHS /usr/lib/ /usr/local/lib)
+DefineSymbolIfAvailable("CODESET" "langinfo.h" HAVE_LANGINFO_CODESET)
+DefineSymbolIfAvailable("LC_MESSAGES" "locale.h" HAVE_LC_MESSAGES)
+DefineTypeIfAvailable("_Bool" HAVE__BOOL)
+# Enable NLS only when gettext, dcgettext, CODESET and LC_MESSAGES are all
+# available. The variables are tested by name because "${VAR}" expands to
+# nothing when VAR is unset or empty, leaving a malformed if() expression.
+# NOTE(review): HAVE_GETTEXT and HAVE_DCGETTEXT are only probed further down
+# in this file (after find_package(Gettext)), so on a fresh configure they
+# are still unset here -- consider moving this check below those probes.
+if (HAVE_GETTEXT AND HAVE_DCGETTEXT AND HAVE_LANGINFO_CODESET AND HAVE_LC_MESSAGES)
+ add_definitions(-D LOCALEDIR=\\"/usr/share/locale\\")
+ add_definitions(-D ENABLE_NLS)
+ ADD_SUBDIRECTORY( po )
+endif()
+DefineHFileIfAvailable(stdbool.h HAVE_STDBOOL_H)
+DefineHFileIfAvailable(sys/wait.h HAVE_SYS_WAIT_H)
+DefineHFileIfAvailable(arpa/inet.h HAVE_ARPA_INET_H)
+DefineHFileIfAvailable(fcntl.h HAVE_FCNTL_H)
+DefineHFileIfAvailable(limits.h HAVE_LIMITS_H)
+DefineHFileIfAvailable(locale.h HAVE_LOCALE_H)
+DefineHFileIfAvailable(libintl.h HAVE_LIBINTL_H)
+DefineHFileIfAvailable(mcheck.h HAVE_MCHECK_H)
+DefineHFileIfAvailable(netdb.h HAVE_NETDB_H)
+DefineHFileIfAvailable(netinet/in.h HAVE_NETINET_IN_H)
+DefineHFileIfAvailable(stdarg.h HAVE_STDARG_H)
+DefineHFileIfAvailable(stddef.h HAVE_STDDEF_H)
+DefineHFileIfAvailable(sys/ioctl.h HAVE_SYS_IOCTL_H)
+DefineHFileIfAvailable(sys/param.h HAVE_SYS_PARAM_H)
+DefineHFileIfAvailable(sys/socket.h HAVE_SYS_SOCKET_H)
+# The feature macro is HAVE_TERMIOS_H, so probe the standard <termios.h>
+# header rather than sys/termios.h.
+DefineHFileIfAvailable(termios.h HAVE_TERMIOS_H)
+DefineHFileIfAvailable(stropts.h HAVE_STROPTS_H)
+DefineHFileIfAvailable(wchar.h HAVE_WCHAR_H)
+DefineHFileIfAvailable(wctype.h HAVE_WCTYPE_H)
+DefineTypeIfAvailable("long long int" HAVE_LONG_LONG_INT)
+DefineTypeIfAvailable("unsigned long long int" HAVE_UNSIGNED_LONG_LONG_INT)
+DefineTypeIfAvailable(intmax_t INTMAX_T)
+DefineTypeIfAvailable(uintmax_t UINTMAX_T)
+DefineTypeIfAvailable("time_t" TIME_T_IN_SYS_TYPES_H)
+SET(CMAKE_EXTRA_INCLUDE_FILES wctype.h)
+DefineTypeIfAvailable("wctype_t" HAVE_WCTYPE_T)
+DefineTypeIfAvailable("wint_t" HAVE_WINT_T)
+SET(CMAKE_EXTRA_INCLUDE_FILES)
+
+DefineStructHasMemberIfAvailable("struct sockaddr_storage" ss_family sys/socket.h HAVE_SOCKADDR_STORAGE)
+DefineStructHasMemberIfAvailable("struct stat" st_blksize sys/stat.h HAVE_STRUCT_STAT_ST_BLKSIZE)
+DefineStructHasMemberIfAvailable("struct stat" st_blksize sys/stat.h HAVE_ST_BLKSIZE)
+DefineStructHasMemberIfAvailable("struct tm" tm_zone time.h HAVE_TM_ZONE)
+DefineStructHasMemberIfAvailable("struct tm" tm_zone time.h HAVE_STRUCT_TM_TM_ZONE)
+
+DefineHFileIfAvailable(sys/time.h HAVE_SYS_TIME_H)
+DefineFunctionIfAvailable(alarm HAVE_ALARM)
+DefineFunctionIfAvailable(tzname HAVE_DECL_TZNAME)
+DefineFunctionIfAvailable(mktime HAVE_MKTIME)
+DefineFunctionIfAvailable(getaddrinfo HAVE_GETADDRINFO)
+DefineFunctionIfAvailable(atexit HAVE_ATEXIT)
+DefineFunctionIfAvailable(btowc HAVE_BTOWC)
+DefineFunctionIfAvailable(fmod HAVE_FMOD)
+DefineFunctionIfAvailable(isinf HAVE_ISINF)
+DefineFunctionIfAvailable(ismod HAVE_ISMOD)
+DefineFunctionIfAvailable(getgrent HAVE_GETGRENT)
+DefineSymbolIfAvailable("getgroups" "unistd.h" HAVE_GETGROUPS)
+# Choose the element type of the getgroups() list argument: gid_t when the
+# prototype check succeeds, int otherwise. Variables are tested by name so
+# that an unset or empty result does not produce an empty if() condition.
+if (HAVE_GETGROUPS)
+ check_prototype_definition(getgroups "int getgroups(int size, gid_t list[])" "NULL" "unistd.h" GETGROUPS_T)
+ if (GETGROUPS_T)
+ DefineConfigHValue(GETGROUPS_T gid_t)
+ else()
+ DefineConfigHValue(GETGROUPS_T int)
+ endif()
+endif()
+
+DefineTypeIfAvailable("pid_t" PID_T)
+DefineTypeIfAvailable("intmax_t" HAVE_INTMAX_T)
+DefineFunctionIfAvailable(grantpt HAVE_GRANTPT)
+DefineFunctionIfAvailable(isascii HAVE_ISASCII)
+DefineFunctionIfAvailable(iswctype HAVE_ISWCTYPE)
+DefineFunctionIfAvailable(iswlower HAVE_ISWLOWER)
+DefineFunctionIfAvailable(iswupper HAVE_ISWUPPER)
+DefineFunctionIfAvailable(mbrlen HAVE_MBRLEN)
+DefineFunctionIfAvailable(memcmp HAVE_MEMCMP)
+DefineFunctionIfAvailable(memcpy HAVE_MEMCPY)
+DefineFunctionIfAvailable(memmove HAVE_MEMMOVE)
+DefineFunctionIfAvailable(memset HAVE_MEMSET)
+DefineFunctionIfAvailable(mkstemp HAVE_MKSTEMP)
+DefineFunctionIfAvailable(posix_openpt HAVE_POSIX_OPENPT)
+DefineFunctionIfAvailable(setenv HAVE_SETENV)
+DefineFunctionIfAvailable(setlocale HAVE_SETLOCALE)
+DefineFunctionIfAvailable(setsid HAVE_SETSID)
+DefineFunctionIfAvailable(strchr HAVE_STRCHR)
+DefineFunctionIfAvailable(strerror HAVE_STRERROR)
+DefineFunctionIfAvailable(strftime HAVE_STRFTIME)
+DefineFunctionIfAvailable(strncasecmp HAVE_STRNCASECMP)
+DefineFunctionIfAvailable(strcoll HAVE_STRCOLL)
+DefineFunctionIfAvailable(strtod HAVE_STRTOD)
+DefineFunctionIfAvailable(strtoul HAVE_STRTOUL)
+DefineFunctionIfAvailable(system HAVE_SYSTEM)
+DefineFunctionIfAvailable(tmpfile HAVE_TMPFILE)
+DefineFunctionIfAvailable(towlower HAVE_TOWLOWER)
+DefineFunctionIfAvailable(towupper HAVE_TOWUPPER)
+DefineFunctionIfAvailable(tzset HAVE_TZSET)
+DefineFunctionIfAvailable(usleep HAVE_USLEEP)
+DefineFunctionIfAvailable(wcrtomb HAVE_WCRTOMB)
+DefineFunctionIfAvailable(wcscoll HAVE_WCSCOLL)
+DefineFunctionIfAvailable(wctype HAVE_WCTYPE)
+DefineFunctionIfAvailable(mbrtowc HAVE_MBRTOWC)
+
+add_definitions(-D HAVE_STRINGIZE)
+add_definitions(-D _Noreturn=)
+
+find_package(BISON QUIET)
+# If there is a bison installed on this platform,
+# (BISON_FOUND is tested by name, which also copes with it being unset)
+if (BISON_FOUND)
+ # then let bison generate awkgram.c.
+ BISON_TARGET(awkgram awkgram.y ${CMAKE_SOURCE_DIR}/awkgram.c)
+else()
+ # otherwise use the existing awkgram.c.
+ set(BISON_awkgram_OUTPUTS ${CMAKE_SOURCE_DIR}/awkgram.c)
+endif()
+
+# Gettext is mandatory; once it is found, probe for the gettext() and
+# dcgettext() functions and record the results.
+find_package(Gettext REQUIRED)
+if (GETTEXT_FOUND)
+ include_directories(${GETTEXT_INCLUDE_DIR})
+ DefineFunctionIfAvailable(gettext HAVE_GETTEXT)
+ DefineFunctionIfAvailable(dcgettext HAVE_DCGETTEXT)
+else ()
+ message( FATAL_ERROR "Gettext not found" )
+endif()
+
+find_package(LATEX)
+include(GNUInstallDirs)
+include(GetPrerequisites)
+
+# For some unknown reason the defines for the extensions
+# are written into config.h only if they are implemented
+# here and not in extension/CMakeLists.txt.
+DefineLibraryIfAvailable(m sin "" HAVE_LIBM)
+DefineLibraryIfAvailable(mpfr mpfr_add_si "" HAVE_MPFR)
+DefineLibraryIfAvailable(c socket "" HAVE_SOCKETS)
+DefineLibraryIfAvailable(readline readline "" HAVE_LIBREADLINE)
+DefineFunctionIfAvailable(fnmatch HAVE_FNMATCH)
+DefineHFileIfAvailable(fnmatch.h HAVE_FNMATCH_H)
+DefineHFileIfAvailable(dirent.h HAVE_DIRENT_H)
+DefineFunctionIfAvailable(dirfd HAVE_DIRFD)
+DefineFunctionIfAvailable(getdtablesize HAVE_GETDTABLESIZE)
+DefineFunctionIfAvailable(select HAVE_SELECT)
+DefineFunctionIfAvailable(gettimeofday HAVE_GETTIMEOFDAY)
+DefineHFileIfAvailable(sys/select.h HAVE_SYS_SELECT_H)
+DefineFunctionIfAvailable(nanosleep HAVE_NANOSLEEP)
+DefineHFileIfAvailable(time.h HAVE_TIME_H)
+DefineFunctionIfAvailable(GetSystemTimeAsFileTime HAVE_GETSYSTEMTIMEASFILETIME)
+
diff --git a/cmake/docmaker b/cmake/docmaker
new file mode 100755
index 00000000..4af7cee1
--- /dev/null
+++ b/cmake/docmaker
@@ -0,0 +1,100 @@
+#!/bin/sh
+
+# The first parameter is the target, the file to be built.
+# All remaining parameters are dependencies (file names).
+if [ $# -lt 1 ] ; then
+ echo " $0: Incorrect number ($#) of parameters passed: $*"
+ exit 1
+fi
+OUTFILE=$1
+shift 1
+INFILES="$@"
+
+MAKEINFO="makeinfo --no-split --force"
+TROFF="groff -t -Tps -U"
+SEDME="sed -e \"s/^level0 restore/level0 restore flashme 100 72 moveto (Copyright `date '+%m-%d-%y %T'`, FSF, Inc. (all)) show/\" -e \"s/^\/level0 save def/\/level0 save def 30 -48 translate/\""
+SEDME2="sed '/%%Page: 10 10/,/0 Cg EP/d'"
+
+# BuildTarget TARGET
+# Build one documentation file. The name/suffix of TARGET selects the input
+# file and the shell command that generates it; plain source files have no
+# command and are only checked for readability. The command is skipped when
+# TARGET already exists and its input file is older. Exits the script with
+# status 1 on an unknown target or an unreadable input file.
+# Declared without the "function" keyword because the script runs under /bin/sh.
+BuildTarget()
+{
+ local OUTFILE=$1
+ local INFILE=""
+ local COMMAND=""
+
+ FILEBASE=${OUTFILE%.*}
+ case $OUTFILE in
+ *\.in | *\.1 | macros | cardfonts | colors | ad.block | setter.outline | \
+ gawkinet.texi | rflashlight.eps | api-figure1.fig | api-figure2.fig | api-figure3.fig | \
+ general-program.fig | process-flow.fig | statist.eps)
+ # Source files: nothing to generate.
+ INFILE=$OUTFILE
+ ;;
+ *\.texi)
+ if [ "$FILEBASE" = gawk ] ; then
+ INFILE=gawktexi.in
+ else
+ INFILE=$OUTFILE.in
+ fi
+ COMMAND="awk -f sidebar.awk < $INFILE > $OUTFILE"
+ ;;
+ *\.dvi)
+ INFILE=$FILEBASE.texi
+ COMMAND="texi2dvi -q --clean $INFILE"
+ ;;
+ *\.info)
+ INFILE=$FILEBASE.texi
+ COMMAND="${MAKEINFO} $INFILE"
+ ;;
+ *\.ps)
+ if [ "$FILEBASE" = awkcard ] ; then
+ INFILE=awkcard.in
+ # NOTE(review): inside this function "$*" is only the target name
+ # passed as $1, not the script's dependency list -- confirm that
+ # this is the intended troff input.
+ COMMAND="${TROFF} $* | ${SEDME} | cat setter.outline - | ${SEDME2} > awkcard.ps"
+ elif [ "$FILEBASE" = gawk.1 ] || [ "$FILEBASE" = igawk.1 ] ; then
+ INFILE=$FILEBASE
+ COMMAND="groff -z -man $INFILE > $OUTFILE"
+ else
+ INFILE=$FILEBASE.dvi
+ COMMAND="dvips -q -o $OUTFILE $INFILE"
+ fi
+ ;;
+ *\.pdf)
+ INFILE=$FILEBASE.ps
+ COMMAND="ps2pdf -q $INFILE $OUTFILE"
+ ;;
+ *\.tr)
+ INFILE=$FILEBASE.in
+ COMMAND="sed 's:SRCDIR:.:' < $INFILE > $OUTFILE"
+ ;;
+ *\.nc)
+ INFILE=$FILEBASE.in
+ # NOTE(review): same "$*" caveat as in the awkcard.ps case above.
+ COMMAND="${TROFF} $* | ${SEDME} | cat setter.outline - | ${SEDME2} > $FILEBASE.ps && touch $OUTFILE"
+ ;;
+ *)
+ echo " unknown target $OUTFILE"
+ exit 1
+ esac
+
+ if [ ! -r "$INFILE" ] ; then
+ echo " $0: Cannot read input file $INFILE"
+ exit 1
+ fi
+
+ if [ -f "$OUTFILE" ] ; then
+ if [ "$INFILE" -ot "$OUTFILE" ] ; then
+ #printf " Target %15s is up-to-date\n" $OUTFILE
+ COMMAND=""
+ fi
+ fi
+ #echo " Generating $OUTFILE from $INFILE"
+ # Pass the command to the shell verbatim; an unquoted echo would subject
+ # it to word splitting and pathname expansion first.
+ printf '%s\n' "$COMMAND" | sh -x
+ #echo "COMMAND=$COMMAND"
+}
+
+# Build all dependencies first, then build the target.
+for dep in $INFILES
+do
+ #echo $OUTFILE depends on $dep
+ BuildTarget $dep
+done
+BuildTarget $OUTFILE
+
diff --git a/cmake/package.cmake b/cmake/package.cmake
new file mode 100644
index 00000000..203a8c3b
--- /dev/null
+++ b/cmake/package.cmake
@@ -0,0 +1,54 @@
+#
+# cmake/package.cmake --- CMake input file for gawk
+#
+# Copyright (C) 2013-2014
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "This is GNU Awk ${GAWK_VERSION}")
+set(CPACK_PACKAGE_VENDOR "GNU Project - Free Software Foundation (FSF)")
+SET(CPACK_PACKAGE_NAME "gawk")
+SET(CPACK_PACKAGE_VERSION "${GAWK_VERSION}")
+SET(CPACK_PACKAGE_VERSION_MAJOR "${GAWK_MAJOR_VERSION}")
+SET(CPACK_PACKAGE_VERSION_MINOR "${GAWK_MINOR_VERSION}")
+SET(CPACK_PACKAGE_VERSION_PATCH "${GAWK_BUGFIX_VERSION}")
+SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING")
+SET(CPACK_RESOURCE_FILE_README "${CMAKE_SOURCE_DIR}/README")
+set(CPACK_PACKAGE_CONTACT "bug-gawk@gnu.org")
+
+IF (WIN32)
+ SET(CPACK_GENERATOR "NSIS")
+ set(CPACK_NSIS_INSTALL_ROOT "C:")
+ set(CPACK_NSIS_MENU_LINKS "http://www.gnu.org/software/gawk" "GNU Awk")
+ set(CPACK_NSIS_MUI_ICON "${CMAKE_SOURCE_DIR}/cmake/auk.ico")
+ set(CPACK_NSIS_MUI_UNIICON "${CMAKE_SOURCE_DIR}/cmake/auk.ico")
+ set(CPACK_NSIS_CONTACT "bug-gawk@gnu.org")
+ set(CPACK_NSIS_DISPLAY_NAME "GNU Awk")
+ELSE()
+ SET(CPACK_PACKAGING_INSTALL_PREFIX /usr)
+ IF(NOT CPACK_GENERATOR)
+ SET(CPACK_GENERATOR "TGZ")
+ ENDIF()
+ message(STATUS "CPACK_GENERATOR set to ${CPACK_GENERATOR}")
+ENDIF()
+
+INCLUDE(CPack)
diff --git a/command.c b/command.c
index 60afe370..a5ed47bf 100644
--- a/command.c
+++ b/command.c
@@ -2863,7 +2863,7 @@ again:
}
while (c != '\0' && c != ' ' && c != '\t') {
- if (! isalpha(c) && ! in_eval) {
+ if (! is_alpha(c) && ! in_eval) {
yyerror(_("invalid character in command"));
return '\n';
}
@@ -3016,12 +3016,12 @@ err:
|| c == ',' || c == '=')
return *lexptr++;
- if (c != '_' && ! isalpha(c)) {
+ if (c != '_' && ! is_alpha(c)) {
yyerror(_("invalid character"));
return '\n';
}
- while (isalnum(c) || c == '_')
+ while (is_identchar(c))
c = *++lexptr;
toklen = lexptr - tokstart;
diff --git a/command.y b/command.y
index 4f809f80..a8942513 100644
--- a/command.y
+++ b/command.y
@@ -1112,7 +1112,7 @@ again:
}
while (c != '\0' && c != ' ' && c != '\t') {
- if (! isalpha(c) && ! in_eval) {
+ if (! is_alpha(c) && ! in_eval) {
yyerror(_("invalid character in command"));
return '\n';
}
@@ -1265,12 +1265,12 @@ err:
|| c == ',' || c == '=')
return *lexptr++;
- if (c != '_' && ! isalpha(c)) {
+ if (c != '_' && ! is_alpha(c)) {
yyerror(_("invalid character"));
return '\n';
}
- while (isalnum(c) || c == '_')
+ while (is_identchar(c))
c = *++lexptr;
toklen = lexptr - tokstart;
diff --git a/configh.in b/configh.in
index bfffd853..1ca2946a 100644
--- a/configh.in
+++ b/configh.in
@@ -195,6 +195,9 @@
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
+/* Define to 1 if you have the `strcasecmp' function. */
+#undef HAVE_STRCASECMP
+
/* Define to 1 if you have the `strchr' function. */
#undef HAVE_STRCHR
@@ -317,9 +320,6 @@
/* Define to 1 if the system has the type `_Bool'. */
#undef HAVE__BOOL
-/* libc is broken for regex handling */
-#undef LIBC_IS_BORKED
-
/* disable lint checks */
#undef NO_LINT
diff --git a/configure b/configure
index 078920ae..038e2081 100755
--- a/configure
+++ b/configure
@@ -5999,14 +5999,6 @@ then
CFLAGS="$CFLAGS -D_SYSV3"
fi
-case $host_os in
-mirbsd*)
-
-$as_echo "#define LIBC_IS_BORKED 1" >>confdefs.h
-
- ;;
-esac
-
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -10007,7 +9999,7 @@ for ac_func in atexit btowc fmod getgrent getgroups grantpt \
isascii iswctype iswlower iswupper mbrlen \
memcmp memcpy memcpy_ulong memmove memset \
memset_ulong mkstemp posix_openpt setenv setlocale setsid snprintf strchr \
- strerror strftime strncasecmp strcoll strtod strtoul \
+ strerror strftime strcasecmp strncasecmp strcoll strtod strtoul \
system tmpfile towlower towupper tzset usleep wcrtomb \
wcscoll wctype
do :
@@ -10151,7 +10143,7 @@ $as_echo "#define DYNAMIC 1" >>confdefs.h
if test "$GCC" = yes; then
case $host_os in
linux*|freebsd*)
- LDFLAGS="$LDFLAGS -export-dynamic"
+ LDFLAGS="$LDFLAGS -Wl,-export-dynamic"
;;
esac
fi
diff --git a/configure.ac b/configure.ac
index 9c39db76..8b4f188e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -119,13 +119,6 @@ dnl need -D_SYSV3 for ISC
CFLAGS="$CFLAGS -D_SYSV3"
fi
-dnl check for systems where libc is borked for regex handling
-case $host_os in
-mirbsd*)
- AC_DEFINE([LIBC_IS_BORKED], 1, [libc is broken for regex handling])
- ;;
-esac
-
dnl Set the programming language for checks. Fortunately,
dnl this only needs to be set once, since everything is in C.
AC_LANG([C])
@@ -275,7 +268,7 @@ AC_CHECK_FUNCS(atexit btowc fmod getgrent getgroups grantpt \
isascii iswctype iswlower iswupper mbrlen \
memcmp memcpy memcpy_ulong memmove memset \
memset_ulong mkstemp posix_openpt setenv setlocale setsid snprintf strchr \
- strerror strftime strncasecmp strcoll strtod strtoul \
+ strerror strftime strcasecmp strncasecmp strcoll strtod strtoul \
system tmpfile towlower towupper tzset usleep wcrtomb \
wcscoll wctype)
dnl this check is for both mbrtowc and the mbstate_t type, which is good
@@ -313,7 +306,7 @@ EOF
if test "$GCC" = yes; then
case $host_os in
linux*|freebsd*)
- LDFLAGS="$LDFLAGS -export-dynamic"
+ LDFLAGS="$LDFLAGS -Wl,-export-dynamic"
;;
esac
fi
diff --git a/custom.h b/custom.h
index 36b4aa0b..efaa0f27 100644
--- a/custom.h
+++ b/custom.h
@@ -47,12 +47,6 @@
#define HAVE_MKTIME 1
#endif
-/* For ULTRIX 4.3 */
-#ifdef ultrix
-#define HAVE_MKTIME 1
-#define GETGROUPS_NOT_STANDARD 1
-#endif
-
/* For whiny users */
#ifdef USE_INCLUDED_STRFTIME
#undef HAVE_STRFTIME
@@ -76,3 +70,11 @@
extern int setenv(const char *name, const char *value, int rewrite);
extern int unsetenv(const char *name);
#endif
+
+/* Junk for dfa.[ch] */
+/* The __pure__ attribute was added in gcc 2.96. */
+#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+# define _GL_ATTRIBUTE_PURE __attribute__ ((__pure__))
+#else
+# define _GL_ATTRIBUTE_PURE /* empty */
+#endif
diff --git a/debug.c b/debug.c
index edf91719..d129c4e3 100644
--- a/debug.c
+++ b/debug.c
@@ -2802,7 +2802,7 @@ debug_prog(INSTRUCTION *pc)
unserialize(OPTION);
unsetenv("DGAWK_RESTART");
fprintf(out_fp, "Restarting ...\n");
- if (run[0] == 'T')
+ if (strcasecmp(run, "true") == 0)
(void) do_run(NULL, 0);
} else if (command_file != NULL) {
@@ -5429,6 +5429,7 @@ do_eval(CMDARG *arg, int cmd ATTRIBUTE_UNUSED)
int ecount = 0, pcount = 0;
int ret;
int save_flags = do_flags;
+ SRCFILE *the_source;
if (prog_running) {
this_frame = find_frame(0);
@@ -5439,7 +5440,7 @@ do_eval(CMDARG *arg, int cmd ATTRIBUTE_UNUSED)
ctxt = new_context();
ctxt->install_func = append_symbol; /* keep track of newly installed globals */
push_context(ctxt);
- (void) add_srcfile(SRC_CMDLINE, arg->a_string, srcfiles, NULL, NULL);
+ the_source = add_srcfile(SRC_CMDLINE, arg->a_string, srcfiles, NULL, NULL);
do_flags = false;
ret = parse_program(&code);
do_flags = save_flags;
@@ -5540,14 +5541,27 @@ do_eval(CMDARG *arg, int cmd ATTRIBUTE_UNUSED)
this_func->param_cnt -= ecount;
}
- /* always destroy symbol "@eval", however destroy all newly installed
+ /*
+ * Always destroy symbol "@eval", however destroy all newly installed
* globals only if fatal error (execute_code() returing NULL).
*/
pop_context(); /* switch to prev context */
free_context(ctxt, (ret_val != NULL)); /* free all instructions and optionally symbols */
- if (ret_val != NULL)
- destroy_symbol(f); /* destroy "@eval" */
+
+ if (ret_val != NULL) {
+ /*
+ * Remove @eval from FUNCTAB, so that above code
+ * will work the next time around.
+ */
+ NODE *s = make_string("@eval", 5);
+
+ (void) assoc_remove(func_table, s);
+ unref(s);
+ }
+
+ free_srcfile(the_source);
+
return false;
}
diff --git a/dfa.c b/dfa.c
index 378305df..2d0e7f20 100644
--- a/dfa.c
+++ b/dfa.c
@@ -37,21 +37,11 @@
#if HAVE_SETLOCALE
#include <locale.h>
#endif
-#ifdef HAVE_STDBOOL_H
-#include <stdbool.h>
-#else
-#include "missing_d/gawkbool.h"
-#endif /* HAVE_STDBOOL_H */
-/* Gawk doesn't use Gnulib, so don't assume that setlocale and
- static_assert are present. */
+/* Gawk doesn't use Gnulib, so don't assume that setlocale is present. */
#ifndef LC_ALL
# define setlocale(category, locale) NULL
#endif
-#ifndef static_assert
-# define static_assert(cond, diagnostic) \
- extern int (*foo (void)) [!!sizeof (struct { int foo: (cond) ? 8 : -1; })]
-#endif
#define STREQ(a, b) (strcmp (a, b) == 0)
@@ -65,7 +55,6 @@
host does not conform to Posix. */
#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
-/* gettext.h ensures that we don't use gettext if ENABLE_NLS is not defined */
#include "gettext.h"
#define _(str) gettext (str)
@@ -76,19 +65,6 @@
# include <wctype.h>
#endif
-#ifdef GAWK
-/* The __pure__ attribute was added in gcc 2.96. */
-#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
-# define _GL_ATTRIBUTE_PURE __attribute__ ((__pure__))
-#else
-# define _GL_ATTRIBUTE_PURE /* empty */
-#endif
-#endif /* GAWK */
-
-#if HAVE_LANGINFO_CODESET
-# include <langinfo.h>
-#endif
-
#include "xalloc.h"
#include "dfa.h"
@@ -101,14 +77,6 @@ is_blank (int c)
}
#endif /* GAWK */
-#ifdef LIBC_IS_BORKED
-extern int gawk_mb_cur_max;
-#undef MB_CUR_MAX
-#define MB_CUR_MAX gawk_mb_cur_max
-#undef mbrtowc
-#define mbrtowc(a, b, c, d) (-1)
-#endif
-
/* HPUX defines these as macros in sys/param.h. */
#ifdef setbit
# undef setbit
@@ -117,24 +85,29 @@ extern int gawk_mb_cur_max;
# undef clrbit
#endif
-/* Number of bits in an unsigned char. */
-#ifndef CHARBITS
-# define CHARBITS 8
-#endif
-
/* First integer value that is greater than any character code. */
-#define NOTCHAR (1 << CHARBITS)
+enum { NOTCHAR = 1 << CHAR_BIT };
-/* INTBITS need not be exact, just a lower bound. */
-#ifndef INTBITS
-# define INTBITS (CHARBITS * sizeof (int))
-#endif
+/* This represents part of a character class. It must be unsigned and
+ at least CHARCLASS_WORD_BITS wide. Any excess bits are zero. */
+typedef unsigned int charclass_word;
+
+/* The number of bits used in a charclass word. utf8_classes assumes
+ this is exactly 32. */
+enum { CHARCLASS_WORD_BITS = 32 };
-/* Number of ints required to hold a bit for every character. */
-#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)
+/* The maximum useful value of a charclass_word; all used bits are 1. */
+#define CHARCLASS_WORD_MASK \
+ (((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1)
+
+/* Number of words required to hold a bit for every character. */
+enum
+{
+ CHARCLASS_WORDS = (NOTCHAR + CHARCLASS_WORD_BITS - 1) / CHARCLASS_WORD_BITS
+};
/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
-typedef unsigned int charclass[CHARCLASS_INTS];
+typedef charclass_word charclass[CHARCLASS_WORDS];
/* Convert a possibly-signed character to an unsigned character. This is
a bit safer than casting to unsigned char, since it catches some type
@@ -237,27 +210,25 @@ enum
a backtracking matcher. */
BEGLINE, /* BEGLINE is a terminal symbol that matches
- the empty string if it is at the beginning
- of a line. */
+ the empty string at the beginning of a
+ line. */
ENDLINE, /* ENDLINE is a terminal symbol that matches
- the empty string if it is at the end of
- a line. */
+ the empty string at the end of a line. */
BEGWORD, /* BEGWORD is a terminal symbol that matches
- the empty string if it is at the beginning
- of a word. */
+ the empty string at the beginning of a
+ word. */
ENDWORD, /* ENDWORD is a terminal symbol that matches
- the empty string if it is at the end of
- a word. */
+ the empty string at the end of a word. */
LIMWORD, /* LIMWORD is a terminal symbol that matches
- the empty string if it is at the beginning
- or the end of a word. */
+ the empty string at the beginning or the
+ end of a word. */
NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
- matches the empty string if it is not at
+ matches the empty string not at
the beginning or end of a word. */
QMARK, /* QMARK is an operator of one argument that
@@ -338,7 +309,8 @@ typedef struct
size_t hash; /* Hash of the positions of this state. */
position_set elems; /* Positions this state could match. */
unsigned char context; /* Context from previous state. */
- char backref; /* True if this state matches a \<digit>. */
+ bool has_backref; /* This state matches a \<digit>. */
+ bool has_mbcset; /* This state matches a MBCSET. */
unsigned short constraint; /* Constraint for this state to accept. */
token first_end; /* Token value of the first END in elems. */
position_set mbps; /* Positions which can match multibyte
@@ -355,13 +327,16 @@ typedef ptrdiff_t state_num;
struct mb_char_classes
{
ptrdiff_t cset;
- int invert;
+ bool invert;
wchar_t *chars; /* Normal characters. */
size_t nchars;
wctype_t *ch_classes; /* Character classes. */
size_t nch_classes;
- wchar_t *range_sts; /* Range characters (start of the range). */
- wchar_t *range_ends; /* Range characters (end of the range). */
+ struct /* Range characters. */
+ {
+ wchar_t beg; /* Range start. */
+ wchar_t end; /* Range end. */
+ } *ranges;
size_t nranges;
char **equivs; /* Equivalence classes. */
size_t nequivs;
@@ -387,10 +362,12 @@ struct dfa
size_t nleaves; /* Number of leaves on the parse tree. */
size_t nregexps; /* Count of parallel regexps being built
with dfaparse. */
- unsigned int mb_cur_max; /* Cached value of MB_CUR_MAX. */
+ bool fast; /* The DFA is fast. */
+ bool multibyte; /* MB_CUR_MAX > 1. */
token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
+ mbstate_t mbs; /* Multibyte conversion state. */
- /* The following are used only if MB_CUR_MAX > 1. */
+ /* The following are valid only if MB_CUR_MAX > 1. */
/* The value of multibyte_prop[i] is defined by following rule.
if tokens[i] < NOTCHAR
@@ -409,14 +386,12 @@ struct dfa
multibyte_prop
= 3 , 1 , 0 , 2 , 3
*/
- size_t nmultibyte_prop;
int *multibyte_prop;
#if MBS_SUPPORT
/* A table indexed by byte values that contains the corresponding wide
- character (if any) for that byte. WEOF means the byte is the
- leading byte of a multibyte character. Invalid and null bytes are
- mapped to themselves. */
+ character (if any) for that byte. WEOF means the byte is not a
+ valid single-byte character. */
wint_t mbrtowc_cache[NOTCHAR];
#endif
@@ -425,10 +400,13 @@ struct dfa
size_t nmbcsets;
size_t mbcsets_alloc;
+ /* Fields filled by the superset. */
+ struct dfa *superset; /* Hint of the dfa. */
+
/* Fields filled by the state builder. */
dfa_state *states; /* States of the dfa. */
state_num sindex; /* Index for adding new states. */
- state_num salloc; /* Number of states currently allocated. */
+ size_t salloc; /* Number of states currently allocated. */
/* Fields filled by the parse tree->NFA conversion. */
position_set *follows; /* Array of follow sets, indexed by position
@@ -438,7 +416,7 @@ struct dfa
matching the given position in a string
matching the regexp. Allocated to the
maximum possible position index. */
- int searchflag; /* True if we are supposed to build a searching
+ bool searchflag; /* We are supposed to build a searching
as opposed to an exact matcher. A searching
matcher finds the first and shortest string
matching a regexp anywhere in the buffer,
@@ -448,16 +426,16 @@ struct dfa
/* Fields filled by dfaexec. */
state_num tralloc; /* Number of transition tables that have
- slots so far. */
+ slots so far, not counting trans[-1]. */
int trcount; /* Number of transition tables that have
actually been built. */
state_num **trans; /* Transition tables for states that can
never accept. If the transitions for a
state have not yet been computed, or the
state could possibly accept, its entry in
- this table is NULL. */
- state_num **realtrans; /* Trans always points to realtrans + 1; this
- is so trans[-1] can contain NULL. */
+ this table is NULL. This points to one
+ past the start of the allocated array,
+ and trans[-1] is always NULL. */
state_num **fails; /* Transition tables after failing to accept
on a state that potentially could do so. */
int *success; /* Table of acceptance conditions used in
@@ -472,56 +450,25 @@ struct dfa
struct dfamust *musts; /* List of strings, at least one of which
is known to appear in any r.e. matching
the dfa. */
+ position_set mb_follows; /* Follow set added by ANYCHAR and/or MBCSET
+ on demand. */
+ int *mb_match_lens; /* Array of length reduced by ANYCHAR and/or
+ MBCSET. Null if mb_follows.elems has not
+ been allocated. */
};
/* Some macros for user access to dfa internals. */
-/* ACCEPTING returns true if s could possibly be an accepting state of r. */
+/* S could possibly be an accepting state of R. */
#define ACCEPTING(s, r) ((r).states[s].constraint)
-/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
- specified context. */
+/* STATE accepts in the specified context. */
#define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \
SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr)
static void dfamust (struct dfa *dfa);
static void regexp (void);
-/* These two macros are identical to the ones in gnulib's xalloc.h,
- except that they do not cast the result to "(t *)", and thus may
- be used via type-free CALLOC and MALLOC macros. */
-#undef XNMALLOC
-#undef XCALLOC
-
-/* Allocate memory for N elements of type T, with error checking. */
-/* extern t *XNMALLOC (size_t n, typename t); */
-# define XNMALLOC(n, t) \
- (sizeof (t) == 1 ? xmalloc (n) : xnmalloc (n, sizeof (t)))
-
-/* Allocate memory for N elements of type T, with error checking,
- and zero it. */
-/* extern t *XCALLOC (size_t n, typename t); */
-# define XCALLOC(n, t) \
- (sizeof (t) == 1 ? xzalloc (n) : xcalloc (n, sizeof (t)))
-
-#define CALLOC(p, n) do { (p) = XCALLOC (n, *(p)); } while (0)
-#undef MALLOC /* Irix defines this */
-#define MALLOC(p, n) do { (p) = XNMALLOC (n, *(p)); } while (0)
-#define REALLOC(p, n) do {(p) = xnrealloc (p, n, sizeof (*(p))); } while (0)
-
-/* Reallocate an array of type *P if N_ALLOC is <= N_REQUIRED. */
-#define REALLOC_IF_NECESSARY(p, n_alloc, n_required) \
- do \
- { \
- if ((n_alloc) <= (n_required)) \
- { \
- size_t new_n_alloc = (n_required) + !(p); \
- (p) = x2nrealloc (p, &new_n_alloc, sizeof (*(p))); \
- (n_alloc) = new_n_alloc; \
- } \
- } \
- while (false)
-
static void
dfambcache (struct dfa *d)
{
@@ -533,52 +480,52 @@ dfambcache (struct dfa *d)
unsigned char uc = i;
mbstate_t s = { 0 };
wchar_t wc;
- wint_t wi;
- switch (mbrtowc (&wc, &c, 1, &s))
- {
- default: wi = wc; break;
- case (size_t) -2: wi = WEOF; break;
- case (size_t) -1: wi = uc; break;
- }
- d->mbrtowc_cache[uc] = wi;
+ d->mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
}
#endif
}
#if MBS_SUPPORT
-/* Given the dfa D, store into *PWC the result of converting the
- leading bytes of the multibyte buffer S of length N bytes, updating
- the conversion state in *MBS. On conversion error, convert just a
- single byte as-is. Return the number of bytes converted.
+/* Store into *PWC the result of converting the leading bytes of the
+ multibyte buffer S of length N bytes, using the mbrtowc_cache in *D
+ and updating the conversion state in *D. On conversion error,
+ convert just a single byte, to WEOF. Return the number of bytes
+ converted.
- This differs from mbrtowc (PWC, S, N, MBS) as follows:
+ This differs from mbrtowc (PWC, S, N, &D->mbs) as follows:
- * Extra arg D, containing an mbrtowc_cache for speed.
+ * PWC points to wint_t, not to wchar_t.
+ * The last arg is a dfa *D instead of merely a multibyte conversion
+ state D->mbs. D also contains an mbrtowc_cache for speed.
* N must be at least 1.
* S[N - 1] must be a sentinel byte.
* Shift encodings are not supported.
* The return value is always in the range 1..N.
- * *MBS is always valid afterwards.
+ * D->mbs is always valid afterwards.
* *PWC is always set to something. */
static size_t
-mbs_to_wchar (struct dfa *d, wchar_t *pwc, char const *s, size_t n,
- mbstate_t *mbs)
+mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d)
{
unsigned char uc = s[0];
wint_t wc = d->mbrtowc_cache[uc];
if (wc == WEOF)
{
- size_t nbytes = mbrtowc (pwc, s, n, mbs);
+ wchar_t wch;
+ size_t nbytes = mbrtowc (&wch, s, n, &d->mbs);
if (0 < nbytes && nbytes < (size_t) -2)
- return nbytes;
- memset (mbs, 0, sizeof *mbs);
- wc = uc;
+ {
+ *pwc = wch;
+ return nbytes;
+ }
+ memset (&d->mbs, 0, sizeof d->mbs);
}
*pwc = wc;
return 1;
}
+#else
+#define mbs_to_wchar(pwc, s, n, d) (WEOF)
#endif
#ifdef DEBUG
@@ -664,19 +611,20 @@ prtok (token t)
static bool
tstbit (unsigned int b, charclass const c)
{
- return c[b / INTBITS] >> b % INTBITS & 1;
+ return c[b / CHARCLASS_WORD_BITS] >> b % CHARCLASS_WORD_BITS & 1;
}
static void
setbit (unsigned int b, charclass c)
{
- c[b / INTBITS] |= 1U << b % INTBITS;
+ c[b / CHARCLASS_WORD_BITS] |= (charclass_word) 1 << b % CHARCLASS_WORD_BITS;
}
static void
clrbit (unsigned int b, charclass c)
{
- c[b / INTBITS] &= ~(1U << b % INTBITS);
+ c[b / CHARCLASS_WORD_BITS] &= ~((charclass_word) 1
+ << b % CHARCLASS_WORD_BITS);
}
static void
@@ -696,40 +644,64 @@ notset (charclass s)
{
int i;
- for (i = 0; i < CHARCLASS_INTS; ++i)
- s[i] = ~s[i];
+ for (i = 0; i < CHARCLASS_WORDS; ++i)
+ s[i] = CHARCLASS_WORD_MASK & ~s[i];
}
-static int
+static bool
equal (charclass const s1, charclass const s2)
{
return memcmp (s1, s2, sizeof (charclass)) == 0;
}
-/* A pointer to the current dfa is kept here during parsing. */
-static struct dfa *dfa;
+/* Ensure that the array addressed by PTR holds at least NITEMS +
+ (PTR || !NITEMS) items. Either return PTR, or reallocate the array
+ and return its new address. Although PTR may be null, the returned
+ value is never null.
+
+ The array holds *NALLOC items; *NALLOC is updated on reallocation.
+ ITEMSIZE is the size of one item. Avoid O(N**2) behavior on arrays
+ growing linearly. */
+static void *
+maybe_realloc (void *ptr, size_t nitems, size_t *nalloc, size_t itemsize)
+{
+ if (nitems < *nalloc)
+ return ptr;
+ *nalloc = nitems;
+ return x2nrealloc (ptr, nalloc, itemsize);
+}
-/* Find the index of charclass s in dfa->charclasses, or allocate a
- new charclass. */
+/* In DFA D, find the index of charclass S, or allocate a new one. */
static size_t
-charclass_index (charclass const s)
+dfa_charclass_index (struct dfa *d, charclass const s)
{
size_t i;
- for (i = 0; i < dfa->cindex; ++i)
- if (equal (s, dfa->charclasses[i]))
+ for (i = 0; i < d->cindex; ++i)
+ if (equal (s, d->charclasses[i]))
return i;
- REALLOC_IF_NECESSARY (dfa->charclasses, dfa->calloc, dfa->cindex + 1);
- ++dfa->cindex;
- copyset (s, dfa->charclasses[i]);
+ d->charclasses = maybe_realloc (d->charclasses, d->cindex, &d->calloc,
+ sizeof *d->charclasses);
+ ++d->cindex;
+ copyset (s, d->charclasses[i]);
return i;
}
+/* A pointer to the current dfa is kept here during parsing. */
+static struct dfa *dfa;
+
+/* Find the index of charclass S in the current DFA, or allocate a new one. */
+static size_t
+charclass_index (charclass const s)
+{
+ return dfa_charclass_index (dfa, s);
+}
+
/* Syntax bits controlling the behavior of the lexical analyzer. */
static reg_syntax_t syntax_bits, syntax_bits_set;
/* Flag for case-folding letters into sets. */
-static int case_fold;
+static bool case_fold;
/* End-of-line byte in data. */
static unsigned char eolbyte;
@@ -752,14 +724,14 @@ static charclass newline;
# define is_valid_unibyte_character(c) (! (MBS_SUPPORT && btowc (c) == WEOF))
#endif
-/* Return non-zero if C is a "word-constituent" byte; zero otherwise. */
+/* C is a "word-constituent" byte. */
#define IS_WORD_CONSTITUENT(C) \
(is_valid_unibyte_character (C) && (isalnum (C) || (C) == '_'))
static int
char_context (unsigned char c)
{
- if (c == eolbyte || c == 0)
+ if (c == eolbyte)
return CTX_NEWLINE;
if (IS_WORD_CONSTITUENT (c))
return CTX_LETTER;
@@ -784,7 +756,7 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
syntax_bits_set = 1;
syntax_bits = bits;
- case_fold = fold;
+ case_fold = fold != 0;
eolbyte = eol;
for (i = 0; i < NOTCHAR; ++i)
@@ -843,23 +815,16 @@ int
using_utf8 (void)
{
static int utf8 = -1;
- if (utf8 == -1)
+ if (utf8 < 0)
{
-#if defined HAVE_LANGINFO_CODESET && MBS_SUPPORT
- utf8 = (STREQ (nl_langinfo (CODESET), "UTF-8"));
-#else
- utf8 = 0;
-#endif
-#ifdef LIBC_IS_BORKED
- if (gawk_mb_cur_max == 1)
- utf8 = 0;
-#endif
+ wchar_t wc;
+ mbstate_t mbs = { 0 };
+ utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
}
-
return utf8;
}
-/* Return true if the current locale is known to be a unibyte locale
+/* The current locale is known to be a unibyte locale
without multicharacter collating sequences and where range
comparisons simply use the native encoding. These locales can be
processed more efficiently. */
@@ -867,7 +832,7 @@ using_utf8 (void)
static bool
using_simple_locale (void)
{
- /* True if the native character set is known to be compatible with
+ /* The native character set is known to be compatible with
the C locale. The following test isn't perfect, but it's good
enough in practice, as only ASCII and EBCDIC are in common use
and this test correctly accepts ASCII and rejects EBCDIC. */
@@ -883,7 +848,7 @@ using_simple_locale (void)
&& '}' == 125 && '~' == 126)
};
- if (! native_c_charset || MB_CUR_MAX > 1)
+ if (! native_c_charset || dfa->multibyte)
return false;
else
{
@@ -907,39 +872,28 @@ using_simple_locale (void)
static char const *lexptr; /* Pointer to next input character. */
static size_t lexleft; /* Number of characters remaining. */
static token lasttok; /* Previous token returned; initially END. */
-static int laststart; /* True if we're separated from beginning or (,
+static bool laststart; /* We're separated from beginning or (,
| only by zero-width characters. */
static size_t parens; /* Count of outstanding left parens. */
static int minrep, maxrep; /* Repeat counts for {m,n}. */
static int cur_mb_len = 1; /* Length of the multibyte representation of
wctok. */
-/* These variables are used only if (MB_CUR_MAX > 1). */
-static mbstate_t mbs; /* mbstate for mbrtowc. */
-static wchar_t wctok; /* Wide character representation of the current
- multibyte character. */
-static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec.
- Each element stores the number of remaining
- bytes of the corresponding multibyte
- character in the input string. A element's
- value is 0 if the corresponding character is
- single-byte.
- e.g., input : 'a', <mb(0)>, <mb(1)>, <mb(2)>
- mblen_buf : 0, 3, 2, 1
- */
-static wchar_t *inputwcs; /* Wide character representation of the input
- string in dfaexec.
- The length of this array is the same as
- the length of input string (char array).
- inputstring[i] is a single-byte char,
- or the first byte of a multibyte char;
- inputwcs[i] is the codepoint. */
-static unsigned char const *buf_begin; /* reference to begin in dfaexec. */
-static unsigned char const *buf_end; /* reference to end in dfaexec. */
+
+static wint_t wctok; /* Wide character representation of the current
+ multibyte character, or WEOF if there was
+ an encoding error. Used only if
+ MB_CUR_MAX > 1. */
#if MBS_SUPPORT
-/* Note that characters become unsigned here. */
+/* Fetch the next lexical input character. Set C (of type int) to the
+ next input byte, except set C to EOF if the input is a multibyte
+ character of length greater than 1. Set WC (of type wint_t) to the
+ value of the input if it is a valid multibyte character (possibly
+ of length 1); otherwise set WC to WEOF. If there is no more input,
+ report EOFERR if EOFERR is not null, and return lasttok = END
+ otherwise. */
# define FETCH_WC(c, wc, eoferr) \
do { \
if (! lexleft) \
@@ -951,8 +905,8 @@ static unsigned char const *buf_end; /* reference to end in dfaexec. */
} \
else \
{ \
- wchar_t _wc; \
- size_t nbytes = mbs_to_wchar (dfa, &_wc, lexptr, lexleft, &mbs); \
+ wint_t _wc; \
+ size_t nbytes = mbs_to_wchar (&_wc, lexptr, lexleft, dfa); \
cur_mb_len = nbytes; \
(wc) = _wc; \
(c) = nbytes == 1 ? to_uchar (*lexptr) : EOF; \
@@ -999,14 +953,17 @@ static short const lonesome_lower[] =
0x03F5, 0x1E9B, 0x1FBE,
};
-static_assert ((sizeof lonesome_lower / sizeof *lonesome_lower + 2
- == CASE_FOLDED_BUFSIZE),
- "CASE_FOLDED_BUFSIZE is wrong");
+/* Maximum number of characters that can be the case-folded
+ counterparts of a single character, not counting the character
+ itself. This is 1 for towupper, 1 for towlower, and 1 for each
+ entry in LONESOME_LOWER. */
+enum
+{ CASE_FOLDED_BUFSIZE = 2 + sizeof lonesome_lower / sizeof *lonesome_lower };
/* Find the characters equal to C after case-folding, other than C
itself, and store them into FOLDED. Return the number of characters
stored. */
-int
+static int
case_folded_counterparts (wchar_t c, wchar_t folded[CASE_FOLDED_BUFSIZE])
{
int i;
@@ -1071,11 +1028,11 @@ find_pred (const char *str)
static token
parse_bracket_exp (void)
{
- int invert;
+ bool invert;
int c, c1, c2;
charclass ccl;
- /* True if this is a bracket expression that dfaexec is known to
+ /* This is a bracket expression that dfaexec is known to
process correctly. */
bool known_bracket_exp = true;
@@ -1092,16 +1049,14 @@ parse_bracket_exp (void)
/* Work area to build a mb_char_classes. */
struct mb_char_classes *work_mbc;
- size_t chars_al, range_sts_al, range_ends_al, ch_classes_al,
- equivs_al, coll_elems_al;
+ size_t chars_al, ranges_al, ch_classes_al, equivs_al, coll_elems_al;
- chars_al = 0;
- range_sts_al = range_ends_al = 0;
- ch_classes_al = equivs_al = coll_elems_al = 0;
- if (MB_CUR_MAX > 1)
+ chars_al = ranges_al = ch_classes_al = equivs_al = coll_elems_al = 0;
+ if (dfa->multibyte)
{
- REALLOC_IF_NECESSARY (dfa->mbcsets, dfa->mbcsets_alloc,
- dfa->nmbcsets + 1);
+ dfa->mbcsets = maybe_realloc (dfa->mbcsets, dfa->nmbcsets,
+ &dfa->mbcsets_alloc,
+ sizeof *dfa->mbcsets);
/* dfa->multibyte_prop[] hold the index of dfa->mbcsets.
We will update dfa->multibyte_prop[] in addtok, because we can't
@@ -1119,16 +1074,16 @@ parse_bracket_exp (void)
if (c == '^')
{
FETCH_WC (c, wc, _("unbalanced ["));
- invert = 1;
+ invert = true;
known_bracket_exp = using_simple_locale ();
}
else
- invert = 0;
+ invert = false;
colon_warning_state = (c == ':');
do
{
- c1 = EOF; /* mark c1 is not initialized". */
+ c1 = NOTCHAR; /* Mark c1 as not initialized. */
colon_warning_state &= ~2;
/* Note that if we're looking at some other [:...:] construct,
@@ -1137,13 +1092,13 @@ parse_bracket_exp (void)
dfa is ever called. */
if (c == '[')
{
-#define MAX_BRACKET_STRING_LEN 32
- char str[MAX_BRACKET_STRING_LEN + 1];
FETCH_WC (c1, wc1, _("unbalanced ["));
if ((c1 == ':' && (syntax_bits & RE_CHAR_CLASSES))
|| c1 == '.' || c1 == '=')
{
+ enum { MAX_BRACKET_STRING_LEN = 32 };
+ char str[MAX_BRACKET_STRING_LEN + 1];
size_t len = 0;
for (;;)
{
@@ -1173,14 +1128,15 @@ parse_bracket_exp (void)
if (!pred)
dfaerror (_("invalid character class"));
- if (MB_CUR_MAX > 1 && !pred->single_byte_only)
+ if (dfa->multibyte && !pred->single_byte_only)
{
/* Store the character class as wctype_t. */
wctype_t wt = (wctype_t) wctype (class);
- REALLOC_IF_NECESSARY (work_mbc->ch_classes,
- ch_classes_al,
- work_mbc->nch_classes + 1);
+ work_mbc->ch_classes
+ = maybe_realloc (work_mbc->ch_classes,
+ work_mbc->nch_classes, &ch_classes_al,
+ sizeof *work_mbc->ch_classes);
work_mbc->ch_classes[work_mbc->nch_classes++] = wt;
}
@@ -1205,7 +1161,7 @@ parse_bracket_exp (void)
if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
FETCH_WC (c, wc, _("unbalanced ["));
- if (c1 == EOF)
+ if (c1 == NOTCHAR)
FETCH_WC (c1, wc1, _("unbalanced ["));
if (c1 == '-')
@@ -1227,29 +1183,30 @@ parse_bracket_exp (void)
if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
FETCH_WC (c2, wc2, _("unbalanced ["));
- if (MB_CUR_MAX > 1)
+ if (dfa->multibyte)
{
/* When case folding map a range, say [m-z] (or even [M-z])
to the pair of ranges, [m-z] [M-Z]. Although this code
is wrong in multiple ways, it's never used in practice.
FIXME: Remove this (and related) unused code. */
- REALLOC_IF_NECESSARY (work_mbc->range_sts,
- range_sts_al, work_mbc->nranges + 1);
- REALLOC_IF_NECESSARY (work_mbc->range_ends,
- range_ends_al, work_mbc->nranges + 1);
- work_mbc->range_sts[work_mbc->nranges] =
- case_fold ? towlower (wc) : (wchar_t) wc;
- work_mbc->range_ends[work_mbc->nranges++] =
- case_fold ? towlower (wc2) : (wchar_t) wc2;
-
- if (case_fold && (iswalpha (wc) || iswalpha (wc2)))
+ if (wc != WEOF && wc2 != WEOF)
{
- REALLOC_IF_NECESSARY (work_mbc->range_sts,
- range_sts_al, work_mbc->nranges + 1);
- work_mbc->range_sts[work_mbc->nranges] = towupper (wc);
- REALLOC_IF_NECESSARY (work_mbc->range_ends,
- range_ends_al, work_mbc->nranges + 1);
- work_mbc->range_ends[work_mbc->nranges++] = towupper (wc2);
+ work_mbc->ranges
+ = maybe_realloc (work_mbc->ranges,
+ work_mbc->nranges + 2,
+ &ranges_al, sizeof *work_mbc->ranges);
+ work_mbc->ranges[work_mbc->nranges].beg
+ = case_fold ? towlower (wc) : wc;
+ work_mbc->ranges[work_mbc->nranges++].end
+ = case_fold ? towlower (wc2) : wc2;
+
+ if (case_fold && (iswalpha (wc) || iswalpha (wc2)))
+ {
+ work_mbc->ranges[work_mbc->nranges].beg
+ = towupper (wc);
+ work_mbc->ranges[work_mbc->nranges++].end
+ = towupper (wc2);
+ }
}
}
else if (using_simple_locale ())
@@ -1284,7 +1241,7 @@ parse_bracket_exp (void)
colon_warning_state |= (c == ':') ? 2 : 4;
- if (MB_CUR_MAX == 1)
+ if (!dfa->multibyte)
{
if (case_fold)
setbit_case_fold_c (c, ccl);
@@ -1293,21 +1250,23 @@ parse_bracket_exp (void)
continue;
}
- if (case_fold)
+ if (wc == WEOF)
+ known_bracket_exp = false;
+ else
{
- wchar_t folded[CASE_FOLDED_BUFSIZE];
- int i, n = case_folded_counterparts (wc, folded);
- REALLOC_IF_NECESSARY (work_mbc->chars, chars_al,
- work_mbc->nchars + n);
+ wchar_t folded[CASE_FOLDED_BUFSIZE + 1];
+ int i;
+ int n = (case_fold ? case_folded_counterparts (wc, folded + 1) + 1
+ : 1);
+ folded[0] = wc;
for (i = 0; i < n; i++)
if (!setbit_wc (folded[i], ccl))
- work_mbc->chars[work_mbc->nchars++] = folded[i];
- }
- if (!setbit_wc (wc, ccl))
- {
- REALLOC_IF_NECESSARY (work_mbc->chars, chars_al,
- work_mbc->nchars + 1);
- work_mbc->chars[work_mbc->nchars++] = wc;
+ {
+ work_mbc->chars
+ = maybe_realloc (work_mbc->chars, work_mbc->nchars,
+ &chars_al, sizeof *work_mbc->chars);
+ work_mbc->chars[work_mbc->nchars++] = folded[i];
+ }
}
}
while ((wc = wc1, (c = c1) != ']'));
@@ -1318,7 +1277,7 @@ parse_bracket_exp (void)
if (! known_bracket_exp)
return BACKREF;
- if (MB_CUR_MAX > 1)
+ if (dfa->multibyte)
{
static charclass zeroclass;
work_mbc->invert = invert;
@@ -1328,7 +1287,7 @@ parse_bracket_exp (void)
if (invert)
{
- assert (MB_CUR_MAX == 1);
+ assert (!dfa->multibyte);
notset (ccl);
if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
clrbit (eolbyte, ccl);
@@ -1340,8 +1299,8 @@ parse_bracket_exp (void)
static token
lex (void)
{
- unsigned int c, c2;
- int backslash = 0;
+ int c, c2;
+ bool backslash = false;
charclass ccl;
int i;
@@ -1354,8 +1313,6 @@ lex (void)
for (i = 0; i < 2; ++i)
{
FETCH_WC (c, wctok, NULL);
- if (c == (unsigned int) EOF)
- goto normal_char;
switch (c)
{
@@ -1364,7 +1321,7 @@ lex (void)
goto normal_char;
if (lexleft == 0)
dfaerror (_("unfinished \\ escape"));
- backslash = 1;
+ backslash = true;
break;
case '^':
@@ -1402,7 +1359,7 @@ lex (void)
case '9':
if (backslash && !(syntax_bits & RE_NO_BK_REFS))
{
- laststart = 0;
+ laststart = false;
return lasttok = BACKREF;
}
goto normal_char;
@@ -1510,14 +1467,14 @@ lex (void)
{
if (syntax_bits & RE_INVALID_INTERVAL_ORD)
goto normal_char;
- dfaerror (_("Invalid content of \\{\\}"));
+ dfaerror (_("invalid content of \\{\\}"));
}
if (RE_DUP_MAX < maxrep)
- dfaerror (_("Regular expression too big"));
+ dfaerror (_("regular expression too big"));
lexptr = p;
lexleft = lim - p;
}
- laststart = 0;
+ laststart = false;
return lasttok = REPMN;
case '|':
@@ -1525,21 +1482,21 @@ lex (void)
goto normal_char;
if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0))
goto normal_char;
- laststart = 1;
+ laststart = true;
return lasttok = OR;
case '\n':
if (syntax_bits & RE_LIMITED_OPS
|| backslash || !(syntax_bits & RE_NEWLINE_ALT))
goto normal_char;
- laststart = 1;
+ laststart = true;
return lasttok = OR;
case '(':
if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
goto normal_char;
++parens;
- laststart = 1;
+ laststart = true;
return lasttok = LPAREN;
case ')':
@@ -1548,17 +1505,17 @@ lex (void)
if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
goto normal_char;
--parens;
- laststart = 0;
+ laststart = false;
return lasttok = RPAREN;
case '.':
if (backslash)
goto normal_char;
- if (MB_CUR_MAX > 1)
+ if (dfa->multibyte)
{
/* In multibyte environment period must match with a single
character not a byte. So we use ANYCHAR. */
- laststart = 0;
+ laststart = false;
return lasttok = ANYCHAR;
}
zeroset (ccl);
@@ -1567,14 +1524,14 @@ lex (void)
clrbit (eolbyte, ccl);
if (syntax_bits & RE_DOT_NOT_NULL)
clrbit ('\0', ccl);
- laststart = 0;
+ laststart = false;
return lasttok = CSET + charclass_index (ccl);
case 's':
case 'S':
if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
goto normal_char;
- if (MB_CUR_MAX == 1)
+ if (!dfa->multibyte)
{
zeroset (ccl);
for (c2 = 0; c2 < NOTCHAR; ++c2)
@@ -1582,7 +1539,7 @@ lex (void)
setbit (c2, ccl);
if (c == 'S')
notset (ccl);
- laststart = 0;
+ laststart = false;
return lasttok = CSET + charclass_index (ccl);
}
@@ -1612,7 +1569,7 @@ lex (void)
POP_LEX_STATE ();
- laststart = 0;
+ laststart = false;
return lasttok;
case 'w':
@@ -1625,21 +1582,21 @@ lex (void)
setbit (c2, ccl);
if (c == 'W')
notset (ccl);
- laststart = 0;
+ laststart = false;
return lasttok = CSET + charclass_index (ccl);
case '[':
if (backslash)
goto normal_char;
- laststart = 0;
+ laststart = false;
return lasttok = parse_bracket_exp ();
default:
normal_char:
- laststart = 0;
+ laststart = false;
/* For multibyte character sets, folding is done in atom. Always
return WCHAR. */
- if (MB_CUR_MAX > 1)
+ if (dfa->multibyte)
return lasttok = WCHAR;
if (case_fold && isalpha (c))
@@ -1671,14 +1628,16 @@ static size_t depth; /* Current depth of a hypothetical stack
static void
addtok_mb (token t, int mbprop)
{
- if (MB_CUR_MAX > 1)
+ if (dfa->talloc == dfa->tindex)
{
- REALLOC_IF_NECESSARY (dfa->multibyte_prop, dfa->nmultibyte_prop,
- dfa->tindex + 1);
- dfa->multibyte_prop[dfa->tindex] = mbprop;
+ dfa->tokens = x2nrealloc (dfa->tokens, &dfa->talloc,
+ sizeof *dfa->tokens);
+ if (dfa->multibyte)
+ dfa->multibyte_prop = xnrealloc (dfa->multibyte_prop, dfa->talloc,
+ sizeof *dfa->multibyte_prop);
}
-
- REALLOC_IF_NECESSARY (dfa->tokens, dfa->talloc, dfa->tindex + 1);
+ if (dfa->multibyte)
+ dfa->multibyte_prop[dfa->tindex] = mbprop;
dfa->tokens[dfa->tindex++] = t;
switch (t)
@@ -1693,8 +1652,12 @@ addtok_mb (token t, int mbprop)
--depth;
break;
+ case BACKREF:
+ dfa->fast = false;
+ /* fallthrough */
default:
++dfa->nleaves;
+ /* fallthrough */
case EMPTY:
++depth;
break;
@@ -1710,7 +1673,7 @@ static void addtok_wc (wint_t wc);
static void
addtok (token t)
{
- if (MB_CUR_MAX > 1 && t == MBCSET)
+ if (dfa->multibyte && t == MBCSET)
{
bool need_or = false;
struct mb_char_classes *work_mbc = &dfa->mbcsets[dfa->nmbcsets - 1];
@@ -1805,11 +1768,21 @@ add_utf8_anychar (void)
{
#if MBS_SUPPORT
static const charclass utf8_classes[5] = {
- {0, 0, 0, 0, ~0, ~0, 0, 0}, /* 80-bf: non-leading bytes */
- {~0, ~0, ~0, ~0, 0, 0, 0, 0}, /* 00-7f: 1-byte sequence */
- {0, 0, 0, 0, 0, 0, ~3, 0}, /* c2-df: 2-byte sequence */
- {0, 0, 0, 0, 0, 0, 0, 0xffff}, /* e0-ef: 3-byte sequence */
- {0, 0, 0, 0, 0, 0, 0, 0xff0000} /* f0-f7: 4-byte sequence */
+ /* 80-bf: non-leading bytes. */
+ {0, 0, 0, 0, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, 0, 0},
+
+ /* 00-7f: 1-byte sequence. */
+ {CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK,
+ CHARCLASS_WORD_MASK, 0, 0, 0, 0},
+
+ /* c2-df: 2-byte sequence. */
+ {0, 0, 0, 0, 0, 0, ~3 & CHARCLASS_WORD_MASK, 0},
+
+ /* e0-ef: 3-byte sequence. */
+ {0, 0, 0, 0, 0, 0, 0, 0xffff},
+
+ /* f0-f7: 4-byte sequence. */
+ {0, 0, 0, 0, 0, 0, 0, 0xff0000}
};
const unsigned int n = sizeof (utf8_classes) / sizeof (utf8_classes[0]);
unsigned int i;
@@ -1891,16 +1864,21 @@ atom (void)
{
if (MBS_SUPPORT && tok == WCHAR)
{
- addtok_wc (wctok);
-
- if (case_fold)
+ if (wctok == WEOF)
+ addtok (BACKREF);
+ else
{
- wchar_t folded[CASE_FOLDED_BUFSIZE];
- int i, n = case_folded_counterparts (wctok, folded);
- for (i = 0; i < n; i++)
+ addtok_wc (wctok);
+
+ if (case_fold)
{
- addtok_wc (folded[i]);
- addtok (OR);
+ wchar_t folded[CASE_FOLDED_BUFSIZE];
+ int i, n = case_folded_counterparts (wctok, folded);
+ for (i = 0; i < n; i++)
+ {
+ addtok_wc (folded[i]);
+ addtok (OR);
+ }
}
}
@@ -1967,7 +1945,7 @@ copytoks (size_t tindex, size_t ntokens)
{
size_t i;
- if (MB_CUR_MAX > 1)
+ if (dfa->multibyte)
for (i = 0; i < ntokens; ++i)
addtok_mb (dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]);
else
@@ -2050,12 +2028,12 @@ dfaparse (char const *s, size_t len, struct dfa *d)
lexptr = s;
lexleft = len;
lasttok = END;
- laststart = 1;
+ laststart = true;
parens = 0;
- if (MB_CUR_MAX > 1)
+ if (dfa->multibyte)
{
cur_mb_len = 0;
- memset (&mbs, 0, sizeof mbs);
+ memset (&d->mbs, 0, sizeof d->mbs);
}
if (!syntax_bits_set)
@@ -2080,19 +2058,24 @@ dfaparse (char const *s, size_t len, struct dfa *d)
/* Some primitives for operating on sets of positions. */
-/* Copy one set to another; the destination must be large enough. */
+/* Copy one set to another. */
static void
copy (position_set const *src, position_set * dst)
{
- REALLOC_IF_NECESSARY (dst->elems, dst->alloc, src->nelem);
- memcpy (dst->elems, src->elems, sizeof (dst->elems[0]) * src->nelem);
+ if (dst->alloc < src->nelem)
+ {
+ free (dst->elems);
+ dst->alloc = src->nelem;
+ dst->elems = x2nrealloc (NULL, &dst->alloc, sizeof *dst->elems);
+ }
+ memcpy (dst->elems, src->elems, src->nelem * sizeof *dst->elems);
dst->nelem = src->nelem;
}
static void
alloc_position_set (position_set * s, size_t size)
{
- MALLOC (s->elems, size);
+ s->elems = xnmalloc (size, sizeof *s->elems);
s->alloc = size;
s->nelem = 0;
}
@@ -2122,7 +2105,7 @@ insert (position p, position_set * s)
return;
}
- REALLOC_IF_NECESSARY (s->elems, s->alloc, count + 1);
+ s->elems = maybe_realloc (s->elems, count, &s->alloc, sizeof *s->elems);
for (i = count; i > lo; i--)
s->elems[i] = s->elems[i - 1];
s->elems[lo] = p;
@@ -2136,7 +2119,12 @@ merge (position_set const *s1, position_set const *s2, position_set * m)
{
size_t i = 0, j = 0;
- REALLOC_IF_NECESSARY (m->elems, m->alloc, s1->nelem + s2->nelem);
+ if (m->alloc < s1->nelem + s2->nelem)
+ {
+ free (m->elems);
+ m->elems = maybe_realloc (NULL, s1->nelem + s2->nelem, &m->alloc,
+ sizeof *m->elems);
+ }
m->nelem = 0;
while (i < s1->nelem && j < s2->nelem)
if (s1->elems[i].index > s2->elems[j].index)
@@ -2197,19 +2185,19 @@ state_index (struct dfa *d, position_set const *s, int context)
}
/* We'll have to create a new state. */
- REALLOC_IF_NECESSARY (d->states, d->salloc, d->sindex + 1);
+ d->states = maybe_realloc (d->states, d->sindex, &d->salloc,
+ sizeof *d->states);
d->states[i].hash = hash;
alloc_position_set (&d->states[i].elems, s->nelem);
copy (s, &d->states[i].elems);
d->states[i].context = context;
- d->states[i].backref = 0;
+ d->states[i].has_backref = false;
+ d->states[i].has_mbcset = false;
d->states[i].constraint = 0;
d->states[i].first_end = 0;
- if (MBS_SUPPORT)
- {
- d->states[i].mbps.nelem = 0;
- d->states[i].mbps.elems = NULL;
- }
+ d->states[i].mbps.nelem = 0;
+ d->states[i].mbps.elems = NULL;
+
for (j = 0; j < s->nelem; ++j)
if (d->tokens[s->elems[j].index] < 0)
{
@@ -2222,7 +2210,7 @@ state_index (struct dfa *d, position_set const *s, int context)
else if (d->tokens[s->elems[j].index] == BACKREF)
{
d->states[i].constraint = NO_CONSTRAINT;
- d->states[i].backref = 1;
+ d->states[i].has_backref = true;
}
++d->sindex;
@@ -2236,13 +2224,11 @@ state_index (struct dfa *d, position_set const *s, int context)
constraint. Repeat exhaustively until no funny positions are left.
S->elems must be large enough to hold the result. */
static void
-epsclosure (position_set * s, struct dfa const *d)
+epsclosure (position_set *s, struct dfa const *d, char *visited)
{
size_t i, j;
- char *visited; /* Array of booleans, enough to use char, not int. */
position p, old;
-
- CALLOC (visited, d->tindex);
+ bool initialized = false;
for (i = 0; i < s->nelem; ++i)
if (d->tokens[s->elems[i].index] >= NOTCHAR
@@ -2253,6 +2239,11 @@ epsclosure (position_set * s, struct dfa const *d)
#endif
&& d->tokens[s->elems[i].index] < CSET)
{
+ if (!initialized)
+ {
+ memset (visited, 0, d->tindex * sizeof (*visited));
+ initialized = true;
+ }
old = s->elems[i];
p.constraint = old.constraint;
delete (s->elems[i], s);
@@ -2293,8 +2284,6 @@ epsclosure (position_set * s, struct dfa const *d)
/* Force rescan to start at the beginning. */
i = -1;
}
-
- free (visited);
}
/* Returns the set of contexts for which there is at least one
@@ -2309,7 +2298,7 @@ charclass_context (charclass c)
if (tstbit (eolbyte, c))
context |= CTX_NEWLINE;
- for (j = 0; j < CHARCLASS_INTS; ++j)
+ for (j = 0; j < CHARCLASS_WORDS; ++j)
{
if (c[j] & letters[j])
context |= CTX_LETTER;
@@ -2399,19 +2388,29 @@ state_separate_contexts (position_set const *s)
void
dfaanalyze (struct dfa *d, int searchflag)
{
- int *nullable; /* Nullable stack. */
- size_t *nfirstpos; /* Element count stack for firstpos sets. */
- position *firstpos; /* Array where firstpos elements are stored. */
- size_t *nlastpos; /* Element count stack for lastpos sets. */
- position *lastpos; /* Array where lastpos elements are stored. */
+ /* Array allocated to hold position sets. */
+ position *posalloc = xnmalloc (d->nleaves, 2 * sizeof *posalloc);
+ /* Firstpos and lastpos elements. */
+ position *firstpos = posalloc + d->nleaves;
+ position *lastpos = firstpos + d->nleaves;
+
+ /* Stack for element counts and nullable flags. */
+ struct
+ {
+ /* Whether the entry is nullable. */
+ bool nullable;
+
+ /* Counts of firstpos and lastpos sets. */
+ size_t nfirstpos;
+ size_t nlastpos;
+ } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc;
+
position_set tmp; /* Temporary set for merging sets. */
position_set merged; /* Result of merging sets. */
int separate_contexts; /* Context wanted by some position. */
- int *o_nullable;
- size_t *o_nfirst, *o_nlast;
- position *o_firstpos, *o_lastpos;
size_t i, j;
position *pos;
+ char *visited = xnmalloc (d->tindex, sizeof *visited);
#ifdef DEBUG
fprintf (stderr, "dfaanalyze:\n");
@@ -2423,21 +2422,9 @@ dfaanalyze (struct dfa *d, int searchflag)
putc ('\n', stderr);
#endif
- d->searchflag = searchflag;
-
- MALLOC (nullable, d->depth);
- o_nullable = nullable;
- MALLOC (nfirstpos, d->depth);
- o_nfirst = nfirstpos;
- MALLOC (firstpos, d->nleaves);
- o_firstpos = firstpos, firstpos += d->nleaves;
- MALLOC (nlastpos, d->depth);
- o_nlast = nlastpos;
- MALLOC (lastpos, d->nleaves);
- o_lastpos = lastpos, lastpos += d->nleaves;
+ d->searchflag = searchflag != 0;
alloc_position_set (&merged, d->nleaves);
-
- CALLOC (d->follows, d->tindex);
+ d->follows = xcalloc (d->tindex, sizeof *d->follows);
for (i = 0; i < d->tindex; ++i)
{
@@ -2445,38 +2432,40 @@ dfaanalyze (struct dfa *d, int searchflag)
{
case EMPTY:
/* The empty set is nullable. */
- *nullable++ = 1;
+ stk->nullable = true;
/* The firstpos and lastpos of the empty leaf are both empty. */
- *nfirstpos++ = *nlastpos++ = 0;
+ stk->nfirstpos = stk->nlastpos = 0;
+ stk++;
break;
case STAR:
case PLUS:
/* Every element in the firstpos of the argument is in the follow
of every element in the lastpos. */
- tmp.nelem = nfirstpos[-1];
+ tmp.nelem = stk[-1].nfirstpos;
tmp.elems = firstpos;
pos = lastpos;
- for (j = 0; j < nlastpos[-1]; ++j)
+ for (j = 0; j < stk[-1].nlastpos; ++j)
{
merge (&tmp, &d->follows[pos[j].index], &merged);
copy (&merged, &d->follows[pos[j].index]);
}
+ /* fallthrough */
case QMARK:
/* A QMARK or STAR node is automatically nullable. */
if (d->tokens[i] != PLUS)
- nullable[-1] = 1;
+ stk[-1].nullable = true;
break;
case CAT:
/* Every element in the firstpos of the second argument is in the
follow of every element in the lastpos of the first argument. */
- tmp.nelem = nfirstpos[-1];
+ tmp.nelem = stk[-1].nfirstpos;
tmp.elems = firstpos;
- pos = lastpos + nlastpos[-1];
- for (j = 0; j < nlastpos[-2]; ++j)
+ pos = lastpos + stk[-1].nlastpos;
+ for (j = 0; j < stk[-2].nlastpos; ++j)
{
merge (&tmp, &d->follows[pos[j].index], &merged);
copy (&merged, &d->follows[pos[j].index]);
@@ -2484,43 +2473,39 @@ dfaanalyze (struct dfa *d, int searchflag)
/* The firstpos of a CAT node is the firstpos of the first argument,
union that of the second argument if the first is nullable. */
- if (nullable[-2])
- nfirstpos[-2] += nfirstpos[-1];
+ if (stk[-2].nullable)
+ stk[-2].nfirstpos += stk[-1].nfirstpos;
else
- firstpos += nfirstpos[-1];
- --nfirstpos;
+ firstpos += stk[-1].nfirstpos;
/* The lastpos of a CAT node is the lastpos of the second argument,
union that of the first argument if the second is nullable. */
- if (nullable[-1])
- nlastpos[-2] += nlastpos[-1];
+ if (stk[-1].nullable)
+ stk[-2].nlastpos += stk[-1].nlastpos;
else
{
- pos = lastpos + nlastpos[-2];
- for (j = nlastpos[-1]; j-- > 0;)
+ pos = lastpos + stk[-2].nlastpos;
+ for (j = stk[-1].nlastpos; j-- > 0;)
pos[j] = lastpos[j];
- lastpos += nlastpos[-2];
- nlastpos[-2] = nlastpos[-1];
+ lastpos += stk[-2].nlastpos;
+ stk[-2].nlastpos = stk[-1].nlastpos;
}
- --nlastpos;
/* A CAT node is nullable if both arguments are nullable. */
- nullable[-2] = nullable[-1] && nullable[-2];
- --nullable;
+ stk[-2].nullable &= stk[-1].nullable;
+ stk--;
break;
case OR:
/* The firstpos is the union of the firstpos of each argument. */
- nfirstpos[-2] += nfirstpos[-1];
- --nfirstpos;
+ stk[-2].nfirstpos += stk[-1].nfirstpos;
/* The lastpos is the union of the lastpos of each argument. */
- nlastpos[-2] += nlastpos[-1];
- --nlastpos;
+ stk[-2].nlastpos += stk[-1].nlastpos;
/* An OR node is nullable if either argument is nullable. */
- nullable[-2] = nullable[-1] || nullable[-2];
- --nullable;
+ stk[-2].nullable |= stk[-1].nullable;
+ stk--;
break;
default:
@@ -2529,10 +2514,12 @@ dfaanalyze (struct dfa *d, int searchflag)
an "epsilon closure" effectively makes them nullable later.
Backreferences have to get a real position so we can detect
transitions on them later. But they are nullable. */
- *nullable++ = d->tokens[i] == BACKREF;
+ stk->nullable = d->tokens[i] == BACKREF;
/* This position is in its own firstpos and lastpos. */
- *nfirstpos++ = *nlastpos++ = 1;
+ stk->nfirstpos = stk->nlastpos = 1;
+ stk++;
+
--firstpos, --lastpos;
firstpos->index = lastpos->index = i;
firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
@@ -2546,15 +2533,16 @@ dfaanalyze (struct dfa *d, int searchflag)
fprintf (stderr, "node %zd:", i);
prtok (d->tokens[i]);
putc ('\n', stderr);
- fprintf (stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n");
+ fprintf (stderr,
+ stk[-1].nullable ? " nullable: yes\n" : " nullable: no\n");
fprintf (stderr, " firstpos:");
- for (j = nfirstpos[-1]; j-- > 0;)
+ for (j = stk[-1].nfirstpos; j-- > 0;)
{
fprintf (stderr, " %zd:", firstpos[j].index);
prtok (d->tokens[firstpos[j].index]);
}
fprintf (stderr, "\n lastpos:");
- for (j = nlastpos[-1]; j-- > 0;)
+ for (j = stk[-1].nlastpos; j-- > 0;)
{
fprintf (stderr, " %zd:", lastpos[j].index);
prtok (d->tokens[lastpos[j].index]);
@@ -2584,33 +2572,27 @@ dfaanalyze (struct dfa *d, int searchflag)
putc ('\n', stderr);
#endif
copy (&d->follows[i], &merged);
- epsclosure (&merged, d);
+ epsclosure (&merged, d, visited);
copy (&merged, &d->follows[i]);
}
/* Get the epsilon closure of the firstpos of the regexp. The result will
be the set of positions of state 0. */
merged.nelem = 0;
- for (i = 0; i < nfirstpos[-1]; ++i)
+ for (i = 0; i < stk[-1].nfirstpos; ++i)
insert (firstpos[i], &merged);
- epsclosure (&merged, d);
+ epsclosure (&merged, d, visited);
/* Build the initial state. */
- d->salloc = 1;
- d->sindex = 0;
- MALLOC (d->states, d->salloc);
-
separate_contexts = state_separate_contexts (&merged);
state_index (d, &merged,
(separate_contexts & CTX_NEWLINE
? CTX_NEWLINE : separate_contexts ^ CTX_ANY));
- free (o_nullable);
- free (o_nfirst);
- free (o_firstpos);
- free (o_nlast);
- free (o_lastpos);
+ free (posalloc);
+ free (stkalloc);
free (merged.elems);
+ free (visited);
}
@@ -2647,16 +2629,16 @@ dfaanalyze (struct dfa *d, int searchflag)
void
dfastate (state_num s, struct dfa *d, state_num trans[])
{
- leaf_set *grps; /* As many as will ever be needed. */
- charclass *labels; /* Labels corresponding to the groups. */
+ leaf_set grps[NOTCHAR]; /* As many as will ever be needed. */
+ charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */
size_t ngrps = 0; /* Number of groups actually used. */
position pos; /* Current position being considered. */
charclass matches; /* Set of matching characters. */
- int matchesf; /* True if matches is nonempty. */
+ charclass_word matchesf; /* Nonzero if matches is nonempty. */
charclass intersect; /* Intersection with some label set. */
- int intersectf; /* True if intersect is nonempty. */
+ charclass_word intersectf; /* Nonzero if intersect is nonempty. */
charclass leftovers; /* Stuff in the label that didn't match. */
- int leftoversf; /* True if leftovers is nonempty. */
+ charclass_word leftoversf; /* Nonzero if leftovers is nonempty. */
position_set follows; /* Union of the follows of some group. */
position_set tmp; /* Temporary space for merging sets. */
int possible_contexts; /* Contexts that this group can match. */
@@ -2664,12 +2646,9 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
state_num state; /* New state. */
state_num state_newline; /* New state on a newline transition. */
state_num state_letter; /* New state on a letter transition. */
- int next_isnt_1st_byte = 0; /* Flag if we can't add state0. */
+ bool next_isnt_1st_byte = false; /* We can't add state0. */
size_t i, j, k;
- MALLOC (grps, NOTCHAR);
- MALLOC (labels, NOTCHAR);
-
zeroset (matches);
for (i = 0; i < d->states[s].elems.nelem; ++i)
@@ -2679,21 +2658,24 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
setbit (d->tokens[pos.index], matches);
else if (d->tokens[pos.index] >= CSET)
copyset (d->charclasses[d->tokens[pos.index] - CSET], matches);
- else if (MBS_SUPPORT
- && (d->tokens[pos.index] == ANYCHAR
- || d->tokens[pos.index] == MBCSET))
- /* MB_CUR_MAX > 1 */
+ else
{
- /* ANYCHAR and MBCSET must match with a single character, so we
- must put it to d->states[s].mbps, which contains the positions
- which can match with a single character not a byte. */
- if (d->states[s].mbps.nelem == 0)
- alloc_position_set (&d->states[s].mbps, 1);
- insert (pos, &(d->states[s].mbps));
+ if (MBS_SUPPORT
+ && (d->tokens[pos.index] == MBCSET
+ || d->tokens[pos.index] == ANYCHAR))
+ {
+ /* MB_CUR_MAX > 1 */
+ if (d->tokens[pos.index] == MBCSET)
+ d->states[s].has_mbcset = true;
+ /* ANYCHAR and MBCSET must match with a single character, so we
+ must put it to d->states[s].mbps, which contains the positions
+ which can match with a single character not a byte. */
+ if (d->states[s].mbps.nelem == 0)
+ alloc_position_set (&d->states[s].mbps, 1);
+ insert (pos, &(d->states[s].mbps));
+ }
continue;
}
- else
- continue;
/* Some characters may need to be eliminated from matches because
they fail in the current context. */
@@ -2701,21 +2683,21 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
{
if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_NEWLINE))
- for (j = 0; j < CHARCLASS_INTS; ++j)
+ for (j = 0; j < CHARCLASS_WORDS; ++j)
matches[j] &= ~newline[j];
if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_LETTER))
- for (j = 0; j < CHARCLASS_INTS; ++j)
+ for (j = 0; j < CHARCLASS_WORDS; ++j)
matches[j] &= ~letters[j];
if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_NONE))
- for (j = 0; j < CHARCLASS_INTS; ++j)
+ for (j = 0; j < CHARCLASS_WORDS; ++j)
matches[j] &= letters[j] | newline[j];
/* If there are no characters left, there's no point in going on. */
- for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j)
+ for (j = 0; j < CHARCLASS_WORDS && !matches[j]; ++j)
continue;
- if (j == CHARCLASS_INTS)
+ if (j == CHARCLASS_WORDS)
continue;
}
@@ -2731,20 +2713,20 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
/* Check if this group's label has a nonempty intersection with
matches. */
intersectf = 0;
- for (k = 0; k < CHARCLASS_INTS; ++k)
- (intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0;
+ for (k = 0; k < CHARCLASS_WORDS; ++k)
+ intersectf |= intersect[k] = matches[k] & labels[j][k];
if (!intersectf)
continue;
/* It does; now find the set differences both ways. */
leftoversf = matchesf = 0;
- for (k = 0; k < CHARCLASS_INTS; ++k)
+ for (k = 0; k < CHARCLASS_WORDS; ++k)
{
/* Even an optimizing compiler can't know this for sure. */
- int match = matches[k], label = labels[j][k];
+ charclass_word match = matches[k], label = labels[j][k];
- (leftovers[k] = ~match & label) ? (leftoversf = 1) : 0;
- (matches[k] = match & ~label) ? (matchesf = 1) : 0;
+ leftoversf |= leftovers[k] = ~match & label;
+ matchesf |= matches[k] = match & ~label;
}
/* If there were leftovers, create a new group labeled with them. */
@@ -2752,7 +2734,8 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
{
copyset (leftovers, labels[ngrps]);
copyset (intersect, labels[j]);
- MALLOC (grps[ngrps].elems, d->nleaves);
+ grps[ngrps].elems = xnmalloc (d->nleaves,
+ sizeof *grps[ngrps].elems);
memcpy (grps[ngrps].elems, grps[j].elems,
sizeof (grps[j].elems[0]) * grps[j].nelem);
grps[ngrps].nelem = grps[j].nelem;
@@ -2775,7 +2758,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
{
copyset (matches, labels[ngrps]);
zeroset (matches);
- MALLOC (grps[ngrps].elems, d->nleaves);
+ grps[ngrps].elems = xnmalloc (d->nleaves, sizeof *grps[ngrps].elems);
grps[ngrps].nelem = 1;
grps[ngrps].elems[0] = pos.index;
++ngrps;
@@ -2821,7 +2804,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k)
insert (d->follows[grps[i].elems[j]].elems[k], &follows);
- if (d->mb_cur_max > 1)
+ if (d->multibyte)
{
/* If a token in follows.elems is not 1st byte of a multibyte
character, or the states of follows must accept the bytes
@@ -2841,12 +2824,12 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
codepoint of <sb a>, it must not be <sb a> but 2nd byte of
<mb A>, so we cannot add state[0]. */
- next_isnt_1st_byte = 0;
+ next_isnt_1st_byte = false;
for (j = 0; j < follows.nelem; ++j)
{
if (!(d->multibyte_prop[follows.elems[j].index] & 1))
{
- next_isnt_1st_byte = 1;
+ next_isnt_1st_byte = true;
break;
}
}
@@ -2854,10 +2837,11 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
/* If we are building a searching matcher, throw in the positions
of state 0 as well. */
- if (d->searchflag
- && (!MBS_SUPPORT || (d->mb_cur_max == 1 || !next_isnt_1st_byte)))
- for (j = 0; j < d->states[0].elems.nelem; ++j)
- insert (d->states[0].elems.elems[j], &follows);
+ if (d->searchflag && (!d->multibyte || !next_isnt_1st_byte))
+ {
+ merge (&d->states[0].elems, &follows, &tmp);
+ copy (&tmp, &follows);
+ }
/* Find out if the new state will want any context information. */
possible_contexts = charclass_context (labels[i]);
@@ -2878,11 +2862,11 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
state_letter = state;
/* Set the transitions for each character in the current label. */
- for (j = 0; j < CHARCLASS_INTS; ++j)
- for (k = 0; k < INTBITS; ++k)
- if (labels[i][j] & 1U << k)
+ for (j = 0; j < CHARCLASS_WORDS; ++j)
+ for (k = 0; k < CHARCLASS_WORD_BITS; ++k)
+ if (labels[i][j] >> k & 1)
{
- int c = j * INTBITS + k;
+ int c = j * CHARCLASS_WORD_BITS + k;
if (c == eolbyte)
trans[c] = state_newline;
@@ -2897,8 +2881,31 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
free (grps[i].elems);
free (follows.elems);
free (tmp.elems);
- free (grps);
- free (labels);
+}
+
+/* Make sure D's state arrays are large enough to hold NEW_STATE. */
+static void
+realloc_trans_if_necessary (struct dfa *d, state_num new_state)
+{
+ state_num oldalloc = d->tralloc;
+ if (oldalloc <= new_state)
+ {
+ state_num **realtrans = d->trans ? d->trans - 1 : NULL;
+ size_t newalloc, newalloc1;
+ newalloc1 = new_state + 1;
+ realtrans = x2nrealloc (realtrans, &newalloc1, sizeof *realtrans);
+ realtrans[0] = NULL;
+ d->trans = realtrans + 1;
+ d->tralloc = newalloc = newalloc1 - 1;
+ d->fails = xnrealloc (d->fails, newalloc, sizeof *d->fails);
+ d->success = xnrealloc (d->success, newalloc, sizeof *d->success);
+ d->newlines = xnrealloc (d->newlines, newalloc, sizeof *d->newlines);
+ for (; oldalloc < newalloc; oldalloc++)
+ {
+ d->trans[oldalloc] = NULL;
+ d->fails[oldalloc] = NULL;
+ }
+ }
}
/* Some routines for manipulating a compiled dfa's transition tables.
@@ -2912,21 +2919,22 @@ static void
build_state (state_num s, struct dfa *d)
{
state_num *trans; /* The new transition table. */
- state_num i;
+ state_num i, maxstate;
/* Set an upper limit on the number of transition tables that will ever
exist at once. 1024 is arbitrary. The idea is that the frequently
used transition tables will be quickly rebuilt, whereas the ones that
- were only needed once or twice will be cleared away. */
+ were only needed once or twice will be cleared away. However, do
+ not clear the initial state, as it's always used. */
if (d->trcount >= 1024)
{
- for (i = 0; i < d->tralloc; ++i)
+ for (i = 1; i < d->tralloc; ++i)
{
free (d->trans[i]);
free (d->fails[i]);
d->trans[i] = d->fails[i] = NULL;
}
- d->trcount = 0;
+ d->trcount = 1;
}
++d->trcount;
@@ -2940,30 +2948,17 @@ build_state (state_num s, struct dfa *d)
if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NONE, s, *d))
d->success[s] |= CTX_NONE;
- MALLOC (trans, NOTCHAR);
+ trans = xmalloc (NOTCHAR * sizeof *trans);
dfastate (s, d, trans);
/* Now go through the new transition table, and make sure that the trans
and fail arrays are allocated large enough to hold a pointer for the
largest state mentioned in the table. */
+ maxstate = -1;
for (i = 0; i < NOTCHAR; ++i)
- if (trans[i] >= d->tralloc)
- {
- state_num oldalloc = d->tralloc;
-
- while (trans[i] >= d->tralloc)
- d->tralloc *= 2;
- REALLOC (d->realtrans, d->tralloc + 1);
- d->trans = d->realtrans + 1;
- REALLOC (d->fails, d->tralloc);
- REALLOC (d->success, d->tralloc);
- REALLOC (d->newlines, d->tralloc);
- while (oldalloc < d->tralloc)
- {
- d->trans[oldalloc] = NULL;
- d->fails[oldalloc++] = NULL;
- }
- }
+ if (maxstate < trans[i])
+ maxstate = trans[i];
+ realloc_trans_if_necessary (d, maxstate);
/* Keep the newline transition in a special place so we can use it as
a sentinel. */
@@ -2976,68 +2971,8 @@ build_state (state_num s, struct dfa *d)
d->trans[s] = trans;
}
-static void
-build_state_zero (struct dfa *d)
-{
- d->tralloc = 1;
- d->trcount = 0;
- CALLOC (d->realtrans, d->tralloc + 1);
- d->trans = d->realtrans + 1;
- CALLOC (d->fails, d->tralloc);
- MALLOC (d->success, d->tralloc);
- MALLOC (d->newlines, d->tralloc);
- build_state (0, d);
-}
-
/* Multibyte character handling sub-routines for dfaexec. */
-/* The initial state may encounter a byte which is not a single byte character
- nor the first byte of a multibyte character. But it is incorrect for the
- initial state to accept such a byte. For example, in Shift JIS the regular
- expression "\\" accepts the codepoint 0x5c, but should not accept the second
- byte of the codepoint 0x815c. Then the initial state must skip the bytes
- that are not a single byte character nor the first byte of a multibyte
- character. */
-#define SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p) \
- if (s == 0) \
- { \
- while (inputwcs[p - buf_begin] == 0 \
- && mblen_buf[p - buf_begin] > 0 \
- && (unsigned char const *) p < buf_end) \
- ++p; \
- if ((char *) p >= end) \
- { \
- free (mblen_buf); \
- free (inputwcs); \
- *end = saved_end; \
- return NULL; \
- } \
- }
-
-static void
-realloc_trans_if_necessary (struct dfa *d, state_num new_state)
-{
- /* Make sure that the trans and fail arrays are allocated large enough
- to hold a pointer for the new state. */
- if (new_state >= d->tralloc)
- {
- state_num oldalloc = d->tralloc;
-
- while (new_state >= d->tralloc)
- d->tralloc *= 2;
- REALLOC (d->realtrans, d->tralloc + 1);
- d->trans = d->realtrans + 1;
- REALLOC (d->fails, d->tralloc);
- REALLOC (d->success, d->tralloc);
- REALLOC (d->newlines, d->tralloc);
- while (oldalloc < d->tralloc)
- {
- d->trans[oldalloc] = NULL;
- d->fails[oldalloc++] = NULL;
- }
- }
-}
-
/* Return values of transit_state_singlebyte, and
transit_state_consume_1char. */
typedef enum
@@ -3070,14 +3005,7 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p,
works = 0;
}
else if (works < 0)
- {
- if (p == buf_end)
- {
- /* At the moment, it must not happen. */
- abort ();
- }
- works = 0;
- }
+ works = 0;
else if (d->fails[works])
{
works = d->fails[works][*p];
@@ -3092,18 +3020,13 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p,
return rval;
}
-/* Match a "." against the current context. buf_begin[IDX] is the
- current position. Return the length of the match, in bytes.
- POS is the position of the ".". */
+/* Match a "." against the current context. Return the length of the
+ match, in bytes. POS is the position of the ".". */
static int
-match_anychar (struct dfa *d, state_num s, position pos, size_t idx)
+match_anychar (struct dfa *d, state_num s, position pos,
+ wint_t wc, size_t mbclen)
{
int context;
- wchar_t wc;
- int mbclen;
-
- wc = inputwcs[idx];
- mbclen = (mblen_buf[idx] == 0) ? 1 : mblen_buf[idx];
/* Check syntax bits. */
if (wc == (wchar_t) eolbyte)
@@ -3116,6 +3039,8 @@ match_anychar (struct dfa *d, state_num s, position pos, size_t idx)
if (syntax_bits & RE_DOT_NOT_NULL)
return 0;
}
+ else if (wc == WEOF)
+ return 0;
context = wchar_context (wc);
if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context))
@@ -3125,16 +3050,14 @@ match_anychar (struct dfa *d, state_num s, position pos, size_t idx)
}
/* Match a bracket expression against the current context.
- buf_begin[IDX] is the current position.
Return the length of the match, in bytes.
POS is the position of the bracket expression. */
static int
-match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
+match_mb_charset (struct dfa *d, state_num s, position pos,
+ char const *p, wint_t wc, size_t match_len)
{
size_t i;
- int match; /* Matching succeeded. */
- int match_len; /* Length of the character (or collating element)
- with which this operator matches. */
+ bool match; /* Matching succeeded. */
int op_len; /* Length of the operator. */
char buffer[128];
@@ -3142,9 +3065,6 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
struct mb_char_classes *work_mbc;
int context;
- wchar_t wc; /* Current referring character. */
-
- wc = inputwcs[idx];
/* Check syntax bits. */
if (wc == (wchar_t) eolbyte)
@@ -3157,6 +3077,8 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
if (syntax_bits & RE_DOT_NOT_NULL)
return 0;
}
+ else if (wc == WEOF)
+ return 0;
context = wchar_context (wc);
if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context))
@@ -3165,7 +3087,6 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
/* Assign the current referring operator to work_mbc. */
work_mbc = &(d->mbcsets[(d->multibyte_prop[pos.index]) >> 2]);
match = !work_mbc->invert;
- match_len = (mblen_buf[idx] == 0) ? 1 : mblen_buf[idx];
/* Match in range 0-255? */
if (wc < NOTCHAR && work_mbc->cset != -1
@@ -3179,14 +3100,14 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
goto charset_matched;
}
- strncpy (buffer, (char const *) buf_begin + idx, match_len);
+ strncpy (buffer, p, match_len);
buffer[match_len] = '\0';
/* match with an equivalence class? */
for (i = 0; i < work_mbc->nequivs; i++)
{
op_len = strlen (work_mbc->equivs[i]);
- strncpy (buffer, (char const *) buf_begin + idx, op_len);
+ strncpy (buffer, p, op_len);
buffer[op_len] = '\0';
if (strcoll (work_mbc->equivs[i], buffer) == 0)
{
@@ -3199,7 +3120,7 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
for (i = 0; i < work_mbc->ncoll_elems; i++)
{
op_len = strlen (work_mbc->coll_elems[i]);
- strncpy (buffer, (char const *) buf_begin + idx, op_len);
+ strncpy (buffer, p, op_len);
buffer[op_len] = '\0';
if (strcoll (work_mbc->coll_elems[i], buffer) == 0)
@@ -3212,7 +3133,7 @@ match_mb_charset (struct dfa *d, state_num s, position pos, size_t idx)
/* match with a range? */
for (i = 0; i < work_mbc->nranges; i++)
{
- if (work_mbc->range_sts[i] <= wc && wc <= work_mbc->range_ends[i])
+ if (work_mbc->ranges[i].beg <= wc && wc <= work_mbc->ranges[i].end)
goto charset_matched;
}
@@ -3233,27 +3154,25 @@ charset_matched:
array which corresponds to 'd->states[s].mbps.elem'; each element of the
array contains the number of bytes with which the element can match.
- 'idx' is the index from buf_begin, and it is the current position
- in the buffer.
-
The caller MUST free the array which this function return. */
static int *
-check_matching_with_multibyte_ops (struct dfa *d, state_num s, size_t idx)
+check_matching_with_multibyte_ops (struct dfa *d, state_num s,
+ char const *p, wint_t wc, size_t mbclen)
{
size_t i;
int *rarray;
- MALLOC (rarray, d->states[s].mbps.nelem);
+ rarray = d->mb_match_lens;
for (i = 0; i < d->states[s].mbps.nelem; ++i)
{
position pos = d->states[s].mbps.elems[i];
switch (d->tokens[pos.index])
{
case ANYCHAR:
- rarray[i] = match_anychar (d, s, pos, idx);
+ rarray[i] = match_anychar (d, s, pos, wc, mbclen);
break;
case MBCSET:
- rarray[i] = match_mb_charset (d, s, pos, idx);
+ rarray[i] = match_mb_charset (d, s, pos, p, wc, mbclen);
break;
default:
break; /* cannot happen. */
@@ -3273,48 +3192,39 @@ check_matching_with_multibyte_ops (struct dfa *d, state_num s, size_t idx)
static status_transit_state
transit_state_consume_1char (struct dfa *d, state_num s,
unsigned char const **pp,
- int *match_lens, int *mbclen, position_set * pps)
+ wint_t wc, size_t mbclen,
+ int *match_lens)
{
size_t i, j;
int k;
state_num s1, s2;
- int *work_mbls;
status_transit_state rs = TRANSIT_STATE_DONE;
- /* Calculate the length of the (single/multi byte) character
- to which p points. */
- *mbclen = (mblen_buf[*pp - buf_begin] == 0) ? 1 : mblen_buf[*pp - buf_begin];
+ if (! match_lens && d->states[s].mbps.nelem != 0)
+ match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp,
+ wc, mbclen);
/* Calculate the state which can be reached from the state 's' by
- consuming '*mbclen' single bytes from the buffer. */
+ consuming 'mbclen' single bytes from the buffer. */
s1 = s;
- for (k = 0; k < *mbclen; k++)
+ for (k = 0; k < mbclen; k++)
{
s2 = s1;
rs = transit_state_singlebyte (d, s2, (*pp)++, &s1);
}
- /* Copy the positions contained by 's1' to the set 'pps'. */
- copy (&(d->states[s1].elems), pps);
-
- /* Check (input) match_lens, and initialize if it is NULL. */
- if (match_lens == NULL && d->states[s].mbps.nelem != 0)
- work_mbls = check_matching_with_multibyte_ops (d, s, *pp - buf_begin);
- else
- work_mbls = match_lens;
+ copy (&d->states[s1].elems, &d->mb_follows);
/* Add all of the positions which can be reached from 's' by consuming
a single character. */
for (i = 0; i < d->states[s].mbps.nelem; i++)
{
- if (work_mbls[i] == *mbclen)
+ if (match_lens[i] == mbclen)
for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem;
j++)
- insert (d->follows[d->states[s].mbps.elems[i].index].elems[j], pps);
+ insert (d->follows[d->states[s].mbps.elems[i].index].elems[j],
+ &d->mb_follows);
}
- if (match_lens == NULL && work_mbls != NULL)
- free (work_mbls);
-
/* FIXME: this return value is always ignored. */
return rs;
}
@@ -3323,7 +3233,8 @@ transit_state_consume_1char (struct dfa *d, state_num s,
buffer. This function is for some operator which can match with a multi-
byte character or a collating element (which may be multi characters). */
static state_num
-transit_state (struct dfa *d, state_num s, unsigned char const **pp)
+transit_state (struct dfa *d, state_num s, unsigned char const **pp,
+ unsigned char const *end)
{
state_num s1;
int mbclen; /* The length of current input multibyte character. */
@@ -3331,16 +3242,17 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp)
size_t i, j;
int *match_lens = NULL;
size_t nelem = d->states[s].mbps.nelem; /* Just an alias. */
- position_set follows;
unsigned char const *p1 = *pp;
- wchar_t wc;
+ wint_t wc;
if (nelem > 0)
/* This state has (a) multibyte operator(s).
We check whether each of them can match or not. */
{
/* Note: caller must free the return value of this function. */
- match_lens = check_matching_with_multibyte_ops (d, s, *pp - buf_begin);
+ mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
+ match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp,
+ wc, mbclen);
for (i = 0; i < nelem; i++)
/* Search for the operator which matches the longest string,
@@ -3362,26 +3274,25 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp)
if (rs == TRANSIT_STATE_DONE)
++*pp;
- free (match_lens);
return s1;
}
/* This state has some operators which can match a multibyte character. */
- alloc_position_set (&follows, d->nleaves);
+ d->mb_follows.nelem = 0;
/* 'maxlen' may be longer than the length of a character, because it may
not be a character but a (multi character) collating element.
We enumerate all of the positions which 's' can reach by consuming
'maxlen' bytes. */
- transit_state_consume_1char (d, s, pp, match_lens, &mbclen, &follows);
+ transit_state_consume_1char (d, s, pp, wc, mbclen, match_lens);
- wc = inputwcs[*pp - mbclen - buf_begin];
- s1 = state_index (d, &follows, wchar_context (wc));
+ s1 = state_index (d, &d->mb_follows, wchar_context (wc));
realloc_trans_if_necessary (d, s1);
while (*pp - p1 < maxlen)
{
- transit_state_consume_1char (d, s1, pp, NULL, &mbclen, &follows);
+ mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
+ transit_state_consume_1char (d, s1, pp, wc, mbclen, NULL);
for (i = 0; i < nelem; i++)
{
@@ -3389,51 +3300,15 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp)
for (j = 0;
j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++)
insert (d->follows[d->states[s1].mbps.elems[i].index].elems[j],
- &follows);
+ &d->mb_follows);
}
- wc = inputwcs[*pp - mbclen - buf_begin];
- s1 = state_index (d, &follows, wchar_context (wc));
+ s1 = state_index (d, &d->mb_follows, wchar_context (wc));
realloc_trans_if_necessary (d, s1);
}
- free (match_lens);
- free (follows.elems);
return s1;
}
-
-/* Initialize mblen_buf and inputwcs with data from the next line. */
-
-static void
-prepare_wc_buf (struct dfa *d, const char *begin, const char *end)
-{
-#if MBS_SUPPORT
- unsigned char eol = eolbyte;
- size_t i;
- size_t ilim = end - begin + 1;
-
- buf_begin = (unsigned char *) begin;
-
- for (i = 0; i < ilim; i++)
- {
- size_t nbytes = mbs_to_wchar (d, inputwcs + i, begin + i, ilim - i, &mbs);
- mblen_buf[i] = nbytes - (nbytes == 1);
- if (begin[i] == eol)
- break;
- while (--nbytes != 0)
- {
- i++;
- mblen_buf[i] = nbytes;
- inputwcs[i] = 0;
- }
- }
-
- buf_end = (unsigned char *) (begin + i);
- mblen_buf[i] = 0;
- inputwcs[i] = 0; /* sentinel */
-#endif /* MBS_SUPPORT */
-}
-
/* Search through a buffer looking for a match to the given struct dfa.
Find the first occurrence of a string matching the regexp in the
buffer, and the shortest possible version thereof. Return a pointer to
@@ -3451,39 +3326,67 @@ dfaexec (struct dfa *d, char const *begin, char *end,
int allow_nl, size_t *count, int *backref)
{
state_num s, s1; /* Current state. */
- unsigned char const *p; /* Current input character. */
+ unsigned char const *p, *mbp; /* Current input character. */
state_num **trans, *t; /* Copy of d->trans so it can be optimized
into a register. */
unsigned char eol = eolbyte; /* Likewise for eolbyte. */
unsigned char saved_end;
+ size_t nlcount = 0;
if (!d->tralloc)
- build_state_zero (d);
+ {
+ realloc_trans_if_necessary (d, 1);
+ build_state (0, d);
+ }
s = s1 = 0;
- p = (unsigned char const *) begin;
+ p = mbp = (unsigned char const *) begin;
trans = d->trans;
saved_end = *(unsigned char *) end;
*end = eol;
- if (d->mb_cur_max > 1)
+ if (d->multibyte)
{
- MALLOC (mblen_buf, end - begin + 2);
- MALLOC (inputwcs, end - begin + 2);
- memset (&mbs, 0, sizeof (mbstate_t));
- prepare_wc_buf (d, (const char *) p, end);
+ memset (&d->mbs, 0, sizeof d->mbs);
+ if (! d->mb_match_lens)
+ {
+ d->mb_match_lens = xnmalloc (d->nleaves, sizeof *d->mb_match_lens);
+ alloc_position_set (&d->mb_follows, d->nleaves);
+ }
}
for (;;)
{
- if (d->mb_cur_max > 1)
+ if (d->multibyte)
{
while ((t = trans[s]) != NULL)
{
- if (p > buf_end)
- break;
s1 = s;
- SKIP_REMAINS_MB_IF_INITIAL_STATE (s, p);
+
+ if (s == 0)
+ {
+ /* The initial state may encounter a byte which is neither
+ a single byte character nor the first byte of a
+ multibyte character. But it is incorrect for the
+ initial state to accept such a byte. For example,
+ in Shift JIS the regular expression "\\" accepts
+ the codepoint 0x5c, but should not accept the second
+ byte of the codepoint 0x815c. Therefore the initial
+ state must skip the bytes that are neither a single
+ byte character nor the first byte of a multibyte
+ character. */
+ wint_t wc;
+ while (mbp < p)
+ mbp += mbs_to_wchar (&wc, (char const *) mbp,
+ end - (char const *) mbp, d);
+ p = mbp;
+
+ if ((char *) p > end)
+ {
+ p = NULL;
+ goto done;
+ }
+ }
if (d->states[s].mbps.nelem == 0)
{
@@ -3495,18 +3398,16 @@ dfaexec (struct dfa *d, char const *begin, char *end,
better performance (up to 25% better on [a-z], for
example) and enables support for collating symbols and
equivalence classes. */
- if (backref)
+ if (d->states[s].has_mbcset && backref)
{
*backref = 1;
- free (mblen_buf);
- free (inputwcs);
- *end = saved_end;
- return (char *) p;
+ goto done;
}
/* Can match with a multibyte character (and multi character
collating element). Transition table might be updated. */
- s = transit_state (d, s, &p);
+ s = transit_state (d, s, &p, (unsigned char *) end);
+ mbp = p;
trans = d->trans;
}
}
@@ -3526,27 +3427,28 @@ dfaexec (struct dfa *d, char const *begin, char *end,
}
}
- if (s >= 0 && (char *) p <= end && d->fails[s])
+ if ((char *) p > end)
+ {
+ p = NULL;
+ goto done;
+ }
+
+ if (s >= 0 && d->fails[s])
{
if (d->success[s] & sbit[*p])
{
if (backref)
- *backref = (d->states[s].backref != 0);
- if (d->mb_cur_max > 1)
- {
- free (mblen_buf);
- free (inputwcs);
- }
- *end = saved_end;
- return (char *) p;
+ *backref = d->states[s].has_backref;
+ goto done;
}
s1 = s;
- if (d->mb_cur_max > 1)
+ if (d->multibyte)
{
/* Can match with a multibyte character (and multicharacter
collating element). Transition table might be updated. */
- s = transit_state (d, s, &p);
+ s = transit_state (d, s, &p, (unsigned char *) end);
+ mbp = p;
trans = d->trans;
}
else
@@ -3554,31 +3456,18 @@ dfaexec (struct dfa *d, char const *begin, char *end,
continue;
}
- /* If the previous character was a newline, count it. */
- if ((char *) p <= end && p[-1] == eol)
- {
- if (count)
- ++*count;
-
- if (d->mb_cur_max > 1)
- prepare_wc_buf (d, (const char *) p, end);
- }
-
- /* Check if we've run off the end of the buffer. */
- if ((char *) p > end)
+ /* If the previous character was a newline, count it, and skip
+ checking of multibyte character boundary until here. */
+ if (p[-1] == eol)
{
- if (d->mb_cur_max > 1)
- {
- free (mblen_buf);
- free (inputwcs);
- }
- *end = saved_end;
- return NULL;
+ nlcount++;
+ mbp = p;
}
if (s >= 0)
{
- build_state (s, d);
+ if (!d->trans[s])
+ build_state (s, d);
trans = d->trans;
continue;
}
@@ -3591,6 +3480,24 @@ dfaexec (struct dfa *d, char const *begin, char *end,
s = 0;
}
+
+ done:
+ if (count)
+ *count += nlcount;
+ *end = saved_end;
+ return (char *) p;
+}
+
+struct dfa *
+dfasuperset (struct dfa const *d)
+{
+ return d->superset;
+}
+
+bool
+dfaisfast (struct dfa const *d)
+{
+ return d->fast;
}
static void
@@ -3599,7 +3506,6 @@ free_mbdata (struct dfa *d)
size_t i;
free (d->multibyte_prop);
- d->multibyte_prop = NULL;
for (i = 0; i < d->nmbcsets; ++i)
{
@@ -3607,8 +3513,7 @@ free_mbdata (struct dfa *d)
struct mb_char_classes *p = &(d->mbcsets[i]);
free (p->chars);
free (p->ch_classes);
- free (p->range_sts);
- free (p->range_ends);
+ free (p->ranges);
for (j = 0; j < p->nequivs; ++j)
free (p->equivs[j]);
@@ -3620,8 +3525,9 @@ free_mbdata (struct dfa *d)
}
free (d->mbcsets);
- d->mbcsets = NULL;
- d->nmbcsets = 0;
+ free (d->mb_follows.elems);
+ free (d->mb_match_lens);
+ d->mb_match_lens = NULL;
}
/* Initialize the components of a dfa that the other routines don't
@@ -3630,28 +3536,15 @@ void
dfainit (struct dfa *d)
{
memset (d, 0, sizeof *d);
-
- d->calloc = 1;
- MALLOC (d->charclasses, d->calloc);
-
- d->talloc = 1;
- MALLOC (d->tokens, d->talloc);
-
- d->mb_cur_max = MB_CUR_MAX;
-
- if (d->mb_cur_max > 1)
- {
- d->nmultibyte_prop = 1;
- MALLOC (d->multibyte_prop, d->nmultibyte_prop);
- d->mbcsets_alloc = 1;
- MALLOC (d->mbcsets, d->mbcsets_alloc);
- }
+ d->multibyte = MB_CUR_MAX > 1;
+ d->fast = !d->multibyte;
}
static void
dfaoptimize (struct dfa *d)
{
size_t i;
+ bool have_backref = false;
if (!MBS_SUPPORT || !using_utf8 ())
return;
@@ -3663,6 +3556,9 @@ dfaoptimize (struct dfa *d)
case ANYCHAR:
/* Lowered. */
abort ();
+ case BACKREF:
+ have_backref = true;
+ break;
case MBCSET:
/* Requires multi-byte algorithm. */
return;
@@ -3671,8 +3567,95 @@ dfaoptimize (struct dfa *d)
}
}
+ if (!have_backref && d->superset)
+ {
+ /* The superset DFA is not likely to be much faster, so remove it. */
+ dfafree (d->superset);
+ free (d->superset);
+ d->superset = NULL;
+ }
+
free_mbdata (d);
- d->mb_cur_max = 1;
+ d->multibyte = false;
+}
+
+static void
+dfassbuild (struct dfa *d)
+{
+ size_t i, j;
+ charclass ccl;
+ bool have_achar = false;
+ bool have_nchar = false;
+ struct dfa *sup = dfaalloc ();
+
+ *sup = *d;
+ sup->multibyte = false;
+ sup->multibyte_prop = NULL;
+ sup->mbcsets = NULL;
+ sup->superset = NULL;
+ sup->states = NULL;
+ sup->sindex = 0;
+ sup->follows = NULL;
+ sup->tralloc = 0;
+ sup->trans = NULL;
+ sup->fails = NULL;
+ sup->success = NULL;
+ sup->newlines = NULL;
+ sup->musts = NULL;
+
+ sup->charclasses = xnmalloc (sup->calloc, sizeof *sup->charclasses);
+ memcpy (sup->charclasses, d->charclasses,
+ d->cindex * sizeof *sup->charclasses);
+
+ sup->tokens = xnmalloc (d->tindex, 2 * sizeof *sup->tokens);
+ sup->talloc = d->tindex * 2;
+
+ for (i = j = 0; i < d->tindex; i++)
+ {
+ switch (d->tokens[i])
+ {
+ case ANYCHAR:
+ case MBCSET:
+ case BACKREF:
+ zeroset (ccl);
+ notset (ccl);
+ sup->tokens[j++] = CSET + dfa_charclass_index (sup, ccl);
+ sup->tokens[j++] = STAR;
+ if (d->tokens[i + 1] == QMARK || d->tokens[i + 1] == STAR
+ || d->tokens[i + 1] == PLUS)
+ i++;
+ have_achar = true;
+ break;
+ case BEGWORD:
+ case ENDWORD:
+ case LIMWORD:
+ case NOTLIMWORD:
+ if (d->multibyte)
+ {
+ /* These constraints aren't supported in a multibyte locale.
+ Ignore them in the superset DFA, and treat them as
+ backreferences in the main DFA. */
+ sup->tokens[j++] = EMPTY;
+ d->tokens[i] = BACKREF;
+ break;
+ }
+ default:
+ sup->tokens[j++] = d->tokens[i];
+ if ((0 <= d->tokens[i] && d->tokens[i] < NOTCHAR)
+ || d->tokens[i] >= CSET)
+ have_nchar = true;
+ break;
+ }
+ }
+ sup->tindex = j;
+
+ if (have_nchar && (have_achar || d->multibyte))
+ d->superset = sup;
+ else
+ {
+ dfafree (sup);
+ free (sup);
+ }
}
/* Parse and analyze a single string of the given length. */
@@ -3683,8 +3666,14 @@ dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
dfambcache (d);
dfaparse (s, len, d);
dfamust (d);
+ dfassbuild (d);
dfaoptimize (d);
dfaanalyze (d, searchflag);
+ if (d->superset)
+ {
+ d->fast = true;
+ dfaanalyze (d->superset, searchflag);
+ }
}
/* Free the storage held by the components of a dfa. */
@@ -3697,34 +3686,46 @@ dfafree (struct dfa *d)
free (d->charclasses);
free (d->tokens);
- if (d->mb_cur_max > 1)
+ if (d->multibyte)
free_mbdata (d);
for (i = 0; i < d->sindex; ++i)
{
free (d->states[i].elems.elems);
- if (MBS_SUPPORT)
- free (d->states[i].mbps.elems);
+ free (d->states[i].mbps.elems);
}
free (d->states);
- for (i = 0; i < d->tindex; ++i)
- free (d->follows[i].elems);
- free (d->follows);
- for (i = 0; i < d->tralloc; ++i)
+
+ if (d->follows)
{
- free (d->trans[i]);
- free (d->fails[i]);
+ for (i = 0; i < d->tindex; ++i)
+ free (d->follows[i].elems);
+ free (d->follows);
}
- free (d->realtrans);
- free (d->fails);
- free (d->newlines);
- free (d->success);
+
+ if (d->trans)
+ {
+ for (i = 0; i < d->tralloc; ++i)
+ {
+ free (d->trans[i]);
+ free (d->fails[i]);
+ }
+
+ free (d->trans - 1);
+ free (d->fails);
+ free (d->newlines);
+ free (d->success);
+ }
+
for (dm = d->musts; dm; dm = ndm)
{
ndm = dm->next;
free (dm->must);
free (dm);
}
+
+ if (d->superset)
+ dfafree (d->superset);
}
/* Having found the postfix representation of the regular expression,
@@ -3772,13 +3773,13 @@ dfafree (struct dfa *d)
CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus
p->left : q->right : q->is!=ZERO) ? q->in plus
- p->is##q->left p->right##q->is p->is##q->is : p->right##q->left
+ p->is##q->left p->right##q->is p->is##q->is : p->right##q->left
ZERO
- OR longest common longest common (do p->is and substrings common to
- leading trailing q->is have same p->in and q->in
- (sub)sequence (sub)sequence length and
- of p->left of p->right content) ?
+ OR longest common longest common (do p->is and substrings common
+ leading trailing to q->is have same p->in and
+ (sub)sequence (sub)sequence q->in length and content) ?
+ of p->left of p->right
and q->left and q->right p->is : NULL
If there's anything else we recognize in the tree, all four sequences get set
@@ -3815,64 +3816,32 @@ static char *
icatalloc (char *old, char const *new)
{
char *result;
- size_t oldsize = old == NULL ? 0 : strlen (old);
- size_t newsize = new == NULL ? 0 : strlen (new);
+ size_t oldsize;
+ size_t newsize = strlen (new);
if (newsize == 0)
return old;
+ oldsize = strlen (old);
result = xrealloc (old, oldsize + newsize + 1);
memcpy (result + oldsize, new, newsize + 1);
return result;
}
-static char *
-icpyalloc (char const *string)
-{
- return icatalloc (NULL, string);
-}
-
-static char *_GL_ATTRIBUTE_PURE
-istrstr (char const *lookin, char const *lookfor)
-{
- char const *cp;
- size_t len;
-
- len = strlen (lookfor);
- for (cp = lookin; *cp != '\0'; ++cp)
- if (strncmp (cp, lookfor, len) == 0)
- return (char *) cp;
- return NULL;
-}
-
static void
freelist (char **cpp)
{
- size_t i;
-
- if (cpp == NULL)
- return;
- for (i = 0; cpp[i] != NULL; ++i)
- {
- free (cpp[i]);
- cpp[i] = NULL;
- }
+ while (*cpp)
+ free (*cpp++);
}
static char **
enlist (char **cpp, char *new, size_t len)
{
size_t i, j;
-
- if (cpp == NULL)
- return NULL;
- if ((new = icpyalloc (new)) == NULL)
- {
- freelist (cpp);
- return NULL;
- }
+ new = memcpy (xmalloc (len + 1), new, len);
new[len] = '\0';
/* Is there already something in the list that's new (or longer)? */
for (i = 0; cpp[i] != NULL; ++i)
- if (istrstr (cpp[i], new) != NULL)
+ if (strstr (cpp[i], new) != NULL)
{
free (new);
return cpp;
@@ -3880,7 +3849,7 @@ enlist (char **cpp, char *new, size_t len)
/* Eliminate any obsoleted strings. */
j = 0;
while (cpp[j] != NULL)
- if (istrstr (new, cpp[j]) == NULL)
+ if (strstr (new, cpp[j]) == NULL)
++j;
else
{
@@ -3891,53 +3860,35 @@ enlist (char **cpp, char *new, size_t len)
cpp[i] = NULL;
}
/* Add the new string. */
- REALLOC (cpp, i + 2);
+ cpp = xnrealloc (cpp, i + 2, sizeof *cpp);
cpp[i] = new;
cpp[i + 1] = NULL;
return cpp;
}
/* Given pointers to two strings, return a pointer to an allocated
- list of their distinct common substrings. Return NULL if something
- seems wild. */
+ list of their distinct common substrings. */
static char **
comsubs (char *left, char const *right)
{
- char **cpp;
+ char **cpp = xzalloc (sizeof *cpp);
char *lcp;
- char *rcp;
- size_t i, len;
-
- if (left == NULL || right == NULL)
- return NULL;
- cpp = malloc (sizeof *cpp);
- if (cpp == NULL)
- return NULL;
- cpp[0] = NULL;
+
for (lcp = left; *lcp != '\0'; ++lcp)
{
- len = 0;
- rcp = strchr (right, *lcp);
+ size_t len = 0;
+ char *rcp = strchr (right, *lcp);
while (rcp != NULL)
{
+ size_t i;
for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i)
continue;
if (i > len)
len = i;
rcp = strchr (rcp + 1, *lcp);
}
- if (len == 0)
- continue;
- {
- char **p = enlist (cpp, lcp, len);
- if (p == NULL)
- {
- freelist (cpp);
- cpp = NULL;
- break;
- }
- cpp = p;
- }
+ if (len != 0)
+ cpp = enlist (cpp, lcp, len);
}
return cpp;
}
@@ -3945,16 +3896,8 @@ comsubs (char *left, char const *right)
static char **
addlists (char **old, char **new)
{
- size_t i;
-
- if (old == NULL || new == NULL)
- return NULL;
- for (i = 0; new[i] != NULL; ++i)
- {
- old = enlist (old, new[i], strlen (new[i]));
- if (old == NULL)
- break;
- }
+ for (; *new; new++)
+ old = enlist (old, *new, strlen (*new));
return old;
}
@@ -3963,125 +3906,134 @@ addlists (char **old, char **new)
static char **
inboth (char **left, char **right)
{
- char **both;
- char **temp;
+ char **both = xzalloc (sizeof *both);
size_t lnum, rnum;
- if (left == NULL || right == NULL)
- return NULL;
- both = malloc (sizeof *both);
- if (both == NULL)
- return NULL;
- both[0] = NULL;
for (lnum = 0; left[lnum] != NULL; ++lnum)
{
for (rnum = 0; right[rnum] != NULL; ++rnum)
{
- temp = comsubs (left[lnum], right[rnum]);
- if (temp == NULL)
- {
- freelist (both);
- return NULL;
- }
+ char **temp = comsubs (left[lnum], right[rnum]);
both = addlists (both, temp);
freelist (temp);
free (temp);
- if (both == NULL)
- return NULL;
}
}
return both;
}
-typedef struct
+typedef struct must must;
+
+struct must
{
char **in;
char *left;
char *right;
char *is;
-} must;
+ bool begline;
+ bool endline;
+ must *prev;
+};
+
+static must *
+allocmust (must *mp)
+{
+ must *new_mp = xmalloc (sizeof *new_mp);
+ new_mp->in = xzalloc (sizeof *new_mp->in);
+ new_mp->left = xzalloc (2);
+ new_mp->right = xzalloc (2);
+ new_mp->is = xzalloc (2);
+ new_mp->begline = false;
+ new_mp->endline = false;
+ new_mp->prev = mp;
+ return new_mp;
+}
static void
-resetmust (must * mp)
+resetmust (must *mp)
{
+ freelist (mp->in);
+ mp->in[0] = NULL;
mp->left[0] = mp->right[0] = mp->is[0] = '\0';
+ mp->begline = false;
+ mp->endline = false;
+}
+
+static void
+freemust (must *mp)
+{
freelist (mp->in);
+ free (mp->in);
+ free (mp->left);
+ free (mp->right);
+ free (mp->is);
+ free (mp);
}
static void
dfamust (struct dfa *d)
{
- must *musts;
- must *mp;
- char *result;
+ must *mp = NULL;
+ char const *result = "";
size_t ri;
size_t i;
- int exact;
- token t;
- static must must0;
+ bool exact = false;
+ bool begline = false;
+ bool endline = false;
struct dfamust *dm;
- static char empty_string[] = "";
-
- result = empty_string;
- exact = 0;
- MALLOC (musts, d->tindex + 1);
- mp = musts;
- for (i = 0; i <= d->tindex; ++i)
- mp[i] = must0;
- for (i = 0; i <= d->tindex; ++i)
- {
- mp[i].in = xmalloc (sizeof *mp[i].in);
- mp[i].left = xmalloc (2);
- mp[i].right = xmalloc (2);
- mp[i].is = xmalloc (2);
- mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0';
- mp[i].in[0] = NULL;
- }
-#ifdef DEBUG
- fprintf (stderr, "dfamust:\n");
- for (i = 0; i < d->tindex; ++i)
- {
- fprintf (stderr, " %zd:", i);
- prtok (d->tokens[i]);
- }
- putc ('\n', stderr);
-#endif
+
for (ri = 0; ri < d->tindex; ++ri)
{
- switch (t = d->tokens[ri])
+ token t = d->tokens[ri];
+ switch (t)
{
+ case BEGLINE:
+ mp = allocmust (mp);
+ mp->begline = true;
+ break;
+ case ENDLINE:
+ mp = allocmust (mp);
+ mp->endline = true;
+ break;
case LPAREN:
case RPAREN:
assert (!"neither LPAREN nor RPAREN may appear here");
+
case EMPTY:
- case BEGLINE:
- case ENDLINE:
case BEGWORD:
case ENDWORD:
case LIMWORD:
case NOTLIMWORD:
case BACKREF:
- resetmust (mp);
+ case ANYCHAR:
+ case MBCSET:
+ mp = allocmust (mp);
break;
+
case STAR:
case QMARK:
- assert (musts < mp);
- --mp;
resetmust (mp);
break;
+
case OR:
- assert (&musts[2] <= mp);
{
char **new;
- must *lmp;
- must *rmp;
+ must *rmp = mp;
+ must *lmp = mp = mp->prev;
size_t j, ln, rn, n;
- rmp = --mp;
- lmp = --mp;
/* Guaranteed to be. Unlikely, but ... */
- if (!STREQ (lmp->is, rmp->is))
- lmp->is[0] = '\0';
+ if (STREQ (lmp->is, rmp->is))
+ {
+ lmp->begline &= rmp->begline;
+ lmp->endline &= rmp->endline;
+ }
+ else
+ {
+ lmp->is[0] = '\0';
+ lmp->begline = false;
+ lmp->endline = false;
+ }
/* Left side--easy */
i = 0;
while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i])
@@ -4100,133 +4052,126 @@ dfamust (struct dfa *d)
lmp->right[j] = lmp->right[(ln - i) + j];
lmp->right[j] = '\0';
new = inboth (lmp->in, rmp->in);
- if (new == NULL)
- goto done;
freelist (lmp->in);
free (lmp->in);
lmp->in = new;
+ freemust (rmp);
}
break;
+
case PLUS:
- assert (musts < mp);
- --mp;
mp->is[0] = '\0';
break;
+
case END:
- assert (mp == &musts[1]);
- for (i = 0; musts[0].in[i] != NULL; ++i)
- if (strlen (musts[0].in[i]) > strlen (result))
- result = musts[0].in[i];
- if (STREQ (result, musts[0].is))
- exact = 1;
+ assert (!mp->prev);
+ for (i = 0; mp->in[i] != NULL; ++i)
+ if (strlen (mp->in[i]) > strlen (result))
+ result = mp->in[i];
+ if (STREQ (result, mp->is))
+ {
+ exact = true;
+ begline = mp->begline;
+ endline = mp->endline;
+ }
goto done;
+
case CAT:
- assert (&musts[2] <= mp);
{
- must *lmp;
- must *rmp;
+ must *rmp = mp;
+ must *lmp = mp = mp->prev;
- rmp = --mp;
- lmp = --mp;
/* In. Everything in left, plus everything in
right, plus concatenation of
left's right and right's left. */
lmp->in = addlists (lmp->in, rmp->in);
- if (lmp->in == NULL)
- goto done;
if (lmp->right[0] != '\0' && rmp->left[0] != '\0')
{
- char *tp;
-
- tp = icpyalloc (lmp->right);
- tp = icatalloc (tp, rmp->left);
- lmp->in = enlist (lmp->in, tp, strlen (tp));
+ size_t lrlen = strlen (lmp->right);
+ size_t rllen = strlen (rmp->left);
+ char *tp = xmalloc (lrlen + rllen);
+ memcpy (tp, lmp->right, lrlen);
+ memcpy (tp + lrlen, rmp->left, rllen);
+ lmp->in = enlist (lmp->in, tp, lrlen + rllen);
free (tp);
- if (lmp->in == NULL)
- goto done;
}
/* Left-hand */
if (lmp->is[0] != '\0')
- {
- lmp->left = icatalloc (lmp->left, rmp->left);
- if (lmp->left == NULL)
- goto done;
- }
+ lmp->left = icatalloc (lmp->left, rmp->left);
/* Right-hand */
if (rmp->is[0] == '\0')
lmp->right[0] = '\0';
lmp->right = icatalloc (lmp->right, rmp->right);
- if (lmp->right == NULL)
- goto done;
/* Guaranteed to be */
- if (lmp->is[0] != '\0' && rmp->is[0] != '\0')
+ if ((lmp->is[0] != '\0' || lmp->begline)
+ && (rmp->is[0] != '\0' || rmp->endline))
{
lmp->is = icatalloc (lmp->is, rmp->is);
- if (lmp->is == NULL)
- goto done;
+ lmp->endline = rmp->endline;
}
else
- lmp->is[0] = '\0';
+ {
+ lmp->is[0] = '\0';
+ lmp->begline = false;
+ lmp->endline = false;
+ }
+ freemust (rmp);
}
break;
+
+ case '\0':
+ /* Not on *my* shift. */
+ goto done;
+
default:
- if (t < END)
- {
- assert (!"oops! t >= END");
- }
- else if (t == '\0')
- {
- /* not on *my* shift */
- goto done;
- }
- else if (t >= CSET || !MBS_SUPPORT || t == ANYCHAR || t == MBCSET)
+ mp = allocmust (mp);
+ if (CSET <= t)
{
- /* easy enough */
- resetmust (mp);
- }
- else
- {
- /* plain character */
- resetmust (mp);
- mp->is[0] = mp->left[0] = mp->right[0] = t;
- mp->is[1] = mp->left[1] = mp->right[1] = '\0';
- mp->in = enlist (mp->in, mp->is, (size_t) 1);
- if (mp->in == NULL)
- goto done;
+ /* If T is a singleton, or if case-folding in a unibyte
+ locale and T's members all case-fold to the same char,
+ convert T to one of its members. Otherwise, do
+ nothing further with T. */
+ charclass *ccl = &d->charclasses[t - CSET];
+ int j;
+ for (j = 0; j < NOTCHAR; j++)
+ if (tstbit (j, *ccl))
+ break;
+ if (! (j < NOTCHAR))
+ break;
+ t = j;
+ while (++j < NOTCHAR)
+ if (tstbit (j, *ccl)
+ && ! (case_fold && !d->multibyte
+ && toupper (j) == toupper (t)))
+ break;
+ if (j < NOTCHAR)
+ break;
}
+ mp->is[0] = mp->left[0] = mp->right[0]
+ = case_fold && !d->multibyte ? toupper (t) : t;
+ mp->is[1] = mp->left[1] = mp->right[1] = '\0';
+ mp->in = enlist (mp->in, mp->is, 1);
break;
}
-#ifdef DEBUG
- fprintf (stderr, " node: %zd:", ri);
- prtok (d->tokens[ri]);
- fprintf (stderr, "\n in:");
- for (i = 0; mp->in[i]; ++i)
- fprintf (stderr, " \"%s\"", mp->in[i]);
- fprintf (stderr, "\n is: \"%s\"\n", mp->is);
- fprintf (stderr, " left: \"%s\"\n", mp->left);
- fprintf (stderr, " right: \"%s\"\n", mp->right);
-#endif
- ++mp;
}
done:
- if (strlen (result))
+ if (*result)
{
- MALLOC (dm, 1);
+ dm = xmalloc (sizeof *dm);
dm->exact = exact;
- dm->must = xmemdup (result, strlen (result) + 1);
+ dm->begline = begline;
+ dm->endline = endline;
+ dm->must = xstrdup (result);
dm->next = d->musts;
d->musts = dm;
}
- mp = musts;
- for (i = 0; i <= d->tindex; ++i)
+
+ while (mp)
{
- freelist (mp[i].in);
- free (mp[i].in);
- free (mp[i].left);
- free (mp[i].right);
- free (mp[i].is);
+ must *prev = mp->prev;
+ freemust (mp);
+ mp = prev;
}
- free (mp);
}
struct dfa *
diff --git a/dfa.h b/dfa.h
index 24fbcbe7..4eb42968 100644
--- a/dfa.h
+++ b/dfa.h
@@ -19,13 +19,20 @@
/* Written June, 1988 by Mike Haertel */
#include <regex.h>
+#ifdef HAVE_STDBOOL_H
+#include <stdbool.h>
+#else
+#include "missing_d/gawkbool.h"
+#endif /* HAVE_STDBOOL_H */
#include <stddef.h>
/* Element of a list of strings, at least one of which is known to
appear in any R.E. matching the DFA. */
struct dfamust
{
- int exact;
+ bool exact;
+ bool begline;
+ bool endline;
char *must;
struct dfamust *next;
};
@@ -68,6 +75,15 @@ extern void dfacomp (char const *, size_t, struct dfa *, int);
extern char *dfaexec (struct dfa *d, char const *begin, char *end,
int newline, size_t *count, int *backref);
+/* Return a superset for D. The superset matches everything that D
+ matches, along with some other strings (though the latter should be
+ rare, for efficiency reasons). Return a null pointer if no useful
+ superset is available. */
+extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;
+
+/* The DFA is likely to be fast. */
+extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
+
/* Free the storage held by the components of a struct dfa. */
extern void dfafree (struct dfa *);
@@ -101,11 +117,3 @@ extern void dfawarn (const char *);
extern _Noreturn void dfaerror (const char *);
extern int using_utf8 (void);
-
-/* Maximum number of characters that can be the case-folded
- counterparts of a single character, not counting the character
- itself. This is 1 for towupper, 1 for towlower, and 1 for each
- entry in LONESOME_LOWER; see dfa.c. */
-enum { CASE_FOLDED_BUFSIZE = 1 + 1 + 19 };
-
-extern int case_folded_counterparts (wchar_t, wchar_t[CASE_FOLDED_BUFSIZE]);
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
new file mode 100644
index 00000000..e12f5de0
--- /dev/null
+++ b/doc/CMakeLists.txt
@@ -0,0 +1,95 @@
+#
+# doc/CMakeLists.txt --- CMake input file for gawk
+#
+# Copyright (C) 2013
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+MACRO(DocDependency outfile)
+ add_dependencies(doc ${outfile})
+ add_custom_target(
+ ${outfile}
+ DEPENDS ${ARGN}
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ COMMAND ${CMAKE_SOURCE_DIR}/cmake/docmaker ${outfile} ${ARGN}
+ )
+ENDMACRO(DocDependency)
+
+find_program(TEXI2DVI_CONVERTER texi2dvi)
+if (TEXI2DVI_CONVERTER)
+ add_custom_target(doc)
+ DocDependency(gawk.texi gawktexi.in rflashlight.eps api-figure1.fig api-figure2.fig api-figure3.fig general-program.fig process-flow.fig)
+ DocDependency(rflashlight.eps)
+ DocDependency(api-figure1.fig)
+ DocDependency(api-figure2.fig)
+ DocDependency(api-figure3.fig)
+ DocDependency(general-program.fig)
+ DocDependency(process-flow.fig)
+ DocDependency(gawk.dvi gawk.texi)
+ DocDependency(gawk.info gawk.texi)
+ DocDependency(gawkinet.dvi gawkinet.texi)
+ DocDependency(gawkinet.info gawkinet.texi)
+ DocDependency(gawkinet.texi statist.eps)
+ DocDependency(gawk.1.ps gawk.1)
+ DocDependency(igawk.1.ps igawk.1)
+ find_program(DVIPS_CONVERTER dvips)
+ if (DVIPS_CONVERTER)
+ DocDependency(gawk.ps gawk.dvi)
+ DocDependency(gawkinet.ps gawkinet.dvi)
+ find_program(PS2PDF_CONVERTER ps2pdf)
+ if (PS2PDF_CONVERTER)
+ DocDependency(gawk.1.pdf gawk.1.ps)
+ DocDependency(igawk.1.pdf igawk.1.ps)
+ DocDependency(gawk.pdf gawk.ps)
+ DocDependency(gawkinet.pdf gawkinet.ps)
+ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.1.pdf DESTINATION doc)
+ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/igawk.1.pdf DESTINATION doc)
+ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.info DESTINATION doc)
+ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.pdf DESTINATION doc)
+ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawkinet.info DESTINATION doc)
+ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawkinet.pdf DESTINATION doc)
+
+ set(CARDSRC macros cardfonts colors awkcard.tr)
+ set(CARDSRC_N macros cardfonts no.colors awkcard.tr)
+ set(CARDFILES ${CARDSRC} ad.block awkcard.in setter.outline)
+ DocDependency(awkcard.tr awkcard.in)
+ DocDependency(awkcard.nc ${CARDFILES})
+ DocDependency(awkcard.ps ${CARDFILES})
+ DocDependency(awkcard.pdf awkcard.ps)
+ install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/awkcard.pdf DESTINATION doc)
+
+ else()
+ message(WARNING "Found no ps2pdf tool; no doc will be generated")
+ install(CODE "MESSAGE(\"doc generated only in .ps files\")")
+ endif()
+ else()
+ message(WARNING "Found no dvips tool; no doc will be generated")
+ install(CODE "MESSAGE(\"doc generated only in .dvi files and man pages in .ps files\")")
+ endif()
+else()
+ message(WARNING "Found no texi2dvi tool; no doc will be generated")
+ add_custom_command(
+ TARGET doc
+ COMMAND echo no doc generated because of missing texi2dvi
+ )
+endif()
+
diff --git a/doc/ChangeLog b/doc/ChangeLog
index d4f6881b..2522ad87 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,297 @@
+2014-09-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Document that identifiers must use the English
+ letters.
+
+2014-09-14 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: More edits during review, minor addition.
+
+2014-09-08 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Remove text that won't get used.
+
+2014-09-07 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Minor cleanups.
+
+2014-09-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Document builtin functions in FUNCTAB and in
+ PROCINFO["identifiers"].
+ * gawk.1: Ditto.
+
+ Unrelated:
+
+ * gawktexi.in: More stuff from reviewer comments.
+
+2014-09-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Document that indirect calls now work on built-in
+ and extension functions.
+ * gawk.1: Same.
+
+2014-09-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Further fixes from reviews and bug reports.
+
+2014-09-02 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Corrections to walkthrough in debugger chapter.
+ Thanks to David Ward <dlward134@gmail.com> for the problem report.
+
+2014-09-01 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Add index entry for @ - @load, @include,
+ and indirect function calls. Thanks to "Kenny McKormack" in
+ comp.lang.awk.
+
+2014-08-29 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments, and other
+ bug fixes, miscellaneous improvements.
+
+2014-08-26 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Use a different mechanism to exclude
+ exercises. Remove use of LC_ALL in an example; doesn't seem
+ to be needed anymore.
+
+ Unrelated:
+
+ * gawktexi.in: Document that MirBSD is no longer supported.
+
+2014-08-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Exercises are excluded from print edition.
+
+2014-08-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-20 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-16 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-13 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Starting on reviewer comments.
+ Update acknowledgements.
+
+2014-08-12 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Cause div.awk to get into the example files.
+
+2014-08-06 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Misc minor additions.
+
+2014-08-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: For sprintf %c document that if value is a valid
+ wide character, gawk uses the low 8 bits of the value.
+
+ Unrelated:
+
+ * gawktexi.in: Fix doc for API get_record - errcode needs to
+ be greater than zero.
+
+2014-07-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in (Numeric Functions): For `div()', clarify
+ truncation is towards zero. Thanks to Michal Jaegermann
+ for pointing out the need to clarify this.
+
+2014-07-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in (Numeric Functions): Document new `div()' function.
+ (Arbitrary Precision Integers): Document raison d'etre for div().
+ * gawk.1, awkcard.in: Document `div()'.
+
+2014-07-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in (Bracket Expressions): Add a note about how to
+ match ASCII characters. Thanks to Hermann Peifer.
+
+2014-06-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Update permissions on copyright page per
+ latest maintain.texi. Add GPL to print version of book.
+
+2014-06-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Document that --pretty-print no longer runs the
+ program. Remove mention of GAWK_NO_PP_RUN env var.
+
+2014-06-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Typo fixes and minor corrections.
+
+2014-06-19 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Add thanks to Patrice Dumas and to Karl Berry.
+ Per request from Hermann Peifer, try to clarify how local variables
+ in functions are initialized.
+
+2014-06-18 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Split 6.1.4 into subsections. Other minor fixes.
+
+2014-06-17 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Finish adding exercises.
+ Rework chapter 15 on floating point and MPFR.
+ Spell check. Fix menus.
+
+2014-06-16 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Start adding exercises.
+
+2014-06-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Finish up summaries. Improvements in mystrtonum().
+
+2014-06-13 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Fix typos from changes of 3 June when macros were
+ added for filename, data file, etc. Ooops.
+
+2014-06-12 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: More "Summary" sections. Through chapter 14.
+
+2014-06-11 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: More "Summary" sections. Through chapter 10.
+
+2014-06-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Update docbook figure markup.
+
+2014-06-09 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: More "Summary" sections.
+ Judiciously arrange for full xrefs in docbook in a few spots.
+
+2014-06-08 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Start adding "Summary" sections.
+
+2014-06-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Restore macros for file name vs. filename etc.
+ Go through @if... and @ifnot... and fix them up too. Other misc.
+ cleanup.
+
+2014-05-29 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Remove some obsolete bits, fix up some other
+ minor stuff.
+
+2014-05-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Edits through the end!
+
+2014-05-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Edits through Appendix A.
+ * gawktexi.in: Tweak nested lists for docbook.
+
+2014-05-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in (Staying current): New section.
+
+2014-05-22 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * gawktexi.in (BEGINFILE/ENDFILE): Update doc for getline - any
+ redirected form is allowed inside BEGINFILE/ENDFILE.
+
+2014-05-21 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Add comments for where we need full xrefs in
+ docbook.
+
+2014-05-20 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Misc improvements for docbook, consistency
+ in table and figure captions.
+
+2014-05-17 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Edits through Chapter 16.
+
+2014-05-16 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Edits through Chapter 14.
+
+2014-05-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Fix displays for docbook, edits through Chapter 11.
+
+2014-05-14 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Fix real preface for docbook.
+
+2014-05-13 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Complete formatting for FOR_PRINT and not FOR_PRINT.
+
+2014-05-07 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Docbook edits for preface and parts.
+ Document AWKBUFSIZE.
+
+2014-05-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Editing progress. Through Chapter 9.
+
+2014-05-05 Michal Jaegermann <michal@harddata.com>
+
+ * array-elements.fig: Fix subscripts to be aligned
+ horizontally. Regenerate the other files.
+
+2014-05-02 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Editing progress. Through Chapter 8.
+ * array-elements.eps, array-elements.fig, array-elements.pdf,
+ array-elements.png array-elements.txt: New files.
+ * Makefile.am (EXTRA_DIST): Add them.
+
+2014-04-30 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Editing progress. Through Chapter 5.
+ * gawktexi.in: Editing progress. Through Chapter 6 and into
+ Chapter 7.
+
+2014-04-29 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Editing progress. Through Chapter 3.
+
+2014-04-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Start on revisions.
+
+2014-04-17 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawk.1: Remove the bit about single character programs overflowing
+ the parse stack. It doesn't seem to be true anymore.
+
2014-04-08 Arnold D. Robbins <arnold@skeeve.com>
* 4.1.1: Release tar ball made.
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 12692bd0..8a0442a7 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -26,7 +26,7 @@
info_TEXINFOS = gawk.texi gawkinet.texi
-man_MANS = gawk.1 igawk.1
+man_MANS = gawk.1
EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
awkcard.in awkforai.txt texinfo.tex cardfonts \
@@ -36,6 +36,8 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
api-figure2.png api-figure2.txt \
api-figure3.eps api-figure3.fig api-figure3.pdf \
api-figure3.png api-figure3.txt \
+ array-elements.eps array-elements.fig array-elements.pdf \
+ array-elements.png array-elements.txt \
gawktexi.in sidebar.awk \
general-program.eps general-program.fig general-program.pdf \
general-program.png general-program.txt \
@@ -48,7 +50,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
bc_notes
# Get rid of generated files when cleaning
-CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf
+CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf
MAKEINFO = @MAKEINFO@ --no-split --force
@@ -73,9 +75,9 @@ AWKCARD = awkcard.ps
gawk.texi: $(srcdir)/gawktexi.in $(srcdir)/sidebar.awk
awk -f $(srcdir)/sidebar.awk < $(srcdir)/gawktexi.in > gawk.texi
-postscript: gawk.ps gawkinet.ps gawk.1.ps igawk.1.ps $(AWKCARD)
+postscript: gawk.ps gawkinet.ps gawk.1.ps $(AWKCARD)
-pdf: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf
+pdf: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf
gawk.ps: gawk.dvi
TEXINPUTS=$(srcdir): dvips -o gawk.ps gawk.dvi
@@ -89,12 +91,6 @@ gawk.1.ps: gawk.1
gawk.1.pdf: gawk.1.ps
ps2pdf gawk.1.ps gawk.1.pdf
-igawk.1.ps: igawk.1
- -groff -man $(srcdir)/igawk.1 > igawk.1.ps
-
-igawk.1.pdf: igawk.1.ps
- ps2pdf igawk.1.ps igawk.1.pdf
-
awkcard.tr: awkcard.in
sed 's:SRCDIR:$(srcdir):' < $(srcdir)/awkcard.in > awkcard.tr
diff --git a/doc/Makefile.in b/doc/Makefile.in
index 52e5f873..d89beffd 100644
--- a/doc/Makefile.in
+++ b/doc/Makefile.in
@@ -341,7 +341,7 @@ top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
info_TEXINFOS = gawk.texi gawkinet.texi
-man_MANS = gawk.1 igawk.1
+man_MANS = gawk.1
EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
awkcard.in awkforai.txt texinfo.tex cardfonts \
api-figure1.eps api-figure1.fig api-figure1.pdf \
@@ -350,6 +350,8 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
api-figure2.png api-figure2.txt \
api-figure3.eps api-figure3.fig api-figure3.pdf \
api-figure3.png api-figure3.txt \
+ array-elements.eps array-elements.fig array-elements.pdf \
+ array-elements.png array-elements.txt \
gawktexi.in sidebar.awk \
general-program.eps general-program.fig general-program.pdf \
general-program.png general-program.txt \
@@ -363,7 +365,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
# Get rid of generated files when cleaning
-CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf
+CLEANFILES = *.ps *.html *.dvi *~ awkcard.nc awkcard.tr gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf
TROFF = groff -t -Tps -U
SEDME = sed -e "s/^level0 restore/level0 restore flashme 100 72 moveto (Copyright `date '+%m-%d-%y %T'`, FSF, Inc. (all)) show/" \
-e "s/^\/level0 save def/\/level0 save def 30 -48 translate/"
@@ -867,9 +869,9 @@ uninstall-man: uninstall-man1
gawk.texi: $(srcdir)/gawktexi.in $(srcdir)/sidebar.awk
awk -f $(srcdir)/sidebar.awk < $(srcdir)/gawktexi.in > gawk.texi
-postscript: gawk.ps gawkinet.ps gawk.1.ps igawk.1.ps $(AWKCARD)
+postscript: gawk.ps gawkinet.ps gawk.1.ps $(AWKCARD)
-pdf: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf igawk.1.pdf
+pdf: postscript gawk.pdf gawkinet.pdf awkcard.pdf gawk.1.pdf
gawk.ps: gawk.dvi
TEXINPUTS=$(srcdir): dvips -o gawk.ps gawk.dvi
@@ -883,12 +885,6 @@ gawk.1.ps: gawk.1
gawk.1.pdf: gawk.1.ps
ps2pdf gawk.1.ps gawk.1.pdf
-igawk.1.ps: igawk.1
- -groff -man $(srcdir)/igawk.1 > igawk.1.ps
-
-igawk.1.pdf: igawk.1.ps
- ps2pdf igawk.1.ps igawk.1.pdf
-
awkcard.tr: awkcard.in
sed 's:SRCDIR:$(srcdir):' < $(srcdir)/awkcard.in > awkcard.tr
diff --git a/doc/array-elements.eps b/doc/array-elements.eps
new file mode 100644
index 00000000..041c0b39
--- /dev/null
+++ b/doc/array-elements.eps
@@ -0,0 +1,158 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Title: array-elements.fig
+%%Creator: fig2dev Version 3.2 Patchlevel 5d
+%%CreationDate: Sun May 4 22:46:26 2014
+%%BoundingBox: 0 0 379 76
+%Magnification: 1.0000
+%%EndComments
+%%BeginProlog
+/$F2psDict 200 dict def
+$F2psDict begin
+$F2psDict /mtrx matrix put
+/col-1 {0 setgray} bind def
+/col0 {0.000 0.000 0.000 srgb} bind def
+/col1 {0.000 0.000 1.000 srgb} bind def
+/col2 {0.000 1.000 0.000 srgb} bind def
+/col3 {0.000 1.000 1.000 srgb} bind def
+/col4 {1.000 0.000 0.000 srgb} bind def
+/col5 {1.000 0.000 1.000 srgb} bind def
+/col6 {1.000 1.000 0.000 srgb} bind def
+/col7 {1.000 1.000 1.000 srgb} bind def
+/col8 {0.000 0.000 0.560 srgb} bind def
+/col9 {0.000 0.000 0.690 srgb} bind def
+/col10 {0.000 0.000 0.820 srgb} bind def
+/col11 {0.530 0.810 1.000 srgb} bind def
+/col12 {0.000 0.560 0.000 srgb} bind def
+/col13 {0.000 0.690 0.000 srgb} bind def
+/col14 {0.000 0.820 0.000 srgb} bind def
+/col15 {0.000 0.560 0.560 srgb} bind def
+/col16 {0.000 0.690 0.690 srgb} bind def
+/col17 {0.000 0.820 0.820 srgb} bind def
+/col18 {0.560 0.000 0.000 srgb} bind def
+/col19 {0.690 0.000 0.000 srgb} bind def
+/col20 {0.820 0.000 0.000 srgb} bind def
+/col21 {0.560 0.000 0.560 srgb} bind def
+/col22 {0.690 0.000 0.690 srgb} bind def
+/col23 {0.820 0.000 0.820 srgb} bind def
+/col24 {0.500 0.190 0.000 srgb} bind def
+/col25 {0.630 0.250 0.000 srgb} bind def
+/col26 {0.750 0.380 0.000 srgb} bind def
+/col27 {1.000 0.500 0.500 srgb} bind def
+/col28 {1.000 0.630 0.630 srgb} bind def
+/col29 {1.000 0.750 0.750 srgb} bind def
+/col30 {1.000 0.880 0.880 srgb} bind def
+/col31 {1.000 0.840 0.000 srgb} bind def
+
+end
+
+/cp {closepath} bind def
+/ef {eofill} bind def
+/gr {grestore} bind def
+/gs {gsave} bind def
+/sa {save} bind def
+/rs {restore} bind def
+/l {lineto} bind def
+/m {moveto} bind def
+/rm {rmoveto} bind def
+/n {newpath} bind def
+/s {stroke} bind def
+/sh {show} bind def
+/slc {setlinecap} bind def
+/slj {setlinejoin} bind def
+/slw {setlinewidth} bind def
+/srgb {setrgbcolor} bind def
+/rot {rotate} bind def
+/sc {scale} bind def
+/sd {setdash} bind def
+/ff {findfont} bind def
+/sf {setfont} bind def
+/scf {scalefont} bind def
+/sw {stringwidth} bind def
+/tr {translate} bind def
+/tnt {dup dup currentrgbcolor
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add
+ 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb}
+ bind def
+/shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul
+ 4 -2 roll mul srgb} bind def
+/$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def
+/$F2psEnd {$F2psEnteredState restore end} def
+
+/pageheader {
+save
+newpath 0 76 moveto 0 0 lineto 379 0 lineto 379 76 lineto closepath clip newpath
+-203.3 199.4 translate
+1 -1 scale
+$F2psBegin
+10 setmiterlimit
+0 slj 0 slc
+ 0.06299 0.06299 sc
+} bind def
+/pagefooter {
+$F2psEnd
+restore
+} bind def
+%%EndProlog
+pageheader
+%
+% Fig objects follow
+%
+%
+% here starts figure with depth 50
+% Polyline
+0 slj
+0 slc
+7.500 slw
+n 4455 1980 m 4455 2700 l 4455 2655 l
+ 4455 2700 l gs col0 s gr
+% Polyline
+n 6075 1980 m
+ 6075 2700 l gs col0 s gr
+% Polyline
+n 7425 1980 m
+ 7425 2700 l gs col0 s gr
+/Courier-Bold ff 180.00 scf sf
+3735 2340 m
+gs 1 -1 sc (8) col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+5175 2340 m
+gs 1 -1 sc ("foo") col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+6795 2340 m
+gs 1 -1 sc ("") col0 sh gr
+/Courier-Bold ff 180.00 scf sf
+7875 2340 m
+gs 1 -1 sc (30) col0 sh gr
+/Times-Roman ff 180.00 scf sf
+3735 3150 m
+gs 1 -1 sc (0) col0 sh gr
+/Times-Roman ff 180.00 scf sf
+5175 3150 m
+gs 1 -1 sc (1) col0 sh gr
+/Times-Roman ff 180.00 scf sf
+6795 3150 m
+gs 1 -1 sc (2) col0 sh gr
+/Times-Roman ff 180.00 scf sf
+7875 3150 m
+gs 1 -1 sc (3) col0 sh gr
+/Times-Roman ff 180.00 scf sf
+8730 2340 m
+gs 1 -1 sc (Value) col0 sh gr
+/Times-Roman ff 180.00 scf sf
+8730 3150 m
+gs 1 -1 sc (Index) col0 sh gr
+% here ends figure;
+%
+% here starts figure with depth 40
+% Polyline
+0 slj
+0 slc
+7.500 slw
+n 3240 1980 m 8415 1980 l 8415 2700 l 3240 2700 l
+ cp gs col0 s gr
+% here ends figure;
+pagefooter
+showpage
+%%Trailer
+%EOF
diff --git a/doc/array-elements.fig b/doc/array-elements.fig
new file mode 100644
index 00000000..63b5ffbf
--- /dev/null
+++ b/doc/array-elements.fig
@@ -0,0 +1,27 @@
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Metric
+A4
+100.00
+Single
+-2
+1200 2
+2 2 0 1 0 7 40 -1 -1 0.000 0 0 -1 0 0 5
+ 3240 1980 8415 1980 8415 2700 3240 2700 3240 1980
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 4
+ 4455 1980 4455 2700 4455 2655 4455 2700
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 6075 1980 6075 2700
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 7425 1980 7425 2700
+4 0 0 50 -1 14 12 0.0000 4 120 120 3735 2340 8\001
+4 0 0 50 -1 14 12 0.0000 4 120 600 5175 2340 "foo"\001
+4 0 0 50 -1 14 12 0.0000 4 60 240 6795 2340 ""\001
+4 0 0 50 -1 14 12 0.0000 4 120 240 7875 2340 30\001
+4 0 0 50 -1 0 12 0.0000 4 135 105 3735 3150 0\001
+4 0 0 50 -1 0 12 0.0000 4 135 105 5175 3150 1\001
+4 0 0 50 -1 0 12 0.0000 4 135 105 6795 3150 2\001
+4 0 0 50 -1 0 12 0.0000 4 135 105 7875 3150 3\001
+4 0 0 50 -1 0 12 0.0000 4 135 480 8730 2340 Value\001
+4 0 0 50 -1 0 12 0.0000 4 135 465 8730 3150 Index\001
diff --git a/doc/array-elements.pdf b/doc/array-elements.pdf
new file mode 100644
index 00000000..328cbd1a
--- /dev/null
+++ b/doc/array-elements.pdf
Binary files differ
diff --git a/doc/array-elements.png b/doc/array-elements.png
new file mode 100644
index 00000000..b57d66b7
--- /dev/null
+++ b/doc/array-elements.png
Binary files differ
diff --git a/doc/array-elements.txt b/doc/array-elements.txt
new file mode 100644
index 00000000..8906318a
--- /dev/null
+++ b/doc/array-elements.txt
@@ -0,0 +1,4 @@
++---------+---------+--------+---------+
+| 8 | "foo" | "" | 30 | @r{Value}
++---------+---------+--------+---------+
+ 0 1 2 3 @r{Index}
diff --git a/doc/awkcard.in b/doc/awkcard.in
index ca28f0a7..556bdc1e 100644
--- a/doc/awkcard.in
+++ b/doc/awkcard.in
@@ -1609,6 +1609,9 @@ expand;
l lw(2i).
\*(CD\*(FCatan2(\*(FIy\*(FC, \*(FIx\*(FC)\*(FR The arctangent of \*(FIy/x\fP in radians.
\*(FCcos(\*(FIexpr\*(FC)\*(FR The cosine of \*(FIexpr\fP, which is in radians.
+\*(CB\*(FCdiv(\*(FIn\*(FR\*(FC,\*(FI d\*(FR\*(FC,\*(FI res\*(FR\*(FC)\*(FR T{
+Return the result of integer division in \*(FIres\*(FR.\*(CD
+T}
\*(FCexp(\*(FIexpr\*(FC)\*(FR The exponential function (\*(FIe \*(FC^ \*(FIx\*(FR).
\*(FCint(\*(FIexpr\*(FC)\*(FR Truncate to integer.
\*(FClog(\*(FIexpr\*(FC)\*(FR The natural logarithm function (base \*(FIe\^\*(FR).
diff --git a/doc/gawk.1 b/doc/gawk.1
index a879b527..a4d66720 100644
--- a/doc/gawk.1
+++ b/doc/gawk.1
@@ -13,7 +13,7 @@
. if \w'\(rq' .ds rq "\(rq
. \}
.\}
-.TH GAWK 1 "Mar 08 2014" "Free Software Foundation" "Utility Commands"
+.TH GAWK 1 "Aug 03 2014" "Free Software Foundation" "Utility Commands"
.SH NAME
gawk \- pattern scanning and processing language
.SH SYNOPSIS
@@ -1132,9 +1132,14 @@ For each identifier, the value of the element is one of the following:
\fB"array"\fR
The identifier is an array.
.TP
+\fB"builtin"\fR
+The identifier is a built-in function.
+.TP
\fB"extension"\fR
The identifier is an extension function loaded via
-.BR @load .
+.B @load
+or
+.BR \-l .
.TP
\fB"scalar"\fR
The identifier is a scalar.
@@ -2629,6 +2634,23 @@ Return the cosine of
.IR expr ,
which is in radians.
.TP
+.BI div( num ", " denom ", " result )
+Truncate
+.I num
+and
+.I denom
+to integers. Return the quotient of
+.I num
+divided by
+.I denom
+in \fIresult\fB["quotient"]\fR
+and the remainder
+in \fIresult\fB["remainder"]\fR.
+This is a
+.I gawk
+extension, primarily of value when working with
+arbitrarily large integers.
+.TP
.BI exp( expr )
The exponential function.
.TP
@@ -3270,7 +3292,7 @@ sign, like so:
.RS
.ft B
.nf
-function myfunc()
+function myfunc()
{
print "myfunc called"
\&.\|.\|.
@@ -3284,6 +3306,8 @@ function myfunc()
.fi
.ft R
.RE
+As of version 4.1.2, this works with user-defined functions,
+built-in functions, and extension functions.
.PP
If
.B \-\^\-lint
@@ -3912,11 +3936,6 @@ The
.B \-F
option is not necessary given the command line variable assignment feature;
it remains only for backwards compatibility.
-.PP
-Syntactically invalid single character programs tend to overflow
-the parse stack, generating a rather unhelpful message. Such programs
-are surprisingly difficult to diagnose in the completely general case,
-and the effort to do so really is not worth it.
.SH SEE ALSO
.IR egrep (1),
.IR sed (1),
diff --git a/doc/gawk.info b/doc/gawk.info
index aad73f7a..07f283ff 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -20,16 +20,13 @@ implementation of AWK.
Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.3 or
any later version published by the Free Software Foundation; with the
-Invariant Sections being "GNU General Public License", the Front-Cover
-texts being (a) (see below), and with the Back-Cover Texts being (b)
-(see below). A copy of the license is included in the section entitled
-"GNU Free Documentation License".
+Invariant Sections being "GNU General Public License", with the
+Front-Cover Texts being "A GNU Manual", and with the Back-Cover Texts
+as in (a) below. A copy of the license is included in the section
+entitled "GNU Free Documentation License".
- a. "A GNU Manual"
-
- b. "You have the freedom to copy and modify this GNU manual. Buying
- copies from the FSF supports it in developing GNU and promoting
- software freedom."
+ a. The FSF's Back-Cover Text is: "You have the freedom to copy and
+ modify this GNU manual."

File: gawk.info, Node: Top, Next: Foreword, Up: (dir)
@@ -51,16 +48,13 @@ implementation of AWK.
Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.3 or
any later version published by the Free Software Foundation; with the
-Invariant Sections being "GNU General Public License", the Front-Cover
-texts being (a) (see below), and with the Back-Cover Texts being (b)
-(see below). A copy of the license is included in the section entitled
-"GNU Free Documentation License".
-
- a. "A GNU Manual"
+Invariant Sections being "GNU General Public License", with the
+Front-Cover Texts being "A GNU Manual", and with the Back-Cover Texts
+as in (a) below. A copy of the license is included in the section
+entitled "GNU Free Documentation License".
- b. "You have the freedom to copy and modify this GNU manual. Buying
- copies from the FSF supports it in developing GNU and promoting
- software freedom."
+ a. The FSF's Back-Cover Text is: "You have the freedom to copy and
+ modify this GNU manual."
* Menu:
@@ -126,8 +120,8 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
includes command-line syntax.
* One-shot:: Running a short throwaway
`awk' program.
-* Read Terminal:: Using no input files (input from
- terminal instead).
+* Read Terminal:: Using no input files (input from the
+ keyboard instead).
* Long:: Putting permanent `awk'
programs in files.
* Executable Scripts:: Making self-contained `awk'
@@ -149,6 +143,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* Other Features:: Other Features of `awk'.
* When:: When to use `gawk' and when to
use other things.
+* Intro Summary:: Summary of the introduction.
* Command Line:: How to run `awk'.
* Options:: Command-line options and their
meanings.
@@ -170,16 +165,21 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
program.
* Obsolete:: Obsolete Options and/or features.
* Undocumented:: Undocumented Options and Features.
+* Invoking Summary:: Invocation summary.
* Regexp Usage:: How to Use Regular Expressions.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between `[...]'.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
+* awk split records:: How standard `awk' splits
+ records.
+* gawk split records:: How `gawk' splits records.
* Fields:: An introduction to fields.
* Nonconstant Fields:: Nonconstant Field Numbers.
* Changing Fields:: Changing the Contents of a Field.
@@ -190,7 +190,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* Single Character Fields:: Making each character a separate
field.
* Command Line Field Separator:: Setting `FS' from the
- command-line.
+ command line.
* Full Line Fields:: Making the full line be a single
field.
* Field Splitting Summary:: Some final points and a summary table.
@@ -216,8 +216,10 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
`getline'.
* Getline Summary:: Summary of `getline' Variants.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on
+* Command-line directories:: What happens if you put a directory on
the command line.
+* Input Summary:: Input summary.
+* Input Exercises:: Exercises.
* Print:: The `print' statement.
* Print Examples:: Simple examples of `print'
statements.
@@ -241,6 +243,8 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* Special Caveats:: Things to watch out for.
* Close Files And Pipes:: Closing Input and Output Files and
Pipes.
+* Output Summary:: Output summary.
+* Output Exercises:: Exercises.
* Values:: Constants, Variables, and Regular
Expressions.
* Constants:: String, numeric and regexp constants.
@@ -251,11 +255,14 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* Variables:: Variables give names to values for
later use.
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line
+* Assignment Options:: Setting variables on the command line
and a summary of command-line syntax.
This is an advanced method of input.
* Conversion:: The conversion of strings to numbers
and vice versa.
+* Strings And Numbers:: How `awk' Converts Between
+ Strings And Numbers.
+* Locale influences conversions:: How the locale may affect conversions.
* All Operators:: `gawk''s operators.
* Arithmetic Ops:: Arithmetic operations (`+',
`-', etc.)
@@ -283,6 +290,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* Function Calls:: A function call is an expression.
* Precedence:: How various operators nest.
* Locales:: How the locale affects things.
+* Expressions Summary:: Expressions summary.
* Pattern Overview:: What goes into a pattern.
* Regexp Patterns:: Using regexps as patterns.
* Expression Patterns:: Any expression can be used as a
@@ -329,6 +337,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
gives you information.
* ARGC and ARGV:: Ways to use `ARGC' and
`ARGV'.
+* Pattern Action Summary:: Patterns and Actions summary.
* Array Basics:: The basics of arrays.
* Array Intro:: Introduction to Arrays
* Reference to Elements:: How to examine one element of an
@@ -351,6 +360,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
`awk'.
* Multiscanning:: Scanning multidimensional arrays.
* Arrays of Arrays:: True multidimensional arrays.
+* Arrays Summary:: Summary of arrays.
* Built-in:: Summarizes the built-in functions.
* Calling Built-in:: How to call built-in functions.
* Numeric Functions:: Functions that work with numbers,
@@ -385,6 +395,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
runtime.
* Indirect Calls:: Choosing the function to call at
runtime.
+* Functions Summary:: Summary of functions.
* Library Names:: How to best name private global
variables in library functions.
* General Functions:: Functions that are of general use.
@@ -419,6 +430,8 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* Group Functions:: Functions for getting group
information.
* Walking Arrays:: A function to walk arrays of arrays.
+* Library Functions Summary:: Summary of library functions.
+* Library Exercises:: Exercises.
* Running Examples:: How to run these examples.
* Clones:: Clones of common utilities.
* Cut Program:: The `cut' utility.
@@ -448,6 +461,8 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* Anagram Program:: Finding anagrams from a dictionary.
* Signature Program:: People do amazing things with too much
time on their hands.
+* Programs Summary:: Summary of programs.
+* Programs Exercises:: Exercises.
* Nondecimal Data:: Allowing nondecimal input data.
* Array Sorting:: Facilities for controlling array
traversal and sorting arrays.
@@ -459,6 +474,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* TCP/IP Networking:: Using `gawk' for network
programming.
* Profiling:: Profiling your `awk' programs.
+* Advanced Features Summary:: Summary of advanced features.
* I18N and L10N:: Internationalization and Localization.
* Explaining gettext:: How GNU `gettext' works.
* Programmer i18n:: Features for the programmer.
@@ -470,6 +486,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* I18N Example:: A simple i18n example.
* Gawk I18N:: `gawk' is also
internationalized.
+* I18N Summary:: Summary of I18N stuff.
* Debugging:: Introduction to `gawk'
debugger.
* Debugging Concepts:: Debugging in General.
@@ -488,31 +505,23 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* Miscellaneous Debugger Commands:: Miscellaneous Commands.
* Readline Support:: Readline support.
* Limitations:: Limitations and future plans.
-* General Arithmetic:: An introduction to computer
- arithmetic.
-* Floating Point Issues:: Stuff to know about floating-point
- numbers.
-* String Conversion Precision:: The String Value Can Lie.
-* Unexpected Results:: Floating Point Numbers Are Not
- Abstract Numbers.
-* POSIX Floating Point Problems:: Standards Versus Existing Practice.
-* Integer Programming:: Effective integer programming.
-* Floating-point Programming:: Effective Floating-point Programming.
-* Floating-point Representation:: Binary floating-point representation.
-* Floating-point Context:: Floating-point context.
-* Rounding Mode:: Floating-point rounding mode.
-* Gawk and MPFR:: How `gawk' provides
- arbitrary-precision arithmetic.
-* Arbitrary Precision Floats:: Arbitrary Precision Floating-point
- Arithmetic with `gawk'.
-* Setting Precision:: Setting the working precision.
-* Setting Rounding Mode:: Setting the rounding mode.
-* Floating-point Constants:: Representing floating-point constants.
-* Changing Precision:: Changing the precision of a number.
-* Exact Arithmetic:: Exact arithmetic with floating-point
- numbers.
+* Debugging Summary:: Debugging summary.
+* Computer Arithmetic:: A quick intro to computer math.
+* Math Definitions:: Defining terms used.
+* MPFR features:: The MPFR features in `gawk'.
+* FP Math Caution:: Things to know.
+* Inexactness of computations:: Floating point math is not exact.
+* Inexact representation:: Numbers are not exactly represented.
+* Comparing FP Values:: How to compare floating point values.
+* Errors accumulate:: Errors get bigger as they go.
+* Getting Accuracy:: Getting more accuracy takes some work.
+* Try To Round:: Add digits and round.
+* Setting precision:: How to set the precision.
+* Setting the rounding mode:: How to set the rounding mode.
* Arbitrary Precision Integers:: Arbitrary Precision Integer Arithmetic
with `gawk'.
+* POSIX Floating Point Problems:: Standards Versus Existing Practice.
+* Floating point summary:: Summary of floating point discussion.
* Extension Intro:: What is an extension.
* Plugin License:: A note about licensing.
* Extension Mechanism Outline:: An outline of how it works.
@@ -574,6 +583,8 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* Extension Sample Time:: An interface to `gettimeofday()'
and `sleep()'.
* gawkextlib:: The `gawkextlib' project.
+* Extension summary:: Extension summary.
+* Extension Exercises:: Exercises.
* V7/SVR3.1:: The major changes between V7 and
System V Release 3.1.
* SVR4:: Minor changes between System V
@@ -590,6 +601,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
ranges.
* Contributors:: The major contributors to
`gawk'.
+* History summary:: History summary.
* Gawk Distribution:: What is in the `gawk'
distribution.
* Getting:: How to get the distribution.
@@ -628,6 +640,7 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* Bugs:: Reporting Problems and Bugs.
* Other Versions:: Other freely available `awk'
implementations.
+* Installation summary:: Summary of installation.
* Compatibility Mode:: How to disable certain `gawk'
extensions.
* Additions:: Making Additions To `gawk'.
@@ -636,8 +649,8 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
`gawk'.
* New Ports:: Porting `gawk' to a new
operating system.
-* Derived Files:: Why derived files are kept in the
- `git' repository.
+* Derived Files:: Why derived files are kept in the Git
+ repository.
* Future Extensions:: New features that may be implemented
one day.
* Implementation Limitations:: Some limitations of the
@@ -648,18 +661,19 @@ texts being (a) (see below), and with the Back-Cover Texts being (b)
* Extension Other Design Decisions:: Some other design decisions.
* Extension Future Growth:: Some room for future growth.
* Old Extension Mechanism:: Some compatibility for old extensions.
+* Notes summary:: Summary of implementation notes.
* Basic High Level:: The high level view.
* Basic Data Typing:: A very quick intro to data types.
- To Miriam, for making me complete.
+ To my parents, for their love, and for the wonderful example they
+set for me.
- To Chana, for the joy you bring us.
+ To my wife Miriam, for making me complete. Thank you for building
+your life together with me.
- To Rivka, for the exponential increase.
+ To our children Chana, Rivka, Nachum and Malka, for enriching our
+lives in innumerable ways.
- To Nachum, for the added dimension.
-
- To Malka, for the new beginning.

File: gawk.info, Node: Foreword, Next: Preface, Prev: Top, Up: Top
@@ -755,12 +769,10 @@ Preface
Several kinds of tasks occur repeatedly when working with text files.
You might want to extract certain lines and discard the rest. Or you
-may need to make changes wherever certain patterns appear, but leave
-the rest of the file alone. Writing single-use programs for these
-tasks in languages such as C, C++, or Java is time-consuming and
-inconvenient. Such jobs are often easier with `awk'. The `awk'
-utility interprets a special-purpose programming language that makes it
-easy to handle simple data-reformatting jobs.
+may need to make changes wherever certain patterns appear, but leave the
+rest of the file alone. Such jobs are often easy with `awk'. The
+`awk' utility interprets a special-purpose programming language that
+makes it easy to handle simple data-reformatting jobs.
The GNU implementation of `awk' is called `gawk'; if you invoke it
with the proper options or environment variables (*note Options::), it
@@ -791,6 +803,10 @@ and other `awk' implementations.
* Perform simple network communications
+ * Profile and debug `awk' programs.
+
+ * Extend the language with functions written in C or C++.
+
This Info file teaches you about the `awk' language and how you can
use it effectively. You should already be familiar with basic system
commands, such as `cat' and `ls',(2) as well as basic shell facilities,
@@ -799,13 +815,12 @@ such as input/output (I/O) redirection and pipes.
Implementations of the `awk' language are available for many
different computing environments. This Info file, while describing the
`awk' language in general, also describes the particular implementation
-of `awk' called `gawk' (which stands for "GNU awk"). `gawk' runs on a
-broad range of Unix systems, ranging from Intel(R)-architecture
-PC-based computers up through large-scale systems, such as Crays.
-`gawk' has also been ported to Mac OS X, Microsoft Windows (all
-versions) and OS/2 PCs, and VMS. (Some other, obsolete systems to
-which `gawk' was once ported are no longer supported and the code for
-those systems has been removed.)
+of `awk' called `gawk' (which stands for "GNU `awk'"). `gawk' runs on
+a broad range of Unix systems, ranging from Intel-architecture PC-based
+computers up through large-scale systems. `gawk' has also been ported
+to Mac OS X, Microsoft Windows (all versions) and OS/2 PCs, and OpenVMS.
+(Some other, obsolete systems to which `gawk' was once ported are no
+longer supported and the code for those systems has been removed.)
* Menu:
@@ -822,7 +837,7 @@ those systems has been removed.)
---------- Footnotes ----------
- (1) The 2008 POSIX standard is online at
+ (1) The 2008 POSIX standard is accessible online at
`http://www.opengroup.org/onlinepubs/9699919799/'.
(2) These commands are available on POSIX-compliant systems, as well
@@ -866,7 +881,7 @@ contributed parts of the code as well. In 1988 and 1989, David
Trueman, with help from me, thoroughly reworked `gawk' for compatibility
with the newer `awk'. Circa 1994, I became the primary maintainer.
Current development focuses on bug fixes, performance improvements,
-standards compliance, and occasionally, new features.
+standards compliance and, occasionally, new features.
In May of 1997, Ju"rgen Kahrs felt the need for network access from
`awk', and with a little help from me, set about adding features to do
@@ -890,22 +905,19 @@ A Rose by Any Other Name
The `awk' language has evolved over the years. Full details are
provided in *note Language History::. The language described in this
-Info file is often referred to as "new `awk'" (`nawk').
-
- Because of this, there are systems with multiple versions of `awk'.
-Some systems have an `awk' utility that implements the original version
-of the `awk' language and a `nawk' utility for the new version. Others
-have an `oawk' version for the "old `awk'" language and plain `awk' for
-the new one. Still others only have one version, which is usually the
-new one.(1)
-
- All in all, this makes it difficult for you to know which version of
-`awk' you should run when writing your programs. The best advice we
-can give here is to check your local documentation. Look for `awk',
-`oawk', and `nawk', as well as for `gawk'. It is likely that you
-already have some version of new `awk' on your system, which is what
-you should use when running your programs. (Of course, if you're
-reading this Info file, chances are good that you have `gawk'!)
+Info file is often referred to as "new `awk'". By analogy, the
+original version of `awk' is referred to as "old `awk'."
+
+ Today, on most systems, when you run the `awk' utility, you get some
+version of new `awk'.(1) If your system's standard `awk' is the old
+one, you will see something like this if you try the test program:
+
+ $ awk 1 /dev/null
+ error--> awk: syntax error near line 1
+ error--> awk: bailing out near line 1
+
+In this case, you should find a version of new `awk', or just install
+`gawk'!
Throughout this Info file, whenever we refer to a language feature
that should be available in any complete implementation of POSIX `awk',
@@ -914,7 +926,9 @@ specific to the GNU implementation, we use the term `gawk'.
---------- Footnotes ----------
- (1) Often, these systems use `gawk' for their `awk' implementation!
+ (1) Only Solaris systems still use an old `awk' for the default
+`awk' utility. A more modern `awk' lives in `/usr/xpg6/bin' on these
+systems.

File: gawk.info, Node: This Manual, Next: Conventions, Prev: Names, Up: Preface
@@ -1043,7 +1057,7 @@ material for those who are completely unfamiliar with computer
programming.
The *note Glossary::, defines most, if not all, the significant
-terms used throughout the book. If you find terms that you aren't
+terms used throughout the Info file. If you find terms that you aren't
familiar with, try looking them up here.
*note Copying::, and *note GNU Free Documentation License::, present
@@ -1067,7 +1081,7 @@ formatting language. A single Texinfo source file is used to produce
both the printed and online versions of the documentation. This minor
node briefly documents the typographical conventions used in Texinfo.
- Examples you would type at the command-line are preceded by the
+ Examples you would type at the command line are preceded by the
common shell primary and secondary prompts, `$' and `>'. Input that
you type is shown `like this'. Output from the command is preceded by
the glyph "-|". This typically represents the command's standard
@@ -1086,6 +1100,10 @@ key and another key, at the same time. For example, a `Ctrl-d' is typed
by first pressing and holding the `CONTROL' key, next pressing the `d'
key and finally releasing both keys.
+ For the sake of brevity, throughout this Info file, we refer to
+Brian Kernighan's version of `awk' as "BWK `awk'." (*Note Other
+Versions::, for information on his and other versions.)
+
Dark Corners
------------
@@ -1121,7 +1139,7 @@ editor. GNU Emacs is the most widely used version of Emacs today.
Software Foundation to create a complete, freely distributable,
POSIX-compliant computing environment. The FSF uses the "GNU General
Public License" (GPL) to ensure that their software's source code is
-always available to the end user. A copy of the GPL is included for
+always available to the end user. A copy of the GPL is included for
your reference (*note Copying::). The GPL applies to the C language
source code for `gawk'. To find out more about the FSF and the GNU
Project online, see the GNU Project's home page (http://www.gnu.org).
@@ -1136,17 +1154,10 @@ released but remains in an early stage of development.
Until the GNU operating system is more fully developed, you should
consider using GNU/Linux, a freely distributable, Unix-like operating
-system for Intel(R), Power Architecture, Sun SPARC, IBM S/390, and other
+system for Intel, Power Architecture, Sun SPARC, IBM S/390, and other
systems.(2) Many GNU/Linux distributions are available for download
from the Internet.
- (There are numerous other freely available, Unix-like operating
-systems based on the Berkeley Software Distribution, and some of them
-use recent versions of `gawk' for their versions of `awk'. NetBSD
-(http://www.netbsd.org), FreeBSD (http://www.freebsd.org), and OpenBSD
-(http://www.openbsd.org) are three of the most popular ones, but there
-are others.)
-
The Info file itself has gone through a number of previous editions.
Paul Rubin wrote the very first draft of `The GAWK Manual'; it was
around 40 pages in size. Diane Close and Richard Stallman improved it,
@@ -1160,19 +1171,18 @@ published the first two editions under the title `The GNU Awk User's
Guide'.
This edition maintains the basic structure of the previous editions.
-For Edition 4.0, the content has been thoroughly reviewed and updated.
-All references to `gawk' versions prior to 4.0 have been removed. Of
-significant note for this edition was *note Debugger::.
+For FSF edition 4.0, the content has been thoroughly reviewed and
+updated. All references to `gawk' versions prior to 4.0 have been
+removed. Of significant note for this edition was *note Debugger::.
- For edition 4.1, the content has been reorganized into parts, and
-the major new additions are *note Arbitrary Precision Arithmetic::, and
-*note Dynamic Extensions::.
+ For FSF edition 4.1, the content has been reorganized into parts,
+and the major new additions are *note Arbitrary Precision Arithmetic::,
+and *note Dynamic Extensions::.
- `GAWK: Effective AWK Programming' will undoubtedly continue to
-evolve. An electronic version comes with the `gawk' distribution from
-the FSF. If you find an error in this Info file, please report it!
-*Note Bugs::, for information on submitting problem reports
-electronically.
+ This Info file will undoubtedly continue to evolve. An electronic
+version comes with the `gawk' distribution from the FSF. If you find
+an error in this Info file, please report it! *Note Bugs::, for
+information on submitting problem reports electronically.
---------- Footnotes ----------
@@ -1199,14 +1209,17 @@ something more broad, I acquired the `awk.info' domain.
contributed code: the archive did not grow and the domain went unused
for several years.
- Fortunately, late in 2008, a volunteer took on the task of setting up
-an `awk'-related web site--`http://awk.info'--and did a very nice job.
+ Late in 2008, a volunteer took on the task of setting up an
+`awk'-related web site--`http://awk.info'--and did a very nice job.
If you have written an interesting `awk' program, or have written a
`gawk' extension that you would like to share with the rest of the
world, please see `http://awk.info/?contribute' for how to contribute
it to the web site.
+ As of this writing, this web site is in search of a maintainer; please
+contact me if you are interested.
+

File: gawk.info, Node: Acknowledgments, Prev: How To Contribute, Up: Preface
@@ -1279,12 +1292,26 @@ be a pleasure working with this team of fine people.
Notable code and documentation contributions were made by a number
of people. *Note Contributors::, for the full list.
+ Thanks to Patrice Dumas for the new `makeinfo' program. Thanks to
+Karl Berry who continues to work to keep the Texinfo markup language
+sane.
+
+ Robert P.J. Day, Michael Brennan and Brian Kernighan kindly acted as
+reviewers for the 2015 edition of this Info file. Their feedback helped
+improve the final work.
+
I would like to thank Brian Kernighan for invaluable assistance
during the testing and debugging of `gawk', and for ongoing help and
advice in clarifying numerous points about the language. We could not
have done nearly as good a job on either `gawk' or its documentation
without his help.
+ Brian is in a class by himself as a programmer and technical author.
+I have to thank him (yet again) for his ongoing friendship and the role
+model he has been for me for close to 30 years! Having him as a
+reviewer is an exciting privilege. It has also been extremely
+humbling...
+
I must thank my wonderful wife, Miriam, for her patience through the
many versions of this project, for her proofreading, and for sharing me
with the computer. I would like to thank my parents for their love,
@@ -1293,12 +1320,6 @@ also must acknowledge my gratitude to G-d, for the many opportunities
He has sent my way, as well as for the gifts He has given me with which
to take advantage of those opportunities.
-
-Arnold Robbins
-Nof Ayalon
-ISRAEL
-May, 2013
-

File: gawk.info, Node: Getting Started, Next: Invoking Gawk, Prev: Preface, Up: Top
@@ -1327,7 +1348,7 @@ for now. *Note User-defined::.) Each rule specifies one pattern to
search for and one action to perform upon finding the pattern.
Syntactically, a rule consists of a pattern followed by an action.
-The action is enclosed in curly braces to separate it from the pattern.
+The action is enclosed in braces to separate it from the pattern.
Newlines usually separate rules. Therefore, an `awk' program looks
like this:
@@ -1350,6 +1371,7 @@ like this:
* Other Features:: Other Features of `awk'.
* When:: When to use `gawk' and when to use
other things.
+* Intro Summary:: Summary of the introduction.

File: gawk.info, Node: Running gawk, Next: Sample Data Files, Up: Getting Started
@@ -1375,7 +1397,7 @@ variations of each.
* One-shot:: Running a short throwaway `awk'
program.
-* Read Terminal:: Using no input files (input from terminal
+* Read Terminal:: Using no input files (input from the keyboard
instead).
* Long:: Putting permanent `awk' programs in
files.
@@ -1425,7 +1447,7 @@ following command line:
awk 'PROGRAM'
`awk' applies the PROGRAM to the "standard input", which usually means
-whatever you type on the terminal. This continues until you indicate
+whatever you type on the keyboard. This continues until you indicate
end-of-file by typing `Ctrl-d'. (On other operating systems, the
end-of-file character may be different. For example, on OS/2, it is
`Ctrl-z'.)
@@ -1433,15 +1455,23 @@ end-of-file character may be different. For example, on OS/2, it is
As an example, the following program prints a friendly piece of
advice (from Douglas Adams's `The Hitchhiker's Guide to the Galaxy'),
to keep you from worrying about the complexities of computer
-programming(1) (`BEGIN' is a feature we haven't discussed yet):
+programming:
- $ awk "BEGIN { print \"Don't Panic!\" }"
+ $ awk 'BEGIN { print "Don\47t Panic!" }'
-| Don't Panic!
- This program does not read any input. The `\' before each of the
-inner double quotes is necessary because of the shell's quoting
-rules--in particular because it mixes both single quotes and double
-quotes.(2)
+ `awk' executes statements associated with `BEGIN' before reading any
+input. If there are no other statements in your program, as is the
+case here, `awk' just stops, instead of trying to read input it doesn't
+know how to process. The `\47' is an octal escape sequence for the
+single-quote character; it gets a single quote into the program without
+having to engage in ugly shell quoting tricks.
+
+ NOTE: As a side note, if you use Bash as your shell, you should
+ execute the command `set +H' before running this program
+ interactively, to disable the C shell-style command history, which
+ treats `!' as a special character. We recommend putting this
+ command into your personal startup file.
This next simple `awk' program emulates the `cat' utility; it copies
whatever you type on the keyboard to its standard output (why this
@@ -1458,17 +1488,6 @@ works is explained shortly).
-| What, me worry?
Ctrl-d
- ---------- Footnotes ----------
-
- (1) If you use Bash as your shell, you should execute the command
-`set +H' before running this program interactively, to disable the C
-shell-style command history, which treats `!' as a special character.
-We recommend putting this command into your personal startup file.
-
- (2) Although we generally recommend the use of single quotes around
-the program text, double quotes are needed here in order to put the
-single quote into the message.
-

File: gawk.info, Node: Long, Next: Executable Scripts, Prev: Read Terminal, Up: Running gawk
@@ -1482,8 +1501,8 @@ tell `awk' to use that file for its program, you type:
awk -f SOURCE-FILE INPUT-FILE1 INPUT-FILE2 ...
The `-f' instructs the `awk' utility to get the `awk' program from
-the file SOURCE-FILE. Any file name can be used for SOURCE-FILE. For
-example, you could put the program:
+the file SOURCE-FILE (*note Options::). Any file name can be used for
+SOURCE-FILE. For example, you could put the program:
BEGIN { print "Don't Panic!" }
@@ -1523,8 +1542,8 @@ like this:
BEGIN { print "Don't Panic!" }
After making this file executable (with the `chmod' utility), simply
-type `advice' at the shell and the system arranges to run `awk'(2) as
-if you had typed `awk -f advice':
+type `advice' at the shell and the system arranges to run `awk' as if
+you had typed `awk -f advice':
$ chmod +x advice
$ advice
@@ -1538,7 +1557,24 @@ at the shell.)
program that users can invoke without their having to know that the
program is written in `awk'.
- Portability Issues with `#!'
+ Understanding `#!'
+
+ `awk' is an "interpreted" language. This means that the `awk'
+utility reads your program and then processes your data according to
+the instructions in your program. (This is different from a "compiled"
+language such as C, where your program is first compiled into machine
+code that is executed directly by your system's hardware.) The `awk'
+utility is thus termed an "interpreter". Many modern languages are
+interpreted.
+
+ The line beginning with `#!' lists the full file name of an
+interpreter to run and a single optional initial command-line argument
+to pass to that interpreter. The operating system then runs the
+interpreter with the given argument and the full argument list of the
+executed program. The first argument in the list is the full file name
+of the `awk' program. The rest of the argument list contains either
+options to `awk', or data files, or both. Note that on many systems
+`awk' may be found in `/usr/bin' instead of in `/bin'. Caveat Emptor.
Some systems limit the length of the interpreter name to 32
characters. Often, this can be dealt with by using a symbolic link.
@@ -1560,15 +1596,6 @@ the name of your script (`advice'). (d.c.) Don't rely on the value of
(1) The `#!' mechanism works on GNU/Linux systems, BSD-based systems
and commercial Unix systems.
- (2) The line beginning with `#!' lists the full file name of an
-interpreter to run and an optional initial command-line argument to
-pass to that interpreter. The operating system then runs the
-interpreter with the given argument and the full argument list of the
-executed program. The first argument in the list is the full file name
-of the `awk' program. The rest of the argument list contains either
-options to `awk', or data files, or both. Note that on many systems
-`awk' may be found in `/usr/bin' instead of in `/bin'. Caveat Emptor.
-

File: gawk.info, Node: Comments, Next: Quoting, Prev: Executable Scripts, Up: Running gawk
@@ -1606,7 +1633,7 @@ at a later time.
will probably print strange messages about syntax errors. For
example, look at the following:
- $ awk '{ print "hello" } # let's be cute'
+ $ awk 'BEGIN { print "hello" } # let's be cute'
>
The shell sees that the first two quotes match, and that a new
@@ -1646,6 +1673,23 @@ knowledge of shell quoting rules. The following rules apply only to
POSIX-compliant, Bourne-style shells (such as Bash, the GNU Bourne-Again
Shell). If you use the C shell, you're on your own.
+ Before diving into the rules, we introduce a concept that appears
+throughout this Info file, which is that of the "null", or empty,
+string.
+
+ The null string is character data that has no value. In other
+words, it is empty. It is written in `awk' programs like this: `""'.
+In the shell, it can be written using single or double quotes: `""' or
+`'''. While the null string has no characters in it, it does exist.
+Consider this command:
+
+ $ echo ""
+
+Here, the `echo' utility receives a single argument, even though that
+argument has no characters in it. In the rest of this Info file, we use
+the terms "null string" and "empty string" interchangeably. Now, on to
+the quoting rules.
+
* Quoted items can be concatenated with nonquoted items as well as
with other quoted items. The shell turns everything into one
argument for the command.
@@ -1679,9 +1723,9 @@ Shell). If you use the C shell, you're on your own.
Note that the single quote is not special within double quotes.
* Null strings are removed when they occur as part of a non-null
- command-line argument, while explicit non-null objects are kept.
- For example, to specify that the field separator `FS' should be
- set to the null string, use:
+ command-line argument, while explicit null objects are kept. For
+ example, to specify that the field separator `FS' should be set to
+ the null string, use:
awk -F "" 'PROGRAM' FILES # correct
@@ -1774,10 +1818,10 @@ one "record".
In the data file `mail-list', each record contains the name of a
person, his/her phone number, his/her email-address, and a code for
-their relationship with the author of the list. An `A' in the last
-column means that the person is an acquaintance. An `F' in the last
-column means that the person is a friend. An `R' means that the person
-is a relative:
+their relationship with the author of the list. The columns are
+aligned using spaces. An `A' in the last column means that the person
+is an acquaintance. An `F' in the last column means that the person is
+a friend. An `R' means that the person is a relative:
Amelia 555-5553 amelia.zodiacusque@gmail.com F
Anthony 555-3412 anthony.asserturo@hotmail.com A
@@ -1796,7 +1840,8 @@ shipments during the year. Each record contains the month, the number
of green crates shipped, the number of red boxes shipped, the number of
orange bags shipped, and the number of blue packages shipped,
respectively. There are 16 entries, covering the 12 months of last year
-and the first four months of the current year.
+and the first four months of the current year. An empty line separates
+the data for the two years.
Jan 13 25 15 115
Feb 15 32 24 226
@@ -1859,10 +1904,10 @@ for _every_ input line. If the action is omitted, the default action
is to print all lines that match the pattern.
Thus, we could leave out the action (the `print' statement and the
-curly braces) in the previous example and the result would be the same:
-`awk' prints all lines matching the pattern `li'. By comparison,
-omitting the `print' statement but retaining the curly braces makes an
-empty action that does nothing (i.e., no lines are printed).
+braces) in the previous example and the result would be the same: `awk'
+prints all lines matching the pattern `li'. By comparison, omitting
+the `print' statement but retaining the braces makes an empty action
+that does nothing (i.e., no lines are printed).
Many practical `awk' programs are just a line or two. Following is a
collection of useful, short programs to get you started. Some of these
@@ -1876,26 +1921,30 @@ often more than one way to do things in `awk'. At some point, you may
want to look back at these examples and see if you can come up with
different ways to do the same things shown here:
- * Print the length of the longest input line:
-
- awk '{ if (length($0) > max) max = length($0) }
- END { print max }' data
-
* Print every line that is longer than 80 characters:
awk 'length($0) > 80' data
The sole rule has a relational expression as its pattern and it
- has no action--so the default action, printing the record, is used.
+ has no action--so it uses the default action, printing the record.
+
+ * Print the length of the longest input line:
+
+ awk '{ if (length($0) > max) max = length($0) }
+ END { print max }' data
+
+ The code associated with `END' executes after all input has been
+ read; it's the other side of the coin to `BEGIN'.
* Print the length of the longest line in `data':
- expand data | awk '{ if (x < length()) x = length() }
+ expand data | awk '{ if (x < length($0)) x = length($0) }
END { print "maximum line length is " x }'
- The input is processed by the `expand' utility to change TABs into
- spaces, so the widths compared are actually the right-margin
- columns.
+ This example differs slightly from the previous one: The input is
+ processed by the `expand' utility to change TABs into spaces, so
+ the widths compared are actually the right-margin columns, as
+ opposed to the number of input characters on each line.
* Print every line that has at least one field:
@@ -1943,9 +1992,8 @@ File: gawk.info, Node: Two Rules, Next: More Complex, Prev: Very Simple, Up:
The `awk' utility reads the input files one line at a time. For each
line, `awk' tries the patterns of each of the rules. If several
-patterns match, then several actions are run in the order in which they
-appear in the `awk' program. If no patterns match, then no actions are
-run.
+patterns match, then several actions execute in the order in which they
+appear in the `awk' program. If no patterns match, then no actions run.
After processing all the rules that match the line (and perhaps
there are none), `awk' reads the next line. (However, *note Next
@@ -1993,8 +2041,8 @@ summarize, select, and rearrange the output of another utility. It uses
features that haven't been covered yet, so don't worry if you don't
understand all the details:
- LC_ALL=C ls -l | awk '$6 == "Nov" { sum += $5 }
- END { print sum }'
+ ls -l | awk '$6 == "Nov" { sum += $5 }
+ END { print sum }'
This command prints the total number of bytes in all the files in the
current directory that were last modified in November (of any year).
@@ -2021,12 +2069,12 @@ contains the file name.(1)
The `$6 == "Nov"' in our `awk' program is an expression that tests
whether the sixth field of the output from `ls -l' matches the string
-`Nov'. Each time a line has the string `Nov' for its sixth field, the
-action `sum += $5' is performed. This adds the fifth field (the file's
-size) to the variable `sum'. As a result, when `awk' has finished
-reading all the input lines, `sum' is the total of the sizes of the
-files whose lines matched the pattern. (This works because `awk'
-variables are automatically initialized to zero.)
+`Nov'. Each time a line has the string `Nov' for its sixth field,
+`awk' performs the action `sum += $5'. This adds the fifth field (the
+file's size) to the variable `sum'. As a result, when `awk' has
+finished reading all the input lines, `sum' is the total of the sizes
+of the files whose lines matched the pattern. (This works because
+`awk' variables are automatically initialized to zero.)
After the last line of output from `ls' has been processed, the
`END' rule executes and prints the value of `sum'. In this example,
@@ -2078,15 +2126,15 @@ We have generally not used backslash continuation in our sample
programs. `gawk' places no limit on the length of a line, so backslash
continuation is never strictly necessary; it just makes programs more
readable. For this same reason, as well as for clarity, we have kept
-most statements short in the sample programs presented throughout the
-Info file. Backslash continuation is most useful when your `awk'
-program is in a separate source file instead of entered from the
-command line. You should also note that many `awk' implementations are
-more particular about where you may use backslash continuation. For
-example, they may not allow you to split a string constant using
-backslash continuation. Thus, for maximum portability of your `awk'
-programs, it is best not to split your lines in the middle of a regular
-expression or a string.
+most statements short in the programs presented throughout the Info
+file. Backslash continuation is most useful when your `awk' program is
+in a separate source file instead of entered from the command line.
+You should also note that many `awk' implementations are more
+particular about where you may use backslash continuation. For example,
+they may not allow you to split a string constant using backslash
+continuation. Thus, for maximum portability of your `awk' programs, it
+is best not to split your lines in the middle of a regular expression
+or a string.
CAUTION: _Backslash continuation does not work as described with
the C shell._ It works for `awk' programs in files and for
@@ -2128,7 +2176,7 @@ comment, it ignores _everything_ on the rest of the line. For example:
> BEGIN rule
> }'
error--> gawk: cmd. line:2: BEGIN rule
- error--> gawk: cmd. line:2: ^ parse error
+ error--> gawk: cmd. line:2: ^ syntax error
In this case, it looks like the backslash would continue the comment
onto the next line. However, the backslash-newline combination is never
@@ -2174,10 +2222,10 @@ determining the type of a variable, and array sorting.
As we develop our presentation of the `awk' language, we introduce
most of the variables and many of the functions. They are described
-systematically in *note Built-in Variables::, and *note Built-in::.
+systematically in *note Built-in Variables::, and in *note Built-in::.

-File: gawk.info, Node: When, Prev: Other Features, Up: Getting Started
+File: gawk.info, Node: When, Next: Intro Summary, Prev: Other Features, Up: Getting Started
1.8 When to Use `awk'
=====================
@@ -2201,20 +2249,45 @@ edit-compile-test-debug cycle of software development.
retargetable assembler for eight-bit microprocessors (*note Glossary::,
for more information), and a microcode assembler for a special-purpose
Prolog computer. While the original `awk''s capabilities were strained
-by tasks of such complexity, modern versions are more capable. Even
-Brian Kernighan's version of `awk' has fewer predefined limits, and
-those that it has are much larger than they used to be.
+by tasks of such complexity, modern versions are more capable.
If you find yourself writing `awk' scripts of more than, say, a few
hundred lines, you might consider using a different programming
-language. Emacs Lisp is a good choice if you need sophisticated string
-or pattern matching capabilities. The shell is also good at string and
-pattern matching; in addition, it allows powerful use of the system
-utilities. More conventional languages, such as C, C++, and Java, offer
-better facilities for system programming and for managing the complexity
-of large programs. Programs in these languages may require more lines
-of source code than the equivalent `awk' programs, but they are easier
-to maintain and usually run more efficiently.
+language. The shell is good at string and pattern matching; in
+addition, it allows powerful use of the system utilities. Python
+offers a nice balance between high-level ease of programming and access
+to system facilities.(1)
+
+ ---------- Footnotes ----------
+
+ (1) Other popular scripting languages include Ruby and Perl.
+
+
+File: gawk.info, Node: Intro Summary, Prev: When, Up: Getting Started
+
+1.9 Summary
+===========
+
+ * Programs in `awk' consist of PATTERN-ACTION pairs.
+
+ * An ACTION without a PATTERN always runs. The default ACTION for a
+ pattern without one is `{ print $0 }'.
+
+ * Use either `awk 'PROGRAM' FILES' or `awk -f PROGRAM-FILE FILES' to
+ run `awk'.
+
+ * You may use the special `#!' header line to create `awk' programs
+ that are directly executable.
+
+ * Comments in `awk' programs start with `#' and continue to the end
+ of the same line.
+
+ * Be aware of quoting issues when writing `awk' programs as part of
+ a larger shell script (or MS-Windows batch file).
+
+ * You may use backslash continuation to continue a source line.
+ Lines are automatically continued after a comma, open brace,
+ question mark, colon, `||', `&&', `do' and `else'.

File: gawk.info, Node: Invoking Gawk, Next: Regexp, Prev: Getting Started, Up: Top
@@ -2246,6 +2319,7 @@ this major node that don't interest you right now.
* Loading Shared Libraries:: Loading shared libraries into your program.
* Obsolete:: Obsolete Options and/or features.
* Undocumented:: Undocumented Options and Features.
+* Invoking Summary:: Invocation summary.

File: gawk.info, Node: Command Line, Next: Options, Up: Invoking Gawk
@@ -2257,8 +2331,8 @@ There are two ways to run `awk'--with an explicit program or with one
or more program files. Here are templates for both of them; items
enclosed in [...] in these templates are optional:
- awk [OPTIONS] -f progfile [`--'] FILE ...
- awk [OPTIONS] [`--'] 'PROGRAM' FILE ...
+ `awk' [OPTIONS] `-f' PROGFILE [`--'] FILE ...
+ `awk' [OPTIONS] [`--'] `'PROGRAM'' FILE ...
Besides traditional one-letter POSIX-style options, `gawk' also
supports GNU long options.
@@ -2344,26 +2418,26 @@ The following list describes options mandated by the POSIX standard:
treated as single-byte characters.
Normally, `gawk' follows the POSIX standard and attempts to process
- its input data according to the current locale. This can often
- involve converting multibyte characters into wide characters
- (internally), and can lead to problems or confusion if the input
- data does not contain valid multibyte characters. This option is
- an easy way to tell `gawk': "hands off my data!".
+ its input data according to the current locale (*note Locales::).
+ This can often involve converting multibyte characters into wide
+ characters (internally), and can lead to problems or confusion if
+ the input data does not contain valid multibyte characters. This
+ option is an easy way to tell `gawk': "hands off my data!".
`-c'
`--traditional'
Specify "compatibility mode", in which the GNU extensions to the
- `awk' language are disabled, so that `gawk' behaves just like
- Brian Kernighan's version `awk'. *Note POSIX/GNU::, which
- summarizes the extensions. Also see *note Compatibility Mode::.
+ `awk' language are disabled, so that `gawk' behaves just like BWK
+ `awk'. *Note POSIX/GNU::, which summarizes the extensions. Also
+ see *note Compatibility Mode::.
`-C'
`--copyright'
Print the short version of the General Public License and then
exit.
-`-d[FILE]'
-`--dump-variables[=FILE]'
+`-d'[FILE]
+`--dump-variables'[`='FILE]
Print a sorted list of global variables, their types, and final
values to FILE. If no FILE is provided, print this list to the
file named `awkvars.out' in the current directory. No space is
@@ -2377,25 +2451,25 @@ The following list describes options mandated by the POSIX standard:
particularly easy mistake to make with simple variable names like
`i', `j', etc.)
-`-D[FILE]'
-`--debug=[FILE]'
+`-D'[FILE]
+`--debug'[`='FILE]
Enable debugging of `awk' programs (*note Debugging::). By
default, the debugger reads commands interactively from the
- terminal. The optional FILE argument allows you to specify a file
+ keyboard. The optional FILE argument allows you to specify a file
with a list of commands for the debugger to execute
non-interactively. No space is allowed between the `-D' and FILE,
if FILE is supplied.
-`-e PROGRAM-TEXT'
-`--source PROGRAM-TEXT'
+`-e' PROGRAM-TEXT
+`--source' PROGRAM-TEXT
Provide program source code in the PROGRAM-TEXT. This option
allows you to mix source code in files with source code that you
enter on the command line. This is particularly useful when you
have library functions that you want to use from your command-line
programs (*note AWKPATH Variable::).
-`-E FILE'
-`--exec FILE'
+`-E' FILE
+`--exec' FILE
Similar to `-f', read `awk' program text from FILE. There are two
differences from `-f':
@@ -2408,9 +2482,9 @@ The following list describes options mandated by the POSIX standard:
This option is particularly necessary for World Wide Web CGI
applications that pass arguments through the URL; using this
option prevents a malicious (or other) user from passing in
- options, assignments, or `awk' source code (via `--source') to the
- CGI application. This option should be used with `#!' scripts
- (*note Executable Scripts::), like so:
+ options, assignments, or `awk' source code (via `-e') to the CGI
+ application. This option should be used with `#!' scripts (*note
+ Executable Scripts::), like so:
#! /usr/local/bin/gawk -E
@@ -2428,37 +2502,41 @@ The following list describes options mandated by the POSIX standard:
Print a "usage" message summarizing the short and long style
options that `gawk' accepts and then exit.
-`-i SOURCE-FILE'
-`--include SOURCE-FILE'
+`-i' SOURCE-FILE
+`--include' SOURCE-FILE
Read `awk' source library from SOURCE-FILE. This option is
completely equivalent to using the `@include' directive inside
your program. This option is very similar to the `-f' option, but
there are two important differences. First, when `-i' is used,
- the program source will not be loaded if it has been previously
- loaded, whereas the `-f' will always load the file. Second,
- because this option is intended to be used with code libraries,
- `gawk' does not recognize such files as constituting main program
- input. Thus, after processing an `-i' argument, `gawk' still
- expects to find the main source code via the `-f' option or on the
- command-line.
-
-`-l LIB'
-`--load LIB'
- Load a shared library LIB. This searches for the library using the
- `AWKLIBPATH' environment variable. The correct library suffix for
- your platform will be supplied by default, so it need not be
- specified in the library name. The library initialization routine
- should be named `dl_load()'. An alternative is to use the `@load'
- keyword inside the program to load a shared library.
-
-`-L [value]'
-`--lint[=value]'
+ the program source is not loaded if it has been previously loaded,
+ whereas with `-f', `gawk' always loads the file. Second, because
+ this option is intended to be used with code libraries, `gawk'
+ does not recognize such files as constituting main program input.
+ Thus, after processing an `-i' argument, `gawk' still expects to
+ find the main source code via the `-f' option or on the command
+ line.
+
+`-l' EXT
+`--load' EXT
+ Load a dynamic extension named EXT. Extensions are stored as
+ system shared libraries. This option searches for the library
+ using the `AWKLIBPATH' environment variable. The correct library
+ suffix for your platform will be supplied by default, so it need
+ not be specified in the extension name. The extension
+ initialization routine should be named `dl_load()'. An
+ alternative is to use the `@load' keyword inside the program to
+ load a shared library. This feature is described in detail in
+ *note Dynamic Extensions::.
+
+`-L'[VALUE]
+`--lint'[`='VALUE]
Warn about constructs that are dubious or nonportable to other
- `awk' implementations. Some warnings are issued when `gawk' first
- reads your program. Others are issued at runtime, as your program
- executes. With an optional argument of `fatal', lint warnings
- become fatal errors. This may be drastic, but its use will
- certainly encourage the development of cleaner `awk' programs.
+ `awk' implementations. No space is allowed between the `-L' and
+ VALUE, if VALUE is supplied. Some warnings are issued when `gawk'
+ first reads your program. Others are issued at runtime, as your
+ program executes. With an optional argument of `fatal', lint
+ warnings become fatal errors. This may be drastic, but its use
+ will certainly encourage the development of cleaner `awk' programs.
With an optional argument of `invalid', only warnings about things
that are actually invalid are issued. (This is not fully
implemented yet.)
@@ -2474,7 +2552,7 @@ The following list describes options mandated by the POSIX standard:
`--bignum'
Force arbitrary precision arithmetic on numbers. This option has
no effect if `gawk' is not compiled to use the GNU MPFR and MP
- libraries (*note Gawk and MPFR::).
+ libraries (*note Arbitrary Precision Arithmetic::).
`-n'
`--non-decimal-data'
@@ -2489,23 +2567,24 @@ The following list describes options mandated by the POSIX standard:
Force the use of the locale's decimal point character when parsing
numeric input data (*note Locales::).
-`-o[FILE]'
-`--pretty-print[=FILE]'
+`-o'[FILE]
+`--pretty-print'[`='FILE]
 Enable pretty-printing of `awk' programs. By default, the output
- program is created in a file named `awkprof.out'. The optional
- FILE argument allows you to specify a different file name for the
- output. No space is allowed between the `-o' and FILE, if FILE is
- supplied.
+ program is created in a file named `awkprof.out' (*note
+ Profiling::). The optional FILE argument allows you to specify a
+ different file name for the output. No space is allowed between
+ the `-o' and FILE, if FILE is supplied.
+
+ NOTE: In the past, this option would also execute your
+ program. This is no longer the case.
`-O'
`--optimize'
Enable some optimizations on the internal representation of the
- program. At the moment this includes just simple constant
- folding. The `gawk' maintainer hopes to add more optimizations
- over time.
+ program. At the moment this includes just simple constant folding.
-`-p[FILE]'
-`--profile[=FILE]'
+`-p'[FILE]
+`--profile'[`='FILE]
Enable profiling of `awk' programs (*note Profiling::). By
default, profiles are created in a file named `awkprof.out'. The
optional FILE argument allows you to specify a different file name
@@ -2530,7 +2609,7 @@ The following list describes options mandated by the POSIX standard:
* Newlines are not allowed after `?' or `:' (*note Conditional
Exp::).
- * Specifying `-Ft' on the command-line does not set the value
+ * Specifying `-Ft' on the command line does not set the value
of `FS' to be a single TAB character (*note Field
Separators::).
@@ -2538,15 +2617,15 @@ The following list describes options mandated by the POSIX standard:
data (*note Locales::).
If you supply both `--traditional' and `--posix' on the command
- line, `--posix' takes precedence. `gawk' also issues a warning if
- both options are supplied.
+ line, `--posix' takes precedence. `gawk' issues a warning if both
+ options are supplied.
`-r'
`--re-interval'
Allow interval expressions (*note Regexp Operators::) in regexps.
This is now `gawk''s default behavior. Nevertheless, this option
remains both for backward compatibility, and for use in
- combination with the `--traditional' option.
+ combination with `--traditional'.
`-S'
`--sandbox'
@@ -2586,35 +2665,33 @@ having to be included into each individual program. (As mentioned in
*note Definition Syntax::, function names must be unique.)
With standard `awk', library functions can still be used, even if
-the program is entered at the terminal, by specifying `-f /dev/tty'.
+the program is entered at the keyboard, by specifying `-f /dev/tty'.
After typing your program, type `Ctrl-d' (the end-of-file character) to
terminate it. (You may also use `-f -' to read program source from the
standard input but then you will not be able to also use the standard
input as a source of data.)
Because it is clumsy using the standard `awk' mechanisms to mix
-source file and command-line `awk' programs, `gawk' provides the
-`--source' option. This does not require you to pre-empt the standard
-input for your source code; it allows you to easily mix command-line
-and library source code (*note AWKPATH Variable::). The `--source'
-option may also be used multiple times on the command line.
+source file and command-line `awk' programs, `gawk' provides the `-e'
+option. This does not require you to pre-empt the standard input for
+your source code; it allows you to easily mix command-line and library
+source code (*note AWKPATH Variable::). As with `-f', the `-e' and `-i'
+options may also be used multiple times on the command line.
- If no `-f' or `--source' option is specified, then `gawk' uses the
-first non-option command-line argument as the text of the program
-source code.
+ If no `-f' or `-e' option is specified, then `gawk' uses the first
+non-option command-line argument as the text of the program source code.
If the environment variable `POSIXLY_CORRECT' exists, then `gawk'
-behaves in strict POSIX mode, exactly as if you had supplied the
-`--posix' command-line option. Many GNU programs look for this
-environment variable to suppress extensions that conflict with POSIX,
-but `gawk' behaves differently: it suppresses all extensions, even
-those that do not conflict with POSIX, and behaves in strict POSIX
-mode. If `--lint' is supplied on the command line and `gawk' turns on
-POSIX mode because of `POSIXLY_CORRECT', then it issues a warning
-message indicating that POSIX mode is in effect. You would typically
-set this variable in your shell's startup file. For a
-Bourne-compatible shell (such as Bash), you would add these lines to
-the `.profile' file in your home directory:
+behaves in strict POSIX mode, exactly as if you had supplied `--posix'.
+Many GNU programs look for this environment variable to suppress
+extensions that conflict with POSIX, but `gawk' behaves differently: it
+suppresses all extensions, even those that do not conflict with POSIX,
+and behaves in strict POSIX mode. If `--lint' is supplied on the
+command line and `gawk' turns on POSIX mode because of
+`POSIXLY_CORRECT', then it issues a warning message indicating that
+POSIX mode is in effect. You would typically set this variable in your
+shell's startup file. For a Bourne-compatible shell (such as Bash),
+you would add these lines to the `.profile' file in your home directory:
POSIXLY_CORRECT=true
export POSIXLY_CORRECT
@@ -2651,6 +2728,10 @@ arguments, including variable assignments, are included. As each
element of `ARGV' is processed, `gawk' sets the variable `ARGIND' to
the index in `ARGV' of the current element.
+ Changing `ARGC' and `ARGV' in your `awk' program lets you control
+how `awk' processes the input files; this is described in more detail
+in *note ARGC and ARGV::.
+
The distinction between file name arguments and variable-assignment
arguments is made when `awk' is about to open the next input file. At
that point in execution, it checks the file name to see whether it is
@@ -2666,18 +2747,18 @@ begins scanning the argument list.
The variable values given on the command line are processed for
escape sequences (*note Escape Sequences::). (d.c.)
- In some earlier implementations of `awk', when a variable assignment
-occurred before any file names, the assignment would happen _before_
-the `BEGIN' rule was executed. `awk''s behavior was thus inconsistent;
-some command-line assignments were available inside the `BEGIN' rule,
-while others were not. Unfortunately, some applications came to depend
-upon this "feature." When `awk' was changed to be more consistent, the
-`-v' option was added to accommodate applications that depended upon
-the old behavior.
+ In some very early implementations of `awk', when a variable
+assignment occurred before any file names, the assignment would happen
+_before_ the `BEGIN' rule was executed. `awk''s behavior was thus
+inconsistent; some command-line assignments were available inside the
+`BEGIN' rule, while others were not. Unfortunately, some applications
+came to depend upon this "feature." When `awk' was changed to be more
+consistent, the `-v' option was added to accommodate applications that
+depended upon the old behavior.
The variable assignment feature is most useful for assigning to
variables such as `RS', `OFS', and `ORS', which control input and
-output formats before scanning the data files. It is also useful for
+output formats, before scanning the data files. It is also useful for
controlling state if multiple passes are needed over a data file. For
example:
@@ -2712,7 +2793,7 @@ with `getline' (*note Getline/File::).
In addition, `gawk' allows you to specify the special file name
`/dev/stdin', both on the command line and with `getline'. Some other
versions of `awk' also support this, but it is not standard. (Some
-operating systems provide a `/dev/stdin' file in the file system,
+operating systems provide a `/dev/stdin' file in the filesystem;
however, `gawk' always processes this file name itself.)

@@ -2738,7 +2819,7 @@ File: gawk.info, Node: AWKPATH Variable, Next: AWKLIBPATH Variable, Up: Envir
----------------------------------------
The previous minor node described how `awk' program files can be named
-on the command-line with the `-f' option. In most `awk'
+on the command line with the `-f' option. In most `awk'
implementations, you must supply a precise path name for each program
file, unless the file is in the current directory. But in `gawk', if
the file name supplied to the `-f' or `-i' options does not contain a
@@ -2751,27 +2832,29 @@ colons(1). `gawk' gets its search path from the `AWKPATH' environment
variable. If that variable does not exist, `gawk' uses a default path,
`.:/usr/local/share/awk'.(2)
- The search path feature is particularly useful for building libraries
-of useful `awk' functions. The library files can be placed in a
-standard directory in the default path and then specified on the
+ The search path feature is particularly helpful for building
+libraries of useful `awk' functions. The library files can be placed
+in a standard directory in the default path and then specified on the
command line with a short file name. Otherwise, the full file name
would have to be typed for each file.
- By using the `-i' option, or the `--source' and `-f' options, your
+ By using the `-i' option, or the `-e' and `-f' options, your
command-line `awk' programs can use facilities in `awk' library files
(*note Library Functions::). Path searching is not done if `gawk' is
in compatibility mode. This is true for both `--traditional' and
`--posix'. *Note Options::.
If the source code is not found after the initial search, the path
-is searched again after adding the default `.awk' suffix to the
-filename.
+is searched again after adding the default `.awk' suffix to the file
+name.
NOTE: To include the current directory in the path, either place
`.' explicitly in the path or write a null entry in the path. (A
null entry is indicated by starting or ending the path with a
- colon or by placing two colons next to each other (`::').) This
- path search mechanism is similar to the shell's.
+ colon or by placing two colons next to each other [`::'].) This
+ path search mechanism is similar to the shell's. (See `The
+ Bourne-Again SHell manual'
+ (http://www.gnu.org/software/bash/manual/).)
However, `gawk' always looks in the current directory _before_
searching `AWKPATH', so there is no real reason to include the
@@ -2779,8 +2862,8 @@ filename.
If `AWKPATH' is not defined in the environment, `gawk' places its
default search path into `ENVIRON["AWKPATH"]'. This makes it easy to
-determine the actual search path that `gawk' will use from within an
-`awk' program.
+determine the actual search path that `gawk' used from within an `awk'
+program.
While you can change `ENVIRON["AWKPATH"]' within your `awk' program,
this has no effect on the running program's behavior. This makes
@@ -2804,13 +2887,13 @@ File: gawk.info, Node: AWKLIBPATH Variable, Next: Other Environment Variables,
-------------------------------------------
The `AWKLIBPATH' environment variable is similar to the `AWKPATH'
-variable, but it is used to search for shared libraries specified with
-the `-l' option rather than for source files. If the library is not
-found, the path is searched again after adding the appropriate shared
-library suffix for the platform. For example, on GNU/Linux systems,
-the suffix `.so' is used. The search path specified is also used for
-libraries loaded via the `@load' keyword (*note Loading Shared
-Libraries::).
+variable, but it is used to search for loadable extensions (stored as
+system shared libraries) specified with the `-l' option rather than for
+source files. If the extension is not found, the path is searched
+again after adding the appropriate shared library suffix for the
+platform. For example, on GNU/Linux systems, the suffix `.so' is used.
+The search path specified is also used for extensions loaded via the
+`@load' keyword (*note Loading Shared Libraries::).

File: gawk.info, Node: Other Environment Variables, Prev: AWKLIBPATH Variable, Up: Environment Variables
@@ -2823,11 +2906,11 @@ they are more specialized. Those in the following list are meant to be
used by regular users.
`POSIXLY_CORRECT'
- Causes `gawk' to switch POSIX compatibility mode, disabling all
+ Causes `gawk' to switch to POSIX compatibility mode, disabling all
traditional and GNU extensions. *Note Options::.
`GAWK_SOCK_RETRIES'
- Controls the number of time `gawk' will attempt to retry a two-way
+ Controls the number of times `gawk' attempts to retry a two-way
TCP/IP (socket) connection before giving up. *Note TCP/IP
Networking::.
@@ -2844,9 +2927,18 @@ used by regular users.
the `gawk' developers for testing and tuning. They are subject to
change. The variables are:
+`AWKBUFSIZE'
+ This variable only affects `gawk' on POSIX-compliant systems.
+ With a value of `exact', `gawk' uses the size of each input file
+ as the size of the memory buffer to allocate for I/O. Otherwise,
+ the value should be a number, and `gawk' uses that number as the
+ size of the buffer to allocate. (When this variable is not set,
+ `gawk' uses the smaller of the file's size and the "default"
+ blocksize, which is usually the filesystem's I/O blocksize.)
+
`AWK_HASH'
- If this variable exists with a value of `gst', `gawk' will switch
- to using the hash function from GNU Smalltalk for managing arrays.
+ If this variable exists with a value of `gst', `gawk' switches to
+ using the hash function from GNU Smalltalk for managing arrays.
This function may be marginally faster than the standard function.
`AWKREADFUNC'
@@ -2949,7 +3041,7 @@ enclosed in double quotes.
NOTE: Keep in mind that this is a language construct and the file
name cannot be a string variable, but rather just a literal string
- in double quotes.
+ constant in double quotes.
The files to be included may be nested; e.g., given a third script,
namely `test3':
@@ -3004,22 +3096,22 @@ and this also applies to files named with `@include'.

File: gawk.info, Node: Loading Shared Libraries, Next: Obsolete, Prev: Include Files, Up: Invoking Gawk
-2.8 Loading Shared Libraries Into Your Program
-==============================================
+2.8 Loading Dynamic Extensions Into Your Program
+================================================
This minor node describes a feature that is specific to `gawk'.
- The `@load' keyword can be used to read external `awk' shared
-libraries. This allows you to link in compiled code that may offer
-superior performance and/or give you access to extended capabilities
-not supported by the `awk' language. The `AWKLIBPATH' variable is used
-to search for the shared library. Using `@load' is completely
-equivalent to using the `-l' command-line option.
+ The `@load' keyword can be used to read external `awk' extensions
+(stored as system shared libraries). This allows you to link in
+compiled code that may offer superior performance and/or give you
+access to extended capabilities not supported by the `awk' language.
+The `AWKLIBPATH' variable is used to search for the extension. Using
+`@load' is completely equivalent to using the `-l' command-line option.
- If the shared library is not initially found in `AWKLIBPATH', another
+ If the extension is not initially found in `AWKLIBPATH', another
search is conducted after appending the platform's default shared
-library suffix to the filename. For example, on GNU/Linux systems, the
-suffix `.so' is used.
+library suffix to the file name. For example, on GNU/Linux systems,
+the suffix `.so' is used.
$ gawk '@load "ordchr"; BEGIN {print chr(65)}'
-| A
@@ -3031,7 +3123,7 @@ This is equivalent to the following example:
For command-line usage, the `-l' option is more convenient, but `@load'
is useful for embedding inside an `awk' source file that requires
-access to a shared library.
+access to an extension.
*note Dynamic Extensions::, describes how to write extensions (in C
or C++) that can be loaded with either `@load' or the `-l' option.
@@ -3053,7 +3145,7 @@ worked. As of version 4.0, they are no longer interpreted specially by
`gawk'. (Use `PROCINFO' instead; see *note Auto-set::.)

-File: gawk.info, Node: Undocumented, Prev: Obsolete, Up: Invoking Gawk
+File: gawk.info, Node: Undocumented, Next: Invoking Summary, Prev: Obsolete, Up: Invoking Gawk
2.10 Undocumented Options and Features
======================================
@@ -3063,6 +3155,48 @@ File: gawk.info, Node: Undocumented, Prev: Obsolete, Up: Invoking Gawk
This minor node intentionally left blank.

+File: gawk.info, Node: Invoking Summary, Prev: Undocumented, Up: Invoking Gawk
+
+2.11 Summary
+============
+
+ * Use either `awk 'PROGRAM' FILES' or `awk -f PROGRAM-FILE FILES' to
+ run `awk'.
+
+ * The three standard options for all versions of `awk' are `-f',
+ `-F' and `-v'. `gawk' supplies these and many others, as well as
+ corresponding GNU-style long options.
+
+ * Non-option command-line arguments are usually treated as file
+ names, unless they have the form `VAR=VALUE', in which case they
+ are taken as variable assignments to be performed at that point in
+ processing the input.
+
+ * All non-option command-line arguments, excluding the program text,
+ are placed in the `ARGV' array. Adjusting `ARGC' and `ARGV'
+ affects how `awk' processes input.
+
+ * You can use a single minus sign (`-') to refer to standard input
+ on the command line.
+
+ * `gawk' pays attention to a number of environment variables.
+ `AWKPATH', `AWKLIBPATH', and `POSIXLY_CORRECT' are the most
+ important ones.
+
+ * `gawk''s exit status conveys information to the program that
+ invoked it. Use the `exit' statement from within an `awk' program
+ to set the exit status.
+
+ * `gawk' allows you to include other `awk' source files into your
+ program using the `@include' statement and/or the `-i' and `-f'
+ command-line options.
+
+ * `gawk' allows you to load additional functions written in C or C++
+ using the `@load' statement and/or the `-l' option. (This
+ advanced feature is described later on in *note Dynamic
+ Extensions::.)
+
+
File: gawk.info, Node: Regexp, Next: Reading Files, Prev: Invoking Gawk, Up: Top
3 Regular Expressions
@@ -3077,7 +3211,7 @@ that matches every input record whose text belongs to that set. The
simplest regular expression is a sequence of letters, numbers, or both.
Such a regexp matches any string that contains that sequence. Thus,
the regexp `foo' matches any string containing `foo'. Therefore, the
-pattern `/foo/' matches any input record containing the three
+pattern `/foo/' matches any input record containing the three adjacent
characters `foo' _anywhere_ in the record. Other kinds of regexps let
you specify more complicated classes of strings.
@@ -3087,10 +3221,11 @@ you specify more complicated classes of strings.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between `[...]'.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Regexp Summary:: Regular expressions summary.

File: gawk.info, Node: Regexp Usage, Next: Escape Sequences, Up: Regexp
@@ -3102,8 +3237,8 @@ A regular expression can be used as a pattern by enclosing it in
slashes. Then the regular expression is tested against the entire text
of each record. (Normally, it only needs to match some part of the
text in order to succeed.) For example, the following prints the
-second field of each record that contains the string `li' anywhere in
-it:
+second field of each record where the string `li' appears anywhere in
+the record:
$ awk '/li/ { print $2 }' mail-list
-| 555-5553
@@ -3188,8 +3323,8 @@ apply to both string constants and regexp constants:
A literal backslash, `\'.
`\a'
- The "alert" character, `Ctrl-g', ASCII code 7 (BEL). (This
- usually makes some sort of audible noise.)
+ The "alert" character, `Ctrl-g', ASCII code 7 (BEL). (This often
+ makes some sort of audible noise.)
`\b'
Backspace, `Ctrl-h', ASCII code 8 (BS).
@@ -3216,26 +3351,34 @@ apply to both string constants and regexp constants:
`\xHH...'
The hexadecimal value HH, where HH stands for a sequence of
- hexadecimal digits (`0'-`9', and either `A'-`F' or `a'-`f'). Like
- the same construct in ISO C, the escape sequence continues until
- the first nonhexadecimal digit is seen. (c.e.) However, using
- more than two hexadecimal digits produces undefined results. (The
- `\x' escape sequence is not allowed in POSIX `awk'.)
+ hexadecimal digits (`0'-`9', and either `A'-`F' or `a'-`f'). A
+ maximum of two digits are allowed after the `\x'. Any further
+ hexadecimal digits are treated as simple letters or numbers.
+ (c.e.)
+
+ CAUTION: In ISO C, the escape sequence continues until the
+ first nonhexadecimal digit is seen. For many years, `gawk'
+ would continue incorporating hexadecimal digits into the
+ value until a non-hexadecimal digit or the end of the string
+ was encountered. However, using more than two hexadecimal
+ digits produced undefined results.
`\/'
A literal slash (necessary for regexp constants only). This
sequence is used when you want to write a regexp constant that
- contains a slash. Because the regexp is delimited by slashes, you
- need to escape the slash that is part of the pattern, in order to
+ contains a slash (such as `/.*:\/home\/[[:alnum:]]+:.*/'; the
+ `[[:alnum:]]' notation is discussed shortly, in *note Bracket
+ Expressions::). Because the regexp is delimited by slashes, you
+ need to escape any slash that is part of the pattern, in order to
tell `awk' to keep processing the rest of the regexp.
`\"'
A literal double quote (necessary for string constants only).
This sequence is used when you want to write a string constant
- that contains a double quote. Because the string is delimited by
- double quotes, you need to escape the quote that is part of the
- string, in order to tell `awk' to keep processing the rest of the
- string.
+ that contains a double quote (such as `"He said \"hi!\" to her."').
+ Because the string is delimited by double quotes, you need to
+ escape any quote that is part of the string, in order to tell
+ `awk' to keep processing the rest of the string.
In `gawk', a number of additional two-character sequences that begin
with a backslash have special meaning in regexps. *Note GNU Regexp
@@ -3270,12 +3413,12 @@ is not one of the characters previously listed, POSIX `awk' purposely
leaves what happens as undefined. There are two choices:
Strip the backslash out
- This is what Brian Kernighan's `awk' and `gawk' both do. For
- example, `"a\qc"' is the same as `"aqc"'. (Because this is such
- an easy bug both to introduce and to miss, `gawk' warns you about
- it.) Consider `FS = "[ \t]+\|[ \t]+"' to use vertical bars
- surrounded by whitespace as the field separator. There should be
- two backslashes in the string: `FS = "[ \t]+\\|[ \t]+"'.)
+ This is what BWK `awk' and `gawk' both do. For example, `"a\qc"'
+ is the same as `"aqc"'. (Because this is such an easy bug both to
+ introduce and to miss, `gawk' warns you about it.) Consider `FS =
+ "[ \t]+\|[ \t]+"' to use vertical bars surrounded by whitespace as
+ the field separator. There should be two backslashes in the
+ string: `FS = "[ \t]+\\|[ \t]+"'.
Leave the backslash alone
Some other `awk' implementations do this. In such
@@ -3324,20 +3467,21 @@ sequences and that are not listed in the table stand for themselves:
at the beginning of the string.
It is important to realize that `^' does not match the beginning of
- a line embedded in a string. The condition is not true in the
- following example:
+ a line (the point right after a `\n' newline character) embedded
+ in a string. The condition is not true in the following example:
if ("line1\nLINE 2" ~ /^L/) ...
`$'
This is similar to `^', but it matches only at the end of a string.
For example, `p$' matches a record that ends with a `p'. The `$'
- is an anchor and does not match the end of a line embedded in a
- string. The condition in the following example is not true:
+ is an anchor and does not match the end of a line (the point right
+ before a `\n' newline character) embedded in a string. The
+ condition in the following example is not true:
if ("line1\nLINE 2" ~ /1$/) ...
-`. (period)'
+`.' (period)
This matches any single character, _including_ the newline
character. For example, `.P' matches any single character
followed by a `P' in a string. Using concatenation, we can make a
@@ -3349,7 +3493,7 @@ sequences and that are not listed in the table stand for themselves:
Otherwise, NUL is just another character. Other versions of `awk'
may not be able to match the NUL character.
-`[...]'
+`['...`]'
This is called a "bracket expression".(1) It matches any _one_ of
the characters that are enclosed in the square brackets. For
example, `[MVX]' matches any one of the characters `M', `V', or
@@ -3357,7 +3501,7 @@ sequences and that are not listed in the table stand for themselves:
square brackets of a bracket expression is given in *note Bracket
Expressions::.
-`[^ ...]'
+`[^'...`]'
This is a "complemented bracket expression". The first character
after the `[' _must_ be a `^'. It matches any characters _except_
those in the square brackets. For example, `[^awk]' matches any
@@ -3366,14 +3510,15 @@ sequences and that are not listed in the table stand for themselves:
`|'
This is the "alternation operator" and it is used to specify
alternatives. The `|' has the lowest precedence of all the regular
- expression operators. For example, `^P|[[:digit:]]' matches any
- string that matches either `^P' or `[[:digit:]]'. This means it
- matches any string that starts with `P' or contains a digit.
+ expression operators. For example, `^P|[aeiouy]' matches any
+ string that matches either `^P' or `[aeiouy]'. This means it
+ matches any string that starts with `P' or contains (anywhere
+ within it) a lowercase English vowel.
The alternation applies to the largest possible regexps on either
side.
-`(...)'
+`('...`)'
Parentheses are used for grouping in regular expressions, as in
arithmetic. They can be used to concatenate regular expressions
containing the alternation operator, `|'. For example,
@@ -3388,31 +3533,29 @@ sequences and that are not listed in the table stand for themselves:
matches of one `p' followed by any number of `h's. This also
matches just `p' if no `h's are present.
- The `*' repeats the _smallest_ possible preceding expression.
- (Use parentheses if you want to repeat a larger expression.) It
- finds as many repetitions as possible. For example, `awk
- '/\(c[ad][ad]*r x\)/ { print }' sample' prints every record in
- `sample' containing a string of the form `(car x)', `(cdr x)',
- `(cadr x)', and so on. Notice the escaping of the parentheses by
- preceding them with backslashes.
+ There are two subtle points to understand about how `*' works.
+ First, the `*' applies only to the single preceding regular
+ expression component (e.g., in `ph*', it applies just to the `h').
+ To cause `*' to apply to a larger sub-expression, use parentheses:
+ `(ph)*' matches `ph', `phph', `phphph' and so on.
+
+ Second, `*' finds as many repetitions as possible. If the text to
+ be matched is `phhhhhhhhhhhhhhooey', `ph*' matches all of the `h's.
`+'
This symbol is similar to `*', except that the preceding
expression must be matched at least once. This means that `wh+y'
would match `why' and `whhy', but not `wy', whereas `wh*y' would
- match all three of these strings. The following is a simpler way
- of writing the last `*' example:
-
- awk '/\(c[ad]+r x\)/ { print }' sample
+ match all three.
`?'
This symbol is similar to `*', except that the preceding
expression can be matched either once or not at all. For example,
`fe?d' matches `fed' and `fd', but nothing else.
-`{N}'
-`{N,}'
-`{N,M}'
+`{'N`}'
+`{'N`,}'
+`{'N`,'M`}'
One or two numbers inside braces denote an "interval expression".
If there is one number in the braces, the preceding regexp is
repeated N times. If there are two numbers separated by a comma,
@@ -3473,7 +3616,7 @@ list".
regexp operator or function.

-File: gawk.info, Node: Bracket Expressions, Next: GNU Regexp Operators, Prev: Regexp Operators, Up: Regexp
+File: gawk.info, Node: Bracket Expressions, Next: Leftmost Longest, Prev: Regexp Operators, Up: Regexp
3.4 Using Bracket Expressions
=============================
@@ -3494,7 +3637,9 @@ expression, put a `\' in front of it. For example:
[d\]]
-matches either `d' or `]'.
+matches either `d' or `]'. Additionally, if you place `]' right after
+the opening `[', the closing bracket is treated as one of the
+characters to be matched.
This treatment of `\' in bracket expressions is compatible with
other `awk' implementations and is also mandated by POSIX. The regular
@@ -3541,6 +3686,14 @@ set had other alphabetic characters in it, this would not match them.
With the POSIX character classes, you can write `/[[:alnum:]]/' to
match the alphabetic and numeric characters in your character set.
+ Some utilities that match regular expressions provide a non-standard
+`[:ascii:]' character class; `awk' does not. However, you can simulate
+such a construct using `[\x00-\x7F]'. This matches all values
+numerically between zero and 127, which is the defined range of the
+ASCII character set. Use a complemented character list
+(`[^\x00-\x7F]') to match any single-byte characters that are not in
+the ASCII range.
+
Two additional special sequences can appear in bracket expressions.
These apply to non-ASCII character sets, which can have single symbols
(called "collating elements") that are represented with more than one
@@ -3568,9 +3721,118 @@ Equivalence classes
classes.

-File: gawk.info, Node: GNU Regexp Operators, Next: Case-sensitivity, Prev: Bracket Expressions, Up: Regexp
+File: gawk.info, Node: Leftmost Longest, Next: Computed Regexps, Prev: Bracket Expressions, Up: Regexp
+
+3.5 How Much Text Matches?
+==========================
+
+Consider the following:
+
+ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
+
+ This example uses the `sub()' function (which we haven't discussed
+yet; *note String Functions::) to make a change to the input record.
+Here, the regexp `/a+/' indicates "one or more `a' characters," and the
+replacement text is `<A>'.
+
+ The input contains four `a' characters. `awk' (and POSIX) regular
+expressions always match the leftmost, _longest_ sequence of input
+characters that can match. Thus, all four `a' characters are replaced
+with `<A>' in this example:
+
+ $ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
+ -| <A>bcd
+
+ For simple match/no-match tests, this is not so important. But when
+doing text matching and substitutions with the `match()', `sub()',
+`gsub()', and `gensub()' functions, it is very important. *Note String
+Functions::, for more information on these functions. Understanding
+this principle is also important for regexp-based record and field
+splitting (*note Records::, and also *note Field Separators::).
+
+
+File: gawk.info, Node: Computed Regexps, Next: GNU Regexp Operators, Prev: Leftmost Longest, Up: Regexp
+
+3.6 Using Dynamic Regexps
+=========================
+
+The righthand side of a `~' or `!~' operator need not be a regexp
+constant (i.e., a string of characters between slashes). It may be any
+expression. The expression is evaluated and converted to a string if
+necessary; the contents of the string are then used as the regexp. A
+regexp computed in this way is called a "dynamic regexp" or a "computed
+regexp":
+
+ BEGIN { digits_regexp = "[[:digit:]]+" }
+ $0 ~ digits_regexp { print }
+
+This sets `digits_regexp' to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+ NOTE: When using the `~' and `!~' operators, there is a difference
+ between a regexp constant enclosed in slashes and a string
+ constant enclosed in double quotes. If you are going to use a
+ string constant, you have to understand that the string is, in
+ essence, scanned _twice_: the first time when `awk' reads your
+ program, and the second time when it goes to match the string on
+ the lefthand side of the operator with the pattern on the right.
+ This is true of any string-valued expression (such as
+ `digits_regexp', shown previously), not just string constants.
+
+ What difference does it make if the string is scanned twice? The
+answer has to do with escape sequences, and particularly with
+backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+ For example, `/\*/' is a regexp constant for a literal `*'. Only
+one backslash is needed. To do the same thing with a string, you have
+to type `"\\*"'. The first backslash escapes the second one so that
+the string actually contains the two characters `\' and `*'.
+
+ Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is "regexp
+constants," for several reasons:
+
+ * String constants are more complicated to write and more difficult
+ to read. Using regexp constants makes your programs less
+ error-prone. Not understanding the difference between the two
+ kinds of constants is a common source of errors.
+
+ * It is more efficient to use regexp constants. `awk' can note that
+ you have supplied a regexp and store it internally in a form that
+ makes pattern matching more efficient. When using a string
+ constant, `awk' must first convert the string into this internal
+ form and then perform the pattern matching.
+
+ * Using regexp constants is better form; it shows clearly that you
+ intend a regexp match.
+
+ Using `\n' in Bracket Expressions of Dynamic Regexps
+
+ Some versions of `awk' do not allow the newline character to be used
+inside a bracket expression for a dynamic regexp:
-3.5 `gawk'-Specific Regexp Operators
+ $ awk '$0 ~ "[ \t\n]"'
+ error--> awk: newline in character class [
+ error--> ]...
+ error--> source line number 1
+ error--> context is
+ error--> >>> <<<
+
+ But a newline in a regexp constant works with no problem:
+
+ $ awk '$0 ~ /[ \t\n]/'
+ here is a sample line
+ -| here is a sample line
+ Ctrl-d
+
+ `gawk' does not have this problem, and it isn't likely to occur
+often in practice, but it's worth noting for future reference.
+
+
+File: gawk.info, Node: GNU Regexp Operators, Next: Case-sensitivity, Prev: Computed Regexps, Up: Regexp
+
+3.7 `gawk'-Specific Regexp Operators
====================================
GNU software that deals with regular expressions provides a number of
@@ -3654,9 +3916,9 @@ No options
Traditional Unix `awk' regexps are matched. The GNU operators are
not special, and interval expressions are not available. The
POSIX character classes (`[[:alnum:]]', etc.) are supported, as
- Brian Kernighan's `awk' does support them. Characters described
- by octal and hexadecimal escape sequences are treated literally,
- even if they represent regexp metacharacters.
+ BWK `awk' does support them. Characters described by octal and
+ hexadecimal escape sequences are treated literally, even if they
+ represent regexp metacharacters.
`--re-interval'
Allow interval expressions in regexps, if `--traditional' has been
@@ -3664,9 +3926,9 @@ No options
default.

-File: gawk.info, Node: Case-sensitivity, Next: Leftmost Longest, Prev: GNU Regexp Operators, Up: Regexp
+File: gawk.info, Node: Case-sensitivity, Next: Regexp Summary, Prev: GNU Regexp Operators, Up: Regexp
-3.6 Case Sensitivity in Matching
+3.8 Case Sensitivity in Matching
================================
Case is normally significant in regular expressions, both when matching
@@ -3692,7 +3954,9 @@ works in any POSIX-compliant `awk'.
Another method, specific to `gawk', is to set the variable
`IGNORECASE' to a nonzero value (*note Built-in Variables::). When
`IGNORECASE' is not zero, _all_ regexp and string operations ignore
-case. Changing the value of `IGNORECASE' dynamically controls the
+case.
+
+ Changing the value of `IGNORECASE' dynamically controls the
case-sensitivity of the program as it runs. Case is significant by
default because `IGNORECASE' (like most variables) is initialized to
zero:
@@ -3715,9 +3979,6 @@ dynamically turn case-sensitivity on or off for all the rules at once.
`IGNORECASE' from the command line is a way to make a program
case-insensitive without having to edit it.
- Both regexp and string comparison operations are affected by
-`IGNORECASE'.
-
In multibyte locales, the equivalences between upper- and lowercase
characters are tested based on the wide-character values of the
locale's character set. Otherwise, the characters are tested based on
@@ -3740,112 +4001,42 @@ obscure and we don't recommend it.
means that `gawk' does the right thing.

-File: gawk.info, Node: Leftmost Longest, Next: Computed Regexps, Prev: Case-sensitivity, Up: Regexp
-
-3.7 How Much Text Matches?
-==========================
+File: gawk.info, Node: Regexp Summary, Prev: Case-sensitivity, Up: Regexp
-Consider the following:
-
- echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
-
- This example uses the `sub()' function (which we haven't discussed
-yet; *note String Functions::) to make a change to the input record.
-Here, the regexp `/a+/' indicates "one or more `a' characters," and the
-replacement text is `<A>'.
-
- The input contains four `a' characters. `awk' (and POSIX) regular
-expressions always match the leftmost, _longest_ sequence of input
-characters that can match. Thus, all four `a' characters are replaced
-with `<A>' in this example:
-
- $ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
- -| <A>bcd
-
- For simple match/no-match tests, this is not so important. But when
-doing text matching and substitutions with the `match()', `sub()',
-`gsub()', and `gensub()' functions, it is very important. *Note String
-Functions::, for more information on these functions. Understanding
-this principle is also important for regexp-based record and field
-splitting (*note Records::, and also *note Field Separators::).
-
-
-File: gawk.info, Node: Computed Regexps, Prev: Leftmost Longest, Up: Regexp
-
-3.8 Using Dynamic Regexps
-=========================
-
-The righthand side of a `~' or `!~' operator need not be a regexp
-constant (i.e., a string of characters between slashes). It may be any
-expression. The expression is evaluated and converted to a string if
-necessary; the contents of the string are then used as the regexp. A
-regexp computed in this way is called a "dynamic regexp":
-
- BEGIN { digits_regexp = "[[:digit:]]+" }
- $0 ~ digits_regexp { print }
-
-This sets `digits_regexp' to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
- NOTE: When using the `~' and `!~' operators, there is a difference
- between a regexp constant enclosed in slashes and a string
- constant enclosed in double quotes. If you are going to use a
- string constant, you have to understand that the string is, in
- essence, scanned _twice_: the first time when `awk' reads your
- program, and the second time when it goes to match the string on
- the lefthand side of the operator with the pattern on the right.
- This is true of any string-valued expression (such as
- `digits_regexp', shown previously), not just string constants.
-
- What difference does it make if the string is scanned twice? The
-answer has to do with escape sequences, and particularly with
-backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
- For example, `/\*/' is a regexp constant for a literal `*'. Only
-one backslash is needed. To do the same thing with a string, you have
-to type `"\\*"'. The first backslash escapes the second one so that
-the string actually contains the two characters `\' and `*'.
+3.9 Summary
+===========
- Given that you can use both regexp and string constants to describe
-regular expressions, which should you use? The answer is "regexp
-constants," for several reasons:
+ * Regular expressions describe sets of strings to be matched. In
+ `awk', regular expression constants are written enclosed between
+ slashes: `/'...`/'.
- * String constants are more complicated to write and more difficult
- to read. Using regexp constants makes your programs less
- error-prone. Not understanding the difference between the two
- kinds of constants is a common source of errors.
-
- * It is more efficient to use regexp constants. `awk' can note that
- you have supplied a regexp and store it internally in a form that
- makes pattern matching more efficient. When using a string
- constant, `awk' must first convert the string into this internal
- form and then perform the pattern matching.
+ * Regexp constants may be used standalone in patterns and in
+ conditional expressions, or as part of matching expressions using
+ the `~' and `!~' operators.
- * Using regexp constants is better form; it shows clearly that you
- intend a regexp match.
+ * Escape sequences let you represent non-printable characters and
+ also let you represent regexp metacharacters as literal characters
+ to be matched.
- Using `\n' in Bracket Expressions of Dynamic Regexps
+ * Regexp operators provide grouping, alternation and repetition.
- Some commercial versions of `awk' do not allow the newline character
-to be used inside a bracket expression for a dynamic regexp:
+ * Bracket expressions give you a shorthand for specifying sets of
+ characters that can match at a particular point in a regexp.
+ Within bracket expressions, POSIX character classes let you specify
+ certain groups of characters in a locale-independent fashion.
- $ awk '$0 ~ "[ \t\n]"'
- error--> awk: newline in character class [
- error--> ]...
- error--> source line number 1
- error--> context is
- error--> >>> <<<
+ * `gawk''s `IGNORECASE' variable lets you control the case
+ sensitivity of regexp matching. In other `awk' versions, use
+ `tolower()' or `toupper()'.
- But a newline in a regexp constant works with no problem:
+ * Regular expressions match the leftmost longest text in the string
+ being matched. This matters for cases where you need to know the
+ extent of the match, such as for text substitution and when the
+ record separator is a regexp.
- $ awk '$0 ~ /[ \t\n]/'
- here is a sample line
- -| here is a sample line
- Ctrl-d
+ * Matching expressions may use dynamic regexps, that is, string
+ values treated as regular expressions.
- `gawk' does not have this problem, and it isn't likely to occur
-often in practice, but it's worth noting for future reference.

File: gawk.info, Node: Reading Files, Next: Printing, Prev: Regexp, Up: Top
@@ -3885,8 +4076,10 @@ have to be named on the `awk' command line (*note Getline::).
* Getline:: Reading files under explicit program control
using the `getline' function.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on the
+* Command-line directories:: What happens if you put a directory on the
command line.
+* Input Summary:: Input summary.
+* Input Exercises:: Exercises.

File: gawk.info, Node: Records, Next: Fields, Up: Reading Files
@@ -3894,16 +4087,27 @@ File: gawk.info, Node: Records, Next: Fields, Up: Reading Files
4.1 How Input Is Split into Records
===================================
-The `awk' utility divides the input for your `awk' program into records
-and fields. `awk' keeps track of the number of records that have been
-read so far from the current input file. This value is stored in a
-built-in variable called `FNR'. It is reset to zero when a new file is
-started. Another built-in variable, `NR', records the total number of
-input records read so far from all data files. It starts at zero, but
-is never automatically reset to zero.
+`awk' divides the input for your program into records and fields. It
+keeps track of the number of records that have been read so far from
+the current input file. This value is stored in a built-in variable
+called `FNR', which is reset to zero when a new file is started.
+Another built-in variable, `NR', records the total number of input
+records read so far from all data files. It starts at zero, but is
+never automatically reset to zero.
+
+* Menu:
+
+* awk split records:: How standard `awk' splits records.
+* gawk split records:: How `gawk' splits records.
- Records are separated by a character called the "record separator".
-By default, the record separator is the newline character. This is why
+
+File: gawk.info, Node: awk split records, Next: gawk split records, Up: Records
+
+4.1.1 Record Splitting With Standard `awk'
+------------------------------------------
+
+Records are separated by a character called the "record separator". By
+default, the record separator is the newline character. This is why
records are, by default, single lines. A different character can be
used for the record separator by assigning the character to the
built-in variable `RS'.
@@ -4023,16 +4227,22 @@ affected.
After the end of the record has been determined, `gawk' sets the
variable `RT' to the text in the input that matched `RS'.
- When using `gawk', the value of `RS' is not limited to a
-one-character string. It can be any regular expression (*note
-Regexp::). (c.e.) In general, each record ends at the next string that
-matches the regular expression; the next record starts at the end of
-the matching string. This general rule is actually at work in the
-usual case, where `RS' contains just a newline: a record ends at the
-beginning of the next matching string (the next newline in the input),
-and the following record starts just after the end of this string (at
-the first character of the following line). The newline, because it
-matches `RS', is not part of either record.
+
+File: gawk.info, Node: gawk split records, Prev: awk split records, Up: Records
+
+4.1.2 Record Splitting With `gawk'
+----------------------------------
+
+When using `gawk', the value of `RS' is not limited to a one-character
+string. It can be any regular expression (*note Regexp::). (c.e.) In
+general, each record ends at the next string that matches the regular
+expression; the next record starts at the end of the matching string.
+This general rule is actually at work in the usual case, where `RS'
+contains just a newline: a record ends at the beginning of the next
+matching string (the next newline in the input), and the following
+record starts just after the end of this string (at the first character
+of the following line). The newline, because it matches `RS', is not
+part of either record.
When `RS' is a single character, `RT' contains the same single
character. However, when `RS' is a regular expression, `RT' contains
@@ -4048,16 +4258,16 @@ trailing whitespace:
$ echo record 1 AAAA record 2 BBBB record 3 |
> gawk 'BEGIN { RS = "\n|( *[[:upper:]]+ *)" }
- > { print "Record =", $0, "and RT =", RT }'
- -| Record = record 1 and RT = AAAA
- -| Record = record 2 and RT = BBBB
- -| Record = record 3 and RT =
- -|
+ > { print "Record =", $0,"and RT = [" RT "]" }'
+ -| Record = record 1 and RT = [ AAAA ]
+ -| Record = record 2 and RT = [ BBBB ]
+ -| Record = record 3 and RT = [
+ -| ]
-The final line of output has an extra blank line. This is because the
-value of `RT' is a newline, and the `print' statement supplies its own
-terminating newline. *Note Simple Sed::, for a more useful example of
-`RS' as a regexp and `RT'.
+The square brackets delineate the contents of `RT', letting you see the
+leading and trailing whitespace. The final value of `RT' is a
+newline. *Note Simple Sed::, for a more useful example of `RS' as a
+regexp and `RT'.
If you set `RS' to a regular expression that allows optional
trailing text, such as `RS = "abc(XYZ)?"' it is possible, due to
@@ -4095,8 +4305,10 @@ use for `RS' in this case:
BEGIN { RS = "\0" } # whole file becomes one record?
`gawk' in fact accepts this, and uses the NUL character for the
-record separator. However, this usage is _not_ portable to most other
-`awk' implementations.
+record separator. This works for certain special files, such as
+`/proc/self/environ' on GNU/Linux systems, where the NUL character is in
+fact the record separator. However, this usage is _not_ portable to
+most other `awk' implementations.
Almost all other `awk' implementations(1) store strings internally
as C-style strings. C strings use the NUL character as the string
@@ -4107,10 +4319,9 @@ terminator. In effect, this means that `RS = "\0"' is the same as `RS
as a record separator. However, this is a special case: `mawk' does not
allow embedded NUL characters in strings.
- The best way to treat a whole file as a single record is to simply
-read the file in, one record at a time, concatenating each record onto
-the end of the previous ones.
-
+ *Note Readfile Function::, for an interesting, portable way to read
+whole files. If you are using `gawk', see *note Extension Sample
+Readfile::, for another option.
---------- Footnotes ----------
@@ -4135,7 +4346,7 @@ to these pieces of the record. You don't have to use them--you can
operate on the whole record if you want--but fields are what make
simple `awk' programs so powerful.
- A dollar-sign (`$') is used to refer to a field in an `awk' program,
+ You use a dollar-sign (`$') to refer to a field in an `awk' program,
followed by the number of the field you want. Thus, `$1' refers to the
first field, `$2' to the second, and so on. (Unlike the Unix shells,
the field numbers are not limited to single digits. `$127' is the one
@@ -4158,8 +4369,9 @@ the last one (such as `$8' when the record has only seven fields), you
get the empty string. (If used in a numeric operation, you get zero.)
The use of `$0', which looks like a reference to the "zero-th"
-field, is a special case: it represents the whole input record when you
-are not interested in specific fields. Here are some more examples:
+field, is a special case: it represents the whole input record. Use it
+when you are not interested in specific fields. Here are some more
+examples:
$ awk '$1 ~ /li/ { print $0 }' mail-list
-| Amelia 555-5553 amelia.zodiacusque@gmail.com F
@@ -4191,11 +4403,11 @@ File: gawk.info, Node: Nonconstant Fields, Next: Changing Fields, Prev: Field
4.3 Nonconstant Field Numbers
=============================
-The number of a field does not need to be a constant. Any expression in
-the `awk' language can be used after a `$' to refer to a field. The
-value of the expression specifies the field number. If the value is a
-string, rather than a number, it is converted to a number. Consider
-this example:
+A field number need not be a constant. Any expression in the `awk'
+language can be used after a `$' to refer to a field. The value of the
+expression specifies the field number. If the value is a string,
+rather than a number, it is converted to a number. Consider this
+example:
awk '{ print $NR }'
@@ -4212,7 +4424,7 @@ another example of using expressions as field numbers:
number of the field to print. The `*' sign represents multiplication,
so the expression `2*2' evaluates to four. The parentheses are used so
that the multiplication is done before the `$' operation; they are
-necessary whenever there is a binary operator in the field-number
+necessary whenever there is a binary operator(1) in the field-number
expression. This example, then, prints the type of relationship (the
fourth field) for every line of the file `mail-list'. (All of the
`awk' operators are listed, in order of decreasing precedence, in *note
@@ -4231,6 +4443,12 @@ Variables::). The expression `$NF' is not a special feature--it is the
direct consequence of evaluating `NF' and using its value as a field
number.
+ ---------- Footnotes ----------
+
+ (1) A "binary operator", such as `*' for multiplication, is one that
+takes two operands. The distinction is required, since `awk' also has
+unary (one-operand) and ternary (three-operand) operators.
+

File: gawk.info, Node: Changing Fields, Next: Field Separators, Prev: Nonconstant Fields, Up: Reading Files
@@ -4256,11 +4474,11 @@ three minus ten: `$3 - 10'. (*Note Arithmetic Ops::.) Then it prints
the original and new values for field three. (Someone in the warehouse
made a consistent mistake while inventorying the red boxes.)
- For this to work, the text in field `$3' must make sense as a
-number; the string of characters must be converted to a number for the
-computer to do arithmetic on it. The number resulting from the
-subtraction is converted back to a string of characters that then
-becomes field three. *Note Conversion::.
+ For this to work, the text in `$3' must make sense as a number; the
+string of characters must be converted to a number for the computer to
+do arithmetic on it. The number resulting from the subtraction is
+converted back to a string of characters that then becomes field three.
+*Note Conversion::.
When the value of a field is changed (as perceived by `awk'), the
text of the input record is recalculated to contain the new field where
@@ -4325,7 +4543,7 @@ even when you assign the empty string to a field. For example:
-| a::c:d
-| 4
-The field is still there; it just has an empty value, denoted by the
+The field is still there; it just has an empty value, delimited by the
two colons between `a' and `c'. This example shows what happens if you
create a new field:
@@ -4391,7 +4609,7 @@ File: gawk.info, Node: Field Separators, Next: Constant Size, Prev: Changing
* Default Field Splitting:: How fields are normally separated.
* Regexp Field Splitting:: Using regexps as the field separator.
* Single Character Fields:: Making each character a separate field.
-* Command Line Field Separator:: Setting `FS' from the command-line.
+* Command Line Field Separator:: Setting `FS' from the command line.
* Full Line Fields:: Making the full line be a single field.
* Field Splitting Summary:: Some final points and a summary table.
@@ -4511,7 +4729,7 @@ letter):
> { print $2 }'
-| a
-In this case, the first field is "null" or empty.
+In this case, the first field is null, or empty.
The stripping of leading and trailing whitespace also comes into
play whenever `$0' is recomputed. For instance, study this pipeline:
@@ -4535,9 +4753,8 @@ field separator a new string? It turns out that different `awk'
versions answer this question differently, and you should not rely on
any specific behavior in your programs. (d.c.)
- As a point of information, Brian Kernighan's `awk' allows `^' to
-match only at the beginning of the record. `gawk' also works this way.
-For example:
+ As a point of information, BWK `awk' allows `^' to match only at the
+beginning of the record. `gawk' also works this way. For example:
$ echo 'xxAA xxBxx C' |
> gawk -F '(^x+)|( +)' '{ for (i = 1; i <= NF; i++)
@@ -4610,7 +4827,8 @@ argument to `-F' is `t', then `FS' is set to the TAB character. If you
type `-F\t' at the shell, without any quotes, the `\' gets deleted, so
`awk' figures that you really want your fields to be separated with
TABs and not `t's. Use `-v FS="t"' or `-F"[t]"' on the command line if
-you really do want to separate your fields with `t's.
+you really do want to separate your fields with `t's. Use `-F '\t''
+when not in compatibility mode to specify that TABs separate fields.
As an example, let's use an `awk' program file called `edu.awk' that
contains the pattern `/edu/' and the action `print $1':
@@ -4730,8 +4948,8 @@ which usually prints:
root
-on an incorrect implementation of `awk', while `gawk' prints something
-like:
+on an incorrect implementation of `awk', while `gawk' prints the full
+first line of the file, something like:
root:nSijPlPhZZwgE:0:0:Root:/:
@@ -4811,7 +5029,7 @@ calculated idle time:
BEGIN { FIELDWIDTHS = "9 6 10 6 7 7 35" }
NR > 2 {
idle = $4
- sub(/^ */, "", idle) # strip leading spaces
+ sub(/^ +/, "", idle) # strip leading spaces
if (idle == "")
idle = 0
if (idle ~ /:/) {
@@ -4942,15 +5160,17 @@ would be to remove the quotes when they occur, with something like this:
As with `FS', the `IGNORECASE' variable (*note User-modified::)
affects field splitting with `FPAT'.
- Similar to `FIELDWIDTHS', the value of `PROCINFO["FS"]' will be
-`"FPAT"' if content-based field splitting is being used.
+ Assigning a value to `FPAT' overrides field splitting with `FS' and
+with `FIELDWIDTHS'. Similar to `FIELDWIDTHS', the value of
+`PROCINFO["FS"]' will be `"FPAT"' if content-based field splitting is
+being used.
NOTE: Some programs export CSV data that contains embedded
newlines between the double quotes. `gawk' provides no way to
deal with this. Since there is no formal specification for CSV
data, there isn't much more to be done; the `FPAT' mechanism
provides an elegant solution for the majority of cases, and the
- `gawk' maintainer is satisfied with that.
+ `gawk' developers are satisfied with that.
As written, the regexp used for `FPAT' requires that each field have
at least one character. A straightforward modification (changing
@@ -4961,6 +5181,11 @@ changed the first `+' to `*') allows fields to be empty:
Finally, the `patsplit()' function makes the same functionality
available for splitting regular strings (*note String Functions::).
+ To recap, `gawk' provides three independent methods to split input
+records into fields. `gawk' uses whichever mechanism was last chosen
+based on which of the three variables--`FS', `FIELDWIDTHS', and
+`FPAT'--was last assigned to.
+
---------- Footnotes ----------
(1) At least, we don't know of one.
@@ -5000,8 +5225,8 @@ doesn't start until the first nonblank line that follows--no matter how
many blank lines appear in a row, they are considered one record
separator.
- There is an important difference between `RS = ""' and `RS =
-"\n\n+"'. In the first case, leading newlines in the input data file
+ However, there is an important difference between `RS = ""' and `RS
+= "\n\n+"'. In the first case, leading newlines in the input data file
are ignored, and if a file ends without extra blank lines after the
last record, the final newline is removed from the record. In the
second case, this special processing is not done. (d.c.)
@@ -5111,7 +5336,7 @@ File: gawk.info, Node: Getline, Next: Read Timeout, Prev: Multiple Line, Up:
=================================
So far we have been getting our input data from `awk''s main input
-stream--either the standard input (usually your terminal, sometimes the
+stream--either the standard input (usually your keyboard, sometimes the
output from another program) or from the files specified on the command
line. The `awk' language has a special built-in command called
`getline' that can be used to read input under your explicit control.
@@ -5123,8 +5348,8 @@ yet. Therefore, come back and study the `getline' command _after_ you
have reviewed the rest of this Info file and have a good knowledge of
how `awk' works.
- The `getline' command returns one if it finds a record and zero if
-it encounters the end of the file. If there is some error in getting a
+ The `getline' command returns 1 if it finds a record and 0 if it
+encounters the end of the file. If there is some error in getting a
record, such as a file that cannot be opened, then `getline' returns
-1. In this case, `gawk' sets the variable `ERRNO' to a string
describing the error that occurred.
@@ -5163,35 +5388,47 @@ input record and split it up into fields. This is useful if you've
finished processing the current record, but want to do some special
processing on the next record _right now_. For example:
+ # Remove text between /* and */, inclusive
{
- if ((t = index($0, "/*")) != 0) {
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) {
- if (getline <= 0) {
+ if ((i = index($0, "/*")) != 0) {
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) {
+ rest = substr(rest, j + 2) # remove comment
+ } else {
+ while (j == 0) {
+ # get more text
+ if (getline <= 0) {
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- }
- u = index($0, "*/")
- offset = 0
- }
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- }
- print $0
+ }
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) {
+ rest = substr(rest, j + 2)
+ break
+ }
+ }
+ }
+ # build up the output line using string concatenation
+ $0 = out rest
+ }
+ print $0
}
This `awk' program deletes C-style comments (`/* ... */') from the
-input. By replacing the `print $0' with other statements, you could
-perform more complicated processing on the decommented input, such as
-searching for matches of a regular expression. (This program has a
-subtle problem--it does not work if one comment ends and another begins
-on the same line.)
+input. It uses a number of features we haven't covered yet, including
+string concatenation (*note Concatenation::) and the `index()' and
+`substr()' built-in functions (*note String Functions::). By replacing
+the `print $0' with other statements, you could perform more
+complicated processing on the decommented input, such as searching for
+matches of a regular expression. (This program has a subtle
+problem--it does not work if one comment ends and another begins on the
+same line.)
This form of the `getline' command sets `NF', `NR', `FNR', `RT', and
the value of `$0'.
@@ -5273,9 +5510,9 @@ are changed, resulting in a new value of `NF'. `RT' is also set.
According to POSIX, `getline < EXPRESSION' is ambiguous if
EXPRESSION contains unparenthesized operators other than `$'; for
example, `getline < dir "/" file' is ambiguous because the
-concatenation operator is not parenthesized. You should write it as
-`getline < (dir "/" file)' if you want your program to be portable to
-all `awk' implementations.
+concatenation operator (not discussed yet; *note Concatenation::) is
+not parenthesized. You should write it as `getline < (dir "/" file)' if
+you want your program to be portable to all `awk' implementations.

File: gawk.info, Node: Getline/Variable/File, Next: Getline/Pipe, Prev: Getline/File, Up: Getline
@@ -5386,10 +5623,10 @@ all `awk' implementations.
NOTE: Unfortunately, `gawk' has not been consistent in its
treatment of a construct like `"echo " "date" | getline'. Most
versions, including the current version, treat it as `("echo "
- "date") | getline'. (This how Brian Kernighan's `awk' behaves.)
- Some versions changed and treated it as `"echo " ("date" |
- getline)'. (This is how `mawk' behaves.) In short, _always_ use
- explicit parentheses, and then you won't have to worry.
+ "date") | getline'. (This is how BWK `awk' behaves.) Some versions
+ changed and treated it as `"echo " ("date" | getline)'. (This is
+ how `mawk' behaves.) In short, _always_ use explicit parentheses,
+ and then you won't have to worry.

File: gawk.info, Node: Getline/Variable/Pipe, Next: Getline/Coprocess, Prev: Getline/Pipe, Up: Getline
@@ -5480,10 +5717,10 @@ in mind:
testing the new record against every pattern. However, the new
record is tested against any subsequent rules.
- * Many `awk' implementations limit the number of pipelines that an
- `awk' program may have open to just one. In `gawk', there is no
- such limit. You can open as many pipelines (and coprocesses) as
- the underlying operating system permits.
+ * Some very old `awk' implementations limit the number of pipelines
+ that an `awk' program may have open to just one. In `gawk', there
+ is no such limit. You can open as many pipelines (and
+ coprocesses) as the underlying operating system permits.
* An interesting side effect occurs if you use `getline' without a
redirection inside a `BEGIN' rule. Because an unredirected
@@ -5522,9 +5759,9 @@ in mind:
file is encountered, before the element in `a' is assigned?
`gawk' treats `getline' like a function call, and evaluates the
- expression `a[++c]' before attempting to read from `f'. Other
- versions of `awk' only evaluate the expression once they know that
- there is a string value to be assigned. Caveat Emptor.
+ expression `a[++c]' before attempting to read from `f'. However,
+ some versions of `awk' only evaluate the expression once they know
+ that there is a string value to be assigned. Caveat Emptor.

File: gawk.info, Node: Getline Summary, Prev: Getline Notes, Up: Getline
@@ -5555,15 +5792,17 @@ VAR
Table 4.1: `getline' Variants and What They Set

-File: gawk.info, Node: Read Timeout, Next: Command line directories, Prev: Getline, Up: Reading Files
+File: gawk.info, Node: Read Timeout, Next: Command-line directories, Prev: Getline, Up: Reading Files
4.10 Reading Input With A Timeout
=================================
-You may specify a timeout in milliseconds for reading input from a
-terminal, pipe or two-way communication including, TCP/IP sockets. This
-can be done on a per input, command or connection basis, by setting a
-special element in the `PROCINFO' array:
+This minor node describes a feature that is specific to `gawk'.
+
+ You may specify a timeout in milliseconds for reading input from the
+keyboard, a pipe, or two-way communication, including TCP/IP sockets.
+This can be done on a per-input, per-command, or per-connection basis, by
+setting a special element in the `PROCINFO' (*note Auto-set::) array:
PROCINFO["input_name", "READ_TIMEOUT"] = TIMEOUT IN MILLISECONDS
@@ -5579,19 +5818,19 @@ from the server after a certain amount of time:
else if (ERRNO != "")
print ERRNO
- Here is how to read interactively from the terminal(1) without
-waiting for more than five seconds:
+ Here is how to read interactively from the user(1) without waiting
+for more than five seconds:
PROCINFO["/dev/stdin", "READ_TIMEOUT"] = 5000
while ((getline < "/dev/stdin") > 0)
print $0
- `gawk' will terminate the read operation if input does not arrive
-after waiting for the timeout period, return failure and set the
-`ERRNO' variable to an appropriate string value. A negative or zero
-value for the timeout is the same as specifying no timeout at all.
+ `gawk' terminates the read operation if input does not arrive after
+waiting for the timeout period, returns failure and sets the `ERRNO'
+variable to an appropriate string value. A negative or zero value for
+the timeout is the same as specifying no timeout at all.
- A timeout can also be set for reading from the terminal in the
+ A timeout can also be set for reading from the keyboard in the
implicit loop that reads input records and matches them against
patterns, like so:
@@ -5644,23 +5883,118 @@ writing.
---------- Footnotes ----------
- (1) This assumes that standard input is the keyboard
+ (1) This assumes that standard input is the keyboard.

-File: gawk.info, Node: Command line directories, Prev: Read Timeout, Up: Reading Files
+File: gawk.info, Node: Command-line directories, Next: Input Summary, Prev: Read Timeout, Up: Reading Files
4.11 Directories On The Command Line
====================================
According to the POSIX standard, files named on the `awk' command line
-must be text files. It is a fatal error if they are not. Most
-versions of `awk' treat a directory on the command line as a fatal
-error.
+must be text files; it is a fatal error if they are not. Most versions
+of `awk' treat a directory on the command line as a fatal error.
By default, `gawk' produces a warning for a directory on the command
-line, but otherwise ignores it. If either of the `--posix' or
-`--traditional' options is given, then `gawk' reverts to treating a
-directory on the command line as a fatal error.
+line, but otherwise ignores it. This makes it easier to use shell
+wildcards with your `awk' program:
+
+ $ gawk -f whizprog.awk * Directories could kill this program
+
+ If either of the `--posix' or `--traditional' options is given, then
+`gawk' reverts to treating a directory on the command line as a fatal
+error.
+
+ *Note Extension Sample Readdir::, for a way to treat directories as
+usable data from an `awk' program.
+
+
+File: gawk.info, Node: Input Summary, Next: Input Exercises, Prev: Command-line directories, Up: Reading Files
+
+4.12 Summary
+============
+
+ * Input is split into records based on the value of `RS'. The
+ possibilities are as follows:
+
+ Value of `RS' Records are split on `awk' / `gawk'
+ ----------------------------------------------------------------------
+ Any single That character `awk'
+ character
+ The empty string Runs of two or more `awk'
+ (`""') newlines
+ A regexp Text that matches the `gawk'
+ regexp
+
+ * `gawk' sets `RT' to the text matched by `RS'.
+
+ * After splitting the input into records, `awk' further splits the
+ record into individual fields, named `$1', `$2' and so on. `$0' is
+ the whole record, and `NF' indicates how many fields there are.
+ The default way to split fields is between whitespace characters.
+
+ * Fields may be referenced using a variable, as in `$NF'. Fields
+ may also be assigned values, which causes the value of `$0' to be
+ recomputed when it is later referenced. Assigning to a field with
+ a number greater than `NF' creates the field and rebuilds the
+ record, using `OFS' to separate the fields. Incrementing `NF'
+ does the same thing. Decrementing `NF' throws away fields and
+ rebuilds the record.
+
+ * Field splitting is more complicated than record splitting.
+
+ Field separator value Fields are split ... `awk' /
+ `gawk'
+ ----------------------------------------------------------------------
+ `FS == " "' On runs of whitespace `awk'
+ `FS == ANY SINGLE On that character `awk'
+ CHARACTER'
+ `FS == REGEXP' On text matching the `awk'
+ regexp
+ `FS == ""' Each individual character `gawk'
+ is a separate field
+ `FIELDWIDTHS == LIST OF Based on character `gawk'
+ COLUMNS' position
+ `FPAT == REGEXP' On text around text `gawk'
+ matching the regexp
+
+ Using `FS = "\n"' causes the entire record to be a single field
+ (assuming that newlines separate records).
+
+ * `FS' may be set from the command line using the `-F' option. This
+ can also be done using command-line variable assignment.
+
+ * `PROCINFO["FS"]' can be used to see how fields are being split.
+
+ * Use `getline' in its various forms to read additional records,
+ from the default input stream, from a file, or from a pipe or
+ co-process.
+
+ * Use `PROCINFO[FILE, "READ_TIMEOUT"]' to cause reads to time out for
+ FILE.
+
+ * Directories on the command line are fatal for standard `awk';
+ `gawk' ignores them if not in POSIX mode.
+
+
+
+File: gawk.info, Node: Input Exercises, Prev: Input Summary, Up: Reading Files
+
+4.13 Exercises
+==============
+
+ 1. Using the `FIELDWIDTHS' variable (*note Constant Size::), write a
+ program to read election data, where each record represents one
+ voter's votes. Come up with a way to define which columns are
+ associated with each ballot item, and print the total votes,
+ including abstentions, for each item.
+
+ 2. *note Plain Getline::, presented a program to remove C-style
+ comments (`/* ... */') from the input. That program does not work
+ if one comment ends on one line and another one starts later on
+ the same line. That can be fixed by making one simple change.
+ What is it?
+

File: gawk.info, Node: Printing, Next: Expressions, Prev: Reading Files, Up: Top
@@ -5696,6 +6030,8 @@ function.
`gawk' allows access to inherited file
descriptors.
* Close Files And Pipes:: Closing Input and Output Files and Pipes.
+* Output Summary:: Output summary.
+* Output Exercises:: Exercises.

File: gawk.info, Node: Print, Next: Print Examples, Up: Printing
@@ -5704,9 +6040,9 @@ File: gawk.info, Node: Print, Next: Print Examples, Up: Printing
=========================
The `print' statement is used for producing output with simple,
-standardized formatting. Specify only the strings or numbers to print,
-in a list separated by commas. They are output, separated by single
-spaces, followed by a newline. The statement looks like this:
+standardized formatting. You specify only the strings or numbers to
+print, in a list separated by commas. They are output, separated by
+single spaces, followed by a newline. The statement looks like this:
print ITEM1, ITEM2, ...
@@ -5727,6 +6063,10 @@ you forget to use the double-quote characters, your text is taken as an
`awk' expression, and you will probably get an error. Keep in mind
that a space is printed between any two items.
+ Note that the `print' statement is a statement and not an
+expression--you can't use it in the pattern part of a pattern-action
+statement, for example.
+

File: gawk.info, Node: Print Examples, Next: Output Separators, Prev: Print, Up: Printing
@@ -5773,8 +6113,8 @@ Here is the same program, without the comma:
To someone unfamiliar with the `inventory-shipped' file, neither
example's output makes much sense. A heading line at the beginning
would make it clearer. Let's add some headings to our table of months
-(`$1') and green crates shipped (`$2'). We do this using the `BEGIN'
-pattern (*note BEGIN/END::) so that the headings are only printed once:
+(`$1') and green crates shipped (`$2'). We do this using a `BEGIN'
+rule (*note BEGIN/END::) so that the headings are only printed once:
awk 'BEGIN { print "Month Crates"
print "----- ------" }
@@ -5981,17 +6321,19 @@ width. Here is a list of the format-control letters:
valid wide character and then to print the multibyte encoding
of that character. Similarly, when printing a numeric value,
`gawk' allows the value to be within the numeric range of
- values that can be held in a wide character.
+ values that can be held in a wide character. If the
+ conversion to multibyte encoding fails, `gawk' uses the low
+ eight bits of the value as the character to print.
Other `awk' versions generally restrict themselves to printing
the first byte of a string or to numeric values within the
range of a single byte (0-255).
-`%d, %i'
+`%d', `%i'
Print a decimal integer. The two control letters are equivalent.
(The `%i' specification is for compatibility with ISO C.)
-`%e, %E'
+`%e', `%E'
Print a number in scientific (exponential) notation; for example:
printf "%4.3e\n", 1950
@@ -6013,7 +6355,8 @@ width. Here is a list of the format-control letters:
On systems supporting IEEE 754 floating point format, values
representing negative infinity are formatted as `-inf' or
`-infinity', and positive infinity as `inf' and `infinity'. The
- special "not a number" value formats as `-nan' or `nan'.
+ special "not a number" value formats as `-nan' or `nan' (*note
+ Math Definitions::).
`%F'
Like `%f' but the infinity and "not a number" values are spelled
@@ -6022,7 +6365,7 @@ width. Here is a list of the format-control letters:
The `%F' format is a POSIX extension to ISO C; not all systems
support it. On those that don't, `gawk' uses `%f' instead.
-`%g, %G'
+`%g', `%G'
Print a number in either scientific notation or in floating-point
notation, whichever uses fewer characters; if the result is
printed in scientific notation, `%G' uses `E' instead of `e'.
@@ -6038,7 +6381,7 @@ width. Here is a list of the format-control letters:
use, because all numbers in `awk' are floating-point; it is
provided primarily for compatibility with C.)
-`%x, %X'
+`%x', `%X'
Print an unsigned hexadecimal integer; `%X' uses the letters `A'
through `F' instead of `a' through `f' (*note
Nondecimal-numbers::).
@@ -6213,11 +6556,12 @@ string, like so:
This is not particularly easy to read but it does work.
- C programmers may be used to supplying additional `l', `L', and `h'
-modifiers in `printf' format strings. These are not valid in `awk'.
-Most `awk' implementations silently ignore them. If `--lint' is
-provided on the command line (*note Options::), `gawk' warns about
-their use. If `--posix' is supplied, their use is a fatal error.
+ C programmers may be used to supplying additional modifiers (`h',
+`j', `l', `L', `t', and `z') in `printf' format strings. These are not
+valid in `awk'. Most `awk' implementations silently ignore them. If
+`--lint' is provided on the command line (*note Options::), `gawk'
+warns about their use. If `--posix' is supplied, their use is a fatal
+error.

File: gawk.info, Node: Printf Examples, Prev: Format Modifiers, Up: Printf
@@ -6258,7 +6602,7 @@ they are last on their lines. They don't need to have spaces after
them.
The table could be made to look even nicer by adding headings to the
-tops of the columns. This is done using the `BEGIN' pattern (*note
+tops of the columns. This is done using a `BEGIN' rule (*note
BEGIN/END::) so that the headers are only printed once, at the
beginning of the `awk' program:
@@ -6285,11 +6629,6 @@ be emphasized by storing it in a variable, like this:
printf format, "----", "------" }
{ printf format, $1, $2 }' mail-list
- At this point, it would be a worthwhile exercise to use the `printf'
-statement to line up the headings and table data for the
-`inventory-shipped' example that was covered earlier in the minor node
-on the `print' statement (*note Print::).
-

File: gawk.info, Node: Redirection, Next: Special Files, Prev: Printf, Up: Printing
@@ -6309,7 +6648,7 @@ commands, except that they are written inside the `awk' program.
There are four forms of output redirection: output to a file, output
appended to a file, output through a pipe to another command, and output
-to a coprocess. They are all shown for the `print' statement, but they
+to a coprocess. We show them all for the `print' statement, but they
work identically for `printf':
`print ITEMS > OUTPUT-FILE'
@@ -6390,7 +6729,7 @@ work identically for `printf':
FILE or COMMAND--it is not necessary to always use a string
constant. Using a variable is generally a good idea, because (if
you mean to refer to that same file or command) `awk' requires
- that the string value be spelled identically every time.
+ that the string value be written identically every time.
`print ITEMS |& COMMAND'
This redirection prints the items to the input of COMMAND. The
@@ -6486,9 +6825,9 @@ message to standard error in an `awk' program is as follows:
This works by opening a pipeline to a shell command that can access the
standard error stream that it inherits from the `awk' process. This is
-far from elegant, and it is also inefficient, because it requires a
-separate process. So people writing `awk' programs often don't do
-this. Instead, they send the error messages to the screen, like this:
+far from elegant, and it also requires a separate process. So people
+writing `awk' programs often don't do this. Instead, they send the
+error messages to the screen, like this:
print "Serious error detected!" > "/dev/tty"
@@ -6502,7 +6841,7 @@ run from a background job, it may not have a terminal at all. Then
opening `/dev/tty' fails.
`gawk' provides special file names for accessing the three standard
-streams. (c.e.). It also provides syntax for accessing any other
+streams. (c.e.) It also provides syntax for accessing any other
inherited open files. If the file name matches one of these special
names when `gawk' redirects input or output, then it directly uses the
stream that the file name stands for. These special file names work
@@ -6587,7 +6926,7 @@ names that `gawk' provides:
behavior.

-File: gawk.info, Node: Close Files And Pipes, Prev: Special Files, Up: Printing
+File: gawk.info, Node: Close Files And Pipes, Next: Output Summary, Prev: Special Files, Up: Printing
5.8 Closing Input and Output Redirections
=========================================
@@ -6695,14 +7034,15 @@ end-of-file return status from `getline'), the child process is not
terminated;(1) more importantly, the file descriptor for the pipe is
not closed and released until `close()' is called or `awk' exits.
- `close()' will silently do nothing if given an argument that does
-not represent a file, pipe or coprocess that was opened with a
-redirection.
+ `close()' silently does nothing if given an argument that does not
+represent a file, pipe or coprocess that was opened with a redirection.
+In such a case, it returns a negative value, indicating an error. In
+addition, `gawk' sets `ERRNO' to a string indicating the error.
Note also that `close(FILENAME)' has no "magic" effects on the
implicit loop that reads through the files named on the command line.
-It is, more likely, a close of a file that was never opened, so `awk'
-silently does nothing.
+It is, more likely, a close of a file that was never opened with a
+redirection, so `awk' silently does nothing.
When using the `|&' operator to communicate with a coprocess, it is
occasionally useful to be able to close one end of the two-way pipe
@@ -6716,9 +7056,9 @@ I/O::, which discusses it in more detail and gives an example.
Using `close()''s Return Value
- In many versions of Unix `awk', the `close()' function is actually a
-statement. It is a syntax error to try and use the return value from
-`close()': (d.c.)
+ In many older versions of Unix `awk', the `close()' function is
+actually a statement. It is a syntax error to try and use the return
+value from `close()': (d.c.)
command = "..."
command | getline info
@@ -6753,6 +7093,56 @@ call. See the system manual pages for information on how to decode this
value.

+File: gawk.info, Node: Output Summary, Next: Output Exercises, Prev: Close Files And Pipes, Up: Printing
+
+5.9 Summary
+===========
+
+ * The `print' statement prints comma-separated expressions. Each
+ expression is separated by the value of `OFS' and terminated by
+ the value of `ORS'. `OFMT' provides the conversion format for
+ numeric values for the `print' statement.
+
+ * The `printf' statement provides finer-grained control over output,
+ with format control letters for different data types and various
+ flags that modify the behavior of the format control letters.
+
+ * Output from both `print' and `printf' may be redirected to files,
+ pipes, and co-processes.
+
+ * `gawk' provides special file names for access to standard input,
+ output and error, and for network communications.
+
+ * Use `close()' to close open file, pipe and co-process redirections.
+ For co-processes, it is possible to close only one direction of the
+ communications.
+
+
+
+File: gawk.info, Node: Output Exercises, Prev: Output Summary, Up: Printing
+
+5.10 Exercises
+==============
+
+ 1. Rewrite the program:
+
+ awk 'BEGIN { print "Month Crates"
+ print "----- ------" }
+ { print $1, " ", $2 }' inventory-shipped
+
+ from *note Output Separators::, by using a new value of `OFS'.
+
+ 2. Use the `printf' statement to line up the headings and table data
+ for the `inventory-shipped' example that was covered in *note
+ Print::.
+
+ 3. What happens if you forget the double quotes when redirecting
+ output, as follows:
+
+ BEGIN { print "Serious error detected!" > /dev/stderr }
+
+
+
File: gawk.info, Node: Expressions, Next: Patterns and Actions, Prev: Printing, Up: Top
6 Expressions
@@ -6778,6 +7168,7 @@ operators.
* Function Calls:: A function call is an expression.
* Precedence:: How various operators nest.
* Locales:: How the locale affects things.
+* Expressions Summary:: Expressions summary.

File: gawk.info, Node: Values, Next: All Operators, Up: Expressions
@@ -6847,7 +7238,8 @@ codes.
(1) The internal representation of all numbers, including integers,
uses double precision floating-point numbers. On most modern systems,
-these are in IEEE 754 standard format.
+these are in IEEE 754 standard format. *Note Arbitrary Precision
+Arithmetic::, for much more information.

File: gawk.info, Node: Nondecimal-numbers, Next: Regexp Constants, Prev: Scalar Constants, Up: Constants
@@ -6932,8 +7324,9 @@ File: gawk.info, Node: Regexp Constants, Prev: Nondecimal-numbers, Up: Consta
A regexp constant is a regular expression description enclosed in
slashes, such as `/^beginning and end$/'. Most regexps used in `awk'
programs are constant, but the `~' and `!~' matching operators can also
-match computed or dynamic regexps (which are just ordinary strings or
-variables that contain a regexp).
+match computed or dynamic regexps (which are typically just ordinary
+strings or variables that contain a regexp, but could be a more complex
+expression).

File: gawk.info, Node: Using Constant Regexps, Next: Variables, Prev: Constants, Up: Values
@@ -6980,8 +7373,8 @@ the contents of the current input record.
Constant regular expressions are also used as the first argument for
the `gensub()', `sub()', and `gsub()' functions, as the second argument
-of the `match()' function, and as the third argument of the
-`patsplit()' function (*note String Functions::). Modern
+of the `match()' function, and as the third argument of the `split()'
+and `patsplit()' functions (*note String Functions::). Modern
implementations of `awk', including `gawk', allow the third argument of
`split()' to be a regexp constant, but some older implementations do
not. (d.c.) This can lead to confusion when attempting to use regexp
@@ -7005,12 +7398,12 @@ For example:
}
In this example, the programmer wants to pass a regexp constant to
-the user-defined function `mysub', which in turn passes it on to either
-`sub()' or `gsub()'. However, what really happens is that the `pat'
-parameter is either one or zero, depending upon whether or not `$0'
-matches `/hi/'. `gawk' issues a warning when it sees a regexp constant
-used as a parameter to a user-defined function, since passing a truth
-value in this way is probably not what was intended.
+the user-defined function `mysub()', which in turn passes it on to
+either `sub()' or `gsub()'. However, what really happens is that the
+`pat' parameter is either one or zero, depending upon whether or not
+`$0' matches `/hi/'. `gawk' issues a warning when it sees a regexp
+constant used as a parameter to a user-defined function, since passing
+a truth value in this way is probably not what was intended.

File: gawk.info, Node: Variables, Next: Conversion, Prev: Using Constant Regexps, Up: Values
@@ -7026,7 +7419,7 @@ on the `awk' command line.
* Menu:
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line and a
+* Assignment Options:: Setting variables on the command line and a
summary of command-line syntax. This is an
advanced method of input.
@@ -7039,8 +7432,11 @@ File: gawk.info, Node: Using Variables, Next: Assignment Options, Up: Variabl
Variables let you give names to values and refer to them later.
Variables have already been used in many of the examples. The name of
a variable must be a sequence of letters, digits, or underscores, and
-it may not begin with a digit. Case is significant in variable names;
-`a' and `A' are distinct variables.
+it may not begin with a digit. Here, a "letter" is any one of the 52
+upper- and lowercase English letters. Other characters that may be
+defined as letters in non-English locales are not valid in variable
+names. Case is significant in variable names; `a' and `A' are distinct
+variables.
A variable name is a valid expression by itself; it represents the
variable's current value. Variables are given new values with
@@ -7119,6 +7515,22 @@ File: gawk.info, Node: Conversion, Prev: Variables, Up: Values
6.1.4 Conversion of Strings and Numbers
---------------------------------------
+Number to string and string to number conversions are generally
+straightforward. There can be subtleties to be aware of; this minor
+node discusses this important facet of `awk'.
+
+* Menu:
+
+* Strings And Numbers:: How `awk' Converts Between Strings And
+ Numbers.
+* Locale influences conversions:: How the locale may affect conversions.
+
+
+File: gawk.info, Node: Strings And Numbers, Next: Locale influences conversions, Up: Conversion
+
+6.1.4.1 How `awk' Converts Between Strings And Numbers
+......................................................
+
Strings are converted to numbers and numbers are converted to strings,
if the context of the `awk' program demands it. For example, if the
value of either `foo' or `bar' in the expression `foo + bar' happens to
@@ -7168,35 +7580,47 @@ value of `CONVFMT' may be. Given the following code fragment:
`b' has the value `"12"', not `"12.00"'. (d.c.)
+ Pre-POSIX `awk' Used `OFMT' For String Conversion
+
Prior to the POSIX standard, `awk' used the value of `OFMT' for
converting numbers to strings. `OFMT' specifies the output format to
use when printing numbers with `print'. `CONVFMT' was introduced in
order to separate the semantics of conversion from the semantics of
printing. Both `CONVFMT' and `OFMT' have the same default value:
`"%.6g"'. In the vast majority of cases, old `awk' programs do not
-change their behavior. However, these semantics for `OFMT' are
-something to keep in mind if you must port your new-style program to
-older implementations of `awk'. We recommend that instead of changing
-your programs, just port `gawk' itself. *Note Print::, for more
-information on the `print' statement.
-
- And, once again, where you are can matter when it comes to converting
-between numbers and strings. In *note Locales::, we mentioned that the
-local character set and language (the locale) can affect how `gawk'
-matches characters. The locale also affects numeric formats. In
-particular, for `awk' programs, it affects the decimal point character.
-The `"C"' locale, and most English-language locales, use the period
-character (`.') as the decimal point. However, many (if not most)
-European and non-English locales use the comma (`,') as the decimal
-point character.
+change their behavior. *Note Print::, for more information on the
+`print' statement.
+
+ ---------- Footnotes ----------
+
+ (1) Pathological cases can require up to 752 digits (!), but we
+doubt that you need to worry about this.
+
+
+File: gawk.info, Node: Locale influences conversions, Prev: Strings And Numbers, Up: Conversion
+
+6.1.4.2 Locales Can Influence Conversion
+........................................
+
+Where you are can matter when it comes to converting between numbers and
+strings. The local character set and language--the "locale"--can
+affect numeric formats. In particular, for `awk' programs, it affects
+the decimal point character and the thousands-separator character. The
+`"C"' locale, and most English-language locales, use the period
+character (`.') as the decimal point and don't have a thousands
+separator. However, many (if not most) European and non-English
+locales use the comma (`,') as the decimal point character. European
+locales often use either a space or a period as the thousands
+separator, if they have one.
The POSIX standard says that `awk' always uses the period as the
decimal point when reading the `awk' program source code, and for
command-line variable assignments (*note Other Arguments::). However,
when interpreting input data, for `print' and `printf' output, and for
number to string conversion, the local decimal point character is used.
-(d.c.) Here are some examples indicating the difference in behavior,
-on a GNU/Linux system:
+(d.c.) In all cases, numbers in source code and in input data cannot
+have a thousands separator. Here are some examples indicating the
+difference in behavior, on a GNU/Linux system:
$ export POSIXLY_CORRECT=1 Force POSIX behavior
$ gawk 'BEGIN { printf "%g\n", 3.1415927 }'
@@ -7241,11 +7665,6 @@ representation can have an unusual but important effect on the way
`gawk' converts some special string values to numbers. The details are
presented in *note POSIX Floating Point Problems::.
- ---------- Footnotes ----------
-
- (1) Pathological cases can require up to 752 digits (!), but we
-doubt that you need to worry about this.
-

File: gawk.info, Node: All Operators, Next: Truth Values and Conditions, Prev: Values, Up: Expressions
@@ -7401,9 +7820,9 @@ example:
print (a " " (a = "panic"))
}
-It is not defined whether the assignment to `a' happens before or after
-the value of `a' is retrieved for producing the concatenated value.
-The result could be either `don't panic', or `panic panic'.
+It is not defined whether the second assignment to `a' happens before
+or after the value of `a' is retrieved for producing the concatenated
+value. The result could be either `don't panic', or `panic panic'.
The precedence of concatenation, when mixed with other operators, is
often counter-intuitive. Consider this example:
@@ -7427,13 +7846,14 @@ Otherwise, it's parsed as follows:
=> -12 (-24)
=> -12-24
- As mentioned earlier, when doing concatenation, _parenthesize_.
-Otherwise, you're never quite sure what you'll get.
+ As mentioned earlier, when mixing concatenation with other
+operators, _parenthesize_. Otherwise, you're never quite sure what
+you'll get.
---------- Footnotes ----------
- (1) It happens that Brian Kernighan's `awk', `gawk' and `mawk' all
-"get it right," but you should not rely on this.
+ (1) It happens that BWK `awk', `gawk' and `mawk' all "get it right,"
+but you should not rely on this.

File: gawk.info, Node: Assignment Ops, Next: Increment Ops, Prev: Concatenation, Up: All Operators
@@ -7476,9 +7896,9 @@ that the assignment stores in the specified variable, field, or array
element. (Such values are called "rvalues".)
It is important to note that variables do _not_ have permanent types.
-A variable's type is simply the type of whatever value it happens to
-hold at the moment. In the following program fragment, the variable
-`foo' has a numeric value at first, and a string value later on:
+A variable's type is simply the type of whatever value was last assigned
+to it. In the following program fragment, the variable `foo' has a
+numeric value at first, and a string value later on:
foo = 1
print foo
@@ -7551,9 +7971,10 @@ The indices of `bar' are practically guaranteed to be different, because
the `rand()' function haven't been covered yet. *Note Arrays::, and
see *note Numeric Functions::, for more information). This example
illustrates an important fact about assignment operators: the lefthand
-expression is only evaluated _once_. It is up to the implementation as
-to which expression is evaluated first, the lefthand or the righthand.
-Consider this example:
+expression is only evaluated _once_.
+
+ It is up to the implementation as to which expression is evaluated
+first, the lefthand or the righthand. Consider this example:
i = 1
a[i += 2] = i + 1
@@ -7566,14 +7987,14 @@ converted to a number.
Operator Effect
--------------------------------------------------------------------------
-LVALUE `+=' INCREMENT Adds INCREMENT to the value of LVALUE.
-LVALUE `-=' DECREMENT Subtracts DECREMENT from the value of LVALUE.
-LVALUE `*=' Multiplies the value of LVALUE by COEFFICIENT.
+LVALUE `+=' INCREMENT Add INCREMENT to the value of LVALUE.
+LVALUE `-=' DECREMENT Subtract DECREMENT from the value of LVALUE.
+LVALUE `*=' Multiply the value of LVALUE by COEFFICIENT.
COEFFICIENT
-LVALUE `/=' DIVISOR Divides the value of LVALUE by DIVISOR.
-LVALUE `%=' MODULUS Sets LVALUE to its remainder by MODULUS.
+LVALUE `/=' DIVISOR Divide the value of LVALUE by DIVISOR.
+LVALUE `%=' MODULUS Set LVALUE to its remainder by MODULUS.
LVALUE `^=' POWER
-LVALUE `**=' POWER Raises LVALUE to the power POWER. (c.e.)
+LVALUE `**=' POWER Raise LVALUE to the power POWER. (c.e.)
Table 6.2: Arithmetic Assignment Operators
@@ -7596,8 +8017,8 @@ A workaround is:
awk '/[=]=/' /dev/null
- `gawk' does not have this problem, nor do the other freely available
-versions described in *note Other Versions::.
+ `gawk' does not have this problem; BWK `awk' and `mawk' also do not
+(*note Other Versions::).

File: gawk.info, Node: Increment Ops, Prev: Assignment Ops, Up: All Operators
@@ -7612,13 +8033,13 @@ they are convenient abbreviations for very common operations.
The operator used for adding one is written `++'. It can be used to
increment a variable either before or after taking its value. To
-pre-increment a variable `v', write `++v'. This adds one to the value
-of `v'--that new value is also the value of the expression. (The
+"pre-increment" a variable `v', write `++v'. This adds one to the
+value of `v'--that new value is also the value of the expression. (The
assignment expression `v += 1' is completely equivalent.) Writing the
-`++' after the variable specifies post-increment. This increments the
-variable value just the same; the difference is that the value of the
-increment expression itself is the variable's _old_ value. Thus, if
-`foo' has the value four, then the expression `foo++' has the value
+`++' after the variable specifies "post-increment". This increments
+the variable value just the same; the difference is that the value of
+the increment expression itself is the variable's _old_ value. Thus,
+if `foo' has the value four, then the expression `foo++' has the value
four, but it changes the value of `foo' to five. In other words, the
operator returns the old value of the variable, but with the side
effect of incrementing it.
@@ -7761,14 +8182,11 @@ File: gawk.info, Node: Variable Typing, Next: Comparison Operators, Up: Typin
6.3.2.1 String Type Versus Numeric Type
.......................................
-The 1992 POSIX standard introduced the concept of a "numeric string",
-which is simply a string that looks like a number--for example,
-`" +2"'. This concept is used for determining the type of a variable.
-The type of the variable is important because the types of two variables
-determine how they are compared. The various versions of the POSIX
-standard did not get the rules quite right for several editions.
-Fortunately, as of at least the 2008 standard (and possibly earlier),
-the standard has been fixed, and variable typing follows these rules:(1)
+The POSIX standard introduced the concept of a "numeric string", which
+is simply a string that looks like a number--for example, `" +2"'.
+This concept is used for determining the type of a variable. The type
+of the variable is important because the types of two variables
+determine how they are compared. Variable typing follows these rules:
* A numeric constant or the result of a numeric operation has the
NUMERIC attribute.
@@ -7814,7 +8232,7 @@ operands, according to the following symmetric matrix:
user input--should be treated as numeric, even though it is actually
made of characters and is therefore also a string. Thus, for example,
the string constant `" +3.14"', when it appears in program source code,
-is a string--even though it looks numeric--and is _never_ treated as
+is a string--even though it looks numeric--and is _never_ treated as a
number for comparison purposes.
In short, when one operand is a "pure" string, such as a string
@@ -7825,10 +8243,9 @@ comparison is performed.
characters, and so is first and foremost of STRING type; input strings
that look numeric are additionally given the STRNUM attribute. Thus,
the six-character input string ` +3.14' receives the STRNUM attribute.
-In contrast, the eight-character literal `" +3.14"' appearing in
-program text is a string constant. The following examples print `1'
-when the comparison between the two different constants is true, `0'
-otherwise:
+In contrast, the eight characters `" +3.14"' appearing in program text
+comprise a string constant. The following examples print `1' when the
+comparison between the two different constants is true, `0' otherwise:
$ echo ' +3.14' | gawk '{ print $0 == " +3.14" }' True
-| 1
@@ -7847,11 +8264,6 @@ otherwise:
$ echo ' +3.14' | gawk '{ print $1 == 3.14 }' True
-| 1
- ---------- Footnotes ----------
-
- (1) `gawk' has followed these rules for many years, and it is
-gratifying that the POSIX standard is also now correct.
-

File: gawk.info, Node: Comparison Operators, Next: POSIX String Comparison, Prev: Variable Typing, Up: Typing and Comparison
@@ -7950,7 +8362,7 @@ has the value one if `x' contains `foo', such as `"Oh, what a fool am
I!"'.
The righthand operand of the `~' and `!~' operators may be either a
-regexp constant (`/.../') or an ordinary expression. In the latter
+regexp constant (`/'...`/') or an ordinary expression. In the latter
case, the value of the expression as a string is used as a dynamic
regexp (*note Regexp Usage::; also *note Computed Regexps::).
@@ -7971,9 +8383,10 @@ File: gawk.info, Node: POSIX String Comparison, Prev: Comparison Operators, U
..........................................
The POSIX standard says that string comparison is performed based on
-the locale's collating order. This is usually very different from the
-results obtained when doing straight character-by-character
-comparison.(1)
+the locale's "collating order". This is the order in which characters
+sort, as defined by the locale (for more discussion, *note Ranges and
+Locales::). Comparing in this order usually gives results that differ
+greatly from those of straight character-by-character comparison.(1)
Because this behavior differs considerably from existing practice,
`gawk' only implements it when in POSIX mode (*note Options::). Here
@@ -8053,9 +8466,9 @@ because of the way they work. Evaluation of the full expression is
"short-circuited" if the result can be determined part way through its
evaluation.
- Statements that use `&&' or `||' can be continued simply by putting
-a newline after them. But you cannot put a newline in front of either
-of these operators without using backslash continuation (*note
+ Statements that end with `&&' or `||' can be continued simply by
+putting a newline after them. But you cannot put a newline in front of
+either of these operators without using backslash continuation (*note
Statements/Lines::).
The actual value of an expression using the `!' operator is either
@@ -8066,7 +8479,7 @@ following program is one way to print lines in between special
bracketing lines:
$1 == "START" { interested = ! interested; next }
- interested == 1 { print }
+ interested { print }
$1 == "END" { interested = ! interested; next }
The variable `interested', as with all `awk' variables, starts out
@@ -8076,6 +8489,14 @@ using `!'. The next rule prints lines as long as `interested' is true.
When a line is seen whose first field is `END', `interested' is toggled
back to false.(1)
+ Most commonly, the `!' operator is used in the conditions of `if'
+and `while' statements, where it often makes more sense to phrase the
+logic in the negative:
+
+ if (! SOME CONDITION || SOME OTHER CONDITION) {
+ ... DO WHATEVER PROCESSING ...
+ }
+
NOTE: The `next' statement is discussed in *note Next Statement::.
`next' tells `awk' to skip the rest of the rules, get the next
record, and start processing the rules over again at the top. The
@@ -8125,7 +8546,7 @@ not. *Note Arrays::, for more information about arrays.
continued simply by putting a newline after either character. However,
putting a newline in front of either character does not work without
using backslash continuation (*note Statements/Lines::). If `--posix'
-is specified (*note Options::), then this extension is disabled.
+is specified (*note Options::), this extension is disabled.

File: gawk.info, Node: Function Calls, Next: Precedence, Prev: Truth Values and Conditions, Up: Expressions
@@ -8142,6 +8563,8 @@ available in every `awk' program. The `sqrt()' function is one of
these. *Note Built-in::, for a list of built-in functions and their
descriptions. In addition, you can define functions for use in your
program. *Note User-defined::, for instructions on how to do this.
+Finally, `gawk' lets you write functions in C or C++ that may be called
+from your program: see *note Dynamic Extensions::.
The way to use a function is with a "function call" expression,
which consists of the function name followed immediately by a list of
@@ -8173,19 +8596,21 @@ the number of which to take the square root:
If those arguments are not supplied, the functions use a reasonable
default value. *Note Built-in::, for full details. If arguments are
omitted in calls to user-defined functions, then those arguments are
-treated as local variables and initialized to the empty string (*note
-User-defined::).
+treated as local variables. Such local variables act like the empty
+string if referenced where a string value is required, and like zero if
+referenced where a numeric value is required (*note User-defined::).
As an advanced feature, `gawk' provides indirect function calls,
which is a way to choose the function to call at runtime, instead of
when you write the source code to your program. We defer discussion of
this feature until later; see *note Indirect Calls::.
- Like every other expression, the function call has a value, which is
-computed by the function based on the arguments you give it. In this
-example, the value of `sqrt(ARGUMENT)' is the square root of ARGUMENT.
-The following program reads numbers, one number per line, and prints the
-square root of each one:
+ Like every other expression, the function call has a value, often
+called the "return value", which is computed by the function based on
+the arguments you give it. In this example, the return value of
+`sqrt(ARGUMENT)' is the square root of ARGUMENT. The following program
+reads numbers, one number per line, and prints the square root of each
+one:
$ awk '{ print "The square root of", $1, "is", sqrt($1) }'
1
@@ -8258,7 +8683,7 @@ to avoid the problem the expression can be rewritten as `$($0++)--'.
This table presents `awk''s operators, in order of highest to lowest
precedence:
-`(...)'
+`('...`)'
Grouping.
`$'
@@ -8279,7 +8704,7 @@ precedence:
`+ -'
Addition, subtraction.
-`String Concatenation'
+String Concatenation
There is no special symbol for concatenation. The operands are
simply written side by side (*note Concatenation::).
@@ -8320,13 +8745,15 @@ precedence:
POSIX. For maximum portability, do not use them.

-File: gawk.info, Node: Locales, Prev: Precedence, Up: Expressions
+File: gawk.info, Node: Locales, Next: Expressions Summary, Prev: Precedence, Up: Expressions
6.6 Where You Are Makes A Difference
====================================
Modern systems support the notion of "locales": a way to tell the
-system about the local character set and language.
+system about the local character set and language. The ISO C standard
+defines a default `"C"' locale, which is an environment that is typical
+of what many C programmers are used to.
Once upon a time, the locale setting used to affect regexp matching
(*note Ranges and Locales::), but this is no longer true.
@@ -8338,6 +8765,13 @@ much better performance when reading records. Otherwise, `gawk' has to
make several function calls, _per input character_, to find the record
terminator.
+ Locales can affect how dates and times are formatted (*note Time
+Functions::). For example, a common way to abbreviate the date
+September 4, 2015 in the United States is "9/4/15." In many countries
+in Europe, however, it is abbreviated "4.9.15." Thus, the `%x'
+specification in a `"US"' locale might produce `9/4/15', while in an
+`"EUROPE"' locale, it might produce `4.9.15'.
+
According to POSIX, string comparison is also affected by locales
(similar to regular expressions). The details are presented in *note
POSIX String Comparison::.
@@ -8347,6 +8781,63 @@ used when `gawk' parses input data. This is discussed in detail in
*note Conversion::.

+File: gawk.info, Node: Expressions Summary, Prev: Locales, Up: Expressions
+
+6.7 Summary
+===========
+
+ * Expressions are the basic elements of computation in programs.
+ They are built from constants, variables, function calls and
+ combinations of the various kinds of values with operators.
+
+ * `awk' supplies three kinds of constants: numeric, string, and
+ regexp. `gawk' lets you specify numeric constants in octal and
+ hexadecimal (bases 8 and 16) in addition to decimal (base 10). In
+ certain contexts, a standalone regexp constant such as `/foo/' has
+ the same meaning as `$0 ~ /foo/'.
+
+ * Variables hold values between uses in computations. A number of
+ built-in variables provide information to your `awk' program, and
+ a number of others let you control how `awk' behaves.
+
+ * Numbers are automatically converted to strings, and strings to
+ numbers, as needed by `awk'. Numeric values are converted as if
+ they were formatted with `sprintf()' using the format in `CONVFMT'.
+ Locales can influence the conversions.
+
+ * `awk' provides the usual arithmetic operators (addition,
+ subtraction, multiplication, division, modulus), and unary plus
+ and minus. It also provides comparison operators, boolean
+ operators, and regexp matching operators. String concatenation is
+ accomplished by placing two expressions next to each other; there
+ is no explicit operator. The three-operand `?:' operator provides
+ an "if-else" test within expressions.
+
+ * Assignment operators provide convenient shorthands for common
+ arithmetic operations.
+
+ * In `awk', a value is considered to be true if it is non-zero (for a
+ number) _or_ non-null (for a string). Otherwise, the value is false.
+
+ * A value's type is set upon each assignment and may change over its
+ lifetime. The type determines how it behaves in comparisons
+ (string or numeric).
+
+ * Function calls return a value which may be used as part of a larger
+ expression. Expressions used to pass parameter values are fully
+ evaluated before the function is called. `awk' provides built-in
+ and user-defined functions; this is described later on in this
+ Info file.
+
+ * Operator precedence specifies the order in which operations are
+ performed, unless explicitly overridden by parentheses. `awk''s
+ operator precedence is compatible with that of C.
+
+ * Locales can affect the format of data as output by an `awk'
+ program, and occasionally the format for data read as input.
+
+
+
File: gawk.info, Node: Patterns and Actions, Next: Arrays, Prev: Expressions, Up: Top
7 Patterns, Actions, and Variables
@@ -8370,6 +8861,7 @@ top of. Now it's time to start building something useful.
* Statements:: Describes the various control statements in
detail.
* Built-in Variables:: Summarizes the built-in variables.
+* Pattern Action Summary:: Patterns and Actions summary.

File: gawk.info, Node: Pattern Overview, Next: Using Shell Variables, Up: Patterns and Actions
@@ -8398,10 +8890,10 @@ summary of the types of `awk' patterns:
A single expression. It matches when its value is nonzero (if a
number) or non-null (if a string). (*Note Expression Patterns::.)
-`PAT1, PAT2'
+`BEGPAT, ENDPAT'
A pair of patterns separated by a comma, specifying a range of
records. The range includes both the initial record that matches
- PAT1 and the final record that matches PAT2. (*Note Ranges::.)
+ BEGPAT and the final record that matches ENDPAT. (*Note Ranges::.)
`BEGIN'
`END'
@@ -8411,7 +8903,7 @@ summary of the types of `awk' patterns:
`BEGINFILE'
`ENDFILE'
Special patterns for you to supply startup or cleanup actions to be
- done on a per file basis. (*Note BEGINFILE/ENDFILE::.)
+ done on a per-file basis. (*Note BEGINFILE/ENDFILE::.)
`EMPTY'
The empty pattern matches every input record. (*Note Empty::.)
@@ -8460,7 +8952,7 @@ precisely `li':
`li'.) Contrast this with the following regular expression match, which
accepts any record with a first field that contains `li':
- $ awk '$1 ~ /foo/ { print $2 }' mail-list
+ $ awk '$1 ~ /li/ { print $2 }' mail-list
-| 555-5553
-| 555-6699
@@ -8531,7 +9023,7 @@ record. When a record matches BEGPAT, the range pattern is "turned on"
and the range pattern matches this record as well. As long as the
range pattern stays turned on, it automatically matches every input
record read. The range pattern also matches ENDPAT against every input
-record; when this succeeds, the range pattern is turned off again for
+record; when this succeeds, the range pattern is "turned off" again for
the following record. Then the range pattern goes back to checking
BEGPAT against each record.
@@ -8663,10 +9155,10 @@ File: gawk.info, Node: I/O And BEGIN/END, Prev: Using BEGIN/END, Up: BEGIN/EN
7.1.4.2 Input/Output from `BEGIN' and `END' Rules
.................................................
-There are several (sometimes subtle) points to remember when doing I/O
-from a `BEGIN' or `END' rule. The first has to do with the value of
-`$0' in a `BEGIN' rule. Because `BEGIN' rules are executed before any
-input is read, there simply is no input record, and therefore no
+There are several (sometimes subtle) points to be aware of when doing
+I/O from a `BEGIN' or `END' rule. The first has to do with the value
+of `$0' in a `BEGIN' rule. Because `BEGIN' rules are executed before
+any input is read, there simply is no input record, and therefore no
fields, when executing `BEGIN' rules. References to `$0' and the fields
yield a null string or zero, depending upon the context. One way to
give `$0' a real value is to execute a `getline' command without a
@@ -8681,8 +9173,7 @@ number of fields from the last input record. Most probably due to an
oversight, the standard does not say that `$0' is also preserved,
although logically one would think that it should be. In fact, `gawk'
does preserve the value of `$0' for use in `END' rules. Be aware,
-however, that Brian Kernighan's `awk', and possibly other
-implementations, do not.
+however, that BWK `awk', and possibly other implementations, do not.
The third point follows from the first two. The meaning of `print'
inside a `BEGIN' or `END' rule is the same as always: `print $0'. If
@@ -8734,10 +9225,10 @@ tasks that would otherwise be difficult or impossible to perform:
entirely. Otherwise, `gawk' exits with the usual fatal error.
* If you have written extensions that modify the record handling (by
- inserting an "input parser"), you can invoke them at this point,
- before `gawk' has started processing the file. (This is a _very_
- advanced feature, currently used only by the `gawkextlib' project
- (http://gawkextlib.sourceforge.net).)
+ inserting an "input parser," *note Input Parsers::), you can invoke
+ them at this point, before `gawk' has started processing the file.
+ (This is a _very_ advanced feature, currently used only by the
+ `gawkextlib' project (http://gawkextlib.sourceforge.net).)
The `ENDFILE' rule is called when `gawk' has finished processing the
last record in an input file. For the last input file, it will be
@@ -8756,7 +9247,7 @@ either a `BEGINFILE' or an `ENDFILE' rule. The `nextfile' statement
but not inside an `ENDFILE' rule.
The `getline' statement (*note Getline::) is restricted inside both
-`BEGINFILE' and `ENDFILE'. Only the `getline VARIABLE < FILE' form is
+`BEGINFILE' and `ENDFILE': only redirected forms of `getline' are
allowed.
`BEGINFILE' and `ENDFILE' are `gawk' extensions. In most other
@@ -8789,15 +9280,15 @@ to get the value of the shell variable into the body of the `awk'
program.
The most common method is to use shell quoting to substitute the
-variable's value into the program inside the script. For example, in
-the following program:
+variable's value into the program inside the script. For example,
+consider the following program:
printf "Enter search pattern: "
read pattern
awk "/$pattern/ "'{ nmatches++ }
END { print nmatches, "found" }' /path/to/data
-the `awk' program consists of two pieces of quoted text that are
+The `awk' program consists of two pieces of quoted text that are
concatenated together to form the program. The first part is
double-quoted, which allows substitution of the `pattern' shell
variable inside the quotes. The second part is single-quoted.
@@ -8809,7 +9300,7 @@ quotes when reading the program.
A better method is to use `awk''s variable assignment feature (*note
Assignment Options::) to assign the shell variable's value to an `awk'
-variable's value. Then use dynamic regexps to match the pattern (*note
+variable. Then use dynamic regexps to match the pattern (*note
Computed Regexps::). The following shows how to redo the previous
example using this technique:
@@ -8840,19 +9331,19 @@ which (but not both) may be omitted. The purpose of the "action" is to
tell `awk' what to do once a match for the pattern is found. Thus, in
outline, an `awk' program generally looks like this:
- [PATTERN] { ACTION }
- PATTERN [{ ACTION }]
+ [PATTERN] `{ ACTION }'
+ PATTERN [`{ ACTION }']
...
- function NAME(ARGS) { ... }
+ `function NAME(ARGS) { ... }'
...
An action consists of one or more `awk' "statements", enclosed in
-curly braces (`{...}'). Each statement specifies one thing to do. The
-statements are separated by newlines or semicolons. The curly braces
-around an action must be used even if the action contains only one
-statement, or if it contains no statements at all. However, if you
-omit the action entirely, omit the curly braces as well. An omitted
-action is equivalent to `{ print $0 }':
+braces (`{...}'). Each statement specifies one thing to do. The
+statements are separated by newlines or semicolons. The braces around
+an action must be used even if the action contains only one statement,
+or if it contains no statements at all. However, if you omit the
+action entirely, omit the braces as well. An omitted action is
+equivalent to `{ print $0 }':
/foo/ { } match `foo', do nothing -- empty action
/foo/ match `foo', print the record -- omitted action
@@ -8871,9 +9362,9 @@ Control statements
well as a few special ones (*note Statements::).
Compound statements
- Consist of one or more statements enclosed in curly braces. A
- compound statement is used in order to put several statements
- together in the body of an `if', `while', `do', or `for' statement.
+ Enclose one or more statements in braces. A compound statement is
+ used in order to put several statements together in the body of an
+ `if', `while', `do', or `for' statement.
Input statements
Use the `getline' command (*note Getline::). Also supplied in
@@ -8902,7 +9393,7 @@ statements contain other statements. For example, the `if' statement
contains another statement that may or may not be executed. The
contained statement is called the "body". To include more than one
statement in the body, group them into a single "compound statement"
-with curly braces, separating them with newlines or semicolons.
+with braces, separating them with newlines or semicolons.
* Menu:
@@ -8931,7 +9422,7 @@ File: gawk.info, Node: If Statement, Next: While Statement, Up: Statements
The `if'-`else' statement is `awk''s decision-making statement. It
looks like this:
- if (CONDITION) THEN-BODY [else ELSE-BODY]
+ `if (CONDITION) THEN-BODY' [`else ELSE-BODY']
The CONDITION is an expression that controls what the rest of the
statement does. If the CONDITION is true, THEN-BODY is executed;
@@ -8950,8 +9441,8 @@ the value of `x' is evenly divisible by two), then the first `print'
statement is executed; otherwise, the second `print' statement is
executed. If the `else' keyword appears on the same line as THEN-BODY
and THEN-BODY is not a compound statement (i.e., not surrounded by
-curly braces), then a semicolon must separate THEN-BODY from the `else'.
-To illustrate this, the previous example can be rewritten as:
+braces), then a semicolon must separate THEN-BODY from the `else'. To
+illustrate this, the previous example can be rewritten as:
if (x % 2 == 0) print "x is even"; else
print "x is odd"
@@ -9136,7 +9627,8 @@ File: gawk.info, Node: Switch Statement, Next: Break Statement, Prev: For Sta
7.4.5 The `switch' Statement
----------------------------
-This minor node describes a `gawk'-specific feature.
+This minor node describes a `gawk'-specific feature. If `gawk' is in
+compatibility mode (*note Options::), it is not available.
The `switch' statement allows the evaluation of an expression and
the execution of statements based on a `case' match. Case statements
@@ -9163,32 +9655,36 @@ match to a given case is made, the case statement bodies execute until
a `break', `continue', `next', `nextfile' or `exit' is encountered, or
the end of the `switch' statement itself. For example:
- switch (NR * 2 + 1) {
- case 3:
- case "11":
- print NR - 1
- break
-
- case /2[[:digit:]]+/:
- print NR
-
- default:
- print NR + 1
-
- case -1:
- print NR * -1
+ while ((c = getopt(ARGC, ARGV, "aksx")) != -1) {
+ switch (c) {
+ case "a":
+ # report size of all files
+ all_files = TRUE;
+ break
+ case "k":
+ BLOCK_SIZE = 1024 # 1K block size
+ break
+ case "s":
+ # do sums only
+ sum_only = TRUE
+ break
+ case "x":
+ # don't cross filesystems
+ fts_flags = or(fts_flags, FTS_XDEV)
+ break
+ case "?":
+ default:
+ usage()
+ break
+ }
}
Note that if none of the statements specified above halt execution
of a matched `case' statement, execution falls through to the next
-`case' until execution halts. In the above example, for any case value
-starting with `2' followed by one or more digits, the `print' statement
-is executed and then falls through into the `default' section,
-executing its `print' statement. In turn, the -1 case will also be
-executed since the `default' does not halt execution.
-
- This `switch' statement is a `gawk' extension. If `gawk' is in
-compatibility mode (*note Options::), it is not available.
+`case' until execution halts. In the above example, the `case' for
+`"?"' falls through to the `default' case, which is to call a function
+named `usage()'. (The `getopt()' function being called here is
+described in *note Getopt Function::.)

File: gawk.info, Node: Break Statement, Next: Continue Statement, Prev: Switch Statement, Up: Statements
@@ -9202,15 +9698,15 @@ divisor of any integer, and also identifies prime numbers:
# find smallest divisor of num
{
- num = $1
- for (div = 2; div * div <= num; div++) {
- if (num % div == 0)
- break
- }
- if (num % div == 0)
- printf "Smallest divisor of %d is %d\n", num, div
- else
- printf "%d is prime\n", num
+ num = $1
+ for (div = 2; div * div <= num; div++) {
+ if (num % div == 0)
+ break
+ }
+ if (num % div == 0)
+ printf "Smallest divisor of %d is %d\n", num, div
+ else
+ printf "%d is prime\n", num
}
When the remainder is zero in the first `if' statement, `awk'
@@ -9225,17 +9721,17 @@ Statement::.)
# find smallest divisor of num
{
- num = $1
- for (div = 2; ; div++) {
- if (num % div == 0) {
- printf "Smallest divisor of %d is %d\n", num, div
- break
- }
- if (div * div > num) {
- printf "%d is prime\n", num
- break
+ num = $1
+ for (div = 2; ; div++) {
+ if (num % div == 0) {
+ printf "Smallest divisor of %d is %d\n", num, div
+ break
+ }
+ if (div * div > num) {
+ printf "%d is prime\n", num
+ break
+ }
}
- }
}
The `break' statement is also used to break out of the `switch'
@@ -9245,8 +9741,8 @@ statement. This is discussed in *note Switch Statement::.
loop or `switch'. However, although it was never documented,
historical implementations of `awk' treated the `break' statement
outside of a loop as if it were a `next' statement (*note Next
-Statement::). (d.c.) Recent versions of Brian Kernighan's `awk' no
-longer allow this usage, nor does `gawk'.
+Statement::). (d.c.) Recent versions of BWK `awk' no longer allow
+this usage, nor does `gawk'.

File: gawk.info, Node: Continue Statement, Next: Next Statement, Prev: Break Statement, Up: Statements
@@ -9289,15 +9785,16 @@ the previous example with the following `while' loop:
print ""
}
-This program loops forever once `x' reaches 5.
+This program loops forever once `x' reaches 5, since the increment
+(`x++') is never reached.
The `continue' statement has no special meaning with respect to the
`switch' statement, nor does it have any meaning when used outside the
body of a loop. Historical versions of `awk' treated a `continue'
statement outside a loop the same way they treated a `break' statement
outside a loop: as if it were a `next' statement (*note Next
-Statement::). (d.c.) Recent versions of Brian Kernighan's `awk' no
-longer work this way, nor does `gawk'.
+Statement::). (d.c.) Recent versions of BWK `awk' no longer work this
+way, nor does `gawk'.

File: gawk.info, Node: Next Statement, Next: Nextfile Statement, Prev: Continue Statement, Up: Statements
@@ -9328,9 +9825,8 @@ complicating the rest of the program, write a "weed out" rule near the
beginning, in the following manner:
NF != 4 {
- err = sprintf("%s:%d: skipped: NF != 4\n", FILENAME, FNR)
- print err > "/dev/stderr"
- next
+ printf("%s:%d: skipped: NF != 4\n", FILENAME, FNR) > "/dev/stderr"
+ next
}
Because of the `next' statement, the program's subsequent rules won't
@@ -9346,7 +9842,7 @@ rules. *Note BEGINFILE/ENDFILE::.
According to the POSIX standard, the behavior is undefined if the
`next' statement is used in a `BEGIN' or `END' rule. `gawk' treats it
-as a syntax error. Although POSIX permits it, some other `awk'
+as a syntax error. Although POSIX permits it, most other `awk'
implementations don't allow the `next' statement inside function bodies
(*note User-defined::). Just as with any other `next' statement, a
`next' statement inside a function body reads the next record and
@@ -9368,7 +9864,7 @@ reset to one, and processing starts over with the first rule in the
program. If the `nextfile' statement causes the end of the input to be
reached, then the code in any `END' rules is executed. An exception to
this is when `nextfile' is invoked during execution of any statement in
-an `END' rule; In this case, it causes the program to stop immediately.
+an `END' rule; in this case, it causes the program to stop immediately.
*Note BEGIN/END::.
The `nextfile' statement is useful when there are many data files to
@@ -9378,10 +9874,10 @@ would have to continue scanning the unwanted records. The `nextfile'
statement accomplishes this much more efficiently.
In `gawk', execution of `nextfile' causes additional things to
-happen: any `ENDFILE' rules are executed except in the case as
-mentioned below, `ARGIND' is incremented, and any `BEGINFILE' rules are
-executed. (`ARGIND' hasn't been introduced yet. *Note Built-in
-Variables::.)
+happen: any `ENDFILE' rules are executed if `gawk' is not currently in
+an `END' or `BEGINFILE' rule, `ARGIND' is incremented, and any
+`BEGINFILE' rules are executed. (`ARGIND' hasn't been introduced yet.
+*Note Built-in Variables::.)
With `gawk', `nextfile' is useful inside a `BEGINFILE' rule to skip
over a file that would otherwise cause `gawk' to exit with a fatal
@@ -9399,12 +9895,12 @@ listed in `ARGV'.
standard. See the Austin Group website
(http://austingroupbugs.net/view.php?id=607).
- The current version of the Brian Kernighan's `awk', and `mawk'
-(*note Other Versions::) also support `nextfile'. However, they don't
-allow the `nextfile' statement inside function bodies (*note
-User-defined::). `gawk' does; a `nextfile' inside a function body
-reads the next record and starts processing it with the first rule in
-the program, just as any other `nextfile' statement.
+ The current version of BWK `awk', and `mawk' (*note Other
+Versions::) also support `nextfile'. However, they don't allow the
+`nextfile' statement inside function bodies (*note User-defined::).
+`gawk' does; a `nextfile' inside a function body reads the next record
+and starts processing it with the first rule in the program, just as
+with any other `nextfile' statement.

File: gawk.info, Node: Exit Statement, Prev: Nextfile Statement, Up: Statements
@@ -9416,7 +9912,7 @@ The `exit' statement causes `awk' to immediately stop executing the
current rule and to stop processing input; any remaining input is
ignored. The `exit' statement is written as follows:
- exit [RETURN CODE]
+ `exit' [RETURN CODE]
When an `exit' statement is executed from a `BEGIN' rule, the
program stops processing everything immediately. No input records are
@@ -9450,12 +9946,12 @@ with a nonzero status. An `awk' program can do this using an `exit'
statement with a nonzero argument, as shown in the following example:
BEGIN {
- if (("date" | getline date_now) <= 0) {
- print "Can't get system date" > "/dev/stderr"
- exit 1
- }
- print "current date is", date_now
- close("date")
+ if (("date" | getline date_now) <= 0) {
+ print "Can't get system date" > "/dev/stderr"
+ exit 1
+ }
+ print "current date is", date_now
+ close("date")
}
NOTE: For full portability, exit values should be between zero and
@@ -9464,7 +9960,7 @@ statement with a nonzero argument, as shown in the following example:
systems.

-File: gawk.info, Node: Built-in Variables, Prev: Statements, Up: Patterns and Actions
+File: gawk.info, Node: Built-in Variables, Next: Pattern Action Summary, Prev: Statements, Up: Patterns and Actions
7.5 Built-in Variables
======================
@@ -9477,9 +9973,9 @@ of these automatically, so that they enable you to tell `awk' how to do
certain things. Others are set automatically by `awk', so that they
carry information from the internal workings of `awk' to your program.
- This minor node documents all the built-in variables of `gawk', most
-of which are also documented in the chapters describing their areas of
-activity.
+ This minor node documents all of `gawk''s built-in variables, most
+of which are also documented in the major nodes describing their areas
+of activity.
* Menu:
@@ -9496,8 +9992,13 @@ File: gawk.info, Node: User-modified, Next: Auto-set, Up: Built-in Variables
-------------------------------------------
The following is an alphabetical list of variables that you can change
-to control how `awk' does certain things. The variables that are
-specific to `gawk' are marked with a pound sign (`#').
+to control how `awk' does certain things.
+
+ The variables that are specific to `gawk' are marked with a pound
+sign (`#'). These variables are `gawk' extensions. In other `awk'
+implementations or if `gawk' is in compatibility mode (*note
+Options::), they are not special. (Any exceptions are noted in the
+description of each variable.)
`BINMODE #'
On non-POSIX systems, this variable specifies use of binary mode
@@ -9510,14 +10011,11 @@ specific to `gawk' are marked with a pound sign (`#').
string value of `"rw"' or `"wr"' indicates that all files should
use binary I/O. Any other string value is treated the same as
`"rw"', but causes `gawk' to generate a warning message.
- `BINMODE' is described in more detail in *note PC Using::.
-
- This variable is a `gawk' extension. In other `awk'
- implementations (except `mawk', *note Other Versions::), or if
- `gawk' is in compatibility mode (*note Options::), it is not
- special.
+ `BINMODE' is described in more detail in *note PC Using::. `mawk'
+ (*note Other Versions::) also supports this variable, but only
+ using numeric values.
-`CONVFMT'
+`CONVFMT'
This string controls conversion of numbers to strings (*note
Conversion::). It works by being passed, in effect, as the first
argument to the `sprintf()' function (*note String Functions::).
@@ -9525,29 +10023,21 @@ specific to `gawk' are marked with a pound sign (`#').
POSIX standard.
`FIELDWIDTHS #'
- This is a space-separated list of columns that tells `gawk' how to
- split input with fixed columnar boundaries. Assigning a value to
+ A space-separated list of columns that tells `gawk' how to split
+ input with fixed columnar boundaries. Assigning a value to
`FIELDWIDTHS' overrides the use of `FS' and `FPAT' for field
splitting. *Note Constant Size::, for more information.
- If `gawk' is in compatibility mode (*note Options::), then
- `FIELDWIDTHS' has no special meaning, and field-splitting
- operations occur based exclusively on the value of `FS'.
-
`FPAT #'
- This is a regular expression (as a string) that tells `gawk' to
- create the fields based on text that matches the regular
- expression. Assigning a value to `FPAT' overrides the use of `FS'
- and `FIELDWIDTHS' for field splitting. *Note Splitting By
- Content::, for more information.
-
- If `gawk' is in compatibility mode (*note Options::), then `FPAT'
- has no special meaning, and field-splitting operations occur based
- exclusively on the value of `FS'.
+ A regular expression (as a string) that tells `gawk' to create the
+ fields based on text that matches the regular expression.
+ Assigning a value to `FPAT' overrides the use of `FS' and
+ `FIELDWIDTHS' for field splitting. *Note Splitting By Content::,
+ for more information.
`FS'
- This is the input field separator (*note Field Separators::). The
- value is a single-character string or a multicharacter regular
+ The input field separator (*note Field Separators::). The value
+ is a single-character string or a multicharacter regular
expression that matches the separations between fields in an input
record. If the value is the null string (`""'), then each
character in the record becomes a separate field. (This behavior
@@ -9583,13 +10073,9 @@ specific to `gawk' are marked with a pound sign (`#').
splitting when using a single-character field separator. *Note
Case-sensitivity::.
- If `gawk' is in compatibility mode (*note Options::), then
- `IGNORECASE' has no special meaning. Thus, string and regexp
- operations are always case-sensitive.
-
`LINT #'
When this variable is true (nonzero or non-null), `gawk' behaves
- as if the `--lint' command-line option is in effect. (*note
+ as if the `--lint' command-line option is in effect (*note
Options::). With a value of `"fatal"', lint warnings become fatal
errors. With a value of `"invalid"', only warnings about things
that are actually invalid are issued. (This is not fully
@@ -9605,13 +10091,13 @@ specific to `gawk' are marked with a pound sign (`#').
execution is independent of the flavor of `awk' being executed.
`OFMT'
- This string controls conversion of numbers to strings (*note
- Conversion::) for printing with the `print' statement. It works
- by being passed as the first argument to the `sprintf()' function
- (*note String Functions::). Its default value is `"%.6g"'.
- Earlier versions of `awk' also used `OFMT' to specify the format
- for converting numbers to strings in general expressions; this is
- now done by `CONVFMT'.
+ Controls conversion of numbers to strings (*note Conversion::) for
+ printing with the `print' statement. It works by being passed as
+ the first argument to the `sprintf()' function (*note String
+ Functions::). Its default value is `"%.6g"'. Earlier versions of
+ `awk' also used `OFMT' to specify the format for converting
+ numbers to strings in general expressions; this is now done by
+ `CONVFMT'.
`OFS'
This is the output field separator (*note Output Separators::).
@@ -9619,49 +10105,45 @@ specific to `gawk' are marked with a pound sign (`#').
Its default value is `" "', a string consisting of a single space.
`ORS'
- This is the output record separator. It is output at the end of
- every `print' statement. Its default value is `"\n"', the newline
+ The output record separator. It is output at the end of every
+ `print' statement. Its default value is `"\n"', the newline
character. (*Note Output Separators::.)
`PREC #'
The working precision of arbitrary precision floating-point
- numbers, 53 bits by default (*note Setting Precision::).
+ numbers, 53 bits by default (*note Setting precision::).
`ROUNDMODE #'
The rounding mode to use for arbitrary precision arithmetic on
- numbers, by default `"N"' (`roundTiesToEven' in the IEEE-754
- standard) (*note Setting Rounding Mode::).
+ numbers, by default `"N"' (`roundTiesToEven' in the IEEE 754
+ standard; *note Setting the rounding mode::).
-`RS'
- This is `awk''s input record separator. Its default value is a
- string containing a single newline character, which means that an
- input record consists of a single line of text. It can also be
- the null string, in which case records are separated by runs of
- blank lines. If it is a regexp, records are separated by matches
- of the regexp in the input text. (*Note Records::.)
+`RS'
+ The input record separator. Its default value is a string
+ containing a single newline character, which means that an input
+ record consists of a single line of text. It can also be the null
+ string, in which case records are separated by runs of blank lines.
+ If it is a regexp, records are separated by matches of the regexp
+ in the input text. (*Note Records::.)
The ability for `RS' to be a regular expression is a `gawk'
extension. In most other `awk' implementations, or if `gawk' is
in compatibility mode (*note Options::), just the first character
of `RS''s value is used.
-`SUBSEP'
- This is the subscript separator. It has the default value of
- `"\034"' and is used to separate the parts of the indices of a
- multidimensional array. Thus, the expression `foo["A", "B"]'
- really accesses `foo["A\034B"]' (*note Multidimensional::).
+`SUBSEP'
+ The subscript separator. It has the default value of `"\034"' and
+ is used to separate the parts of the indices of a multidimensional
+ array. Thus, the expression `foo["A", "B"]' really accesses
+ `foo["A\034B"]' (*note Multidimensional::).
`TEXTDOMAIN #'
- This variable is used for internationalization of programs at the
- `awk' level. It sets the default text domain for specially marked
- string constants in the source text, as well as for the
- `dcgettext()', `dcngettext()' and `bindtextdomain()' functions
- (*note Internationalization::). The default value of `TEXTDOMAIN'
- is `"messages"'.
-
- This variable is a `gawk' extension. In other `awk'
- implementations, or if `gawk' is in compatibility mode (*note
- Options::), it is not special.
+ Used for internationalization of programs at the `awk' level. It
+ sets the default text domain for specially marked string constants
+ in the source text, as well as for the `dcgettext()',
+ `dcngettext()' and `bindtextdomain()' functions (*note
+ Internationalization::). The default value of `TEXTDOMAIN' is
+ `"messages"'.
---------- Footnotes ----------
@@ -9675,10 +10157,14 @@ File: gawk.info, Node: Auto-set, Next: ARGC and ARGV, Prev: User-modified, U
The following is an alphabetical list of variables that `awk' sets
automatically on certain occasions in order to provide information to
-your program. The variables that are specific to `gawk' are marked
-with a pound sign (`#').
+your program.
-`ARGC, ARGV'
+ The variables that are specific to `gawk' are marked with a pound
+sign (`#'). These variables are `gawk' extensions. In other `awk'
+implementations or if `gawk' is in compatibility mode (*note
+Options::), they are not special.
+
+`ARGC', `ARGV'
The command-line arguments available to `awk' programs are stored
in an array called `ARGV'. `ARGC' is the number of command-line
arguments present. *Note Other Arguments::. Unlike most `awk'
@@ -9723,10 +10209,6 @@ with a pound sign (`#').
program, `gawk' automatically sets it to a new value when the next
file is opened.
- This variable is a `gawk' extension. In other `awk'
- implementations, or if `gawk' is in compatibility mode (*note
- Options::), it is not special.
-
`ENVIRON'
An associative array containing the values of the environment.
The array indices are the environment variable names; the elements
@@ -9746,12 +10228,12 @@ with a pound sign (`#').
Some operating systems may not have environment variables. On
such systems, the `ENVIRON' array is empty (except for
- `ENVIRON["AWKPATH"]', *note AWKPATH Variable:: and
- `ENVIRON["AWKLIBPATH"]', *note AWKLIBPATH Variable::).
+ `ENVIRON["AWKPATH"]' and `ENVIRON["AWKLIBPATH"]'; *note AWKPATH
+ Variable::, and *note AWKLIBPATH Variable::).
`ERRNO #'
- If a system error occurs during a redirection for `getline',
- during a read for `getline', or during a `close()' operation, then
+ If a system error occurs during a redirection for `getline', during
+ a read for `getline', or during a `close()' operation, then
`ERRNO' contains a string describing the error.
In addition, `gawk' clears `ERRNO' before opening each
@@ -9765,19 +10247,14 @@ with a pound sign (`#').
`getline' returning -1. You are, of course, free to clear it
yourself before doing an I/O operation.
- This variable is a `gawk' extension. In other `awk'
- implementations, or if `gawk' is in compatibility mode (*note
- Options::), it is not special.
-
`FILENAME'
- The name of the file that `awk' is currently reading. When no
- data files are listed on the command line, `awk' reads from the
- standard input and `FILENAME' is set to `"-"'. `FILENAME' is
- changed each time a new file is read (*note Reading Files::).
- Inside a `BEGIN' rule, the value of `FILENAME' is `""', since
- there are no input files being processed yet.(1) (d.c.) Note,
- though, that using `getline' (*note Getline::) inside a `BEGIN'
- rule can give `FILENAME' a value.
+ The name of the current input file. When no data files are listed
+ on the command line, `awk' reads from the standard input and
+ `FILENAME' is set to `"-"'. `FILENAME' changes each time a new
+ file is read (*note Reading Files::). Inside a `BEGIN' rule, the
+ value of `FILENAME' is `""', since there are no input files being
+ processed yet.(1) (d.c.) Note, though, that using `getline' (*note
+ Getline::) inside a `BEGIN' rule can give `FILENAME' a value.
`FNR'
The current record number in the current file. `FNR' is
@@ -9797,12 +10274,12 @@ with a pound sign (`#').
`FUNCTAB #'
An array whose indices and corresponding values are the names of
- all the user-defined or extension functions in the program.
+ all the built-in, user-defined and extension functions in the
+ program.
NOTE: Attempting to use the `delete' statement with the
- `FUNCTAB' array will cause a fatal error. Any attempt to
- assign to an element of the `FUNCTAB' array will also cause a
- fatal error.
+ `FUNCTAB' array causes a fatal error. Any attempt to assign
+ to an element of `FUNCTAB' also causes a fatal error.
`NR'
The number of input records `awk' has processed since the
@@ -9828,15 +10305,21 @@ with a pound sign (`#').
`PROCINFO["identifiers"]'
A subarray, indexed by the names of all identifiers used in
- the text of the AWK program. For each identifier, the value
- of the element is one of the following:
+ the text of the AWK program. An "identifier" is simply the
+ name of a variable (be it scalar or array), built-in
+ function, user-defined function, or extension function. For
+ each identifier, the value of the element is one of the
+ following:
`"array"'
The identifier is an array.
+ `"builtin"'
+ The identifier is a built-in function.
+
`"extension"'
The identifier is an extension function loaded via
- `@load'.
+ `@load' or `-l'.
`"scalar"'
The identifier is a scalar.
@@ -9866,8 +10349,8 @@ with a pound sign (`#').
`PROCINFO["sorted_in"]'
If this element exists in `PROCINFO', its value controls the
- order in which array indices will be processed by `for (index
- in array) ...' loops. Since this is an advanced feature, we
+ order in which array indices will be processed by `for (INDEX
+ in ARRAY)' loops. Since this is an advanced feature, we
defer the full description until later; see *note Scanning an
Array::.
@@ -9884,8 +10367,8 @@ with a pound sign (`#').
The following additional elements in the array are available to
provide information about the MPFR and GMP libraries if your
- version of `gawk' supports arbitrary precision numbers (*note Gawk
- and MPFR::):
+ version of `gawk' supports arbitrary precision numbers (*note
+ Arbitrary Precision Arithmetic::):
`PROCINFO["mpfr_version"]'
The version of the GNU MPFR library.
@@ -9925,10 +10408,6 @@ with a pound sign (`#').
open input file, pipe, or coprocess. *Note Read Timeout::,
for more information.
- This array is a `gawk' extension. In other `awk' implementations,
- or if `gawk' is in compatibility mode (*note Options::), it is not
- special.
-
`RLENGTH'
The length of the substring matched by the `match()' function
(*note String Functions::). `RLENGTH' is set by invoking the
@@ -9943,12 +10422,8 @@ with a pound sign (`#').
match was found.
`RT #'
- This is set each time a record is read. It contains the input text
- that matched the text denoted by `RS', the record separator.
-
- This variable is a `gawk' extension. In other `awk'
- implementations, or if `gawk' is in compatibility mode (*note
- Options::), it is not special.
+ The input text that matched the text denoted by `RS', the record
+ separator. It is set every time a record is read.
`SYMTAB #'
An array whose indices are the names of all currently defined
@@ -9984,7 +10459,7 @@ with a pound sign (`#').
return SYMTAB[variable] *= amount
}
- NOTE: In order to avoid severe time-travel paradoxes(2),
+ NOTE: In order to avoid severe time-travel paradoxes,(2)
neither `FUNCTAB' nor `SYMTAB' are available as elements
within the `SYMTAB' array.
@@ -10081,8 +10556,16 @@ elements from `ARGV' (*note Delete::).
All of these actions are typically done in the `BEGIN' rule, before
actual processing of the input begins. *Note Split Program::, and see
*note Tee Program::, for examples of each way of removing elements from
-`ARGV'. The following fragment processes `ARGV' in order to examine,
-and then remove, command-line options:
+`ARGV'.
+
+ To actually get options into an `awk' program, end the `awk' options
+with `--' and then supply the `awk' program's options, in the following
+manner:
+
+ awk -f myprog.awk -- -v -q file1 file2 ...
+
+ The following fragment processes `ARGV' in order to examine, and
+then remove, the above command-line options:
BEGIN {
for (i = 1; i < ARGC; i++) {
@@ -10100,25 +10583,69 @@ and then remove, command-line options:
}
}
- To actually get the options into the `awk' program, end the `awk'
-options with `--' and then supply the `awk' program's options, in the
-following manner:
-
- awk -f myprog -- -v -q file1 file2 ...
+ Ending the `awk' options with `--' isn't necessary in `gawk'. Unless
+`--posix' has been specified, `gawk' silently puts any unrecognized
+options into `ARGV' for the `awk' program to deal with. As soon as it
+sees an unknown option, `gawk' stops looking for other options that it
+might otherwise recognize. The previous command line with `gawk' would
+be:
- This is not necessary in `gawk'. Unless `--posix' has been
-specified, `gawk' silently puts any unrecognized options into `ARGV'
-for the `awk' program to deal with. As soon as it sees an unknown
-option, `gawk' stops looking for other options that it might otherwise
-recognize. The previous example with `gawk' would be:
-
- gawk -f myprog -q -v file1 file2 ...
+ gawk -f myprog.awk -q -v file1 file2 ...
Because `-q' is not a valid `gawk' option, it and the following `-v'
are passed on to the `awk' program. (*Note Getopt Function::, for an
`awk' library function that parses command-line options.)

+File: gawk.info, Node: Pattern Action Summary, Prev: Built-in Variables, Up: Patterns and Actions
+
+7.6 Summary
+===========
+
+ * Pattern-action pairs make up the basic elements of an `awk'
+ program. Patterns are either normal expressions, range
+ expressions, regexp constants, one of the special keywords
+ `BEGIN', `END', `BEGINFILE', `ENDFILE', or empty. The action
+ executes if the current record matches the pattern. Empty
+ (missing) patterns match all records.
+
+ * I/O from `BEGIN' and `END' rules has certain constraints. This
+ is also true, only more so, for `BEGINFILE' and `ENDFILE' rules.
+ The latter two give you "hooks" into `gawk''s file processing,
+ allowing you to recover from a file that otherwise would cause a
+ fatal error (such as a file that cannot be opened).
+
+ * Shell variables can be used in `awk' programs by careful use of
+ shell quoting. It is easier to pass a shell variable into `awk'
+ by using the `-v' option and an `awk' variable.
+
+ * Actions consist of statements enclosed in curly braces. Statements
+ are built up from expressions, control statements, compound
+ statements, input and output statements, and deletion statements.
+
+ * The control statements in `awk' are `if'-`else', `while', `for',
+ and `do'-`while'. `gawk' adds the `switch' statement. There are
+ two flavors of `for' statement: one for performing general
+ looping, and the other for iterating through an array.
+
+ * `break' and `continue' let you exit early or start the next
+ iteration of a loop (or get out of a `switch').
+
+ * `next' and `nextfile' let you read the next record and start over
+ at the top of your program, or skip to the next input file and
+ start over, respectively.
+
+ * The `exit' statement terminates your program. When executed from
+ an action (or function body) it transfers control to the `END'
+ statements. From an `END' statement body, it exits immediately.
+ You may pass an optional numeric value to be used as `awk''s exit
+ status.
+
+ * Some built-in variables provide control over `awk', mainly for I/O.
+ Other variables convey information from `awk' to your program.
+
+
+
File: gawk.info, Node: Arrays, Next: Functions, Prev: Patterns and Actions, Up: Top
8 Arrays in `awk'
@@ -10134,7 +10661,7 @@ remove array elements. It also describes how `awk' simulates
multidimensional arrays, as well as some of the less obvious points
about array usage. The major node moves on to discuss `gawk''s facility
for sorting arrays, and ends with a brief description of `gawk''s
-ability to support true multidimensional arrays.
+ability to support true arrays of arrays.
`awk' maintains a single set of names that may be used for naming
variables, arrays, and functions (*note User-defined::). Thus, you
@@ -10152,6 +10679,7 @@ cannot have a variable and an array with the same name in the same
* Multidimensional:: Emulating multidimensional arrays in
`awk'.
* Arrays of Arrays:: True multidimensional arrays.
+* Arrays Summary:: Summary of arrays.

File: gawk.info, Node: Array Basics, Next: Delete, Up: Arrays
@@ -10212,12 +10740,13 @@ declared.)
A contiguous array of four elements might look like the following
example, conceptually, if the element values are 8, `"foo"', `""', and
-30:
+30 as shown in *note figure-array-elements:::
- +---------+---------+--------+---------+
- | 8 | "foo" | "" | 30 | Value
- +---------+---------+--------+---------+
- 0 1 2 3 Index
++---------+---------+--------+---------+
+| 8 | "foo" | "" | 30 | Value
++---------+---------+--------+---------+
+ 0 1 2 3 Index
+Figure 8.1: A Contiguous Array
Only the values are stored; the indices are implicit from the order of
the values. Here, 8 is the value at index zero, because 8 appears in the
@@ -10232,7 +10761,8 @@ array element value:
Index 0 Value 8
Index 2 Value ""
-The pairs are shown in jumbled order because their order is irrelevant.
+The pairs are shown in jumbled order because their order is
+irrelevant.(1)
One advantage of associative arrays is that new pairs can be added
at any time. For example, suppose a tenth element is added to the array
@@ -10260,10 +10790,11 @@ from English to French:
Here we decided to translate the number one in both spelled-out and
numeric form--thus illustrating that a single array can have both
-numbers and strings as indices. In fact, array subscripts are always
-strings; this is discussed in more detail in *note Numeric Array
-Subscripts::. Here, the number `1' isn't double-quoted, since `awk'
-automatically converts it to a string.
+numbers and strings as indices. (In fact, array subscripts are always
+strings. There are some subtleties to how numbers work when used as
+array subscripts; this is discussed in more detail in *note Numeric
+Array Subscripts::.) Here, the number `1' isn't double-quoted, since
+`awk' automatically converts it to a string.
The value of `IGNORECASE' has no effect upon array subscripting.
The identical string value used to store an array element must be used
@@ -10274,6 +10805,11 @@ starting at one. (*Note String Functions::.)
`awk''s arrays are efficient--the time to access an element is
independent of the number of elements in the array.
+ ---------- Footnotes ----------
+
+ (1) The ordering will vary among `awk' implementations, which
+typically use hash tables to store array elements and values.
+

File: gawk.info, Node: Reference to Elements, Next: Assigning Elements, Prev: Array Intro, Up: Array Basics
@@ -10308,18 +10844,20 @@ been assigned any value as well as elements that have been deleted
# Check if "foo" exists in a: Incorrect!
if (a["foo"] != "") ...
- This is incorrect, since this will _create_ `a["foo"]' if it
- didn't exist before!
+ This is incorrect for two reasons. First, it _creates_ `a["foo"]'
+ if it didn't exist before! Second, it is valid (if a bit unusual)
+ to set an array element equal to the empty string.
To determine whether an element exists in an array at a certain
index, use the following expression:
- IND in ARRAY
+ INDX in ARRAY
-This expression tests whether the particular index IND exists, without
+This expression tests whether the particular index INDX exists, without
the side effect of creating that element if it is not present. The
-expression has the value one (true) if `ARRAY[IND]' exists and zero
-(false) if it does not exist. For example, this statement tests
+expression has the value one (true) if `ARRAY[INDX]' exists and zero
+(false) if it does not exist. (We use INDX here, since `index' is the
+name of a built-in function.) For example, this statement tests
whether the array `frequencies' contains the index `2':
if (2 in frequencies)
@@ -10453,19 +10991,54 @@ built-in function `length()'.
The order in which elements of the array are accessed by this
statement is determined by the internal arrangement of the array
-elements within `awk' and normally cannot be controlled or changed.
-This can lead to problems if new elements are added to ARRAY by
-statements in the loop body; it is not predictable whether the `for'
+elements within `awk' and in standard `awk' cannot be controlled or
+changed. This can lead to problems if new elements are added to ARRAY
+by statements in the loop body; it is not predictable whether the `for'
loop will reach them. Similarly, changing VAR inside the loop may
produce strange results. It is best to avoid such things.
+ As a point of information, `gawk' sets up the list of elements to be
+iterated over before the loop starts, and does not change it. But not
+all `awk' versions do so. Consider this program, named `loopcheck.awk':
+
+ BEGIN {
+ a["here"] = "here"
+ a["is"] = "is"
+ a["a"] = "a"
+ a["loop"] = "loop"
+ for (i in a) {
+ j++
+ a[j] = j
+ print i
+ }
+ }
+
+ Here is what happens when run with `gawk':
+
+ $ gawk -f loopcheck.awk
+ -| here
+ -| loop
+ -| a
+ -| is
+
+ Contrast this to BWK `awk':
+
+ $ nawk -f loopcheck.awk
+ -| loop
+ -| here
+ -| is
+ -| a
+ -| 1
+

File: gawk.info, Node: Controlling Scanning, Prev: Scanning an Array, Up: Array Basics
-8.1.6 Using Predefined Array Scanning Orders
---------------------------------------------
+8.1.6 Using Predefined Array Scanning Orders With `gawk'
+--------------------------------------------------------
+
+This node describes a feature that is specific to `gawk'.
-By default, when a `for' loop traverses an array, the order is
+ By default, when a `for' loop traverses an array, the order is
undefined, meaning that the `awk' implementation determines the order
in which the array is traversed. This order is usually based on the
internal implementation of arrays and will vary from one version of
@@ -10654,9 +11227,9 @@ at a time.
`gawk' extension. As of September, 2012, it was accepted for
inclusion into the POSIX standard. See the Austin Group website
(http://austingroupbugs.net/view.php?id=544). This form of the
- `delete' statement is also supported by Brian Kernighan's `awk'
- and `mawk', as well as by a number of other implementations (*note
- Other Versions::).
+ `delete' statement is also supported by BWK `awk' and `mawk', as
+ well as by a number of other implementations (*note Other
+ Versions::).
The following statement provides a portable but nonobvious way to
clear out an array:(1)
@@ -10745,13 +11318,13 @@ might look like this:
> line 2
> line 3' | awk '{ l[lines] = $0; ++lines }
> END {
- > for (i = lines-1; i >= 0; --i)
+ > for (i = lines - 1; i >= 0; i--)
> print l[i]
> }'
-| line 3
-| line 2
- Unfortunately, the very first line of input data did not come out in
+ Unfortunately, the very first line of input data did not appear in
the output!
Upon first glance, we would think that this program should have
@@ -10766,7 +11339,7 @@ following version of the program works correctly:
{ l[lines++] = $0 }
END {
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
}
@@ -10827,10 +11400,11 @@ multidimensional array, use the same operator (`in') that is used for
single dimensional arrays. Write the whole sequence of indices in
parentheses, separated by commas, as the left operand:
- (SUBSCRIPT1, SUBSCRIPT2, ...) in ARRAY
+ if ((SUBSCRIPT1, SUBSCRIPT2, ...) in ARRAY)
+ ...
- The following example treats its input as a two-dimensional array of
-fields; it rotates this array 90 degrees clockwise and prints the
+ Here is an example that treats its input as a two-dimensional array
+of fields; it rotates this array 90 degrees clockwise and prints the
result. It assumes that all lines have the same number of elements:
{
@@ -10906,7 +11480,7 @@ The result is to set `separate[1]' to `"1"' and `separate[2]' to
recovered.

-File: gawk.info, Node: Arrays of Arrays, Prev: Multidimensional, Up: Arrays
+File: gawk.info, Node: Arrays of Arrays, Next: Arrays Summary, Prev: Multidimensional, Up: Arrays
8.6 Arrays of Arrays
====================
@@ -11028,6 +11602,54 @@ by creating an arbitrary index:
-| a

+File: gawk.info, Node: Arrays Summary, Prev: Arrays of Arrays, Up: Arrays
+
+8.7 Summary
+===========
+
+ * Standard `awk' provides one-dimensional associative arrays (arrays
+ indexed by string values). All arrays are associative; numeric
+ indices are converted automatically to strings.
+
+ * Array elements are referenced as `ARRAY[INDX]'. Referencing an
+ element creates it if it did not exist previously.
+
+ * The proper way to see if an array has an element with a given index
+ is to use the `in' operator: `INDX in ARRAY'.
+
+ * Use `for (INDX in ARRAY) ...' to scan through all the individual
+ elements of an array. In the body of the loop, INDX takes on the
+ value of each element's index in turn.
+
+ * The order in which a `for (INDX in ARRAY)' loop traverses an array
+ is undefined in POSIX `awk' and varies among implementations.
+ `gawk' lets you control the order by assigning special predefined
+ values to `PROCINFO["sorted_in"]'.
+
+ * Use `delete ARRAY[INDX]' to delete an individual element. You may
+ also use `delete ARRAY' to delete all of the elements in the
+ array. This latter feature has been a common extension for many
+ years and is now standard, but may not be supported by all
+ commercial versions of `awk'.
+
+ * Standard `awk' simulates multidimensional arrays by separating
+ subscript values with a comma. The values are concatenated into a
+ single string, separated by the value of `SUBSEP'. The fact that
+ such a subscript was created in this way is not retained; thus
+ changing `SUBSEP' may have unexpected consequences. You can use
+ `(SUB1, SUB2, ...) in ARRAY' to see if such a multidimensional
+ subscript exists in ARRAY.
+
+ * `gawk' provides true arrays of arrays. You use a separate set of
+ square brackets for each dimension in such an array:
+ `data[row][col]', for example. Array elements may thus be either
+ scalar values (number or string) or another array.
+
+ * Use the `isarray()' built-in function to determine if an array
+ element is itself a subarray.
+
+
+
File: gawk.info, Node: Functions, Next: Library Functions, Prev: Arrays, Up: Top
9 Functions
@@ -11047,6 +11669,7 @@ major node describes these "user-defined" functions.
* Built-in:: Summarizes the built-in functions.
* User-defined:: Describes User-defined functions in detail.
* Indirect Calls:: Choosing the function to call at runtime.
+* Functions Summary:: Summary of functions.

File: gawk.info, Node: Built-in, Next: User-defined, Up: Functions
@@ -11134,6 +11757,21 @@ brackets ([ ]):
`cos(X)'
Return the cosine of X, with X in radians.
+`div(NUMERATOR, DENOMINATOR, RESULT)'
+ Perform integer division, similar to the standard C function of the
+ same name. First, truncate `numerator' and `denominator' towards
+ zero, creating integer values. Clear the `result' array, and then
+ set `result["quotient"]' to the result of `numerator /
+ denominator', truncated towards zero to an integer, and set
+ `result["remainder"]' to the result of `numerator % denominator',
+ truncated towards zero to an integer. This function is primarily
+ intended for use with arbitrary length integers; it avoids
+ creating MPFR arbitrary precision floating-point values (*note
+ Arbitrary Precision Integers::).
+
+ This function is a `gawk' extension. It is not available in
+ compatibility mode (*note Options::).
+
`exp(X)'
Return the exponential of X (`e ^ X') or report an error if X is
out of range. The range of values X can have depends on your
@@ -11148,7 +11786,8 @@ brackets ([ ]):
`log(X)'
Return the natural logarithm of X, if X is positive; otherwise,
- report an error.
+ return `NaN' ("not a number") on IEEE 754 systems. Additionally,
+ `gawk' prints a warning message when `x' is negative.
`rand()'
Return a random number. The values of `rand()' are uniformly
@@ -11198,7 +11837,7 @@ brackets ([ ]):
Return the positive square root of X. `gawk' prints a warning
message if X is negative. Thus, `sqrt(4)' is 2.
-`srand([X])'
+`srand('[X]`)'
Set the starting point, or seed, for generating random numbers to
the value X.
@@ -11219,6 +11858,9 @@ brackets ([ ]):
easy to keep track of the seeds in case you need to consistently
reproduce sequences of random numbers.
+ POSIX does not specify the initial seed; it differs among `awk'
+ implementations.
+
---------- Footnotes ----------
(1) The C version of `rand()' on many Unix systems is known to
@@ -11252,12 +11894,22 @@ returns the number of characters in a string, and not the number of
bytes used to represent those characters. Similarly, `index()' works
with character indices, and not byte indices.
+ CAUTION: A number of functions deal with indices into strings.
+ For these functions, the first character of a string is at
+ position (index) one. This is different from C and the languages
+ descended from it, where the first character is at position zero.
+ You need to remember this when doing index calculations,
+ particularly if you are used to C.
+
In the following list, optional parameters are enclosed in square
brackets ([ ]). Several functions perform string substitution; the
full discussion is provided in the description of the `sub()' function,
which comes towards the end since the list is presented in alphabetic
-order. Those functions that are specific to `gawk' are marked with a
-pound sign (`#'):
+order.
+
+ Those functions that are specific to `gawk' are marked with a pound
+sign (`#'). They are not available in compatibility mode (*note
+Options::):
* Menu:
@@ -11265,8 +11917,8 @@ pound sign (`#'):
`&' with `sub()', `gsub()', and
`gensub()'.
-`asort(SOURCE [, DEST [, HOW ] ]) #'
-`asorti(SOURCE [, DEST [, HOW ] ]) #'
+`asort('SOURCE [`,' DEST [`,' HOW ] ]`) #'
+`asorti('SOURCE [`,' DEST [`,' HOW ] ]`) #'
These two functions are similar in behavior, so they are described
together.
@@ -11313,10 +11965,7 @@ pound sign (`#'):
a[2] = "last"
a[3] = "middle"
- `asort()' and `asorti()' are `gawk' extensions; they are not
- available in compatibility mode (*note Options::).
-
-`gensub(REGEXP, REPLACEMENT, HOW [, TARGET]) #'
+`gensub(REGEXP, REPLACEMENT, HOW' [`, TARGET']`) #'
Search the target string TARGET for matches of the regular
expression REGEXP. If HOW is a string beginning with `g' or `G'
(short for "global"), then replace all matches of REGEXP with
@@ -11366,10 +12015,7 @@ pound sign (`#'):
If REGEXP does not match TARGET, `gensub()''s return value is the
original unchanged value of TARGET.
- `gensub()' is a `gawk' extension; it is not available in
- compatibility mode (*note Options::).
-
-`gsub(REGEXP, REPLACEMENT [, TARGET])'
+`gsub(REGEXP, REPLACEMENT' [`, TARGET']`)'
Search TARGET for _all_ of the longest, leftmost, _nonoverlapping_
matching substrings it can find and replace them with REPLACEMENT.
The `g' in `gsub()' stands for "global," which means replace
@@ -11393,12 +12039,11 @@ pound sign (`#'):
$ awk 'BEGIN { print index("peanut", "an") }'
-| 3
- If FIND is not found, `index()' returns zero. (Remember that
- string indices in `awk' start at one.)
+ If FIND is not found, `index()' returns zero.
It is a fatal error to use a regexp constant for FIND.
-`length([STRING])'
+`length('[STRING]`)'
Return the number of characters in STRING. If STRING is a number,
the length of the digit string representing that number is
returned. For example, `length("abcde")' is five. By contrast,
@@ -11438,14 +12083,14 @@ pound sign (`#'):
array argument is not portable. If `--posix' is supplied, using
an array argument is a fatal error (*note Arrays::).
-`match(STRING, REGEXP [, ARRAY])'
+`match(STRING, REGEXP' [`, ARRAY']`)'
Search STRING for the longest, leftmost substring matched by the
- regular expression, REGEXP and return the character position, or
- "index", at which that substring begins (one, if it starts at the
+ regular expression REGEXP and return the character position
+ (index) at which that substring begins (one, if it starts at the
beginning of STRING). If no match is found, return zero.
- The REGEXP argument may be either a regexp constant (`/.../') or a
- string constant (`"..."'). In the latter case, the string is
+ The REGEXP argument may be either a regexp constant (`/'...`/') or
+ a string constant (`"'...`"'). In the latter case, the string is
treated as a regexp to be matched. *Note Computed Regexps::, for a
discussion of the difference between the two forms, and the
implications for writing your program correctly.
@@ -11525,7 +12170,7 @@ pound sign (`#'):
compatibility mode (*note Options::), using a third argument is a
fatal error.
-`patsplit(STRING, ARRAY [, FIELDPAT [, SEPS ] ]) #'
+`patsplit(STRING, ARRAY' [`, FIELDPAT' [`, SEPS' ] ]`) #'
Divide STRING into pieces defined by FIELDPAT and store the pieces
in ARRAY and the separator strings in the SEPS array. The first
piece is stored in `ARRAY[1]', the second piece in `ARRAY[2]', and
@@ -11544,10 +12189,7 @@ pound sign (`#'):
Before splitting the string, `patsplit()' deletes any previously
existing elements in the arrays ARRAY and SEPS.
- The `patsplit()' function is a `gawk' extension. In compatibility
- mode (*note Options::), it is not available.
-
-`split(STRING, ARRAY [, FIELDSEP [, SEPS ] ])'
+`split(STRING, ARRAY' [`, FIELDSEP' [`, SEPS' ] ]`)'
Divide STRING into pieces separated by FIELDSEP and store the
pieces in ARRAY and the separator strings in the SEPS array. The
first piece is stored in `ARRAY[1]', the second piece in
@@ -11612,6 +12254,9 @@ pound sign (`#'):
has one element only. The value of that element is the original
STRING.
+ In POSIX mode (*note Options::), the fourth argument is not
+ allowed.
+
`sprintf(FORMAT, EXPRESSION1, ...)'
Return (without printing) the string that `printf' would have
printed out with the same arguments (*note Printf::). For example:
@@ -11637,18 +12282,15 @@ pound sign (`#'):
Note also that `strtonum()' uses the current locale's decimal point
for recognizing numbers (*note Locales::).
- `strtonum()' is a `gawk' extension; it is not available in
- compatibility mode (*note Options::).
-
-`sub(REGEXP, REPLACEMENT [, TARGET])'
+`sub(REGEXP, REPLACEMENT' [`, TARGET']`)'
Search TARGET, which is treated as a string, for the leftmost,
longest substring matched by the regular expression REGEXP.
Modify the entire string by replacing the matched text with
REPLACEMENT. The modified string becomes the new value of TARGET.
Return the number of substitutions made (zero or one).
- The REGEXP argument may be either a regexp constant (`/.../') or a
- string constant (`"..."'). In the latter case, the string is
+ The REGEXP argument may be either a regexp constant (`/'...`/') or
+ a string constant (`"'...`"'). In the latter case, the string is
treated as a regexp to be matched. *Note Computed Regexps::, for a
discussion of the difference between the two forms, and the
implications for writing your program correctly.
@@ -11713,7 +12355,7 @@ pound sign (`#'):
into a string, and then the value of that string is treated as the
regexp to match.
-`substr(STRING, START [, LENGTH])'
+`substr(STRING, START' [`, LENGTH' ]`)'
Return a LENGTH-character-long substring of STRING, starting at
character number START. The first character of a string is
character number one.(3) For example, `substr("washington", 5, 3)'
@@ -11726,11 +12368,11 @@ pound sign (`#'):
remaining in the string, counting from character START.
If START is less than one, `substr()' treats it as if it was one.
- (POSIX doesn't specify what to do in this case: Brian Kernighan's
- `awk' acts this way, and therefore `gawk' does too.) If START is
- greater than the number of characters in the string, `substr()'
- returns the null string. Similarly, if LENGTH is present but less
- than or equal to zero, the null string is returned.
+ (POSIX doesn't specify what to do in this case: BWK `awk' acts
+ this way, and therefore `gawk' does too.) If START is greater
+ than the number of characters in the string, `substr()' returns
+ the null string. Similarly, if LENGTH is present but less than or
+ equal to zero, the null string is returned.
The string returned by `substr()' _cannot_ be assigned. Thus, it
is a mistake to attempt to change a portion of a string, as shown
@@ -11786,23 +12428,27 @@ File: gawk.info, Node: Gory Details, Up: String Functions
9.1.3.1 More About `\' and `&' with `sub()', `gsub()', and `gensub()'
.....................................................................
-When using `sub()', `gsub()', or `gensub()', and trying to get literal
-backslashes and ampersands into the replacement text, you need to
-remember that there are several levels of "escape processing" going on.
+ CAUTION: This section has been known to cause headaches. You
+ might want to skip it upon first reading.
+
+ When using `sub()', `gsub()', or `gensub()', and trying to get
+literal backslashes and ampersands into the replacement text, you need
+to remember that there are several levels of "escape processing" going
+on.
First, there is the "lexical" level, which is when `awk' reads your
-program and builds an internal copy of it that can be executed. Then
-there is the runtime level, which is when `awk' actually scans the
-replacement string to determine what to generate.
+program and builds an internal copy of it to execute. Then there is
+the runtime level, which is when `awk' actually scans the replacement
+string to determine what to generate.
At both levels, `awk' looks for a defined set of characters that can
come after a backslash. At the lexical level, it looks for the escape
sequences listed in *note Escape Sequences::. Thus, for every `\' that
`awk' processes at the runtime level, you must type two backslashes at
the lexical level. When a character that is not valid for an escape
-sequence follows the `\', Brian Kernighan's `awk' and `gawk' both
-simply remove the initial `\' and put the next character into the
-string. Thus, for example, `"a\qb"' is treated as `"aqb"'.
+sequence follows the `\', BWK `awk' and `gawk' both simply remove the
+initial `\' and put the next character into the string. Thus, for
+example, `"a\qb"' is treated as `"aqb"'.
At the runtime level, the various functions handle sequences of `\'
and `&' differently. The situation is (sadly) somewhat complex.
@@ -11814,13 +12460,13 @@ is illustrated in *note table-sub-escapes::.
You type `sub()' sees `sub()' generates
------- --------- --------------
- `\&' `&' the matched text
- `\\&' `\&' a literal `&'
- `\\\&' `\&' a literal `&'
- `\\\\&' `\\&' a literal `\&'
- `\\\\\&' `\\&' a literal `\&'
- `\\\\\\&' `\\\&' a literal `\\&'
- `\\q' `\q' a literal `\q'
+ `\&' `&' The matched text
+ `\\&' `\&' A literal `&'
+ `\\\&' `\&' A literal `&'
+ `\\\\&' `\\&' A literal `\&'
+ `\\\\\&' `\\&' A literal `\&'
+ `\\\\\\&' `\\\&' A literal `\\&'
+ `\\q' `\q' A literal `\q'
Table 9.1: Historical Escape Sequence Processing for `sub()' and
`gsub()'
@@ -11834,50 +12480,25 @@ backslashes entered at the lexical level.)
The problem with the historical approach is that there is no way to
get a literal `\' followed by the matched text.
- The 1992 POSIX standard attempted to fix this problem. That standard
-says that `sub()' and `gsub()' look for either a `\' or an `&' after
-the `\'. If either one follows a `\', that character is output
-literally. The interpretation of `\' and `&' then becomes as shown in
-*note table-sub-posix-92::.
-
- You type `sub()' sees `sub()' generates
- ------- --------- --------------
- `&' `&' the matched text
- `\\&' `\&' a literal `&'
- `\\\\&' `\\&' a literal `\', then the matched text
- `\\\\\\&' `\\\&' a literal `\&'
+ Several editions of the POSIX standard attempted to fix this problem
+but weren't successful. The details are irrelevant at this point in
+time.
-Table 9.2: 1992 POSIX Rules for `sub()' and `gsub()' Escape Sequence
-Processing
-
-This appears to solve the problem. Unfortunately, the phrasing of the
-standard is unusual. It says, in effect, that `\' turns off the special
-meaning of any following character, but for anything other than `\' and
-`&', such special meaning is undefined. This wording leads to two
-problems:
-
- * Backslashes must now be doubled in the REPLACEMENT string, breaking
- historical `awk' programs.
-
- * To make sure that an `awk' program is portable, _every_ character
- in the REPLACEMENT string must be preceded with a backslash.(1)
-
- Because of the problems just listed, in 1996, the `gawk' maintainer
-submitted proposed text for a revised standard that reverts to rules
-that correspond more closely to the original existing practice. The
-proposed rules have special cases that make it possible to produce a
-`\' preceding the matched text. This is shown in *note
-table-sub-proposed::.
+ At one point, the `gawk' maintainer submitted proposed text for a
+revised standard that reverts to rules that correspond more closely to
+the original existing practice. The proposed rules have special cases
+that make it possible to produce a `\' preceding the matched text.
+This is shown in *note table-sub-proposed::.
You type `sub()' sees `sub()' generates
------- --------- --------------
- `\\\\\\&' `\\\&' a literal `\&'
- `\\\\&' `\\&' a literal `\', followed by the matched text
- `\\&' `\&' a literal `&'
- `\\q' `\q' a literal `\q'
+ `\\\\\\&' `\\\&' A literal `\&'
+ `\\\\&' `\\&' A literal `\', followed by the matched text
+ `\\&' `\&' A literal `&'
+ `\\q' `\q' A literal `\q'
`\\\\' `\\' `\\'
-Table 9.3: Proposed Rules For `sub()' And Backslash
+Table 9.2: GNU `awk' Rules For `sub()' And Backslash
In a nutshell, at the runtime level, there are now three special
sequences of characters (`\\\&', `\\&' and `\&') whereas historically
@@ -11885,11 +12506,11 @@ there was only one. However, as in the historical case, any `\' that
is not part of one of these three sequences is not special and appears
in the output literally.
- `gawk' 3.0 and 3.1 follow these proposed POSIX rules for `sub()' and
-`gsub()'. The POSIX standard took much longer to be revised than was
-expected in 1996. The 2001 standard does not follow the above rules.
-Instead, the rules there are somewhat simpler. The results are similar
-except for one case.
+ `gawk' 3.0 and 3.1 follow these rules for `sub()' and `gsub()'. The
+POSIX standard took much longer to be revised than was expected. In
+addition, the `gawk' maintainer's proposal was lost during the
+standardization process. The final rules are somewhat simpler. The
+results are similar except for one case.
The POSIX rules state that `\&' in the replacement string produces a
literal `&', `\\' produces a literal `\', and `\' followed by anything
@@ -11898,25 +12519,25 @@ rules are presented in *note table-posix-sub::.
You type `sub()' sees `sub()' generates
------- --------- --------------
- `\\\\\\&' `\\\&' a literal `\&'
- `\\\\&' `\\&' a literal `\', followed by the matched text
- `\\&' `\&' a literal `&'
- `\\q' `\q' a literal `\q'
+ `\\\\\\&' `\\\&' A literal `\&'
+ `\\\\&' `\\&' A literal `\', followed by the matched text
+ `\\&' `\&' A literal `&'
+ `\\q' `\q' A literal `\q'
`\\\\' `\\' `\'
-Table 9.4: POSIX Rules For `sub()' And `gsub()'
+Table 9.3: POSIX Rules For `sub()' And `gsub()'
The only case where the difference is noticeable is the last one:
`\\\\' is seen as `\\' and produces `\' instead of `\\'.
Starting with version 3.1.4, `gawk' followed the POSIX rules when
`--posix' was specified (*note Options::). Otherwise, it continued to
-follow the 1996 proposed rules, since that had been its behavior for
-many years.
+follow the proposed rules, since that had been its behavior for many
+years.
When version 4.0.0 was released, the `gawk' maintainer made the
POSIX rules the default, breaking well over a decade's worth of
-backwards compatibility.(2) Needless to say, this was a bad idea, and
+backwards compatibility.(1) Needless to say, this was a bad idea, and
as of version 4.0.1, `gawk' resumed its historical behavior, and only
follows the POSIX rules when `--posix' is given.
@@ -11929,14 +12550,14 @@ the `\' does not, as shown in *note table-gensub-escapes::.
You type `gensub()' sees `gensub()' generates
------- ------------ -----------------
- `&' `&' the matched text
- `\\&' `\&' a literal `&'
- `\\\\' `\\' a literal `\'
- `\\\\&' `\\&' a literal `\', then the matched text
- `\\\\\\&' `\\\&' a literal `\&'
- `\\q' `\q' a literal `q'
+ `&' `&' The matched text
+ `\\&' `\&' A literal `&'
+ `\\\\' `\\' A literal `\'
+ `\\\\&' `\\&' A literal `\', then the matched text
+ `\\\\\\&' `\\\&' A literal `\&'
+ `\\q' `\q' A literal `q'
-Table 9.5: Escape Sequence Processing For `gensub()'
+Table 9.4: Escape Sequence Processing For `gensub()'
Because of the complexity of the lexical and runtime level processing
and the special cases for `sub()' and `gsub()', we recommend the use of
@@ -11955,9 +12576,7 @@ Although this makes a certain amount of sense, it can be surprising.
---------- Footnotes ----------
- (1) This consequence was certainly unintended.
-
- (2) This was rather naive of him, despite there being a note in this
+ (1) This was rather naive of him, despite there being a note in this
section indicating that the next major version would move to the POSIX
rules.
@@ -11970,7 +12589,7 @@ File: gawk.info, Node: I/O Functions, Next: Time Functions, Prev: String Func
The following functions relate to input/output (I/O). Optional
parameters are enclosed in square brackets ([ ]):
-`close(FILENAME [, HOW])'
+`close('FILENAME [`,' HOW]`)'
Close the file FILENAME for input or output. Alternatively, the
argument may be a shell command that was used for creating a
coprocess, or for redirecting to or from a pipe; then the
@@ -11985,7 +12604,10 @@ parameters are enclosed in square brackets ([ ]):
not matter. *Note Two-way I/O::, which discusses this feature in
more detail and gives an example.
-`fflush([FILENAME])'
+ Note that the second argument to `close()' is a `gawk' extension;
+ it is not available in compatibility mode (*note Options::).
+
+`fflush('[FILENAME]`)'
Flush any buffered output associated with FILENAME, which is
either a file opened for writing or a shell command for
redirecting output to a pipe or coprocess.
@@ -12001,10 +12623,10 @@ parameters are enclosed in square brackets ([ ]):
function--`gawk' also buffers its output and the `fflush()'
function forces `gawk' to flush its buffers.
- `fflush()' was added to Brian Kernighan's version of `awk' in
- April of 1992. For two decades, it was not part of the POSIX
- standard. As of December, 2012, it was accepted for inclusion
- into the POSIX standard. See the Austin Group website
+ `fflush()' was added to BWK `awk' in April of 1992. For two
+ decades, it was not part of the POSIX standard. As of December,
+ 2012, it was accepted for inclusion into the POSIX standard. See
+ the Austin Group website
(http://austingroupbugs.net/view.php?id=634).
POSIX standardizes `fflush()' as follows: If there is no argument,
@@ -12022,7 +12644,7 @@ parameters are enclosed in square brackets ([ ]):
to flush only the standard output.
`fflush()' returns zero if the buffer is successfully flushed;
- otherwise, it returns non-zero (`gawk' returns -1). In the case
+ otherwise, it returns non-zero. (`gawk' returns -1.) In the case
where all buffers are flushed, the return value is zero only if
all buffers were flushed successfully. Otherwise, it is -1, and
`gawk' warns about the problem FILENAME.
@@ -12192,7 +12814,7 @@ enclosed in square brackets ([ ]):
If DATESPEC does not contain enough elements or if the resulting
time is out of range, `mktime()' returns -1.
-`strftime([FORMAT [, TIMESTAMP [, UTC-FLAG]]])'
+`strftime(' [FORMAT [`,' TIMESTAMP [`,' UTC-FLAG] ] ]`)'
Format the time specified by TIMESTAMP based on the contents of
the FORMAT string and return the result. It is similar to the
function of the same name in ISO C. If UTC-FLAG is present and is
@@ -12272,11 +12894,11 @@ the following date format specifications:
`%g'
The year modulo 100 of the ISO 8601 week number, as a decimal
- number (00-99). For example, January 1, 1993 is in week 53 of
- 1992. Thus, the year of its ISO 8601 week number is 1992, even
- though its year is 1993. Similarly, December 31, 1973 is in week
- 1 of 1974. Thus, the year of its ISO week number is 1974, even
- though its year is 1973.
+ number (00-99). For example, January 1, 2012 is in week 52 of
+ 2011. Thus, the year of its ISO 8601 week number is 2011, even
+ though its year is 2012. Similarly, December 31, 2012 is in week
+ 1 of 2013. Thus, the year of its ISO week number is 2013, even
+ though its year is 2012.
`%G'
The full year of the ISO week number, as a decimal number.
@@ -12356,7 +12978,7 @@ the following date format specifications:
The year modulo 100 as a decimal number (00-99).
`%Y'
- The full year as a decimal number (e.g., 2011).
+ The full year as a decimal number (e.g., 2015).
`%z'
The timezone offset in a +HHMM format (e.g., the format necessary
@@ -12378,15 +13000,6 @@ the following date format specifications:
If a conversion specifier is not one of the above, the behavior is
undefined.(6)
- Informally, a "locale" is the geographic place in which a program is
-meant to run. For example, a common way to abbreviate the date
-September 4, 2012 in the United States is "9/4/12." In many countries
-in Europe, however, it is abbreviated "4.9.12." Thus, the `%x'
-specification in a `"US"' locale might produce `9/4/12', while in a
-`"EUROPE"' locale, it might produce `4.9.12'. The ISO C standard
-defines a default `"C"' locale, which is an environment that is typical
-of what many C programmers are used to.
-
For systems that are not yet fully standards-compliant, `gawk'
supplies a copy of `strftime()' from the GNU C Library. It supports
all of the just-listed format specifications. If that version is used
@@ -12416,7 +13029,7 @@ to the standard output and interprets the current time according to the
format specifiers in the string. For example:
$ date '+Today is %A, %B %d, %Y.'
- -| Today is Wednesday, March 30, 2011.
+ -| Today is Monday, May 05, 2014.
Here is the `gawk' version of the `date' utility. It has a shell
"wrapper" to handle the `-u' option, which requires that `date' run as
@@ -12433,7 +13046,7 @@ if the time zone is set to UTC:
esac
gawk 'BEGIN {
- format = "%a %b %e %H:%M:%S %Z %Y"
+ format = PROCINFO["strftime"]
exitval = 0
if (ARGC > 2)
@@ -12494,7 +13107,7 @@ table-bitwise-ops::.
0 | 0 0 | 0 1 | 0 1
1 | 0 1 | 1 1 | 1 0
-Table 9.6: Bitwise Operations
+Table 9.5: Bitwise Operations
As you can see, the result of an AND operation is 1 only when _both_
bits are 1. The result of an OR operation is 1 if _either_ bit is 1.
@@ -12510,23 +13123,23 @@ again with `10111001' and shift it left by three bits, you end up with
`11001000'. `gawk' provides built-in functions that implement the
bitwise operations just described. They are:
-`and(V1, V2 [, ...])'
+``and(V1, V2' [`,' ...]`)''
Return the bitwise AND of the arguments. There must be at least
two.
-`compl(VAL)'
+``compl(VAL)''
Return the bitwise complement of VAL.
-`lshift(VAL, COUNT)'
+``lshift(VAL, COUNT)''
Return the value of VAL, shifted left by COUNT bits.
-`or(V1, V2 [, ...])'
+``or(V1, V2' [`,' ...]`)''
Return the bitwise OR of the arguments. There must be at least two.
-`rshift(VAL, COUNT)'
+``rshift(VAL, COUNT)''
Return the value of VAL, shifted right by COUNT bits.
-`xor(V1, V2 [, ...])'
+``xor(V1, V2' [`,' ...]`)''
Return the bitwise XOR of the arguments. There must be at least
two.
@@ -12611,8 +13224,8 @@ File: gawk.info, Node: Type Functions, Next: I18N Functions, Prev: Bitwise Fu
`gawk' provides a single function that lets you distinguish an array
from a scalar variable. This is necessary for writing code that
-traverses every element of a true multidimensional array (*note Arrays
-of Arrays::).
+traverses every element of an array of arrays (*note Arrays of
+Arrays::).
`isarray(X)'
Return a true value if X is an array. Otherwise return false.
@@ -12642,7 +13255,7 @@ descriptions here are purposely brief. *Note Internationalization::,
for the full story. Optional parameters are enclosed in square
brackets ([ ]):
-`bindtextdomain(DIRECTORY [, DOMAIN])'
+`bindtextdomain(DIRECTORY' [`,' DOMAIN]`)'
Set the directory in which `gawk' will look for message
translation files, in case they will not or cannot be placed in
the "standard" locations (e.g., during testing). It returns the
@@ -12652,13 +13265,13 @@ brackets ([ ]):
the null string (`""'), then `bindtextdomain()' returns the
current binding for the given DOMAIN.
-`dcgettext(STRING [, DOMAIN [, CATEGORY]])'
+`dcgettext(STRING' [`,' DOMAIN [`,' CATEGORY] ]`)'
Return the translation of STRING in text domain DOMAIN for locale
category CATEGORY. The default value for DOMAIN is the current
value of `TEXTDOMAIN'. The default value for CATEGORY is
`"LC_MESSAGES"'.
-`dcngettext(STRING1, STRING2, NUMBER [, DOMAIN [, CATEGORY]])'
+`dcngettext(STRING1, STRING2, NUMBER' [`,' DOMAIN [`,' CATEGORY] ]`)'
Return the plural form used for NUMBER of the translation of
STRING1 and STRING2 in text domain DOMAIN for locale category
CATEGORY. STRING1 is the English singular variant of a message,
@@ -12692,7 +13305,10 @@ File: gawk.info, Node: Definition Syntax, Next: Function Example, Up: User-de
9.2.1 Function Definition Syntax
--------------------------------
-Definitions of functions can appear anywhere between the rules of an
+ It's entirely fair to say that the `awk' syntax for local variable
+ definitions is appallingly awful. -- Brian Kernighan
+
+ Definitions of functions can appear anywhere between the rules of an
`awk' program. Thus, the general form of an `awk' program is extended
to include sequences of rules _and_ user-defined function definitions.
There is no need to put the definition of a function before all uses of
@@ -12701,28 +13317,35 @@ starting to execute any of it.
The definition of a function named NAME looks like this:
- function NAME([PARAMETER-LIST])
- {
+ `function' NAME`('[PARAMETER-LIST]`)'
+ `{'
BODY-OF-FUNCTION
- }
+ `}'
Here, NAME is the name of the function to define. A valid function
name is like a valid variable name: a sequence of letters, digits, and
-underscores that doesn't start with a digit. Within a single `awk'
-program, any particular name can only be used as a variable, array, or
-function.
+underscores that doesn't start with a digit. Here too, only the 52
+upper- and lowercase English letters may be used in a function name.
+Within a single `awk' program, any particular name can only be used as
+a variable, array, or function.
PARAMETER-LIST is an optional list of the function's arguments and
local variable names, separated by commas. When the function is called,
the argument names are used to hold the argument values given in the
-call. The local variables are initialized to the empty string. A
-function cannot have two parameters with the same name, nor may it have
-a parameter with the same name as the function itself.
+call.
+
+ A function cannot have two parameters with the same name, nor may it
+have a parameter with the same name as the function itself. In
+addition, according to the POSIX standard, function parameters cannot
+have the same name as one of the special built-in variables (*note
+Built-in Variables::). Not all versions of `awk' enforce this
+restriction.
- In addition, according to the POSIX standard, function parameters
-cannot have the same name as one of the special built-in variables
-(*note Built-in Variables::. Not all versions of `awk' enforce this
-restriction.)
+ Local variables act like the empty string if referenced where a
+string value is required, and like zero if referenced where a numeric
+value is required. This is the same as regular variables that have
+never been assigned a value. (There is more to understand about local
+variables; *note Dynamic Typing::.)
The BODY-OF-FUNCTION consists of `awk' statements. It is the most
important part of the definition, because it says what the function
@@ -12823,7 +13446,8 @@ this program, using our function to format the results, prints:
5.6
21.2
- This function deletes all the elements in an array:
+ This function deletes all the elements in an array (recall that the
+extra whitespace signifies the start of the local variable list):
function delarray(a, i)
{
@@ -12842,22 +13466,22 @@ standard.)
The following is an example of a recursive function. It takes a
string as an input parameter and returns the string in backwards order.
Recursive functions must always have a test that stops the recursion.
-In this case, the recursion terminates when the starting position is
-zero, i.e., when there are no more characters left in the string.
+In this case, the recursion terminates when the input string is already
+empty.
- function rev(str, start)
+ function rev(str)
{
- if (start == 0)
+ if (str == "")
return ""
- return (substr(str, start, 1) rev(str, start - 1))
+ return (rev(substr(str, 2)) substr(str, 1, 1))
}
If this function is in a file named `rev.awk', it can be tested this
way:
$ echo "Don't Panic!" |
- > gawk --source '{ print rev($0, length($0)) }' -f rev.awk
+ > gawk -e '{ print rev($0) }' -f rev.awk
-| !cinaP t'noD
The C `ctime()' function takes a timestamp and returns it in a
@@ -12871,7 +13495,7 @@ an `awk' version of `ctime()':
function ctime(ts, format)
{
- format = "%a %b %e %H:%M:%S %Z %Y"
+ format = PROCINFO["strftime"]
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
@@ -12925,9 +13549,10 @@ File: gawk.info, Node: Variable Scope, Next: Pass By Value/Reference, Prev: C
9.2.3.2 Controlling Variable Scope
..................................
-There is no way to make a variable local to a `{ ... }' block in `awk',
-but you can make a variable local to a function. It is good practice to
-do so whenever a variable is needed only in that function.
+Unlike many languages, there is no way to make a variable local to a
+`{' ... `}' block in `awk', but you can make a variable local to a
+function. It is good practice to do so whenever a variable is needed
+only in that function.
To make a variable local to a function, simply declare the variable
as an argument after the actual function arguments (*note Definition
@@ -13106,8 +13731,8 @@ function _are_ visible outside that function.
a[1], a[2], a[3]
}
- prints `a[1] = 1, a[2] = two, a[3] = 3', because `changeit' stores
- `"two"' in the second element of `a'.
+ prints `a[1] = 1, a[2] = two, a[3] = 3', because `changeit()'
+ stores `"two"' in the second element of `a'.
Some `awk' implementations allow you to call a function that has not
been defined. They only report a problem at runtime when the program
@@ -13146,11 +13771,11 @@ control to the calling part of the `awk' program. It can also be used
to return a value for use in the rest of the `awk' program. It looks
like this:
- return [EXPRESSION]
+ `return' [EXPRESSION]
The EXPRESSION part is optional. Due most likely to an oversight,
POSIX does not define what the return value is if you omit the
-EXPRESSION. Technically speaking, this make the returned value
+EXPRESSION. Technically speaking, this makes the returned value
undefined, and therefore, unpredictable. In practice, though, all
versions of `awk' simply return the null string, which acts like zero
if used in a numeric context.
@@ -13244,19 +13869,19 @@ Here is an annotated sample program:
}
In this example, the first call to `foo()' generates a fatal error,
-so `gawk' will not report the second error. If you comment out that
-call, though, then `gawk' will report the second error.
+so `awk' will not report the second error. If you comment out that
+call, though, then `awk' does report the second error.
Usually, such things aren't a big issue, but it's worth being aware
of them.

-File: gawk.info, Node: Indirect Calls, Prev: User-defined, Up: Functions
+File: gawk.info, Node: Indirect Calls, Next: Functions Summary, Prev: User-defined, Up: Functions
9.3 Indirect Function Calls
===========================
-This section describes a `gawk'-specific extension.
+This section describes an advanced, `gawk'-specific extension.
Often, you may wish to defer the choice of function to call until
runtime. For example, you may have different kinds of records, each of
@@ -13295,7 +13920,7 @@ your test scores:
This style of programming works, but can be awkward. With "indirect"
function calls, you tell `gawk' to use the _value_ of a variable as the
-name of the function to call.
+_name_ of the function to call.
The syntax is similar to that of a regular function call: an
identifier immediately followed by a left parenthesis, any arguments,
@@ -13337,7 +13962,6 @@ using indirect function calls.
Otherwise they perform the expected computations and are not unusual.
# For each record, print the class name and the requested statistics
-
{
class_name = $1
gsub(/_/, " ", class_name) # Replace _ with spaces
@@ -13524,11 +14148,11 @@ names of the two comparison functions:
Remember that you must supply a leading `@' in front of an indirect
function call.
- Unfortunately, indirect function calls cannot be used with the
-built-in functions. However, you can generally write "wrapper"
-functions which call the built-in ones, and those can be called
-indirectly. (Other than, perhaps, the mathematical functions, there is
-not a lot of reason to try to call the built-in functions indirectly.)
+ Starting with version 4.1.2 of `gawk', indirect function calls may
+also be used with built-in functions and with extension functions
+(*note Dynamic Extensions::). The only thing you cannot do is pass a
+regular expression constant to a built-in function through an indirect
+function call.(1)
`gawk' does its best to make indirect function calls efficient. For
example, in the following case:
@@ -13536,7 +14160,70 @@ example, in the following case:
for (i = 1; i <= n; i++)
@the_func()
-`gawk' will look up the actual function to call only once.
+`gawk' looks up the actual function to call only once.
+
+ ---------- Footnotes ----------
+
+ (1) This may change in a future version; recheck the documentation
+that comes with your version of `gawk' to see if it has.
+
+
+File: gawk.info, Node: Functions Summary, Prev: Indirect Calls, Up: Functions
+
+9.4 Summary
+===========
+
+ * `awk' provides built-in functions and lets you define your own
+ functions.
+
+ * POSIX `awk' provides three kinds of built-in functions: numeric,
+ string, and I/O. `gawk' provides functions that work with values
+ representing time, do bit manipulation, sort arrays, and
+ internationalize and localize programs. `gawk' also provides
+ several extensions to some of the standard functions, typically in the
+ form of additional arguments.
+
+ * Functions accept zero or more arguments and return a value. The
+ expressions that provide the argument values are completely
+ evaluated before the function is called. Order of evaluation is
+ not defined. The return value can be ignored.
+
+ * The handling of backslash in `sub()' and `gsub()' is not simple.
+ It is more straightforward in `gawk''s `gensub()' function, but
+ that function still requires care in its use.
+
+ * User-defined functions provide important capabilities but come with
+ some syntactic inelegancies. In a function call, there cannot be
+ any space between the function name and the opening left
+ parenthesis of the argument list. Also, there is no provision for
+ local variables, so the convention is to add extra parameters, and
+ to separate them visually from the real parameters by extra
+ whitespace.
+
+ * User-defined functions may call other user-defined (and built-in)
+ functions and may call themselves recursively. Function parameters
+ "hide" any global variables of the same names. You cannot use the
+ name of a reserved variable (such as `ARGC') as the name of a
+ parameter in user-defined functions.
+
+ * Scalar values are passed to user-defined functions by value. Array
+ parameters are passed by reference; any changes made by the
+ function to array parameters are thus visible after the function
+ has returned.
+
+ * Use the `return' statement to return from a user-defined function.
+ An optional expression becomes the function's return value. Only
+ scalar values may be returned by a function.
+
+ * If a variable that has never been used is passed to a user-defined
+ function, how that function treats the variable can set its nature:
+ either scalar or array.
+
+ * `gawk' provides indirect function calls using a special syntax.
+ By setting a variable to the name of a function, you can determine
+ at runtime what function will be called at that point in the
+ program. This is equivalent to function pointers in C and C++.
+

File: gawk.info, Node: Library Functions, Next: Sample Programs, Prev: Functions, Up: Top
@@ -13563,7 +14250,7 @@ P.J. Plauger wrote:
In fact, they felt this idea was so important that they placed this
statement on the cover of their book. Because we believe strongly that
their statement is correct, this major node and *note Sample
-Programs::, provide a good-sized body of code for you to read, and we
+Programs::, provide a good-sized body of code for you to read and, we
hope, to learn from.
This major node presents a library of useful `awk' functions. Many
@@ -13614,6 +14301,8 @@ for different implementations of `awk' is pretty straightforward.
* Passwd Functions:: Functions for getting user information.
* Group Functions:: Functions for getting group information.
* Walking Arrays:: A function to walk arrays of arrays.
+* Library Functions Summary:: Summary of library functions.
+* Library Exercises:: Exercises.
---------- Footnotes ----------
@@ -13740,7 +14429,7 @@ versions of `awk':
# mystrtonum --- convert string to number
- function mystrtonum(str, ret, chars, n, i, k, c)
+ function mystrtonum(str, ret, n, i, k, c)
{
if (str ~ /^0[0-7]*$/) {
# octal
@@ -13748,12 +14437,13 @@ versions of `awk':
ret = 0
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
}
- } else if (str ~ /^0[xX][[:xdigit:]]+/) {
+ } else if (str ~ /^0[xX][[:xdigit:]]+$/) {
# hexadecimal
str = substr(str, 3) # lop off leading 0x
n = length(str)
@@ -13761,10 +14451,9 @@ versions of `awk':
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
c = tolower(c)
- if ((k = index("0123456789", c)) > 0)
- k-- # adjust for 1-basing in awk
- else if ((k = index("abcdef", c)) > 0)
- k += 9
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("123456789abcdef", c)
ret = ret * 16 + k
}
@@ -14044,8 +14733,7 @@ worrying about:
}
#### test code ####
- # BEGIN \
- # {
+ # BEGIN {
# for (;;) {
# printf("enter a character: ")
# if (getline var <= 0)
@@ -14164,7 +14852,7 @@ current time formatted in the same way as the `date' utility:
now = systime()
# return date(1)-style output
- ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
+ ret = strftime(PROCINFO["strftime"], now)
# clear out target array
delete time
@@ -14414,7 +15102,9 @@ presented in *note Filetrans Function::, to either update `ARGIND' on
your own or modify this code as appropriate.
The `rewind()' function also relies on the `nextfile' keyword (*note
-Nextfile Statement::).
+Nextfile Statement::). Because of this, you should not call it from an
+`ENDFILE' rule. (This isn't necessary anyway, since as soon as an
+`ENDFILE' rule finishes `gawk' goes to the next file!)

File: gawk.info, Node: File Checking, Next: Empty Files, Prev: Rewind Function, Up: Data File Management
@@ -14424,8 +15114,8 @@ File: gawk.info, Node: File Checking, Next: Empty Files, Prev: Rewind Functio
Normally, if you give `awk' a data file that isn't readable, it stops
with a fatal error. There are times when you might want to just ignore
-such files and keep going. You can do this by prepending the following
-program to your `awk' program:
+such files and keep going.(1) You can do this by prepending the
+following program to your `awk' program:
# readable.awk --- library file to skip over unreadable files
@@ -14445,10 +15135,16 @@ program to your `awk' program:
element from `ARGV' with `delete' skips the file (since it's no longer
in the list). See also *note ARGC and ARGV::.
+ ---------- Footnotes ----------
+
+ (1) The `BEGINFILE' special pattern (*note BEGINFILE/ENDFILE::)
+provides an alternative mechanism for dealing with files that can't be
+opened. However, the code here provides a portable solution.
+

File: gawk.info, Node: Empty Files, Next: Ignoring Assigns, Prev: File Checking, Up: Data File Management
-10.3.4 Checking For Zero-length Files
+10.3.4 Checking for Zero-length Files
-------------------------------------
All known `awk' implementations silently skip over zero-length files.
@@ -14496,12 +15192,6 @@ normal case.
end of the command-line arguments. Note that the test in the condition
of the `for' loop uses the `<=' operator, not `<'.
- As an exercise, you might consider whether this same problem can be
-solved without relying on `gawk''s `ARGIND' variable.
-
- As a second exercise, revise this code to handle the case where an
-intervening value in `ARGV' is a variable assignment.
-

File: gawk.info, Node: Ignoring Assigns, Prev: Empty Files, Up: Data File Management
@@ -14711,8 +15401,7 @@ not an option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) {
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) {
Optind++
_opti = 0
@@ -14793,7 +15482,7 @@ is in `ARGV[0]':
# test program
if (_getopt_test) {
while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, optarg = <%s>\n",
+ printf("c = <%c>, Optarg = <%s>\n",
_go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
@@ -14806,17 +15495,17 @@ is in `ARGV[0]':
result of two sample runs of the test program:
$ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x
- -| c = <a>, optarg = <>
- -| c = <c>, optarg = <>
- -| c = <b>, optarg = <ARG>
+ -| c = <a>, Optarg = <>
+ -| c = <c>, Optarg = <>
+ -| c = <b>, Optarg = <ARG>
-| non-option arguments:
-| ARGV[3] = <bax>
-| ARGV[4] = <-x>
$ awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc
- -| c = <a>, optarg = <>
+ -| c = <a>, Optarg = <>
error--> x -- invalid option
- -| c = <?>, optarg = <>
+ -| c = <?>, Optarg = <>
-| non-option arguments:
-| ARGV[4] = <xyz>
-| ARGV[5] = <abc>
@@ -14875,7 +15564,7 @@ that "cats" the password database:
/*
* pwcat.c
*
- * Generate a printable version of the password database
+ * Generate a printable version of the password database.
*/
#include <stdio.h>
#include <pwd.h>
@@ -15100,7 +15789,7 @@ group database, is as follows:
/*
* grcat.c
*
- * Generate a printable version of the group database
+ * Generate a printable version of the group database.
*/
#include <stdio.h>
#include <grp.h>
@@ -15136,9 +15825,10 @@ Group Password
used; it is usually empty or set to `*'.
Group ID Number
- The group's numeric group ID number; this number must be unique
- within the file. (On some systems it's a C `long', and not an
- `int'. Thus we cast it to `long' for all cases.)
+ The group's numeric group ID number; the association of name to
+ number must be unique within the file. (On some systems it's a C
+ `long', and not an `int'. Thus we cast it to `long' for all
+ cases.)
Group Member List
A comma-separated list of user names. These users are members of
@@ -15165,8 +15855,7 @@ the same names:
# group.awk --- functions for dealing with the group file
- BEGIN \
- {
+ BEGIN {
# Change to suit your system
_gr_awklib = "/usr/local/libexec/awk/"
}
@@ -15242,15 +15931,12 @@ the database for the same group. This is common when a group has a
large number of members. A pair of such entries might look like the
following:
- tvpeople:*:101:johnny,jay,arsenio
+ tvpeople:*:101:johnny,jay,arsenio
tvpeople:*:101:david,conan,tom,joan
For this reason, `_gr_init()' looks to see if a group name or group
ID number is already seen. If it is, then the user names are simply
-concatenated onto the previous list of users. (There is actually a
-subtle problem with the code just presented. Suppose that the first
-time there were no names. This code adds the names with a leading
-comma. It also doesn't check that there is a `$4'.)
+concatenated onto the previous list of users.(1)
Finally, `_gr_init()' closes the pipeline to `grcat', restores `FS'
(and `FIELDWIDTHS' or `FPAT' if necessary), `RS', and `$0', initializes
@@ -15315,8 +16001,14 @@ very simple, relying on `awk''s associative arrays to do work.
The `id' program in *note Id Program::, uses these functions.
+ ---------- Footnotes ----------
+
+ (1) There is actually a subtle problem with the code just presented.
+Suppose that the first time there were no names. This code adds the
+names with a leading comma. It also doesn't check that there is a `$4'.
+

-File: gawk.info, Node: Walking Arrays, Prev: Group Functions, Up: Library Functions
+File: gawk.info, Node: Walking Arrays, Next: Library Functions Summary, Prev: Group Functions, Up: Library Functions
10.7 Traversing Arrays of Arrays
================================
@@ -15366,17 +16058,73 @@ value. Here is a main program to demonstrate:
-| a[2][2] = 22
-| a[3] = 3
- Walking an array and processing each element is a general-purpose
-operation. You might want to consider generalizing the `walk_array()'
-function by adding an additional parameter named `process'.
+
+File: gawk.info, Node: Library Functions Summary, Next: Library Exercises, Prev: Walking Arrays, Up: Library Functions
+
+10.8 Summary
+============
+
+ * Reading programs is an excellent way to learn Good Programming.
+ The functions provided in this major node and the next are intended
+ to serve that purpose.
+
+ * When writing general-purpose library functions, put some thought
+ into how to name any global variables so that they won't conflict
+ with variables from a user's program.
+
+ * The functions presented here fit into the following categories:
- Then, inside the loop, instead of simply printing the array element's
-index and value, use the indirect function call syntax (*note Indirect
-Calls::) on `process', passing it the index and the value.
+ General problems
+ Number to string conversion, assertions, rounding, random
+ number generation, converting characters to numbers, joining
+ strings, getting easily usable time-of-day information, and
+ reading a whole file in one shot.
+
+ Managing data files
+ Noting data file boundaries, rereading the current file,
+ checking for readable files, checking for zero-length files,
+ and treating assignments as file names.
+
+ Processing command-line options
+ An `awk' version of the standard C `getopt()' function.
+
+ Reading the user and group databases
+ Two sets of routines that parallel the C library versions.
+
+ Traversing arrays of arrays
+ A simple function to traverse an array of arrays to any depth.
+
+
+
+File: gawk.info, Node: Library Exercises, Prev: Library Functions Summary, Up: Library Functions
+
+10.9 Exercises
+==============
+
+ 1. In *note Empty Files::, we presented the `zerofile.awk' program,
+ which made use of `gawk''s `ARGIND' variable. Can this problem be
+ solved without relying on `ARGIND'? If so, how?
+
+ 2. As a related challenge, revise that code to handle the case where
+ an intervening value in `ARGV' is a variable assignment.
+
+ 3. *note Walking Arrays::, presented a function that walked a
+ multidimensional array to print it out. However, walking an array
+ and processing each element is a general-purpose operation.
+ Generalize the `walk_array()' function by adding an additional
+ parameter named `process'.
+
+ Then, inside the loop, instead of printing the array element's
+ index and value, use the indirect function call syntax (*note
+ Indirect Calls::) on `process', passing it the index and the value.
+
+ When calling `walk_array()', you would pass the name of a
+ user-defined function that expects to receive an index and a value,
+ and then processes the element.
+
+ Test your new version by printing the array; you should end up with
+ output identical to that of the original version.
- When calling `walk_array()', you would pass the name of a
-user-defined function that expects to receive an index and a value, and
-then processes the element.

File: gawk.info, Node: Sample Programs, Next: Advanced Features, Prev: Library Functions, Up: Top
@@ -15397,6 +16145,8 @@ Library Functions::.
* Running Examples:: How to run these examples.
* Clones:: Clones of common utilities.
* Miscellaneous Programs:: Some interesting `awk' programs.
+* Programs Summary:: Summary of programs.
+* Programs Exercises:: Exercises.

File: gawk.info, Node: Running Examples, Next: Clones, Up: Sample Programs
@@ -15528,8 +16278,7 @@ through the command-line options. Exactly one of the variables
should be done by fields or by characters, respectively. When cutting
by characters, the output field separator is set to the null string:
- BEGIN \
- {
+ BEGIN {
FS = "\t" # default
OFS = FS
while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) {
@@ -15542,7 +16291,7 @@ by characters, the output field separator is set to the null string:
OFS = ""
} else if (c == "d") {
if (length(Optarg) > 1) {
- printf("Using first character of %s" \
+ printf("cut: using first character of %s" \
" for delimiter\n", Optarg) > "/dev/stderr"
Optarg = substr(Optarg, 1, 1)
}
@@ -15551,7 +16300,7 @@ by characters, the output field separator is set to the null string:
if (FS == " ") # defeat awk semantics
FS = "[ ]"
} else if (c == "s")
- suppress++
+ suppress = 1
else
usage()
}
@@ -15609,7 +16358,7 @@ splitting:
if (index(f[i], "-") != 0) { # a range
m = split(f[i], g, "-")
if (m != 2 || g[1] >= g[2]) {
- printf("bad field list: %s\n",
+ printf("cut: bad field list: %s\n",
f[i]) > "/dev/stderr"
exit 1
}
@@ -15647,7 +16396,7 @@ filler fields:
if (index(f[i], "-") != 0) { # range
m = split(f[i], g, "-")
if (m != 2 || g[1] >= g[2]) {
- printf("bad character list: %s\n",
+ printf("cut: bad character list: %s\n",
f[i]) > "/dev/stderr"
exit 1
}
@@ -15720,7 +16469,7 @@ The `egrep' utility searches files for patterns. It uses regular
expressions that are almost identical to those available in `awk'
(*note Regexp::). You invoke it as follows:
- egrep [ OPTIONS ] 'PATTERN' FILES ...
+ `egrep' [OPTIONS] `'PATTERN'' FILES ...
The PATTERN is a regular expression. In typical usage, the regular
expression is quoted to prevent the shell from expanding any of the
@@ -15864,6 +16613,11 @@ know the total number of lines that matched the pattern:
total += fcount
}
+ The `BEGINFILE' and `ENDFILE' special patterns (*note
+BEGINFILE/ENDFILE::) could be used, but then the program would be
+`gawk'-specific. Additionally, this example was written before `gawk'
+acquired `BEGINFILE' and `ENDFILE'.
+
The following rule does most of the work of matching lines. The
variable `matches' is true if the line matched the pattern. If the user
wants lines that did not match, the sense of `matches' is inverted
@@ -15909,11 +16663,8 @@ line is printed, with a leading file name and colon if necessary:
The `END' rule takes care of producing the correct exit status. If
there are no matches, the exit status is one; otherwise it is zero:
- END \
- {
- if (total == 0)
- exit 1
- exit 0
+ END {
+ exit (total == 0)
}
The `usage()' function prints a usage message in case of invalid
@@ -15930,13 +16681,6 @@ options, and then exits:
The variable `e' is used so that the function fits nicely on the
printed page.
- Just a note on programming style: you may have noticed that the `END'
-rule uses backslash continuation, with the open brace on a line by
-itself. This is so that it more closely resembles the way functions
-are written. Many of the examples in this major node use this style.
-You can decide for yourself if you like writing your `BEGIN' and `END'
-rules this way or not.
-
---------- Footnotes ----------
(1) It also introduces a subtle bug; if a match happens, we output
@@ -15955,7 +16699,7 @@ different from the real ones. If possible, `id' also supplies the
corresponding user and group names. The output might look like this:
$ id
- -| uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy)
+ -| uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo)
This information is part of what is provided by `gawk''s `PROCINFO'
array (*note Built-in Variables::). However, the `id' utility provides
@@ -15979,8 +16723,7 @@ and the group numbers:
# uid=12(foo) euid=34(bar) gid=3(baz) \
# egid=5(blat) groups=9(nine),2(two),1(one)
- BEGIN \
- {
+ BEGIN {
uid = PROCINFO["uid"]
euid = PROCINFO["euid"]
gid = PROCINFO["gid"]
@@ -15988,34 +16731,26 @@ and the group numbers:
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (euid != uid) {
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (egid != gid) {
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
}
for (i = 1; ("group" i) in PROCINFO; i++) {
@@ -16024,10 +16759,8 @@ and the group numbers:
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "") {
- split(pw, a, ":")
- printf("(%s)", a[1])
- }
+ if (pw != "")
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
}
@@ -16035,6 +16768,12 @@ and the group numbers:
print ""
}
+ function pr_first_field(str, a)
+ {
+ split(str, a, ":")
+ printf("(%s)", a[1])
+ }
+
The test in the `for' loop is worth noting. Any supplementary
groups in the `PROCINFO' array have the indices `"group1"' through
`"groupN"' for some N, i.e., the total number of supplementary groups.
@@ -16049,6 +16788,10 @@ the last group in the array and the loop exits.
then the condition is false the first time it's tested, and the loop
body never executes.
+ The `pr_first_field()' function simply isolates out some code that
+is used repeatedly, making the whole program slightly shorter and
+cleaner.
+

File: gawk.info, Node: Split Program, Next: Tee Program, Prev: Id Program, Up: Clones
@@ -16058,7 +16801,7 @@ File: gawk.info, Node: Split Program, Next: Tee Program, Prev: Id Program, U
The `split' program splits large text files into smaller pieces. Usage
is as follows:(1)
- split [-COUNT] file [ PREFIX ]
+ `split' [`-COUNT'] [FILE] [PREFIX]
By default, the output files are named `xaa', `xab', and so on. Each
file has 1000 lines in it, with the likely exception of the last file.
@@ -16082,7 +16825,7 @@ output file names:
# split.awk --- do split in awk
#
# Requires ord() and chr() library functions
- # usage: split [-num] [file] [outname]
+ # usage: split [-count] [file] [outname]
BEGIN {
outfile = "x" # default
@@ -16091,7 +16834,7 @@ output file names:
usage()
i = 1
- if (ARGV[i] ~ /^-[[:digit:]]+$/) {
+ if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) {
count = -ARGV[i]
ARGV[i] = ""
i++
@@ -16167,7 +16910,7 @@ The `tee' program is known as a "pipe fitting." `tee' copies its
standard input to its standard output and also duplicates it to the
files named on the command line. Its usage is as follows:
- tee [-a] file ...
+ `tee' [`-a'] FILE ...
The `-a' option tells `tee' to append to the named files, instead of
truncating them and starting over.
@@ -16188,8 +16931,7 @@ input by setting `ARGV[1]' to `"-"' and `ARGC' to two:
# Copy standard input to all named output files.
# Append content if -a option is supplied.
#
- BEGIN \
- {
+ BEGIN {
for (i = 1; i < ARGC; i++)
copy[i] = ARGV[i]
@@ -16239,8 +16981,7 @@ N input records and M output files, the first method only executes N
Finally, the `END' rule cleans up by closing all the output files:
- END \
- {
+ END {
for (i in copy)
close(copy[i])
}
@@ -16256,7 +16997,7 @@ and by default removes duplicate lines. In other words, it only prints
unique lines--hence the name. `uniq' has a number of options. The
usage is as follows:
- uniq [-udc [-N]] [+N] [ INPUT FILE [ OUTPUT FILE ]]
+ `uniq' [`-udc' [`-N']] [`+N'] [INPUTFILE [OUTPUTFILE]]
The options for `uniq' are:
@@ -16279,11 +17020,11 @@ usage is as follows:
Skip N characters before comparing lines. Any fields specified
with `-N' are skipped first.
-`INPUT FILE'
+`INPUTFILE'
Data is read from the input file named on the command line,
instead of from the standard input.
-`OUTPUT FILE'
+`OUTPUTFILE'
The generated output is sent to the named output file, instead of
to the standard output.
@@ -16327,8 +17068,7 @@ standard output, `/dev/stdout':
# -n skip n fields
# +n skip n characters, skip fields first
- BEGIN \
- {
+ BEGIN {
count = 1
outputfile = "/dev/stdout"
opts = "udc0:1:2:3:4:5:6:7:8:9:"
@@ -16340,7 +17080,7 @@ standard output, `/dev/stdout':
else if (c == "c")
do_count++
else if (index("0123456789", c) != 0) {
- # getopt requires args to options
+ # getopt() requires args to options
# this messes us up for things like -5
if (Optarg ~ /^[[:digit:]]+$/)
fcount = (c Optarg) + 0
@@ -16473,7 +17213,7 @@ File: gawk.info, Node: Wc Program, Prev: Uniq Program, Up: Clones
The `wc' (word count) utility counts lines, words, and characters in
one or more input files. Its usage is as follows:
- wc [-lwc] [ FILES ... ]
+ `wc' [`-lwc'] [FILES ...]
If no files are specified on the command line, `wc' reads its
standard input. If there are multiple files, it also prints total
@@ -16553,7 +17293,7 @@ lines, words, and characters to zero, and saves the current file name in
}
The `endfile()' function adds the current file's numbers to the
-running totals of lines, words, and characters.(1) It then prints out
+running totals of lines, words, and characters. It then prints out
those numbers for the file that was just read. It relies on
`beginfile()' to reset the numbers for the following data file:
@@ -16572,7 +17312,7 @@ those numbers for the file that was just read. It relies on
}
There is one rule that is executed for each line. It adds the length
-of the record, plus one, to `chars'.(2) Adding one plus the record
+of the record, plus one, to `chars'.(1) Adding one plus the record
length is needed because the newline character separating records (the
value of `RS') is not part of the record itself, and thus not included
in its length. Next, `lines' is incremented for each line read, and
@@ -16602,15 +17342,11 @@ in its length. Next, `lines' is incremented for each line read, and
---------- Footnotes ----------
- (1) `wc' can't just use the value of `FNR' in `endfile()'. If you
-examine the code in *note Filetrans Function::, you will see that `FNR'
-has already been reset by the time `endfile()' is called.
-
- (2) Since `gawk' understands multibyte locales, this code counts
+ (1) Since `gawk' understands multibyte locales, this code counts
characters, not bytes.

-File: gawk.info, Node: Miscellaneous Programs, Prev: Clones, Up: Sample Programs
+File: gawk.info, Node: Miscellaneous Programs, Next: Programs Summary, Prev: Clones, Up: Sample Programs
11.3 A Grab Bag of `awk' Programs
=================================
@@ -16720,8 +17456,7 @@ Statement::), but the processing could be done with a series of
# Requires getlocaltime() library function
# usage: alarm time [ "message" [ count [ delay ] ] ]
- BEGIN \
- {
+ BEGIN {
# Initial argument sanity checking
usage1 = "usage: alarm time ['message' [count [delay]]]"
usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
@@ -16791,7 +17526,7 @@ alarm:
# how long to sleep for
naptime = target - current
if (naptime <= 0) {
- print "time is in the past!" > "/dev/stderr"
+ print "alarm: time is in the past!" > "/dev/stderr"
exit 1
}
@@ -16839,11 +17574,11 @@ there are more characters in the "from" list than in the "to" list, the
last character of the "to" list is used for the remaining characters in
the "from" list.
- Some time ago, a user proposed that a transliteration function should
-be added to `gawk'. The following program was written to prove that
-character transliteration could be done with a user-level function.
-This program is not as complete as the system `tr' utility but it does
-most of the job.
+ Once upon a time, a user proposed that a transliteration function
+should be added to `gawk'. The following program was written to prove
+that character transliteration could be done with a user-level
+function. This program is not as complete as the system `tr' utility
+but it does most of the job.
The `translate' program demonstrates one of the few weaknesses of
standard `awk': dealing with individual characters is very painful,
@@ -16924,8 +17659,8 @@ record:
While it is possible to do character transliteration in a user-level
function, it is not necessarily efficient, and we (the `gawk' authors)
started to consider adding a built-in function. However, shortly after
-writing this program, we learned that the System V Release 4 `awk' had
-added the `toupper()' and `tolower()' functions (*note String
+writing this program, we learned that Brian Kernighan had added the
+`toupper()' and `tolower()' functions to his `awk' (*note String
Functions::). These functions handle the vast majority of the cases
where character transliteration is necessary, and so we chose to simply
add those functions to `gawk' as well and then leave well enough alone.
@@ -16935,15 +17670,19 @@ array only once, in a `BEGIN' rule. However, this assumes that the
"from" and "to" lists will never change throughout the lifetime of the
program.
+ Another obvious improvement is to enable the use of ranges, such as
+`a-z', as allowed by the `tr' utility. Look at the code for `cut.awk'
+(*note Cut Program::) for inspiration.
+
---------- Footnotes ----------
- (1) On some older systems, including Solaris, `tr' may require that
-the lists be written as range expressions enclosed in square brackets
-(`[a-z]') and quoted, to prevent the shell from attempting a file name
-expansion. This is not a feature.
+ (1) On some older systems, including Solaris, the system version of
+`tr' may require that the lists be written as range expressions
+enclosed in square brackets (`[a-z]') and quoted, to prevent the shell
+from attempting a file name expansion. This is not a feature.
- (2) This program was written before `gawk' acquired the ability to
-split each character in a string into separate array elements.
+ (2) This program was also written before `gawk' acquired the ability
+to split each character in a string into separate array elements.

File: gawk.info, Node: Labels Program, Next: Word Sorting, Prev: Translate Program, Up: Miscellaneous Programs
@@ -17042,8 +17781,7 @@ not have been an even multiple of 20 labels in the data:
Count++
}
- END \
- {
+ END {
printpage()
}
@@ -17060,7 +17798,7 @@ File: gawk.info, Node: Word Sorting, Next: History Sorting, Prev: Labels Prog
When working with large amounts of text, it can be interesting to know
how often different words appear. For example, an author may overuse
-certain words, in which case she might wish to find synonyms to
+certain words, in which case he or she might wish to find synonyms to
substitute for words that appear too often. This node develops a
program for counting words and presenting the frequency information in
a useful format.
@@ -17123,6 +17861,10 @@ script. Here is the new version of the program:
printf "%s\t%d\n", word, freq[word]
}
+ The regexp `/[^[:alnum:]_[:blank:]]/' might have been written
+`/[[:punct:]]/', but then underscores would also be removed, and we
+want to keep them.
+
Assuming we have saved this program in a file named `wordfreq.awk',
and that the data is in `file1', the following pipeline:
@@ -17200,8 +17942,7 @@ information. For example, using the following `print' statement in the
print data[lines[i]], lines[i]
- This works because `data[$0]' is incremented each time a line is
-seen.
+This works because `data[$0]' is incremented each time a line is seen.

File: gawk.info, Node: Extract Program, Next: Simple Sed, Prev: History Sorting, Up: Miscellaneous Programs
@@ -17290,7 +18031,7 @@ with a zero exit status, signifying OK:
/^@c(omment)?[ \t]+system/ \
{
if (NF < 3) {
- e = (FILENAME ":" FNR)
+ e = ("extract: " FILENAME ":" FNR)
e = (e ": badly formed `system' line")
print e > "/dev/stderr"
next
@@ -17299,7 +18040,7 @@ with a zero exit status, signifying OK:
$2 = ""
stat = system($0)
if (stat != 0) {
- e = (FILENAME ":" FNR)
+ e = ("extract: " FILENAME ":" FNR)
e = (e ": warning: system returned " stat)
print e > "/dev/stderr"
}
@@ -17329,16 +18070,17 @@ function (*note String Functions::). The `@' symbol is used as the
separator character. Each element of `a' that is empty indicates two
successive `@' symbols in the original line. For each two empty
elements (`@@' in the original file), we have to add a single `@'
-symbol back in.(1)
+symbol back in.
When the processing of the array is finished, `join()' is called
-with the value of `SUBSEP', to rejoin the pieces back into a single
-line. That line is then printed to the output file:
+with the value of `SUBSEP' (*note Multidimensional::), to rejoin the
+pieces back into a single line. That line is then printed to the
+output file:
/^@c(omment)?[ \t]+file/ \
{
if (NF != 3) {
- e = (FILENAME ":" FNR ": badly formed `file' line")
+ e = ("extract: " FILENAME ":" FNR ": badly formed `file' line")
print e > "/dev/stderr"
next
}
@@ -17389,7 +18131,7 @@ closing the open file:
function unexpected_eof()
{
- printf("%s:%d: unexpected EOF or error\n",
+ printf("extract: %s:%d: unexpected EOF or error\n",
FILENAME, FNR) > "/dev/stderr"
exit 1
}
@@ -17399,11 +18141,6 @@ closing the open file:
close(curfile)
}
- ---------- Footnotes ----------
-
- (1) This program was written before `gawk' had the `gensub()'
-function. Consider how you might use it to simplify the code.
-

File: gawk.info, Node: Simple Sed, Next: Igawk Program, Prev: Extract Program, Up: Miscellaneous Programs
@@ -17539,8 +18276,8 @@ language.(1) It works as follows:
2. For any arguments that do represent `awk' text, put the arguments
into a shell variable that will be expanded. There are two cases:
- a. Literal text, provided with `--source' or `--source='. This
- text is just appended directly.
+ a. Literal text, provided with `-e' or `--source'. This text is
+ just appended directly.
b. Source file names, provided with `-f'. We use a neat trick
and append `@include FILENAME' to the shell variable's
@@ -17580,10 +18317,10 @@ are several cases of interest:
programming trick. Don't worry about it if you are not familiar
with `sh'.)
-`-v, -F'
+`-v', `-F'
These are saved and passed on to `gawk'.
-`-f, --file, --file=, -Wfile='
+`-f', `--file', `--file=', `-Wfile='
The file name is appended to the shell variable `program' with an
`@include' statement. The `expr' utility is used to remove the
leading option part of the argument (e.g., `--file='). (Typical
@@ -17592,10 +18329,10 @@ are several cases of interest:
sequences in their arguments, possibly mangling the program text.
Using `expr' avoids this problem.)
-`--source, --source=, -Wsource='
+`--source', `--source=', `-Wsource='
The source text is appended to `program'.
-`--version, -Wversion'
+`--version', `-Wversion'
`igawk' prints its version number, runs `gawk --version' to get
the `gawk' version information, and then exits.
@@ -17741,12 +18478,12 @@ which represents the current directory:
pathlist[i] = "."
}
- The stack is initialized with `ARGV[1]', which will be `/dev/stdin'.
-The main loop comes next. Input lines are read in succession. Lines
-that do not start with `@include' are printed verbatim. If the line
-does start with `@include', the file name is in `$2'. `pathto()' is
-called to generate the full path. If it cannot, then the program
-prints an error message and continues.
+ The stack is initialized with `ARGV[1]', which will be
+`"/dev/stdin"'. The main loop comes next. Input lines are read in
+succession. Lines that do not start with `@include' are printed
+verbatim. If the line does start with `@include', the file name is in
+`$2'. `pathto()' is called to generate the full path. If it cannot,
+then the program prints an error message and continues.
The next thing to check is if the file is included already. The
`processed' array is indexed by the full file name of each included
@@ -17769,7 +18506,7 @@ zero, the program is done:
}
fpath = pathto($2)
if (fpath == "") {
- printf("igawk:%s:%d: cannot find %s\n",
+ printf("igawk: %s:%d: cannot find %s\n",
input[stackptr], FNR, $2) > "/dev/stderr"
continue
}
@@ -17823,7 +18560,7 @@ supplied.
The `eval' command is a shell construct that reruns the shell's
parsing process. This keeps things properly quoted.
- This version of `igawk' represents my fifth version of this program.
+ This version of `igawk' represents the fifth version of this program.
There are four key simplifications that make the program work better:
* Using `@include' even for the files named with `-f' makes building
@@ -17853,26 +18590,6 @@ manipulation using the shell than it is in `awk'.
Finally, `igawk' shows that it is not always necessary to add new
features to a program; they can often be layered on top.
- As an additional example of this, consider the idea of having two
-files in a directory in the search path:
-
-`default.awk'
- This file contains a set of default library functions, such as
- `getopt()' and `assert()'.
-
-`site.awk'
- This file contains library functions that are specific to a site or
- installation; i.e., locally developed functions. Having a
- separate file allows `default.awk' to change with new `gawk'
- releases, without requiring the system administrator to update it
- each time by adding the local functions.
-
- One user suggested that `gawk' be modified to automatically read
-these files upon startup. Instead, it would be very simple to modify
-`igawk' to do this. Since `igawk' can process nested `@include'
-directives, `default.awk' could simply contain `@include' statements
-for the desired library functions.
-
---------- Footnotes ----------
(1) Fully explaining the `sh' language is beyond the scope of this
@@ -17997,7 +18714,141 @@ supplies the following copyright terms:
X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O,
O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O}'
- We leave it to you to determine what the program does.
+ We leave it to you to determine what the program does. (If you are
+truly desperate to understand it, see Chris Johansen's explanation,
+which is embedded in the Texinfo source file for this Info file.)
+
+
+File: gawk.info, Node: Programs Summary, Next: Programs Exercises, Prev: Miscellaneous Programs, Up: Sample Programs
+
+11.4 Summary
+============
+
+ * The functions provided in this major node and the previous one
+ continue on the theme that reading programs is an excellent way to
+ learn Good Programming.
+
+ * Using `#!' to make `awk' programs directly runnable makes them
+ easier to use. Otherwise, invoke the program using `awk -f ...'.
+
+ * Reimplementing standard POSIX programs in `awk' is a pleasant
+ exercise; `awk''s expressive power lets you write such programs in
+ relatively few lines of code, yet they are functionally complete
+ and usable.
+
+ * One of standard `awk''s weaknesses is working with individual
+ characters. The ability to use `split()' with the empty string as
+ the separator can considerably simplify such tasks.
+
+ * The library functions from *note Library Functions::, proved their
+ usefulness for a number of real (if small) programs.
+
+ * Besides reinventing POSIX wheels, other programs solved a
+ selection of interesting problems, such as finding duplicate
+ words in text, printing mailing labels, and finding anagrams.
+
+
+
+File: gawk.info, Node: Programs Exercises, Prev: Programs Summary, Up: Sample Programs
+
+11.5 Exercises
+==============
+
+ 1. Rewrite `cut.awk' (*note Cut Program::) using `split()' with `""'
+ as the separator.
+
+ 2. In *note Egrep Program::, we mentioned that `egrep -i' could be
+ simulated in versions of `awk' without `IGNORECASE' by using
+ `tolower()' on the line and the pattern. In a footnote there, we
+ also mentioned that this solution has a bug: the translated line is
+ output, and not the original one. Fix this problem.
+
+ 3. The POSIX version of `id' takes options that control which
+ information is printed. Modify the `awk' version (*note Id
+ Program::) to accept the same arguments and perform in the same
+ way.
+
+ 4. The `split.awk' program (*note Split Program::) assumes that
+ letters are contiguous in the character set, which isn't true for
+ EBCDIC systems. Fix this problem. (Hint: Consider a different
+ way to work through the alphabet, without relying on `ord()' and
+ `chr()'.)
+
+ 5. In `uniq.awk' (*note Uniq Program::), the logic for choosing which
+ lines to print represents a "state machine", which is "a device
+ that can be in one of a set number of stable conditions depending
+ on its previous condition and on the present values of its
+ inputs."(1) Brian Kernighan suggests that "an alternative approach
+ to state machines is to just read the input into an array, then
+ use indexing. It's almost always easier code, and for most inputs
+ where you would use this, just as fast." Rewrite the logic to
+ follow this suggestion.
+
+ 6. Why can't the `wc.awk' program (*note Wc Program::) just use the
+ value of `FNR' in `endfile()'? Hint: Examine the code in *note
+ Filetrans Function::.
+
+ 7. Manipulation of individual characters in the `translate' program
+ (*note Translate Program::) is painful using standard `awk'
+ functions. Given that `gawk' can split strings into individual
+ characters using `""' as the separator, how might you use this
+ feature to simplify the program?
+
+ 8. The `extract.awk' program (*note Extract Program::) was written
+ before `gawk' had the `gensub()' function. Use it to simplify the
+ code.
+
+ 9. Compare the performance of the `awksed.awk' program (*note Simple
+ Sed::) with the more straightforward:
+
+ BEGIN {
+ pat = ARGV[1]
+ repl = ARGV[2]
+ ARGV[1] = ARGV[2] = ""
+ }
+
+ { gsub(pat, repl); print }
+
+ 10. What are the advantages and disadvantages of `awksed.awk' versus
+ the real `sed' utility?
+
+ 11. In *note Igawk Program::, we mentioned that not trying to save the
+ line read with `getline' in the `pathto()' function when testing
+ for the file's accessibility for use with the main program
+ simplifies things considerably. What problem does this engender
+ though?
+
+ 12. As an additional example of the idea that it is not always
+ necessary to add new features to a program, consider the idea of
+ having two files in a directory in the search path:
+
+ `default.awk'
+ This file contains a set of default library functions, such
+ as `getopt()' and `assert()'.
+
+ `site.awk'
+ This file contains library functions that are specific to a
+ site or installation; i.e., locally developed functions.
+ Having a separate file allows `default.awk' to change with
+ new `gawk' releases, without requiring the system
+ administrator to update it each time by adding the local
+ functions.
+
+ One user suggested that `gawk' be modified to automatically read
+ these files upon startup. Instead, it would be very simple to
+ modify `igawk' to do this. Since `igawk' can process nested
+ `@include' directives, `default.awk' could simply contain
+ `@include' statements for the desired library functions. Make
+ this change.
+
+ 13. Modify `anagram.awk' (*note Anagram Program::), to avoid the use
+ of the external `sort' utility.
+
+
+ ---------- Footnotes ----------
+
+ (1) This is the definition returned from entering `define: state
+machine' into Google.

File: gawk.info, Node: Advanced Features, Next: Internationalization, Prev: Sample Programs, Up: Top
@@ -18043,6 +18894,7 @@ own:
* Two-way I/O:: Two-way communications with another process.
* TCP/IP Networking:: Using `gawk' for network programming.
* Profiling:: Profiling your `awk' programs.
+* Advanced Features Summary:: Summary of advanced features.

File: gawk.info, Node: Nondecimal Data, Next: Array Sorting, Up: Advanced Features
@@ -18066,8 +18918,8 @@ your data as numeric:
The `print' statement treats its expressions as strings. Although the
fields can act as numbers when necessary, they are still strings, so
-`print' does not try to treat them numerically. You may need to add
-zero to a field to force it to be treated as a number. For example:
+`print' does not try to treat them numerically. You need to add zero
+to a field to force it to be treated as a number. For example:
$ echo 0123 123 0x123 | gawk --non-decimal-data '
> { print $1, $2, $3
@@ -18082,7 +18934,7 @@ request it.
CAUTION: _Use of this option is not recommended._ It can break old
programs very badly. Instead, use the `strtonum()' function to
- convert your data (*note Nondecimal-numbers::). This makes your
+ convert your data (*note String Functions::). This makes your
programs easier to write and easier to read, and leads to less
surprising results.
@@ -18121,7 +18973,7 @@ you do this.
*note Controlling Scanning::, describes how you can assign special,
pre-defined values to `PROCINFO["sorted_in"]' in order to control the
-order in which `gawk' will traverse an array during a `for' loop.
+order in which `gawk' traverses an array during a `for' loop.
In addition, the value of `PROCINFO["sorted_in"]' can be a function
name. This lets you traverse an array based on any custom criterion.
@@ -18394,9 +19246,9 @@ become the values of the result array:
So far, so good. Now it starts to get interesting. Both `asort()'
and `asorti()' accept a third string argument to control comparison of
-array elements. In *note String Functions::, we ignored this third
-argument; however, the time has now come to describe how this argument
-affects these two functions.
+array elements. When we introduced `asort()' and `asorti()' in *note
+String Functions::, we ignored this third argument; however, now is the
+time to describe how this argument affects these two functions.
Basically, the third argument specifies how the array is to be
sorted. There are two possibilities. As with `PROCINFO["sorted_in"]',
@@ -18436,24 +19288,7 @@ File: gawk.info, Node: Two-way I/O, Next: TCP/IP Networking, Prev: Array Sort
12.3 Two-Way Communications with Another Process
================================================
- From: brennan@whidbey.com (Mike Brennan)
- Newsgroups: comp.lang.awk
- Subject: Re: Learn the SECRET to Attract Women Easily
- Date: 4 Aug 1997 17:34:46 GMT
- Message-ID: <5s53rm$eca@news.whidbey.com>
-
- On 3 Aug 1997 13:17:43 GMT, Want More Dates???
- <tracy78@kilgrona.com> wrote:
- >Learn the SECRET to Attract Women Easily
- >
- >The SCENT(tm) Pheromone Sex Attractant For Men to Attract Women
-
- The scent of awk programmers is a lot more attractive to women than
- the scent of perl programmers.
- --
- Mike Brennan
-
- It is often useful to be able to send data to a separate program for
+It is often useful to be able to send data to a separate program for
processing and then read the result. This can always be done with
temporary files:
@@ -18472,12 +19307,11 @@ temporary files:
This works, but not elegantly. Among other things, it requires that
the program be run in a directory that cannot be shared among users;
for example, `/tmp' will not do, as another user might happen to be
-using a temporary file with the same name.
-
- However, with `gawk', it is possible to open a _two-way_ pipe to
-another process. The second process is termed a "coprocess", since it
-runs in parallel with `gawk'. The two-way connection is created using
-the `|&' operator (borrowed from the Korn shell, `ksh'):(1)
+using a temporary file with the same name.(1) However, with `gawk', it
+is possible to open a _two-way_ pipe to another process. The second
+process is termed a "coprocess", since it runs in parallel with `gawk'.
+The two-way connection is created using the `|&' operator (borrowed
+from the Korn shell, `ksh'):(2)
do {
print DATA |& "subprogram"
@@ -18544,7 +19378,8 @@ the `gawk' program. Once all of the data has been read, `gawk'
terminates the coprocess and exits.
As a side note, the assignment `LC_ALL=C' in the `sort' command
-ensures traditional Unix (ASCII) sorting from `sort'.
+ensures traditional Unix (ASCII) sorting from `sort'. This is not
+strictly necessary here, but it's good to know how to do this.
You may also use pseudo-ttys (ptys) for two-way communication
instead of pipes, if your system supports them. This is done on a
@@ -18556,14 +19391,18 @@ per-command basis, by setting a special element in the `PROCINFO' array
print ... |& command # start two-way pipe
...
-Using ptys avoids the buffer deadlock issues described earlier, at some
-loss in performance. If your system does not have ptys, or if all the
-system's ptys are in use, `gawk' automatically falls back to using
-regular pipes.
+Using ptys usually avoids the buffer deadlock issues described earlier,
+at some loss in performance. If your system does not have ptys, or if
+all the system's ptys are in use, `gawk' automatically falls back to
+using regular pipes.
---------- Footnotes ----------
- (1) This is very different from the same operator in the C shell and
+ (1) Michael Brennan suggests the use of `rand()' to generate unique
+file names. This is a valid point; nevertheless, temporary files remain
+more difficult than two-way pipes.
+
+ (2) This is very different from the same operator in the C shell and
in Bash.

@@ -18586,7 +19425,7 @@ network connection.
You can think of this as just a _very long_ two-way pipeline to a
coprocess. The way `gawk' decides that you want to use TCP/IP
networking is by recognizing special file names that begin with one of
-`/inet/', `/inet4/' or `/inet6'.
+`/inet/', `/inet4/' or `/inet6/'.
The full syntax of the special file name is
`/NET-TYPE/PROTOCOL/LOCAL-PORT/REMOTE-HOST/REMOTE-PORT'. The
@@ -18644,7 +19483,7 @@ much more complete introduction and discussion, as well as extensive
examples.

-File: gawk.info, Node: Profiling, Prev: TCP/IP Networking, Up: Advanced Features
+File: gawk.info, Node: Profiling, Next: Advanced Features Summary, Prev: TCP/IP Networking, Up: Advanced Features
12.5 Profiling Your `awk' Programs
==================================
@@ -18840,7 +19679,7 @@ As usual, the profiled version of the program is written to
`awkprof.out', or to a different file if one specified with the
`--profile' option.
- Along with the regular profile, as shown earlier, the profile
+ Along with the regular profile, as shown earlier, the profile file
includes a trace of any active functions:
# Function Call Stack:
@@ -18869,8 +19708,50 @@ by the `Ctrl-<\>' key.
called this way, `gawk' "pretty prints" the program into `awkprof.out',
without any execution counts.
- NOTE: The `--pretty-print' option still runs your program. This
- will change in the next major release.
+ NOTE: Once upon a time, the `--pretty-print' option would also run
+ your program. This is no longer the case.
+
+
+File: gawk.info, Node: Advanced Features Summary, Prev: Profiling, Up: Advanced Features
+
+12.6 Summary
+============
+
+ * The `--non-decimal-data' option causes `gawk' to treat octal- and
+ hexadecimal-looking input data as octal and hexadecimal. This
+ option should be used with caution or not at all; use of
+ `strtonum()' is preferable.
+
+ * You can take over complete control of sorting in `for (INDX in
+ ARRAY)' array traversal by setting `PROCINFO["sorted_in"]' to the
+ name of a user-defined function that does the comparison of array
+ elements based on index and value.
+
+ * Similarly, you can supply the name of a user-defined comparison
+ function as the third argument to either `asort()' or `asorti()'
+ to control how those functions sort arrays. Or you may provide one
+ of the predefined control strings that work for
+ `PROCINFO["sorted_in"]'.
+
+ * You can use the `|&' operator to create a two-way pipe to a
+ co-process. You read from the co-process with `getline' and write
+ to it with `print' or `printf'. Use `close()' to close off the
+ co-process completely, or optionally, close off one side of the
+ two-way communications.
+
+ * By using special "file names" with the `|&' operator, you can open
+ a TCP/IP (or UDP/IP) connection to remote hosts in the Internet.
+ `gawk' supports both IPv4 and IPv6.
+
+ * You can generate statement count profiles of your program. This
+ can help you determine which parts of your program may be taking
+ the most time and let you tune them more easily. Sending the
+ `USR1' signal while profiling causes `gawk' to dump the profile
+ and keep going, including a function call stack.
+
+ * You can also just "pretty print" the program. Once upon a time
+ this also ran the program, but that is no longer the case.
+

File: gawk.info, Node: Internationalization, Next: Debugger, Prev: Advanced Features, Up: Top
@@ -18902,6 +19783,7 @@ requirement.
* Translator i18n:: Features for the translator.
* I18N Example:: A simple i18n example.
* Gawk I18N:: `gawk' is also internationalized.
+* I18N Summary:: Summary of I18N stuff.

File: gawk.info, Node: I18N and L10N, Next: Explaining gettext, Up: Internationalization
@@ -18924,6 +19806,7 @@ File: gawk.info, Node: Explaining gettext, Next: Programmer i18n, Prev: I18N
13.2 GNU `gettext'
==================
+`gawk' uses GNU `gettext' to provide its internationalization features.
The facilities in GNU `gettext' focus on messages; strings printed by a
program, either directly or via formatting with `printf' or
`sprintf()'.(1)
@@ -19017,9 +19900,9 @@ are:
`LC_CTYPE'
Character-type information (alphabetic, digit, upper- or
- lowercase, and so on). This information is accessed via the POSIX
- character classes in regular expressions, such as `/[[:alnum:]]/'
- (*note Regexp Operators::).
+ lowercase, and so on) as well as character encoding. This
+ information is accessed via the POSIX character classes in regular
+ expressions, such as `/[[:alnum:]]/' (*note Regexp Operators::).
`LC_MONETARY'
Monetary information, such as the currency symbol, and whether the
@@ -19029,10 +19912,6 @@ are:
Numeric information, such as which characters to use for the
decimal point and the thousands separator.(2)
-`LC_RESPONSE'
- Response information, such as how "yes" and "no" appear in the
- local language, and possibly other information as well.
-
`LC_TIME'
Time- and date-related information, such as 12- or 24-hour clock,
month printed before or after the day in a date, local month
@@ -19070,7 +19949,7 @@ internationalization:
for translation at runtime. String constants without a leading
underscore are not translated.
-`dcgettext(STRING [, DOMAIN [, CATEGORY]])'
+``dcgettext(STRING' [`,' DOMAIN [`,' CATEGORY]]`)''
Return the translation of STRING in text domain DOMAIN for locale
category CATEGORY. The default value for DOMAIN is the current
value of `TEXTDOMAIN'. The default value for CATEGORY is
@@ -19087,7 +19966,7 @@ internationalization:
be simple and to allow for reasonable `awk'-style default
arguments.
-`dcngettext(STRING1, STRING2, NUMBER [, DOMAIN [, CATEGORY]])'
+``dcngettext(STRING1, STRING2, NUMBER' [`,' DOMAIN [`,' CATEGORY]]`)''
Return the plural form used for NUMBER of the translation of
STRING1 and STRING2 in text domain DOMAIN for locale category
CATEGORY. STRING1 is the English singular variant of a message,
@@ -19098,7 +19977,7 @@ internationalization:
The same remarks about argument order as for the `dcgettext()'
function apply.
-`bindtextdomain(DIRECTORY [, DOMAIN])'
+``bindtextdomain(DIRECTORY' [`,' DOMAIN ]`)''
Change the directory in which `gettext' looks for `.gmo' files, in
case they will not or cannot be placed in the standard locations
(e.g., during testing). Return the directory in which DOMAIN is
@@ -19129,16 +20008,27 @@ outlined in *note Explaining gettext::, like so:
printf(_"Number of users is %d\n", nusers)
3. If you are creating strings dynamically, you can still translate
- them, using the `dcgettext()' built-in function:
+ them, using the `dcgettext()' built-in function:(1)
- message = nusers " users logged in"
- message = dcgettext(message, "adminprog")
- print message
+ if (groggy)
+ message = dcgettext("%d customers disturbing me\n", "adminprog")
+ else
+ message = dcgettext("enjoying %d customers\n", "adminprog")
+ printf(message, ncustomers)
Here, the call to `dcgettext()' supplies a different text domain
(`"adminprog"') in which to find the message, but it uses the
default `"LC_MESSAGES"' category.
+ The previous example only works if `ncustomers' is greater than
+ one. This example would be better done with `dcngettext()':
+
+ if (groggy)
+ message = dcngettext("%d customer disturbing me\n", "%d customers disturbing me\n", ncustomers, "adminprog")
+ else
+ message = dcngettext("enjoying %d customer\n", "enjoying %d customers\n", ncustomers, "adminprog")
+ printf(message, ncustomers)
+
4. During development, you might want to put the `.gmo' file in a
private directory for testing. This is done with the
`bindtextdomain()' built-in function:
@@ -19158,6 +20048,10 @@ outlined in *note Explaining gettext::, like so:
*Note I18N Example::, for an example program showing the steps to
create and use translations from `awk'.
+ ---------- Footnotes ----------
+
+ (1) Thanks to Bruno Haible for this example.
+

File: gawk.info, Node: Translator i18n, Next: I18N Example, Prev: Programmer i18n, Up: Internationalization
@@ -19197,8 +20091,11 @@ Instead, it parses it as usual and prints all marked strings to
standard output in the format of a GNU `gettext' Portable Object file.
Also included in the output are any constant strings that appear as the
first argument to `dcgettext()' or as the first and second argument to
-`dcngettext()'.(1) *Note I18N Example::, for the full list of steps to
-go through to create and test translations for `guide'.
+`dcngettext()'.(1) You should distribute the generated `.pot' file with
+your `awk' program; translators will eventually use it to provide you
+translations that you can also then distribute. *Note I18N Example::,
+for the full list of steps to go through to create and test
+translations for `guide'.
---------- Footnotes ----------
@@ -19403,19 +20300,19 @@ Following are the translations:
msgstr "Like, the scoop is"
The next step is to make the directory to hold the binary message
-object file and then to create the `guide.gmo' file. The directory
+object file and then to create the `guide.mo' file. We pretend that
+our file is to be used in the `en_US.UTF-8' locale. The directory
layout shown here is standard for GNU `gettext' on GNU/Linux systems.
Other versions of `gettext' may use a different layout:
- $ mkdir en_US en_US/LC_MESSAGES
+ $ mkdir en_US.UTF-8 en_US.UTF-8/LC_MESSAGES
The `msgfmt' utility does the conversion from human-readable `.po'
-file to machine-readable `.gmo' file. By default, `msgfmt' creates a
+file to machine-readable `.mo' file. By default, `msgfmt' creates a
file named `messages'. This file must be renamed and placed in the
proper directory so that `gawk' can find it:
- $ msgfmt guide-mellow.po
- $ mv messages en_US/LC_MESSAGES/guide.gmo
+ $ msgfmt guide-mellow.po -o en_US.UTF-8/LC_MESSAGES/guide.mo
Finally, we run the program to test it:
@@ -19438,7 +20335,7 @@ and `bindtextdomain()' (*note I18N Portability::) are in a file named
(1) Perhaps it would be better if it were called "Hippy." Ah, well.

-File: gawk.info, Node: Gawk I18N, Prev: I18N Example, Up: Internationalization
+File: gawk.info, Node: Gawk I18N, Next: I18N Summary, Prev: I18N Example, Up: Internationalization
13.6 `gawk' Can Speak Your Language
===================================
@@ -19446,13 +20343,46 @@ File: gawk.info, Node: Gawk I18N, Prev: I18N Example, Up: Internationalizatio
`gawk' itself has been internationalized using the GNU `gettext'
package. (GNU `gettext' is described in complete detail in *note (GNU
`gettext' utilities)Top:: gettext, GNU gettext tools.) As of this
-writing, the latest version of GNU `gettext' is version 0.18.2.1
-(ftp://ftp.gnu.org/gnu/gettext/gettext-0.18.2.1.tar.gz).
+writing, the latest version of GNU `gettext' is version 0.19.1
+(ftp://ftp.gnu.org/gnu/gettext/gettext-0.19.1.tar.gz).
If a translation of `gawk''s messages exists, then `gawk' produces
usage messages, warnings, and fatal errors in the local language.

+File: gawk.info, Node: I18N Summary, Prev: Gawk I18N, Up: Internationalization
+
+13.7 Summary
+============
+
+ * Internationalization means writing a program such that it can use
+ multiple languages without requiring source-code changes.
+ Localization means providing the data necessary for an
+ internationalized program to work in a particular language.
+
+ * `gawk' uses GNU `gettext' to let you internationalize and localize
+ `awk' programs. A program's text domain identifies the program
+ for grouping all messages and other data together.
+
+ * You mark a program's strings for translation by preceding them with
+ an underscore. Once that is done, the strings are extracted into a
+ `.pot' file. This file is copied for each language into a `.po'
+ file, and the `.po' files are compiled into `.gmo' files for use
+ at runtime.
+
+ * You can use position specifications with `sprintf()' and `printf'
+ to rearrange the placement of argument values in formatted strings
+ and output. This is useful for the translations of format control
+ strings.
+
+ * The internationalization features have been designed so that they
+ can be easily worked around in a standard `awk'.
+
+ * `gawk' itself has been internationalized and ships with a number
+ of translations for its messages.
+
+
+
File: gawk.info, Node: Debugger, Next: Arbitrary Precision Arithmetic, Prev: Internationalization, Up: Top
14 Debugging `awk' Programs
@@ -19475,12 +20405,13 @@ program is easy.
* List of Debugger Commands:: Main debugger commands.
* Readline Support:: Readline support.
* Limitations:: Limitations and future plans.
+* Debugging Summary:: Debugging summary.

File: gawk.info, Node: Debugging, Next: Sample Debugging Session, Up: Debugger
-14.1 Introduction to `gawk' Debugger
-====================================
+14.1 Introduction to the `gawk' Debugger
+========================================
This minor node introduces debugging in general and begins the
discussion of debugging in `gawk'.
@@ -19635,7 +20566,7 @@ options. (`gawk' is not designed to debug command-line programs, only
programs contained in files.) In our case, we invoke the debugger like
this:
- $ gawk -D -f getopt.awk -f join.awk -f uniq.awk inputfile
+ $ gawk -D -f getopt.awk -f join.awk -f uniq.awk -1 inputfile
where both `getopt.awk' and `uniq.awk' are in `$AWKPATH'. (Experienced
users of GDB or similar debuggers should note that this syntax is
@@ -19687,7 +20618,7 @@ for a breakpoint in `uniq.awk' is at the beginning of the function
To set the breakpoint, use the `b' (breakpoint) command:
gawk> b are_equal
- -| Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 64
+ -| Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 63
The debugger tells us the file and line number where the breakpoint
is. Now type `r' or `run' and the program runs until it hits the
@@ -19697,8 +20628,8 @@ breakpoint for the first time:
-| Starting program:
-| Stopping in Rule ...
-| Breakpoint 1, are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':64
- -| 64 if (fcount == 0 && charcount == 0)
+ at `awklib/eg/prog/uniq.awk':63
+ -| 63 if (fcount == 0 && charcount == 0)
gawk>
Now we can look at what's going on inside our program. First of all,
@@ -19708,11 +20639,11 @@ the current stack frames:
gawk> bt
-| #0 are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':69
- -| #1 in main() at `awklib/eg/prog/uniq.awk':89
+ at `awklib/eg/prog/uniq.awk':68
+ -| #1 in main() at `awklib/eg/prog/uniq.awk':88
This tells us that `are_equal()' was called by the main program at
-line 89 of `uniq.awk'. (This is not a big surprise, since this is the
+line 88 of `uniq.awk'. (This is not a big surprise, since this is the
only call to `are_equal()' in the program, but in more complex
programs, knowing who called a function and with what parameters can be
the key to finding the source of the problem.)
@@ -19731,13 +20662,13 @@ function was called without arguments (*note Function Calls::).
A more useful variable to display might be the current record:
gawk> p $0
- -| $0 = string ("gawk is a wonderful program!")
+ -| $0 = "gawk is a wonderful program!"
This might be a bit puzzling at first since this is the second line of
our test input above. Let's look at `NR':
gawk> p NR
- -| NR = number (2)
+ -| NR = 2
So we can see that `are_equal()' was only called for the second record
of the file. Of course, this is because our program contains a rule for
@@ -19751,7 +20682,7 @@ of the file. Of course, this is because our program contains a rule for
OK, let's just check that that rule worked correctly:
gawk> p last
- -| last = string ("awk is a wonderful program!")
+ -| last = "awk is a wonderful program!"
Everything we have done so far has verified that the program has
worked as planned, up to and including the call to `are_equal()', so
@@ -19760,42 +20691,39 @@ must begin "stepping through" the lines of `are_equal()'. We start by
typing `n' (for "next"):
gawk> n
- -| 67 if (fcount > 0) {
+ -| 66 if (fcount > 0) {
- This tells us that `gawk' is now ready to execute line 67, which
+ This tells us that `gawk' is now ready to execute line 66, which
decides whether to give the lines the special "field skipping" treatment
-indicated by the `-f' command-line option. (Notice that we skipped
-from where we were before at line 64 to here, since the condition in
-line 64
-
- if (fcount == 0 && charcount == 0)
-
-was false.)
+indicated by the `-1' command-line option. (Notice that we skipped
+from where we were before at line 63 to here, since the condition in
+line 63 `if (fcount == 0 && charcount == 0)' was false.)
Continuing to step, we now get to the splitting of the current and
last records:
gawk> n
- -| 68 n = split(last, alast)
+ -| 67 n = split(last, alast)
gawk> n
- -| 69 m = split($0, aline)
+ -| 68 m = split($0, aline)
At this point, we should be curious to see what our records were
split into, so we try to look:
gawk> p n m alast aline
- -| n = number (5)
- -| m = number (5)
+ -| n = 5
+ -| m = untyped variable
-| alast = array, 5 elements
- -| aline = array, 5 elements
+ -| aline = untyped variable
(The `p' command can take more than one argument, similar to `awk''s
`print' statement.)
This is kind of disappointing, though. All we found out is that
-there are five elements in each of our arrays. Useful enough (we now
-know that none of the words were accidentally left out), but what if we
-want to see inside the array?
+there are five elements in `alast'; `m' and `aline' don't have values
+yet since we are at line 68 but haven't executed it yet. This
+information is useful enough (we now know that none of the words were
+accidentally left out), but what if we want to see inside the array?
The first choice would be to use subscripts:
@@ -19805,25 +20733,25 @@ want to see inside the array?
Oops!
gawk> p alast[1]
- -| alast["1"] = string ("awk")
+ -| alast["1"] = "awk"
This would be kind of slow for a 100-member array, though, so `gawk'
provides a shortcut (reminiscent of another language not to be
mentioned):
gawk> p @alast
- -| alast["1"] = string ("awk")
- -| alast["2"] = string ("is")
- -| alast["3"] = string ("a")
- -| alast["4"] = string ("wonderful")
- -| alast["5"] = string ("program!")
+ -| alast["1"] = "awk"
+ -| alast["2"] = "is"
+ -| alast["3"] = "a"
+ -| alast["4"] = "wonderful"
+ -| alast["5"] = "program!"
It looks like we got this far OK. Let's take another step or two:
gawk> n
- -| 70 clast = join(alast, fcount, n)
+ -| 69 clast = join(alast, fcount, n)
gawk> n
- -| 71 cline = join(aline, fcount, m)
+ -| 70 cline = join(aline, fcount, m)
Well, here we are at our error (sorry to spoil the suspense). What
we had in mind was to join the fields starting from the second one to
@@ -19831,8 +20759,8 @@ make the virtual record to compare, and if the first field was numbered
zero, this would work. Let's look at what we've got:
gawk> p cline clast
- -| cline = string ("gawk is a wonderful program!")
- -| clast = string ("awk is a wonderful program!")
+ -| cline = "gawk is a wonderful program!"
+ -| clast = "awk is a wonderful program!"
Hey, those look pretty familiar! They're just our original,
unaltered, input records. A little thinking (the human brain is still
@@ -19876,7 +20804,7 @@ following descriptions, commands which may be abbreviated show the
abbreviation on a second description line. A debugger command name may
also be truncated if that partial name is unambiguous. The debugger has
the built-in capability to automatically repeat the previous command
-when just hitting <Enter>. This works for the commands `list', `next',
+just by hitting <Enter>. This works for the commands `list', `next',
`nexti', `step', `stepi' and `continue' executed without any argument.
* Menu:
@@ -20136,7 +21064,7 @@ AWK STATEMENTS
`set' VAR`='VALUE
Assign a constant (number or string) value to an `awk' variable or
field. String values must be enclosed between double quotes
- (`"..."').
+ (`"'...`"').
You can also set special `awk' variables, such as `FS', `NF',
`NR', etc.
@@ -20190,11 +21118,12 @@ are:
`frame' [N]
`f' [N]
- Select and print (frame number, function and argument names,
- source file, and the source line) stack frame N. Frame 0 is the
- currently executing, or "innermost", frame (function call), frame
- 1 is the frame that called the innermost one. The highest numbered
- frame is the one for the main program.
+ Select and print stack frame N. Frame 0 is the currently
+ executing, or "innermost", frame (function call), frame 1 is the
+ frame that called the innermost one. The highest numbered frame is
+ the one for the main program. The printed information consists of
+ the frame number, function and argument names, source file, and
+ the source line.
`up' [COUNT]
Move COUNT (default 1) frames up the stack toward the outermost
@@ -20279,16 +21208,16 @@ from a file. The commands are:
`prompt'
The debugger prompt. The default is `gawk> '.
- `save_history [on | off]'
+ `save_history' [`on' | `off']
Save command history to file `./.gawk_history'. The default
is `on'.
- `save_options [on | off]'
+ `save_options' [`on' | `off']
Save current options to file `./.gawkrc' upon exit. The
default is `on'. Options are read back in to the next
session upon startup.
- `trace [on | off]'
+ `trace' [`on' | `off']
Turn instruction tracing on or off. The default is `off'.
`save' FILENAME
@@ -20327,7 +21256,7 @@ categories, as follows:
Program::) demonstrates:
gawk> dump
- -| # BEGIN
+ -| # BEGIN
-|
-| [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk]
-| [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR]
@@ -20417,7 +21346,7 @@ categories, as follows:
accidentally type `q' or `quit', to make sure you really want to
quit.
-`trace' `on' | `off'
+`trace' [`on' | `off']
Turn on or off a continuous printing of instructions which are
about to be executed, along with printing the `awk' line which they
implement. The default is `off'.
@@ -20433,9 +21362,10 @@ File: gawk.info, Node: Readline Support, Next: Limitations, Prev: List of Deb
14.4 Readline Support
=====================
-If `gawk' is compiled with the `readline' library, you can take
-advantage of that library's command completion and history expansion
-features. The following types of completion are available:
+If `gawk' is compiled with the `readline' library
+(http://cnswww.cns.cwru.edu/php/chet/readline/readline.html), you can
+take advantage of that library's command completion and history
+expansion features. The following types of completion are available:
Command completion
Command names.
@@ -20455,7 +21385,7 @@ Variable name completion

-File: gawk.info, Node: Limitations, Prev: Readline Support, Up: Debugger
+File: gawk.info, Node: Limitations, Next: Debugging Summary, Prev: Readline Support, Up: Debugger
14.5 Limitations and Future Plans
=================================
@@ -20470,19 +21400,18 @@ some limitations. A few which are worth being aware of are:
what your mistake was, though, you'll feel like a real guru.
* If you perused the dump of opcodes in *note Miscellaneous Debugger
- Commands::, (or if you are already familiar with `gawk' internals),
+ Commands:: (or if you are already familiar with `gawk' internals),
you will realize that much of the internal manipulation of data in
`gawk', as in many interpreters, is done on a stack. `Op_push',
`Op_pop', etc., are the "bread and butter" of most `gawk' code.
- Unfortunately, as of now, the `gawk' debugger does not allow you
- to examine the stack's contents.
- That is, the intermediate results of expression evaluation are on
- the stack, but cannot be printed. Rather, only variables which
- are defined in the program can be printed. Of course, a
- workaround for this is to use more explicit variables at the
- debugging stage and then change back to obscure, perhaps more
- optimal code later.
+ Unfortunately, as of now, the `gawk' debugger does not allow you
+ to examine the stack's contents. That is, the intermediate
+ results of expression evaluation are on the stack, but cannot be
+ printed. Rather, only variables which are defined in the program
+ can be printed. Of course, a workaround for this is to use more
+ explicit variables at the debugging stage and then change back to
+ obscure, perhaps more optimal code later.
* There is no way to look "inside" the process of compiling regular
expressions to see if you got it right. As an `awk' programmer,
@@ -20503,362 +21432,336 @@ features may be added, and of course feel free to try to add them
yourself!

-File: gawk.info, Node: Arbitrary Precision Arithmetic, Next: Dynamic Extensions, Prev: Debugger, Up: Top
+File: gawk.info, Node: Debugging Summary, Prev: Limitations, Up: Debugger
-15 Arithmetic and Arbitrary Precision Arithmetic with `gawk'
-************************************************************
+14.6 Summary
+============
- There's a credibility gap: We don't know how much of the
- computer's answers to believe. Novice computer users solve this
- problem by implicitly trusting in the computer as an infallible
- authority; they tend to believe that all digits of a printed
- answer are significant. Disillusioned computer users have just the
- opposite approach; they are constantly afraid that their answers
- are almost meaningless.(1) -- Donald Knuth
+ * Programs rarely work correctly the first time. Finding bugs is
+ "debugging" and a program that helps you find bugs is a
+ "debugger". `gawk' has a built-in debugger that works very
+ similarly to the GNU Debugger, GDB.
- This major node discusses issues that you may encounter when
-performing arithmetic. It begins by discussing some of the general
-attributes of computer arithmetic, along with how this can influence
-what you see when running `awk' programs. This discussion applies to
-all versions of `awk'.
+ * Debuggers let you step through your program one statement at a
+ time, examine and change variable and array values, and do a
+ number of other things that let you understand what your program
+ is actually doing (as opposed to what it is supposed to do).
- The major node then moves on to describe "arbitrary precision
-arithmetic", a feature which is specific to `gawk'.
-
-* Menu:
+ * Like most debuggers, the `gawk' debugger works in terms of stack
+ frames, and lets you set both breakpoints (stop at a point in the
+ code) and watchpoints (stop when a data value changes).
-* General Arithmetic:: An introduction to computer arithmetic.
-* Floating-point Programming:: Effective Floating-point Programming.
-* Gawk and MPFR:: How `gawk' provides
- arbitrary-precision arithmetic.
-* Arbitrary Precision Floats:: Arbitrary Precision Floating-point Arithmetic
- with `gawk'.
-* Arbitrary Precision Integers:: Arbitrary Precision Integer Arithmetic with
- `gawk'.
+ * The debugger command set is fairly complete, providing control over
+ breakpoints, execution, viewing and changing data, working with
+ the stack, getting information, and other tasks.
- ---------- Footnotes ----------
+ * If the `readline' library is available when `gawk' is compiled, it
+ is used by the debugger to provide command-line history and
+ editing.
- (1) Donald E. Knuth. `The Art of Computer Programming'. Volume 2,
-`Seminumerical Algorithms', third edition, 1998, ISBN 0-201-89683-4, p.
-229.

-File: gawk.info, Node: General Arithmetic, Next: Floating-point Programming, Up: Arbitrary Precision Arithmetic
+File: gawk.info, Node: Arbitrary Precision Arithmetic, Next: Dynamic Extensions, Prev: Debugger, Up: Top
-15.1 A General Description of Computer Arithmetic
-=================================================
+15 Arithmetic and Arbitrary Precision Arithmetic with `gawk'
+************************************************************
+
+This major node introduces some basic concepts relating to how
+computers do arithmetic and briefly lists the features in `gawk' for
+performing arbitrary precision floating point computations. It then
+proceeds to describe floating-point arithmetic, which is what `awk'
+uses for all its computations, including a discussion of arbitrary
+precision floating point arithmetic, which is a feature available only
+in `gawk'. It continues on to present arbitrary precision integers, and
+concludes with a description of some points where `gawk' and the POSIX
+standard are not quite in agreement.
-Within computers, there are two kinds of numeric values: "integers" and
-"floating-point". In school, integer values were referred to as
-"whole" numbers--that is, numbers without any fractional part, such as
-1, 42, or -17. The advantage to integer numbers is that they represent
-values exactly. The disadvantage is that their range is limited. On
-most systems, this range is -2,147,483,648 to 2,147,483,647. However,
-many systems now support a range from -9,223,372,036,854,775,808 to
-9,223,372,036,854,775,807.
-
- Integer values come in two flavors: "signed" and "unsigned". Signed
-values may be negative or positive, with the range of values just
-described. Unsigned values are always positive. On most systems, the
-range is from 0 to 4,294,967,295. However, many systems now support a
-range from 0 to 18,446,744,073,709,551,615.
-
- Floating-point numbers represent what are called "real" numbers;
-i.e., those that do have a fractional part, such as 3.1415927. The
-advantage to floating-point numbers is that they can represent a much
-larger range of values. The disadvantage is that there are numbers
-that they cannot represent exactly. `awk' uses "double precision"
-floating-point numbers, which can hold more digits than "single
-precision" floating-point numbers.
-
- There a several important issues to be aware of, described next.
+ NOTE: Most users of `gawk' can safely skip this chapter. But if
+ you want to do scientific calculations with `gawk', this is the
+ place to be.
* Menu:
-* Floating Point Issues:: Stuff to know about floating-point numbers.
-* Integer Programming:: Effective integer programming.
+* Computer Arithmetic:: A quick intro to computer math.
+* Math Definitions:: Defining terms used.
+* MPFR features:: The MPFR features in `gawk'.
+* FP Math Caution:: Things to know.
+* Arbitrary Precision Integers:: Arbitrary Precision Integer Arithmetic with
+ `gawk'.
+* POSIX Floating Point Problems:: Standards Versus Existing Practice.
+* Floating point summary:: Summary of floating point discussion.

-File: gawk.info, Node: Floating Point Issues, Next: Integer Programming, Up: General Arithmetic
+File: gawk.info, Node: Computer Arithmetic, Next: Math Definitions, Up: Arbitrary Precision Arithmetic
-15.1.1 Floating-Point Number Caveats
-------------------------------------
-
-This minor node describes some of the issues involved in using
-floating-point numbers.
+15.1 A General Description of Computer Arithmetic
+=================================================
- There is a very nice paper on floating-point arithmetic
-(http://www.validlab.com/goldberg/paper.pdf) by David Goldberg, "What
-Every Computer Scientist Should Know About Floating-point Arithmetic,"
-`ACM Computing Surveys' *23*, 1 (1991-03), 5-48. This is worth reading
-if you are interested in the details, but it does require a background
-in computer science.
+Until now, we have worked with data as either numbers or strings.
+Ultimately, however, computers represent everything in terms of "binary
+digits", or "bits". A decimal digit can take on any of 10 values: zero
+through nine. A binary digit can take on any of two values, zero or
+one. Using binary, computers (and computer software) can represent and
+manipulate numerical and character data. In general, the more bits you
+can use to represent a particular thing, the greater the range of
+possible values it can take on.
+
+ Modern computers support at least two, and often more, ways to do
+arithmetic. Each kind of arithmetic uses a different representation
+(organization of the bits) for the numbers. The kinds of arithmetic
+that interest us are:
+
+Decimal arithmetic
+ This is the kind of arithmetic you learned in elementary school,
+ using paper and pencil (and/or a calculator). In theory, numbers
+ can have an arbitrary number of digits on either side (or both
+ sides) of the decimal point, and the results of a computation are
+ always exact.
+
+ Some modern systems can do decimal arithmetic in hardware, but
+ usually you need a special software library to provide access to
+ these instructions. There are also libraries that do decimal
+ arithmetic entirely in software.
+
+ Despite the fact that some users expect `gawk' to be performing
+ decimal arithmetic,(1) it does not do so.
+
+Integer arithmetic
+ In school, integer values were referred to as "whole" numbers--that
+ is, numbers without any fractional part, such as 1, 42, or -17.
+ The advantage to integer numbers is that they represent values
+ exactly. The disadvantage is that their range is limited.
+
+ In computers, integer values come in two flavors: "signed" and
+ "unsigned". Signed values may be negative or positive, whereas
+ unsigned values are always positive (that is, greater than or equal
+ to zero).
+
+ In computer systems, integer arithmetic is exact, but the possible
+ range of values is limited. Integer arithmetic is generally
+ faster than floating point arithmetic.
+
+Floating point arithmetic
+ Floating-point numbers represent what were called in school "real"
+ numbers; i.e., those that have a fractional part, such as
+ 3.1415927. The advantage to floating-point numbers is that they
+ can represent a much larger range of values than can integers.
+ The disadvantage is that there are numbers that they cannot
+ represent exactly.
+
+ Modern systems support floating point arithmetic in hardware, with
+ a limited range of values. There are software libraries that allow
+ the use of arbitrary precision floating point calculations.
+
+ POSIX `awk' uses "double precision" floating-point numbers, which
+ can hold more digits than "single precision" floating-point
+ numbers. `gawk' has facilities for performing arbitrary precision
+ floating point arithmetic, which we describe in more detail
+ shortly.
+
+ Computers work with integer and floating point values of different
+ranges. Integer values are usually either 32 or 64 bits in size. Single
+precision floating point values occupy 32 bits, whereas double precision
+floating point values occupy 64 bits. Floating point values are always
+signed. The possible ranges of values are shown in the following table.
+
+Numeric representation Minimum value Maximum value
+---------------------------------------------------------------------------
+32-bit signed integer -2,147,483,648 2,147,483,647
+32-bit unsigned integer 0 4,294,967,295
+64-bit signed integer -9,223,372,036,854,775,808 9,223,372,036,854,775,807
+64-bit unsigned integer 0 18,446,744,073,709,551,615
+Single precision `1.175494e-38' `3.402823e+38'
+floating point
+(approximate)
+Double precision `2.225074e-308' `1.797693e+308'
+floating point
+(approximate)
-* Menu:
+ ---------- Footnotes ----------
-* String Conversion Precision:: The String Value Can Lie.
-* Unexpected Results:: Floating Point Numbers Are Not Abstract
- Numbers.
-* POSIX Floating Point Problems:: Standards Versus Existing Practice.
+ (1) We don't know why they expect this, but they do.

-File: gawk.info, Node: String Conversion Precision, Next: Unexpected Results, Up: Floating Point Issues
+File: gawk.info, Node: Math Definitions, Next: MPFR features, Prev: Computer Arithmetic, Up: Arbitrary Precision Arithmetic
-15.1.1.1 The String Value Can Lie
-.................................
+15.2 Other Stuff To Know
+========================
-Internally, `awk' keeps both the numeric value (double precision
-floating-point) and the string value for a variable. Separately, `awk'
-keeps track of what type the variable has (*note Typing and
-Comparison::), which plays a role in how variables are used in
-comparisons.
+The rest of this major node uses a number of terms. Here are some
+informal definitions that should help you work your way through the
+material here.
- It is important to note that the string value for a number may not
-reflect the full value (all the digits) that the numeric value actually
-contains. The following program, `values.awk', illustrates this:
+"Accuracy"
+ A floating-point calculation's accuracy is how close it comes to
+ the real (paper and pencil) value.
- {
- sum = $1 + $2
- # see it for what it is
- printf("sum = %.12g\n", sum)
- # use CONVFMT
- a = "<" sum ">"
- print "a =", a
- # use OFMT
- print "sum =", sum
- }
+"Error"
+ The difference between what the result of a computation "should be"
+ and what it actually is. It is best to minimize error as much as
+ possible.
-This program shows the full value of the sum of `$1' and `$2' using
-`printf', and then prints the string values obtained from both
-automatic conversion (via `CONVFMT') and from printing (via `OFMT').
+"Exponent"
+ The order of magnitude of a value; some number of bits in a
+ floating-point value store the exponent.
- Here is what happens when the program is run:
+"Inf"
+ A special value representing infinity. Operations involving another
+ number and infinity produce infinity.
- $ echo 3.654321 1.2345678 | awk -f values.awk
- -| sum = 4.8888888
- -| a = <4.88889>
- -| sum = 4.88889
+"NaN"
+ "Not A Number."(1) A special value that results from attempting a
+ calculation that has no answer as a real number. In such a case,
+ programs can either receive a floating-point exception, or get
+ `NaN' back as the result. The IEEE 754 standard recommends that
+ systems return `NaN'. Some examples:
- This makes it clear that the full numeric value is different from
-what the default string representations show.
+ `sqrt(-1)'
+ This makes sense in the range of complex numbers, but not in
+ the range of real numbers, so the result is `NaN'.
- `CONVFMT''s default value is `"%.6g"', which yields a value with at
-most six significant digits. For some applications, you might want to
-change it to specify more precision. On most modern machines, most of
-the time, 17 digits is enough to capture a floating-point number's
-value exactly.(1)
+ `log(-8)'
+ -8 is out of the domain of `log()', so the result is `NaN'.
- ---------- Footnotes ----------
+"Normalized"
+ How the significand (see later in this list) is usually stored. The
+ value is adjusted so that the first bit is one, and then that
+ leading one is assumed instead of physically stored. This
+ provides one extra bit of precision.
- (1) Pathological cases can require up to 752 digits (!), but we
-doubt that you need to worry about this.
+"Precision"
+ The number of bits used to represent a floating-point number. The
+ more bits, the more digits you can represent. Binary and decimal
+ precisions are related approximately, according to the formula:
-
-File: gawk.info, Node: Unexpected Results, Next: POSIX Floating Point Problems, Prev: String Conversion Precision, Up: Floating Point Issues
+ PREC = 3.322 * DPS
-15.1.1.2 Floating Point Numbers Are Not Abstract Numbers
-........................................................
+ Here, PREC denotes the binary precision (measured in bits) and DPS
+ (short for decimal places) is the decimal digits.
-Unlike numbers in the abstract sense (such as what you studied in high
-school or college arithmetic), numbers stored in computers are limited
-in certain ways. They cannot represent an infinite number of digits,
-nor can they always represent things exactly. In particular,
-floating-point numbers cannot always represent values exactly. Here is
-an example:
+"Rounding mode"
+ How numbers are rounded up or down when necessary. More details
+ are provided later.
- $ awk '{ printf("%010d\n", $1 * 100) }'
- 515.79
- -| 0000051579
- 515.80
- -| 0000051579
- 515.81
- -| 0000051580
- 515.82
- -| 0000051582
- Ctrl-d
+"Significand"
+ A floating point value consists of the significand multiplied by 10
+ to the power of the exponent. For example, in `1.2345e67', the
+ significand is `1.2345'.
-This shows that some values can be represented exactly, whereas others
-are only approximated. This is not a "bug" in `awk', but simply an
-artifact of how computers represent numbers.
+"Stability"
+ From the Wikipedia article on numerical stability
+ (http://en.wikipedia.org/wiki/Numerical_stability): "Calculations
+ that can be proven not to magnify approximation errors are called
+ "numerically stable"."
- NOTE: It cannot be emphasized enough that the behavior just
- described is fundamental to modern computers. You will see this
- kind of thing happen in _any_ programming language using hardware
- floating-point numbers. It is _not_ a bug in `gawk', nor is it
- something that can be "just fixed."
+ See the Wikipedia article on accuracy and precision
+(http://en.wikipedia.org/wiki/Accuracy_and_precision) for more
+information on some of those terms.
- Another peculiarity of floating-point numbers on modern systems is
-that they often have more than one representation for the number zero!
-In particular, it is possible to represent "minus zero" as well as
-regular, or "positive" zero.
+ On modern systems, floating-point hardware uses the representation
+and operations defined by the IEEE 754 standard. Three of the standard
+IEEE 754 types are 32-bit single precision, 64-bit double precision and
+128-bit quadruple precision. The standard also specifies extended
+precision formats to allow greater precisions and larger exponent
+ranges. (`awk' uses only the 64-bit double precision format.)
- This example shows that negative and positive zero are distinct
-values when stored internally, but that they are in fact equal to each
-other, as well as to "regular" zero:
-
- $ gawk 'BEGIN { mz = -0 ; pz = 0
- > printf "-0 = %g, +0 = %g, (-0 == +0) -> %d\n", mz, pz, mz == pz
- > printf "mz == 0 -> %d, pz == 0 -> %d\n", mz == 0, pz == 0
- > }'
- -| -0 = -0, +0 = 0, (-0 == +0) -> 1
- -| mz == 0 -> 1, pz == 0 -> 1
+ *note table-ieee-formats:: lists the precision and exponent field
+values for the basic IEEE 754 binary formats:
- It helps to keep this in mind should you process numeric data that
-contains negative zero values; the fact that the zero is negative is
-noted and can affect comparisons.
+Name Total bits Precision emin emax
+---------------------------------------------------------------------------
+Single 32 24 -126 +127
+Double 64 53 -1022 +1023
+Quadruple 128 113 -16382 +16383
-
-File: gawk.info, Node: POSIX Floating Point Problems, Prev: Unexpected Results, Up: Floating Point Issues
+Table 15.1: Basic IEEE Format Context Values
-15.1.1.3 Standards Versus Existing Practice
-...........................................
+ NOTE: The precision numbers include the implied leading one that
+ gives them one extra bit of significand.
-Historically, `awk' has converted any non-numeric looking string to the
-numeric value zero, when required. Furthermore, the original
-definition of the language and the original POSIX standards specified
-that `awk' only understands decimal numbers (base 10), and not octal
-(base 8) or hexadecimal numbers (base 16).
+ ---------- Footnotes ----------
- Changes in the language of the 2001 and 2004 POSIX standards can be
-interpreted to imply that `awk' should support additional features.
-These features are:
+ (1) Thanks to Michael Brennan for this description, which I have
+paraphrased, and for the examples.
- * Interpretation of floating point data values specified in
- hexadecimal notation (`0xDEADBEEF'). (Note: data values, _not_
- source code constants.)
+
+File: gawk.info, Node: MPFR features, Next: FP Math Caution, Prev: Math Definitions, Up: Arbitrary Precision Arithmetic
- * Support for the special IEEE 754 floating point values "Not A
- Number" (NaN), positive Infinity ("inf") and negative Infinity
- ("-inf"). In particular, the format for these values is as
- specified by the ISO 1999 C standard, which ignores case and can
- allow machine-dependent additional characters after the `nan' and
- allow either `inf' or `infinity'.
+15.3 Arbitrary Precision Arithmetic Features In `gawk'
+======================================================
- The first problem is that both of these are clear changes to
-historical practice:
+By default, `gawk' uses the double precision floating point values
+supplied by the hardware of the system it runs on. However, if it was
+compiled to do so, `gawk' uses the GNU MPFR (http://www.mpfr.org) and GNU
+MP (http://gmplib.org) (GMP) libraries for arbitrary precision
+arithmetic on numbers. You can see if MPFR support is available like
+so:
- * The `gawk' maintainer feels that supporting hexadecimal floating
- point values, in particular, is ugly, and was never intended by the
- original designers to be part of the language.
+ $ gawk --version
+ -| GNU Awk 4.1.1, API: 1.1 (GNU MPFR 3.1.0-p3, GNU MP 5.0.2)
+ -| Copyright (C) 1989, 1991-2014 Free Software Foundation.
+ ...
- * Allowing completely alphabetic strings to have valid numeric
- values is also a very severe departure from historical practice.
+(You may see different version numbers than what's shown here. That's
+OK; what's important is to see that GNU MPFR and GNU MP are listed in
+the output.)
- The second problem is that the `gawk' maintainer feels that this
-interpretation of the standard, which requires a certain amount of
-"language lawyering" to arrive at in the first place, was not even
-intended by the standard developers. In other words, "we see how you
-got where you are, but we don't think that that's where you want to be."
+ Additionally, there are a few elements available in the `PROCINFO'
+array to provide information about the MPFR and GMP libraries (*note
+Auto-set::).
+
+ The MPFR library provides precise control over precisions and
+rounding modes, and gives correctly rounded, reproducible,
+platform-independent results. With the `-M' command-line option, all
+floating-point arithmetic operators and numeric functions can yield
+results to any desired precision level supported by MPFR.
+
+ Two built-in variables, `PREC' and `ROUNDMODE', provide control over
+the working precision and the rounding mode. The precision and the
+rounding mode are set globally for every operation to follow. *Note
+Auto-set::, for more information.
- Recognizing the above issues, but attempting to provide compatibility
-with the earlier versions of the standard, the 2008 POSIX standard
-added explicit wording to allow, but not require, that `awk' support
-hexadecimal floating point values and special values for "Not A Number"
-and infinity.
+
+File: gawk.info, Node: FP Math Caution, Next: Arbitrary Precision Integers, Prev: MPFR features, Up: Arbitrary Precision Arithmetic
- Although the `gawk' maintainer continues to feel that providing
-those features is inadvisable, nevertheless, on systems that support
-IEEE floating point, it seems reasonable to provide _some_ way to
-support NaN and Infinity values. The solution implemented in `gawk' is
-as follows:
+15.4 Floating Point Arithmetic: Caveat Emptor!
+==============================================
- * With the `--posix' command-line option, `gawk' becomes "hands
- off." String values are passed directly to the system library's
- `strtod()' function, and if it successfully returns a numeric
- value, that is what's used.(1) By definition, the results are not
- portable across different systems. They are also a little
- surprising:
+ Math class is tough! -- Teen Talk Barbie, July 1992
- $ echo nanny | gawk --posix '{ print $1 + 0 }'
- -| nan
- $ echo 0xDeadBeef | gawk --posix '{ print $1 + 0 }'
- -| 3735928559
+ This minor node provides a high level overview of the issues
+involved when doing lots of floating-point arithmetic.(1) The
+discussion applies to both hardware and arbitrary-precision
+floating-point arithmetic.
- * Without `--posix', `gawk' interprets the four strings `+inf',
- `-inf', `+nan', and `-nan' specially, producing the corresponding
- special numeric values. The leading sign acts a signal to `gawk'
- (and the user) that the value is really numeric. Hexadecimal
- floating point is not supported (unless you also use
- `--non-decimal-data', which is _not_ recommended). For example:
+ CAUTION: The material here is purposely general. If you need to do
+ serious computer arithmetic, you should do some research first,
+ and not rely just on what we tell you.
- $ echo nanny | gawk '{ print $1 + 0 }'
- -| 0
- $ echo +nan | gawk '{ print $1 + 0 }'
- -| nan
- $ echo 0xDeadBeef | gawk '{ print $1 + 0 }'
- -| 0
+* Menu:
- `gawk' does ignore case in the four special values. Thus `+nan'
- and `+NaN' are the same.
+* Inexactness of computations:: Floating point math is not exact.
+* Getting Accuracy:: Getting more accuracy takes some work.
+* Try To Round:: Add digits and round.
+* Setting precision:: How to set the precision.
+* Setting the rounding mode:: How to set the rounding mode.
---------- Footnotes ----------
- (1) You asked for it, you got it.
+ (1) There is a very nice paper on floating-point arithmetic
+(http://www.validlab.com/goldberg/paper.pdf) by David Goldberg, "What
+Every Computer Scientist Should Know About Floating-point Arithmetic,"
+`ACM Computing Surveys' *23*, 1 (1991-03), 5-48. This is worth reading
+if you are interested in the details, but it does require a background
+in computer science.

-File: gawk.info, Node: Integer Programming, Prev: Floating Point Issues, Up: General Arithmetic
-
-15.1.2 Mixing Integers And Floating-point
------------------------------------------
+File: gawk.info, Node: Inexactness of computations, Next: Getting Accuracy, Up: FP Math Caution
-As has been mentioned already, `awk' uses hardware double precision
-with 64-bit IEEE binary floating-point representation for numbers on
-most systems. A large integer like 9,007,199,254,740,997 has a binary
-representation that, although finite, is more than 53 bits long; it
-must also be rounded to 53 bits. The biggest integer that can be
-stored in a C `double' is usually the same as the largest possible
-value of a `double'. If your system `double' is an IEEE 64-bit
-`double', this largest possible value is an integer and can be
-represented precisely. What more should one know about integers?
-
- If you want to know what is the largest integer, such that it and
-all smaller integers can be stored in 64-bit doubles without losing
-precision, then the answer is 2^53. The next representable number is
-the even number 2^53 + 2, meaning it is unlikely that you will be able
-to make `gawk' print 2^53 + 1 in integer format. The range of integers
-exactly representable by a 64-bit double is [-2^53, 2^53]. If you ever
-see an integer outside this range in `awk' using 64-bit doubles, you
-have reason to be very suspicious about the accuracy of the output.
-Here is a simple program with erroneous output:
-
- $ gawk 'BEGIN { i = 2^53 - 1; for (j = 0; j < 4; j++) print i + j }'
- -| 9007199254740991
- -| 9007199254740992
- -| 9007199254740992
- -| 9007199254740994
-
- The lesson is to not assume that any large integer printed by `awk'
-represents an exact result from your computation, especially if it wraps
-around on your screen.
-
-
-File: gawk.info, Node: Floating-point Programming, Next: Gawk and MPFR, Prev: General Arithmetic, Up: Arbitrary Precision Arithmetic
-
-15.2 Understanding Floating-point Programming
-=============================================
+15.4.1 Floating Point Arithmetic Is Not Exact
+---------------------------------------------
-Numerical programming is an extensive area; if you need to develop
-sophisticated numerical algorithms then `gawk' may not be the ideal
-tool, and this documentation may not be sufficient. It might require
-digesting a book or two(1) to really internalize how to compute with
-ideal accuracy and precision, and the result often depends on the
-particular application.
-
- NOTE: A floating-point calculation's "accuracy" is how close it
- comes to the real value. This is as opposed to the "precision",
- which usually refers to the number of bits used to represent the
- number (see the Wikipedia article
- (http://en.wikipedia.org/wiki/Accuracy_and_precision) for more
- information).
-
- There are two options for doing floating-point calculations:
-hardware floating-point (as used by standard `awk' and the default for
-`gawk'), and "arbitrary-precision" floating-point, which is software
-based. From this point forward, this major node aims to provide enough
-information to understand both, and then will focus on `gawk''s
-facilities for the latter.(2)
-
- Binary floating-point representations and arithmetic are inexact.
+Binary floating-point representations and arithmetic are inexact.
Simple values like 0.1 cannot be precisely represented using binary
floating-point numbers, and the limited precision of floating-point
numbers means that slight changes in the order of operations or the
@@ -20867,9 +21770,21 @@ matters worse, with arbitrary precision floating-point, you can set the
precision before starting a computation, but then you cannot be sure of
the number of significant decimal places in the final result.
- Sometimes, before you start to write any code, you should think more
-about what you really want and what's really happening. Consider the
-two numbers in the following example:
+* Menu:
+
+* Inexact representation:: Numbers are not exactly represented.
+* Comparing FP Values:: How to compare floating point values.
+* Errors accumulate:: Errors get bigger as they go.
+
+
+File: gawk.info, Node: Inexact representation, Next: Comparing FP Values, Up: Inexactness of computations
+
+15.4.1.1 Many Numbers Cannot Be Represented Exactly
+...................................................
+
+So, before you start to write any code, you should think about what you
+really want and what's really happening. Consider the two numbers in
+the following example:
x = 0.875 # 1/2 + 1/4 + 1/8
y = 0.425
@@ -20892,20 +21807,44 @@ you can always specify how much precision you would like in your output.
Usually this is a format string like `"%.15g"', which when used in the
previous example, produces an output identical to the input.
- Because the underlying representation can be a little bit off from
-the exact value, comparing floating-point values to see if they are
-equal is generally not a good idea. Here is an example where it does
-not work like you expect:
+
+File: gawk.info, Node: Comparing FP Values, Next: Errors accumulate, Prev: Inexact representation, Up: Inexactness of computations
+
+15.4.1.2 Be Careful Comparing Values
+....................................
+
+Because the underlying representation can be a little bit off from the
+exact value, comparing floating-point values to see if they are exactly
+equal is generally a bad idea. Here is an example where it does not
+work like you would expect:
$ gawk 'BEGIN { print (0.1 + 12.2 == 12.3) }'
-| 0
- The loss of accuracy during a single computation with floating-point
+ The general wisdom when comparing floating-point values is to see if
+they are within some small range of each other (called a "delta", or
+"tolerance"). You have to decide how small a delta is important to
+you. Code to do this looks something like this:
+
+ delta = 0.00001 # for example
+ difference = abs(a - b) # subtract the two values
+ if (difference < delta)
+ # all ok
+ else
+ # not ok
+
+
+File: gawk.info, Node: Errors accumulate, Prev: Comparing FP Values, Up: Inexactness of computations
+
+15.4.1.3 Errors Accumulate
+..........................
+
+The loss of accuracy during a single computation with floating-point
numbers usually isn't enough to worry about. However, if you compute a
value which is the result of a sequence of floating point operations,
the error can accumulate and greatly affect the computation itself.
-Here is an attempt to compute the value of the constant pi using one of
-its many series representations:
+Here is an attempt to compute the value of pi using one of its many
+series representations:
BEGIN {
x = 1.0 / sqrt(3.0)
@@ -20917,9 +21856,9 @@ its many series representations:
}
}
- When run, the early errors propagating through later computations
-cause the loop to terminate prematurely after an attempt to divide by
-zero.
+ When run, the early errors propagate through later computations,
+causing the loop to terminate prematurely after attempting to divide by
+zero:
$ gawk -f pi.awk
-| 3.215390309173475
@@ -20942,166 +21881,176 @@ representations yield an unexpected result:
> }'
-| 4
- Can computation using arbitrary precision help with the previous
-examples? If you are impatient to know, see *note Exact Arithmetic::.
+
+File: gawk.info, Node: Getting Accuracy, Next: Try To Round, Prev: Inexactness of computations, Up: FP Math Caution
- Instead of arbitrary precision floating-point arithmetic, often all
-you need is an adjustment of your logic or a different order for the
-operations in your calculation. The stability and the accuracy of the
-computation of the constant pi in the earlier example can be enhanced
-by using the following simple algebraic transformation:
+15.4.2 Getting The Accuracy You Need
+------------------------------------
- (sqrt(x * x + 1) - 1) / x = x / (sqrt(x * x + 1) + 1)
+Can arbitrary precision arithmetic give exact results? There are no
+easy answers. The standard rules of algebra often do not apply when
+using floating-point arithmetic. Among other things, the distributive
+and associative laws do not hold completely, and order of operation may
+be important for your computation. Rounding error, cumulative precision
+loss and underflow are often troublesome.
-After making this, change the program does converge to pi in under 30
-iterations:
+ When `gawk' tests the expressions `0.1 + 12.2' and `12.3' for
+equality using the machine double precision arithmetic, it decides that
+they are not equal! (*Note Comparing FP Values::.) You can get the
+result you want by increasing the precision; 56 bits in this case does
+the job:
- $ gawk -f pi2.awk
- -| 3.215390309173473
- -| 3.159659942097501
- -| 3.146086215131436
- -| 3.142714599645370
- -| 3.141873049979825
- ...
- -| 3.141592653589797
- -| 3.141592653589797
+ $ gawk -M -v PREC=56 'BEGIN { print (0.1 + 12.2 == 12.3) }'
+ -| 1
- There is no need to be unduly suspicious about the results from
-floating-point arithmetic. The lesson to remember is that
-floating-point arithmetic is always more complex than arithmetic using
-pencil and paper. In order to take advantage of the power of computer
-floating-point, you need to know its limitations and work within them.
-For most casual use of floating-point arithmetic, you will often get
-the expected result in the end if you simply round the display of your
-final results to the correct number of significant decimal digits.
+ If adding more bits is good, perhaps adding even more bits of
+precision is better? Here is what happens if we use an even larger
+value of `PREC':
- As general advice, avoid presenting numerical data in a manner that
-implies better precision than is actually the case.
+ $ gawk -M -v PREC=201 'BEGIN { print (0.1 + 12.2 == 12.3) }'
+ -| 0
-* Menu:
+ This is not a bug in `gawk' or in the MPFR library. It is easy to
+forget that the finite number of bits used to store the value is often
+just an approximation after proper rounding. The test for equality
+succeeds if and only if _all_ bits in the two operands are exactly the
+same. Since this is not necessarily true after floating-point
+computations with a particular precision and effective rounding rule, a
+straight test for equality may not work. Instead, compare the two
+numbers to see if they are within the desirable delta of each other.
-* Floating-point Representation:: Binary floating-point representation.
-* Floating-point Context:: Floating-point context.
-* Rounding Mode:: Floating-point rounding mode.
+ In applications where 15 or fewer decimal places suffice, hardware
+double precision arithmetic can be adequate, and is usually much faster.
+But you need to keep in mind that every floating-point operation can
+suffer a new rounding error with catastrophic consequences as
+illustrated by our earlier attempt to compute the value of pi. Extra
+precision can greatly enhance the stability and the accuracy of your
+computation in such cases.
- ---------- Footnotes ----------
+ Repeated addition is not necessarily equivalent to multiplication in
+floating-point arithmetic. In the example in *note Errors accumulate:::
- (1) One recommended title is `Numerical Computing with IEEE Floating
-Point Arithmetic', Michael L. Overton, Society for Industrial and
-Applied Mathematics, 2004. ISBN: 0-89871-482-6, ISBN-13:
-978-0-89871-482-1. See `http://www.cs.nyu.edu/cs/faculty/overton/book'.
+ $ gawk 'BEGIN {
+ > for (d = 1.1; d <= 1.5; d += 0.1) # loop five times (?)
+ > i++
+ > print i
+ > }'
+ -| 4
- (2) If you are interested in other tools that perform arbitrary
-precision arithmetic, you may want to investigate the POSIX `bc' tool.
-See the POSIX specification for it
-(http://pubs.opengroup.org/onlinepubs/009695399/utilities/bc.html), for
-more information.
+you may or may not succeed in getting the correct result by choosing an
+arbitrarily large value for `PREC'. Reformulation of the problem at
+hand is often the correct approach in such situations.

-File: gawk.info, Node: Floating-point Representation, Next: Floating-point Context, Up: Floating-point Programming
-
-15.2.1 Binary Floating-point Representation
--------------------------------------------
-
-Although floating-point representations vary from machine to machine,
-the most commonly encountered representation is that defined by the
-IEEE 754 Standard. An IEEE-754 format value has three components:
+File: gawk.info, Node: Try To Round, Next: Setting precision, Prev: Getting Accuracy, Up: FP Math Caution
- * A sign bit telling whether the number is positive or negative.
+15.4.3 Try A Few Extra Bits of Precision and Rounding
+-----------------------------------------------------
- * An "exponent", E, giving its order of magnitude.
+Instead of arbitrary precision floating-point arithmetic, often all you
+need is an adjustment of your logic or a different order for the
+operations in your calculation. The stability and the accuracy of the
+computation of pi in the earlier example can be enhanced by using the
+following simple algebraic transformation:
- * A "significand", S, specifying the actual digits of the number.
+ (sqrt(x * x + 1) - 1) / x == x / (sqrt(x * x + 1) + 1)
- The value of the number is then S * 2^E. The first bit of a
-non-zero binary significand is always one, so the significand in an
-IEEE-754 format only includes the fractional part, leaving the leading
-one implicit. The significand is stored in "normalized" format, which
-means that the first bit is always a one.
+After making this change, the program converges to pi in under 30
+iterations:
- Three of the standard IEEE-754 types are 32-bit single precision,
-64-bit double precision and 128-bit quadruple precision. The standard
-also specifies extended precision formats to allow greater precisions
-and larger exponent ranges.
+ $ gawk -f pi2.awk
+ -| 3.215390309173473
+ -| 3.159659942097501
+ -| 3.146086215131436
+ -| 3.142714599645370
+ -| 3.141873049979825
+ ...
+ -| 3.141592653589797
+ -| 3.141592653589797

-File: gawk.info, Node: Floating-point Context, Next: Rounding Mode, Prev: Floating-point Representation, Up: Floating-point Programming
-
-15.2.2 Floating-point Context
------------------------------
-
-A floating-point "context" defines the environment for arithmetic
-operations. It governs precision, sets rules for rounding, and limits
-the range for exponents. The context has the following primary
-components:
-
-"Precision"
- Precision of the floating-point format in bits.
-
-"emax"
- Maximum exponent allowed for the format.
-
-"emin"
- Minimum exponent allowed for the format.
+File: gawk.info, Node: Setting precision, Next: Setting the rounding mode, Prev: Try To Round, Up: FP Math Caution
-"Underflow behavior"
- The format may or may not support gradual underflow.
-
-"Rounding"
- The rounding mode of the context.
+15.4.4 Setting The Precision
+----------------------------
- *note table-ieee-formats:: lists the precision and exponent field
-values for the basic IEEE-754 binary formats:
+`gawk' uses a global working precision; it does not keep track of the
+precision or accuracy of individual numbers. Performing an arithmetic
+operation or calling a built-in function rounds the result to the
+current working precision. The default working precision is 53 bits,
+which you can modify using the built-in variable `PREC'. You can also
+set the value to one of the predefined case-insensitive strings shown
+in *note table-predefined-precision-strings::, to emulate an IEEE 754
+binary format.
-Name Total bits Precision emin emax
----------------------------------------------------------------------------
-Single 32 24 -126 +127
-Double 64 53 -1022 +1023
-Quadruple 128 113 -16382 +16383
+`PREC' IEEE 754 Binary Format
+---------------------------------------------------
+`"half"' 16-bit half-precision.
+`"single"' Basic 32-bit single precision.
+`"double"' Basic 64-bit double precision.
+`"quad"' Basic 128-bit quadruple precision.
+`"oct"' 256-bit octuple precision.
-Table 15.1: Basic IEEE Format Context Values
+Table 15.2: Predefined Precision Strings For `PREC'
- NOTE: The precision numbers include the implied leading one that
- gives them one extra bit of significand.
+ The following example illustrates the effects of changing precision
+on arithmetic operations:
- A floating-point context can also determine which signals are treated
-as exceptions, and can set rules for arithmetic with special values.
-Please consult the IEEE-754 standard or other resources for details.
+ $ gawk -M -v PREC=100 'BEGIN { x = 1.0e-400; print x + 0
+ > PREC = "double"; print x + 0 }'
+ -| 1e-400
+ -| 0
- `gawk' ordinarily uses the hardware double precision representation
-for numbers. On most systems, this is IEEE-754 floating-point format,
-corresponding to 64-bit binary with 53 bits of precision.
+ CAUTION: Be wary of floating-point constants! When reading a
+ floating-point constant from program source code, `gawk' uses the
+ default precision (that of a C `double'), unless overridden by an
+ assignment to the special variable `PREC' on the command line, to
+ store it internally as an MPFR number. Changing the precision
+ using `PREC' in the program text does _not_ change the precision
+ of a constant.
+
+ If you need to represent a floating-point constant at a higher
+ precision than the default and cannot use a command-line
+ assignment to `PREC', you should either specify the constant as a
+ string, or as a rational number, whenever possible. The following
+ example illustrates the differences among various ways to print a
+ floating-point constant:
- NOTE: In case an underflow occurs, the standard allows, but does
- not require, the result from an arithmetic operation to be a
- number smaller than the smallest nonzero normalized number. Such
- numbers do not have as many significant digits as normal numbers,
- and are called "denormals" or "subnormals". The alternative,
- simply returning a zero, is called "flush to zero". The basic
- IEEE-754 binary formats support subnormal numbers.
+ $ gawk -M 'BEGIN { PREC = 113; printf("%0.25f\n", 0.1) }'
+ -| 0.1000000000000000055511151
+ $ gawk -M -v PREC=113 'BEGIN { printf("%0.25f\n", 0.1) }'
+ -| 0.1000000000000000000000000
+ $ gawk -M 'BEGIN { PREC = 113; printf("%0.25f\n", "0.1") }'
+ -| 0.1000000000000000000000000
+ $ gawk -M 'BEGIN { PREC = 113; printf("%0.25f\n", 1/10) }'
+ -| 0.1000000000000000000000000

-File: gawk.info, Node: Rounding Mode, Prev: Floating-point Context, Up: Floating-point Programming
+File: gawk.info, Node: Setting the rounding mode, Prev: Setting precision, Up: FP Math Caution
-15.2.3 Floating-point Rounding Mode
------------------------------------
+15.4.5 Setting The Rounding Mode
+--------------------------------
-The "rounding mode" specifies the behavior for the results of numerical
-operations when discarding extra precision. Each rounding mode indicates
-how the least significant returned digit of a rounded result is to be
-calculated. *note table-rounding-modes:: lists the IEEE-754 defined
-rounding modes:
+The `ROUNDMODE' variable provides program level control over the
+rounding mode. The correspondence between `ROUNDMODE' and the IEEE
+rounding modes is shown in *note table-gawk-rounding-modes::.
-Rounding Mode IEEE Name
---------------------------------------------------------------------------
-Round to nearest, ties to even `roundTiesToEven'
-Round toward plus Infinity `roundTowardPositive'
-Round toward negative Infinity `roundTowardNegative'
-Round toward zero `roundTowardZero'
-Round to nearest, ties away `roundTiesToAway'
-from zero
+Rounding Mode IEEE Name `ROUNDMODE'
+---------------------------------------------------------------------------
+Round to nearest, ties to even `roundTiesToEven' `"N"' or `"n"'
+Round toward plus Infinity `roundTowardPositive' `"U"' or `"u"'
+Round toward negative Infinity `roundTowardNegative' `"D"' or `"d"'
+Round toward zero `roundTowardZero' `"Z"' or `"z"'
+Round to nearest, ties away `roundTiesToAway' `"A"' or `"a"'
+from zero
+
+Table 15.3: `gawk' Rounding Modes
-Table 15.2: IEEE 754 Rounding Modes
+ `ROUNDMODE' has the default value `"N"', which selects the IEEE 754
+rounding mode `roundTiesToEven'. In *note Table 15.3:
+table-gawk-rounding-modes, the value `"A"' selects `roundTiesToAway'.
+This is only available if your version of the MPFR library supports it;
+otherwise setting `ROUNDMODE' to `"A"' has no effect.
The default mode `roundTiesToEven' is the most preferred, but the
least intuitive. This method does the obvious thing for most values, by
@@ -21136,20 +22085,19 @@ produces the following output when run on the author's system:(1)
3.5 => 4
4.5 => 4
- The theory behind the rounding mode `roundTiesToEven' is that it
-more or less evenly distributes upward and downward rounds of exact
-halves, which might cause any round-off error to cancel itself out.
-This is the default rounding mode used in IEEE-754 computing functions
-and operators.
+ The theory behind `roundTiesToEven' is that it more or less evenly
+distributes upward and downward rounds of exact halves, which might
+cause any accumulating round-off error to cancel itself out. This is the
+default rounding mode for IEEE 754 computing functions and operators.
The other rounding modes are rarely used. Round toward positive
infinity (`roundTowardPositive') and round toward negative infinity
-(`roundTowardNegative') are often used to implement interval arithmetic,
-where you adjust the rounding mode to calculate upper and lower bounds
-for the range of output. The `roundTowardZero' mode can be used for
-converting floating-point numbers to integers. The rounding mode
-`roundTiesToAway' rounds the result to the nearest number and selects
-the number with the larger magnitude if a tie occurs.
+(`roundTowardNegative') are often used to implement interval
+arithmetic, where you adjust the rounding mode to calculate upper and
+lower bounds for the range of output. The `roundTowardZero' mode can be
+used for converting floating-point numbers to integers. The rounding
+mode `roundTiesToAway' rounds the result to the nearest number and
+selects the number with the larger magnitude if a tie occurs.
Some numerical analysts will tell you that your choice of rounding
style has tremendous impact on the final outcome, and advise you to
@@ -21158,418 +22106,255 @@ round-off error problems by setting the precision initially to some
value sufficiently larger than the final desired precision, so that the
accumulation of round-off error does not influence the outcome. If you
suspect that results from your computation are sensitive to
-accumulation of round-off error, one way to be sure is to look for a
-significant difference in output when you change the rounding mode.
+accumulation of round-off error, you can confirm this by looking for a
+significant difference in output when you change the rounding mode.
---------- Footnotes ----------
(1) It is possible for the output to be completely different if the
-C library in your system does not use the IEEE-754 even-rounding rule
+C library in your system does not use the IEEE 754 even-rounding rule
to round halfway cases for `printf'.

-File: gawk.info, Node: Gawk and MPFR, Next: Arbitrary Precision Floats, Prev: Floating-point Programming, Up: Arbitrary Precision Arithmetic
+File: gawk.info, Node: Arbitrary Precision Integers, Next: POSIX Floating Point Problems, Prev: FP Math Caution, Up: Arbitrary Precision Arithmetic
-15.3 `gawk' + MPFR = Powerful Arithmetic
-========================================
-
-The rest of this major node describes how to use the arbitrary precision
-(also known as "multiple precision" or "infinite precision") numeric
-capabilities in `gawk' to produce maximally accurate results when you
-need it.
+15.5 Arbitrary Precision Integer Arithmetic with `gawk'
+=======================================================
- But first you should check if your version of `gawk' supports
-arbitrary precision arithmetic. The easiest way to find out is to look
-at the output of the following command:
+When given the `-M' option, `gawk' performs all integer arithmetic
+using GMP arbitrary precision integers. Any number that looks like an
+integer in a source or data file is stored as an arbitrary precision
+integer. The size of the integer is limited only by the available
+memory. For example, the following computes 5^4^3^2, the result of
+which is beyond the limits of ordinary hardware double-precision
+floating point values:
- $ ./gawk --version
- -| GNU Awk 4.1.1, API: 1.1 (GNU MPFR 3.1.0-p3, GNU MP 5.0.2)
- -| Copyright (C) 1989, 1991-2014 Free Software Foundation.
- ...
+ $ gawk -M 'BEGIN {
+ > x = 5^4^3^2
+ > print "# of digits =", length(x)
+ > print substr(x, 1, 20), "...", substr(x, length(x) - 19, 20)
+ > }'
+ -| # of digits = 183231
+ -| 62060698786608744707 ... 92256259918212890625
-(You may see different version numbers than what's shown here. That's
-OK; what's important is to see that GNU MPFR and GNU MP are listed in
-the output.)
+ If instead you were to compute the same value using arbitrary
+precision floating-point values, the precision needed for correct
+output (using the formula `prec = 3.322 * dps'), would be 3.322 x
+183231, or 608693.
- `gawk' uses the GNU MPFR (http://www.mpfr.org) and GNU MP
-(http://gmplib.org) (GMP) libraries for arbitrary precision arithmetic
-on numbers. So if you do not see the names of these libraries in the
-output, then your version of `gawk' does not support arbitrary
-precision arithmetic.
+ The result from an arithmetic operation with an integer and a
+floating-point value is a floating-point value with a precision equal
+to the working precision. The following program calculates the eighth
+term in Sylvester's sequence(1) using a recurrence:
- Additionally, there are a few elements available in the `PROCINFO'
-array to provide information about the MPFR and GMP libraries. *Note
-Auto-set::, for more information.
+ $ gawk -M 'BEGIN {
+ > s = 2.0
+ > for (i = 1; i <= 7; i++)
+ > s = s * (s - 1) + 1
+ > print s
+ > }'
+ -| 113423713055421845118910464
-
-File: gawk.info, Node: Arbitrary Precision Floats, Next: Arbitrary Precision Integers, Prev: Gawk and MPFR, Up: Arbitrary Precision Arithmetic
-
-15.4 Arbitrary Precision Floating-point Arithmetic with `gawk'
-==============================================================
-
-`gawk' uses the GNU MPFR library for arbitrary precision floating-point
-arithmetic. The MPFR library provides precise control over precisions
-and rounding modes, and gives correctly rounded, reproducible,
-platform-independent results. With one of the command-line options
-`--bignum' or `-M', all floating-point arithmetic operators and numeric
-functions can yield results to any desired precision level supported by
-MPFR. Two built-in variables, `PREC' and `ROUNDMODE', provide control
-over the working precision and the rounding mode (*note Setting
-Precision::, and *note Setting Rounding Mode::). The precision and the
-rounding mode are set globally for every operation to follow.
-
- The default working precision for arbitrary precision floating-point
-values is 53 bits, and the default value for `ROUNDMODE' is `"N"',
-which selects the IEEE-754 `roundTiesToEven' rounding mode (*note
-Rounding Mode::).(1) `gawk' uses the default exponent range in MPFR
-(EMAX = 2^30 - 1, EMIN = -EMAX) for all floating-point contexts. There
-is no explicit mechanism to adjust the exponent range. MPFR does not
-implement subnormal numbers by default, and this behavior cannot be
-changed in `gawk'.
-
- NOTE: When emulating an IEEE-754 format (*note Setting
- Precision::), `gawk' internally adjusts the exponent range to the
- value defined for the format and also performs computations needed
- for gradual underflow (subnormal numbers).
-
- NOTE: MPFR numbers are variable-size entities, consuming only as
- much space as needed to store the significant digits. Since the
- performance using MPFR numbers pales in comparison to doing
- arithmetic using the underlying machine types, you should consider
- using only as much precision as needed by your program.
+ The output differs from the actual number,
+113,423,713,055,421,844,361,000,443, because the default precision of
+53 bits is not enough to represent the floating-point results exactly.
+You can either increase the precision (100 bits is enough in this
+case), or replace the floating-point constant `2.0' with an integer, to
+perform all computations using integer arithmetic to get the correct
+output.
-* Menu:
+ Sometimes `gawk' must implicitly convert an arbitrary precision
+integer into an arbitrary precision floating-point value. This is
+primarily because the MPFR library does not always provide the relevant
+interface to process arbitrary precision integers or mixed-mode numbers
+as needed by an operation or function. In such a case, the precision is
+set to the minimum value necessary for exact conversion, and the working
+precision is not used for this purpose. If this is not what you need or
+want, you can employ a subterfuge, and convert the integer to floating
+point first, like this:
-* Setting Precision:: Setting the working precision.
-* Setting Rounding Mode:: Setting the rounding mode.
-* Floating-point Constants:: Representing floating-point constants.
-* Changing Precision:: Changing the precision of a number.
-* Exact Arithmetic:: Exact arithmetic with floating-point numbers.
+ gawk -M 'BEGIN { n = 13; print (n + 0.0) % 2.0 }'
- ---------- Footnotes ----------
+ You can avoid this issue altogether by specifying the number as a
+floating-point value to begin with:
- (1) The default precision is 53 bits, since according to the MPFR
-documentation, the library should be able to exactly reproduce all
-computations with double-precision machine floating-point numbers
-(`double' type in C), except the default exponent range is much wider
-and subnormal numbers are not implemented.
+ gawk -M 'BEGIN { n = 13.0; print n % 2.0 }'
-
-File: gawk.info, Node: Setting Precision, Next: Setting Rounding Mode, Up: Arbitrary Precision Floats
+ Note that for the particular example above, it is likely best to
+just use the following:
-15.4.1 Setting the Working Precision
-------------------------------------
+ gawk -M 'BEGIN { n = 13; print n % 2 }'
-`gawk' uses a global working precision; it does not keep track of the
-precision or accuracy of individual numbers. Performing an arithmetic
-operation or calling a built-in function rounds the result to the
-current working precision. The default working precision is 53 bits,
-which can be modified using the built-in variable `PREC'. You can also
-set the value to one of the pre-defined case-insensitive strings shown
-in *note table-predefined-precision-strings::, to emulate an IEEE-754
-binary format.
+ When dividing two arbitrary precision integers with either `/' or
+`%', the result is typically an arbitrary precision floating point
+value (unless the denominator evenly divides into the numerator). In
+order to do integer division or remainder with arbitrary precision
+integers, use the built-in `div()' function (*note Numeric Functions::).
-`PREC' IEEE-754 Binary Format
----------------------------------------------------
-`"half"' 16-bit half-precision.
-`"single"' Basic 32-bit single precision.
-`"double"' Basic 64-bit double precision.
-`"quad"' Basic 128-bit quadruple precision.
-`"oct"' 256-bit octuple precision.
+ You can simulate the `div()' function in standard `awk' using this
+user-defined function:
-Table 15.3: Predefined precision strings for `PREC'
+ # div --- do integer division
- The following example illustrates the effects of changing precision
-on arithmetic operations:
+ function div(numerator, denominator, result, i)
+ {
+ split("", result)
- $ gawk -M -v PREC=100 'BEGIN { x = 1.0e-400; print x + 0
- > PREC = "double"; print x + 0 }'
- -| 1e-400
- -| 0
+ numerator = int(numerator)
+ denominator = int(denominator)
+ result["quotient"] = int(numerator / denominator)
+ result["remainder"] = int(numerator % denominator)
- Binary and decimal precisions are related approximately, according
-to the formula:
-
- PREC = 3.322 * DPS
-
-Here, PREC denotes the binary precision (measured in bits) and DPS
-(short for decimal places) is the decimal digits. We can easily
-calculate how many decimal digits the 53-bit significand of an IEEE
-double is equivalent to: 53 / 3.322 which is equal to about 15.95. But
-what does 15.95 digits actually mean? It depends whether you are
-concerned about how many digits you can rely on, or how many digits you
-need.
-
- It is important to know how many bits it takes to uniquely identify
-a double-precision value (the C type `double'). If you want to convert
-from `double' to decimal and back to `double' (e.g., saving a `double'
-representing an intermediate result to a file, and later reading it
-back to restart the computation), then a few more decimal digits are
-required. 17 digits is generally enough for a `double'.
-
- It can also be important to know what decimal numbers can be uniquely
-represented with a `double'. If you want to convert from decimal to
-`double' and back again, 15 digits is the most that you can get. Stated
-differently, you should not present the numbers from your
-floating-point computations with more than 15 significant digits in
-them.
+ return 0.0
+ }
- Conversely, it takes a precision of 332 bits to hold an approximation
-of the constant pi that is accurate to 100 decimal places.
+ ---------- Footnotes ----------
- You should always add some extra bits in order to avoid the
-confusing round-off issues that occur because numbers are stored
-internally in binary.
+ (1) Weisstein, Eric W. `Sylvester's Sequence'. From MathWorld--A
+Wolfram Web Resource
+(`http://mathworld.wolfram.com/SylvestersSequence.html').

-File: gawk.info, Node: Setting Rounding Mode, Next: Floating-point Constants, Prev: Setting Precision, Up: Arbitrary Precision Floats
+File: gawk.info, Node: POSIX Floating Point Problems, Next: Floating point summary, Prev: Arbitrary Precision Integers, Up: Arbitrary Precision Arithmetic
-15.4.2 Setting the Rounding Mode
---------------------------------
-
-The `ROUNDMODE' variable provides program level control over the
-rounding mode. The correspondence between `ROUNDMODE' and the IEEE
-rounding modes is shown in *note table-gawk-rounding-modes::.
+15.6 Standards Versus Existing Practice
+=======================================
-Rounding Mode IEEE Name `ROUNDMODE'
----------------------------------------------------------------------------
-Round to nearest, ties to even `roundTiesToEven' `"N"' or `"n"'
-Round toward plus Infinity `roundTowardPositive' `"U"' or `"u"'
-Round toward negative Infinity `roundTowardNegative' `"D"' or `"d"'
-Round toward zero `roundTowardZero' `"Z"' or `"z"'
-Round to nearest, ties away `roundTiesToAway' `"A"' or `"a"'
-from zero
+Historically, `awk' has converted any non-numeric looking string to the
+numeric value zero, when required. Furthermore, the original
+definition of the language and the original POSIX standards specified
+that `awk' only understands decimal numbers (base 10), and not octal
+(base 8) or hexadecimal numbers (base 16).
-Table 15.4: `gawk' Rounding Modes
+ Changes in the language of the 2001 and 2004 POSIX standards can be
+interpreted to imply that `awk' should support additional features.
+These features are:
- `ROUNDMODE' has the default value `"N"', which selects the IEEE-754
-rounding mode `roundTiesToEven'. In *note Table 15.4:
-table-gawk-rounding-modes, `"A"' is listed to select the IEEE-754 mode
-`roundTiesToAway'. This is only available if your version of the MPFR
-library supports it; otherwise setting `ROUNDMODE' to this value has no
-effect. *Note Rounding Mode::, for the meanings of the various rounding
-modes.
+ * Interpretation of floating point data values specified in
+ hexadecimal notation (e.g., `0xDEADBEEF'). (Note: data values,
+ _not_ source code constants.)
- Here is an example of how to change the default rounding behavior of
-`printf''s output:
+ * Support for the special IEEE 754 floating point values "Not A
+ Number" (NaN), positive Infinity ("inf") and negative Infinity
+ ("-inf"). In particular, the format for these values is as
+ specified by the ISO 1999 C standard, which ignores case, can
+ allow implementation-dependent additional characters after the
+ `nan', and allows either `inf' or `infinity'.
- $ gawk -M -v ROUNDMODE="Z" 'BEGIN { printf("%.2f\n", 1.378) }'
- -| 1.37
+ The first problem is that both of these are clear changes to
+historical practice:
-
-File: gawk.info, Node: Floating-point Constants, Next: Changing Precision, Prev: Setting Rounding Mode, Up: Arbitrary Precision Floats
+ * The `gawk' maintainer feels that supporting hexadecimal floating
+ point values, in particular, is ugly, and was never intended by the
+ original designers to be part of the language.
-15.4.3 Representing Floating-point Constants
---------------------------------------------
+ * Allowing completely alphabetic strings to have valid numeric
+ values is also a very severe departure from historical practice.
-Be wary of floating-point constants! When reading a floating-point
-constant from program source code, `gawk' uses the default precision,
-unless overridden by an assignment to the special variable `PREC' on
-the command line, to store it internally as a MPFR number. Changing
-the precision using `PREC' in the program text does _not_ change the
-precision of a constant. If you need to represent a floating-point
-constant at a higher precision than the default and cannot use a
-command line assignment to `PREC', you should either specify the
-constant as a string, or as a rational number, whenever possible. The
-following example illustrates the differences among various ways to
-print a floating-point constant:
+ The second problem is that the `gawk' maintainer feels that this
+interpretation of the standard, which requires a certain amount of
+"language lawyering" to arrive at in the first place, was not even
+intended by the standard developers. In other words, "we see how you
+got where you are, but we don't think that that's where you want to be."
- $ gawk -M 'BEGIN { PREC = 113; printf("%0.25f\n", 0.1) }'
- -| 0.1000000000000000055511151
- $ gawk -M -v PREC=113 'BEGIN { printf("%0.25f\n", 0.1) }'
- -| 0.1000000000000000000000000
- $ gawk -M 'BEGIN { PREC = 113; printf("%0.25f\n", "0.1") }'
- -| 0.1000000000000000000000000
- $ gawk -M 'BEGIN { PREC = 113; printf("%0.25f\n", 1/10) }'
- -| 0.1000000000000000000000000
+ Recognizing the above issues, but attempting to provide compatibility
+with the earlier versions of the standard, the 2008 POSIX standard
+added explicit wording to allow, but not require, that `awk' support
+hexadecimal floating point values and special values for "Not A Number"
+and infinity.
- In the first case, the number is stored with the default precision
-of 53 bits.
+ Although the `gawk' maintainer continues to feel that providing
+those features is inadvisable, nevertheless, on systems that support
+IEEE floating point, it seems reasonable to provide _some_ way to
+support NaN and Infinity values. The solution implemented in `gawk' is
+as follows:
-
-File: gawk.info, Node: Changing Precision, Next: Exact Arithmetic, Prev: Floating-point Constants, Up: Arbitrary Precision Floats
+ * With the `--posix' command-line option, `gawk' becomes "hands
+ off." String values are passed directly to the system library's
+ `strtod()' function, and if it successfully returns a numeric
+ value, that is what's used.(1) By definition, the results are not
+ portable across different systems. They are also a little
+ surprising:
-15.4.4 Changing the Precision of a Number
------------------------------------------
+ $ echo nanny | gawk --posix '{ print $1 + 0 }'
+ -| nan
+ $ echo 0xDeadBeef | gawk --posix '{ print $1 + 0 }'
+ -| 3735928559
- The point is that in any variable-precision package, a decision is
- made on how to treat numbers given as data, or arising in
- intermediate results, which are represented in floating-point
- format to a precision lower than working precision. Do we promote
- them to full membership of the high-precision club, or do we treat
- them and all their associates as second-class citizens? Sometimes
- the first course is proper, sometimes the second, and it takes
- careful analysis to tell which.(1) -- Dirk Laurie
-
- `gawk' does not implicitly modify the precision of any previously
-computed results when the working precision is changed with an
-assignment to `PREC'. The precision of a number is always the one that
-was used at the time of its creation, and there is no way for the user
-to explicitly change it afterwards. However, since the result of a
-floating-point arithmetic operation is always an arbitrary precision
-floating-point value--with a precision set by the value of `PREC'--one
-of the following workarounds effectively accomplishes the desired
-behavior:
-
- x = x + 0.0
+ * Without `--posix', `gawk' interprets the four strings `+inf',
+ `-inf', `+nan', and `-nan' specially, producing the corresponding
+ special numeric values. The leading sign acts as a signal to `gawk'
+ (and the user) that the value is really numeric. Hexadecimal
+ floating point is not supported (unless you also use
+ `--non-decimal-data', which is _not_ recommended). For example:
-or:
+ $ echo nanny | gawk '{ print $1 + 0 }'
+ -| 0
+ $ echo +nan | gawk '{ print $1 + 0 }'
+ -| nan
+ $ echo 0xDeadBeef | gawk '{ print $1 + 0 }'
+ -| 0
- x += 0.0
+ `gawk' ignores case in the four special values. Thus `+nan' and
+ `+NaN' are the same.
---------- Footnotes ----------
- (1) Dirk Laurie. `Variable-precision Arithmetic Considered Perilous
--- A Detective Story'. Electronic Transactions on Numerical Analysis.
-Volume 28, pp. 168-173, 2008.
-
-
-File: gawk.info, Node: Exact Arithmetic, Prev: Changing Precision, Up: Arbitrary Precision Floats
-
-15.4.5 Exact Arithmetic with Floating-point Numbers
----------------------------------------------------
-
- CAUTION: Never depend on the exactness of floating-point
- arithmetic, even for apparently simple expressions!
-
- Can arbitrary precision arithmetic give exact results? There are no
-easy answers. The standard rules of algebra often do not apply when
-using floating-point arithmetic. Among other things, the distributive
-and associative laws do not hold completely, and order of operation may
-be important for your computation. Rounding error, cumulative precision
-loss and underflow are often troublesome.
-
- When `gawk' tests the expressions `0.1 + 12.2' and `12.3' for
-equality using the machine double precision arithmetic, it decides that
-they are not equal! (*Note Floating-point Programming::.) You can get
-the result you want by increasing the precision; 56 bits in this case
-will get the job done:
-
- $ gawk -M -v PREC=56 'BEGIN { print (0.1 + 12.2 == 12.3) }'
- -| 1
-
- If adding more bits is good, perhaps adding even more bits of
-precision is better? Here is what happens if we use an even larger
-value of `PREC':
-
- $ gawk -M -v PREC=201 'BEGIN { print (0.1 + 12.2 == 12.3) }'
- -| 0
-
- This is not a bug in `gawk' or in the MPFR library. It is easy to
-forget that the finite number of bits used to store the value is often
-just an approximation after proper rounding. The test for equality
-succeeds if and only if _all_ bits in the two operands are exactly the
-same. Since this is not necessarily true after floating-point
-computations with a particular precision and effective rounding rule, a
-straight test for equality may not work.
-
- So, don't assume that floating-point values can be compared for
-equality. You should also exercise caution when using other forms of
-comparisons. The standard way to compare between floating-point
-numbers is to determine how much error (or "tolerance") you will allow
-in a comparison and check to see if one value is within this error
-range of the other.
-
- In applications where 15 or fewer decimal places suffice, hardware
-double precision arithmetic can be adequate, and is usually much faster.
-But you do need to keep in mind that every floating-point operation can
-suffer a new rounding error with catastrophic consequences as
-illustrated by our earlier attempt to compute the value of the constant
-pi (*note Floating-point Programming::). Extra precision can greatly
-enhance the stability and the accuracy of your computation in such
-cases.
-
- Repeated addition is not necessarily equivalent to multiplication in
-floating-point arithmetic. In the example in *note Floating-point
-Programming:::
-
- $ gawk 'BEGIN {
- > for (d = 1.1; d <= 1.5; d += 0.1) # loop five times (?)
- > i++
- > print i
- > }'
- -| 4
-
-you may or may not succeed in getting the correct result by choosing an
-arbitrarily large value for `PREC'. Reformulation of the problem at
-hand is often the correct approach in such situations.
+ (1) You asked for it, you got it.

-File: gawk.info, Node: Arbitrary Precision Integers, Prev: Arbitrary Precision Floats, Up: Arbitrary Precision Arithmetic
+File: gawk.info, Node: Floating point summary, Prev: POSIX Floating Point Problems, Up: Arbitrary Precision Arithmetic
-15.5 Arbitrary Precision Integer Arithmetic with `gawk'
-=======================================================
+15.7 Summary
+============
-If one of the options `--bignum' or `-M' is specified, `gawk' performs
-all integer arithmetic using GMP arbitrary precision integers. Any
-number that looks like an integer in a program source or data file is
-stored as an arbitrary precision integer. The size of the integer is
-limited only by your computer's memory. The current floating-point
-context has no effect on operations involving integers. For example,
-the following computes 5^4^3^2, the result of which is beyond the
-limits of ordinary `gawk' numbers:
+ * Most computer arithmetic is done using either integers or
+ floating-point values. The default for `awk' is to use
+ double-precision floating-point values.
- $ gawk -M 'BEGIN {
- > x = 5^4^3^2
- > print "# of digits =", length(x)
- > print substr(x, 1, 20), "...", substr(x, length(x) - 19, 20)
- > }'
- -| # of digits = 183231
- -| 62060698786608744707 ... 92256259918212890625
+ * In the early 1990s, Barbie mistakenly said "Math class is tough!"
+ While math isn't tough, floating-point arithmetic isn't the same
+ as pencil and paper math, and care must be taken:
- If you were to compute the same value using arbitrary precision
-floating-point values instead, the precision needed for correct output
-(using the formula `prec = 3.322 * dps'), would be 3.322 x 183231, or
-608693.
+ - Not all numbers can be represented exactly.
- The result from an arithmetic operation with an integer and a
-floating-point value is a floating-point value with a precision equal
-to the working precision. The following program calculates the eighth
-term in Sylvester's sequence(1) using a recurrence:
+ - Comparing values should use a delta, instead of being done
+ directly with `==' and `!='.
- $ gawk -M 'BEGIN {
- > s = 2.0
- > for (i = 1; i <= 7; i++)
- > s = s * (s - 1) + 1
- > print s
- > }'
- -| 113423713055421845118910464
+ - Errors accumulate.
- The output differs from the actual number,
-113,423,713,055,421,844,361,000,443, because the default precision of
-53 bits is not enough to represent the floating-point results exactly.
-You can either increase the precision (100 bits is enough in this
-case), or replace the floating-point constant `2.0' with an integer, to
-perform all computations using integer arithmetic to get the correct
-output.
+ - Operations are not always truly associative or distributive.
- It will sometimes be necessary for `gawk' to implicitly convert an
-arbitrary precision integer into an arbitrary precision floating-point
-value. This is primarily because the MPFR library does not always
-provide the relevant interface to process arbitrary precision integers
-or mixed-mode numbers as needed by an operation or function. In such a
-case, the precision is set to the minimum value necessary for exact
-conversion, and the working precision is not used for this purpose. If
-this is not what you need or want, you can employ a subterfuge like
-this:
+ * Increasing the accuracy can help, but it is not a panacea.
- gawk -M 'BEGIN { n = 13; print (n + 0.0) % 2.0 }'
+ * Often, increasing the accuracy and then rounding to the desired
+ number of digits produces reasonable results.
- You can avoid this issue altogether by specifying the number as a
-floating-point value to begin with:
+ * Use `-M' (or `--bignum') to enable MPFR arithmetic. Use `PREC' to
+ set the precision in bits, and `ROUNDMODE' to set the IEEE 754
+ rounding mode.
- gawk -M 'BEGIN { n = 13.0; print n % 2.0 }'
+ * With `-M', `gawk' performs arbitrary precision integer arithmetic
+ using the GMP library. This is faster and more space efficient
+ than using MPFR for the same calculations.
- Note that for the particular example above, it is likely best to
-just use the following:
+ * There are several "dark corners" with respect to floating-point
+ numbers where `gawk' disagrees with the POSIX standard. It pays
+ to be aware of them.
- gawk -M 'BEGIN { n = 13; print n % 2 }'
+ * Overall, there is no need to be unduly suspicious about the
+ results from floating-point arithmetic. The lesson to remember is
+ that floating-point arithmetic is always more complex than
+ arithmetic using pencil and paper. In order to take advantage of
+ the power of computer floating-point, you need to know its
+ limitations and work within them. For most casual use of
+ floating-point arithmetic, you will often get the expected result
+ if you simply round the display of your final results to the
+ correct number of significant decimal digits.
- ---------- Footnotes ----------
+ * As general advice, avoid presenting numerical data in a manner that
+ implies better precision than is actually the case.
- (1) Weisstein, Eric W. `Sylvester's Sequence'. From MathWorld--A
-Wolfram Web Resource.
-`http://mathworld.wolfram.com/SylvestersSequence.html'

File: gawk.info, Node: Dynamic Extensions, Next: Language History, Prev: Arbitrary Precision Arithmetic, Up: Top
@@ -21602,6 +22387,8 @@ sample extensions are automatically built and installed when `gawk' is.
* Extension Samples:: The sample extensions that ship with
`gawk'.
* gawkextlib:: The `gawkextlib' project.
+* Extension summary:: Extension summary.
+* Extension Exercises:: Exercises.

File: gawk.info, Node: Extension Intro, Next: Plugin License, Up: Dynamic Extensions
@@ -21655,7 +22442,8 @@ File: gawk.info, Node: Extension Mechanism Outline, Next: Extension API Descri
Communication between `gawk' and an extension is two-way. First, when
an extension is loaded, it is passed a pointer to a `struct' whose
-fields are function pointers. This is shown in *note load-extension::.
+fields are function pointers. This is shown in *note
+figure-load-extension::.
API
Struct
@@ -21687,7 +22475,7 @@ Figure 16.1: Loading The Extension
function pointers, at runtime, without needing (link-time) access to
`gawk''s symbols. One of these function pointers is to a function for
"registering" new built-in functions. This is shown in *note
-load-new-function::.
+figure-load-new-function::.
register_ext_func({ "chdir", do_chdir, 1 });
@@ -21707,7 +22495,7 @@ Figure 16.2: Loading The New Function
with `gawk' by passing function pointers to the functions that provide
the new feature (`do_chdir()', for example). `gawk' associates the
function pointer with a name and can then call it, using a defined
-calling convention. This is shown in *note call-new-function::.
+calling convention. This is shown in *note figure-call-new-function::.
BEGIN {
chdir("/path") (*fnptr)(1);
@@ -21728,9 +22516,9 @@ Figure 16.3: Calling The New Function
the API `struct' to do its work, such as updating variables or arrays,
printing messages, setting `ERRNO', and so on.
- Convenience macros in the `gawkapi.h' header file make calling
-through the function pointers look like regular function calls so that
-extension code is quite readable and understandable.
+ Convenience macros make calling through the function pointers look
+like regular function calls so that extension code is quite readable
+and understandable.
Although all of this sounds somewhat complicated, the result is that
extension code is quite straightforward to write and to read. You can
@@ -21740,7 +22528,7 @@ Example::) and also the `testext.c' code for testing the APIs.
Some other bits and pieces:
* The API provides access to `gawk''s `do_XXX' values, reflecting
- command line options, like `do_lint', `do_profiling' and so on
+ command-line options, like `do_lint', `do_profiling' and so on
(*note Extension API Variables::). These are informational: an
extension cannot affect their values inside `gawk'. In addition,
attempting to assign to them produces a compile-time error.
@@ -21757,7 +22545,10 @@ File: gawk.info, Node: Extension API Description, Next: Finding Extensions, P
16.4 API Description
====================
-This (rather large) minor node describes the API in detail.
+C or C++ code for an extension must include the header file
+`gawkapi.h', which declares the functions and defines the data types
+used to communicate with `gawk'. This (rather large) minor node
+describes the API in detail.
* Menu:
@@ -21789,7 +22580,7 @@ through function pointers passed into your extension.
API function pointers are provided for the following kinds of
operations:
- * Registrations functions. You may register:
+ * Registration functions. You may register:
- extension functions,
- exit callbacks,
@@ -21841,6 +22632,7 @@ operations:
C Entity Header File
-------------------------------------------
`EOF' `<stdio.h>'
+ Values for `errno' `<errno.h>'
`FILE' `<stdio.h>'
`NULL' `<stddef.h>'
`memcpy()' `<string.h>'
@@ -21855,9 +22647,6 @@ operations:
a portability hodge-podge as can be seen in some parts of the
`gawk' source code.
- To pass reasonable integer values for `ERRNO', you will also need
- to include `<errno.h>'.
-
* The `gawkapi.h' file may be included more than once without ill
effect. Doing so, however, is poor coding practice.
@@ -21867,7 +22656,7 @@ operations:
place `-Dinline=''' on your command line, or use the GNU Autotools
and include a `config.h' file in your extensions.
- * All pointers filled in by `gawk' are to memory managed by `gawk'
+ * All pointers filled in by `gawk' point to memory managed by `gawk'
and should be treated by the extension as read-only. Memory for
_all_ strings passed into `gawk' from the extension _must_ come
from calling the API-provided function pointers `api_malloc()',
@@ -21877,7 +22666,7 @@ operations:
* The API defines several simple `struct's that map values as seen
from `awk'. A value can be a `double', a string, or an array (as
in multidimensional arrays, or when creating a new array). String
- values maintain both pointer and length since embedded `NUL'
+ values maintain both pointer and length since embedded NUL
characters are allowed.
NOTE: By intent, strings are maintained using the current
@@ -22001,7 +22790,7 @@ that use them.
indicates what is in the `union'.
Representing numbers is easy--the API uses a C `double'. Strings
-require more work. Since `gawk' allows embedded `NUL' bytes in string
+require more work. Since `gawk' allows embedded NUL bytes in string
values, a string must be represented as a pair containing a
data-pointer and length. This is the `awk_string_t' type.
@@ -22081,7 +22870,7 @@ Requested: Scalar Scalar Scalar false false
Value false false false false
Cookie
-Table 16.1: Value Types Returned
+Table 16.1: API Value Types Returned

File: gawk.info, Node: Memory Allocation Functions, Next: Constructor Functions, Prev: Requesting Values, Up: Extension API Description
@@ -22124,6 +22913,7 @@ not return a value.
`#define emalloc(pointer, type, size, message) ...'
The arguments to this macro are as follows:
+
`pointer'
The pointer variable to point at the allocated storage.
@@ -22275,14 +23065,15 @@ File: gawk.info, Node: Exit Callback Functions, Next: Extension Version String
..............................................
An "exit callback" function is a function that `gawk' calls before it
-exits. Such functions are useful if you have general "clean up" tasks
-that should be performed in your extension (such as closing data base
+exits. Such functions are useful if you have general "cleanup" tasks
+that should be performed in your extension (such as closing database
connections or other resource deallocations). You can register such a
function with `gawk' using the following function.
`void awk_atexit(void (*funcp)(void *data, int exit_status),'
` void *arg0);'
The parameters are:
+
`funcp'
A pointer to the function to be called before `gawk' exits.
The `data' parameter will be the original value of `arg0'.
@@ -22371,7 +23162,8 @@ used for `RT', if any.
A pointer to your `XXX_take_control_of()' function.
`awk_const struct input_parser *awk_const next;'
- This pointer is used by `gawk'. The extension cannot modify it.
+ This is for use by `gawk'; therefore it is marked `awk_const' so
+ that the extension cannot modify it.
The steps are as follows:
@@ -22410,8 +23202,8 @@ as follows:
Otherwise, it will.
`struct stat sbuf;'
- If file descriptor is valid, then `gawk' will have filled in this
- structure via a call to the `fstat()' system call.
+ If the file descriptor is valid, then `gawk' will have filled in
+ this structure via a call to the `fstat()' system call.
The `XXX_can_take_file()' function should examine these fields and
decide if the input parser should be used for the file. The decision
@@ -22494,10 +23286,10 @@ need to test for a `NULL' value. `gawk' sets `*errcode' to zero, so
there is no need to set it unless an error occurs.
If an error does occur, the function should return `EOF' and set
-`*errcode' to a non-zero value. In that case, if `*errcode' does not
-equal -1, `gawk' automatically updates the `ERRNO' variable based on
-the value of `*errcode'. (In general, setting `*errcode = errno'
-should do the right thing.)
+`*errcode' to a value greater than zero. In that case, if `*errcode'
+does not equal zero, `gawk' automatically updates the `ERRNO' variable
+based on the value of `*errcode'. (In general, setting `*errcode =
+errno' should do the right thing.)
As an alternative to supplying a function that returns an input
record, you may instead supply a function that simply reads bytes, and
@@ -22574,8 +23366,8 @@ an extension to take over the output to a file opened with the `>' or
false otherwise.
`awk_const struct output_wrapper *awk_const next;'
- This is for use by `gawk'; therefore they are marked `awk_const'
- so that the extension cannot modify them.
+ This is for use by `gawk'; therefore it is marked `awk_const' so
+ that the extension cannot modify it.
The `awk_output_buf_t' structure looks like this:
@@ -22632,9 +23424,9 @@ in the `awk_output_buf_t'. The data members are as follows:
the `name' and `mode' fields, and any additional state (such as `awk'
variable values) that is appropriate.
- When `gawk' calls `XXX_take_control_of()', it should fill in the
-other fields, as appropriate, except for `fp', which it should just use
-normally.
+ When `gawk' calls `XXX_take_control_of()', that function should fill
+in the other fields, as appropriate, except for `fp', which it should
+just use normally.
You register your output wrapper with the following function:
@@ -22671,7 +23463,7 @@ structures as described earlier.
`awk_bool_t (*can_take_two_way)(const char *name);'
This function returns true if it wants to take over two-way I/O
- for this filename. It should not change any state (variable
+ for this file name. It should not change any state (variable
values, etc.) within `gawk'.
`awk_bool_t (*take_control_of)(const char *name,'
@@ -22682,8 +23474,8 @@ structures as described earlier.
respectively. These structures were described earlier.
`awk_const struct two_way_processor *awk_const next;'
- This is for use by `gawk'; therefore they are marked `awk_const'
- so that the extension cannot modify them.
+ This is for use by `gawk'; therefore it is marked `awk_const' so
+ that the extension cannot modify it.
As with the input parser and output processor, you provide "yes I
can take this" and "take over for this" functions,
@@ -22852,7 +23644,7 @@ was discussed earlier, in *note General Data Types::.
`awk_bool_t sym_update_scalar(awk_scalar_t cookie, awk_value_t *value);'
Update the value associated with a scalar cookie. Return false if
- the new value is not one of `AWK_STRING' or `AWK_NUMBER'. Here
+ the new value is not of type `AWK_STRING' or `AWK_NUMBER'. Here
too, the built-in variables may not be updated.
It is not obvious at first glance how to work with scalar cookies or
@@ -22967,9 +23759,10 @@ follows:
`awk_bool_t create_value(awk_value_t *value, awk_value_cookie_t *result);'
Create a cached string or numeric value from `value' for efficient
- later assignment. Only `AWK_NUMBER' and `AWK_STRING' values are
- allowed. Any other type is rejected. While `AWK_UNDEFINED' could
- be allowed, doing so would result in inferior performance.
+ later assignment. Only values of type `AWK_NUMBER' and
+ `AWK_STRING' are allowed. Any other type is rejected. While
+ `AWK_UNDEFINED' could be allowed, doing so would result in
+ inferior performance.
`awk_bool_t release_value(awk_value_cookie_t vc);'
Release the memory associated with a value cookie obtained from
@@ -23023,13 +23816,13 @@ if `awk' code assigns a new value to `VAR1', are all the others be
changed too?"
That's a great question. The answer is that no, it's not a problem.
-Internally, `gawk' uses reference-counted strings. This means that many
-variables can share the same string value, and `gawk' keeps track of
-the usage. When a variable's value changes, `gawk' simply decrements
-the reference count on the old value and updates the variable to use
-the new value.
+Internally, `gawk' uses "reference-counted strings". This means that
+many variables can share the same string value, and `gawk' keeps track
+of the usage. When a variable's value changes, `gawk' simply
+decrements the reference count on the old value and updates the
+variable to use the new value.
- Finally, as part of your clean up action (*note Exit Callback
+ Finally, as part of your cleanup action (*note Exit Callback
Functions::) you should release any cached values that you created,
using `release_value()'.
@@ -23170,7 +23963,7 @@ The following functions relate to individual array elements.
` const awk_value_t *const value);'
In the array represented by `a_cookie', create or modify the
element whose index is given by `index'. The `ARGV' and `ENVIRON'
- arrays may not be changed.
+ arrays may not be changed, although the `PROCINFO' array can be.
`awk_bool_t set_array_element_by_elem(awk_array_t a_cookie,'
` awk_element_t element);'
@@ -23408,8 +24201,8 @@ code:
Thus, the correct way to build an array is to work "top down."
Create the array, and immediately install it in `gawk''s symbol
table using `sym_update()', or install it as an element in a
- previously existing array using `set_element()'. We show example
- code shortly.
+ previously existing array using `set_array_element()'. We show
+ example code shortly.
2. Due to gawk internals, after using `sym_update()' to install an
array into `gawk', you have to retrieve the array cookie from the
@@ -23599,13 +24392,15 @@ The API provides access to several variables that describe whether the
corresponding command-line options were enabled when `gawk' was
invoked. The variables are:
+`do_debug'
+ This variable is true if `gawk' was invoked with `--debug' option.
+
`do_lint'
This variable is true if `gawk' was invoked with `--lint' option
(*note Options::).
-`do_traditional'
- This variable is true if `gawk' was invoked with `--traditional'
- option.
+`do_mpfr'
+ This variable is true if `gawk' was invoked with `--bignum' option.
`do_profile'
This variable is true if `gawk' was invoked with `--profile'
@@ -23615,11 +24410,9 @@ invoked. The variables are:
This variable is true if `gawk' was invoked with `--sandbox'
option.
-`do_debug'
- This variable is true if `gawk' was invoked with `--debug' option.
-
-`do_mpfr'
- This variable is true if `gawk' was invoked with `--bignum' option.
+`do_traditional'
+ This variable is true if `gawk' was invoked with `--traditional'
+ option.
The value of `do_lint' can change if `awk' code modifies the `LINT'
built-in variable (*note Built-in Variables::). The others should not
@@ -24219,7 +25012,9 @@ for loading each function into `gawk':
static awk_ext_func_t func_table[] = {
{ "chdir", do_chdir, 1 },
{ "stat", do_stat, 2 },
+ #ifndef __MINGW32__
{ "fts", do_fts, 3 },
+ #endif
};
Each extension must have a routine named `dl_load()' to load
@@ -24230,8 +25025,7 @@ everything that needs to be loaded. It is simplest to use the
dl_load_func(func_table, filefuncs, "")
- And that's it! As an exercise, consider adding functions to
-implement system calls such as `chown()', `chmod()', and `umask()'.
+ And that's it!
---------- Footnotes ----------
@@ -24284,8 +25078,8 @@ create a GNU/Linux shared library:
}
The `AWKLIBPATH' environment variable tells `gawk' where to find
-shared libraries (*note Finding Extensions::). We set it to the
-current directory and run the program:
+extensions (*note Finding Extensions::). We set it to the current
+directory and run the program:
$ AWKLIBPATH=$PWD gawk -f testff.awk
-| /tmp
@@ -24315,7 +25109,7 @@ current directory and run the program:
---------- Footnotes ----------
(1) In practice, you would probably want to use the GNU
-Autotools--Automake, Autoconf, Libtool, and Gettext--to configure and
+Autotools--Automake, Autoconf, Libtool, and `gettext'--to configure and
build your libraries. Instructions for doing so are beyond the scope of
this Info file. *Note gawkextlib::, for WWW links to the tools.
@@ -24358,7 +25152,7 @@ File: gawk.info, Node: Extension Sample File Functions, Next: Extension Sample
The `filefuncs' extension provides three different functions, as
follows. The usage is:
-`@load "filefuncs"'
+@load "filefuncs"
This is how you load the extension.
`result = chdir("/some/directory")'
@@ -24367,7 +25161,7 @@ follows: The usage is:
success or less than zero upon error. In the latter case it
updates `ERRNO'.
-`result = stat("/some/path", statdata [, follow])'
+`result = stat("/some/path", statdata' [`, follow']`)'
The `stat()' function provides a hook into the `stat()' system
call. It returns zero upon success or less than zero upon error.
In the latter case it updates `ERRNO'.
@@ -24379,52 +25173,36 @@ follows: The usage is:
successful, `stat()' fills the `statdata' array with information
retrieved from the filesystem, as follows:
- `statdata["name"]' The name of the file.
- `statdata["dev"]' Corresponds to the `st_dev' field in
- the `struct stat'.
- `statdata["ino"]' Corresponds to the `st_ino' field in
- the `struct stat'.
- `statdata["mode"]' Corresponds to the `st_mode' field in
- the `struct stat'.
- `statdata["nlink"]' Corresponds to the `st_nlink' field in
- the `struct stat'.
- `statdata["uid"]' Corresponds to the `st_uid' field in
- the `struct stat'.
- `statdata["gid"]' Corresponds to the `st_gid' field in
- the `struct stat'.
- `statdata["size"]' Corresponds to the `st_size' field in
- the `struct stat'.
- `statdata["atime"]' Corresponds to the `st_atime' field in
- the `struct stat'.
- `statdata["mtime"]' Corresponds to the `st_mtime' field in
- the `struct stat'.
- `statdata["ctime"]' Corresponds to the `st_ctime' field in
- the `struct stat'.
- `statdata["rdev"]' Corresponds to the `st_rdev' field in
- the `struct stat'. This element is
- only present for device files.
- `statdata["major"]' Corresponds to the `st_major' field in
- the `struct stat'. This element is
- only present for device files.
- `statdata["minor"]' Corresponds to the `st_minor' field in
- the `struct stat'. This element is
- only present for device files.
- `statdata["blksize"]' Corresponds to the `st_blksize' field
- in the `struct stat', if this field is
- present on your system. (It is present
- on all modern systems that we know of.)
- `statdata["pmode"]' A human-readable version of the mode
- value, such as printed by `ls'. For
- example, `"-rwxr-xr-x"'.
- `statdata["linkval"]' If the named file is a symbolic link,
- this element will exist and its value
- is the value of the symbolic link
- (where the symbolic link points to).
- `statdata["type"]' The type of the file as a string. One
- of `"file"', `"blockdev"', `"chardev"',
- `"directory"', `"socket"', `"fifo"',
- `"symlink"', `"door"', or `"unknown"'.
- Not all systems support all file types.
+ Subscript Field in `struct stat' File type
+ ------------------------------------------------------------
+ `"name"' The file name All
+ `"dev"' `st_dev' All
+ `"ino"' `st_ino' All
+ `"mode"' `st_mode' All
+ `"nlink"' `st_nlink' All
+ `"uid"' `st_uid' All
+ `"gid"' `st_gid' All
+ `"size"' `st_size' All
+ `"atime"' `st_atime' All
+ `"mtime"' `st_mtime' All
+ `"ctime"' `st_ctime' All
+ `"rdev"' `st_rdev' Device files
+ `"major"' `st_major' Device files
+ `"minor"' `st_minor' Device files
+ `"blksize"'`st_blksize' All
+ `"pmode"' A human-readable version of the All
+ mode value, such as printed by
+ `ls'. For example,
+ `"-rwxr-xr-x"'
+ `"linkval"'The value of the symbolic link Symbolic
+ links
+ `"type"' The type of the file as a string. All
+ One of `"file"', `"blockdev"',
+ `"chardev"', `"directory"',
+ `"socket"', `"fifo"', `"symlink"',
+ `"door"', or `"unknown"'. Not
+ all systems support all file
+ types.
`flags = or(FTS_PHYSICAL, ...)'
`result = fts(pathlist, flags, filedata)'
@@ -24442,7 +25220,7 @@ requested hierarchies.
The arguments are as follows:
`pathlist'
- An array of filenames. The element values are used; the index
+ An array of file names. The element values are used; the index
values are ignored.
`flags'
@@ -24558,10 +25336,10 @@ constant (`FNM_NOMATCH'), and an array of flag values named `FNM'.
The arguments to `fnmatch()' are:
`pattern'
- The filename wildcard to match.
+ The file name wildcard to match.
`string'
- The filename string.
+ The file name string.
`flag'
Either zero, or the bitwise OR of one or more of the flags in the
@@ -24569,18 +25347,14 @@ constant (`FNM_NOMATCH'), and an array of flag values named `FNM'.
The flags are as follows:
-`FNM["CASEFOLD"]' Corresponds to the `FNM_CASEFOLD' flag as defined in
- `fnmatch()'.
-`FNM["FILE_NAME"]' Corresponds to the `FNM_FILE_NAME' flag as defined
- in `fnmatch()'.
-`FNM["LEADING_DIR"]' Corresponds to the `FNM_LEADING_DIR' flag as defined
- in `fnmatch()'.
-`FNM["NOESCAPE"]' Corresponds to the `FNM_NOESCAPE' flag as defined in
- `fnmatch()'.
-`FNM["PATHNAME"]' Corresponds to the `FNM_PATHNAME' flag as defined in
- `fnmatch()'.
-`FNM["PERIOD"]' Corresponds to the `FNM_PERIOD' flag as defined in
- `fnmatch()'.
+Array element Corresponding flag defined by `fnmatch()'
+--------------------------------------------------------------------------
+`FNM["CASEFOLD"]' `FNM_CASEFOLD'
+`FNM["FILE_NAME"]' `FNM_FILE_NAME'
+`FNM["LEADING_DIR"]'`FNM_LEADING_DIR'
+`FNM["NOESCAPE"]' `FNM_NOESCAPE'
+`FNM["PATHNAME"]' `FNM_PATHNAME'
+`FNM["PERIOD"]' `FNM_PERIOD'
Here is an example:
@@ -24657,8 +25431,8 @@ standard output to a temporary file configured to have the same owner
and permissions as the original. After the file has been processed,
the extension restores standard output to its original destination. If
`INPLACE_SUFFIX' is not an empty string, the original file is linked to
-a backup filename created by appending that suffix. Finally, the
-temporary file is renamed to the original filename.
+a backup file name created by appending that suffix. Finally, the
+temporary file is renamed to the original file name.
If any error occurs, the extension issues a fatal error to terminate
processing immediately without damaging the original file.
@@ -24672,9 +25446,6 @@ processing immediately without damaging the original file.
$ gawk -i inplace -v INPLACE_SUFFIX=.bak '{ gsub(/foo/, "bar") }
> { print }' file1 file2 file3
- We leave it as an exercise to write a wrapper script that presents an
-interface similar to `sed -i'.
-

File: gawk.info, Node: Extension Sample Ord, Next: Extension Sample Readdir, Prev: Extension Sample Inplace, Up: Extension Samples
@@ -24718,10 +25489,11 @@ on the command line (or with `getline'), they are read, with each entry
returned as a record.
The record consists of three fields. The first two are the inode
-number and the filename, separated by a forward slash character. On
+number and the file name, separated by a forward slash character. On
systems where the directory entry contains the file type, the record
has a third field (also separated by a slash) which is a single letter
-indicating the type of the file:
+indicating the type of the file. The letters and their file types are
+shown in *note table-readdir-file-types::.
Letter File Type
--------------------------------------------------------------------------
@@ -24734,6 +25506,8 @@ Letter File Type
`s' Socket
`u' Anything else (unknown)
+Table 16.2: File Types Returned By `readdir()'
+
On systems without the file type information, the third field is
always `u'.
@@ -24765,10 +25539,10 @@ unwary. Here is an example:
BEGIN {
REVOUT = 1
- print "hello, world" > "/dev/stdout"
+ print "don't panic" > "/dev/stdout"
}
- The output from this program is: `dlrow ,olleh'.
+ The output from this program is: `cinap t'nod'.

File: gawk.info, Node: Extension Sample Rev2way, Next: Extension Sample Read write array, Prev: Extension Sample Revout, Up: Extension Samples
@@ -24786,12 +25560,14 @@ example shows how to use it:
BEGIN {
cmd = "/magic/mirror"
- print "hello, world" |& cmd
+ print "don't panic" |& cmd
cmd |& getline result
print result
close(cmd)
}
+ The output from this program is: `cinap t'nod'.
+

File: gawk.info, Node: Extension Sample Read write array, Next: Extension Sample Readfile, Prev: Extension Sample Rev2way, Up: Extension Samples
@@ -24803,8 +25579,8 @@ The `rwarray' extension adds two functions, named `writea()' and
`ret = writea(file, array)'
This function takes a string argument, which is the name of the
- file to which dump the array, and the array itself as the second
- argument. `writea()' understands multidimensional arrays. It
+ file to which to dump the array, and the array itself as the
+ second argument. `writea()' understands arrays of arrays. It
returns one on success, or zero upon failure.
`ret = reada(file, array)'
@@ -24887,9 +25663,8 @@ File: gawk.info, Node: Extension Sample Time, Prev: Extension Sample API Tests
16.7.12 Extension Time Functions
--------------------------------
-These functions can be used either by invoking `gawk' with a
-command-line argument of `-l time' or by inserting `@load "time"' in
-your script.
+The `time' extension adds two functions, named `gettimeofday()' and
+`sleep()', as follows:
`@load "time"'
This is how you load the extension.
@@ -24901,7 +25676,7 @@ your script.
have sub-second precision, but the actual precision may vary based
on the platform. If the standard C `gettimeofday()' system call
is available on this platform, then it simply returns the value.
- Otherwise, if on Windows, it tries to use
+ Otherwise, if on MS-Windows, it tries to use
`GetSystemTimeAsFileTime()'.
`result = sleep(SECONDS)'
@@ -24914,7 +25689,7 @@ your script.
delay.

-File: gawk.info, Node: gawkextlib, Prev: Extension Samples, Up: Dynamic Extensions
+File: gawk.info, Node: gawkextlib, Next: Extension summary, Prev: Extension Samples, Up: Dynamic Extensions
16.8 The `gawkextlib' Project
=============================
@@ -24943,7 +25718,7 @@ Time::) was originally from this project but has been moved in to the
main `gawk' distribution.
You can check out the code for the `gawkextlib' project using the
-GIT (http://git-scm.com) distributed source code control system. The
+Git (http://git-scm.com) distributed source code control system. The
command is as follows:
git clone git://git.code.sf.net/p/gawkextlib/code gawkextlib-code
@@ -24954,7 +25729,7 @@ parser library installed in order to build and use the XML extension.
In addition, you must have the GNU Autotools installed (Autoconf
(http://www.gnu.org/software/autoconf), Automake
(http://www.gnu.org/software/automake), Libtool
-(http://www.gnu.org/software/libtool), and Gettext
+(http://www.gnu.org/software/libtool), and GNU `gettext'
(http://www.gnu.org/software/gettext)).
The simple recipe for building and testing `gawkextlib' is as
@@ -24984,6 +25759,115 @@ users, please consider doing so through the `gawkextlib' project. See
the project's web site for more information.

+File: gawk.info, Node: Extension summary, Next: Extension Exercises, Prev: gawkextlib, Up: Dynamic Extensions
+
+16.9 Summary
+============
+
+ * You can write extensions (sometimes called plug-ins) for `gawk' in
+ C or C++ using the Application Programming Interface (API) defined
+ by the `gawk' developers.
+
+ * Extensions must have a license compatible with the GNU General
+ Public License (GPL), and they must assert that fact by declaring
+ a variable named `plugin_is_GPL_compatible'.
+
+ * Communication between `gawk' and an extension is two-way. `gawk'
+ passes a `struct' to the extension which contains various data
+ fields and function pointers. The extension can then call into
+ `gawk' via the supplied function pointers to accomplish certain
+ tasks.
+
+ * One of these tasks is to "register" the name and implementation of
+ a new `awk'-level function with `gawk'. The implementation takes
+ the form of a C function pointer with a defined signature. By
+ convention, implementation functions are named `do_XXXX()' for
+ some `awk'-level function `XXXX()'.
+
+ * The API is defined in a header file named `gawkapi.h'. You must
+ include a number of standard header files _before_ including it in
+ your source file.
+
+ * API function pointers are provided for the following kinds of
+ operations:
+
+ * Registration functions. You may register extension functions,
+ exit callbacks, a version string, input parsers, output
+ wrappers, and two-way processors.
+
+ * Printing fatal, warning, and "lint" warning messages.
+
+ * Updating `ERRNO', or unsetting it.
+
+ * Accessing parameters, including converting an undefined
+ parameter into an array.
+
+ * Symbol table access: retrieving a global variable, creating
+ one, or changing one.
+
+ * Allocating, reallocating, and releasing memory.
+
+ * Creating and releasing cached values; this provides an
+ efficient way to use values for multiple variables and can be
+ a big performance win.
+
+ * Manipulating arrays: retrieving, adding, deleting, and
+ modifying elements; getting the count of elements in an array;
+ creating a new array; clearing an array; and flattening an
+ array for easy C-style looping over all its indices and
+ elements.
+
+ * The API defines a number of standard data types for representing
+ `awk' values, array elements, and arrays.
+
+ * The API provides convenience functions for constructing values. It
+ also provides memory management functions to ensure compatibility
+ between memory allocated by `gawk' and memory allocated by an
+ extension.
+
+ * _All_ memory passed from `gawk' to an extension must be treated as
+ read-only by the extension.
+
+ * _All_ memory passed from an extension to `gawk' must come from the
+ API's memory allocation functions. `gawk' takes responsibility for
+ the memory and will release it when appropriate.
+
+ * The API provides information about the running version of `gawk' so
+ that an extension can make sure it is compatible with the `gawk'
+ that loaded it.
+
+ * It is easiest to start a new extension by copying the boilerplate
+ code described in this major node. Macros in the `gawkapi.h' header
+ file make this easier to do.
+
+ * The `gawk' distribution includes a number of small but useful
+ sample extensions. The `gawkextlib' project includes several more,
+ larger, extensions. If you wish to write an extension and
+ contribute it to the community of `gawk' users, the `gawkextlib'
+ project should be the place to do so.
+
+
+
+File: gawk.info, Node: Extension Exercises, Prev: Extension summary, Up: Dynamic Extensions
+
+16.10 Exercises
+===============
+
+ 1. Add functions to implement system calls such as `chown()',
+ `chmod()', and `umask()' to the file operations extension
+ presented in *note Internal File Ops::.
+
+ 2. (Hard.) How would you provide namespaces in `gawk', so that the
+ names of functions in different extensions don't conflict with
+ each other? If you come up with a really good scheme, contact the
+ `gawk' maintainer to tell him about it.
+
+ 3. Write a wrapper script that provides an interface similar to `sed
+ -i' for the "inplace" extension presented in *note Extension
+ Sample Inplace::.
+
+
+
File: gawk.info, Node: Language History, Next: Installation, Prev: Dynamic Extensions, Up: Top
Appendix A The Evolution of the `awk' Language
@@ -24994,7 +25878,7 @@ the POSIX specification. Many long-time `awk' users learned `awk'
programming with the original `awk' implementation in Version 7 Unix.
(This implementation was the basis for `awk' in Berkeley Unix, through
4.3-Reno. Subsequent versions of Berkeley Unix, and some systems
-derived from 4.4BSD-Lite, use various versions of `gawk' for their
+derived from 4.4BSD-Lite, used various versions of `gawk' for their
`awk'.) This major node briefly describes the evolution of the `awk'
language, with cross-references to other parts of the Info file where
you can find more information.
@@ -25014,6 +25898,7 @@ you can find more information.
* Common Extensions:: Common Extensions Summary.
* Ranges and Locales:: How locales used to affect regexp ranges.
* Contributors:: The major contributors to `gawk'.
+* History summary:: History summary.

File: gawk.info, Node: V7/SVR3.1, Next: SVR4, Up: Language History
@@ -25237,7 +26122,7 @@ the current version of `gawk'.
- Indirect function calls (*note Indirect Calls::).
- Directories on the command line produce a warning and are
- skipped (*note Command line directories::).
+ skipped (*note Command-line directories::).
* New keywords:
@@ -25286,8 +26171,8 @@ the current version of `gawk'.
- The `bindtextdomain()', `dcgettext()' and `dcngettext()'
functions for internationalization (*note Programmer i18n::).
- - The `fflush()' function from Brian Kernighan's version of
- `awk' (*note I/O Functions::).
+ - The `fflush()' function from BWK `awk' (*note I/O
+ Functions::).
- The `gensub()', `patsplit()', and `strtonum()' functions for
more powerful text manipulation (*note String Functions::).
@@ -25308,7 +26193,7 @@ the current version of `gawk'.
`-r', `-S', `-t', and `-V' short options. Also, the ability
to use GNU-style long-named options that start with `--' and
the `--assign', `--bignum', `--characters-as-bytes',
- `--copyright', `--debug', `--dump-variables', `--execle',
+ `--copyright', `--debug', `--dump-variables', `--exec',
`--field-separator', `--file', `--gen-pot', `--help',
`--include', `--lint', `--lint-old', `--load',
`--non-decimal-data', `--optimize', `--posix',
@@ -25344,6 +26229,13 @@ the current version of `gawk'.
- GCC for VAX and Alpha has not been tested for a while.
+ * Support for the following obsolete systems was removed from the
+ code and the documentation for `gawk' version 4.1:
+
+ - Ultrix
+
+ * Support for MirBSD was removed at `gawk' version 4.2.
+

File: gawk.info, Node: Feature History, Next: Common Extensions, Prev: POSIX/GNU, Up: Language History
@@ -25402,7 +26294,7 @@ in POSIX `awk', in the order they were added to `gawk'.
* The ability to delete all of an array at once with `delete ARRAY'
(*note Delete::).
- * Command line option changes (*note Options::):
+ * Command-line option changes (*note Options::):
- The ability to use GNU-style long-named options that start
with `--'.
@@ -25436,17 +26328,18 @@ in POSIX `awk', in the order they were added to `gawk'.
* The `next file' statement became `nextfile' (*note Nextfile
Statement::).
- * The `fflush()' function from the Bell Laboratories research
- version of `awk' (*note I/O Functions::).
+ * The `fflush()' function from BWK `awk' (then at Bell Laboratories;
+ *note I/O Functions::).
- * New command line options:
+ * New command-line options:
- The `--lint-old' option to warn about constructs that are not
available in the original Version 7 Unix version of `awk'
(*note V7/SVR3.1::).
- - The `-m' option from the Bell Laboratories research version
- of `awk' This was later removed.
+ - The `-m' option from BWK `awk'. (Brian was still at Bell
+ Laboratories at the time.) This was later removed from both
+ his `awk' and from `gawk'.
- The `--re-interval' option to provide interval expressions in
regexps (*note Regexp Operators::).
@@ -25457,7 +26350,7 @@ in POSIX `awk', in the order they were added to `gawk'.
* The use of GNU Autoconf to control the configuration process
(*note Quick Installation::).
- * Amiga support.
+ * Amiga support. This has since been removed.
Version 3.1 of `gawk' introduced the following features:
@@ -25516,7 +26409,7 @@ in POSIX `awk', in the order they were added to `gawk'.
* The support for `next file' as two words was removed completely
(*note Nextfile Statement::).
- * Additional commnd line options (*note Options::):
+ * Additional command-line options (*note Options::):
- The `--dump-variables' option to print a list of all global
variables.
@@ -25548,7 +26441,8 @@ in POSIX `awk', in the order they were added to `gawk'.
* Tandem support. This was later removed.
- * The Atari port became officially unsupported.
+ * The Atari port became officially unsupported and was later removed
+ entirely.
* The source code changed to use ISO C standard-style function
definitions.
@@ -25605,13 +26499,13 @@ in POSIX `awk', in the order they were added to `gawk'.
* An optional third argument to `asort()' and `asorti()', specifying
how to sort (*note String Functions::).
- * The behavior of `fflush()' changed to match Brian Kernighan's `awk'
- and for POSIX; now both `fflush()' and `fflush("")' flush all open
- output redirections (*note I/O Functions::).
+ * The behavior of `fflush()' changed to match BWK `awk' and for
+ POSIX; now both `fflush()' and `fflush("")' flush all open output
+ redirections (*note I/O Functions::).
* The `isarray()' function which distinguishes if an item is an array
- or not, to make it possible to traverse multidimensional arrays
- (*note Type Functions::).
+ or not, to make it possible to traverse arrays of arrays (*note
+ Type Functions::).
* The `patsplit()' function which gives the same capability as
`FPAT', for splitting (*note String Functions::).
@@ -25630,7 +26524,7 @@ in POSIX `awk', in the order they were added to `gawk'.
* `switch' / `case' are enabled by default (*note Switch
Statement::).
- * Command line option changes (*note Options::):
+ * Command-line option changes (*note Options::):
- The `-b' and `--characters-as-bytes' options which prevent
`gawk' from treating input as a multibyte string.
@@ -25648,7 +26542,7 @@ in POSIX `awk', in the order they were added to `gawk'.
* Directories named on the command line now produce a warning, not a
fatal error, unless `--posix' or `--traditional' are used (*note
- Command line directories::).
+ Command-line directories::).
* The `gawk' internals were rewritten, bringing the `dgawk' debugger
and possibly improved performance (*note Debugger::).
@@ -25697,10 +26591,10 @@ in POSIX `awk', in the order they were added to `gawk'.
`PROCINFO["identifiers"]' (*note Auto-set::).
* The three executables `gawk', `pgawk', and `dgawk', were merged
- into one, named just `gawk'. As a result the command line options
+ into one, named just `gawk'. As a result the command-line options
changed.
- * Command line option changes (*note Options::):
+ * Command-line option changes (*note Options::):
- The `-D' option invokes the debugger.
@@ -25717,8 +26611,8 @@ in POSIX `awk', in the order they were added to `gawk'.
- The `-R' option was removed.
- * Support for high precision arithmetic with MPFR. (*note Gawk and
- MPFR::).
+ * Support for high precision arithmetic with MPFR (*note Arbitrary
+ Precision Arithmetic::).
* The `and()', `or()' and `xor()' functions changed to allow any
number of arguments, with a minimum of two (*note Bitwise
@@ -25866,7 +26760,7 @@ and its rationale
(http://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap09.html#tag_21_09_03_05).

-File: gawk.info, Node: Contributors, Prev: Ranges and Locales, Up: Language History
+File: gawk.info, Node: Contributors, Next: History summary, Prev: Ranges and Locales, Up: Language History
A.9 Major Contributors to `gawk'
================================
@@ -25912,8 +26806,8 @@ Info file, in approximate chronological order:
* Michal Jaegermann provided the port to Atari systems and its
documentation. (This port is no longer supported.) He continues
- to provide portability checking with DEC Alpha systems, and has
- done a lot of work to make sure `gawk' works on non-32-bit systems.
+ to provide portability checking, and has done a lot of work to
+ make sure `gawk' works on non-32-bit systems.
* Fred Fish provided the port to Amiga systems and its documentation.
(With Fred's sad passing, this is no longer supported.)
@@ -25978,8 +26872,7 @@ Info file, in approximate chronological order:
- The modifications to convert `gawk' into a byte-code
interpreter, including the debugger.
- - The addition of true multidimensional arrays. *note Arrays
- of Arrays::.
+ - The addition of true arrays of arrays.
- The additional modifications for support of arbitrary
precision arithmetic.
@@ -25994,6 +26887,9 @@ Info file, in approximate chronological order:
- The improved array sorting features were driven by John
together with Pat Rankin.
+ * Panos Papadopoulos contributed the original text for *note Include
+ Files::.
+
* Efraim Yawitz contributed the original text for *note Debugger::.
* The development of the extension API first released with `gawk'
@@ -26009,6 +26905,38 @@ Info file, in approximate chronological order:
1994.

+File: gawk.info, Node: History summary, Prev: Contributors, Up: Language History
+
+A.10 Summary
+============
+
+ * The `awk' language has evolved over time. The first release was
+ with V7 Unix circa 1978. In 1987 for System V Release 3.1, major
+ additions, including user-defined functions, were made to the
+ language. Additional changes were made for System V Release 4, in
+ 1989. Since then, further minor changes have happened under the
+ auspices of the POSIX standard.
+
+ * Brian Kernighan's `awk' provides a small number of extensions that
+ are implemented in common with other versions of `awk'.
+
+ * `gawk' provides a large number of extensions over POSIX `awk'.
+ They can be disabled with either the `--traditional' or `--posix'
+ options.
+
+ * The interaction of POSIX locales and regexp matching in `gawk' has
+ been confusing over the years. Today, `gawk' implements Rational
+ Range Interpretation, where ranges of the form `[a-z]' match
+ _only_ the characters numerically between `a' and `z' in the
+ machine's native character set. Usually this is ASCII but it can
+ be EBCDIC on IBM S/390 systems.
+
+ * Many people have contributed to `gawk' development over the years.
+ We hope that the list provided in this major node is complete and
+ gives the appropriate credit where credit is due.
+
+
+
File: gawk.info, Node: Installation, Next: Notes, Prev: Language History, Up: Top
Appendix B Installing `gawk'
@@ -26029,6 +26957,7 @@ people who did the respective ports.
* Bugs:: Reporting Problems and Bugs.
* Other Versions:: Other freely available `awk'
implementations.
+* Installation summary:: Summary of installation.

File: gawk.info, Node: Gawk Distribution, Next: Unix Installation, Up: Installation
@@ -26051,7 +26980,7 @@ File: gawk.info, Node: Getting, Next: Extracting, Up: Gawk Distribution
B.1.1 Getting the `gawk' Distribution
-------------------------------------
-There are three ways to get GNU software:
+There are two ways to get GNU software:
* Copy it from someone else who already has it.
@@ -26083,7 +27012,6 @@ the GNU Zip program, `gzip'.
use `gzip' to expand the file and then use `tar' to extract it. You
can use the following pipeline to produce the `gawk' distribution:
- # Under System V, add 'o' to the tar options
gzip -d -c gawk-4.1.1.tar.gz | tar -xvpf -
On a system with GNU `tar', you can let `tar' do the decompression
@@ -26211,7 +27139,9 @@ Various `.c', `.y', and `.h' files
`doc/igawk.1'
The `troff' source for a manual page describing the `igawk'
- program presented in *note Igawk Program::.
+ program presented in *note Igawk Program::. (Since `gawk' can do
+ its own `@include' processing, neither `igawk' nor `igawk.1' is
+ installed.)
`doc/Makefile.in'
The input file used during the configuration process to generate
@@ -26219,8 +27149,8 @@ Various `.c', `.y', and `.h' files
`Makefile.am'
`*/Makefile.am'
- Files used by the GNU `automake' software for generating the
- `Makefile.in' files used by `autoconf' and `configure'.
+ Files used by the GNU Automake software for generating the
+ `Makefile.in' files used by Autoconf and `configure'.
`Makefile.in'
`aclocal.m4'
@@ -26253,11 +27183,10 @@ Various `.c', `.y', and `.h' files
contains a `Makefile.in' file, which `configure' uses to generate
a `Makefile'. `Makefile.am' is used by GNU Automake to create
`Makefile.in'. The library functions from *note Library
- Functions::, and the `igawk' program from *note Igawk Program::,
- are included as ready-to-use files in the `gawk' distribution.
- They are installed as part of the installation process. The rest
- of the programs in this Info file are available in appropriate
- subdirectories of `awklib/eg'.
+ Functions::, are included as ready-to-use files in the `gawk'
+ distribution. They are installed as part of the installation
+ process. The rest of the programs in this Info file are available
+ in appropriate subdirectories of `awklib/eg'.
`extension/*'
The source code, manual pages, and infrastructure files for the
@@ -26272,8 +27201,8 @@ Various `.c', `.y', and `.h' files
PC Installation::, for details).
`vms/*'
- Files needed for building `gawk' under VMS (*note VMS
- Installation::, for details).
+ Files needed for building `gawk' under Vax/VMS and OpenVMS (*note
+ VMS Installation::, for details).
`test/*'
A test suite for `gawk'. You can use `make check' from the
@@ -26311,8 +27240,8 @@ environment for MS-Windows.
`gawk-4.1.1'. Like most GNU software, `gawk' is configured
automatically for your system by running the `configure' program. This
program is a Bourne shell script that is generated automatically using
-GNU `autoconf'. (The `autoconf' software is described fully starting
-with *note (Autoconf)Top:: autoconf,Autoconf--Generating Automatic
+GNU Autoconf. (The Autoconf software is described fully starting with
+*note (Autoconf)Top:: autoconf,Autoconf--Generating Automatic
Configuration Scripts.)
To configure `gawk', simply run `configure':
@@ -26390,8 +27319,8 @@ command line when compiling `gawk' from scratch, including:
improvement.
`--with-whiny-user-strftime'
- Force use of the included version of the `strftime()' function for
- deficient systems.
+ Force use of the included version of the C `strftime()' function
+ for deficient systems.
Use the command `./configure --help' to see the full list of options
that `configure' supplies.
@@ -26435,9 +27364,9 @@ any constants that `configure' defined and should not have. `custom.h'
is automatically included by `config.h'.
It is also possible that the `configure' program generated by
-`autoconf' will not work on your system in some other fashion. If you
-do have a problem, the file `configure.ac' is the input for `autoconf'.
-You may be able to change this file and generate a new version of
+Autoconf will not work on your system in some other fashion. If you do
+have a problem, the file `configure.ac' is the input for Autoconf. You
+may be able to change this file and generate a new version of
`configure' that works on your system (*note Bugs::, for information on
how to report problems in configuring `gawk'). The same mechanism may
be used to send in updates to `configure.ac' and/or `custom.h'.
@@ -26466,14 +27395,14 @@ B.3.1 Installation on PC Operating Systems
This minor node covers installation and usage of `gawk' on x86 machines
running MS-DOS, any version of MS-Windows, or OS/2. In this minor
node, the term "Windows32" refers to any of Microsoft
-Windows-95/98/ME/NT/2000/XP/Vista/7.
+Windows-95/98/ME/NT/2000/XP/Vista/7/8.
- The limitations of MS-DOS (and MS-DOS shells under Windows32 or
-OS/2) has meant that various "DOS extenders" are often used with
-programs such as `gawk'. The varying capabilities of Microsoft Windows
-3.1 and Windows32 can add to the confusion. For an overview of the
-considerations, please refer to `README_d/README.pc' in the
-distribution.
+ The limitations of MS-DOS (and MS-DOS shells under the other
+operating systems) have meant that various "DOS extenders" are often
+used with programs such as `gawk'. The varying capabilities of
+Microsoft Windows 3.1 and Windows32 can add to the confusion. For an
+overview of the considerations, please refer to `README_d/README.pc' in
+the distribution.
* Menu:
@@ -26532,13 +27461,13 @@ B.3.1.2 Compiling `gawk' for PC Operating Systems
.................................................
`gawk' can be compiled for MS-DOS, Windows32, and OS/2 using the GNU
-development tools from DJ Delorie (DJGPP: MS-DOS only) or Eberhard
-Mattes (EMX: MS-DOS, Windows32 and OS/2). The file
-`README_d/README.pc' in the `gawk' distribution contains additional
-notes, and `pc/Makefile' contains important information on compilation
-options.
+development tools from DJ Delorie (DJGPP: MS-DOS only), MinGW
+(Windows32) or Eberhard Mattes (EMX: MS-DOS, Windows32 and OS/2). The
+file `README_d/README.pc' in the `gawk' distribution contains
+additional notes, and `pc/Makefile' contains important information on
+compilation options.
- To build `gawk' for MS-DOS and Windows32, copy the files in the `pc'
+To build `gawk' for MS-DOS and Windows32, copy the files in the `pc'
directory (_except_ for `ChangeLog') to the directory with the rest of
the `gawk' sources, then invoke `make' with the appropriate target name
as an argument to build `gawk'. The `Makefile' copied from the `pc'
@@ -26598,7 +27527,12 @@ other set of (self-consistent) environment variables and compiler flags.
NOTE: Ancient OS/2 ports of GNU `make' are not able to handle the
Makefiles of this package. If you encounter any problems with
`make', try GNU Make 3.79.1 or later versions. You should find
- the latest version on `ftp://hobbes.nmsu.edu/pub/os2/'.
+ the latest version on `ftp://hobbes.nmsu.edu/pub/os2/'.(1)
+
+ ---------- Footnotes ----------
+
+ (1) As of May, 2014, this site is still there, but the author could
+not find a package for GNU Make.

File: gawk.info, Node: PC Testing, Next: PC Using, Prev: PC Compiling, Up: PC Installation
@@ -26639,11 +27573,11 @@ Networking::). EMX (OS/2 only) supports at least the `|&' operator.
files as described in *note AWKPATH Variable::. However, semicolons
(rather than colons) separate elements in the `AWKPATH' variable. If
`AWKPATH' is not set or is empty, then the default search path for
-MS-Windows and MS-DOS versions is `".;c:/lib/awk;c:/gnu/lib/awk"'.
+MS-Windows and MS-DOS versions is `.;c:/lib/awk;c:/gnu/lib/awk'.
The search path for OS/2 (32 bit, EMX) is determined by the prefix
directory (most likely `/usr' or `c:/usr') that has been specified as
-an option of the `configure' script like it is the case for the Unix
+an option of the `configure' script as is the case for the Unix
versions. If `c:/usr' is the prefix directory then the default search
path contains `.' and `c:/usr/share/awk'. Additionally, to support
binary distributions of `gawk' for OS/2 systems whose drive `c:' might
@@ -26651,7 +27585,7 @@ not support long file names or might not exist at all, there is a
special environment variable. If `UNIXROOT' specifies a drive then
this specific drive is also searched for program files. E.g., if
`UNIXROOT' is set to `e:' the complete default search path is
-`".;c:/usr/share/awk;e:/usr/share/awk"'.
+`.;c:/usr/share/awk;e:/usr/share/awk'.
An `sh'-like shell (as opposed to `command.com' under MS-DOS or
`cmd.exe' under MS-Windows or OS/2) may be useful for `awk' programming.
@@ -26659,10 +27593,9 @@ The DJGPP collection of tools includes an MS-DOS port of Bash, and
several shells are available for OS/2, including `ksh'.
Under MS-Windows, OS/2 and MS-DOS, `gawk' (and many other text
-programs) silently translate end-of-line `"\r\n"' to `"\n"' on input
-and `"\n"' to `"\r\n"' on output. A special `BINMODE' variable
-(c.e.) allows control over these translations and is interpreted as
-follows:
+programs) silently translate end-of-line `\r\n' to `\n' on input and
+`\n' to `\r\n' on output. A special `BINMODE' variable (c.e.) allows
+control over these translations and is interpreted as follows:
* If `BINMODE' is `"r"', or one, then binary mode is set on read
(i.e., no translations on reads).
@@ -26688,11 +27621,11 @@ and cannot be changed mid-stream.
Versions::). `mawk' and `gawk' handle `BINMODE' similarly; however,
`mawk' adds a `-W BINMODE=N' option and an environment variable that
can set `BINMODE', `RS', and `ORS'. The files `binmode[1-3].awk'
-(under `gnu/lib/awk' in some of the prepared distributions) have been
-chosen to match `mawk''s `-W BINMODE=N' option. These can be changed
-or discarded; in particular, the setting of `RS' giving the fewest
-"surprises" is open to debate. `mawk' uses `RS = "\r\n"' if binary
-mode is set on read, which is appropriate for files with the
+(under `gnu/lib/awk' in some of the prepared binary distributions) have
+been chosen to match `mawk''s `-W BINMODE=N' option. These can be
+changed or discarded; in particular, the setting of `RS' giving the
+fewest "surprises" is open to debate. `mawk' uses `RS = "\r\n"' if
+binary mode is set on read, which is appropriate for files with the
MS-DOS-style end-of-line.
To illustrate, the following examples set binary mode on writes for
@@ -26709,7 +27642,7 @@ These give the same result as the `-W BINMODE=2' option in `mawk'. The
following changes the record separator to `"\r\n"' and sets binary mode
on reads, but does not affect the mode on standard input:
- gawk -v RS="\r\n" --source "BEGIN { BINMODE = 1 }" ...
+ gawk -v RS="\r\n" -e "BEGIN { BINMODE = 1 }" ...
or:
@@ -26757,8 +27690,8 @@ translation of `"\r\n"', since it won't. Caveat Emptor!

File: gawk.info, Node: VMS Installation, Prev: PC Installation, Up: Non-Unix Installation
-B.3.2 How to Compile and Install `gawk' on VMS
-----------------------------------------------
+B.3.2 How to Compile and Install `gawk' on Vax/VMS and OpenVMS
+--------------------------------------------------------------
This node describes how to compile and install `gawk' under VMS. The
older designation "VMS" is used throughout to refer to OpenVMS.
@@ -26795,10 +27728,10 @@ or:
$ MMK/DESCRIPTION=[.vms]descrip.mms gawk
`MMK' is an open source, free, near-clone of `MMS' and can better
-handle `ODS-5' volumes with upper- and lowercase filenames. `MMK' is
+handle ODS-5 volumes with upper- and lowercase file names. `MMK' is
available from `https://github.com/endlesssoftware/mmk'.
- With `ODS-5' volumes and extended parsing enabled, the case of the
+ With ODS-5 volumes and extended parsing enabled, the case of the
target parameter may need to be exact.
`gawk' has been tested under VAX/VMS 7.3 and Alpha/VMS 7.3-1 using
@@ -26806,8 +27739,8 @@ Compaq C V6.4, and Alpha/VMS 7.3, Alpha/VMS 7.3-2, and IA64/VMS 8.3.
The most recent builds used HP C V7.3 on Alpha VMS 8.3 and both Alpha
and IA64 VMS 8.4 used HP C 7.3.(1)
- The `[.vms]gawk_build_steps.txt' provides information on how to build
-`gawk' into a PCSI kit that is compatible with the GNV product.
+ *Note VMS GNV::, for information on building `gawk' as a PCSI kit
+that is compatible with the GNV product.
---------- Footnotes ----------
@@ -26916,7 +27849,7 @@ has no device or directory path information in it, `gawk' looks in the
current directory first, then in the directory specified by the
translation of `AWK_LIBRARY' if the file is not found. If, after
searching in both directories, the file still is not found, `gawk'
-appends the suffix `.awk' to the filename and retries the file search.
+appends the suffix `.awk' to the file name and retries the file search.
If `AWK_LIBRARY' has no definition, a default value of `SYS$LIBRARY:'
is used for it.
@@ -27049,11 +27982,12 @@ get this information with the command `gawk --version'.
Once you have a precise problem, send email to <bug-gawk@gnu.org>.
- Using this address automatically sends a copy of your mail to me.
-If necessary, I can be reached directly at <arnold@skeeve.com>. The
-bug reporting address is preferred since the email list is archived at
-the GNU Project. _All email should be in English, since that is my
-native language._
+ The `gawk' maintainers subscribe to this address and thus they will
+receive your bug report. If necessary, the primary maintainer can be
+reached directly at <arnold@skeeve.com>. The bug reporting address is
+preferred since the email list is archived at the GNU Project. _All
+email should be in English. This is the only language understood in
+common by all the maintainers._
CAUTION: Do _not_ try to report bugs in `gawk' by posting to the
Usenet/Internet newsgroup `comp.lang.awk'. While the `gawk'
@@ -27088,7 +28022,7 @@ considered authoritative if it conflicts with this Info file.
The people maintaining the non-Unix ports of `gawk' are as follows:
MS-DOS with DJGPP Scott Deifik, <scottd.mail@sbcglobal.net>.
-MS-Windows with MINGW Eli Zaretskii, <eliz@gnu.org>.
+MS-Windows with MinGW Eli Zaretskii, <eliz@gnu.org>.
OS/2 Andreas Buening, <andreas.buening@nexgo.de>.
VMS Pat Rankin, <r.pat.rankin@gmail.com>, and John
Malmberg, <wb8tyw@qsl.net>.
@@ -27098,7 +28032,7 @@ z/OS (OS/390) Dave Pitts, <dpitts@cozx.com>.
your report to the <bug-gawk@gnu.org> email list as well.

-File: gawk.info, Node: Other Versions, Prev: Bugs, Up: Installation
+File: gawk.info, Node: Other Versions, Next: Installation summary, Prev: Bugs, Up: Installation
B.5 Other Freely Available `awk' Implementations
================================================
@@ -27176,11 +28110,11 @@ Unix `awk'
since approximately 2003.
`pawk'
- Nelson H.F. Beebe at the University of Utah has modified Brian
- Kernighan's `awk' to provide timing and profiling information. It
- is different from `gawk' with the `--profile' option. (*note
- Profiling::), in that it uses CPU-based profiling, not line-count
- profiling. You may find it at either
+ Nelson H.F. Beebe at the University of Utah has modified BWK `awk'
+ to provide timing and profiling information. It is different from
+ `gawk' with the `--profile' option (*note Profiling::), in that
+ it uses CPU-based profiling, not line-count profiling. You may
+ find it at either
`ftp://ftp.math.utah.edu/pub/pawk/pawk-20030606.tar.gz' or
`http://www.math.utah.edu/pub/pawk/pawk-20030606.tar.gz'.
@@ -27194,12 +28128,13 @@ Busybox Awk
(http://busybox.net).
The OpenSolaris POSIX `awk'
- The version of `awk' in `/usr/xpg4/bin' on Solaris is more-or-less
- POSIX-compliant. It is based on the `awk' from Mortice Kern
- Systems for PCs. This author was able to make it compile and work
- under GNU/Linux with 1-2 hours of work. Making it more generally
- portable (using GNU Autoconf and/or Automake) would take more
- work, and this has not been done, at least to our knowledge.
+ The versions of `awk' in `/usr/xpg4/bin' and `/usr/xpg6/bin' on
+ Solaris are more-or-less POSIX-compliant. They are based on the
+ `awk' from Mortice Kern Systems for PCs. This author was able to
+ make this code compile and work under GNU/Linux with 1-2 hours of
+ work. Making it more generally portable (using GNU Autoconf
+ and/or Automake) would take more work, and this has not been done,
+ at least to our knowledge.
The source code used to be available from the OpenSolaris web site.
However, that project was ended and the web site shut down.
@@ -27224,7 +28159,7 @@ Libmawk
This is a Python module that claims to bring `awk'-like features
to Python. See `https://github.com/alecthomas/pawk' for more
information. (This is not related to Nelson Beebe's modified
- version of Brian Kernighan's `awk', described earlier.)
+ version of BWK `awk', described earlier.)
QSE Awk
This is an embeddable `awk' interpreter. For more information see
@@ -27237,6 +28172,9 @@ QSE Awk
`http://www.quiktrim.org/QTawk.html' for more information,
including the manual and a download link.
+ The project may also be frozen; no new code changes have been made
+ since approximately 2008.
+
Other Versions
See also the Wikipedia article
(http://en.wikipedia.org/wiki/Awk_language#Versions_and_implementations),
@@ -27244,6 +28182,34 @@ Other Versions

+File: gawk.info, Node: Installation summary, Prev: Other Versions, Up: Installation
+
+B.6 Summary
+===========
+
+ * The `gawk' distribution is available from the GNU Project's main
+ distribution site, `ftp.gnu.org'. The canonical build recipe is:
+
+ wget http://ftp.gnu.org/gnu/gawk/gawk-4.1.1.tar.gz
+ tar -xvpzf gawk-4.1.1.tar.gz
+ cd gawk-4.1.1
+ ./configure && make && make check
+
+ * `gawk' may be built on non-POSIX systems as well. The currently
+ supported systems are MS-Windows using DJGPP, MSYS, MinGW and
+ Cygwin, OS/2 using EMX, and both Vax/VMS and OpenVMS.
+ Instructions for each system are included in this major node.
+
+ * Bug reports should be sent via email to <bug-gawk@gnu.org>. Bug
+ reports should be in English, and should include the version of
+ `gawk', how it was compiled, and a short program and data file
+ which demonstrate the problem.
+
+ * There are a number of other freely available `awk'
+ implementations. Many are POSIX-compliant; others are less so.
+
+
+
File: gawk.info, Node: Notes, Next: Basic Concepts, Prev: Installation, Up: Top
Appendix C Implementation Notes
@@ -27262,6 +28228,7 @@ and maintainers of `gawk'. Everything in it applies specifically to
* Implementation Limitations:: Some limitations of the implementation.
* Extension Design:: Design notes about the extension API.
* Old Extension Mechanism:: Some compatibility for old extensions.
+* Notes summary:: Summary of implementation notes.

File: gawk.info, Node: Compatibility Mode, Next: Additions, Up: Notes
@@ -27279,7 +28246,7 @@ one more option available on the command line:
`-Y'
`--parsedebug'
- Prints out the parse stack information as the program is being
+ Print out the parse stack information as the program is being
parsed.
This option is intended only for serious `gawk' developers and not
@@ -27307,8 +28274,8 @@ as well as any considerations you should bear in mind.
`gawk'.
* New Ports:: Porting `gawk' to a new operating
system.
-* Derived Files:: Why derived files are kept in the
- `git' repository.
+* Derived Files:: Why derived files are kept in the Git
+ repository.

File: gawk.info, Node: Accessing The Source, Next: Adding Code, Up: Additions
@@ -27329,9 +28296,9 @@ doesn't have it. Once you have done so, use the command:
git clone git://git.savannah.gnu.org/gawk.git
-This will clone the `gawk' repository. If you are behind a firewall
-that will not allow you to use the Git native protocol, you can still
-access the repository using:
+This clones the `gawk' repository. If you are behind a firewall that
+does not allow you to use the Git native protocol, you can still access
+the repository using:
git clone http://git.savannah.gnu.org/r/gawk.git
@@ -27353,7 +28320,7 @@ C.2.2 Adding New Features
You are free to add any new features you like to `gawk'. However, if
you want your changes to be incorporated into the `gawk' distribution,
there are several steps that you need to take in order to make it
-possible to include your changes:
+possible to include them:
1. Before building the new feature into `gawk' itself, consider
writing it as an extension module (*note Dynamic Extensions::).
@@ -27370,9 +28337,10 @@ possible to include your changes:
3. Get the latest version. It is much easier for me to integrate
changes if they are relative to the most recent distributed
- version of `gawk'. If your version of `gawk' is very old, I may
- not be able to integrate them at all. (*Note Getting::, for
- information on getting the latest version of `gawk'.)
+ version of `gawk', or better yet, relative to the latest code in
+ the Git repository. If your version of `gawk' is very old, I may
+ not be able to integrate your changes at all. (*Note Getting::,
+ for information on getting the latest version of `gawk'.)
4. See *note (Version)Top:: standards, GNU Coding Standards. This
document describes how GNU software should be written. If you
@@ -27469,7 +28437,8 @@ possible to include your changes:
8. Include an entry for the `ChangeLog' file with your submission.
This helps further minimize the amount of work I have to do,
- making it easier for me to accept patches.
+ making it easier for me to accept patches. It is simplest if you
+ just make this part of your diff.
Although this sounds like a lot of work, please remember that while
you may write the new code, I have to maintain it and support it. If it
@@ -27510,18 +28479,24 @@ steps:
people. Thus, you should not change them unless it is for a very
good reason; i.e., changes are not out of the question, but
changes to these files are scrutinized extra carefully. The files
- are `dfa.c', `dfa.h', `getopt1.c', `getopt.c', `getopt.h',
- `install-sh', `mkinstalldirs', `regcomp.c', `regex.c',
- `regexec.c', `regexex.c', `regex.h', `regex_internal.c', and
- `regex_internal.h'.
-
- 5. Be willing to continue to maintain the port. Non-Unix operating
+ are `dfa.c', `dfa.h', `getopt.c', `getopt.h', `getopt1.c',
+ `getopt_int.h', `gettext.h', `regcomp.c', `regex.c', `regex.h',
+ `regex_internal.c', `regex_internal.h', and `regexec.c'.
+
+ 5. A number of other files are provided by the GNU Autotools
+ (Autoconf, Automake, and GNU `gettext'). You should not change
+ them either, unless it is for a very good reason. The files are
+ `ABOUT-NLS', `config.guess', `config.rpath', `config.sub',
+ `depcomp', `INSTALL', `install-sh', `missing', `mkinstalldirs',
+ `xalloc.h', and `ylwrap'.
+
+ 6. Be willing to continue to maintain the port. Non-Unix operating
systems are supported by volunteers who maintain the code needed
to compile and run `gawk' on their systems. If no one volunteers to
maintain a port, it becomes unsupported and it may be necessary to
remove it from the distribution.
- 6. Supply an appropriate `gawkmisc.???' file. Each port has its own
+ 7. Supply an appropriate `gawkmisc.???' file. Each port has its own
`gawkmisc.???' that implements certain operating system specific
functions. This is cleaner than a plethora of `#ifdef's scattered
throughout the code. The `gawkmisc.c' in the main source
@@ -27537,7 +28512,7 @@ steps:
(Currently, this is only an issue for the PC operating system
ports.)
- 7. Supply a `Makefile' as well as any other C source and header files
+ 8. Supply a `Makefile' as well as any other C source and header files
that are necessary for your operating system. All your code
should be in a separate subdirectory, with a name that is the same
as, or reminiscent of, either your operating system or the
@@ -27547,7 +28522,7 @@ steps:
avoid using names for your files that duplicate the names of files
in the main source directory.
- 8. Update the documentation. Please write a section (or sections)
+ 9. Update the documentation. Please write a section (or sections)
for this Info file describing the installation and compilation
steps needed to compile and/or install `gawk' for your system.
@@ -27561,21 +28536,21 @@ style and brace layout that suits your taste.

File: gawk.info, Node: Derived Files, Prev: New Ports, Up: Additions
-C.2.4 Why Generated Files Are Kept In `git'
--------------------------------------------
+C.2.4 Why Generated Files Are Kept In Git
+-----------------------------------------
-If you look at the `gawk' source in the `git' repository, you will
-notice that it includes files that are automatically generated by GNU
-infrastructure tools, such as `Makefile.in' from `automake' and even
-`configure' from `autoconf'.
+If you look at the `gawk' source in the Git repository, you will notice
+that it includes files that are automatically generated by GNU
+infrastructure tools, such as `Makefile.in' from Automake and even
+`configure' from Autoconf.
This is different from many Free Software projects that do not store
the derived files, because that keeps the repository less cluttered,
and it is easier to see the substantive changes when comparing versions
and trying to understand what changed between commits.
- However, there are two reasons why the `gawk' maintainer likes to
-have everything in the repository.
+ However, there are several reasons why the `gawk' maintainer likes
+to have everything in the repository.
First, because it is then easy to reproduce any given version
completely, without relying upon the availability of (older, likely
@@ -27594,10 +28569,10 @@ build?)
If the repository has all the generated files, then it's easy to
just check them out and build. (Or _easier_, depending upon how far
-back we go. `:-)')
+back we go.)
And that brings us to the second (and stronger) reason why all the
-files really need to be in `git'. It boils down to who do you cater
+files really need to be in Git. It boils down to who do you cater
to--the `gawk' developer(s), or the user who just wants to check out a
version and try it out?
@@ -27623,11 +28598,18 @@ idea how to create it, and that was not the only problem.)
He felt _extremely_ frustrated. With respect to that branch, the
maintainer is no different than Jane User who wants to try to build
-`gawk-4.0-stable' or `master' from the repository.
+`gawk-4.1-stable' or `master' from the repository.
Thus, the maintainer thinks that it's not just important, but
critical, that for any given branch, the above incantation _just works_.
+ A third reason to have all the files is that without them, using `git
+bisect' to try to find the commit that introduced a bug is exceedingly
+difficult. The maintainer tried to do that on another project that
+requires running bootstrapping scripts just to create `configure' and
+so on; it was really painful. When the repository is self-contained,
+using `git bisect' in it is very easy.
+
What are some of the consequences and/or actions to take?
1. We don't mind that there are differing files in the different
@@ -27638,32 +28620,26 @@ critical, that for any given branch, the above incantation _just works_.
B. He is really good at `git diff x y > /tmp/diff1 ; gvim
/tmp/diff1' to remove the diffs that aren't of interest in
- order to review code. `:-)'
+ order to review code.
2. It would certainly help if everyone used the same versions of the
GNU tools as he does, which in general are the latest released
- versions of `automake', `autoconf', `bison', and `gettext'.
+ versions of Automake, Autoconf, `bison', and GNU `gettext'.
- A. Installing from source is quite easy. It's how the maintainer
- worked for years under Fedora. He had `/usr/local/bin' at
- the front of his `PATH' and just did:
-
- wget http://ftp.gnu.org/gnu/PACKAGE/PACKAGE-X.Y.Z.tar.gz
- tar -xpzvf PACKAGE-X.Y.Z.tar.gz
- cd PACKAGE-X.Y.Z
- ./configure && make && make check
- make install # as root
-
- B. These days the maintainer uses Ubuntu 12.04 which is medium
- current, but he is already doing the above for `autoconf',
- `automake' and `bison'.
+ Installing from source is quite easy. It's how the maintainer
+ worked for years (and still works). He had `/usr/local/bin' at
+ the front of his `PATH' and just did:
+ wget http://ftp.gnu.org/gnu/PACKAGE/PACKAGE-X.Y.Z.tar.gz
+ tar -xpzvf PACKAGE-X.Y.Z.tar.gz
+ cd PACKAGE-X.Y.Z
+ ./configure && make && make check
+ make install # as root
Most of the above was originally written by the maintainer to other
`gawk' developers. It raised the objection from one of the developers
-"... that anybody pulling down the source from `git' is not an end
-user."
+"... that anybody pulling down the source from Git is not an end user."
However, this is not true. There are "power `awk' users" who can
build `gawk' (using the magic incantation shown previously) but who
@@ -27672,12 +28648,12 @@ all the time.
It was then suggested that there be a `cron' job to create nightly
tarballs of "the source." Here, the problem is that there are source
-trees, corresponding to the various branches! So, nightly tar balls
+trees, corresponding to the various branches! So, nightly tarballs
aren't the answer, especially as the repository can go for weeks
without significant change being introduced.
- Fortunately, the `git' server can meet this need. For any given
-branch named BRANCHNAME, use:
+ Fortunately, the Git server can meet this need. For any given branch
+named BRANCHNAME, use:
wget http://git.savannah.gnu.org/cgit/gawk.git/snapshot/gawk-BRANCHNAME.tar.gz
@@ -27688,9 +28664,9 @@ to retrieve a snapshot of the given branch.
(1) We tried. It was painful.
(2) There is one GNU program that is (in our opinion) severely
-difficult to bootstrap from the `git' repository. For example, on the
-author's old (but still working) PowerPC macintosh with Mac OS X 10.5,
-it was necessary to bootstrap a ton of software, starting with `git'
+difficult to bootstrap from the Git repository. For example, on the
+author's old (but still working) PowerPC Macintosh with Mac OS X 10.5,
+it was necessary to bootstrap a ton of software, starting with Git
itself, in order to try to work with the latest code. It's not
pleasant, and especially on older systems, it's a big waste of time.
@@ -27698,8 +28674,8 @@ pleasant, and especially on older systems, it's a big waste of time.
maintainers had dropped `.gz' and `.bz2' files and only distribute
`.tar.xz' files. It was necessary to bootstrap `xz' first!
- (3) A branch created by one of the other developers that did not
-include the generated files.
+ (3) A branch (since removed) created by one of the other developers
+that did not include the generated files.

File: gawk.info, Node: Future Extensions, Next: Implementation Limitations, Prev: Additions, Up: Notes
@@ -27712,11 +28688,11 @@ C.3 Probable Future Extensions
Hey! -- Larry Wall
- The `TODO' file in the `gawk' Git repository lists possible future
-enhancements. Some of these relate to the source code, and others to
-possible new features. Please see that file for the list. *Note
-Additions::, if you are interested in tackling any of the projects
-listed there.
+ The `TODO' file in the `master' branch of the `gawk' Git repository
+lists possible future enhancements. Some of these relate to the source
+code, and others to possible new features. Please see that file for
+the list. *Note Additions::, if you are interested in tackling any of
+the projects listed there.

File: gawk.info, Node: Implementation Limitations, Next: Extension Design, Prev: Future Extensions, Up: Notes
@@ -27732,7 +28708,7 @@ Item Limit
--------------------------------------------------------------------------
Characters in a character 2^(number of bits per byte)
class
-Length of input record `MAX_INT '
+Length of input record `MAX_INT'
Length of output record Unlimited
Length of source line Unlimited
Number of fields in a record `MAX_LONG'
@@ -27745,9 +28721,9 @@ Number of pipe redirections min(number of processes per user, number
of open files)
Numeric values Double-precision floating point (if not
using MPFR)
-Size of a field `MAX_INT '
-Size of a literal string `MAX_INT '
-Size of a printf string `MAX_INT '
+Size of a field `MAX_INT'
+Size of a literal string `MAX_INT'
+Size of a printf string `MAX_INT'

File: gawk.info, Node: Extension Design, Next: Old Extension Mechanism, Prev: Implementation Limitations, Up: Notes
@@ -27793,9 +28769,9 @@ The old extension mechanism had several problems:
* Being able to call into `gawk' from an extension required linker
facilities that are common on Unix-derived systems but that did
- not work on Windows systems; users wanting extensions on Windows
- had to statically link them into `gawk', even though Windows
- supports dynamic loading of shared objects.
+ not work on MS-Windows systems; users wanting extensions on
+ MS-Windows had to statically link them into `gawk', even though
+ MS-Windows supports dynamic loading of shared objects.
* The API would change occasionally as `gawk' changed; no
compatibility between versions was ever offered or planned for.
@@ -27843,8 +28819,8 @@ Some goals for the new API were:
flattening") in order to loop over all the elements in an easy
fashion for C code.
- - The ability to create arrays (including `gawk''s true
- multidimensional arrays).
+ - The ability to create arrays (including `gawk''s true arrays
+ of arrays).
Some additional important goals were:
@@ -27858,7 +28834,7 @@ Some goals for the new API were:
* The API mechanism should not require access to `gawk''s symbols(1)
by the compile-time or dynamic linker, in order to enable creation
- of extensions that also work on Windows.
+ of extensions that also work on MS-Windows.
During development, it became clear that there were other features
that should be available to extensions, which were also subsequently
@@ -27871,7 +28847,7 @@ provided:
hook into input processing, output processing, and two-way I/O.
* An extension should be able to provide a "call back" function to
- perform clean up actions when `gawk' exits.
+ perform cleanup actions when `gawk' exits.
* An extension should be able to provide a version string so that
`gawk''s `--version' option can provide information about
@@ -27896,7 +28872,7 @@ Mechanism Outline::, for the details.
(1) The "symbols" are the variables and functions defined inside
`gawk'. Access to these symbols by code external to `gawk' loaded
-dynamically at runtime is problematic on Windows.
+dynamically at runtime is problematic on MS-Windows.

File: gawk.info, Node: Extension Other Design Decisions, Next: Extension Future Growth, Prev: Extension New Mechanism Goals, Up: Extension Design
@@ -27964,7 +28940,7 @@ The API can later be expanded, in two ways:
respect to any of the above.

-File: gawk.info, Node: Old Extension Mechanism, Prev: Extension Design, Up: Notes
+File: gawk.info, Node: Old Extension Mechanism, Next: Notes summary, Prev: Extension Design, Up: Notes
C.6 Compatibility For Old Extensions
====================================
@@ -28002,6 +28978,37 @@ old extensions that you may have to use the new API described in *note
Dynamic Extensions::.

+File: gawk.info, Node: Notes summary, Prev: Old Extension Mechanism, Up: Notes
+
+C.7 Summary
+===========
+
+ * `gawk''s extensions can be disabled with either the
+ `--traditional' option or with the `--posix' option. The
+ `--parsedebug' option is available if `gawk' is compiled with
+ `-DDEBUG'.
+
+ * The source code for `gawk' is maintained in a publicly accessible
+ Git repository. Anyone may check it out and view the source.
+
+ * Contributions to `gawk' are welcome. Following the steps outlined
+ in this major node will make it easier to integrate your
+ contributions into the code base. This applies both to new
+ feature contributions and to ports to additional operating systems.
+
+ * `gawk' has some limits--generally those that are imposed by the
+ machine architecture.
+
+ * The extension API design was intended to solve a number of problems
+ with the previous extension mechanism, enable features needed by
+ the `xgawk' project, and provide binary compatibility going
+ forward.
+
+ * The previous extension mechanism is still supported in version 4.1
+ of `gawk', but it _will_ be removed in the next major release.
+
+
+
File: gawk.info, Node: Basic Concepts, Next: Glossary, Prev: Notes, Up: Top
Appendix D Basic Programming Concepts
@@ -28138,7 +29145,7 @@ characters that comprise them. Individual variables, as well as
numeric and string variables, are referred to as "scalar" values.
Groups of values, such as arrays, are not scalars.
- *note General Arithmetic::, provided a basic introduction to numeric
+ *note Computer Arithmetic::, provided a basic introduction to numeric
types (integer and floating-point) and how they are used in a computer.
Please review that information, including a number of caveats that were
presented.
@@ -28152,15 +29159,14 @@ like this: `""'.
Humans are used to working in decimal; i.e., base 10. In base 10,
numbers go from 0 to 9, and then "roll over" into the next column.
-(Remember grade school? 42 is 4 times 10 plus 2.)
+(Remember grade school? 42 = 4 x 10 + 2.)
There are other number bases though. Computers commonly use base 2
or "binary", base 8 or "octal", and base 16 or "hexadecimal". In
binary, each column represents two times the value in the column to its
right. Each column may contain either a 0 or a 1. Thus, binary 1010
-represents 1 times 8, plus 0 times 4, plus 1 times 2, plus 0 times 1,
-or decimal 10. Octal and hexadecimal are discussed more in *note
-Nondecimal-numbers::.
+represents (1 x 8) + (0 x 4) + (1 x 2) + (0 x 1), or decimal 10. Octal
+and hexadecimal are discussed more in *note Nondecimal-numbers::.
At the very lowest level, computers store values as groups of binary
digits, or "bits". Modern computers group bits into groups of eight,
@@ -28192,8 +29198,7 @@ Glossary
Action
A series of `awk' statements attached to a rule. If the rule's
pattern matches an input record, `awk' executes the rule's action.
- Actions are always enclosed in curly braces. (*Note Action
- Overview::.)
+ Actions are always enclosed in braces. (*Note Action Overview::.)
Amazing `awk' Assembler
Henry Spencer at the University of Toronto wrote a retargetable
@@ -28279,9 +29284,9 @@ Boolean Expression
Bourne Shell
The standard shell (`/bin/sh') on Unix and Unix-like systems,
- originally written by Steven R. Bourne. Many shells (Bash, `ksh',
- `pdksh', `zsh') are generally upwardly compatible with the Bourne
- shell.
+ originally written by Steven R. Bourne at Bell Laboratories. Many
+ shells (Bash, `ksh', `pdksh', `zsh') are generally upwardly
+ compatible with the Bourne shell.
Built-in Function
The `awk' language provides built-in functions that perform various
@@ -28302,7 +29307,8 @@ Built-in Variable
Variables::.)
Braces
- See "Curly Braces."
+ The characters `{' and `}'. Braces are used in `awk' for
+ delimiting actions, compound statements, and function bodies.
C
The system programming language that most GNU software is written
@@ -28323,8 +29329,8 @@ Character Set
ASCII (American Standard Code for Information Interchange). Many
European countries use an extension of ASCII known as ISO-8859-1
(ISO Latin-1). The Unicode character set (http://www.unicode.org)
- is becoming increasingly popular and standard, and is particularly
- widely used on GNU/Linux systems.
+ is increasingly popular and standard, and is particularly widely
+ used on GNU/Linux systems.
CHEM
A preprocessor for `pic' that reads descriptions of molecules and
@@ -28334,7 +29340,7 @@ CHEM
Cookie
A peculiar goodie, token, saying or remembrance produced by or
- presented to a program. (With thanks to Doug McIlroy.)
+ presented to a program. (With thanks to Professor Doug McIlroy.)
Coprocess
A subordinate program with which two-way communication is
@@ -28369,8 +29375,7 @@ Comparison Expression
process. (*Note Typing and Comparison::.)
Curly Braces
- The characters `{' and `}'. Curly braces are used in `awk' for
- delimiting actions, compound statements, and function bodies.
+ See "Braces."
Dark Corner
An area in the language where specifications often were (or still
@@ -28410,8 +29415,8 @@ Dynamic Regular Expression
(*Note Computed Regexps::.)
Environment
- A collection of strings, of the form NAME`='`val', that each
- program has available to it. Users generally place values into the
+ A collection of strings, of the form `NAME=VAL', that each program
+ has available to it. Users generally place values into the
environment in order to provide information to various programs.
Typical examples are the environment variables `HOME' and `PATH'.
@@ -28461,11 +29466,11 @@ Floating-Point Number
See also "Double Precision" and "Single Precision."
Format
- Format strings are used to control the appearance of output in the
- `strftime()' and `sprintf()' functions, and are used in the
- `printf' statement as well. Also, data conversions from numbers
- to strings are controlled by the format strings contained in the
- built-in variables `CONVFMT' and `OFMT'. (*Note Control Letters::.)
+ Format strings control the appearance of output in the
+ `strftime()' and `sprintf()' functions, and in the `printf'
+ statement as well. Also, data conversions from numbers to strings
+ are controlled by the format strings contained in the built-in
+ variables `CONVFMT' and `OFMT'. (*Note Control Letters::.)
Free Documentation License
This document describes the terms under which this Info file is
@@ -28520,8 +29525,8 @@ Hexadecimal
Base 16 notation, where the digits are `0'-`9' and `A'-`F', with
`A' representing 10, `B' representing 11, and so on, up to `F' for
15. Hexadecimal numbers are written in C using a leading `0x', to
- indicate their base. Thus, `0x12' is 18 (1 times 16 plus 2).
- *Note Nondecimal-numbers::.
+ indicate their base. Thus, `0x12' is 18 ((1 x 16) + 2). *Note
+ Nondecimal-numbers::.
I/O
Abbreviation for "Input/Output," the act of moving data into and/or
@@ -28578,7 +29583,7 @@ Keyword
`gawk''s keywords are: `BEGIN', `BEGINFILE', `END', `ENDFILE',
`break', `case', `continue', `default', `delete', `do...while',
`else', `exit', `for...in', `for', `function', `func', `if',
- `nextfile', `next', `switch', and `while'.
+ `next', `nextfile', `switch', and `while'.
Lesser General Public License
This document describes the terms under which binary library
@@ -28634,11 +29639,7 @@ Number
Octal
Base-eight notation, where the digits are `0'-`7'. Octal numbers
are written in C using a leading `0', to indicate their base.
- Thus, `013' is 11 (one times 8 plus 3). *Note
- Nondecimal-numbers::.
-
-P1003.1
- See "POSIX."
+ Thus, `013' is 11 ((1 x 8) + 3). *Note Nondecimal-numbers::.
Pattern
Patterns tell `awk' which input records are interesting to which
@@ -28679,9 +29680,9 @@ Range (of input lines)
specify single lines. (*Note Pattern Overview::.)
Recursion
- When a function calls itself, either directly or indirectly. As
- long as this is not clear, refer to the entry for "recursion." If
- this is clear, stop, and proceed to the next entry.
+ When a function calls itself, either directly or indirectly. If
+ this is clear, stop, and proceed to the next entry. Otherwise,
+ refer to the entry for "recursion."
Redirection
Redirection means performing input from something other than the
@@ -28762,8 +29763,8 @@ Single Precision
parts. Single precision numbers keep track of fewer digits than
do double precision numbers, but operations on them are sometimes
less expensive in terms of CPU time. This is the type used by
- some very old versions of `awk' to store numeric values. It is
- the C type `float'.
+ some ancient versions of `awk' to store numeric values. It is the
+ C type `float'.
Space
The character generated by hitting the space bar on the keyboard.
@@ -28797,7 +29798,7 @@ Text Domain
Timestamp
A value in the "seconds since the epoch" format used by Unix and
POSIX systems. Used for the `gawk' functions `mktime()',
- `strftime()', and `systime()'. See also "Epoch" and "UTC."
+ `strftime()', and `systime()'. See also "Epoch," "GMT," and "UTC."
Unix
A computer operating system originally developed in the early
@@ -30036,7 +31037,7 @@ Index
* Menu:
* ! (exclamation point), ! operator: Boolean Ops. (line 67)
-* ! (exclamation point), ! operator <1>: Egrep Program. (line 170)
+* ! (exclamation point), ! operator <1>: Egrep Program. (line 175)
* ! (exclamation point), ! operator <2>: Ranges. (line 48)
* ! (exclamation point), ! operator: Precedence. (line 52)
* ! (exclamation point), != operator <1>: Precedence. (line 65)
@@ -30048,12 +31049,11 @@ Index
* ! (exclamation point), !~ operator <3>: Comparison Operators.
(line 11)
* ! (exclamation point), !~ operator <4>: Regexp Constants. (line 6)
-* ! (exclamation point), !~ operator <5>: Computed Regexps. (line 6)
-* ! (exclamation point), !~ operator <6>: Case-sensitivity. (line 26)
+* ! (exclamation point), !~ operator <5>: Case-sensitivity. (line 26)
+* ! (exclamation point), !~ operator <6>: Computed Regexps. (line 6)
* ! (exclamation point), !~ operator: Regexp Usage. (line 19)
-* " (double quote) in shell commands: Read Terminal. (line 25)
-* " (double quote), in regexp constants: Computed Regexps. (line 28)
-* " (double quote), in shell commands: Quoting. (line 37)
+* " (double quote), in regexp constants: Computed Regexps. (line 29)
+* " (double quote), in shell commands: Quoting. (line 54)
* # (number sign), #! (executable scripts): Executable Scripts.
(line 6)
* # (number sign), commenting: Comments. (line 6)
@@ -30064,46 +31064,46 @@ Index
* $ (dollar sign), regexp operator: Regexp Operators. (line 35)
* % (percent sign), % operator: Precedence. (line 55)
* % (percent sign), %= operator <1>: Precedence. (line 95)
-* % (percent sign), %= operator: Assignment Ops. (line 129)
+* % (percent sign), %= operator: Assignment Ops. (line 130)
* & (ampersand), && operator <1>: Precedence. (line 86)
* & (ampersand), && operator: Boolean Ops. (line 57)
* & (ampersand), gsub()/gensub()/sub() functions and: Gory Details.
(line 6)
* ' (single quote): One-shot. (line 15)
* ' (single quote) in gawk command lines: Long. (line 33)
-* ' (single quote), in shell commands: Quoting. (line 31)
+* ' (single quote), in shell commands: Quoting. (line 48)
* ' (single quote), vs. apostrophe: Comments. (line 27)
-* ' (single quote), with double quotes: Quoting. (line 53)
+* ' (single quote), with double quotes: Quoting. (line 70)
* () (parentheses), in a profile: Profiling. (line 146)
-* () (parentheses), regexp operator: Regexp Operators. (line 79)
+* () (parentheses), regexp operator: Regexp Operators. (line 81)
* * (asterisk), * operator, as multiplication operator: Precedence.
(line 55)
* * (asterisk), * operator, as regexp operator: Regexp Operators.
- (line 87)
+ (line 89)
* * (asterisk), * operator, null strings, matching: Gory Details.
- (line 164)
+ (line 143)
* * (asterisk), ** operator <1>: Precedence. (line 49)
* * (asterisk), ** operator: Arithmetic Ops. (line 81)
* * (asterisk), **= operator <1>: Precedence. (line 95)
-* * (asterisk), **= operator: Assignment Ops. (line 129)
+* * (asterisk), **= operator: Assignment Ops. (line 130)
* * (asterisk), *= operator <1>: Precedence. (line 95)
-* * (asterisk), *= operator: Assignment Ops. (line 129)
+* * (asterisk), *= operator: Assignment Ops. (line 130)
* + (plus sign), + operator: Precedence. (line 52)
* + (plus sign), ++ operator <1>: Precedence. (line 46)
* + (plus sign), ++ operator: Increment Ops. (line 11)
* + (plus sign), += operator <1>: Precedence. (line 95)
* + (plus sign), += operator: Assignment Ops. (line 82)
-* + (plus sign), regexp operator: Regexp Operators. (line 102)
+* + (plus sign), regexp operator: Regexp Operators. (line 105)
* , (comma), in range patterns: Ranges. (line 6)
* - (hyphen), - operator: Precedence. (line 52)
* - (hyphen), -- operator <1>: Precedence. (line 46)
* - (hyphen), -- operator: Increment Ops. (line 48)
* - (hyphen), -= operator <1>: Precedence. (line 95)
-* - (hyphen), -= operator: Assignment Ops. (line 129)
+* - (hyphen), -= operator: Assignment Ops. (line 130)
* - (hyphen), filenames beginning with: Options. (line 59)
* - (hyphen), in bracket expressions: Bracket Expressions. (line 17)
* --assign option: Options. (line 32)
-* --bignum option: Options. (line 201)
+* --bignum option: Options. (line 205)
* --characters-as-bytes option: Options. (line 68)
* --copyright option: Options. (line 88)
* --debug option: Options. (line 108)
@@ -30123,32 +31123,32 @@ Index
* --gen-pot option: Options. (line 147)
* --help option: Options. (line 154)
* --include option: Options. (line 159)
-* --lint option <1>: Options. (line 182)
+* --lint option <1>: Options. (line 185)
* --lint option: Command Line. (line 20)
-* --lint-old option: Options. (line 288)
+* --lint-old option: Options. (line 293)
* --load option: Options. (line 173)
* --non-decimal-data option <1>: Nondecimal Data. (line 6)
-* --non-decimal-data option: Options. (line 207)
+* --non-decimal-data option: Options. (line 211)
* --non-decimal-data option, strtonum() function and: Nondecimal Data.
(line 36)
-* --optimize option: Options. (line 228)
-* --posix option: Options. (line 247)
-* --posix option, --traditional option and: Options. (line 266)
-* --pretty-print option: Options. (line 220)
+* --optimize option: Options. (line 235)
+* --posix option: Options. (line 252)
+* --posix option, --traditional option and: Options. (line 271)
+* --pretty-print option: Options. (line 224)
* --profile option <1>: Profiling. (line 12)
-* --profile option: Options. (line 235)
-* --re-interval option: Options. (line 272)
-* --sandbox option: Options. (line 279)
+* --profile option: Options. (line 240)
+* --re-interval option: Options. (line 277)
+* --sandbox option: Options. (line 284)
* --sandbox option, disabling system() function: I/O Functions.
- (line 94)
+ (line 97)
* --sandbox option, input redirection with getline: Getline. (line 19)
* --sandbox option, output redirection with print, printf: Redirection.
(line 6)
* --source option: Options. (line 117)
* --traditional option: Options. (line 81)
-* --traditional option, --posix option and: Options. (line 266)
-* --use-lc-numeric option: Options. (line 215)
-* --version option: Options. (line 293)
+* --traditional option, --posix option and: Options. (line 271)
+* --use-lc-numeric option: Options. (line 219)
+* --version option: Options. (line 298)
* --with-whiny-user-strftime configuration option: Additional Configuration Options.
(line 35)
* -b option: Options. (line 68)
@@ -30156,50 +31156,51 @@ Index
* -c option: Options. (line 81)
* -D option: Options. (line 108)
* -d option: Options. (line 93)
+* -e option: Options. (line 333)
* -E option: Options. (line 125)
* -e option: Options. (line 117)
* -f option: Options. (line 25)
* -F option: Options. (line 21)
* -f option: Long. (line 12)
-* -F option, -Ft sets FS to TAB: Options. (line 301)
-* -F option, command line: Command Line Field Separator.
+* -F option, -Ft sets FS to TAB: Options. (line 306)
+* -F option, command-line: Command Line Field Separator.
(line 6)
-* -f option, multiple uses: Options. (line 306)
+* -f option, multiple uses: Options. (line 311)
* -g option: Options. (line 147)
* -h option: Options. (line 154)
* -i option: Options. (line 159)
-* -L option: Options. (line 288)
+* -L option: Options. (line 293)
* -l option: Options. (line 173)
-* -M option: Options. (line 201)
-* -N option: Options. (line 215)
-* -n option: Options. (line 207)
-* -O option: Options. (line 228)
-* -o option: Options. (line 220)
-* -P option: Options. (line 247)
-* -p option: Options. (line 235)
-* -r option: Options. (line 272)
-* -S option: Options. (line 279)
+* -M option: Options. (line 205)
+* -N option: Options. (line 219)
+* -n option: Options. (line 211)
+* -O option: Options. (line 235)
+* -o option: Options. (line 224)
+* -P option: Options. (line 252)
+* -p option: Options. (line 240)
+* -r option: Options. (line 277)
+* -S option: Options. (line 284)
* -v option: Assignment Options. (line 12)
-* -V option: Options. (line 293)
+* -V option: Options. (line 298)
* -v option: Options. (line 32)
* -W option: Options. (line 46)
-* . (period), regexp operator: Regexp Operators. (line 43)
-* .gmo files: Explaining gettext. (line 41)
-* .gmo files, converting from .po: I18N Example. (line 62)
+* . (period), regexp operator: Regexp Operators. (line 44)
+* .gmo files: Explaining gettext. (line 42)
* .gmo files, specifying directory of <1>: Programmer i18n. (line 47)
-* .gmo files, specifying directory of: Explaining gettext. (line 53)
+* .gmo files, specifying directory of: Explaining gettext. (line 54)
+* .mo files, converting from .po: I18N Example. (line 63)
* .po files <1>: Translator i18n. (line 6)
-* .po files: Explaining gettext. (line 36)
-* .po files, converting to .gmo: I18N Example. (line 62)
-* .pot files: Explaining gettext. (line 30)
+* .po files: Explaining gettext. (line 37)
+* .po files, converting to .mo: I18N Example. (line 63)
+* .pot files: Explaining gettext. (line 31)
* / (forward slash) to enclose regular expressions: Regexp. (line 10)
* / (forward slash), / operator: Precedence. (line 55)
* / (forward slash), /= operator <1>: Precedence. (line 95)
-* / (forward slash), /= operator: Assignment Ops. (line 129)
+* / (forward slash), /= operator: Assignment Ops. (line 130)
* / (forward slash), /= operator, vs. /=.../ regexp constant: Assignment Ops.
- (line 147)
+ (line 148)
* / (forward slash), patterns and: Expression Patterns. (line 24)
-* /= operator vs. /=.../ regexp constant: Assignment Ops. (line 147)
+* /= operator vs. /=.../ regexp constant: Assignment Ops. (line 148)
* /dev/... special files: Special FD. (line 46)
* /dev/fd/N special files (gawk): Special FD. (line 46)
* /inet/... special files (gawk): TCP/IP Networking. (line 6)
@@ -30236,13 +31237,16 @@ Index
* ? (question mark), regexp operator <1>: GNU Regexp Operators.
(line 59)
* ? (question mark), regexp operator: Regexp Operators. (line 111)
-* [] (square brackets), regexp operator: Regexp Operators. (line 55)
+* @-notation for indirect function calls: Indirect Calls. (line 47)
+* @include directive: Include Files. (line 8)
+* @load directive: Loading Shared Libraries.
+ (line 8)
+* [] (square brackets), regexp operator: Regexp Operators. (line 56)
* \ (backslash): Comments. (line 50)
-* \ (backslash) in shell commands: Read Terminal. (line 25)
-* \ (backslash), \" escape sequence: Escape Sequences. (line 76)
+* \ (backslash), \" escape sequence: Escape Sequences. (line 84)
* \ (backslash), \' operator (gawk): GNU Regexp Operators.
(line 56)
-* \ (backslash), \/ escape sequence: Escape Sequences. (line 69)
+* \ (backslash), \/ escape sequence: Escape Sequences. (line 75)
* \ (backslash), \< operator (gawk): GNU Regexp Operators.
(line 30)
* \ (backslash), \> operator (gawk): GNU Regexp Operators.
@@ -30272,7 +31276,6 @@ Index
(line 38)
* \ (backslash), as field separator: Command Line Field Separator.
(line 27)
-* \ (backslash), continuing lines and <1>: Egrep Program. (line 220)
* \ (backslash), continuing lines and: Statements/Lines. (line 19)
* \ (backslash), continuing lines and, comments and: Statements/Lines.
(line 76)
@@ -30283,24 +31286,24 @@ Index
* \ (backslash), in bracket expressions: Bracket Expressions. (line 17)
* \ (backslash), in escape sequences: Escape Sequences. (line 6)
* \ (backslash), in escape sequences, POSIX and: Escape Sequences.
- (line 112)
-* \ (backslash), in regexp constants: Computed Regexps. (line 28)
-* \ (backslash), in shell commands: Quoting. (line 31)
+ (line 120)
+* \ (backslash), in regexp constants: Computed Regexps. (line 29)
+* \ (backslash), in shell commands: Quoting. (line 48)
* \ (backslash), regexp operator: Regexp Operators. (line 18)
* ^ (caret), ^ operator: Precedence. (line 49)
* ^ (caret), ^= operator <1>: Precedence. (line 95)
-* ^ (caret), ^= operator: Assignment Ops. (line 129)
+* ^ (caret), ^= operator: Assignment Ops. (line 130)
* ^ (caret), in bracket expressions: Bracket Expressions. (line 17)
* ^ (caret), in FS: Regexp Field Splitting.
(line 59)
* ^ (caret), regexp operator <1>: GNU Regexp Operators.
(line 59)
* ^ (caret), regexp operator: Regexp Operators. (line 22)
-* _ (underscore), C macro: Explaining gettext. (line 70)
+* _ (underscore), C macro: Explaining gettext. (line 71)
* _ (underscore), in names of private variables: Library Names.
(line 29)
* _ (underscore), translatable string: Programmer i18n. (line 69)
-* _gr_init() user-defined function: Group Functions. (line 82)
+* _gr_init() user-defined function: Group Functions. (line 83)
* _ord_init() user-defined function: Ordinal Functions. (line 16)
* _pw_init() user-defined function: Passwd Functions. (line 105)
* accessing fields: Fields. (line 6)
@@ -30312,7 +31315,7 @@ Index
* actions, control statements in: Statements. (line 6)
* actions, default: Very Simple. (line 34)
* actions, empty: Very Simple. (line 39)
-* Ada programming language: Glossary. (line 20)
+* Ada programming language: Glossary. (line 19)
* adding, features to gawk: Adding Code. (line 6)
* adding, fields: Changing Fields. (line 53)
* advanced features, fixed-width data: Constant Size. (line 10)
@@ -30320,7 +31323,7 @@ Index
* advanced features, network programming: TCP/IP Networking. (line 6)
* advanced features, nondecimal input data: Nondecimal Data. (line 6)
* advanced features, processes, communicating with: Two-way I/O.
- (line 23)
+ (line 6)
* advanced features, specifying field content: Splitting By Content.
(line 10)
* Aho, Alfred <1>: Contributors. (line 11)
@@ -30330,11 +31333,10 @@ Index
* algorithms: Basic High Level. (line 68)
* allocating memory for extensions: Memory Allocation Functions.
(line 6)
-* Alpha (DEC): Manual History. (line 28)
-* amazing awk assembler (aaa): Glossary. (line 12)
-* amazingly workable formatter (awf): Glossary. (line 25)
+* amazing awk assembler (aaa): Glossary. (line 11)
+* amazingly workable formatter (awf): Glossary. (line 24)
* ambiguity, syntactic: /= operator vs. /=.../ regexp constant: Assignment Ops.
- (line 147)
+ (line 148)
* ampersand (&), && operator <1>: Precedence. (line 86)
* ampersand (&), && operator: Boolean Ops. (line 57)
* ampersand (&), gsub()/gensub()/sub() functions and: Gory Details.
@@ -30344,7 +31346,7 @@ Index
* and: Bitwise Functions. (line 39)
* AND bitwise operation: Bitwise Functions. (line 6)
* and Boolean-logic operator: Boolean Ops. (line 6)
-* ANSI: Glossary. (line 35)
+* ANSI: Glossary. (line 34)
* API informational variables: Extension API Informational Variables.
(line 6)
* API version: Extension Versioning.
@@ -30355,18 +31357,18 @@ Index
(line 6)
* archeologists: Bugs. (line 6)
* arctangent: Numeric Functions. (line 11)
-* ARGC/ARGV variables: Auto-set. (line 11)
+* ARGC/ARGV variables: Auto-set. (line 15)
* ARGC/ARGV variables, command-line arguments: Other Arguments.
(line 12)
* ARGC/ARGV variables, how to use: ARGC and ARGV. (line 6)
-* ARGC/ARGV variables, portability and: Executable Scripts. (line 42)
-* ARGIND variable: Auto-set. (line 40)
+* ARGC/ARGV variables, portability and: Executable Scripts. (line 59)
+* ARGIND variable: Auto-set. (line 44)
* ARGIND variable, command-line arguments: Other Arguments. (line 12)
* arguments, command-line <1>: ARGC and ARGV. (line 6)
-* arguments, command-line <2>: Auto-set. (line 11)
+* arguments, command-line <2>: Auto-set. (line 15)
* arguments, command-line: Other Arguments. (line 6)
* arguments, command-line, invoking awk: Command Line. (line 6)
-* arguments, in function calls: Function Calls. (line 16)
+* arguments, in function calls: Function Calls. (line 18)
* arguments, processing: Getopt Function. (line 6)
* ARGV array, indexing into: Other Arguments. (line 12)
* arithmetic operators: Arithmetic Ops. (line 6)
@@ -30374,15 +31376,15 @@ Index
* array members: Reference to Elements.
(line 6)
* array scanning order, controlling: Controlling Scanning.
- (line 12)
-* array, number of elements: String Functions. (line 194)
+ (line 14)
+* array, number of elements: String Functions. (line 197)
* arrays: Arrays. (line 6)
* arrays of arrays: Arrays of Arrays. (line 6)
* arrays, an example of using: Array Example. (line 6)
-* arrays, and IGNORECASE variable: Array Intro. (line 91)
+* arrays, and IGNORECASE variable: Array Intro. (line 94)
* arrays, as parameters to functions: Pass By Value/Reference.
(line 47)
-* arrays, associative: Array Intro. (line 49)
+* arrays, associative: Array Intro. (line 50)
* arrays, associative, library functions and: Library Names. (line 57)
* arrays, deleting entire contents: Delete. (line 39)
* arrays, elements that don't exist: Reference to Elements.
@@ -30391,9 +31393,9 @@ Index
* arrays, elements, deleting: Delete. (line 6)
* arrays, elements, order of access by in operator: Scanning an Array.
(line 48)
-* arrays, elements, retrieving number of: String Functions. (line 32)
+* arrays, elements, retrieving number of: String Functions. (line 42)
* arrays, for statement and: Scanning an Array. (line 20)
-* arrays, indexing: Array Intro. (line 49)
+* arrays, indexing: Array Intro. (line 50)
* arrays, merging into strings: Join Function. (line 6)
* arrays, multidimensional: Multidimensional. (line 10)
* arrays, multidimensional, scanning: Multiscanning. (line 11)
@@ -30407,7 +31409,7 @@ Index
(line 6)
* arrays, sorting, and IGNORECASE variable: Array Sorting Functions.
(line 83)
-* arrays, sparse: Array Intro. (line 70)
+* arrays, sparse: Array Intro. (line 72)
* arrays, subscripts, uninitialized variables as: Uninitialized Subscripts.
(line 6)
* arrays, unassigned elements: Reference to Elements.
@@ -30418,12 +31420,12 @@ Index
* ASCII: Ordinal Functions. (line 45)
* asort <1>: Array Sorting Functions.
(line 6)
-* asort: String Functions. (line 32)
+* asort: String Functions. (line 42)
* asort() function (gawk), arrays, sorting: Array Sorting Functions.
(line 6)
* asorti <1>: Array Sorting Functions.
(line 6)
-* asorti: String Functions. (line 32)
+* asorti: String Functions. (line 42)
* asorti() function (gawk), arrays, sorting: Array Sorting Functions.
(line 6)
* assert() function (C library): Assert Function. (line 6)
@@ -30435,29 +31437,29 @@ Index
* assignment operators, evaluation order: Assignment Ops. (line 111)
* assignment operators, lvalues/rvalues: Assignment Ops. (line 32)
* assignments as filenames: Ignoring Assigns. (line 6)
-* associative arrays: Array Intro. (line 49)
+* associative arrays: Array Intro. (line 50)
* asterisk (*), * operator, as multiplication operator: Precedence.
(line 55)
* asterisk (*), * operator, as regexp operator: Regexp Operators.
- (line 87)
+ (line 89)
* asterisk (*), * operator, null strings, matching: Gory Details.
- (line 164)
+ (line 143)
* asterisk (*), ** operator <1>: Precedence. (line 49)
* asterisk (*), ** operator: Arithmetic Ops. (line 81)
* asterisk (*), **= operator <1>: Precedence. (line 95)
-* asterisk (*), **= operator: Assignment Ops. (line 129)
+* asterisk (*), **= operator: Assignment Ops. (line 130)
* asterisk (*), *= operator <1>: Precedence. (line 95)
-* asterisk (*), *= operator: Assignment Ops. (line 129)
+* asterisk (*), *= operator: Assignment Ops. (line 130)
* atan2: Numeric Functions. (line 11)
* automatic displays, in debugger: Debugger Info. (line 24)
-* awf (amazingly workable formatter) program: Glossary. (line 25)
+* awf (amazingly workable formatter) program: Glossary. (line 24)
* awk debugging, enabling: Options. (line 108)
-* awk language, POSIX version: Assignment Ops. (line 136)
-* awk profiling, enabling: Options. (line 235)
+* awk language, POSIX version: Assignment Ops. (line 137)
+* awk profiling, enabling: Options. (line 240)
* awk programs <1>: Two Rules. (line 6)
* awk programs <2>: Executable Scripts. (line 6)
* awk programs: Getting Started. (line 12)
-* awk programs, complex: When. (line 29)
+* awk programs, complex: When. (line 27)
* awk programs, documenting <1>: Library Names. (line 6)
* awk programs, documenting: Comments. (line 6)
* awk programs, examples of: Sample Programs. (line 6)
@@ -30477,25 +31479,24 @@ Index
(line 6)
* awk, function of: Getting Started. (line 6)
* awk, gawk and <1>: This Manual. (line 14)
-* awk, gawk and: Preface. (line 23)
+* awk, gawk and: Preface. (line 21)
* awk, history of: History. (line 17)
* awk, implementation issues, pipes: Redirection. (line 135)
* awk, implementations: Other Versions. (line 6)
* awk, implementations, limits: Getline Notes. (line 14)
* awk, invoking: Command Line. (line 6)
* awk, new vs. old: Names. (line 6)
-* awk, new vs. old, OFMT variable: Conversion. (line 55)
-* awk, POSIX and: Preface. (line 23)
-* awk, POSIX and, See Also POSIX awk: Preface. (line 23)
+* awk, new vs. old, OFMT variable: Strings And Numbers. (line 57)
+* awk, POSIX and: Preface. (line 21)
+* awk, POSIX and, See Also POSIX awk: Preface. (line 21)
* awk, regexp constants and: Comparison Operators.
(line 102)
-* awk, See Also gawk: Preface. (line 36)
+* awk, See Also gawk: Preface. (line 34)
* awk, terms describing: This Manual. (line 6)
* awk, uses for <1>: When. (line 6)
* awk, uses for <2>: Getting Started. (line 12)
-* awk, uses for: Preface. (line 23)
-* awk, versions of <1>: V7/SVR3.1. (line 6)
-* awk, versions of: Names. (line 10)
+* awk, uses for: Preface. (line 21)
+* awk, versions of: V7/SVR3.1. (line 6)
* awk, versions of, changes between SVR3.1 and SVR4: SVR4. (line 6)
* awk, versions of, changes between SVR4 and POSIX awk: POSIX.
(line 6)
@@ -30512,11 +31513,10 @@ Index
* awkvars.out file: Options. (line 93)
* b debugger command (alias for break): Breakpoint Control. (line 11)
* backslash (\): Comments. (line 50)
-* backslash (\) in shell commands: Read Terminal. (line 25)
-* backslash (\), \" escape sequence: Escape Sequences. (line 76)
+* backslash (\), \" escape sequence: Escape Sequences. (line 84)
* backslash (\), \' operator (gawk): GNU Regexp Operators.
(line 56)
-* backslash (\), \/ escape sequence: Escape Sequences. (line 69)
+* backslash (\), \/ escape sequence: Escape Sequences. (line 75)
* backslash (\), \< operator (gawk): GNU Regexp Operators.
(line 30)
* backslash (\), \> operator (gawk): GNU Regexp Operators.
@@ -30546,7 +31546,6 @@ Index
(line 38)
* backslash (\), as field separator: Command Line Field Separator.
(line 27)
-* backslash (\), continuing lines and <1>: Egrep Program. (line 220)
* backslash (\), continuing lines and: Statements/Lines. (line 19)
* backslash (\), continuing lines and, comments and: Statements/Lines.
(line 76)
@@ -30557,9 +31556,9 @@ Index
* backslash (\), in bracket expressions: Bracket Expressions. (line 17)
* backslash (\), in escape sequences: Escape Sequences. (line 6)
* backslash (\), in escape sequences, POSIX and: Escape Sequences.
- (line 112)
-* backslash (\), in regexp constants: Computed Regexps. (line 28)
-* backslash (\), in shell commands: Quoting. (line 31)
+ (line 120)
+* backslash (\), in regexp constants: Computed Regexps. (line 29)
+* backslash (\), in shell commands: Quoting. (line 48)
* backslash (\), regexp operator: Regexp Operators. (line 18)
* backtrace debugger command: Execution Stack. (line 13)
* Beebe, Nelson H.F. <1>: Other Versions. (line 78)
@@ -30575,9 +31574,9 @@ Index
* BEGIN pattern, getline and: Getline Notes. (line 19)
* BEGIN pattern, headings, adding: Print Examples. (line 43)
* BEGIN pattern, next/nextfile statements and <1>: Next Statement.
- (line 45)
+ (line 44)
* BEGIN pattern, next/nextfile statements and: I/O And BEGIN/END.
- (line 37)
+ (line 36)
* BEGIN pattern, OFS/ORS variables, assigning values to: Output Separators.
(line 20)
* BEGIN pattern, operators and: Using BEGIN/END. (line 17)
@@ -30593,14 +31592,14 @@ Index
* Benzinger, Michael: Contributors. (line 97)
* Berry, Karl <1>: Ranges and Locales. (line 74)
* Berry, Karl: Acknowledgments. (line 33)
-* binary input/output: User-modified. (line 10)
+* binary input/output: User-modified. (line 15)
* bindtextdomain <1>: Programmer i18n. (line 47)
* bindtextdomain: I18N Functions. (line 12)
-* bindtextdomain() function (C library): Explaining gettext. (line 49)
+* bindtextdomain() function (C library): Explaining gettext. (line 50)
* bindtextdomain() function (gawk), portability and: I18N Portability.
(line 33)
* BINMODE variable <1>: PC Using. (line 33)
-* BINMODE variable: User-modified. (line 10)
+* BINMODE variable: User-modified. (line 15)
* bit-manipulation functions: Bitwise Functions. (line 6)
* bits2str() user-defined function: Bitwise Functions. (line 70)
* bitwise AND: Bitwise Functions. (line 39)
@@ -30620,17 +31619,17 @@ Index
* braces ({}), actions and: Action Overview. (line 19)
* braces ({}), statements, grouping: Statements. (line 10)
* bracket expressions <1>: Bracket Expressions. (line 6)
-* bracket expressions: Regexp Operators. (line 55)
+* bracket expressions: Regexp Operators. (line 56)
* bracket expressions, character classes: Bracket Expressions.
- (line 30)
+ (line 32)
* bracket expressions, collating elements: Bracket Expressions.
- (line 69)
+ (line 79)
* bracket expressions, collating symbols: Bracket Expressions.
- (line 76)
-* bracket expressions, complemented: Regexp Operators. (line 63)
+ (line 86)
+* bracket expressions, complemented: Regexp Operators. (line 64)
* bracket expressions, equivalence classes: Bracket Expressions.
- (line 82)
-* bracket expressions, non-ASCII: Bracket Expressions. (line 69)
+ (line 92)
+* bracket expressions, non-ASCII: Bracket Expressions. (line 79)
* bracket expressions, range expressions: Bracket Expressions.
(line 6)
* break debugger command: Breakpoint Control. (line 11)
@@ -30644,16 +31643,16 @@ Index
* breakpoint, how to disable or enable: Breakpoint Control. (line 69)
* breakpoint, setting: Breakpoint Control. (line 11)
* Brennan, Michael <1>: Other Versions. (line 6)
-* Brennan, Michael <2>: Two-way I/O. (line 6)
-* Brennan, Michael <3>: Simple Sed. (line 25)
-* Brennan, Michael <4>: Delete. (line 56)
+* Brennan, Michael <2>: Simple Sed. (line 25)
+* Brennan, Michael <3>: Delete. (line 56)
+* Brennan, Michael <4>: Acknowledgments. (line 76)
* Brennan, Michael: Foreword. (line 83)
-* Brian Kernighan's awk <1>: I/O Functions. (line 40)
-* Brian Kernighan's awk <2>: Gory Details. (line 15)
+* Brian Kernighan's awk <1>: I/O Functions. (line 43)
+* Brian Kernighan's awk <2>: Gory Details. (line 19)
* Brian Kernighan's awk <3>: String Functions. (line 490)
* Brian Kernighan's awk <4>: Delete. (line 48)
* Brian Kernighan's awk <5>: Nextfile Statement. (line 47)
-* Brian Kernighan's awk <6>: Continue Statement. (line 43)
+* Brian Kernighan's awk <6>: Continue Statement. (line 44)
* Brian Kernighan's awk <7>: Break Statement. (line 51)
* Brian Kernighan's awk <8>: I/O And BEGIN/END. (line 16)
* Brian Kernighan's awk <9>: Concatenation. (line 36)
@@ -30662,23 +31661,23 @@ Index
(line 67)
* Brian Kernighan's awk <12>: GNU Regexp Operators.
(line 83)
-* Brian Kernighan's awk <13>: Escape Sequences. (line 116)
-* Brian Kernighan's awk <14>: When. (line 21)
-* Brian Kernighan's awk: Preface. (line 15)
+* Brian Kernighan's awk <13>: Escape Sequences. (line 124)
+* Brian Kernighan's awk: When. (line 21)
* Brian Kernighan's awk, extensions: BTL. (line 6)
* Brian Kernighan's awk, source code: Other Versions. (line 13)
* Brini, Davide: Signature Program. (line 6)
+* Brink, Jeroen: DOS Quoting. (line 10)
* Broder, Alan J.: Contributors. (line 88)
* Brown, Martin: Contributors. (line 82)
-* BSD-based operating systems: Glossary. (line 616)
+* BSD-based operating systems: Glossary. (line 611)
* bt debugger command (alias for backtrace): Execution Stack. (line 13)
-* Buening, Andreas <1>: Bugs. (line 70)
+* Buening, Andreas <1>: Bugs. (line 71)
* Buening, Andreas <2>: Contributors. (line 92)
* Buening, Andreas: Acknowledgments. (line 60)
-* buffering, input/output <1>: Two-way I/O. (line 70)
-* buffering, input/output: I/O Functions. (line 137)
-* buffering, interactive vs. noninteractive: I/O Functions. (line 106)
-* buffers, flushing: I/O Functions. (line 29)
+* buffering, input/output <1>: Two-way I/O. (line 52)
+* buffering, input/output: I/O Functions. (line 140)
+* buffering, interactive vs. noninteractive: I/O Functions. (line 109)
+* buffers, flushing: I/O Functions. (line 32)
* buffers, operators for: GNU Regexp Operators.
(line 48)
* bug reports, email address, bug-gawk@gnu.org: Bugs. (line 30)
@@ -30690,6 +31689,7 @@ Index
* built-in variables, conveying information: Auto-set. (line 6)
* built-in variables, user-modifiable: User-modified. (line 6)
* Busybox Awk: Other Versions. (line 88)
+* c.e., See common extensions: Conventions. (line 51)
* call by reference: Pass By Value/Reference.
(line 47)
* call by value: Pass By Value/Reference.
@@ -30697,30 +31697,29 @@ Index
* call stack, display in debugger: Execution Stack. (line 13)
* caret (^), ^ operator: Precedence. (line 49)
* caret (^), ^= operator <1>: Precedence. (line 95)
-* caret (^), ^= operator: Assignment Ops. (line 129)
+* caret (^), ^= operator: Assignment Ops. (line 130)
* caret (^), in bracket expressions: Bracket Expressions. (line 17)
* caret (^), regexp operator <1>: GNU Regexp Operators.
(line 59)
* caret (^), regexp operator: Regexp Operators. (line 22)
* case keyword: Switch Statement. (line 6)
-* case sensitivity, and regexps: User-modified. (line 82)
-* case sensitivity, and string comparisons: User-modified. (line 82)
-* case sensitivity, array indices and: Array Intro. (line 91)
+* case sensitivity, and regexps: User-modified. (line 76)
+* case sensitivity, and string comparisons: User-modified. (line 76)
+* case sensitivity, array indices and: Array Intro. (line 94)
* case sensitivity, converting case: String Functions. (line 520)
* case sensitivity, example programs: Library Functions. (line 53)
* case sensitivity, gawk: Case-sensitivity. (line 26)
* case sensitivity, regexps and: Case-sensitivity. (line 6)
* CGI, awk scripts for: Options. (line 125)
-* changing precision of a number: Changing Precision. (line 6)
* character classes, See bracket expressions: Regexp Operators.
- (line 55)
+ (line 56)
* character lists in regular expression: Bracket Expressions. (line 6)
-* character lists, See bracket expressions: Regexp Operators. (line 55)
+* character lists, See bracket expressions: Regexp Operators. (line 56)
* character sets (machine character encodings) <1>: Glossary. (line 133)
* character sets (machine character encodings): Ordinal Functions.
(line 45)
* character sets, See Also bracket expressions: Regexp Operators.
- (line 55)
+ (line 56)
* characters, counting: Wc Program. (line 6)
* characters, transliterating: Translate Program. (line 6)
* characters, values of as numbers: Ordinal Functions. (line 6)
@@ -30743,23 +31742,23 @@ Index
* close() function, portability: Close Files And Pipes.
(line 81)
* close() function, return value: Close Files And Pipes.
- (line 130)
-* close() function, two-way pipes and: Two-way I/O. (line 77)
+ (line 131)
+* close() function, two-way pipes and: Two-way I/O. (line 59)
* Close, Diane <1>: Contributors. (line 20)
-* Close, Diane: Manual History. (line 41)
+* Close, Diane: Manual History. (line 34)
* Collado, Manuel: Acknowledgments. (line 60)
-* collating elements: Bracket Expressions. (line 69)
-* collating symbols: Bracket Expressions. (line 76)
-* Colombo, Antonio <1>: Contributors. (line 135)
+* collating elements: Bracket Expressions. (line 79)
+* collating symbols: Bracket Expressions. (line 86)
+* Colombo, Antonio <1>: Contributors. (line 137)
* Colombo, Antonio: Acknowledgments. (line 60)
* columns, aligning: Print Examples. (line 70)
* columns, cutting: Cut Program. (line 6)
* comma (,), in range patterns: Ranges. (line 6)
* command completion, in debugger: Readline Support. (line 6)
* command line, arguments <1>: ARGC and ARGV. (line 6)
-* command line, arguments <2>: Auto-set. (line 11)
+* command line, arguments <2>: Auto-set. (line 15)
* command line, arguments: Other Arguments. (line 6)
-* command line, directories on: Command line directories.
+* command line, directories on: Command-line directories.
(line 6)
* command line, formats: Running gawk. (line 12)
* command line, FS on, setting: Command Line Field Separator.
@@ -30778,20 +31777,20 @@ Index
* commenting: Comments. (line 6)
* commenting, backslash continuation and: Statements/Lines. (line 76)
* common extensions, ** operator: Arithmetic Ops. (line 30)
-* common extensions, **= operator: Assignment Ops. (line 136)
+* common extensions, **= operator: Assignment Ops. (line 137)
* common extensions, /dev/stderr special file: Special FD. (line 46)
* common extensions, /dev/stdin special file: Special FD. (line 46)
* common extensions, /dev/stdout special file: Special FD. (line 46)
* common extensions, \x escape sequence: Escape Sequences. (line 61)
* common extensions, BINMODE variable: PC Using. (line 33)
* common extensions, delete to delete entire arrays: Delete. (line 39)
-* common extensions, func keyword: Definition Syntax. (line 83)
+* common extensions, func keyword: Definition Syntax. (line 93)
* common extensions, length() applied to an array: String Functions.
- (line 194)
-* common extensions, RS as a regexp: Records. (line 135)
+ (line 197)
+* common extensions, RS as a regexp: gawk split records. (line 6)
* common extensions, single character fields: Single Character Fields.
(line 6)
-* comp.lang.awk newsgroup: Bugs. (line 38)
+* comp.lang.awk newsgroup: Bugs. (line 39)
* comparison expressions: Typing and Comparison.
(line 9)
* comparison expressions, as patterns: Expression Patterns. (line 14)
@@ -30827,35 +31826,31 @@ Index
* configuration options, gawk: Additional Configuration Options.
(line 6)
* constant regexps: Regexp Usage. (line 57)
-* constants, floating-point: Floating-point Constants.
- (line 6)
* constants, nondecimal: Nondecimal Data. (line 6)
* constants, numeric: Scalar Constants. (line 6)
* constants, types of: Constants. (line 6)
-* context, floating-point: Floating-point Context.
- (line 6)
* continue program, in debugger: Debugger Execution Control.
(line 33)
* continue statement: Continue Statement. (line 6)
* control statements: Statements. (line 6)
* controlling array scanning order: Controlling Scanning.
- (line 12)
+ (line 14)
* convert string to lower case: String Functions. (line 521)
-* convert string to number: String Functions. (line 385)
+* convert string to number: String Functions. (line 388)
* convert string to upper case: String Functions. (line 527)
* converting integer array subscripts: Numeric Array Subscripts.
(line 31)
* converting, dates to timestamps: Time Functions. (line 76)
* converting, numbers to strings <1>: Bitwise Functions. (line 109)
-* converting, numbers to strings: Conversion. (line 6)
+* converting, numbers to strings: Strings And Numbers. (line 6)
* converting, strings to numbers <1>: Bitwise Functions. (line 109)
-* converting, strings to numbers: Conversion. (line 6)
-* CONVFMT variable <1>: User-modified. (line 28)
-* CONVFMT variable: Conversion. (line 29)
+* converting, strings to numbers: Strings And Numbers. (line 6)
+* CONVFMT variable <1>: User-modified. (line 30)
+* CONVFMT variable: Strings And Numbers. (line 29)
* CONVFMT variable, and array subscripts: Numeric Array Subscripts.
(line 6)
* cookie: Glossary. (line 149)
-* coprocesses <1>: Two-way I/O. (line 44)
+* coprocesses <1>: Two-way I/O. (line 25)
* coprocesses: Redirection. (line 102)
* coprocesses, closing: Close Files And Pipes.
(line 6)
@@ -30864,10 +31859,10 @@ Index
* cosine: Numeric Functions. (line 15)
* counting: Wc Program. (line 6)
* csh utility: Statements/Lines. (line 44)
-* csh utility, POSIXLY_CORRECT environment variable: Options. (line 348)
-* csh utility, |& operator, comparison with: Two-way I/O. (line 44)
-* ctime() user-defined function: Function Example. (line 73)
-* currency symbols, localization: Explaining gettext. (line 103)
+* csh utility, POSIXLY_CORRECT environment variable: Options. (line 351)
+* csh utility, |& operator, comparison with: Two-way I/O. (line 25)
+* ctime() user-defined function: Function Example. (line 74)
+* currency symbols, localization: Explaining gettext. (line 104)
* current system time: Time Functions. (line 66)
* custom.h file: Configuration Philosophy.
(line 30)
@@ -30877,61 +31872,63 @@ Index
* cut utility: Cut Program. (line 6)
* cut.awk program: Cut Program. (line 45)
* d debugger command (alias for delete): Breakpoint Control. (line 64)
-* d.c., See dark corner: Conventions. (line 38)
-* dark corner <1>: Glossary. (line 189)
-* dark corner: Conventions. (line 38)
+* d.c., See dark corner: Conventions. (line 42)
+* dark corner <1>: Glossary. (line 188)
+* dark corner: Conventions. (line 42)
* dark corner, "0" is actually true: Truth Values. (line 24)
* dark corner, /= operator vs. /=.../ regexp constant: Assignment Ops.
- (line 147)
+ (line 148)
* dark corner, ^, in FS: Regexp Field Splitting.
(line 59)
* dark corner, array subscripts: Uninitialized Subscripts.
(line 43)
* dark corner, break statement: Break Statement. (line 51)
* dark corner, close() function: Close Files And Pipes.
- (line 130)
+ (line 131)
* dark corner, command-line arguments: Assignment Options. (line 43)
-* dark corner, continue statement: Continue Statement. (line 43)
-* dark corner, CONVFMT variable: Conversion. (line 40)
-* dark corner, escape sequences: Other Arguments. (line 31)
+* dark corner, continue statement: Continue Statement. (line 44)
+* dark corner, CONVFMT variable: Strings And Numbers. (line 40)
+* dark corner, escape sequences: Other Arguments. (line 35)
* dark corner, escape sequences, for metacharacters: Escape Sequences.
- (line 134)
+ (line 142)
* dark corner, exit statement: Exit Statement. (line 30)
* dark corner, field separators: Field Splitting Summary.
(line 46)
-* dark corner, FILENAME variable <1>: Auto-set. (line 102)
+* dark corner, FILENAME variable <1>: Auto-set. (line 98)
* dark corner, FILENAME variable: Getline Notes. (line 19)
-* dark corner, FNR/NR variables: Auto-set. (line 323)
+* dark corner, FNR/NR variables: Auto-set. (line 316)
* dark corner, format-control characters: Control Letters. (line 18)
* dark corner, FS as null string: Single Character Fields.
(line 20)
-* dark corner, input files: Records. (line 118)
+* dark corner, input files: awk split records. (line 110)
* dark corner, invoking awk: Command Line. (line 16)
-* dark corner, length() function: String Functions. (line 180)
-* dark corner, locale's decimal point character: Conversion. (line 77)
+* dark corner, length() function: String Functions. (line 183)
+* dark corner, locale's decimal point character: Locale influences conversions.
+ (line 17)
* dark corner, multiline records: Multiple Line. (line 35)
* dark corner, NF variable, decrementing: Changing Fields. (line 107)
* dark corner, OFMT variable: OFMT. (line 27)
* dark corner, regexp constants: Using Constant Regexps.
(line 6)
* dark corner, regexp constants, /= operator and: Assignment Ops.
- (line 147)
+ (line 148)
* dark corner, regexp constants, as arguments to user-defined functions: Using Constant Regexps.
(line 43)
* dark corner, split() function: String Functions. (line 359)
-* dark corner, strings, storing: Records. (line 210)
-* dark corner, value of ARGV[0]: Auto-set. (line 35)
+* dark corner, strings, storing: gawk split records. (line 83)
+* dark corner, value of ARGV[0]: Auto-set. (line 39)
* data, fixed-width: Constant Size. (line 10)
* data-driven languages: Basic High Level. (line 85)
* database, group, reading: Group Functions. (line 6)
* database, users, reading: Passwd Functions. (line 6)
* date utility, GNU: Time Functions. (line 17)
-* date utility, POSIX: Time Functions. (line 263)
+* date utility, POSIX: Time Functions. (line 254)
* dates, converting to timestamps: Time Functions. (line 76)
* dates, information related to, localization: Explaining gettext.
- (line 115)
+ (line 112)
* Davies, Stephen <1>: Contributors. (line 74)
* Davies, Stephen: Acknowledgments. (line 60)
+* Day, Robert P.J.: Acknowledgments. (line 76)
* dcgettext <1>: Programmer i18n. (line 19)
* dcgettext: I18N Functions. (line 22)
* dcgettext() function (gawk), portability and: I18N Portability.
@@ -30940,7 +31937,7 @@ Index
* dcngettext: I18N Functions. (line 28)
* dcngettext() function (gawk), portability and: I18N Portability.
(line 33)
-* deadlocks: Two-way I/O. (line 70)
+* deadlocks: Two-way I/O. (line 52)
* debugger commands, b (break): Breakpoint Control. (line 11)
* debugger commands, backtrace: Execution Stack. (line 13)
* debugger commands, break: Breakpoint Control. (line 11)
@@ -31032,7 +32029,7 @@ Index
(line 83)
* debugger commands, unwatch: Viewing And Changing Data.
(line 84)
-* debugger commands, up: Execution Stack. (line 33)
+* debugger commands, up: Execution Stack. (line 34)
* debugger commands, w (watch): Viewing And Changing Data.
(line 67)
* debugger commands, watch: Viewing And Changing Data.
@@ -31046,10 +32043,10 @@ Index
* debugger, read commands from a file: Debugger Info. (line 96)
* debugging awk programs: Debugger. (line 6)
* debugging gawk, bug reports: Bugs. (line 9)
-* decimal point character, locale specific: Options. (line 263)
+* decimal point character, locale specific: Options. (line 268)
* decrement operators: Increment Ops. (line 35)
* default keyword: Switch Statement. (line 6)
-* Deifik, Scott <1>: Bugs. (line 70)
+* Deifik, Scott <1>: Bugs. (line 71)
* Deifik, Scott <2>: Contributors. (line 53)
* Deifik, Scott: Acknowledgments. (line 60)
* delete ARRAY: Delete. (line 39)
@@ -31063,10 +32060,10 @@ Index
* deleting entire arrays: Delete. (line 39)
* Demaille, Akim: Acknowledgments. (line 60)
* describe call stack frame, in debugger: Debugger Info. (line 27)
-* differences between gawk and awk: String Functions. (line 194)
+* differences between gawk and awk: String Functions. (line 197)
* differences in awk and gawk, ARGC/ARGV variables: ARGC and ARGV.
- (line 88)
-* differences in awk and gawk, ARGIND variable: Auto-set. (line 40)
+ (line 90)
+* differences in awk and gawk, ARGIND variable: Auto-set. (line 44)
* differences in awk and gawk, array elements, deleting: Delete.
(line 39)
* differences in awk and gawk, AWKLIBPATH environment variable: AWKLIBPATH Variable.
@@ -31080,22 +32077,22 @@ Index
* differences in awk and gawk, BINMODE variable <1>: PC Using.
(line 33)
* differences in awk and gawk, BINMODE variable: User-modified.
- (line 23)
+ (line 15)
* differences in awk and gawk, close() function: Close Files And Pipes.
(line 81)
-* differences in awk and gawk, command line directories: Command line directories.
+* differences in awk and gawk, command-line directories: Command-line directories.
(line 6)
* differences in awk and gawk, ERRNO variable: Auto-set. (line 82)
* differences in awk and gawk, error messages: Special FD. (line 16)
* differences in awk and gawk, FIELDWIDTHS variable: User-modified.
- (line 35)
-* differences in awk and gawk, FPAT variable: User-modified. (line 45)
-* differences in awk and gawk, FUNCTAB variable: Auto-set. (line 128)
+ (line 37)
+* differences in awk and gawk, FPAT variable: User-modified. (line 43)
+* differences in awk and gawk, FUNCTAB variable: Auto-set. (line 123)
* differences in awk and gawk, function arguments (gawk): Calling Built-in.
(line 16)
* differences in awk and gawk, getline command: Getline. (line 19)
* differences in awk and gawk, IGNORECASE variable: User-modified.
- (line 82)
+ (line 76)
* differences in awk and gawk, implementation limitations <1>: Redirection.
(line 135)
* differences in awk and gawk, implementation limitations: Getline Notes.
@@ -31108,34 +32105,38 @@ Index
(line 6)
* differences in awk and gawk, line continuations: Conditional Exp.
(line 34)
-* differences in awk and gawk, LINT variable: User-modified. (line 98)
+* differences in awk and gawk, LINT variable: User-modified. (line 88)
* differences in awk and gawk, match() function: String Functions.
- (line 257)
+ (line 260)
* differences in awk and gawk, print/printf statements: Format Modifiers.
(line 13)
-* differences in awk and gawk, PROCINFO array: Auto-set. (line 142)
-* differences in awk and gawk, record separators: Records. (line 132)
+* differences in awk and gawk, PROCINFO array: Auto-set. (line 137)
+* differences in awk and gawk, read timeouts: Read Timeout. (line 6)
+* differences in awk and gawk, record separators: awk split records.
+ (line 124)
* differences in awk and gawk, regexp constants: Using Constant Regexps.
(line 43)
* differences in awk and gawk, regular expressions: Case-sensitivity.
(line 26)
-* differences in awk and gawk, RS/RT variables: Records. (line 187)
-* differences in awk and gawk, RT variable: Auto-set. (line 275)
+* differences in awk and gawk, RS/RT variables: gawk split records.
+ (line 58)
+* differences in awk and gawk, RT variable: Auto-set. (line 272)
* differences in awk and gawk, single-character fields: Single Character Fields.
(line 6)
* differences in awk and gawk, split() function: String Functions.
(line 347)
* differences in awk and gawk, strings: Scalar Constants. (line 20)
-* differences in awk and gawk, strings, storing: Records. (line 206)
-* differences in awk and gawk, SYMTAB variable: Auto-set. (line 283)
+* differences in awk and gawk, strings, storing: gawk split records.
+ (line 77)
+* differences in awk and gawk, SYMTAB variable: Auto-set. (line 276)
* differences in awk and gawk, TEXTDOMAIN variable: User-modified.
- (line 162)
+ (line 152)
* differences in awk and gawk, trunc-mod operation: Arithmetic Ops.
(line 66)
-* directories, command line: Command line directories.
+* directories, command-line: Command-line directories.
(line 6)
-* directories, searching: Igawk Program. (line 368)
-* directories, searching for shared libraries: AWKLIBPATH Variable.
+* directories, searching: Programs Exercises. (line 70)
+* directories, searching for loadable extensions: AWKLIBPATH Variable.
(line 6)
* directories, searching for source files: AWKPATH Variable. (line 6)
* disable breakpoint: Breakpoint Control. (line 69)
@@ -31143,6 +32144,7 @@ Index
* display debugger command: Viewing And Changing Data.
(line 8)
* display debugger options: Debugger Info. (line 57)
+* div: Numeric Functions. (line 18)
* division: Arithmetic Ops. (line 44)
* do-while statement: Do Statement. (line 6)
* do-while statement, use of regexps in: Regexp Usage. (line 19)
@@ -31154,10 +32156,8 @@ Index
* dollar sign ($), incrementing fields and arrays: Increment Ops.
(line 30)
* dollar sign ($), regexp operator: Regexp Operators. (line 35)
-* double precision floating-point: General Arithmetic. (line 21)
-* double quote (") in shell commands: Read Terminal. (line 25)
-* double quote ("), in regexp constants: Computed Regexps. (line 28)
-* double quote ("), in shell commands: Quoting. (line 37)
+* double quote ("), in regexp constants: Computed Regexps. (line 29)
+* double quote ("), in shell commands: Quoting. (line 54)
* down debugger command: Execution Stack. (line 21)
* Drepper, Ulrich: Acknowledgments. (line 52)
* dump all variables of a program: Options. (line 93)
@@ -31168,10 +32168,10 @@ Index
* dynamically loaded extensions: Dynamic Extensions. (line 6)
* e debugger command (alias for enable): Breakpoint Control. (line 73)
* EBCDIC: Ordinal Functions. (line 45)
-* effective group ID of gawk user: Auto-set. (line 147)
-* effective user ID of gawk user: Auto-set. (line 151)
+* effective group ID of gawk user: Auto-set. (line 142)
+* effective user ID of gawk user: Auto-set. (line 146)
* egrep utility <1>: Egrep Program. (line 6)
-* egrep utility: Bracket Expressions. (line 24)
+* egrep utility: Bracket Expressions. (line 26)
* egrep.awk program: Egrep Program. (line 54)
* elements in arrays, assigning values: Assigning Elements. (line 6)
* elements in arrays, deleting: Delete. (line 6)
@@ -31185,7 +32185,7 @@ Index
* empty array elements: Reference to Elements.
(line 18)
* empty pattern: Empty. (line 6)
-* empty strings: Records. (line 122)
+* empty strings: awk split records. (line 114)
* empty strings, See null strings: Regexp Field Splitting.
(line 43)
* enable breakpoint: Breakpoint Control. (line 73)
@@ -31197,37 +32197,37 @@ Index
* END pattern, and profiling: Profiling. (line 62)
* END pattern, assert() user-defined function and: Assert Function.
(line 75)
-* END pattern, backslash continuation and: Egrep Program. (line 220)
* END pattern, Boolean patterns and: Expression Patterns. (line 70)
* END pattern, exit statement and: Exit Statement. (line 12)
* END pattern, next/nextfile statements and <1>: Next Statement.
- (line 45)
+ (line 44)
* END pattern, next/nextfile statements and: I/O And BEGIN/END.
- (line 37)
+ (line 36)
* END pattern, operators and: Using BEGIN/END. (line 17)
* END pattern, print statement and: I/O And BEGIN/END. (line 16)
* ENDFILE pattern: BEGINFILE/ENDFILE. (line 6)
* ENDFILE pattern, Boolean patterns and: Expression Patterns. (line 70)
* endfile() user-defined function: Filetrans Function. (line 62)
-* endgrent() function (C library): Group Functions. (line 215)
-* endgrent() user-defined function: Group Functions. (line 218)
+* endgrent() function (C library): Group Functions. (line 212)
+* endgrent() user-defined function: Group Functions. (line 215)
* endpwent() function (C library): Passwd Functions. (line 210)
* endpwent() user-defined function: Passwd Functions. (line 213)
+* English, Steve: Advanced Features. (line 6)
* ENVIRON array: Auto-set. (line 60)
* environment variables used by gawk: Environment Variables.
(line 6)
* environment variables, in ENVIRON array: Auto-set. (line 60)
-* epoch, definition of: Glossary. (line 235)
+* epoch, definition of: Glossary. (line 234)
* equals sign (=), = operator: Assignment Ops. (line 6)
* equals sign (=), == operator <1>: Precedence. (line 65)
* equals sign (=), == operator: Comparison Operators.
(line 11)
-* EREs (Extended Regular Expressions): Bracket Expressions. (line 24)
+* EREs (Extended Regular Expressions): Bracket Expressions. (line 26)
* ERRNO variable <1>: TCP/IP Networking. (line 54)
* ERRNO variable: Auto-set. (line 82)
* ERRNO variable, with BEGINFILE pattern: BEGINFILE/ENDFILE. (line 26)
* ERRNO variable, with close() function: Close Files And Pipes.
- (line 138)
+ (line 139)
* ERRNO variable, with getline command: Getline. (line 19)
* error handling: Special FD. (line 16)
* error handling, ERRNO variable and: Auto-set. (line 82)
@@ -31243,7 +32243,7 @@ Index
* evaluation order, concatenation: Concatenation. (line 41)
* evaluation order, functions: Calling Built-in. (line 30)
* examining fields: Fields. (line 6)
-* exclamation point (!), ! operator <1>: Egrep Program. (line 170)
+* exclamation point (!), ! operator <1>: Egrep Program. (line 175)
* exclamation point (!), ! operator <2>: Precedence. (line 52)
* exclamation point (!), ! operator: Boolean Ops. (line 67)
* exclamation point (!), != operator <1>: Precedence. (line 65)
@@ -31255,18 +32255,18 @@ Index
* exclamation point (!), !~ operator <3>: Comparison Operators.
(line 11)
* exclamation point (!), !~ operator <4>: Regexp Constants. (line 6)
-* exclamation point (!), !~ operator <5>: Computed Regexps. (line 6)
-* exclamation point (!), !~ operator <6>: Case-sensitivity. (line 26)
+* exclamation point (!), !~ operator <5>: Case-sensitivity. (line 26)
+* exclamation point (!), !~ operator <6>: Computed Regexps. (line 6)
* exclamation point (!), !~ operator: Regexp Usage. (line 19)
* exit statement: Exit Statement. (line 6)
* exit status, of gawk: Exit Status. (line 6)
* exit status, of VMS: VMS Running. (line 29)
* exit the debugger: Miscellaneous Debugger Commands.
(line 99)
-* exp: Numeric Functions. (line 18)
-* expand utility: Very Simple. (line 69)
+* exp: Numeric Functions. (line 33)
+* expand utility: Very Simple. (line 72)
* Expat XML parser library: gawkextlib. (line 35)
-* exponent: Numeric Functions. (line 18)
+* exponent: Numeric Functions. (line 33)
* expressions: Expressions. (line 6)
* expressions, as patterns: Expression Patterns. (line 6)
* expressions, assignment: Assignment Ops. (line 6)
@@ -31277,14 +32277,14 @@ Index
* expressions, matching, See comparison expressions: Typing and Comparison.
(line 9)
* expressions, selecting: Conditional Exp. (line 6)
-* Extended Regular Expressions (EREs): Bracket Expressions. (line 24)
+* Extended Regular Expressions (EREs): Bracket Expressions. (line 26)
* extension API: Extension API Description.
(line 6)
* extension API informational variables: Extension API Informational Variables.
(line 6)
* extension API version: Extension Versioning.
(line 6)
-* extension API, version number: Auto-set. (line 238)
+* extension API, version number: Auto-set. (line 239)
* extension example: Extension Example. (line 6)
* extension registration: Registration Functions.
(line 6)
@@ -31295,21 +32295,23 @@ Index
* extensions, Brian Kernighan's awk <1>: Common Extensions. (line 6)
* extensions, Brian Kernighan's awk: BTL. (line 6)
* extensions, common, ** operator: Arithmetic Ops. (line 30)
-* extensions, common, **= operator: Assignment Ops. (line 136)
+* extensions, common, **= operator: Assignment Ops. (line 137)
* extensions, common, /dev/stderr special file: Special FD. (line 46)
* extensions, common, /dev/stdin special file: Special FD. (line 46)
* extensions, common, /dev/stdout special file: Special FD. (line 46)
* extensions, common, \x escape sequence: Escape Sequences. (line 61)
* extensions, common, BINMODE variable: PC Using. (line 33)
* extensions, common, delete to delete entire arrays: Delete. (line 39)
-* extensions, common, fflush() function: I/O Functions. (line 40)
-* extensions, common, func keyword: Definition Syntax. (line 83)
+* extensions, common, fflush() function: I/O Functions. (line 43)
+* extensions, common, func keyword: Definition Syntax. (line 93)
* extensions, common, length() applied to an array: String Functions.
- (line 194)
-* extensions, common, RS as a regexp: Records. (line 135)
+ (line 197)
+* extensions, common, RS as a regexp: gawk split records. (line 6)
* extensions, common, single character fields: Single Character Fields.
(line 6)
* extensions, in gawk, not in POSIX awk: POSIX/GNU. (line 6)
+* extensions, loading, @load directive: Loading Shared Libraries.
+ (line 8)
* extensions, mawk: Common Extensions. (line 6)
* extensions, where to find: gawkextlib. (line 6)
* extract.awk program: Extract Program. (line 79)
@@ -31320,12 +32322,11 @@ Index
* FDL (Free Documentation License): GNU Free Documentation License.
(line 7)
* features, adding to gawk: Adding Code. (line 6)
-* features, advanced, See advanced features: Obsolete. (line 6)
* features, deprecated: Obsolete. (line 6)
* features, undocumented: Undocumented. (line 6)
* Fenlason, Jay <1>: Contributors. (line 18)
* Fenlason, Jay: History. (line 30)
-* fflush: I/O Functions. (line 25)
+* fflush: I/O Functions. (line 28)
* field numbers: Nonconstant Fields. (line 6)
* field operator $: Fields. (line 19)
* field operators, dollar sign as: Fields. (line 19)
@@ -31334,17 +32335,17 @@ Index
(line 6)
* field separator, POSIX and: Field Splitting Summary.
(line 40)
-* field separators <1>: User-modified. (line 56)
+* field separators <1>: User-modified. (line 50)
* field separators: Field Separators. (line 15)
* field separators, choice of: Field Separators. (line 51)
-* field separators, FIELDWIDTHS variable and: User-modified. (line 35)
-* field separators, FPAT variable and: User-modified. (line 45)
+* field separators, FIELDWIDTHS variable and: User-modified. (line 37)
+* field separators, FPAT variable and: User-modified. (line 43)
* field separators, POSIX and: Fields. (line 6)
* field separators, regular expressions as <1>: Regexp Field Splitting.
(line 6)
* field separators, regular expressions as: Field Separators. (line 51)
* field separators, See Also OFS: Changing Fields. (line 64)
-* field separators, spaces as: Cut Program. (line 109)
+* field separators, spaces as: Cut Program. (line 108)
* fields <1>: Basic High Level. (line 73)
* fields <2>: Fields. (line 6)
* fields: Reading Files. (line 14)
@@ -31358,24 +32359,25 @@ Index
* fields, separating: Field Separators. (line 15)
* fields, single-character: Single Character Fields.
(line 6)
-* FIELDWIDTHS variable <1>: User-modified. (line 35)
+* FIELDWIDTHS variable <1>: User-modified. (line 37)
* FIELDWIDTHS variable: Constant Size. (line 23)
* file descriptors: Special FD. (line 6)
-* file names, distinguishing: Auto-set. (line 52)
+* file inclusion, @include directive: Include Files. (line 8)
+* file names, distinguishing: Auto-set. (line 56)
* file names, in compatibility mode: Special Caveats. (line 9)
* file names, standard streams in gawk: Special FD. (line 46)
-* FILENAME variable <1>: Auto-set. (line 102)
+* FILENAME variable <1>: Auto-set. (line 98)
* FILENAME variable: Reading Files. (line 6)
* FILENAME variable, getline, setting with: Getline Notes. (line 19)
* filenames, assignments as: Ignoring Assigns. (line 6)
-* files, .gmo: Explaining gettext. (line 41)
-* files, .gmo, converting from .po: I18N Example. (line 62)
+* files, .gmo: Explaining gettext. (line 42)
* files, .gmo, specifying directory of <1>: Programmer i18n. (line 47)
-* files, .gmo, specifying directory of: Explaining gettext. (line 53)
+* files, .gmo, specifying directory of: Explaining gettext. (line 54)
+* files, .mo, converting from .po: I18N Example. (line 63)
* files, .po <1>: Translator i18n. (line 6)
-* files, .po: Explaining gettext. (line 36)
-* files, .po, converting to .gmo: I18N Example. (line 62)
-* files, .pot: Explaining gettext. (line 30)
+* files, .po: Explaining gettext. (line 37)
+* files, .po, converting to .mo: I18N Example. (line 63)
+* files, .pot: Explaining gettext. (line 31)
* files, /dev/... special files: Special FD. (line 46)
* files, /inet/... (gawk): TCP/IP Networking. (line 6)
* files, /inet4/... (gawk): TCP/IP Networking. (line 6)
@@ -31392,33 +32394,33 @@ Index
* files, managing: Data File Management.
(line 6)
* files, managing, data file boundaries: Filetrans Function. (line 6)
-* files, message object: Explaining gettext. (line 41)
+* files, message object: Explaining gettext. (line 42)
* files, message object, converting from portable object files: I18N Example.
- (line 62)
+ (line 63)
* files, message object, specifying directory of <1>: Programmer i18n.
(line 47)
* files, message object, specifying directory of: Explaining gettext.
- (line 53)
-* files, multiple passes over: Other Arguments. (line 49)
+ (line 54)
+* files, multiple passes over: Other Arguments. (line 53)
* files, multiple, duplicating output into: Tee Program. (line 6)
* files, output, See output files: Close Files And Pipes.
(line 6)
* files, password: Passwd Functions. (line 16)
* files, portable object <1>: Translator i18n. (line 6)
-* files, portable object: Explaining gettext. (line 36)
-* files, portable object template: Explaining gettext. (line 30)
+* files, portable object: Explaining gettext. (line 37)
+* files, portable object template: Explaining gettext. (line 31)
* files, portable object, converting to message object files: I18N Example.
- (line 62)
+ (line 63)
* files, portable object, generating: Options. (line 147)
-* files, processing, ARGIND variable and: Auto-set. (line 47)
+* files, processing, ARGIND variable and: Auto-set. (line 51)
* files, reading: Rewind Function. (line 6)
* files, reading, multiline records: Multiple Line. (line 6)
* files, searching for regular expressions: Egrep Program. (line 6)
* files, skipping: File Checking. (line 6)
-* files, source, search path for: Igawk Program. (line 368)
+* files, source, search path for: Programs Exercises. (line 70)
* files, splitting: Split Program. (line 6)
* files, Texinfo, extracting programs from: Extract Program. (line 6)
-* find substring in string: String Functions. (line 151)
+* find substring in string: String Functions. (line 155)
* finding extensions: Finding Extensions. (line 6)
* finish debugger command: Debugger Execution Control.
(line 39)
@@ -31426,17 +32428,15 @@ Index
* fixed-width data: Constant Size. (line 10)
* flag variables <1>: Tee Program. (line 20)
* flag variables: Boolean Ops. (line 67)
-* floating-point, numbers <1>: Unexpected Results. (line 6)
-* floating-point, numbers: General Arithmetic. (line 6)
* floating-point, numbers, arbitrary precision: Arbitrary Precision Arithmetic.
(line 6)
* floating-point, VAX/VMS: VMS Running. (line 51)
-* flush buffered output: I/O Functions. (line 25)
+* flush buffered output: I/O Functions. (line 28)
* fnmatch() extension function: Extension Sample Fnmatch.
(line 12)
-* FNR variable <1>: Auto-set. (line 112)
+* FNR variable <1>: Auto-set. (line 107)
* FNR variable: Records. (line 6)
-* FNR variable, changing: Auto-set. (line 323)
+* FNR variable, changing: Auto-set. (line 316)
* for statement: For Statement. (line 6)
* for statement, looping over arrays: Scanning an Array. (line 20)
* fork() extension function: Extension Sample Fork.
@@ -31450,30 +32450,30 @@ Index
* format time string: Time Functions. (line 48)
* formats, numeric output: OFMT. (line 6)
* formatting output: Printf. (line 6)
-* formatting strings: String Functions. (line 378)
+* formatting strings: String Functions. (line 381)
* forward slash (/) to enclose regular expressions: Regexp. (line 10)
* forward slash (/), / operator: Precedence. (line 55)
* forward slash (/), /= operator <1>: Precedence. (line 95)
-* forward slash (/), /= operator: Assignment Ops. (line 129)
+* forward slash (/), /= operator: Assignment Ops. (line 130)
* forward slash (/), /= operator, vs. /=.../ regexp constant: Assignment Ops.
- (line 147)
+ (line 148)
* forward slash (/), patterns and: Expression Patterns. (line 24)
-* FPAT variable <1>: User-modified. (line 45)
+* FPAT variable <1>: User-modified. (line 43)
* FPAT variable: Splitting By Content.
(line 27)
* frame debugger command: Execution Stack. (line 25)
* Free Documentation License (FDL): GNU Free Documentation License.
(line 7)
-* Free Software Foundation (FSF) <1>: Glossary. (line 297)
+* Free Software Foundation (FSF) <1>: Glossary. (line 296)
* Free Software Foundation (FSF) <2>: Getting. (line 10)
* Free Software Foundation (FSF): Manual History. (line 6)
-* FreeBSD: Glossary. (line 616)
-* FS variable <1>: User-modified. (line 56)
+* FreeBSD: Glossary. (line 611)
+* FS variable <1>: User-modified. (line 50)
* FS variable: Field Separators. (line 15)
* FS variable, --field-separator option and: Options. (line 21)
* FS variable, as null string: Single Character Fields.
(line 20)
-* FS variable, as TAB character: Options. (line 259)
+* FS variable, as TAB character: Options. (line 264)
* FS variable, changing value of: Field Separators. (line 35)
* FS variable, running awk programs and: Cut Program. (line 68)
* FS variable, setting from command line: Command Line Field Separator.
@@ -31481,14 +32481,15 @@ Index
* FS, containing ^: Regexp Field Splitting.
(line 59)
* FS, in multiline records: Multiple Line. (line 41)
-* FSF (Free Software Foundation) <1>: Glossary. (line 297)
+* FSF (Free Software Foundation) <1>: Glossary. (line 296)
* FSF (Free Software Foundation) <2>: Getting. (line 10)
* FSF (Free Software Foundation): Manual History. (line 6)
* fts() extension function: Extension Sample File Functions.
- (line 77)
-* FUNCTAB array: Auto-set. (line 128)
+ (line 61)
+* FUNCTAB array: Auto-set. (line 123)
* function calls: Function Calls. (line 6)
* function calls, indirect: Indirect Calls. (line 6)
+* function calls, indirect, @-notation for: Indirect Calls. (line 47)
* function definition example: Function Example. (line 6)
* function pointers: Indirect Calls. (line 6)
* functions, arrays as parameters to: Pass By Value/Reference.
@@ -31496,7 +32497,7 @@ Index
* functions, built-in <1>: Functions. (line 6)
* functions, built-in: Function Calls. (line 10)
* functions, built-in, evaluation order: Calling Built-in. (line 30)
-* functions, defining: Definition Syntax. (line 6)
+* functions, defining: Definition Syntax. (line 9)
* functions, library: Library Functions. (line 6)
* functions, library, assertions: Assert Function. (line 6)
* functions, library, associative arrays and: Library Names. (line 57)
@@ -31519,9 +32520,9 @@ Index
* functions, library, rounding numbers: Round Function. (line 6)
* functions, library, user database, reading: Passwd Functions.
(line 6)
-* functions, names of <1>: Definition Syntax. (line 20)
+* functions, names of <1>: Definition Syntax. (line 23)
* functions, names of: Arrays. (line 18)
-* functions, recursive: Definition Syntax. (line 73)
+* functions, recursive: Definition Syntax. (line 83)
* functions, string-translation: I18N Functions. (line 6)
* functions, undefined: Pass By Value/Reference.
(line 71)
@@ -31532,19 +32533,19 @@ Index
* functions, user-defined, next/nextfile statements and <1>: Nextfile Statement.
(line 47)
* functions, user-defined, next/nextfile statements and: Next Statement.
- (line 45)
-* G-d: Acknowledgments. (line 78)
+ (line 44)
+* G-d: Acknowledgments. (line 92)
* Garfinkle, Scott: Contributors. (line 34)
* gawk program, dynamic profiling: Profiling. (line 179)
-* gawk version: Auto-set. (line 213)
+* gawk version: Auto-set. (line 214)
* gawk, ARGIND variable in: Other Arguments. (line 12)
* gawk, awk and <1>: This Manual. (line 14)
-* gawk, awk and: Preface. (line 23)
+* gawk, awk and: Preface. (line 21)
* gawk, bitwise operations in: Bitwise Functions. (line 39)
* gawk, break statement in: Break Statement. (line 51)
* gawk, built-in variables and: Built-in Variables. (line 14)
-* gawk, character classes and: Bracket Expressions. (line 90)
-* gawk, coding style in: Adding Code. (line 38)
+* gawk, character classes and: Bracket Expressions. (line 100)
+* gawk, coding style in: Adding Code. (line 39)
* gawk, command-line options, and regular expressions: GNU Regexp Operators.
(line 70)
* gawk, comparison operators and: Comparison Operators.
@@ -31553,35 +32554,35 @@ Index
(line 6)
* gawk, configuring, options: Additional Configuration Options.
(line 6)
-* gawk, continue statement in: Continue Statement. (line 43)
+* gawk, continue statement in: Continue Statement. (line 44)
* gawk, distribution: Distribution contents.
(line 6)
* gawk, ERRNO variable in <1>: TCP/IP Networking. (line 54)
* gawk, ERRNO variable in <2>: Auto-set. (line 82)
* gawk, ERRNO variable in <3>: BEGINFILE/ENDFILE. (line 26)
* gawk, ERRNO variable in <4>: Close Files And Pipes.
- (line 138)
+ (line 139)
* gawk, ERRNO variable in: Getline. (line 19)
-* gawk, escape sequences: Escape Sequences. (line 124)
-* gawk, extensions, disabling: Options. (line 247)
+* gawk, escape sequences: Escape Sequences. (line 132)
+* gawk, extensions, disabling: Options. (line 252)
* gawk, features, adding: Adding Code. (line 6)
* gawk, features, advanced: Advanced Features. (line 6)
-* gawk, field separators and: User-modified. (line 77)
-* gawk, FIELDWIDTHS variable in <1>: User-modified. (line 35)
+* gawk, field separators and: User-modified. (line 71)
+* gawk, FIELDWIDTHS variable in <1>: User-modified. (line 37)
* gawk, FIELDWIDTHS variable in: Constant Size. (line 23)
* gawk, file names in: Special Files. (line 6)
* gawk, format-control characters: Control Letters. (line 18)
-* gawk, FPAT variable in <1>: User-modified. (line 45)
+* gawk, FPAT variable in <1>: User-modified. (line 43)
* gawk, FPAT variable in: Splitting By Content.
(line 27)
-* gawk, FUNCTAB array in: Auto-set. (line 128)
+* gawk, FUNCTAB array in: Auto-set. (line 123)
* gawk, function arguments and: Calling Built-in. (line 16)
* gawk, hexadecimal numbers and: Nondecimal-numbers. (line 42)
* gawk, IGNORECASE variable in <1>: Array Sorting Functions.
(line 83)
-* gawk, IGNORECASE variable in <2>: String Functions. (line 48)
-* gawk, IGNORECASE variable in <3>: Array Intro. (line 91)
-* gawk, IGNORECASE variable in <4>: User-modified. (line 82)
+* gawk, IGNORECASE variable in <2>: String Functions. (line 58)
+* gawk, IGNORECASE variable in <3>: Array Intro. (line 94)
+* gawk, IGNORECASE variable in <4>: User-modified. (line 76)
* gawk, IGNORECASE variable in: Case-sensitivity. (line 26)
* gawk, implementation issues: Notes. (line 6)
* gawk, implementation issues, debugging: Compatibility Mode. (line 6)
@@ -31596,16 +32597,16 @@ Index
(line 6)
* gawk, interval expressions and: Regexp Operators. (line 139)
* gawk, line continuation in: Conditional Exp. (line 34)
-* gawk, LINT variable in: User-modified. (line 98)
+* gawk, LINT variable in: User-modified. (line 88)
* gawk, list of contributors to: Contributors. (line 6)
* gawk, MS-DOS version of: PC Using. (line 10)
* gawk, MS-Windows version of: PC Using. (line 10)
* gawk, newlines in: Statements/Lines. (line 12)
* gawk, octal numbers and: Nondecimal-numbers. (line 42)
-* gawk, OS/2 version of: PC Using. (line 10)
-* gawk, PROCINFO array in <1>: Two-way I/O. (line 116)
+* gawk, OS/2 version of: PC Using. (line 16)
+* gawk, PROCINFO array in <1>: Two-way I/O. (line 99)
* gawk, PROCINFO array in <2>: Time Functions. (line 47)
-* gawk, PROCINFO array in: Auto-set. (line 142)
+* gawk, PROCINFO array in: Auto-set. (line 137)
* gawk, regexp constants and: Using Constant Regexps.
(line 28)
* gawk, regular expressions, case sensitivity: Case-sensitivity.
@@ -31613,48 +32614,48 @@ Index
* gawk, regular expressions, operators: GNU Regexp Operators.
(line 6)
* gawk, regular expressions, precedence: Regexp Operators. (line 161)
-* gawk, RT variable in <1>: Auto-set. (line 275)
+* gawk, RT variable in <1>: Auto-set. (line 272)
* gawk, RT variable in <2>: Multiple Line. (line 129)
-* gawk, RT variable in: Records. (line 132)
-* gawk, See Also awk: Preface. (line 36)
+* gawk, RT variable in: awk split records. (line 124)
+* gawk, See Also awk: Preface. (line 34)
* gawk, source code, obtaining: Getting. (line 6)
* gawk, splitting fields and: Constant Size. (line 88)
* gawk, string-translation functions: I18N Functions. (line 6)
-* gawk, SYMTAB array in: Auto-set. (line 283)
-* gawk, TEXTDOMAIN variable in: User-modified. (line 162)
+* gawk, SYMTAB array in: Auto-set. (line 276)
+* gawk, TEXTDOMAIN variable in: User-modified. (line 152)
* gawk, timestamps: Time Functions. (line 6)
-* gawk, uses for: Preface. (line 36)
-* gawk, versions of, information about, printing: Options. (line 293)
+* gawk, uses for: Preface. (line 34)
+* gawk, versions of, information about, printing: Options. (line 298)
* gawk, VMS version of: VMS Installation. (line 6)
* gawk, word-boundary operator: GNU Regexp Operators.
(line 63)
* gawkextlib: gawkextlib. (line 6)
* gawkextlib project: gawkextlib. (line 6)
-* General Public License (GPL): Glossary. (line 306)
+* General Public License (GPL): Glossary. (line 305)
* General Public License, See GPL: Manual History. (line 11)
* generate time values: Time Functions. (line 25)
-* gensub <1>: String Functions. (line 82)
+* gensub <1>: String Functions. (line 89)
* gensub: Using Constant Regexps.
(line 43)
* gensub() function (gawk), escape processing: Gory Details. (line 6)
* getaddrinfo() function (C library): TCP/IP Networking. (line 38)
* getgrent() function (C library): Group Functions. (line 6)
* getgrent() user-defined function: Group Functions. (line 6)
-* getgrgid() function (C library): Group Functions. (line 186)
-* getgrgid() user-defined function: Group Functions. (line 189)
-* getgrnam() function (C library): Group Functions. (line 175)
-* getgrnam() user-defined function: Group Functions. (line 180)
-* getgruser() function (C library): Group Functions. (line 195)
-* getgruser() function, user-defined: Group Functions. (line 198)
+* getgrgid() function (C library): Group Functions. (line 183)
+* getgrgid() user-defined function: Group Functions. (line 186)
+* getgrnam() function (C library): Group Functions. (line 172)
+* getgrnam() user-defined function: Group Functions. (line 177)
+* getgruser() function (C library): Group Functions. (line 192)
+* getgruser() function, user-defined: Group Functions. (line 195)
* getline command: Reading Files. (line 20)
* getline command, _gr_init() user-defined function: Group Functions.
- (line 82)
+ (line 83)
* getline command, _pw_init() function: Passwd Functions. (line 154)
* getline command, coprocesses, using from <1>: Close Files And Pipes.
(line 6)
* getline command, coprocesses, using from: Getline/Coprocess.
(line 6)
-* getline command, deadlock and: Two-way I/O. (line 70)
+* getline command, deadlock and: Two-way I/O. (line 52)
* getline command, explicit input with: Getline. (line 6)
* getline command, FILENAME variable and: Getline Notes. (line 19)
* getline command, return values: Getline. (line 19)
@@ -31674,42 +32675,41 @@ Index
* getpwuid() function (C library): Passwd Functions. (line 188)
* getpwuid() user-defined function: Passwd Functions. (line 192)
* gettext library: Explaining gettext. (line 6)
-* gettext library, locale categories: Explaining gettext. (line 80)
-* gettext() function (C library): Explaining gettext. (line 62)
+* gettext library, locale categories: Explaining gettext. (line 81)
+* gettext() function (C library): Explaining gettext. (line 63)
* gettimeofday() extension function: Extension Sample Time.
- (line 13)
-* git utility <1>: Adding Code. (line 111)
+ (line 12)
+* git utility <1>: Adding Code. (line 112)
* git utility <2>: Accessing The Source.
(line 10)
* git utility <3>: Other Versions. (line 29)
* git utility: gawkextlib. (line 29)
-* git, use of for gawk source code: Derived Files. (line 6)
-* GMP: Gawk and MPFR. (line 6)
+* Git, use of for gawk source code: Derived Files. (line 6)
* GNITS mailing list: Acknowledgments. (line 52)
-* GNU awk, See gawk: Preface. (line 49)
+* GNU awk, See gawk: Preface. (line 51)
* GNU Free Documentation License: GNU Free Documentation License.
(line 7)
-* GNU General Public License: Glossary. (line 306)
-* GNU Lesser General Public License: Glossary. (line 397)
+* GNU General Public License: Glossary. (line 305)
+* GNU Lesser General Public License: Glossary. (line 396)
* GNU long options <1>: Options. (line 6)
* GNU long options: Command Line. (line 13)
* GNU long options, printing list of: Options. (line 154)
-* GNU Project <1>: Glossary. (line 315)
+* GNU Project <1>: Glossary. (line 314)
* GNU Project: Manual History. (line 11)
-* GNU/Linux <1>: Glossary. (line 616)
+* GNU/Linux <1>: Glossary. (line 611)
* GNU/Linux <2>: I18N Example. (line 55)
* GNU/Linux: Manual History. (line 28)
* Gordon, Assaf: Contributors. (line 105)
-* GPL (General Public License) <1>: Glossary. (line 306)
+* GPL (General Public License) <1>: Glossary. (line 305)
* GPL (General Public License): Manual History. (line 11)
* GPL (General Public License), printing: Options. (line 88)
* grcat program: Group Functions. (line 16)
* Grigera, Juan: Contributors. (line 57)
* group database, reading: Group Functions. (line 6)
* group file: Group Functions. (line 6)
-* group ID of gawk user: Auto-set. (line 186)
+* group ID of gawk user: Auto-set. (line 187)
* groups, information about: Group Functions. (line 6)
-* gsub <1>: String Functions. (line 135)
+* gsub <1>: String Functions. (line 139)
* gsub: Using Constant Regexps.
(line 43)
* gsub() function, arguments of: String Functions. (line 460)
@@ -31725,7 +32725,7 @@ Index
* help debugger command: Miscellaneous Debugger Commands.
(line 66)
* hexadecimal numbers: Nondecimal-numbers. (line 6)
-* hexadecimal values, enabling interpretation of: Options. (line 207)
+* hexadecimal values, enabling interpretation of: Options. (line 211)
* history expansion, in debugger: Readline Support. (line 6)
* histsort.awk program: History Sorting. (line 25)
* Hughes, Phil: Acknowledgments. (line 43)
@@ -31734,30 +32734,28 @@ Index
* hyphen (-), -- operator <1>: Precedence. (line 46)
* hyphen (-), -- operator: Increment Ops. (line 48)
* hyphen (-), -= operator <1>: Precedence. (line 95)
-* hyphen (-), -= operator: Assignment Ops. (line 129)
+* hyphen (-), -= operator: Assignment Ops. (line 130)
* hyphen (-), filenames beginning with: Options. (line 59)
* hyphen (-), in bracket expressions: Bracket Expressions. (line 17)
* i debugger command (alias for info): Debugger Info. (line 13)
* id utility: Id Program. (line 6)
* id.awk program: Id Program. (line 30)
-* IEEE-754 format: Floating-point Representation.
- (line 6)
* if statement: If Statement. (line 6)
* if statement, actions, changing: Ranges. (line 25)
* if statement, use of regexps in: Regexp Usage. (line 19)
* igawk.sh program: Igawk Program. (line 124)
* ignore breakpoint: Breakpoint Control. (line 87)
* ignore debugger command: Breakpoint Control. (line 87)
-* IGNORECASE variable: User-modified. (line 82)
-* IGNORECASE variable, and array indices: Array Intro. (line 91)
+* IGNORECASE variable: User-modified. (line 76)
+* IGNORECASE variable, and array indices: Array Intro. (line 94)
* IGNORECASE variable, and array sorting functions: Array Sorting Functions.
(line 83)
* IGNORECASE variable, in example programs: Library Functions.
(line 53)
* IGNORECASE variable, with ~ and !~ operators: Case-sensitivity.
(line 26)
-* Illumos: Other Versions. (line 104)
-* Illumos, POSIX-compliant awk: Other Versions. (line 104)
+* Illumos: Other Versions. (line 105)
+* Illumos, POSIX-compliant awk: Other Versions. (line 105)
* implementation issues, gawk: Notes. (line 6)
* implementation issues, gawk, debugging: Compatibility Mode. (line 6)
* implementation issues, gawk, limits <1>: Redirection. (line 135)
@@ -31770,12 +32768,14 @@ Index
(line 43)
* in operator, order of array access: Scanning an Array. (line 48)
* in operator, testing if array element exists: Reference to Elements.
- (line 37)
+ (line 38)
* in operator, use in loops: Scanning an Array. (line 17)
+* including files, @include directive: Include Files. (line 8)
* increment operators: Increment Ops. (line 6)
-* index: String Functions. (line 151)
-* indexing arrays: Array Intro. (line 49)
+* index: String Functions. (line 155)
+* indexing arrays: Array Intro. (line 50)
* indirect function calls: Indirect Calls. (line 6)
+* indirect function calls, @-notation: Indirect Calls. (line 47)
* infinite precision: Arbitrary Precision Arithmetic.
(line 6)
* info debugger command: Debugger Info. (line 13)
@@ -31789,9 +32789,9 @@ Index
* input files, examples: Sample Data Files. (line 6)
* input files, reading: Reading Files. (line 6)
* input files, running awk without: Read Terminal. (line 6)
-* input files, variable assignments and: Other Arguments. (line 19)
+* input files, variable assignments and: Other Arguments. (line 23)
* input pipeline: Getline/Pipe. (line 9)
-* input record, length of: String Functions. (line 171)
+* input record, length of: String Functions. (line 174)
* input redirection: Getline/File. (line 6)
* input, data, nondecimal: Nondecimal Data. (line 6)
* input, explicit: Getline. (line 6)
@@ -31801,90 +32801,87 @@ Index
* input, standard <1>: Special FD. (line 6)
* input, standard: Read Terminal. (line 6)
* input/output functions: I/O Functions. (line 6)
-* input/output, binary: User-modified. (line 10)
+* input/output, binary: User-modified. (line 15)
* input/output, from BEGIN and END: I/O And BEGIN/END. (line 6)
-* input/output, two-way: Two-way I/O. (line 44)
+* input/output, two-way: Two-way I/O. (line 25)
* insomnia, cure for: Alarm Program. (line 6)
* installation, VMS: VMS Installation. (line 6)
* installing gawk: Installation. (line 6)
* instruction tracing, in debugger: Debugger Info. (line 89)
-* int: Numeric Functions. (line 23)
+* int: Numeric Functions. (line 38)
* INT signal (MS-Windows): Profiling. (line 214)
* integer array indices: Numeric Array Subscripts.
(line 31)
-* integers: General Arithmetic. (line 6)
* integers, arbitrary precision: Arbitrary Precision Integers.
(line 6)
-* integers, unsigned: General Arithmetic. (line 15)
-* interacting with other programs: I/O Functions. (line 72)
+* integers, unsigned: Computer Arithmetic. (line 41)
+* interacting with other programs: I/O Functions. (line 75)
* internationalization <1>: I18N and L10N. (line 6)
* internationalization: I18N Functions. (line 6)
* internationalization, localization <1>: Internationalization.
(line 13)
-* internationalization, localization: User-modified. (line 162)
+* internationalization, localization: User-modified. (line 152)
* internationalization, localization, character classes: Bracket Expressions.
- (line 90)
+ (line 100)
* internationalization, localization, gawk and: Internationalization.
(line 13)
* internationalization, localization, locale categories: Explaining gettext.
- (line 80)
+ (line 81)
* internationalization, localization, marked strings: Programmer i18n.
(line 14)
* internationalization, localization, portability and: I18N Portability.
(line 6)
* internationalizing a program: Explaining gettext. (line 6)
-* interpreted programs <1>: Glossary. (line 357)
+* interpreted programs <1>: Glossary. (line 356)
* interpreted programs: Basic High Level. (line 15)
* interval expressions, regexp operator: Regexp Operators. (line 116)
* inventory-shipped file: Sample Data Files. (line 32)
-* invoke shell command: I/O Functions. (line 72)
+* invoke shell command: I/O Functions. (line 75)
* isarray: Type Functions. (line 11)
-* ISO: Glossary. (line 368)
+* ISO: Glossary. (line 367)
* ISO 8859-1: Glossary. (line 133)
* ISO Latin-1: Glossary. (line 133)
* Jacobs, Andrew: Passwd Functions. (line 90)
* Jaegermann, Michal <1>: Contributors. (line 45)
* Jaegermann, Michal: Acknowledgments. (line 60)
-* Java implementation of awk: Other Versions. (line 112)
-* Java programming language: Glossary. (line 380)
-* jawk: Other Versions. (line 112)
+* Java implementation of awk: Other Versions. (line 113)
+* Java programming language: Glossary. (line 379)
+* jawk: Other Versions. (line 113)
* Jedi knights: Undocumented. (line 6)
+* Johansen, Chris: Signature Program. (line 25)
* join() user-defined function: Join Function. (line 18)
* Kahrs, Ju"rgen <1>: Contributors. (line 70)
* Kahrs, Ju"rgen: Acknowledgments. (line 60)
* Kasal, Stepan: Acknowledgments. (line 60)
* Kenobi, Obi-Wan: Undocumented. (line 6)
* Kernighan, Brian <1>: Glossary. (line 143)
-* Kernighan, Brian <2>: Basic Data Typing. (line 55)
+* Kernighan, Brian <2>: Basic Data Typing. (line 54)
* Kernighan, Brian <3>: Other Versions. (line 13)
* Kernighan, Brian <4>: Contributors. (line 11)
* Kernighan, Brian <5>: BTL. (line 6)
* Kernighan, Brian <6>: Library Functions. (line 12)
* Kernighan, Brian <7>: Concatenation. (line 6)
* Kernighan, Brian <8>: Getline/Pipe. (line 6)
-* Kernighan, Brian <9>: Acknowledgments. (line 72)
-* Kernighan, Brian <10>: Conventions. (line 34)
+* Kernighan, Brian <9>: Acknowledgments. (line 76)
+* Kernighan, Brian <10>: Conventions. (line 38)
* Kernighan, Brian: History. (line 17)
* kill command, dynamic profiling: Profiling. (line 188)
* Knights, jedi: Undocumented. (line 6)
-* Knuth, Donald: Arbitrary Precision Arithmetic.
- (line 6)
* Kwok, Conrad: Contributors. (line 34)
* l debugger command (alias for list): Miscellaneous Debugger Commands.
(line 72)
* labels.awk program: Labels Program. (line 51)
+* Langston, Peter: Advanced Features. (line 6)
* languages, data-driven: Basic High Level. (line 85)
-* Laurie, Dirk: Changing Precision. (line 6)
-* LC_ALL locale category: Explaining gettext. (line 120)
-* LC_COLLATE locale category: Explaining gettext. (line 93)
-* LC_CTYPE locale category: Explaining gettext. (line 97)
-* LC_MESSAGES locale category: Explaining gettext. (line 87)
+* LC_ALL locale category: Explaining gettext. (line 117)
+* LC_COLLATE locale category: Explaining gettext. (line 94)
+* LC_CTYPE locale category: Explaining gettext. (line 98)
+* LC_MESSAGES locale category: Explaining gettext. (line 88)
* LC_MESSAGES locale category, bindtextdomain() function (gawk): Programmer i18n.
- (line 88)
-* LC_MONETARY locale category: Explaining gettext. (line 103)
-* LC_NUMERIC locale category: Explaining gettext. (line 107)
-* LC_RESPONSE locale category: Explaining gettext. (line 111)
-* LC_TIME locale category: Explaining gettext. (line 115)
+ (line 99)
+* LC_MONETARY locale category: Explaining gettext. (line 104)
+* LC_NUMERIC locale category: Explaining gettext. (line 108)
+* LC_TIME locale category: Explaining gettext. (line 112)
* left angle bracket (<), < operator <1>: Precedence. (line 65)
* left angle bracket (<), < operator: Comparison Operators.
(line 11)
@@ -31895,12 +32892,12 @@ Index
* left shift: Bitwise Functions. (line 46)
* left shift, bitwise: Bitwise Functions. (line 32)
* leftmost longest match: Multiple Line. (line 26)
-* length: String Functions. (line 164)
-* length of input record: String Functions. (line 171)
-* length of string: String Functions. (line 164)
-* Lesser General Public License (LGPL): Glossary. (line 397)
-* LGPL (Lesser General Public License): Glossary. (line 397)
-* libmawk: Other Versions. (line 120)
+* length: String Functions. (line 167)
+* length of input record: String Functions. (line 174)
+* length of string: String Functions. (line 167)
+* Lesser General Public License (LGPL): Glossary. (line 396)
+* LGPL (Lesser General Public License): Glossary. (line 396)
+* libmawk: Other Versions. (line 121)
* libraries of awk functions: Library Functions. (line 6)
* libraries of awk functions, assertions: Assert Function. (line 6)
* libraries of awk functions, associative arrays and: Library Names.
@@ -31933,35 +32930,37 @@ Index
* lines, duplicate, removing: History Sorting. (line 6)
* lines, matching ranges of: Ranges. (line 6)
* lines, skipping between markers: Ranges. (line 43)
-* lint checking: User-modified. (line 98)
+* lint checking: User-modified. (line 88)
* lint checking, array elements: Delete. (line 34)
* lint checking, array subscripts: Uninitialized Subscripts.
(line 43)
* lint checking, empty programs: Command Line. (line 16)
-* lint checking, issuing warnings: Options. (line 182)
+* lint checking, issuing warnings: Options. (line 185)
* lint checking, POSIXLY_CORRECT environment variable: Options.
- (line 332)
+ (line 336)
* lint checking, undefined functions: Pass By Value/Reference.
(line 88)
-* LINT variable: User-modified. (line 98)
-* Linux <1>: Glossary. (line 616)
+* LINT variable: User-modified. (line 88)
+* Linux <1>: Glossary. (line 611)
* Linux <2>: I18N Example. (line 55)
* Linux: Manual History. (line 28)
* list all global variables, in debugger: Debugger Info. (line 48)
* list debugger command: Miscellaneous Debugger Commands.
(line 72)
* list function definitions, in debugger: Debugger Info. (line 30)
-* loading, library: Options. (line 173)
+* loading extensions, @load directive: Loading Shared Libraries.
+ (line 8)
+* loading, extensions: Options. (line 173)
* local variables, in a function: Variable Scope. (line 6)
-* locale categories: Explaining gettext. (line 80)
-* locale decimal point character: Options. (line 263)
+* locale categories: Explaining gettext. (line 81)
+* locale decimal point character: Options. (line 268)
* locale, definition of: Locales. (line 6)
* localization: I18N and L10N. (line 6)
* localization, See internationalization, localization: I18N and L10N.
(line 6)
-* log: Numeric Functions. (line 30)
+* log: Numeric Functions. (line 45)
* log files, timestamps in: Time Functions. (line 6)
-* logarithm: Numeric Functions. (line 30)
+* logarithm: Numeric Functions. (line 45)
* logical false/true: Truth Values. (line 6)
* logical operators, See Boolean expressions: Boolean Ops. (line 6)
* login information: Passwd Functions. (line 16)
@@ -31982,44 +32981,45 @@ Index
* mail-list file: Sample Data Files. (line 6)
* mailing labels, printing: Labels Program. (line 6)
* mailing list, GNITS: Acknowledgments. (line 52)
-* Malmberg, John <1>: Bugs. (line 70)
+* Malmberg, John <1>: Bugs. (line 71)
* Malmberg, John: Acknowledgments. (line 60)
* mark parity: Ordinal Functions. (line 45)
* marked string extraction (internationalization): String Extraction.
(line 6)
* marked strings, extracting: String Extraction. (line 6)
* Marx, Groucho: Increment Ops. (line 60)
-* match: String Functions. (line 204)
-* match regexp in string: String Functions. (line 204)
+* match: String Functions. (line 207)
+* match regexp in string: String Functions. (line 207)
* match() function, RSTART/RLENGTH variables: String Functions.
- (line 221)
+ (line 224)
* matching, expressions, See comparison expressions: Typing and Comparison.
(line 9)
* matching, leftmost longest: Multiple Line. (line 26)
-* matching, null strings: Gory Details. (line 164)
+* matching, null strings: Gory Details. (line 143)
* mawk utility <1>: Other Versions. (line 44)
* mawk utility <2>: Nextfile Statement. (line 47)
* mawk utility <3>: Concatenation. (line 36)
* mawk utility <4>: Getline/Pipe. (line 62)
-* mawk utility: Escape Sequences. (line 124)
-* maximum precision supported by MPFR library: Auto-set. (line 227)
+* mawk utility: Escape Sequences. (line 132)
+* maximum precision supported by MPFR library: Auto-set. (line 228)
+* McIlroy, Doug: Glossary. (line 149)
* McPhee, Patrick: Contributors. (line 100)
-* message object files: Explaining gettext. (line 41)
+* message object files: Explaining gettext. (line 42)
* message object files, converting from portable object files: I18N Example.
- (line 62)
+ (line 63)
* message object files, specifying directory of <1>: Programmer i18n.
(line 47)
* message object files, specifying directory of: Explaining gettext.
- (line 53)
+ (line 54)
* messages from extensions: Printing Messages. (line 6)
* metacharacters in regular expressions: Regexp Operators. (line 6)
-* metacharacters, escape sequences for: Escape Sequences. (line 130)
-* minimum precision supported by MPFR library: Auto-set. (line 230)
+* metacharacters, escape sequences for: Escape Sequences. (line 138)
+* minimum precision supported by MPFR library: Auto-set. (line 231)
* mktime: Time Functions. (line 25)
* modifiers, in format specifiers: Format Modifiers. (line 6)
-* monetary information, localization: Explaining gettext. (line 103)
-* MPFR: Gawk and MPFR. (line 6)
-* msgfmt utility: I18N Example. (line 62)
+* monetary information, localization: Explaining gettext. (line 104)
+* Moore, Duncan: Getline Notes. (line 40)
+* msgfmt utility: I18N Example. (line 63)
* multiple precision: Arbitrary Precision Arithmetic.
(line 6)
* multiple-line records: Multiple Line. (line 6)
@@ -32028,46 +33028,44 @@ Index
* names, arrays/variables <1>: Library Names. (line 6)
* names, arrays/variables: Arrays. (line 18)
* names, functions <1>: Library Names. (line 6)
-* names, functions: Definition Syntax. (line 20)
+* names, functions: Definition Syntax. (line 23)
* namespace issues <1>: Library Names. (line 6)
* namespace issues: Arrays. (line 18)
-* namespace issues, functions: Definition Syntax. (line 20)
-* nawk utility: Names. (line 17)
-* negative zero: Unexpected Results. (line 34)
-* NetBSD: Glossary. (line 616)
+* namespace issues, functions: Definition Syntax. (line 23)
+* NetBSD: Glossary. (line 611)
* networks, programming: TCP/IP Networking. (line 6)
* networks, support for: Special Network. (line 6)
* newlines <1>: Boolean Ops. (line 67)
-* newlines <2>: Options. (line 253)
+* newlines <2>: Options. (line 258)
* newlines: Statements/Lines. (line 6)
* newlines, as field separators: Default Field Splitting.
(line 6)
-* newlines, as record separators: Records. (line 20)
-* newlines, in dynamic regexps: Computed Regexps. (line 58)
-* newlines, in regexp constants: Computed Regexps. (line 68)
+* newlines, as record separators: awk split records. (line 12)
+* newlines, in dynamic regexps: Computed Regexps. (line 59)
+* newlines, in regexp constants: Computed Regexps. (line 69)
* newlines, printing: Print Examples. (line 12)
* newlines, separating statements in actions <1>: Statements. (line 10)
* newlines, separating statements in actions: Action Overview.
(line 19)
* next debugger command: Debugger Execution Control.
(line 43)
-* next file statement: Feature History. (line 168)
+* next file statement: Feature History. (line 169)
* next statement <1>: Next Statement. (line 6)
-* next statement: Boolean Ops. (line 85)
-* next statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 37)
+* next statement: Boolean Ops. (line 93)
+* next statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 36)
* next statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE.
(line 49)
-* next statement, user-defined functions and: Next Statement. (line 45)
+* next statement, user-defined functions and: Next Statement. (line 44)
* nextfile statement: Nextfile Statement. (line 6)
* nextfile statement, BEGIN/END patterns and: I/O And BEGIN/END.
- (line 37)
+ (line 36)
* nextfile statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE.
(line 26)
* nextfile statement, user-defined functions and: Nextfile Statement.
(line 47)
* nexti debugger command: Debugger Execution Control.
(line 49)
-* NF variable <1>: Auto-set. (line 117)
+* NF variable <1>: Auto-set. (line 112)
* NF variable: Fields. (line 33)
* NF variable, decrementing: Changing Fields. (line 107)
* ni debugger command (alias for nexti): Debugger Execution Control.
@@ -32076,22 +33074,23 @@ Index
* non-existent array elements: Reference to Elements.
(line 23)
* not Boolean-logic operator: Boolean Ops. (line 6)
-* NR variable <1>: Auto-set. (line 137)
+* NR variable <1>: Auto-set. (line 132)
* NR variable: Records. (line 6)
-* NR variable, changing: Auto-set. (line 323)
+* NR variable, changing: Auto-set. (line 316)
* null strings <1>: Basic Data Typing. (line 26)
* null strings <2>: Truth Values. (line 6)
* null strings <3>: Regexp Field Splitting.
(line 43)
-* null strings: Records. (line 122)
-* null strings in gawk arguments, quoting and: Quoting. (line 62)
+* null strings: awk split records. (line 114)
+* null strings in gawk arguments, quoting and: Quoting. (line 79)
* null strings, and deleting array elements: Delete. (line 27)
* null strings, as array subscripts: Uninitialized Subscripts.
(line 43)
-* null strings, converting numbers to strings: Conversion. (line 21)
-* null strings, matching: Gory Details. (line 164)
+* null strings, converting numbers to strings: Strings And Numbers.
+ (line 21)
+* null strings, matching: Gory Details. (line 143)
* number as string of bits: Bitwise Functions. (line 109)
-* number of array elements: String Functions. (line 194)
+* number of array elements: String Functions. (line 197)
* number sign (#), #! (executable scripts): Executable Scripts.
(line 6)
* number sign (#), commenting: Comments. (line 6)
@@ -32101,9 +33100,8 @@ Index
* numbers, Cliff random: Cliff Random Function.
(line 6)
* numbers, converting <1>: Bitwise Functions. (line 109)
-* numbers, converting: Conversion. (line 6)
-* numbers, converting, to strings: User-modified. (line 28)
-* numbers, floating-point: General Arithmetic. (line 6)
+* numbers, converting: Strings And Numbers. (line 6)
+* numbers, converting, to strings: User-modified. (line 30)
* numbers, hexadecimal: Nondecimal-numbers. (line 6)
* numbers, octal: Nondecimal-numbers. (line 6)
* numbers, rounding: Round Function. (line 6)
@@ -32112,18 +33110,17 @@ Index
* numeric, output format: OFMT. (line 6)
* numeric, strings: Variable Typing. (line 6)
* o debugger command (alias for option): Debugger Info. (line 57)
-* oawk utility: Names. (line 17)
* obsolete features: Obsolete. (line 6)
* octal numbers: Nondecimal-numbers. (line 6)
-* octal values, enabling interpretation of: Options. (line 207)
-* OFMT variable <1>: User-modified. (line 115)
-* OFMT variable <2>: Conversion. (line 55)
+* octal values, enabling interpretation of: Options. (line 211)
+* OFMT variable <1>: User-modified. (line 105)
+* OFMT variable <2>: Strings And Numbers. (line 57)
* OFMT variable: OFMT. (line 15)
* OFMT variable, POSIX awk and: OFMT. (line 27)
-* OFS variable <1>: User-modified. (line 124)
+* OFS variable <1>: User-modified. (line 114)
* OFS variable <2>: Output Separators. (line 6)
* OFS variable: Changing Fields. (line 64)
-* OpenBSD: Glossary. (line 616)
+* OpenBSD: Glossary. (line 611)
* OpenSolaris: Other Versions. (line 96)
* operating systems, BSD-based: Manual History. (line 28)
* operating systems, PC, gawk on: PC Using. (line 6)
@@ -32173,14 +33170,14 @@ Index
(line 12)
* ord() user-defined function: Ordinal Functions. (line 16)
* order of evaluation, concatenation: Concatenation. (line 41)
-* ORS variable <1>: User-modified. (line 129)
+* ORS variable <1>: User-modified. (line 119)
* ORS variable: Output Separators. (line 20)
* output field separator, See OFS variable: Changing Fields. (line 64)
* output record separator, See ORS variable: Output Separators.
(line 20)
* output redirection: Redirection. (line 6)
* output wrapper: Output Wrappers. (line 6)
-* output, buffering: I/O Functions. (line 29)
+* output, buffering: I/O Functions. (line 32)
* output, duplicating into files: Tee Program. (line 6)
* output, files, closing: Close Files And Pipes.
(line 6)
@@ -32192,12 +33189,12 @@ Index
* output, standard: Special FD. (line 6)
* p debugger command (alias for print): Viewing And Changing Data.
(line 36)
-* P1003.1 POSIX standard: Glossary. (line 454)
-* parent process ID of gawk process: Auto-set. (line 195)
+* Papadopoulos, Panos: Contributors. (line 128)
+* parent process ID of gawk process: Auto-set. (line 196)
* parentheses (), in a profile: Profiling. (line 146)
-* parentheses (), regexp operator: Regexp Operators. (line 79)
+* parentheses (), regexp operator: Regexp Operators. (line 81)
* password file: Passwd Functions. (line 16)
-* patsplit: String Functions. (line 291)
+* patsplit: String Functions. (line 294)
* patterns: Patterns and Actions.
(line 6)
* patterns, comparison expressions as: Expression Patterns. (line 14)
@@ -32210,13 +33207,13 @@ Index
* patterns, types of: Pattern Overview. (line 15)
* pawk (profiling version of Brian Kernighan's awk): Other Versions.
(line 78)
-* pawk, awk-like facilities for Python: Other Versions. (line 124)
+* pawk, awk-like facilities for Python: Other Versions. (line 125)
* PC operating systems, gawk on: PC Using. (line 6)
* PC operating systems, gawk on, installing: PC Installation. (line 6)
* percent sign (%), % operator: Precedence. (line 55)
* percent sign (%), %= operator <1>: Precedence. (line 95)
-* percent sign (%), %= operator: Assignment Ops. (line 129)
-* period (.), regexp operator: Regexp Operators. (line 43)
+* percent sign (%), %= operator: Assignment Ops. (line 130)
+* period (.), regexp operator: Regexp Operators. (line 44)
* Perl: Future Extensions. (line 6)
* Peters, Arno: Contributors. (line 85)
* Peterson, Hal: Contributors. (line 39)
@@ -32224,7 +33221,7 @@ Index
(line 6)
* pipe, input: Getline/Pipe. (line 9)
* pipe, output: Redirection. (line 57)
-* Pitts, Dave <1>: Bugs. (line 70)
+* Pitts, Dave <1>: Bugs. (line 71)
* Pitts, Dave: Acknowledgments. (line 60)
* Plauger, P.J.: Library Functions. (line 12)
* plug-in: Extension Intro. (line 6)
@@ -32233,89 +33230,88 @@ Index
* plus sign (+), ++ operator: Increment Ops. (line 11)
* plus sign (+), += operator <1>: Precedence. (line 95)
* plus sign (+), += operator: Assignment Ops. (line 82)
-* plus sign (+), regexp operator: Regexp Operators. (line 102)
+* plus sign (+), regexp operator: Regexp Operators. (line 105)
* pointers to functions: Indirect Calls. (line 6)
-* portability: Escape Sequences. (line 94)
+* portability: Escape Sequences. (line 102)
* portability, #! (executable scripts): Executable Scripts. (line 33)
* portability, ** operator and: Arithmetic Ops. (line 81)
-* portability, **= operator and: Assignment Ops. (line 142)
-* portability, ARGV variable: Executable Scripts. (line 42)
+* portability, **= operator and: Assignment Ops. (line 143)
+* portability, ARGV variable: Executable Scripts. (line 59)
* portability, backslash continuation and: Statements/Lines. (line 30)
* portability, backslash in escape sequences: Escape Sequences.
- (line 112)
+ (line 120)
* portability, close() function and: Close Files And Pipes.
(line 81)
-* portability, data files as single record: Records. (line 194)
+* portability, data files as single record: gawk split records.
+ (line 65)
* portability, deleting array elements: Delete. (line 56)
* portability, example programs: Library Functions. (line 42)
-* portability, functions, defining: Definition Syntax. (line 99)
+* portability, functions, defining: Definition Syntax. (line 109)
* portability, gawk: New Ports. (line 6)
-* portability, gettext library and: Explaining gettext. (line 10)
+* portability, gettext library and: Explaining gettext. (line 11)
* portability, internationalization and: I18N Portability. (line 6)
-* portability, length() function: String Functions. (line 173)
-* portability, new awk vs. old awk: Conversion. (line 55)
+* portability, length() function: String Functions. (line 176)
+* portability, new awk vs. old awk: Strings And Numbers. (line 57)
* portability, next statement in user-defined functions: Pass By Value/Reference.
(line 91)
* portability, NF variable, decrementing: Changing Fields. (line 115)
* portability, operators: Increment Ops. (line 60)
* portability, operators, not in POSIX awk: Precedence. (line 98)
-* portability, POSIXLY_CORRECT environment variable: Options. (line 353)
+* portability, POSIXLY_CORRECT environment variable: Options. (line 356)
* portability, substr() function: String Functions. (line 510)
* portable object files <1>: Translator i18n. (line 6)
-* portable object files: Explaining gettext. (line 36)
+* portable object files: Explaining gettext. (line 37)
* portable object files, converting to message object files: I18N Example.
- (line 62)
+ (line 63)
* portable object files, generating: Options. (line 147)
-* portable object template files: Explaining gettext. (line 30)
+* portable object template files: Explaining gettext. (line 31)
* porting gawk: New Ports. (line 6)
* positional specifiers, printf statement <1>: Printf Ordering.
(line 6)
* positional specifiers, printf statement: Format Modifiers. (line 13)
* positional specifiers, printf statement, mixing with regular formats: Printf Ordering.
(line 57)
-* positive zero: Unexpected Results. (line 34)
-* POSIX awk <1>: Assignment Ops. (line 136)
+* POSIX awk <1>: Assignment Ops. (line 137)
* POSIX awk: This Manual. (line 14)
* POSIX awk, ** operator and: Precedence. (line 98)
-* POSIX awk, **= operator and: Assignment Ops. (line 142)
+* POSIX awk, **= operator and: Assignment Ops. (line 143)
* POSIX awk, < operator and: Getline/File. (line 26)
* POSIX awk, arithmetic operators and: Arithmetic Ops. (line 30)
* POSIX awk, backslashes in string constants: Escape Sequences.
- (line 112)
+ (line 120)
* POSIX awk, BEGIN/END patterns: I/O And BEGIN/END. (line 16)
-* POSIX awk, bracket expressions and: Bracket Expressions. (line 24)
+* POSIX awk, bracket expressions and: Bracket Expressions. (line 26)
* POSIX awk, bracket expressions and, character classes: Bracket Expressions.
- (line 30)
+ (line 32)
* POSIX awk, break statement and: Break Statement. (line 51)
* POSIX awk, changes in awk versions: POSIX. (line 6)
-* POSIX awk, continue statement and: Continue Statement. (line 43)
-* POSIX awk, CONVFMT variable and: User-modified. (line 28)
-* POSIX awk, date utility and: Time Functions. (line 263)
+* POSIX awk, continue statement and: Continue Statement. (line 44)
+* POSIX awk, CONVFMT variable and: User-modified. (line 30)
+* POSIX awk, date utility and: Time Functions. (line 254)
* POSIX awk, field separators and <1>: Field Splitting Summary.
(line 40)
* POSIX awk, field separators and: Fields. (line 6)
-* POSIX awk, FS variable and: User-modified. (line 66)
-* POSIX awk, function keyword in: Definition Syntax. (line 83)
-* POSIX awk, functions and, gsub()/sub(): Gory Details. (line 54)
-* POSIX awk, functions and, length(): String Functions. (line 173)
+* POSIX awk, FS variable and: User-modified. (line 60)
+* POSIX awk, function keyword in: Definition Syntax. (line 93)
+* POSIX awk, functions and, gsub()/sub(): Gory Details. (line 90)
+* POSIX awk, functions and, length(): String Functions. (line 176)
* POSIX awk, GNU long options and: Options. (line 15)
* POSIX awk, interval expressions in: Regexp Operators. (line 135)
-* POSIX awk, next/nextfile statements and: Next Statement. (line 45)
+* POSIX awk, next/nextfile statements and: Next Statement. (line 44)
* POSIX awk, numeric strings and: Variable Typing. (line 6)
-* POSIX awk, OFMT variable and <1>: Conversion. (line 55)
+* POSIX awk, OFMT variable and <1>: Strings And Numbers. (line 57)
* POSIX awk, OFMT variable and: OFMT. (line 27)
-* POSIX awk, period (.), using: Regexp Operators. (line 50)
+* POSIX awk, period (.), using: Regexp Operators. (line 51)
* POSIX awk, printf format strings and: Format Modifiers. (line 159)
* POSIX awk, regular expressions and: Regexp Operators. (line 161)
* POSIX awk, timestamps and: Time Functions. (line 6)
* POSIX awk, | I/O operator and: Getline/Pipe. (line 55)
-* POSIX mode: Options. (line 247)
-* POSIX, awk and: Preface. (line 23)
+* POSIX mode: Options. (line 252)
+* POSIX, awk and: Preface. (line 21)
* POSIX, gawk extensions not included in: POSIX/GNU. (line 6)
* POSIX, programs, implementing in awk: Clones. (line 6)
-* POSIXLY_CORRECT environment variable: Options. (line 332)
-* PREC variable <1>: Setting Precision. (line 6)
-* PREC variable: User-modified. (line 134)
+* POSIXLY_CORRECT environment variable: Options. (line 336)
+* PREC variable: User-modified. (line 124)
* precedence <1>: Precedence. (line 6)
* precedence: Increment Ops. (line 60)
* precedence, regexp operators: Regexp Operators. (line 156)
@@ -32326,7 +33322,7 @@ Index
* print statement, commas, omitting: Print Examples. (line 31)
* print statement, I/O operators in: Precedence. (line 71)
* print statement, line continuations and: Print Examples. (line 76)
-* print statement, OFMT variable and: User-modified. (line 124)
+* print statement, OFMT variable and: User-modified. (line 114)
* print statement, See Also redirection, of output: Redirection.
(line 17)
* print statement, sprintf() function and: Round Function. (line 6)
@@ -32357,78 +33353,77 @@ Index
* printing, unduplicated lines of text: Uniq Program. (line 6)
* printing, user information: Id Program. (line 6)
* private variables: Library Names. (line 11)
-* process group idIDof gawk process: Auto-set. (line 189)
-* process ID of gawk process: Auto-set. (line 192)
-* processes, two-way communications with: Two-way I/O. (line 23)
+* process group idIDof gawk process: Auto-set. (line 190)
+* process ID of gawk process: Auto-set. (line 193)
+* processes, two-way communications with: Two-way I/O. (line 6)
* processing data: Basic High Level. (line 6)
* PROCINFO array <1>: Passwd Functions. (line 6)
* PROCINFO array <2>: Time Functions. (line 47)
-* PROCINFO array: Auto-set. (line 142)
-* PROCINFO array, and communications via ptys: Two-way I/O. (line 116)
+* PROCINFO array: Auto-set. (line 137)
+* PROCINFO array, and communications via ptys: Two-way I/O. (line 99)
* PROCINFO array, and group membership: Group Functions. (line 6)
* PROCINFO array, and user and group ID numbers: Id Program. (line 15)
* PROCINFO array, testing the field splitting: Passwd Functions.
(line 161)
-* PROCINFO array, uses: Auto-set. (line 248)
+* PROCINFO array, uses: Auto-set. (line 249)
* PROCINFO, values of sorted_in: Controlling Scanning.
- (line 24)
+ (line 26)
* profiling awk programs: Profiling. (line 6)
* profiling awk programs, dynamically: Profiling. (line 179)
-* program identifiers: Auto-set. (line 160)
+* program identifiers: Auto-set. (line 155)
* program, definition of: Getting Started. (line 21)
-* programmers, attractiveness of: Two-way I/O. (line 6)
* programming conventions, --non-decimal-data option: Nondecimal Data.
(line 36)
-* programming conventions, ARGC/ARGV variables: Auto-set. (line 31)
+* programming conventions, ARGC/ARGV variables: Auto-set. (line 35)
* programming conventions, exit statement: Exit Statement. (line 38)
* programming conventions, function parameters: Return Statement.
(line 45)
* programming conventions, functions, calling: Calling Built-in.
(line 10)
* programming conventions, functions, writing: Definition Syntax.
- (line 55)
+ (line 65)
* programming conventions, gawk extensions: Internal File Ops.
(line 45)
* programming conventions, private variable names: Library Names.
(line 23)
* programming language, recipe for: History. (line 6)
-* programming languages, Ada: Glossary. (line 20)
+* programming languages, Ada: Glossary. (line 19)
* programming languages, data-driven vs. procedural: Getting Started.
(line 12)
-* programming languages, Java: Glossary. (line 380)
+* programming languages, Java: Glossary. (line 379)
* programming, basic steps: Basic High Level. (line 20)
* programming, concepts: Basic Concepts. (line 6)
* pwcat program: Passwd Functions. (line 23)
* q debugger command (alias for quit): Miscellaneous Debugger Commands.
(line 99)
-* QSE Awk: Other Versions. (line 130)
+* QSE Awk: Other Versions. (line 131)
* Quanstrom, Erik: Alarm Program. (line 8)
* question mark (?), ?: operator: Precedence. (line 92)
* question mark (?), regexp operator <1>: GNU Regexp Operators.
(line 59)
* question mark (?), regexp operator: Regexp Operators. (line 111)
-* QuikTrim Awk: Other Versions. (line 134)
+* QuikTrim Awk: Other Versions. (line 135)
* quit debugger command: Miscellaneous Debugger Commands.
(line 99)
* QUIT signal (MS-Windows): Profiling. (line 214)
* quoting in gawk command lines: Long. (line 26)
-* quoting in gawk command lines, tricks for: Quoting. (line 71)
+* quoting in gawk command lines, tricks for: Quoting. (line 88)
* quoting, for small awk programs: Comments. (line 27)
* r debugger command (alias for run): Debugger Execution Control.
(line 62)
* Rakitzis, Byron: History Sorting. (line 25)
* Ramey, Chet <1>: General Data Types. (line 6)
* Ramey, Chet: Acknowledgments. (line 60)
-* rand: Numeric Functions. (line 34)
+* rand: Numeric Functions. (line 50)
* random numbers, Cliff: Cliff Random Function.
(line 6)
* random numbers, rand()/srand() functions: Numeric Functions.
- (line 34)
-* random numbers, seed of: Numeric Functions. (line 64)
+ (line 50)
+* random numbers, seed of: Numeric Functions. (line 80)
* range expressions (regexps): Bracket Expressions. (line 6)
* range patterns: Ranges. (line 6)
* range patterns, line continuation and: Ranges. (line 65)
-* Rankin, Pat <1>: Bugs. (line 70)
+* Rankin, Pat <1>: Bugs. (line 71)
* Rankin, Pat <2>: Contributors. (line 37)
* Rankin, Pat <3>: Assignment Ops. (line 100)
* Rankin, Pat: Acknowledgments. (line 60)
@@ -32443,19 +33438,20 @@ Index
* readfile() user-defined function: Readfile Function. (line 30)
* reading input files: Reading Files. (line 6)
* recipe for a programming language: History. (line 6)
-* record separators <1>: User-modified. (line 143)
-* record separators: Records. (line 14)
-* record separators, changing: Records. (line 93)
-* record separators, regular expressions as: Records. (line 132)
+* record separators <1>: User-modified. (line 133)
+* record separators: awk split records. (line 6)
+* record separators, changing: awk split records. (line 85)
+* record separators, regular expressions as: awk split records.
+ (line 124)
* record separators, with multiline records: Multiple Line. (line 10)
* records <1>: Basic High Level. (line 73)
* records: Reading Files. (line 14)
* records, multiline: Multiple Line. (line 6)
* records, printing: Print. (line 22)
* records, splitting input into: Records. (line 6)
-* records, terminating: Records. (line 132)
-* records, treating files as: Records. (line 219)
-* recursive functions: Definition Syntax. (line 73)
+* records, terminating: awk split records. (line 124)
+* records, treating files as: gawk split records. (line 92)
+* recursive functions: Definition Syntax. (line 83)
* redirect gawk output, in debugger: Debugger Info. (line 72)
* redirection of input: Getline/File. (line 6)
* redirection of output: Redirection. (line 6)
@@ -32466,12 +33462,12 @@ Index
(line 102)
* regexp constants <2>: Regexp Constants. (line 6)
* regexp constants: Regexp Usage. (line 57)
-* regexp constants, /=.../, /= operator and: Assignment Ops. (line 147)
+* regexp constants, /=.../, /= operator and: Assignment Ops. (line 148)
* regexp constants, as patterns: Expression Patterns. (line 34)
* regexp constants, in gawk: Using Constant Regexps.
(line 28)
-* regexp constants, slashes vs. quotes: Computed Regexps. (line 28)
-* regexp constants, vs. string constants: Computed Regexps. (line 38)
+* regexp constants, slashes vs. quotes: Computed Regexps. (line 29)
+* regexp constants, vs. string constants: Computed Regexps. (line 39)
* register extension: Registration Functions.
(line 6)
* regular expressions: Regexp. (line 6)
@@ -32481,18 +33477,19 @@ Index
(line 6)
* regular expressions, as patterns <1>: Regexp Patterns. (line 6)
* regular expressions, as patterns: Regexp Usage. (line 6)
-* regular expressions, as record separators: Records. (line 132)
-* regular expressions, case sensitivity <1>: User-modified. (line 82)
+* regular expressions, as record separators: awk split records.
+ (line 124)
+* regular expressions, case sensitivity <1>: User-modified. (line 76)
* regular expressions, case sensitivity: Case-sensitivity. (line 6)
* regular expressions, computed: Computed Regexps. (line 6)
* regular expressions, constants, See regexp constants: Regexp Usage.
(line 57)
* regular expressions, dynamic: Computed Regexps. (line 6)
* regular expressions, dynamic, with embedded newlines: Computed Regexps.
- (line 58)
+ (line 59)
* regular expressions, gawk, command-line options: GNU Regexp Operators.
(line 70)
-* regular expressions, interval expressions and: Options. (line 272)
+* regular expressions, interval expressions and: Options. (line 277)
* regular expressions, leftmost longest match: Leftmost Longest.
(line 6)
* regular expressions, operators <1>: Regexp Operators. (line 6)
@@ -32513,8 +33510,8 @@ Index
(line 54)
* return statement, user-defined functions: Return Statement. (line 6)
* return value, close() function: Close Files And Pipes.
- (line 130)
-* rev() user-defined function: Function Example. (line 53)
+ (line 131)
+* rev() user-defined function: Function Example. (line 54)
* revoutput extension: Extension Sample Revout.
(line 11)
* revtwoway extension: Extension Sample Rev2way.
@@ -32531,41 +33528,38 @@ Index
* right angle bracket (>), >> operator (I/O): Redirection. (line 50)
* right shift: Bitwise Functions. (line 52)
* right shift, bitwise: Bitwise Functions. (line 32)
-* Ritchie, Dennis: Basic Data Typing. (line 55)
-* RLENGTH variable: Auto-set. (line 262)
-* RLENGTH variable, match() function and: String Functions. (line 221)
+* Ritchie, Dennis: Basic Data Typing. (line 54)
+* RLENGTH variable: Auto-set. (line 259)
+* RLENGTH variable, match() function and: String Functions. (line 224)
* Robbins, Arnold <1>: Future Extensions. (line 6)
* Robbins, Arnold <2>: Bugs. (line 32)
-* Robbins, Arnold <3>: Contributors. (line 139)
+* Robbins, Arnold <3>: Contributors. (line 141)
* Robbins, Arnold <4>: General Data Types. (line 6)
* Robbins, Arnold <5>: Alarm Program. (line 6)
* Robbins, Arnold <6>: Passwd Functions. (line 90)
* Robbins, Arnold <7>: Getline/Pipe. (line 39)
* Robbins, Arnold: Command Line Field Separator.
- (line 73)
+ (line 74)
* Robbins, Bill: Getline/Pipe. (line 39)
-* Robbins, Harry: Acknowledgments. (line 78)
-* Robbins, Jean: Acknowledgments. (line 78)
+* Robbins, Harry: Acknowledgments. (line 92)
+* Robbins, Jean: Acknowledgments. (line 92)
* Robbins, Miriam <1>: Passwd Functions. (line 90)
* Robbins, Miriam <2>: Getline/Pipe. (line 39)
-* Robbins, Miriam: Acknowledgments. (line 78)
+* Robbins, Miriam: Acknowledgments. (line 92)
* Rommel, Kai Uwe: Contributors. (line 42)
-* round to nearest integer: Numeric Functions. (line 23)
+* round to nearest integer: Numeric Functions. (line 38)
* round() user-defined function: Round Function. (line 16)
-* rounding mode, floating-point: Rounding Mode. (line 6)
* rounding numbers: Round Function. (line 6)
-* ROUNDMODE variable <1>: Setting Rounding Mode.
- (line 6)
-* ROUNDMODE variable: User-modified. (line 138)
-* RS variable <1>: User-modified. (line 143)
-* RS variable: Records. (line 20)
+* ROUNDMODE variable: User-modified. (line 128)
+* RS variable <1>: User-modified. (line 133)
+* RS variable: awk split records. (line 12)
* RS variable, multiline records and: Multiple Line. (line 17)
* rshift: Bitwise Functions. (line 52)
-* RSTART variable: Auto-set. (line 268)
-* RSTART variable, match() function and: String Functions. (line 221)
-* RT variable <1>: Auto-set. (line 275)
+* RSTART variable: Auto-set. (line 265)
+* RSTART variable, match() function and: String Functions. (line 224)
+* RT variable <1>: Auto-set. (line 272)
* RT variable <2>: Multiple Line. (line 129)
-* RT variable: Records. (line 132)
+* RT variable: awk split records. (line 124)
* Rubin, Paul <1>: Contributors. (line 15)
* Rubin, Paul: History. (line 30)
* rule, definition of: Getting Started. (line 21)
@@ -32576,33 +33570,34 @@ Index
(line 68)
* sample debugging session: Sample Debugging Session.
(line 6)
-* sandbox mode: Options. (line 279)
+* sandbox mode: Options. (line 284)
* save debugger options: Debugger Info. (line 84)
* scalar or array: Type Functions. (line 11)
* scalar values: Basic Data Typing. (line 13)
* scanning arrays: Scanning an Array. (line 6)
* scanning multidimensional arrays: Multiscanning. (line 11)
-* Schorr, Andrew <1>: Contributors. (line 131)
+* Schorr, Andrew <1>: Contributors. (line 133)
+* Schorr, Andrew <2>: Auto-set. (line 299)
* Schorr, Andrew: Acknowledgments. (line 60)
* Schreiber, Bert: Acknowledgments. (line 38)
* Schreiber, Rita: Acknowledgments. (line 38)
-* search and replace in strings: String Functions. (line 82)
-* search in string: String Functions. (line 151)
+* search and replace in strings: String Functions. (line 89)
+* search in string: String Functions. (line 155)
* search paths <1>: VMS Running. (line 58)
* search paths <2>: PC Using. (line 10)
-* search paths: Igawk Program. (line 368)
-* search paths, for shared libraries: AWKLIBPATH Variable. (line 6)
+* search paths: Programs Exercises. (line 70)
+* search paths, for loadable extensions: AWKLIBPATH Variable. (line 6)
* search paths, for source files <1>: VMS Running. (line 58)
* search paths, for source files <2>: PC Using. (line 10)
-* search paths, for source files <3>: Igawk Program. (line 368)
+* search paths, for source files <3>: Programs Exercises. (line 70)
* search paths, for source files: AWKPATH Variable. (line 6)
* searching, files for regular expressions: Egrep Program. (line 6)
* searching, for words: Dupword Program. (line 6)
-* sed utility <1>: Glossary. (line 12)
+* sed utility <1>: Glossary. (line 11)
* sed utility <2>: Simple Sed. (line 6)
* sed utility: Field Splitting Summary.
(line 46)
-* seeding random number generator: Numeric Functions. (line 64)
+* seeding random number generator: Numeric Functions. (line 80)
* semicolon (;), AWKPATH variable and: PC Using. (line 10)
* semicolon (;), separating statements in actions <1>: Statements.
(line 10)
@@ -32610,26 +33605,23 @@ Index
(line 19)
* semicolon (;), separating statements in actions: Statements/Lines.
(line 91)
-* separators, field: User-modified. (line 56)
-* separators, field, FIELDWIDTHS variable and: User-modified. (line 35)
-* separators, field, FPAT variable and: User-modified. (line 45)
+* separators, field: User-modified. (line 50)
+* separators, field, FIELDWIDTHS variable and: User-modified. (line 37)
+* separators, field, FPAT variable and: User-modified. (line 43)
* separators, field, POSIX and: Fields. (line 6)
-* separators, for records <1>: User-modified. (line 143)
-* separators, for records: Records. (line 14)
-* separators, for records, regular expressions as: Records. (line 132)
+* separators, for records <1>: User-modified. (line 133)
+* separators, for records: awk split records. (line 6)
+* separators, for records, regular expressions as: awk split records.
+ (line 124)
* separators, for statements in actions: Action Overview. (line 19)
-* separators, subscript: User-modified. (line 156)
+* separators, subscript: User-modified. (line 146)
* set breakpoint: Breakpoint Control. (line 11)
* set debugger command: Viewing And Changing Data.
(line 59)
* set directory of message catalogs: I18N Functions. (line 12)
* set watchpoint: Viewing And Changing Data.
(line 67)
-* setting rounding mode: Setting Rounding Mode.
- (line 6)
-* setting working precision: Setting Precision. (line 6)
-* shadowing of variable values: Definition Syntax. (line 61)
-* shell quoting, double quote: Read Terminal. (line 25)
+* shadowing of variable values: Definition Syntax. (line 71)
* shell quoting, rules for: Quoting. (line 6)
* shells, piping commands into: Redirection. (line 142)
* shells, quoting: Using Shell Variables.
@@ -32653,7 +33645,7 @@ Index
* side effects <1>: Increment Ops. (line 11)
* side effects: Concatenation. (line 41)
* side effects, array indexing: Reference to Elements.
- (line 42)
+ (line 43)
* side effects, asort() function: Array Sorting Functions.
(line 24)
* side effects, assignment expressions: Assignment Ops. (line 23)
@@ -32661,38 +33653,40 @@ Index
* side effects, conditional expressions: Conditional Exp. (line 22)
* side effects, decrement/increment operators: Increment Ops. (line 11)
* side effects, FILENAME variable: Getline Notes. (line 19)
-* side effects, function calls: Function Calls. (line 54)
+* side effects, function calls: Function Calls. (line 57)
* side effects, statements: Action Overview. (line 32)
* sidebar, A Constant's Base Does Not Affect Its Value: Nondecimal-numbers.
(line 64)
* sidebar, Backslash Before Regular Characters: Escape Sequences.
- (line 110)
+ (line 118)
* sidebar, Changing FS Does Not Affect the Fields: Field Splitting Summary.
(line 38)
-* sidebar, Changing NR and FNR: Auto-set. (line 321)
+* sidebar, Changing NR and FNR: Auto-set. (line 314)
* sidebar, Controlling Output Buffering with system(): I/O Functions.
- (line 135)
+ (line 138)
* sidebar, Escape Sequences for Metacharacters: Escape Sequences.
- (line 128)
+ (line 136)
* sidebar, FS and IGNORECASE: Field Splitting Summary.
(line 64)
* sidebar, Interactive Versus Noninteractive Buffering: I/O Functions.
- (line 104)
-* sidebar, Matching the Null String: Gory Details. (line 162)
+ (line 107)
+* sidebar, Matching the Null String: Gory Details. (line 141)
* sidebar, Operator Evaluation Order: Increment Ops. (line 58)
* sidebar, Piping into sh: Redirection. (line 140)
-* sidebar, Portability Issues with #!: Executable Scripts. (line 31)
+* sidebar, Pre-POSIX awk Used OFMT For String Conversion: Strings And Numbers.
+ (line 55)
* sidebar, Recipe For A Programming Language: History. (line 6)
-* sidebar, RS = "\0" Is Not Portable: Records. (line 192)
+* sidebar, RS = "\0" Is Not Portable: gawk split records. (line 63)
* sidebar, So Why Does gawk have BEGINFILE and ENDFILE?: Filetrans Function.
(line 83)
* sidebar, Syntactic Ambiguities Between /= and Regular Expressions: Assignment Ops.
- (line 145)
+ (line 146)
+* sidebar, Understanding #!: Executable Scripts. (line 31)
* sidebar, Understanding $0: Changing Fields. (line 134)
* sidebar, Using \n in Bracket Expressions of Dynamic Regexps: Computed Regexps.
- (line 56)
+ (line 57)
* sidebar, Using close()'s Return Value: Close Files And Pipes.
- (line 128)
+ (line 129)
* SIGHUP signal, for dynamic profiling: Profiling. (line 211)
* SIGINT signal (MS-Windows): Profiling. (line 214)
* signals, HUP/SIGHUP, for profiling: Profiling. (line 211)
@@ -32704,64 +33698,63 @@ Index
* SIGUSR1 signal, for dynamic profiling: Profiling. (line 188)
* silent debugger command: Debugger Execution Control.
(line 10)
-* sin: Numeric Functions. (line 75)
-* sine: Numeric Functions. (line 75)
-* single precision floating-point: General Arithmetic. (line 21)
+* sin: Numeric Functions. (line 91)
+* sine: Numeric Functions. (line 91)
* single quote ('): One-shot. (line 15)
* single quote (') in gawk command lines: Long. (line 33)
-* single quote ('), in shell commands: Quoting. (line 31)
+* single quote ('), in shell commands: Quoting. (line 48)
* single quote ('), vs. apostrophe: Comments. (line 27)
-* single quote ('), with double quotes: Quoting. (line 53)
+* single quote ('), with double quotes: Quoting. (line 70)
* single-character fields: Single Character Fields.
(line 6)
* single-step execution, in the debugger: Debugger Execution Control.
(line 43)
* Skywalker, Luke: Undocumented. (line 6)
-* sleep utility: Alarm Program. (line 111)
+* sleep utility: Alarm Program. (line 110)
* sleep() extension function: Extension Sample Time.
- (line 23)
+ (line 22)
* Solaris, POSIX-compliant awk: Other Versions. (line 96)
-* sort array: String Functions. (line 32)
-* sort array indices: String Functions. (line 32)
+* sort array: String Functions. (line 42)
+* sort array indices: String Functions. (line 42)
* sort function, arrays, sorting: Array Sorting Functions.
(line 6)
* sort utility: Word Sorting. (line 50)
-* sort utility, coprocesses and: Two-way I/O. (line 83)
+* sort utility, coprocesses and: Two-way I/O. (line 65)
* sorting characters in different languages: Explaining gettext.
- (line 93)
+ (line 94)
* source code, awka: Other Versions. (line 64)
* source code, Brian Kernighan's awk: Other Versions. (line 13)
* source code, Busybox Awk: Other Versions. (line 88)
* source code, gawk: Gawk Distribution. (line 6)
-* source code, Illumos awk: Other Versions. (line 104)
-* source code, jawk: Other Versions. (line 112)
-* source code, libmawk: Other Versions. (line 120)
+* source code, Illumos awk: Other Versions. (line 105)
+* source code, jawk: Other Versions. (line 113)
+* source code, libmawk: Other Versions. (line 121)
* source code, mawk: Other Versions. (line 44)
* source code, mixing: Options. (line 117)
* source code, pawk: Other Versions. (line 78)
-* source code, pawk (Python version): Other Versions. (line 124)
-* source code, QSE Awk: Other Versions. (line 130)
-* source code, QuikTrim Awk: Other Versions. (line 134)
+* source code, pawk (Python version): Other Versions. (line 125)
+* source code, QSE Awk: Other Versions. (line 131)
+* source code, QuikTrim Awk: Other Versions. (line 135)
* source code, Solaris awk: Other Versions. (line 96)
-* source files, search path for: Igawk Program. (line 368)
-* sparse arrays: Array Intro. (line 70)
-* Spencer, Henry: Glossary. (line 12)
+* source files, search path for: Programs Exercises. (line 70)
+* sparse arrays: Array Intro. (line 72)
+* Spencer, Henry: Glossary. (line 11)
* split: String Functions. (line 313)
-* split string into array: String Functions. (line 291)
+* split string into array: String Functions. (line 294)
* split utility: Split Program. (line 6)
* split() function, array elements, deleting: Delete. (line 61)
* split.awk program: Split Program. (line 30)
-* sprintf <1>: String Functions. (line 378)
+* sprintf <1>: String Functions. (line 381)
* sprintf: OFMT. (line 15)
-* sprintf() function, OFMT variable and: User-modified. (line 124)
+* sprintf() function, OFMT variable and: User-modified. (line 114)
* sprintf() function, print/printf statements and: Round Function.
(line 6)
-* sqrt: Numeric Functions. (line 78)
-* square brackets ([]), regexp operator: Regexp Operators. (line 55)
-* square root: Numeric Functions. (line 78)
-* srand: Numeric Functions. (line 82)
+* sqrt: Numeric Functions. (line 94)
+* square brackets ([]), regexp operator: Regexp Operators. (line 56)
+* square root: Numeric Functions. (line 94)
+* srand: Numeric Functions. (line 98)
* stack frame: Debugging Terms. (line 10)
-* Stallman, Richard <1>: Glossary. (line 297)
+* Stallman, Richard <1>: Glossary. (line 296)
* Stallman, Richard <2>: Contributors. (line 23)
* Stallman, Richard <3>: Acknowledgments. (line 18)
* Stallman, Richard: Manual History. (line 6)
@@ -32786,21 +33779,21 @@ Index
(line 46)
* strftime: Time Functions. (line 48)
* string constants: Scalar Constants. (line 15)
-* string constants, vs. regexp constants: Computed Regexps. (line 38)
+* string constants, vs. regexp constants: Computed Regexps. (line 39)
* string extraction (internationalization): String Extraction.
(line 6)
-* string length: String Functions. (line 164)
+* string length: String Functions. (line 167)
* string operators: Concatenation. (line 8)
-* string, regular expression match: String Functions. (line 204)
+* string, regular expression match: String Functions. (line 207)
* string-manipulation functions: String Functions. (line 6)
* string-matching operators: Regexp Usage. (line 19)
* string-translation functions: I18N Functions. (line 6)
* strings splitting, example: String Functions. (line 333)
* strings, converting <1>: Bitwise Functions. (line 109)
-* strings, converting: Conversion. (line 6)
+* strings, converting: Strings And Numbers. (line 6)
* strings, converting letter case: String Functions. (line 520)
-* strings, converting, numbers to: User-modified. (line 28)
-* strings, empty, See null strings: Records. (line 122)
+* strings, converting, numbers to: User-modified. (line 30)
+* strings, empty, See null strings: awk split records. (line 114)
* strings, extracting: String Extraction. (line 6)
* strings, for localization: Programmer i18n. (line 14)
* strings, length limitations: Scalar Constants. (line 20)
@@ -32808,7 +33801,7 @@ Index
* strings, null: Regexp Field Splitting.
(line 43)
* strings, numeric: Variable Typing. (line 6)
-* strtonum: String Functions. (line 385)
+* strtonum: String Functions. (line 388)
* strtonum() function (gawk), --non-decimal-data option and: Nondecimal Data.
(line 36)
* sub <1>: String Functions. (line 406)
@@ -32816,7 +33809,7 @@ Index
(line 43)
* sub() function, arguments of: String Functions. (line 460)
* sub() function, escape processing: Gory Details. (line 6)
-* subscript separators: User-modified. (line 156)
+* subscript separators: User-modified. (line 146)
* subscripts in arrays, multidimensional: Multidimensional. (line 10)
* subscripts in arrays, multidimensional, scanning: Multiscanning.
(line 11)
@@ -32824,19 +33817,19 @@ Index
(line 6)
* subscripts in arrays, uninitialized variables as: Uninitialized Subscripts.
(line 6)
-* SUBSEP variable: User-modified. (line 156)
+* SUBSEP variable: User-modified. (line 146)
* SUBSEP variable, and multidimensional arrays: Multidimensional.
(line 16)
-* substitute in string: String Functions. (line 82)
+* substitute in string: String Functions. (line 89)
* substr: String Functions. (line 479)
* substring: String Functions. (line 479)
* Sumner, Andrew: Other Versions. (line 64)
-* supplementary groups of gawk process: Auto-set. (line 243)
+* supplementary groups of gawk process: Auto-set. (line 244)
* switch statement: Switch Statement. (line 6)
-* SYMTAB array: Auto-set. (line 283)
+* SYMTAB array: Auto-set. (line 276)
* syntactic ambiguity: /= operator vs. /=.../ regexp constant: Assignment Ops.
- (line 147)
-* system: I/O Functions. (line 72)
+ (line 148)
+* system: I/O Functions. (line 75)
* systime: Time Functions. (line 66)
* t debugger command (alias for tbreak): Breakpoint Control. (line 90)
* tbreak debugger command: Breakpoint Control. (line 90)
@@ -32846,11 +33839,11 @@ Index
* tee utility: Tee Program. (line 6)
* tee.awk program: Tee Program. (line 26)
* temporary breakpoint: Breakpoint Control. (line 90)
-* terminating records: Records. (line 132)
+* terminating records: awk split records. (line 124)
* testbits.awk program: Bitwise Functions. (line 70)
* testext extension: Extension Sample API Tests.
(line 6)
-* Texinfo <1>: Adding Code. (line 99)
+* Texinfo <1>: Adding Code. (line 100)
* Texinfo <2>: Distribution contents.
(line 77)
* Texinfo <3>: Extract Program. (line 12)
@@ -32863,21 +33856,21 @@ Index
* text, printing: Print. (line 22)
* text, printing, unduplicated lines of: Uniq Program. (line 6)
* TEXTDOMAIN variable <1>: Programmer i18n. (line 9)
-* TEXTDOMAIN variable: User-modified. (line 162)
+* TEXTDOMAIN variable: User-modified. (line 152)
* TEXTDOMAIN variable, BEGIN pattern and: Programmer i18n. (line 60)
* TEXTDOMAIN variable, portability and: I18N Portability. (line 20)
-* textdomain() function (C library): Explaining gettext. (line 27)
+* textdomain() function (C library): Explaining gettext. (line 28)
* tilde (~), ~ operator <1>: Expression Patterns. (line 24)
* tilde (~), ~ operator <2>: Precedence. (line 80)
* tilde (~), ~ operator <3>: Comparison Operators.
(line 11)
* tilde (~), ~ operator <4>: Regexp Constants. (line 6)
-* tilde (~), ~ operator <5>: Computed Regexps. (line 6)
-* tilde (~), ~ operator <6>: Case-sensitivity. (line 26)
+* tilde (~), ~ operator <5>: Case-sensitivity. (line 26)
+* tilde (~), ~ operator <6>: Computed Regexps. (line 6)
* tilde (~), ~ operator: Regexp Usage. (line 19)
* time functions: Time Functions. (line 6)
* time, alarm clock example program: Alarm Program. (line 11)
-* time, localization and: Explaining gettext. (line 115)
+* time, localization and: Explaining gettext. (line 112)
* time, managing: Getlocaltime Function.
(line 6)
* time, retrieving: Time Functions. (line 17)
@@ -32894,38 +33887,37 @@ Index
* traceback, display in debugger: Execution Stack. (line 13)
* translate string: I18N Functions. (line 22)
* translate.awk program: Translate Program. (line 55)
-* treating files, as single records: Records. (line 219)
-* troubleshooting, --non-decimal-data option: Options. (line 207)
+* treating files, as single records: gawk split records. (line 92)
+* troubleshooting, --non-decimal-data option: Options. (line 211)
* troubleshooting, == operator: Comparison Operators.
(line 37)
* troubleshooting, awk uses FS not IFS: Field Separators. (line 30)
* troubleshooting, backslash before nonspecial character: Escape Sequences.
- (line 112)
+ (line 120)
* troubleshooting, division: Arithmetic Ops. (line 44)
* troubleshooting, fatal errors, field widths, specifying: Constant Size.
(line 23)
* troubleshooting, fatal errors, printf format strings: Format Modifiers.
(line 159)
-* troubleshooting, fflush() function: I/O Functions. (line 60)
-* troubleshooting, function call syntax: Function Calls. (line 28)
+* troubleshooting, fflush() function: I/O Functions. (line 63)
+* troubleshooting, function call syntax: Function Calls. (line 30)
* troubleshooting, gawk: Compatibility Mode. (line 6)
* troubleshooting, gawk, bug reports: Bugs. (line 9)
* troubleshooting, gawk, fatal errors, function arguments: Calling Built-in.
(line 16)
* troubleshooting, getline function: File Checking. (line 25)
* troubleshooting, gsub()/sub() functions: String Functions. (line 470)
-* troubleshooting, match() function: String Functions. (line 286)
-* troubleshooting, patsplit() function: String Functions. (line 309)
+* troubleshooting, match() function: String Functions. (line 289)
* troubleshooting, print statement, omitting commas: Print Examples.
(line 31)
* troubleshooting, printing: Redirection. (line 118)
* troubleshooting, quotes with file names: Special FD. (line 68)
* troubleshooting, readable data files: File Checking. (line 6)
* troubleshooting, regexp constants vs. string constants: Computed Regexps.
- (line 38)
+ (line 39)
* troubleshooting, string concatenation: Concatenation. (line 26)
* troubleshooting, substr() function: String Functions. (line 497)
-* troubleshooting, system() function: I/O Functions. (line 94)
+* troubleshooting, system() function: I/O Functions. (line 97)
* troubleshooting, typographical errors, global variables: Options.
(line 98)
* true, logical: Truth Values. (line 6)
@@ -32934,14 +33926,14 @@ Index
* Trueman, David: History. (line 30)
* trunc-mod operation: Arithmetic Ops. (line 66)
* truth values: Truth Values. (line 6)
-* type conversion: Conversion. (line 21)
+* type conversion: Strings And Numbers. (line 21)
* u debugger command (alias for until): Debugger Execution Control.
(line 83)
* unassigned array elements: Reference to Elements.
(line 18)
* undefined functions: Pass By Value/Reference.
(line 71)
-* underscore (_), C macro: Explaining gettext. (line 70)
+* underscore (_), C macro: Explaining gettext. (line 71)
* underscore (_), in names of private variables: Library Names.
(line 29)
* underscore (_), translatable string: Programmer i18n. (line 69)
@@ -32955,21 +33947,21 @@ Index
(line 6)
* uniq utility: Uniq Program. (line 6)
* uniq.awk program: Uniq Program. (line 65)
-* Unix: Glossary. (line 616)
+* Unix: Glossary. (line 611)
* Unix awk, backslashes in escape sequences: Escape Sequences.
- (line 124)
+ (line 132)
* Unix awk, close() function and: Close Files And Pipes.
- (line 130)
+ (line 131)
* Unix awk, password files, field separators and: Command Line Field Separator.
- (line 64)
+ (line 65)
* Unix, awk scripts and: Executable Scripts. (line 6)
* UNIXROOT variable, on OS/2 systems: PC Using. (line 16)
-* unsigned integers: General Arithmetic. (line 15)
+* unsigned integers: Computer Arithmetic. (line 41)
* until debugger command: Debugger Execution Control.
(line 83)
* unwatch debugger command: Viewing And Changing Data.
(line 84)
-* up debugger command: Execution Stack. (line 33)
+* up debugger command: Execution Stack. (line 34)
* user database, reading: Passwd Functions. (line 6)
* user-defined functions: User-defined. (line 6)
* user-defined, functions, counts, in a profile: Profiling. (line 137)
@@ -32980,14 +33972,14 @@ Index
* USR1 signal, for dynamic profiling: Profiling. (line 188)
* values, numeric: Basic Data Typing. (line 13)
* values, string: Basic Data Typing. (line 13)
-* variable assignments and input files: Other Arguments. (line 19)
+* variable assignments and input files: Other Arguments. (line 23)
* variable typing: Typing and Comparison.
(line 9)
* variables <1>: Basic Data Typing. (line 6)
* variables: Other Features. (line 6)
* variables, assigning on command line: Assignment Options. (line 6)
* variables, built-in <1>: Built-in Variables. (line 6)
-* variables, built-in: Using Variables. (line 20)
+* variables, built-in: Using Variables. (line 23)
* variables, built-in, -v option, setting with: Options. (line 40)
* variables, built-in, conveying information: Auto-set. (line 6)
* variables, flag: Boolean Ops. (line 67)
@@ -33000,26 +33992,26 @@ Index
* variables, getline command into, using: Getline/Variable. (line 6)
* variables, global, for library functions: Library Names. (line 11)
* variables, global, printing list of: Options. (line 93)
-* variables, initializing: Using Variables. (line 20)
+* variables, initializing: Using Variables. (line 23)
* variables, local to a function: Variable Scope. (line 6)
* variables, names of: Arrays. (line 18)
* variables, private: Library Names. (line 11)
* variables, setting: Options. (line 32)
-* variables, shadowing: Definition Syntax. (line 61)
+* variables, shadowing: Definition Syntax. (line 71)
* variables, types of: Assignment Ops. (line 40)
* variables, types of, comparison expressions and: Typing and Comparison.
(line 9)
* variables, uninitialized, as array subscripts: Uninitialized Subscripts.
(line 6)
* variables, user-defined: Variables. (line 6)
-* version of gawk: Auto-set. (line 213)
-* version of gawk extension API: Auto-set. (line 238)
-* version of GNU MP library: Auto-set. (line 224)
-* version of GNU MPFR library: Auto-set. (line 220)
-* vertical bar (|): Regexp Operators. (line 69)
+* version of gawk: Auto-set. (line 214)
+* version of gawk extension API: Auto-set. (line 239)
+* version of GNU MP library: Auto-set. (line 225)
+* version of GNU MPFR library: Auto-set. (line 221)
+* vertical bar (|): Regexp Operators. (line 70)
* vertical bar (|), | operator (I/O) <1>: Precedence. (line 65)
* vertical bar (|), | operator (I/O): Getline/Pipe. (line 9)
-* vertical bar (|), |& operator (I/O) <1>: Two-way I/O. (line 44)
+* vertical bar (|), |& operator (I/O) <1>: Two-way I/O. (line 25)
* vertical bar (|), |& operator (I/O) <2>: Precedence. (line 65)
* vertical bar (|), |& operator (I/O): Getline/Coprocess. (line 6)
* vertical bar (|), || operator <1>: Precedence. (line 89)
@@ -33036,7 +34028,7 @@ Index
* Wall, Larry <1>: Future Extensions. (line 6)
* Wall, Larry: Array Intro. (line 6)
* Wallin, Anders: Contributors. (line 103)
-* warnings, issuing: Options. (line 182)
+* warnings, issuing: Options. (line 185)
* watch debugger command: Viewing And Changing Data.
(line 67)
* watchpoint: Debugging Terms. (line 42)
@@ -33049,7 +34041,7 @@ Index
* whitespace, as field separators: Default Field Splitting.
(line 6)
* whitespace, functions, calling: Calling Built-in. (line 10)
-* whitespace, newlines as: Options. (line 253)
+* whitespace, newlines as: Options. (line 258)
* Williams, Kent: Contributors. (line 34)
* Woehlke, Matthew: Contributors. (line 79)
* Woods, John: Contributors. (line 27)
@@ -33068,26 +34060,25 @@ Index
* xgettext utility: String Extraction. (line 13)
* xor: Bitwise Functions. (line 55)
* XOR bitwise operation: Bitwise Functions. (line 6)
-* Yawitz, Efraim: Contributors. (line 129)
-* Zaretskii, Eli <1>: Bugs. (line 70)
+* Yawitz, Efraim: Contributors. (line 131)
+* Zaretskii, Eli <1>: Bugs. (line 71)
* Zaretskii, Eli <2>: Contributors. (line 55)
* Zaretskii, Eli: Acknowledgments. (line 60)
-* zero, negative vs. positive: Unexpected Results. (line 34)
* zerofile.awk program: Empty Files. (line 21)
* Zoulas, Christos: Contributors. (line 66)
* {} (braces): Profiling. (line 142)
* {} (braces), actions and: Action Overview. (line 19)
* {} (braces), statements, grouping: Statements. (line 10)
-* | (vertical bar): Regexp Operators. (line 69)
+* | (vertical bar): Regexp Operators. (line 70)
* | (vertical bar), | operator (I/O) <1>: Precedence. (line 65)
* | (vertical bar), | operator (I/O) <2>: Redirection. (line 57)
* | (vertical bar), | operator (I/O): Getline/Pipe. (line 9)
-* | (vertical bar), |& operator (I/O) <1>: Two-way I/O. (line 44)
+* | (vertical bar), |& operator (I/O) <1>: Two-way I/O. (line 25)
* | (vertical bar), |& operator (I/O) <2>: Precedence. (line 65)
* | (vertical bar), |& operator (I/O) <3>: Redirection. (line 102)
* | (vertical bar), |& operator (I/O): Getline/Coprocess. (line 6)
* | (vertical bar), |& operator (I/O), pipes, closing: Close Files And Pipes.
- (line 118)
+ (line 119)
* | (vertical bar), || operator <1>: Precedence. (line 89)
* | (vertical bar), || operator: Boolean Ops. (line 57)
* ~ (tilde), ~ operator <1>: Expression Patterns. (line 24)
@@ -33095,537 +34086,561 @@ Index
* ~ (tilde), ~ operator <3>: Comparison Operators.
(line 11)
* ~ (tilde), ~ operator <4>: Regexp Constants. (line 6)
-* ~ (tilde), ~ operator <5>: Computed Regexps. (line 6)
-* ~ (tilde), ~ operator <6>: Case-sensitivity. (line 26)
+* ~ (tilde), ~ operator <5>: Case-sensitivity. (line 26)
+* ~ (tilde), ~ operator <6>: Computed Regexps. (line 6)
* ~ (tilde), ~ operator: Regexp Usage. (line 19)

Tag Table:
-Node: Top1292
-Node: Foreword40821
-Node: Preface45166
-Ref: Preface-Footnote-148219
-Ref: Preface-Footnote-248315
-Node: History48547
-Node: Names50921
-Ref: Names-Footnote-152398
-Node: This Manual52470
-Ref: This Manual-Footnote-158244
-Node: Conventions58344
-Node: Manual History60500
-Ref: Manual History-Footnote-163948
-Ref: Manual History-Footnote-263989
-Node: How To Contribute64063
-Node: Acknowledgments65207
-Node: Getting Started69401
-Node: Running gawk71780
-Node: One-shot72966
-Node: Read Terminal74191
-Ref: Read Terminal-Footnote-175841
-Ref: Read Terminal-Footnote-276117
-Node: Long76288
-Node: Executable Scripts77664
-Ref: Executable Scripts-Footnote-179497
-Ref: Executable Scripts-Footnote-279599
-Node: Comments80146
-Node: Quoting82613
-Node: DOS Quoting87236
-Node: Sample Data Files87911
-Node: Very Simple90426
-Node: Two Rules95077
-Node: More Complex96975
-Ref: More Complex-Footnote-199905
-Node: Statements/Lines99990
-Ref: Statements/Lines-Footnote-1104453
-Node: Other Features104718
-Node: When105646
-Node: Invoking Gawk107793
-Node: Command Line109256
-Node: Options110039
-Ref: Options-Footnote-1125417
-Node: Other Arguments125442
-Node: Naming Standard Input128100
-Node: Environment Variables129194
-Node: AWKPATH Variable129752
-Ref: AWKPATH Variable-Footnote-1132533
-Ref: AWKPATH Variable-Footnote-2132578
-Node: AWKLIBPATH Variable132838
-Node: Other Environment Variables133556
-Node: Exit Status136519
-Node: Include Files137194
-Node: Loading Shared Libraries140763
-Node: Obsolete142127
-Node: Undocumented142824
-Node: Regexp143066
-Node: Regexp Usage144455
-Node: Escape Sequences146480
-Node: Regexp Operators152149
-Ref: Regexp Operators-Footnote-1159529
-Ref: Regexp Operators-Footnote-2159676
-Node: Bracket Expressions159774
-Ref: table-char-classes161664
-Node: GNU Regexp Operators164187
-Node: Case-sensitivity167910
-Ref: Case-sensitivity-Footnote-1170878
-Ref: Case-sensitivity-Footnote-2171113
-Node: Leftmost Longest171221
-Node: Computed Regexps172422
-Node: Reading Files175759
-Node: Records177761
-Ref: Records-Footnote-1187284
-Node: Fields187321
-Ref: Fields-Footnote-1190277
-Node: Nonconstant Fields190363
-Node: Changing Fields192569
-Node: Field Separators198528
-Node: Default Field Splitting201230
-Node: Regexp Field Splitting202347
-Node: Single Character Fields205689
-Node: Command Line Field Separator206748
-Node: Full Line Fields210090
-Ref: Full Line Fields-Footnote-1210598
-Node: Field Splitting Summary210644
-Ref: Field Splitting Summary-Footnote-1213743
-Node: Constant Size213844
-Node: Splitting By Content218451
-Ref: Splitting By Content-Footnote-1222200
-Node: Multiple Line222240
-Ref: Multiple Line-Footnote-1228087
-Node: Getline228266
-Node: Plain Getline230482
-Node: Getline/Variable232577
-Node: Getline/File233724
-Node: Getline/Variable/File235065
-Ref: Getline/Variable/File-Footnote-1236664
-Node: Getline/Pipe236751
-Node: Getline/Variable/Pipe239450
-Node: Getline/Coprocess240557
-Node: Getline/Variable/Coprocess241809
-Node: Getline Notes242546
-Node: Getline Summary245333
-Ref: table-getline-variants245741
-Node: Read Timeout246653
-Ref: Read Timeout-Footnote-1250394
-Node: Command line directories250451
-Node: Printing251081
-Node: Print252712
-Node: Print Examples254049
-Node: Output Separators256833
-Node: OFMT258849
-Node: Printf260207
-Node: Basic Printf261113
-Node: Control Letters262652
-Node: Format Modifiers266464
-Node: Printf Examples272473
-Node: Redirection275185
-Node: Special Files282159
-Node: Special FD282692
-Ref: Special FD-Footnote-1286317
-Node: Special Network286391
-Node: Special Caveats287241
-Node: Close Files And Pipes288037
-Ref: Close Files And Pipes-Footnote-1295020
-Ref: Close Files And Pipes-Footnote-2295168
-Node: Expressions295318
-Node: Values296450
-Node: Constants297126
-Node: Scalar Constants297806
-Ref: Scalar Constants-Footnote-1298665
-Node: Nondecimal-numbers298847
-Node: Regexp Constants301847
-Node: Using Constant Regexps302322
-Node: Variables305377
-Node: Using Variables306032
-Node: Assignment Options307756
-Node: Conversion309631
-Ref: table-locale-affects315131
-Ref: Conversion-Footnote-1315755
-Node: All Operators315864
-Node: Arithmetic Ops316494
-Node: Concatenation318999
-Ref: Concatenation-Footnote-1321787
-Node: Assignment Ops321907
-Ref: table-assign-ops326895
-Node: Increment Ops328226
-Node: Truth Values and Conditions331660
-Node: Truth Values332743
-Node: Typing and Comparison333792
-Node: Variable Typing334585
-Ref: Variable Typing-Footnote-1338482
-Node: Comparison Operators338604
-Ref: table-relational-ops339014
-Node: POSIX String Comparison342562
-Ref: POSIX String Comparison-Footnote-1343518
-Node: Boolean Ops343656
-Ref: Boolean Ops-Footnote-1347726
-Node: Conditional Exp347817
-Node: Function Calls349549
-Node: Precedence353143
-Node: Locales356812
-Node: Patterns and Actions357901
-Node: Pattern Overview358955
-Node: Regexp Patterns360624
-Node: Expression Patterns361167
-Node: Ranges364948
-Node: BEGIN/END368052
-Node: Using BEGIN/END368814
-Ref: Using BEGIN/END-Footnote-1371550
-Node: I/O And BEGIN/END371656
-Node: BEGINFILE/ENDFILE373938
-Node: Empty376852
-Node: Using Shell Variables377169
-Node: Action Overview379454
-Node: Statements381811
-Node: If Statement383665
-Node: While Statement385164
-Node: Do Statement387208
-Node: For Statement388364
-Node: Switch Statement391516
-Node: Break Statement393670
-Node: Continue Statement395660
-Node: Next Statement397453
-Node: Nextfile Statement399843
-Node: Exit Statement402498
-Node: Built-in Variables404914
-Node: User-modified406009
-Ref: User-modified-Footnote-1414367
-Node: Auto-set414429
-Ref: Auto-set-Footnote-1427886
-Ref: Auto-set-Footnote-2428091
-Node: ARGC and ARGV428147
-Node: Arrays432001
-Node: Array Basics433506
-Node: Array Intro434332
-Node: Reference to Elements438649
-Node: Assigning Elements440919
-Node: Array Example441410
-Node: Scanning an Array443142
-Node: Controlling Scanning445456
-Ref: Controlling Scanning-Footnote-1450543
-Node: Delete450859
-Ref: Delete-Footnote-1453624
-Node: Numeric Array Subscripts453681
-Node: Uninitialized Subscripts455864
-Node: Multidimensional457491
-Node: Multiscanning460584
-Node: Arrays of Arrays462173
-Node: Functions466813
-Node: Built-in467632
-Node: Calling Built-in468710
-Node: Numeric Functions470698
-Ref: Numeric Functions-Footnote-1474530
-Ref: Numeric Functions-Footnote-2474887
-Ref: Numeric Functions-Footnote-3474935
-Node: String Functions475204
-Ref: String Functions-Footnote-1498162
-Ref: String Functions-Footnote-2498291
-Ref: String Functions-Footnote-3498539
-Node: Gory Details498626
-Ref: table-sub-escapes500305
-Ref: table-sub-posix-92501659
-Ref: table-sub-proposed503010
-Ref: table-posix-sub504364
-Ref: table-gensub-escapes505909
-Ref: Gory Details-Footnote-1507085
-Ref: Gory Details-Footnote-2507136
-Node: I/O Functions507287
-Ref: I/O Functions-Footnote-1514277
-Node: Time Functions514424
-Ref: Time Functions-Footnote-1525407
-Ref: Time Functions-Footnote-2525475
-Ref: Time Functions-Footnote-3525633
-Ref: Time Functions-Footnote-4525744
-Ref: Time Functions-Footnote-5525856
-Ref: Time Functions-Footnote-6526083
-Node: Bitwise Functions526349
-Ref: table-bitwise-ops526911
-Ref: Bitwise Functions-Footnote-1531132
-Node: Type Functions531316
-Node: I18N Functions532467
-Node: User-defined534094
-Node: Definition Syntax534898
-Ref: Definition Syntax-Footnote-1539812
-Node: Function Example539881
-Ref: Function Example-Footnote-1542530
-Node: Function Caveats542552
-Node: Calling A Function543070
-Node: Variable Scope544025
-Node: Pass By Value/Reference546988
-Node: Return Statement550496
-Node: Dynamic Typing553477
-Node: Indirect Calls554408
-Node: Library Functions564095
-Ref: Library Functions-Footnote-1567608
-Ref: Library Functions-Footnote-2567751
-Node: Library Names567922
-Ref: Library Names-Footnote-1571395
-Ref: Library Names-Footnote-2571615
-Node: General Functions571701
-Node: Strtonum Function572729
-Node: Assert Function575659
-Node: Round Function578985
-Node: Cliff Random Function580526
-Node: Ordinal Functions581542
-Ref: Ordinal Functions-Footnote-1584619
-Ref: Ordinal Functions-Footnote-2584871
-Node: Join Function585082
-Ref: Join Function-Footnote-1586853
-Node: Getlocaltime Function587053
-Node: Readfile Function590794
-Node: Data File Management592633
-Node: Filetrans Function593265
-Node: Rewind Function597334
-Node: File Checking598721
-Node: Empty Files599815
-Node: Ignoring Assigns602045
-Node: Getopt Function603599
-Ref: Getopt Function-Footnote-1614902
-Node: Passwd Functions615105
-Ref: Passwd Functions-Footnote-1624083
-Node: Group Functions624171
-Node: Walking Arrays632255
-Node: Sample Programs634391
-Node: Running Examples635065
-Node: Clones635793
-Node: Cut Program637017
-Node: Egrep Program646868
-Ref: Egrep Program-Footnote-1654641
-Node: Id Program654751
-Node: Split Program658400
-Ref: Split Program-Footnote-1661919
-Node: Tee Program662047
-Node: Uniq Program664850
-Node: Wc Program672279
-Ref: Wc Program-Footnote-1676545
-Ref: Wc Program-Footnote-2676745
-Node: Miscellaneous Programs676837
-Node: Dupword Program678025
-Node: Alarm Program680056
-Node: Translate Program684863
-Ref: Translate Program-Footnote-1689250
-Ref: Translate Program-Footnote-2689498
-Node: Labels Program689632
-Ref: Labels Program-Footnote-1693003
-Node: Word Sorting693087
-Node: History Sorting696971
-Node: Extract Program698810
-Ref: Extract Program-Footnote-1706313
-Node: Simple Sed706441
-Node: Igawk Program709503
-Ref: Igawk Program-Footnote-1724660
-Ref: Igawk Program-Footnote-2724861
-Node: Anagram Program724999
-Node: Signature Program728067
-Node: Advanced Features729167
-Node: Nondecimal Data731053
-Node: Array Sorting732636
-Node: Controlling Array Traversal733333
-Node: Array Sorting Functions741617
-Ref: Array Sorting Functions-Footnote-1745486
-Node: Two-way I/O745680
-Ref: Two-way I/O-Footnote-1751112
-Node: TCP/IP Networking751194
-Node: Profiling754038
-Node: Internationalization761541
-Node: I18N and L10N762966
-Node: Explaining gettext763652
-Ref: Explaining gettext-Footnote-1768720
-Ref: Explaining gettext-Footnote-2768904
-Node: Programmer i18n769069
-Node: Translator i18n773271
-Node: String Extraction774065
-Ref: String Extraction-Footnote-1775026
-Node: Printf Ordering775112
-Ref: Printf Ordering-Footnote-1777894
-Node: I18N Portability777958
-Ref: I18N Portability-Footnote-1780407
-Node: I18N Example780470
-Ref: I18N Example-Footnote-1783108
-Node: Gawk I18N783180
-Node: Debugger783801
-Node: Debugging784772
-Node: Debugging Concepts785205
-Node: Debugging Terms787061
-Node: Awk Debugging789658
-Node: Sample Debugging Session790550
-Node: Debugger Invocation791070
-Node: Finding The Bug792403
-Node: List of Debugger Commands798890
-Node: Breakpoint Control800224
-Node: Debugger Execution Control803888
-Node: Viewing And Changing Data807248
-Node: Execution Stack810604
-Node: Debugger Info812071
-Node: Miscellaneous Debugger Commands816053
-Node: Readline Support821229
-Node: Limitations822060
-Node: Arbitrary Precision Arithmetic824312
-Ref: Arbitrary Precision Arithmetic-Footnote-1825961
-Node: General Arithmetic826109
-Node: Floating Point Issues827829
-Node: String Conversion Precision828710
-Ref: String Conversion Precision-Footnote-1830415
-Node: Unexpected Results830524
-Node: POSIX Floating Point Problems832677
-Ref: POSIX Floating Point Problems-Footnote-1836502
-Node: Integer Programming836540
-Node: Floating-point Programming838279
-Ref: Floating-point Programming-Footnote-1844610
-Ref: Floating-point Programming-Footnote-2844880
-Node: Floating-point Representation845144
-Node: Floating-point Context846309
-Ref: table-ieee-formats847148
-Node: Rounding Mode848532
-Ref: table-rounding-modes849011
-Ref: Rounding Mode-Footnote-1852026
-Node: Gawk and MPFR852205
-Node: Arbitrary Precision Floats853616
-Ref: Arbitrary Precision Floats-Footnote-1856059
-Node: Setting Precision856375
-Ref: table-predefined-precision-strings857061
-Node: Setting Rounding Mode859206
-Ref: table-gawk-rounding-modes859610
-Node: Floating-point Constants860797
-Node: Changing Precision862226
-Ref: Changing Precision-Footnote-1863623
-Node: Exact Arithmetic863797
-Node: Arbitrary Precision Integers866935
-Ref: Arbitrary Precision Integers-Footnote-1869950
-Node: Dynamic Extensions870097
-Node: Extension Intro871555
-Node: Plugin License872820
-Node: Extension Mechanism Outline873505
-Ref: load-extension873922
-Ref: load-new-function875400
-Ref: call-new-function876395
-Node: Extension API Description878410
-Node: Extension API Functions Introduction879697
-Node: General Data Types884624
-Ref: General Data Types-Footnote-1890319
-Node: Requesting Values890618
-Ref: table-value-types-returned891355
-Node: Memory Allocation Functions892309
-Ref: Memory Allocation Functions-Footnote-1895055
-Node: Constructor Functions895151
-Node: Registration Functions896909
-Node: Extension Functions897594
-Node: Exit Callback Functions899896
-Node: Extension Version String901145
-Node: Input Parsers901795
-Node: Output Wrappers911552
-Node: Two-way processors916062
-Node: Printing Messages918270
-Ref: Printing Messages-Footnote-1919347
-Node: Updating `ERRNO'919499
-Node: Accessing Parameters920238
-Node: Symbol Table Access921468
-Node: Symbol table by name921982
-Node: Symbol table by cookie923958
-Ref: Symbol table by cookie-Footnote-1928090
-Node: Cached values928153
-Ref: Cached values-Footnote-1931643
-Node: Array Manipulation931734
-Ref: Array Manipulation-Footnote-1932832
-Node: Array Data Types932871
-Ref: Array Data Types-Footnote-1935574
-Node: Array Functions935666
-Node: Flattening Arrays939502
-Node: Creating Arrays946354
-Node: Extension API Variables951079
-Node: Extension Versioning951715
-Node: Extension API Informational Variables953616
-Node: Extension API Boilerplate954702
-Node: Finding Extensions958506
-Node: Extension Example959066
-Node: Internal File Description959796
-Node: Internal File Ops963887
-Ref: Internal File Ops-Footnote-1975396
-Node: Using Internal File Ops975536
-Ref: Using Internal File Ops-Footnote-1977889
-Node: Extension Samples978155
-Node: Extension Sample File Functions979679
-Node: Extension Sample Fnmatch988164
-Node: Extension Sample Fork989933
-Node: Extension Sample Inplace991146
-Node: Extension Sample Ord992924
-Node: Extension Sample Readdir993760
-Node: Extension Sample Revout995292
-Node: Extension Sample Rev2way995885
-Node: Extension Sample Read write array996575
-Node: Extension Sample Readfile998458
-Node: Extension Sample API Tests999558
-Node: Extension Sample Time1000083
-Node: gawkextlib1001447
-Node: Language History1004228
-Node: V7/SVR3.11005821
-Node: SVR41008141
-Node: POSIX1009583
-Node: BTL1010969
-Node: POSIX/GNU1011703
-Node: Feature History1017302
-Node: Common Extensions1030278
-Node: Ranges and Locales1031590
-Ref: Ranges and Locales-Footnote-11036207
-Ref: Ranges and Locales-Footnote-21036234
-Ref: Ranges and Locales-Footnote-31036468
-Node: Contributors1036689
-Node: Installation1042070
-Node: Gawk Distribution1042964
-Node: Getting1043448
-Node: Extracting1044274
-Node: Distribution contents1045966
-Node: Unix Installation1051671
-Node: Quick Installation1052288
-Node: Additional Configuration Options1054734
-Node: Configuration Philosophy1056470
-Node: Non-Unix Installation1058824
-Node: PC Installation1059282
-Node: PC Binary Installation1060581
-Node: PC Compiling1062429
-Node: PC Testing1065373
-Node: PC Using1066549
-Node: Cygwin1070717
-Node: MSYS1071526
-Node: VMS Installation1072040
-Node: VMS Compilation1072804
-Ref: VMS Compilation-Footnote-11074056
-Node: VMS Dynamic Extensions1074114
-Node: VMS Installation Details1075487
-Node: VMS Running1077738
-Node: VMS GNV1080572
-Node: VMS Old Gawk1081295
-Node: Bugs1081765
-Node: Other Versions1085683
-Node: Notes1091767
-Node: Compatibility Mode1092567
-Node: Additions1093350
-Node: Accessing The Source1094277
-Node: Adding Code1095717
-Node: New Ports1101762
-Node: Derived Files1105897
-Ref: Derived Files-Footnote-11111218
-Ref: Derived Files-Footnote-21111252
-Ref: Derived Files-Footnote-31111852
-Node: Future Extensions1111950
-Node: Implementation Limitations1112533
-Node: Extension Design1113785
-Node: Old Extension Problems1114939
-Ref: Old Extension Problems-Footnote-11116447
-Node: Extension New Mechanism Goals1116504
-Ref: Extension New Mechanism Goals-Footnote-11119869
-Node: Extension Other Design Decisions1120055
-Node: Extension Future Growth1122161
-Node: Old Extension Mechanism1122997
-Node: Basic Concepts1124737
-Node: Basic High Level1125418
-Ref: figure-general-flow1125690
-Ref: figure-process-flow1126289
-Ref: Basic High Level-Footnote-11129518
-Node: Basic Data Typing1129703
-Node: Glossary1133058
-Node: Copying1158289
-Node: GNU Free Documentation License1195845
-Node: Index1220981
+Node: Top1204
+Node: Foreword41858
+Node: Preface46203
+Ref: Preface-Footnote-149226
+Ref: Preface-Footnote-249333
+Node: History49565
+Node: Names51939
+Ref: Names-Footnote-153033
+Node: This Manual53179
+Ref: This Manual-Footnote-158958
+Node: Conventions59058
+Node: Manual History61403
+Ref: Manual History-Footnote-164479
+Ref: Manual History-Footnote-264520
+Node: How To Contribute64594
+Node: Acknowledgments65833
+Node: Getting Started70581
+Node: Running gawk73015
+Node: One-shot74205
+Node: Read Terminal75430
+Node: Long77455
+Node: Executable Scripts78849
+Ref: Executable Scripts-Footnote-181650
+Node: Comments81752
+Node: Quoting84225
+Node: DOS Quoting89538
+Node: Sample Data Files90213
+Node: Very Simple92820
+Node: Two Rules97705
+Node: More Complex99599
+Ref: More Complex-Footnote-1102513
+Node: Statements/Lines102598
+Ref: Statements/Lines-Footnote-1107054
+Node: Other Features107319
+Node: When108250
+Ref: When-Footnote-1110006
+Node: Intro Summary110071
+Node: Invoking Gawk110954
+Node: Command Line112469
+Node: Options113260
+Ref: Options-Footnote-1128907
+Node: Other Arguments128932
+Node: Naming Standard Input131760
+Node: Environment Variables132853
+Node: AWKPATH Variable133411
+Ref: AWKPATH Variable-Footnote-1136277
+Ref: AWKPATH Variable-Footnote-2136322
+Node: AWKLIBPATH Variable136582
+Node: Other Environment Variables137341
+Node: Exit Status140793
+Node: Include Files141468
+Node: Loading Shared Libraries145046
+Node: Obsolete146430
+Node: Undocumented147127
+Node: Invoking Summary147394
+Node: Regexp148994
+Node: Regexp Usage150453
+Node: Escape Sequences152486
+Node: Regexp Operators158557
+Ref: Regexp Operators-Footnote-1165988
+Ref: Regexp Operators-Footnote-2166135
+Node: Bracket Expressions166233
+Ref: table-char-classes168251
+Node: Leftmost Longest171191
+Node: Computed Regexps172395
+Node: GNU Regexp Operators175773
+Node: Case-sensitivity179479
+Ref: Case-sensitivity-Footnote-1182369
+Ref: Case-sensitivity-Footnote-2182604
+Node: Regexp Summary182712
+Node: Reading Files184181
+Node: Records186273
+Node: awk split records186995
+Node: gawk split records191853
+Ref: gawk split records-Footnote-1196374
+Node: Fields196411
+Ref: Fields-Footnote-1199375
+Node: Nonconstant Fields199461
+Ref: Nonconstant Fields-Footnote-1201691
+Node: Changing Fields201893
+Node: Field Separators207847
+Node: Default Field Splitting210549
+Node: Regexp Field Splitting211666
+Node: Single Character Fields214993
+Node: Command Line Field Separator216052
+Node: Full Line Fields219478
+Ref: Full Line Fields-Footnote-1219986
+Node: Field Splitting Summary220032
+Ref: Field Splitting Summary-Footnote-1223164
+Node: Constant Size223265
+Node: Splitting By Content227871
+Ref: Splitting By Content-Footnote-1231944
+Node: Multiple Line231984
+Ref: Multiple Line-Footnote-1237840
+Node: Getline238019
+Node: Plain Getline240230
+Node: Getline/Variable242936
+Node: Getline/File244083
+Node: Getline/Variable/File245467
+Ref: Getline/Variable/File-Footnote-1247066
+Node: Getline/Pipe247153
+Node: Getline/Variable/Pipe249839
+Node: Getline/Coprocess250946
+Node: Getline/Variable/Coprocess252198
+Node: Getline Notes252935
+Node: Getline Summary255739
+Ref: table-getline-variants256147
+Node: Read Timeout257059
+Ref: Read Timeout-Footnote-1260886
+Node: Command-line directories260944
+Node: Input Summary261848
+Node: Input Exercises264985
+Node: Printing265713
+Node: Print267435
+Node: Print Examples268928
+Node: Output Separators271707
+Node: OFMT273723
+Node: Printf275081
+Node: Basic Printf275987
+Node: Control Letters277526
+Node: Format Modifiers281517
+Node: Printf Examples287544
+Node: Redirection290008
+Node: Special Files296980
+Node: Special FD297513
+Ref: Special FD-Footnote-1301110
+Node: Special Network301184
+Node: Special Caveats302034
+Node: Close Files And Pipes302830
+Ref: Close Files And Pipes-Footnote-1309991
+Ref: Close Files And Pipes-Footnote-2310139
+Node: Output Summary310289
+Node: Output Exercises311286
+Node: Expressions311966
+Node: Values313151
+Node: Constants313827
+Node: Scalar Constants314507
+Ref: Scalar Constants-Footnote-1315366
+Node: Nondecimal-numbers315616
+Node: Regexp Constants318616
+Node: Using Constant Regexps319141
+Node: Variables322213
+Node: Using Variables322868
+Node: Assignment Options324774
+Node: Conversion326649
+Node: Strings And Numbers327173
+Ref: Strings And Numbers-Footnote-1330235
+Node: Locale influences conversions330344
+Ref: table-locale-affects333061
+Node: All Operators333649
+Node: Arithmetic Ops334279
+Node: Concatenation336784
+Ref: Concatenation-Footnote-1339603
+Node: Assignment Ops339709
+Ref: table-assign-ops344692
+Node: Increment Ops345995
+Node: Truth Values and Conditions349433
+Node: Truth Values350516
+Node: Typing and Comparison351565
+Node: Variable Typing352358
+Node: Comparison Operators356010
+Ref: table-relational-ops356420
+Node: POSIX String Comparison359970
+Ref: POSIX String Comparison-Footnote-1361054
+Node: Boolean Ops361192
+Ref: Boolean Ops-Footnote-1365531
+Node: Conditional Exp365622
+Node: Function Calls367349
+Node: Precedence371229
+Node: Locales374898
+Node: Expressions Summary376529
+Node: Patterns and Actions379070
+Node: Pattern Overview380186
+Node: Regexp Patterns381863
+Node: Expression Patterns382406
+Node: Ranges386186
+Node: BEGIN/END389292
+Node: Using BEGIN/END390054
+Ref: Using BEGIN/END-Footnote-1392790
+Node: I/O And BEGIN/END392896
+Node: BEGINFILE/ENDFILE395167
+Node: Empty398098
+Node: Using Shell Variables398415
+Node: Action Overview400698
+Node: Statements403025
+Node: If Statement404873
+Node: While Statement406371
+Node: Do Statement408415
+Node: For Statement409571
+Node: Switch Statement412723
+Node: Break Statement415111
+Node: Continue Statement417152
+Node: Next Statement418977
+Node: Nextfile Statement421347
+Node: Exit Statement424004
+Node: Built-in Variables426408
+Node: User-modified427535
+Ref: User-modified-Footnote-1435224
+Node: Auto-set435286
+Ref: Auto-set-Footnote-1448475
+Ref: Auto-set-Footnote-2448680
+Node: ARGC and ARGV448736
+Node: Pattern Action Summary452640
+Node: Arrays454863
+Node: Array Basics456412
+Node: Array Intro457238
+Ref: figure-array-elements459211
+Ref: Array Intro-Footnote-1461735
+Node: Reference to Elements461863
+Node: Assigning Elements464313
+Node: Array Example464804
+Node: Scanning an Array466536
+Node: Controlling Scanning469537
+Ref: Controlling Scanning-Footnote-1474710
+Node: Delete475026
+Ref: Delete-Footnote-1477777
+Node: Numeric Array Subscripts477834
+Node: Uninitialized Subscripts480017
+Node: Multidimensional481644
+Node: Multiscanning484757
+Node: Arrays of Arrays486346
+Node: Arrays Summary491009
+Node: Functions493114
+Node: Built-in493987
+Node: Calling Built-in495065
+Node: Numeric Functions497053
+Ref: Numeric Functions-Footnote-1501889
+Ref: Numeric Functions-Footnote-2502246
+Ref: Numeric Functions-Footnote-3502294
+Node: String Functions502563
+Ref: String Functions-Footnote-1525560
+Ref: String Functions-Footnote-2525689
+Ref: String Functions-Footnote-3525937
+Node: Gory Details526024
+Ref: table-sub-escapes527797
+Ref: table-sub-proposed529317
+Ref: table-posix-sub530681
+Ref: table-gensub-escapes532221
+Ref: Gory Details-Footnote-1533397
+Node: I/O Functions533548
+Ref: I/O Functions-Footnote-1540658
+Node: Time Functions540805
+Ref: Time Functions-Footnote-1551269
+Ref: Time Functions-Footnote-2551337
+Ref: Time Functions-Footnote-3551495
+Ref: Time Functions-Footnote-4551606
+Ref: Time Functions-Footnote-5551718
+Ref: Time Functions-Footnote-6551945
+Node: Bitwise Functions552211
+Ref: table-bitwise-ops552773
+Ref: Bitwise Functions-Footnote-1557018
+Node: Type Functions557202
+Node: I18N Functions558344
+Node: User-defined559989
+Node: Definition Syntax560793
+Ref: Definition Syntax-Footnote-1566197
+Node: Function Example566266
+Ref: Function Example-Footnote-1568906
+Node: Function Caveats568928
+Node: Calling A Function569446
+Node: Variable Scope570401
+Node: Pass By Value/Reference573389
+Node: Return Statement576899
+Node: Dynamic Typing579883
+Node: Indirect Calls580812
+Ref: Indirect Calls-Footnote-1590528
+Node: Functions Summary590656
+Node: Library Functions593306
+Ref: Library Functions-Footnote-1596924
+Ref: Library Functions-Footnote-2597067
+Node: Library Names597238
+Ref: Library Names-Footnote-1600711
+Ref: Library Names-Footnote-2600931
+Node: General Functions601017
+Node: Strtonum Function602045
+Node: Assert Function604947
+Node: Round Function608273
+Node: Cliff Random Function609814
+Node: Ordinal Functions610830
+Ref: Ordinal Functions-Footnote-1613895
+Ref: Ordinal Functions-Footnote-2614147
+Node: Join Function614358
+Ref: Join Function-Footnote-1616129
+Node: Getlocaltime Function616329
+Node: Readfile Function620065
+Node: Data File Management621904
+Node: Filetrans Function622536
+Node: Rewind Function626605
+Node: File Checking628163
+Ref: File Checking-Footnote-1629295
+Node: Empty Files629496
+Node: Ignoring Assigns631475
+Node: Getopt Function633029
+Ref: Getopt Function-Footnote-1644293
+Node: Passwd Functions644496
+Ref: Passwd Functions-Footnote-1653475
+Node: Group Functions653563
+Ref: Group Functions-Footnote-1661494
+Node: Walking Arrays661707
+Node: Library Functions Summary663310
+Node: Library Exercises664698
+Node: Sample Programs665978
+Node: Running Examples666748
+Node: Clones667476
+Node: Cut Program668700
+Node: Egrep Program678558
+Ref: Egrep Program-Footnote-1686145
+Node: Id Program686255
+Node: Split Program689909
+Ref: Split Program-Footnote-1693447
+Node: Tee Program693575
+Node: Uniq Program696362
+Node: Wc Program703785
+Ref: Wc Program-Footnote-1708050
+Node: Miscellaneous Programs708142
+Node: Dupword Program709355
+Node: Alarm Program711386
+Node: Translate Program716190
+Ref: Translate Program-Footnote-1720763
+Ref: Translate Program-Footnote-2721033
+Node: Labels Program721172
+Ref: Labels Program-Footnote-1724533
+Node: Word Sorting724617
+Node: History Sorting728660
+Node: Extract Program730496
+Node: Simple Sed738032
+Node: Igawk Program741094
+Ref: Igawk Program-Footnote-1755398
+Ref: Igawk Program-Footnote-2755599
+Node: Anagram Program755737
+Node: Signature Program758805
+Node: Programs Summary760052
+Node: Programs Exercises761267
+Ref: Programs Exercises-Footnote-1765398
+Node: Advanced Features765489
+Node: Nondecimal Data767437
+Node: Array Sorting769014
+Node: Controlling Array Traversal769711
+Node: Array Sorting Functions777991
+Ref: Array Sorting Functions-Footnote-1781898
+Node: Two-way I/O782092
+Ref: Two-way I/O-Footnote-1787036
+Ref: Two-way I/O-Footnote-2787215
+Node: TCP/IP Networking787297
+Node: Profiling790142
+Node: Advanced Features Summary797693
+Node: Internationalization799557
+Node: I18N and L10N801037
+Node: Explaining gettext801723
+Ref: Explaining gettext-Footnote-1806749
+Ref: Explaining gettext-Footnote-2806933
+Node: Programmer i18n807098
+Ref: Programmer i18n-Footnote-1811892
+Node: Translator i18n811941
+Node: String Extraction812735
+Ref: String Extraction-Footnote-1813868
+Node: Printf Ordering813954
+Ref: Printf Ordering-Footnote-1816736
+Node: I18N Portability816800
+Ref: I18N Portability-Footnote-1819249
+Node: I18N Example819312
+Ref: I18N Example-Footnote-1822018
+Node: Gawk I18N822090
+Node: I18N Summary822728
+Node: Debugger824067
+Node: Debugging825089
+Node: Debugging Concepts825530
+Node: Debugging Terms827386
+Node: Awk Debugging829983
+Node: Sample Debugging Session830875
+Node: Debugger Invocation831395
+Node: Finding The Bug832731
+Node: List of Debugger Commands839210
+Node: Breakpoint Control840542
+Node: Debugger Execution Control844206
+Node: Viewing And Changing Data847566
+Node: Execution Stack850924
+Node: Debugger Info852437
+Node: Miscellaneous Debugger Commands856431
+Node: Readline Support861615
+Node: Limitations862507
+Node: Debugging Summary864780
+Node: Arbitrary Precision Arithmetic865948
+Node: Computer Arithmetic867435
+Ref: Computer Arithmetic-Footnote-1871822
+Node: Math Definitions871879
+Ref: table-ieee-formats875168
+Ref: Math Definitions-Footnote-1875708
+Node: MPFR features875811
+Node: FP Math Caution877428
+Ref: FP Math Caution-Footnote-1878478
+Node: Inexactness of computations878847
+Node: Inexact representation879795
+Node: Comparing FP Values881150
+Node: Errors accumulate882114
+Node: Getting Accuracy883547
+Node: Try To Round886206
+Node: Setting precision887105
+Ref: table-predefined-precision-strings887787
+Node: Setting the rounding mode889580
+Ref: table-gawk-rounding-modes889944
+Ref: Setting the rounding mode-Footnote-1893398
+Node: Arbitrary Precision Integers893577
+Ref: Arbitrary Precision Integers-Footnote-1897350
+Node: POSIX Floating Point Problems897499
+Ref: POSIX Floating Point Problems-Footnote-1901375
+Node: Floating point summary901413
+Node: Dynamic Extensions903617
+Node: Extension Intro905169
+Node: Plugin License906434
+Node: Extension Mechanism Outline907119
+Ref: figure-load-extension907543
+Ref: figure-load-new-function909028
+Ref: figure-call-new-function910030
+Node: Extension API Description912014
+Node: Extension API Functions Introduction913464
+Node: General Data Types918331
+Ref: General Data Types-Footnote-1924024
+Node: Requesting Values924323
+Ref: table-value-types-returned925060
+Node: Memory Allocation Functions926018
+Ref: Memory Allocation Functions-Footnote-1928765
+Node: Constructor Functions928861
+Node: Registration Functions930619
+Node: Extension Functions931304
+Node: Exit Callback Functions933606
+Node: Extension Version String934854
+Node: Input Parsers935504
+Node: Output Wrappers945318
+Node: Two-way processors949834
+Node: Printing Messages952038
+Ref: Printing Messages-Footnote-1953115
+Node: Updating `ERRNO'953267
+Node: Accessing Parameters954006
+Node: Symbol Table Access955236
+Node: Symbol table by name955750
+Node: Symbol table by cookie957726
+Ref: Symbol table by cookie-Footnote-1961859
+Node: Cached values961922
+Ref: Cached values-Footnote-1965426
+Node: Array Manipulation965517
+Ref: Array Manipulation-Footnote-1966615
+Node: Array Data Types966654
+Ref: Array Data Types-Footnote-1969357
+Node: Array Functions969449
+Node: Flattening Arrays973323
+Node: Creating Arrays980175
+Node: Extension API Variables984906
+Node: Extension Versioning985542
+Node: Extension API Informational Variables987443
+Node: Extension API Boilerplate988529
+Node: Finding Extensions992333
+Node: Extension Example992893
+Node: Internal File Description993623
+Node: Internal File Ops997714
+Ref: Internal File Ops-Footnote-11009146
+Node: Using Internal File Ops1009286
+Ref: Using Internal File Ops-Footnote-11011633
+Node: Extension Samples1011901
+Node: Extension Sample File Functions1013425
+Node: Extension Sample Fnmatch1020993
+Node: Extension Sample Fork1022475
+Node: Extension Sample Inplace1023688
+Node: Extension Sample Ord1025363
+Node: Extension Sample Readdir1026199
+Ref: table-readdir-file-types1027055
+Node: Extension Sample Revout1027854
+Node: Extension Sample Rev2way1028445
+Node: Extension Sample Read write array1029186
+Node: Extension Sample Readfile1031065
+Node: Extension Sample API Tests1032165
+Node: Extension Sample Time1032690
+Node: gawkextlib1034005
+Node: Extension summary1036818
+Node: Extension Exercises1040511
+Node: Language History1041233
+Node: V7/SVR3.11042876
+Node: SVR41045196
+Node: POSIX1046638
+Node: BTL1048024
+Node: POSIX/GNU1048758
+Node: Feature History1054534
+Node: Common Extensions1067625
+Node: Ranges and Locales1068937
+Ref: Ranges and Locales-Footnote-11073554
+Ref: Ranges and Locales-Footnote-21073581
+Ref: Ranges and Locales-Footnote-31073815
+Node: Contributors1074036
+Node: History summary1079461
+Node: Installation1080830
+Node: Gawk Distribution1081781
+Node: Getting1082265
+Node: Extracting1083089
+Node: Distribution contents1084731
+Node: Unix Installation1090501
+Node: Quick Installation1091118
+Node: Additional Configuration Options1093560
+Node: Configuration Philosophy1095298
+Node: Non-Unix Installation1097649
+Node: PC Installation1098107
+Node: PC Binary Installation1099418
+Node: PC Compiling1101266
+Ref: PC Compiling-Footnote-11104265
+Node: PC Testing1104370
+Node: PC Using1105546
+Node: Cygwin1109698
+Node: MSYS1110507
+Node: VMS Installation1111021
+Node: VMS Compilation1111817
+Ref: VMS Compilation-Footnote-11113039
+Node: VMS Dynamic Extensions1113097
+Node: VMS Installation Details1114470
+Node: VMS Running1116722
+Node: VMS GNV1119556
+Node: VMS Old Gawk1120279
+Node: Bugs1120749
+Node: Other Versions1124753
+Node: Installation summary1130980
+Node: Notes1132036
+Node: Compatibility Mode1132901
+Node: Additions1133683
+Node: Accessing The Source1134608
+Node: Adding Code1136044
+Node: New Ports1142222
+Node: Derived Files1146703
+Ref: Derived Files-Footnote-11152178
+Ref: Derived Files-Footnote-21152212
+Ref: Derived Files-Footnote-31152808
+Node: Future Extensions1152922
+Node: Implementation Limitations1153528
+Node: Extension Design1154776
+Node: Old Extension Problems1155930
+Ref: Old Extension Problems-Footnote-11157447
+Node: Extension New Mechanism Goals1157504
+Ref: Extension New Mechanism Goals-Footnote-11160864
+Node: Extension Other Design Decisions1161053
+Node: Extension Future Growth1163159
+Node: Old Extension Mechanism1163995
+Node: Notes summary1165757
+Node: Basic Concepts1166943
+Node: Basic High Level1167624
+Ref: figure-general-flow1167896
+Ref: figure-process-flow1168495
+Ref: Basic High Level-Footnote-11171724
+Node: Basic Data Typing1171909
+Node: Glossary1175237
+Node: Copying1200389
+Node: GNU Free Documentation License1237945
+Node: Index1263081

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 539ea53d..148032aa 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -24,6 +24,7 @@
\gdef\xrefprintnodename#1{``#1''}
@end tex
@end ifset
+
@ifclear FOR_PRINT
@c With early 2014 texinfo.tex, restore PDF links and colors
@tex
@@ -33,6 +34,18 @@
@end tex
@end ifclear
+@ifnotdocbook
+@set BULLET @bullet{}
+@set MINUS @minus{}
+@set NUL @sc{nul}
+@end ifnotdocbook
+
+@ifdocbook
+@set BULLET
+@set MINUS
+@set NUL NUL
+@end ifdocbook
+
@set xref-automatic-section-title
@c The following information should be updated here only!
@@ -40,12 +53,10 @@
@c applies to and all the info about who's publishing this edition
@c These apply across the board.
-@set UPDATE-MONTH April, 2014
+@set UPDATE-MONTH August, 2014
@set VERSION 4.1
@set PATCHLEVEL 1
-@set FSF
-
@set TITLE GAWK: Effective AWK Programming
@set SUBTITLE A User's Guide for GNU Awk
@set EDITION 4.1
@@ -58,6 +69,7 @@
@set SUBSECTION subsection
@set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}}
@set COMMONEXT (c.e.)
+@set PAGE page
@end iftex
@ifinfo
@set DOCUMENT Info file
@@ -67,6 +79,7 @@
@set SUBSECTION node
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE screen
@end ifinfo
@ifhtml
@set DOCUMENT Web page
@@ -76,6 +89,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE screen
@end ifhtml
@ifdocbook
@set DOCUMENT book
@@ -85,6 +99,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifdocbook
@ifxml
@set DOCUMENT book
@@ -94,6 +109,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifxml
@ifplaintext
@set DOCUMENT book
@@ -103,16 +119,38 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifplaintext
+@ifdocbook
+@c empty on purpose
+@set PART1
+@set PART2
+@set PART3
+@set PART4
+@end ifdocbook
+
+@ifnotdocbook
+@set PART1 Part I:@*
+@set PART2 Part II:@*
+@set PART3 Part III:@*
+@set PART4 Part IV:@*
+@end ifnotdocbook
+
@c some special symbols
@iftex
@set LEQ @math{@leq}
@set PI @math{@pi}
@end iftex
+@ifdocbook
+@set LEQ @inlineraw{docbook, &le;}
+@set PI @inlineraw{docbook, &pgr;}
+@end ifdocbook
@ifnottex
+@ifnotdocbook
@set LEQ <=
@set PI @i{pi}
+@end ifnotdocbook
@end ifnottex
@ifnottex
@@ -129,6 +167,34 @@
@end macro
@end ifdocbook
+@c hack for docbook, where comma shouldn't always follow an @ref{}
+@ifdocbook
+@macro DBREF{text}
+@ref{\text\}
+@end macro
+@end ifdocbook
+
+@ifnotdocbook
+@macro DBREF{text}
+@ref{\text\},
+@end macro
+@end ifnotdocbook
+
+@ifclear FOR_PRINT
+@set FN file name
+@set FFN File Name
+@set DF data file
+@set DDF Data File
+@set PVERSION version
+@end ifclear
+@ifset FOR_PRINT
+@set FN filename
+@set FFN Filename
+@set DF datafile
+@set DDF Datafile
+@set PVERSION Version
+@end ifset
+
@c For HTML, spell out email addresses, to avoid problems with
@c address harvesters for spammers.
@ifhtml
@@ -203,6 +269,10 @@ quirk of the language / makeinfo, and isn't going to change.
@copying
@docbook
+<para>
+&ldquo;To boldly go where no man has gone before&rdquo; is a
+Registered Trademark of Paramount Pictures Corporation.</para>
+
<para>Published by:</para>
<literallayout class="normal">Free Software Foundation
@@ -231,19 +301,24 @@ implementation of AWK.
Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.3 or
any later version published by the Free Software Foundation; with the
-Invariant Sections being ``GNU General Public License'', the Front-Cover
-texts being (a) (see below), and with the Back-Cover Texts being (b)
-(see below). A copy of the license is included in the section entitled
+Invariant Sections being ``GNU General Public License'', with the
+Front-Cover Texts being ``A GNU Manual'', and with the Back-Cover Texts
+as in (a) below.
+@ifclear FOR_PRINT
+A copy of the license is included in the section entitled
``GNU Free Documentation License''.
+@end ifclear
+@ifset FOR_PRINT
+A copy of the license
+may be found on the Internet at
+@uref{http://www.gnu.org/software/gawk/manual/html_node/GNU-Free-Documentation-License.html,
+the GNU Project's web site}.
+@end ifset
@enumerate a
@item
-``A GNU Manual''
-
-@item
-``You have the freedom to
-copy and modify this GNU manual. Buying copies from the FSF
-supports it in developing GNU and promoting software freedom.''
+The FSF's Back-Cover Text is: ``You have the freedom to
+copy and modify this GNU manual.''
@end enumerate
@end copying
@@ -303,15 +378,13 @@ ISBN 1-882114-28-0 @*
@page
@w{ }
@sp 9
-@center @i{To Miriam, for making me complete.}
+@center @i{To my parents, for their love, and for the wonderful example they set for me.}
@sp 1
-@center @i{To Chana, for the joy you bring us.}
+@center @i{To my wife Miriam, for making me complete.
+Thank you for building your life together with me.}
@sp 1
-@center @i{To Rivka, for the exponential increase.}
+@center @i{To our children Chana, Rivka, Nachum and Malka, for enrichening our lives in innumerable ways.}
@sp 1
-@center @i{To Nachum, for the added dimension.}
-@sp 1
-@center @i{To Malka, for the new beginning.}
@w{ }
@page
@w{ }
@@ -321,13 +394,12 @@ ISBN 1-882114-28-0 @*
@docbook
<dedication>
-<simplelist>
-<member>To Miriam, for making me complete.</member>
-<member>To Chana, for the joy you bring us.</member>
-<member>To Rivka, for the exponential increase.</member>
-<member>To Nachum, for the added dimension.</member>
-<member>To Malka, for the new beginning.</member>
-</simplelist>
+<para>To my parents, for their love, and for the wonderful
+example they set for me.</para>
+<para>To my wife Miriam, for making me complete.
+Thank you for building your life together with me.</para>
+<para>To our children Chana, Rivka, Nachum and Malka,
+for enrichening our lives in innumerable ways.</para>
</dedication>
@end docbook
@@ -419,8 +491,8 @@ particular records in a file and perform operations upon them.
includes command-line syntax.
* One-shot:: Running a short throwaway
@command{awk} program.
-* Read Terminal:: Using no input files (input from
- terminal instead).
+* Read Terminal:: Using no input files (input from the
+ keyboard instead).
* Long:: Putting permanent @command{awk}
programs in files.
* Executable Scripts:: Making self-contained @command{awk}
@@ -442,6 +514,7 @@ particular records in a file and perform operations upon them.
* Other Features:: Other Features of @command{awk}.
* When:: When to use @command{gawk} and when to
use other things.
+* Intro Summary:: Summary of the introduction.
* Command Line:: How to run @command{awk}.
* Options:: Command-line options and their
meanings.
@@ -463,16 +536,21 @@ particular records in a file and perform operations upon them.
program.
* Obsolete:: Obsolete Options and/or features.
* Undocumented:: Undocumented Options and Features.
+* Invoking Summary:: Invocation summary.
* Regexp Usage:: How to Use Regular Expressions.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
+* awk split records:: How standard @command{awk} splits
+ records.
+* gawk split records:: How @command{gawk} splits records.
* Fields:: An introduction to fields.
* Nonconstant Fields:: Nonconstant Field Numbers.
* Changing Fields:: Changing the Contents of a Field.
@@ -483,7 +561,7 @@ particular records in a file and perform operations upon them.
* Single Character Fields:: Making each character a separate
field.
* Command Line Field Separator:: Setting @code{FS} from the
- command-line.
+ command line.
* Full Line Fields:: Making the full line be a single
field.
* Field Splitting Summary:: Some final points and a summary table.
@@ -509,8 +587,10 @@ particular records in a file and perform operations upon them.
@code{getline}.
* Getline Summary:: Summary of @code{getline} Variants.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on
+* Command-line directories:: What happens if you put a directory on
the command line.
+* Input Summary:: Input summary.
+* Input Exercises:: Exercises.
* Print:: The @code{print} statement.
* Print Examples:: Simple examples of @code{print}
statements.
@@ -534,6 +614,8 @@ particular records in a file and perform operations upon them.
* Special Caveats:: Things to watch out for.
* Close Files And Pipes:: Closing Input and Output Files and
Pipes.
+* Output Summary:: Output summary.
+* Output Exercises:: Exercises.
* Values:: Constants, Variables, and Regular
Expressions.
* Constants:: String, numeric and regexp constants.
@@ -544,11 +626,14 @@ particular records in a file and perform operations upon them.
* Variables:: Variables give names to values for
later use.
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line
+* Assignment Options:: Setting variables on the command line
and a summary of command-line syntax.
This is an advanced method of input.
* Conversion:: The conversion of strings to numbers
and vice versa.
+* Strings And Numbers:: How @command{awk} Converts Between
+ Strings And Numbers.
+* Locale influences conversions:: How the locale may affect conversions.
* All Operators:: @command{gawk}'s operators.
* Arithmetic Ops:: Arithmetic operations (@samp{+},
@samp{-}, etc.)
@@ -576,6 +661,7 @@ particular records in a file and perform operations upon them.
* Function Calls:: A function call is an expression.
* Precedence:: How various operators nest.
* Locales:: How the locale affects things.
+* Expressions Summary:: Expressions summary.
* Pattern Overview:: What goes into a pattern.
* Regexp Patterns:: Using regexps as patterns.
* Expression Patterns:: Any expression can be used as a
@@ -622,6 +708,7 @@ particular records in a file and perform operations upon them.
gives you information.
* ARGC and ARGV:: Ways to use @code{ARGC} and
@code{ARGV}.
+* Pattern Action Summary:: Patterns and Actions summary.
* Array Basics:: The basics of arrays.
* Array Intro:: Introduction to Arrays
* Reference to Elements:: How to examine one element of an
@@ -644,6 +731,7 @@ particular records in a file and perform operations upon them.
@command{awk}.
* Multiscanning:: Scanning multidimensional arrays.
* Arrays of Arrays:: True multidimensional arrays.
+* Arrays Summary:: Summary of arrays.
* Built-in:: Summarizes the built-in functions.
* Calling Built-in:: How to call built-in functions.
* Numeric Functions:: Functions that work with numbers,
@@ -678,6 +766,7 @@ particular records in a file and perform operations upon them.
runtime.
* Indirect Calls:: Choosing the function to call at
runtime.
+* Functions Summary:: Summary of functions.
* Library Names:: How to best name private global
variables in library functions.
* General Functions:: Functions that are of general use.
@@ -712,6 +801,8 @@ particular records in a file and perform operations upon them.
* Group Functions:: Functions for getting group
information.
* Walking Arrays:: A function to walk arrays of arrays.
+* Library Functions Summary:: Summary of library functions.
+* Library Exercises:: Exercises.
* Running Examples:: How to run these examples.
* Clones:: Clones of common utilities.
* Cut Program:: The @command{cut} utility.
@@ -741,6 +832,8 @@ particular records in a file and perform operations upon them.
* Anagram Program:: Finding anagrams from a dictionary.
* Signature Program:: People do amazing things with too much
time on their hands.
+* Programs Summary:: Summary of programs.
+* Programs Exercises:: Exercises.
* Nondecimal Data:: Allowing nondecimal input data.
* Array Sorting:: Facilities for controlling array
traversal and sorting arrays.
@@ -752,8 +845,9 @@ particular records in a file and perform operations upon them.
* TCP/IP Networking:: Using @command{gawk} for network
programming.
* Profiling:: Profiling your @command{awk} programs.
+* Advanced Features Summary:: Summary of advanced features.
* I18N and L10N:: Internationalization and Localization.
-* Explaining gettext:: How GNU @code{gettext} works.
+* Explaining gettext:: How GNU @command{gettext} works.
* Programmer i18n:: Features for the programmer.
* Translator i18n:: Features for the translator.
* String Extraction:: Extracting marked strings.
@@ -763,6 +857,7 @@ particular records in a file and perform operations upon them.
* I18N Example:: A simple i18n example.
* Gawk I18N:: @command{gawk} is also
internationalized.
+* I18N Summary:: Summary of I18N stuff.
* Debugging:: Introduction to @command{gawk}
debugger.
* Debugging Concepts:: Debugging in General.
@@ -781,31 +876,23 @@ particular records in a file and perform operations upon them.
* Miscellaneous Debugger Commands:: Miscellaneous Commands.
* Readline Support:: Readline support.
* Limitations:: Limitations and future plans.
-* General Arithmetic:: An introduction to computer
- arithmetic.
-* Floating Point Issues:: Stuff to know about floating-point
- numbers.
-* String Conversion Precision:: The String Value Can Lie.
-* Unexpected Results:: Floating Point Numbers Are Not
- Abstract Numbers.
-* POSIX Floating Point Problems:: Standards Versus Existing Practice.
-* Integer Programming:: Effective integer programming.
-* Floating-point Programming:: Effective Floating-point Programming.
-* Floating-point Representation:: Binary floating-point representation.
-* Floating-point Context:: Floating-point context.
-* Rounding Mode:: Floating-point rounding mode.
-* Gawk and MPFR:: How @command{gawk} provides
- arbitrary-precision arithmetic.
-* Arbitrary Precision Floats:: Arbitrary Precision Floating-point
- Arithmetic with @command{gawk}.
-* Setting Precision:: Setting the working precision.
-* Setting Rounding Mode:: Setting the rounding mode.
-* Floating-point Constants:: Representing floating-point constants.
-* Changing Precision:: Changing the precision of a number.
-* Exact Arithmetic:: Exact arithmetic with floating-point
- numbers.
+* Debugging Summary:: Debugging summary.
+* Computer Arithmetic:: A quick intro to computer math.
+* Math Definitions:: Defining terms used.
+* MPFR features:: The MPFR features in @command{gawk}.
+* FP Math Caution:: Things to know.
+* Inexactness of computations:: Floating point math is not exact.
+* Inexact representation:: Numbers are not exactly represented.
+* Comparing FP Values:: How to compare floating point values.
+* Errors accumulate:: Errors get bigger as they go.
+* Getting Accuracy:: Getting more accuracy takes some work.
+* Try To Round:: Add digits and round.
+* Setting precision:: How to set the precision.
+* Setting the rounding mode:: How to set the rounding mode.
* Arbitrary Precision Integers:: Arbitrary Precision Integer Arithmetic
with @command{gawk}.
+* POSIX Floating Point Problems:: Standards Versus Existing Practice.
+* Floating point summary:: Summary of floating point discussion.
* Extension Intro:: What is an extension.
* Plugin License:: A note about licensing.
* Extension Mechanism Outline:: An outline of how it works.
@@ -867,6 +954,8 @@ particular records in a file and perform operations upon them.
* Extension Sample Time:: An interface to @code{gettimeofday()}
and @code{sleep()}.
* gawkextlib:: The @code{gawkextlib} project.
+* Extension summary:: Extension summary.
+* Extension Exercises:: Exercises.
* V7/SVR3.1:: The major changes between V7 and
System V Release 3.1.
* SVR4:: Minor changes between System V
@@ -883,6 +972,7 @@ particular records in a file and perform operations upon them.
ranges.
* Contributors:: The major contributors to
@command{gawk}.
+* History summary:: History summary.
* Gawk Distribution:: What is in the @command{gawk}
distribution.
* Getting:: How to get the distribution.
@@ -921,6 +1011,7 @@ particular records in a file and perform operations upon them.
* Bugs:: Reporting Problems and Bugs.
* Other Versions:: Other freely available @command{awk}
implementations.
+* Installation summary:: Summary of installation.
* Compatibility Mode:: How to disable certain @command{gawk}
extensions.
* Additions:: Making Additions To @command{gawk}.
@@ -929,8 +1020,8 @@ particular records in a file and perform operations upon them.
@command{gawk}.
* New Ports:: Porting @command{gawk} to a new
operating system.
-* Derived Files:: Why derived files are kept in the
- @command{git} repository.
+* Derived Files:: Why derived files are kept in the Git
+ repository.
* Future Extensions:: New features that may be implemented
one day.
* Implementation Limitations:: Some limitations of the
@@ -941,6 +1032,7 @@ particular records in a file and perform operations upon them.
* Extension Other Design Decisions:: Some other design decisions.
* Extension Future Growth:: Some room for future growth.
* Old Extension Mechanism:: Some compatibility for old extensions.
+* Notes summary:: Summary of implementation notes.
* Basic High Level:: The high level view.
* Basic Data Typing:: A very quick intro to data types.
@end detailmenu
@@ -948,15 +1040,14 @@ particular records in a file and perform operations upon them.
@c dedication for Info file
@ifinfo
-@center To Miriam, for making me complete.
+To my parents, for their love, and for the wonderful
+example they set for me.
@sp 1
-@center To Chana, for the joy you bring us.
+To my wife Miriam, for making me complete.
+Thank you for building your life together with me.
@sp 1
-@center To Rivka, for the exponential increase.
-@sp 1
-@center To Nachum, for the added dimension.
-@sp 1
-@center To Malka, for the new beginning.
+To our children Chana, Rivka, Nachum and Malka,
+for enrichening our lives in innumerable ways.
@end ifinfo
@summarycontents
@@ -965,6 +1056,21 @@ particular records in a file and perform operations upon them.
@node Foreword
@unnumbered Foreword
+@c This bit is post-processed by a script which turns the chapter
+@c tag into a preface tag, and moves this stuff to before the title.
+@c Bleah.
+@docbook
+ <prefaceinfo>
+ <author>
+ <firstname>Michael</firstname>
+ <surname>Brennan</surname>
+ <!-- can't put mawk into command tags. sigh. -->
+ <affiliation><jobtitle>Author of mawk</jobtitle></affiliation>
+ </author>
+ <date>March, 2001</date>
+ </prefaceinfo>
+@end docbook
+
Arnold Robbins and I are good friends. We were introduced
@c 11 years ago
in 1990
@@ -1089,12 +1195,14 @@ Arnold has distilled over a decade of experience writing and
using AWK programs, and developing @command{gawk}, into this book. If you use
AWK or want to learn how, then read this book.
+@ifnotdocbook
@cindex Brennan, Michael
@display
Michael Brennan
Author of @command{mawk}
March, 2001
@end display
+@end ifnotdocbook
@node Preface
@unnumbered Preface
@@ -1103,24 +1211,34 @@ March, 2001
@c
@c 12/2000: Chuck wants the preface & intro combined.
-Several kinds of tasks occur repeatedly
-when working with text files.
-You might want to extract certain lines and discard the rest.
-Or you may need to make changes wherever certain patterns appear,
-but leave the rest of the file alone.
-Writing single-use programs for these tasks in languages such as C, C++,
-or Java is time-consuming and inconvenient.
-Such jobs are often easier with @command{awk}.
-The @command{awk} utility interprets a special-purpose programming language
-that makes it easy to handle simple data-reformatting jobs.
+@c This bit is post-processed by a script which turns the chapter
+@c tag into a preface tag, and moves this stuff to before the title.
+@c Bleah.
+@docbook
+ <prefaceinfo>
+ <author>
+ <firstname>Arnold</firstname>
+ <surname>Robbins</surname>
+ <affiliation><jobtitle>Nof Ayalon</jobtitle></affiliation>
+ <affiliation><jobtitle>ISRAEL</jobtitle></affiliation>
+ </author>
+ <date>June, 2014</date>
+ </prefaceinfo>
+@end docbook
+
+Several kinds of tasks occur repeatedly when working with text files.
+You might want to extract certain lines and discard the rest. Or you
+may need to make changes wherever certain patterns appear, but leave the
+rest of the file alone. Such jobs are often easy with @command{awk}.
+The @command{awk} utility interprets a special-purpose programming
+language that makes it easy to handle simple data-reformatting jobs.
-@cindex Brian Kernighan's @command{awk}
The GNU implementation of @command{awk} is called @command{gawk}; if you
invoke it with the proper options or environment variables
(@pxref{Options}), it is fully
compatible with
-the POSIX@footnote{The 2008 POSIX standard is online at
-@url{http://www.opengroup.org/onlinepubs/9699919799/}.}
+the POSIX@footnote{The 2008 POSIX standard is accessible online at
+@w{@url{http://www.opengroup.org/onlinepubs/9699919799/}.}}
specification of the @command{awk} language
and with the Unix version of @command{awk} maintained
by Brian Kernighan.
@@ -1137,7 +1255,7 @@ Thus, we usually don't distinguish between @command{gawk} and other
@cindex @command{awk}, uses for
Using @command{awk} allows you to:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Manage small, personal databases
@@ -1162,7 +1280,7 @@ In addition,
@command{gawk}
provides facilities that make it easy to:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Extract bits and pieces of data for processing
@@ -1171,6 +1289,12 @@ Sort data
@item
Perform simple network communications
+
+@item
+Profile and debug @command{awk} programs
+
+@item
+Extend the language with functions written in C or C++
@end itemize
This @value{DOCUMENT} teaches you about the @command{awk} language and
@@ -1186,12 +1310,18 @@ Implementations of the @command{awk} language are available for many
different computing environments. This @value{DOCUMENT}, while describing
the @command{awk} language in general, also describes the particular
implementation of @command{awk} called @command{gawk} (which stands for
-``GNU awk''). @command{gawk} runs on a broad range of Unix systems,
-ranging from Intel@registeredsymbol{}-architecture PC-based computers
-up through large-scale systems,
-such as Crays. @command{gawk} has also been ported to Mac OS X,
-Microsoft Windows (all versions) and OS/2 PCs,
-and VMS.
+``GNU @command{awk}''). @command{gawk} runs on a broad range of Unix systems,
+ranging from Intel-architecture PC-based computers
+up through large-scale systems.
+@command{gawk} has also been ported to Mac OS X,
+Microsoft Windows
+@ifset FOR_PRINT
+(all versions),
+@end ifset
+@ifclear FOR_PRINT
+(all versions) and OS/2 PCs,
+@end ifclear
+and OpenVMS.
(Some other, obsolete systems to which @command{gawk} was once ported
are no longer supported and the code for those systems
has been removed.)
@@ -1284,7 +1414,7 @@ help from me, thoroughly reworked @command{gawk} for compatibility
with the newer @command{awk}.
Circa 1994, I became the primary maintainer.
Current development focuses on bug fixes,
-performance improvements, standards compliance, and occasionally, new features.
+performance improvements, standards compliance and, occasionally, new features.
In May of 1997, J@"urgen Kahrs felt the need for network access
from @command{awk}, and with a little help from me, set about adding
@@ -1293,11 +1423,11 @@ wrote the bulk of
@cite{TCP/IP Internetworking with @command{gawk}}
(a separate document, available as part of the @command{gawk} distribution).
His code finally became part of the main @command{gawk} distribution
-with @command{gawk} version 3.1.
+with @command{gawk} @value{PVERSION} 3.1.
John Haque rewrote the @command{gawk} internals, in the process providing
an @command{awk}-level debugger. This version became available as
-@command{gawk} version 4.0, in 2011.
+@command{gawk} @value{PVERSION} 4.0, in 2011.
@xref{Contributors},
for a complete list of those who made important contributions to @command{gawk}.
@@ -1309,29 +1439,27 @@ for a complete list of those who made important contributions to @command{gawk}.
The @command{awk} language has evolved over the years. Full details are
provided in @ref{Language History}.
The language described in this @value{DOCUMENT}
-is often referred to as ``new @command{awk}'' (@command{nawk}).
+is often referred to as ``new @command{awk}''.
+By analogy, the original version of @command{awk} is
+referred to as ``old @command{awk}.''
-@cindex @command{awk}, versions of
-Because of this, there are systems with multiple
-versions of @command{awk}.
-Some systems have an @command{awk} utility that implements the
-original version of the @command{awk} language and a @command{nawk} utility
-for the new version.
-Others have an @command{oawk} version for the ``old @command{awk}''
-language and plain @command{awk} for the new one. Still others only
-have one version, which is usually the new one.@footnote{Often, these systems
-use @command{gawk} for their @command{awk} implementation!}
-
-@cindex @command{nawk} utility
-@cindex @command{oawk} utility
-All in all, this makes it difficult for you to know which version of
-@command{awk} you should run when writing your programs. The best advice
-we can give here is to check your local documentation. Look for @command{awk},
-@command{oawk}, and @command{nawk}, as well as for @command{gawk}.
-It is likely that you already
-have some version of new @command{awk} on your system, which is what
-you should use when running your programs. (Of course, if you're reading
-this @value{DOCUMENT}, chances are good that you have @command{gawk}!)
+Today, on most systems, when you run the @command{awk} utility,
+you get some version of new @command{awk}.@footnote{Only
+Solaris systems still use an old @command{awk} for the
+default @command{awk} utility. A more modern @command{awk} lives in
+@file{/usr/xpg6/bin} on these systems.} If your system's standard
+@command{awk} is the old one, you will see something like this
+if you try the test program:
+
+@example
+$ @kbd{awk 1 /dev/null}
+@error{} awk: syntax error near line 1
+@error{} awk: bailing out near line 1
+@end example
+
+@noindent
+In this case, you should find a version of new @command{awk},
+or just install @command{gawk}!
Throughout this @value{DOCUMENT}, whenever we refer to a language feature
that should be available in any complete implementation of POSIX @command{awk},
@@ -1359,9 +1487,15 @@ Primarily, this @value{DOCUMENT} explains the features of @command{awk}
as defined in the POSIX standard. It does so in the context of the
@command{gawk} implementation. While doing so, it also
attempts to describe important differences between @command{gawk}
-and other @command{awk} implementations.@footnote{All such differences
+and other @command{awk}
+@ifclear FOR_PRINT
+implementations.@footnote{All such differences
appear in the index under the
entry ``differences in @command{awk} and @command{gawk}.''}
+@end ifclear
+@ifset FOR_PRINT
+implementations.
+@end ifset
Finally, any @command{gawk} features that are not in
the POSIX standard for @command{awk} are noted.
@@ -1369,14 +1503,16 @@ the POSIX standard for @command{awk} are noted.
This @value{DOCUMENT} has the difficult task of being both a tutorial and a reference.
If you are a novice, feel free to skip over details that seem too complex.
You should also ignore the many cross-references; they are for the
-expert user and for the online Info and HTML versions of the document.
+expert user and for the online Info and HTML versions of the @value{DOCUMENT}.
@end ifnotinfo
There are sidebars
scattered throughout the @value{DOCUMENT}.
They add a more complete explanation of points that are relevant, but not likely
to be of interest on first reading.
+@ifclear FOR_PRINT
All appear in the index, under the heading ``sidebar.''
+@end ifclear
Most of the time, the examples use complete @command{awk} programs.
Some of the more advanced sections show only the part of the @command{awk}
@@ -1393,6 +1529,8 @@ should be of interest.
This @value{DOCUMENT} is split into several parts, as follows:
+@c FULLXREF ON
+
Part I describes the @command{awk} language and @command{gawk} program in detail.
It starts with the basics, and continues through all of the features of @command{awk}.
It contains the following chapters:
@@ -1476,9 +1614,15 @@ describes advanced arithmetic facilities provided by
@ref{Dynamic Extensions}, describes how to add new variables and
functions to @command{gawk} by writing extensions in C or C++.
+@ifclear FOR_PRINT
Part IV provides the appendices, the Glossary, and two licenses that cover
the @command{gawk} source code and this @value{DOCUMENT}, respectively.
It contains the following appendices:
+@end ifclear
+@ifset FOR_PRINT
+Part IV provides the following appendices,
+including the GNU General Public License:
+@end ifset
@ref{Language History},
describes how the @command{awk} language has evolved since
@@ -1493,6 +1637,42 @@ non-POSIX systems. It also describes how to report bugs
in @command{gawk} and where to get other freely
available @command{awk} implementations.
+@ifset FOR_PRINT
+
+@ref{Copying},
+presents the license that covers the @command{gawk} source code.
+
+The version of this @value{DOCUMENT} distributed with @command{gawk}
+contains additional appendices and other end material.
+To save space, we have omitted them from the
+printed edition. You may find them online, as follows:
+
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Notes.html,
+The appendix on implementation notes}
+describes how to disable @command{gawk}'s extensions, as
+well as how to contribute new code to @command{gawk},
+and some possible future directions for @command{gawk} development.
+
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Basic-Concepts.html,
+The appendix on basic concepts}
+provides some very cursory background material for those who
+are completely unfamiliar with computer programming.
+
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Glossary.html,
+The Glossary}
+defines most, if not all, the significant terms used
+throughout the @value{DOCUMENT}. If you find terms that you aren't familiar with,
+try looking them up here.
+
+@uref{http://www.gnu.org/software/gawk/manual/html_node/GNU-Free-Documentation-License.html,
+The GNU FDL}
+is the license that covers this @value{DOCUMENT}.
+
+Some of the chapters have exercise sections; these have also been
+omitted from the print edition.
+@end ifset
+
+@ifclear FOR_PRINT
@ref{Notes},
describes how to disable @command{gawk}'s extensions, as
well as how to contribute new code to @command{gawk},
@@ -1503,13 +1683,16 @@ provides some very cursory background material for those who
are completely unfamiliar with computer programming.
The @ref{Glossary}, defines most, if not all, the significant terms used
-throughout the book. If you find terms that you aren't familiar with,
+throughout the @value{DOCUMENT}. If you find terms that you aren't familiar with,
try looking them up here.
@ref{Copying}, and
@ref{GNU Free Documentation License},
present the licenses that cover the @command{gawk} source code
and this @value{DOCUMENT}, respectively.
+@end ifclear
+
+@c FULLXREF OFF
@node Conventions
@unnumberedsec Typographical Conventions
@@ -1527,11 +1710,18 @@ are slightly different than in other books you may have read.
This @value{SECTION} briefly documents the typographical conventions used in Texinfo.
@end ifinfo
-Examples you would type at the command-line are preceded by the common
+Examples you would type at the command line are preceded by the common
shell primary and secondary prompts, @samp{$} and @samp{>}.
Input that you type is shown @kbd{like this}.
+@c 8/2014: @print{} is stripped from the texi to make docbook.
+@ifclear FOR_PRINT
Output from the command is preceded by the glyph ``@print{}''.
This typically represents the command's standard output.
+@end ifclear
+@ifset FOR_PRINT
+Output from the command, usually its standard output, appears
+@code{like this}.
+@end ifset
Error messages, and other output on the command's standard error, are preceded
by the glyph ``@error{}''. For example:
@@ -1551,7 +1741,7 @@ emphasized @emph{like this}, and if a point needs to be made
strongly, it is done @strong{like this}. The first occurrence of
a new term is usually its @dfn{definition} and appears in the same
font as the previous occurrence of ``definition'' in this sentence.
-Finally, file names are indicated like this: @file{/path/to/ourfile}.
+Finally, @value{FN}s are indicated like this: @file{/path/to/ourfile}.
@end ifnotinfo
Characters that you type at the keyboard look @kbd{like this}. In particular,
@@ -1561,6 +1751,20 @@ another key, at the same time. For example, a @kbd{Ctrl-d} is typed
by first pressing and holding the @kbd{CONTROL} key, next
pressing the @kbd{d} key and finally releasing both keys.
+For the sake of brevity, throughout this @value{DOCUMENT}, we refer to
+Brian Kernighan's version of @command{awk} as ``BWK @command{awk}.''
+(@xref{Other Versions}, for information on his and other versions.)
+
+@ifset FOR_PRINT
+@quotation NOTE
+Notes of interest look like this.
+@end quotation
+
+@quotation CAUTION
+Cautionary or warning notes look like this.
+@end quotation
+@end ifset
+
@c fakenode --- for prepinfo
@unnumberedsubsec Dark Corners
@cindex Kernighan, Brian
@@ -1583,16 +1787,23 @@ the picture of a flashlight in the margin, as shown here.
@ifnottex
``(d.c.)''.
@end ifnottex
+@ifclear FOR_PRINT
They also appear in the index under the heading ``dark corner.''
+@end ifclear
-As noted by the opening quote, though, any
-coverage of dark corners
-is, by definition, incomplete.
+As noted by the opening quote, though, any coverage of dark corners is,
+by definition, incomplete.
+@cindex c.e., See common extensions
Extensions to the standard @command{awk} language that are supported by
more than one @command{awk} implementation are marked
+@ifclear FOR_PRINT
``@value{COMMONEXT},'' and listed in the index under ``common extensions''
and ``extensions, common.''
+@end ifclear
+@ifset FOR_PRINT
+``@value{COMMONEXT}'' for ``common extension.''
+@end ifset
@node Manual History
@unnumberedsec The GNU Project and This Book
@@ -1615,19 +1826,22 @@ Foundation to create a complete, freely distributable, POSIX-compliant
computing environment.
The FSF uses the ``GNU General Public License'' (GPL) to ensure that
their software's
-source code is always available to the end user. A
-copy of the GPL is included
+source code is always available to the end user.
+@ifclear FOR_PRINT
+A copy of the GPL is included
@ifnotinfo
in this @value{DOCUMENT}
@end ifnotinfo
for your reference
(@pxref{Copying}).
+@end ifclear
The GPL applies to the C language source code for @command{gawk}.
To find out more about the FSF and the GNU Project online,
see @uref{http://www.gnu.org, the GNU Project's home page}.
This @value{DOCUMENT} may also be read from
@uref{http://www.gnu.org/software/gawk/manual/, their web site}.
+@ifclear FOR_PRINT
A shell, an editor (Emacs), highly portable optimizing C, C++, and
Objective-C compilers, a symbolic debugger and dozens of large and
small utilities (such as @command{gawk}), have all been completed and are
@@ -1638,80 +1852,29 @@ stage of development.
@cindex Linux
@cindex GNU/Linux
@cindex operating systems, BSD-based
-@cindex Alpha (DEC)
Until the GNU operating system is more fully developed, you should
consider using GNU/Linux, a freely distributable, Unix-like operating
-system for Intel@registeredsymbol{},
+system for Intel,
Power Architecture,
Sun SPARC, IBM S/390, and other
systems.@footnote{The terminology ``GNU/Linux'' is explained
in the @ref{Glossary}.}
Many GNU/Linux distributions are
available for download from the Internet.
-
-(There are numerous other freely available, Unix-like operating systems
-based on the
-Berkeley Software Distribution, and some of them use recent versions
-of @command{gawk} for their versions of @command{awk}.
-@uref{http://www.netbsd.org, NetBSD},
-@uref{http://www.freebsd.org, FreeBSD},
-and
-@uref{http://www.openbsd.org, OpenBSD}
-are three of the most popular ones, but there
-are others.)
+@end ifclear
@ifnotinfo
The @value{DOCUMENT} you are reading is actually free---at least, the
information in it is free to anyone. The machine-readable
source code for the @value{DOCUMENT} comes with @command{gawk}; anyone
may take this @value{DOCUMENT} to a copying machine and make as many
-copies as they like. (Take a moment to check the Free Documentation
+copies as they like.
+@ifclear FOR_PRINT
+(Take a moment to check the Free Documentation
License in @ref{GNU Free Documentation License}.)
+@end ifclear
@end ifnotinfo
-@ignore
-@cindex Close, Diane
-The @value{DOCUMENT} itself has gone through several previous,
-preliminary editions.
-Paul Rubin wrote the very first draft of @cite{The GAWK Manual};
-it was around 40 pages in size.
-Diane Close and Richard Stallman improved it, yielding the
-version which I started working with in the fall of 1988.
-It was around 90 pages long and barely described the original, ``old''
-version of @command{awk}. After substantial revision, the first version of
-the @cite{The GAWK Manual} to be released was Edition 0.11 Beta in
-October of 1989. The manual then underwent more substantial revision
-for Edition 0.13 of December 1991.
-David Trueman, Pat Rankin and Michal Jaegermann contributed sections
-of the manual for Edition 0.13.
-That edition was published by the
-FSF as a bound book early in 1992. Since then there were several
-minor revisions, notably Edition 0.14 of November 1992 that was published
-by the FSF in January of 1993 and Edition 0.16 of August 1993.
-
-Edition 1.0 of @cite{GAWK: The GNU Awk User's Guide} represented a significant re-working
-of @cite{The GAWK Manual}, with much additional material.
-The FSF and I agreed that I was now the primary author.
-@c I also felt that the manual needed a more descriptive title.
-
-In January 1996, SSC published Edition 1.0 under the title @cite{Effective AWK Programming}.
-In February 1997, they published Edition 1.0.3 which had minor changes
-as a ``second edition.''
-In 1999, the FSF published this same version as Edition 2
-of @cite{GAWK: The GNU Awk User's Guide}.
-
-Edition @value{EDITION} maintains the basic structure of Edition 1.0,
-but with significant additional material, reflecting the host of new features
-in @command{gawk} version @value{VERSION}.
-Of particular note is
-@ref{Array Sorting},
-@ref{Bitwise Functions},
-@ref{Internationalization},
-@ref{Advanced Features},
-and
-@ref{Dynamic Extensions}.
-@end ignore
-
@cindex Close, Diane
The @value{DOCUMENT} itself has gone through a number of previous editions.
Paul Rubin wrote the very first draft of @cite{The GAWK Manual};
@@ -1727,24 +1890,50 @@ the FSF published several preliminary versions (numbered 0.@var{x}).
In 1996, Edition 1.0 was released with @command{gawk} 3.0.0.
The FSF published the first two editions under
the title @cite{The GNU Awk User's Guide}.
+@ifset FOR_PRINT
+SSC published two editions of the @value{DOCUMENT} under the
+title @cite{Effective awk Programming}, and O'Reilly published
+the third edition in 2001.
+@end ifset
This edition maintains the basic structure of the previous editions.
-For Edition 4.0, the content has been thoroughly reviewed
+For FSF edition 4.0, the content has been thoroughly reviewed
and updated. All references to @command{gawk} versions prior to 4.0 have been
removed.
Of significant note for this edition was @ref{Debugger}.
-For edition @value{EDITION}, the content has been reorganized into parts,
+For FSF edition
+@ifclear FOR_PRINT
+@value{EDITION},
+@end ifclear
+@ifset FOR_PRINT
+@value{EDITION}
+(the fourth edition as published by O'Reilly),
+@end ifset
+the content has been reorganized into parts,
and the major new additions are @ref{Arbitrary Precision Arithmetic},
and @ref{Dynamic Extensions}.
-@cite{@value{TITLE}} will undoubtedly continue to evolve.
-An electronic version
-comes with the @command{gawk} distribution from the FSF.
-If you find an error in this @value{DOCUMENT}, please report it!
-@xref{Bugs}, for information on submitting
-problem reports electronically.
+This @value{DOCUMENT} will undoubtedly continue to evolve. An electronic
+version comes with the @command{gawk} distribution from the FSF. If you
+find an error in this @value{DOCUMENT}, please report it! @xref{Bugs},
+for information on submitting problem reports electronically.
+@ifset FOR_PRINT
+@c fakenode --- for prepinfo
+@unnumberedsec How to Stay Current
+
+It may be you have a version of @command{gawk} which is newer than the
+one described in this @value{DOCUMENT}. To find out what has changed,
+you should first look at the @file{NEWS} file in the @command{gawk}
+distribution, which provides a high-level summary of what changed in
+each release.
+
+You can then look at the @uref{http://www.gnu.org/software/gawk/manual/,
+online version} of this @value{DOCUMENT} to read about any new features.
+@end ifset
+
+@ifclear FOR_PRINT
@node How To Contribute
@unnumberedsec How to Contribute
@@ -1761,7 +1950,7 @@ However, I found that I could not dedicate enough time to managing
contributed code: the archive did not grow and the domain went unused
for several years.
-Fortunately, late in 2008, a volunteer took on the task of setting up
+Late in 2008, a volunteer took on the task of setting up
an @command{awk}-related web site---@uref{http://awk.info}---and did a very
nice job.
@@ -1770,11 +1959,15 @@ a @command{gawk} extension that you would like to share with the rest
of the world, please see @uref{http://awk.info/?contribute} for how to
contribute it to the web site.
+As of this writing, this website is in search of a maintainer; please
+contact me if you are interested.
+
@ignore
Other links:
http://www.reddit.com/r/linux/comments/dtect/composing_music_in_awk/
@end ignore
+@end ifclear
@node Acknowledgments
@unnumberedsec Acknowledgments
@@ -1912,13 +2105,29 @@ people.
Notable code and documentation contributions were made by
a number of people. @xref{Contributors}, for the full list.
+Thanks to Patrice Dumas for the new @command{makeinfo} program.
+Thanks to Karl Berry who continues to work to keep
+the Texinfo markup language sane.
+
@cindex Kernighan, Brian
+@cindex Brennan, Michael
+@cindex Day, Robert P.J.@:
+Robert P.J.@: Day, Michael Brennan and Brian Kernighan kindly acted as
+reviewers for the 2015 edition of this @value{DOCUMENT}. Their feedback
+helped improve the final work.
+
I would like to thank Brian Kernighan for invaluable assistance during the
testing and debugging of @command{gawk}, and for ongoing
help and advice in clarifying numerous points about the language.
We could not have done nearly as good a job on either @command{gawk}
or its documentation without his help.
+Brian is in a class by himself as a programmer and technical
+author. I have to thank him (yet again) for his ongoing friendship
+and the role model he has been for me for close to 30 years!
+Having him as a reviewer is an exciting privilege. It has also
+been extremely humbling@enddots{}
+
@cindex Robbins, Miriam
@cindex Robbins, Jean
@cindex Robbins, Harry
@@ -1931,26 +2140,28 @@ which they raised and educated me.
Finally, I also must acknowledge my gratitude to G-d, for the many opportunities
He has sent my way, as well as for the gifts He has given me with which to
take advantage of those opportunities.
+@iftex
@sp 2
@noindent
Arnold Robbins @*
Nof Ayalon @*
ISRAEL @*
-May, 2013
-
-@iftex
-@part Part I:@* The @command{awk} Language
+May, 2014
@end iftex
-@ignore
+@ifnotinfo
+@part @value{PART1}The @command{awk} Language
+@end ifnotinfo
+
@ifdocbook
-@part Part I:@* The @command{awk} Language
-Part I describes the @command{awk} language and @command{gawk} program in detail.
-It starts with the basics, and continues through all of the features of @command{awk}
-and @command{gawk}. It contains the following chapters:
+Part I describes the @command{awk} language and @command{gawk} program
+in detail. It starts with the basics, and continues through all of
+the features of @command{awk}. Included also are many, but not all,
+of the features of @command{gawk}. This part contains the
+following chapters:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@ref{Getting Started}.
@@ -1979,7 +2190,6 @@ and @command{gawk}. It contains the following chapters:
@ref{Functions}.
@end itemize
@end ifdocbook
-@end ignore
@node Getting Started
@chapter Getting Started with @command{awk}
@@ -2019,7 +2229,7 @@ pattern to search for and one action to perform
upon finding the pattern.
Syntactically, a rule consists of a pattern followed by an action. The
-action is enclosed in curly braces to separate it from the pattern.
+action is enclosed in braces to separate it from the pattern.
Newlines usually separate rules. Therefore, an @command{awk}
program looks like this:
@@ -2043,6 +2253,7 @@ program looks like this:
* Other Features:: Other Features of @command{awk}.
* When:: When to use @command{gawk} and when to use
other things.
+* Intro Summary:: Summary of the introduction.
@end menu
@node Running gawk
@@ -2071,7 +2282,7 @@ variations of each.
@menu
* One-shot:: Running a short throwaway @command{awk}
program.
-* Read Terminal:: Using no input files (input from terminal
+* Read Terminal:: Using no input files (input from the keyboard
instead).
* Long:: Putting permanent @command{awk} programs in
files.
@@ -2135,10 +2346,15 @@ awk '@var{program}'
@noindent
@command{awk} applies the @var{program} to the @dfn{standard input},
-which usually means whatever you type on the terminal. This continues
+which usually means whatever you type on the keyboard. This continues
until you indicate end-of-file by typing @kbd{Ctrl-d}.
+@ifset FOR_PRINT
+(On other operating systems, the end-of-file character may be different.)
+@end ifset
+@ifclear FOR_PRINT
(On other operating systems, the end-of-file character may be different.
For example, on OS/2, it is @kbd{Ctrl-z}.)
+@end ifclear
@cindex files, input, See input files
@cindex input files, running @command{awk} without
@@ -2146,29 +2362,27 @@ For example, on OS/2, it is @kbd{Ctrl-z}.)
As an example, the following program prints a friendly piece of advice
(from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}),
to keep you from worrying about the complexities of computer
-programming@footnote{If you use Bash as your shell, you should execute
-the command @samp{set +H} before running this program interactively,
-to disable the C shell-style command history, which treats
-@samp{!} as a special character. We recommend putting this command into
-your personal startup file.}
-(@code{BEGIN} is a feature we haven't discussed yet):
+programming:
@example
-$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"}
+$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'}
@print{} Don't Panic!
@end example
-@cindex shell quoting, double quote
-@cindex double quote (@code{"}) in shell commands
-@cindex @code{"} (double quote) in shell commands
-@cindex @code{\} (backslash) in shell commands
-@cindex backslash (@code{\}) in shell commands
-This program does not read any input. The @samp{\} before each of the
-inner double quotes is necessary because of the shell's quoting
-rules---in particular because it mixes both single quotes and
-double quotes.@footnote{Although we generally recommend the use of single
-quotes around the program text, double quotes are needed here in order to
-put the single quote into the message.}
+@command{awk} executes statements associated with @code{BEGIN} before
+reading any input. If there are no other statements in your program,
+as is the case here, @command{awk} just stops, instead of trying to read
+input it doesn't know how to process.
+The @samp{\47} is a magic way of getting a single quote into
+the program, without having to engage in ugly shell quoting tricks.
+
+@quotation NOTE
+As a side note, if you use Bash as your shell, you should execute the
+command @samp{set +H} before running this program interactively, to
+disable the C shell-style command history, which treats @samp{!} as a
+special character. We recommend putting this command into your personal
+startup file.
+@end quotation
This next simple @command{awk} program
emulates the @command{cat} utility; it copies whatever you type on the
@@ -2203,9 +2417,10 @@ awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{}
@cindex @option{-f} option
@cindex command line, option @option{-f}
-The @option{-f} instructs the @command{awk} utility to get the @command{awk} program
-from the file @var{source-file}. Any file name can be used for
-@var{source-file}. For example, you could put the program:
+The @option{-f} instructs the @command{awk} utility to get the
+@command{awk} program from the file @var{source-file} (@pxref{Options}).
+Any @value{FN} can be used for @var{source-file}. For example, you
+could put the program:
@example
BEGIN @{ print "Don't Panic!" @}
@@ -2229,8 +2444,8 @@ awk "BEGIN @{ print \"Don't Panic!\" @}"
@noindent
This was explained earlier
(@pxref{Read Terminal}).
-Note that you don't usually need single quotes around the file name that you
-specify with @option{-f}, because most file names don't contain any of the shell's
+Note that you don't usually need single quotes around the @value{FN} that you
+specify with @option{-f}, because most @value{FN}s don't contain any of the shell's
special characters. Notice that in @file{advice}, the @command{awk}
program did not have single quotes around it. The quotes are only needed
for programs that are provided on the @command{awk} command line.
@@ -2240,7 +2455,7 @@ for programs that are provided on the @command{awk} command line.
@c STARTOFRANGE qs2x
@cindex @code{'} (single quote) in @command{gawk} command lines
If you want to clearly identify your @command{awk} program files as such,
-you can add the extension @file{.awk} to the file name. This doesn't
+you can add the extension @file{.awk} to the @value{FN}. This doesn't
affect the execution of the @command{awk} program but it does make
``housekeeping'' easier.
@@ -2266,16 +2481,7 @@ BEGIN @{ print "Don't Panic!" @}
@noindent
After making this file executable (with the @command{chmod} utility),
simply type @samp{advice}
-at the shell and the system arranges to run @command{awk}@footnote{The
-line beginning with @samp{#!} lists the full file name of an interpreter
-to run and an optional initial command-line argument to pass to that
-interpreter. The operating system then runs the interpreter with the given
-argument and the full argument list of the executed program. The first argument
-in the list is the full file name of the @command{awk} program.
-The rest of the
-argument list contains either options to @command{awk}, or data files,
-or both. Note that on many systems @command{awk} may be found in
-@file{/usr/bin} instead of in @file{/bin}. Caveat Emptor.} as if you had
+at the shell and the system arranges to run @command{awk} as if you had
typed @samp{awk -f advice}:
@example
@@ -2293,14 +2499,32 @@ Self-contained @command{awk} scripts are useful when you want to write a
program that users can invoke without their having to know that the program is
written in @command{awk}.
-@cindex sidebar, Portability Issues with @samp{#!}
+@cindex sidebar, Understanding @samp{#!}
@ifdocbook
@docbook
-<sidebar><title>Portability Issues with @samp{#!}</title>
+<sidebar><title>Understanding @samp{#!}</title>
@end docbook
@cindex portability, @code{#!} (executable scripts)
+@command{awk} is an @dfn{interpreted} language. This means that the
+@command{awk} utility reads your program and then processes your data
+according to the instructions in your program. (This is different
+from a @dfn{compiled} language such as C, where your program is first
+compiled into machine code that is executed directly by your system's
+hardware.) The @command{awk} utility is thus termed an @dfn{interpreter}.
+Many modern languages are interpreted.
+
+The line beginning with @samp{#!} lists the full @value{FN} of an
+interpreter to run and a single optional initial command-line argument
+to pass to that interpreter. The operating system then runs the
+interpreter with the given argument and the full argument list of the
+executed program. The first argument in the list is the full @value{FN}
+of the @command{awk} program. The rest of the argument list contains
+either options to @command{awk}, or @value{DF}s, or both. Note that on
+many systems @command{awk} may be found in @file{/usr/bin} instead of
+in @file{/bin}. Caveat Emptor.
+
Some systems limit the length of the interpreter name to 32 characters.
Often, this can be dealt with by using a symbolic link.
@@ -2312,8 +2536,7 @@ of some sort from @command{awk}.
@cindex @code{ARGC}/@code{ARGV} variables, portability and
@cindex portability, @code{ARGV} variable
-Finally,
-the value of @code{ARGV[0]}
+Finally, the value of @code{ARGV[0]}
(@pxref{Built-in Variables})
varies depending upon your operating system.
Some systems put @samp{awk} there, some put the full pathname
@@ -2329,11 +2552,29 @@ to provide your script name.
@ifnotdocbook
@cartouche
-@center @b{Portability Issues with @samp{#!}}
+@center @b{Understanding @samp{#!}}
@cindex portability, @code{#!} (executable scripts)
+@command{awk} is an @dfn{interpreted} language. This means that the
+@command{awk} utility reads your program and then processes your data
+according to the instructions in your program. (This is different
+from a @dfn{compiled} language such as C, where your program is first
+compiled into machine code that is executed directly by your system's
+hardware.) The @command{awk} utility is thus termed an @dfn{interpreter}.
+Many modern languages are interpreted.
+
+The line beginning with @samp{#!} lists the full @value{FN} of an
+interpreter to run and a single optional initial command-line argument
+to pass to that interpreter. The operating system then runs the
+interpreter with the given argument and the full argument list of the
+executed program. The first argument in the list is the full @value{FN}
+of the @command{awk} program. The rest of the argument list contains
+either options to @command{awk}, or @value{DF}s, or both. Note that on
+many systems @command{awk} may be found in @file{/usr/bin} instead of
+in @file{/bin}. Caveat Emptor.
+
Some systems limit the length of the interpreter name to 32 characters.
Often, this can be dealt with by using a symbolic link.
@@ -2345,8 +2586,7 @@ of some sort from @command{awk}.
@cindex @code{ARGC}/@code{ARGV} variables, portability and
@cindex portability, @code{ARGV} variable
-Finally,
-the value of @code{ARGV[0]}
+Finally, the value of @code{ARGV[0]}
(@pxref{Built-in Variables})
varies depending upon your operating system.
Some systems put @samp{awk} there, some put the full pathname
@@ -2403,7 +2643,7 @@ runs, it will probably print strange messages about syntax errors.
For example, look at the following:
@example
-$ @kbd{awk '@{ print "hello" @} # let's be cute'}
+$ @kbd{awk 'BEGIN @{ print "hello" @} # let's be cute'}
>
@end example
@@ -2451,7 +2691,28 @@ knowledge of shell quoting rules. The following rules apply only to
POSIX-compliant, Bourne-style shells (such as Bash, the GNU Bourne-Again
Shell). If you use the C shell, you're on your own.
-@itemize @bullet
+Before diving into the rules, we introduce a concept that appears
+throughout this @value{DOCUMENT}, which is that of the @dfn{null},
+or empty, string.
+
+The null string is character data that has no value.
+In other words, it is empty. It is written in @command{awk} programs
+like this: @code{""}. In the shell, it can be written using single
+or double quotes: @code{""} or @code{''}. While the null string has
+no characters in it, it does exist. Consider this command:
+
+@example
+$ @kbd{echo ""}
+@end example
+
+@noindent
+Here, the @command{echo} utility receives a single argument, even
+though that argument has no characters in it. In the rest of this
+@value{DOCUMENT}, we use the terms @dfn{null string} and @dfn{empty string}
+interchangeably. Now, on to the quoting rules.
+
+
+@itemize @value{BULLET}
@item
Quoted items can be concatenated with nonquoted items as well as with other
quoted items. The shell turns everything into one argument for
@@ -2505,7 +2766,7 @@ Note that the single quote is not special within double quotes.
@item
Null strings are removed when they occur as part of a non-null
-command-line argument, while explicit non-null objects are kept.
+command-line argument, while explicit null objects are kept.
For example, to specify that the field separator @code{FS} should
be set to the null string, use:
@@ -2523,7 +2784,7 @@ awk -F"" '@var{program}' @var{files} # wrong!
@noindent
In the second case, @command{awk} will attempt to use the text of the program
-as the value of @code{FS}, and the first file name as the text of the program!
+as the value of @code{FS}, and the first @value{FN} as the text of the program!
This results in syntax errors at best, and confusing behavior at worst.
@end itemize
@@ -2626,6 +2887,7 @@ Although this @value{DOCUMENT} generally only worries about POSIX systems and th
POSIX shell, the following issue arises often enough for many users that
it is worth addressing.
+@cindex Brink, Jeroen
The ``shells'' on Microsoft Windows systems use the double-quote
character for quoting, and make it difficult or impossible to include an
escaped double-quote character in a command-line script.
@@ -2638,21 +2900,22 @@ gawk "@{ print \"\042\" $0 \"\042\" @}" @var{file}
@node Sample Data Files
-@section Data Files for the Examples
-@c For gawk >= 4.0, update these data files. No-one has such slow modems!
+@section @value{DDF}s for the Examples
@cindex input files, examples
@cindex @code{mail-list} file
Many of the examples in this @value{DOCUMENT} take their input from two sample
-data files. The first, @file{mail-list}, represents a list of peoples' names
+@value{DF}s. The first, @file{mail-list}, represents a list of people's names
together with their email addresses and information about those people.
-The second data file, called @file{inventory-shipped}, contains
+The second @value{DF}, called @file{inventory-shipped}, contains
information about monthly shipments. In both files,
each line is considered to be one @dfn{record}.
-In the data file @file{mail-list}, each record contains the name of a person,
+In the @value{DF} @file{mail-list}, each record contains the name of a person,
his/her phone number, his/her email address, and a code for their relationship
-with the author of the list. An @samp{A} in the last column
+with the author of the list.
+The columns are aligned using spaces.
+An @samp{A} in the last column
means that the person is an acquaintance. An @samp{F} in the last
column means that the person is a friend.
An @samp{R} means that the person is a relative:
@@ -2679,13 +2942,14 @@ Jean-Paul 555-2127 jeanpaul.campanorum@@nyu.edu R
@end example
@cindex @code{inventory-shipped} file
-The data file @file{inventory-shipped} represents
+The @value{DF} @file{inventory-shipped} represents
information about shipments during the year.
Each record contains the month, the number
of green crates shipped, the number of red boxes shipped, the number of
orange bags shipped, and the number of blue packages shipped,
respectively. There are 16 entries, covering the 12 months of last year
and the first four months of the current year.
+An empty line separates the data for the two years.
@example
@c file eg/data/inventory-shipped
@@ -2759,10 +3023,10 @@ for @emph{every} input line. If the action is omitted, the default
action is to print all lines that match the pattern.
@cindex actions, empty
-Thus, we could leave out the action (the @code{print} statement and the curly
+Thus, we could leave out the action (the @code{print} statement and the
braces) in the previous example and the result would be the same:
@command{awk} prints all lines matching the pattern @samp{li}. By comparison,
-omitting the @code{print} statement but retaining the curly braces makes an
+omitting the @code{print} statement but retaining the braces makes an
empty action that does nothing (i.e., no lines are printed).
@cindex @command{awk} programs, one-line examples
@@ -2771,44 +3035,49 @@ collection of useful, short programs to get you started. Some of these
programs contain constructs that haven't been covered yet. (The description
of the program will give you a good idea of what is going on, but please
read the rest of the @value{DOCUMENT} to become an @command{awk} expert!)
-Most of the examples use a data file named @file{data}. This is just a
+Most of the examples use a @value{DF} named @file{data}. This is just a
placeholder; if you use these programs yourself, substitute
-your own file names for @file{data}.
+your own @value{FN}s for @file{data}.
For future reference, note that there is often more than
one way to do things in @command{awk}. At some point, you may want
to look back at these examples and see if
you can come up with different ways to do the same things shown here:
-@itemize @bullet
+@itemize @value{BULLET}
@item
-Print the length of the longest input line:
+Print every line that is longer than 80 characters:
@example
-awk '@{ if (length($0) > max) max = length($0) @}
- END @{ print max @}' data
+awk 'length($0) > 80' data
@end example
+The sole rule has a relational expression as its pattern and it has no
+action---so it uses the default action, printing the record.
+
@item
-Print every line that is longer than 80 characters:
+Print the length of the longest input line:
@example
-awk 'length($0) > 80' data
+awk '@{ if (length($0) > max) max = length($0) @}
+ END @{ print max @}' data
@end example
-The sole rule has a relational expression as its pattern and it has no
-action---so the default action, printing the record, is used.
+The code associated with @code{END} executes after all
+input has been read; it's the other side of the coin to @code{BEGIN}.
@cindex @command{expand} utility
@item
Print the length of the longest line in @file{data}:
@example
-expand data | awk '@{ if (x < length()) x = length() @}
+expand data | awk '@{ if (x < length($0)) x = length($0) @}
END @{ print "maximum line length is " x @}'
@end example
+This example differs slightly from the previous one:
The input is processed by the @command{expand} utility to change TABs
-into spaces, so the widths compared are actually the right-margin columns.
+into spaces, so the widths compared are actually the right-margin columns,
+as opposed to the number of input characters on each line.
@item
Print every line that has at least one field:
@@ -2863,7 +3132,7 @@ awk 'END @{ print NR @}' data
@end example
@item
-Print the even-numbered lines in the data file:
+Print the even-numbered lines in the @value{DF}:
@example
awk 'NR % 2 == 0' data
@@ -2879,9 +3148,9 @@ the program would print the odd-numbered lines.
The @command{awk} utility reads the input files one line at a
time. For each line, @command{awk} tries the patterns of each of the rules.
-If several patterns match, then several actions are run in the order in
+If several patterns match, then several actions execute in the order in
which they appear in the @command{awk} program. If no patterns match, then
-no actions are run.
+no actions run.
After processing all the rules that match the line (and perhaps there are none),
@command{awk} reads the next line. (However,
@@ -2905,7 +3174,7 @@ This program prints every line that contains the string
@samp{12} @emph{or} the string @samp{21}. If a line contains both
strings, it is printed twice, once by each rule.
-This is what happens if we run this program on our two sample data files,
+This is what happens if we run this program on our two sample @value{DF}s,
@file{mail-list} and @file{inventory-shipped}:
@example
@@ -2935,8 +3204,8 @@ features that haven't been covered yet, so don't worry if you don't
understand all the details:
@example
-LC_ALL=C ls -l | awk '$6 == "Nov" @{ sum += $5 @}
- END @{ print sum @}'
+ls -l | awk '$6 == "Nov" @{ sum += $5 @}
+ END @{ print sum @}'
@end example
@cindex @command{ls} utility
@@ -2965,7 +3234,7 @@ the file. The fourth field identifies the group of the file.
The fifth field contains the size of the file in bytes. The
sixth, seventh, and eighth fields contain the month, day, and time,
respectively, that the file was last modified. Finally, the ninth field
-contains the file name.@footnote{The @samp{LC_ALL=C} is
+contains the @value{FN}.@footnote{The @samp{LC_ALL=C} is
needed to produce this traditional-style output from @command{ls}.}
@c @cindex automatic initialization
@@ -2973,8 +3242,8 @@ needed to produce this traditional-style output from @command{ls}.}
The @samp{$6 == "Nov"} in our @command{awk} program is an expression that
tests whether the sixth field of the output from @w{@samp{ls -l}}
matches the string @samp{Nov}. Each time a line has the string
-@samp{Nov} for its sixth field, the action @samp{sum += $5} is
-performed. This adds the fifth field (the file's size) to the variable
+@samp{Nov} for its sixth field, @command{awk} performs the action
+@samp{sum += $5}. This adds the fifth field (the file's size) to the variable
@code{sum}. As a result, when @command{awk} has finished reading all the
input lines, @code{sum} is the total of the sizes of the files whose
lines matched the pattern. (This works because @command{awk} variables
@@ -3041,7 +3310,7 @@ We have generally not used backslash continuation in our sample programs.
@command{gawk} places no limit on the
length of a line, so backslash continuation is never strictly necessary;
it just makes programs more readable. For this same reason, as well as
-for clarity, we have kept most statements short in the sample programs
+for clarity, we have kept most statements short in the programs
presented throughout the @value{DOCUMENT}. Backslash continuation is
most useful when your @command{awk} program is in a separate source file
instead of entered from the command line. You should also note that
@@ -3106,7 +3375,7 @@ $ gawk 'BEGIN @{ print "dont panic" # a friendly \
> BEGIN rule
> @}'
@error{} gawk: cmd. line:2: BEGIN rule
-@error{} gawk: cmd. line:2: ^ parse error
+@error{} gawk: cmd. line:2: ^ syntax error
@end example
@noindent
@@ -3154,7 +3423,7 @@ and array sorting.
As we develop our presentation of the @command{awk} language, we introduce
most of the variables and many of the functions. They are described
-systematically in @ref{Built-in Variables}, and
+systematically in @ref{Built-in Variables}, and in
@ref{Built-in}.
@node When
@@ -3179,25 +3448,64 @@ edit-compile-test-debug cycle of software development.
@cindex Brian Kernighan's @command{awk}
Complex programs have been written in @command{awk}, including a complete
-retargetable assembler for eight-bit microprocessors (@pxref{Glossary}, for
-more information), and a microcode assembler for a special-purpose Prolog
+retargetable assembler for
+@ifclear FOR_PRINT
+eight-bit microprocessors (@pxref{Glossary}, for more information),
+@end ifclear
+@ifset FOR_PRINT
+eight-bit microprocessors,
+@end ifset
+and a microcode assembler for a special-purpose Prolog
computer.
While the original @command{awk}'s capabilities were strained by tasks
-of such complexity, modern versions are more capable. Even Brian Kernighan's
-version of @command{awk} has fewer predefined limits, and those
-that it has are much larger than they used to be.
+of such complexity, modern versions are more capable.
@cindex @command{awk} programs, complex
-If you find yourself writing @command{awk} scripts of more than, say, a few
-hundred lines, you might consider using a different programming
-language. Emacs Lisp is a good choice if you need sophisticated string
-or pattern matching capabilities. The shell is also good at string and
-pattern matching; in addition, it allows powerful use of the system
-utilities. More conventional languages, such as C, C++, and Java, offer
-better facilities for system programming and for managing the complexity
-of large programs. Programs in these languages may require more lines
-of source code than the equivalent @command{awk} programs, but they are
-easier to maintain and usually run more efficiently.
+If you find yourself writing @command{awk} scripts of more than, say,
+a few hundred lines, you might consider using a different programming
+language. The shell is good at string and pattern matching; in addition,
+it allows powerful use of the system utilities. Python offers a nice
+balance between high-level ease of programming and access to system
+facilities.@footnote{Other popular scripting languages include Ruby
+and Perl.}
+
+@node Intro Summary
+@section Summary
+
+@c FIXME: Review this chapter for summary of builtin functions called.
+@itemize @value{BULLET}
+@item
+Programs in @command{awk} consist of @var{pattern}-@var{action} pairs.
+
+@item
+An @var{action} without a @var{pattern} always runs. The default
+@var{action} for a pattern without one is @samp{@{ print $0 @}}.
+
+@item
+Use either
+@samp{awk '@var{program}' @var{files}}
+or
+@samp{awk -f @var{program-file} @var{files}}
+to run @command{awk}.
+
+@item
+You may use the special @samp{#!} header line to create @command{awk}
+programs that are directly executable.
+
+@item
+Comments in @command{awk} programs start with @samp{#} and continue to
+the end of the same line.
+
+@item
+Be aware of quoting issues when writing @command{awk} programs as
+part of a larger shell script (or MS-Windows batch file).
+
+@item
+You may use backslash continuation to continue a source line.
+Lines are automatically continued after
+a comma, open brace, question mark, colon,
+@samp{||}, @samp{&&}, @code{do} and @code{else}.
+@end itemize
@node Invoking Gawk
@chapter Running @command{awk} and @command{gawk}
@@ -3227,6 +3535,7 @@ things in this @value{CHAPTER} that don't interest you right now.
* Loading Shared Libraries:: Loading shared libraries into your program.
* Obsolete:: Obsolete Options and/or features.
* Undocumented:: Undocumented Options and Features.
+* Invoking Summary:: Invocation summary.
@end menu
@node Command Line
@@ -3240,19 +3549,10 @@ There are two ways to run @command{awk}---with an explicit program or with
one or more program files. Here are templates for both of them; items
enclosed in [@dots{}] in these templates are optional:
-@ifnotdocbook
-@example
-awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{}
-awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
-@end example
-@end ifnotdocbook
-
-@c FIXME - find a better way to mark this up in docbook
-@docbook
-<screen>awk [<replaceable>options</replaceable>] -f progfile [<literal>--</literal>] <replaceable>file</replaceable> &#8230;
-awk [<replaceable>options</replaceable>] [<literal>--</literal>] '<replaceable>program</replaceable>' <replaceable>file</replaceable> &#8230;
-</screen>
-@end docbook
+@display
+@command{awk} [@var{options}] @option{-f} @var{progfile} [@option{--}] @var{file} @dots{}
+@command{awk} [@var{options}] [@option{--}] @code{'@var{program}'} @var{file} @dots{}
+@end display
@cindex GNU long options
@cindex long options
@@ -3368,8 +3668,8 @@ conventions.
@cindex @code{-} (hyphen), filenames beginning with
@cindex hyphen (@code{-}), filenames beginning with
-This is useful if you have file names that start with @samp{-},
-or in shell scripts, if you have file names that will be specified
+This is useful if you have @value{FN}s that start with @samp{-},
+or in shell scripts, if you have @value{FN}s that will be specified
by the user that could start with @samp{-}.
It is also useful for passing options on to the @command{awk}
program; see @ref{Getopt Function}.
@@ -3379,9 +3679,10 @@ program; see @ref{Getopt Function}.
The following list describes @command{gawk}-specific options:
-@table @code
-@item -b
-@itemx --characters-as-bytes
+@c Have to use @asis here to get docbook to come out right.
+@table @asis
+@item @option{-b}
+@itemx @option{--characters-as-bytes}
@cindex @option{-b} option
@cindex @option{--characters-as-bytes} option
Cause @command{gawk} to treat all input data as single-byte characters.
@@ -3389,33 +3690,36 @@ In addition, all output written with @code{print} or @code{printf}
is treated as single-byte characters.
Normally, @command{gawk} follows the POSIX standard and attempts to process
-its input data according to the current locale. This can often involve
+its input data according to the current locale (@pxref{Locales}). This can often involve
converting multibyte characters into wide characters (internally), and
can lead to problems or confusion if the input data does not contain valid
multibyte characters. This option is an easy way to tell @command{gawk}:
``hands off my data!''.
-@item -c
-@itemx --traditional
+@item @option{-c}
+@itemx @option{--traditional}
@cindex @option{-c} option
@cindex @option{--traditional} option
@cindex compatibility mode (@command{gawk}), specifying
Specify @dfn{compatibility mode}, in which the GNU extensions to
the @command{awk} language are disabled, so that @command{gawk} behaves just
-like Brian Kernighan's version @command{awk}.
+like BWK @command{awk}.
@xref{POSIX/GNU},
-which summarizes the extensions. Also see
+which summarizes the extensions.
+@ifclear FOR_PRINT
+Also see
@ref{Compatibility Mode}.
+@end ifclear
-@item -C
-@itemx --copyright
+@item @option{-C}
+@itemx @option{--copyright}
@cindex @option{-C} option
@cindex @option{--copyright} option
@cindex GPL (General Public License), printing
Print the short version of the General Public License and then exit.
-@item -d@r{[}@var{file}@r{]}
-@itemx --dump-variables@r{[}=@var{file}@r{]}
+@item @option{-d}[@var{file}]
+@itemx @option{--dump-variables}[@code{=}@var{file}]
@cindex @option{-d} option
@cindex @option{--dump-variables} option
@cindex dump all variables of a program
@@ -3437,21 +3741,21 @@ inadvertently use global variables that you meant to be local.
(This is a particularly easy mistake to make with simple variable
names like @code{i}, @code{j}, etc.)
-@item -D@r{[}@var{file}@r{]}
-@itemx --debug=@r{[}@var{file}@r{]}
+@item @option{-D}[@var{file}]
+@itemx @option{--debug}[@code{=}@var{file}]
@cindex @option{-D} option
@cindex @option{--debug} option
@cindex @command{awk} debugging, enabling
Enable debugging of @command{awk} programs
(@pxref{Debugging}).
-By default, the debugger reads commands interactively from the terminal.
+By default, the debugger reads commands interactively from the keyboard.
The optional @var{file} argument allows you to specify a file with a list
of commands for the debugger to execute non-interactively.
No space is allowed between the @option{-D} and @var{file}, if
@var{file} is supplied.
-@item -e @var{program-text}
-@itemx --source @var{program-text}
+@item @option{-e} @var{program-text}
+@itemx @option{--source} @var{program-text}
@cindex @option{-e} option
@cindex @option{--source} option
@cindex source code, mixing
@@ -3462,8 +3766,8 @@ This is particularly useful
when you have library functions that you want to use from your command-line
programs (@pxref{AWKPATH Variable}).
-@item -E @var{file}
-@itemx --exec @var{file}
+@item @option{-E} @var{file}
+@itemx @option{--exec} @var{file}
@cindex @option{-E} option
@cindex @option{--exec} option
@cindex @command{awk} programs, location of
@@ -3471,7 +3775,7 @@ programs (@pxref{AWKPATH Variable}).
Similar to @option{-f}, read @command{awk} program text from @var{file}.
There are two differences from @option{-f}:
-@itemize @bullet
+@itemize @value{BULLET}
@item
This option terminates option processing; anything
else on the command line is passed on directly to the @command{awk} program.
@@ -3484,7 +3788,7 @@ Command-line variable assignments of the form
This option is particularly necessary for World Wide Web CGI applications
that pass arguments through the URL; using this option prevents a malicious
(or other) user from passing in options, assignments, or @command{awk} source
-code (via @option{--source}) to the CGI application. This option should be used
+code (via @option{-e}) to the CGI application. This option should be used
with @samp{#!} scripts (@pxref{Executable Scripts}), like so:
@example
@@ -3493,20 +3797,20 @@ with @samp{#!} scripts (@pxref{Executable Scripts}), like so:
@var{awk program here @dots{}}
@end example
-@item -g
-@itemx --gen-pot
+@item @option{-g}
+@itemx @option{--gen-pot}
@cindex @option{-g} option
@cindex @option{--gen-pot} option
@cindex portable object files, generating
@cindex files, portable object, generating
Analyze the source program and
-generate a GNU @code{gettext} Portable Object Template file on standard
+generate a GNU @command{gettext} Portable Object Template file on standard
output for all string constants that have been marked for translation.
@xref{Internationalization},
for information about this option.
-@item -h
-@itemx --help
+@item @option{-h}
+@itemx @option{--help}
@cindex @option{-h} option
@cindex @option{--help} option
@cindex GNU long options, printing list of
@@ -3515,42 +3819,47 @@ for information about this option.
Print a ``usage'' message summarizing the short and long style options
that @command{gawk} accepts and then exit.
-@item -i @var{source-file}
-@itemx --include @var{source-file}
+@item @option{-i} @var{source-file}
+@itemx @option{--include} @var{source-file}
@cindex @option{-i} option
@cindex @option{--include} option
@cindex @command{awk} programs, location of
-Read @command{awk} source library from @var{source-file}. This option is
-completely equivalent to using the @samp{@@include} directive inside
-your program. This option is very
-similar to the @option{-f} option, but there are two important differences.
-First, when @option{-i} is used, the program source will not be loaded if it has
-been previously loaded, whereas the @option{-f} will always load the file.
+Read @command{awk} source library from @var{source-file}. This option
+is completely equivalent to using the @code{@@include} directive inside
+your program. This option is very similar to the @option{-f} option,
+but there are two important differences. First, when @option{-i} is
+used, the program source is not loaded if it has been previously
+loaded, whereas with @option{-f}, @command{gawk} always loads the file.
Second, because this option is intended to be used with code libraries,
@command{gawk} does not recognize such files as constituting main program
-input. Thus, after processing an @option{-i} argument, @command{gawk} still expects to
-find the main source code via the @option{-f} option or on the command-line.
+input. Thus, after processing an @option{-i} argument, @command{gawk}
+still expects to find the main source code via the @option{-f} option
+or on the command line.
-@item -l @var{lib}
-@itemx --load @var{lib}
+@item @option{-l} @var{ext}
+@itemx @option{--load} @var{ext}
@cindex @option{-l} option
@cindex @option{--load} option
-@cindex loading, library
-Load a shared library @var{lib}. This searches for the library using the @env{AWKLIBPATH}
+@cindex loading, extensions
+Load a dynamic extension named @var{ext}. Extensions
+are stored as system shared libraries.
+This option searches for the library using the @env{AWKLIBPATH}
environment variable. The correct library suffix for your platform will be
-supplied by default, so it need not be specified in the library name.
-The library initialization routine should be named @code{dl_load()}.
-An alternative is to use the @samp{@@load} keyword inside the program to load
-a shared library.
+supplied by default, so it need not be specified in the extension name.
+The extension initialization routine should be named @code{dl_load()}.
+An alternative is to use the @code{@@load} keyword inside the program to load
+a shared library. This feature is described in detail in @ref{Dynamic Extensions}.
-@item -L @r{[}value@r{]}
-@itemx --lint@r{[}=value@r{]}
+@item @option{-L}[@var{value}]
+@itemx @option{--lint}[@code{=}@var{value}]
@cindex @option{-L} option
@cindex @option{--lint} option
@cindex lint checking, issuing warnings
@cindex warnings, issuing
Warn about constructs that are dubious or nonportable to
other @command{awk} implementations.
+No space is allowed between the @option{-L} and @var{value}, if
+@var{value} is supplied.
Some warnings are issued when @command{gawk} first reads your program. Others
are issued at runtime, as your program executes.
With an optional argument of @samp{fatal},
@@ -3566,16 +3875,16 @@ when eliminating problems pointed out by @option{--lint}, you should take
care to search for all occurrences of each inappropriate construct. As
@command{awk} programs are usually short, doing so is not burdensome.
-@item -M
-@itemx --bignum
+@item @option{-M}
+@itemx @option{--bignum}
@cindex @option{-M} option
@cindex @option{--bignum} option
Force arbitrary precision arithmetic on numbers. This option has no effect
if @command{gawk} is not compiled to use the GNU MPFR and MP libraries
-(@pxref{Gawk and MPFR}).
+(@pxref{Arbitrary Precision Arithmetic}).
-@item -n
-@itemx --non-decimal-data
+@item @option{-n}
+@itemx @option{--non-decimal-data}
@cindex @option{-n} option
@cindex @option{--non-decimal-data} option
@cindex hexadecimal values@comma{} enabling interpretation of
@@ -3590,34 +3899,39 @@ This option can severely break old programs.
Use with care.
@end quotation
-@item -N
-@itemx --use-lc-numeric
+@item @option{-N}
+@itemx @option{--use-lc-numeric}
@cindex @option{-N} option
@cindex @option{--use-lc-numeric} option
Force the use of the locale's decimal point character
when parsing numeric input data (@pxref{Locales}).
-@item -o@r{[}@var{file}@r{]}
-@itemx --pretty-print@r{[}=@var{file}@r{]}
+@item @option{-o}[@var{file}]
+@itemx @option{--pretty-print}[@code{=}@var{file}]
@cindex @option{-o} option
@cindex @option{--pretty-print} option
Enable pretty-printing of @command{awk} programs.
-By default, output program is created in a file named @file{awkprof.out}.
+By default, the output program is created in a file named @file{awkprof.out}
+(@pxref{Profiling}).
The optional @var{file} argument allows you to specify a different
-file name for the output.
+@value{FN} for the output.
No space is allowed between the @option{-o} and @var{file}, if
@var{file} is supplied.
-@item -O
-@itemx --optimize
+@quotation NOTE
+In the past, this option would also execute your program.
+This is no longer the case.
+@end quotation
+
+@item @option{-O}
+@itemx @option{--optimize}
@cindex @option{--optimize} option
@cindex @option{-O} option
Enable some optimizations on the internal representation of the program.
-At the moment this includes just simple constant folding. The @command{gawk}
-maintainer hopes to add more optimizations over time.
+At the moment this includes just simple constant folding.
-@item -p@r{[}@var{file}@r{]}
-@itemx --profile@r{[}=@var{file}@r{]}
+@item @option{-p}[@var{file}]
+@itemx @option{--profile}[@code{=}@var{file}]
@cindex @option{-p} option
@cindex @option{--profile} option
@cindex @command{awk} profiling, enabling
@@ -3625,15 +3939,15 @@ Enable profiling of @command{awk} programs
(@pxref{Profiling}).
By default, profiles are created in a file named @file{awkprof.out}.
The optional @var{file} argument allows you to specify a different
-file name for the profile file.
+@value{FN} for the profile file.
No space is allowed between the @option{-p} and @var{file}, if
@var{file} is supplied.
The profile contains execution counts for each statement in the program
in the left margin, and function call counts for each function.
-@item -P
-@itemx --posix
+@item @option{-P}
+@itemx @option{--posix}
@cindex @option{-P} option
@cindex @option{--posix} option
@cindex POSIX mode
@@ -3647,7 +3961,7 @@ Also,
the following additional
restrictions apply:
-@itemize @bullet
+@itemize @value{BULLET}
@cindex newlines
@cindex whitespace, newlines as
@@ -3663,7 +3977,7 @@ Newlines are not allowed after @samp{?} or @samp{:}
@cindex @code{FS} variable, as TAB character
@item
-Specifying @samp{-Ft} on the command-line does not set the value
+Specifying @samp{-Ft} on the command line does not set the value
of @code{FS} to be a single TAB character
(@pxref{Field Separators}).
@@ -3680,10 +3994,10 @@ data (@pxref{Locales}).
@cindex @option{--posix} option, @code{--traditional} option and
If you supply both @option{--traditional} and @option{--posix} on the
command line, @option{--posix} takes precedence. @command{gawk}
-also issues a warning if both options are supplied.
+issues a warning if both options are supplied.
-@item -r
-@itemx --re-interval
+@item @option{-r}
+@itemx @option{--re-interval}
@cindex @option{-r} option
@cindex @option{--re-interval} option
@cindex regular expressions, interval expressions and
@@ -3692,10 +4006,10 @@ Allow interval expressions
in regexps.
This is now @command{gawk}'s default behavior.
Nevertheless, this option remains both for backward compatibility,
-and for use in combination with the @option{--traditional} option.
+and for use in combination with @option{--traditional}.
-@item -S
-@itemx --sandbox
+@item @option{-S}
+@itemx @option{--sandbox}
@cindex @option{-S} option
@cindex @option{--sandbox} option
@cindex sandbox mode
@@ -3705,18 +4019,18 @@ output redirections with @code{print} and @code{printf},
and dynamic extensions.
This is particularly useful when you want to run @command{awk} scripts
from questionable sources and need to make sure the scripts
-can't access your system (other than the specified input data file).
+can't access your system (other than the specified input @value{DF}).
-@item -t
-@itemx --lint-old
+@item @option{-t}
+@itemx @option{--lint-old}
@cindex @option{-t} option
@cindex @option{--lint-old} option
Warn about constructs that are not available in the original version of
@command{awk} from Version 7 Unix
(@pxref{V7/SVR3.1}).
-@item -V
-@itemx --version
+@item @option{-V}
+@itemx @option{--version}
@cindex @option{-V} option
@cindex @option{--version} option
@cindex @command{gawk}, versions of, information about@comma{} printing
@@ -3751,23 +4065,23 @@ of having to be included into each individual program.
function names must be unique.)
With standard @command{awk}, library functions can still be used, even
-if the program is entered at the terminal,
+if the program is entered at the keyboard,
by specifying @samp{-f /dev/tty}. After typing your program,
type @kbd{Ctrl-d} (the end-of-file character) to terminate it.
(You may also use @samp{-f -} to read program source from the standard
input but then you will not be able to also use the standard input as a
source of data.)
-Because it is clumsy using the standard @command{awk} mechanisms to mix source
-file and command-line @command{awk} programs, @command{gawk} provides the
-@option{--source} option. This does not require you to pre-empt the standard
-input for your source code; it allows you to easily mix command-line
-and library source code
-(@pxref{AWKPATH Variable}).
-The @option{--source} option may also be used multiple times on the command line.
+Because it is clumsy using the standard @command{awk} mechanisms to mix
+source file and command-line @command{awk} programs, @command{gawk}
+provides the @option{-e} option. This does not require you to
+pre-empt the standard input for your source code; it allows you to easily
+mix command-line and library source code (@pxref{AWKPATH Variable}).
+As with @option{-f}, the @option{-e} and @option{-i}
+options may also be used multiple times on the command line.
-@cindex @option{--source} option
-If no @option{-f} or @option{--source} option is specified, then @command{gawk}
+@cindex @option{-e} option
+If no @option{-f} or @option{-e} option is specified, then @command{gawk}
uses the first non-option command-line argument as the text of the
program source code.
@@ -3776,7 +4090,7 @@ program source code.
@cindex POSIX mode
If the environment variable @env{POSIXLY_CORRECT} exists,
then @command{gawk} behaves in strict POSIX mode, exactly as if
-you had supplied the @option{--posix} command-line option.
+you had supplied @option{--posix}.
Many GNU programs look for this environment variable to suppress
extensions that conflict with POSIX, but @command{gawk} behaves
differently: it suppresses all extensions, even those that do not
@@ -3835,11 +4149,16 @@ included. As each element of @code{ARGV} is processed, @command{gawk}
sets the variable @code{ARGIND} to the index in @code{ARGV} of the
current element.
+@c FIXME: One day, move the ARGC and ARGV node closer to here.
+Changing @code{ARGC} and @code{ARGV} in your @command{awk} program lets
+you control how @command{awk} processes the input files; this is described
+in more detail in @ref{ARGC and ARGV}.
+
@cindex input files, variable assignments and
@cindex variable assignments and input files
-The distinction between file name arguments and variable-assignment
+The distinction between @value{FN} arguments and variable-assignment
arguments is made when @command{awk} is about to open the next input file.
-At that point in execution, it checks the file name to see whether
+At that point in execution, it checks the @value{FN} to see whether
it is really a variable assignment; if so, @command{awk} sets the variable
instead of reading a file.
@@ -3855,8 +4174,8 @@ The variable values given on the command line are processed for escape
sequences (@pxref{Escape Sequences}).
@value{DARKCORNER}
-In some earlier implementations of @command{awk}, when a variable assignment
-occurred before any file names, the assignment would happen @emph{before}
+In some very early implementations of @command{awk}, when a variable assignment
+occurred before any @value{FN}s, the assignment would happen @emph{before}
the @code{BEGIN} rule was executed. @command{awk}'s behavior was thus
inconsistent; some command-line assignments were available inside the
@code{BEGIN} rule, while others were not. Unfortunately,
@@ -3867,8 +4186,8 @@ upon the old behavior.
The variable assignment feature is most useful for assigning to variables
such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and
-output formats before scanning the data files. It is also useful for
-controlling state if multiple passes are needed over a data file. For
+output formats, before scanning the @value{DF}s. It is also useful for
+controlling state if multiple passes are needed over a @value{DF}. For
example:
@cindex files, multiple passes over
@@ -3904,13 +4223,13 @@ You may also use @code{"-"} to name standard input when reading
files with @code{getline} (@pxref{Getline/File}).
In addition, @command{gawk} allows you to specify the special
-file name @file{/dev/stdin}, both on the command line and
+@value{FN} @file{/dev/stdin}, both on the command line and
with @code{getline}.
Some other versions of @command{awk} also support this, but it
is not standard.
(Some operating systems provide a @file{/dev/stdin} file
-in the file system, however, @command{gawk} always processes
-this file name itself.)
+in the filesystem; however, @command{gawk} always processes
+this @value{FN} itself.)
@node Environment Variables
@section The Environment Variables @command{gawk} Uses
@@ -3935,12 +4254,12 @@ behaves.
@cindex differences in @command{awk} and @command{gawk}, @code{AWKPATH} environment variable
@ifinfo
The previous @value{SECTION} described how @command{awk} program files can be named
-on the command-line with the @option{-f} option.
+on the command line with the @option{-f} option.
@end ifinfo
In most @command{awk}
implementations, you must supply a precise path name for each program
file, unless the file is in the current directory.
-But in @command{gawk}, if the file name supplied to the @option{-f}
+But in @command{gawk}, if the @value{FN} supplied to the @option{-f}
or @option{-i} options
does not contain a directory separator @samp{/}, then @command{gawk} searches a list of
directories (called the @dfn{search path}), one by one, looking for a
@@ -3957,13 +4276,13 @@ directory is the value of @samp{$(datadir)} generated when
@command{gawk} was configured. You probably don't need to worry about this,
though.}
-The search path feature is particularly useful for building libraries
+The search path feature is particularly helpful for building libraries
of useful @command{awk} functions. The library files can be placed in a
standard directory in the default path and then specified on
-the command line with a short file name. Otherwise, the full file name
+the command line with a short @value{FN}. Otherwise, the full @value{FN}
would have to be typed for each file.
-By using the @option{-i} option, or the @option{--source} and @option{-f} options, your command-line
+By using the @option{-i} option, or the @option{-e} and @option{-f} options, your command-line
@command{awk} programs can use facilities in @command{awk} library files
(@pxref{Library Functions}).
Path searching is not done if @command{gawk} is in compatibility mode.
@@ -3971,17 +4290,20 @@ This is true for both @option{--traditional} and @option{--posix}.
@xref{Options}.
If the source code is not found after the initial search, the path is searched
-again after adding the default @samp{.awk} suffix to the filename.
+again after adding the default @samp{.awk} suffix to the @value{FN}.
@quotation NOTE
+@c 4/2014:
+@c using @samp{.} to get quotes, since @file{} no longer supplies them.
To include
the current directory in the path, either place
-@file{.} explicitly in the path or write a null entry in the
+@samp{.} explicitly in the path or write a null entry in the
path. (A null entry is indicated by starting or ending the path with a
-colon or by placing two colons next to each other (@samp{::}).)
+colon or by placing two colons next to each other [@samp{::}].)
This path search mechanism is similar
to the shell's.
-@c someday, @cite{The Bourne Again Shell}....
+(See @uref{http://www.gnu.org/software/bash/manual/,
+@cite{The Bourne-Again SHell manual}.})
However, @command{gawk} always looks in the current directory @emph{before}
searching @env{AWKPATH}, so there is no real reason to include
@@ -3993,7 +4315,7 @@ the current directory in the search path.
If @env{AWKPATH} is not defined in the
environment, @command{gawk} places its default search path into
@code{ENVIRON["AWKPATH"]}. This makes it easy to determine
-the actual search path that @command{gawk} will use
+the actual search path that @command{gawk} used
from within an @command{awk} program.
While you can change @code{ENVIRON["AWKPATH"]} within your @command{awk}
@@ -4005,18 +4327,18 @@ found, and @command{gawk} no longer needs to use @env{AWKPATH}.
@node AWKLIBPATH Variable
@subsection The @env{AWKLIBPATH} Environment Variable
@cindex @env{AWKLIBPATH} environment variable
-@cindex directories, searching for shared libraries
-@cindex search paths, for shared libraries
+@cindex directories, searching for loadable extensions
+@cindex search paths, for loadable extensions
@cindex differences in @command{awk} and @command{gawk}, @code{AWKLIBPATH} environment variable
The @env{AWKLIBPATH} environment variable is similar to the @env{AWKPATH}
-variable, but it is used to search for shared libraries specified
-with the @option{-l} option rather than for source files. If the library
-is not found, the path is searched again after adding the appropriate
-shared library suffix for the platform. For example, on GNU/Linux systems,
-the suffix @samp{.so} is used.
-The search path specified is also used for libraries loaded via the
-@samp{@@load} keyword (@pxref{Loading Shared Libraries}).
+variable, but it is used to search for loadable extensions (stored as
+system shared libraries) specified with the @option{-l} option rather
+than for source files. If the extension is not found, the path is
+searched again after adding the appropriate shared library suffix for
+the platform. For example, on GNU/Linux systems, the suffix @samp{.so}
+is used. The search path specified is also used for extensions loaded
+via the @code{@@load} keyword (@pxref{Loading Shared Libraries}).
@node Other Environment Variables
@subsection Other Environment Variables
@@ -4027,12 +4349,12 @@ list are meant to be used by regular users.
@table @env
@item POSIXLY_CORRECT
-Causes @command{gawk} to switch POSIX compatibility
+Causes @command{gawk} to switch to POSIX compatibility
mode, disabling all traditional and GNU extensions.
@xref{Options}.
@item GAWK_SOCK_RETRIES
-Controls the number of time @command{gawk} will attempt to
+Controls the number of times @command{gawk} attempts to
retry a two-way TCP/IP (socket) connection before giving up.
@xref{TCP/IP Networking}.
@@ -4053,9 +4375,18 @@ for use by the @command{gawk} developers for testing and tuning.
They are subject to change. The variables are:
@table @env
+@item AWKBUFSIZE
+This variable only affects @command{gawk} on POSIX-compliant systems.
+With a value of @samp{exact}, @command{gawk} uses the size of each input
+file as the size of the memory buffer to allocate for I/O. Otherwise,
+the value should be a number, and @command{gawk} uses that number as
+the size of the buffer to allocate. (When this variable is not set,
+@command{gawk} uses the smaller of the file's size and the ``default''
+blocksize, which is usually the filesystem's I/O blocksize.)
+
@item AWK_HASH
If this variable exists with a value of @samp{gst}, @command{gawk}
-will switch to using the hash function from GNU Smalltalk for
+switches to using the hash function from GNU Smalltalk for
managing arrays.
This function may be marginally faster than the standard function.
@@ -4124,13 +4455,16 @@ to @code{EXIT_FAILURE}.
This @value{SECTION} describes a feature that is specific to @command{gawk}.
-The @samp{@@include} keyword can be used to read external @command{awk} source
+@cindex @code{@@include} directive
+@cindex file inclusion, @code{@@include} directive
+@cindex including files, @code{@@include} directive
+The @code{@@include} keyword can be used to read external @command{awk} source
files. This gives you the ability to split large @command{awk} source files
into smaller, more manageable pieces, and also lets you reuse common @command{awk}
code from various @command{awk} scripts. In other words, you can group
together @command{awk} functions, used to carry out specific tasks,
into external files. These files can be used just like function libraries,
-using the @samp{@@include} keyword in conjunction with the @env{AWKPATH}
+using the @code{@@include} keyword in conjunction with the @env{AWKPATH}
environment variable. Note that source files may also be included
using the @option{-i} option.
@@ -4164,14 +4498,14 @@ $ @kbd{gawk -f test2}
@end example
@code{gawk} runs the @file{test2} script which includes @file{test1}
-using the @samp{@@include}
+using the @code{@@include}
keyword. So, to include external @command{awk} source files you just
-use @samp{@@include} followed by the name of the file to be included,
+use @code{@@include} followed by the name of the file to be included,
enclosed in double quotes.
@quotation NOTE
-Keep in mind that this is a language construct and the file name cannot
-be a string variable, but rather just a literal string in double quotes.
+Keep in mind that this is a language construct and the @value{FN} cannot
+be a string variable, but rather just a literal string constant in double quotes.
@end quotation
The files to be included may be nested; e.g., given a third
@@ -4195,7 +4529,7 @@ $ @kbd{gawk -f test3}
@print{} This is file test3.
@end example
-The file name can, of course, be a pathname. For example:
+The @value{FN} can, of course, be a pathname. For example:
@example
@@include "../io_funcs"
@@ -4210,49 +4544,53 @@ or:
@noindent
are valid. The @code{AWKPATH} environment variable can be of great
-value when using @samp{@@include}. The same rules for the use
+value when using @code{@@include}. The same rules for the use
of the @code{AWKPATH} variable in command-line file searches
(@pxref{AWKPATH Variable}) apply to
-@samp{@@include} also.
+@code{@@include} also.
This is very helpful in constructing @command{gawk} function libraries.
If you have a large script with useful, general purpose @command{awk}
functions, you can break it down into library files and put those files
in a special directory. You can then include those ``libraries,'' using
either the full pathnames of the files, or by setting the @code{AWKPATH}
-environment variable accordingly and then using @samp{@@include} with
+environment variable accordingly and then using @code{@@include} with
just the file part of the full pathname. Of course you can have more
than one directory to keep library files; the more complex the working
environment is, the more directories you may need to organize the files
to be included.
Given the ability to specify multiple @option{-f} options, the
-@samp{@@include} mechanism is not strictly necessary.
-However, the @samp{@@include} keyword
+@code{@@include} mechanism is not strictly necessary.
+However, the @code{@@include} keyword
can help you in constructing self-contained @command{gawk} programs,
thus reducing the need for writing complex and tedious command lines.
-In particular, @samp{@@include} is very useful for writing CGI scripts
+In particular, @code{@@include} is very useful for writing CGI scripts
to be run from web pages.
As mentioned in @ref{AWKPATH Variable}, the current directory is always
searched first for source files, before searching in @env{AWKPATH},
-and this also applies to files named with @samp{@@include}.
+and this also applies to files named with @code{@@include}.
@node Loading Shared Libraries
-@section Loading Shared Libraries Into Your Program
+@section Loading Dynamic Extensions Into Your Program
This @value{SECTION} describes a feature that is specific to @command{gawk}.
-The @samp{@@load} keyword can be used to read external @command{awk} shared
-libraries. This allows you to link in compiled code that may offer superior
+@cindex @code{@@load} directive
+@cindex loading extensions, @code{@@load} directive
+@cindex extensions, loading, @code{@@load} directive
+The @code{@@load} keyword can be used to read external @command{awk} extensions
+(stored as system shared libraries).
+This allows you to link in compiled code that may offer superior
performance and/or give you access to extended capabilities not supported
by the @command{awk} language. The @env{AWKLIBPATH} variable is used to
-search for the shared library. Using @samp{@@load} is completely equivalent
+search for the extension. Using @code{@@load} is completely equivalent
to using the @option{-l} command-line option.
-If the shared library is not initially found in @env{AWKLIBPATH}, another
+If the extension is not initially found in @env{AWKLIBPATH}, another
search is conducted after appending the platform's default shared library
-suffix to the filename. For example, on GNU/Linux systems, the suffix
+suffix to the @value{FN}. For example, on GNU/Linux systems, the suffix
@samp{.so} is used.
@example
@@ -4270,16 +4608,17 @@ $ @kbd{gawk -lordchr 'BEGIN @{print chr(65)@}'}
@noindent
For command-line usage, the @option{-l} option is more convenient,
-but @samp{@@load} is useful for embedding inside an @command{awk} source file
-that requires access to a shared library.
+but @code{@@load} is useful for embedding inside an @command{awk} source file
+that requires access to an extension.
@ref{Dynamic Extensions}, describes how to write extensions (in C or C++)
-that can be loaded with either @samp{@@load} or the @option{-l} option.
+that can be loaded with either @code{@@load} or the @option{-l} option.
@node Obsolete
@section Obsolete Options and/or Features
-@cindex features, advanced, See advanced features
+@c update this section for each release!
+
@cindex options, deprecated
@cindex features, deprecated
@cindex obsolete features
@@ -4288,11 +4627,9 @@ previous releases of @command{gawk} that are either not available in the
current version or that are still supported but deprecated (meaning that
they will @emph{not} be in the next release).
-@c update this section for each release!
-
The process-related special files @file{/dev/pid}, @file{/dev/ppid},
@file{/dev/pgrpid}, and @file{/dev/user} were deprecated in @command{gawk}
-3.1, but still worked. As of version 4.0, they are no longer
+3.1, but still worked. As of @value{PVERSION} 4.0, they are no longer
interpreted specially by @command{gawk}. (Use @code{PROCINFO} instead;
see @ref{Auto-set}.)
@@ -4372,6 +4709,58 @@ long-undocumented ``feature'' of Unix @code{awk}.
@end ignore
+@node Invoking Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Use either
+@samp{awk '@var{program}' @var{files}}
+or
+@samp{awk -f @var{program-file} @var{files}}
+to run @command{awk}.
+
+@item
+The three standard options for all versions of @command{awk} are
+@option{-f}, @option{-F} and @option{-v}. @command{gawk} supplies these
+and many others, as well as corresponding GNU-style long options.
+
+@item
+Non-option command-line arguments are usually treated as @value{FN}s,
+unless they have the form @samp{@var{var}=@var{value}}, in which case
+they are taken as variable assignments to be performed at that point
+in processing the input.
+
+@item
+All non-option command-line arguments, excluding the program text,
+are placed in the @code{ARGV} array. Adjusting @code{ARGC} and @code{ARGV}
+affects how @command{awk} processes input.
+
+@item
+You can use a single minus sign (@samp{-}) to refer to standard input
+on the command line.
+
+@item
+@command{gawk} pays attention to a number of environment variables.
+@env{AWKPATH}, @env{AWKLIBPATH}, and @env{POSIXLY_CORRECT} are the
+most important ones.
+
+@item
+@command{gawk}'s exit status conveys information to the program
+that invoked it. Use the @code{exit} statement from within
+an @command{awk} program to set the exit status.
+
+@item
+@command{gawk} allows you to include other @command{awk} source files into
+your program using the @code{@@include} statement and/or the @option{-i}
+and @option{-f} command-line options.
+
+@item
+@command{gawk} allows you to load additional functions written in C
+or C++ using the @code{@@load} statement and/or the @option{-l} option.
+(This advanced feature is described later on in @ref{Dynamic Extensions}.)
+@end itemize
+
@node Regexp
@chapter Regular Expressions
@cindex regexp
@@ -4392,7 +4781,7 @@ The simplest regular expression is a sequence of letters, numbers, or
both. Such a regexp matches any string that contains that sequence.
Thus, the regexp @samp{foo} matches any string containing @samp{foo}.
Therefore, the pattern @code{/foo/} matches any input record containing
-the three characters @samp{foo} @emph{anywhere} in the record. Other
+the three adjacent characters @samp{foo} @emph{anywhere} in the record. Other
kinds of regexps let you specify more complicated classes of strings.
@ifnotinfo
@@ -4406,10 +4795,11 @@ regular expressions work, we present more complicated instances.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Regexp Summary:: Regular expressions summary.
@end menu
@node Regexp Usage
@@ -4420,8 +4810,8 @@ A regular expression can be used as a pattern by enclosing it in
slashes. Then the regular expression is tested against the
entire text of each record. (Normally, it only needs
to match some part of the text in order to succeed.) For example, the
-following prints the second field of each record that contains the string
-@samp{li} anywhere in it:
+following prints the second field of each record where the string
+@samp{li} appears anywhere in the record:
@example
$ @kbd{awk '/li/ @{ print $2 @}' mail-list}
@@ -4551,7 +4941,7 @@ A literal backslash, @samp{\}.
@cindex backslash (@code{\}), @code{\a} escape sequence
@item \a
The ``alert'' character, @kbd{Ctrl-g}, ASCII code 7 (BEL).
-(This usually makes some sort of audible noise.)
+(This often makes some sort of audible noise.)
@cindex @code{\} (backslash), @code{\b} escape sequence
@cindex backslash (@code{\}), @code{\b} escape sequence
@@ -4600,20 +4990,30 @@ between @samp{0} and @samp{7}. For example, the code for the ASCII ESC
@item \x@var{hh}@dots{}
The hexadecimal value @var{hh}, where @var{hh} stands for a sequence
of hexadecimal digits (@samp{0}--@samp{9}, and either @samp{A}--@samp{F}
-or @samp{a}--@samp{f}). Like the same construct
-in ISO C, the escape sequence continues until the first nonhexadecimal
-digit is seen. @value{COMMONEXT}
+or @samp{a}--@samp{f}). A maximum of two digits are allowed after
+the @samp{\x}. Any further hexadecimal digits are treated as simple
+letters or numbers. @value{COMMONEXT}
+
+@quotation CAUTION
+In ISO C, the escape sequence continues until the first nonhexadecimal
+digit is seen.
+@c FIXME: Add exact version here.
+For many years, @command{gawk} would continue incorporating
+hexadecimal digits into the value until a non-hexadecimal digit
+or the end of the string was encountered.
However, using more than two hexadecimal digits produces
-undefined results. (The @samp{\x} escape sequence is not allowed in
-POSIX @command{awk}.)
+undefined results.
+@end quotation
@cindex @code{\} (backslash), @code{\/} escape sequence
@cindex backslash (@code{\}), @code{\/} escape sequence
@item \/
A literal slash (necessary for regexp constants only).
This sequence is used when you want to write a regexp
-constant that contains a slash. Because the regexp is delimited by
-slashes, you need to escape the slash that is part of the pattern,
+constant that contains a slash
+(such as @code{/.*:\/home\/[[:alnum:]]+:.*/}; the @samp{[[:alnum:]]}
+notation is discussed shortly, in @ref{Bracket Expressions}).
+Because the regexp is delimited by
+slashes, you need to escape any slash that is part of the pattern,
in order to tell @command{awk} to keep processing the rest of the regexp.
@cindex @code{\} (backslash), @code{\"} escape sequence
@@ -4621,8 +5021,10 @@ in order to tell @command{awk} to keep processing the rest of the regexp.
@item \"
A literal double quote (necessary for string constants only).
This sequence is used when you want to write a string
-constant that contains a double quote. Because the string is delimited by
-double quotes, you need to escape the quote that is part of the string,
+constant that contains a double quote
+(such as @code{"He said \"hi!\" to her."}).
+Because the string is delimited by
+double quotes, you need to escape any quote that is part of the string,
in order to tell @command{awk} to keep processing the rest of the string.
@end table
@@ -4645,7 +5047,7 @@ shown in the previous list.
To summarize:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The escape sequences in the table above are always processed first,
for both string constants and regexp constants. This happens very early,
@@ -4683,7 +5085,7 @@ leaves what happens as undefined. There are two choices:
@cindex Brian Kernighan's @command{awk}
@table @asis
@item Strip the backslash out
-This is what Brian Kernighan's @command{awk} and @command{gawk} both do.
+This is what BWK @command{awk} and @command{gawk} both do.
For example, @code{"a\qc"} is the same as @code{"aqc"}.
(Because this is such an easy bug both to introduce and to miss,
@command{gawk} warns you about it.)
@@ -4726,7 +5128,7 @@ leaves what happens as undefined. There are two choices:
@cindex Brian Kernighan's @command{awk}
@table @asis
@item Strip the backslash out
-This is what Brian Kernighan's @command{awk} and @command{gawk} both do.
+This is what BWK @command{awk} and @command{gawk} both do.
For example, @code{"a\qc"} is the same as @code{"aqc"}.
(Because this is such an easy bug both to introduce and to miss,
@command{gawk} warns you about it.)
@@ -4814,7 +5216,7 @@ The escape sequences described
@ifnotinfo
earlier
@end ifnotinfo
-in @ref{Escape Sequences},
+in @DBREF{Escape Sequences}
are valid inside a regexp. They are introduced by a @samp{\} and
are recognized and converted into corresponding real characters as
the very first step in processing regexps.
@@ -4822,10 +5224,11 @@ the very first step in processing regexps.
Here is a list of metacharacters. All characters that are not escape
sequences and that are not listed in the table stand for themselves:
-@table @code
+@c Use @asis so the docbook comes out ok. Sigh.
+@table @asis
@cindex backslash (@code{\}), regexp operator
@cindex @code{\} (backslash), regexp operator
-@item \
+@item @code{\}
This is used to suppress the special meaning of a character when
matching. For example, @samp{\$}
matches the character @samp{$}.
@@ -4834,7 +5237,7 @@ matches the character @samp{$}.
@cindex Texinfo, chapter beginnings in files
@cindex @code{^} (caret), regexp operator
@cindex caret (@code{^}), regexp operator
-@item ^
+@item @code{^}
This matches the beginning of a string. For example, @samp{^@@chapter}
matches @samp{@@chapter} at the beginning of a string and can be used
to identify chapter beginnings in Texinfo source files.
@@ -4842,7 +5245,7 @@ The @samp{^} is known as an @dfn{anchor}, because it anchors the pattern to
match only at the beginning of the string.
It is important to realize that @samp{^} does not match the beginning of
-a line embedded in a string.
+a line (the point right after a @samp{\n} newline character) embedded in a string.
The condition is not true in the following example:
@example
@@ -4851,11 +5254,13 @@ if ("line1\nLINE 2" ~ /^L/) @dots{}
@cindex @code{$} (dollar sign), regexp operator
@cindex dollar sign (@code{$}), regexp operator
-@item $
+@item @code{$}
This is similar to @samp{^}, but it matches only at the end of a string.
For example, @samp{p$}
matches a record that ends with a @samp{p}. The @samp{$} is an anchor
-and does not match the end of a line embedded in a string.
+and does not match the end of a line
+(the point right before a @samp{\n} newline character)
+embedded in a string.
The condition in the following example is not true:
@example
@@ -4864,7 +5269,7 @@ if ("line1\nLINE 2" ~ /1$/) @dots{}
@cindex @code{.} (period), regexp operator
@cindex period (@code{.}), regexp operator
-@item . @r{(period)}
+@item @code{.} (period)
This matches any single character,
@emph{including} the newline character. For example, @samp{.P}
matches any single character followed by a @samp{P} in a string. Using
@@ -4874,10 +5279,10 @@ with @samp{A}.
@cindex POSIX @command{awk}, period (@code{.})@comma{} using
In strict POSIX mode (@pxref{Options}),
-@samp{.} does not match the @sc{nul}
+@samp{.} does not match the @value{NUL}
character, which is a character with all bits equal to zero.
-Otherwise, @sc{nul} is just another character. Other versions of @command{awk}
-may not be able to match the @sc{nul} character.
+Otherwise, @value{NUL} is just another character. Other versions of @command{awk}
+may not be able to match the @value{NUL} character.
@cindex @code{[]} (square brackets), regexp operator
@cindex square brackets (@code{[]}), regexp operator
@@ -4885,7 +5290,7 @@ may not be able to match the @sc{nul} character.
@cindex character sets, See Also bracket expressions
@cindex character lists, See bracket expressions
@cindex character classes, See bracket expressions
-@item [@dots{}]
+@item @code{[}@dots{}@code{]}
This is called a @dfn{bracket expression}.@footnote{In other literature,
you may see a bracket expression referred to as either a
@dfn{character set}, a @dfn{character class}, or a @dfn{character list}.}
@@ -4897,7 +5302,7 @@ is given in
@ref{Bracket Expressions}.
@cindex bracket expressions, complemented
-@item [^ @dots{}]
+@item @code{[^}@dots{}@code{]}
This is a @dfn{complemented bracket expression}. The first character after
the @samp{[} @emph{must} be a @samp{^}. It matches any characters
@emph{except} those in the square brackets. For example, @samp{[^awk]}
@@ -4906,20 +5311,19 @@ or @samp{k}.
@cindex @code{|} (vertical bar)
@cindex vertical bar (@code{|})
-@item |
+@item @code{|}
This is the @dfn{alternation operator} and it is used to specify
-alternatives.
-The @samp{|} has the lowest precedence of all the regular
-expression operators.
-For example, @samp{^P|[[:digit:]]}
-matches any string that matches either @samp{^P} or @samp{[[:digit:]]}. This
-means it matches any string that starts with @samp{P} or contains a digit.
+alternatives. The @samp{|} has the lowest precedence of all the regular
+expression operators. For example, @samp{^P|[aeiouy]} matches any string
+that matches either @samp{^P} or @samp{[aeiouy]}. This means it matches
+any string that starts with @samp{P} or contains (anywhere within it)
+a lowercase English vowel.
The alternation applies to the largest possible regexps on either side.
@cindex @code{()} (parentheses), regexp operator
@cindex parentheses @code{()}, regexp operator
-@item (@dots{})
+@item @code{(}@dots{}@code{)}
Parentheses are used for grouping in regular expressions, as in
arithmetic. They can be used to concatenate regular expressions
containing the alternation operator, @samp{|}. For example,
@@ -4930,47 +5334,42 @@ explained further on in this list.)
@cindex @code{*} (asterisk), @code{*} operator, as regexp operator
@cindex asterisk (@code{*}), @code{*} operator, as regexp operator
-@item *
+@item @code{*}
This symbol means that the preceding regular expression should be
repeated as many times as necessary to find a match. For example, @samp{ph*}
applies the @samp{*} symbol to the preceding @samp{h} and looks for matches
of one @samp{p} followed by any number of @samp{h}s. This also matches
just @samp{p} if no @samp{h}s are present.
-The @samp{*} repeats the @emph{smallest} possible preceding expression.
-(Use parentheses if you want to repeat a larger expression.) It finds
-as many repetitions as possible. For example,
-@samp{awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample}
-prints every record in @file{sample} containing a string of the form
-@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on.
-Notice the escaping of the parentheses by preceding them
-with backslashes.
+There are two subtle points to understand about how @samp{*} works.
+First, the @samp{*} applies only to the single preceding regular expression
+component (e.g., in @samp{ph*}, it applies just to the @samp{h}).
+To cause @samp{*} to apply to a larger sub-expression, use parentheses:
+@samp{(ph)*} matches @samp{ph}, @samp{phph}, @samp{phphph} and so on.
+
+Second, @samp{*} finds as many repetitions as possible.  If the text
+to be matched is @samp{phhhhhhhhhhhhhhooey}, @samp{ph*} matches all of
+the @samp{h}s.
@cindex @code{+} (plus sign), regexp operator
@cindex plus sign (@code{+}), regexp operator
-@item +
+@item @code{+}
This symbol is similar to @samp{*}, except that the preceding expression must be
matched at least once. This means that @samp{wh+y}
would match @samp{why} and @samp{whhy}, but not @samp{wy}, whereas
-@samp{wh*y} would match all three of these strings.
-The following is a simpler
-way of writing the last @samp{*} example:
-
-@example
-awk '/\(c[ad]+r x\)/ @{ print @}' sample
-@end example
+@samp{wh*y} would match all three.
@cindex @code{?} (question mark), regexp operator
@cindex question mark (@code{?}), regexp operator
-@item ?
+@item @code{?}
This symbol is similar to @samp{*}, except that the preceding expression can be
matched either once or not at all. For example, @samp{fe?d}
matches @samp{fed} and @samp{fd}, but nothing else.
@cindex interval expressions, regexp operator
-@item @{@var{n}@}
-@itemx @{@var{n},@}
-@itemx @{@var{n},@var{m}@}
+@item @code{@{}@var{n}@code{@}}
+@itemx @code{@{}@var{n}@code{,@}}
+@itemx @code{@{}@var{n}@code{,}@var{m}@code{@}}
One or two numbers inside braces denote an @dfn{interval expression}.
If there is one number in the braces, the preceding regexp is repeated
@var{n} times.
@@ -5001,7 +5400,7 @@ constants,
@command{gawk} did @emph{not} match interval expressions
in regexps.
-However, beginning with version 4.0,
+However, beginning with @value{PVERSION} 4.0,
@command{gawk} does match interval expressions by default.
This is because compatibility with POSIX has become more
important to most @command{gawk} users than compatibility with
@@ -5053,7 +5452,7 @@ Within a bracket expression, a @dfn{range expression} consists of two
characters separated by a hyphen. It matches any single character that
sorts between the two characters, based upon the system's native character
set. For example, @samp{[0-9]} is equivalent to @samp{[0123456789]}.
-(See @ref{Ranges and Locales}, for an explanation of how the POSIX
+(See @DBREF{Ranges and Locales} for an explanation of how the POSIX
standard and @command{gawk} have changed over time. This is mainly
of historical interest.)
@@ -5072,6 +5471,9 @@ bracket expression, put a @samp{\} in front of it. For example:
@noindent
matches either @samp{d} or @samp{]}.
+Additionally, if you place @samp{]} right after the opening
+@samp{[}, the closing bracket is treated as one of the
+characters to be matched.
@cindex POSIX @command{awk}, bracket expressions and
@cindex Extended Regular Expressions (EREs)
@@ -5129,6 +5531,17 @@ With the POSIX character classes, you can write
@code{/[[:alnum:]]/} to match the alphabetic
and numeric characters in your character set.
+@c Thanks to
+@c Date: Tue, 01 Jul 2014 07:39:51 +0200
+@c From: Hermann Peifer <peifer@gmx.eu>
+Some utilities that match regular expressions provide a non-standard
+@code{[:ascii:]} character class; @command{awk} does not. However, you
+can simulate such a construct using @code{[\x00-\x7F]}. This matches
+all values numerically between zero and 127, which is the defined
+range of the ASCII character set. Use a complemented character list
+(@code{[^\x00-\x7F]}) to match any single-byte characters that are not
+in the ASCII range.
+
@cindex bracket expressions, collating elements
@cindex bracket expressions, non-ASCII
@cindex collating elements
@@ -5172,6 +5585,204 @@ they do not recognize collating symbols or equivalence classes.
@c maybe one day ...
@c ENDOFRANGE charlist
+@node Leftmost Longest
+@section How Much Text Matches?
+
+@cindex regular expressions, leftmost longest match
+@c @cindex matching, leftmost longest
+Consider the following:
+
+@example
+echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@end example
+
+This example uses the @code{sub()} function (which we haven't discussed yet;
+@pxref{String Functions})
+to make a change to the input record. Here, the regexp @code{/a+/}
+indicates ``one or more @samp{a} characters,'' and the replacement
+text is @samp{<A>}.
+
+The input contains four @samp{a} characters.
+@command{awk} (and POSIX) regular expressions always match
+the leftmost, @emph{longest} sequence of input characters that can
+match. Thus, all four @samp{a} characters are
+replaced with @samp{<A>} in this example:
+
+@example
+$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
+@print{} <A>bcd
+@end example
+
+For simple match/no-match tests, this is not so important. But when doing
+text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
+and @code{gensub()} functions, it is very important.
+@ifinfo
+@xref{String Functions},
+for more information on these functions.
+@end ifinfo
+Understanding this principle is also important for regexp-based record
+and field splitting (@pxref{Records},
+and also @pxref{Field Separators}).
+
+@node Computed Regexps
+@section Using Dynamic Regexps
+
+@c STARTOFRANGE dregexp
+@cindex regular expressions, computed
+@c STARTOFRANGE regexpd
+@cindex regular expressions, dynamic
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@c @cindex operators, @code{~}
+@c @cindex operators, @code{!~}
+The righthand side of a @samp{~} or @samp{!~} operator need not be a
+regexp constant (i.e., a string of characters between slashes). It may
+be any expression. The expression is evaluated and converted to a string
+if necessary; the contents of the string are then used as the
+regexp. A regexp computed in this way is called a @dfn{dynamic
+regexp} or a @dfn{computed regexp}:
+
+@example
+BEGIN @{ digits_regexp = "[[:digit:]]+" @}
+$0 ~ digits_regexp @{ print @}
+@end example
+
+@noindent
+This sets @code{digits_regexp} to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+@quotation NOTE
+When using the @samp{~} and @samp{!~}
+operators, there is a difference between a regexp constant
+enclosed in slashes and a string constant enclosed in double quotes.
+If you are going to use a string constant, you have to understand that
+the string is, in essence, scanned @emph{twice}: the first time when
+@command{awk} reads your program, and the second time when it goes to
+match the string on the lefthand side of the operator with the pattern
+on the right. This is true of any string-valued expression (such as
+@code{digits_regexp}, shown previously), not just string constants.
+@end quotation
+
+@cindex regexp constants, slashes vs.@: quotes
+@cindex @code{\} (backslash), in regexp constants
+@cindex backslash (@code{\}), in regexp constants
+@cindex @code{"} (double quote), in regexp constants
+@cindex double quote (@code{"}), in regexp constants
+What difference does it make if the string is
+scanned twice? The answer has to do with escape sequences, and particularly
+with backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
+Only one backslash is needed. To do the same thing with a string,
+you have to type @code{"\\*"}. The first backslash escapes the
+second one so that the string actually contains the
+two characters @samp{\} and @samp{*}.
+
+@cindex troubleshooting, regexp constants vs.@: string constants
+@cindex regexp constants, vs.@: string constants
+@cindex string constants, vs.@: regexp constants
+Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is ``regexp
+constants,'' for several reasons:
+
+@itemize @value{BULLET}
+@item
+String constants are more complicated to write and
+more difficult to read. Using regexp constants makes your programs
+less error-prone. Not understanding the difference between the two
+kinds of constants is a common source of errors.
+
+@item
+It is more efficient to use regexp constants. @command{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. When using a string constant,
+@command{awk} must first convert the string into this internal form and
+then perform the pattern matching.
+
+@item
+Using regexp constants is better form; it shows clearly that you
+intend a regexp match.
+@end itemize
+
+@cindex sidebar, Using @code{\n} in Bracket Expressions of Dynamic Regexps
+@ifdocbook
+@docbook
+<sidebar><title>Using @code{\n} in Bracket Expressions of Dynamic Regexps</title>
+@end docbook
+
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+
+@docbook
+</sidebar>
+@end docbook
+@end ifdocbook
+
+@ifnotdocbook
+@cartouche
+@center @b{Using @code{\n} in Bracket Expressions of Dynamic Regexps}
+
+
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+@end cartouche
+@end ifnotdocbook
+@c ENDOFRANGE dregexp
+@c ENDOFRANGE regexpd
+
@node GNU Regexp Operators
@section @command{gawk}-Specific Regexp Operators
@@ -5304,9 +5915,6 @@ GNU operators, but this was deemed too confusing. The current
method of using @samp{\y} for the GNU @samp{\b} appears to be the
lesser of two evils.
-@c NOTE!!! Keep this in sync with the same table in the summary appendix!
-@c
-@c Should really do this with file inclusion.
@cindex regular expressions, @command{gawk}, command-line options
@cindex @command{gawk}, command-line options, and regular expressions
The various command-line options
@@ -5322,8 +5930,10 @@ previously described
GNU regexp operators.
@end ifnotinfo
@ifnottex
+@ifnotdocbook
GNU regexp operators described
in @ref{Regexp Operators}.
+@end ifnotdocbook
@end ifnottex
@item @code{--posix}
@@ -5336,7 +5946,7 @@ are allowed.
Traditional Unix @command{awk} regexps are matched. The GNU operators
are not special, and interval expressions are not available.
The POSIX character classes (@samp{[[:alnum:]]}, etc.) are supported,
-as Brian Kernighan's @command{awk} does support them.
+as BWK @command{awk} does support them.
Characters described by octal and hexadecimal escape sequences are
treated literally, even if they represent regexp metacharacters.
@@ -5393,10 +6003,12 @@ This works in any POSIX-compliant @command{awk}.
Another method, specific to @command{gawk}, is to set the variable
@code{IGNORECASE} to a nonzero value (@pxref{Built-in Variables}).
When @code{IGNORECASE} is not zero, @emph{all} regexp and string
-operations ignore case. Changing the value of
-@code{IGNORECASE} dynamically controls the case-sensitivity of the
-program as it runs. Case is significant by default because
-@code{IGNORECASE} (like most variables) is initialized to zero:
+operations ignore case.
+
+Changing the value of @code{IGNORECASE} dynamically controls the
+case-sensitivity of the program as it runs. Case is significant by
+default because @code{IGNORECASE} (like most variables) is initialized
+to zero:
@example
x = "aB"
@@ -5426,9 +6038,6 @@ case-sensitivity on or off for all the rules at once.
Setting @code{IGNORECASE} from the command line is a way to make
a program case-insensitive without having to edit it.
-Both regexp and string comparison
-operations are affected by @code{IGNORECASE}.
-
@c @cindex ISO 8859-1
@c @cindex ISO Latin-1
In multibyte locales,
@@ -5449,203 +6058,51 @@ Case is always significant in compatibility mode.
@c ENDOFRANGE csregexp
@c ENDOFRANGE regexpcs
-@node Leftmost Longest
-@section How Much Text Matches?
-
-@cindex regular expressions, leftmost longest match
-@c @cindex matching, leftmost longest
-Consider the following:
+@node Regexp Summary
+@section Summary
-@example
-echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
-@end example
-
-This example uses the @code{sub()} function (which we haven't discussed yet;
-@pxref{String Functions})
-to make a change to the input record. Here, the regexp @code{/a+/}
-indicates ``one or more @samp{a} characters,'' and the replacement
-text is @samp{<A>}.
-
-The input contains four @samp{a} characters.
-@command{awk} (and POSIX) regular expressions always match
-the leftmost, @emph{longest} sequence of input characters that can
-match. Thus, all four @samp{a} characters are
-replaced with @samp{<A>} in this example:
-
-@example
-$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
-@print{} <A>bcd
-@end example
-
-For simple match/no-match tests, this is not so important. But when doing
-text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
-and @code{gensub()} functions, it is very important.
-@ifinfo
-@xref{String Functions},
-for more information on these functions.
-@end ifinfo
-Understanding this principle is also important for regexp-based record
-and field splitting (@pxref{Records},
-and also @pxref{Field Separators}).
-
-@node Computed Regexps
-@section Using Dynamic Regexps
-
-@c STARTOFRANGE dregexp
-@cindex regular expressions, computed
-@c STARTOFRANGE regexpd
-@cindex regular expressions, dynamic
-@cindex @code{~} (tilde), @code{~} operator
-@cindex tilde (@code{~}), @code{~} operator
-@cindex @code{!} (exclamation point), @code{!~} operator
-@cindex exclamation point (@code{!}), @code{!~} operator
-@c @cindex operators, @code{~}
-@c @cindex operators, @code{!~}
-The righthand side of a @samp{~} or @samp{!~} operator need not be a
-regexp constant (i.e., a string of characters between slashes). It may
-be any expression. The expression is evaluated and converted to a string
-if necessary; the contents of the string are then used as the
-regexp. A regexp computed in this way is called a @dfn{dynamic
-regexp}:
-
-@example
-BEGIN @{ digits_regexp = "[[:digit:]]+" @}
-$0 ~ digits_regexp @{ print @}
-@end example
-
-@noindent
-This sets @code{digits_regexp} to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
-@quotation NOTE
-When using the @samp{~} and @samp{!~}
-operators, there is a difference between a regexp constant
-enclosed in slashes and a string constant enclosed in double quotes.
-If you are going to use a string constant, you have to understand that
-the string is, in essence, scanned @emph{twice}: the first time when
-@command{awk} reads your program, and the second time when it goes to
-match the string on the lefthand side of the operator with the pattern
-on the right. This is true of any string-valued expression (such as
-@code{digits_regexp}, shown previously), not just string constants.
-@end quotation
-
-@cindex regexp constants, slashes vs.@: quotes
-@cindex @code{\} (backslash), in regexp constants
-@cindex backslash (@code{\}), in regexp constants
-@cindex @code{"} (double quote), in regexp constants
-@cindex double quote (@code{"}), in regexp constants
-What difference does it make if the string is
-scanned twice? The answer has to do with escape sequences, and particularly
-with backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
-For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
-Only one backslash is needed. To do the same thing with a string,
-you have to type @code{"\\*"}. The first backslash escapes the
-second one so that the string actually contains the
-two characters @samp{\} and @samp{*}.
-
-@cindex troubleshooting, regexp constants vs.@: string constants
-@cindex regexp constants, vs.@: string constants
-@cindex string constants, vs.@: regexp constants
-Given that you can use both regexp and string constants to describe
-regular expressions, which should you use? The answer is ``regexp
-constants,'' for several reasons:
-
-@itemize @bullet
+@itemize @value{BULLET}
@item
-String constants are more complicated to write and
-more difficult to read. Using regexp constants makes your programs
-less error-prone. Not understanding the difference between the two
-kinds of constants is a common source of errors.
+Regular expressions describe sets of strings to be matched.
+In @command{awk}, regular expression constants are written enclosed
+between slashes: @code{/}@dots{}@code{/}.
@item
-It is more efficient to use regexp constants. @command{awk} can note
-that you have supplied a regexp and store it internally in a form that
-makes pattern matching more efficient. When using a string constant,
-@command{awk} must first convert the string into this internal form and
-then perform the pattern matching.
+Regexp constants may be used standalone in patterns and
+in conditional expressions, or as part of matching expressions
+using the @samp{~} and @samp{!~} operators.
@item
-Using regexp constants is better form; it shows clearly that you
-intend a regexp match.
-@end itemize
-
-@cindex sidebar, Using @code{\n} in Bracket Expressions of Dynamic Regexps
-@ifdocbook
-@docbook
-<sidebar><title>Using @code{\n} in Bracket Expressions of Dynamic Regexps</title>
-@end docbook
-
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
-
-Some commercial versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
-
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
-
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
-
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
-
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-
-@docbook
-</sidebar>
-@end docbook
-@end ifdocbook
-
-@ifnotdocbook
-@cartouche
-@center @b{Using @code{\n} in Bracket Expressions of Dynamic Regexps}
+Escape sequences let you represent non-printable characters and
+also let you represent regexp metacharacters as literal characters
+to be matched.
+@item
+Regexp operators provide grouping, alternation and repetition.
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
+@item
+Bracket expressions give you a shorthand for specifying sets
+of characters that can match at a particular point in a regexp.
+Within bracket expressions, POSIX character classes let you specify
+certain groups of characters in a locale-independent fashion.
-Some commercial versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
+@item
+@command{gawk}'s @code{IGNORECASE} variable lets you control the
+case sensitivity of regexp matching. In other @command{awk}
+versions, use @code{tolower()} or @code{toupper()}.
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
+@item
+Regular expressions match the leftmost longest text in the string being
+matched. This matters for cases where you need to know the extent of
+the match, such as for text substitution and when the record separator
+is a regexp.
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
+@item
+Matching expressions may use dynamic regexps, that is, string values
+treated as regular expressions.
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
+@end itemize
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-@end cartouche
-@end ifnotdocbook
-@c ENDOFRANGE dregexp
-@c ENDOFRANGE regexpd
@c ENDOFRANGE regexp
@node Reading Files
@@ -5693,8 +6150,10 @@ used with it do not have to be named on the @command{awk} command line
* Getline:: Reading files under explicit program control
using the @code{getline} function.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on the
+* Command-line directories:: What happens if you put a directory on the
command line.
+* Input Summary:: Input summary.
+* Input Exercises:: Exercises.
@end menu
@node Records
@@ -5706,16 +6165,21 @@ used with it do not have to be named on the @command{awk} command line
@cindex records, splitting input into
@cindex @code{NR} variable
@cindex @code{FNR} variable
-The @command{awk} utility divides the input for your @command{awk}
-program into records and fields.
-@command{awk} keeps track of the number of records that have
-been read
-so far
-from the current input file. This value is stored in a
-built-in variable called @code{FNR}. It is reset to zero when a new
-file is started. Another built-in variable, @code{NR}, records the total
-number of input records read so far from all data files. It starts at zero,
-but is never automatically reset to zero.
+@command{awk} divides the input for your program into records and fields.
+It keeps track of the number of records that have been read so far from
+the current input file. This value is stored in a built-in variable
+called @code{FNR}, which is reset to zero when a new file is started.
+Another built-in variable, @code{NR}, records the total number of input
+records read so far from all @value{DF}s. It starts at zero, but is
+never automatically reset to zero.
+
+@menu
+* awk split records:: How standard @command{awk} splits records.
+* gawk split records:: How @command{gawk} splits records.
+@end menu
+
+@node awk split records
+@subsection Record Splitting With Standard @command{awk}
@cindex separators, for records
@cindex record separators
@@ -5799,7 +6263,7 @@ $ @kbd{awk 'BEGIN @{ RS = "u" @}}
@noindent
Note that the entry for the name @samp{Bill} is not split.
-In the original data file
+In the original @value{DF}
(@pxref{Sample Data Files}),
the line looks like this:
@@ -5812,7 +6276,7 @@ It contains no @samp{u} so there is no reason to split the record,
unlike the others which have one or more occurrences of the @samp{u}.
In fact, this record is treated as part of the previous record;
the newline separating them in the output
-is the original newline in the data file, not the one added by
+is the original newline in the @value{DF}, not the one added by
@command{awk} when it printed the record!
@cindex record separators, changing
@@ -5880,6 +6344,9 @@ After the end of the record has been determined, @command{gawk}
sets the variable @code{RT} to the text in the input that matched
@code{RS}.
+@node gawk split records
+@subsection Record Splitting With @command{gawk}
+
@cindex common extensions, @code{RS} as a regexp
@cindex extensions, common@comma{} @code{RS} as a regexp
When using @command{gawk},
@@ -5911,17 +6378,17 @@ with optional leading and/or trailing whitespace:
@example
$ @kbd{echo record 1 AAAA record 2 BBBB record 3 |}
> @kbd{gawk 'BEGIN @{ RS = "\n|( *[[:upper:]]+ *)" @}}
-> @kbd{@{ print "Record =", $0, "and RT =", RT @}'}
-@print{} Record = record 1 and RT = AAAA
-@print{} Record = record 2 and RT = BBBB
-@print{} Record = record 3 and RT =
-@print{}
+> @kbd{@{ print "Record =", $0,"and RT = [" RT "]" @}'}
+@print{} Record = record 1 and RT = [ AAAA ]
+@print{} Record = record 2 and RT = [ BBBB ]
+@print{} Record = record 3 and RT = [
+@print{} ]
@end example
@noindent
-The final line of output has an extra blank line. This is because the
-value of @code{RT} is a newline, and the @code{print} statement
-supplies its own terminating newline.
+The square brackets delineate the contents of @code{RT}, letting you
+see the leading and trailing whitespace. The final value of
+@code{RT} is a newline.
@xref{Simple Sed}, for a more useful example
of @code{RS} as a regexp and @code{RT}.
@@ -5958,14 +6425,13 @@ In compatibility mode, only the first character of the value of
@end docbook
@cindex portability, data files as single record
-There are times when you might want to treat an entire data file as a
+There are times when you might want to treat an entire @value{DF} as a
single record. The only way to make this happen is to give @code{RS}
a value that you know doesn't occur in the input file. This is hard
to do in a general way, such that a program always works for arbitrary
input files.
-@c can you say `understatement' boys and girls?
-You might think that for text files, the @sc{nul} character, which
+You might think that for text files, the @value{NUL} character, which
consists of a character with all bits equal to zero, is a good
value to use for @code{RS} in this case:
@@ -5974,29 +6440,29 @@ BEGIN @{ RS = "\0" @} # whole file becomes one record?
@end example
@cindex differences in @command{awk} and @command{gawk}, strings, storing
-@command{gawk} in fact accepts this, and uses the @sc{nul}
+@command{gawk} in fact accepts this, and uses the @value{NUL}
character for the record separator.
+This works for certain special files, such as @file{/proc/environ} on
+GNU/Linux systems, where the @value{NUL} character is in fact the record separator.
However, this usage is @emph{not} portable
to most other @command{awk} implementations.
@cindex dark corner, strings, storing
Almost all other @command{awk} implementations@footnote{At least that we know
about.} store strings internally as C-style strings. C strings use the
-@sc{nul} character as the string terminator. In effect, this means that
+@value{NUL} character as the string terminator. In effect, this means that
@samp{RS = "\0"} is the same as @samp{RS = ""}.
@value{DARKCORNER}
-It happens that recent versions of @command{mawk} can use the @sc{nul}
+It happens that recent versions of @command{mawk} can use the @value{NUL}
character as a record separator. However, this is a special case:
-@command{mawk} does not allow embedded @sc{nul} characters in strings.
+@command{mawk} does not allow embedded @value{NUL} characters in strings.
@cindex records, treating files as
@cindex treating files, as single records
-The best way to treat a whole file as a single record is to
-simply read the file in, one record at a time, concatenating each
-record onto the end of the previous ones.
-
-@c @strong{FIXME}: Using @sc{nul} is good for @file{/proc/environ} etc.
+@xref{Readfile Function}, for an interesting, portable way to read
+whole files. If you are using @command{gawk}, see @ref{Extension Sample
+Readfile}, for another option.
@docbook
</sidebar>
@@ -6009,14 +6475,13 @@ record onto the end of the previous ones.
@cindex portability, data files as single record
-There are times when you might want to treat an entire data file as a
+There are times when you might want to treat an entire @value{DF} as a
single record. The only way to make this happen is to give @code{RS}
a value that you know doesn't occur in the input file. This is hard
to do in a general way, such that a program always works for arbitrary
input files.
-@c can you say `understatement' boys and girls?
-You might think that for text files, the @sc{nul} character, which
+You might think that for text files, the @value{NUL} character, which
consists of a character with all bits equal to zero, is a good
value to use for @code{RS} in this case:
@@ -6025,29 +6490,29 @@ BEGIN @{ RS = "\0" @} # whole file becomes one record?
@end example
@cindex differences in @command{awk} and @command{gawk}, strings, storing
-@command{gawk} in fact accepts this, and uses the @sc{nul}
+@command{gawk} in fact accepts this, and uses the @value{NUL}
character for the record separator.
+This works for certain special files, such as @file{/proc/environ} on
+GNU/Linux systems, where the @value{NUL} character is in fact the record separator.
However, this usage is @emph{not} portable
to most other @command{awk} implementations.
@cindex dark corner, strings, storing
Almost all other @command{awk} implementations@footnote{At least that we know
about.} store strings internally as C-style strings. C strings use the
-@sc{nul} character as the string terminator. In effect, this means that
+@value{NUL} character as the string terminator. In effect, this means that
@samp{RS = "\0"} is the same as @samp{RS = ""}.
@value{DARKCORNER}
-It happens that recent versions of @command{mawk} can use the @sc{nul}
+It happens that recent versions of @command{mawk} can use the @value{NUL}
character as a record separator. However, this is a special case:
-@command{mawk} does not allow embedded @sc{nul} characters in strings.
+@command{mawk} does not allow embedded @value{NUL} characters in strings.
@cindex records, treating files as
@cindex treating files, as single records
-The best way to treat a whole file as a single record is to
-simply read the file in, one record at a time, concatenating each
-record onto the end of the previous ones.
-
-@c @strong{FIXME}: Using @sc{nul} is good for @file{/proc/environ} etc.
+@xref{Readfile Function}, for an interesting, portable way to read
+whole files. If you are using @command{gawk}, see @ref{Extension Sample
+Readfile}, for another option.
@end cartouche
@end ifnotdocbook
@c ENDOFRANGE inspl
@@ -6084,7 +6549,7 @@ simple @command{awk} programs so powerful.
@cindex @code{$} (dollar sign), @code{$} field operator
@cindex dollar sign (@code{$}), @code{$} field operator
@cindex field operators@comma{} dollar sign as
-A dollar-sign (@samp{$}) is used
+You use a dollar-sign (@samp{$})
to refer to a field in an @command{awk} program,
followed by the number of the field you want. Thus, @code{$1}
refers to the first field, @code{$2} to the second, and so on.
@@ -6115,7 +6580,7 @@ one (such as @code{$8} when the record has only seven fields), you get
the empty string. (If used in a numeric operation, you get zero.)
The use of @code{$0}, which looks like a reference to the ``zero-th'' field, is
-a special case: it represents the whole input record
+a special case: it represents the whole input record. Use it
when you are not interested in specific fields.
Here are some more examples:
@@ -6151,7 +6616,7 @@ $ @kbd{awk '/li/ @{ print $1, $NF @}' mail-list}
@cindex fields, numbers
@cindex field numbers
-The number of a field does not need to be a constant. Any expression in
+A field number need not be a constant. Any expression in
the @command{awk} language can be used after a @samp{$} to refer to a
field. The value of the expression specifies the field number. If the
value is a string, rather than a number, it is converted to a number.
@@ -6178,7 +6643,11 @@ its value as the number of the field to print. The @samp{*} sign
represents multiplication, so the expression @samp{2*2} evaluates to four.
The parentheses are used so that the multiplication is done before the
@samp{$} operation; they are necessary whenever there is a binary
-operator in the field-number expression. This example, then, prints the
+operator@footnote{A @dfn{binary operator}, such as @samp{*} for
+multiplication, is one that takes two operands. The distinction
+is required, since @command{awk} also has unary (one-operand)
+and ternary (three-operand) operators.}
+in the field-number expression. This example, then, prints the
type of relationship (the fourth field) for every line of the file
@file{mail-list}. (All of the @command{awk} operators are listed, in
order of decreasing precedence, in
@@ -6228,7 +6697,7 @@ Then it prints the original and new values for field three.
(Someone in the warehouse made a consistent mistake while inventorying
the red boxes.)
-For this to work, the text in field @code{$3} must make sense
+For this to work, the text in @code{$3} must make sense
as a number; the string of characters must be converted to a number
for the computer to do arithmetic on it. The number resulting
from the subtraction is converted back to a string of characters that
@@ -6319,7 +6788,7 @@ $ @kbd{echo a b c d | awk '@{ OFS = ":"; $2 = ""}
@end example
@noindent
-The field is still there; it just has an empty value, denoted by
+The field is still there; it just has an empty value, delimited by
the two colons between @samp{a} and @samp{c}.
This example shows what happens if you create a new field:
@@ -6433,7 +6902,7 @@ with a statement such as @samp{$1 = $1}, as described earlier.
* Default Field Splitting:: How fields are normally separated.
* Regexp Field Splitting:: Using regexps as the field separator.
* Single Character Fields:: Making each character a separate field.
-* Command Line Field Separator:: Setting @code{FS} from the command-line.
+* Command Line Field Separator:: Setting @code{FS} from the command line.
* Full Line Fields:: Making the full line be a single field.
* Field Splitting Summary:: Some final points and a summary table.
@end menu
@@ -6602,7 +7071,7 @@ $ @kbd{echo ' a b c d ' | awk 'BEGIN @{ FS = "[ \t\n]+" @}}
@cindex null strings
@cindex strings, null
@cindex empty strings, See null strings
-In this case, the first field is @dfn{null} or empty.
+In this case, the first field is null, or empty.
The stripping of leading and trailing whitespace also comes into
play whenever @code{$0} is recomputed. For instance, study this pipeline:
@@ -6634,7 +7103,7 @@ should not rely on any specific behavior in your programs.
@value{DARKCORNER}
@cindex Brian Kernighan's @command{awk}
-As a point of information, Brian Kernighan's @command{awk} allows @samp{^}
+As a point of information, BWK @command{awk} allows @samp{^}
to match only at the beginning of the record. @command{gawk}
also works this way. For example:
@@ -6689,7 +7158,7 @@ behaves this way.
@node Command Line Field Separator
@subsection Setting @code{FS} from the Command Line
-@cindex @option{-F} option, command line
+@cindex @option{-F} option, command-line
@cindex field separator, on command line
@cindex command line, @code{FS} on@comma{} setting
@cindex @code{FS} variable, setting from command line
@@ -6739,6 +7208,8 @@ shell, without any quotes, the @samp{\} gets deleted, so @command{awk}
figures that you really want your fields to be separated with TABs and
not @samp{t}s. Use @samp{-v FS="t"} or @samp{-F"[t]"} on the command line
if you really do want to separate your fields with @samp{t}s.
+Use @samp{-F '\t'} when not in compatibility mode to specify that TABs
+separate fields.
As an example, let's use an @command{awk} program file called @file{edu.awk}
that contains the pattern @code{/edu/} and the action @samp{print $1}:
@@ -6752,7 +7223,6 @@ program on the file @file{mail-list}. The following command prints a
list of the names of the people that work at or attend a university, and
the first three digits of their phone numbers:
-@c tweaked to make the tex output look better in @smallbook
@example
$ @kbd{awk -F- -f edu.awk mail-list}
@print{} Fabius 555
@@ -6890,7 +7360,7 @@ root
@noindent
on an incorrect implementation of @command{awk}, while @command{gawk}
-prints something like:
+prints the full first line of the file, something like:
@example
root:nSijPlPhZZwgE:0:0:Root:/:
@@ -6943,7 +7413,7 @@ root
@noindent
on an incorrect implementation of @command{awk}, while @command{gawk}
-prints something like:
+prints the full first line of the file, something like:
@example
root:nSijPlPhZZwgE:0:0:Root:/:
@@ -7080,7 +7550,7 @@ haven't been introduced yet.
BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @}
NR > 2 @{
idle = $4
- sub(/^ */, "", idle) # strip leading spaces
+ sub(/^ +/, "", idle) # strip leading spaces
if (idle == "")
idle = 0
if (idle ~ /:/) @{
@@ -7117,10 +7587,6 @@ program for processing such data could use the @code{FIELDWIDTHS} feature
to simplify reading the data. (Of course, getting @command{gawk} to run on
a system with card readers is another story!)
-@ignore
-Exercise: Write a ballot card reading program
-@end ignore
-
@cindex @command{gawk}, splitting fields and
Assigning a value to @code{FS} causes @command{gawk} to use
@code{FS} for field splitting again. Use @samp{FS = FS} to make this happen,
@@ -7137,7 +7603,7 @@ if (PROCINFO["FS"] == "FS")
else if (PROCINFO["FS"] == "FIELDWIDTHS")
@var{fixed-width field splitting} @dots{}
else
- @var{content-based field splitting} @dots{} (see next @value{SECTION})
+ @var{content-based field splitting} @dots{} @ii{(see next @value{SECTION})}
@end example
This information is useful when writing a function
@@ -7242,6 +7708,8 @@ if (substr($i, 1, 1) == "\"") @{
As with @code{FS}, the @code{IGNORECASE} variable (@pxref{User-modified})
affects field splitting with @code{FPAT}.
+Assigning a value to @code{FPAT} overrides field splitting
+with @code{FS} and with @code{FIELDWIDTHS}.
Similar to @code{FIELDWIDTHS}, the value of @code{PROCINFO["FS"]}
will be @code{"FPAT"} if content-based field splitting is being used.
@@ -7251,7 +7719,7 @@ the double quotes. @command{gawk} provides no way to deal with this.
Since there is no formal specification for CSV data, there isn't much
more to be done;
the @code{FPAT} mechanism provides an elegant solution for the majority
-of cases, and the @command{gawk} maintainer is satisfied with that.
+of cases, and the @command{gawk} developers are satisfied with that.
@end quotation
As written, the regexp used for @code{FPAT} requires that each field
@@ -7265,6 +7733,12 @@ FPAT = "([^,]*)|(\"[^\"]+\")"
Finally, the @code{patsplit()} function makes the same functionality
available for splitting regular strings (@pxref{String Functions}).
+To recap, @command{gawk} provides three independent methods
+to split input records into fields. @command{gawk} uses whichever
+mechanism was last chosen based on which of the three
+variables---@code{FS}, @code{FIELDWIDTHS}, and @code{FPAT}---was
+last assigned to.
+
@node Multiple Line
@section Multiple-Line Records
@@ -7313,9 +7787,9 @@ the first nonblank line that follows---no matter how many blank lines
appear in a row, they are considered one record separator.
@cindex dark corner, multiline records
-There is an important difference between @samp{RS = ""} and
+However, there is an important difference between @samp{RS = ""} and
@samp{RS = "\n\n+"}. In the first case, leading newlines in the input
-data file are ignored, and if a file ends without extra blank lines
+@value{DF} are ignored, and if a file ends without extra blank lines
after the last record, the final newline is removed from the record.
In the second case, this special processing is not done.
@value{DARKCORNER}
@@ -7351,7 +7825,7 @@ Another way to separate fields is to
put each field on a separate line: to do this, just set the
variable @code{FS} to the string @code{"\n"}. (This single
character separator matches a single newline.)
-A practical example of a data file organized this way might be a mailing
+A practical example of a @value{DF} organized this way might be a mailing
list, where each entry is separated by blank lines. Consider a mailing
list in a file named @file{addresses}, which looks like this:
@@ -7416,7 +7890,7 @@ value of
@table @code
@item RS == "\n"
Records are separated by the newline character (@samp{\n}). In effect,
-every line in the data file is a separate record, including blank lines.
+every line in the @value{DF} is a separate record, including blank lines.
This is the default.
@item RS == @var{any single character}
@@ -7455,7 +7929,7 @@ then @command{gawk} sets @code{RT} to the null string.
@c STARTOFRANGE inex
@cindex input, explicit
So far we have been getting our input data from @command{awk}'s main
-input stream---either the standard input (usually your terminal, sometimes
+input stream---either the standard input (usually your keyboard, sometimes
the output from another program) or from the
files specified on the command line. The @command{awk} language has a
special built-in command called @code{getline} that
@@ -7466,7 +7940,19 @@ The @code{getline} command is used in several different ways and should
The examples that follow the explanation of the @code{getline} command
include material that has not been covered yet. Therefore, come back
and study the @code{getline} command @emph{after} you have reviewed the
-rest of this @value{DOCUMENT} and have a good knowledge of how @command{awk} works.
+rest of
+@ifinfo
+this @value{DOCUMENT}
+@end ifinfo
+@ifhtml
+this @value{DOCUMENT}
+@end ifhtml
+@ifnotinfo
+@ifnothtml
+Parts I and II
+@end ifnothtml
+@end ifnotinfo
+and have a good knowledge of how @command{awk} works.
@cindex @command{gawk}, @code{ERRNO} variable in
@cindex @code{ERRNO} variable, with @command{getline} command
@@ -7474,7 +7960,7 @@ rest of this @value{DOCUMENT} and have a good knowledge of how @command{awk} wor
@cindex @code{getline} command, return values
@cindex @option{--sandbox} option, input redirection with @code{getline}
-The @code{getline} command returns one if it finds a record and zero if
+The @code{getline} command returns 1 if it finds a record and 0 if
it encounters the end of the file. If there is some error in getting
a record, such as a file that cannot be opened, then @code{getline}
returns @minus{}1. In this case, @command{gawk} sets the variable
@@ -7514,42 +8000,63 @@ finished processing the current record, but want to do some special
processing on the next record @emph{right now}. For example:
@example
+# Remove text between /* and */, inclusive
@{
- if ((t = index($0, "/*")) != 0) @{
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) @{
- if (getline <= 0) @{
+ if ((i = index($0, "/*")) != 0) @{
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) @{
+ rest = substr(rest, j + 2) # remove comment
+ @} else @{
+ while (j == 0) @{
+ # get more text
+ if (getline <= 0) @{
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- @}
- u = index($0, "*/")
- offset = 0
- @}
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- @}
- print $0
+ @}
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) @{
+ rest = substr(rest, j + 2)
+ break
+ @}
+ @}
+ @}
+ # build up the output line using string concatenation
+ $0 = out rest
+ @}
+ print $0
@}
@end example
+@c 8/2014: Here is some sample input:
+@ignore
+mon/*comment*/key
+rab/*commen
+t*/bit
+horse /*comment*/more text
+part 1 /*comment*/part 2 /*comment*/part 3
+no comment
+@end ignore
+
This @command{awk} program deletes C-style comments (@samp{/* @dots{}
-*/}) from the input. By replacing the @samp{print $0} with other
+*/}) from the input.
+It uses a number of features we haven't covered yet, including
+string concatenation
+(@pxref{Concatenation})
+and the @code{index()} and @code{substr()} built-in
+functions
+(@pxref{String Functions}).
+By replacing the @samp{print $0} with other
statements, you could perform more complicated processing on the
decommented input, such as searching for matches of a regular
expression. (This program has a subtle problem---it does not work if one
comment ends and another begins on the same line.)
-@ignore
-Exercise,
-write a program that does handle multiple comments on the line.
-@end ignore
-
This form of the @code{getline} command sets @code{NF},
@code{NR}, @code{FNR}, @code{RT}, and the value of @code{$0}.
@@ -7625,7 +8132,7 @@ the value of @code{NF} do not change.
@cindex operators, input/output
Use @samp{getline < @var{file}} to read the next record from @var{file}.
Here @var{file} is a string-valued expression that
-specifies the file name. @samp{< @var{file}} is called a @dfn{redirection}
+specifies the @value{FN}. @samp{< @var{file}} is called a @dfn{redirection}
because it directs input to come from a different place.
For example, the following
program reads its input record from the file @file{secondary.input} when it
@@ -7653,9 +8160,9 @@ changed, resulting in a new value of @code{NF}.
According to POSIX, @samp{getline < @var{expression}} is ambiguous if
@var{expression} contains unparenthesized operators other than
@samp{$}; for example, @samp{getline < dir "/" file} is ambiguous
-because the concatenation operator is not parenthesized. You should
-write it as @samp{getline < (dir "/" file)} if you want your program
-to be portable to all @command{awk} implementations.
+because the concatenation operator (not discussed yet; @pxref{Concatenation})
+is not parenthesized. You should write it as @samp{getline < (dir "/" file)} if
+you want your program to be portable to all @command{awk} implementations.
@node Getline/Variable/File
@subsection Using @code{getline} into a Variable from a File
@@ -7688,19 +8195,19 @@ Such a record is replaced by the contents of the file
Note here how the name of the extra input file is not built into
the program; it is taken directly from the data, specifically from the second field on
-the @samp{@@include} line.
+the @code{@@include} line.
The @code{close()} function is called to ensure that if two identical
-@samp{@@include} lines appear in the input, the entire specified file is
+@code{@@include} lines appear in the input, the entire specified file is
included twice.
@xref{Close Files And Pipes}.
One deficiency of this program is that it does not process nested
-@samp{@@include} statements
-(i.e., @samp{@@include} statements in included files)
+@code{@@include} statements
+(i.e., @code{@@include} statements in included files)
the way a true macro preprocessor would.
@xref{Igawk Program}, for a program
-that does handle nested @samp{@@include} statements.
+that does handle nested @code{@@include} statements.
@node Getline/Pipe
@subsection Using @code{getline} from a Pipe
@@ -7744,9 +8251,10 @@ The @code{close()} function is called to ensure that if two identical
@samp{@@execute} lines appear in the input, the command is run for
each one.
@ifnottex
+@ifnotdocbook
@xref{Close Files And Pipes}.
+@end ifnotdocbook
@end ifnottex
-@c Exercise!!
@c This example is unrealistic, since you could just use system
Given the input:
@@ -7800,7 +8308,7 @@ Unfortunately, @command{gawk} has not been consistent in its treatment
of a construct like @samp{@w{"echo "} "date" | getline}.
Most versions, including the current version, treat it as
@samp{@w{("echo "} "date") | getline}.
-(This how Brian Kernighan's @command{awk} behaves.)
+(This is how BWK @command{awk} behaves.)
Some versions changed and treated it as
@samp{@w{"echo "} ("date" | getline)}.
(This is how @command{mawk} behaves.)
@@ -7906,7 +8414,7 @@ where coprocesses are discussed in more detail.
Here are some miscellaneous points about @code{getline} that
you should bear in mind:
-@itemize @bullet
+@itemize @value{BULLET}
@item
When @code{getline} changes the value of @code{$0} and @code{NF},
@command{awk} does @emph{not} automatically jump to the start of the
@@ -7918,7 +8426,7 @@ However, the new record is tested against any subsequent rules.
@cindex @command{awk}, implementations, limits
@cindex @command{gawk}, implementation issues, limits
@item
-Many @command{awk} implementations limit the number of pipelines that an @command{awk}
+Some very old @command{awk} implementations limit the number of pipelines that an @command{awk}
program may have open to just one. In @command{gawk}, there is no such limit.
You can open as many pipelines (and coprocesses) as the underlying operating
system permits.
@@ -7931,10 +8439,10 @@ system permits.
@item
An interesting side effect occurs if you use @code{getline} without a
redirection inside a @code{BEGIN} rule. Because an unredirected @code{getline}
-reads from the command-line data files, the first @code{getline} command
+reads from the command-line @value{DF}s, the first @code{getline} command
causes @command{awk} to set the value of @code{FILENAME}. Normally,
@code{FILENAME} does not have a value inside @code{BEGIN} rules, because you
-have not yet started to process the command-line data files.
+have not yet started to process the command-line @value{DF}s.
@value{DARKCORNER}
(@xref{BEGIN/END},
also @pxref{Auto-set}.)
@@ -7950,13 +8458,14 @@ probably by accident, and you should reconsider what it is you're
trying to accomplish.
@item
-@ref{Getline Summary}, presents a table summarizing the
+@DBREF{Getline Summary} presents a table summarizing the
@code{getline} variants and which variables they can affect.
It is worth noting that those variants which do not use redirection
can cause @code{FILENAME} to be updated if they cause
@command{awk} to start reading a new input file.
@item
+@cindex Moore, Duncan
If the variable being assigned is an expression with side effects,
different versions of @command{awk} behave differently upon encountering
end-of-file. Some versions don't evaluate the expression; many versions
@@ -7981,7 +8490,7 @@ end of file is encountered, before the element in @code{a} is assigned?
@command{gawk} treats @code{getline} like a function call, and evaluates
the expression @samp{a[++c]} before attempting to read from @file{f}.
-Other versions of @command{awk} only evaluate the expression once they
+However, some versions of @command{awk} only evaluate the expression once they
know that there is a string value to be assigned. Caveat Emptor.
@end itemize
@@ -8017,10 +8526,13 @@ Note: for each variant, @command{gawk} sets the @code{RT} built-in variable.
@section Reading Input With A Timeout
@cindex timeout, reading input
-You may specify a timeout in milliseconds for reading input from a terminal,
-pipe or two-way communication including, TCP/IP sockets. This can be done
+@cindex differences in @command{awk} and @command{gawk}, read timeouts
+This @value{SECTION} describes a feature that is specific to @command{gawk}.
+
+You may specify a timeout in milliseconds for reading input from the keyboard,
+a pipe, or two-way communication, including TCP/IP sockets. This can be done
on a per input, command or connection basis, by setting a special element
-in the @code{PROCINFO} array:
+in the @code{PROCINFO} (@pxref{Auto-set}) array:
@example
PROCINFO["input_name", "READ_TIMEOUT"] = @var{timeout in milliseconds}
@@ -8040,8 +8552,8 @@ else if (ERRNO != "")
print ERRNO
@end example
-Here is how to read interactively from the terminal@footnote{This assumes
-that standard input is the keyboard} without waiting
+Here is how to read interactively from the user@footnote{This assumes
+that standard input is the keyboard.} without waiting
for more than five seconds:
@example
@@ -8050,13 +8562,13 @@ while ((getline < "/dev/stdin") > 0)
print $0
@end example
-@command{gawk} will terminate the read operation if input does not
-arrive after waiting for the timeout period, return failure
-and set the @code{ERRNO} variable to an appropriate string value.
+@command{gawk} terminates the read operation if input does not
+arrive after waiting for the timeout period, returns failure
+and sets the @code{ERRNO} variable to an appropriate string value.
A negative or zero value for the timeout is the same as specifying
no timeout at all.
-A timeout can also be set for reading from the terminal in the implicit
+A timeout can also be set for reading from the keyboard in the implicit
loop that reads input records and matches them against patterns,
like so:
@@ -8117,22 +8629,124 @@ a connection before it can start reading any data,
or the attempt to open a FIFO special file for reading can block
indefinitely until some other process opens it for writing.
-@node Command line directories
+@node Command-line directories
@section Directories On The Command Line
-@cindex differences in @command{awk} and @command{gawk}, command line directories
-@cindex directories, command line
+@cindex differences in @command{awk} and @command{gawk}, command-line directories
+@cindex directories, command-line
@cindex command line, directories on
According to the POSIX standard, files named on the @command{awk}
-command line must be text files. It is a fatal error if they are not.
+command line must be text files; it is a fatal error if they are not.
Most versions of @command{awk} treat a directory on the command line as
a fatal error.
By default, @command{gawk} produces a warning for a directory on the
-command line, but otherwise ignores it. If either of the @option{--posix}
+command line, but otherwise ignores it. This makes it easier to use
+shell wildcards with your @command{awk} program:
+
+@example
+$ @kbd{gawk -f whizprog.awk *} @ii{Directories could kill this program}
+@end example
+
+If either of the @option{--posix}
or @option{--traditional} options is given, then @command{gawk} reverts
to treating a directory on the command line as a fatal error.
+@xref{Extension Sample Readdir}, for a way to treat directories
+as usable data from an @command{awk} program.
+
+@node Input Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Input is split into records based on the value of @code{RS}.
+The possibilities are as follows:
+
+@multitable @columnfractions .25 .35 .40
+@headitem Value of @code{RS} @tab Records are split on @tab @command{awk} / @command{gawk}
+@item Any single character @tab That character @tab @command{awk}
+@item The empty string (@code{""}) @tab Runs of two or more newlines @tab @command{awk}
+@item A regexp @tab Text that matches the regexp @tab @command{gawk}
+@end multitable
+
+@item
+@command{gawk} sets @code{RT} to the text matched by @code{RS}.
+
+@item
+After splitting the input into records, @command{awk} further splits
+the record into individual fields, named @code{$1}, @code{$2} and so
+on. @code{$0} is the whole record, and @code{NF} indicates how many
+fields there are. The default way to split fields is between whitespace
+characters.
+
+@item
+Fields may be referenced using a variable, as in @samp{$NF}. Fields
+may also be assigned values, which causes the value of @code{$0} to be
+recomputed when it is later referenced. Assigning to a field with a number
+greater than @code{NF} creates the field and rebuilds the record, using
+@code{OFS} to separate the fields. Incrementing @code{NF} does the same
+thing. Decrementing @code{NF} throws away fields and rebuilds the record.
+
+@item
+Field splitting is more complicated than record splitting.
+
+@multitable @columnfractions .40 .40 .20
+@headitem Field separator value @tab Fields are split @dots{} @tab @command{awk} / @command{gawk}
+@item @code{FS == " "} @tab On runs of whitespace @tab @command{awk}
+@item @code{FS == @var{any single character}} @tab On that character @tab @command{awk}
+@item @code{FS == @var{regexp}} @tab On text matching the regexp @tab @command{awk}
+@item @code{FS == ""} @tab Each individual character is a separate field @tab @command{gawk}
+@item @code{FIELDWIDTHS == @var{list of columns}} @tab Based on character position @tab @command{gawk}
+@item @code{FPAT == @var{regexp}} @tab On text around text matching the regexp @tab @command{gawk}
+@end multitable
+
+Using @samp{FS = "\n"} causes the entire record to be a single field
+(assuming that newlines separate records).
+
+@item
+@code{FS} may be set from the command line using the @option{-F} option.
+This can also be done using command-line variable assignment.
+
+@item
+@code{PROCINFO["FS"]} can be used to see how fields are being split.
+
+@item
+Use @code{getline} in its various forms to read additional records,
+from the default input stream, from a file, or from a pipe or coprocess.
+
+@item
+Use @code{PROCINFO[@var{file}, "READ_TIMEOUT"]} to cause reads to time out
+for @var{file}.
+
+@item
+Directories on the command line are fatal for standard @command{awk};
+@command{gawk} ignores them if not in POSIX mode.
+
+@end itemize
+
+@c EXCLUDE START
+@node Input Exercises
+@section Exercises
+
+@enumerate
+@item
+Using the @code{FIELDWIDTHS} variable (@pxref{Constant Size}),
+write a program to read election data, where each record represents
+one voter's votes. Come up with a way to define which columns are
+associated with each ballot item, and print the total votes,
+including abstentions, for each item.
+
+@item
+@ref{Plain Getline}, presented a program to remove C-style
+comments (@samp{/* @dots{} */}) from the input. That program
+does not work if one comment ends on one line and another one
+starts later on the same line.
+That can be fixed by making one simple change. What is it?
+
+@end enumerate
+@c EXCLUDE END
+
@node Printing
@chapter Printing Output
@@ -8157,7 +8771,7 @@ For printing with specifications, you need the @code{printf} statement
@cindex @code{printf} statement
Besides basic and formatted printing, this @value{CHAPTER}
also covers I/O redirections to files and pipes, introduces
-the special file names that @command{gawk} processes internally,
+the special @value{FN}s that @command{gawk} processes internally,
and discusses the @code{close()} built-in function.
@menu
@@ -8172,13 +8786,15 @@ and discusses the @code{close()} built-in function.
@command{gawk} allows access to inherited file
descriptors.
* Close Files And Pipes:: Closing Input and Output Files and Pipes.
+* Output Summary:: Output summary.
+* Output Exercises:: Exercises.
@end menu
@node Print
@section The @code{print} Statement
The @code{print} statement is used for producing output with simple, standardized
-formatting. Specify only the strings or numbers to print, in a
+formatting. You specify only the strings or numbers to print, in a
list separated by commas. They are output, separated by single spaces,
followed by a newline. The statement looks like this:
@@ -8208,6 +8824,10 @@ double-quote characters, your text is taken as an @command{awk}
expression, and you will probably get an error. Keep in mind that a
space is printed between any two items.
+Note that the @code{print} statement is a statement and not an
+expression---you can't use it in the pattern part of a pattern-action
+statement, for example.
+
@node Print Examples
@section @code{print} Statement Examples
@@ -8261,10 +8881,9 @@ $ @kbd{awk '@{ print $1 $2 @}' inventory-shipped}
To someone unfamiliar with the @file{inventory-shipped} file, neither
example's output makes much sense. A heading line at the beginning
would make it clearer. Let's add some headings to our table of months
-(@code{$1}) and green crates shipped (@code{$2}). We do this using the
-@code{BEGIN} pattern
-(@pxref{BEGIN/END})
-so that the headings are only printed once:
+(@code{$1}) and green crates shipped (@code{$2}). We do this using
+a @code{BEGIN} rule (@pxref{BEGIN/END}) so that the headings are only
+printed once:
@example
awk 'BEGIN @{ print "Month Crates"
@@ -8350,16 +8969,6 @@ The following example prints the first and second fields of each input
record, separated by a semicolon, with a blank line added after each
newline:
-@ignore
-Exercise,
-Rewrite the
-@example
-awk 'BEGIN @{ print "Month Crates"
- print "----- ------" @}
- @{ print $1, " ", $2 @}' inventory-shipped
-@end example
-program by using a new value of @code{OFS}.
-@end ignore
@example
$ @kbd{awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @}}
@@ -8516,8 +9125,9 @@ of value to print. The rest of the format specifier is made up of
optional @dfn{modifiers} that control @emph{how} to print the value, such as
the field width. Here is a list of the format-control letters:
-@table @code
-@item %c
+@c @asis for docbook to come out right
+@table @asis
+@item @code{%c}
Print a number as an ASCII character; thus, @samp{printf "%c",
65} outputs the letter @samp{A}. The output for a string value is
the first character of the string.
@@ -8525,16 +9135,6 @@ the first character of the string.
@cindex dark corner, format-control characters
@cindex @command{gawk}, format-control characters
@quotation NOTE
-@ignore
-The @samp{%c} format does @emph{not} handle values outside the range
-0--255. On most systems, values from 0--127 are within the range of
-ASCII and will yield an ASCII character. Values in the range 128--255
-may format as characters in some extended character set, or they may not.
-System 390 (IBM architecture mainframe) systems use 8-bit characters,
-and thus values from 0--255 yield the corresponding EBCDIC character.
-Any value above 255 is treated as modulo 255; i.e., the lowest eight bits
-of the value are used. The locale and character set are always ignored.
-@end ignore
The POSIX standard says the first character of a string is printed.
In locales with multibyte characters, @command{gawk} attempts to
convert the leading bytes of the string into a valid wide character
@@ -8542,6 +9142,8 @@ and then to print the multibyte encoding of that character.
Similarly, when printing a numeric value, @command{gawk} allows the
value to be within the numeric range of values that can be held
in a wide character.
+If the conversion to multibyte encoding fails, @command{gawk}
+uses the low eight bits of the value as the character to print.
Other @command{awk} versions generally restrict themselves to printing
the first byte of a string or to numeric values within the range of
@@ -8549,12 +9151,12 @@ a single byte (0--255).
@end quotation
-@item %d@r{,} %i
+@item @code{%d}, @code{%i}
Print a decimal integer.
The two control letters are equivalent.
(The @samp{%i} specification is for compatibility with ISO C.)
-@item %e@r{,} %E
+@item @code{%e}, @code{%E}
Print a number in scientific (exponential) notation;
for example:
@@ -8569,7 +9171,7 @@ which follow the decimal point.
discussed in the next @value{SUBSECTION}.)
@samp{%E} uses @samp{E} instead of @samp{e} in the output.
-@item %f
+@item @code{%f}
Print a number in floating-point notation.
For example:
@@ -8589,39 +9191,40 @@ infinity are formatted as
@samp{-inf} or @samp{-infinity},
and positive infinity as
@samp{inf} and @samp{infinity}.
-The special ``not a number'' value formats as @samp{-nan} or @samp{nan}.
+The special ``not a number'' value formats as @samp{-nan} or @samp{nan}
+(@pxref{Math Definitions}).
-@item %F
+@item @code{%F}
Like @samp{%f} but the infinity and ``not a number'' values are spelled
using uppercase letters.
The @samp{%F} format is a POSIX extension to ISO C; not all systems
support it. On those that don't, @command{gawk} uses @samp{%f} instead.
-@item %g@r{,} %G
+@item @code{%g}, @code{%G}
Print a number in either scientific notation or in floating-point
notation, whichever uses fewer characters; if the result is printed in
scientific notation, @samp{%G} uses @samp{E} instead of @samp{e}.
-@item %o
+@item @code{%o}
Print an unsigned octal integer
(@pxref{Nondecimal-numbers}).
-@item %s
+@item @code{%s}
Print a string.
-@item %u
+@item @code{%u}
Print an unsigned decimal integer.
(This format is of marginal use, because all numbers in @command{awk}
are floating-point; it is provided primarily for compatibility with C.)
-@item %x@r{,} %X
+@item @code{%x}, @code{%X}
Print an unsigned hexadecimal integer;
@samp{%X} uses the letters @samp{A} through @samp{F}
instead of @samp{a} through @samp{f}
(@pxref{Nondecimal-numbers}).
-@item %%
+@item @code{%%}
Print a single @samp{%}.
This does not consume an
argument and it ignores any modifiers.
@@ -8656,7 +9259,7 @@ which they may appear:
@table @code
@cindex differences in @command{awk} and @command{gawk}, @code{print}/@code{printf} statements
@cindex @code{printf} statement, positional specifiers
-@c the command does NOT start a secondary
+@c the code{} does NOT start a secondary
@cindex positional specifiers, @code{printf} statement
@item @var{N}$
An integer constant followed by a @samp{$} is a @dfn{positional specifier}.
@@ -8732,7 +9335,7 @@ For example:
$ @kbd{cat thousands.awk} @ii{Show source program}
@print{} BEGIN @{ printf "%'d\n", 1234567 @}
$ @kbd{LC_ALL=C gawk -f thousands.awk}
-@print{} 1234567 @ii{Results in "C" locale}
+@print{} 1234567 @ii{Results in} "C" @ii{locale}
$ @kbd{LC_ALL=en_US.UTF-8 gawk -f thousands.awk}
@print{} 1,234,567 @ii{Results in US English UTF locale}
@end example
@@ -8842,14 +9445,12 @@ This is not particularly easy to read but it does work.
@c @cindex lint checks
@cindex troubleshooting, fatal errors, @code{printf} format strings
@cindex POSIX @command{awk}, @code{printf} format strings and
-C programmers may be used to supplying additional
-@samp{l}, @samp{L}, and @samp{h}
-modifiers in @code{printf} format strings. These are not valid in @command{awk}.
-Most @command{awk} implementations silently ignore them.
-If @option{--lint} is provided on the command line
-(@pxref{Options}),
-@command{gawk} warns about their use. If @option{--posix} is supplied,
-their use is a fatal error.
+C programmers may be used to supplying additional modifiers (@samp{h},
+@samp{j}, @samp{l}, @samp{L}, @samp{t}, and @samp{z}) in @code{printf}
+format strings. These are not valid in @command{awk}. Most @command{awk}
+implementations silently ignore them. If @option{--lint} is provided
+on the command line (@pxref{Options}), @command{gawk} warns about their
+use. If @option{--posix} is supplied, their use is a fatal error.
@c ENDOFRANGE pfm
@node Printf Examples
@@ -8895,7 +9496,7 @@ they are last on their lines. They don't need to have spaces
after them.
The table could be made to look even nicer by adding headings to the
-tops of the columns. This is done using the @code{BEGIN} pattern
+tops of the columns. This is done using a @code{BEGIN} rule
(@pxref{BEGIN/END})
so that the headers are only printed once, at the beginning of
the @command{awk} program:
@@ -8931,12 +9532,6 @@ awk 'BEGIN @{ format = "%-10s %s\n"
@{ printf format, $1, $2 @}' mail-list
@end example
-@c !!! exercise
-At this point, it would be a worthwhile exercise to use the
-@code{printf} statement to line up the headings and table data for the
-@file{inventory-shipped} example that was covered earlier in the @value{SECTION}
-on the @code{print} statement
-(@pxref{Print}).
@c ENDOFRANGE printfs
@node Redirection
@@ -8967,7 +9562,7 @@ commands, except that they are written inside the @command{awk} program.
@cindex @code{printf} statement, See Also redirection@comma{} of output
There are four forms of output redirection: output to a file, output
appended to a file, output through a pipe to another command, and output
-to a coprocess. They are all shown for the @code{print} statement,
+to a coprocess. We show them all for the @code{print} statement,
but they work identically for @code{printf}:
@table @code
@@ -8976,9 +9571,9 @@ but they work identically for @code{printf}:
@cindex operators, input/output
@item print @var{items} > @var{output-file}
This redirection prints the items into the output file named
-@var{output-file}. The file name @var{output-file} can be any
+@var{output-file}. The @value{FN} @var{output-file} can be any
expression. Its value is changed to a string and then used as a
-file name (@pxref{Expressions}).
+@value{FN} (@pxref{Expressions}).
When this type of redirection is used, the @var{output-file} is erased
before the first output is written to it. Subsequent writes to the same
@@ -9072,7 +9667,7 @@ This example also illustrates the use of a variable to represent
a @var{file} or @var{command}---it is not necessary to always
use a string constant. Using a variable is generally a good idea,
because (if you mean to refer to that same file or command)
-@command{awk} requires that the string value be spelled identically
+@command{awk} requires that the string value be written identically
every time.
@cindex coprocesses
@@ -9131,7 +9726,9 @@ As mentioned earlier
many
@end ifnotinfo
@ifnottex
+@ifnotdocbook
Many
+@end ifnotdocbook
@end ifnottex
older
@command{awk} implementations limit the number of pipelines that an @command{awk}
@@ -9149,7 +9746,7 @@ open as many pipelines as the underlying operating system permits.
A particularly powerful way to use redirection is to build command lines
and pipe them into the shell, @command{sh}. For example, suppose you
-have a list of files brought over from a system where all the file names
+have a list of files brought over from a system where all the @value{FN}s
are stored in uppercase, and you wish to rename them to have names in
all lowercase. The following program is both simple and efficient:
@@ -9181,7 +9778,7 @@ It then sends the list to the shell for execution.
A particularly powerful way to use redirection is to build command lines
and pipe them into the shell, @command{sh}. For example, suppose you
-have a list of files brought over from a system where all the file names
+have a list of files brought over from a system where all the @value{FN}s
are stored in uppercase, and you wish to rename them to have names in
all lowercase. The following program is both simple and efficient:
@@ -9204,12 +9801,12 @@ It then sends the list to the shell for execution.
@c ENDOFRANGE reout
@node Special Files
-@section Special File Names in @command{gawk}
+@section Special @value{FFN}s in @command{gawk}
@c STARTOFRANGE gfn
@cindex @command{gawk}, file names in
-@command{gawk} provides a number of special file names that it interprets
-internally. These file names provide access to standard file descriptors
+@command{gawk} provides a number of special @value{FN}s that it interprets
+internally. These @value{FN}s provide access to standard file descriptors
and TCP/IP networking.
@menu
@@ -9251,7 +9848,8 @@ print "Serious error detected!" | "cat 1>&2"
@noindent
This works by opening a pipeline to a shell command that can access the
standard error stream that it inherits from the @command{awk} process.
-This is far from elegant, and it is also inefficient, because it requires a
+@c 8/2014: Mike Brennan says not to cite this as inefficient. So, fixed.
+This is far from elegant, and it also requires a
separate process. So people writing @command{awk} programs often
don't do this. Instead, they send the error messages to the
screen, like this:
@@ -9273,12 +9871,12 @@ that happens, writing to the screen is not correct. In fact, if
terminal at all.
Then opening @file{/dev/tty} fails.
-@command{gawk} provides special file names for accessing the three standard
-streams. @value{COMMONEXT}. It also provides syntax for accessing
-any other inherited open files. If the file name matches
+@command{gawk} provides special @value{FN}s for accessing the three standard
+streams. @value{COMMONEXT} It also provides syntax for accessing
+any other inherited open files. If the @value{FN} matches
one of these special names when @command{gawk} redirects input or output,
-then it directly uses the stream that the file name stands for.
-These special file names work for all operating systems that @command{gawk}
+then it directly uses the stream that the @value{FN} stands for.
+These special @value{FN}s work for all operating systems that @command{gawk}
has been ported to, not just those that are POSIX-compliant:
@cindex common extensions, @code{/dev/stdin} special file
@@ -9308,7 +9906,7 @@ the shell). Unless special pains are taken in the shell from which
@command{gawk} is invoked, only descriptors 0, 1, and 2 are available.
@end table
-The file names @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
+The @value{FN}s @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
are aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and @file{/dev/fd/2},
respectively. However, they are more self-explanatory.
The proper way to write an error message in a @command{gawk} program
@@ -9319,13 +9917,12 @@ print "Serious error detected!" > "/dev/stderr"
@end example
@cindex troubleshooting, quotes with file names
-Note the use of quotes around the file name.
+Note the use of quotes around the @value{FN}.
Like any other redirection, the value must be a string.
It is a common error to omit the quotes, which leads
to confusing results.
-@c Exercise: What does it do? :-)
-Finally, using the @code{close()} function on a file name of the
+Finally, using the @code{close()} function on a @value{FN} of the
form @code{"/dev/fd/@var{N}"}, for file descriptor numbers
above two, does actually close the given file descriptor.
@@ -9341,7 +9938,7 @@ versions of @command{awk}.
@command{gawk} programs
can open a two-way
TCP/IP connection, acting as either a client or a server.
-This is done using a special file name of the form:
+This is done using a special @value{FN} of the form:
@example
@file{/@var{net-type}/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}}
@@ -9351,7 +9948,7 @@ The @var{net-type} is one of @samp{inet}, @samp{inet4} or @samp{inet6}.
The @var{protocol} is one of @samp{tcp} or @samp{udp},
and the other fields represent the other essential pieces of information
for making a networking connection.
-These file names are used with the @samp{|&} operator for communicating
+These @value{FN}s are used with the @samp{|&} operator for communicating
with a coprocess
(@pxref{Two-way I/O}).
This is an advanced feature, mentioned here only for completeness.
@@ -9359,21 +9956,21 @@ Full discussion is delayed until
@ref{TCP/IP Networking}.
@node Special Caveats
-@subsection Special File Name Caveats
+@subsection Special @value{FFN} Caveats
Here is a list of things to bear in mind when using the
-special file names that @command{gawk} provides:
+special @value{FN}s that @command{gawk} provides:
-@itemize @bullet
+@itemize @value{BULLET}
@cindex compatibility mode (@command{gawk}), file names
@cindex file names, in compatibility mode
@item
-Recognition of these special file names is disabled if @command{gawk} is in
+Recognition of these special @value{FN}s is disabled if @command{gawk} is in
compatibility mode (@pxref{Options}).
@item
@command{gawk} @emph{always}
-interprets these special file names.
+interprets these special @value{FN}s.
For example, using @samp{/dev/fd/4}
for output actually writes on file descriptor 4, and not on a new
file descriptor that is @code{dup()}'ed from file descriptor 4. Most of
@@ -9396,7 +9993,7 @@ Doing so results in unpredictable behavior.
@cindex coprocesses, closing
@cindex @code{getline} command, coprocesses@comma{} using from
-If the same file name or the same shell command is used with @code{getline}
+If the same @value{FN} or the same shell command is used with @code{getline}
more than once during the execution of an @command{awk} program
(@pxref{Getline}),
the file is opened (or the command is executed) the first time only.
@@ -9405,7 +10002,7 @@ The next time the same file or command is used with @code{getline},
another record is read from it, and so on.
Similarly, when a file or pipe is opened for output, @command{awk} remembers
-the file name or command associated with it, and subsequent
+the @value{FN} or command associated with it, and subsequent
writes to the same file or command are appended to the previous writes.
The file or pipe stays open until @command{awk} exits.
@@ -9447,7 +10044,7 @@ file or command, or the next @code{print} or @code{printf} to that
file or command, reopens the file or reruns the command.
Because the expression that you use to close a file or pipeline must
exactly match the expression used to open the file or run the command,
-it is good practice to use a variable to store the file name or command.
+it is good practice to use a variable to store the @value{FN} or command.
The previous example becomes the following:
@example
@@ -9461,7 +10058,7 @@ close(sortcom)
This helps avoid hard-to-find typographical errors in your @command{awk}
programs. Here are some of the reasons for closing an output file:
-@itemize @bullet
+@itemize @value{BULLET}
@item
To write a file and read it back later on in the same @command{awk}
program. Close the file after writing it, then
@@ -9497,7 +10094,7 @@ a separate message.
@cindex @code{close()} function, portability
If you use more files than the system allows you to have open,
@command{gawk} attempts to multiplex the available open files among
-your data files. @command{gawk}'s ability to do this depends upon the
+your @value{DF}s. @command{gawk}'s ability to do this depends upon the
facilities of your operating system, so it may not always work. It is
therefore both good practice and good portability advice to always
use @code{close()} on your files when you are done with them.
@@ -9530,15 +10127,16 @@ more importantly, the file descriptor for the pipe
is not closed and released until @code{close()} is called or
@command{awk} exits.
-@code{close()} will silently do nothing if given an argument that
+@code{close()} silently does nothing if given an argument that
does not represent a file, pipe or coprocess that was opened with
-a redirection.
+a redirection. In such a case, it returns a negative value,
+indicating an error. In addition, @command{gawk} sets @code{ERRNO}
+to a string indicating the error.
-Note also that @samp{close(FILENAME)} has no
-``magic'' effects on the implicit loop that reads through the
-files named on the command line. It is, more likely, a close
-of a file that was never opened, so @command{awk} silently
-does nothing.
+Note also that @samp{close(FILENAME)} has no ``magic'' effects on the
+implicit loop that reads through the files named on the command line.
+It is, more likely, a close of a file that was never opened with a
+redirection, so @command{awk} silently does nothing.
@cindex @code{|} (vertical bar), @code{|&} operator (I/O), pipes@comma{} closing
When using the @samp{|&} operator to communicate with a coprocess,
@@ -9567,7 +10165,7 @@ which discusses it in more detail and gives an example.
@cindex differences in @command{awk} and @command{gawk}, @code{close()} function
@cindex Unix @command{awk}, @code{close()} function and
-In many versions of Unix @command{awk}, the @code{close()} function
+In many older versions of Unix @command{awk}, the @code{close()} function
is actually a statement. It is a syntax error to try and use the return
value from @code{close()}:
@value{DARKCORNER}
@@ -9623,7 +10221,7 @@ when closing a pipe.
@cindex differences in @command{awk} and @command{gawk}, @code{close()} function
@cindex Unix @command{awk}, @code{close()} function and
-In many versions of Unix @command{awk}, the @code{close()} function
+In many older versions of Unix @command{awk}, the @code{close()} function
is actually a statement. It is a syntax error to try and use the return
value from @code{close()}:
@value{DARKCORNER}
@@ -9669,6 +10267,69 @@ when closing a pipe.
@c ENDOFRANGE ofc
@c ENDOFRANGE pc
@c ENDOFRANGE cc
+
+@node Output Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+The @code{print} statement prints comma-separated expressions. Each
+expression is separated by the value of @code{OFS} and terminated by
+the value of @code{ORS}. @code{OFMT} provides the conversion format
+for numeric values for the @code{print} statement.
+
+@item
+The @code{printf} statement provides finer-grained control over output,
+with format control letters for different data types and various flags
+that modify the behavior of the format control letters.
+
+@item
+Output from both @code{print} and @code{printf} may be redirected to
+files, pipes, and coprocesses.
+
+@item
+@command{gawk} provides special @value{FN}s for access to standard input,
+output and error, and for network communications.
+
+@item
+Use @code{close()} to close open file, pipe and coprocess redirections.
+For coprocesses, it is possible to close only one direction of the
+communications.
+
+@end itemize
+
+@c EXCLUDE START
+@node Output Exercises
+@section Exercises
+
+@enumerate
+@item
+Rewrite the program:
+
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, " ", $2 @}' inventory-shipped
+@end example
+
+@noindent
+from @ref{Output Separators}, by using a new value of @code{OFS}.
+
+@item
+Use the @code{printf} statement to line up the headings and table data
+for the @file{inventory-shipped} example that was covered in @ref{Print}.
+
+@item
+What happens if you forget the double quotes when redirecting
+output, as follows:
+
+@example
+BEGIN @{ print "Serious error detected!" > /dev/stderr @}
+@end example
+
+@end enumerate
+@c EXCLUDE END
+
@c ENDOFRANGE prnt
@node Expressions
@@ -9695,6 +10356,7 @@ combinations of these with various operators.
* Function Calls:: A function call is an expression.
* Precedence:: How various operators nest.
* Locales:: How the locale affects things.
+* Expressions Summary:: Expressions summary.
@end menu
@node Values
@@ -9740,9 +10402,9 @@ have different forms, but are stored identically internally.
A @dfn{numeric constant} stands for a number. This number can be an
integer, a decimal fraction, or a number in scientific (exponential)
notation.@footnote{The internal representation of all numbers,
-including integers, uses double precision
-floating-point numbers.
-On most modern systems, these are in IEEE 754 standard format.}
+including integers, uses double precision floating-point numbers.
+On most modern systems, these are in IEEE 754 standard format.
+@xref{Arbitrary Precision Arithmetic}, for much more information.}
Here are some examples of numeric constants that all
have the same value:
@@ -9765,7 +10427,7 @@ double-quotation marks. For example:
@cindex strings, length limitations
represents the string whose contents are @samp{parrot}. Strings in
@command{gawk} can be of any length, and they can contain any of the possible
-eight-bit ASCII characters including ASCII @sc{nul} (character code zero).
+eight-bit ASCII characters including ASCII @value{NUL} (character code zero).
Other @command{awk}
implementations may have difficulty with some character codes.
@@ -9910,7 +10572,8 @@ A regexp constant is a regular expression description enclosed in
slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in
@command{awk} programs are constant, but the @samp{~} and @samp{!~}
matching operators can also match computed or dynamic regexps
-(which are just ordinary strings or variables that contain a regexp).
+(which are typically just ordinary strings or variables that contain a regexp,
+but could be more complex expressions).
@c ENDOFRANGE cnst
@node Using Constant Regexps
@@ -9984,7 +10647,7 @@ upon the contents of the current input record.
Constant regular expressions are also used as the first argument for
the @code{gensub()}, @code{sub()}, and @code{gsub()} functions, as the
second argument of the @code{match()} function,
-and as the third argument of the @code{patsplit()} function
+and as the third argument of the @code{split()} and @code{patsplit()} functions
(@pxref{String Functions}).
Modern implementations of @command{awk}, including @command{gawk}, allow
the third argument of @code{split()} to be a regexp constant, but some
@@ -10016,7 +10679,7 @@ function mysub(pat, repl, str, global)
@c @cindex automatic warnings
@c @cindex warnings, automatic
In this example, the programmer wants to pass a regexp constant to the
-user-defined function @code{mysub}, which in turn passes it on to
+user-defined function @code{mysub()}, which in turn passes it on to
either @code{sub()} or @code{gsub()}. However, what really happens is that
the @code{pat} parameter is either one or zero, depending upon whether
or not @code{$0} matches @code{/hi/}.
@@ -10037,7 +10700,7 @@ on the @command{awk} command line.
@menu
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line and a
+* Assignment Options:: Setting variables on the command line and a
summary of command-line syntax. This is an
advanced method of input.
@end menu
@@ -10048,7 +10711,11 @@ on the @command{awk} command line.
Variables let you give names to values and refer to them later. Variables
have already been used in many of the examples. The name of a variable
must be a sequence of letters, digits, or underscores, and it may not begin
-with a digit. Case is significant in variable names; @code{a} and @code{A}
+with a digit.
+Here, a @dfn{letter} is any one of the 52 upper- and lowercase
+English letters. Other characters that may be defined as letters
+in non-English locales are not valid in variable names.
+Case is significant in variable names; @code{a} and @code{A}
are distinct variables.
A variable name is a valid expression by itself; it represents the
@@ -10105,7 +10772,7 @@ as in the following:
@noindent
the variable is set at the very beginning, even before the
@code{BEGIN} rules execute. The @option{-v} option and its assignment
-must precede all the file name arguments, as well as the program text.
+must precede all the @value{FN} arguments, as well as the program text.
(@xref{Options}, for more information about
the @option{-v} option.)
Otherwise, the variable assignment is performed at a time determined by
@@ -10146,6 +10813,19 @@ sequences
@node Conversion
@subsection Conversion of Strings and Numbers
+Number-to-string and string-to-number conversion are generally
+straightforward. There can be subtleties to be aware of;
+this @value{SECTION} discusses this important facet of @command{awk}.
+
+@menu
+* Strings And Numbers:: How @command{awk} Converts Between Strings And
+ Numbers.
+* Locale influences conversions:: How the locale may affect conversions.
+@end menu
+
+@node Strings And Numbers
+@subsubsection How @command{awk} Converts Between Strings And Numbers
+
@cindex converting, strings to numbers
@cindex strings, converting
@cindex numbers, converting
@@ -10215,6 +10895,12 @@ b = a ""
@code{b} has the value @code{"12"}, not @code{"12.00"}.
@value{DARKCORNER}
+@cindex sidebar, Pre-POSIX @command{awk} Used @code{OFMT} For String Conversion
+@ifdocbook
+@docbook
+<sidebar><title>Pre-POSIX @command{awk} Used @code{OFMT} For String Conversion</title>
+@end docbook
+
@cindex POSIX @command{awk}, @code{OFMT} variable and
@cindex @code{OFMT} variable
@cindex portability, new @command{awk} vs.@: old @command{awk}
@@ -10226,32 +10912,56 @@ specifies the output format to use when printing numbers with @code{print}.
conversion from the semantics of printing. Both @code{CONVFMT} and
@code{OFMT} have the same default value: @code{"%.6g"}. In the vast majority
of cases, old @command{awk} programs do not change their behavior.
-However, these semantics for @code{OFMT} are something to keep in mind if you must
-port your new-style program to older implementations of @command{awk}.
-We recommend
-that instead of changing your programs, just port @command{gawk} itself.
-@xref{Print},
-for more information on the @code{print} statement.
-
-And, once again, where you are can matter when it comes to converting
-between numbers and strings. In @ref{Locales}, we mentioned that
-the local character set and language (the locale) can affect how
-@command{gawk} matches characters. The locale also affects numeric
-formats. In particular, for @command{awk} programs, it affects the
-decimal point character. The @code{"C"} locale, and most English-language
-locales, use the period character (@samp{.}) as the decimal point.
-However, many (if not most) European and non-English locales use the comma
-(@samp{,}) as the decimal point character.
+@xref{Print}, for more information on the @code{print} statement.
+
+@docbook
+</sidebar>
+@end docbook
+@end ifdocbook
+
+@ifnotdocbook
+@cartouche
+@center @b{Pre-POSIX @command{awk} Used @code{OFMT} For String Conversion}
+
+
+@cindex POSIX @command{awk}, @code{OFMT} variable and
+@cindex @code{OFMT} variable
+@cindex portability, new @command{awk} vs.@: old @command{awk}
+@cindex @command{awk}, new vs.@: old, @code{OFMT} variable
+Prior to the POSIX standard, @command{awk} used the value
+of @code{OFMT} for converting numbers to strings. @code{OFMT}
+specifies the output format to use when printing numbers with @code{print}.
+@code{CONVFMT} was introduced in order to separate the semantics of
+conversion from the semantics of printing. Both @code{CONVFMT} and
+@code{OFMT} have the same default value: @code{"%.6g"}. In the vast majority
+of cases, old @command{awk} programs do not change their behavior.
+@xref{Print}, for more information on the @code{print} statement.
+@end cartouche
+@end ifnotdocbook
+
+@node Locale influences conversions
+@subsubsection Locales Can Influence Conversion
+
+Where you are can matter when it comes to converting between numbers and
+strings. The local character set and language---the @dfn{locale}---can
+affect numeric formats. In particular, for @command{awk} programs,
+it affects the decimal point character and the thousands-separator
+character. The @code{"C"} locale, and most English-language locales,
+use the period character (@samp{.}) as the decimal point and don't
+have a thousands separator. However, many (if not most) European and
+non-English locales use the comma (@samp{,}) as the decimal point
+character. European locales often use either a space or a period as
+the thousands separator, if they have one.
@cindex dark corner, locale's decimal point character
The POSIX standard says that @command{awk} always uses the period as the decimal
-point when reading the @command{awk} program source code, and for command-line
-variable assignments (@pxref{Other Arguments}).
-However, when interpreting input data, for @code{print} and @code{printf} output,
-and for number to string conversion, the local decimal point character is used.
-@value{DARKCORNER}
-Here are some examples indicating the difference in behavior,
-on a GNU/Linux system:
+point when reading the @command{awk} program source code, and for
+command-line variable assignments (@pxref{Other Arguments}). However,
+when interpreting input data, for @code{print} and @code{printf} output,
+and for number-to-string conversion, the local decimal point character
+is used. @value{DARKCORNER} In all cases, numbers in source code and
+in input data cannot have a thousands separator. Here are some examples
+indicating the difference in behavior, on a GNU/Linux system:
@example
$ @kbd{export POSIXLY_CORRECT=1} @ii{Force POSIX behavior}
@@ -10266,7 +10976,7 @@ $ @kbd{echo 4,321 | LC_ALL=en_DK.utf-8 gawk '@{ print $1 + 1 @}'}
@end example
@noindent
-The @samp{en_DK.utf-8} locale is for English in Denmark, where the comma acts as
+The @code{en_DK.utf-8} locale is for English in Denmark, where the comma acts as
the decimal point separator. In the normal @code{"C"} locale, @command{gawk}
treats @samp{4,321} as @samp{4}, while in the Danish locale, it's treated
as the full number, 4.321.
@@ -10413,7 +11123,7 @@ b * int(a / b) + (a % b) == a
@end example
One possibly undesirable effect of this definition of remainder is that
-@code{@var{x} % @var{y}} is negative if @var{x} is negative. Thus:
+@samp{@var{x} % @var{y}} is negative if @var{x} is negative. Thus:
@example
-17 % 8 = -1
@@ -10421,7 +11131,7 @@ One possibly undesirable effect of this definition of remainder is that
In other @command{awk} implementations, the signedness of the remainder
may be machine-dependent.
-@c !!! what does posix say?
+@c FIXME !!! what does posix say?
@cindex portability, @code{**} operator and
@cindex @code{*} (asterisk), @code{**} operator
@@ -10481,7 +11191,7 @@ print "something meaningful" > file name
@cindex @command{mawk} utility
@noindent
This produces a syntax error with some versions of Unix
-@command{awk}.@footnote{It happens that Brian Kernighan's
+@command{awk}.@footnote{It happens that BWK
@command{awk}, @command{gawk} and @command{mawk} all ``get it right,''
but you should not rely on this.}
It is necessary to use the following:
@@ -10507,7 +11217,7 @@ BEGIN @{
@end example
@noindent
-It is not defined whether the assignment to @code{a} happens
+It is not defined whether the second assignment to @code{a} happens
before or after the value of @code{a} is retrieved for producing the
concatenated value. The result could be either @samp{don't panic},
or @samp{panic panic}.
@@ -10566,7 +11276,7 @@ Otherwise, it's parsed as follows:
@end display
As mentioned earlier,
-when doing concatenation, @emph{parenthesize}. Otherwise,
+when mixing concatenation with other operators, @emph{parenthesize}. Otherwise,
you're never quite sure what you'll get.
@node Assignment Ops
@@ -10629,8 +11339,8 @@ element. (Such values are called @dfn{rvalues}.)
@cindex variables, types of
It is important to note that variables do @emph{not} have permanent types.
-A variable's type is simply the type of whatever value it happens
-to hold at the moment. In the following program fragment, the variable
+A variable's type is simply the type of whatever value was last assigned
+to it. In the following program fragment, the variable
@code{foo} has a numeric value at first, and a string value later on:
@example
@@ -10731,6 +11441,7 @@ The indices of @code{bar} are practically guaranteed to be different, because
and see @ref{Numeric Functions}, for more information).
This example illustrates an important fact about assignment
operators: the lefthand expression is only evaluated @emph{once}.
+
It is up to the implementation as to which expression is evaluated
first, the lefthand or the righthand.
Consider this example:
@@ -10763,17 +11474,17 @@ to a number.
@caption{Arithmetic Assignment Operators}
@multitable @columnfractions .30 .70
@headitem Operator @tab Effect
-@item @var{lvalue} @code{+=} @var{increment} @tab Adds @var{increment} to the value of @var{lvalue}.
-@item @var{lvalue} @code{-=} @var{decrement} @tab Subtracts @var{decrement} from the value of @var{lvalue}.
-@item @var{lvalue} @code{*=} @var{coefficient} @tab Multiplies the value of @var{lvalue} by @var{coefficient}.
-@item @var{lvalue} @code{/=} @var{divisor} @tab Divides the value of @var{lvalue} by @var{divisor}.
-@item @var{lvalue} @code{%=} @var{modulus} @tab Sets @var{lvalue} to its remainder by @var{modulus}.
+@item @var{lvalue} @code{+=} @var{increment} @tab Add @var{increment} to the value of @var{lvalue}.
+@item @var{lvalue} @code{-=} @var{decrement} @tab Subtract @var{decrement} from the value of @var{lvalue}.
+@item @var{lvalue} @code{*=} @var{coefficient} @tab Multiply the value of @var{lvalue} by @var{coefficient}.
+@item @var{lvalue} @code{/=} @var{divisor} @tab Divide the value of @var{lvalue} by @var{divisor}.
+@item @var{lvalue} @code{%=} @var{modulus} @tab Set @var{lvalue} to its remainder by @var{modulus}.
@cindex common extensions, @code{**=} operator
@cindex extensions, common@comma{} @code{**=} operator
@cindex @command{awk} language, POSIX version
@cindex POSIX @command{awk}
@item @var{lvalue} @code{^=} @var{power} @tab
-@item @var{lvalue} @code{**=} @var{power} @tab Raises @var{lvalue} to the power @var{power}. @value{COMMONEXT}
+@item @var{lvalue} @code{**=} @var{power} @tab Raise @var{lvalue} to the power @var{power}. @value{COMMONEXT}
@end multitable
@end float
@@ -10823,10 +11534,8 @@ A workaround is:
awk '/[=]=/' /dev/null
@end example
-@command{gawk} does not have this problem,
-nor do the other
-freely available versions described in
-@ref{Other Versions}.
+@command{gawk} does not have this problem; BWK @command{awk}
+and @command{mawk} also do not (@pxref{Other Versions}).
@docbook
</sidebar>
@@ -10871,10 +11580,8 @@ A workaround is:
awk '/[=]=/' /dev/null
@end example
-@command{gawk} does not have this problem,
-nor do the other
-freely available versions described in
-@ref{Other Versions}.
+@command{gawk} does not have this problem; BWK @command{awk}
+and @command{mawk} also do not (@pxref{Other Versions}).
@end cartouche
@end ifnotdocbook
@c ENDOFRANGE exas
@@ -10899,11 +11606,10 @@ are convenient abbreviations for very common operations.
@cindex side effects, decrement/increment operators
The operator used for adding one is written @samp{++}. It can be used to increment
a variable either before or after taking its value.
-To pre-increment a variable @code{v}, write @samp{++v}. This adds
+To @dfn{pre-increment} a variable @code{v}, write @samp{++v}. This adds
one to the value of @code{v}---that new value is also the value of the
-expression. (The assignment expression @samp{v += 1} is completely
-equivalent.)
-Writing the @samp{++} after the variable specifies post-increment. This
+expression. (The assignment expression @samp{v += 1} is completely equivalent.)
+Writing the @samp{++} after the variable specifies @dfn{post-increment}. This
increments the variable value just the same; the difference is that the
value of the increment expression itself is the variable's @emph{old}
value. Thus, if @code{foo} has the value four, then the expression @samp{foo++}
@@ -10915,7 +11621,18 @@ The post-increment @samp{foo++} is nearly the same as writing @samp{(foo
+= 1) - 1}. It is not perfectly equivalent because all numbers in
@command{awk} are floating-point---in floating-point, @samp{foo + 1 - 1} does
not necessarily equal @code{foo}. But the difference is minute as
-long as you stick to numbers that are fairly small (less than 10e12).
+long as you stick to numbers that are fairly small (less than
+@iftex
+@math{10^{12}}).
+@end iftex
+@ifnottex
+@ifnotdocbook
+10^12).
+@end ifnotdocbook
+@end ifnottex
+@docbook
+10<superscript>12</superscript>). @c
+@end docbook
@cindex @code{$} (dollar sign), incrementing fields and arrays
@cindex dollar sign (@code{$}), incrementing fields and arrays
@@ -11155,20 +11872,16 @@ compares variables.
@cindex numeric, strings
@cindex strings, numeric
@cindex POSIX @command{awk}, numeric strings and
-The 1992 POSIX standard introduced
+The POSIX standard introduced
the concept of a @dfn{numeric string}, which is simply a string that looks
like a number---for example, @code{@w{" +2"}}. This concept is used
for determining the type of a variable.
The type of the variable is important because the types of two variables
determine how they are compared.
-The various versions of the POSIX standard did not get the rules
-quite right for several editions. Fortunately, as of at least the
-2008 standard (and possibly earlier), the standard has been fixed,
-and variable typing follows these rules:@footnote{@command{gawk} has
-followed these rules for many years,
-and it is gratifying that the POSIX standard is also now correct.}
+Variable typing follows these rules:
-@itemize @bullet
+
+@itemize @value{BULLET}
@item
A numeric constant or the result of a numeric operation has the @var{numeric}
attribute.
@@ -11254,6 +11967,7 @@ STRNUM &&string &numeric &numeric\cr
}}}
@end tex
@ifnottex
+@ifnotdocbook
@display
+----------------------------------------------
| STRING NUMERIC STRNUM
@@ -11266,7 +11980,51 @@ NUMERIC | string numeric numeric
STRNUM | string numeric numeric
--------+----------------------------------------------
@end display
+@end ifnotdocbook
@end ifnottex
+@docbook
+<informaltable>
+<tgroup cols="4">
+<colspec colname="1" align="left"/>
+<colspec colname="2" align="left"/>
+<colspec colname="3" align="left"/>
+<colspec colname="4" align="left"/>
+<thead>
+<row>
+<entry/>
+<entry>STRING</entry>
+<entry>NUMERIC</entry>
+<entry>STRNUM</entry>
+</row>
+</thead>
+
+<tbody>
+<row>
+<entry><emphasis role="bold">STRING</emphasis></entry>
+<entry>string</entry>
+<entry>string</entry>
+<entry>string</entry>
+</row>
+
+<row>
+<entry><emphasis role="bold">NUMERIC</emphasis></entry>
+<entry>string</entry>
+<entry>numeric</entry>
+<entry>numeric</entry>
+</row>
+
+<row>
+<entry><emphasis role="bold">STRNUM</emphasis></entry>
+<entry>string</entry>
+<entry>numeric</entry>
+<entry>numeric</entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
The basic idea is that user input that looks numeric---and @emph{only}
user input---should be treated as numeric, even though it is actually
@@ -11274,7 +12032,7 @@ made of characters and is therefore also a string.
Thus, for example, the string constant @w{@code{" +3.14"}},
when it appears in program source code,
is a string---even though it looks numeric---and
-is @emph{never} treated as number for comparison
+is @emph{never} treated as a number for comparison
purposes.
In short, when one operand is a ``pure'' string, such as a string
@@ -11285,8 +12043,8 @@ This point bears additional emphasis: All user input is made of characters,
and so is first and foremost of @var{string} type; input strings
that look numeric are additionally given the @var{strnum} attribute.
Thus, the six-character input string @w{@samp{ +3.14}} receives the
-@var{strnum} attribute. In contrast, the eight-character literal
-@w{@code{" +3.14"}} appearing in program text is a string constant.
+@var{strnum} attribute. In contrast, the eight characters
+@w{@code{" +3.14"}} appearing in program text comprise a string constant.
The following examples print @samp{1} when the comparison between
the two different constants is true, @samp{0} otherwise:
@@ -11447,7 +12205,7 @@ has the value one if @code{x} contains @samp{foo}, such as
@cindex @code{!} (exclamation point), @code{!~} operator
@cindex exclamation point (@code{!}), @code{!~} operator
The righthand operand of the @samp{~} and @samp{!~} operators may be
-either a regexp constant (@code{/@dots{}/}) or an ordinary
+either a regexp constant (@code{/}@dots{}@code{/}) or an ordinary
expression. In the latter case, the value of the expression as a string is used as a
dynamic regexp (@pxref{Regexp Usage}; also
@pxref{Computed Regexps}).
@@ -11472,7 +12230,9 @@ where this is discussed in more detail.
@subsubsection String Comparison With POSIX Rules
The POSIX standard says that string comparison is performed based
-on the locale's collating order. This is usually very different
+on the locale's @dfn{collating order}. This is the order in which
+characters sort, as defined by the locale (for more discussion,
+@pxref{Ranges and Locales}). This order is usually very different
from the results obtained when doing straight character-by-character
comparison.@footnote{Technically, string comparison is supposed
to behave the same way as if the strings are compared with the C
@@ -11480,7 +12240,7 @@ to behave the same way as if the strings are compared with the C
Because this behavior differs considerably from existing practice,
@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
-Here is an example to illustrate the difference, in an @samp{en_US.UTF-8}
+Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
locale:
@example
@@ -11589,7 +12349,7 @@ is ``short-circuited'' if the result can be determined part way through
its evaluation.
@cindex line continuations
-Statements that use @samp{&&} or @samp{||} can be continued simply
+Statements that end with @samp{&&} or @samp{||} can be continued simply
by putting a newline after them. But you cannot put a newline in front
of either of these operators without using backslash continuation
(@pxref{Statements/Lines}).
@@ -11608,7 +12368,7 @@ program is one way to print lines in between special bracketing lines:
@example
$1 == "START" @{ interested = ! interested; next @}
-interested == 1 @{ print @}
+interested @{ print @}
$1 == "END" @{ interested = ! interested; next @}
@end example
@@ -11628,6 +12388,16 @@ bogus input data, but the point is to illustrate the use of `!',
so we'll leave well enough alone.
@end ignore
+Most commonly, the @samp{!} operator is used in the conditions of
+@code{if} and @code{while} statements, where it often makes more
+sense to phrase the logic in the negative:
+
+@example
+if (! @var{some condition} || @var{some other condition}) @{
+ @var{@dots{} do whatever processing @dots{}}
+@}
+@end example
+
@cindex @code{next} statement
@quotation NOTE
The @code{next} statement is discussed in
@@ -11696,7 +12466,7 @@ However, putting a newline in front
of either character does not work without using backslash continuation
(@pxref{Statements/Lines}).
If @option{--posix} is specified
-(@pxref{Options}), then this extension is disabled.
+(@pxref{Options}), this extension is disabled.
@node Function Calls
@section Function Calls
@@ -11715,6 +12485,8 @@ functions and their descriptions. In addition, you can define
functions for use in your program.
@xref{User-defined},
for instructions on how to do this.
+Finally, @command{gawk} lets you write functions in C or C++
+that may be called from your program: see @ref{Dynamic Extensions}.
@cindex arguments, in function calls
The way to use a function is with a @dfn{function call} expression,
@@ -11756,7 +12528,9 @@ If those arguments are not supplied, the functions
use a reasonable default value.
@xref{Built-in}, for full details. If arguments
are omitted in calls to user-defined functions, then those arguments are
-treated as local variables and initialized to the empty string
+treated as local variables. Such local variables act like the
+empty string if referenced where a string value is required,
+and like zero if referenced where a numeric value is required
(@pxref{User-defined}).
As an advanced feature, @command{gawk} provides indirect function calls,
@@ -11765,12 +12539,12 @@ when you write the source code to your program. We defer discussion of
this feature until later; see @ref{Indirect Calls}.
@cindex side effects, function calls
-Like every other expression, the function call has a value, which is
-computed by the function based on the arguments you give it. In this
-example, the value of @samp{sqrt(@var{argument})} is the square root of
-@var{argument}.
-The following program reads numbers, one number per line, and prints the
-square root of each one:
+Like every other expression, the function call has a value, often
+called the @dfn{return value}, which is computed by the function
+based on the arguments you give it. In this example, the return value
+of @samp{sqrt(@var{argument})} is the square root of @var{argument}.
+The following program reads numbers, one number per line, and prints
+the square root of each one:
@example
$ @kbd{awk '@{ print "The square root of", $1, "is", sqrt($1) @}'}
@@ -11858,28 +12632,28 @@ expression because the first @samp{$} has higher precedence than the
This table presents @command{awk}'s operators, in order of highest
to lowest precedence:
-@c use @code in the items, looks better in TeX w/o all the quotes
-@table @code
-@item (@dots{})
+@c @asis for docbook to come out right
+@table @asis
+@item @code{(}@dots{}@code{)}
Grouping.
@cindex @code{$} (dollar sign), @code{$} field operator
@cindex dollar sign (@code{$}), @code{$} field operator
-@item $
+@item @code{$}
Field reference.
@cindex @code{+} (plus sign), @code{++} operator
@cindex plus sign (@code{+}), @code{++} operator
@cindex @code{-} (hyphen), @code{--} operator
@cindex hyphen (@code{-}), @code{--} operator
-@item ++ --
+@item @code{++ --}
Increment, decrement.
@cindex @code{^} (caret), @code{^} operator
@cindex caret (@code{^}), @code{^} operator
@cindex @code{*} (asterisk), @code{**} operator
@cindex asterisk (@code{*}), @code{**} operator
-@item ^ **
+@item @code{^ **}
Exponentiation. These operators group right-to-left.
@cindex @code{+} (plus sign), @code{+} operator
@@ -11888,7 +12662,7 @@ Exponentiation. These operators group right-to-left.
@cindex hyphen (@code{-}), @code{-} operator
@cindex @code{!} (exclamation point), @code{!} operator
@cindex exclamation point (@code{!}), @code{!} operator
-@item + - !
+@item @code{+ - !}
Unary plus, minus, logical ``not.''
@cindex @code{*} (asterisk), @code{*} operator, as multiplication operator
@@ -11897,17 +12671,17 @@ Unary plus, minus, logical ``not.''
@cindex forward slash (@code{/}), @code{/} operator
@cindex @code{%} (percent sign), @code{%} operator
@cindex percent sign (@code{%}), @code{%} operator
-@item * / %
+@item @code{* / %}
Multiplication, division, remainder.
@cindex @code{+} (plus sign), @code{+} operator
@cindex plus sign (@code{+}), @code{+} operator
@cindex @code{-} (hyphen), @code{-} operator
@cindex hyphen (@code{-}), @code{-} operator
-@item + -
+@item @code{+ -}
Addition, subtraction.
-@item @r{String Concatenation}
+@item String Concatenation
There is no special symbol for concatenation.
The operands are simply written side by side
(@pxref{Concatenation}).
@@ -11933,7 +12707,7 @@ The operands are simply written side by side
@cindex @code{|} (vertical bar), @code{|&} operator (I/O)
@cindex vertical bar (@code{|}), @code{|&} operator (I/O)
@cindex operators, input/output
-@item < <= == != > >= >> | |&
+@item @code{< <= == != > >= >> | |&}
Relational and redirection.
The relational operators and the redirections have the same precedence
level. Characters such as @samp{>} serve both as relationals and as
@@ -11954,26 +12728,26 @@ The correct way to write this statement is @samp{print foo > (a ? b : c)}.
@cindex tilde (@code{~}), @code{~} operator
@cindex @code{!} (exclamation point), @code{!~} operator
@cindex exclamation point (@code{!}), @code{!~} operator
-@item ~ !~
+@item @code{~ !~}
Matching, nonmatching.
@cindex @code{in} operator
-@item in
+@item @code{in}
Array membership.
@cindex @code{&} (ampersand), @code{&&} operator
@cindex ampersand (@code{&}), @code{&&} operator
-@item &&
+@item @code{&&}
Logical ``and''.
@cindex @code{|} (vertical bar), @code{||} operator
@cindex vertical bar (@code{|}), @code{||} operator
-@item ||
+@item @code{||}
Logical ``or''.
@cindex @code{?} (question mark), @code{?:} operator
@cindex question mark (@code{?}), @code{?:} operator
-@item ?:
+@item @code{?:}
Conditional. This operator groups right-to-left.
@cindex @code{+} (plus sign), @code{+=} operator
@@ -11990,7 +12764,7 @@ Conditional. This operator groups right-to-left.
@cindex percent sign (@code{%}), @code{%=} operator
@cindex @code{^} (caret), @code{^=} operator
@cindex caret (@code{^}), @code{^=} operator
-@item = += -= *= /= %= ^= **=
+@item @code{= += -= *= /= %= ^= **=}
Assignment. These operators group right-to-left.
@end table
@@ -12007,27 +12781,102 @@ For maximum portability, do not use them.
@section Where You Are Makes A Difference
@cindex locale, definition of
-Modern systems support the notion of @dfn{locales}: a way to tell
-the system about the local character set and language.
+Modern systems support the notion of @dfn{locales}: a way to tell the
+system about the local character set and language. The ISO C standard
+defines a default @code{"C"} locale, which is an environment that is
+typical of what many C programmers are used to.
Once upon a time, the locale setting used to affect regexp matching
(@pxref{Ranges and Locales}), but this is no longer true.
-Locales can affect record splitting.
-For the normal case of @samp{RS = "\n"}, the locale is largely irrelevant.
-For other single-character record separators, setting @samp{LC_ALL=C}
-in the environment
-will give you much better performance when reading records. Otherwise,
+Locales can affect record splitting. For the normal case of @samp{RS =
+"\n"}, the locale is largely irrelevant. For other single-character
+record separators, setting @samp{LC_ALL=C} in the environment will
+give you much better performance when reading records. Otherwise,
@command{gawk} has to make several function calls, @emph{per input
character}, to find the record terminator.
-According to POSIX, string comparison is also affected by locales
-(similar to regular expressions). The details are presented in
-@ref{POSIX String Comparison}.
+Locales can affect how dates and times are formatted (@pxref{Time
+Functions}). For example, a common way to abbreviate the date September
+4, 2015 in the United States is ``9/4/15.'' In many countries in
+Europe, however, it is abbreviated ``4.9.15.'' Thus, the @samp{%x}
+specification in a @code{"US"} locale might produce @samp{9/4/15},
+while in an @code{"EUROPE"} locale, it might produce @samp{4.9.15}.
+
+According to POSIX, string comparison is also affected by locales (similar
+to regular expressions). The details are presented in @ref{POSIX String
+Comparison}.
Finally, the locale affects the value of the decimal point character
-used when @command{gawk} parses input data. This is discussed in
-detail in @ref{Conversion}.
+used when @command{gawk} parses input data. This is discussed in detail
+in @ref{Conversion}.
+
+@node Expressions Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Expressions are the basic elements of computation in programs. They are
+built from constants, variables, function calls, and combinations of the
+various kinds of values with operators.
+
+@item
+@command{awk} supplies three kinds of constants: numeric, string, and
+regexp. @command{gawk} lets you specify numeric constants in octal
+and hexadecimal (bases 8 and 16) in addition to decimal (base 10).
+In certain contexts, a standalone regexp constant such as @code{/foo/}
+has the same meaning as @samp{$0 ~ /foo/}.
+
+@item
+Variables hold values between uses in computations. A number of built-in
+variables provide information to your @command{awk} program, and a number
+of others let you control how @command{awk} behaves.
+
+@item
+Numbers are automatically converted to strings, and strings to numbers,
+as needed by @command{awk}. Numeric values are converted as if they were
+formatted with @code{sprintf()} using the format in @code{CONVFMT}.
+Locales can influence the conversions.
+
+@item
+@command{awk} provides the usual arithmetic operators (addition,
+subtraction, multiplication, division, modulus), and unary plus and minus.
+It also provides comparison operators, boolean operators, and regexp
+matching operators. String concatenation is accomplished by placing
+two expressions next to each other; there is no explicit operator.
+The three-operand @samp{?:} operator provides an ``if-else'' test within
+expressions.
+
+@item
+Assignment operators provide convenient shorthands for common arithmetic
+operations.
+
+@item
+In @command{awk}, a value is considered to be true if it is nonzero
+@emph{or} non-null. Otherwise, the value is false.
+
+@item
+A value's type is set upon each assignment and may change over its
+lifetime. The type determines how it behaves in comparisons (string
+or numeric).
+
+@item
+Function calls return a value which may be used as part of a larger
+expression. Expressions used to pass parameter values are fully
+evaluated before the function is called. @command{awk} provides
+built-in and user-defined functions; this is described later on in this
+@value{DOCUMENT}.
+
+@item
+Operator precedence specifies the order in which operations are performed,
+unless explicitly overridden by parentheses. @command{awk}'s operator
+precedence is compatible with that of C.
+
+@item
+Locales can affect the format of data as output by an @command{awk}
+program, and occasionally the format for data read as input.
+
+@end itemize
@c ENDOFRANGE exps
@@ -12055,6 +12904,7 @@ building something useful.
* Statements:: Describes the various control statements in
detail.
* Built-in Variables:: Summarizes the built-in variables.
+* Pattern Action Summary:: Patterns and Actions summary.
@end menu
@node Pattern Overview
@@ -12085,10 +12935,10 @@ A single expression. It matches when its value
is nonzero (if a number) or non-null (if a string).
(@xref{Expression Patterns}.)
-@item @var{pat1}, @var{pat2}
+@item @var{begpat}, @var{endpat}
A pair of patterns separated by a comma, specifying a range of records.
-The range includes both the initial record that matches @var{pat1} and
-the final record that matches @var{pat2}.
+The range includes both the initial record that matches @var{begpat} and
+the final record that matches @var{endpat}.
(@xref{Ranges}.)
@item BEGIN
@@ -12100,7 +12950,7 @@ Special patterns for you to supply startup or cleanup actions for your
@item BEGINFILE
@itemx ENDFILE
Special patterns for you to supply startup or cleanup actions to be
-done on a per file basis.
+done on a per-file basis.
(@xref{BEGINFILE/ENDFILE}.)
@item @var{empty}
@@ -12168,7 +13018,7 @@ Contrast this with the following regular expression match, which
accepts any record with a first field that contains @samp{li}:
@example
-$ @kbd{awk '$1 ~ /foo/ @{ print $2 @}' mail-list}
+$ @kbd{awk '$1 ~ /li/ @{ print $2 @}' mail-list}
@print{} 555-5553
@print{} 555-6699
@end example
@@ -12261,7 +13111,7 @@ input record. When a record matches @var{begpat}, the range pattern is
@dfn{turned on} and the range pattern matches this record as well. As long as
the range pattern stays turned on, it automatically matches every input
record read. The range pattern also matches @var{endpat} against every
-input record; when this succeeds, the range pattern is turned off again
+input record; when this succeeds, the range pattern is @dfn{turned off} again
for the following record. Then the range pattern goes back to checking
@var{begpat} against each record.
@@ -12415,7 +13265,7 @@ rule checks the @code{FNR} and @code{NR} variables.
@subsubsection Input/Output from @code{BEGIN} and @code{END} Rules
@cindex input/output, from @code{BEGIN} and @code{END}
-There are several (sometimes subtle) points to remember when doing I/O
+There are several (sometimes subtle) points to be aware of when doing I/O
from a @code{BEGIN} or @code{END} rule.
The first has to do with the value of @code{$0} in a @code{BEGIN}
rule. Because @code{BEGIN} rules are executed before any input is read,
@@ -12440,7 +13290,7 @@ rule. It contains the number of fields from the last input record.
Most probably due to an oversight, the standard does not say that @code{$0}
is also preserved, although logically one would think that it should be.
In fact, @command{gawk} does preserve the value of @code{$0} for use in
-@code{END} rules. Be aware, however, that Brian Kernighan's @command{awk}, and possibly
+@code{END} rules. Be aware, however, that BWK @command{awk}, and possibly
other implementations, do not.
The third point follows from the first two. The meaning of @samp{print}
@@ -12476,8 +13326,19 @@ This @value{SECTION} describes a @command{gawk}-specific feature.
Two special kinds of rule, @code{BEGINFILE} and @code{ENDFILE}, give
you ``hooks'' into @command{gawk}'s command-line file processing loop.
-As with the @code{BEGIN} and @code{END} rules (@pxref{BEGIN/END}), all
-@code{BEGINFILE} rules in a program are merged, in the order they are
+As with the @code{BEGIN} and @code{END} rules
+@ifnottex
+@ifnotdocbook
+(@pxref{BEGIN/END}),
+@end ifnotdocbook
+@end ifnottex
+@iftex
+(see the previous section),
+@end iftex
+@ifdocbook
+(see the previous section),
+@end ifdocbook
+all @code{BEGINFILE} rules in a program are merged, in the order they are
read by @command{gawk}, and all @code{ENDFILE} rules are merged as well.
The body of the @code{BEGINFILE} rules is executed just before
@@ -12487,7 +13348,7 @@ is set to the name of the current file, and @code{FNR} is set to zero.
The @code{BEGINFILE} rule provides you the opportunity to accomplish two tasks
that would otherwise be difficult or impossible to perform:
-@itemize @bullet
+@itemize @value{BULLET}
@item
You can test if the file is readable. Normally, it is a fatal error if a
file named on the command line cannot be opened for reading. However,
@@ -12505,10 +13366,11 @@ the file entirely. Otherwise, @command{gawk} exits with the usual
fatal error.
@item
-If you have written extensions that modify the record handling (by inserting
-an ``input parser''), you can invoke them at this point, before @command{gawk}
-has started processing the file. (This is a @emph{very} advanced feature,
-currently used only by the @uref{http://gawkextlib.sourceforge.net, @code{gawkextlib} project}.)
+If you have written extensions that modify the record handling (by
+inserting an ``input parser,'' @pxref{Input Parsers}), you can invoke
+them at this point, before @command{gawk} has started processing the file.
+(This is a @emph{very} advanced feature, currently used only by the
+@uref{http://gawkextlib.sourceforge.net, @code{gawkextlib} project}.)
@end itemize
The @code{ENDFILE} rule is called when @command{gawk} has finished processing
@@ -12530,8 +13392,8 @@ statement (@pxref{Nextfile Statement}) is allowed only inside a
@cindex @code{getline} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
The @code{getline} statement (@pxref{Getline}) is restricted inside
-both @code{BEGINFILE} and @code{ENDFILE}. Only the @samp{getline
-@var{variable} < @var{file}} form is allowed.
+both @code{BEGINFILE} and @code{ENDFILE}: only redirected
+forms of @code{getline} are allowed.
@code{BEGINFILE} and @code{ENDFILE} are @command{gawk} extensions.
In most other @command{awk} implementations, or if @command{gawk} is in
@@ -12591,7 +13453,7 @@ into the body of the @command{awk} program.
@cindex shells, quoting
The most common method is to use shell quoting to substitute
the variable's value into the program inside the script.
-For example, in the following program:
+For example, consider the following program:
@example
printf "Enter search pattern: "
@@ -12601,7 +13463,7 @@ awk "/$pattern/ "'@{ nmatches++ @}
@end example
@noindent
-the @command{awk} program consists of two pieces of quoted text
+The @command{awk} program consists of two pieces of quoted text
that are concatenated together to form the program.
The first part is double-quoted, which allows substitution of
the @code{pattern} shell variable inside the quotes.
@@ -12615,8 +13477,8 @@ match up the quotes when reading the program.
A better method is to use @command{awk}'s variable assignment feature
(@pxref{Assignment Options})
-to assign the shell variable's value to an @command{awk} variable's
-value. Then use dynamic regexps to match the pattern
+to assign the shell variable's value to an @command{awk} variable.
+Then use dynamic regexps to match the pattern
(@pxref{Computed Regexps}).
The following shows how to redo the
previous example using this technique:
@@ -12654,13 +13516,13 @@ both) may be omitted. The purpose of the @dfn{action} is to tell
@command{awk} what to do once a match for the pattern is found. Thus,
in outline, an @command{awk} program generally looks like this:
-@example
-@r{[}@var{pattern}@r{]} @{ @var{action} @}
- @var{pattern} @r{[}@{ @var{action} @}@r{]}
+@display
+[@var{pattern}] @code{@{ @var{action} @}}
+ @var{pattern} [@code{@{ @var{action} @}}]
@dots{}
-function @var{name}(@var{args}) @{ @dots{} @}
+@code{function @var{name}(@var{args}) @{ @dots{} @}}
@dots{}
-@end example
+@end display
@cindex @code{@{@}} (braces), actions and
@cindex braces (@code{@{@}}), actions and
@@ -12669,11 +13531,11 @@ function @var{name}(@var{args}) @{ @dots{} @}
@cindex @code{;} (semicolon), separating statements in actions
@cindex semicolon (@code{;}), separating statements in actions
An action consists of one or more @command{awk} @dfn{statements}, enclosed
-in curly braces (@samp{@{@dots{}@}}). Each statement specifies one
+in braces (@samp{@{@r{@dots{}}@}}). Each statement specifies one
thing to do. The statements are separated by newlines or semicolons.
-The curly braces around an action must be used even if the action
+The braces around an action must be used even if the action
contains only one statement, or if it contains no statements at
-all. However, if you omit the action entirely, omit the curly braces as
+all. However, if you omit the action entirely, omit the braces as
well. An omitted action is equivalent to @samp{@{ print $0 @}}:
@example
@@ -12699,10 +13561,9 @@ programs. The @command{awk} language gives you C-like constructs
special ones (@pxref{Statements}).
@item Compound statements
-Consist of one or more statements enclosed in
-curly braces. A compound statement is used in order to put several
-statements together in the body of an @code{if}, @code{while}, @code{do},
-or @code{for} statement.
+Enclose one or more statements in braces. A compound statement
+is used in order to put several statements together in the body of an
+@code{if}, @code{while}, @code{do}, or @code{for} statement.
@item Input statements
Use the @code{getline} command
@@ -12748,7 +13609,7 @@ Many control statements contain other statements. For example, the
@code{if} statement contains another statement that may or may not be
executed. The contained statement is called the @dfn{body}.
To include more than one statement in the body, group them into a
-single @dfn{compound statement} with curly braces, separating them with
+single @dfn{compound statement} with braces, separating them with
newlines or semicolons.
@menu
@@ -12776,9 +13637,9 @@ newlines or semicolons.
The @code{if}-@code{else} statement is @command{awk}'s decision-making
statement. It looks like this:
-@example
-if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]}
-@end example
+@display
+@code{if (@var{condition}) @var{then-body}} [@code{else @var{else-body}}]
+@end display
@noindent
The @var{condition} is an expression that controls what the rest of the
@@ -12802,7 +13663,7 @@ if the value of @code{x} is evenly divisible by two), then the first
statement is executed.
If the @code{else} keyword appears on the same line as @var{then-body} and
@var{then-body} is not a compound statement (i.e., not surrounded by
-curly braces), then a semicolon must separate @var{then-body} from
+braces), then a semicolon must separate @var{then-body} from
the @code{else}.
To illustrate this, the previous example can be rewritten as:
@@ -13036,6 +13897,8 @@ for more information on this version of the @code{for} loop.
@cindex @code{default} keyword
This @value{SECTION} describes a @command{gawk}-specific feature.
+If @command{gawk} is in compatibility mode (@pxref{Options}),
+it is not available.
The @code{switch} statement allows the evaluation of an expression and
the execution of statements based on a @code{case} match. Case statements
@@ -13066,36 +13929,38 @@ case is made, the case statement bodies execute until a @code{break},
or the end of the @code{switch} statement itself. For example:
@example
-switch (NR * 2 + 1) @{
-case 3:
-case "11":
- print NR - 1
- break
-
-case /2[[:digit:]]+/:
- print NR
-
-default:
- print NR + 1
-
-case -1:
- print NR * -1
+while ((c = getopt(ARGC, ARGV, "aksx")) != -1) @{
+ switch (c) @{
+ case "a":
+ # report size of all files
+ all_files = TRUE;
+ break
+ case "k":
+ BLOCK_SIZE = 1024 # 1K block size
+ break
+ case "s":
+ # do sums only
+ sum_only = TRUE
+ break
+ case "x":
+ # don't cross filesystems
+ fts_flags = or(fts_flags, FTS_XDEV)
+ break
+ case "?":
+ default:
+ usage()
+ break
+ @}
@}
@end example
Note that if none of the statements specified above halt execution
of a matched @code{case} statement, execution falls through to the
-next @code{case} until execution halts. In the above example, for
-any case value starting with @samp{2} followed by one or more digits,
-the @code{print} statement is executed and then falls through into the
-@code{default} section, executing its @code{print} statement. In turn,
-the @minus{}1 case will also be executed since the @code{default} does
-not halt execution.
-
-This @code{switch} statement is a @command{gawk} extension.
-If @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not available.
+next @code{case} until execution halts. In the above example, the
+@code{case} for @code{"?"} falls through to the @code{default}
+case, which is to call a function named @code{usage()}.
+(The @code{getopt()} function being called here is
+described in @ref{Getopt Function}.)
@node Break Statement
@subsection The @code{break} Statement
@@ -13111,15 +13976,15 @@ numbers:
@example
# find smallest divisor of num
@{
- num = $1
- for (div = 2; div * div <= num; div++) @{
- if (num % div == 0)
- break
- @}
- if (num % div == 0)
- printf "Smallest divisor of %d is %d\n", num, div
- else
- printf "%d is prime\n", num
+ num = $1
+ for (div = 2; div * div <= num; div++) @{
+ if (num % div == 0)
+ break
+ @}
+ if (num % div == 0)
+ printf "Smallest divisor of %d is %d\n", num, div
+ else
+ printf "%d is prime\n", num
@}
@end example
@@ -13137,17 +14002,17 @@ an @code{if}:
@example
# find smallest divisor of num
@{
- num = $1
- for (div = 2; ; div++) @{
- if (num % div == 0) @{
- printf "Smallest divisor of %d is %d\n", num, div
- break
- @}
- if (div * div > num) @{
- printf "%d is prime\n", num
- break
+ num = $1
+ for (div = 2; ; div++) @{
+ if (num % div == 0) @{
+ printf "Smallest divisor of %d is %d\n", num, div
+ break
+ @}
+ if (div * div > num) @{
+ printf "%d is prime\n", num
+ break
+ @}
@}
- @}
@}
@end example
@@ -13169,7 +14034,7 @@ historical implementations of @command{awk} treated the @code{break}
statement outside of a loop as if it were a @code{next} statement
(@pxref{Next Statement}).
@value{DARKCORNER}
-Recent versions of Brian Kernighan's @command{awk} no longer allow this usage,
+Recent versions of BWK @command{awk} no longer allow this usage,
nor does @command{gawk}.
@node Continue Statement
@@ -13218,7 +14083,8 @@ BEGIN @{
@end example
@noindent
-This program loops forever once @code{x} reaches 5.
+This program loops forever once @code{x} reaches 5, since
+the increment (@samp{x++}) is never reached.
@c @cindex @code{continue}, outside of loops
@c @cindex historical features
@@ -13235,7 +14101,7 @@ statement outside a loop: as if it were a @code{next}
statement
(@pxref{Next Statement}).
@value{DARKCORNER}
-Recent versions of Brian Kernighan's @command{awk} no longer work this way, nor
+Recent versions of BWK @command{awk} no longer work this way, nor
does @command{gawk}.
@node Next Statement
@@ -13268,9 +14134,8 @@ the beginning, in the following manner:
@example
NF != 4 @{
- err = sprintf("%s:%d: skipped: NF != 4\n", FILENAME, FNR)
- print err > "/dev/stderr"
- next
+ printf("%s:%d: skipped: NF != 4\n", FILENAME, FNR) > "/dev/stderr"
+ next
@}
@end example
@@ -13296,16 +14161,14 @@ The @code{next} statement is not allowed inside @code{BEGINFILE} and
@cindex POSIX @command{awk}, @code{next}/@code{nextfile} statements and
@cindex @code{next} statement, user-defined functions and
@cindex functions, user-defined, @code{next}/@code{nextfile} statements and
-According to the POSIX standard, the behavior is undefined if
-the @code{next} statement is used in a @code{BEGIN} or @code{END} rule.
-@command{gawk} treats it as a syntax error.
-Although POSIX permits it,
-some other @command{awk} implementations don't allow the @code{next}
-statement inside function bodies
-(@pxref{User-defined}).
-Just as with any other @code{next} statement, a @code{next} statement inside a
-function body reads the next record and starts processing it with the
-first rule in the program.
+According to the POSIX standard, the behavior is undefined if the
+@code{next} statement is used in a @code{BEGIN} or @code{END} rule.
+@command{gawk} treats it as a syntax error. Although POSIX permits it,
+most other @command{awk} implementations don't allow the @code{next}
+statement inside function bodies (@pxref{User-defined}). Just as with any
+other @code{next} statement, a @code{next} statement inside a function
+body reads the next record and starts processing it with the first rule
+in the program.
@node Nextfile Statement
@subsection The @code{nextfile} Statement
@@ -13315,34 +14178,32 @@ The @code{nextfile} statement
is similar to the @code{next} statement.
However, instead of abandoning processing of the current record, the
@code{nextfile} statement instructs @command{awk} to stop processing the
-current data file.
+current @value{DF}.
Upon execution of the @code{nextfile} statement,
@code{FILENAME} is
-updated to the name of the next data file listed on the command line,
+updated to the name of the next @value{DF} listed on the command line,
@code{FNR} is reset to one,
and processing
starts over with the first rule in the program.
If the @code{nextfile} statement causes the end of the input to be reached,
then the code in any @code{END} rules is executed. An exception to this is
when @code{nextfile} is invoked during execution of any statement in an
-@code{END} rule; In this case, it causes the program to stop immediately. @xref{BEGIN/END}.
+@code{END} rule; in this case, it causes the program to stop immediately.
+@xref{BEGIN/END}.
-The @code{nextfile} statement is useful when there are many data files
+The @code{nextfile} statement is useful when there are many @value{DF}s
to process but it isn't necessary to process every record in every file.
Without @code{nextfile},
-in order to move on to the next data file, a program
+in order to move on to the next @value{DF}, a program
would have to continue scanning the unwanted records. The @code{nextfile}
statement accomplishes this much more efficiently.
In @command{gawk}, execution of @code{nextfile} causes additional things
-to happen:
-any @code{ENDFILE} rules are executed except in the case as
-mentioned below,
-@code{ARGIND} is incremented,
-and
-any @code{BEGINFILE} rules are executed.
-(@code{ARGIND} hasn't been introduced yet. @xref{Built-in Variables}.)
+to happen: any @code{ENDFILE} rules are executed if @command{gawk} is
+not currently in an @code{END} or @code{BEGINFILE} rule, @code{ARGIND} is
+incremented, and any @code{BEGINFILE} rules are executed. (@code{ARGIND}
+hasn't been introduced yet. @xref{Built-in Variables}.)
With @command{gawk}, @code{nextfile} is useful inside a @code{BEGINFILE}
rule to skip over a file that would otherwise cause @command{gawk}
@@ -13366,7 +14227,7 @@ See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@cindex @code{nextfile} statement, user-defined functions and
@cindex Brian Kernighan's @command{awk}
@cindex @command{mawk} utility
-The current version of the Brian Kernighan's @command{awk}, and @command{mawk} (@pxref{Other
+The current version of BWK @command{awk} and @command{mawk} (@pxref{Other
Versions}) also support @code{nextfile}. However, they don't allow the
@code{nextfile} statement inside function bodies (@pxref{User-defined}).
@command{gawk} does; a @code{nextfile} inside a function body reads the
@@ -13381,9 +14242,9 @@ The @code{exit} statement causes @command{awk} to immediately stop
executing the current rule and to stop processing input; any remaining input
is ignored. The @code{exit} statement is written as follows:
-@example
-exit @r{[}@var{return code}@r{]}
-@end example
+@display
+@code{exit} [@var{return code}]
+@end display
@cindex @code{BEGIN} pattern, @code{exit} statement and
@cindex @code{END} pattern, @code{exit} statement and
@@ -13416,8 +14277,7 @@ status code for the @command{awk} process. If no argument is supplied,
In the case where an argument
is supplied to a first @code{exit} statement, and then @code{exit} is
called a second time from an @code{END} rule with no argument,
-@command{awk} uses the previously supplied exit value.
-@value{DARKCORNER}
+@command{awk} uses the previously supplied exit value. @value{DARKCORNER}
@xref{Exit Status}, for more information.
@cindex programming conventions, @code{exit} statement
@@ -13429,12 +14289,12 @@ in the following example:
@example
BEGIN @{
- if (("date" | getline date_now) <= 0) @{
- print "Can't get system date" > "/dev/stderr"
- exit 1
- @}
- print "current date is", date_now
- close("date")
+ if (("date" | getline date_now) <= 0) @{
+ print "Can't get system date" > "/dev/stderr"
+ exit 1
+ @}
+ print "current date is", date_now
+ close("date")
@}
@end example
@@ -13465,9 +14325,9 @@ automatically by @command{awk}, so that they carry information from the
internal workings of @command{awk} to your program.
@cindex @command{gawk}, built-in variables and
-This @value{SECTION} documents all the built-in variables of
-@command{gawk}, most of which are also documented in the chapters
-describing their areas of activity.
+This @value{SECTION} documents all of @command{gawk}'s built-in variables,
+most of which are also documented in the @value{CHAPTER}s describing
+their areas of activity.
@menu
* User-modified:: Built-in variables that you change to control
@@ -13485,44 +14345,38 @@ describing their areas of activity.
@cindex user-modifiable variables
The following is an alphabetical list of variables that you can change to
-control how @command{awk} does certain things. The variables that are
-specific to @command{gawk} are marked with a pound sign@w{ (@samp{#}).}
+control how @command{awk} does certain things.
+
+The variables that are specific to @command{gawk} are marked with a pound
+sign (@samp{#}). These variables are @command{gawk} extensions. In other
+@command{awk} implementations or if @command{gawk} is in compatibility
+mode (@pxref{Options}), they are not special. (Any exceptions are noted
+in the description of each variable.)
@table @code
@cindex @code{BINMODE} variable
@cindex binary input/output
@cindex input/output, binary
-@item BINMODE #
-On non-POSIX systems, this variable specifies use of binary mode for all I/O.
-Numeric values of one, two, or three specify that input files, output files, or
-all files, respectively, should use binary I/O.
-A numeric value less than zero is treated as zero, and a numeric value greater than
-three is treated as three.
-Alternatively,
-string values of @code{"r"} or @code{"w"} specify that input files and
-output files, respectively, should use binary I/O.
-A string value of @code{"rw"} or @code{"wr"} indicates that all
-files should use binary I/O.
-Any other string value is treated the same as @code{"rw"},
-but causes @command{gawk}
-to generate a warning message.
-@code{BINMODE} is described in more detail in
-@ref{PC Using}.
-
@cindex differences in @command{awk} and @command{gawk}, @code{BINMODE} variable
-This variable is a @command{gawk} extension.
-In other @command{awk} implementations
-(except @command{mawk},
-@pxref{Other Versions}),
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
+@item BINMODE #
+On non-POSIX systems, this variable specifies use of binary mode
+for all I/O. Numeric values of one, two, or three specify that input
+files, output files, or all files, respectively, should use binary I/O.
+A numeric value less than zero is treated as zero, and a numeric value
+greater than three is treated as three. Alternatively, string values
+of @code{"r"} or @code{"w"} specify that input files and output files,
+respectively, should use binary I/O. A string value of @code{"rw"} or
+@code{"wr"} indicates that all files should use binary I/O. Any other
+string value is treated the same as @code{"rw"}, but causes @command{gawk}
+to generate a warning message. @code{BINMODE} is described in more
+detail in @ref{PC Using}. @command{mawk} (@pxref{Other Versions})
+also supports this variable, but only using numeric values.
@cindex @code{CONVFMT} variable
@cindex POSIX @command{awk}, @code{CONVFMT} variable and
@cindex numbers, converting, to strings
@cindex strings, converting, numbers to
-@item CONVFMT
+@item @code{CONVFMT}
This string controls conversion of numbers to
strings (@pxref{Conversion}).
It works by being passed, in effect, as the first argument to the
@@ -13537,40 +14391,29 @@ Its default value is @code{"%.6g"}.
@cindex field separators, @code{FIELDWIDTHS} variable and
@cindex separators, field, @code{FIELDWIDTHS} variable and
@item FIELDWIDTHS #
-This is a space-separated list of columns that tells @command{gawk}
+A space-separated list of columns that tells @command{gawk}
how to split input with fixed columnar boundaries.
Assigning a value to @code{FIELDWIDTHS}
overrides the use of @code{FS} and @code{FPAT} for field splitting.
@xref{Constant Size}, for more information.
-If @command{gawk} is in compatibility mode
-(@pxref{Options}), then @code{FIELDWIDTHS}
-has no special meaning, and field-splitting operations occur based
-exclusively on the value of @code{FS}.
-
@cindex @command{gawk}, @code{FPAT} variable in
@cindex @code{FPAT} variable
@cindex differences in @command{awk} and @command{gawk}, @code{FPAT} variable
@cindex field separators, @code{FPAT} variable and
@cindex separators, field, @code{FPAT} variable and
@item FPAT #
-This is a regular expression (as a string) that tells @command{gawk}
+A regular expression (as a string) that tells @command{gawk}
to create the fields based on text that matches the regular expression.
Assigning a value to @code{FPAT}
overrides the use of @code{FS} and @code{FIELDWIDTHS} for field splitting.
@xref{Splitting By Content}, for more information.
-If @command{gawk} is in compatibility mode
-(@pxref{Options}), then @code{FPAT}
-has no special meaning, and field-splitting operations occur based
-exclusively on the value of @code{FS}.
-
@cindex @code{FS} variable
@cindex separators, field
@cindex field separators
@item FS
-This is the input field separator
-(@pxref{Field Separators}).
+The input field separator (@pxref{Field Separators}).
The value is a single-character string or a multicharacter regular
expression that matches the separations between fields in an input
record. If the value is the null string (@code{""}), then each
@@ -13620,18 +14463,13 @@ and it does not affect field splitting when using a single-character
field separator.
@xref{Case-sensitivity}.
-If @command{gawk} is in compatibility mode
-(@pxref{Options}),
-then @code{IGNORECASE} has no special meaning. Thus, string
-and regexp operations are always case-sensitive.
-
@cindex @command{gawk}, @code{LINT} variable in
@cindex @code{LINT} variable
@cindex differences in @command{awk} and @command{gawk}, @code{LINT} variable
@cindex lint checking
@item LINT #
When this variable is true (nonzero or non-null), @command{gawk}
-behaves as if the @option{--lint} command-line option is in effect.
+behaves as if the @option{--lint} command-line option is in effect
(@pxref{Options}).
With a value of @code{"fatal"}, lint warnings become fatal errors.
With a value of @code{"invalid"}, only warnings about things that are
@@ -13652,7 +14490,7 @@ of @command{awk} being executed.
@cindex numbers, converting, to strings
@cindex strings, converting, numbers to
@item OFMT
-This string controls conversion of numbers to
+Controls conversion of numbers to
strings (@pxref{Conversion}) for
printing with the @code{print} statement. It works by being passed
as the first argument to the @code{sprintf()} function
@@ -13673,27 +14511,26 @@ default value is @w{@code{" "}}, a string consisting of a single space.
@cindex @code{ORS} variable
@item ORS
-This is the output record separator. It is output at the end of every
+The output record separator. It is output at the end of every
@code{print} statement. Its default value is @code{"\n"}, the newline
character. (@xref{Output Separators}.)
@cindex @code{PREC} variable
@item PREC #
The working precision of arbitrary precision floating-point numbers,
-53 bits by default (@pxref{Setting Precision}).
+53 bits by default (@pxref{Setting precision}).
@cindex @code{ROUNDMODE} variable
@item ROUNDMODE #
The rounding mode to use for arbitrary precision arithmetic on
numbers, by default @code{"N"} (@samp{roundTiesToEven} in
-the IEEE-754 standard)
-(@pxref{Setting Rounding Mode}).
+the IEEE 754 standard; @pxref{Setting the rounding mode}).
@cindex @code{RS} variable
@cindex separators, for records
@cindex record separators
-@item RS
-This is @command{awk}'s input record separator. Its default value is a string
+@item @code{RS}
+The input record separator. Its default value is a string
containing a single newline character, which means that an input record
consists of a single line of text.
It can also be the null string, in which case records are separated by
@@ -13712,8 +14549,8 @@ just the first character of @code{RS}'s value is used.
@cindex @code{SUBSEP} variable
@cindex separators, subscript
@cindex subscript separators
-@item SUBSEP
-This is the subscript separator. It has the default value of
+@item @code{SUBSEP}
+The subscript separator. It has the default value of
@code{"\034"} and is used to separate the parts of the indices of a
multidimensional array. Thus, the expression @code{@w{foo["A", "B"]}}
really accesses @code{foo["A\034B"]}
@@ -13724,18 +14561,12 @@ really accesses @code{foo["A\034B"]}
@cindex differences in @command{awk} and @command{gawk}, @code{TEXTDOMAIN} variable
@cindex internationalization, localization
@item TEXTDOMAIN #
-This variable is used for internationalization of programs at the
+Used for internationalization of programs at the
@command{awk} level. It sets the default text domain for specially
marked string constants in the source text, as well as for the
@code{dcgettext()}, @code{dcngettext()} and @code{bindtextdomain()} functions
(@pxref{Internationalization}).
The default value of @code{TEXTDOMAIN} is @code{"messages"}.
-
-This variable is a @command{gawk} extension.
-In other @command{awk} implementations,
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
@end table
@c ENDOFRANGE bvar
@c ENDOFRANGE varb
@@ -13751,14 +14582,19 @@ it is not special.
@cindex variables, built-in, conveying information
The following is an alphabetical list of variables that @command{awk}
sets automatically on certain occasions in order to provide
-information to your program. The variables that are specific to
-@command{gawk} are marked with a pound sign@w{ (@samp{#}).}
+information to your program.
-@table @code
+The variables that are specific to @command{gawk} are marked with a pound
+sign (@samp{#}). These variables are @command{gawk} extensions. In other
+@command{awk} implementations or if @command{gawk} is in compatibility
+mode (@pxref{Options}), they are not special.
+
+@c @asis for docbook
+@table @asis
@cindex @code{ARGC}/@code{ARGV} variables
@cindex arguments, command-line
@cindex command line, arguments
-@item ARGC@r{,} ARGV
+@item @code{ARGC}, @code{ARGV}
The command-line arguments available to @command{awk} programs are stored in
an array called @code{ARGV}. @code{ARGC} is the number of command-line
arguments present. @xref{Other Arguments}.
@@ -13798,36 +14634,30 @@ about how @command{awk} uses these variables.
@cindex @code{ARGIND} variable
@cindex differences in @command{awk} and @command{gawk}, @code{ARGIND} variable
-@item ARGIND #
+@item @code{ARGIND #}
The index in @code{ARGV} of the current file being processed.
-Every time @command{gawk} opens a new data file for processing, it sets
-@code{ARGIND} to the index in @code{ARGV} of the file name.
+Every time @command{gawk} opens a new @value{DF} for processing, it sets
+@code{ARGIND} to the index in @code{ARGV} of the @value{FN}.
When @command{gawk} is processing the input files,
@samp{FILENAME == ARGV[ARGIND]} is always true.
@cindex files, processing@comma{} @code{ARGIND} variable and
This variable is useful in file processing; it allows you to tell how far
-along you are in the list of data files as well as to distinguish between
-successive instances of the same file name on the command line.
+along you are in the list of @value{DF}s as well as to distinguish between
+successive instances of the same @value{FN} on the command line.
@cindex file names, distinguishing
While you can change the value of @code{ARGIND} within your @command{awk}
program, @command{gawk} automatically sets it to a new value when the
next file is opened.
-This variable is a @command{gawk} extension.
-In other @command{awk} implementations,
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
-
@cindex @code{ENVIRON} array
@cindex environment variables, in @code{ENVIRON} array
-@item ENVIRON
+@item @code{ENVIRON}
An associative array containing the values of the environment. The array
indices are the environment variable names; the elements are the values of
the particular environment variables. For example,
-@code{ENVIRON["HOME"]} might be @file{/home/arnold}.
+@code{ENVIRON["HOME"]} might be @code{/home/arnold}.
For POSIX @command{awk}, changing this array does not affect the
environment passed on to any programs that @command{awk} may spawn via
@@ -13842,69 +14672,55 @@ executable programs.
Some operating systems may not have environment variables.
On such systems, the @code{ENVIRON} array is empty (except for
-@w{@code{ENVIRON["AWKPATH"]}},
-@pxref{AWKPATH Variable} and
-@w{@code{ENVIRON["AWKLIBPATH"]}},
+@w{@code{ENVIRON["AWKPATH"]}} and
+@w{@code{ENVIRON["AWKLIBPATH"]}};
+@pxref{AWKPATH Variable}, and
@pxref{AWKLIBPATH Variable}).
@cindex @command{gawk}, @code{ERRNO} variable in
@cindex @code{ERRNO} variable
@cindex differences in @command{awk} and @command{gawk}, @code{ERRNO} variable
@cindex error handling, @code{ERRNO} variable and
-@item ERRNO #
-If a system error occurs during a redirection for @code{getline},
-during a read for @code{getline}, or during a @code{close()} operation,
-then @code{ERRNO} contains a string describing the error.
-
-In addition, @command{gawk} clears @code{ERRNO}
-before opening each command-line input file. This enables checking if
-the file is readable inside a @code{BEGINFILE} pattern (@pxref{BEGINFILE/ENDFILE}).
-
-Otherwise,
-@code{ERRNO} works similarly to the C variable @code{errno}.
-Except for the case just mentioned,
-@command{gawk} @emph{never} clears it (sets it
-to zero or @code{""}). Thus, you should only expect its value
-to be meaningful when an I/O operation returns a failure
-value, such as @code{getline} returning @minus{}1.
-You are, of course, free to clear it yourself before doing an
-I/O operation.
-
-This variable is a @command{gawk} extension.
-In other @command{awk} implementations,
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
+@item @code{ERRNO #}
+If a system error occurs during a redirection for @code{getline}, during
+a read for @code{getline}, or during a @code{close()} operation, then
+@code{ERRNO} contains a string describing the error.
+
+In addition, @command{gawk} clears @code{ERRNO} before opening each
+command-line input file. This enables checking if the file is readable
+inside a @code{BEGINFILE} pattern (@pxref{BEGINFILE/ENDFILE}).
+
+Otherwise, @code{ERRNO} works similarly to the C variable @code{errno}.
+Except for the case just mentioned, @command{gawk} @emph{never} clears
+it (sets it to zero or @code{""}). Thus, you should only expect its
+value to be meaningful when an I/O operation returns a failure value,
+such as @code{getline} returning @minus{}1. You are, of course, free
+to clear it yourself before doing an I/O operation.
@cindex @code{FILENAME} variable
@cindex dark corner, @code{FILENAME} variable
-@item FILENAME
-The name of the file that @command{awk} is currently reading.
-When no data files are listed on the command line, @command{awk} reads
-from the standard input and @code{FILENAME} is set to @code{"-"}.
-@code{FILENAME} is changed each time a new file is read
-(@pxref{Reading Files}).
-Inside a @code{BEGIN} rule, the value of @code{FILENAME} is
-@code{""}, since there are no input files being processed
-yet.@footnote{Some early implementations of Unix @command{awk} initialized
-@code{FILENAME} to @code{"-"}, even if there were data files to be
-processed. This behavior was incorrect and should not be relied
-upon in your programs.}
-@value{DARKCORNER}
-Note, though, that using @code{getline}
-(@pxref{Getline})
-inside a @code{BEGIN} rule can give
-@code{FILENAME} a value.
+@item @code{FILENAME}
+The name of the current input file. When no @value{DF}s are listed
+on the command line, @command{awk} reads from the standard input and
+@code{FILENAME} is set to @code{"-"}. @code{FILENAME} changes each
+time a new file is read (@pxref{Reading Files}). Inside a @code{BEGIN}
+rule, the value of @code{FILENAME} is @code{""}, since there are no input
+files being processed yet.@footnote{Some early implementations of Unix
+@command{awk} initialized @code{FILENAME} to @code{"-"}, even if there
+were @value{DF}s to be processed. This behavior was incorrect and should
+not be relied upon in your programs.} @value{DARKCORNER} Note, though,
+that using @code{getline} (@pxref{Getline}) inside a @code{BEGIN} rule
+can give @code{FILENAME} a value.
@cindex @code{FNR} variable
-@item FNR
+@item @code{FNR}
The current record number in the current file. @code{FNR} is
incremented each time a new record is read
(@pxref{Records}). It is reinitialized
to zero each time a new input file is started.
@cindex @code{NF} variable
-@item NF
+@item @code{NF}
The number of fields in the current input record.
@code{NF} is set each time a new record is read, when a new field is
created or when @code{$0} changes (@pxref{Fields}).
@@ -13918,18 +14734,18 @@ current record. @xref{Changing Fields}.
@cindex @code{FUNCTAB} array
@cindex @command{gawk}, @code{FUNCTAB} array in
@cindex differences in @command{awk} and @command{gawk}, @code{FUNCTAB} variable
-@item FUNCTAB #
+@item @code{FUNCTAB #}
An array whose indices and corresponding values are the names of all
-the user-defined or extension functions in the program.
+the built-in, user-defined and extension functions in the program.
@quotation NOTE
Attempting to use the @code{delete} statement with the @code{FUNCTAB}
-array will cause a fatal error. Any attempt to assign to an element of
-the @code{FUNCTAB} array will also cause a fatal error.
+array causes a fatal error. Any attempt to assign to an element of
+@code{FUNCTAB} also causes a fatal error.
@end quotation
@cindex @code{NR} variable
-@item NR
+@item @code{NR}
The number of input records @command{awk} has processed since
the beginning of the program's execution
(@pxref{Records}).
@@ -13938,7 +14754,7 @@ the beginning of the program's execution
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
@cindex differences in @command{awk} and @command{gawk}, @code{PROCINFO} array
-@item PROCINFO #
+@item @code{PROCINFO #}
The elements of this array provide access to information about the
running @command{awk} program.
The following elements (listed alphabetically)
@@ -13961,16 +14777,22 @@ or @code{"FPAT"} if field matching with @code{FPAT} is in effect.
@item PROCINFO["identifiers"]
@cindex program identifiers
-A subarray, indexed by the names of all identifiers used in the
-text of the AWK program. For each identifier, the value of the element is one of the following:
+A subarray, indexed by the names of all identifiers used in the text of
+the AWK program. An @dfn{identifier} is simply the name of a variable
+(be it scalar or array), built-in function, user-defined function, or
+extension function. For each identifier, the value of the element is
+one of the following:
@table @code
@item "array"
The identifier is an array.
+@item "builtin"
+The identifier is a built-in function.
+
@item "extension"
The identifier is an extension function loaded via
-@code{@@load}.
+@code{@@load} or @option{-l}.
@item "scalar"
The identifier is a scalar.
@@ -14007,7 +14829,7 @@ The parent process ID of the current process.
@item PROCINFO["sorted_in"]
If this element exists in @code{PROCINFO}, its value controls the
order in which array indices will be processed by
-@samp{for (index in array) @dots{}} loops.
+@samp{for (@var{index} in @var{array})} loops.
Since this is an advanced feature, we defer the
full description until later; see
@ref{Scanning an Array}.
@@ -14029,7 +14851,7 @@ The version of @command{gawk}.
The following additional elements in the array
are available to provide information about the MPFR and GMP libraries
if your version of @command{gawk} supports arbitrary precision numbers
-(@pxref{Gawk and MPFR}):
+(@pxref{Arbitrary Precision Arithmetic}):
@table @code
@cindex version of GNU MPFR library
@@ -14075,12 +14897,11 @@ to test for these elements
@cindex @code{PROCINFO} array, uses
The @code{PROCINFO} array has the following additional uses:
-@itemize @bullet
+@itemize @value{BULLET}
@item
-It may be
-used to cause coprocesses
-to communicate over pseudo-ttys instead of through two-way pipes;
-this is discussed further in @ref{Two-way I/O}.
+It may be used to cause coprocesses to communicate over pseudo-ttys
+instead of through two-way pipes; this is discussed further in
+@ref{Two-way I/O}.
@item
It may be used to provide a timeout when reading from any
@@ -14088,14 +14909,8 @@ open input file, pipe, or coprocess.
@xref{Read Timeout}, for more information.
@end itemize
-This array is a @command{gawk} extension.
-In other @command{awk} implementations,
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
-
@cindex @code{RLENGTH} variable
-@item RLENGTH
+@item @code{RLENGTH}
The length of the substring matched by the
@code{match()} function
(@pxref{String Functions}).
@@ -14103,7 +14918,7 @@ The length of the substring matched by the
is the length of the matched string, or @minus{}1 if no match is found.
@cindex @code{RSTART} variable
-@item RSTART
+@item @code{RSTART}
The start-index in characters of the substring that is matched by the
@code{match()} function
(@pxref{String Functions}).
@@ -14114,20 +14929,14 @@ if no match was found.
@cindex @command{gawk}, @code{RT} variable in
@cindex @code{RT} variable
@cindex differences in @command{awk} and @command{gawk}, @code{RT} variable
-@item RT #
-This is set each time a record is read. It contains the input text
-that matched the text denoted by @code{RS}, the record separator.
-
-This variable is a @command{gawk} extension.
-In other @command{awk} implementations,
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
+@item @code{RT #}
+The input text that matched the text denoted by @code{RS},
+the record separator. It is set every time a record is read.
@cindex @command{gawk}, @code{SYMTAB} array in
@cindex @code{SYMTAB} array
@cindex differences in @command{awk} and @command{gawk}, @code{SYMTAB} variable
-@item SYMTAB #
+@item @code{SYMTAB #}
An array whose indices are the names of all currently defined
global variables and arrays in the program. The array may be used
for indirect access to read or write the value of a variable:
@@ -14156,6 +14965,7 @@ This works as expected: in this case @code{SYMTAB} acts just like
a regular array. The only difference is that you can't then delete
@code{SYMTAB["xxx"]}.
+@cindex Schorr, Andrew
The @code{SYMTAB} array is more interesting than it looks. Andrew Schorr
points out that it effectively gives @command{awk} data pointers. Consider his
example:
@@ -14170,8 +14980,8 @@ function multiply(variable, amount)
@end example
@quotation NOTE
-In order to avoid severe time-travel paradoxes@footnote{Not to mention difficult
-implementation issues.}, neither @code{FUNCTAB} nor @code{SYMTAB}
+In order to avoid severe time-travel paradoxes,@footnote{Not to mention difficult
+implementation issues.} neither @code{FUNCTAB} nor @code{SYMTAB}
is available as an element within the @code{SYMTAB} array.
@end quotation
@end table
@@ -14262,7 +15072,7 @@ changed.
@cindex arguments, command-line
@cindex command line, arguments
-@ref{Auto-set},
+@DBREF{Auto-set}
presented the following program describing the information contained in @code{ARGC}
and @code{ARGV}:
@@ -14320,11 +15130,11 @@ additional files to be read.
If the value of @code{ARGC} is decreased, that eliminates input files
from the end of the list. By recording the old value of @code{ARGC}
elsewhere, a program can treat the eliminated arguments as
-something other than file names.
+something other than @value{FN}s.
To eliminate a file from the middle of the list, store the null string
(@code{""}) into @code{ARGV} in place of the file's name. As a
-special feature, @command{awk} ignores file names that have been
+special feature, @command{awk} ignores @value{FN}s that have been
replaced with the null string.
Another option is to
use the @code{delete} statement to remove elements from
@@ -14335,8 +15145,17 @@ before actual processing of the input begins.
@xref{Split Program}, and see
@ref{Tee Program}, for examples
of each way of removing elements from @code{ARGV}.
+
+To actually get options into an @command{awk} program,
+end the @command{awk} options with @option{--} and then supply
+the @command{awk} program's options, in the following manner:
+
+@example
+awk -f myprog.awk -- -v -q file1 file2 @dots{}
+@end example
+
The following fragment processes @code{ARGV} in order to examine, and
-then remove, command-line options:
+then remove, the above command-line options:
@example
BEGIN @{
@@ -14356,32 +15175,83 @@ BEGIN @{
@}
@end example
-To actually get the options into the @command{awk} program,
-end the @command{awk} options with @option{--} and then supply
-the @command{awk} program's options, in the following manner:
-
-@example
-awk -f myprog -- -v -q file1 file2 @dots{}
-@end example
-
@cindex differences in @command{awk} and @command{gawk}, @code{ARGC}/@code{ARGV} variables
-This is not necessary in @command{gawk}. Unless @option{--posix} has
+Ending the @command{awk} options with @option{--} isn't
+necessary in @command{gawk}. Unless @option{--posix} has
been specified, @command{gawk} silently puts any unrecognized options
into @code{ARGV} for the @command{awk} program to deal with. As soon
as it sees an unknown option, @command{gawk} stops looking for other
-options that it might otherwise recognize. The previous example with
+options that it might otherwise recognize. The previous command line with
@command{gawk} would be:
@example
-gawk -f myprog -q -v file1 file2 @dots{}
+gawk -f myprog.awk -q -v file1 file2 @dots{}
@end example
@noindent
-Because @option{-q} is not a valid @command{gawk} option,
-it and the following @option{-v}
-are passed on to the @command{awk} program.
-(@xref{Getopt Function}, for an @command{awk} library function
-that parses command-line options.)
+Because @option{-q} is not a valid @command{gawk} option, it and the
+following @option{-v} are passed on to the @command{awk} program.
+(@xref{Getopt Function}, for an @command{awk} library function that
+parses command-line options.)
+
+@node Pattern Action Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Pattern-action pairs make up the basic elements of an @command{awk}
+program. Patterns are either normal expressions, range expressions,
+regexp constants, one of the special keywords @code{BEGIN}, @code{END},
+@code{BEGINFILE}, @code{ENDFILE}, or empty. The action executes if
+the current record matches the pattern. Empty (missing) patterns match
+all records.
+
+@item
+I/O from @code{BEGIN} and @code{END} rules has certain constraints.
+This is also true, only more so, for @code{BEGINFILE} and @code{ENDFILE}
+rules. The latter two give you ``hooks'' into @command{gawk}'s file
+processing, allowing you to recover from a file that otherwise would
+cause a fatal error (such as a file that cannot be opened).
+
+@item
+Shell variables can be used in @command{awk} programs by careful
+use of shell quoting. It is easier to pass a shell variable into
+@command{awk} by using the @option{-v} option and an @command{awk}
+variable.
+
+@item
+Actions consist of statements enclosed in curly braces. Statements
+are built up from expressions, control statements, compound statements,
+input and output statements, and deletion statements.
+
+@item
+The control statements in @command{awk} are @code{if}-@code{else},
+@code{while}, @code{for}, and @code{do}-@code{while}. @command{gawk}
+adds the @code{switch} statement. There are two flavors of @code{for}
+statement: one for performing general looping, and the other for iterating
+through an array.
+
+@item
+@code{break} and @code{continue} let you exit early or start the next
+iteration of a loop (or get out of a @code{switch}).
+
+@item
+@code{next} and @code{nextfile} let you read the next record and start
+over at the top of your program, or skip to the next input file and
+start over, respectively.
+
+@item
+The @code{exit} statement terminates your program. When executed
+from an action (or function body) it transfers control to the
+@code{END} statements. From an @code{END} statement body, it exits
+immediately. You may pass an optional numeric value to be used
+as @command{awk}'s exit status.
+
+@item
+Some built-in variables provide control over @command{awk}, mainly for I/O.
+Other variables convey information from @command{awk} to your program.
+
+@end itemize
@node Arrays
@chapter Arrays in @command{awk}
@@ -14399,7 +15269,7 @@ It also describes how @command{awk} simulates multidimensional
arrays, as well as some of the less obvious points about array usage.
The @value{CHAPTER} moves on to discuss @command{gawk}'s facility
for sorting arrays, and ends with a brief description of @command{gawk}'s
-ability to support true multidimensional arrays.
+ability to support true arrays of arrays.
@cindex variables, names of
@cindex functions, names of
@@ -14422,6 +15292,7 @@ same @command{awk} program.
* Multidimensional:: Emulating multidimensional arrays in
@command{awk}.
* Arrays of Arrays:: True multidimensional arrays.
+* Arrays Summary:: Summary of arrays.
@end menu
@node Array Basics
@@ -14483,35 +15354,34 @@ the array is declared.)
A contiguous array of four elements might look like the following example,
conceptually, if the element values are 8, @code{"foo"},
-@code{""}, and 30:
+@code{""}, and 30
+@ifnotdocbook
+as shown in @ref{figure-array-elements}:
+@end ifnotdocbook
+@ifdocbook
+as shown in @inlineraw{docbook, <xref linkend="figure-array-elements"/>}:
+@end ifdocbook
-@c @strong{FIXME: NEXT ED:} Use real images here, and an @float
-@iftex
-@c from Karl Berry, much thanks for the help.
-@tex
-\bigskip % space above the table (about 1 linespace)
-\offinterlineskip
-\newdimen\width \width = 1.5cm
-\newdimen\hwidth \hwidth = 4\width \advance\hwidth by 2pt % 5 * 0.4pt
-\centerline{\vbox{
-\halign{\strut\hfil\ignorespaces#&&\vrule#&\hbox to\width{\hfil#\unskip\hfil}\cr
-\noalign{\hrule width\hwidth}
- &&{\tt 8} &&{\tt "foo"} &&{\tt ""} &&{\tt 30} &&\quad Value\cr
-\noalign{\hrule width\hwidth}
-\noalign{\smallskip}
- &\omit&0&\omit &1 &\omit&2 &\omit&3 &\omit&\quad Index\cr
-}
-}}
-@end tex
-@end iftex
-@ifnottex
-@example
-+---------+---------+--------+---------+
-| 8 | "foo" | "" | 30 | @r{Value}
-+---------+---------+--------+---------+
- 0 1 2 3 @r{Index}
-@end example
-@end ifnottex
+@ifnotdocbook
+@float Figure,figure-array-elements
+@caption{A Contiguous Array}
+@ifinfo
+@center @image{array-elements, , , A Contiguous Array, txt}
+@end ifinfo
+@ifnotinfo
+@center @image{array-elements, , , A Contiguous Array}
+@end ifnotinfo
+@end float
+@end ifnotdocbook
+
+@docbook
+<figure id="figure-array-elements" float="0">
+<title>A Contiguous Array</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="array-elements.png" format="PNG"/></imageobject>
+</mediaobject>
+</figure>
+@end docbook
@noindent
Only the values are stored; the indices are implicit from the order of
@@ -14528,20 +15398,65 @@ Arrays in @command{awk} are different---they are @dfn{associative}. This means
that each array is a collection of pairs: an index and its corresponding
array element value:
+@ifnotdocbook
@example
@r{Index} 3 @r{Value} 30
@r{Index} 1 @r{Value} "foo"
@r{Index} 0 @r{Value} 8
@r{Index} 2 @r{Value} ""
@end example
+@end ifnotdocbook
+
+@docbook
+<informaltable>
+<tgroup cols="2">
+<colspec colname="1" align="center"/>
+<colspec colname="2" align="center"/>
+<thead>
+<row>
+<entry>Index</entry>
+<entry>Value</entry>
+</row>
+</thead>
+
+<tbody>
+<row>
+<entry><literal>3</literal></entry>
+<entry><literal>30</literal></entry>
+</row>
+
+<row>
+<entry><literal>1</literal></entry>
+<entry><literal>"foo"</literal></entry>
+</row>
+
+<row>
+<entry><literal>0</literal></entry>
+<entry><literal>8</literal></entry>
+</row>
+
+<row>
+<entry><literal>2</literal></entry>
+<entry><literal>""</literal></entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
@noindent
-The pairs are shown in jumbled order because their order is irrelevant.
+The pairs are shown in jumbled order because their order is
+irrelevant.@footnote{The ordering will vary among @command{awk}
+implementations, which typically use hash tables to store array elements
+and values.}
One advantage of associative arrays is that new pairs can be added
at any time. For example, suppose a tenth element is added to the array
whose value is @w{@code{"number ten"}}. The result is:
+@ifnotdocbook
@example
@r{Index} 10 @r{Value} "number ten"
@r{Index} 3 @r{Value} 30
@@ -14549,6 +15464,51 @@ whose value is @w{@code{"number ten"}}. The result is:
@r{Index} 0 @r{Value} 8
@r{Index} 2 @r{Value} ""
@end example
+@end ifnotdocbook
+
+@docbook
+<informaltable>
+<tgroup cols="2">
+<colspec colname="1" align="center"/>
+<colspec colname="2" align="center"/>
+<thead>
+<row>
+<entry>Index</entry>
+<entry>Value</entry>
+</row>
+</thead>
+<tbody>
+
+<row>
+<entry><literal>10</literal></entry>
+<entry><literal>"number ten"</literal></entry>
+</row>
+
+<row>
+<entry><literal>3</literal></entry>
+<entry><literal>30</literal></entry>
+</row>
+
+<row>
+<entry><literal>1</literal></entry>
+<entry><literal>"foo"</literal></entry>
+</row>
+
+<row>
+<entry><literal>0</literal></entry>
+<entry><literal>8</literal></entry>
+</row>
+
+<row>
+<entry><literal>2</literal></entry>
+<entry><literal>""</literal></entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
@noindent
@cindex sparse arrays
@@ -14561,20 +15521,61 @@ have to be positive integers. Any number, or even a string, can be
an index. For example, the following is an array that translates words from
English to French:
+@ifnotdocbook
@example
@r{Index} "dog" @r{Value} "chien"
@r{Index} "cat" @r{Value} "chat"
@r{Index} "one" @r{Value} "un"
@r{Index} 1 @r{Value} "un"
@end example
+@end ifnotdocbook
+
+@docbook
+<informaltable>
+<tgroup cols="2">
+<colspec colname="1" align="center"/>
+<colspec colname="2" align="center"/>
+<thead>
+<row>
+<entry>Index</entry>
+<entry>Value</entry>
+</row>
+</thead>
+<tbody>
+<row>
+<entry><literal>"dog"</literal></entry>
+<entry><literal>"chien"</literal></entry>
+</row>
+
+<row>
+<entry><literal>"cat"</literal></entry>
+<entry><literal>"chat"</literal></entry>
+</row>
+
+<row>
+<entry><literal>"one"</literal></entry>
+<entry><literal>"un"</literal></entry>
+</row>
+
+<row>
+<entry><literal>1</literal></entry>
+<entry><literal>"un"</literal></entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
@noindent
Here we decided to translate the number one in both spelled-out and
numeric form---thus illustrating that a single array can have both
numbers and strings as indices.
-In fact, array subscripts are always strings; this is discussed
-in more detail in
-@ref{Numeric Array Subscripts}.
+(In fact, array subscripts are always strings.
+There are some subtleties to how numbers work when used as
+array subscripts; this is discussed in more detail in
+@ref{Numeric Array Subscripts}.)
Here, the number @code{1} isn't double-quoted, since @command{awk}
automatically converts it to a string.
@@ -14640,8 +15641,9 @@ if (a["foo"] != "") @dots{}
@end example
@noindent
-This is incorrect, since this will @emph{create} @code{a["foo"]}
-if it didn't exist before!
+This is incorrect for two reasons. First, it @emph{creates} @code{a["foo"]}
+if it didn't exist before! Second, it is valid (if a bit unusual) to set
+an array element equal to the empty string.
@end quotation
@c @cindex arrays, @code{in} operator and
@@ -14650,15 +15652,17 @@ To determine whether an element exists in an array at a certain index, use
the following expression:
@example
-@var{ind} in @var{array}
+@var{indx} in @var{array}
@end example
@cindex side effects, array indexing
@noindent
-This expression tests whether the particular index @var{ind} exists,
+This expression tests whether the particular index @var{indx} exists,
without the side effect of creating that element if it is not present.
-The expression has the value one (true) if @code{@var{array}[@var{ind}]}
+The expression has the value one (true) if @code{@var{array}[@var{indx}]}
exists and zero (false) if it does not exist.
+(We use @var{indx} here, since @samp{index} is the name of a built-in
+function.)
For example, this statement tests whether the array @code{frequencies}
contains the index @samp{2}:
@@ -14831,14 +15835,56 @@ for a more detailed example of this type.
@cindex @code{in} operator, order of array access
The order in which elements of the array are accessed by this statement
is determined by the internal arrangement of the array elements within
-@command{awk} and normally cannot be controlled or changed. This can lead to
-problems if new elements are added to @var{array} by statements in
-the loop body; it is not predictable whether the @code{for} loop will
-reach them. Similarly, changing @var{var} inside the loop may produce
-strange results. It is best to avoid such things.
+@command{awk} and in standard @command{awk} cannot be controlled
+or changed. This can lead to problems if new elements are added to
+@var{array} by statements in the loop body; it is not predictable whether
+the @code{for} loop will reach them. Similarly, changing @var{var} inside
+the loop may produce strange results. It is best to avoid such things.
+
+As a point of information, @command{gawk} sets up the list of elements
+to be iterated over before the loop starts, and does not change it.
+But not all @command{awk} versions do so. Consider this program, named
+@file{loopcheck.awk}:
+
+@example
+BEGIN @{
+ a["here"] = "here"
+ a["is"] = "is"
+ a["a"] = "a"
+ a["loop"] = "loop"
+ for (i in a) @{
+ j++
+ a[j] = j
+ print i
+ @}
+@}
+@end example
+
+Here is what happens when run with @command{gawk}:
+
+@example
+$ @kbd{gawk -f loopcheck.awk}
+@print{} here
+@print{} loop
+@print{} a
+@print{} is
+@end example
+
+Contrast this to BWK @command{awk}:
+
+@example
+$ @kbd{nawk -f loopcheck.awk}
+@print{} loop
+@print{} here
+@print{} is
+@print{} a
+@print{} 1
+@end example
@node Controlling Scanning
-@subsection Using Predefined Array Scanning Orders
+@subsection Using Predefined Array Scanning Orders With @command{gawk}
+
+This @value{SUBSECTION} describes a feature that is specific to @command{gawk}.
By default, when a @code{for} loop traverses an array, the order
is undefined, meaning that the @command{awk} implementation
@@ -14853,7 +15899,7 @@ Often, though, you may wish to do something simple, such as
or ``traverse the array by comparing the values in descending order.''
@command{gawk} provides two mechanisms which give you this control.
-@itemize @bullet
+@itemize @value{BULLET}
@item
Set @code{PROCINFO["sorted_in"]} to one of a set of predefined values.
We describe this now.
@@ -14960,7 +16006,7 @@ order relative to each other is determined by their index strings.
Here are some additional things to bear in mind about sorted
array traversal.
-@itemize @bullet
+@itemize @value{BULLET}
@item
The value of @code{PROCINFO["sorted_in"]} is global. That is, it affects
all array traversal @code{for} loops. If you need to change it within your
@@ -15069,7 +16115,7 @@ using @code{delete} without a subscript was a @command{gawk} extension.
As of September, 2012, it was accepted for
inclusion into the POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
the Austin Group website}. This form of the @code{delete} statement is also supported
-by Brian Kernighan's @command{awk} and @command{mawk}, as well as
+by BWK @command{awk} and @command{mawk}, as well as
by a number of other implementations (@pxref{Other Versions}).
@end quotation
@@ -15185,14 +16231,14 @@ $ @kbd{echo 'line 1}
> @kbd{line 2}
> @kbd{line 3' | awk '@{ l[lines] = $0; ++lines @}}
> @kbd{END @{}
-> @kbd{for (i = lines-1; i >= 0; --i)}
+> @kbd{for (i = lines - 1; i >= 0; i--)}
> @kbd{print l[i]}
> @kbd{@}'}
@print{} line 3
@print{} line 2
@end example
-Unfortunately, the very first line of input data did not come out in the
+Unfortunately, the very first line of input data did not appear in the
output!
Upon first glance, we would think that this program should have worked.
@@ -15209,7 +16255,7 @@ The following version of the program works correctly:
@example
@{ l[lines++] = $0 @}
END @{
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
@}
@end example
@@ -15283,10 +16329,11 @@ used for single dimensional arrays. Write the whole sequence of indices
in parentheses, separated by commas, as the left operand:
@example
-(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array}
+if ((@var{subscript1}, @var{subscript2}, @dots{}) in @var{array})
+ @dots{}
@end example
-The following example treats its input as a two-dimensional array of
+Here is an example that treats its input as a two-dimensional array of
fields; it rotates this array 90 degrees clockwise and prints the
result. It assumes that all lines have the same number of
elements:
@@ -15390,7 +16437,7 @@ separate indices is recovered.
array access and provides true arrays of
arrays. Elements of a subarray are referred to by their own indices
enclosed in square brackets, just like the elements of the main array.
-For example, the following creates a two-element subarray at index @samp{1}
+For example, the following creates a two-element subarray at index @code{1}
of the main array @code{a}:
@example
@@ -15414,7 +16461,7 @@ Each subarray and the main array can be of different length. In fact, the
elements of an array or its subarray do not all have to have the same
type. This means that the main array and any of its subarrays can be
non-rectangular, or jagged in structure. One can assign a scalar value to
-the index @samp{4} of the main array @code{a}:
+the index @code{4} of the main array @code{a}:
@example
a[4] = "An element in a jagged array"
@@ -15435,7 +16482,7 @@ a[4][5][6][7] = "An element in a four-dimensional array"
@end example
@noindent
-This removes the scalar value from index @samp{4} and then inserts a
+This removes the scalar value from index @code{4} and then inserts a
subarray of subarray of subarray containing a scalar. You can also
delete an entire subarray or subarray of subarrays:
@@ -15536,6 +16583,63 @@ creating an arbitrary index:
$ @kbd{gawk 'BEGIN @{ b[1][1] = ""; split("a b c d", b[1]); print b[1][1] @}'}
@print{} a
@end example
+
+@node Arrays Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Standard @command{awk} provides one-dimensional associative arrays
+(arrays indexed by string values). All arrays are associative; numeric
+indices are converted automatically to strings.
+
+@item
+Array elements are referenced as @code{@var{array}[@var{indx}]}.
+Referencing an element creates it if it did not exist previously.
+
+@item
+The proper way to see if an array has an element with a given index
+is to use the @code{in} operator: @samp{@var{indx} in @var{array}}.
+
+@item
+Use @samp{for (@var{indx} in @var{array}) @dots{}} to scan through all the
+individual elements of an array. In the body of the loop, @var{indx} takes
+on the value of each element's index in turn.
+
+@item
+The order in which a @samp{for (@var{indx} in @var{array})} loop
+traverses an array is undefined in POSIX @command{awk} and varies among
+implementations. @command{gawk} lets you control the order by assigning
+special predefined values to @code{PROCINFO["sorted_in"]}.
+
+@item
+Use @samp{delete @var{array}[@var{indx}]} to delete an individual element.
+You may also use @samp{delete @var{array}} to delete all of the elements
+in the array. This latter feature has been a common extension for many
+years and is now standard, but may not be supported by all commercial
+versions of @command{awk}.
+
+@item
+Standard @command{awk} simulates multidimensional arrays by separating
+subscript values with a comma. The values are concatenated into a
+single string, separated by the value of @code{SUBSEP}. The fact
+that such a subscript was created in this way is not retained; thus
+changing @code{SUBSEP} may have unexpected consequences. You can use
+@samp{(@var{sub1}, @var{sub2}, @dots{}) in @var{array}} to see if such
+a multidimensional subscript exists in @var{array}.
+
+@item
+@command{gawk} provides true arrays of arrays. You use a separate
+set of square brackets for each dimension in such an array:
+@code{data[row][col]}, for example. Array elements may thus be either
+scalar values (number or string) or another array.
+
+@item
+Use the @code{isarray()} built-in function to determine if an array
+element is itself a subarray.
+
+@end itemize
+
@c ENDOFRANGE arrs
@node Functions
@@ -15560,6 +16664,7 @@ The second half of this @value{CHAPTER} describes these
* Built-in:: Summarizes the built-in functions.
* User-defined:: Describes User-defined functions in detail.
* Indirect Calls:: Choosing the function to call at runtime.
+* Functions Summary:: Summary of functions.
@end menu
@node Built-in
@@ -15650,26 +16755,45 @@ The following list describes all of
the built-in functions that work with numbers.
Optional parameters are enclosed in square brackets@w{ ([ ]):}
-@table @code
-@item atan2(@var{y}, @var{x})
+@c @asis for docbook
+@table @asis
+@item @code{atan2(@var{y}, @var{x})}
@cindexawkfunc{atan2}
@cindex arctangent
Return the arctangent of @code{@var{y} / @var{x}} in radians.
-You can use @samp{pi = atan2(0, -1)} to retrieve the value of @value{PI}.
+You can use @samp{pi = atan2(0, -1)} to retrieve the value of
+@value{PI}.
-@item cos(@var{x})
+@item @code{cos(@var{x})}
@cindexawkfunc{cos}
@cindex cosine
Return the cosine of @var{x}, with @var{x} in radians.
-@item exp(@var{x})
+@item @code{div(@var{numerator}, @var{denominator}, @var{result})}
+@cindexawkfunc{div}
+@cindex div
+Perform integer division, similar to the standard C function of the
+same name. First, truncate @code{numerator} and @code{denominator}
+towards zero, creating integer values. Clear the @code{result}
+array, and then set @code{result["quotient"]} to the result of
+@samp{numerator / denominator}, truncated towards zero to an integer,
+and set @code{result["remainder"]} to the result of @samp{numerator %
+denominator}, truncated towards zero to an integer. This function is
+primarily intended for use with arbitrary length integers; it avoids
+creating MPFR arbitrary precision floating-point values (@pxref{Arbitrary
+Precision Integers}).
+
+This function is a @command{gawk} extension. It is not available in
+compatibility mode (@pxref{Options}).
+
+@item @code{exp(@var{x})}
@cindexawkfunc{exp}
@cindex exponent
Return the exponential of @var{x} (@code{e ^ @var{x}}) or report
an error if @var{x} is out of range. The range of values @var{x} can have
depends on your machine's floating-point representation.
-@item int(@var{x})
+@item @code{int(@var{x})}
@cindexawkfunc{int}
@cindex round to nearest integer
Return the nearest integer to @var{x}, located between @var{x} and zero and
@@ -15678,13 +16802,15 @@ truncated toward zero.
For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)}
is @minus{}3, and @code{int(-3)} is @minus{}3 as well.
-@item log(@var{x})
+@item @code{log(@var{x})}
@cindexawkfunc{log}
@cindex logarithm
Return the natural logarithm of @var{x}, if @var{x} is positive;
-otherwise, report an error.
+otherwise, return @code{NaN} (``not a number'') on IEEE 754 systems.
+Additionally, @command{gawk} prints a warning message when @code{x}
+is negative.
-@item rand()
+@item @code{rand()}
@cindexawkfunc{rand}
@cindex random numbers, @code{rand()}/@code{srand()} functions
Return a random number. The values of @code{rand()} are
@@ -15742,19 +16868,19 @@ the seed to a value that is different in each run. To do this,
use @code{srand()}.
@end quotation
-@item sin(@var{x})
+@item @code{sin(@var{x})}
@cindexawkfunc{sin}
@cindex sine
Return the sine of @var{x}, with @var{x} in radians.
-@item sqrt(@var{x})
+@item @code{sqrt(@var{x})}
@cindexawkfunc{sqrt}
@cindex square root
Return the positive square root of @var{x}.
@command{gawk} prints a warning message
if @var{x} is negative. Thus, @code{sqrt(4)} is 2.
-@item srand(@r{[}@var{x}@r{]})
+@item @code{srand(}[@var{x}]@code{)}
@cindexawkfunc{srand}
Set the starting point, or seed,
for generating random numbers to the value @var{x}.
@@ -15781,6 +16907,9 @@ numbers that are truly unpredictable.
The return value of @code{srand()} is the previous seed. This makes it
easy to keep track of the seeds in case you need to consistently reproduce
sequences of random numbers.
+
+POSIX does not specify the initial seed; it differs among @command{awk}
+implementations.
@end table
@node String Functions
@@ -15798,12 +16927,23 @@ example, @code{length()} returns the number of characters in a string,
and not the number of bytes used to represent those characters. Similarly,
@code{index()} works with character indices, and not byte indices.
+@quotation CAUTION
+A number of functions deal with indices into strings. For these
+functions, the first character of a string is at position (index) one.
+This is different from C and the languages descended from it, where the
+first character is at position zero. You need to remember this when
+doing index calculations, particularly if you are used to C.
+@end quotation
+
In the following list, optional parameters are enclosed in square brackets@w{ ([ ]).}
Several functions perform string substitution; the full discussion is
provided in the description of the @code{sub()} function, which comes
towards the end since the list is presented in alphabetic order.
+
Those functions that are specific to @command{gawk} are marked with a
-pound sign@w{ (@samp{#}):}
+pound sign (@samp{#}). They are not available in compatibility mode
+(@pxref{Options}):
+
@menu
* Gory Details:: More than you want to know about @samp{\} and
@@ -15811,9 +16951,10 @@ pound sign@w{ (@samp{#}):}
@code{gensub()}.
@end menu
-@table @code
-@item asort(@var{source} @r{[}, @var{dest} @r{[}, @var{how} @r{]} @r{]}) #
-@itemx asorti(@var{source} @r{[}, @var{dest} @r{[}, @var{how} @r{]} @r{]}) #
+@c @asis for docbook
+@table @asis
+@item @code{asort(}@var{source} [@code{,} @var{dest} [@code{,} @var{how} ] ]@code{) #}
+@itemx @code{asorti(}@var{source} [@code{,} @var{dest} [@code{,} @var{how} ] ]@code{) #}
@cindexgawkfunc{asorti}
@cindex sort array
@cindex arrays, elements, retrieving number of
@@ -15877,10 +17018,7 @@ a[2] = "last"
a[3] = "middle"
@end example
-@code{asort()} and @code{asorti()} are @command{gawk} extensions; they
-are not available in compatibility mode (@pxref{Options}).
-
-@item gensub(@var{regexp}, @var{replacement}, @var{how} @r{[}, @var{target}@r{]}) #
+@item @code{gensub(@var{regexp}, @var{replacement}, @var{how}} [@code{, @var{target}}]@code{) #}
@cindexgawkfunc{gensub}
@cindex search and replace in strings
@cindex substitute in string
@@ -15942,10 +17080,7 @@ a warning message.
If @var{regexp} does not match @var{target}, @code{gensub()}'s return value
is the original unchanged value of @var{target}.
-@code{gensub()} is a @command{gawk} extension; it is not available
-in compatibility mode (@pxref{Options}).
-
-@item gsub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]})
+@item @code{gsub(@var{regexp}, @var{replacement}} [@code{, @var{target}}]@code{)}
@cindexawkfunc{gsub}
Search @var{target} for
@emph{all} of the longest, leftmost, @emph{nonoverlapping} matching
@@ -15967,7 +17102,7 @@ omitted, then the entire input record (@code{$0}) is used.
As in @code{sub()}, the characters @samp{&} and @samp{\} are special,
and the third argument must be assignable.
-@item index(@var{in}, @var{find})
+@item @code{index(@var{in}, @var{find})}
@cindexawkfunc{index}
@cindex search in string
@cindex find substring in string
@@ -15982,19 +17117,29 @@ $ @kbd{awk 'BEGIN @{ print index("peanut", "an") @}'}
@noindent
If @var{find} is not found, @code{index()} returns zero.
-(Remember that string indices in @command{awk} start at one.)
It is a fatal error to use a regexp constant for @var{find}.
-@item length(@r{[}@var{string}@r{]})
+@item @code{length(}[@var{string}]@code{)}
@cindexawkfunc{length}
@cindex string length
@cindex length of string
Return the number of characters in @var{string}. If
@var{string} is a number, the length of the digit string representing
that number is returned. For example, @code{length("abcde")} is five. By
-contrast, @code{length(15 * 35)} works out to three. In this example, 15 * 35 =
-525, and 525 is then converted to the string @code{"525"}, which has
+contrast, @code{length(15 * 35)} works out to three. In this example,
+@iftex
+@math{15 @cdot 35 = 525},
+@end iftex
+@ifnottex
+@ifnotdocbook
+15 * 35 = 525,
+@end ifnotdocbook
+@end ifnottex
+@docbook
+15 &sdot; 35 = 525, @c
+@end docbook
+and 525 is then converted to the string @code{"525"}, which has
three characters.
@cindex length of input record
@@ -16051,18 +17196,18 @@ If @option{--lint} is provided on the command line
If @option{--posix} is supplied, using an array argument is a fatal error
(@pxref{Arrays}).
-@item match(@var{string}, @var{regexp} @r{[}, @var{array}@r{]})
+@item @code{match(@var{string}, @var{regexp}} [@code{, @var{array}}]@code{)}
@cindexawkfunc{match}
@cindex string, regular expression match
@cindex match regexp in string
Search @var{string} for the
longest, leftmost substring matched by the regular expression,
-@var{regexp} and return the character position, or @dfn{index},
+@var{regexp} and return the character position (index)
at which that substring begins (one, if it starts at the beginning of
@var{string}). If no match is found, return zero.
The @var{regexp} argument may be either a regexp constant
-(@code{/@dots{}/}) or a string constant (@code{"@dots{}"}).
+(@code{/}@dots{}@code{/}) or a string constant (@code{"}@dots{}@code{"}).
In the latter case, the string is treated as a regexp to be matched.
@xref{Computed Regexps}, for a
discussion of the difference between the two forms, and the
@@ -16168,7 +17313,7 @@ The @var{array} argument to @code{match()} is a
(@pxref{Options}),
using a third argument is a fatal error.
-@item patsplit(@var{string}, @var{array} @r{[}, @var{fieldpat} @r{[}, @var{seps} @r{]} @r{]}) #
+@item @code{patsplit(@var{string}, @var{array}} [@code{, @var{fieldpat}} [@code{, @var{seps}} ] ]@code{) #}
@cindexgawkfunc{patsplit}
@cindex split string into array
Divide
@@ -16194,13 +17339,7 @@ manner similar to the way input lines are split into fields using @code{FPAT}
Before splitting the string, @code{patsplit()} deletes any previously existing
elements in the arrays @var{array} and @var{seps}.
-@cindex troubleshooting, @code{patsplit()} function
-The @code{patsplit()} function is a
-@command{gawk} extension. In compatibility mode
-(@pxref{Options}),
-it is not available.
-
-@item split(@var{string}, @var{array} @r{[}, @var{fieldsep} @r{[}, @var{seps} @r{]} @r{]})
+@item @code{split(@var{string}, @var{array}} [@code{, @var{fieldsep}} [@code{, @var{seps}} ] ]@code{)}
@cindexawkfunc{split}
Divide @var{string} into pieces separated by @var{fieldsep}
and store the pieces in @var{array} and the separator strings in the
@@ -16285,7 +17424,9 @@ If @var{string} does not match @var{fieldsep} at all (but is not null),
@var{array} has one element only. The value of that element is the original
@var{string}.
-@item sprintf(@var{format}, @var{expression1}, @dots{})
+In POSIX mode (@pxref{Options}), the fourth argument is not allowed.
+
+@item @code{sprintf(@var{format}, @var{expression1}, @dots{})}
@cindexawkfunc{sprintf}
@cindex formatting strings
Return (without printing) the string that @code{printf} would
@@ -16302,7 +17443,7 @@ assigns the string @w{@samp{pi = 3.14 (approx.)}} to the variable @code{pival}.
@cindexgawkfunc{strtonum}
@cindex convert string to number
-@item strtonum(@var{str}) #
+@item @code{strtonum(@var{str}) #}
Examine @var{str} and return its numeric value. If @var{str}
begins with a leading @samp{0}, @code{strtonum()} assumes that @var{str}
is an octal number. If @var{str} begins with a leading @samp{0x} or
@@ -16324,10 +17465,7 @@ you use the @option{--non-decimal-data} option, which isn't recommended.
Note also that @code{strtonum()} uses the current locale's decimal point
for recognizing numbers (@pxref{Locales}).
-@code{strtonum()} is a @command{gawk} extension; it is not available
-in compatibility mode (@pxref{Options}).
-
-@item sub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]})
+@item @code{sub(@var{regexp}, @var{replacement}} [@code{, @var{target}}]@code{)}
@cindexawkfunc{sub}
@cindex replace in string
Search @var{target}, which is treated as a string, for the
@@ -16338,7 +17476,7 @@ The modified string becomes the new value of @var{target}.
Return the number of substitutions made (zero or one).
The @var{regexp} argument may be either a regexp constant
-(@code{/@dots{}/}) or a string constant (@code{"@dots{}"}).
+(@code{/}@dots{}@code{/}) or a string constant (@code{"}@dots{}@code{"}).
In the latter case, the string is treated as a regexp to be matched.
@xref{Computed Regexps}, for a
discussion of the difference between the two forms, and the
@@ -16428,7 +17566,7 @@ will not run.
Finally, if the @var{regexp} is not a regexp constant, it is converted into a
string, and then the value of that string is treated as the regexp to match.
-@item substr(@var{string}, @var{start} @r{[}, @var{length}@r{]})
+@item @code{substr(@var{string}, @var{start}} [@code{, @var{length}} ]@code{)}
@cindexawkfunc{substr}
@cindex substring
Return a @var{length}-character-long substring of @var{string},
@@ -16447,7 +17585,7 @@ in the string, counting from character @var{start}.
@cindex Brian Kernighan's @command{awk}
If @var{start} is less than one, @code{substr()} treats it as
if it was one. (POSIX doesn't specify what to do in this case:
-Brian Kernighan's @command{awk} acts this way, and therefore @command{gawk}
+BWK @command{awk} acts this way, and therefore @command{gawk}
does too.)
If @var{start} is greater than the number of characters
in the string, @code{substr()} returns the null string.
@@ -16488,7 +17626,7 @@ string = substr(string, 1, 2) "CDE" substr(string, 6)
@cindex case sensitivity, converting case
@cindex strings, converting letter case
-@item tolower(@var{string})
+@item @code{tolower(@var{string})}
@cindexawkfunc{tolower}
@cindex convert string to lower case
Return a copy of @var{string}, with each uppercase character
@@ -16496,7 +17634,7 @@ in the string replaced with its corresponding lowercase character.
Nonalphabetic characters are left unchanged. For example,
@code{tolower("MiXeD cAsE 123")} returns @code{"mixed case 123"}.
-@item toupper(@var{string})
+@item @code{toupper(@var{string})}
@cindexawkfunc{toupper}
@cindex convert string to upper case
Return a copy of @var{string}, with each lowercase character
@@ -16516,13 +17654,19 @@ Nonalphabetic characters are left unchanged. For example,
@cindex backslash (@code{\}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@cindex @code{&} (ampersand), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@cindex ampersand (@code{&}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
+
+@quotation CAUTION
+This section has been known to cause headaches.
+You might want to skip it upon first reading.
+@end quotation
+
When using @code{sub()}, @code{gsub()}, or @code{gensub()}, and trying to get literal
backslashes and ampersands into the replacement text, you need to remember
that there are several levels of @dfn{escape processing} going on.
First, there is the @dfn{lexical} level, which is when @command{awk} reads
your program
-and builds an internal copy of it that can be executed.
+and builds an internal copy of it to execute.
Then there is the runtime level, which is when @command{awk} actually scans the
replacement string to determine what to generate.
@@ -16533,7 +17677,7 @@ escape sequences listed in @ref{Escape Sequences}.
Thus, for every @samp{\} that @command{awk} processes at the runtime
level, you must type two backslashes at the lexical level.
When a character that is not valid for an escape sequence follows the
-@samp{\}, Brian Kernighan's @command{awk} and @command{gawk} both simply remove the initial
+@samp{\}, BWK @command{awk} and @command{gawk} both simply remove the initial
@samp{\} and put the next character into the string. Thus, for
example, @code{"a\qb"} is treated as @code{"aqb"}.
@@ -16558,26 +17702,26 @@ through unchanged. This is illustrated in @ref{table-sub-escapes}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
- @code{\&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\\&}! @code{\\&}!a literal @samp{\&}_cr
- @code{\\\\\&}! @code{\\&}!a literal @samp{\&}_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\\&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+ @code{\&}! @code{&}!The matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\\&}! @code{\\&}!A literal @samp{\&}_cr
+ @code{\\\\\&}! @code{\\&}!A literal @samp{\&}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\\&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
}
_bigskip}
@end tex
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\&}
-@item @code{\\\\\&} @tab @code{\\&} @tab a literal @samp{\&}
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\\&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\&} @tab @code{&} @tab The matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\&}
+@item @code{\\\\\&} @tab @code{\\&} @tab A literal @samp{\&}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\\&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@end multitable
@end ifdocbook
@ifnottex
@@ -16585,13 +17729,13 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
- @code{\&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\&} @code{\&} a literal @samp{&}
- @code{\\\\&} @code{\\&} a literal @samp{\&}
- @code{\\\\\&} @code{\\&} a literal @samp{\&}
-@code{\\\\\\&} @code{\\\&} a literal @samp{\\&}
- @code{\\q} @code{\q} a literal @samp{\q}
+ @code{\&} @code{&} The matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\\&} @code{\&} A literal @samp{&}
+ @code{\\\\&} @code{\\&} A literal @samp{\&}
+ @code{\\\\\&} @code{\\&} A literal @samp{\&}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\\&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@end display
@end ifnotdocbook
@end ifnottex
@@ -16607,86 +17751,19 @@ case of even numbers of backslashes entered at the lexical level.)
The problem with the historical approach is that there is no way to get
a literal @samp{\} followed by the matched text.
-@c @cindex @command{awk} language, POSIX version
-@cindex POSIX @command{awk}, functions and, @code{gsub()}/@code{sub()}
-The 1992 POSIX standard attempted to fix this problem. That standard
-says that @code{sub()} and @code{gsub()} look for either a @samp{\} or an @samp{&}
-after the @samp{\}. If either one follows a @samp{\}, that character is
-output literally. The interpretation of @samp{\} and @samp{&} then becomes
-as shown in @ref{table-sub-posix-92}.
-
-@float Table,table-sub-posix-92
-@caption{1992 POSIX Rules for @code{sub()} and @code{gsub()} Escape Sequence Processing}
-@c thanks to Karl Berry for formatting this table
-@tex
-\vbox{\bigskip
-% We need more characters for escape and tab ...
-\catcode`_ = 0
-\catcode`! = 4
-% ... since this table has lots of &'s and \'s, so we unspecialize them.
-\catcode`\& = \other \catcode`\\ = \other
-_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
- You type!@code{sub()} sees!@code{sub()} generates_cr
-_hrulefill!_hrulefill!_hrulefill_cr
- @code{&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-}
-_bigskip}
-@end tex
-@ifdocbook
-@multitable @columnfractions .20 .20 .60
-@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, then the matched text
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@end multitable
-@end ifdocbook
-@ifnottex
-@ifnotdocbook
-@display
- You type @code{sub()} sees @code{sub()} generates
- -------- ---------- ---------------
- @code{&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
-@end display
-@end ifnotdocbook
-@end ifnottex
-@end float
-
-@noindent
-This appears to solve the problem.
-Unfortunately, the phrasing of the standard is unusual. It
-says, in effect, that @samp{\} turns off the special meaning of any
-following character, but for anything other than @samp{\} and @samp{&},
-such special meaning is undefined. This wording leads to two problems:
+Several editions of the POSIX standard attempted to fix this problem
+but weren't successful. The details are irrelevant at this point in time.
-@itemize @bullet
-@item
-Backslashes must now be doubled in the @var{replacement} string, breaking
-historical @command{awk} programs.
-
-@item
-To make sure that an @command{awk} program is portable, @emph{every} character
-in the @var{replacement} string must be preceded with a
-backslash.@footnote{This consequence was certainly unintended.}
-@c I can say that, 'cause I was involved in making this change
-@end itemize
-
-Because of the problems just listed,
-in 1996, the @command{gawk} maintainer submitted
+At one point, the @command{gawk} maintainer submitted
proposed text for a revised standard that
reverts to rules that correspond more closely to the original existing
practice. The proposed rules have special cases that make it possible
-to produce a @samp{\} preceding the matched text. This is shown in
+to produce a @samp{\} preceding the matched text.
+This is shown in
@ref{table-sub-proposed}.
@float Table,table-sub-proposed
-@caption{Proposed Rules For @code{sub()} And Backslash}
+@caption{GNU @command{awk} Rules For @code{sub()} And Backslash}
@tex
\vbox{\bigskip
% We need more characters for escape and tab ...
@@ -16697,10 +17774,10 @@ to produce a @samp{\} preceding the matched text. This is shown in
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+@code{\\\\&}! @code{\\&}!A literal @samp{\}, followed by the matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
@code{\\\\}! @code{\\}!@code{\\}_cr
}
_bigskip}
@@ -16708,10 +17785,10 @@ _bigskip}
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, followed by the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, followed by the matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@item @code{\\\\} @tab @code{\\} @tab @code{\\}
@end multitable
@end ifdocbook
@@ -16720,10 +17797,10 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\q} @code{\q} a literal @samp{\q}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@code{\\\\} @code{\\} @code{\\}
@end display
@end ifnotdocbook
@@ -16736,13 +17813,13 @@ there was only one. However, as in the historical case, any @samp{\} that
is not part of one of these three sequences is not special and appears
in the output literally.
-@command{gawk} 3.0 and 3.1 follow these proposed POSIX rules for @code{sub()} and
-@code{gsub()}.
-@c As much as we think it's a lousy idea. You win some, you lose some. Sigh.
-The POSIX standard took much longer to be revised than was expected in 1996.
-The 2001 standard does not follow the above rules. Instead, the rules
-there are somewhat simpler. The results are similar except for one case.
+@command{gawk} 3.0 and 3.1 follow these rules for @code{sub()} and
+@code{gsub()}. The POSIX standard took much longer to be revised than
+was expected. In addition, the @command{gawk} maintainer's proposal was
+lost during the standardization process. The final rules are
+somewhat simpler. The results are similar except for one case.
+@cindex POSIX @command{awk}, functions and, @code{gsub()}/@code{sub()}
The POSIX rules state that @samp{\&} in the replacement string produces
a literal @samp{&}, @samp{\\} produces a literal @samp{\}, and @samp{\} followed
by anything else is not special; the @samp{\} is placed straight into the output.
@@ -16760,10 +17837,10 @@ These rules are presented in @ref{table-posix-sub}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+@code{\\\\&}! @code{\\&}!A literal @samp{\}, followed by the matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
@code{\\\\}! @code{\\}!@code{\}_cr
}
_bigskip}
@@ -16771,10 +17848,10 @@ _bigskip}
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, followed by the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, followed by the matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@item @code{\\\\} @tab @code{\\} @tab @code{\}
@end multitable
@end ifdocbook
@@ -16783,10 +17860,10 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\q} @code{\q} a literal @samp{\q}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@code{\\\\} @code{\\} @code{\}
@end display
@end ifnotdocbook
@@ -16796,17 +17873,17 @@ _bigskip}
The only case where the difference is noticeable is the last one: @samp{\\\\}
is seen as @samp{\\} and produces @samp{\} instead of @samp{\\}.
-Starting with version 3.1.4, @command{gawk} followed the POSIX rules
+Starting with @value{PVERSION} 3.1.4, @command{gawk} followed the POSIX rules
when @option{--posix} is specified (@pxref{Options}). Otherwise,
-it continued to follow the 1996 proposed rules, since
+it continued to follow the proposed rules, since
that had been its behavior for many years.
-When version 4.0.0 was released, the @command{gawk} maintainer
+When @value{PVERSION} 4.0.0 was released, the @command{gawk} maintainer
made the POSIX rules the default, breaking well over a decade's worth
of backwards compatibility.@footnote{This was rather naive of him, despite
there being a note in this section indicating that the next major version
would move to the POSIX rules.} Needless to say, this was a bad idea,
-and as of version 4.0.1, @command{gawk} resumed its historical
+and as of @value{PVERSION} 4.0.1, @command{gawk} resumed its historical
behavior, and only follows the POSIX rules when @option{--posix} is given.
The rules for @code{gensub()} are considerably simpler. At the runtime
@@ -16829,24 +17906,24 @@ as shown in @ref{table-gensub-escapes}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{gensub()} sees!@code{gensub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
- @code{&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\\}! @code{\\}!a literal @samp{\}_cr
- @code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
- @code{\\q}! @code{\q}!a literal @samp{q}_cr
+ @code{&}! @code{&}!The matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\\}! @code{\\}!A literal @samp{\}_cr
+ @code{\\\\&}! @code{\\&}!A literal @samp{\}, then the matched text_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{q}_cr
}
_bigskip}
@end tex
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{gensub()} sees @tab @code{gensub()} generates
-@item @code{&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\} @tab @code{\\} @tab a literal @samp{\}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, then the matched text
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{q}
+@item @code{&} @tab @code{&} @tab The matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\\} @tab @code{\\} @tab A literal @samp{\}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, then the matched text
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{q}
@end multitable
@end ifdocbook
@ifnottex
@@ -16854,12 +17931,12 @@ _bigskip}
@display
You type @code{gensub()} sees @code{gensub()} generates
-------- ------------- ------------------
- @code{&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\\} @code{\\} a literal @samp{\}
- @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\q} @code{\q} a literal @samp{q}
+ @code{&} @code{&} The matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\\\} @code{\\} A literal @samp{\}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, then the matched text
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\q} @code{\q} A literal @samp{q}
@end display
@end ifnotdocbook
@end ifnottex
@@ -16929,8 +18006,8 @@ Although this makes a certain amount of sense, it can be surprising.
The following functions relate to input/output (I/O).
Optional parameters are enclosed in square brackets ([ ]):
-@table @code
-@item close(@var{filename} @r{[}, @var{how}@r{]})
+@table @asis
+@item @code{close(}@var{filename} [@code{,} @var{how}]@code{)}
@cindexawkfunc{close}
@cindex files, closing
@cindex close file or coprocess
@@ -16949,7 +18026,10 @@ not matter.
@xref{Two-way I/O},
which discusses this feature in more detail and gives an example.
-@item fflush(@r{[}@var{filename}@r{]})
+Note that the second argument to @code{close()} is a @command{gawk}
+extension; it is not available in compatibility mode (@pxref{Options}).
+
+@item @code{fflush(}[@var{filename}]@code{)}
@cindexawkfunc{fflush}
@cindex flush buffered output
Flush any buffered output associated with @var{filename}, which is either a
@@ -16971,7 +18051,7 @@ buffers its output and the @code{fflush()} function forces
@cindex extensions, common@comma{} @code{fflush()} function
@cindex Brian Kernighan's @command{awk}
-@code{fflush()} was added to Brian Kernighan's version of @command{awk} in
+@code{fflush()} was added to BWK @command{awk} in
April of 1992. For two decades, it was not part of the POSIX standard.
As of December, 2012, it was accepted for inclusion into the POSIX
standard.
@@ -16983,7 +18063,7 @@ then @command{awk} flushes the buffers for @emph{all} open output files
and pipes.
@quotation NOTE
-Prior to version 4.0.2, @command{gawk}
+Prior to @value{PVERSION} 4.0.2, @command{gawk}
would flush only the standard output if there was no argument,
and flush all output files and pipes if the argument was the null
string. This was changed in order to be compatible with Brian
@@ -16999,7 +18079,7 @@ only the standard output.
@c @cindex warnings, automatic
@cindex troubleshooting, @code{fflush()} function
@code{fflush()} returns zero if the buffer is successfully flushed;
-otherwise, it returns non-zero (@command{gawk} returns @minus{}1).
+otherwise, it returns non-zero. (@command{gawk} returns @minus{}1.)
In the case where all buffers are flushed, the return value is zero
only if all buffers were flushed successfully. Otherwise, it is
@minus{}1, and @command{gawk} warns about the problem @var{filename}.
@@ -17009,7 +18089,7 @@ a file or pipe that was opened for reading (such as with @code{getline}),
or if @var{filename} is not an open file, pipe, or coprocess.
In such a case, @code{fflush()} returns @minus{}1, as well.
-@item system(@var{command})
+@item @code{system(@var{command})}
@cindexawkfunc{system}
@cindex invoke shell command
@cindex interacting with other programs
@@ -17303,8 +18383,13 @@ particular log record was written. Many programs log their timestamp
in the form returned by the @code{time()} system call, which is the
number of seconds since a particular epoch. On POSIX-compliant systems,
it is the number of seconds since
-1970-01-01 00:00:00 UTC, not counting leap seconds.@footnote{@xref{Glossary},
-especially the entries ``Epoch'' and ``UTC.''}
+1970-01-01 00:00:00 UTC, not counting leap
+@ifclear FOR_PRINT
+seconds.@footnote{@xref{Glossary}, especially the entries ``Epoch'' and ``UTC.''}
+@end ifclear
+@ifset FOR_PRINT
+seconds.
+@end ifset
All known POSIX-compliant systems support timestamps from 0 through
@iftex
@math{2^{31} - 1},
@@ -17334,8 +18419,9 @@ However, recent versions
of @command{mawk} (@pxref{Other Versions}) also support these functions.
Optional parameters are enclosed in square brackets ([ ]):
-@table @code
-@item mktime(@var{datespec})
+@c @asis for docbook
+@table @asis
+@item @code{mktime(@var{datespec})}
@cindexgawkfunc{mktime}
@cindex generate time values
Turn @var{datespec} into a timestamp in the same form
@@ -17365,7 +18451,7 @@ is out of range, @code{mktime()} returns @minus{}1.
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
-@item strftime(@r{[}@var{format} @r{[}, @var{timestamp} @r{[}, @var{utc-flag}@r{]]]})
+@item @code{strftime(} [@var{format} [@code{,} @var{timestamp} [@code{,} @var{utc-flag}] ] ]@code{)}
@c STARTOFRANGE strf
@cindexgawkfunc{strftime}
@cindex format time string
@@ -17387,7 +18473,7 @@ output that is equivalent to that of the @command{date} utility.
You can assign a new value to @code{PROCINFO["strftime"]} to
change the default format; see below for the various format directives.
-@item systime()
+@item @code{systime()}
@cindexgawkfunc{systime}
@cindex timestamps
@cindex current system time
@@ -17462,10 +18548,10 @@ This is the ISO 8601 date format.
@item %g
The year modulo 100 of the ISO 8601 week number, as a decimal number (00--99).
-For example, January 1, 1993 is in week 53 of 1992. Thus, the year
-of its ISO 8601 week number is 1992, even though its year is 1993.
-Similarly, December 31, 1973 is in week 1 of 1974. Thus, the year
-of its ISO week number is 1974, even though its year is 1973.
+For example, January 1, 2012 is in week 52 of 2011. Thus, the year
+of its ISO 8601 week number is 2011, even though its year is 2012.
+Similarly, December 31, 2012 is in week 1 of 2013. Thus, the year
+of its ISO week number is 2013, even though its year is 2012.
@item %G
The full year of the ISO week number, as a decimal number.
@@ -17546,7 +18632,7 @@ The locale's ``appropriate'' time representation.
The year modulo 100 as a decimal number (00--99).
@item %Y
-The full year as a decimal number (e.g., 2011).
+The full year as a decimal number (e.g., 2015).
@c @cindex RFC 822
@c @cindex RFC 1036
@@ -17580,17 +18666,6 @@ uses the system's version of @code{strftime()} if it's there.
Typically, the conversion specifier either does not appear in the
returned string or appears literally.}
-@c @cindex locale, definition of
-Informally, a @dfn{locale} is the geographic place in which a program
-is meant to run. For example, a common way to abbreviate the date
-September 4, 2012 in the United States is ``9/4/12.''
-In many countries in Europe, however, it is abbreviated ``4.9.12.''
-Thus, the @samp{%x} specification in a @code{"US"} locale might produce
-@samp{9/4/12}, while in a @code{"EUROPE"} locale, it might produce
-@samp{4.9.12}. The ISO C standard defines a default @code{"C"}
-locale, which is an environment that is typical of what many C programmers
-are used to.
-
For systems that are not yet fully standards-compliant,
@command{gawk} supplies a copy of
@code{strftime()} from the GNU C Library.
@@ -17643,7 +18718,7 @@ the string. For example:
@example
$ date '+Today is %A, %B %d, %Y.'
-@print{} Today is Wednesday, March 30, 2011.
+@print{} Today is Monday, May 05, 2014.
@end example
Here is the @command{gawk} version of the @command{date} utility.
@@ -17663,7 +18738,7 @@ case $1 in
esac
gawk 'BEGIN @{
- format = "%a %b %e %H:%M:%S %Z %Y"
+ format = PROCINFO["strftime"]
exitval = 0
if (ARGC > 2)
@@ -17751,9 +18826,7 @@ Operands | 0 | 1 | 0 | 1 | 0 | 1
@end tex
@docbook
-<!-- FIXME: Fix ID and add xref in text. -->
-<table id="table-bitwise-ops">
-<title>Bitwise Operations</title>
+<informaltable>
<tgroup cols="7" colsep="1">
<colspec colname="c1"/>
@@ -17813,7 +18886,7 @@ Operands | 0 | 1 | 0 | 1 | 0 | 1
</tbody>
</tgroup>
-</table>
+</informaltable>
@end docbook
@end float
@@ -17849,32 +18922,32 @@ bitwise operations just described. They are:
@table @asis
@cindexgawkfunc{and}
@cindex bitwise AND
-@item and(@var{v1}, @var{v2} @r{[}, @r{@dots{}]})
+@item @code{and(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
Return the bitwise AND of the arguments. There must be at least two.
@cindexgawkfunc{compl}
@cindex bitwise complement
-@item compl(@var{val})
+@item @code{compl(@var{val})}
Return the bitwise complement of @var{val}.
@cindexgawkfunc{lshift}
@cindex left shift
-@item lshift(@var{val}, @var{count})
+@item @code{lshift(@var{val}, @var{count})}
Return the value of @var{val}, shifted left by @var{count} bits.
@cindexgawkfunc{or}
@cindex bitwise OR
-@item or(@var{v1}, @var{v2} @r{[}, @r{@dots{}]})
+@item @code{or(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
Return the bitwise OR of the arguments. There must be at least two.
@cindexgawkfunc{rshift}
@cindex right shift
-@item rshift(@var{val}, @var{count})
+@item @code{rshift(@var{val}, @var{count})}
Return the value of @var{val}, shifted right by @var{count} bits.
@cindexgawkfunc{xor}
@cindex bitwise XOR
-@item xor(@var{v1}, @var{v2} @r{[}, @r{@dots{}]})
+@item @code{xor(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
Return the bitwise XOR of the arguments. There must be at least two.
@end table
@@ -17997,7 +19070,7 @@ results of the @code{compl()}, @code{lshift()}, and @code{rshift()} functions.
@command{gawk} provides a single function that lets you distinguish
an array from a scalar variable. This is necessary for writing code
-that traverses every element of a true multidimensional array
+that traverses every element of an array of arrays
(@pxref{Arrays of Arrays}).
@table @code
@@ -18035,10 +19108,10 @@ The descriptions here are purposely brief.
for the full story.
Optional parameters are enclosed in square brackets ([ ]):
-@table @code
+@table @asis
@cindexgawkfunc{bindtextdomain}
@cindex set directory of message catalogs
-@item bindtextdomain(@var{directory} @r{[}, @var{domain}@r{]})
+@item @code{bindtextdomain(@var{directory}} [@code{,} @var{domain}]@code{)}
Set the directory in which
@command{gawk} will look for message translation files, in case they
will not or cannot be placed in the ``standard'' locations
@@ -18052,14 +19125,14 @@ given @var{domain}.
@cindexgawkfunc{dcgettext}
@cindex translate string
-@item dcgettext(@var{string} @r{[}, @var{domain} @r{[}, @var{category}@r{]]})
+@item @code{dcgettext(@var{string}} [@code{,} @var{domain} [@code{,} @var{category}] ]@code{)}
Return the translation of @var{string} in
text domain @var{domain} for locale category @var{category}.
The default value for @var{domain} is the current value of @code{TEXTDOMAIN}.
The default value for @var{category} is @code{"LC_MESSAGES"}.
@cindexgawkfunc{dcngettext}
-@item dcngettext(@var{string1}, @var{string2}, @var{number} @r{[}, @var{domain} @r{[}, @var{category}@r{]]})
+@item @code{dcngettext(@var{string1}, @var{string2}, @var{number}} [@code{,} @var{domain} [@code{,} @var{category}] ]@code{)}
Return the plural form used for @var{number} of the
translation of @var{string1} and @var{string2} in text domain
@var{domain} for locale category @var{category}. @var{string1} is the
@@ -18095,6 +19168,12 @@ them, i.e., to tell @command{awk} what they should do.
@node Definition Syntax
@subsection Function Definition Syntax
+@quotation
+It's entirely fair to say that the @command{awk} syntax for local
+variable definitions is appallingly awful.
+@author Brian Kernighan
+@end quotation
+
@c STARTOFRANGE fdef
@cindex functions, defining
Definitions of functions can appear anywhere between the rules of an
@@ -18107,12 +19186,12 @@ entire program before starting to execute any of it.
The definition of a function named @var{name} looks like this:
-@example
-function @var{name}(@r{[}@var{parameter-list}@r{]})
-@{
+@display
+@code{function} @var{name}@code{(}[@var{parameter-list}]@code{)}
+@code{@{}
@var{body-of-function}
-@}
-@end example
+@code{@}}
+@end display
@cindex names, functions
@cindex functions, names of
@@ -18121,20 +19200,28 @@ function @var{name}(@r{[}@var{parameter-list}@r{]})
Here, @var{name} is the name of the function to define. A valid function
name is like a valid variable name: a sequence of letters, digits, and
underscores that doesn't start with a digit.
+Here too, only the 52 upper- and lowercase English letters may
+be used in a function name.
Within a single @command{awk} program, any particular name can only be
used as a variable, array, or function.
@var{parameter-list} is an optional list of the function's arguments and local
variable names, separated by commas. When the function is called,
the argument names are used to hold the argument values given in
-the call. The local variables are initialized to the empty string.
+the call.
+
A function cannot have two parameters with the same name, nor may it
have a parameter with the same name as the function itself.
+In addition, according to the POSIX standard, function parameters
+cannot have the same name as one of the special built-in variables
+(@pxref{Built-in Variables}). Not all versions of @command{awk} enforce
+this restriction.
-In addition, according to the POSIX standard, function parameters cannot have the same
-name as one of the special built-in variables
-(@pxref{Built-in Variables}. Not all versions of @command{awk}
-enforce this restriction.)
+Local variables act like the empty string if referenced where a string
+value is required, and like zero if referenced where a numeric value
+is required. This is the same as regular variables that have never been
+assigned a value. (There is more to understand about local variables;
+@pxref{Dynamic Typing}.)
The @var{body-of-function} consists of @command{awk} statements. It is the
most important part of the definition, because it says what the function
@@ -18258,7 +19345,8 @@ this program, using our function to format the results, prints:
21.2
@end example
-This function deletes all the elements in an array:
+This function deletes all the elements in an array (recall that the
+extra whitespace signifies the start of the local variable list):
@example
function delarray(a, i)
@@ -18281,17 +19369,18 @@ addition to the POSIX standard.)
The following is an example of a recursive function. It takes a string
as an input parameter and returns the string in backwards order.
Recursive functions must always have a test that stops the recursion.
-In this case, the recursion terminates when the starting position
-is zero, i.e., when there are no more characters left in the string.
+In this case, the recursion terminates when the input string is
+already empty.
+@c 8/2014: Thanks to Mike Brennan for the improved formulation
@cindex @code{rev()} user-defined function
@example
-function rev(str, start)
+function rev(str)
@{
- if (start == 0)
+ if (str == "")
return ""
- return (substr(str, start, 1) rev(str, start - 1))
+ return (rev(substr(str, 2)) substr(str, 1, 1))
@}
@end example
@@ -18300,7 +19389,7 @@ this way:
@example
$ @kbd{echo "Don't Panic!" |}
-> @kbd{gawk --source '@{ print rev($0, length($0)) @}' -f rev.awk}
+> @kbd{gawk -e '@{ print rev($0) @}' -f rev.awk}
@print{} !cinaP t'noD
@end example
@@ -18319,7 +19408,7 @@ to create an @command{awk} version of @code{ctime()}:
function ctime(ts, format)
@{
- format = "%a %b %e %H:%M:%S %Z %Y"
+ format = PROCINFO["strftime"]
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
@@ -18371,7 +19460,8 @@ an error.
@cindex local variables, in a function
@cindex variables, local to a function
-There is no way to make a variable local to a @code{@{ @dots{} @}} block in
+Unlike many languages,
+there is no way to make a variable local to a @code{@{} @dots{} @code{@}} block in
@command{awk}, but you can make a variable local to a function. It is
good practice to do so whenever a variable is needed only in that
function.
@@ -18584,7 +19674,7 @@ BEGIN @{
@noindent
prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because
-@code{changeit} stores @code{"two"} in the second element of @code{a}.
+@code{changeit()} stores @code{"two"} in the second element of @code{a}.
@end quotation
@cindex undefined functions
@@ -18633,14 +19723,14 @@ This statement returns control to the calling part of the @command{awk} program.
can also be used to return a value for use in the rest of the @command{awk}
program. It looks like this:
-@example
-return @r{[}@var{expression}@r{]}
-@end example
+@display
+@code{return} [@var{expression}]
+@end display
The @var{expression} part is optional.
Due most likely to an oversight, POSIX does not define what the return
value is if you omit the @var{expression}. Technically speaking, this
-make the returned value undefined, and therefore, unpredictable.
+makes the returned value undefined, and therefore, unpredictable.
In practice, though, all versions of @command{awk} simply return the
null string, which acts like zero if used in a numeric context.
@@ -18743,9 +19833,9 @@ BEGIN @{
@end example
In this example, the first call to @code{foo()} generates
-a fatal error, so @command{gawk} will not report the second
-error. If you comment out that call, though, then @command{gawk}
-will report the second error.
+a fatal error, so @command{awk} will not report the second
+error. If you comment out that call, though, then @command{awk}
+does report the second error.
Usually, such things aren't a big issue, but it's worth
being aware of them.
@@ -18760,7 +19850,7 @@ being aware of them.
@cindex pointers to functions
@cindex differences in @command{awk} and @command{gawk}, indirect function calls
-This section describes a @command{gawk}-specific extension.
+This section describes an advanced, @command{gawk}-specific extension.
Often, you may wish to defer the choice of function to call until runtime.
For example, you may have different kinds of records, each of which
@@ -18806,8 +19896,11 @@ To process the data, you might write initially:
@noindent
This style of programming works, but can be awkward. With @dfn{indirect}
function calls, you tell @command{gawk} to use the @emph{value} of a
-variable as the name of the function to call.
+variable as the @emph{name} of the function to call.
+@cindex @code{@@}-notation for indirect function calls
+@cindex indirect function calls, @code{@@}-notation
+@cindex function calls, indirect, @code{@@}-notation for
The syntax is similar to that of a regular function call: an identifier
immediately followed by a left parenthesis, any arguments, and then
a closing right parenthesis, with the addition of a leading @samp{@@}
@@ -18865,7 +19958,6 @@ Otherwise they perform the expected computations and are not unusual.
@example
@c file eg/prog/indirectcall.awk
# For each record, print the class name and the requested statistics
-
@{
class_name = $1
gsub(/_/, " ", class_name) # Replace _ with spaces
@@ -19059,7 +20151,7 @@ function rsort(first, last)
@c endfile
@end example
-Here is an extended version of the data file:
+Here is an extended version of the @value{DF}:
@example
@c file eg/data/class_data2
@@ -19094,10 +20186,12 @@ $ @kbd{gawk -f quicksort.awk -f indirectcall.awk class_data2}
Remember that you must supply a leading @samp{@@} in front of an indirect function call.
-Unfortunately, indirect function calls cannot be used with the built-in functions. However,
-you can generally write ``wrapper'' functions which call the built-in ones, and those can
-be called indirectly. (Other than, perhaps, the mathematical functions, there is not a lot
-of reason to try to call the built-in functions indirectly.)
+Starting with @value{PVERSION} 4.1.2 of @command{gawk}, indirect function
+calls may also be used with built-in functions and with extension functions
+(@pxref{Dynamic Extensions}). The only thing you cannot do is pass a regular
+expression constant to a built-in function through an indirect function
+call.@footnote{This may change in a future version; recheck the documentation that
+comes with your version of @command{gawk} to see if it has.}
@command{gawk} does its best to make indirect function calls efficient.
For example, in the following case:
@@ -19108,23 +20202,84 @@ for (i = 1; i <= n; i++)
@end example
@noindent
-@code{gawk} will look up the actual function to call only once.
+@command{gawk} looks up the actual function to call only once.
+
+@node Functions Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+@command{awk} provides built-in functions and lets you define your own
+functions.
+
+@item
+POSIX @command{awk} provides three kinds of built-in functions: numeric,
+string, and I/O. @command{gawk} provides functions that work with values
+representing time, do bit manipulation, sort arrays, and internationalize
+and localize programs. @command{gawk} also provides several extensions to
+some of the standard functions, typically in the form of additional arguments.
+
+@item
+Functions accept zero or more arguments and return a value. The
+expressions that provide the argument values are completely evaluated
+before the function is called. Order of evaluation is not defined.
+The return value can be ignored.
+
+@item
+The handling of backslash in @code{sub()} and @code{gsub()} is not simple.
+It is more straightforward in @command{gawk}'s @code{gensub()} function,
+but that function still requires care in its use.
+
+@item
+User-defined functions provide important capabilities but come with
+some syntactic inelegancies. In a function call, there cannot be any
+space between the function name and the opening left parenthesis of the
+argument list. Also, there is no provision for local variables, so the
+convention is to add extra parameters, and to separate them visually
+from the real parameters by extra whitespace.
+
+@item
+User-defined functions may call other user-defined (and built-in)
+functions and may call themselves recursively. Function parameters
+``hide'' any global variables of the same names.
+You cannot use the name of a reserved variable (such as @code{ARGC})
+as the name of a parameter in user-defined functions.
+
+@item
+Scalar values are passed to user-defined functions by value. Array
+parameters are passed by reference; any changes made by the function to
+array parameters are thus visible after the function has returned.
+
+@item
+Use the @code{return} statement to return from a user-defined function.
+An optional expression becomes the function's return value. Only scalar
+values may be returned by a function.
+
+@item
+If a variable that has never been used is passed to a user-defined
+function, how that function treats the variable can set its nature:
+either scalar or array.
+
+@item
+@command{gawk} provides indirect function calls using a special syntax.
+By setting a variable to the name of a function, you can
+determine at runtime what function will be called at that point in the
+program. This is equivalent to function pointers in C and C++.
+
+@end itemize
@c ENDOFRANGE funcud
-@iftex
-@part Part II:@* Problem Solving With @command{awk}
-@end iftex
+@ifnotinfo
+@part @value{PART2}Problem Solving With @command{awk}
+@end ifnotinfo
-@ignore
@ifdocbook
-@part Part II:@* Problem Solving With @command{awk}
-
Part II shows how to use @command{awk} and @command{gawk} for problem solving.
There is lots of code here for you to read and learn from.
It contains the following chapters:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@ref{Library Functions}.
@@ -19132,7 +20287,6 @@ It contains the following chapters:
@ref{Sample Programs}.
@end itemize
@end ifdocbook
-@end ignore
@node Library Functions
@chapter A Library of @command{awk} Functions
@@ -19143,7 +20297,7 @@ It contains the following chapters:
@c STARTOFRANGE fudlib
@cindex functions, user-defined, library of
-@ref{User-defined}, describes how to write
+@DBREF{User-defined} describes how to write
your own @command{awk} functions. Writing functions is important, because
it allows you to encapsulate algorithms and program tasks in a single
place. It simplifies programming, making program development more
@@ -19167,25 +20321,26 @@ of good programs leads to better writing.
In fact, they felt this idea was so important that they placed this
statement on the cover of their book. Because we believe strongly
that their statement is correct, this @value{CHAPTER} and @ref{Sample
-Programs}, provide a good-sized body of code for you to read, and we hope,
+Programs}, provide a good-sized body of code for you to read and, we hope,
to learn from.
-@c 2e: USE TEXINFO-2 FUNCTION DEFINITION STUFF!!!!!!!!!!!!!
This @value{CHAPTER} presents a library of useful @command{awk} functions.
Many of the sample programs presented later in this @value{DOCUMENT}
use these functions.
The functions are presented here in a progression from simple to complex.
@cindex Texinfo
-@ref{Extract Program},
+@DBREF{Extract Program}
presents a program that you can use to extract the source code for
these example library functions and programs from the Texinfo source
for this @value{DOCUMENT}.
(This has already been done as part of the @command{gawk} distribution.)
+@ifclear FOR_PRINT
If you have written one or more useful, general-purpose @command{awk} functions
and would like to contribute them to the @command{awk} user community, see
@ref{How To Contribute}, for more information.
+@end ifclear
@cindex portability, example programs
The programs in this @value{CHAPTER} and in
@@ -19194,7 +20349,7 @@ freely use features that are @command{gawk}-specific.
Rewriting these programs for different implementations of @command{awk}
is pretty straightforward.
-@itemize @bullet
+@itemize @value{BULLET}
@item
Diagnostic error messages are sent to @file{/dev/stderr}.
Use @samp{| "cat 1>&2"} instead of @samp{> "/dev/stderr"} if your system
@@ -19238,6 +20393,8 @@ comparisons use only lowercase letters.
* Passwd Functions:: Functions for getting user information.
* Group Functions:: Functions for getting group information.
* Walking Arrays:: A function to walk arrays of arrays.
+* Library Functions Summary:: Summary of library functions.
+* Library Exercises:: Exercises.
@end menu
@node Library Names
@@ -19324,7 +20481,7 @@ A different convention, common in the Tcl community, is to use a single
associative array to hold the values needed by the library function(s), or
``package.'' This significantly decreases the number of actual global names
in use. For example, the functions described in
-@ref{Passwd Functions},
+@DBREF{Passwd Functions}
might have used array elements @code{@w{PW_data["inited"]}}, @code{@w{PW_data["total"]}},
@code{@w{PW_data["count"]}}, and @code{@w{PW_data["awklib"]}}, instead of
@code{@w{_pw_inited}}, @code{@w{_pw_awklib}}, @code{@w{_pw_total}},
@@ -19372,11 +20529,12 @@ provides an implementation for other versions of @command{awk}:
#
# Arnold Robbins, arnold@@skeeve.com, Public Domain
# February, 2004
+# Revised June, 2014
@c endfile
@end ignore
@c file eg/lib/strtonum.awk
-function mystrtonum(str, ret, chars, n, i, k, c)
+function mystrtonum(str, ret, n, i, k, c)
@{
if (str ~ /^0[0-7]*$/) @{
# octal
@@ -19384,12 +20542,13 @@ function mystrtonum(str, ret, chars, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
@}
- @} else if (str ~ /^0[xX][[:xdigit:]]+/) @{
+ @} else if (str ~ /^0[xX][[:xdigit:]]+$/) @{
# hexadecimal
str = substr(str, 3) # lop off leading 0x
n = length(str)
@@ -19397,10 +20556,9 @@ function mystrtonum(str, ret, chars, n, i, k, c)
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
c = tolower(c)
- if ((k = index("0123456789", c)) > 0)
- k-- # adjust for 1-basing in awk
- else if ((k = index("abcdef", c)) > 0)
- k += 9
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("123456789abcdef", c)
ret = ret * 16 + k
@}
@@ -19568,7 +20726,7 @@ An @code{END} rule is automatically added
to the program calling @code{assert()}. Normally, if a program consists
of just a @code{BEGIN} rule, the input files and/or standard input are
not read. However, now that the program has an @code{END} rule, @command{awk}
-attempts to read the input data files or standard input
+attempts to read the input @value{DF}s or standard input
(@pxref{Using BEGIN/END}),
most likely causing the program to hang as it waits for input.
@@ -19801,8 +20959,7 @@ function chr(c)
@c endfile
#### test code ####
-# BEGIN \
-# @{
+# BEGIN @{
# for (;;) @{
# printf("enter a character: ")
# if (getline var <= 0)
@@ -19887,7 +21044,7 @@ more difficult than they really need to be.}
@cindex timestamps, formatted
@cindex time, managing
The @code{systime()} and @code{strftime()} functions described in
-@ref{Time Functions},
+@DBREF{Time Functions}
provide the minimum functionality necessary for dealing with the time of day
in human readable form. While @code{strftime()} is extensive, the control
formats are not necessarily easy to remember or intuitively obvious when
@@ -19939,7 +21096,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
+ ret = strftime(PROCINFO["strftime"], now)
# clear out target array
delete time
@@ -19973,7 +21130,7 @@ function getlocaltime(time, ret, now, i)
The string indices are easier to use and read than the various formats
required by @code{strftime()}. The @code{alarm} program presented in
-@ref{Alarm Program},
+@DBREF{Alarm Program}
uses this function.
A more general design for the @code{getlocaltime()} function would have
allowed the user to supply an optional timestamp value to use instead
@@ -20055,7 +21212,7 @@ This tests the result to see if it is empty or not. An equivalent
test would be @samp{contents == ""}.
@node Data File Management
-@section Data File Management
+@section @value{DDF} Management
@c STARTOFRANGE dataf
@cindex files, managing
@@ -20064,7 +21221,7 @@ test would be @samp{contents == ""}.
@c STARTOFRANGE flibdataf
@cindex functions, library, managing data files
This @value{SECTION} presents functions that are useful for managing
-command-line data files.
+command-line @value{DF}s.
@menu
* Filetrans Function:: A function for handling data file transitions.
@@ -20075,7 +21232,7 @@ command-line data files.
@end menu
@node Filetrans Function
-@subsection Noting Data File Boundaries
+@subsection Noting @value{DDF} Boundaries
@cindex files, managing, data file boundaries
@cindex files, initialization and cleanup
@@ -20083,8 +21240,8 @@ The @code{BEGIN} and @code{END} rules are each executed exactly once at
the beginning and end of your @command{awk} program, respectively
(@pxref{BEGIN/END}).
We (the @command{gawk} authors) once had a user who mistakenly thought that the
-@code{BEGIN} rule is executed at the beginning of each data file and the
-@code{END} rule is executed at the end of each data file.
+@code{BEGIN} rule is executed at the beginning of each @value{DF} and the
+@code{END} rule is executed at the end of each @value{DF}.
When informed
that this was not the case, the user requested that we add new special
@@ -20095,7 +21252,7 @@ Adding these special patterns to @command{gawk} wasn't necessary;
the job can be done cleanly in @command{awk} itself, as illustrated
by the following library program.
It arranges to call two user-supplied functions, @code{beginfile()} and
-@code{endfile()}, at the beginning and end of each data file.
+@code{endfile()}, at the beginning and end of each @value{DF}.
Besides solving the problem in only nine(!) lines of code, it does so
@emph{portably}; this works with any implementation of @command{awk}:
@@ -20126,17 +21283,17 @@ This file must be loaded before the user's ``main'' program, so that the
rule it supplies is executed first.
This rule relies on @command{awk}'s @code{FILENAME} variable that
-automatically changes for each new data file. The current file name is
+automatically changes for each new @value{DF}. The current @value{FN} is
saved in a private variable, @code{_oldfilename}. If @code{FILENAME} does
-not equal @code{_oldfilename}, then a new data file is being processed and
+not equal @code{_oldfilename}, then a new @value{DF} is being processed and
it is necessary to call @code{endfile()} for the old file. Because
@code{endfile()} should only be called if a file has been processed, the
program first checks to make sure that @code{_oldfilename} is not the null
-string. The program then assigns the current file name to
+string. The program then assigns the current @value{FN} to
@code{_oldfilename} and calls @code{beginfile()} for the file.
Because, like all @command{awk} variables, @code{_oldfilename} is
initialized to the null string, this rule executes correctly even for the
-first data file.
+first @value{DF}.
The program also supplies an @code{END} rule to do the final processing for
the last file. Because this @code{END} rule comes before any @code{END} rules
@@ -20145,7 +21302,7 @@ again the value of multiple @code{BEGIN} and @code{END} rules should be clear.
@cindex @code{beginfile()} user-defined function
@cindex @code{endfile()} user-defined function
-If the same data file occurs twice in a row on the command line, then
+If the same @value{DF} occurs twice in a row on the command line, then
@code{endfile()} and @code{beginfile()} are not executed at the end of the
first pass and at the beginning of the second pass.
The following version solves the problem:
@@ -20176,7 +21333,7 @@ END @{ endfile(_filename_) @}
@c endfile
@end example
-@ref{Wc Program},
+@DBREF{Wc Program}
shows how this library function can be used and
how it simplifies writing the main program.
@@ -20286,19 +21443,23 @@ to either update @code{ARGIND} on your own
or modify this code as appropriate.
The @code{rewind()} function also relies on the @code{nextfile} keyword
-(@pxref{Nextfile Statement}).
+(@pxref{Nextfile Statement}). Because of this, you should not call it
+from an @code{ENDFILE} rule. (This isn't necessary anyway, since as soon
+as an @code{ENDFILE} rule finishes @command{gawk} goes to the next file!)
@node File Checking
-@subsection Checking for Readable Data Files
+@subsection Checking for Readable @value{DDF}s
@cindex troubleshooting, readable data files
@cindex readable data files@comma{} checking
@cindex files, skipping
-Normally, if you give @command{awk} a data file that isn't readable,
-it stops with a fatal error. There are times when you
-might want to just ignore such files and keep going. You can
-do this by prepending the following program to your @command{awk}
-program:
+Normally, if you give @command{awk} a @value{DF} that isn't readable,
+it stops with a fatal error. There are times when you might want to
+just ignore such files and keep going.@footnote{The @code{BEGINFILE}
+special pattern (@pxref{BEGINFILE/ENDFILE}) provides an alternative
+mechanism for dealing with files that can't be opened. However, the
+code here provides a portable solution.} You can do this by prepending
+the following program to your @command{awk} program:
@cindex @code{readable.awk} program
@example
@@ -20336,22 +21497,22 @@ skips the file (since it's no longer in the list).
See also @ref{ARGC and ARGV}.
@node Empty Files
-@subsection Checking For Zero-length Files
+@subsection Checking for Zero-length Files
All known @command{awk} implementations silently skip over zero-length files.
This is a by-product of @command{awk}'s implicit
read-a-record-and-match-against-the-rules loop: when @command{awk}
tries to read a record from an empty file, it immediately receives an
end of file indication, closes the file, and proceeds on to the next
-command-line data file, @emph{without} executing any user-level
+command-line @value{DF}, @emph{without} executing any user-level
@command{awk} program code.
Using @command{gawk}'s @code{ARGIND} variable
(@pxref{Built-in Variables}), it is possible to detect when an empty
-data file has been skipped. Similar to the library file presented
+@value{DF} has been skipped. Similar to the library file presented
in @ref{Filetrans Function}, the following library file calls a function named
@code{zerofile()} that the user must provide. The arguments passed are
-the file name and the position in @code{ARGV} where it was found:
+the @value{FN} and the position in @code{ARGV} where it was found:
@cindex @code{zerofile.awk} program
@example
@@ -20398,56 +21559,16 @@ the end of the command-line arguments. Note that the test in the
condition of the @code{for} loop uses the @samp{<=} operator,
not @samp{<}.
-As an exercise, you might consider whether this same problem can
-be solved without relying on @command{gawk}'s @code{ARGIND} variable.
-
-As a second exercise, revise this code to handle the case where
-an intervening value in @code{ARGV} is a variable assignment.
-
-@ignore
-# zerofile2.awk --- same thing, portably
-
-BEGIN @{
- ARGIND = Argind = 0
- for (i = 1; i < ARGC; i++)
- Fnames[ARGV[i]]++
-
-@}
-FNR == 1 @{
- while (ARGV[ARGIND] != FILENAME)
- ARGIND++
- Seen[FILENAME]++
- if (Seen[FILENAME] == Fnames[FILENAME])
- do
- ARGIND++
- while (ARGV[ARGIND] != FILENAME)
-@}
-ARGIND > Argind + 1 @{
- for (Argind++; Argind < ARGIND; Argind++)
- zerofile(ARGV[Argind], Argind)
-@}
-ARGIND != Argind @{
- Argind = ARGIND
-@}
-END @{
- if (ARGIND < ARGC - 1)
- ARGIND = ARGC - 1
- if (ARGIND > Argind)
- for (Argind++; Argind <= ARGIND; Argind++)
- zerofile(ARGV[Argind], Argind)
-@}
-@end ignore
-
@node Ignoring Assigns
-@subsection Treating Assignments as File Names
+@subsection Treating Assignments as @value{FFN}s
@cindex assignments as filenames
@cindex filenames, assignments as
Occasionally, you might not want @command{awk} to process command-line
variable assignments
(@pxref{Assignment Options}).
-In particular, if you have a file name that contains an @samp{=} character,
-@command{awk} treats the file name as an assignment, and does not process it.
+In particular, if you have a @value{FN} that contains an @samp{=} character,
+@command{awk} treats the @value{FN} as an assignment, and does not process it.
Some users have suggested an additional command-line option for @command{gawk}
to disable command-line assignments. However, some simple programming with
@@ -20491,7 +21612,7 @@ awk -v No_command_assign=1 -f noassign.awk -f yourprog.awk *
The function works by looping through the arguments.
It prepends @samp{./} to
any argument that matches the form
-of a variable assignment, turning that argument into a file name.
+of a variable assignment, turning that argument into a @value{FN}.
The use of @code{No_command_assign} allows you to disable command-line
assignments at invocation time, by giving the variable a true value.
@@ -20575,7 +21696,6 @@ application might want to print its own error message.)
@item optopt
The letter representing the command-line option.
-@c While not usually documented, most versions supply this variable.
@end table
The following C fragment shows how @code{getopt()} might process command-line
@@ -20626,7 +21746,6 @@ necessary for accessing individual characters
function was written before @command{gawk} acquired the ability to
split strings into single characters using @code{""} as the separator.
We have left it alone, since using @code{substr()} is more portable.}
-@c FIXME: could use split(str, a, "") to do it more easily.
The discussion that follows walks through the code a bit at a time:
@@ -20714,8 +21833,7 @@ it is not an option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) @{
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) @{
Optind++
_opti = 0
@@ -20809,7 +21927,7 @@ BEGIN @{
# test program
if (_getopt_test) @{
while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, optarg = <%s>\n",
+ printf("c = <%c>, Optarg = <%s>\n",
_go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
@@ -20825,32 +21943,31 @@ result of two sample runs of the test program:
@example
$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x}
-@print{} c = <a>, optarg = <>
-@print{} c = <c>, optarg = <>
-@print{} c = <b>, optarg = <ARG>
+@print{} c = <a>, Optarg = <>
+@print{} c = <c>, Optarg = <>
+@print{} c = <b>, Optarg = <ARG>
@print{} non-option arguments:
@print{} ARGV[3] = <bax>
@print{} ARGV[4] = <-x>
$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc}
-@print{} c = <a>, optarg = <>
+@print{} c = <a>, Optarg = <>
@error{} x -- invalid option
-@print{} c = <?>, optarg = <>
+@print{} c = <?>, Optarg = <>
@print{} non-option arguments:
@print{} ARGV[4] = <xyz>
@print{} ARGV[5] = <abc>
@end example
-In both runs,
-the first @option{--} terminates the arguments to @command{awk}, so that it does
-not try to interpret the @option{-a}, etc., as its own options.
+In both runs, the first @option{--} terminates the arguments to
+@command{awk}, so that it does not try to interpret the @option{-a},
+etc., as its own options.
@quotation NOTE
-After @code{getopt()} is through, it is the responsibility of the user level
-code to
-clear out all the elements of @code{ARGV} from 1 to @code{Optind},
-so that @command{awk} does not try to process the command-line options
-as file names.
+After @code{getopt()} is through, it is the responsibility of the
+user-level code to clear out all the elements of @code{ARGV} from 1
+to @code{Optind}, so that @command{awk} does not try to process the
+command-line options as @value{FN}s.
@end quotation
Several of the sample programs presented in
@@ -20912,14 +22029,12 @@ no more entries, it returns @code{NULL}, the null pointer. When this
happens, the C program should call @code{endpwent()} to close the database.
Following is @command{pwcat}, a C program that ``cats'' the password database:
-@c Use old style function header for portability to old systems (SunOS, HP/UX).
-
@example
@c file eg/lib/pwcat.c
/*
* pwcat.c
*
- * Generate a printable version of the password database
+ * Generate a printable version of the password database.
*/
@c endfile
@ignore
@@ -21221,7 +22336,7 @@ once. If you are worried about squeezing every last cycle out of your
this is not necessary, since most @command{awk} programs are I/O-bound,
and such a change would clutter up the code.
-The @command{id} program in @ref{Id Program},
+The @command{id} program in @DBREF{Id Program}
uses these functions.
@c ENDOFRANGE libfudata
@c ENDOFRANGE flibudata
@@ -21247,7 +22362,7 @@ uses these functions.
@cindex group file
@cindex files, group
Much of the discussion presented in
-@ref{Passwd Functions},
+@DBREF{Passwd Functions}
applies to the group database as well. Although there has traditionally
been a well-known file (@file{/etc/group}) in a well-known format, the POSIX
standard only provides a set of C library routines
@@ -21265,7 +22380,7 @@ is as follows:
/*
* grcat.c
*
- * Generate a printable version of the group database
+ * Generate a printable version of the group database.
*/
@c endfile
@ignore
@@ -21352,7 +22467,7 @@ it is usually empty or set to @samp{*}.
@item Group ID Number
The group's numeric group ID number;
-this number must be unique within the file.
+the association of name to number must be unique within the file.
(On some systems it's a C @code{long}, and not an @code{int}. Thus
we cast it to @code{long} for all cases.)
@@ -21400,8 +22515,7 @@ There are several, modeled after the C library functions of the same names:
@c line break on _gr_init for smallbook
@c file eg/lib/groupawk.in
-BEGIN \
-@{
+BEGIN @{
# Change to suit your system
_gr_awklib = "/usr/local/libexec/awk/"
@}
@@ -21482,16 +22596,16 @@ database for the same group. This is common when a group has a large number
of members. A pair of such entries might look like the following:
@example
-tvpeople:*:101:johnny,jay,arsenio
+tvpeople:*:101:johnny,jay,arsenio
tvpeople:*:101:david,conan,tom,joan
@end example
For this reason, @code{_gr_init()} looks to see if a group name or
group ID number is already seen. If it is, then the user names are
-simply concatenated onto the previous list of users. (There is actually a
+simply concatenated onto the previous list of users.@footnote{There is actually a
subtle problem with the code just presented. Suppose that
the first time there were no names. This code adds the names with
-a leading comma. It also doesn't check that there is a @code{$4}.)
+a leading comma. It also doesn't check that there is a @code{$4}.}
Finally, @code{_gr_init()} closes the pipeline to @command{grcat}, restores
@code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0},
@@ -21587,13 +22701,13 @@ Most of the work is in scanning the database and building the various
associative arrays. The functions that the user calls are themselves very
simple, relying on @command{awk}'s associative arrays to do work.
-The @command{id} program in @ref{Id Program},
+The @command{id} program in @DBREF{Id Program}
uses these functions.
@node Walking Arrays
@section Traversing Arrays of Arrays
-@ref{Arrays of Arrays}, described how @command{gawk}
+@DBREF{Arrays of Arrays} described how @command{gawk}
provides arrays of arrays. In particular, any element of
an array may be either a scalar, or another array. The
@code{isarray()} function (@pxref{Type Functions})
@@ -21651,24 +22765,123 @@ $ @kbd{gawk -f walk_array.awk}
@print{} a[3] = 3
@end example
-Walking an array and processing each element is a general-purpose
-operation. You might want to consider generalizing the @code{walk_array()}
-function by adding an additional parameter named @code{process}.
-
-Then, inside the loop, instead of simply printing the array element's
-index and value, use the indirect function call syntax
-(@pxref{Indirect Calls}) on @code{process}, passing it the index
-and the value.
-
-When calling @code{walk_array()}, you would pass the name of a user-defined
-function that expects to receive an index and a value, and then processes
-the element.
-
-
@c ENDOFRANGE libfgdata
@c ENDOFRANGE flibgdata
@c ENDOFRANGE gdatar
@c ENDOFRANGE libf
+
+@node Library Functions Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Reading programs is an excellent way to learn Good Programming.
+The functions provided in this @value{CHAPTER} and the next are intended
+to serve that purpose.
+
+@item
+When writing general-purpose library functions, put some thought into how
+to name any global variables so that they won't conflict with variables
+from a user's program.
+
+@item
+The functions presented here fit into the following categories:
+
+@c nested list
+@table @asis
+@item General problems
+Number to string conversion, assertions, rounding, random number
+generation, converting characters to numbers, joining strings, getting
+easily usable time-of-day information, and reading a whole file in
+one shot.
+
+@item Managing @value{DF}s
+Noting @value{DF} boundaries, rereading the current file, checking for
+readable files, checking for zero-length files, and treating assignments
+as @value{FN}s.
+
+@item Processing command-line options
+An @command{awk} version of the standard C @code{getopt()} function.
+
+@item Reading the user and group databases
+Two sets of routines that parallel the C library versions.
+
+@item Traversing arrays of arrays
+A simple function to traverse an array of arrays to any depth.
+@end table
+@c end nested list
+
+@end itemize
+
+@c EXCLUDE START
+@node Library Exercises
+@section Exercises
+
+@enumerate
+@item
+In @ref{Empty Files}, we presented the @file{zerofile.awk} program,
+which made use of @command{gawk}'s @code{ARGIND} variable. Can this
+problem be solved without relying on @code{ARGIND}? If so, how?
+
+@ignore
+# zerofile2.awk --- same thing, portably
+
+BEGIN @{
+ ARGIND = Argind = 0
+ for (i = 1; i < ARGC; i++)
+ Fnames[ARGV[i]]++
+
+@}
+FNR == 1 @{
+ while (ARGV[ARGIND] != FILENAME)
+ ARGIND++
+ Seen[FILENAME]++
+ if (Seen[FILENAME] == Fnames[FILENAME])
+ do
+ ARGIND++
+ while (ARGV[ARGIND] != FILENAME)
+@}
+ARGIND > Argind + 1 @{
+ for (Argind++; Argind < ARGIND; Argind++)
+ zerofile(ARGV[Argind], Argind)
+@}
+ARGIND != Argind @{
+ Argind = ARGIND
+@}
+END @{
+ if (ARGIND < ARGC - 1)
+ ARGIND = ARGC - 1
+ if (ARGIND > Argind)
+ for (Argind++; Argind <= ARGIND; Argind++)
+ zerofile(ARGV[Argind], Argind)
+@}
+@end ignore
+
+@item
+As a related challenge, revise that code to handle the case where
+an intervening value in @code{ARGV} is a variable assignment.
+
+@item
+@DBREF{Walking Arrays} presented a function that walked a multidimensional
+array to print it out. However, walking an array and processing
+each element is a general-purpose operation. Generalize the
+@code{walk_array()} function by adding an additional parameter named
+@code{process}.
+
+Then, inside the loop, instead of printing the array element's index and
+value, use the indirect function call syntax (@pxref{Indirect Calls})
+on @code{process}, passing it the index and the value.
+
+When calling @code{walk_array()}, you would pass the name of a
+user-defined function that expects to receive an index and a value,
+and then processes the element.
+
+Test your new version by printing the array; you should end up with
+output identical to that of the original version.
+
+@end enumerate
+@c EXCLUDE END
+
@c ENDOFRANGE flib
@c ENDOFRANGE fudlib
@c ENDOFRANGE datagr
@@ -21678,11 +22891,13 @@ the element.
@c STARTOFRANGE awkpex
@cindex @command{awk} programs, examples of
+@c FULLXREF ON
@ref{Library Functions},
presents the idea that reading programs in a language contributes to
learning that language. This @value{CHAPTER} continues that theme,
presenting a potpourri of @command{awk} programs for your reading
enjoyment.
+@c FULLXREF OFF
@ifnotinfo
There are three sections.
The first describes how to run the programs presented
@@ -21709,6 +22924,8 @@ Many of these programs use library functions presented in
* Running Examples:: How to run these examples.
* Clones:: Clones of common utilities.
* Miscellaneous Programs:: Some interesting @command{awk} programs.
+* Programs Summary:: Summary of programs.
+* Programs Exercises:: Exercises.
@end menu
@node Running Examples
@@ -21723,7 +22940,7 @@ awk -f @var{program} -- @var{options} @var{files}
@noindent
Here, @var{program} is the name of the @command{awk} program (such as
@file{cut.awk}), @var{options} are any command-line options for the
-program that start with a @samp{-}, and @var{files} are the actual data files.
+program that start with a @samp{-}, and @var{files} are the actual @value{DF}s.
If your system supports the @samp{#!} executable interpreter mechanism
(@pxref{Executable Scripts}),
@@ -21861,13 +23078,7 @@ function usage( e1, e2)
@noindent
The variables @code{e1} and @code{e2} are used so that the function
-fits nicely on the
-@ifnotinfo
-page.
-@end ifnotinfo
-@ifnottex
-screen.
-@end ifnottex
+fits nicely on the @value{PAGE}.
@cindex @code{BEGIN} pattern, running @command{awk} programs and
@cindex @code{FS} variable, running @command{awk} programs and
@@ -21883,8 +23094,7 @@ string:
@example
@c file eg/prog/cut.awk
-BEGIN \
-@{
+BEGIN @{
FS = "\t" # default
OFS = FS
while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) @{
@@ -21897,7 +23107,7 @@ BEGIN \
OFS = ""
@} else if (c == "d") @{
if (length(Optarg) > 1) @{
- printf("Using first character of %s" \
+ printf("cut: using first character of %s" \
" for delimiter\n", Optarg) > "/dev/stderr"
Optarg = substr(Optarg, 1, 1)
@}
@@ -21906,7 +23116,7 @@ BEGIN \
if (FS == " ") # defeat awk semantics
FS = "[ ]"
@} else if (c == "s")
- suppress++
+ suppress = 1
else
usage()
@}
@@ -21928,7 +23138,7 @@ spaces. Also remember that after @code{getopt()} is through
we have to
clear out all the elements of @code{ARGV} from 1 to @code{Optind},
so that @command{awk} does not try to process the command-line options
-as file names.
+as @value{FN}s.
After dealing with the command-line options, the program verifies that the
options make sense. Only one or the other of @option{-c} and @option{-f}
@@ -21978,7 +23188,7 @@ function set_fieldlist( n, m, i, j, k, f, g)
m = split(f[i], g, "-")
@group
if (m != 2 || g[1] >= g[2]) @{
- printf("bad field list: %s\n",
+ printf("cut: bad field list: %s\n",
f[i]) > "/dev/stderr"
exit 1
@}
@@ -22025,7 +23235,7 @@ function set_charlist( field, i, j, f, g, n, m, t,
if (index(f[i], "-") != 0) @{ # range
m = split(f[i], g, "-")
if (m != 2 || g[1] >= g[2]) @{
- printf("bad character list: %s\n",
+ printf("cut: bad character list: %s\n",
f[i]) > "/dev/stderr"
exit 1
@}
@@ -22101,7 +23311,6 @@ of picking the input line apart by characters.
@c ENDOFRANGE ficut
@c ENDOFRANGE colcut
-@c Exercise: Rewrite using split with "".
@node Egrep Program
@subsection Searching for Regular Expressions in Files
@@ -22119,14 +23328,14 @@ expressions that are almost identical to those available in @command{awk}
(@pxref{Regexp}).
You invoke it as follows:
-@example
-egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{}
-@end example
+@display
+@command{egrep} [@var{options}] @code{'@var{pattern}'} @var{files} @dots{}
+@end display
The @var{pattern} is a regular expression. In typical usage, the regular
expression is quoted to prevent the shell from expanding any of the
-special characters as file name wildcards. Normally, @command{egrep}
-prints the lines that matched. If multiple file names are provided on
+special characters as @value{FN} wildcards. Normally, @command{egrep}
+prints the lines that matched. If multiple @value{FN}s are provided on
the command line, each output line is preceded by the name of the file
and a colon.
@@ -22217,7 +23426,7 @@ pattern is supplied with @option{-e}, the first nonoption on the
command line is used. The @command{awk} command-line arguments up to @code{ARGV[Optind]}
are cleared, so that @command{awk} won't try to process them as files. If no
files are specified, the standard input is used, and if multiple files are
-specified, we make sure to note this so that the file names can precede the
+specified, we make sure to note this so that the @value{FN}s can precede the
matched lines in the output:
@example
@@ -22251,8 +23460,6 @@ if a match happens, we output the translated line, not the original.}
The rule is
commented out since it is not necessary with @command{gawk}:
-@c Exercise: Fix this, w/array and new line as key to original line
-
@example
@c file eg/prog/egrep.awk
#@{
@@ -22303,6 +23510,11 @@ function endfile(file)
@c endfile
@end example
+The @code{BEGINFILE} and @code{ENDFILE} special patterns
+(@pxref{BEGINFILE/ENDFILE}) could be used, but then the program would be
+@command{gawk}-specific. Additionally, this example was written before
+@command{gawk} acquired @code{BEGINFILE} and @code{ENDFILE}.
+
The following rule does most of the work of matching lines. The variable
@code{matches} is true if the line matched the pattern. If the user
wants lines that did not match, the sense of @code{matches} is inverted
@@ -22315,9 +23527,9 @@ A number of additional tests are made, but they are only done if we
are not counting lines. First, if the user only wants exit status
(@code{no_print} is true), then it is enough to know that @emph{one}
line in this file matched, and we can skip on to the next file with
-@code{nextfile}. Similarly, if we are only printing file names, we can
-print the file name, and then skip to the next file with @code{nextfile}.
-Finally, each line is printed, with a leading file name and colon
+@code{nextfile}. Similarly, if we are only printing @value{FN}s, we can
+print the @value{FN}, and then skip to the next file with @code{nextfile}.
+Finally, each line is printed, with a leading @value{FN} and colon
if necessary:
@cindex @code{!} (exclamation point), @code{!} operator
@@ -22357,11 +23569,8 @@ there are no matches, the exit status is one; otherwise it is zero:
@example
@c file eg/prog/egrep.awk
-END \
-@{
- if (total == 0)
- exit 1
- exit 0
+END @{
+ exit (total == 0)
@}
@c endfile
@end example
@@ -22384,17 +23593,6 @@ function usage( e)
The variable @code{e} is used so that the function fits nicely
on the printed page.
-@cindex @code{END} pattern, backslash continuation and
-@cindex @code{\} (backslash), continuing lines and
-@cindex backslash (@code{\}), continuing lines and
-Just a note on programming style: you may have noticed that the @code{END}
-rule uses backslash continuation, with the open brace on a line by
-itself. This is so that it more closely resembles the way functions
-are written. Many of the examples
-in this @value{CHAPTER}
-use this style. You can decide for yourself if you like writing
-your @code{BEGIN} and @code{END} rules this way
-or not.
@c ENDOFRANGE regexps
@c ENDOFRANGE sfregexp
@c ENDOFRANGE fsregexp
@@ -22415,7 +23613,7 @@ corresponding user and group names. The output might look like this:
@example
$ @kbd{id}
-@print{} uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy)
+@print{} uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo)
@end example
@cindex @code{PROCINFO} array, and user and group ID numbers
@@ -22451,6 +23649,7 @@ numbers:
# Arnold Robbins, arnold@@skeeve.com, Public Domain
# May 1993
# Revised February 1996
+# Revised May 2014
@c endfile
@end ignore
@@ -22460,8 +23659,7 @@ numbers:
# egid=5(blat) groups=9(nine),2(two),1(one)
@group
-BEGIN \
-@{
+BEGIN @{
uid = PROCINFO["uid"]
euid = PROCINFO["euid"]
gid = PROCINFO["gid"]
@@ -22470,34 +23668,26 @@ BEGIN \
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (euid != uid) @{
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
@}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (egid != gid) @{
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
@}
for (i = 1; ("group" i) in PROCINFO; i++) @{
@@ -22506,16 +23696,20 @@ BEGIN \
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
@}
print ""
@}
+
+function pr_first_field(str, a)
+@{
+ split(str, a, ":")
+ printf("(%s)", a[1])
+@}
@c endfile
@end example
@@ -22535,12 +23729,10 @@ The loop is also correct if there are @emph{no} supplementary
groups; then the condition is false the first time it's
tested, and the loop body never executes.
-@c exercise!!!
-@ignore
-The POSIX version of @command{id} takes arguments that control which
-information is printed. Modify this version to accept the same
-arguments and perform in the same way.
-@end ignore
+The @code{pr_first_field()} function simply isolates out some
+code that is used repeatedly, making the whole program
+slightly shorter and cleaner.
+
@c ENDOFRANGE id
@node Split Program
@@ -22557,9 +23749,9 @@ Usage is as follows:@footnote{This is the traditional usage. The
POSIX usage is different, but not relevant for what the program
aims to demonstrate.}
-@example
-split @r{[}-@var{count}@r{]} file @r{[} @var{prefix} @r{]}
-@end example
+@display
+@command{split} [@code{-@var{count}}] [@var{file}] [@var{prefix}]
+@end display
By default,
the output files are named @file{xaa}, @file{xab}, and so on. Each file has
@@ -22568,7 +23760,7 @@ number of lines in each file, supply a number on the command line
preceded with a minus; e.g., @samp{-500} for files with 500 lines in them
instead of 1000. To change the name of the output files to something like
@file{myfileaa}, @file{myfileab}, and so on, supply an additional
-argument that specifies the file name prefix.
+argument that specifies the @value{FN} prefix.
Here is a version of @command{split} in @command{awk}. It uses the
@code{ord()} and @code{chr()} functions presented in
@@ -22578,8 +23770,8 @@ The program first sets its defaults, and then tests to make sure there are
not too many arguments. It then looks at each argument in turn. The
first argument could be a minus sign followed by a number. If it is, this happens
to look like a negative number, so it is made positive, and that is the
-count of lines. The data file name is skipped over and the final argument
-is used as the prefix for the output file names:
+count of lines. The @value{DF} name is skipped over and the final argument
+is used as the prefix for the output @value{FN}s:
@cindex @code{split.awk} program
@example
@@ -22593,11 +23785,12 @@ is used as the prefix for the output file names:
#
# Arnold Robbins, arnold@@skeeve.com, Public Domain
# May 1993
+# Revised slightly, May 2014
@c endfile
@end ignore
@c file eg/prog/split.awk
-# usage: split [-num] [file] [outname]
+# usage: split [-count] [file] [outname]
BEGIN @{
outfile = "x" # default
@@ -22606,7 +23799,7 @@ BEGIN @{
usage()
i = 1
- if (ARGV[i] ~ /^-[[:digit:]]+$/) @{
+ if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) @{
count = -ARGV[i]
ARGV[i] = ""
i++
@@ -22628,7 +23821,7 @@ BEGIN @{
The next rule does most of the work. @code{tcount} (temporary count) tracks
how many lines have been printed to the output file so far. If it is greater
than @code{count}, it is time to close the current file and start a new one.
-@code{s1} and @code{s2} track the current suffixes for the file name. If
+@code{s1} and @code{s2} track the current suffixes for the @value{FN}. If
they are both @samp{z}, the file is just too big. Otherwise, @code{s1}
moves to the next letter in the alphabet and @code{s2} starts over again at
@samp{a}:
@@ -22660,8 +23853,6 @@ moves to the next letter in the alphabet and @code{s2} starts over again at
@c endfile
@end example
-@c Exercise: do this with just awk builtin functions, index("abc..."), substr, etc.
-
@noindent
The @code{usage()} function simply prints an error message and exits:
@@ -22678,21 +23869,19 @@ function usage( e)
@noindent
The variable @code{e} is used so that the function
-fits nicely on the
-@ifinfo
-screen.
-@end ifinfo
-@ifnotinfo
-page.
-@end ifnotinfo
+fits nicely on the @value{PAGE}.
This program is a bit sloppy; it relies on @command{awk} to automatically close the last file
instead of doing it in an @code{END} rule.
It also assumes that letters are contiguous in the character set,
which isn't true for EBCDIC systems.
-@c Exercise: Fix these problems.
-@c BFD...
+@ifset FOR_PRINT
+You might want to consider how to eliminate the use of
+@code{ord()} and @code{chr()}; this can be done in such a
+way as to solve the EBCDIC issue as well.
+@end ifset
+
@c ENDOFRANGE filspl
@c ENDOFRANGE split
@@ -22707,9 +23896,9 @@ The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies
its standard input to its standard output and also duplicates it to the
files named on the command line. Its usage is as follows:
-@example
-tee @r{[}-a@r{]} file @dots{}
-@end example
+@display
+@command{tee} [@option{-a}] @var{file} @dots{}
+@end display
The @option{-a} option tells @code{tee} to append to the named files, instead of
truncating them and starting over.
@@ -22718,13 +23907,13 @@ The @code{BEGIN} rule first makes a copy of all the command-line arguments
into an array named @code{copy}.
@code{ARGV[0]} is not copied, since it is not needed.
@code{tee} cannot use @code{ARGV} directly, since @command{awk} attempts to
-process each file name in @code{ARGV} as input data.
+process each @value{FN} in @code{ARGV} as input data.
@cindex flag variables
If the first argument is @option{-a}, then the flag variable
@code{append} is set to true, and both @code{ARGV[1]} and
@code{copy[1]} are deleted. If @code{ARGC} is less than two, then no
-file names were supplied and @code{tee} prints a usage message and exits.
+@value{FN}s were supplied and @code{tee} prints a usage message and exits.
Finally, @command{awk} is forced to read the standard input by setting
@code{ARGV[1]} to @code{"-"} and @code{ARGC} to two:
@@ -22746,8 +23935,7 @@ Finally, @command{awk} is forced to read the standard input by setting
@c endfile
@end ignore
@c file eg/prog/tee.awk
-BEGIN \
-@{
+BEGIN @{
for (i = 1; i < ARGC; i++)
copy[i] = ARGV[i]
@@ -22809,8 +23997,7 @@ Finally, the @code{END} rule cleans up by closing all the output files:
@example
@c file eg/prog/tee.awk
-END \
-@{
+END @{
for (i in copy)
close(copy[i])
@}
@@ -22834,9 +24021,9 @@ input, and by default removes duplicate lines. In other words, it only
prints unique lines---hence the name. @command{uniq} has a number of
options. The usage is as follows:
-@example
-uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]}
-@end example
+@display
+@command{uniq} [@option{-udc} [@code{-@var{n}}]] [@code{+@var{n}}] [@var{inputfile} [@var{outputfile}]]
+@end display
The options for @command{uniq} are:
@@ -22860,11 +24047,11 @@ by runs of spaces and/or TABs.
Skip @var{n} characters before comparing lines. Any fields specified with
@samp{-@var{n}} are skipped first.
-@item @var{input file}
+@item @var{inputfile}
Data is read from the input file named on the command line, instead of from
the standard input.
-@item @var{output file}
+@item @var{outputfile}
The generated output is sent to the named output file, instead of to the
standard output.
@end table
@@ -22927,8 +24114,7 @@ function usage( e)
# -n skip n fields
# +n skip n characters, skip fields first
-BEGIN \
-@{
+BEGIN @{
count = 1
outputfile = "/dev/stdout"
opts = "udc0:1:2:3:4:5:6:7:8:9:"
@@ -22940,7 +24126,7 @@ BEGIN \
else if (c == "c")
do_count++
else if (index("0123456789", c) != 0) @{
- # getopt requires args to options
+ # getopt() requires args to options
# this messes us up for things like -5
if (Optarg ~ /^[[:digit:]]+$/)
fcount = (c Optarg) + 0
@@ -23077,6 +24263,22 @@ END @{
@}
@c endfile
@end example
+
+@ifset FOR_PRINT
+The logic for choosing which lines to print represents a @dfn{state
+machine}, which is ``a device that can be in one of a set number of stable
+conditions depending on its previous condition and on the present values
+of its inputs.''@footnote{This is the definition returned from entering
+@code{define: state machine} into Google.}
+Brian Kernighan suggests that
+``an alternative approach to state machines is to just read
+the input into an array, then use indexing. It's almost always
+easier code, and for most inputs where you would use this, just
+as fast.'' Consider how to rewrite the logic to follow this
+suggestion.
+@end ifset
+
+
@c ENDOFRANGE prunt
@c ENDOFRANGE tpul
@c ENDOFRANGE uniq
@@ -23101,9 +24303,9 @@ END @{
The @command{wc} (word count) utility counts lines, words, and characters in
one or more input files. Its usage is as follows:
-@example
-wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]}
-@end example
+@display
+@command{wc} [@option{-lwc}] [@var{files} @dots{}]
+@end display
If no files are specified on the command line, @command{wc} reads its standard
input. If there are multiple files, it also prints total counts for all
@@ -23190,7 +24392,7 @@ BEGIN @{
@end example
The @code{beginfile()} function is simple; it just resets the counts of lines,
-words, and characters to zero, and saves the current file name in
+words, and characters to zero, and saves the current @value{FN} in
@code{fname}:
@example
@@ -23203,18 +24405,10 @@ function beginfile(file)
@c endfile
@end example
-The @code{endfile()} function adds the current file's numbers to the running
-totals of lines, words, and characters.@footnote{@command{wc} can't just use the value of
-@code{FNR} in @code{endfile()}. If you examine
-the code in
-@ref{Filetrans Function},
-you will see that
-@code{FNR} has already been reset by the time
-@code{endfile()} is called.} It then prints out those numbers
-for the file that was just read. It relies on @code{beginfile()} to reset the
-numbers for the following data file:
-@c FIXME: ONE DAY: make the above footnote an exercise,
-@c instead of giving away the answer.
+The @code{endfile()} function adds the current file's numbers to the
+running totals of lines, words, and characters. It then prints out those
+numbers for the file that was just read. It relies on @code{beginfile()}
+to reset the numbers for the following @value{DF}:
@example
@c file eg/prog/wc.awk
@@ -23455,8 +24649,7 @@ Here is the program:
@c file eg/prog/alarm.awk
# usage: alarm time [ "message" [ count [ delay ] ] ]
-BEGIN \
-@{
+BEGIN @{
# Initial argument sanity checking
usage1 = "usage: alarm time ['message' [count [delay]]]"
usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
@@ -23531,7 +24724,7 @@ is how long to wait before setting off the alarm:
# how long to sleep for
naptime = target - current
if (naptime <= 0) @{
- print "time is in the past!" > "/dev/stderr"
+ print "alarm: time is in the past!" > "/dev/stderr"
exit 1
@}
@c endfile
@@ -23584,19 +24777,18 @@ often used to map uppercase letters into lowercase for further processing:
@end example
@command{tr} requires two lists of characters.@footnote{On some older
-systems,
-including Solaris,
-@command{tr} may require that the lists be written as
-range expressions enclosed in square brackets (@samp{[a-z]}) and quoted,
-to prevent the shell from attempting a file name expansion. This is
-not a feature.} When processing the input, the first character in the
-first list is replaced with the first character in the second list,
-the second character in the first list is replaced with the second
-character in the second list, and so on. If there are more characters
-in the ``from'' list than in the ``to'' list, the last character of the
-``to'' list is used for the remaining characters in the ``from'' list.
-
-Some time ago,
+systems, including Solaris, the system version of @command{tr} may require
+that the lists be written as range expressions enclosed in square brackets
+(@samp{[a-z]}) and quoted, to prevent the shell from attempting a file
+name expansion. This is not a feature.} When processing the input, the
+first character in the first list is replaced with the first character
+in the second list, the second character in the first list is replaced
+with the second character in the second list, and so on. If there are
+more characters in the ``from'' list than in the ``to'' list, the last
+character of the ``to'' list is used for the remaining characters in the
+``from'' list.
+
+Once upon a time,
@c early or mid-1989!
a user proposed that a transliteration function should
be added to @command{gawk}.
@@ -23612,9 +24804,8 @@ of standard @command{awk}: dealing with individual characters is very
painful, requiring repeated use of the @code{substr()}, @code{index()},
and @code{gsub()} built-in functions
(@pxref{String Functions}).@footnote{This
-program was written before @command{gawk} acquired the ability to
+program was also written before @command{gawk} acquired the ability to
split each character in a string into separate array elements.}
-@c Exercise: How might you use this new feature to simplify the program?
There are two functions. The first, @code{stranslate()}, takes three
arguments:
@@ -23710,18 +24901,23 @@ BEGIN @{
While it is possible to do character transliteration in a user-level
function, it is not necessarily efficient, and we (the @command{gawk}
authors) started to consider adding a built-in function. However,
-shortly after writing this program, we learned that the System V Release 4
-@command{awk} had added the @code{toupper()} and @code{tolower()} functions
-(@pxref{String Functions}).
-These functions handle the vast majority of the
-cases where character transliteration is necessary, and so we chose to
-simply add those functions to @command{gawk} as well and then leave well
-enough alone.
+shortly after writing this program, we learned that Brian Kernighan
+had added the @code{toupper()} and @code{tolower()} functions to his
+@command{awk} (@pxref{String Functions}). These functions handle the
+vast majority of the cases where character transliteration is necessary,
+and so we chose to simply add those functions to @command{gawk} as well
+and then leave well enough alone.
An obvious improvement to this program would be to set up the
@code{t_ar} array only once, in a @code{BEGIN} rule. However, this
assumes that the ``from'' and ``to'' lists
will never change throughout the lifetime of the program.
+
+Another obvious improvement is to enable the use of ranges,
+such as @samp{a-z}, as allowed by the @command{tr} utility.
+Look at the code for @file{cut.awk} (@pxref{Cut Program})
+for inspiration.
+
@c ENDOFRANGE chtra
@c ENDOFRANGE tr
@@ -23749,7 +24945,18 @@ The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that
@command{awk} splits records at blank lines
(@pxref{Records}).
It sets @code{MAXLINES} to 100, since 100 is the maximum number
-of lines on the page (20 * 5 = 100).
+of lines on the page
+@iftex
+(@math{20 @cdot 5 = 100}).
+@end iftex
+@ifnottex
+@ifnotdocbook
+(20 * 5 = 100).
+@end ifnotdocbook
+@end ifnottex
+@docbook
+(20 &sdot; 5 = 100). @c
+@end docbook
Most of the work is done in the @code{printpage()} function.
The label lines are stored sequentially in the @code{line} array. But they
@@ -23843,8 +25050,7 @@ function printpage( i, j)
Count++
@}
-END \
-@{
+END @{
printpage()
@}
@c endfile
@@ -23861,7 +25067,7 @@ END \
When working with large amounts of text, it can be interesting to know
how often different words appear. For example, an author may overuse
-certain words, in which case she might wish to find synonyms to substitute
+certain words, in which case he or she might wish to find synonyms to substitute
for words that appear too often. This @value{SUBSECTION} develops a
program for counting words and presenting the frequency information
in a useful format.
@@ -23891,7 +25097,7 @@ it prints the counts.
This program has several problems that prevent it from being
useful on real text files:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @command{awk} language considers upper- and lowercase characters to be
distinct. Therefore, ``bartender'' and ``Bartender'' are not treated
@@ -23939,6 +25145,10 @@ END @{
@}
@end example
+The regexp @samp{/[^[:alnum:]_[:blank:]]/} might have been written
+@samp{/[[:punct:]]/}, but then underscores would also be removed,
+and we want to keep them.
+
Assuming we have saved this program in a file named @file{wordfreq.awk},
and that the data is in @file{file1}, the following pipeline:
@@ -23991,7 +25201,7 @@ The @command{uniq} program
(@pxref{Uniq Program}),
removes duplicate lines from @emph{sorted} data.
-Suppose, however, you need to remove duplicate lines from a data file but
+Suppose, however, you need to remove duplicate lines from a @value{DF} but
that you want to preserve the order the lines are in. A good example of
this might be a shell history file. The history file keeps a copy of all
the commands you have entered, and it is not unusual to repeat a command
@@ -24050,6 +25260,7 @@ information. For example, using the following @code{print} statement in the
print data[lines[i]], lines[i]
@end example
+@noindent
This works because @code{data[$0]} is incremented each time a line is
seen.
@c ENDOFRANGE lidu
@@ -24096,7 +25307,7 @@ The Texinfo language is described fully, starting with
For our purposes, it is enough to know three things about Texinfo input
files:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The ``at'' symbol (@samp{@@}) is special in Texinfo, much as
the backslash (@samp{\}) is in C
@@ -24186,7 +25397,7 @@ BEGIN @{ IGNORECASE = 1 @}
/^@@c(omment)?[ \t]+system/ \
@{
if (NF < 3) @{
- e = (FILENAME ":" FNR)
+ e = ("extract: " FILENAME ":" FNR)
e = (e ": badly formed `system' line")
print e > "/dev/stderr"
next
@@ -24195,7 +25406,7 @@ BEGIN @{ IGNORECASE = 1 @}
$2 = ""
stat = system($0)
if (stat != 0) @{
- e = (FILENAME ":" FNR)
+ e = ("extract: " FILENAME ":" FNR)
e = (e ": warning: system returned " stat)
print e > "/dev/stderr"
@}
@@ -24205,16 +25416,10 @@ BEGIN @{ IGNORECASE = 1 @}
@noindent
The variable @code{e} is used so that the rule
-fits nicely on the
-@ifnotinfo
-page.
-@end ifnotinfo
-@ifnottex
-screen.
-@end ifnottex
+fits nicely on the @value{PAGE}.
The second rule handles moving data into files. It verifies that a
-file name is given in the directive. If the file named is not the
+@value{FN} is given in the directive. If the file named is not the
current file, then the current file is closed. Keeping the current file
open until a new file is encountered allows the use of the @samp{>}
redirection for printing the contents, keeping open file management
@@ -24238,12 +25443,11 @@ the array @code{a}, using the @code{split()} function
The @samp{@@} symbol is used as the separator character.
Each element of @code{a} that is empty indicates two successive @samp{@@}
symbols in the original line. For each two empty elements (@samp{@@@@} in
-the original file), we have to add a single @samp{@@} symbol back
-in.@footnote{This program was written before @command{gawk} had the
-@code{gensub()} function. Consider how you might use it to simplify the code.}
+the original file), we have to add a single @samp{@@} symbol back in.
When the processing of the array is finished, @code{join()} is called with the
-value of @code{SUBSEP}, to rejoin the pieces back into a single
+value of @code{SUBSEP} (@pxref{Multidimensional}),
+to rejoin the pieces back into a single
line. That line is then printed to the output file:
@example
@@ -24251,7 +25455,7 @@ line. That line is then printed to the output file:
/^@@c(omment)?[ \t]+file/ \
@{
if (NF != 3) @{
- e = (FILENAME ":" FNR ": badly formed `file' line")
+ e = ("extract: " FILENAME ":" FNR ": badly formed `file' line")
print e > "/dev/stderr"
next
@}
@@ -24296,20 +25500,19 @@ subsequent output is appended to the file
(@pxref{Redirection}).
This makes it easy to mix program text and explanatory prose for the same
sample source file (as has been done here!) without any hassle. The file is
-only closed when a new data file name is encountered or at the end of the
+only closed when a new @value{DF} name is encountered or at the end of the
input file.
Finally, the function @code{@w{unexpected_eof()}} prints an appropriate
error message and then exits.
The @code{END} rule handles the final cleanup, closing the open file:
-@c function lb put on same line for page breaking. sigh
@example
@c file eg/prog/extract.awk
@group
function unexpected_eof()
@{
- printf("%s:%d: unexpected EOF or error\n",
+ printf("extract: %s:%d: unexpected EOF or error\n",
FILENAME, FNR) > "/dev/stderr"
exit 1
@}
@@ -24349,7 +25552,7 @@ Here, @samp{s/old/new/g} tells @command{sed} to look for the regexp
The following program, @file{awksed.awk}, accepts at least two command-line
arguments: the pattern to look for and the text to replace it with. Any
-additional arguments are treated as data file names to process. If none
+additional arguments are treated as @value{DF} names to process. If none
are provided, the standard input is used:
@cindex Brennan, Michael
@@ -24423,33 +25626,13 @@ The @code{BEGIN} rule handles the setup, checking for the right number
of arguments and calling @code{usage()} if there is a problem. Then it sets
@code{RS} and @code{ORS} from the command-line arguments and sets
@code{ARGV[1]} and @code{ARGV[2]} to the null string, so that they are
-not treated as file names
+not treated as @value{FN}s
(@pxref{ARGC and ARGV}).
The @code{usage()} function prints an error message and exits.
Finally, the single rule handles the printing scheme outlined above,
using @code{print} or @code{printf} as appropriate, depending upon the
value of @code{RT}.
-
-@ignore
-Exercise, compare the performance of this version with the more
-straightforward:
-
-BEGIN {
- pat = ARGV[1]
- repl = ARGV[2]
- ARGV[1] = ARGV[2] = ""
-}
-
-{ gsub(pat, repl); print }
-
-Exercise: what are the advantages and disadvantages of this version versus sed?
- Advantage: egrep regexps
- speed (?)
- Disadvantage: no & in replacement text
-
-Others?
-@end ignore
@c ENDOFRANGE awksed
@node Igawk Program
@@ -24492,7 +25675,7 @@ BEGIN @{
The following program, @file{igawk.sh}, provides this service.
It simulates @command{gawk}'s searching of the @env{AWKPATH} variable
and also allows @dfn{nested} includes; i.e., a file that is included
-with @samp{@@include} can contain further @samp{@@include} statements.
+with @code{@@include} can contain further @code{@@include} statements.
@command{igawk} makes an effort to only include files once, so that nested
includes don't accidentally include a library function twice.
@@ -24518,11 +25701,11 @@ a shell variable that will be expanded. There are two cases:
@enumerate a
@item
-Literal text, provided with @option{--source} or @option{--source=}. This
+Literal text, provided with @option{-e} or @option{--source}. This
text is just appended directly.
@item
-Source file names, provided with @option{-f}. We use a neat trick and append
+Source @value{FN}s, provided with @option{-f}. We use a neat trick and append
@samp{@@include @var{filename}} to the shell variable's contents. Since the file-inclusion
program works the way @command{gawk} does, this gets the text
of the file included into the program at the correct point.
@@ -24530,12 +25713,12 @@ of the file included into the program at the correct point.
@item
Run an @command{awk} program (naturally) over the shell variable's contents to expand
-@samp{@@include} statements. The expanded program is placed in a second
+@code{@@include} statements. The expanded program is placed in a second
shell variable.
@item
Run the expanded program with @command{gawk} and any other original command-line
-arguments that the user supplied (such as the data file names).
+arguments that the user supplied (such as the @value{DF} names).
@end enumerate
This program uses shell variables extensively: for storing command-line arguments,
@@ -24550,24 +25733,25 @@ argument is @samp{debug}.
The next part loops through all the command-line arguments.
There are several cases of interest:
-@table @code
-@item --
+@c @asis for docbook
+@table @asis
+@item @option{--}
This ends the arguments to @command{igawk}. Anything else should be passed on
to the user's @command{awk} program without being evaluated.
-@item -W
+@item @option{-W}
This indicates that the next option is specific to @command{gawk}. To make
argument processing easier, the @option{-W} is appended to the front of the
remaining arguments and the loop continues. (This is an @command{sh}
programming trick. Don't worry about it if you are not familiar with
@command{sh}.)
-@item -v@r{,} -F
+@item @option{-v}, @option{-F}
These are saved and passed on to @command{gawk}.
-@item -f@r{,} --file@r{,} --file=@r{,} -Wfile=
-The file name is appended to the shell variable @code{program} with an
-@samp{@@include} statement.
+@item @option{-f}, @option{--file}, @option{--file=}, @option{-Wfile=}
+The @value{FN} is appended to the shell variable @code{program} with an
+@code{@@include} statement.
The @command{expr} utility is used to remove the leading option part of the
argument (e.g., @samp{--file=}).
(Typical @command{sh} usage would be to use the @command{echo} and @command{sed}
@@ -24575,10 +25759,10 @@ utilities to do this work. Unfortunately, some versions of @command{echo} evalu
escape sequences in their arguments, possibly mangling the program text.
Using @command{expr} avoids this problem.)
-@item --source@r{,} --source=@r{,} -Wsource=
+@item @option{--source}, @option{--source=}, @option{-Wsource=}
The source text is appended to @code{program}.
-@item --version@r{,} -Wversion
+@item @option{--version}, @option{-Wversion}
@command{igawk} prints its version number, runs @samp{gawk --version}
to get the @command{gawk} version information, and then exits.
@end table
@@ -24686,15 +25870,15 @@ fi
@c endfile
@end example
-The @command{awk} program to process @samp{@@include} directives
+The @command{awk} program to process @code{@@include} directives
is stored in the shell variable @code{expand_prog}. Doing this keeps
the shell script readable. The @command{awk} program
reads through the user's program, one line at a time, using @code{getline}
(@pxref{Getline}). The input
-file names and @samp{@@include} statements are managed using a stack.
-As each @samp{@@include} is encountered, the current file name is
-``pushed'' onto the stack and the file named in the @samp{@@include}
-directive becomes the current file name. As each file is finished,
+@value{FN}s and @code{@@include} statements are managed using a stack.
+As each @code{@@include} is encountered, the current @value{FN} is
+``pushed'' onto the stack and the file named in the @code{@@include}
+directive becomes the current @value{FN}. As each file is finished,
the stack is ``popped,'' and the previous input file becomes the current
input file again. The process is started by making the original file
the first one on the stack.
@@ -24703,16 +25887,16 @@ The @code{pathto()} function does the work of finding the full path to
a file. It simulates @command{gawk}'s behavior when searching the
@env{AWKPATH} environment variable
(@pxref{AWKPATH Variable}).
-If a file name has a @samp{/} in it, no path search is done.
-Similarly, if the file name is @code{"-"}, then that string is
+If a @value{FN} has a @samp{/} in it, no path search is done.
+Similarly, if the @value{FN} is @code{"-"}, then that string is
used as-is. Otherwise,
-the file name is concatenated with the name of each directory in
-the path, and an attempt is made to open the generated file name.
+the @value{FN} is concatenated with the name of each directory in
+the path, and an attempt is made to open the generated @value{FN}.
The only way to test if a file can be read in @command{awk} is to go
ahead and try to read it with @code{getline}; this is what @code{pathto()}
does.@footnote{On some very old versions of @command{awk}, the test
@samp{getline junk < t} can loop forever if the file exists but is empty.
-Caveat emptor.} If the file can be read, it is closed and the file name
+Caveat emptor.} If the file can be read, it is closed and the @value{FN}
is returned:
@ignore
@@ -24767,17 +25951,17 @@ BEGIN @{
@c endfile
@end example
-The stack is initialized with @code{ARGV[1]}, which will be @samp{/dev/stdin}.
+The stack is initialized with @code{ARGV[1]}, which will be @code{"/dev/stdin"}.
The main loop comes next. Input lines are read in succession. Lines that
-do not start with @samp{@@include} are printed verbatim.
-If the line does start with @samp{@@include}, the file name is in @code{$2}.
+do not start with @code{@@include} are printed verbatim.
+If the line does start with @code{@@include}, the @value{FN} is in @code{$2}.
@code{pathto()} is called to generate the full path. If it cannot, then the program
prints an error message and continues.
The next thing to check is if the file is included already. The
-@code{processed} array is indexed by the full file name of each included
+@code{processed} array is indexed by the full @value{FN} of each included
file and it tracks this information for us. If the file is
-seen again, a warning message is printed. Otherwise, the new file name is
+seen again, a warning message is printed. Otherwise, the new @value{FN} is
pushed onto the stack and processing continues.
Finally, when @code{getline} encounters the end of the input file, the file
@@ -24798,7 +25982,7 @@ the program is done:
fpath = pathto($2)
@group
if (fpath == "") @{
- printf("igawk:%s:%d: cannot find %s\n",
+ printf("igawk: %s:%d: cannot find %s\n",
input[stackptr], FNR, $2) > "/dev/stderr"
continue
@}
@@ -24838,7 +26022,7 @@ It's done in these steps:
@enumerate
@item
-Run @command{gawk} with the @samp{@@include}-processing program (the
+Run @command{gawk} with the @code{@@include}-processing program (the
value of the @code{expand_prog} shell variable) on standard input.
@item
@@ -24855,14 +26039,14 @@ options and command-line arguments that the user supplied.
@c this causes more problems than it solves, so leave it out.
@ignore
-The special file @file{/dev/null} is passed as a data file to @command{gawk}
+The special file @file{/dev/null} is passed as a @value{DF} to @command{gawk}
to handle an interesting case. Suppose that the user's program only has
-a @code{BEGIN} rule and there are no data files to read.
-The program should exit without reading any data files.
+a @code{BEGIN} rule and there are no @value{DF}s to read.
+The program should exit without reading any @value{DF}s.
However, suppose that an included library file defines an @code{END}
rule of its own. In this case, @command{gawk} will hang, reading standard
input. In order to avoid this, @file{/dev/null} is explicitly added to the
-command-line. Reading from @file{/dev/null} always returns an immediate
+command line. Reading from @file{/dev/null} always returns an immediate
end of file indication.
@c Hmm. Add /dev/null if $# is 0? Still messes up ARGV. Sigh.
@@ -24877,27 +26061,25 @@ eval gawk $opts -- '"$processed_program"' '"$@@"'
The @command{eval} command is a shell construct that reruns the shell's parsing
process. This keeps things properly quoted.
-This version of @command{igawk} represents my fifth version of this program.
+This version of @command{igawk} represents the fifth version of this program.
There are four key simplifications that make the program work better:
-@itemize @bullet
+@itemize @value{BULLET}
@item
-Using @samp{@@include} even for the files named with @option{-f} makes building
+Using @code{@@include} even for the files named with @option{-f} makes building
the initial collected @command{awk} program much simpler; all the
-@samp{@@include} processing can be done once.
+@code{@@include} processing can be done once.
@item
Not trying to save the line read with @code{getline}
in the @code{pathto()} function when testing for the
file's accessibility for use with the main program simplifies things
considerably.
-@c what problem does this engender though - exercise
-@c answer, reading from "-" or /dev/stdin
@item
Using a @code{getline} loop in the @code{BEGIN} rule does it all in one
place. It is not necessary to call out to a separate loop for processing
-nested @samp{@@include} statements.
+nested @code{@@include} statements.
@item
Instead of saving the expanded program in a temporary file, putting it in a shell variable
@@ -24917,40 +26099,9 @@ Finally, @command{igawk} shows that it is not always necessary to add new
features to a program; they can often be layered on top.
@ignore
With @command{igawk},
-there is no real reason to build @samp{@@include} processing into
+there is no real reason to build @code{@@include} processing into
@command{gawk} itself.
@end ignore
-
-@cindex search paths
-@cindex search paths, for source files
-@cindex source files@comma{} search path for
-@cindex files, source@comma{} search path for
-@cindex directories, searching
-As an additional example of this, consider the idea of having two
-files in a directory in the search path:
-
-@table @file
-@item default.awk
-This file contains a set of default library functions, such
-as @code{getopt()} and @code{assert()}.
-
-@item site.awk
-This file contains library functions that are specific to a site or
-installation; i.e., locally developed functions.
-Having a separate file allows @file{default.awk} to change with
-new @command{gawk} releases, without requiring the system administrator to
-update it each time by adding the local functions.
-@end table
-
-One user
-@c Karl Berry, karl@ileaf.com, 10/95
-suggested that @command{gawk} be modified to automatically read these files
-upon startup. Instead, it would be very simple to modify @command{igawk}
-to do this. Since @command{igawk} can process nested @samp{@@include}
-directives, @file{default.awk} could simply contain @samp{@@include}
-statements for the desired library functions.
-
-@c Exercise: make this change
@c ENDOFRANGE libfex
@c ENDOFRANGE flibex
@c ENDOFRANGE awkpex
@@ -25087,6 +26238,7 @@ babels beslab
babery yabber
@dots{}
@end example
+
@c ENDOFRANGE anagram
@node Signature Program
@@ -25118,7 +26270,10 @@ X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O,
O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O@}'
@end example
-We leave it to you to determine what the program does.
+@cindex Johansen, Chris
+We leave it to you to determine what the program does. (If you are
+truly desperate to understand it, see Chris Johansen's explanation,
+which is embedded in the Texinfo source file for this @value{DOCUMENT}.)
@ignore
To: "Arnold Robbins" <arnold@skeeve.com>
@@ -25198,19 +26353,193 @@ BEGIN {
}
@end ignore
-@iftex
-@part Part III:@* Moving Beyond Standard @command{awk} With @command{gawk}
-@end iftex
+@node Programs Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+The functions provided in this @value{CHAPTER} and the previous one
+continue on the theme that reading programs is an excellent way to learn
+Good Programming.
+
+@item
+Using @samp{#!} to make @command{awk} programs directly runnable makes
+them easier to use. Otherwise, invoke the program using @samp{awk
+-f @dots{}}.
+
+@item
+Reimplementing standard POSIX programs in @command{awk} is a pleasant
+exercise; @command{awk}'s expressive power lets you write such programs
+in relatively few lines of code, yet they are functionally complete
+and usable.
+
+@item
+One of standard @command{awk}'s weaknesses is working with individual
+characters. The ability to use @code{split()} with the empty string as
+the separator can considerably simplify such tasks.
+
+@item
+The library functions from @ref{Library Functions}, proved their
+usefulness for a number of real (if small) programs.
+
+@item
+Besides reinventing POSIX wheels, other programs solved a selection of
+interesting problems, such as finding duplicate words in text, printing
+mailing labels, and finding anagrams.
+
+@end itemize
+
+@c EXCLUDE START
+@node Programs Exercises
+@section Exercises
+
+@enumerate
+@item
+Rewrite @file{cut.awk} (@pxref{Cut Program})
+using @code{split()} with @code{""} as the separator.
+
+@item
+In @ref{Egrep Program}, we mentioned that @samp{egrep -i} could be
+simulated in versions of @command{awk} without @code{IGNORECASE} by
+using @code{tolower()} on the line and the pattern. In a footnote there,
+we also mentioned that this solution has a bug: the translated line is
+output, and not the original one. Fix this problem.
+@c Exercise: Fix this, w/array and new line as key to original line
+
+@item
+The POSIX version of @command{id} takes options that control which
+information is printed. Modify the @command{awk} version
+(@pxref{Id Program}) to accept the same arguments and perform in the
+same way.
+
+@item
+The @file{split.awk} program (@pxref{Split Program}) assumes
+that letters are contiguous in the character set,
+which isn't true for EBCDIC systems.
+Fix this problem.
+(Hint: Consider a different way to work through the alphabet,
+without relying on @code{ord()} and @code{chr()}.)
+
+@item
+In @file{uniq.awk} (@pxref{Uniq Program}), the
+logic for choosing which lines to print represents a @dfn{state
+machine}, which is ``a device that can be in one of a set number of stable
+conditions depending on its previous condition and on the present values
+of its inputs.''@footnote{This is the definition returned from entering
+@code{define: state machine} into Google.}
+Brian Kernighan suggests that
+``an alternative approach to state machines is to just read
+the input into an array, then use indexing. It's almost always
+easier code, and for most inputs where you would use this, just
+as fast.'' Rewrite the logic to follow this
+suggestion.
+
+
+@item
+Why can't the @file{wc.awk} program (@pxref{Wc Program}) just
+use the value of @code{FNR} in @code{endfile()}?
+Hint: Examine the code in @ref{Filetrans Function}.
@ignore
-@ifdocbook
+@command{wc} can't just use the value of @code{FNR} in
+@code{endfile()}. If you examine the code in @ref{Filetrans Function},
+you will see that @code{FNR} has already been reset by the time
+@code{endfile()} is called.
+@end ignore
+
+@item
+Manipulation of individual characters in the @command{translate} program
+(@pxref{Translate Program}) is painful using standard @command{awk}
+functions. Given that @command{gawk} can split strings into individual
+characters using @code{""} as the separator, how might you use this
+feature to simplify the program?
+
+@item
+The @file{extract.awk} program (@pxref{Extract Program}) was written
+before @command{gawk} had the @code{gensub()} function. Use it
+to simplify the code.
+
+@item
+Compare the performance of the @file{awksed.awk} program
+(@pxref{Simple Sed}) with the more straightforward:
+
+@example
+BEGIN @{
+ pat = ARGV[1]
+ repl = ARGV[2]
+ ARGV[1] = ARGV[2] = ""
+@}
-@part Part III:@* Moving Beyond Standard @command{awk} With @command{gawk}
+@{ gsub(pat, repl); print @}
+@end example
+
+@item
+What are the advantages and disadvantages of @file{awksed.awk} versus
+the real @command{sed} utility?
+
+@ignore
+ Advantage: egrep regexps
+ speed (?)
+ Disadvantage: no & in replacement text
+
+Others?
+@end ignore
+
+@item
+In @ref{Igawk Program}, we mentioned that not trying to save the line
+read with @code{getline} in the @code{pathto()} function when testing
+for the file's accessibility for use with the main program simplifies
+things considerably. What problem does this engender though?
+@c answer, reading from "-" or /dev/stdin
+
+@cindex search paths
+@cindex search paths, for source files
+@cindex source files@comma{} search path for
+@cindex files, source@comma{} search path for
+@cindex directories, searching
+@item
+As an additional example of the idea that it is not always necessary to
+add new features to a program, consider the idea of having two files in
+a directory in the search path:
+@table @file
+@item default.awk
+This file contains a set of default library functions, such
+as @code{getopt()} and @code{assert()}.
+
+@item site.awk
+This file contains library functions that are specific to a site or
+installation; i.e., locally developed functions.
+Having a separate file allows @file{default.awk} to change with
+new @command{gawk} releases, without requiring the system administrator to
+update it each time by adding the local functions.
+@end table
+
+One user
+@c Karl Berry, karl@ileaf.com, 10/95
+suggested that @command{gawk} be modified to automatically read these files
+upon startup. Instead, it would be very simple to modify @command{igawk}
+to do this. Since @command{igawk} can process nested @code{@@include}
+directives, @file{default.awk} could simply contain @code{@@include}
+statements for the desired library functions.
+Make this change.
+
+@item
+Modify @file{anagram.awk} (@pxref{Anagram Program}), to avoid
+the use of the external @command{sort} utility.
+
+@end enumerate
+@c EXCLUDE END
+
+@ifnotinfo
+@part @value{PART3}Moving Beyond Standard @command{awk} With @command{gawk}
+@end ifnotinfo
+
+@ifdocbook
Part III focuses on features specific to @command{gawk}.
It contains the following chapters:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@ref{Advanced Features}.
@@ -25227,13 +26556,9 @@ It contains the following chapters:
@ref{Dynamic Extensions}.
@end itemize
@end ifdocbook
-@end ignore
@node Advanced Features
@chapter Advanced Features of @command{gawk}
-@ifset WITH_NETWORK_CHAPTER
-@cindex advanced features, network connections, See Also networks@comma{} connections
-@end ifset
@c STARTOFRANGE gawadv
@cindex @command{gawk}, features, advanced
@c STARTOFRANGE advgaw
@@ -25246,6 +26571,8 @@ Contributed by: Peter Langston <pud!psl@bellcore.bellcore.com>
"Write documentation as if whoever reads it is a violent psychopath
who knows where you live."
@end ignore
+@cindex Langston, Peter
+@cindex English, Steve
@quotation
@i{Write documentation as if whoever reads it is
a violent psychopath who knows where you live.}
@@ -25265,10 +26592,11 @@ of TCP/IP networking. Finally, @command{gawk}
can @dfn{profile} an @command{awk} program, making it possible to tune
it for performance.
+@c FULLXREF ON
A number of advanced features require separate @value{CHAPTER}s of their
own:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@ref{Internationalization}, discusses how to internationalize
your @command{awk} programs, so that they can speak multiple
@@ -25287,6 +26615,7 @@ debugger for debugging @command{awk} programs.
discusses the ability to dynamically add new built-in functions to
@command{gawk}.
@end itemize
+@c FULLXREF OFF
@menu
* Nondecimal Data:: Allowing nondecimal input data.
@@ -25295,6 +26624,7 @@ discusses the ability to dynamically add new built-in functions to
* Two-way I/O:: Two-way communications with another process.
* TCP/IP Networking:: Using @command{gawk} for network programming.
* Profiling:: Profiling your @command{awk} programs.
+* Advanced Features Summary:: Summary of advanced features.
@end menu
@node Nondecimal Data
@@ -25327,7 +26657,7 @@ $ @kbd{echo 0123 123 0x123 | gawk '@{ print $1, $2, $3 @}'}
The @code{print} statement treats its expressions as strings.
Although the fields can act as numbers when necessary,
they are still strings, so @code{print} does not try to treat them
-numerically. You may need to add zero to a field to force it to
+numerically. You need to add zero to a field to force it to
be treated as a number. For example:
@example
@@ -25349,7 +26679,7 @@ disabled. If you want it, you must explicitly request it.
@emph{Use of this option is not recommended.}
It can break old programs very badly.
Instead, use the @code{strtonum()} function to convert your data
-(@pxref{Nondecimal-numbers}).
+(@pxref{String Functions}).
This makes your programs easier to write and easier to read, and
leads to less surprising results.
@end quotation
@@ -25381,9 +26711,9 @@ Often, though, it is desirable to be able to loop over the elements
in a particular order that you, the programmer, choose. @command{gawk}
lets you do this.
-@ref{Controlling Scanning}, describes how you can assign special,
+@DBREF{Controlling Scanning} describes how you can assign special,
pre-defined values to @code{PROCINFO["sorted_in"]} in order to
-control the order in which @command{gawk} will traverse an array
+control the order in which @command{gawk} traverses an array
during a @code{for} loop.
In addition, the value of @code{PROCINFO["sorted_in"]} can be a function name.
@@ -25707,9 +27037,9 @@ END @{
So far, so good. Now it starts to get interesting. Both @code{asort()}
and @code{asorti()} accept a third string argument to control comparison
-of array elements. In @ref{String Functions}, we ignored this third
-argument; however, the time has now come to describe how this argument
-affects these two functions.
+of array elements. When we introduced @code{asort()} and @code{asorti()}
+in @ref{String Functions}, we ignored this third argument; however,
+now is the time to describe how this argument affects these two functions.
Basically, the third argument specifies how the array is to be sorted.
There are two possibilities. As with @code{PROCINFO["sorted_in"]},
@@ -25750,6 +27080,9 @@ Caveat Emptor.
@node Two-way I/O
@section Two-Way Communications with Another Process
+
+@c 8/2014. Neither Mike nor BWK saw this as relevant. Commenting it out.
+@ignore
@cindex Brennan, Michael
@cindex programmers, attractiveness of
@smallexample
@@ -25779,6 +27112,7 @@ the scent of perl programmers.
Mike Brennan
@c brennan@@whidbey.com
@end smallexample
+@end ignore
@cindex advanced features, processes@comma{} communicating with
@cindex processes, two-way communications with
@@ -25805,7 +27139,10 @@ system("rm " tempfile)
This works, but not elegantly. Among other things, it requires that
the program be run in a directory that cannot be shared among users;
for example, @file{/tmp} will not do, as another user might happen
-to be using a temporary file with the same name.
+to be using a temporary file with the same name.@footnote{Michael
+Brennan suggests the use of @code{rand()} to generate unique
+@value{FN}s. This is a valid point; nevertheless, temporary files
+remain more difficult than two-way pipes.} @c 8/2014
@cindex coprocesses
@cindex input/output, two-way
@@ -25839,7 +27176,7 @@ the shell.
There are some cautionary items to be aware of:
-@itemize @bullet
+@itemize @value{BULLET}
@item
As the code inside @command{gawk} currently stands, the coprocess's
standard error goes to the same place that the parent @command{gawk}'s
@@ -25905,6 +27242,7 @@ has been read, @command{gawk} terminates the coprocess and exits.
As a side note, the assignment @samp{LC_ALL=C} in the @command{sort}
command ensures traditional Unix (ASCII) sorting from @command{sort}.
+This is not strictly necessary here, but it's good to know how to do this.
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array, and communications via ptys
@@ -25923,7 +27261,7 @@ print @dots{} |& command # start two-way pipe
@end example
@noindent
-Using ptys avoids the buffer deadlock issues described earlier, at some
+Using ptys usually avoids the buffer deadlock issues described earlier, at some
loss in performance. If your system does not have ptys, or if all the
system's ptys are in use, @command{gawk} automatically falls back to
using regular pipes.
@@ -25958,10 +27296,10 @@ another process on another system across an IP network connection.
You can think of this as just a @emph{very long} two-way pipeline to
a coprocess.
The way @command{gawk} decides that you want to use TCP/IP networking is
-by recognizing special file names that begin with one of @samp{/inet/},
-@samp{/inet4/} or @samp{/inet6}.
+by recognizing special @value{FN}s that begin with one of @samp{/inet/},
+@samp{/inet4/} or @samp{/inet6/}.
-The full syntax of the special file name is
+The full syntax of the special @value{FN} is
@file{/@var{net-type}/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}}.
The components are:
@@ -26027,7 +27365,9 @@ See
@inforef{Top, , General Introduction, gawkinet, TCP/IP Internetworking with @command{gawk}},
@end ifinfo
@ifnotinfo
-See @cite{TCP/IP Internetworking with @command{gawk}},
+See
+@uref{http://www.gnu.org/software/gawk/manual/gawkinet/,
+@cite{TCP/IP Internetworking with @command{gawk}}},
which comes as part of the @command{gawk} distribution,
@end ifnotinfo
for a much more complete introduction and discussion, as well as
@@ -26164,7 +27504,7 @@ in the morning to work.)
This example illustrates many of the basic features of profiling output.
They are as follows:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The program is printed in the order @code{BEGIN} rules,
@code{BEGINFILE} rules,
@@ -26223,7 +27563,6 @@ the body of an @code{if}, @code{else}, or loop is only a single statement.
@item
Parentheses are used only where needed, as indicated by the structure
of the program and the precedence rules.
-@c extra verbiage here satisfies the copyeditor. ugh.
For example, @samp{(3 + 5) * 4} means add three plus five, then multiply
the total by four. However, @samp{3 + 5 * 4} has no parentheses, and
means @samp{3 + (5 * 4)}.
@@ -26306,7 +27645,7 @@ As usual, the profiled version of the program is written to
@file{awkprof.out}, or to a different file if one is specified with
the @option{--profile} option.
-Along with the regular profile, as shown earlier, the profile
+Along with the regular profile, as shown earlier, the profile file
includes a trace of any active functions:
@example
@@ -26348,14 +27687,59 @@ When called this way, @command{gawk} ``pretty prints'' the program into
@file{awkprof.out}, without any execution counts.
@quotation NOTE
-The @option{--pretty-print} option still runs your program.
-This will change in the next major release.
+Once upon a time, the @option{--pretty-print} option would also run
+your program. This is no longer the case.
@end quotation
-@c ENDOFRANGE advgaw
-@c ENDOFRANGE gawadv
@c ENDOFRANGE awkp
@c ENDOFRANGE proawk
+@node Advanced Features Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+The @option{--non-decimal-data} option causes @command{gawk} to treat
+octal- and hexadecimal-looking input data as octal and hexadecimal.
+This option should be used with caution or not at all; use of @code{strtonum()}
+is preferable.
+
+@item
+You can take over complete control of sorting in @samp{for (@var{indx} in @var{array})}
+array traversal by setting @code{PROCINFO["sorted_in"]} to the name of a user-defined
+function that does the comparison of array elements based on index and value.
+
+@item
+Similarly, you can supply the name of a user-defined comparison function as the
+third argument to either @code{asort()} or @code{asorti()} to control how
+those functions sort arrays. Or you may provide one of the predefined control
+strings that work for @code{PROCINFO["sorted_in"]}.
+
+@item
+You can use the @samp{|&} operator to create a two-way pipe to a co-process.
+You read from the co-process with @code{getline} and write to it with @code{print}
+or @code{printf}. Use @code{close()} to close off the co-process completely, or
+optionally, close off one side of the two-way communications.
+
+@item
+By using special ``@value{FN}s'' with the @samp{|&} operator, you can open a
+TCP/IP (or UDP/IP) connection to remote hosts in the Internet. @command{gawk}
+supports both IPv4 and IPv6.
+
+@item
+You can generate statement count profiles of your program. This can help you
+determine which parts of your program may be taking the most time and let
+you tune them more easily. Sending the @code{USR1} signal while profiling causes
+@command{gawk} to dump the profile and keep going, including a function call stack.
+
+@item
+You can also just ``pretty print'' the program. Doing so no longer runs
+the program, although it did in earlier versions.
+
+@end itemize
+
+@c ENDOFRANGE advgaw
+@c ENDOFRANGE gawadv
+
@node Internationalization
@chapter Internationalization with @command{gawk}
@@ -26384,11 +27768,12 @@ a requirement.
@menu
* I18N and L10N:: Internationalization and Localization.
-* Explaining gettext:: How GNU @code{gettext} works.
+* Explaining gettext:: How GNU @command{gettext} works.
* Programmer i18n:: Features for the programmer.
* Translator i18n:: Features for the translator.
* I18N Example:: A simple i18n example.
* Gawk I18N:: @command{gawk} is also internationalized.
+* I18N Summary:: Summary of I18N stuff.
@end menu
@node I18N and L10N
@@ -26408,20 +27793,22 @@ responses, and information related to how numerical and
monetary values are printed and read.
@node Explaining gettext
-@section GNU @code{gettext}
+@section GNU @command{gettext}
@cindex internationalizing a program
@c STARTOFRANGE gettex
-@cindex @code{gettext} library
-The facilities in GNU @code{gettext} focus on messages; strings printed
+@cindex @command{gettext} library
+@command{gawk} uses GNU @command{gettext} to provide its internationalization
+features.
+The facilities in GNU @command{gettext} focus on messages; strings printed
by a program, either directly or via formatting with @code{printf} or
@code{sprintf()}.@footnote{For some operating systems, the @command{gawk}
-port doesn't support GNU @code{gettext}.
+port doesn't support GNU @command{gettext}.
Therefore, these features are not available
if you are using one of those operating systems. Sorry.}
-@cindex portability, @code{gettext} library and
-When using GNU @code{gettext}, each application has its own
+@cindex portability, @command{gettext} library and
+When using GNU @command{gettext}, each application has its own
@dfn{text domain}. This is a unique name, such as @samp{kpilot} or @samp{gawk},
that identifies the application.
A complete application may have multiple components---programs written
@@ -26445,7 +27832,7 @@ language).
@cindex @code{textdomain()} function (C library)
@item
The programmer indicates the application's text domain
-(@code{"guide"}) to the @code{gettext} library,
+(@code{"guide"}) to the @command{gettext} library,
by calling the @code{textdomain()} function.
@cindex @code{.pot} files
@@ -26489,7 +27876,7 @@ are installed in a standard place.
@cindex @code{bindtextdomain()} function (C library)
@item
-For testing and development, it is possible to tell @code{gettext}
+For testing and development, it is possible to tell @command{gettext}
to use @file{.gmo} files in a different directory than the standard
one by using the @code{bindtextdomain()} function.
@@ -26522,7 +27909,7 @@ strings enclosed in calls to @code{gettext()}.
@cindex @code{_} (underscore), C macro
@cindex underscore (@code{_}), C macro
-The GNU @code{gettext} developers, recognizing that typing
+The GNU @command{gettext} developers, recognizing that typing
@samp{gettext(@dots{})} over and over again is both painful and ugly to look
at, use the macro @samp{_} (an underscore) to make things easier:
@@ -26535,7 +27922,7 @@ printf("%s", _("Don't Panic!\n"));
@end example
@cindex internationalization, localization, locale categories
-@cindex @code{gettext} library, locale categories
+@cindex @command{gettext} library, locale categories
@cindex locale categories
@noindent
This reduces the typing overhead to just three extra characters per string
@@ -26543,12 +27930,12 @@ and is considerably easier to read as well.
There are locale @dfn{categories}
for different types of locale-related information.
-The defined locale categories that @code{gettext} knows about are:
+The defined locale categories that @command{gettext} knows about are:
@table @code
@cindex @code{LC_MESSAGES} locale category
@item LC_MESSAGES
-Text messages. This is the default category for @code{gettext}
+Text messages. This is the default category for @command{gettext}
operations, but it is possible to supply a different one explicitly,
if necessary. (It is almost never necessary to supply a different category.)
@@ -26561,7 +27948,16 @@ and/or groups of characters sort in a given language.
@cindex @code{LC_CTYPE} locale category
@item LC_CTYPE
Character-type information (alphabetic, digit, upper- or lowercase, and
-so on).
+so on) as well as character encoding.
+@ignore
+In June 2001 Bruno Haible wrote:
+- Description of LC_CTYPE: It determines both
+ 1. character encoding,
+ 2. character type information.
+ (For example, in both KOI8-R and ISO-8859-5 the character type information
+ is the same - cyrillic letters count as 'alpha' - but the encoding is
+ different.)
+@end ignore
This information is accessed via the
POSIX character classes in regular expressions,
such as @code{/[[:alnum:]]/}
@@ -26582,11 +27978,6 @@ use a comma every three decimal places and a period for the decimal
point, while many Europeans do exactly the opposite:
1,234.56 versus 1.234,56.}
-@cindex @code{LC_RESPONSE} locale category
-@item LC_RESPONSE
-Response information, such as how ``yes'' and ``no'' appear in the
-local language, and possibly other information as well.
-
@cindex time, localization and
@cindex dates, information related to@comma{} localization
@cindex @code{LC_TIME} locale category
@@ -26596,7 +27987,7 @@ before or after the day in a date, local month abbreviations, and so on.
@cindex @code{LC_ALL} locale category
@item LC_ALL
-All of the above. (Not too useful in the context of @code{gettext}.)
+All of the above. (Not too useful in the context of @command{gettext}.)
@end table
@c ENDOFRANGE gettex
@@ -26612,7 +28003,7 @@ internationalization:
@cindex @code{TEXTDOMAIN} variable
@item TEXTDOMAIN
This variable indicates the application's text domain.
-For compatibility with GNU @code{gettext}, the default
+For compatibility with GNU @command{gettext}, the default
value is @code{"messages"}.
@cindex internationalization, localization, marked strings
@@ -26623,7 +28014,7 @@ are candidates for translation at runtime.
String constants without a leading underscore are not translated.
@cindexgawkfunc{dcgettext}
-@item dcgettext(@var{string} @r{[}, @var{domain} @r{[}, @var{category}@r{]]})
+@item @code{dcgettext(@var{string}} [@code{,} @var{domain} [@code{,} @var{category}]]@code{)}
Return the translation of @var{string} in
text domain @var{domain} for locale category @var{category}.
The default value for @var{domain} is the current value of @code{TEXTDOMAIN}.
@@ -26649,7 +28040,7 @@ default arguments.
@end quotation
@cindexgawkfunc{dcngettext}
-@item dcngettext(@var{string1}, @var{string2}, @var{number} @r{[}, @var{domain} @r{[}, @var{category}@r{]]})
+@item @code{dcngettext(@var{string1}, @var{string2}, @var{number}} [@code{,} @var{domain} [@code{,} @var{category}]]@code{)}
Return the plural form used for @var{number} of the
translation of @var{string1} and @var{string2} in text domain
@var{domain} for locale category @var{category}. @var{string1} is the
@@ -26665,9 +28056,9 @@ The same remarks about argument order as for the @code{dcgettext()} function app
@cindex message object files, specifying directory of
@cindex files, message object, specifying directory of
@cindexgawkfunc{bindtextdomain}
-@item bindtextdomain(@var{directory} @r{[}, @var{domain}@r{]})
+@item @code{bindtextdomain(@var{directory}} [@code{,} @var{domain} ]@code{)}
Change the directory in which
-@code{gettext} looks for @file{.gmo} files, in case they
+@command{gettext} looks for @file{.gmo} files, in case they
will not or cannot be placed in the standard locations
(e.g., during testing).
Return the directory in which @var{domain} is ``bound.''
@@ -26721,18 +28112,33 @@ printf(_"Number of users is %d\n", nusers)
@item
If you are creating strings dynamically, you can
still translate them, using the @code{dcgettext()}
-built-in function:
+built-in function:@footnote{Thanks to Bruno Haible for this
+example.}
@example
-message = nusers " users logged in"
-message = dcgettext(message, "adminprog")
-print message
+if (groggy)
+ message = dcgettext("%d customers disturbing me\n", "adminprog")
+else
+ message = dcgettext("enjoying %d customers\n", "adminprog")
+printf(message, ncustomers)
@end example
Here, the call to @code{dcgettext()} supplies a different
text domain (@code{"adminprog"}) in which to find the
message, but it uses the default @code{"LC_MESSAGES"} category.
+The previous example only works if @code{ncustomers} is greater than one.
+This example would be better done with @code{dcngettext()}:
+
+@example
+if (groggy)
+ message = dcngettext("%d customer disturbing me\n", "%d customers disturbing me\n", ncustomers, "adminprog")
+else
+ message = dcngettext("enjoying %d customer\n", "enjoying %d customers\n", ncustomers, "adminprog")
+printf(message, ncustomers)
+@end example
+
+
@cindex @code{LC_MESSAGES} locale category, @code{bindtextdomain()} function (@command{gawk})
@item
During development, you might want to put the @file{.gmo}
@@ -26806,12 +28212,15 @@ $ @kbd{gawk --gen-pot -f guide.awk > guide.pot}
@cindex @code{xgettext} utility
When run with @option{--gen-pot}, @command{gawk} does not execute your
program. Instead, it parses it as usual and prints all marked strings
-to standard output in the format of a GNU @code{gettext} Portable Object
+to standard output in the format of a GNU @command{gettext} Portable Object
file. Also included in the output are any constant strings that
appear as the first argument to @code{dcgettext()} or as the first and
second argument to @code{dcngettext()}.@footnote{The
@command{xgettext} utility that comes with GNU
-@code{gettext} can handle @file{.awk} files.}
+@command{gettext} can handle @file{.awk} files.}
+You should distribute the generated @file{.pot} file with
+your @command{awk} program; translators will eventually use it
+to provide you translations that you can also then distribute.
@xref{I18N Example},
for the full list of steps to go through to create and test
translations for @command{guide}.
@@ -26827,9 +28236,8 @@ Format strings for @code{printf} and @code{sprintf()}
(@pxref{Printf})
present a special problem for translation.
Consider the following:@footnote{This example is borrowed
-from the GNU @code{gettext} manual.}
+from the GNU @command{gettext} manual.}
-@c line broken here only for smallbook format
@example
printf(_"String `%s' has %d characters\n",
string, length(string)))
@@ -26937,7 +28345,7 @@ As written, it won't work on other versions of @command{awk}.
However, it is actually almost portable, requiring very little
change:
-@itemize @bullet
+@itemize @value{BULLET}
@cindex @code{TEXTDOMAIN} variable, portability and
@item
Assignments to @code{TEXTDOMAIN} won't have any effect,
@@ -27077,33 +28485,33 @@ msgstr "Like, the scoop is"
@cindex Linux
@cindex GNU/Linux
The next step is to make the directory to hold the binary message object
-file and then to create the @file{guide.gmo} file.
-The directory layout shown here is standard for GNU @code{gettext} on
-GNU/Linux systems. Other versions of @code{gettext} may use a different
+file and then to create the @file{guide.mo} file.
+We pretend that our file is to be used in the @code{en_US.UTF-8} locale.
+The directory layout shown here is standard for GNU @command{gettext} on
+GNU/Linux systems. Other versions of @command{gettext} may use a different
layout:
@example
-$ @kbd{mkdir en_US en_US/LC_MESSAGES}
+$ @kbd{mkdir en_US.UTF-8 en_US.UTF-8/LC_MESSAGES}
@end example
-@cindex @code{.po} files, converting to @code{.gmo}
-@cindex files, @code{.po}, converting to @code{.gmo}
-@cindex @code{.gmo} files, converting from @code{.po}
-@cindex files, @code{.gmo}, converting from @code{.po}
+@cindex @code{.po} files, converting to @code{.mo}
+@cindex files, @code{.po}, converting to @code{.mo}
+@cindex @code{.mo} files, converting from @code{.po}
+@cindex files, @code{.mo}, converting from @code{.po}
@cindex portable object files, converting to message object files
@cindex files, portable object, converting to message object files
@cindex message object files, converting from portable object files
@cindex files, message object, converting from portable object files
@cindex @command{msgfmt} utility
The @command{msgfmt} utility does the conversion from human-readable
-@file{.po} file to machine-readable @file{.gmo} file.
+@file{.po} file to machine-readable @file{.mo} file.
By default, @command{msgfmt} creates a file named @file{messages}.
This file must be renamed and placed in the proper directory so that
@command{gawk} can find it:
@example
-$ @kbd{msgfmt guide-mellow.po}
-$ @kbd{mv messages en_US/LC_MESSAGES/guide.gmo}
+$ @kbd{msgfmt guide-mellow.po -o en_US.UTF-8/LC_MESSAGES/guide.mo}
@end example
Finally, we run the program to test it:
@@ -27132,30 +28540,71 @@ $ @kbd{gawk --posix -f guide.awk -f libintl.awk}
@section @command{gawk} Can Speak Your Language
@command{gawk} itself has been internationalized
-using the GNU @code{gettext} package.
-(GNU @code{gettext} is described in
+using the GNU @command{gettext} package.
+(GNU @command{gettext} is described in
complete detail in
@ifinfo
-@inforef{Top, , GNU @code{gettext} utilities, gettext, GNU gettext tools}.)
+@inforef{Top, , GNU @command{gettext} utilities, gettext, GNU gettext tools}.)
@end ifinfo
@ifnotinfo
-@cite{GNU gettext tools}.)
+@uref{http://www.gnu.org/software/gettext/manual/,
+@cite{GNU gettext tools}}.)
@end ifnotinfo
-As of this writing, the latest version of GNU @code{gettext} is
-@uref{ftp://ftp.gnu.org/gnu/gettext/gettext-0.18.2.1.tar.gz, version 0.18.2.1}.
+As of this writing, the latest version of GNU @command{gettext} is
+@uref{ftp://ftp.gnu.org/gnu/gettext/gettext-0.19.1.tar.gz,
+@value{PVERSION} 0.19.1}.
If a translation of @command{gawk}'s messages exists,
then @command{gawk} produces usage messages, warnings,
and fatal errors in the local language.
-@c ENDOFRANGE inloc
-@c The original text for this chapter was contributed by Efraim Yawitz.
-@c FIXME: Add more indexing.
+@node I18N Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Internationalization means writing a program such that it can use multiple
+languages without requiring source-code changes. Localization means
+providing the data necessary for an internationalized program to work
+in a particular language.
+
+@item
+@command{gawk} uses GNU @command{gettext} to let you internationalize
+and localize @command{awk} programs. A program's text domain identifies
+the program for grouping all messages and other data together.
+
+@item
+You mark a program's strings for translation by preceding them with
+an underscore. Once that is done, the strings are extracted into a
+@file{.pot} file. This file is copied for each language into a @file{.po}
+file, and the @file{.po} files are compiled into @file{.gmo} files for
+use at runtime.
+
+@item
+You can use position specifications with @code{sprintf()} and
+@code{printf} to rearrange the placement of argument values in formatted
+strings and output. This is useful for the translations of format
+control strings.
+
+@item
+The internationalization features have been designed so that they
+can be easily worked around in a standard @command{awk}.
+
+@item
+@command{gawk} itself has been internationalized and ships with
+a number of translations for its messages.
+
+@end itemize
+
+@c ENDOFRANGE inloc
@node Debugger
@chapter Debugging @command{awk} Programs
@cindex debugging @command{awk} programs
+@c The original text for this chapter was contributed by Efraim Yawitz.
+@c FIXME: Add more indexing.
+
It would be nice if computer programs worked perfectly the first time they
were run, but in real life, this rarely happens for programs of
any complexity. Thus, most programming languages have facilities available
@@ -27172,10 +28621,11 @@ how to use @command{gawk} for debugging your program is easy.
* List of Debugger Commands:: Main debugger commands.
* Readline Support:: Readline support.
* Limitations:: Limitations and future plans.
+* Debugging Summary:: Debugging summary.
@end menu
@node Debugging
-@section Introduction to @command{gawk} Debugger
+@section Introduction to the @command{gawk} Debugger
This @value{SECTION} introduces debugging in general and begins
the discussion of debugging in @command{gawk}.
@@ -27200,7 +28650,7 @@ In that case, what can you expect from such a tool? The answer to that
depends on the language being debugged, but in general, you can expect at
least the following:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The ability to watch a program execute its instructions one by one,
giving you, the programmer, the opportunity to think about what is happening
@@ -27328,7 +28778,7 @@ to debug command-line programs, only programs contained in files.)
In our case, we invoke the debugger like this:
@example
-$ @kbd{gawk -D -f getopt.awk -f join.awk -f uniq.awk inputfile}
+$ @kbd{gawk -D -f getopt.awk -f join.awk -f uniq.awk -1 inputfile}
@end example
@noindent
@@ -27390,7 +28840,7 @@ the breakpoint, use the @code{b} (breakpoint) command:
@example
gawk> @kbd{b are_equal}
-@print{} Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 64
+@print{} Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 63
@end example
The debugger tells us the file and line number where the breakpoint is.
@@ -27402,8 +28852,8 @@ gawk> @kbd{r}
@print{} Starting program:
@print{} Stopping in Rule ...
@print{} Breakpoint 1, are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':64
-@print{} 64 if (fcount == 0 && charcount == 0)
+ at `awklib/eg/prog/uniq.awk':63
+@print{} 63 if (fcount == 0 && charcount == 0)
gawk>
@end example
@@ -27415,12 +28865,12 @@ listing of the current stack frames:
@example
gawk> @kbd{bt}
@print{} #0 are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':69
-@print{} #1 in main() at `awklib/eg/prog/uniq.awk':89
+ at `awklib/eg/prog/uniq.awk':68
+@print{} #1 in main() at `awklib/eg/prog/uniq.awk':88
@end example
This tells us that @code{are_equal()} was called by the main program at
-line 89 of @file{uniq.awk}. (This is not a big surprise, since this
+line 88 of @file{uniq.awk}. (This is not a big surprise, since this
is the only call to @code{are_equal()} in the program, but in more complex
programs, knowing who called a function and with what parameters can be
the key to finding the source of the problem.)
@@ -27444,7 +28894,7 @@ A more useful variable to display might be the current record:
@example
gawk> @kbd{p $0}
-@print{} $0 = string ("gawk is a wonderful program!")
+@print{} $0 = "gawk is a wonderful program!"
@end example
@noindent
@@ -27453,7 +28903,7 @@ our test input above. Let's look at @code{NR}:
@example
gawk> @kbd{p NR}
-@print{} NR = number (2)
+@print{} NR = 2
@end example
@noindent
@@ -27472,7 +28922,7 @@ OK, let's just check that that rule worked correctly:
@example
gawk> @kbd{p last}
-@print{} last = string ("awk is a wonderful program!")
+@print{} last = "awk is a wonderful program!"
@end example
Everything we have done so far has verified that the program has worked as
@@ -27483,29 +28933,23 @@ be inside this function. To investigate further, we must begin
@example
gawk> @kbd{n}
-@print{} 67 if (fcount > 0) @{
+@print{} 66 if (fcount > 0) @{
@end example
-This tells us that @command{gawk} is now ready to execute line 67, which
+This tells us that @command{gawk} is now ready to execute line 66, which
decides whether to give the lines the special ``field skipping'' treatment
-indicated by the @option{-f} command-line option. (Notice that we skipped
-from where we were before at line 64 to here, since the condition in line 64
-
-@example
-if (fcount == 0 && charcount == 0)
-@end example
-
-@noindent
-was false.)
+indicated by the @option{-1} command-line option. (Notice that we skipped
+from where we were before at line 63 to here, since the condition in line 63
+@samp{if (fcount == 0 && charcount == 0)} was false.)
Continuing to step, we now get to the splitting of the current and
last records:
@example
gawk> @kbd{n}
-@print{} 68 n = split(last, alast)
+@print{} 67 n = split(last, alast)
gawk> @kbd{n}
-@print{} 69 m = split($0, aline)
+@print{} 68 m = split($0, aline)
@end example
At this point, we should be curious to see what our records were split
@@ -27513,10 +28957,10 @@ into, so we try to look:
@example
gawk> @kbd{p n m alast aline}
-@print{} n = number (5)
-@print{} m = number (5)
+@print{} n = 5
+@print{} m = untyped variable
@print{} alast = array, 5 elements
-@print{} aline = array, 5 elements
+@print{} aline = untyped variable
@end example
@noindent
@@ -27524,7 +28968,9 @@ gawk> @kbd{p n m alast aline}
@command{awk}'s @code{print} statement.)
This is kind of disappointing, though. All we found out is that there
-are five elements in each of our arrays. Useful enough (we now know that
+are five elements in @code{alast}; @code{m} and @code{aline} don't have
+values yet since we are at line 68 but haven't executed it yet.
+This information is useful enough (we now know that
none of the words were accidentally left out), but what if we want to see
inside the array?
@@ -27540,7 +28986,7 @@ Oops!
@example
gawk> @kbd{p alast[1]}
-@print{} alast["1"] = string ("awk")
+@print{} alast["1"] = "awk"
@end example
This would be kind of slow for a 100-member array, though, so
@@ -27549,11 +28995,11 @@ not to be mentioned):
@example
gawk> @kbd{p @@alast}
-@print{} alast["1"] = string ("awk")
-@print{} alast["2"] = string ("is")
-@print{} alast["3"] = string ("a")
-@print{} alast["4"] = string ("wonderful")
-@print{} alast["5"] = string ("program!")
+@print{} alast["1"] = "awk"
+@print{} alast["2"] = "is"
+@print{} alast["3"] = "a"
+@print{} alast["4"] = "wonderful"
+@print{} alast["5"] = "program!"
@end example
It looks like we got this far OK. Let's take another step
@@ -27561,9 +29007,9 @@ or two:
@example
gawk> @kbd{n}
-@print{} 70 clast = join(alast, fcount, n)
+@print{} 69 clast = join(alast, fcount, n)
gawk> @kbd{n}
-@print{} 71 cline = join(aline, fcount, m)
+@print{} 70 cline = join(aline, fcount, m)
@end example
Well, here we are at our error (sorry to spoil the suspense). What we
@@ -27573,8 +29019,8 @@ this would work. Let's look at what we've got:
@example
gawk> @kbd{p cline clast}
-@print{} cline = string ("gawk is a wonderful program!")
-@print{} clast = string ("awk is a wonderful program!")
+@print{} cline = "gawk is a wonderful program!"
+@print{} clast = "awk is a wonderful program!"
@end example
Hey, those look pretty familiar! They're just our original, unaltered,
@@ -27605,7 +29051,7 @@ and problem solved!
The @command{gawk} debugger command set can be divided into the
following categories:
-@itemize @bullet{}
+@itemize @value{BULLET}
@item
Breakpoint control
@@ -27631,7 +29077,7 @@ In the following descriptions, commands which may be abbreviated
show the abbreviation on a second description line.
A debugger command name may also be truncated if that partial
name is unambiguous. The debugger has the built-in capability to
-automatically repeat the previous command when just hitting @key{Enter}.
+automatically repeat the previous command just by hitting @key{Enter}.
This works for the commands @code{list}, @code{next}, @code{nexti}, @code{step}, @code{stepi}
and @code{continue} executed without any argument.
@@ -27993,7 +29439,7 @@ No newline is printed unless one is specified.
@item @code{set} @var{var}@code{=}@var{value}
Assign a constant (number or string) value to an @command{awk} variable
or field.
-String values must be enclosed between double quotes (@code{"@dots{}"}).
+String values must be enclosed between double quotes (@code{"}@dots{}@code{"}).
You can also set special @command{awk} variables, such as @code{FS},
@code{NF}, @code{NR}, etc.
@@ -28054,7 +29500,7 @@ functions which called the one you are in. The commands for doing this are:
Print a backtrace of all function calls (stack frames), or innermost @var{count}
frames if @var{count} > 0. Print the outermost @var{count} frames if
@var{count} < 0. The backtrace displays the name and arguments to each
-function, the source file name, and the line number.
+function, the source @value{FN}, and the line number.
@cindex debugger commands, @code{down}
@cindex @code{down} debugger command
@@ -28068,10 +29514,11 @@ Then select and print the frame.
@cindex @code{f} debugger command (alias for @code{frame})
@item @code{frame} [@var{n}]
@itemx @code{f} [@var{n}]
-Select and print (frame number, function and argument names, source file,
-and the source line) stack frame @var{n}. Frame 0 is the currently executing,
-or @dfn{innermost}, frame (function call), frame 1 is the frame that called the
-innermost one. The highest numbered frame is the one for the main program.
+Select and print stack frame @var{n}. Frame 0 is the currently executing,
+or @dfn{innermost}, frame (function call), frame 1 is the frame that
+called the innermost one. The highest numbered frame is the one for the
+main program. The printed information consists of the frame number,
+function and argument names, source file, and the source line.
@cindex debugger commands, @code{up}
@cindex @code{up} debugger command
@@ -28119,7 +29566,7 @@ Description of the selected stack frame.
@item functions
@cindex list function definitions, in debugger
-List all function definitions including source file names and
+List all function definitions including source @value{FN}s and
line numbers.
@item locals
@@ -28169,44 +29616,45 @@ a new value to the named option.
The available options are:
@c nested table
-@table @code
-@item history_size
+@c asis for docbook
+@table @asis
+@item @code{history_size}
@cindex debugger history size
The maximum number of lines to keep in the history file @file{./.gawk_history}.
The default is 100.
-@item listsize
+@item @code{listsize}
@cindex debugger default list amount
The number of lines that @code{list} prints. The default is 15.
-@item outfile
+@item @code{outfile}
@cindex redirect @command{gawk} output, in debugger
Send @command{gawk} output to a file; debugger output still goes
to standard output. An empty string (@code{""}) resets output to
standard output.
-@item prompt
+@item @code{prompt}
@cindex debugger prompt
The debugger prompt. The default is @samp{@w{gawk> }}.
-@item save_history @r{[}on @r{|} off@r{]}
+@item @code{save_history} [@code{on} | @code{off}]
@cindex debugger history file
Save command history to file @file{./.gawk_history}.
The default is @code{on}.
-@item save_options @r{[}on @r{|} off@r{]}
+@item @code{save_options} [@code{on} | @code{off}]
@cindex save debugger options
Save current options to file @file{./.gawkrc} upon exit.
The default is @code{on}.
Options are read back in to the next session upon startup.
-@item trace @r{[}on @r{|} off@r{]}
+@item @code{trace} [@code{on} | @code{off}]
@cindex instruction tracing, in debugger
Turn instruction tracing on or off. The default is @code{off}.
@end table
@item @code{save} @var{filename}
-Save the commands from the current session to the given file name,
+Save the commands from the current session to the given @value{FN},
so that they can be replayed using the @command{source} command.
@item @code{source} @var{filename}
@@ -28246,7 +29694,7 @@ partial dump of Davide Brini's obfuscated code
@smallexample
gawk> @kbd{dump}
-@print{} # BEGIN
+@print{} # BEGIN
@print{}
@print{} [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk]
@print{} [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR]
@@ -28354,7 +29802,7 @@ running a program, the debugger warns you if you accidentally type
@cindex debugger commands, @code{trace}
@cindex @code{trace} debugger command
-@item @code{trace} @code{on} @r{|} @code{off}
+@item @code{trace} [@code{on} | @code{off}]
Turn on or off a continuous printing of instructions which are about to
be executed, along with printing the @command{awk} line which they
implement. The default is @code{off}.
@@ -28370,16 +29818,18 @@ fairly self-explanatory, and using @code{stepi} and @code{nexti} while
@cindex command completion, in debugger
@cindex history expansion, in debugger
-If @command{gawk} is compiled with the @code{readline} library, you
-can take advantage of that library's command completion and history expansion
-features. The following types of completion are available:
+If @command{gawk} is compiled with
+@uref{http://cnswww.cns.cwru.edu/php/chet/readline/readline.html,
+the @code{readline} library}, you can take advantage of that library's
+command completion and history expansion features. The following types
+of completion are available:
@table @asis
@item Command completion
Command names.
-@item Source file name completion
-Source file names. Relevant commands are
+@item Source @value{FN} completion
+Source @value{FN}s. Relevant commands are
@code{break},
@code{clear},
@code{list},
@@ -28409,7 +29859,7 @@ We hope you find the @command{gawk} debugger useful and enjoyable to work with,
but as with any program, especially in its early releases, it still has
some limitations. A few which are worth being aware of are:
-@itemize @bullet{}
+@itemize @value{BULLET}
@item
At this point, the debugger does not give a detailed explanation of
what you did wrong when you type in something it doesn't like. Rather, it just
@@ -28417,14 +29867,17 @@ responds @samp{syntax error}. When you do figure out what your mistake was,
though, you'll feel like a real guru.
@item
-If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands},
+@c NOTE: no comma after the ref{} on purpose, due to following
+@c parenthetical remark.
+If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands}
(or if you are already familiar with @command{gawk} internals),
you will realize that much of the internal manipulation of data
in @command{gawk}, as in many interpreters, is done on a stack.
@code{Op_push}, @code{Op_pop}, etc., are the ``bread and butter'' of
-most @command{gawk} code. Unfortunately, as of now, the @command{gawk}
-debugger does not allow you to examine the stack's contents.
+most @command{gawk} code.
+Unfortunately, as of now, the @command{gawk}
+debugger does not allow you to examine the stack's contents.
That is, the intermediate results of expression evaluation are on the
stack, but cannot be printed. Rather, only variables which are defined
in the program can be printed. Of course, a workaround for
@@ -28451,6 +29904,39 @@ The @command{gawk} debugger only accepts source supplied with the @option{-f} op
Look forward to a future release when these and other missing features may
be added, and of course feel free to try to add them yourself!
+@node Debugging Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Programs rarely work correctly the first time. Finding bugs
+is @dfn{debugging} and a program that helps you find bugs is a
+@dfn{debugger}. @command{gawk} has a built-in debugger that works very
+similarly to the GNU Debugger, GDB.
+
+@item
+Debuggers let you step through your program one statement at a time,
+examine and change variable and array values, and do a number of other
+things that let you understand what your program is actually doing (as
+opposed to what it is supposed to do).
+
+@item
+Like most debuggers, the @command{gawk} debugger works in terms of stack
+frames, and lets you set both breakpoints (stop at a point in the code)
+and watchpoints (stop when a data value changes).
+
+@item
+The debugger command set is fairly complete, providing control over
+breakpoints, execution, viewing and changing data, working with the stack,
+getting information, and other tasks.
+
+@item
+If the @code{readline} library is available when @command{gawk} is
+compiled, it is used by the debugger to provide command-line history
+and editing.
+
+@end itemize
+
@node Arbitrary Precision Arithmetic
@chapter Arithmetic and Arbitrary Precision Arithmetic with @command{gawk}
@cindex arbitrary precision
@@ -28458,444 +29944,310 @@ be added, and of course feel free to try to add them yourself!
@cindex infinite precision
@cindex floating-point, numbers@comma{} arbitrary precision
-@cindex Knuth, Donald
-@quotation
-@i{There's a credibility gap: We don't know how much of the computer's answers
-to believe. Novice computer users solve this problem by implicitly trusting
-in the computer as an infallible authority; they tend to believe that all
-digits of a printed answer are significant. Disillusioned computer users have
-just the opposite approach; they are constantly afraid that their answers
-are almost meaningless.}@footnote{Donald E.@: Knuth.
-@cite{The Art of Computer Programming}. Volume 2,
-@cite{Seminumerical Algorithms}, third edition,
-1998, ISBN 0-201-89683-4, p.@: 229.}
-@author Donald Knuth
-@end quotation
-
-This @value{CHAPTER} discusses issues that you may encounter
-when performing arithmetic. It begins by discussing some of
-the general attributes of computer arithmetic, along with how
-this can influence what you see when running @command{awk} programs.
-This discussion applies to all versions of @command{awk}.
+This @value{CHAPTER} introduces some basic concepts relating to
+how computers do arithmetic and briefly lists the features in
+@command{gawk} for performing arbitrary precision floating point
+computations. It then proceeds to describe floating-point arithmetic,
+which is what @command{awk} uses for all its computations, including a
+discussion of arbitrary precision floating point arithmetic, which is
+a feature available only in @command{gawk}. It continues on to present
+arbitrary precision integers, and concludes with a description of some
+points where @command{gawk} and the POSIX standard are not quite in
+agreement.
-The @value{CHAPTER} then moves on to describe @dfn{arbitrary precision
-arithmetic}, a feature which is specific to @command{gawk}.
+@quotation NOTE
+Most users of @command{gawk} can safely skip this chapter.
+But if you want to do scientific calculations with @command{gawk},
+this is the place to be.
+@end quotation
@menu
-* General Arithmetic:: An introduction to computer arithmetic.
-* Floating-point Programming:: Effective Floating-point Programming.
-* Gawk and MPFR:: How @command{gawk} provides
- arbitrary-precision arithmetic.
-* Arbitrary Precision Floats:: Arbitrary Precision Floating-point Arithmetic
- with @command{gawk}.
-* Arbitrary Precision Integers:: Arbitrary Precision Integer Arithmetic with
- @command{gawk}.
+* Computer Arithmetic:: A quick intro to computer math.
+* Math Definitions:: Defining terms used.
+* MPFR features:: The MPFR features in @command{gawk}.
+* FP Math Caution:: Things to know.
+* Arbitrary Precision Integers:: Arbitrary Precision Integer Arithmetic with
+ @command{gawk}.
+* POSIX Floating Point Problems:: Standards Versus Existing Practice.
+* Floating point summary:: Summary of floating point discussion.
@end menu
-@node General Arithmetic
+@node Computer Arithmetic
@section A General Description of Computer Arithmetic
-@cindex integers
-@cindex floating-point, numbers
-@cindex numbers, floating-point
-Within computers, there are two kinds of numeric values: @dfn{integers}
-and @dfn{floating-point}.
-In school, integer values were referred to as ``whole'' numbers---that is,
-numbers without any fractional part, such as 1, 42, or @minus{}17.
+Until now, we have worked with data as either numbers or
+strings. Ultimately, however, computers represent everything in terms
+of @dfn{binary digits}, or @dfn{bits}. A decimal digit can take on any
+of 10 values: zero through nine. A binary digit can take on any of two
+values, zero or one. Using binary, computers (and computer software)
+can represent and manipulate numerical and character data. In general,
+the more bits you can use to represent a particular thing, the greater
+the range of possible values it can take on.
+
+Modern computers support at least two, and often more, ways to do
+arithmetic. Each kind of arithmetic uses a different representation
+(organization of the bits) for the numbers. The kinds of arithmetic
+that interest us are:
+
+@table @asis
+@item Decimal arithmetic
+This is the kind of arithmetic you learned in elementary school, using
+paper and pencil (and/or a calculator). In theory, numbers can have an
+arbitrary number of digits on either side (or both sides) of the decimal
+point, and the results of a computation are always exact.
+
+Some modern systems can do decimal arithmetic in hardware, but usually you
+need a special software library to provide access to these instructions.
+There are also libraries that do decimal arithmetic entirely in software.
+
+Despite the fact that some users expect @command{gawk} to be performing
+decimal arithmetic,@footnote{We don't know why they expect this, but
+they do.} it does not do so.
+
+@item Integer arithmetic
+In school, integer values were referred to as ``whole'' numbers---that
+is, numbers without any fractional part, such as 1, 42, or @minus{}17.
The advantage to integer numbers is that they represent values exactly.
-The disadvantage is that their range is limited. On most systems,
-this range is @minus{}2,147,483,648 to 2,147,483,647.
-However, many systems now support a range from
-@minus{}9,223,372,036,854,775,808 to 9,223,372,036,854,775,807.
+The disadvantage is that their range is limited.
@cindex unsigned integers
@cindex integers, unsigned
-Integer values come in two flavors: @dfn{signed} and @dfn{unsigned}.
-Signed values may be negative or positive, with the range of values just
-described.
-Unsigned values are always positive. On most systems,
-the range is from 0 to 4,294,967,295.
-However, many systems now support a range from
-0 to 18,446,744,073,709,551,615.
-
-@cindex double precision floating-point
-@cindex single precision floating-point
-Floating-point numbers represent what are called ``real'' numbers; i.e.,
-those that do have a fractional part, such as 3.1415927.
-The advantage to floating-point numbers is that they
-can represent a much larger range of values.
-The disadvantage is that there are numbers that they cannot represent
-exactly.
-@command{awk} uses @dfn{double precision} floating-point numbers, which
-can hold more digits than @dfn{single precision}
-floating-point numbers.
-@c Floating-point issues are discussed more fully in
-@c @ref{Floating Point Issues}.
-
-There a several important issues to be aware of, described next.
+In computers, integer values come in two flavors: @dfn{signed} and
+@dfn{unsigned}. Signed values may be negative or positive, whereas
+unsigned values are always positive (that is, greater than or equal
+to zero).
+
+In computer systems, integer arithmetic is exact, but the possible
+range of values is limited. Integer arithmetic is generally faster than
+floating point arithmetic.
+
+@item Floating point arithmetic
+Floating-point numbers represent what were called in school ``real''
+numbers; i.e., those that have a fractional part, such as 3.1415927.
+The advantage to floating-point numbers is that they can represent a
+much larger range of values than can integers. The disadvantage is that
+there are numbers that they cannot represent exactly.
+
+Modern systems support floating point arithmetic in hardware, with a
+limited range of values. There are software libraries that allow
+the use of arbitrary precision floating point calculations.
+
+POSIX @command{awk} uses @dfn{double precision} floating-point numbers, which
+can hold more digits than @dfn{single precision} floating-point numbers.
+@command{gawk} has facilities for performing arbitrary precision floating
+point arithmetic, which we describe in more detail shortly.
+@end table
-@menu
-* Floating Point Issues:: Stuff to know about floating-point numbers.
-* Integer Programming:: Effective integer programming.
-@end menu
+Computers work with integer and floating point values of different
+ranges. Integer values are usually either 32 or 64 bits in size. Single
+precision floating point values occupy 32 bits, whereas double precision
+floating point values occupy 64 bits. Floating point values are always
+signed. The possible ranges of values are shown in the following table.
+
+@multitable @columnfractions .34 .33 .33
+@headitem Numeric representation @tab Minimum value @tab Maximum value
+@item 32-bit signed integer @tab @minus{}2,147,483,648 @tab 2,147,483,647
+@item 32-bit unsigned integer @tab 0 @tab 4,294,967,295
+@item 64-bit signed integer @tab @minus{}9,223,372,036,854,775,808 @tab 9,223,372,036,854,775,807
+@item 64-bit unsigned integer @tab 0 @tab 18,446,744,073,709,551,615
+@item Single precision floating point (approximate) @tab @code{1.175494e-38} @tab @code{3.402823e+38}
+@item Double precision floating point (approximate) @tab @code{2.225074e-308} @tab @code{1.797693e+308}
+@end multitable
-@node Floating Point Issues
-@subsection Floating-Point Number Caveats
+@node Math Definitions
+@section Other Stuff To Know
-This @value{SECTION} describes some of the issues
-involved in using floating-point numbers.
+The rest of this @value{CHAPTER} uses a number of terms. Here are some
+informal definitions that should help you work your way through the material
+here.
-There is a very nice
-@uref{http://www.validlab.com/goldberg/paper.pdf, paper on floating-point arithmetic}
-by David Goldberg,
-``What Every Computer Scientist Should Know About Floating-point Arithmetic,''
-@cite{ACM Computing Surveys} @strong{23}, 1 (1991-03), 5-48.
-This is worth reading if you are interested in the details,
-but it does require a background in computer science.
+@table @dfn
+@item Accuracy
+A floating-point calculation's accuracy is how close it comes
+to the real (paper and pencil) value.
+
+@item Error
+The difference between what the result of a computation ``should be''
+and what it actually is. It is best to minimize error as much
+as possible.
+
+@item Exponent
+The order of magnitude of a value;
+some number of bits in a floating-point value store the exponent.
+
+@item Inf
+A special value representing infinity. Operations involving another
+number and infinity produce infinity.
+
+@item NaN
+``Not A Number.''@footnote{Thanks
+to Michael Brennan for this description, which I have paraphrased, and
+for the examples.}
+A special value that results from attempting a
+calculation that has no answer as a real number. In such a case,
+programs can either receive a floating-point exception, or get @code{NaN}
+back as the result. The IEEE 754 standard recommends that systems return
+@code{NaN}. Some examples:
-@menu
-* String Conversion Precision:: The String Value Can Lie.
-* Unexpected Results:: Floating Point Numbers Are Not Abstract
- Numbers.
-* POSIX Floating Point Problems:: Standards Versus Existing Practice.
-@end menu
+@table @code
+@item sqrt(-1)
+This makes sense in the range of complex numbers, but not in the
+range of real numbers, so the result is @code{NaN}.
-@node String Conversion Precision
-@subsubsection The String Value Can Lie
+@item log(-8)
+@minus{}8 is out of the domain of @code{log()}, so the result is @code{NaN}.
+@end table
-Internally, @command{awk} keeps both the numeric value
-(double precision floating-point) and the string value for a variable.
-Separately, @command{awk} keeps
-track of what type the variable has
-(@pxref{Typing and Comparison}),
-which plays a role in how variables are used in comparisons.
+@item Normalized
+How the significand (see later in this list) is usually stored. The
+value is adjusted so that the first bit is one, and then that leading
+one is assumed instead of physically stored. This provides one
+extra bit of precision.
-It is important to note that the string value for a number may not
-reflect the full value (all the digits) that the numeric value
-actually contains.
-The following program, @file{values.awk}, illustrates this:
+@item Precision
+The number of bits used to represent a floating-point number.
+The more bits, the more digits you can represent.
+Binary and decimal precisions are related approximately, according to the
+formula:
-@example
-@{
- sum = $1 + $2
- # see it for what it is
- printf("sum = %.12g\n", sum)
- # use CONVFMT
- a = "<" sum ">"
- print "a =", a
- # use OFMT
- print "sum =", sum
-@}
-@end example
+@display
+@iftex
+@math{prec = 3.322 @cdot dps}
+@end iftex
+@ifnottex
+@ifnotdocbook
+@var{prec} = 3.322 * @var{dps}
+@end ifnotdocbook
+@end ifnottex
+@docbook
+<emphasis>prec</emphasis> = 3.322 &sdot; <emphasis>dps</emphasis> @c
+@end docbook
+@end display
@noindent
-This program shows the full value of the sum of @code{$1} and @code{$2}
-using @code{printf}, and then prints the string values obtained
-from both automatic conversion (via @code{CONVFMT}) and
-from printing (via @code{OFMT}).
-
-Here is what happens when the program is run:
-
-@example
-$ @kbd{echo 3.654321 1.2345678 | awk -f values.awk}
-@print{} sum = 4.8888888
-@print{} a = <4.88889>
-@print{} sum = 4.88889
-@end example
+Here, @var{prec} denotes the binary precision
+(measured in bits) and @var{dps} (short for decimal places)
+is the decimal digits.
+
+@item Rounding mode
+How numbers are rounded up or down when necessary.
+More details are provided later.
+
+@item Significand
+A floating point value consists of the significand multiplied by 10
+to the power of the exponent. For example, in @code{1.2345e67},
+the significand is @code{1.2345}.
+
+@item Stability
+From @uref{http://en.wikipedia.org/wiki/Numerical_stability,
+the Wikipedia article on numerical stability}:
+``Calculations that can be proven not to magnify approximation errors
+are called @dfn{numerically stable}.''
+@end table
-This makes it clear that the full numeric value is different from
-what the default string representations show.
+See @uref{http://en.wikipedia.org/wiki/Accuracy_and_precision,
+the Wikipedia article on accuracy and precision} for more information
+on some of those terms.
-@code{CONVFMT}'s default value is @code{"%.6g"}, which yields a value with
-at most six significant digits. For some applications, you might want to
-change it to specify more precision.
-On most modern machines, most of the time,
-17 digits is enough to capture a floating-point number's
-value exactly.@footnote{Pathological cases can require up to
-752 digits (!), but we doubt that you need to worry about this.}
+On modern systems, floating-point hardware uses the representation and
+operations defined by the IEEE 754 standard.
+Three of the standard IEEE 754 types are 32-bit single precision,
+64-bit double precision and 128-bit quadruple precision.
+The standard also specifies extended precision formats
+to allow greater precisions and larger exponent ranges.
+(@command{awk} uses only the 64-bit double precision format.)
-@node Unexpected Results
-@subsubsection Floating Point Numbers Are Not Abstract Numbers
-
-@cindex floating-point, numbers
-Unlike numbers in the abstract sense (such as what you studied in high school
-or college arithmetic), numbers stored in computers are limited in certain ways.
-They cannot represent an infinite number of digits, nor can they always
-represent things exactly.
-In particular,
-floating-point numbers cannot
-always represent values exactly. Here is an example:
-
-@example
-$ @kbd{awk '@{ printf("%010d\n", $1 * 100) @}'}
-515.79
-@print{} 0000051579
-515.80
-@print{} 0000051579
-515.81
-@print{} 0000051580
-515.82
-@print{} 0000051582
-@kbd{Ctrl-d}
-@end example
+@ref{table-ieee-formats} lists the precision and exponent
+field values for the basic IEEE 754 binary formats:
-@noindent
-This shows that some values can be represented exactly,
-whereas others are only approximated. This is not a ``bug''
-in @command{awk}, but simply an artifact of how computers
-represent numbers.
+@float Table,table-ieee-formats
+@caption{Basic IEEE Format Context Values}
+@multitable @columnfractions .20 .20 .20 .20 .20
+@headitem Name @tab Total bits @tab Precision @tab emin @tab emax
+@item Single @tab 32 @tab 24 @tab @minus{}126 @tab +127
+@item Double @tab 64 @tab 53 @tab @minus{}1022 @tab +1023
+@item Quadruple @tab 128 @tab 113 @tab @minus{}16382 @tab +16383
+@end multitable
+@end float
@quotation NOTE
-It cannot be emphasized enough that the behavior just
-described is fundamental to modern computers. You will
-see this kind of thing happen in @emph{any} programming
-language using hardware floating-point numbers. It is @emph{not}
-a bug in @command{gawk}, nor is it something that can be ``just
-fixed.''
+The precision numbers include the implied leading one that gives them
+one extra bit of significand.
@end quotation
-@cindex negative zero
-@cindex positive zero
-@cindex zero@comma{} negative vs.@: positive
-Another peculiarity of floating-point numbers on modern systems
-is that they often have more than one representation for the number zero!
-In particular, it is possible to represent ``minus zero'' as well as
-regular, or ``positive'' zero.
-
-This example shows that negative and positive zero are distinct values
-when stored internally, but that they are in fact equal to each other,
-as well as to ``regular'' zero:
-
-@example
-$ @kbd{gawk 'BEGIN @{ mz = -0 ; pz = 0}
-> @kbd{printf "-0 = %g, +0 = %g, (-0 == +0) -> %d\n", mz, pz, mz == pz}
-> @kbd{printf "mz == 0 -> %d, pz == 0 -> %d\n", mz == 0, pz == 0}
-> @kbd{@}'}
-@print{} -0 = -0, +0 = 0, (-0 == +0) -> 1
-@print{} mz == 0 -> 1, pz == 0 -> 1
-@end example
-
-It helps to keep this in mind should you process numeric data
-that contains negative zero values; the fact that the zero is negative
-is noted and can affect comparisons.
-
-@node POSIX Floating Point Problems
-@subsubsection Standards Versus Existing Practice
-
-Historically, @command{awk} has converted any non-numeric looking string
-to the numeric value zero, when required. Furthermore, the original
-definition of the language and the original POSIX standards specified that
-@command{awk} only understands decimal numbers (base 10), and not octal
-(base 8) or hexadecimal numbers (base 16).
-
-Changes in the language of the
-2001 and 2004 POSIX standards can be interpreted to imply that @command{awk}
-should support additional features. These features are:
-
-@itemize @bullet
-@item
-Interpretation of floating point data values specified in hexadecimal
-notation (@samp{0xDEADBEEF}). (Note: data values, @emph{not}
-source code constants.)
-
-@item
-Support for the special IEEE 754 floating point values ``Not A Number''
-(NaN), positive Infinity (``inf'') and negative Infinity (``@minus{}inf'').
-In particular, the format for these values is as specified by the ISO 1999
-C standard, which ignores case and can allow machine-dependent additional
-characters after the @samp{nan} and allow either @samp{inf} or @samp{infinity}.
-@end itemize
-
-The first problem is that both of these are clear changes to historical
-practice:
+@node MPFR features
+@section Arbitrary Precision Arithmetic Features In @command{gawk}
-@itemize @bullet
-@item
-The @command{gawk} maintainer feels that supporting hexadecimal floating
-point values, in particular, is ugly, and was never intended by the
-original designers to be part of the language.
-
-@item
-Allowing completely alphabetic strings to have valid numeric
-values is also a very severe departure from historical practice.
-@end itemize
-
-The second problem is that the @code{gawk} maintainer feels that this
-interpretation of the standard, which requires a certain amount of
-``language lawyering'' to arrive at in the first place, was not even
-intended by the standard developers. In other words, ``we see how you
-got where you are, but we don't think that that's where you want to be.''
-
-Recognizing the above issues, but attempting to provide compatibility
-with the earlier versions of the standard,
-the 2008 POSIX standard added explicit wording to allow, but not require,
-that @command{awk} support hexadecimal floating point values and
-special values for ``Not A Number'' and infinity.
-
-Although the @command{gawk} maintainer continues to feel that
-providing those features is inadvisable,
-nevertheless, on systems that support IEEE floating point, it seems
-reasonable to provide @emph{some} way to support NaN and Infinity values.
-The solution implemented in @command{gawk} is as follows:
-
-@itemize @bullet
-@item
-With the @option{--posix} command-line option, @command{gawk} becomes
-``hands off.'' String values are passed directly to the system library's
-@code{strtod()} function, and if it successfully returns a numeric value,
-that is what's used.@footnote{You asked for it, you got it.}
-By definition, the results are not portable across
-different systems. They are also a little surprising:
+By default, @command{gawk} uses the double precision floating point values
+supplied by the hardware of the system it runs on. However, if it was
+compiled to do so, @command{gawk} uses the @uref{http://www.mpfr.org, GNU
+MPFR} and @uref{http://gmplib.org, GNU MP} (GMP) libraries for arbitrary
+precision arithmetic on numbers. You can see if MPFR support is available
+like so:
@example
-$ @kbd{echo nanny | gawk --posix '@{ print $1 + 0 @}'}
-@print{} nan
-$ @kbd{echo 0xDeadBeef | gawk --posix '@{ print $1 + 0 @}'}
-@print{} 3735928559
+$ @kbd{gawk --version}
+@print{} GNU Awk 4.1.1, API: 1.1 (GNU MPFR 3.1.0-p3, GNU MP 5.0.2)
+@print{} Copyright (C) 1989, 1991-2014 Free Software Foundation.
+@dots{}
@end example
-@item
-Without @option{--posix}, @command{gawk} interprets the four strings
-@samp{+inf},
-@samp{-inf},
-@samp{+nan},
-and
-@samp{-nan}
-specially, producing the corresponding special numeric values.
-The leading sign acts a signal to @command{gawk} (and the user)
-that the value is really numeric. Hexadecimal floating point is
-not supported (unless you also use @option{--non-decimal-data},
-which is @emph{not} recommended). For example:
+@noindent
+(You may see different version numbers than what's shown here. That's OK;
+what's important is to see that GNU MPFR and GNU MP are listed in
+the output.)
-@example
-$ @kbd{echo nanny | gawk '@{ print $1 + 0 @}'}
-@print{} 0
-$ @kbd{echo +nan | gawk '@{ print $1 + 0 @}'}
-@print{} nan
-$ @kbd{echo 0xDeadBeef | gawk '@{ print $1 + 0 @}'}
-@print{} 0
-@end example
+Additionally, there are a few elements available in the @code{PROCINFO}
+array to provide information about the MPFR and GMP libraries
+(@pxref{Auto-set}).
-@command{gawk} does ignore case in the four special values.
-Thus @samp{+nan} and @samp{+NaN} are the same.
-@end itemize
+The MPFR library provides precise control over precisions and rounding
+modes, and gives correctly rounded, reproducible, platform-independent
+results. With the @option{-M} command-line option,
+all floating-point arithmetic operators and numeric functions
+can yield results to any desired precision level supported by MPFR.
-@node Integer Programming
-@subsection Mixing Integers And Floating-point
-
-As has been mentioned already, @command{awk} uses hardware double
-precision with 64-bit IEEE binary floating-point representation
-for numbers on most systems. A large integer like 9,007,199,254,740,997
-has a binary representation that, although finite, is more than 53 bits long;
-it must also be rounded to 53 bits.
-The biggest integer that can be stored in a C @code{double} is usually the same
-as the largest possible value of a @code{double}. If your system @code{double}
-is an IEEE 64-bit @code{double}, this largest possible value is an integer and
-can be represented precisely. What more should one know about integers?
-
-If you want to know what is the largest integer, such that it and
-all smaller integers can be stored in 64-bit doubles without losing precision,
-then the answer is
-@iftex
-@math{2^{53}}.
-@end iftex
-@ifnottex
-@ifnotdocbook
-2^53.
-@end ifnotdocbook
-@end ifnottex
-@docbook
-2<superscript>53</superscript>. @c
-@end docbook
-The next representable number is the even number
-@iftex
-@math{2^{53} + 2},
-@end iftex
-@ifnottex
-@ifnotdocbook
-2^53 + 2,
-@end ifnotdocbook
-@end ifnottex
-@docbook
-2<superscript>53</superscript> &plus; 2, @c
-@end docbook
-meaning it is unlikely that you will be able to make
-@command{gawk} print
-@iftex
-@math{2^{53} + 1}
-@end iftex
-@ifnottex
-@ifnotdocbook
-2^53 + 1
-@end ifnotdocbook
-@end ifnottex
-@docbook
-2<superscript>53</superscript> &plus; 1 @c
-@end docbook
-in integer format.
-The range of integers exactly representable by a 64-bit double
-is
-@iftex
-@math{[-2^{53}, 2^{53}]}.
-@end iftex
-@ifnottex
-@ifnotdocbook
-[@minus{}2^53, 2^53].
-@end ifnotdocbook
-@end ifnottex
-@docbook
-[&minus;2<superscript>53</superscript>, 2<superscript>53</superscript>]. @c
-@end docbook
-If you ever see an integer outside this range in @command{awk}
-using 64-bit doubles, you have reason to be very suspicious about
-the accuracy of the output. Here is a simple program with erroneous output:
-
-@example
-$ @kbd{gawk 'BEGIN @{ i = 2^53 - 1; for (j = 0; j < 4; j++) print i + j @}'}
-@print{} 9007199254740991
-@print{} 9007199254740992
-@print{} 9007199254740992
-@print{} 9007199254740994
-@end example
+Two built-in variables, @code{PREC} and @code{ROUNDMODE},
+provide control over the working precision and the rounding mode.
+The precision and the rounding mode are set globally for every operation
+to follow.
+@xref{Auto-set}, for more information.
-The lesson is to not assume that any large integer printed by @command{awk}
-represents an exact result from your computation, especially if it wraps
-around on your screen.
+@node FP Math Caution
+@section Floating Point Arithmetic: Caveat Emptor!
-@node Floating-point Programming
-@section Understanding Floating-point Programming
+@quotation
+Math class is tough!
+@author Teen Talk Barbie, July 1992
+@end quotation
-Numerical programming is an extensive area; if you need to develop
-sophisticated numerical algorithms then @command{gawk} may not be
-the ideal tool, and this documentation may not be sufficient.
-It might require digesting a book or two@footnote{One recommended title is
-@cite{Numerical Computing with IEEE Floating Point Arithmetic}, Michael L.@:
-Overton, Society for Industrial and Applied Mathematics, 2004.
-ISBN: 0-89871-482-6, ISBN-13: 978-0-89871-482-1. See
-@uref{http://www.cs.nyu.edu/cs/faculty/overton/book}.}
-to really internalize how to compute
-with ideal accuracy and precision,
-and the result often depends on the particular application.
+This @value{SECTION} provides a high level overview of the issues
+involved when doing lots of floating-point arithmetic.@footnote{There
+is a very nice @uref{http://www.validlab.com/goldberg/paper.pdf,
+paper on floating-point arithmetic} by David Goldberg, ``What Every
+Computer Scientist Should Know About Floating-point Arithmetic,''
+@cite{ACM Computing Surveys} @strong{23}, 1 (1991-03), 5-48. This is
+worth reading if you are interested in the details, but it does require
+a background in computer science.}
+The discussion applies to both hardware and arbitrary-precision
+floating-point arithmetic.
-@quotation NOTE
-A floating-point calculation's @dfn{accuracy} is how close it comes
-to the real value. This is as opposed to the @dfn{precision}, which
-usually refers to the number of bits used to represent the number
-(see @uref{http://en.wikipedia.org/wiki/Accuracy_and_precision,
-the Wikipedia article} for more information).
+@quotation CAUTION
+The material here is purposely general. If you need to do serious
+computer arithmetic, you should do some research first, and not
+rely just on what we tell you.
@end quotation
-There are two options for doing floating-point calculations:
-hardware floating-point (as used by standard @command{awk} and
-the default for @command{gawk}), and @dfn{arbitrary-precision}
-floating-point, which is software based.
-From this point forward, this @value{CHAPTER}
-aims to provide enough information to understand both, and then
-will focus on @command{gawk}'s facilities for the latter.@footnote{If you
-are interested in other tools that perform arbitrary precision arithmetic,
-you may want to investigate the POSIX @command{bc} tool. See
-@uref{http://pubs.opengroup.org/onlinepubs/009695399/utilities/bc.html,
-the POSIX specification for it}, for more information.}
+@menu
+* Inexactness of computations:: Floating point math is not exact.
+* Getting Accuracy:: Getting more accuracy takes some work.
+* Try To Round:: Add digits and round.
+* Setting precision:: How to set the precision.
+* Setting the rounding mode:: How to set the rounding mode.
+@end menu
+
+@node Inexactness of computations
+@subsection Floating Point Arithmetic Is Not Exact
Binary floating-point representations and arithmetic are inexact.
Simple values like 0.1 cannot be precisely represented using
@@ -28907,7 +30259,16 @@ floating-point, you can set the precision before starting a computation,
but then you cannot be sure of the number of significant decimal places
in the final result.
-Sometimes, before you start to write any code, you should think more
+@menu
+* Inexact representation:: Numbers are not exactly represented.
+* Comparing FP Values:: How to compare floating point values.
+* Errors accumulate:: Errors get bigger as they go.
+@end menu
+
+@node Inexact representation
+@subsubsection Many Numbers Cannot Be Represented Exactly
+
+So, before you start to write any code, you should think
about what you really want and what's really happening. Consider the
two numbers in the following example:
@@ -28937,21 +30298,42 @@ you can always specify how much precision you would like in your output.
Usually this is a format string like @code{"%.15g"}, which when
used in the previous example, produces an output identical to the input.
+@node Comparing FP Values
+@subsubsection Be Careful Comparing Values
+
Because the underlying representation can be a little bit off from the exact value,
-comparing floating-point values to see if they are equal is generally not a good idea.
-Here is an example where it does not work like you expect:
+comparing floating-point values to see if they are exactly equal is generally a bad idea.
+Here is an example where it does not work like you would expect:
@example
$ @kbd{gawk 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
@print{} 0
@end example
-The loss of accuracy during a single computation with floating-point numbers
-usually isn't enough to worry about. However, if you compute a value
-which is the result of a sequence of floating point operations,
+The general wisdom when comparing floating-point values is to see if
+they are within some small range of each other (called a @dfn{delta},
+or @dfn{tolerance}).
+You have to decide how small a delta is important to you. Code to do this looks
+something like the following (assuming you have defined an absolute-value function @code{abs()}):
+
+@example
+delta = 0.00001 # for example
+difference = abs(a - b) # absolute difference of the two values
+if (difference < delta)
+ # all ok
+else
+ # not ok
+@end example
+
+@node Errors accumulate
+@subsubsection Errors Accumulate
+
+The loss of accuracy during a single computation with floating-point
+numbers usually isn't enough to worry about. However, if you compute a
+value which is the result of a sequence of floating-point operations,
the error can accumulate and greatly affect the computation itself.
-Here is an attempt to compute the value of the constant
-@value{PI} using one of its many series representations:
+Here is an attempt to compute the value of @value{PI} using one of its
+many series representations:
@example
BEGIN @{
@@ -28965,8 +30347,8 @@ BEGIN @{
@}
@end example
-When run, the early errors propagating through later computations
-cause the loop to terminate prematurely after an attempt to divide by zero.
+When run, the early errors propagate through later computations,
+causing the loop to terminate prematurely after attempting to divide by zero:
@example
$ @kbd{gawk -f pi.awk}
@@ -28993,23 +30375,88 @@ $ @kbd{gawk 'BEGIN @{}
@print{} 4
@end example
-Can computation using arbitrary precision help with the previous examples?
-If you are impatient to know, see
-@ref{Exact Arithmetic}.
+@node Getting Accuracy
+@subsection Getting The Accuracy You Need
+
+Can arbitrary precision arithmetic give exact results? There are
+no easy answers. The standard rules of algebra often do not apply
+when using floating-point arithmetic.
+Among other things, the distributive and associative laws
+do not hold completely, and order of operation may be important
+for your computation. Rounding error, cumulative precision loss
+and underflow are often troublesome.
+
+When @command{gawk} tests the expressions @samp{0.1 + 12.2} and
+@samp{12.3} for equality using the machine double precision arithmetic,
+it decides that they are not equal! (@xref{Comparing FP Values}.)
+You can get the result you want by increasing the precision; 56 bits in
+this case does the job:
+
+@example
+$ @kbd{gawk -M -v PREC=56 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
+@print{} 1
+@end example
+
+If adding more bits is good, perhaps adding even more bits of
+precision is better?
+Here is what happens if we use an even larger value of @code{PREC}:
+
+@example
+$ @kbd{gawk -M -v PREC=201 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
+@print{} 0
+@end example
+
+This is not a bug in @command{gawk} or in the MPFR library.
+It is easy to forget that the finite number of bits used to store the value
+is often just an approximation after proper rounding.
+The test for equality succeeds if and only if @emph{all} bits in the two operands
+are exactly the same. Since this is not necessarily true after floating-point
+computations with a particular precision and effective rounding rule,
+a straight test for equality may not work. Instead, compare the
+two numbers to see if they are within the desirable delta of each other.
+
+In applications where 15 or fewer decimal places suffice,
+hardware double precision arithmetic can be adequate, and is usually much faster.
+But you need to keep in mind that every floating-point operation
+can suffer a new rounding error with catastrophic consequences as illustrated
+by our earlier attempt to compute the value of @value{PI}.
+Extra precision can greatly enhance the stability and the accuracy
+of your computation in such cases.
+
+Repeated addition is not necessarily equivalent to multiplication
+in floating-point arithmetic. In the example in
+@ref{Errors accumulate}:
+
+@example
+$ @kbd{gawk 'BEGIN @{}
+> @kbd{for (d = 1.1; d <= 1.5; d += 0.1) # loop five times (?)}
+> @kbd{i++}
+> @kbd{print i}
+> @kbd{@}'}
+@print{} 4
+@end example
+
+@noindent
+you may or may not succeed in getting the correct result by choosing
+an arbitrarily large value for @code{PREC}. Reformulation of
+the problem at hand is often the correct approach in such situations.
+
+@node Try To Round
+@subsection Try A Few Extra Bits of Precision and Rounding
Instead of arbitrary precision floating-point arithmetic,
often all you need is an adjustment of your logic
or a different order for the operations in your calculation.
-The stability and the accuracy of the computation of the constant @value{PI}
+The stability and the accuracy of the computation of @value{PI}
in the earlier example can be enhanced by using the following
simple algebraic transformation:
@example
-(sqrt(x * x + 1) - 1) / x = x / (sqrt(x * x + 1) + 1)
+(sqrt(x * x + 1) - 1) / x @equiv{} x / (sqrt(x * x + 1) + 1)
@end example
@noindent
-After making this, change the program does converge to
+After making this change, the program converges to
@value{PI} in under 30 iterations:
@example
@@ -29024,358 +30471,22 @@ $ @kbd{gawk -f pi2.awk}
@print{} 3.141592653589797
@end example
-There is no need to be unduly suspicious about the results from
-floating-point arithmetic. The lesson to remember is that
-floating-point arithmetic is always more complex than arithmetic using
-pencil and paper. In order to take advantage of the power
-of computer floating-point, you need to know its limitations
-and work within them. For most casual use of floating-point arithmetic,
-you will often get the expected result in the end if you simply round
-the display of your final results to the correct number of significant
-decimal digits.
-
-As general advice, avoid presenting numerical data in a manner that
-implies better precision than is actually the case.
-
-@menu
-* Floating-point Representation:: Binary floating-point representation.
-* Floating-point Context:: Floating-point context.
-* Rounding Mode:: Floating-point rounding mode.
-@end menu
-
-@node Floating-point Representation
-@subsection Binary Floating-point Representation
-@cindex IEEE-754 format
-
-Although floating-point representations vary from machine to machine,
-the most commonly encountered representation is that defined by the
-IEEE 754 Standard. An IEEE-754 format value has three components:
-
-@itemize @bullet
-@item
-A sign bit telling whether the number is positive or negative.
-
-@item
-An @dfn{exponent}, @var{e}, giving its order of magnitude.
-
-@item
-A @dfn{significand}, @var{s},
-specifying the actual digits of the number.
-@end itemize
-
-The value of the
-number is then
-@iftex
-@math{s @cdot 2^e}.
-@end iftex
-@ifnottex
-@ifnotdocbook
-@var{s * 2^e}.
-@end ifnotdocbook
-@end ifnottex
-@docbook
-<emphasis>s &sdot; 2<superscript>e</superscript></emphasis>. @c
-@end docbook
-The first bit of a non-zero binary significand
-is always one, so the significand in an IEEE-754 format only includes the
-fractional part, leaving the leading one implicit.
-The significand is stored in @dfn{normalized} format,
-which means that the first bit is always a one.
-
-Three of the standard IEEE-754 types are 32-bit single precision,
-64-bit double precision and 128-bit quadruple precision.
-The standard also specifies extended precision formats
-to allow greater precisions and larger exponent ranges.
-
-@node Floating-point Context
-@subsection Floating-point Context
-@cindex context, floating-point
-
-A floating-point @dfn{context} defines the environment for arithmetic operations.
-It governs precision, sets rules for rounding, and limits the range for exponents.
-The context has the following primary components:
-
-@table @dfn
-@item Precision
-Precision of the floating-point format in bits.
-
-@item emax
-Maximum exponent allowed for the format.
-
-@item emin
-Minimum exponent allowed for the format.
-
-@item Underflow behavior
-The format may or may not support gradual underflow.
-
-@item Rounding
-The rounding mode of the context.
-@end table
-
-@ref{table-ieee-formats} lists the precision and exponent
-field values for the basic IEEE-754 binary formats:
-
-@float Table,table-ieee-formats
-@caption{Basic IEEE Format Context Values}
-@multitable @columnfractions .20 .20 .20 .20 .20
-@headitem Name @tab Total bits @tab Precision @tab emin @tab emax
-@item Single @tab 32 @tab 24 @tab @minus{}126 @tab +127
-@item Double @tab 64 @tab 53 @tab @minus{}1022 @tab +1023
-@item Quadruple @tab 128 @tab 113 @tab @minus{}16382 @tab +16383
-@end multitable
-@end float
-
-@quotation NOTE
-The precision numbers include the implied leading one that gives them
-one extra bit of significand.
-@end quotation
-
-A floating-point context can also determine which signals are treated
-as exceptions, and can set rules for arithmetic with special values.
-Please consult the IEEE-754 standard or other resources for details.
-
-@command{gawk} ordinarily uses the hardware double precision
-representation for numbers. On most systems, this is IEEE-754
-floating-point format, corresponding to 64-bit binary with 53 bits
-of precision.
-
-@quotation NOTE
-In case an underflow occurs, the standard allows, but does not require,
-the result from an arithmetic operation to be a number smaller than
-the smallest nonzero normalized number. Such numbers do
-not have as many significant digits as normal numbers, and are called
-@dfn{denormals} or @dfn{subnormals}. The alternative, simply returning a zero,
-is called @dfn{flush to zero}. The basic IEEE-754 binary formats
-support subnormal numbers.
-@end quotation
-
-@node Rounding Mode
-@subsection Floating-point Rounding Mode
-@cindex rounding mode, floating-point
-
-The @dfn{rounding mode} specifies the behavior for the results of numerical
-operations when discarding extra precision. Each rounding mode indicates
-how the least significant returned digit of a rounded result is to
-be calculated.
-@ref{table-rounding-modes} lists the IEEE-754 defined
-rounding modes:
-
-@float Table,table-rounding-modes
-@caption{IEEE 754 Rounding Modes}
-@multitable @columnfractions .45 .55
-@headitem Rounding Mode @tab IEEE Name
-@item Round to nearest, ties to even @tab @code{roundTiesToEven}
-@item Round toward plus Infinity @tab @code{roundTowardPositive}
-@item Round toward negative Infinity @tab @code{roundTowardNegative}
-@item Round toward zero @tab @code{roundTowardZero}
-@item Round to nearest, ties away from zero @tab @code{roundTiesToAway}
-@end multitable
-@end float
-
-The default mode @code{roundTiesToEven} is the most preferred,
-but the least intuitive. This method does the obvious thing for most values,
-by rounding them up or down to the nearest digit.
-For example, rounding 1.132 to two digits yields 1.13,
-and rounding 1.157 yields 1.16.
-
-However, when it comes to rounding a value that is exactly halfway between,
-things do not work the way you probably learned in school.
-In this case, the number is rounded to the nearest even digit.
-So rounding 0.125 to two digits rounds down to 0.12,
-but rounding 0.6875 to three digits rounds up to 0.688.
-You probably have already encountered this rounding mode when
-using @code{printf} to format floating-point numbers.
-For example:
-
-@example
-BEGIN @{
- x = -4.5
- for (i = 1; i < 10; i++) @{
- x += 1.0
- printf("%4.1f => %2.0f\n", x, x)
- @}
-@}
-@end example
-
-@noindent
-produces the following output when run on the author's system:@footnote{It
-is possible for the output to be completely different if the
-C library in your system does not use the IEEE-754 even-rounding
-rule to round halfway cases for @code{printf}.}
-
-@example
--3.5 => -4
--2.5 => -2
--1.5 => -2
--0.5 => 0
- 0.5 => 0
- 1.5 => 2
- 2.5 => 2
- 3.5 => 4
- 4.5 => 4
-@end example
-
-The theory behind the rounding mode @code{roundTiesToEven} is that
-it more or less evenly distributes upward and downward rounds
-of exact halves, which might cause any round-off error
-to cancel itself out. This is the default rounding mode used
-in IEEE-754 computing functions and operators.
-
-The other rounding modes are rarely used.
-Round toward positive infinity (@code{roundTowardPositive})
-and round toward negative infinity (@code{roundTowardNegative})
-are often used to implement interval arithmetic,
-where you adjust the rounding mode to calculate upper and lower bounds
-for the range of output. The @code{roundTowardZero}
-mode can be used for converting floating-point numbers to integers.
-The rounding mode @code{roundTiesToAway} rounds the result to the
-nearest number and selects the number with the larger magnitude
-if a tie occurs.
-
-Some numerical analysts will tell you that your choice of rounding style
-has tremendous impact on the final outcome, and advise you to wait until
-final output for any rounding. Instead, you can often avoid round-off error problems by
-setting the precision initially to some value sufficiently larger than
-the final desired precision, so that the accumulation of round-off error
-does not influence the outcome.
-If you suspect that results from your computation are
-sensitive to accumulation of round-off error,
-one way to be sure is to look for a significant difference in output
-when you change the rounding mode.
-
-@node Gawk and MPFR
-@section @command{gawk} + MPFR = Powerful Arithmetic
-@cindex MPFR
-@cindex GMP
-
-The rest of this @value{CHAPTER} describes how to use the arbitrary precision
-(also known as @dfn{multiple precision} or @dfn{infinite precision}) numeric
-capabilities in @command{gawk} to produce maximally accurate results
-when you need it.
-
-But first you should check if your version of
-@command{gawk} supports arbitrary precision arithmetic.
-The easiest way to find out is to look at the output of
-the following command:
-
-@example
-$ @kbd{./gawk --version}
-@print{} GNU Awk 4.1.1, API: 1.1 (GNU MPFR 3.1.0-p3, GNU MP 5.0.2)
-@print{} Copyright (C) 1989, 1991-2014 Free Software Foundation.
-@dots{}
-@end example
-
-@noindent
-(You may see different version numbers than what's shown here. That's OK;
-what's important is to see that GNU MPFR and GNU MP are listed in
-the output.)
-
-@command{gawk} uses the
-@uref{http://www.mpfr.org, GNU MPFR}
-and
-@uref{http://gmplib.org, GNU MP} (GMP)
-libraries for arbitrary precision
-arithmetic on numbers. So if you do not see the names of these libraries
-in the output, then your version of @command{gawk} does not support
-arbitrary precision arithmetic.
-
-Additionally,
-there are a few elements available in the @code{PROCINFO} array
-to provide information about the MPFR and GMP libraries.
-@xref{Auto-set}, for more information.
-
-@ignore
-Even if you aren't interested in arbitrary precision arithmetic, you
-may still benefit from knowing about how @command{gawk} handles numbers
-in general, and the limitations of doing arithmetic with ordinary
-@command{gawk} numbers.
-@end ignore
-
-
-@node Arbitrary Precision Floats
-@section Arbitrary Precision Floating-point Arithmetic with @command{gawk}
-
-@command{gawk} uses the GNU MPFR library
-for arbitrary precision floating-point arithmetic. The MPFR library
-provides precise control over precisions and rounding modes, and gives
-correctly rounded, reproducible, platform-independent results. With one
-of the command-line options @option{--bignum} or @option{-M},
-all floating-point arithmetic operators and numeric functions can yield
-results to any desired precision level supported by MPFR.
-Two built-in variables, @code{PREC} and @code{ROUNDMODE},
-provide control over the working precision and the rounding mode
-(@pxref{Setting Precision}, and
-@pxref{Setting Rounding Mode}).
-The precision and the rounding mode are set globally for every operation
-to follow.
-
-The default working precision for arbitrary precision floating-point values is
-53 bits, and the default value for @code{ROUNDMODE} is @code{"N"},
-which selects the IEEE-754 @code{roundTiesToEven} rounding mode
-(@pxref{Rounding Mode}).@footnote{The
-default precision is 53 bits, since according to the MPFR documentation,
-the library should be able to exactly reproduce all computations with
-double-precision machine floating-point numbers (@code{double} type
-in C), except the default exponent range is much wider and subnormal
-numbers are not implemented.}
-@command{gawk} uses the default exponent range in MPFR
-@iftex
-(@math{emax = 2^{30} - 1, emin = -emax})
-@end iftex
-@ifnottex
-@ifnotdocbook
-(@var{emax} = 2^30 @minus{} 1, @var{emin} = @minus{}@var{emax})
-@end ifnotdocbook
-@end ifnottex
-@docbook
-(<emphasis>emax</emphasis> = 2<superscript>30</superscript> &minus; 1, <emphasis>emin</emphasis> = &minus;<emphasis>emax</emphasis>) @c
-@end docbook
-for all floating-point contexts.
-There is no explicit mechanism to adjust the exponent range.
-MPFR does not implement subnormal numbers by default,
-and this behavior cannot be changed in @command{gawk}.
-
-@quotation NOTE
-When emulating an IEEE-754 format (@pxref{Setting Precision}),
-@command{gawk} internally adjusts the exponent range
-to the value defined for the format and also performs computations needed for
-gradual underflow (subnormal numbers).
-@end quotation
-
-@quotation NOTE
-MPFR numbers are variable-size entities, consuming only as much space as
-needed to store the significant digits. Since the performance using MPFR
-numbers pales in comparison to doing arithmetic using the underlying machine
-types, you should consider using only as much precision as needed by
-your program.
-@end quotation
-
-@menu
-* Setting Precision:: Setting the working precision.
-* Setting Rounding Mode:: Setting the rounding mode.
-* Floating-point Constants:: Representing floating-point constants.
-* Changing Precision:: Changing the precision of a number.
-* Exact Arithmetic:: Exact arithmetic with floating-point numbers.
-@end menu
-
-@node Setting Precision
-@subsection Setting the Working Precision
-@cindex @code{PREC} variable
-@cindex setting working precision
+@node Setting precision
+@subsection Setting The Precision
@command{gawk} uses a global working precision; it does not keep track of
the precision or accuracy of individual numbers. Performing an arithmetic
operation or calling a built-in function rounds the result to the current
-working precision. The default working precision is 53 bits, which can be
-modified using the built-in variable @code{PREC}. You can also set the
-value to one of the pre-defined case-insensitive strings
+working precision. The default working precision is 53 bits, which you can
+modify using the built-in variable @code{PREC}. You can also set the
+value to one of the predefined case-insensitive strings
shown in @ref{table-predefined-precision-strings},
-to emulate an IEEE-754 binary format.
+to emulate an IEEE 754 binary format.
@float Table,table-predefined-precision-strings
-@caption{Predefined precision strings for @code{PREC}}
+@caption{Predefined Precision Strings For @code{PREC}}
@multitable {@code{"double"}} {12345678901234567890123456789012345}
-@headitem @code{PREC} @tab IEEE-754 Binary Format
+@headitem @code{PREC} @tab IEEE 754 Binary Format
@item @code{"half"} @tab 16-bit half-precision.
@item @code{"single"} @tab Basic 32-bit single precision.
@item @code{"double"} @tab Basic 64-bit double precision.
@@ -29394,57 +30505,34 @@ $ @kbd{gawk -M -v PREC=100 'BEGIN @{ x = 1.0e-400; print x + 0}
@print{} 0
@end example
-Binary and decimal precisions are related approximately, according to the
-formula:
+@quotation CAUTION
+Be wary of floating-point constants! When reading a floating-point
+constant from program source code, @command{gawk} uses the default
+precision (that of a C @code{double}), unless overridden by an assignment
+to the special variable @code{PREC} on the command line, to store it
+internally as an MPFR number. Changing the precision using @code{PREC}
+in the program text does @emph{not} change the precision of a constant.
+
+If you need to represent a floating-point constant at a higher precision
+than the default and cannot use a command-line assignment to @code{PREC},
+you should either specify the constant as a string, or as a rational
+number, whenever possible. The following example illustrates the
+differences among various ways to print a floating-point constant:
+@end quotation
-@iftex
-@math{prec = 3.322 @cdot dps}
-@end iftex
-@ifnottex
-@ifnotdocbook
-@var{prec} = 3.322 * @var{dps}
-@end ifnotdocbook
-@end ifnottex
-@docbook
-<para>
-<emphasis>prec</emphasis> = 3.322 &sdot; <emphasis>dps</emphasis> @c
-</para>
-@end docbook
+@example
+$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", 0.1) @}'}
+@print{} 0.1000000000000000055511151
+$ @kbd{gawk -M -v PREC=113 'BEGIN @{ printf("%0.25f\n", 0.1) @}'}
+@print{} 0.1000000000000000000000000
+$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", "0.1") @}'}
+@print{} 0.1000000000000000000000000
+$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", 1/10) @}'}
+@print{} 0.1000000000000000000000000
+@end example
-@noindent
-Here, @var{prec} denotes the binary precision
-(measured in bits) and @var{dps} (short for decimal places)
-is the decimal digits. We can easily calculate how many decimal
-digits the 53-bit significand of an IEEE double is equivalent to:
-53 / 3.322 which is equal to about 15.95.
-But what does 15.95 digits actually mean? It depends whether you are
-concerned about how many digits you can rely on, or how many digits
-you need.
-
-It is important to know how many bits it takes to uniquely identify
-a double-precision value (the C type @code{double}). If you want to
-convert from @code{double} to decimal and back to @code{double} (e.g.,
-saving a @code{double} representing an intermediate result to a file, and
-later reading it back to restart the computation), then a few more decimal
-digits are required. 17 digits is generally enough for a @code{double}.
-
-It can also be important to know what decimal numbers can be uniquely
-represented with a @code{double}. If you want to convert
-from decimal to @code{double} and back again, 15 digits is the most that
-you can get. Stated differently, you should not present
-the numbers from your floating-point computations with more than 15
-significant digits in them.
-
-Conversely, it takes a precision of 332 bits to hold an approximation
-of the constant @value{PI} that is accurate to 100 decimal places.
-
-You should always add some extra bits in order to avoid the confusing round-off
-issues that occur because numbers are stored internally in binary.
-
-@node Setting Rounding Mode
-@subsection Setting the Rounding Mode
-@cindex @code{ROUNDMODE} variable
-@cindex setting rounding mode
+@node Setting the rounding mode
+@subsection Setting The Rounding Mode
The @code{ROUNDMODE} variable provides
program level control over the rounding mode.
@@ -29463,184 +30551,91 @@ rounding modes is shown in @ref{table-gawk-rounding-modes}.
@end multitable
@end float
-@code{ROUNDMODE} has the default value @code{"N"},
-which selects the IEEE-754 rounding mode @code{roundTiesToEven}.
-In @ref{table-gawk-rounding-modes}, @code{"A"} is listed to select the IEEE-754 mode
-@code{roundTiesToAway}. This is only available
-if your version of the MPFR library supports it; otherwise setting
-@code{ROUNDMODE} to this value has no effect. @xref{Rounding Mode},
-for the meanings of the various rounding modes.
-
-Here is an example of how to change the default rounding behavior of
-@code{printf}'s output:
-
-@example
-$ @kbd{gawk -M -v ROUNDMODE="Z" 'BEGIN @{ printf("%.2f\n", 1.378) @}'}
-@print{} 1.37
-@end example
-
-@node Floating-point Constants
-@subsection Representing Floating-point Constants
-@cindex constants, floating-point
-
-Be wary of floating-point constants! When reading a floating-point constant
-from program source code, @command{gawk} uses the default precision,
-unless overridden
-by an assignment to the special variable @code{PREC} on the command
-line, to store it internally as a MPFR number.
-Changing the precision using @code{PREC} in the program text does
-@emph{not} change the precision of a constant. If you need to
-represent a floating-point constant at a higher precision than the
-default and cannot use a command line assignment to @code{PREC},
-you should either specify the constant as a string, or
-as a rational number, whenever possible. The following example
-illustrates the differences among various ways to
-print a floating-point constant:
+@code{ROUNDMODE} has the default value @code{"N"}, which
+selects the IEEE 754 rounding mode @code{roundTiesToEven}.
+In @ref{table-gawk-rounding-modes}, the value @code{"A"} selects
+@code{roundTiesToAway}. This is only available if your version of the
+MPFR library supports it; otherwise setting @code{ROUNDMODE} to @code{"A"}
+has no effect.
-@example
-$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", 0.1) @}'}
-@print{} 0.1000000000000000055511151
-$ @kbd{gawk -M -v PREC=113 'BEGIN @{ printf("%0.25f\n", 0.1) @}'}
-@print{} 0.1000000000000000000000000
-$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", "0.1") @}'}
-@print{} 0.1000000000000000000000000
-$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", 1/10) @}'}
-@print{} 0.1000000000000000000000000
-@end example
-
-In the first case, the number is stored with the default precision of 53 bits.
-
-@node Changing Precision
-@subsection Changing the Precision of a Number
-@cindex changing precision of a number
-
-@cindex Laurie, Dirk
-@quotation
-@i{The point is that in any variable-precision package,
-a decision is made on how to treat numbers given as data,
-or arising in intermediate results, which are represented in
-floating-point format to a precision lower than working precision.
-Do we promote them to full membership of the high-precision club,
-or do we treat them and all their associates as second-class citizens?
-Sometimes the first course is proper, sometimes the second, and it takes
-careful analysis to tell which.}@footnote{Dirk Laurie.
-@cite{Variable-precision Arithmetic Considered Perilous --- A Detective Story}.
-Electronic Transactions on Numerical Analysis. Volume 28, pp. 168-173, 2008.}
-@author Dirk Laurie
-@end quotation
+The default mode @code{roundTiesToEven} is the most preferred,
+but the least intuitive. This method does the obvious thing for most values,
+by rounding them up or down to the nearest digit.
+For example, rounding 1.132 to two digits yields 1.13,
+and rounding 1.157 yields 1.16.
-@command{gawk} does not implicitly modify the precision of any previously
-computed results when the working precision is changed with an assignment
-to @code{PREC}. The precision of a number is always the one that was
-used at the time of its creation, and there is no way for the user
-to explicitly change it afterwards. However, since the result of a
-floating-point arithmetic operation is always an arbitrary precision
-floating-point value---with a precision set by the value of @code{PREC}---one of the
-following workarounds effectively accomplishes the desired behavior:
+However, when it comes to rounding a value that is exactly halfway between,
+things do not work the way you probably learned in school.
+In this case, the number is rounded to the nearest even digit.
+So rounding 0.125 to two digits rounds down to 0.12,
+but rounding 0.6875 to three digits rounds up to 0.688.
+You probably have already encountered this rounding mode when
+using @code{printf} to format floating-point numbers.
+For example:
@example
-x = x + 0.0
+BEGIN @{
+ x = -4.5
+ for (i = 1; i < 10; i++) @{
+ x += 1.0
+ printf("%4.1f => %2.0f\n", x, x)
+ @}
+@}
@end example
@noindent
-or:
-
-@example
-x += 0.0
-@end example
-
-@node Exact Arithmetic
-@subsection Exact Arithmetic with Floating-point Numbers
-
-@quotation CAUTION
-Never depend on the exactness of floating-point arithmetic,
-even for apparently simple expressions!
-@end quotation
-
-Can arbitrary precision arithmetic give exact results? There are
-no easy answers. The standard rules of algebra often do not apply
-when using floating-point arithmetic.
-Among other things, the distributive and associative laws
-do not hold completely, and order of operation may be important
-for your computation. Rounding error, cumulative precision loss
-and underflow are often troublesome.
-
-When @command{gawk} tests the expressions @samp{0.1 + 12.2} and @samp{12.3}
-for equality
-using the machine double precision arithmetic, it decides that they
-are not equal!
-(@xref{Floating-point Programming}.)
-You can get the result you want by increasing the precision;
-56 bits in this case will get the job done:
-
-@example
-$ @kbd{gawk -M -v PREC=56 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
-@print{} 1
-@end example
-
-If adding more bits is good, perhaps adding even more bits of
-precision is better?
-Here is what happens if we use an even larger value of @code{PREC}:
-
-@example
-$ @kbd{gawk -M -v PREC=201 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
-@print{} 0
-@end example
-
-This is not a bug in @command{gawk} or in the MPFR library.
-It is easy to forget that the finite number of bits used to store the value
-is often just an approximation after proper rounding.
-The test for equality succeeds if and only if @emph{all} bits in the two operands
-are exactly the same. Since this is not necessarily true after floating-point
-computations with a particular precision and effective rounding rule,
-a straight test for equality may not work.
-
-So, don't assume that floating-point values can be compared for equality.
-You should also exercise caution when using other forms of comparisons.
-The standard way to compare between floating-point numbers is to determine
-how much error (or @dfn{tolerance}) you will allow in a comparison and
-check to see if one value is within this error range of the other.
-
-In applications where 15 or fewer decimal places suffice,
-hardware double precision arithmetic can be adequate, and is usually much faster.
-But you do need to keep in mind that every floating-point operation
-can suffer a new rounding error with catastrophic consequences as illustrated
-by our earlier attempt to compute the value of the constant @value{PI}
-(@pxref{Floating-point Programming}).
-Extra precision can greatly enhance the stability and the accuracy
-of your computation in such cases.
-
-Repeated addition is not necessarily equivalent to multiplication
-in floating-point arithmetic. In the example in
-@ref{Floating-point Programming}:
+produces the following output when run on the author's system:@footnote{It
+is possible for the output to be completely different if the
+C library in your system does not use the IEEE 754 even-rounding
+rule to round halfway cases for @code{printf}.}
@example
-$ @kbd{gawk 'BEGIN @{}
-> @kbd{for (d = 1.1; d <= 1.5; d += 0.1) # loop five times (?)}
-> @kbd{i++}
-> @kbd{print i}
-> @kbd{@}'}
-@print{} 4
+-3.5 => -4
+-2.5 => -2
+-1.5 => -2
+-0.5 => 0
+ 0.5 => 0
+ 1.5 => 2
+ 2.5 => 2
+ 3.5 => 4
+ 4.5 => 4
@end example
-@noindent
-you may or may not succeed in getting the correct result by choosing
-an arbitrarily large value for @code{PREC}. Reformulation of
-the problem at hand is often the correct approach in such situations.
+The theory behind @code{roundTiesToEven} is that it more or less evenly
+distributes upward and downward rounds of exact halves, which might
+cause any accumulating round-off error to cancel itself out. This is the
+default rounding mode for IEEE 754 computing functions and operators.
+
+The other rounding modes are rarely used. Round toward positive infinity
+(@code{roundTowardPositive}) and round toward negative infinity
+(@code{roundTowardNegative}) are often used to implement interval
+arithmetic, where you adjust the rounding mode to calculate upper and
+lower bounds for the range of output. The @code{roundTowardZero} mode can
+be used for converting floating-point numbers to integers. The rounding
+mode @code{roundTiesToAway} rounds the result to the nearest number and
+selects the number with the larger magnitude if a tie occurs.
+
+Some numerical analysts will tell you that your choice of rounding
+style has tremendous impact on the final outcome, and advise you to
+wait until final output for any rounding. Instead, you can often avoid
+round-off error problems by setting the precision initially to some
+value sufficiently larger than the final desired precision, so that
+the accumulation of round-off error does not influence the outcome.
+If you suspect that results from your computation are sensitive to
+accumulation of round-off error, look for a significant difference in
+output when you change the rounding mode to be sure.
@node Arbitrary Precision Integers
@section Arbitrary Precision Integer Arithmetic with @command{gawk}
@cindex integers, arbitrary precision
@cindex arbitrary precision integers
-If one of the options @option{--bignum} or @option{-M} is specified,
-@command{gawk} performs all
-integer arithmetic using GMP arbitrary precision integers.
-Any number that looks like an integer in a program source or data file
-is stored as an arbitrary precision integer.
-The size of the integer is limited only by your computer's memory.
-The current floating-point context has no effect on operations involving integers.
-For example, the following computes
+When given the @option{-M} option,
+@command{gawk} performs all integer arithmetic using GMP arbitrary
+precision integers. Any number that looks like an integer in a source
+or @value{DF} is stored as an arbitrary precision integer. The size
+of the integer is limited only by the available memory. For example,
+the following computes
@iftex
@math{5^{4^{3^{2}}}},
@end iftex
@@ -29653,7 +30648,7 @@ For example, the following computes
5<superscript>4<superscript>3<superscript>2</superscript></superscript></superscript>, @c
@end docbook
the result of which is beyond the
-limits of ordinary @command{gawk} numbers:
+limits of ordinary hardware double-precision floating point values:
@example
$ @kbd{gawk -M 'BEGIN @{}
@@ -29665,9 +30660,9 @@ $ @kbd{gawk -M 'BEGIN @{}
@print{} 62060698786608744707 ... 92256259918212890625
@end example
-If you were to compute the same value using arbitrary precision
-floating-point values instead, the precision needed for correct output
-(using the formula
+If instead you were to compute the same value using arbitrary precision
+floating-point values, the precision needed for correct output (using
+the formula
@iftex
@math{prec = 3.322 @cdot dps}),
would be @math{3.322 @cdot 183231},
@@ -29689,8 +30684,8 @@ The result from an arithmetic operation with an integer and a floating-point val
is a floating-point value with a precision equal to the working precision.
The following program calculates the eighth term in
Sylvester's sequence@footnote{Weisstein, Eric W.
-@cite{Sylvester's Sequence}. From MathWorld---A Wolfram Web Resource.
-@url{http://mathworld.wolfram.com/SylvestersSequence.html}}
+@cite{Sylvester's Sequence}. From MathWorld---A Wolfram Web Resource
+@w{(@url{http://mathworld.wolfram.com/SylvestersSequence.html}).}}
using a recurrence:
@example
@@ -29710,15 +30705,15 @@ floating-point results exactly. You can either increase the precision
@samp{2.0} with an integer, to perform all computations using integer
arithmetic to get the correct output.
-It will sometimes be necessary for @command{gawk} to implicitly convert an
-arbitrary precision integer into an arbitrary precision floating-point value.
-This is primarily because the MPFR library does not always provide the
-relevant interface to process arbitrary precision integers or mixed-mode
-numbers as needed by an operation or function.
-In such a case, the precision is set to the minimum value necessary
-for exact conversion, and the working precision is not used for this purpose.
-If this is not what you need or want, you can employ a subterfuge
-like this:
+Sometimes @command{gawk} must implicitly convert an arbitrary precision
+integer into an arbitrary precision floating-point value. This is
+primarily because the MPFR library does not always provide the relevant
+interface to process arbitrary precision integers or mixed-mode numbers
+as needed by an operation or function. In such a case, the precision is
+set to the minimum value necessary for exact conversion, and the working
+precision is not used for this purpose. If this is not what you need or
+want, you can employ a subterfuge, and convert the integer to floating
+point first, like this:
@example
gawk -M 'BEGIN @{ n = 13; print (n + 0.0) % 2.0 @}'
@@ -29738,6 +30733,215 @@ to just use the following:
gawk -M 'BEGIN @{ n = 13; print n % 2 @}'
@end example
+When dividing two arbitrary precision integers with either
+@samp{/} or @samp{%}, the result is typically an arbitrary
+precision floating point value (unless the denominator evenly
+divides into the numerator). In order to do integer division
+or remainder with arbitrary precision integers, use the built-in
+@code{div()} function (@pxref{Numeric Functions}).
+
+You can simulate the @code{div()} function in standard @command{awk}
+using this user-defined function:
+
+@example
+@c file eg/lib/div.awk
+# div --- do integer division
+
+@c endfile
+@ignore
+@c file eg/lib/div.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# July, 2014
+
+@c endfile
+
+@end ignore
+@c file eg/lib/div.awk
+function div(numerator, denominator, result, i)
+@{
+ split("", result)
+
+ numerator = int(numerator)
+ denominator = int(denominator)
+ result["quotient"] = int(numerator / denominator)
+ result["remainder"] = int(numerator % denominator)
+
+ return 0.0
+@}
+@c endfile
+@end example
+
+@node POSIX Floating Point Problems
+@section Standards Versus Existing Practice
+
+Historically, @command{awk} has converted any non-numeric looking string
+to the numeric value zero, when required. Furthermore, the original
+definition of the language and the original POSIX standards specified that
+@command{awk} only understands decimal numbers (base 10), and not octal
+(base 8) or hexadecimal numbers (base 16).
+
+Changes in the language of the
+2001 and 2004 POSIX standards can be interpreted to imply that @command{awk}
+should support additional features. These features are:
+
+@itemize @value{BULLET}
+@item
+Interpretation of floating point data values specified in hexadecimal
+notation (e.g., @code{0xDEADBEEF}). (Note: data values, @emph{not}
+source code constants.)
+
+@item
+Support for the special IEEE 754 floating point values ``Not A Number''
+(NaN), positive Infinity (``inf'') and negative Infinity (``@minus{}inf'').
+In particular, the format for these values is as specified by the ISO 1999
+C standard, which ignores case and can allow implementation-dependent additional
+characters after the @samp{nan} and allow either @samp{inf} or @samp{infinity}.
+@end itemize
+
+The first problem is that both of these are clear changes to historical
+practice:
+
+@itemize @value{BULLET}
+@item
+The @command{gawk} maintainer feels that supporting hexadecimal floating
+point values, in particular, is ugly, and was never intended by the
+original designers to be part of the language.
+
+@item
+Allowing completely alphabetic strings to have valid numeric
+values is also a very severe departure from historical practice.
+@end itemize
+
+The second problem is that the @command{gawk} maintainer feels that this
+interpretation of the standard, which requires a certain amount of
+``language lawyering'' to arrive at in the first place, was not even
+intended by the standard developers. In other words, ``we see how you
+got where you are, but we don't think that that's where you want to be.''
+
+Recognizing the above issues, but attempting to provide compatibility
+with the earlier versions of the standard,
+the 2008 POSIX standard added explicit wording to allow, but not require,
+that @command{awk} support hexadecimal floating point values and
+special values for ``Not A Number'' and infinity.
+
+Although the @command{gawk} maintainer continues to feel that
+providing those features is inadvisable,
+nevertheless, on systems that support IEEE floating point, it seems
+reasonable to provide @emph{some} way to support NaN and Infinity values.
+The solution implemented in @command{gawk} is as follows:
+
+@itemize @value{BULLET}
+@item
+With the @option{--posix} command-line option, @command{gawk} becomes
+``hands off.'' String values are passed directly to the system library's
+@code{strtod()} function, and if it successfully returns a numeric value,
+that is what's used.@footnote{You asked for it, you got it.}
+By definition, the results are not portable across
+different systems. They are also a little surprising:
+
+@example
+$ @kbd{echo nanny | gawk --posix '@{ print $1 + 0 @}'}
+@print{} nan
+$ @kbd{echo 0xDeadBeef | gawk --posix '@{ print $1 + 0 @}'}
+@print{} 3735928559
+@end example
+
+@item
+Without @option{--posix}, @command{gawk} interprets the four strings
+@samp{+inf},
+@samp{-inf},
+@samp{+nan},
+and
+@samp{-nan}
+specially, producing the corresponding special numeric values.
+The leading sign acts as a signal to @command{gawk} (and the user)
+that the value is really numeric. Hexadecimal floating point is
+not supported (unless you also use @option{--non-decimal-data},
+which is @emph{not} recommended). For example:
+
+@example
+$ @kbd{echo nanny | gawk '@{ print $1 + 0 @}'}
+@print{} 0
+$ @kbd{echo +nan | gawk '@{ print $1 + 0 @}'}
+@print{} nan
+$ @kbd{echo 0xDeadBeef | gawk '@{ print $1 + 0 @}'}
+@print{} 0
+@end example
+
+@command{gawk} ignores case in the four special values.
+Thus @samp{+nan} and @samp{+NaN} are the same.
+@end itemize
+
+@node Floating point summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Most computer arithmetic is done using either integers or floating-point
+values. The default for @command{awk} is to use double-precision
+floating-point values.
+
+@item
+In the early 1990's, Barbie mistakenly said ``Math class is tough!''
+While math isn't tough, floating-point arithmetic isn't the same
+as pencil and paper math, and care must be taken:
+
+@c nested list
+@itemize @value{MINUS}
+@item
+Not all numbers can be represented exactly.
+
+@item
+Comparing values should use a delta, instead of being done directly
+with @samp{==} and @samp{!=}.
+
+@item
+Errors accumulate.
+
+@item
+Operations are not always truly associative or distributive.
+@end itemize
+
+@item
+Increasing the accuracy can help, but it is not a panacea.
+
+@item
+Often, increasing the accuracy and then rounding to the desired
+number of digits produces reasonable results.
+
+@item
+Use @option{-M} (or @option{--bignum}) to enable MPFR
+arithmetic. Use @code{PREC} to set the precision in bits, and
+@code{ROUNDMODE} to set the IEEE 754 rounding mode.
+
+@item
+With @option{-M}, @command{gawk} performs
+arbitrary precision integer arithmetic using the GMP library.
+This is faster and more space efficient than using MPFR for
+the same calculations.
+
+@item
+There are several ``dark corners'' with respect to floating-point
+numbers where @command{gawk} disagrees with the POSIX standard.
+It pays to be aware of them.
+
+@item
+Overall, there is no need to be unduly suspicious about the results from
+floating-point arithmetic. The lesson to remember is that floating-point
+arithmetic is always more complex than arithmetic using pencil and
+paper. In order to take advantage of the power of computer floating-point,
+you need to know its limitations and work within them. For most casual
+use of floating-point arithmetic, you will often get the expected result
+if you simply round the display of your final results to the correct number
+of significant decimal digits.
+
+@item
+As general advice, avoid presenting numerical data in a manner that
+implies better precision than is actually the case.
+
+@end itemize
+
@node Dynamic Extensions
@chapter Writing Extensions for @command{gawk}
@cindex dynamically loaded extensions
@@ -29770,6 +30974,8 @@ When @option{--sandbox} is specified, extensions are disabled
* Extension Samples:: The sample extensions that ship with
@code{gawk}.
* gawkextlib:: The @code{gawkextlib} project.
+* Extension summary:: Extension summary.
+* Extension Exercises:: Exercises.
@end menu
@node Extension Intro
@@ -29795,8 +31001,15 @@ the facilities that the API provides and how to use
them, and presents a small sample extension. In addition, it documents
the sample extensions included in the @command{gawk} distribution,
and describes the @code{gawkextlib} project.
+@ifclear FOR_PRINT
@xref{Extension Design}, for a discussion of the extension mechanism
goals and design.
+@end ifclear
+@ifset FOR_PRINT
+See @uref{http://www.gnu.org/software/gawk/manual/html_node/Extension-Design.html}
+for a discussion of the extension mechanism
+goals and design.
+@end ifset
@node Plugin License
@section Extension Licensing
@@ -29822,31 +31035,33 @@ Communication between
is loaded, it is passed a pointer to a @code{struct} whose fields are
function pointers.
@ifnotdocbook
-This is shown in @ref{load-extension}.
+This is shown in @ref{figure-load-extension}.
@end ifnotdocbook
@ifdocbook
-This is shown in @inlineraw{docbook, <xref linkend="load-extension"/>}.
+This is shown in @inlineraw{docbook, <xref linkend="figure-load-extension"/>}.
@end ifdocbook
@ifnotdocbook
-@float Figure,load-extension
+@float Figure,figure-load-extension
@caption{Loading The Extension}
@c FIXME: One day, it should not be necessary to have two cases,
@c but rather just the one without the "txt" final argument.
@c This applies to the other figures as well.
@ifinfo
-@center @image{api-figure1, , , Loading the extension, txt}
+@center @image{api-figure1, , , Loading The Extension, txt}
@end ifinfo
@ifnotinfo
-@center @image{api-figure1, , , Loading the extension}
+@center @image{api-figure1, , , Loading The Extension}
@end ifnotinfo
@end float
@end ifnotdocbook
@docbook
-<figure id="load-extension">
-<title>Loading the extension</title>
-<graphic fileref="api-figure1.eps"/>
+<figure id="figure-load-extension" float="0">
+<title>Loading The Extension</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="api-figure1.png" format="PNG"/></imageobject>
+</mediaobject>
</figure>
@end docbook
@@ -29855,28 +31070,30 @@ function pointers, at runtime, without needing (link-time) access
to @command{gawk}'s symbols. One of these function pointers is to a
function for ``registering'' new built-in functions.
@ifnotdocbook
-This is shown in @ref{load-new-function}.
+This is shown in @ref{figure-load-new-function}.
@end ifnotdocbook
@ifdocbook
-This is shown in @inlineraw{docbook, <xref linkend="load-new-function"/>}.
+This is shown in @inlineraw{docbook, <xref linkend="figure-load-new-function"/>}.
@end ifdocbook
@ifnotdocbook
-@float Figure,load-new-function
+@float Figure,figure-load-new-function
@caption{Loading The New Function}
@ifinfo
-@center @image{api-figure2, , , Loading the new function, txt}
+@center @image{api-figure2, , , Loading The New Function, txt}
@end ifinfo
@ifnotinfo
-@center @image{api-figure2, , , Loading the new function}
+@center @image{api-figure2, , , Loading The New Function}
@end ifnotinfo
@end float
@end ifnotdocbook
@docbook
-<figure id="load-new-function">
-<title>Loading the new function</title>
-<graphic fileref="api-figure2.eps"/>
+<figure id="figure-load-new-function" float="0">
+<title>Loading The New Function</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="api-figure2.png" format="PNG"/></imageobject>
+</mediaobject>
</figure>
@end docbook
@@ -29886,14 +31103,14 @@ provide the new feature (@code{do_chdir()}, for example). @command{gawk}
associates the function pointer with a name and can then call it, using a
defined calling convention.
@ifnotdocbook
-This is shown in @ref{call-new-function}.
+This is shown in @ref{figure-call-new-function}.
@end ifnotdocbook
@ifdocbook
-This is shown in @inlineraw{docbook, <xref linkend="call-new-function"/>}.
+This is shown in @inlineraw{docbook, <xref linkend="figure-call-new-function"/>}.
@end ifdocbook
@ifnotdocbook
-@float Figure,call-new-function
+@float Figure,figure-call-new-function
@caption{Calling The New Function}
@ifinfo
@center @image{api-figure3, , , Calling the new function, txt}
@@ -29905,9 +31122,11 @@ This is shown in @inlineraw{docbook, <xref linkend="call-new-function"/>}.
@end ifnotdocbook
@docbook
-<figure id="call-new-function">
+<figure id="figure-call-new-function" float="0">
<title>Calling The New Function</title>
-<graphic fileref="api-figure3.eps"/>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="api-figure3.png" format="PNG"/></imageobject>
+</mediaobject>
</figure>
@end docbook
@@ -29915,9 +31134,9 @@ The @code{do_@var{xxx}()} function, in turn, then uses the function
pointers in the API @code{struct} to do its work, such as updating
variables or arrays, printing messages, setting @code{ERRNO}, and so on.
-Convenience macros in the @file{gawkapi.h} header file make calling
-through the function pointers look like regular function calls so that
-extension code is quite readable and understandable.
+Convenience macros make calling through the function pointers look
+like regular function calls so that extension code is quite readable
+and understandable.
Although all of this sounds somewhat complicated, the result is that
extension code is quite straightforward to write and to read. You can
@@ -29926,10 +31145,10 @@ Example}) and also the @file{testext.c} code for testing the APIs.
Some other bits and pieces:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The API provides access to @command{gawk}'s @code{do_@var{xxx}} values,
-reflecting command line options, like @code{do_lint}, @code{do_profiling}
+reflecting command-line options, like @code{do_lint}, @code{do_profiling}
and so on (@pxref{Extension API Variables}).
These are informational: an extension cannot affect their values
inside @command{gawk}. In addition, attempting to assign to them
@@ -29948,6 +31167,9 @@ happen, but we all know how @emph{that} goes.)
@section API Description
@cindex extension API
+C or C++ code for an extension must include the header file
+@file{gawkapi.h}, which declares the functions and defines the data
+types used to communicate with @command{gawk}.
This (rather large) @value{SECTION} describes the API in detail.
@menu
@@ -29976,10 +31198,10 @@ by calling through function pointers passed into your extension.
API function pointers are provided for the following kinds of operations:
-@itemize @bullet
+@itemize @value{BULLET}
@item
-Registrations functions. You may register:
-@itemize @minus
+Registration functions. You may register:
+@itemize @value{MINUS}
@item
extension functions,
@item
@@ -30020,7 +31242,7 @@ can be a big performance win.
@item
Manipulating arrays:
-@itemize @minus
+@itemize @value{MINUS}
@item
Retrieving, adding, deleting, and modifying elements
@@ -30040,7 +31262,7 @@ Flattening an array for easy C style looping over all its indices and elements
Some points about using the API:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The following types and/or macros and/or functions are referenced
in @file{gawkapi.h}. For correct use, you must therefore include the
@@ -30049,6 +31271,7 @@ corresponding standard header file @emph{before} including @file{gawkapi.h}:
@multitable {@code{memset()}, @code{memcpy()}} {@code{<sys/types.h>}}
@headitem C Entity @tab Header File
@item @code{EOF} @tab @code{<stdio.h>}
+@item Values for @code{errno} @tab @code{<errno.h>}
@item @code{FILE} @tab @code{<stdio.h>}
@item @code{NULL} @tab @code{<stddef.h>}
@item @code{memcpy()} @tab @code{<string.h>}
@@ -30064,9 +31287,6 @@ is necessary in order to keep @file{gawkapi.h} clean, instead of becoming
a portability hodge-podge as can be seen in some parts of
the @command{gawk} source code.
-To pass reasonable integer values for @code{ERRNO}, you will also need to
-include @code{<errno.h>}.
-
@item
The @file{gawkapi.h} file may be included more than once without ill effect.
Doing so, however, is poor coding practice.
@@ -30079,7 +31299,7 @@ does not support this keyword, you should either place
@file{config.h} file in your extensions.
@item
-All pointers filled in by @command{gawk} are to memory
+All pointers filled in by @command{gawk} point to memory
managed by @command{gawk} and should be treated by the extension as
read-only. Memory for @emph{all} strings passed into @command{gawk}
from the extension @emph{must} come from calling the API-provided function
@@ -30090,7 +31310,7 @@ and is managed by @command{gawk} from then on.
The API defines several simple @code{struct}s that map values as seen
from @command{awk}. A value can be a @code{double}, a string, or an
array (as in multidimensional arrays, or when creating a new array).
-String values maintain both pointer and length since embedded @code{NUL}
+String values maintain both pointer and length since embedded @value{NUL}
characters are allowed.
@quotation NOTE
@@ -30222,7 +31442,7 @@ Scalar values in @command{awk} are either numbers or strings. The
indicates what is in the @code{union}.
Representing numbers is easy---the API uses a C @code{double}. Strings
-require more work. Since @command{gawk} allows embedded @code{NUL} bytes
+require more work. Since @command{gawk} allows embedded @value{NUL} bytes
in string values, a string must be represented as a pair containing a
data-pointer and length. This is the @code{awk_string_t} type.
@@ -30252,8 +31472,11 @@ reading and/or changing the value of one or more scalar variables, you
can obtain a @dfn{scalar cookie}@footnote{See
@uref{http://catb.org/jargon/html/C/cookie.html, the ``cookie'' entry in the Jargon file} for a
definition of @dfn{cookie}, and @uref{http://catb.org/jargon/html/M/magic-cookie.html,
-the ``magic cookie'' entry in the Jargon file} for a nice example. See
-also the entry for ``Cookie'' in the @ref{Glossary}.}
+the ``magic cookie'' entry in the Jargon file} for a nice example.
+@ifclear FOR_PRINT
+See also the entry for ``Cookie'' in the @ref{Glossary}.
+@end ifclear
+}
object for that variable, and then use
the cookie for getting the variable's value or for changing the variable's
value.
@@ -30284,9 +31507,9 @@ value type, as appropriate. This behavior is summarized in
@ref{table-value-types-returned}.
@c FIXME: Try to do this with spans...
-@ifdocbook
-@anchor{table-value-types-returned}
-@end ifdocbook
+
+@float Table,table-value-types-returned
+@caption{API Value Types Returned}
@docbook
<informaltable>
<tgroup cols="2">
@@ -30371,8 +31594,6 @@ value type, as appropriate. This behavior is summarized in
@ifnotplaintext
@ifnotdocbook
-@float Table,table-value-types-returned
-@caption{Value Types Returned}
@multitable @columnfractions .50 .50
@headitem @tab Type of Actual Value:
@end multitable
@@ -30385,12 +31606,9 @@ value type, as appropriate. This behavior is summarized in
@item @tab @b{Undefined} @tab String @tab Number @tab Array @tab Undefined
@item @tab @b{Value Cookie} @tab false @tab false @tab false @tab false
@end multitable
-@end float
@end ifnotdocbook
@end ifnotplaintext
@ifplaintext
-@float Table,table-value-types-returned
-@caption{Value Types Returned}
@example
+-------------------------------------------------+
| Type of Actual Value: |
@@ -30414,8 +31632,8 @@ value type, as appropriate. This behavior is summarized in
| | Cookie | | | | |
+-----------+-----------+------------+------------+-----------+-----------+
@end example
-@end float
@end ifplaintext
+@end float
@node Memory Allocation Functions
@subsection Memory Allocation Functions and Convenience Macros
@@ -30462,6 +31680,7 @@ procedure calls that do not return a value.
@table @code
@item #define emalloc(pointer, type, size, message) @dots{}
The arguments to this macro are as follows:
+
@c nested table
@table @code
@item pointer
@@ -30613,9 +31832,9 @@ empty string (@code{""}). The @code{func} pointer is the address of a
An @dfn{exit callback} function is a function that
@command{gawk} calls before it exits.
-Such functions are useful if you have general ``clean up'' tasks
-that should be performed in your extension (such as closing data
-base connections or other resource deallocations).
+Such functions are useful if you have general ``cleanup'' tasks
+that should be performed in your extension (such as closing database
+connections or other resource deallocations).
You can register such
a function with @command{gawk} using the following function.
@@ -30623,6 +31842,7 @@ a function with @command{gawk} using the following function.
@item void awk_atexit(void (*funcp)(void *data, int exit_status),
@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ void *arg0);
The parameters are:
+
@c nested table
@table @code
@item funcp
@@ -30716,8 +31936,9 @@ A pointer to your @code{@var{XXX}_can_take_file()} function.
A pointer to your @code{@var{XXX}_take_control_of()} function.
@item awk_const struct input_parser *awk_const next;
-This pointer is used by @command{gawk}.
-The extension cannot modify it.
+This is for use by @command{gawk};
+therefore it is marked @code{awk_const} so that the extension cannot
+modify it.
@end table
The steps are as follows:
@@ -30764,7 +31985,7 @@ open the file, then @code{fd} will @emph{not} be equal to
@code{INVALID_HANDLE}. Otherwise, it will.
@item struct stat sbuf;
-If file descriptor is valid, then @command{gawk} will have filled
+If the file descriptor is valid, then @command{gawk} will have filled
in this structure via a call to the @code{fstat()} system call.
@end table
@@ -30857,8 +32078,8 @@ need to test for a @code{NULL} value. @command{gawk} sets @code{*errcode}
to zero, so there is no need to set it unless an error occurs.
If an error does occur, the function should return @code{EOF} and set
-@code{*errcode} to a non-zero value. In that case, if @code{*errcode}
-does not equal @minus{}1, @command{gawk} automatically updates
+@code{*errcode} to a value greater than zero. In that case, if @code{*errcode}
+does not equal zero, @command{gawk} automatically updates
the @code{ERRNO} variable based on the value of @code{*errcode}.
(In general, setting @samp{*errcode = errno} should do the right thing.)
@@ -30945,8 +32166,8 @@ as described below, and return true if successful, false otherwise.
@item awk_const struct output_wrapper *awk_const next;
This is for use by @command{gawk};
-therefore they are marked @code{awk_const} so that the extension cannot
-modify them.
+therefore it is marked @code{awk_const} so that the extension cannot
+modify it.
@end table
The @code{awk_output_buf_t} structure looks like this:
@@ -31008,7 +32229,7 @@ The @code{@var{XXX}_can_take_file()} function should make a decision based
upon the @code{name} and @code{mode} fields, and any additional state
(such as @command{awk} variable values) that is appropriate.
-When @command{gawk} calls @code{@var{XXX}_take_control_of()}, it should fill
+When @command{gawk} calls @code{@var{XXX}_take_control_of()}, that function should fill
in the other fields, as appropriate, except for @code{fp}, which it should just
use normally.
@@ -31049,7 +32270,7 @@ The fields are as follows:
The name of the two-way processor.
@item awk_bool_t (*can_take_two_way)(const char *name);
-This function returns true if it wants to take over two-way I/O for this filename.
+This function returns true if it wants to take over two-way I/O for this @value{FN}.
It should not change any state (variable
values, etc.) within @command{gawk}.
@@ -31062,8 +32283,8 @@ This function should fill in the @code{awk_input_buf_t} and
@item awk_const struct two_way_processor *awk_const next;
This is for use by @command{gawk};
-therefore they are marked @code{awk_const} so that the extension cannot
-modify them.
+therefore it is marked @code{awk_const} so that the extension cannot
+modify it.
@end table
As with the input parser and output processor, you provide
@@ -31229,7 +32450,7 @@ Return false if the value cannot be retrieved.
@item awk_bool_t sym_update_scalar(awk_scalar_t cookie, awk_value_t *value);
Update the value associated with a scalar cookie. Return false if
-the new value is not one of @code{AWK_STRING} or @code{AWK_NUMBER}.
+the new value is not of type @code{AWK_STRING} or @code{AWK_NUMBER}.
Here too, the built-in variables may not be updated.
@end table
@@ -31347,7 +32568,7 @@ is what the routines in this section let you do. The functions are as follows:
@item awk_bool_t create_value(awk_value_t *value, awk_value_cookie_t *result);
Create a cached string or numeric value from @code{value} for efficient later
assignment.
-Only @code{AWK_NUMBER} and @code{AWK_STRING} values are allowed. Any other type
+Only values of type @code{AWK_NUMBER} and @code{AWK_STRING} are allowed. Any other type
is rejected. While @code{AWK_UNDEFINED} could be allowed, doing so would
result in inferior performance.
@@ -31408,13 +32629,13 @@ What happens if @command{awk} code assigns a new value to @code{VAR1},
are all the others changed too?''
That's a great question. The answer is that no, it's not a problem.
-Internally, @command{gawk} uses reference-counted strings. This means
+Internally, @command{gawk} uses @dfn{reference-counted strings}. This means
that many variables can share the same string value, and @command{gawk}
keeps track of the usage. When a variable's value changes, @command{gawk}
simply decrements the reference count on the old value and updates
the variable to use the new value.
-Finally, as part of your clean up action (@pxref{Exit Callback Functions})
+Finally, as part of your cleanup action (@pxref{Exit Callback Functions})
you should release any cached values that you created, using
@code{release_value()}.
@@ -31540,7 +32761,8 @@ the string value of @code{index} must come from the API-provided functions @code
@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const@ awk_value_t *const value);
In the array represented by @code{a_cookie}, create or modify
the element whose index is given by @code{index}.
-The @code{ARGV} and @code{ENVIRON} arrays may not be changed.
+The @code{ARGV} and @code{ENVIRON} arrays may not be changed,
+although the @code{PROCINFO} array can be.
@item awk_bool_t set_array_element_by_elem(awk_array_t a_cookie,
@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_element_t element);
@@ -31811,7 +33033,7 @@ you must add the new array to its parent before adding any elements to it.
Thus, the correct way to build an array is to work ``top down.'' Create
the array, and immediately install it in @command{gawk}'s symbol table
using @code{sym_update()}, or install it as an element in a previously
-existing array using @code{set_element()}. We show example code shortly.
+existing array using @code{set_array_element()}. We show example code shortly.
@item
Due to @command{gawk} internals, after using @code{sym_update()} to install an array
@@ -31837,7 +33059,7 @@ of the array cookie after the call to @code{set_element()}.
@end enumerate
The following C code is a simple test extension to create an array
-with two regular elements and with a subarray. The leading @samp{#include}
+with two regular elements and with a subarray. The leading @code{#include}
directives and boilerplate variable declarations are omitted for brevity.
The first step is to create a new array and then install it
in the symbol table:
@@ -32063,12 +33285,15 @@ whether the corresponding command-line options were enabled when
@command{gawk} was invoked. The variables are:
@table @code
+@item do_debug
+This variable is true if @command{gawk} was invoked with the @option{--debug} option.
+
@item do_lint
This variable is true if @command{gawk} was invoked with the @option{--lint} option
(@pxref{Options}).
-@item do_traditional
-This variable is true if @command{gawk} was invoked with @option{--traditional} option.
+@item do_mpfr
+This variable is true if @command{gawk} was invoked with the @option{--bignum} option.
@item do_profile
This variable is true if @command{gawk} was invoked with the @option{--profile} option.
@@ -32076,11 +33301,8 @@ This variable is true if @command{gawk} was invoked with @option{--profile} opti
@item do_sandbox
This variable is true if @command{gawk} was invoked with the @option{--sandbox} option.
-@item do_debug
-This variable is true if @command{gawk} was invoked with @option{--debug} option.
-
-@item do_mpfr
-This variable is true if @command{gawk} was invoked with @option{--bignum} option.
+@item do_traditional
+This variable is true if @command{gawk} was invoked with the @option{--traditional} option.
@end table
The value of @code{do_lint} can change if @command{awk} code
@@ -32131,8 +33353,14 @@ These variables and functions are as follows:
@table @code
@item int plugin_is_GPL_compatible;
-This asserts that the extension is compatible with the GNU GPL
-(@pxref{Copying}). If your extension does not have this, @command{gawk}
+This asserts that the extension is compatible with
+@ifclear FOR_PRINT
+the GNU GPL (@pxref{Copying}).
+@end ifclear
+@ifset FOR_PRINT
+the GNU GPL.
+@end ifset
+If your extension does not have this, @command{gawk}
will not load it (@pxref{Plugin License}).
@item static gawk_api_t *const api;
@@ -32156,8 +33384,9 @@ as described earlier (@pxref{Extension Functions}).
It can then be looped over for multiple calls to
@code{add_ext_func()}.
+@c Use @var{OR} for docbook
@item static awk_bool_t (*init_func)(void) = NULL;
-@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @r{OR}
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @var{OR}
@itemx static awk_bool_t init_my_module(void) @{ @dots{} @}
@itemx static awk_bool_t (*init_func)(void) = init_my_module;
If you need to do some initialization work, you should define a
@@ -32396,7 +33625,6 @@ Those are followed by the necessary variable declarations
to make use of the API macros and boilerplate code
(@pxref{Extension API Boilerplate}).
-@c break line for page breaking
@example
#ifdef HAVE_CONFIG_H
#include <config.h>
@@ -32483,7 +33711,6 @@ The @code{stat()} extension is more involved. First comes a function
that turns a numeric mode into a printable representation
(e.g., 644 becomes @samp{-rw-r--r--}). This is omitted here for brevity:
-@c break line for page breaking
@example
/* format_mode --- turn a stat mode field into something readable */
@@ -32757,7 +33984,9 @@ structures for loading each function into @command{gawk}:
static awk_ext_func_t func_table[] = @{
@{ "chdir", do_chdir, 1 @},
@{ "stat", do_stat, 2 @},
+#ifndef __MINGW32__
@{ "fts", do_fts, 3 @},
+#endif
@};
@end example
@@ -32771,9 +34000,7 @@ everything that needs to be loaded. It is simplest to use the
dl_load_func(func_table, filefuncs, "")
@end example
-And that's it! As an exercise, consider adding functions to
-implement system calls such as @code{chown()}, @code{chmod()},
-and @code{umask()}.
+And that's it!
@node Using Internal File Ops
@subsection Integrating The Extensions
@@ -32785,7 +34012,7 @@ code must be compiled. Assuming that the functions are in
a file named @file{filefuncs.c}, and @var{idir} is the location
of the @file{gawkapi.h} header file,
the following steps@footnote{In practice, you would probably want to
-use the GNU Autotools---Automake, Autoconf, Libtool, and Gettext---to
+use the GNU Autotools---Automake, Autoconf, Libtool, and @command{gettext}---to
configure and build your libraries. Instructions for doing so are beyond
the scope of this @value{DOCUMENT}. @xref{gawkextlib}, for WWW links to
the tools.} create a GNU/Linux shared library:
@@ -32827,7 +34054,7 @@ BEGIN @{
@end example
The @env{AWKLIBPATH} environment variable tells
-@command{gawk} where to find shared libraries (@pxref{Finding Extensions}).
+@command{gawk} where to find extensions (@pxref{Finding Extensions}).
We set it to the current directory and run the program:
@example
@@ -32890,19 +34117,19 @@ Others mainly provide example code that shows how to use the extension API.
The @code{filefuncs} extension provides three different functions, as follows:
The usage is:
-@table @code
+@table @asis
@item @@load "filefuncs"
This is how you load the extension.
@cindex @code{chdir()} extension function
-@item result = chdir("/some/directory")
+@item @code{result = chdir("/some/directory")}
The @code{chdir()} function is a direct hook to the @code{chdir()}
system call to change the current directory. It returns zero
upon success or less than zero upon error. In the latter case it updates
@code{ERRNO}.
@cindex @code{stat()} extension function
-@item result = stat("/some/path", statdata @r{[}, follow@r{]})
+@item @code{result = stat("/some/path", statdata} [@code{, follow}]@code{)}
The @code{stat()} function provides a hook into the
@code{stat()} system call.
It returns zero upon success or less than zero upon error.
@@ -32915,69 +34142,27 @@ In all cases, it clears the @code{statdata} array.
When the call is successful, @code{stat()} fills the @code{statdata}
array with information retrieved from the filesystem, as follows:
-@c nested table
-@multitable @columnfractions .25 .60
-@item @code{statdata["name"]} @tab
-The name of the file.
-
-@item @code{statdata["dev"]} @tab
-Corresponds to the @code{st_dev} field in the @code{struct stat}.
-
-@item @code{statdata["ino"]} @tab
-Corresponds to the @code{st_ino} field in the @code{struct stat}.
-
-@item @code{statdata["mode"]} @tab
-Corresponds to the @code{st_mode} field in the @code{struct stat}.
-
-@item @code{statdata["nlink"]} @tab
-Corresponds to the @code{st_nlink} field in the @code{struct stat}.
-
-@item @code{statdata["uid"]} @tab
-Corresponds to the @code{st_uid} field in the @code{struct stat}.
-
-@item @code{statdata["gid"]} @tab
-Corresponds to the @code{st_gid} field in the @code{struct stat}.
-
-@item @code{statdata["size"]} @tab
-Corresponds to the @code{st_size} field in the @code{struct stat}.
-
-@item @code{statdata["atime"]} @tab
-Corresponds to the @code{st_atime} field in the @code{struct stat}.
-
-@item @code{statdata["mtime"]} @tab
-Corresponds to the @code{st_mtime} field in the @code{struct stat}.
-
-@item @code{statdata["ctime"]} @tab
-Corresponds to the @code{st_ctime} field in the @code{struct stat}.
-
-@item @code{statdata["rdev"]} @tab
-Corresponds to the @code{st_rdev} field in the @code{struct stat}.
-This element is only present for device files.
-
-@item @code{statdata["major"]} @tab
-Corresponds to the @code{st_major} field in the @code{struct stat}.
-This element is only present for device files.
-
-@item @code{statdata["minor"]} @tab
-Corresponds to the @code{st_minor} field in the @code{struct stat}.
-This element is only present for device files.
-
-@item @code{statdata["blksize"]} @tab
-Corresponds to the @code{st_blksize} field in the @code{struct stat},
-if this field is present on your system.
-(It is present on all modern systems that we know of.)
-
-@item @code{statdata["pmode"]} @tab
-A human-readable version of the mode value, such as printed by
-@command{ls}. For example, @code{"-rwxr-xr-x"}.
-
-@item @code{statdata["linkval"]} @tab
-If the named file is a symbolic link, this element will exist
-and its value is the value of the symbolic link (where the
-symbolic link points to).
-
-@item @code{statdata["type"]} @tab
-The type of the file as a string. One of
+@multitable @columnfractions .15 .50 .20
+@headitem Subscript @tab Field in @code{struct stat} @tab File type
+@item @code{"name"} @tab The @value{FN} @tab All
+@item @code{"dev"} @tab @code{st_dev} @tab All
+@item @code{"ino"} @tab @code{st_ino} @tab All
+@item @code{"mode"} @tab @code{st_mode} @tab All
+@item @code{"nlink"} @tab @code{st_nlink} @tab All
+@item @code{"uid"} @tab @code{st_uid} @tab All
+@item @code{"gid"} @tab @code{st_gid} @tab All
+@item @code{"size"} @tab @code{st_size} @tab All
+@item @code{"atime"} @tab @code{st_atime} @tab All
+@item @code{"mtime"} @tab @code{st_mtime} @tab All
+@item @code{"ctime"} @tab @code{st_ctime} @tab All
+@item @code{"rdev"} @tab @code{st_rdev} @tab Device files
+@item @code{"major"} @tab @code{st_major} @tab Device files
+@item @code{"minor"} @tab @code{st_minor} @tab Device files
+@item @code{"blksize"} @tab @code{st_blksize} @tab All
+@item @code{"pmode"} @tab A human-readable version of the mode value, such as printed by
+@command{ls}. For example, @code{"-rwxr-xr-x"} @tab All
+@item @code{"linkval"} @tab The value of the symbolic link @tab Symbolic links
+@item @code{"type"} @tab The type of the file as a string. One of
@code{"file"},
@code{"blockdev"},
@code{"chardev"},
@@ -32988,12 +34173,12 @@ The type of the file as a string. One of
@code{"door"},
or
@code{"unknown"}.
-Not all systems support all file types.
+Not all systems support all file types. @tab All
@end multitable
@cindex @code{fts()} extension function
-@item flags = or(FTS_PHYSICAL, ...)
-@itemx result = fts(pathlist, flags, filedata)
+@item @code{flags = or(FTS_PHYSICAL, ...)}
+@itemx @code{result = fts(pathlist, flags, filedata)}
Walk the file trees provided in @code{pathlist} and fill in the
@code{filedata} array as described below. @code{flags} is the bitwise
OR of several predefined constant values, also described below.
@@ -33010,7 +34195,7 @@ The arguments are as follows:
@table @code
@item pathlist
-An array of filenames. The element values are used; the index values are ignored.
+An array of @value{FN}s. The element values are used; the index values are ignored.
@item flags
This should be the bitwise OR of one or more of the following
@@ -33134,10 +34319,10 @@ The arguments to @code{fnmatch()} are:
@table @code
@item pattern
-The filename wildcard to match.
+The @value{FN} wildcard to match.
@item string
-The filename string.
+The @value{FN} string.
@item flag
Either zero, or the bitwise OR of one or more of the
@@ -33147,23 +34332,13 @@ flags in the @code{FNM} array.
The flags are as follows:
@multitable @columnfractions .25 .75
-@item @code{FNM["CASEFOLD"]} @tab
-Corresponds to the @code{FNM_CASEFOLD} flag as defined in @code{fnmatch()}.
-
-@item @code{FNM["FILE_NAME"]} @tab
-Corresponds to the @code{FNM_FILE_NAME} flag as defined in @code{fnmatch()}.
-
-@item @code{FNM["LEADING_DIR"]} @tab
-Corresponds to the @code{FNM_LEADING_DIR} flag as defined in @code{fnmatch()}.
-
-@item @code{FNM["NOESCAPE"]} @tab
-Corresponds to the @code{FNM_NOESCAPE} flag as defined in @code{fnmatch()}.
-
-@item @code{FNM["PATHNAME"]} @tab
-Corresponds to the @code{FNM_PATHNAME} flag as defined in @code{fnmatch()}.
-
-@item @code{FNM["PERIOD"]} @tab
-Corresponds to the @code{FNM_PERIOD} flag as defined in @code{fnmatch()}.
+@headitem Array element @tab Corresponding flag defined by @code{fnmatch()}
+@item @code{FNM["CASEFOLD"]} @tab @code{FNM_CASEFOLD}
+@item @code{FNM["FILE_NAME"]} @tab @code{FNM_FILE_NAME}
+@item @code{FNM["LEADING_DIR"]} @tab @code{FNM_LEADING_DIR}
+@item @code{FNM["NOESCAPE"]} @tab @code{FNM_NOESCAPE}
+@item @code{FNM["PATHNAME"]} @tab @code{FNM_PATHNAME}
+@item @code{FNM["PERIOD"]} @tab @code{FNM_PERIOD}
@end multitable
Here is an example:
@@ -33254,8 +34429,8 @@ standard output to a temporary file configured to have the same owner
and permissions as the original. After the file has been processed,
the extension restores standard output to its original destination.
If @code{INPLACE_SUFFIX} is not an empty string, the original file is
-linked to a backup filename created by appending that suffix. Finally,
-the temporary file is renamed to the original filename.
+linked to a backup @value{FN} created by appending that suffix. Finally,
+the temporary file is renamed to the original @value{FN}.
If any error occurs, the extension issues a fatal error to terminate
processing immediately without damaging the original file.
@@ -33273,9 +34448,6 @@ $ @kbd{gawk -i inplace -v INPLACE_SUFFIX=.bak '@{ gsub(/foo/, "bar") @}}
> @kbd{@{ print @}' file1 file2 file3}
@end example
-We leave it as an exercise to write a wrapper script that presents an
-interface similar to @samp{sed -i}.
-
@node Extension Sample Ord
@subsection Character and Numeric values: @code{ord()} and @code{chr()}
@@ -33321,11 +34493,14 @@ on the command line (or with @code{getline}),
they are read, with each entry returned as a record.
The record consists of three fields. The first two are the inode number and the
-filename, separated by a forward slash character.
+@value{FN}, separated by a forward slash character.
On systems where the directory entry contains the file type, the record
has a third field (also separated by a slash) which is a single letter
-indicating the type of the file:
+indicating the type of the file. The letters and file types are shown
+in @ref{table-readdir-file-types}.
+@float Table,table-readdir-file-types
+@caption{File Types Returned By @code{readdir()}}
@multitable @columnfractions .1 .9
@headitem Letter @tab File Type
@item @code{b} @tab Block device
@@ -33337,6 +34512,7 @@ indicating the type of the file:
@item @code{s} @tab Socket
@item @code{u} @tab Anything else (unknown)
@end multitable
+@end float
On systems without the file type information, the third field is always
@samp{u}.
@@ -33371,12 +34547,12 @@ Here is an example:
BEGIN @{
REVOUT = 1
- print "hello, world" > "/dev/stdout"
+ print "don't panic" > "/dev/stdout"
@}
@end example
The output from this program is:
-@samp{dlrow ,olleh}.
+@samp{cinap t'nod}.
@node Extension Sample Rev2way
@subsection Two-Way I/O Example
@@ -33393,13 +34569,22 @@ The following example shows how to use it:
BEGIN @{
cmd = "/magic/mirror"
- print "hello, world" |& cmd
+ print "don't panic" |& cmd
cmd |& getline result
print result
close(cmd)
@}
@end example
+The output from this program
+@ifnotinfo
+also is:
+@end ifnotinfo
+@ifinfo
+is:
+@end ifinfo
+@samp{cinap t'nod}.
+
@node Extension Sample Read write array
@subsection Dumping and Restoring An Array
@@ -33410,8 +34595,8 @@ named @code{writea()} and @code{reada()}, as follows:
@cindex @code{writea()} extension function
@item ret = writea(file, array)
This function takes a string argument, which is the name of the file
-to which dump the array, and the array itself as the second argument.
-@code{writea()} understands multidimensional arrays. It returns one on
+to which to dump the array, and the array itself as the second argument.
+@code{writea()} understands arrays of arrays. It returns one on
success, or zero upon failure.
@cindex @code{reada()} extension function
@@ -33496,9 +34681,8 @@ for more information.
@node Extension Sample Time
@subsection Extension Time Functions
-These functions can be used either by invoking @command{gawk}
-with a command-line argument of @samp{-l time} or by
-inserting @samp{@@load "time"} in your script.
+The @code{time} extension adds two functions, named @code{gettimeofday()}
+and @code{sleep()}, as follows:
@table @code
@item @@load "time"
@@ -33511,7 +34695,7 @@ floating point value. If the time is unavailable on this platform, return
@minus{}1 and set @code{ERRNO}. The returned time should have sub-second
precision, but the actual precision may vary based on the platform.
If the standard C @code{gettimeofday()} system call is available on this
-platform, then it simply returns the value. Otherwise, if on Windows,
+platform, then it simply returns the value. Otherwise, if on MS-Windows,
it tries to use @code{GetSystemTimeAsFileTime()}.
@cindex @code{sleep()} extension function
@@ -33537,7 +34721,7 @@ processing XML files. This is the evolution of the original @command{xgawk}
As of this writing, there are five extensions:
-@itemize @bullet
+@itemize @value{BULLET}
@item
XML parser extension, using the @uref{http://expat.sourceforge.net, Expat}
XML parsing library.
@@ -33563,7 +34747,7 @@ main @command{gawk} distribution.
@cindex @command{git} utility
You can check out the code for the @code{gawkextlib} project
-using the @uref{http://git-scm.com, GIT} distributed source
+using the @uref{http://git-scm.com, Git} distributed source
code control system. The command is as follows:
@example
@@ -33579,7 +34763,7 @@ In addition, you must have the GNU Autotools installed
@uref{http://www.gnu.org/software/automake, Automake},
@uref{http://www.gnu.org/software/libtool, Libtool},
and
-@uref{http://www.gnu.org/software/gettext, Gettext}).
+@uref{http://www.gnu.org/software/gettext, GNU @command{gettext}}).
The simple recipe for building and testing @code{gawkextlib} is as follows.
First, build and install @command{gawk}:
@@ -33613,26 +34797,171 @@ If you write an extension that you wish to share with other
@code{gawkextlib} project.
See the project's web site for more information.
-@iftex
-@part Part IV:@* Appendices
-@end iftex
+@node Extension summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+You can write extensions (sometimes called plug-ins) for @command{gawk}
+in C or C++ using the Application Programming Interface (API) defined
+by the @command{gawk} developers.
+
+@item
+Extensions must have a license compatible with the GNU General Public
+License (GPL), and they must assert that fact by declaring a variable
+named @code{plugin_is_GPL_compatible}.
+
+@item
+Communication between @command{gawk} and an extension is two-way.
+@command{gawk} passes a @code{struct} to the extension which contains
+various data fields and function pointers. The extension can then call
+into @command{gawk} via the supplied function pointers to accomplish
+certain tasks.
+
+@item
+One of these tasks is to ``register'' the name and implementation of
+a new @command{awk}-level function with @command{gawk}. The implementation
+takes the form of a C function pointer with a defined signature.
+By convention, implementation functions are named @code{do_@var{XXXX}()}
+for some @command{awk}-level function @code{@var{XXXX}()}.
+
+@item
+The API is defined in a header file named @file{gawkapi.h}. You must include
+a number of standard header files @emph{before} including it in your source file.
+
+@item
+API function pointers are provided for the following kinds of operations:
+
+@itemize @value{BULLET}
+@item
+Registration functions. You may register
+extension functions,
+exit callbacks,
+a version string,
+input parsers,
+output wrappers,
+and two-way processors.
+
+@item
+Printing fatal, warning, and ``lint'' warning messages.
+
+@item
+Updating @code{ERRNO}, or unsetting it.
+
+@item
+Accessing parameters, including converting an undefined parameter into
+an array.
+
+@item
+Symbol table access: retrieving a global variable, creating one,
+or changing one.
+
+@item
+Allocating, reallocating, and releasing memory.
+
+@item
+Creating and releasing cached values; this provides an
+efficient way to use values for multiple variables and
+can be a big performance win.
+
+@item
+Manipulating arrays:
+retrieving, adding, deleting, and modifying elements;
+getting the count of elements in an array;
+creating a new array;
+clearing an array;
+and
+flattening an array for easy C-style looping over all its indices and elements.
+@end itemize
+
+@item
+The API defines a number of standard data types for representing
+@command{awk} values, array elements, and arrays.
+
+@item
+The API provides convenience functions for constructing values.
+It also provides memory management functions to ensure compatibility
+between memory allocated by @command{gawk} and memory allocated by an
+extension.
+
+@item
+@emph{All} memory passed from @command{gawk} to an extension must be
+treated as read-only by the extension.
+
+@item
+@emph{All} memory passed from an extension to @command{gawk} must come from
+the API's memory allocation functions. @command{gawk} takes responsibility for
+the memory and will release it when appropriate.
+
+@item
+The API provides information about the running version of @command{gawk} so
+that an extension can make sure it is compatible with the @command{gawk}
+that loaded it.
+
+@item
+It is easiest to start a new extension by copying the boilerplate code
+described in this @value{CHAPTER}. Macros in the @file{gawkapi.h} header file make
+this easier to do.
+
+@item
+The @command{gawk} distribution includes a number of small but useful
+sample extensions. The @code{gawkextlib} project includes several more,
+larger, extensions. If you wish to write an extension and contribute it
+to the community of @command{gawk} users, the @code{gawkextlib} project
+should be the place to do so.
+
+@end itemize
+
+@c EXCLUDE START
+@node Extension Exercises
+@section Exercises
+
+@enumerate
+@item
+Add functions to implement system calls such as @code{chown()},
+@code{chmod()}, and @code{umask()} to the file operations extension
+presented in @ref{Internal File Ops}.
+
+@item
+(Hard.)
+How would you provide namespaces in @command{gawk}, so that the
+names of functions in different extensions don't conflict with each other?
+If you come up with a really good scheme, contact the @command{gawk}
+maintainer to tell him about it.
+
+@item
+Write a wrapper script that provides an interface similar to
+@samp{sed -i} for the ``inplace'' extension presented in
+@ref{Extension Sample Inplace}.
+
+@end enumerate
+@c EXCLUDE END
+
+@ifnotinfo
+@part @value{PART4}Appendices
+@end ifnotinfo
-@ignore
@ifdocbook
-@part Part IV:@* Appendices
+@ifclear FOR_PRINT
+Part IV contains the appendices (including the two licenses that cover
+the @command{gawk} source code and this @value{DOCUMENT}, respectively)
+and the Glossary:
+@end ifclear
-Part IV provides the appendices, the Glossary, and two licenses that cover
-the @command{gawk} source code and this @value{DOCUMENT}, respectively.
-It contains the following appendices:
+@ifset FOR_PRINT
+Part IV contains two appendices and the license that
+covers the @command{gawk} source code:
+@end ifset
-@itemize @bullet
+@itemize @value{BULLET}
@item
@ref{Language History}.
@item
@ref{Installation}.
+@ifclear FOR_PRINT
@item
@ref{Notes}.
@@ -33641,30 +34970,39 @@ It contains the following appendices:
@item
@ref{Glossary}.
+@end ifclear
@item
@ref{Copying}.
+@ifclear FOR_PRINT
@item
@ref{GNU Free Documentation License}.
+@end ifclear
@end itemize
@end ifdocbook
-@end ignore
@node Language History
@appendix The Evolution of the @command{awk} Language
-This @value{DOCUMENT} describes the GNU implementation of @command{awk}, which follows
-the POSIX specification.
-Many long-time @command{awk} users learned @command{awk} programming
-with the original @command{awk} implementation in Version 7 Unix.
-(This implementation was the basis for @command{awk} in Berkeley Unix,
-through 4.3-Reno. Subsequent versions of Berkeley Unix, and some systems
-derived from 4.4BSD-Lite, use various versions of @command{gawk}
-for their @command{awk}.)
-This @value{CHAPTER} briefly describes the
-evolution of the @command{awk} language, with cross-references to other parts
-of the @value{DOCUMENT} where you can find more information.
+This @value{DOCUMENT} describes the GNU implementation of @command{awk},
+which follows the POSIX specification. Many long-time @command{awk}
+users learned @command{awk} programming with the original @command{awk}
+implementation in Version 7 Unix. (This implementation was the basis for
+@command{awk} in Berkeley Unix, through 4.3-Reno. Subsequent versions
+of Berkeley Unix, and some systems derived from 4.4BSD-Lite, used various
+versions of @command{gawk} for their @command{awk}.) This @value{CHAPTER}
+briefly describes the evolution of the @command{awk} language, with
+cross-references to other parts of the @value{DOCUMENT} where you can
+find more information.
+
+@ifset FOR_PRINT
+To save space, we have omitted
+information on the history of features in @command{gawk} from this
+edition. You can find it in the
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Feature-History.html,
+online documentation}.
+@end ifset
@menu
* V7/SVR3.1:: The major changes between V7 and System V
@@ -33680,6 +35018,7 @@ of the @value{DOCUMENT} where you can find more information.
* Common Extensions:: Common Extensions Summary.
* Ranges and Locales:: How locales used to affect regexp ranges.
* Contributors:: The major contributors to @command{gawk}.
+* History summary:: History summary.
@end menu
@node V7/SVR3.1
@@ -33694,7 +35033,7 @@ Version 7 Unix (1978) and the new version that was first made generally availabl
System V Release 3.1 (1987). This @value{SECTION} summarizes the changes, with
cross-references to further details:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The requirement for @samp{;} to separate rules on a line
(@pxref{Statements/Lines}).
@@ -33785,7 +35124,7 @@ Multidimensional arrays
The System V Release 4 (1989) version of Unix @command{awk} added these features
(some of which originated in @command{gawk}):
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @code{ENVIRON} array (@pxref{Built-in Variables}).
@c gawk and MKS awk
@@ -33845,7 +35184,7 @@ Processing of escape sequences inside command-line variable assignments
The POSIX Command Language and Utilities standard for @command{awk} (1992)
introduced the following changes into the language:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The use of @option{-W} for implementation-specific options
(@pxref{Options}).
@@ -33870,7 +35209,7 @@ features of the language.
In 2012, a number of extensions that had been commonly available for
many years were finally added to POSIX. They are:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @code{fflush()} built-in function for flushing buffered output
(@pxref{I/O Functions}).
@@ -33907,7 +35246,7 @@ has made his version available via his home page
This @value{SECTION} describes common extensions that
originally appeared in his version of @command{awk}.
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @samp{**} and @samp{**=} operators
(@pxref{Arithmetic Ops}
@@ -33925,7 +35264,7 @@ The @code{fflush()} built-in function for flushing buffered output
@ignore
@item
The @code{SYMTAB} array, that allows access to @command{awk}'s internal symbol
-table. This feature is not documented, largely because
+table. This feature was never documented for his @command{awk}, largely because
it is somewhat shakily implemented. For instance, you cannot access arrays
or array elements through it.
@end ignore
@@ -33952,12 +35291,12 @@ A number of features have come and gone over the years. This @value{SECTION}
summarizes the additional features over POSIX @command{awk} that are
in the current version of @command{gawk}.
-@itemize @bullet
+@itemize @value{BULLET}
@item
Additional built-in variables:
-@itemize @minus
+@itemize @value{MINUS}
@item
The
@code{ARGIND}
@@ -33978,10 +35317,10 @@ variables
@item
Special files in I/O redirections:
-@itemize @minus{}
+@itemize @value{MINUS}
@item
The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr} and
-@file{/dev/fd/@var{N}} special file names
+@file{/dev/fd/@var{N}} special @value{FN}s
(@pxref{Special Files}).
@item
@@ -33994,7 +35333,7 @@ IP protocol to use.
@item
Changes and/or additions to the language:
-@itemize @minus{}
+@itemize @value{MINUS}
@item
The @samp{\x} escape sequence
(@pxref{Escape Sequences}).
@@ -34027,13 +35366,13 @@ Indirect function calls
@item
Directories on the command line produce a warning and are skipped
-(@pxref{Command line directories}).
+(@pxref{Command-line directories}).
@end itemize
@item
New keywords:
-@itemize @minus{}
+@itemize @value{MINUS}
@item
The @code{BEGINFILE} and @code{ENDFILE} special patterns
(@pxref{BEGINFILE/ENDFILE}).
@@ -34054,7 +35393,7 @@ The @code{switch} statement
@item
Changes to standard @command{awk} functions:
-@itemize @minus
+@itemize @value{MINUS}
@item
The optional second argument to @code{close()} that allows closing one end
of a two-way pipe to a coprocess
@@ -34087,7 +35426,7 @@ argument which is an array to hold the text of the field separators.
@item
Additional functions only in @command{gawk}:
-@itemize @minus
+@itemize @value{MINUS}
@item
The
@code{and()},
@@ -34111,8 +35450,7 @@ functions for internationalization
(@pxref{Programmer i18n}).
@item
-The @code{fflush()} function from Brian Kernighan's
-version of @command{awk}
+The @code{fflush()} function from BWK @command{awk}
(@pxref{I/O Functions}).
@item
@@ -34130,7 +35468,7 @@ functions for working with timestamps
@item
Changes and/or additions in the command-line options:
-@itemize @minus
+@itemize @value{MINUS}
@item
The @env{AWKPATH} environment variable for specifying a path search for
the @option{-f} command-line option
@@ -34176,7 +35514,7 @@ and the
@option{--copyright},
@option{--debug},
@option{--dump-variables},
-@option{--execle},
+@option{--exec},
@option{--field-separator},
@option{--file},
@option{--gen-pot},
@@ -34205,10 +35543,10 @@ long options
@item
Support for the following obsolete systems was removed from the code
-and the documentation for @command{gawk} version 4.0:
+and the documentation for @command{gawk} @value{PVERSION} 4.0:
@c nested table
-@itemize @minus
+@itemize @value{MINUS}
@item
Amiga
@@ -34247,6 +35585,20 @@ GCC for VAX and Alpha has not been tested for a while.
@end itemize
+@item
+Support for the following obsolete systems was removed from the code
+and the documentation for @command{gawk} @value{PVERSION} 4.1:
+
+@c nested table
+@itemize @value{MINUS}
+@item
+Ultrix
+@end itemize
+
+@item
+@c FIXME: Verify the version here.
+Support for MirBSD was removed at @command{gawk} @value{PVERSION} 4.2.
+
@end itemize
@c XXX ADD MORE STUFF HERE
@@ -34255,6 +35607,8 @@ GCC for VAX and Alpha has not been tested for a while.
@c ENDOFRANGE exgnot
@c ENDOFRANGE posnot
+@c This does not need to be in the formal book.
+@ifclear FOR_PRINT
@node Feature History
@appendixsec History of @command{gawk} Features
@@ -34284,7 +35638,7 @@ in the order they were added to @command{gawk}.
Version 2.10 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @env{AWKPATH} environment variable for specifying a path search for
the @option{-f} command-line option
@@ -34296,13 +35650,13 @@ The @code{IGNORECASE} variable and its effects
@item
The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr} and
-@file{/dev/fd/@var{N}} special file names
+@file{/dev/fd/@var{N}} special @value{FN}s
(@pxref{Special Files}).
@end itemize
Version 2.13 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @code{FIELDWIDTHS} variable and its effects
(@pxref{Constant Size}).
@@ -34316,7 +35670,7 @@ and printing timestamps
Additional command-line options
(@pxref{Options}):
-@itemize @minus
+@itemize @value{MINUS}
@item
The @option{-W lint} option to provide error and portability checking
for both the source code and at runtime.
@@ -34331,19 +35685,19 @@ The @option{-W posix} option for full POSIX compliance.
Version 2.14 of @command{gawk} introduced the following feature:
-@itemize @bullet
+@itemize @value{BULLET}
@item
-The @code{next file} statement for skipping to the next data file
+The @code{next file} statement for skipping to the next @value{DF}
(@pxref{Nextfile Statement}).
@end itemize
Version 2.15 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
New variables (@pxref{Built-in Variables}):
-@itemize @minus
+@itemize @value{MINUS}
@item
@code{ARGIND}, which tracks the movement of @code{FILENAME}
through @code{ARGV}.
@@ -34355,17 +35709,17 @@ through @code{ARGV}.
@item
The @file{/dev/pid}, @file{/dev/ppid}, @file{/dev/pgrpid}, and
-@file{/dev/user} special file names. These have since been removed.
+@file{/dev/user} special @value{FN}s. These have since been removed.
@item
The ability to delete all of an array at once with @samp{delete @var{array}}
(@pxref{Delete}).
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
-@itemize @minus
+@itemize @value{MINUS}
@item
The ability to use GNU-style long-named options that start with @option{--}.
@@ -34377,11 +35731,11 @@ source code.
Version 3.0 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
New or changed variables:
-@itemize @minus
+@itemize @value{MINUS}
@item
@code{IGNORECASE} changed, now applying to string comparison as well
as regexp operations
@@ -34419,14 +35773,15 @@ The @code{next file} statement became @code{nextfile}
(@pxref{Nextfile Statement}).
@item
-The @code{fflush()} function from the
-Bell Laboratories research version of @command{awk}
-(@pxref{I/O Functions}).
+The @code{fflush()} function from
+BWK @command{awk}
+(then at Bell Laboratories;
+@pxref{I/O Functions}).
@item
-New command line options:
+New command-line options:
-@itemize @minus
+@itemize @value{MINUS}
@item
The @option{--lint-old} option to
warn about constructs that are not available in
@@ -34434,9 +35789,9 @@ the original Version 7 Unix version of @command{awk}
(@pxref{V7/SVR3.1}).
@item
-The @option{-m} option from the
-Bell Laboratories research version of @command{awk}
-This was later removed.
+The @option{-m} option from BWK @command{awk}. (Brian was
+still at Bell Laboratories at the time.) This was later removed from
+both his @command{awk} and from @command{gawk}.
@item
The @option{--re-interval} option to provide interval expressions in regexps
@@ -34453,17 +35808,18 @@ The use of GNU Autoconf to control the configuration process
@item
Amiga support.
+This has since been removed.
@end itemize
Version 3.1 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
New variables
(@pxref{Built-in Variables}):
-@itemize @minus
+@itemize @value{MINUS}
@item
@code{BINMODE}, for non-POSIX systems,
which allows binary I/O for input and/or output files
@@ -34511,7 +35867,7 @@ making translations easier
@item
A number of new built-in functions:
-@itemize @minus
+@itemize @value{MINUS}
@item
The @code{asort()} and @code{asorti()} functions for sorting arrays
(@pxref{Array Sorting}).
@@ -34542,10 +35898,10 @@ The support for @samp{next file} as two words was removed completely
(@pxref{Nextfile Statement}).
@item
-Additional commnd line options
+Additional command-line options
(@pxref{Options}):
-@itemize @minus
+@itemize @value{MINUS}
@item
The @option{--dump-variables} option to print a list of all global variables.
@@ -34579,7 +35935,7 @@ The use of GNU Automake to help in standardizing the configuration process
(@pxref{Quick Installation}).
@item
-The use of GNU @code{gettext} for @command{gawk}'s own message output
+The use of GNU @command{gettext} for @command{gawk}'s own message output
(@pxref{Gawk I18N}).
@item
@@ -34589,7 +35945,8 @@ BeOS support. This was later removed.
Tandem support. This was later removed.
@item
-The Atari port became officially unsupported.
+The Atari port became officially unsupported and was
+later removed entirely.
@item
The source code changed to use ISO C standard-style function definitions.
@@ -34611,12 +35968,12 @@ enable printing times as UTC
Version 4.0 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Variable additions:
-@itemize @minus
+@itemize @value{MINUS}
@item
@code{FPAT}, which allows you to specify a regexp that matches
the fields, instead of matching the field separator
@@ -34674,7 +36031,7 @@ An optional third argument to
(@pxref{String Functions}).
@item
-The behavior of @code{fflush()} changed to match Brian Kernighan's @command{awk}
+The behavior of @code{fflush()} changed to match BWK @command{awk}
and for POSIX; now both @samp{fflush()} and @samp{fflush("")}
flush all open output redirections
(@pxref{I/O Functions}).
@@ -34682,7 +36039,7 @@ flush all open output redirections
@item
The @code{isarray()}
function which distinguishes if an item is an array
-or not, to make it possible to traverse multidimensional arrays
+or not, to make it possible to traverse arrays of arrays
(@pxref{Type Functions}).
@item
@@ -34712,10 +36069,10 @@ Indirect function calls
(@pxref{Switch Statement}).
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
-@itemize @minus
+@itemize @value{MINUS}
@item
The @option{-b} and @option{--characters-as-bytes} options
which prevent @command{gawk} from treating input as a multibyte string.
@@ -34737,7 +36094,7 @@ All long options acquired corresponding short options, for use in @samp{#!} scri
@item
Directories named on the command line now produce a warning, not a fatal
error, unless @option{--posix} or @option{--traditional} are used
-(@pxref{Command line directories}).
+(@pxref{Command-line directories}).
@item
The @command{gawk} internals were rewritten, bringing the @command{dgawk}
@@ -34766,7 +36123,7 @@ C locale, no matter what kind of regexp is being used, and even if
@item
Support was removed for the following systems:
-@itemize @minus
+@itemize @value{MINUS}
@item
Atari
@@ -34804,7 +36161,7 @@ Prestandard VAX C compiler for VAX/VMS
Version 4.1 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Three new arrays:
@@ -34813,13 +36170,13 @@ Three new arrays:
@item
The three executables @command{gawk}, @command{pgawk}, and @command{dgawk}, were merged into
-one, named just @command{gawk}. As a result the command line options changed.
+one, named just @command{gawk}. As a result the command-line options changed.
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
-@itemize @minus
+@itemize @value{MINUS}
@item
The @option{-D} option invokes the debugger.
@@ -34845,7 +36202,7 @@ The @option{-R} option was removed.
@item
Support for high precision arithmetic with MPFR
-(@pxref{Gawk and MPFR}).
+(@pxref{Arbitrary Precision Arithmetic}).
@item
The @code{and()}, @code{or()} and @code{xor()} functions
@@ -34860,6 +36217,7 @@ The dynamic extension interface was completely redone
@end itemize
@c XXX ADD MORE STUFF HERE
+@end ifclear
@node Common Extensions
@appendixsec Common Extensions Summary
@@ -34958,7 +36316,7 @@ it on your system).
@cindex Unicode
Similar considerations apply to other ranges. For example, @samp{["-/]}
is perfectly valid in ASCII, but is not valid in many Unicode locales,
-such as @samp{en_US.UTF-8}.
+such as @code{en_US.UTF-8}.
Early versions of @command{gawk} used regexp matching code that was not
locale aware, so ranges had their traditional interpretation.
@@ -34973,7 +36331,7 @@ like ``why does @samp{[A-Z]} match lowercase letters?!?''
This situation existed for close to 10 years, if not more, and
the @command{gawk} maintainer grew weary of trying to explain that
@command{gawk} was being nicely standards-compliant, and that the issue
-was in the user's locale. During the development of version 4.0,
+was in the user's locale. During the development of @value{PVERSION} 4.0,
he modified @command{gawk} to always treat ranges in the original,
pre-POSIX fashion, unless @option{--posix} was used (@pxref{Options}).@footnote{And
thus was born the Campaign for Rational Range Interpretation (or
@@ -35006,7 +36364,7 @@ cases: the default regexp matching; with @option{--traditional} and with
This @value{SECTION} names the major contributors to @command{gawk}
and/or this @value{DOCUMENT}, in approximate chronological order:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@cindex Aho, Alfred
@cindex Weinberger, Peter
@@ -35086,8 +36444,8 @@ provided the initial port to OS/2 and its documentation.
Michal Jaegermann
provided the port to Atari systems and its documentation.
(This port is no longer supported.)
-He continues to provide portability checking with DEC Alpha
-systems, and has done a lot of work to make sure @command{gawk}
+He continues to provide portability checking,
+and has done a lot of work to make sure @command{gawk}
works on non-32-bit systems.
@item
@@ -35158,7 +36516,7 @@ provided the port to BeOS and its documentation.
@cindex Peters, Arno
Arno Peters
did the initial work to convert @command{gawk} to use
-GNU Automake and GNU @code{gettext}.
+GNU Automake and GNU @command{gettext}.
@item
@cindex Broder, Alan J.@:
@@ -35200,14 +36558,13 @@ Assaf Gordon contributed the code to implement the
@cindex Haque, John
John Haque made the following contributions:
-@itemize @minus
+@itemize @value{MINUS}
@item
The modifications to convert @command{gawk}
into a byte-code interpreter, including the debugger.
@item
-The addition of true multidimensional arrays.
-@ref{Arrays of Arrays}.
+The addition of true arrays of arrays.
@item
The additional modifications for support of arbitrary precision arithmetic.
@@ -35228,6 +36585,10 @@ The improved array sorting features were driven by John together
with Pat Rankin.
@end itemize
+@cindex Papadopoulos, Panos
+@item
+Panos Papadopoulos contributed the original text for @ref{Include Files}.
+
@item
@cindex Yawitz, Efraim
Efraim Yawitz contributed the original text for @ref{Debugger}.
@@ -35251,6 +36612,41 @@ has been working on @command{gawk} since 1988, at first
helping David Trueman, and as the primary maintainer since around 1994.
@end itemize
+@node History summary
+@appendixsec Summary
+
+@itemize @value{BULLET}
+@item
+The @command{awk} language has evolved over time. The first release
+was with V7 Unix circa 1978. In 1987 for System V Release 3.1,
+major additions, including user-defined functions, were made to the language.
+Additional changes were made for System V Release 4, in 1989.
+Since then, further minor changes have happened under the auspices of the
+POSIX standard.
+
+@item
+Brian Kernighan's @command{awk} provides a small number of extensions
+that are implemented in common with other versions of @command{awk}.
+
+@item
+@command{gawk} provides a large number of extensions over POSIX @command{awk}.
+They can be disabled with either the @option{--traditional} or @option{--posix}
+options.
+
+@item
+The interaction of POSIX locales and regexp matching in @command{gawk} has been confusing over
+the years. Today, @command{gawk} implements Rational Range Interpretation, where
+ranges of the form @samp{[a-z]} match @emph{only} the characters numerically between
+@samp{a} through @samp{z} in the machine's native character set. Usually this is ASCII
+but it can be EBCDIC on IBM S/390 systems.
+
+@item
+Many people have contributed to @command{gawk} development over the years.
+We hope that the list provided in this @value{CHAPTER} is complete and gives
+the appropriate credit where credit is due.
+
+@end itemize
+
@node Installation
@appendix Installing @command{gawk}
@@ -35276,6 +36672,7 @@ the respective ports.
* Bugs:: Reporting Problems and Bugs.
* Other Versions:: Other freely available @command{awk}
implementations.
+* Installation summary:: Summary of installation.
@end menu
@node Gawk Distribution
@@ -35295,9 +36692,9 @@ subdirectories.
@node Getting
@appendixsubsec Getting the @command{gawk} Distribution
@cindex @command{gawk}, source code@comma{} obtaining
-There are three ways to get GNU software:
+There are two ways to get GNU software:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Copy it from someone else who already has it.
@@ -35336,7 +36733,6 @@ file and then use @code{tar} to extract it. You can use the following
pipeline to produce the @command{gawk} distribution:
@example
-# Under System V, add 'o' to the tar options
gzip -d -c gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz | tar -xvpf -
@end example
@@ -35352,7 +36748,7 @@ Extracting the archive
creates a directory named @file{gawk-@value{VERSION}.@value{PATCHLEVEL}}
in the current directory.
-The distribution file name is of the form
+The distribution @value{FN} is of the form
@file{gawk-@var{V}.@var{R}.@var{P}.tar.gz}.
The @var{V} represents the major version of @command{gawk},
the @var{R} represents the current release of version @var{V}, and
@@ -35484,6 +36880,8 @@ The generated Info file for
The @command{troff} source for a manual page describing the @command{igawk}
program presented in
@ref{Igawk Program}.
+(Since @command{gawk} can do its own @code{@@include} processing,
+neither @command{igawk} nor @file{igawk.1} is installed.)
@item doc/Makefile.in
The input file used during the configuration process to generate the
@@ -35491,8 +36889,8 @@ actual @file{Makefile} for creating the documentation.
@item Makefile.am
@itemx */Makefile.am
-Files used by the GNU @command{automake} software for generating
-the @file{Makefile.in} files used by @command{autoconf} and
+Files used by the GNU Automake software for generating
+the @file{Makefile.in} files used by Autoconf and
@command{configure}.
@item Makefile.in
@@ -35528,8 +36926,6 @@ source file for this @value{DOCUMENT}. It also contains a @file{Makefile.in} fil
@file{Makefile.am} is used by GNU Automake to create @file{Makefile.in}.
The library functions from
@ref{Library Functions},
-and the @command{igawk} program from
-@ref{Igawk Program},
are included as ready-to-use files in the @command{gawk} distribution.
They are installed as part of the installation process.
The rest of the programs in this @value{DOCUMENT} are available in appropriate
@@ -35544,11 +36940,14 @@ the sample extensions included with @command{gawk}.
Files needed for building @command{gawk} on POSIX-compliant systems.
@item pc/*
-Files needed for building @command{gawk} under MS-Windows and OS/2
+Files needed for building @command{gawk} under MS-Windows
+@ifclear FOR_PRINT
+and OS/2
+@end ifclear
(@pxref{PC Installation}, for details).
@item vms/*
-Files needed for building @command{gawk} under VMS
+Files needed for building @command{gawk} under VAX/VMS and OpenVMS
(@pxref{VMS Installation}, for details).
@item test/*
@@ -35585,9 +36984,9 @@ to @file{gawk-@value{VERSION}.@value{PATCHLEVEL}}. Like most GNU software,
@command{gawk} is configured
automatically for your system by running the @command{configure} program.
This program is a Bourne shell script that is generated automatically using
-GNU @command{autoconf}.
+GNU Autoconf.
@ifnotinfo
-(The @command{autoconf} software is
+(The Autoconf software is
described fully in
@cite{Autoconf---Generating Automatic Configuration Scripts},
which can be found online at
@@ -35595,7 +36994,7 @@ which can be found online at
the Free Software Foundation's web site}.)
@end ifnotinfo
@ifinfo
-(The @command{autoconf} software is described fully starting with
+(The Autoconf software is described fully starting with
@inforef{Top, , Autoconf, autoconf,Autoconf---Generating Automatic Configuration Scripts}.)
@end ifinfo
@@ -35698,7 +37097,7 @@ improvement.
@cindex @option{--with-whiny-user-strftime} configuration option
@cindex configuration option, @code{--with-whiny-user-strftime}
@item --with-whiny-user-strftime
-Force use of the included version of the @code{strftime()}
+Force use of the included version of the C @code{strftime()}
function for deficient systems.
@end table
@@ -35745,9 +37144,9 @@ should not have. @file{custom.h} is automatically included by
@file{config.h}.
It is also possible that the @command{configure} program generated by
-@command{autoconf} will not work on your system in some other fashion.
+Autoconf will not work on your system in some other fashion.
If you do have a problem, the file @file{configure.ac} is the input for
-@command{autoconf}. You may be able to change this file and generate a
+Autoconf. You may be able to change this file and generate a
new version of @command{configure} that works on your system
(@pxref{Bugs},
for information on how to report problems in configuring @command{gawk}).
@@ -35775,16 +37174,21 @@ various non-Unix systems.
@cindex PC operating systems@comma{} @command{gawk} on, installing
@cindex operating systems, PC@comma{} @command{gawk} on, installing
This @value{SECTION} covers installation and usage of @command{gawk} on x86 machines
+@ifclear FOR_PRINT
running MS-DOS, any version of MS-Windows, or OS/2.
+@end ifclear
+@ifset FOR_PRINT
+running MS-DOS and any version of MS-Windows.
+@end ifset
In this @value{SECTION}, the term ``Windows32''
-refers to any of Microsoft Windows-95/98/ME/NT/2000/XP/Vista/7.
+refers to any of Microsoft Windows-95/98/ME/NT/2000/XP/Vista/7/8.
-The limitations of MS-DOS (and MS-DOS shells under Windows32 or OS/2) has meant
-that various ``DOS extenders'' are often used with programs such as
-@command{gawk}. The varying capabilities of Microsoft Windows 3.1
-and Windows32 can add to the confusion. For an overview of the
-considerations, please refer to @file{README_d/README.pc} in the
-distribution.
+The limitations of MS-DOS (and MS-DOS shells under the other operating
+systems) have meant that various ``DOS extenders'' are often used with
+programs such as @command{gawk}. The varying capabilities of Microsoft
+Windows 3.1 and Windows32 can add to the confusion. For an overview
+of the considerations, please refer to @file{README_d/README.pc} in
+the distribution.
@menu
* PC Binary Installation:: Installing a prepared distribution.
@@ -35798,6 +37202,7 @@ distribution.
* MSYS:: Using @command{gawk} In The MSYS Environment.
@end menu
+@ifclear FOR_PRINT
@node PC Binary Installation
@appendixsubsubsec Installing a Prepared Distribution for PC Systems
@@ -35836,13 +37241,21 @@ install-info --info-dir=x:/usr/info x:/usr/info/gawkinet.info
The binary distribution may contain a separate file containing additional
or more detailed installation instructions.
+@end ifclear
@node PC Compiling
@appendixsubsubsec Compiling @command{gawk} for PC Operating Systems
+@ifclear FOR_PRINT
@command{gawk} can be compiled for MS-DOS, Windows32, and OS/2 using the GNU
-development tools from DJ Delorie (DJGPP: MS-DOS only) or Eberhard
-Mattes (EMX: MS-DOS, Windows32 and OS/2). The file
+development tools from DJ Delorie (DJGPP: MS-DOS only), MinGW (Windows32) or Eberhard
+Mattes (EMX: MS-DOS, Windows32 and OS/2).
+@end ifclear
+@ifset FOR_PRINT
+@command{gawk} can be compiled for MS-DOS and Windows32 using the GNU
+development tools from DJ Delorie (DJGPP: MS-DOS only) or MinGW (Windows32).
+@end ifset
+The file
@file{README_d/README.pc} in the @command{gawk} distribution contains
additional notes, and @file{pc/Makefile} contains important information on
compilation options.
@@ -35864,6 +37277,7 @@ build @command{gawk} using the DJGPP tools, enter @samp{make djgpp}.
@uref{ftp://ftp.delorie.com/pub/djgpp/current/v2gnu/}.) To build a
native MS-Windows binary of @command{gawk}, type @samp{make mingw32}.
+@ifclear FOR_PRINT
@cindex compiling @command{gawk} with EMX for OS/2
The 32 bit EMX version of @command{gawk} works ``out of the box'' under OS/2.
However, it is highly recommended to use GCC 2.95.3 for the compilation.
@@ -35898,7 +37312,7 @@ and @option{--libexecdir=c:/usr/lib}.
@end ignore
@ignore
-The internal @code{gettext} library tends to be problematic. It is therefore recommended
+The internal @command{gettext} library tends to be problematic. It is therefore recommended
to use either an external one (@option{--without-included-gettext}) or to disable
NLS entirely (@option{--disable-nls}).
@end ignore
@@ -35935,8 +37349,11 @@ Ancient OS/2 ports of GNU @command{make} are not able to handle
the Makefiles of this package. If you encounter any problems with
@command{make}, try GNU Make 3.79.1 or later versions. You should
find the latest version on
-@uref{ftp://hobbes.nmsu.edu/pub/os2/}.
+@uref{ftp://hobbes.nmsu.edu/pub/os2/}.@footnote{As of May, 2014,
+this site is still there, but the author could not find a package
+for GNU Make.}
@end quotation
+@end ifclear
@node PC Testing
@appendixsubsubsec Testing @command{gawk} on PC Operating Systems
@@ -35948,6 +37365,7 @@ be converted so that they have the usual MS-DOS-style end-of-line markers.
Alternatively, run @command{make check CMP="diff -a"} to use GNU @command{diff}
in text mode instead of @command{cmp} to compare the resulting files.
+@ifclear FOR_PRINT
Most
of the tests work properly with Stewartson's shell along with the
companion utilities or appropriate GNU utilities. However, some editing of
@@ -35960,7 +37378,7 @@ On OS/2 the @code{pid} test fails because @code{spawnl()} is used instead of
@code{fork()}/@code{execl()} to start child processes.
Also the @code{mbfw1} and @code{mbprintf1} tests fail because the needed
multibyte functionality is not available.
-
+@end ifclear
@node PC Using
@appendixsubsubsec Using @command{gawk} on PC Operating Systems
@@ -35972,11 +37390,12 @@ multibyte functionality is not available.
Under MS-DOS and MS-Windows, the Cygwin and MinGW environments support
both the @samp{|&} operator and TCP/IP networking
(@pxref{TCP/IP Networking}).
+@ifclear FOR_PRINT
EMX (OS/2 only) supports at least the @samp{|&} operator.
+@end ifclear
@cindex search paths
@cindex search paths, for source files
-@cindex @command{gawk}, OS/2 version of
@cindex @command{gawk}, MS-DOS version of
@cindex @command{gawk}, MS-Windows version of
@cindex @code{;} (semicolon), @code{AWKPATH} variable and
@@ -35987,36 +37406,50 @@ program files as described in @ref{AWKPATH Variable}. However,
semicolons (rather than colons) separate elements in the @env{AWKPATH}
variable. If @env{AWKPATH} is not set or is empty, then the default
search path for MS-Windows and MS-DOS versions is
-@code{@w{".;c:/lib/awk;c:/gnu/lib/awk"}}.
+@samp{@w{.;c:/lib/awk;c:/gnu/lib/awk}}.
+@ifclear FOR_PRINT
+@cindex @command{gawk}, OS/2 version of
@cindex @code{UNIXROOT} variable, on OS/2 systems
The search path for OS/2 (32 bit, EMX) is determined by the prefix directory
(most likely @file{/usr} or @file{c:/usr}) that has been specified as an option of
-the @command{configure} script like it is the case for the Unix versions.
+the @command{configure} script as is the case for the Unix versions.
If @file{c:/usr} is the prefix directory then the default search path contains @file{.}
and @file{c:/usr/share/awk}.
Additionally, to support binary distributions of @command{gawk} for OS/2
-systems whose drive @samp{c:} might not support long file names or might not exist
+systems whose drive @samp{c:} might not support long @value{FN}s or might not exist
at all, there is a special environment variable. If @env{UNIXROOT} specifies
a drive then this specific drive is also searched for program files.
E.g., if @env{UNIXROOT} is set to @file{e:} the complete default search path is
-@code{@w{".;c:/usr/share/awk;e:/usr/share/awk"}}.
+@samp{@w{.;c:/usr/share/awk;e:/usr/share/awk}}.
An @command{sh}-like shell (as opposed to @command{command.com} under MS-DOS
or @command{cmd.exe} under MS-Windows or OS/2) may be useful for @command{awk} programming.
The DJGPP collection of tools includes an MS-DOS port of Bash,
and several shells are available for OS/2, including @command{ksh}.
+@end ifclear
+@ifset FOR_PRINT
+An @command{sh}-like shell (as opposed to @command{command.com} under MS-DOS
+or @command{cmd.exe} under MS-Windows) may be useful for @command{awk} programming.
+The DJGPP collection of tools includes an MS-DOS port of Bash.
+@end ifset
@cindex common extensions, @code{BINMODE} variable
@cindex extensions, common@comma{} @code{BINMODE} variable
@cindex differences in @command{awk} and @command{gawk}, @code{BINMODE} variable
@cindex @code{BINMODE} variable
-Under MS-Windows, OS/2 and MS-DOS, @command{gawk} (and many other text programs) silently
-translate end-of-line @code{"\r\n"} to @code{"\n"} on input and @code{"\n"}
-to @code{"\r\n"} on output. A special @code{BINMODE} variable @value{COMMONEXT}
+@ifclear FOR_PRINT
+Under MS-Windows, OS/2 and MS-DOS,
+@end ifclear
+@ifset FOR_PRINT
+Under MS-Windows and MS-DOS,
+@end ifset
+@command{gawk} (and many other text programs) silently
+translate end-of-line @samp{\r\n} to @samp{\n} on input and @samp{\n}
+to @samp{\r\n} on output. A special @code{BINMODE} variable @value{COMMONEXT}
allows control over these translations and is interpreted as follows:
-@itemize @bullet
+@itemize @value{BULLET}
@item
If @code{BINMODE} is @code{"r"}, or one,
then
@@ -36054,7 +37487,7 @@ The name @code{BINMODE} was chosen to match @command{mawk}
@command{mawk} adds a @samp{-W BINMODE=@var{N}} option and an environment
variable that can set @code{BINMODE}, @code{RS}, and @code{ORS}. The
files @file{binmode[1-3].awk} (under @file{gnu/lib/awk} in some of the
-prepared distributions) have been chosen to match @command{mawk}'s @samp{-W
+prepared binary distributions) have been chosen to match @command{mawk}'s @samp{-W
BINMODE=@var{N}} option. These can be changed or discarded; in particular,
the setting of @code{RS} giving the fewest ``surprises'' is open to debate.
@command{mawk} uses @samp{RS = "\r\n"} if binary mode is set on read, which is
@@ -36082,7 +37515,7 @@ The following changes the record separator to @code{"\r\n"} and sets binary
mode on reads, but does not affect the mode on standard input:
@example
-gawk -v RS="\r\n" --source "BEGIN @{ BINMODE = 1 @}" @dots{}
+gawk -v RS="\r\n" -e "BEGIN @{ BINMODE = 1 @}" @dots{}
@end example
@noindent
@@ -36130,7 +37563,7 @@ been ported to MS-Windows that expect @command{gawk} to do automatic
translation of @code{"\r\n"}, since it won't. Caveat Emptor!
@node VMS Installation
-@appendixsubsec How to Compile and Install @command{gawk} on VMS
+@appendixsubsec How to Compile and Install @command{gawk} on VAX/VMS and OpenVMS
@c based on material from Pat Rankin <rankin@eql.caltech.edu>
@c now rankin@pactechdata.com
@@ -36178,11 +37611,11 @@ or:
$ @kbd{MMK/DESCRIPTION=[.vms]descrip.mms gawk}
@end example
-@code{MMK} is an open source, free, near-clone of @code{MMS} and
-can better handle @code{ODS-5} volumes with upper- and lowercase filenames.
-@code{MMK} is available from @uref{https://github.com/endlesssoftware/mmk}.
+@command{MMK} is an open source, free, near-clone of @command{MMS} and
+can better handle ODS-5 volumes with upper- and lowercase @value{FN}s.
+@command{MMK} is available from @uref{https://github.com/endlesssoftware/mmk}.
-With @code{ODS-5} volumes and extended parsing enabled, the case of the target
+With ODS-5 volumes and extended parsing enabled, the case of the target
parameter may need to be exact.
@command{gawk} has been tested under VAX/VMS 7.3 and Alpha/VMS 7.3-1
@@ -36191,8 +37624,8 @@ The most recent builds used HP C V7.3 on Alpha VMS 8.3 and both
Alpha and IA64 VMS 8.4 used HP C 7.3.@footnote{The IA64 architecture
is also known as ``Itanium.''}
-The @file{[.vms]gawk_build_steps.txt} provides information on how to build
-@command{gawk} into a PCSI kit that is compatible with the GNV product.
+@xref{VMS GNV}, for information on building
+@command{gawk} as a PCSI kit that is compatible with the GNV product.
@node VMS Dynamic Extensions
@appendixsubsubsec Compiling @command{gawk} Dynamic Extensions on VMS
@@ -36310,11 +37743,11 @@ provides information about both the @command{gawk} implementation and the
The logical name @samp{AWK_LIBRARY} can designate a default location
for @command{awk} program files. For the @option{-f} option, if the specified
-file name has no device or directory path information in it, @command{gawk}
+@value{FN} has no device or directory path information in it, @command{gawk}
looks in the current directory first, then in the directory specified
by the translation of @samp{AWK_LIBRARY} if the file is not found.
If, after searching in both directories, the file still is not found,
-@command{gawk} appends the suffix @samp{.awk} to the filename and retries
+@command{gawk} appends the suffix @samp{.awk} to the @value{FN} and retries
the file search. If @samp{AWK_LIBRARY} has no definition, a default value
of @samp{SYS$LIBRARY:} is used for it.
@@ -36343,7 +37776,7 @@ One side effect of dual command-line parsing is that if there is only a
single parameter (as in the quoted string program above), the command
becomes ambiguous. To work around this, the normally optional @option{--}
flag is required to force Unix-style parsing rather than @code{DCL} parsing. If any
-other dash-type options (or multiple parameters such as data files to
+other dash-type options (or multiple parameters such as @value{DF}s to
process) are present, there is no ambiguity and @option{--} can be omitted.
@cindex exit status, of VMS
@@ -36397,7 +37830,7 @@ The VMS GNV package provides a build environment similar to POSIX with ports
of a collection of open source tools. The @command{gawk} found in the GNV
base kit is an older port. Currently the GNV project is being reorganized
to supply individual PCSI packages for each component.
-See @uref{https://sourceforge.net/p/gnv/wiki/InstallingGNVPackages/}.
+See @w{@uref{https://sourceforge.net/p/gnv/wiki/InstallingGNVPackages/}.}
The normal build procedure for @command{gawk} produces a program that
is suitable for use with GNV.
@@ -36452,7 +37885,7 @@ define a symbol, as follows:
$ @kbd{gawk :== $sys$common:[syshlp.examples.tcpip.snmp]gawk.exe}
@end example
-This is apparently version 2.15.6, which is extremely old. We
+This is apparently @value{PVERSION} 2.15.6, which is extremely old. We
recommend compiling and using the current version.
@c ENDOFRANGE opgawx
@@ -36481,8 +37914,8 @@ what you're trying to do. If it's not clear whether you should be able
to do something or not, report that too; it's a bug in the documentation!
Before reporting a bug or trying to fix it yourself, try to isolate it
-to the smallest possible @command{awk} program and input data file that
-reproduces the problem. Then send us the program and data file,
+to the smallest possible @command{awk} program and input @value{DF} that
+reproduces the problem. Then send us the program and @value{DF},
some idea of what kind of Unix system you're using,
the compiler you used to compile @command{gawk}, and the exact results
@command{gawk} gave you. Also say what you expected to occur; this helps
@@ -36498,12 +37931,14 @@ Once you have a precise problem, send email to
@EMAIL{bug-gawk@@gnu.org,bug-gawk at gnu dot org}.
@cindex Robbins, Arnold
-Using this address automatically sends a copy of your
-mail to me. If necessary, I can be reached directly at
+The @command{gawk} maintainers subscribe to this address and
+thus they will receive your bug report.
+If necessary, the primary maintainer can be reached directly at
@EMAIL{arnold@@skeeve.com,arnold at skeeve dot com}.
The bug reporting address is preferred since the
email list is archived at the GNU Project.
-@emph{All email should be in English, since that is my native language.}
+@emph{All email should be in English. This is the only language
+understood in common by all the maintainers.}
@cindex @code{comp.lang.awk} newsgroup
@quotation CAUTION
@@ -36551,11 +37986,13 @@ as follows:
@cindex Rankin, Pat
@cindex Malmberg, John
@cindex Pitts, Dave
-@multitable {MS-Windows with MINGW} {123456789012345678901234567890123456789001234567890}
+@multitable {MS-Windows with MinGW} {123456789012345678901234567890123456789001234567890}
@item MS-DOS with DJGPP @tab Scott Deifik, @EMAIL{scottd.mail@@sbcglobal.net,scottd dot mail at sbcglobal dot net}.
-@item MS-Windows with MINGW @tab Eli Zaretskii, @EMAIL{eliz@@gnu.org,eliz at gnu dot org}.
+@item MS-Windows with MinGW @tab Eli Zaretskii, @EMAIL{eliz@@gnu.org,eliz at gnu dot org}.
+@c Leave this in the print version on purpose.
+@c OS/2 is not mentioned anywhere else in the print version though.
@item OS/2 @tab Andreas Buening, @EMAIL{andreas.buening@@nexgo.de,andreas dot buening at nexgo dot de}.
@item VMS @tab Pat Rankin, @EMAIL{r.pat.rankin@@gmail.com,r dot pat dot rankin at gmail dot com}, and
@@ -36639,8 +38076,13 @@ for a list of extensions in this @command{awk} that are not in POSIX @command{aw
@cindex source code, @command{mawk}
@item @command{mawk}
Michael Brennan wrote an independent implementation of @command{awk},
-called @command{mawk}. It is available under the GPL
-(@pxref{Copying}),
+called @command{mawk}. It is available under the
+@ifclear FOR_PRINT
+GPL (@pxref{Copying}),
+@end ifclear
+@ifset FOR_PRINT
+GPL,
+@end ifset
just as @command{gawk} is.
The original distribution site for the @command{mawk} source code
@@ -36686,7 +38128,7 @@ since approximately 2003.
@cindex source code, @command{pawk}
@item @command{pawk}
Nelson H.F.@: Beebe at the University of Utah has modified
-Brian Kernighan's @command{awk} to provide timing and profiling information.
+BWK @command{awk} to provide timing and profiling information.
It is different from @command{gawk} with the @option{--profile} option
(@pxref{Profiling}),
in that it uses CPU-based profiling, not line-count
@@ -36709,10 +38151,10 @@ information, see the @uref{http://busybox.net, project's home page}.
@cindex Solaris, POSIX-compliant @command{awk}
@cindex source code, Solaris @command{awk}
@item The OpenSolaris POSIX @command{awk}
-The version of @command{awk} in @file{/usr/xpg4/bin} on Solaris is
-more-or-less POSIX-compliant. It is based on the @command{awk} from
-Mortice Kern Systems for PCs.
-This author was able to make it compile and work under GNU/Linux
+The versions of @command{awk} in @file{/usr/xpg4/bin} and
+@file{/usr/xpg6/bin} on Solaris are more-or-less POSIX-compliant.
+They are based on the @command{awk} from Mortice Kern Systems for PCs.
+This author was able to make this code compile and work under GNU/Linux
with 1--2 hours of work. Making it more generally portable (using
GNU Autoconf and/or Automake) would take more work, and this
has not been done, at least to our knowledge.
@@ -36749,8 +38191,7 @@ This is an embeddable @command{awk} interpreter derived from
This is a Python module that claims to bring @command{awk}-like
features to Python. See @uref{https://github.com/alecthomas/pawk}
for more information. (This is not related to Nelson Beebe's
-modified version of Brian Kernighan's @command{awk},
-described earlier.)
+modified version of BWK @command{awk}, described earlier.)
@item @w{QSE Awk}
@cindex QSE Awk
@@ -36767,15 +38208,56 @@ under the GPL. It has a large number of extensions over standard
See @uref{http://www.quiktrim.org/QTawk.html} for more information,
including the manual and a download link.
+The project may also be frozen; no new code changes have been made
+since approximately 2008.
+
@item Other Versions
See also the @uref{http://en.wikipedia.org/wiki/Awk_language#Versions_and_implementations,
Wikipedia article}, for information on additional versions.
@end table
+@c ENDOFRANGE awkim
+
+@node Installation summary
+@appendixsec Summary
+
+@itemize @value{BULLET}
+@item
+The @command{gawk} distribution is available from the GNU Project's main
+distribution site, @code{ftp.gnu.org}. The canonical build recipe is:
+
+@example
+wget http://ftp.gnu.org/gnu/gawk/gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz
+tar -xvpzf gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz
+cd gawk-@value{VERSION}.@value{PATCHLEVEL}
+./configure && make && make check
+@end example
+
+@item
+@command{gawk} may be built on non-POSIX systems as well. The currently
+supported systems are MS-Windows using DJGPP, MSYS, MinGW, and Cygwin,
+@ifclear FOR_PRINT
+OS/2 using EMX,
+@end ifclear
+and both Vax/VMS and OpenVMS.
+Instructions for each system are included in this @value{CHAPTER}.
+
+@item
+Bug reports should be sent via email to @email{bug-gawk@@gnu.org}.
+Bug reports should be in English, and should include the version of @command{gawk},
+how it was compiled, and a short program and @value{DF} which demonstrate
+the problem.
+
+@item
+There are a number of other freely available @command{awk}
+implementations. Many are POSIX-compliant; others are less so.
+
+@end itemize
+
@c ENDOFRANGE gligawk
@c ENDOFRANGE ingawk
-@c ENDOFRANGE awkim
+@ifclear FOR_PRINT
@node Notes
@appendix Implementation Notes
@c STARTOFRANGE gawii
@@ -36795,6 +38277,7 @@ maintainers of @command{gawk}. Everything in it applies specifically to
* Implementation Limitations:: Some limitations of the implementation.
* Extension Design:: Design notes about the extension API.
* Old Extension Mechanism:: Some compatibility for old extensions.
+* Notes summary:: Summary of implementation notes.
@end menu
@node Compatibility Mode
@@ -36815,7 +38298,7 @@ is one more option available on the command line:
@table @code
@item -Y
@itemx --parsedebug
-Prints out the parse stack information as the program is being parsed.
+Print out the parse stack information as the program is being parsed.
@end table
This option is intended only for serious @command{gawk} developers
@@ -36839,15 +38322,15 @@ as well as any considerations you should bear in mind.
@command{gawk}.
* New Ports:: Porting @command{gawk} to a new operating
system.
-* Derived Files:: Why derived files are kept in the
- @command{git} repository.
+* Derived Files:: Why derived files are kept in the Git
+ repository.
@end menu
@node Accessing The Source
@appendixsubsec Accessing The @command{gawk} Git Repository
As @command{gawk} is Free Software, the source code is always available.
-@ref{Gawk Distribution}, describes how to get and build the formal,
+@DBREF{Gawk Distribution} describes how to get and build the formal,
released versions of @command{gawk}.
@cindex @command{git} utility
@@ -36864,8 +38347,8 @@ git clone git://git.savannah.gnu.org/gawk.git
@end example
@noindent
-This will clone the @command{gawk} repository. If you are behind a
-firewall that will not allow you to use the Git native protocol, you
+This clones the @command{gawk} repository. If you are behind a
+firewall that does not allow you to use the Git native protocol, you
can still access the repository using:
@example
@@ -36893,7 +38376,7 @@ that has a Git plug-in for working with Git repositories.
You are free to add any new features you like to @command{gawk}.
However, if you want your changes to be incorporated into the @command{gawk}
distribution, there are several steps that you need to take in order to
-make it possible to include your changes:
+make it possible to include them:
@enumerate 1
@item
@@ -36915,8 +38398,9 @@ or @EMAIL{assign@@gnu.org,assign at gnu dot org}.
@item
Get the latest version.
It is much easier for me to integrate changes if they are relative to
-the most recent distributed version of @command{gawk}. If your version of
-@command{gawk} is very old, I may not be able to integrate them at all.
+the most recent distributed version of @command{gawk}, or better yet,
+relative to the latest code in the Git repository. If your version of
+@command{gawk} is very old, I may not be able to integrate your changes at all.
(@xref{Getting},
for information on getting the latest version of @command{gawk}.)
@@ -36943,7 +38427,7 @@ using the traditional ``K&R'' style, particularly as regards to the placement
of braces and the use of TABs. In brief, the coding rules for @command{gawk}
are as follows:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Use ANSI/ISO style (prototype) function headers when defining functions.
@@ -37047,6 +38531,7 @@ not do so, particularly if there are lots of changes.
Include an entry for the @file{ChangeLog} file with your submission.
This helps further minimize the amount of work I have to do,
making it easier for me to accept patches.
+It is simplest if you just make this part of your diff.
@end enumerate
Although this sounds like a lot of work, please remember that while you
@@ -37104,10 +38589,39 @@ A number of the files that come with @command{gawk} are maintained by other
people. Thus, you should not change them
unless it is for a very good reason; i.e., changes are not out of the
question, but changes to these files are scrutinized extra carefully.
-The files are @file{dfa.c}, @file{dfa.h}, @file{getopt1.c}, @file{getopt.c},
-@file{getopt.h}, @file{install-sh}, @file{mkinstalldirs}, @file{regcomp.c},
-@file{regex.c}, @file{regexec.c}, @file{regexex.c}, @file{regex.h},
-@file{regex_internal.c}, and @file{regex_internal.h}.
+The files are
+@file{dfa.c},
+@file{dfa.h},
+@file{getopt.c},
+@file{getopt.h},
+@file{getopt1.c},
+@file{getopt_int.h},
+@file{gettext.h},
+@file{regcomp.c},
+@file{regex.c},
+@file{regex.h},
+@file{regex_internal.c},
+@file{regex_internal.h},
+and
+@file{regexec.c}.
+
+@item
+A number of other files are provided by the GNU
+Autotools (Autoconf, Automake, and GNU @command{gettext}).
+You should not change them either, unless it is for a very
+good reason. The files are
+@file{ABOUT-NLS},
+@file{config.guess},
+@file{config.rpath},
+@file{config.sub},
+@file{depcomp},
+@file{INSTALL},
+@file{install-sh},
+@file{missing},
+@file{mkinstalldirs},
+@file{xalloc.h},
+and
+@file{ylwrap}.
@item
Be willing to continue to maintain the port.
@@ -37158,23 +38672,23 @@ In the code that you supply and maintain, feel free to use a
coding style and brace layout that suits your taste.
@node Derived Files
-@appendixsubsec Why Generated Files Are Kept In @command{git}
+@appendixsubsec Why Generated Files Are Kept In Git
@c STARTOFRANGE gawkgit
-@cindex @command{git}, use of for @command{gawk} source code
+@cindex Git, use of for @command{gawk} source code
@c From emails written March 22, 2012, to the gawk developers list.
-If you look at the @command{gawk} source in the @command{git}
+If you look at the @command{gawk} source in the Git
repository, you will notice that it includes files that are automatically
generated by GNU infrastructure tools, such as @file{Makefile.in} from
-@command{automake} and even @file{configure} from @command{autoconf}.
+Automake and even @file{configure} from Autoconf.
This is different from many Free Software projects that do not store
the derived files, because that keeps the repository less cluttered,
and it is easier to see the substantive changes when comparing versions
and trying to understand what changed between commits.
-However, there are two reasons why the @command{gawk} maintainer
+However, there are several reasons why the @command{gawk} maintainer
likes to have everything in the repository.
First, because it is then easy to reproduce any given version completely,
@@ -37193,11 +38707,10 @@ there a guarantee that we could find that @command{bison} version? Or that
@emph{it} would build?)
If the repository has all the generated files, then it's easy to just check
-them out and build. (Or @emph{easier}, depending upon how far back we go.
-@code{:-)})
+them out and build. (Or @emph{easier}, depending upon how far back we go.)
And that brings us to the second (and stronger) reason why all the files
-really need to be in @command{git}. It boils down to who do you cater
+really need to be in Git. It boils down to who do you cater
to---the @command{gawk} developer(s), or the user who just wants to check
out a version and try it out?
@@ -37206,10 +38719,10 @@ wants it to be possible for any interested @command{awk} user in the
world to just clone the repository, check out the branch of interest and
build it. Without their having to have the correct version(s) of the
autotools.@footnote{There is one GNU program that is (in our opinion)
-severely difficult to bootstrap from the @command{git} repository. For
-example, on the author's old (but still working) PowerPC macintosh with
+severely difficult to bootstrap from the Git repository. For
+example, on the author's old (but still working) PowerPC Macintosh with
Mac OS X 10.5, it was necessary to bootstrap a ton of software, starting
-with @command{git} itself, in order to try to work with the latest code.
+with Git itself, in order to try to work with the latest code.
It's not pleasant, and especially on older systems, it's a big waste
of time.
@@ -37232,18 +38745,26 @@ This is extremely important for the @code{master} and
Further, the @command{gawk} maintainer would argue that it's also
important for the @command{gawk} developers. When he tried to check out
-the @code{xgawk} branch@footnote{A branch created by one of the other
+the @code{xgawk} branch@footnote{A branch (since removed) created by one of the other
developers that did not include the generated files.} to build it, he
couldn't. (No @file{ltmain.sh} file, and he had no idea how to create it,
and that was not the only problem.)
He felt @emph{extremely} frustrated. With respect to that branch,
the maintainer is no different than Jane User who wants to try to build
-@code{gawk-4.0-stable} or @code{master} from the repository.
+@code{gawk-4.1-stable} or @code{master} from the repository.
Thus, the maintainer thinks that it's not just important, but critical,
that for any given branch, the above incantation @emph{just works}.
+@c Added 9/2014:
+A third reason to have all the files is that without them, using @samp{git
+bisect} to try to find the commit that introduced a bug is exceedingly
+difficult. The maintainer tried to do that on another project that
+requires running bootstrapping scripts just to create @file{configure}
+and so on; it was really painful. When the repository is self-contained,
+using @samp{git bisect} in it is very easy.
+
@c So - that's my reasoning and philosophy.
What are some of the consequences and/or actions to take?
@@ -37259,29 +38780,29 @@ It's the maintainer's job to merge them and he will deal with it.
@item
He is really good at @samp{git diff x y > /tmp/diff1 ; gvim /tmp/diff1} to
-remove the diffs that aren't of interest in order to review code. @code{:-)}
+remove the diffs that aren't of interest in order to review code.
@end enumerate
@item
It would certainly help if everyone used the same versions of the GNU tools
as he does, which in general are the latest released versions of
-@command{automake},
-@command{autoconf},
+Automake,
+Autoconf,
@command{bison},
and
-@command{gettext}.
+GNU @command{gettext}.
@ignore
-If it would help if I sent out an "I just upgraded to version x.y
-of tool Z" kind of message to this list, I can do that. Up until
+If it would help if I sent out an ``I just upgraded to version x.y
+of tool Z'' kind of message to this list, I can do that. Up until
now it hasn't been a real issue since I'm the only one who's been
dorking with the configuration machinery.
@end ignore
-@enumerate A
-@item
+@c @enumerate A
+@c @item
Installing from source is quite easy. It's how the maintainer worked for years
-under Fedora.
+(and still works).
He had @file{/usr/local/bin} at the front of his @env{PATH} and just did:
@example
@@ -37292,10 +38813,11 @@ cd @var{package}-@var{x}.@var{y}.@var{z}
make install # as root
@end example
-@item
+@c @item
+@ignore
These days the maintainer uses Ubuntu 12.04 which is medium current, but
-he is already doing the above for @command{autoconf}, @command{automake}
-and @command{bison}.
+he is already doing the above for Automake, Autoconf, and @command{bison}.
+@end ignore
@ignore
(C. Rant: Recent Linux versions with GNOME 3 really suck. What
@@ -37303,7 +38825,7 @@ and @command{bison}.
me to Ubuntu, but Ubuntu 11.04 and 11.10 are totally unusable from
a UI perspective. Bleah.)
@end ignore
-@end enumerate
+@c @end enumerate
@ignore
@item
@@ -37319,7 +38841,7 @@ the "real" changes and the second with "everything else needed for
Most of the above was originally written by the maintainer to other
@command{gawk} developers. It raised the objection from one of
the developers ``@dots{} that anybody pulling down the source from
-@command{git} is not an end user.''
+Git is not an end user.''
However, this is not true. There are ``power @command{awk} users''
who can build @command{gawk} (using the magic incantation shown previously)
@@ -37329,10 +38851,10 @@ kept buildable all the time.
It was then suggested that there be a @command{cron} job to create
nightly tarballs of ``the source.'' Here, the problem is that there
are multiple source trees, corresponding to the various branches! So,
-nightly tar balls aren't the answer, especially as the repository can go
+nightly tarballs aren't the answer, especially as the repository can go
for weeks without significant change being introduced.
-Fortunately, the @command{git} server can meet this need. For any given
+Fortunately, the Git server can meet this need. For any given
branch named @var{branchname}, use:
@example
@@ -37392,9 +38914,10 @@ Larry
@author Larry Wall
@end quotation
-The @file{TODO} file in the @command{gawk} Git repository lists possible
-future enhancements. Some of these relate to the source code, and others
-to possible new features. Please see that file for the list.
+The @file{TODO} file in the @code{master} branch of the @command{gawk}
+Git repository lists possible future enhancements. Some of these relate
+to the source code, and others to possible new features. Please see
+that file for the list.
@xref{Additions},
if you are interested in tackling any of the projects listed there.
@@ -37408,7 +38931,7 @@ different limits.
@multitable @columnfractions .40 .60
@headitem Item @tab Limit
@item Characters in a character class @tab 2^(number of bits per byte)
-@item Length of input record @tab @code{MAX_INT }
+@item Length of input record @tab @code{MAX_INT}
@item Length of output record @tab Unlimited
@item Length of source line @tab Unlimited
@item Number of fields in a record @tab @code{MAX_LONG}
@@ -37417,9 +38940,9 @@ different limits.
@item Number of input records total @tab @code{MAX_LONG}
@item Number of pipe redirections @tab min(number of processes per user, number of open files)
@item Numeric values @tab Double-precision floating point (if not using MPFR)
-@item Size of a field @tab @code{MAX_INT }
-@item Size of a literal string @tab @code{MAX_INT }
-@item Size of a printf string @tab @code{MAX_INT }
+@item Size of a field @tab @code{MAX_INT}
+@item Size of a literal string @tab @code{MAX_INT}
+@item Size of a printf string @tab @code{MAX_INT}
@end multitable
@node Extension Design
@@ -37454,7 +38977,7 @@ mechanism was bolted onto the side and was not really well thought out.
The old extension mechanism had several problems:
-@itemize @bullet
+@itemize @value{BULLET}
@item
It depended heavily upon @command{gawk} internals. Any time the
@code{NODE} structure@footnote{A critical central data structure
@@ -37466,8 +38989,8 @@ documentation in this @value{DOCUMENT}, but it was quite minimal.
@item
Being able to call into @command{gawk} from an extension required linker
facilities that are common on Unix-derived systems but that did
-not work on Windows systems; users wanting extensions on Windows
-had to statically link them into @command{gawk}, even though Windows supports
+not work on MS-Windows systems; users wanting extensions on MS-Windows
+had to statically link them into @command{gawk}, even though MS-Windows supports
dynamic loading of shared objects.
@item
@@ -37490,7 +39013,7 @@ project is provided in @ref{gawkextlib}.
Some goals for the new API were:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The API should be independent of @command{gawk} internals. Changes in
@command{gawk} internals should not be visible to the writer of an
@@ -37505,7 +39028,7 @@ The API should enable extensions written in C or C++ to have roughly the
same ``appearance'' to @command{awk}-level code as @command{awk}
functions do. This means that extensions should have:
-@itemize @minus
+@itemize @value{MINUS}
@item
The ability to access function parameters.
@@ -37521,13 +39044,13 @@ in order to loop over all the element in an easy fashion for C code.
@item
The ability to create arrays (including @command{gawk}'s true
-multidimensional arrays).
+arrays of arrays).
@end itemize
@end itemize
Some additional important goals were:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The API should use only features in ISO C 90, so that extensions
can be written using the widest range of C and C++ compilers. The header
@@ -37542,15 +39065,15 @@ The API mechanism should not require access to @command{gawk}'s
symbols@footnote{The @dfn{symbols} are the variables and functions
defined inside @command{gawk}. Access to these symbols by code
external to @command{gawk} loaded dynamically at runtime is
-problematic on Windows.} by the compile-time or dynamic linker,
-in order to enable creation of extensions that also work on Windows.
+problematic on MS-Windows.} by the compile-time or dynamic linker,
+in order to enable creation of extensions that also work on MS-Windows.
@end itemize
During development, it became clear that there were other features
that should be available to extensions, which were also subsequently
provided:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Extensions should have the ability to hook into @command{gawk}'s
I/O redirection mechanism. In particular, the @command{xgawk}
@@ -37561,7 +39084,7 @@ two-way I/O.
@item
An extension should be able to provide a ``call back'' function
-to perform clean up actions when @command{gawk} exits.
+to perform cleanup actions when @command{gawk} exits.
@item
An extension should be able to provide a version string so that
@@ -37631,7 +39154,7 @@ to provide a minimal yet powerful set of features for creating extensions.
The API can later be expanded, in two ways:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@command{gawk} passes an ``extension id'' into the extension when it
first loads the extension. The extension then passes this id back
@@ -37654,12 +39177,12 @@ to any of the above.
@ref{Dynamic Extensions}, describes the supported API and mechanisms
for writing extensions for @command{gawk}. This API was introduced
-in version 4.1. However, for many years @command{gawk}
+in @value{PVERSION} 4.1. However, for many years @command{gawk}
provided an extension mechanism that required knowledge of @command{gawk}
internals and that was not as well designed.
-In order to provide a transition period, @command{gawk} version
-4.1 continues to support the original extension mechanism.
+In order to provide a transition period, @command{gawk} @value{PVERSION} 4.1
+continues to support the original extension mechanism.
This will be true for the life of exactly one major release. This support
will be withdrawn, and removed from the source code, at the next major
release.
@@ -37685,6 +39208,42 @@ The @command{gawk} development team strongly recommends that you
convert any old extensions that you may have to use the new API
described in @ref{Dynamic Extensions}.
+@node Notes summary
+@appendixsec Summary
+
+@itemize @value{BULLET}
+@item
+@command{gawk}'s extensions can be disabled with either the
+@option{--traditional} option or with the @option{--posix} option.
+The @option{--parsedebug} option is available if @command{gawk} is
+compiled with @samp{-DDEBUG}.
+
+@item
+The source code for @command{gawk} is maintained in a publicly
+accessible Git repository. Anyone may check it out and view the source.
+
+@item
+Contributions to @command{gawk} are welcome. Following the steps
+outlined in this @value{CHAPTER} will make it easier to integrate
+your contributions into the code base.
+This applies both to new feature contributions and to ports to
+additional operating systems.
+
+@item
+@command{gawk} has some limits---generally those that are imposed by
+the machine architecture.
+
+@item
+The extension API design was intended to solve a number of problems
+with the previous extension mechanism, enable features needed by
+the @code{xgawk} project, and provide binary compatibility going forward.
+
+@item
+The previous extension mechanism is still supported in @value{PVERSION} 4.1
+of @command{gawk}, but it @emph{will} be removed in the next major release.
+
+@end itemize
+
@c ENDOFRANGE impis
@c ENDOFRANGE gawii
@@ -37733,9 +39292,11 @@ See @inlineraw{docbook, <xref linkend="figure-general-flow"/>}.
@end ifnotdocbook
@docbook
-<figure id="figure-general-flow">
+<figure id="figure-general-flow" float="0">
<title>General Program Flow</title>
-<graphic fileref="general-program.eps"/>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="general-program.png" format="PNG"/></imageobject>
+</mediaobject>
</figure>
@end docbook
@@ -37758,7 +39319,7 @@ of the following, very basic set of steps,
as shown in @ref{figure-process-flow}:
@end ifnotdocbook
@ifdocbook
-as shown in @inlineraw{docbook <xref linkend="figure-process-flow"/>}:
+as shown in @inlineraw{docbook, <xref linkend="figure-process-flow"/>}:
@end ifdocbook
@ifnotdocbook
@@ -37774,9 +39335,11 @@ as shown in @inlineraw{docbook <xref linkend="figure-process-flow"/>}:
@end ifnotdocbook
@docbook
-<figure id="figure-process-flow">
+<figure id="figure-process-flow" float="0">
<title>Basic Program Stages</title>
-<graphic fileref="process-flow.eps"/>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="process-flow.png" format="PNG"/></imageobject>
+</mediaobject>
</figure>
@end docbook
@@ -37874,7 +39437,7 @@ Individual variables, as well as numeric and string variables, are
referred to as @dfn{scalar} values.
Groups of values, such as arrays, are not scalars.
-@ref{General Arithmetic}, provided a basic introduction to numeric
+@ref{Computer Arithmetic}, provided a basic introduction to numeric
types (integer and floating-point) and how they are used in a computer.
Please review that information, including a number of caveats that
were presented.
@@ -37890,14 +39453,14 @@ like this: @code{""}.
Humans are used to working in decimal; i.e., base 10. In base 10,
numbers go from 0 to 9, and then ``roll over'' into the next
-column. (Remember grade school? 42 is 4 times 10 plus 2.)
+column. (Remember grade school? 42 = 4 x 10 + 2.)
There are other number bases though. Computers commonly use base 2
or @dfn{binary}, base 8 or @dfn{octal}, and base 16 or @dfn{hexadecimal}.
In binary, each column represents two times the value in the column to
its right. Each column may contain either a 0 or a 1.
-Thus, binary 1010 represents 1 times 8, plus 0 times 4, plus 1 times 2,
-plus 0 times 1, or decimal 10.
+Thus, binary 1010 represents (1 x 8) + (0 x 4) + (1 x 2)
++ (0 x 1), or decimal 10.
Octal and hexadecimal are discussed more in
@ref{Nondecimal-numbers}.
@@ -37934,7 +39497,7 @@ Where it makes sense, POSIX @command{awk} is compatible with 1999 ISO C.
@item Action
A series of @command{awk} statements attached to a rule. If the rule's
pattern matches an input record, @command{awk} executes the
-rule's action. Actions are always enclosed in curly braces.
+rule's action. Actions are always enclosed in braces.
(@xref{Action Overview}.)
@cindex Spencer, Henry
@@ -38039,7 +39602,7 @@ Named after the English mathematician Boole. See also ``Logical Expression.''
@item Bourne Shell
The standard shell (@file{/bin/sh}) on Unix and Unix-like systems,
-originally written by Steven R.@: Bourne.
+originally written by Steven R.@: Bourne at Bell Laboratories.
Many shells (Bash, @command{ksh}, @command{pdksh}, @command{zsh}) are
generally upwardly compatible with the Bourne shell.
@@ -38089,7 +39652,9 @@ Changing some of them affects @command{awk}'s running environment.
(@xref{Built-in Variables}.)
@item Braces
-See ``Curly Braces.''
+The characters @samp{@{} and @samp{@}}. Braces are used in
+@command{awk} for delimiting actions, compound statements, and function
+bodies.
@item C
The system programming language that most GNU software is written in. The
@@ -38114,7 +39679,7 @@ or place. The most common character set in use today is ASCII (American
Standard Code for Information Interchange). Many European
countries use an extension of ASCII known as ISO-8859-1 (ISO Latin-1).
The @uref{http://www.unicode.org, Unicode character set} is
-becoming increasingly popular and standard, and is particularly
+increasingly popular and standard, and is particularly
widely used on GNU/Linux systems.
@cindex Kernighan, Brian
@@ -38127,10 +39692,11 @@ It was written in @command{awk}
by Brian Kernighan and Jon Bentley, and is available from
@uref{http://netlib.sandia.gov/netlib/typesetting/chem.gz}.
+@cindex McIlroy, Doug
@cindex cookie
@item Cookie
A peculiar goodie, token, saying or remembrance
-produced by or presented to a program. (With thanks to Doug McIlroy.)
+produced by or presented to a program. (With thanks to Professor Doug McIlroy.)
@ignore
From: Doug McIlroy <doug@cs.dartmouth.edu>
Date: Sat, 13 Oct 2012 19:55:25 -0400
@@ -38208,9 +39774,7 @@ statements, and in patterns to select which input records to process.
(@xref{Typing and Comparison}.)
@item Curly Braces
-The characters @samp{@{} and @samp{@}}. Curly braces are used in
-@command{awk} for delimiting actions, compound statements, and function
-bodies.
+See ``Braces.''
@cindex dark corner
@item Dark Corner
@@ -38255,7 +39819,7 @@ ordinary expression. It could be a string constant, such as
(@xref{Computed Regexps}.)
@item Environment
-A collection of strings, of the form @var{name}@code{=}@code{val}, that each
+A collection of strings, of the form @samp{@var{name}=@var{val}}, that each
program has available to it. Users generally place values into the
environment in order to provide information to various programs. Typical
examples are the environment variables @env{HOME} and @env{PATH}.
@@ -38309,8 +39873,8 @@ this is just a number that can have a fractional part.
See also ``Double Precision'' and ``Single Precision.''
@item Format
-Format strings are used to control the appearance of output in the
-@code{strftime()} and @code{sprintf()} functions, and are used in the
+Format strings control the appearance of output in the
+@code{strftime()} and @code{sprintf()} functions, and in the
@code{printf} statement as well. Also, data conversions from numbers to strings
are controlled by the format strings contained in the built-in variables
@code{CONVFMT} and @code{OFMT}. (@xref{Control Letters}.)
@@ -38379,7 +39943,7 @@ Base 16 notation, where the digits are @code{0}--@code{9} and
@code{A}--@code{F}, with @samp{A}
representing 10, @samp{B} representing 11, and so on, up to @samp{F} for 15.
Hexadecimal numbers are written in C using a leading @samp{0x},
-to indicate their base. Thus, @code{0x12} is 18 (1 times 16 plus 2).
+to indicate their base. Thus, @code{0x12} is 18 ((1 x 16) + 2).
@xref{Nondecimal-numbers}.
@item I/O
@@ -38453,8 +40017,8 @@ meaning. Keywords are reserved and may not be used as variable names.
@code{function},
@code{func},
@code{if},
-@code{nextfile},
@code{next},
+@code{nextfile},
@code{switch},
and
@code{while}.
@@ -38515,13 +40079,9 @@ Ancient @command{awk} implementations used single precision floating-point.
@item Octal
Base-eight notation, where the digits are @code{0}--@code{7}.
Octal numbers are written in C using a leading @samp{0},
-to indicate their base. Thus, @code{013} is 11 (one times 8 plus 3).
+to indicate their base. Thus, @code{013} is 11 ((1 x 8) + 3).
@xref{Nondecimal-numbers}.
-@cindex P1003.1 POSIX standard
-@item P1003.1
-See ``POSIX.''
-
@item Pattern
Patterns tell @command{awk} which input records are interesting to which
rules.
@@ -38562,8 +40122,8 @@ specify single lines. (@xref{Pattern Overview}.)
@item Recursion
When a function calls itself, either directly or indirectly.
-As long as this is not clear, refer to the entry for ``recursion.''
If this is clear, stop, and proceed to the next entry.
+Otherwise, refer to the entry for ``recursion.''
@item Redirection
Redirection means performing input from something other than the standard input
@@ -38642,14 +40202,14 @@ expressions, and function calls have side effects.
An internal representation of numbers that can have fractional parts.
Single precision numbers keep track of fewer digits than do double precision
numbers, but operations on them are sometimes less expensive in terms of CPU time.
-This is the type used by some very old versions of @command{awk} to store
+This is the type used by some ancient versions of @command{awk} to store
numeric values. It is the C type @code{float}.
@item Space
The character generated by hitting the space bar on the keyboard.
@item Special File
-A file name interpreted internally by @command{gawk}, instead of being handed
+A @value{FN} interpreted internally by @command{gawk}, instead of being handed
directly to the underlying operating system---for example, @file{/dev/stderr}.
(@xref{Special Files}.)
@@ -38679,7 +40239,7 @@ into the local language.
A value in the ``seconds since the epoch'' format used by Unix
and POSIX systems. Used for the @command{gawk} functions
@code{mktime()}, @code{strftime()}, and @code{systime()}.
-See also ``Epoch'' and ``UTC.''
+See also ``Epoch,'' ``GMT,'' and ``UTC.''
@cindex Linux
@cindex GNU/Linux
@@ -38708,6 +40268,8 @@ A sequence of space, TAB, or newline characters occurring inside an input
record or a string.
@end table
+@end ifclear
+
@c The GNU General Public License.
@node Copying
@unnumbered GNU General Public License
@@ -39437,7 +40999,7 @@ applications with the library. If this is what you want to do, use
the GNU Lesser General Public License instead of this License. But
first, please read @url{http://www.gnu.org/philosophy/why-not-lgpl.html}.
-
+@ifclear FOR_PRINT
@c The GNU Free Documentation License.
@node GNU Free Documentation License
@unnumbered GNU Free Documentation License
@@ -39952,9 +41514,7 @@ recommend releasing these examples in parallel under your choice of
free software license, such as the GNU General Public License,
to permit their use in free software.
-@c Local Variables:
-@c ispell-local-pdict: "ispell-dict"
-@c End:
+@end ifclear
@ifnotdocbook
@node Index
@@ -39998,13 +41558,14 @@ Consistency issues:
Use "zeros" instead of "zeroes".
Use "nonzero" not "non-zero".
Use "runtime" not "run time" or "run-time".
- Use "command-line" not "command line".
+ Use "command-line" as an adjective and "command line" as a noun.
Use "online" not "on-line".
Use "whitespace" not "white space".
Use "Input/Output", not "input/output". Also "I/O", not "i/o".
Use "lefthand"/"righthand", not "left-hand"/"right-hand".
Use "workaround", not "work-around".
Use "startup"/"cleanup", not "start-up"/"clean-up"
+ Use "filesystem", not "file system"
Use @code{do}, and not @code{do}-@code{while}, except where
actually discussing the do-while.
Use "versus" in text and "vs." in index entries
@@ -40019,8 +41580,6 @@ Consistency issues:
The numbers zero through ten should be spelled out, except when
talking about file descriptor numbers. > 10 and < 0, it's
ok to use numbers.
- In tables, put command-line options in @code, while in the text,
- put them in @option.
For most cases, do NOT put a comma before "and", "or" or "but".
But exercise taste with this rule.
Don't show the awk command with a program in quotes when it's
@@ -40067,9 +41626,6 @@ ORA uses filename, thus the macro.
Suggestions:
------------
-% Next edition:
-% 1. Standardize the error messages from the functions and programs
-% in the two sample code chapters.
Better sidebars can almost sort of be done with:
@@ -40101,3 +41657,6 @@ But to use it you have to say
}
which sorta sucks.
+
+TODO:
+-----
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index 791f787f..daa4695c 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -19,6 +19,7 @@
\gdef\xrefprintnodename#1{``#1''}
@end tex
@end ifset
+
@ifclear FOR_PRINT
@c With early 2014 texinfo.tex, restore PDF links and colors
@tex
@@ -28,6 +29,18 @@
@end tex
@end ifclear
+@ifnotdocbook
+@set BULLET @bullet{}
+@set MINUS @minus{}
+@set NUL @sc{nul}
+@end ifnotdocbook
+
+@ifdocbook
+@set BULLET
+@set MINUS
+@set NUL NUL
+@end ifdocbook
+
@set xref-automatic-section-title
@c The following information should be updated here only!
@@ -35,12 +48,10 @@
@c applies to and all the info about who's publishing this edition
@c These apply across the board.
-@set UPDATE-MONTH April, 2014
+@set UPDATE-MONTH August, 2014
@set VERSION 4.1
@set PATCHLEVEL 1
-@set FSF
-
@set TITLE GAWK: Effective AWK Programming
@set SUBTITLE A User's Guide for GNU Awk
@set EDITION 4.1
@@ -53,6 +64,7 @@
@set SUBSECTION subsection
@set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}}
@set COMMONEXT (c.e.)
+@set PAGE page
@end iftex
@ifinfo
@set DOCUMENT Info file
@@ -62,6 +74,7 @@
@set SUBSECTION node
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE screen
@end ifinfo
@ifhtml
@set DOCUMENT Web page
@@ -71,6 +84,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE screen
@end ifhtml
@ifdocbook
@set DOCUMENT book
@@ -80,6 +94,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifdocbook
@ifxml
@set DOCUMENT book
@@ -89,6 +104,7 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifxml
@ifplaintext
@set DOCUMENT book
@@ -98,16 +114,38 @@
@set SUBSECTION subsection
@set DARKCORNER (d.c.)
@set COMMONEXT (c.e.)
+@set PAGE page
@end ifplaintext
+@ifdocbook
+@c empty on purpose
+@set PART1
+@set PART2
+@set PART3
+@set PART4
+@end ifdocbook
+
+@ifnotdocbook
+@set PART1 Part I:@*
+@set PART2 Part II:@*
+@set PART3 Part III:@*
+@set PART4 Part IV:@*
+@end ifnotdocbook
+
@c some special symbols
@iftex
@set LEQ @math{@leq}
@set PI @math{@pi}
@end iftex
+@ifdocbook
+@set LEQ @inlineraw{docbook, &le;}
+@set PI @inlineraw{docbook, &pgr;}
+@end ifdocbook
@ifnottex
+@ifnotdocbook
@set LEQ <=
@set PI @i{pi}
+@end ifnotdocbook
@end ifnottex
@ifnottex
@@ -124,6 +162,34 @@
@end macro
@end ifdocbook
+@c hack for docbook, where comma shouldn't always follow an @ref{}
+@ifdocbook
+@macro DBREF{text}
+@ref{\text\}
+@end macro
+@end ifdocbook
+
+@ifnotdocbook
+@macro DBREF{text}
+@ref{\text\},
+@end macro
+@end ifnotdocbook
+
+@ifclear FOR_PRINT
+@set FN file name
+@set FFN File Name
+@set DF data file
+@set DDF Data File
+@set PVERSION version
+@end ifclear
+@ifset FOR_PRINT
+@set FN filename
+@set FFN Filename
+@set DF datafile
+@set DDF Datafile
+@set PVERSION Version
+@end ifset
+
@c For HTML, spell out email addresses, to avoid problems with
@c address harvesters for spammers.
@ifhtml
@@ -198,6 +264,10 @@ quirk of the language / makeinfo, and isn't going to change.
@copying
@docbook
+<para>
+&ldquo;To boldly go where no man has gone before&rdquo; is a
+Registered Trademark of Paramount Pictures Corporation.</para>
+
<para>Published by:</para>
<literallayout class="normal">Free Software Foundation
@@ -226,19 +296,24 @@ implementation of AWK.
Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.3 or
any later version published by the Free Software Foundation; with the
-Invariant Sections being ``GNU General Public License'', the Front-Cover
-texts being (a) (see below), and with the Back-Cover Texts being (b)
-(see below). A copy of the license is included in the section entitled
+Invariant Sections being ``GNU General Public License'', with the
+Front-Cover Texts being ``A GNU Manual'', and with the Back-Cover Texts
+as in (a) below.
+@ifclear FOR_PRINT
+A copy of the license is included in the section entitled
``GNU Free Documentation License''.
+@end ifclear
+@ifset FOR_PRINT
+A copy of the license
+may be found on the Internet at
+@uref{http://www.gnu.org/software/gawk/manual/html_node/GNU-Free-Documentation-License.html,
+the GNU Project's web site}.
+@end ifset
@enumerate a
@item
-``A GNU Manual''
-
-@item
-``You have the freedom to
-copy and modify this GNU manual. Buying copies from the FSF
-supports it in developing GNU and promoting software freedom.''
+The FSF's Back-Cover Text is: ``You have the freedom to
+copy and modify this GNU manual.''
@end enumerate
@end copying
@@ -298,15 +373,13 @@ ISBN 1-882114-28-0 @*
@page
@w{ }
@sp 9
-@center @i{To Miriam, for making me complete.}
-@sp 1
-@center @i{To Chana, for the joy you bring us.}
+@center @i{To my parents, for their love, and for the wonderful example they set for me.}
@sp 1
-@center @i{To Rivka, for the exponential increase.}
+@center @i{To my wife Miriam, for making me complete.
+Thank you for building your life together with me.}
@sp 1
-@center @i{To Nachum, for the added dimension.}
+@center @i{To our children Chana, Rivka, Nachum and Malka, for enriching our lives in innumerable ways.}
@sp 1
-@center @i{To Malka, for the new beginning.}
@w{ }
@page
@w{ }
@@ -316,13 +389,12 @@ ISBN 1-882114-28-0 @*
@docbook
<dedication>
-<simplelist>
-<member>To Miriam, for making me complete.</member>
-<member>To Chana, for the joy you bring us.</member>
-<member>To Rivka, for the exponential increase.</member>
-<member>To Nachum, for the added dimension.</member>
-<member>To Malka, for the new beginning.</member>
-</simplelist>
+<para>To my parents, for their love, and for the wonderful
+example they set for me.</para>
+<para>To my wife Miriam, for making me complete.
+Thank you for building your life together with me.</para>
+<para>To our children Chana, Rivka, Nachum and Malka,
+for enriching our lives in innumerable ways.</para>
</dedication>
@end docbook
@@ -414,8 +486,8 @@ particular records in a file and perform operations upon them.
includes command-line syntax.
* One-shot:: Running a short throwaway
@command{awk} program.
-* Read Terminal:: Using no input files (input from
- terminal instead).
+* Read Terminal:: Using no input files (input from the
+ keyboard instead).
* Long:: Putting permanent @command{awk}
programs in files.
* Executable Scripts:: Making self-contained @command{awk}
@@ -437,6 +509,7 @@ particular records in a file and perform operations upon them.
* Other Features:: Other Features of @command{awk}.
* When:: When to use @command{gawk} and when to
use other things.
+* Intro Summary:: Summary of the introduction.
* Command Line:: How to run @command{awk}.
* Options:: Command-line options and their
meanings.
@@ -458,16 +531,21 @@ particular records in a file and perform operations upon them.
program.
* Obsolete:: Obsolete Options and/or features.
* Undocumented:: Undocumented Options and Features.
+* Invoking Summary:: Invocation summary.
* Regexp Usage:: How to Use Regular Expressions.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
+* awk split records:: How standard @command{awk} splits
+ records.
+* gawk split records:: How @command{gawk} splits records.
* Fields:: An introduction to fields.
* Nonconstant Fields:: Nonconstant Field Numbers.
* Changing Fields:: Changing the Contents of a Field.
@@ -478,7 +556,7 @@ particular records in a file and perform operations upon them.
* Single Character Fields:: Making each character a separate
field.
* Command Line Field Separator:: Setting @code{FS} from the
- command-line.
+ command line.
* Full Line Fields:: Making the full line be a single
field.
* Field Splitting Summary:: Some final points and a summary table.
@@ -504,8 +582,10 @@ particular records in a file and perform operations upon them.
@code{getline}.
* Getline Summary:: Summary of @code{getline} Variants.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on
+* Command-line directories:: What happens if you put a directory on
the command line.
+* Input Summary:: Input summary.
+* Input Exercises:: Exercises.
* Print:: The @code{print} statement.
* Print Examples:: Simple examples of @code{print}
statements.
@@ -529,6 +609,8 @@ particular records in a file and perform operations upon them.
* Special Caveats:: Things to watch out for.
* Close Files And Pipes:: Closing Input and Output Files and
Pipes.
+* Output Summary:: Output summary.
+* Output Exercises:: Exercises.
* Values:: Constants, Variables, and Regular
Expressions.
* Constants:: String, numeric and regexp constants.
@@ -539,11 +621,14 @@ particular records in a file and perform operations upon them.
* Variables:: Variables give names to values for
later use.
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line
+* Assignment Options:: Setting variables on the command line
and a summary of command-line syntax.
This is an advanced method of input.
* Conversion:: The conversion of strings to numbers
and vice versa.
+* Strings And Numbers:: How @command{awk} Converts Between
+ Strings And Numbers.
+* Locale influences conversions:: How the locale may affect conversions.
* All Operators:: @command{gawk}'s operators.
* Arithmetic Ops:: Arithmetic operations (@samp{+},
@samp{-}, etc.)
@@ -571,6 +656,7 @@ particular records in a file and perform operations upon them.
* Function Calls:: A function call is an expression.
* Precedence:: How various operators nest.
* Locales:: How the locale affects things.
+* Expressions Summary:: Expressions summary.
* Pattern Overview:: What goes into a pattern.
* Regexp Patterns:: Using regexps as patterns.
* Expression Patterns:: Any expression can be used as a
@@ -617,6 +703,7 @@ particular records in a file and perform operations upon them.
gives you information.
* ARGC and ARGV:: Ways to use @code{ARGC} and
@code{ARGV}.
+* Pattern Action Summary:: Patterns and Actions summary.
* Array Basics:: The basics of arrays.
* Array Intro:: Introduction to Arrays
* Reference to Elements:: How to examine one element of an
@@ -639,6 +726,7 @@ particular records in a file and perform operations upon them.
@command{awk}.
* Multiscanning:: Scanning multidimensional arrays.
* Arrays of Arrays:: True multidimensional arrays.
+* Arrays Summary:: Summary of arrays.
* Built-in:: Summarizes the built-in functions.
* Calling Built-in:: How to call built-in functions.
* Numeric Functions:: Functions that work with numbers,
@@ -673,6 +761,7 @@ particular records in a file and perform operations upon them.
runtime.
* Indirect Calls:: Choosing the function to call at
runtime.
+* Functions Summary:: Summary of functions.
* Library Names:: How to best name private global
variables in library functions.
* General Functions:: Functions that are of general use.
@@ -707,6 +796,8 @@ particular records in a file and perform operations upon them.
* Group Functions:: Functions for getting group
information.
* Walking Arrays:: A function to walk arrays of arrays.
+* Library Functions Summary:: Summary of library functions.
+* Library Exercises:: Exercises.
* Running Examples:: How to run these examples.
* Clones:: Clones of common utilities.
* Cut Program:: The @command{cut} utility.
@@ -736,6 +827,8 @@ particular records in a file and perform operations upon them.
* Anagram Program:: Finding anagrams from a dictionary.
* Signature Program:: People do amazing things with too much
time on their hands.
+* Programs Summary:: Summary of programs.
+* Programs Exercises:: Exercises.
* Nondecimal Data:: Allowing nondecimal input data.
* Array Sorting:: Facilities for controlling array
traversal and sorting arrays.
@@ -747,8 +840,9 @@ particular records in a file and perform operations upon them.
* TCP/IP Networking:: Using @command{gawk} for network
programming.
* Profiling:: Profiling your @command{awk} programs.
+* Advanced Features Summary:: Summary of advanced features.
* I18N and L10N:: Internationalization and Localization.
-* Explaining gettext:: How GNU @code{gettext} works.
+* Explaining gettext:: How GNU @command{gettext} works.
* Programmer i18n:: Features for the programmer.
* Translator i18n:: Features for the translator.
* String Extraction:: Extracting marked strings.
@@ -758,6 +852,7 @@ particular records in a file and perform operations upon them.
* I18N Example:: A simple i18n example.
* Gawk I18N:: @command{gawk} is also
internationalized.
+* I18N Summary:: Summary of I18N stuff.
* Debugging:: Introduction to @command{gawk}
debugger.
* Debugging Concepts:: Debugging in General.
@@ -776,31 +871,23 @@ particular records in a file and perform operations upon them.
* Miscellaneous Debugger Commands:: Miscellaneous Commands.
* Readline Support:: Readline support.
* Limitations:: Limitations and future plans.
-* General Arithmetic:: An introduction to computer
- arithmetic.
-* Floating Point Issues:: Stuff to know about floating-point
- numbers.
-* String Conversion Precision:: The String Value Can Lie.
-* Unexpected Results:: Floating Point Numbers Are Not
- Abstract Numbers.
-* POSIX Floating Point Problems:: Standards Versus Existing Practice.
-* Integer Programming:: Effective integer programming.
-* Floating-point Programming:: Effective Floating-point Programming.
-* Floating-point Representation:: Binary floating-point representation.
-* Floating-point Context:: Floating-point context.
-* Rounding Mode:: Floating-point rounding mode.
-* Gawk and MPFR:: How @command{gawk} provides
- arbitrary-precision arithmetic.
-* Arbitrary Precision Floats:: Arbitrary Precision Floating-point
- Arithmetic with @command{gawk}.
-* Setting Precision:: Setting the working precision.
-* Setting Rounding Mode:: Setting the rounding mode.
-* Floating-point Constants:: Representing floating-point constants.
-* Changing Precision:: Changing the precision of a number.
-* Exact Arithmetic:: Exact arithmetic with floating-point
- numbers.
+* Debugging Summary:: Debugging summary.
+* Computer Arithmetic:: A quick intro to computer math.
+* Math Definitions:: Defining terms used.
+* MPFR features:: The MPFR features in @command{gawk}.
+* FP Math Caution:: Things to know.
+* Inexactness of computations:: Floating point math is not exact.
+* Inexact representation:: Numbers are not exactly represented.
+* Comparing FP Values:: How to compare floating point values.
+* Errors accumulate:: Errors get bigger as they go.
+* Getting Accuracy:: Getting more accuracy takes some work.
+* Try To Round:: Add digits and round.
+* Setting precision:: How to set the precision.
+* Setting the rounding mode:: How to set the rounding mode.
* Arbitrary Precision Integers:: Arbitrary Precision Integer Arithmetic
with @command{gawk}.
+* POSIX Floating Point Problems:: Standards Versus Existing Practice.
+* Floating point summary:: Summary of floating point discussion.
* Extension Intro:: What is an extension.
* Plugin License:: A note about licensing.
* Extension Mechanism Outline:: An outline of how it works.
@@ -862,6 +949,8 @@ particular records in a file and perform operations upon them.
* Extension Sample Time:: An interface to @code{gettimeofday()}
and @code{sleep()}.
* gawkextlib:: The @code{gawkextlib} project.
+* Extension summary:: Extension summary.
+* Extension Exercises:: Exercises.
* V7/SVR3.1:: The major changes between V7 and
System V Release 3.1.
* SVR4:: Minor changes between System V
@@ -878,6 +967,7 @@ particular records in a file and perform operations upon them.
ranges.
* Contributors:: The major contributors to
@command{gawk}.
+* History summary:: History summary.
* Gawk Distribution:: What is in the @command{gawk}
distribution.
* Getting:: How to get the distribution.
@@ -916,6 +1006,7 @@ particular records in a file and perform operations upon them.
* Bugs:: Reporting Problems and Bugs.
* Other Versions:: Other freely available @command{awk}
implementations.
+* Installation summary:: Summary of installation.
* Compatibility Mode:: How to disable certain @command{gawk}
extensions.
* Additions:: Making Additions To @command{gawk}.
@@ -924,8 +1015,8 @@ particular records in a file and perform operations upon them.
@command{gawk}.
* New Ports:: Porting @command{gawk} to a new
operating system.
-* Derived Files:: Why derived files are kept in the
- @command{git} repository.
+* Derived Files:: Why derived files are kept in the Git
+ repository.
* Future Extensions:: New features that may be implemented
one day.
* Implementation Limitations:: Some limitations of the
@@ -936,6 +1027,7 @@ particular records in a file and perform operations upon them.
* Extension Other Design Decisions:: Some other design decisions.
* Extension Future Growth:: Some room for future growth.
* Old Extension Mechanism:: Some compatibility for old extensions.
+* Notes summary:: Summary of implementation notes.
* Basic High Level:: The high level view.
* Basic Data Typing:: A very quick intro to data types.
@end detailmenu
@@ -943,15 +1035,14 @@ particular records in a file and perform operations upon them.
@c dedication for Info file
@ifinfo
-@center To Miriam, for making me complete.
-@sp 1
-@center To Chana, for the joy you bring us.
+To my parents, for their love, and for the wonderful
+example they set for me.
@sp 1
-@center To Rivka, for the exponential increase.
+To my wife Miriam, for making me complete.
+Thank you for building your life together with me.
@sp 1
-@center To Nachum, for the added dimension.
-@sp 1
-@center To Malka, for the new beginning.
+To our children Chana, Rivka, Nachum and Malka,
+for enriching our lives in innumerable ways.
@end ifinfo
@summarycontents
@@ -960,6 +1051,21 @@ particular records in a file and perform operations upon them.
@node Foreword
@unnumbered Foreword
+@c This bit is post-processed by a script which turns the chapter
+@c tag into a preface tag, and moves this stuff to before the title.
+@c Bleah.
+@docbook
+ <prefaceinfo>
+ <author>
+ <firstname>Michael</firstname>
+ <surname>Brennan</surname>
+ <!-- can't put mawk into command tags. sigh. -->
+ <affiliation><jobtitle>Author of mawk</jobtitle></affiliation>
+ </author>
+ <date>March, 2001</date>
+ </prefaceinfo>
+@end docbook
+
Arnold Robbins and I are good friends. We were introduced
@c 11 years ago
in 1990
@@ -1084,12 +1190,14 @@ Arnold has distilled over a decade of experience writing and
using AWK programs, and developing @command{gawk}, into this book. If you use
AWK or want to learn how, then read this book.
+@ifnotdocbook
@cindex Brennan, Michael
@display
Michael Brennan
Author of @command{mawk}
March, 2001
@end display
+@end ifnotdocbook
@node Preface
@unnumbered Preface
@@ -1098,24 +1206,34 @@ March, 2001
@c
@c 12/2000: Chuck wants the preface & intro combined.
-Several kinds of tasks occur repeatedly
-when working with text files.
-You might want to extract certain lines and discard the rest.
-Or you may need to make changes wherever certain patterns appear,
-but leave the rest of the file alone.
-Writing single-use programs for these tasks in languages such as C, C++,
-or Java is time-consuming and inconvenient.
-Such jobs are often easier with @command{awk}.
-The @command{awk} utility interprets a special-purpose programming language
-that makes it easy to handle simple data-reformatting jobs.
+@c This bit is post-processed by a script which turns the chapter
+@c tag into a preface tag, and moves this stuff to before the title.
+@c Bleah.
+@docbook
+ <prefaceinfo>
+ <author>
+ <firstname>Arnold</firstname>
+ <surname>Robbins</surname>
+ <affiliation><jobtitle>Nof Ayalon</jobtitle></affiliation>
+ <affiliation><jobtitle>ISRAEL</jobtitle></affiliation>
+ </author>
+ <date>June, 2014</date>
+ </prefaceinfo>
+@end docbook
+
+Several kinds of tasks occur repeatedly when working with text files.
+You might want to extract certain lines and discard the rest. Or you
+may need to make changes wherever certain patterns appear, but leave the
+rest of the file alone. Such jobs are often easy with @command{awk}.
+The @command{awk} utility interprets a special-purpose programming
+language that makes it easy to handle simple data-reformatting jobs.
-@cindex Brian Kernighan's @command{awk}
The GNU implementation of @command{awk} is called @command{gawk}; if you
invoke it with the proper options or environment variables
(@pxref{Options}), it is fully
compatible with
-the POSIX@footnote{The 2008 POSIX standard is online at
-@url{http://www.opengroup.org/onlinepubs/9699919799/}.}
+the POSIX@footnote{The 2008 POSIX standard is accessible online at
+@w{@url{http://www.opengroup.org/onlinepubs/9699919799/}.}}
specification of the @command{awk} language
and with the Unix version of @command{awk} maintained
by Brian Kernighan.
@@ -1132,7 +1250,7 @@ Thus, we usually don't distinguish between @command{gawk} and other
@cindex @command{awk}, uses for
Using @command{awk} allows you to:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Manage small, personal databases
@@ -1157,7 +1275,7 @@ In addition,
@command{gawk}
provides facilities that make it easy to:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Extract bits and pieces of data for processing
@@ -1166,6 +1284,12 @@ Sort data
@item
Perform simple network communications
+
+@item
+Profile and debug @command{awk} programs
+
+@item
+Extend the language with functions written in C or C++
@end itemize
This @value{DOCUMENT} teaches you about the @command{awk} language and
@@ -1181,12 +1305,18 @@ Implementations of the @command{awk} language are available for many
different computing environments. This @value{DOCUMENT}, while describing
the @command{awk} language in general, also describes the particular
implementation of @command{awk} called @command{gawk} (which stands for
-``GNU awk''). @command{gawk} runs on a broad range of Unix systems,
-ranging from Intel@registeredsymbol{}-architecture PC-based computers
-up through large-scale systems,
-such as Crays. @command{gawk} has also been ported to Mac OS X,
-Microsoft Windows (all versions) and OS/2 PCs,
-and VMS.
+``GNU @command{awk}''). @command{gawk} runs on a broad range of Unix systems,
+ranging from Intel-architecture PC-based computers
+up through large-scale systems.
+@command{gawk} has also been ported to Mac OS X,
+Microsoft Windows
+@ifset FOR_PRINT
+(all versions),
+@end ifset
+@ifclear FOR_PRINT
+(all versions) and OS/2 PCs,
+@end ifclear
+and OpenVMS.
(Some other, obsolete systems to which @command{gawk} was once ported
are no longer supported and the code for those systems
has been removed.)
@@ -1251,7 +1381,7 @@ help from me, thoroughly reworked @command{gawk} for compatibility
with the newer @command{awk}.
Circa 1994, I became the primary maintainer.
Current development focuses on bug fixes,
-performance improvements, standards compliance, and occasionally, new features.
+performance improvements, standards compliance and, occasionally, new features.
In May of 1997, J@"urgen Kahrs felt the need for network access
from @command{awk}, and with a little help from me, set about adding
@@ -1260,11 +1390,11 @@ wrote the bulk of
@cite{TCP/IP Internetworking with @command{gawk}}
(a separate document, available as part of the @command{gawk} distribution).
His code finally became part of the main @command{gawk} distribution
-with @command{gawk} version 3.1.
+with @command{gawk} @value{PVERSION} 3.1.
John Haque rewrote the @command{gawk} internals, in the process providing
an @command{awk}-level debugger. This version became available as
-@command{gawk} version 4.0, in 2011.
+@command{gawk} @value{PVERSION} 4.0, in 2011.
@xref{Contributors},
for a complete list of those who made important contributions to @command{gawk}.
@@ -1276,29 +1406,27 @@ for a complete list of those who made important contributions to @command{gawk}.
The @command{awk} language has evolved over the years. Full details are
provided in @ref{Language History}.
The language described in this @value{DOCUMENT}
-is often referred to as ``new @command{awk}'' (@command{nawk}).
+is often referred to as ``new @command{awk}.''
+By analogy, the original version of @command{awk} is
+referred to as ``old @command{awk}.''
-@cindex @command{awk}, versions of
-Because of this, there are systems with multiple
-versions of @command{awk}.
-Some systems have an @command{awk} utility that implements the
-original version of the @command{awk} language and a @command{nawk} utility
-for the new version.
-Others have an @command{oawk} version for the ``old @command{awk}''
-language and plain @command{awk} for the new one. Still others only
-have one version, which is usually the new one.@footnote{Often, these systems
-use @command{gawk} for their @command{awk} implementation!}
-
-@cindex @command{nawk} utility
-@cindex @command{oawk} utility
-All in all, this makes it difficult for you to know which version of
-@command{awk} you should run when writing your programs. The best advice
-we can give here is to check your local documentation. Look for @command{awk},
-@command{oawk}, and @command{nawk}, as well as for @command{gawk}.
-It is likely that you already
-have some version of new @command{awk} on your system, which is what
-you should use when running your programs. (Of course, if you're reading
-this @value{DOCUMENT}, chances are good that you have @command{gawk}!)
+Today, on most systems, when you run the @command{awk} utility,
+you get some version of new @command{awk}.@footnote{Only
+Solaris systems still use an old @command{awk} for the
+default @command{awk} utility. A more modern @command{awk} lives in
+@file{/usr/xpg6/bin} on these systems.} If your system's standard
+@command{awk} is the old one, you will see something like this
+if you try the test program:
+
+@example
+$ @kbd{awk 1 /dev/null}
+@error{} awk: syntax error near line 1
+@error{} awk: bailing out near line 1
+@end example
+
+@noindent
+In this case, you should find a version of new @command{awk},
+or just install @command{gawk}!
Throughout this @value{DOCUMENT}, whenever we refer to a language feature
that should be available in any complete implementation of POSIX @command{awk},
@@ -1326,9 +1454,15 @@ Primarily, this @value{DOCUMENT} explains the features of @command{awk}
as defined in the POSIX standard. It does so in the context of the
@command{gawk} implementation. While doing so, it also
attempts to describe important differences between @command{gawk}
-and other @command{awk} implementations.@footnote{All such differences
+and other @command{awk}
+@ifclear FOR_PRINT
+implementations.@footnote{All such differences
appear in the index under the
entry ``differences in @command{awk} and @command{gawk}.''}
+@end ifclear
+@ifset FOR_PRINT
+implementations.
+@end ifset
Finally, any @command{gawk} features that are not in
the POSIX standard for @command{awk} are noted.
@@ -1336,14 +1470,16 @@ the POSIX standard for @command{awk} are noted.
This @value{DOCUMENT} has the difficult task of being both a tutorial and a reference.
If you are a novice, feel free to skip over details that seem too complex.
You should also ignore the many cross-references; they are for the
-expert user and for the online Info and HTML versions of the document.
+expert user and for the online Info and HTML versions of the @value{DOCUMENT}.
@end ifnotinfo
There are sidebars
scattered throughout the @value{DOCUMENT}.
They add a more complete explanation of points that are relevant, but not likely
to be of interest on first reading.
+@ifclear FOR_PRINT
All appear in the index, under the heading ``sidebar.''
+@end ifclear
Most of the time, the examples use complete @command{awk} programs.
Some of the more advanced sections show only the part of the @command{awk}
@@ -1360,6 +1496,8 @@ should be of interest.
This @value{DOCUMENT} is split into several parts, as follows:
+@c FULLXREF ON
+
Part I describes the @command{awk} language and @command{gawk} program in detail.
It starts with the basics, and continues through all of the features of @command{awk}.
It contains the following chapters:
@@ -1443,9 +1581,15 @@ describes advanced arithmetic facilities provided by
@ref{Dynamic Extensions}, describes how to add new variables and
functions to @command{gawk} by writing extensions in C or C++.
+@ifclear FOR_PRINT
Part IV provides the appendices, the Glossary, and two licenses that cover
the @command{gawk} source code and this @value{DOCUMENT}, respectively.
It contains the following appendices:
+@end ifclear
+@ifset FOR_PRINT
+Part IV provides the following appendices,
+including the GNU General Public License:
+@end ifset
@ref{Language History},
describes how the @command{awk} language has evolved since
@@ -1460,6 +1604,42 @@ non-POSIX systems. It also describes how to report bugs
in @command{gawk} and where to get other freely
available @command{awk} implementations.
+@ifset FOR_PRINT
+
+@ref{Copying},
+presents the license that covers the @command{gawk} source code.
+
+The version of this @value{DOCUMENT} distributed with @command{gawk}
+contains additional appendices and other end material.
+To save space, we have omitted them from the
+printed edition. You may find them online, as follows:
+
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Notes.html,
+The appendix on implementation notes}
+describes how to disable @command{gawk}'s extensions, as
+well as how to contribute new code to @command{gawk},
+and some possible future directions for @command{gawk} development.
+
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Basic-Concepts.html,
+The appendix on basic concepts}
+provides some very cursory background material for those who
+are completely unfamiliar with computer programming.
+
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Glossary.html,
+The Glossary}
+defines most, if not all, of the significant terms used
+throughout the @value{DOCUMENT}. If you find terms that you aren't familiar with,
+try looking them up here.
+
+@uref{http://www.gnu.org/software/gawk/manual/html_node/GNU-Free-Documentation-License.html,
+The GNU FDL}
+is the license that covers this @value{DOCUMENT}.
+
+Some of the chapters have exercise sections; these have also been
+omitted from the print edition.
+@end ifset
+
+@ifclear FOR_PRINT
@ref{Notes},
describes how to disable @command{gawk}'s extensions, as
well as how to contribute new code to @command{gawk},
@@ -1470,13 +1650,16 @@ provides some very cursory background material for those who
are completely unfamiliar with computer programming.
The @ref{Glossary}, defines most, if not all, of the significant terms used
-throughout the book. If you find terms that you aren't familiar with,
+throughout the @value{DOCUMENT}. If you find terms that you aren't familiar with,
try looking them up here.
@ref{Copying}, and
@ref{GNU Free Documentation License},
present the licenses that cover the @command{gawk} source code
and this @value{DOCUMENT}, respectively.
+@end ifclear
+
+@c FULLXREF OFF
@node Conventions
@unnumberedsec Typographical Conventions
@@ -1494,11 +1677,18 @@ are slightly different than in other books you may have read.
This @value{SECTION} briefly documents the typographical conventions used in Texinfo.
@end ifinfo
-Examples you would type at the command-line are preceded by the common
+Examples you would type at the command line are preceded by the common
shell primary and secondary prompts, @samp{$} and @samp{>}.
Input that you type is shown @kbd{like this}.
+@c 8/2014: @print{} is stripped from the texi to make docbook.
+@ifclear FOR_PRINT
Output from the command is preceded by the glyph ``@print{}''.
This typically represents the command's standard output.
+@end ifclear
+@ifset FOR_PRINT
+Output from the command, usually its standard output, appears
+@code{like this}.
+@end ifset
Error messages, and other output on the command's standard error, are preceded
by the glyph ``@error{}''. For example:
@@ -1518,7 +1708,7 @@ emphasized @emph{like this}, and if a point needs to be made
strongly, it is done @strong{like this}. The first occurrence of
a new term is usually its @dfn{definition} and appears in the same
font as the previous occurrence of ``definition'' in this sentence.
-Finally, file names are indicated like this: @file{/path/to/ourfile}.
+Finally, @value{FN}s are indicated like this: @file{/path/to/ourfile}.
@end ifnotinfo
Characters that you type at the keyboard look @kbd{like this}. In particular,
@@ -1528,6 +1718,20 @@ another key, at the same time. For example, a @kbd{Ctrl-d} is typed
by first pressing and holding the @kbd{CONTROL} key, next
pressing the @kbd{d} key and finally releasing both keys.
+For the sake of brevity, throughout this @value{DOCUMENT}, we refer to
+Brian Kernighan's version of @command{awk} as ``BWK @command{awk}.''
+(@xref{Other Versions}, for information on his and other versions.)
+
+@ifset FOR_PRINT
+@quotation NOTE
+Notes of interest look like this.
+@end quotation
+
+@quotation CAUTION
+Cautionary or warning notes look like this.
+@end quotation
+@end ifset
+
@c fakenode --- for prepinfo
@unnumberedsubsec Dark Corners
@cindex Kernighan, Brian
@@ -1550,16 +1754,23 @@ the picture of a flashlight in the margin, as shown here.
@ifnottex
``(d.c.)''.
@end ifnottex
+@ifclear FOR_PRINT
They also appear in the index under the heading ``dark corner.''
+@end ifclear
-As noted by the opening quote, though, any
-coverage of dark corners
-is, by definition, incomplete.
+As noted by the opening quote, though, any coverage of dark corners is,
+by definition, incomplete.
+@cindex c.e., See common extensions
Extensions to the standard @command{awk} language that are supported by
more than one @command{awk} implementation are marked
+@ifclear FOR_PRINT
``@value{COMMONEXT},'' and listed in the index under ``common extensions''
and ``extensions, common.''
+@end ifclear
+@ifset FOR_PRINT
+``@value{COMMONEXT}'' for ``common extension.''
+@end ifset
@node Manual History
@unnumberedsec The GNU Project and This Book
@@ -1582,19 +1793,22 @@ Foundation to create a complete, freely distributable, POSIX-compliant
computing environment.
The FSF uses the ``GNU General Public License'' (GPL) to ensure that
their software's
-source code is always available to the end user. A
-copy of the GPL is included
+source code is always available to the end user.
+@ifclear FOR_PRINT
+A copy of the GPL is included
@ifnotinfo
in this @value{DOCUMENT}
@end ifnotinfo
for your reference
(@pxref{Copying}).
+@end ifclear
The GPL applies to the C language source code for @command{gawk}.
To find out more about the FSF and the GNU Project online,
see @uref{http://www.gnu.org, the GNU Project's home page}.
This @value{DOCUMENT} may also be read from
@uref{http://www.gnu.org/software/gawk/manual/, their web site}.
+@ifclear FOR_PRINT
A shell, an editor (Emacs), highly portable optimizing C, C++, and
Objective-C compilers, a symbolic debugger and dozens of large and
small utilities (such as @command{gawk}), have all been completed and are
@@ -1605,80 +1819,29 @@ stage of development.
@cindex Linux
@cindex GNU/Linux
@cindex operating systems, BSD-based
-@cindex Alpha (DEC)
Until the GNU operating system is more fully developed, you should
consider using GNU/Linux, a freely distributable, Unix-like operating
-system for Intel@registeredsymbol{},
+system for Intel,
Power Architecture,
Sun SPARC, IBM S/390, and other
systems.@footnote{The terminology ``GNU/Linux'' is explained
in the @ref{Glossary}.}
Many GNU/Linux distributions are
available for download from the Internet.
-
-(There are numerous other freely available, Unix-like operating systems
-based on the
-Berkeley Software Distribution, and some of them use recent versions
-of @command{gawk} for their versions of @command{awk}.
-@uref{http://www.netbsd.org, NetBSD},
-@uref{http://www.freebsd.org, FreeBSD},
-and
-@uref{http://www.openbsd.org, OpenBSD}
-are three of the most popular ones, but there
-are others.)
+@end ifclear
@ifnotinfo
The @value{DOCUMENT} you are reading is actually free---at least, the
information in it is free to anyone. The machine-readable
source code for the @value{DOCUMENT} comes with @command{gawk}; anyone
may take this @value{DOCUMENT} to a copying machine and make as many
-copies as they like. (Take a moment to check the Free Documentation
+copies as they like.
+@ifclear FOR_PRINT
+(Take a moment to check the Free Documentation
License in @ref{GNU Free Documentation License}.)
+@end ifclear
@end ifnotinfo
-@ignore
-@cindex Close, Diane
-The @value{DOCUMENT} itself has gone through several previous,
-preliminary editions.
-Paul Rubin wrote the very first draft of @cite{The GAWK Manual};
-it was around 40 pages in size.
-Diane Close and Richard Stallman improved it, yielding the
-version which I started working with in the fall of 1988.
-It was around 90 pages long and barely described the original, ``old''
-version of @command{awk}. After substantial revision, the first version of
-the @cite{The GAWK Manual} to be released was Edition 0.11 Beta in
-October of 1989. The manual then underwent more substantial revision
-for Edition 0.13 of December 1991.
-David Trueman, Pat Rankin and Michal Jaegermann contributed sections
-of the manual for Edition 0.13.
-That edition was published by the
-FSF as a bound book early in 1992. Since then there were several
-minor revisions, notably Edition 0.14 of November 1992 that was published
-by the FSF in January of 1993 and Edition 0.16 of August 1993.
-
-Edition 1.0 of @cite{GAWK: The GNU Awk User's Guide} represented a significant re-working
-of @cite{The GAWK Manual}, with much additional material.
-The FSF and I agreed that I was now the primary author.
-@c I also felt that the manual needed a more descriptive title.
-
-In January 1996, SSC published Edition 1.0 under the title @cite{Effective AWK Programming}.
-In February 1997, they published Edition 1.0.3 which had minor changes
-as a ``second edition.''
-In 1999, the FSF published this same version as Edition 2
-of @cite{GAWK: The GNU Awk User's Guide}.
-
-Edition @value{EDITION} maintains the basic structure of Edition 1.0,
-but with significant additional material, reflecting the host of new features
-in @command{gawk} version @value{VERSION}.
-Of particular note is
-@ref{Array Sorting},
-@ref{Bitwise Functions},
-@ref{Internationalization},
-@ref{Advanced Features},
-and
-@ref{Dynamic Extensions}.
-@end ignore
-
@cindex Close, Diane
The @value{DOCUMENT} itself has gone through a number of previous editions.
Paul Rubin wrote the very first draft of @cite{The GAWK Manual};
@@ -1694,24 +1857,50 @@ the FSF published several preliminary versions (numbered 0.@var{x}).
In 1996, Edition 1.0 was released with @command{gawk} 3.0.0.
The FSF published the first two editions under
the title @cite{The GNU Awk User's Guide}.
+@ifset FOR_PRINT
+SSC published two editions of the @value{DOCUMENT} under the
+title @cite{Effective awk Programming}, and O'Reilly published
+the third edition in 2001.
+@end ifset
This edition maintains the basic structure of the previous editions.
-For Edition 4.0, the content has been thoroughly reviewed
+For FSF edition 4.0, the content has been thoroughly reviewed
and updated. All references to @command{gawk} versions prior to 4.0 have been
removed.
Of significant note for this edition was @ref{Debugger}.
-For edition @value{EDITION}, the content has been reorganized into parts,
+For FSF edition
+@ifclear FOR_PRINT
+@value{EDITION},
+@end ifclear
+@ifset FOR_PRINT
+@value{EDITION}
+(the fourth edition as published by O'Reilly),
+@end ifset
+the content has been reorganized into parts,
and the major new additions are @ref{Arbitrary Precision Arithmetic},
and @ref{Dynamic Extensions}.
-@cite{@value{TITLE}} will undoubtedly continue to evolve.
-An electronic version
-comes with the @command{gawk} distribution from the FSF.
-If you find an error in this @value{DOCUMENT}, please report it!
-@xref{Bugs}, for information on submitting
-problem reports electronically.
+This @value{DOCUMENT} will undoubtedly continue to evolve. An electronic
+version comes with the @command{gawk} distribution from the FSF. If you
+find an error in this @value{DOCUMENT}, please report it! @xref{Bugs},
+for information on submitting problem reports electronically.
+@ifset FOR_PRINT
+@c fakenode --- for prepinfo
+@unnumberedsec How to Stay Current
+
+It may be you have a version of @command{gawk} which is newer than the
+one described in this @value{DOCUMENT}. To find out what has changed,
+you should first look at the @file{NEWS} file in the @command{gawk}
+distribution, which provides a high-level summary of what changed in
+each release.
+
+You can then look at the @uref{http://www.gnu.org/software/gawk/manual/,
+online version} of this @value{DOCUMENT} to read about any new features.
+@end ifset
+
+@ifclear FOR_PRINT
@node How To Contribute
@unnumberedsec How to Contribute
@@ -1728,7 +1917,7 @@ However, I found that I could not dedicate enough time to managing
contributed code: the archive did not grow and the domain went unused
for several years.
-Fortunately, late in 2008, a volunteer took on the task of setting up
+Late in 2008, a volunteer took on the task of setting up
an @command{awk}-related web site---@uref{http://awk.info}---and did a very
nice job.
@@ -1737,11 +1926,15 @@ a @command{gawk} extension that you would like to share with the rest
of the world, please see @uref{http://awk.info/?contribute} for how to
contribute it to the web site.
+As of this writing, this web site is in search of a maintainer; please
+contact me if you are interested.
+
@ignore
Other links:
http://www.reddit.com/r/linux/comments/dtect/composing_music_in_awk/
@end ignore
+@end ifclear
@node Acknowledgments
@unnumberedsec Acknowledgments
@@ -1879,13 +2072,29 @@ people.
Notable code and documentation contributions were made by
a number of people. @xref{Contributors}, for the full list.
+Thanks to Patrice Dumas for the new @command{makeinfo} program.
+Thanks to Karl Berry who continues to work to keep
+the Texinfo markup language sane.
+
@cindex Kernighan, Brian
+@cindex Brennan, Michael
+@cindex Day, Robert P.J.@:
+Robert P.J.@: Day, Michael Brennan and Brian Kernighan kindly acted as
+reviewers for the 2015 edition of this @value{DOCUMENT}. Their feedback
+helped improve the final work.
+
I would like to thank Brian Kernighan for invaluable assistance during the
testing and debugging of @command{gawk}, and for ongoing
help and advice in clarifying numerous points about the language.
We could not have done nearly as good a job on either @command{gawk}
or its documentation without his help.
+Brian is in a class by himself as a programmer and technical
+author. I have to thank him (yet again) for his ongoing friendship
+and the role model he has been for me for close to 30 years!
+Having him as a reviewer is an exciting privilege. It has also
+been extremely humbling@enddots{}
+
@cindex Robbins, Miriam
@cindex Robbins, Jean
@cindex Robbins, Harry
@@ -1898,26 +2107,28 @@ which they raised and educated me.
Finally, I also must acknowledge my gratitude to G-d, for the many opportunities
He has sent my way, as well as for the gifts He has given me with which to
take advantage of those opportunities.
+@iftex
@sp 2
@noindent
Arnold Robbins @*
Nof Ayalon @*
ISRAEL @*
-May, 2013
-
-@iftex
-@part Part I:@* The @command{awk} Language
+May, 2014
@end iftex
-@ignore
+@ifnotinfo
+@part @value{PART1}The @command{awk} Language
+@end ifnotinfo
+
@ifdocbook
-@part Part I:@* The @command{awk} Language
-Part I describes the @command{awk} language and @command{gawk} program in detail.
-It starts with the basics, and continues through all of the features of @command{awk}
-and @command{gawk}. It contains the following chapters:
+Part I describes the @command{awk} language and @command{gawk} program
+in detail. It starts with the basics, and continues through all of
+the features of @command{awk}. Included also are many, but not all,
+of the features of @command{gawk}. This part contains the
+following chapters:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@ref{Getting Started}.
@@ -1946,7 +2157,6 @@ and @command{gawk}. It contains the following chapters:
@ref{Functions}.
@end itemize
@end ifdocbook
-@end ignore
@node Getting Started
@chapter Getting Started with @command{awk}
@@ -1986,7 +2196,7 @@ pattern to search for and one action to perform
upon finding the pattern.
Syntactically, a rule consists of a pattern followed by an action. The
-action is enclosed in curly braces to separate it from the pattern.
+action is enclosed in braces to separate it from the pattern.
Newlines usually separate rules. Therefore, an @command{awk}
program looks like this:
@@ -2010,6 +2220,7 @@ program looks like this:
* Other Features:: Other Features of @command{awk}.
* When:: When to use @command{gawk} and when to use
other things.
+* Intro Summary:: Summary of the introduction.
@end menu
@node Running gawk
@@ -2038,7 +2249,7 @@ variations of each.
@menu
* One-shot:: Running a short throwaway @command{awk}
program.
-* Read Terminal:: Using no input files (input from terminal
+* Read Terminal:: Using no input files (input from the keyboard
instead).
* Long:: Putting permanent @command{awk} programs in
files.
@@ -2102,10 +2313,15 @@ awk '@var{program}'
@noindent
@command{awk} applies the @var{program} to the @dfn{standard input},
-which usually means whatever you type on the terminal. This continues
+which usually means whatever you type on the keyboard. This continues
until you indicate end-of-file by typing @kbd{Ctrl-d}.
+@ifset FOR_PRINT
+(On other operating systems, the end-of-file character may be different.)
+@end ifset
+@ifclear FOR_PRINT
(On other operating systems, the end-of-file character may be different.
For example, on OS/2, it is @kbd{Ctrl-z}.)
+@end ifclear
@cindex files, input, See input files
@cindex input files, running @command{awk} without
@@ -2113,29 +2329,27 @@ For example, on OS/2, it is @kbd{Ctrl-z}.)
As an example, the following program prints a friendly piece of advice
(from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}),
to keep you from worrying about the complexities of computer
-programming@footnote{If you use Bash as your shell, you should execute
-the command @samp{set +H} before running this program interactively,
-to disable the C shell-style command history, which treats
-@samp{!} as a special character. We recommend putting this command into
-your personal startup file.}
-(@code{BEGIN} is a feature we haven't discussed yet):
+programming:
@example
-$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"}
+$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'}
@print{} Don't Panic!
@end example
-@cindex shell quoting, double quote
-@cindex double quote (@code{"}) in shell commands
-@cindex @code{"} (double quote) in shell commands
-@cindex @code{\} (backslash) in shell commands
-@cindex backslash (@code{\}) in shell commands
-This program does not read any input. The @samp{\} before each of the
-inner double quotes is necessary because of the shell's quoting
-rules---in particular because it mixes both single quotes and
-double quotes.@footnote{Although we generally recommend the use of single
-quotes around the program text, double quotes are needed here in order to
-put the single quote into the message.}
+@command{awk} executes statements associated with @code{BEGIN} before
+reading any input. If there are no other statements in your program,
+as is the case here, @command{awk} just stops, instead of trying to read
+input it doesn't know how to process.
+The @samp{\47} is a magic way of getting a single quote into
+the program, without having to engage in ugly shell quoting tricks.
+
+@quotation NOTE
+As a side note, if you use Bash as your shell, you should execute the
+command @samp{set +H} before running this program interactively, to
+disable the C shell-style command history, which treats @samp{!} as a
+special character. We recommend putting this command into your personal
+startup file.
+@end quotation
This next simple @command{awk} program
emulates the @command{cat} utility; it copies whatever you type on the
@@ -2170,9 +2384,10 @@ awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{}
@cindex @option{-f} option
@cindex command line, option @option{-f}
-The @option{-f} instructs the @command{awk} utility to get the @command{awk} program
-from the file @var{source-file}. Any file name can be used for
-@var{source-file}. For example, you could put the program:
+The @option{-f} instructs the @command{awk} utility to get the
+@command{awk} program from the file @var{source-file} (@pxref{Options}).
+Any @value{FN} can be used for @var{source-file}. For example, you
+could put the program:
@example
BEGIN @{ print "Don't Panic!" @}
@@ -2196,8 +2411,8 @@ awk "BEGIN @{ print \"Don't Panic!\" @}"
@noindent
This was explained earlier
(@pxref{Read Terminal}).
-Note that you don't usually need single quotes around the file name that you
-specify with @option{-f}, because most file names don't contain any of the shell's
+Note that you don't usually need single quotes around the @value{FN} that you
+specify with @option{-f}, because most @value{FN}s don't contain any of the shell's
special characters. Notice that in @file{advice}, the @command{awk}
program did not have single quotes around it. The quotes are only needed
for programs that are provided on the @command{awk} command line.
@@ -2207,7 +2422,7 @@ for programs that are provided on the @command{awk} command line.
@c STARTOFRANGE qs2x
@cindex @code{'} (single quote) in @command{gawk} command lines
If you want to clearly identify your @command{awk} program files as such,
-you can add the extension @file{.awk} to the file name. This doesn't
+you can add the extension @file{.awk} to the @value{FN}. This doesn't
affect the execution of the @command{awk} program but it does make
``housekeeping'' easier.
@@ -2233,16 +2448,7 @@ BEGIN @{ print "Don't Panic!" @}
@noindent
After making this file executable (with the @command{chmod} utility),
simply type @samp{advice}
-at the shell and the system arranges to run @command{awk}@footnote{The
-line beginning with @samp{#!} lists the full file name of an interpreter
-to run and an optional initial command-line argument to pass to that
-interpreter. The operating system then runs the interpreter with the given
-argument and the full argument list of the executed program. The first argument
-in the list is the full file name of the @command{awk} program.
-The rest of the
-argument list contains either options to @command{awk}, or data files,
-or both. Note that on many systems @command{awk} may be found in
-@file{/usr/bin} instead of in @file{/bin}. Caveat Emptor.} as if you had
+at the shell and the system arranges to run @command{awk} as if you had
typed @samp{awk -f advice}:
@example
@@ -2260,9 +2466,27 @@ Self-contained @command{awk} scripts are useful when you want to write a
program that users can invoke without their having to know that the program is
written in @command{awk}.
-@sidebar Portability Issues with @samp{#!}
+@sidebar Understanding @samp{#!}
@cindex portability, @code{#!} (executable scripts)
+@command{awk} is an @dfn{interpreted} language. This means that the
+@command{awk} utility reads your program and then processes your data
+according to the instructions in your program. (This is different
+from a @dfn{compiled} language such as C, where your program is first
+compiled into machine code that is executed directly by your system's
+hardware.) The @command{awk} utility is thus termed an @dfn{interpreter}.
+Many modern languages are interpreted.
+
+The line beginning with @samp{#!} lists the full @value{FN} of an
+interpreter to run and a single optional initial command-line argument
+to pass to that interpreter. The operating system then runs the
+interpreter with the given argument and the full argument list of the
+executed program. The first argument in the list is the full @value{FN}
+of the @command{awk} program. The rest of the argument list contains
+either options to @command{awk}, or @value{DF}s, or both. Note that on
+many systems @command{awk} may be found in @file{/usr/bin} instead of
+in @file{/bin}. Caveat Emptor.
+
Some systems limit the length of the interpreter name to 32 characters.
Often, this can be dealt with by using a symbolic link.
@@ -2274,8 +2498,7 @@ of some sort from @command{awk}.
@cindex @code{ARGC}/@code{ARGV} variables, portability and
@cindex portability, @code{ARGV} variable
-Finally,
-the value of @code{ARGV[0]}
+Finally, the value of @code{ARGV[0]}
(@pxref{Built-in Variables})
varies depending upon your operating system.
Some systems put @samp{awk} there, some put the full pathname
@@ -2331,7 +2554,7 @@ runs, it will probably print strange messages about syntax errors.
For example, look at the following:
@example
-$ @kbd{awk '@{ print "hello" @} # let's be cute'}
+$ @kbd{awk 'BEGIN @{ print "hello" @} # let's be cute'}
>
@end example
@@ -2379,7 +2602,28 @@ knowledge of shell quoting rules. The following rules apply only to
POSIX-compliant, Bourne-style shells (such as Bash, the GNU Bourne-Again
Shell). If you use the C shell, you're on your own.
-@itemize @bullet
+Before diving into the rules, we introduce a concept that appears
+throughout this @value{DOCUMENT}, which is that of the @dfn{null},
+or empty, string.
+
+The null string is character data that has no value.
+In other words, it is empty. It is written in @command{awk} programs
+like this: @code{""}. In the shell, it can be written using single
+or double quotes: @code{""} or @code{''}. While the null string has
+no characters in it, it does exist. Consider this command:
+
+@example
+$ @kbd{echo ""}
+@end example
+
+@noindent
+Here, the @command{echo} utility receives a single argument, even
+though that argument has no characters in it. In the rest of this
+@value{DOCUMENT}, we use the terms @dfn{null string} and @dfn{empty string}
+interchangeably. Now, on to the quoting rules.
+
+
+@itemize @value{BULLET}
@item
Quoted items can be concatenated with nonquoted items as well as with other
quoted items. The shell turns everything into one argument for
@@ -2433,7 +2677,7 @@ Note that the single quote is not special within double quotes.
@item
Null strings are removed when they occur as part of a non-null
-command-line argument, while explicit non-null objects are kept.
+command-line argument, while explicit null objects are kept.
For example, to specify that the field separator @code{FS} should
be set to the null string, use:
@@ -2451,7 +2695,7 @@ awk -F"" '@var{program}' @var{files} # wrong!
@noindent
In the second case, @command{awk} will attempt to use the text of the program
-as the value of @code{FS}, and the first file name as the text of the program!
+as the value of @code{FS}, and the first @value{FN} as the text of the program!
This results in syntax errors at best, and confusing behavior at worst.
@end itemize
@@ -2554,6 +2798,7 @@ Although this @value{DOCUMENT} generally only worries about POSIX systems and th
POSIX shell, the following issue arises often enough for many users that
it is worth addressing.
+@cindex Brink, Jeroen
The ``shells'' on Microsoft Windows systems use the double-quote
character for quoting, and make it difficult or impossible to include an
escaped double-quote character in a command-line script.
@@ -2566,21 +2811,22 @@ gawk "@{ print \"\042\" $0 \"\042\" @}" @var{file}
@node Sample Data Files
-@section Data Files for the Examples
-@c For gawk >= 4.0, update these data files. No-one has such slow modems!
+@section @value{DDF}s for the Examples
@cindex input files, examples
@cindex @code{mail-list} file
Many of the examples in this @value{DOCUMENT} take their input from two sample
-data files. The first, @file{mail-list}, represents a list of peoples' names
+@value{DF}s. The first, @file{mail-list}, represents a list of people's names
together with their email addresses and information about those people.
-The second data file, called @file{inventory-shipped}, contains
+The second @value{DF}, called @file{inventory-shipped}, contains
information about monthly shipments. In both files,
each line is considered to be one @dfn{record}.
-In the data file @file{mail-list}, each record contains the name of a person,
+In the @value{DF} @file{mail-list}, each record contains the name of a person,
his/her phone number, his/her email address, and a code for their relationship
-with the author of the list. An @samp{A} in the last column
+with the author of the list.
+The columns are aligned using spaces.
+An @samp{A} in the last column
means that the person is an acquaintance. An @samp{F} in the last
column means that the person is a friend.
An @samp{R} means that the person is a relative:
@@ -2607,13 +2853,14 @@ Jean-Paul 555-2127 jeanpaul.campanorum@@nyu.edu R
@end example
@cindex @code{inventory-shipped} file
-The data file @file{inventory-shipped} represents
+The @value{DF} @file{inventory-shipped} represents
information about shipments during the year.
Each record contains the month, the number
of green crates shipped, the number of red boxes shipped, the number of
orange bags shipped, and the number of blue packages shipped,
respectively. There are 16 entries, covering the 12 months of last year
and the first four months of the current year.
+An empty line separates the data for the two years.
@example
@c file eg/data/inventory-shipped
@@ -2687,10 +2934,10 @@ for @emph{every} input line. If the action is omitted, the default
action is to print all lines that match the pattern.
@cindex actions, empty
-Thus, we could leave out the action (the @code{print} statement and the curly
+Thus, we could leave out the action (the @code{print} statement and the
braces) in the previous example and the result would be the same:
@command{awk} prints all lines matching the pattern @samp{li}. By comparison,
-omitting the @code{print} statement but retaining the curly braces makes an
+omitting the @code{print} statement but retaining the braces makes an
empty action that does nothing (i.e., no lines are printed).
@cindex @command{awk} programs, one-line examples
@@ -2699,44 +2946,49 @@ collection of useful, short programs to get you started. Some of these
programs contain constructs that haven't been covered yet. (The description
of the program will give you a good idea of what is going on, but please
read the rest of the @value{DOCUMENT} to become an @command{awk} expert!)
-Most of the examples use a data file named @file{data}. This is just a
+Most of the examples use a @value{DF} named @file{data}. This is just a
placeholder; if you use these programs yourself, substitute
-your own file names for @file{data}.
+your own @value{FN}s for @file{data}.
For future reference, note that there is often more than
one way to do things in @command{awk}. At some point, you may want
to look back at these examples and see if
you can come up with different ways to do the same things shown here:
-@itemize @bullet
+@itemize @value{BULLET}
@item
-Print the length of the longest input line:
+Print every line that is longer than 80 characters:
@example
-awk '@{ if (length($0) > max) max = length($0) @}
- END @{ print max @}' data
+awk 'length($0) > 80' data
@end example
+The sole rule has a relational expression as its pattern and it has no
+action---so it uses the default action, printing the record.
+
@item
-Print every line that is longer than 80 characters:
+Print the length of the longest input line:
@example
-awk 'length($0) > 80' data
+awk '@{ if (length($0) > max) max = length($0) @}
+ END @{ print max @}' data
@end example
-The sole rule has a relational expression as its pattern and it has no
-action---so the default action, printing the record, is used.
+The code associated with @code{END} executes after all
+input has been read; it's the other side of the coin to @code{BEGIN}.
@cindex @command{expand} utility
@item
Print the length of the longest line in @file{data}:
@example
-expand data | awk '@{ if (x < length()) x = length() @}
+expand data | awk '@{ if (x < length($0)) x = length($0) @}
END @{ print "maximum line length is " x @}'
@end example
+This example differs slightly from the previous one:
The input is processed by the @command{expand} utility to change TABs
-into spaces, so the widths compared are actually the right-margin columns.
+into spaces, so the widths compared are actually the right-margin columns,
+as opposed to the number of input characters on each line.
@item
Print every line that has at least one field:
@@ -2791,7 +3043,7 @@ awk 'END @{ print NR @}' data
@end example
@item
-Print the even-numbered lines in the data file:
+Print the even-numbered lines in the @value{DF}:
@example
awk 'NR % 2 == 0' data
@@ -2807,9 +3059,9 @@ the program would print the odd-numbered lines.
The @command{awk} utility reads the input files one line at a
time. For each line, @command{awk} tries the patterns of each of the rules.
-If several patterns match, then several actions are run in the order in
+If several patterns match, then several actions execute in the order in
which they appear in the @command{awk} program. If no patterns match, then
-no actions are run.
+no actions run.
After processing all the rules that match the line (and perhaps there are none),
@command{awk} reads the next line. (However,
@@ -2833,7 +3085,7 @@ This program prints every line that contains the string
@samp{12} @emph{or} the string @samp{21}. If a line contains both
strings, it is printed twice, once by each rule.
-This is what happens if we run this program on our two sample data files,
+This is what happens if we run this program on our two sample @value{DF}s,
@file{mail-list} and @file{inventory-shipped}:
@example
@@ -2863,8 +3115,8 @@ features that haven't been covered yet, so don't worry if you don't
understand all the details:
@example
-LC_ALL=C ls -l | awk '$6 == "Nov" @{ sum += $5 @}
- END @{ print sum @}'
+ls -l | awk '$6 == "Nov" @{ sum += $5 @}
+ END @{ print sum @}'
@end example
@cindex @command{ls} utility
@@ -2893,7 +3145,7 @@ the file. The fourth field identifies the group of the file.
The fifth field contains the size of the file in bytes. The
sixth, seventh, and eighth fields contain the month, day, and time,
respectively, that the file was last modified. Finally, the ninth field
-contains the file name.@footnote{The @samp{LC_ALL=C} is
+contains the @value{FN}.@footnote{The @samp{LC_ALL=C} is
needed to produce this traditional-style output from @command{ls}.}
@c @cindex automatic initialization
@@ -2901,8 +3153,8 @@ needed to produce this traditional-style output from @command{ls}.}
The @samp{$6 == "Nov"} in our @command{awk} program is an expression that
tests whether the sixth field of the output from @w{@samp{ls -l}}
matches the string @samp{Nov}. Each time a line has the string
-@samp{Nov} for its sixth field, the action @samp{sum += $5} is
-performed. This adds the fifth field (the file's size) to the variable
+@samp{Nov} for its sixth field, @command{awk} performs the action
+@samp{sum += $5}. This adds the fifth field (the file's size) to the variable
@code{sum}. As a result, when @command{awk} has finished reading all the
input lines, @code{sum} is the total of the sizes of the files whose
lines matched the pattern. (This works because @command{awk} variables
@@ -2969,7 +3221,7 @@ We have generally not used backslash continuation in our sample programs.
@command{gawk} places no limit on the
length of a line, so backslash continuation is never strictly necessary;
it just makes programs more readable. For this same reason, as well as
-for clarity, we have kept most statements short in the sample programs
+for clarity, we have kept most statements short in the programs
presented throughout the @value{DOCUMENT}. Backslash continuation is
most useful when your @command{awk} program is in a separate source file
instead of entered from the command line. You should also note that
@@ -3034,7 +3286,7 @@ $ gawk 'BEGIN @{ print "dont panic" # a friendly \
> BEGIN rule
> @}'
@error{} gawk: cmd. line:2: BEGIN rule
-@error{} gawk: cmd. line:2: ^ parse error
+@error{} gawk: cmd. line:2: ^ syntax error
@end example
@noindent
@@ -3082,7 +3334,7 @@ and array sorting.
As we develop our presentation of the @command{awk} language, we introduce
most of the variables and many of the functions. They are described
-systematically in @ref{Built-in Variables}, and
+systematically in @ref{Built-in Variables}, and in
@ref{Built-in}.
@node When
@@ -3107,25 +3359,64 @@ edit-compile-test-debug cycle of software development.
@cindex Brian Kernighan's @command{awk}
Complex programs have been written in @command{awk}, including a complete
-retargetable assembler for eight-bit microprocessors (@pxref{Glossary}, for
-more information), and a microcode assembler for a special-purpose Prolog
+retargetable assembler for
+@ifclear FOR_PRINT
+eight-bit microprocessors (@pxref{Glossary}, for more information),
+@end ifclear
+@ifset FOR_PRINT
+eight-bit microprocessors,
+@end ifset
+and a microcode assembler for a special-purpose Prolog
computer.
While the original @command{awk}'s capabilities were strained by tasks
-of such complexity, modern versions are more capable. Even Brian Kernighan's
-version of @command{awk} has fewer predefined limits, and those
-that it has are much larger than they used to be.
+of such complexity, modern versions are more capable.
@cindex @command{awk} programs, complex
-If you find yourself writing @command{awk} scripts of more than, say, a few
-hundred lines, you might consider using a different programming
-language. Emacs Lisp is a good choice if you need sophisticated string
-or pattern matching capabilities. The shell is also good at string and
-pattern matching; in addition, it allows powerful use of the system
-utilities. More conventional languages, such as C, C++, and Java, offer
-better facilities for system programming and for managing the complexity
-of large programs. Programs in these languages may require more lines
-of source code than the equivalent @command{awk} programs, but they are
-easier to maintain and usually run more efficiently.
+If you find yourself writing @command{awk} scripts of more than, say,
+a few hundred lines, you might consider using a different programming
+language. The shell is good at string and pattern matching; in addition,
+it allows powerful use of the system utilities. Python offers a nice
+balance between high-level ease of programming and access to system
+facilities.@footnote{Other popular scripting languages include Ruby
+and Perl.}
+
+@node Intro Summary
+@section Summary
+
+@c FIXME: Review this chapter for summary of builtin functions called.
+@itemize @value{BULLET}
+@item
+Programs in @command{awk} consist of @var{pattern}-@var{action} pairs.
+
+@item
+An @var{action} without a @var{pattern} always runs. The default
+@var{action} for a pattern without one is @samp{@{ print $0 @}}.
+
+@item
+Use either
+@samp{awk '@var{program}' @var{files}}
+or
+@samp{awk -f @var{program-file} @var{files}}
+to run @command{awk}.
+
+@item
+You may use the special @samp{#!} header line to create @command{awk}
+programs that are directly executable.
+
+@item
+Comments in @command{awk} programs start with @samp{#} and continue to
+the end of the same line.
+
+@item
+Be aware of quoting issues when writing @command{awk} programs as
+part of a larger shell script (or MS-Windows batch file).
+
+@item
+You may use backslash continuation to continue a source line.
+Lines are automatically continued after
+a comma, open brace, question mark, colon,
+@samp{||}, @samp{&&}, @code{do} and @code{else}.
+@end itemize
@node Invoking Gawk
@chapter Running @command{awk} and @command{gawk}
@@ -3155,6 +3446,7 @@ things in this @value{CHAPTER} that don't interest you right now.
* Loading Shared Libraries:: Loading shared libraries into your program.
* Obsolete:: Obsolete Options and/or features.
* Undocumented:: Undocumented Options and Features.
+* Invoking Summary:: Invocation summary.
@end menu
@node Command Line
@@ -3168,19 +3460,10 @@ There are two ways to run @command{awk}---with an explicit program or with
one or more program files. Here are templates for both of them; items
enclosed in [@dots{}] in these templates are optional:
-@ifnotdocbook
-@example
-awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{}
-awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
-@end example
-@end ifnotdocbook
-
-@c FIXME - find a better way to mark this up in docbook
-@docbook
-<screen>awk [<replaceable>options</replaceable>] -f progfile [<literal>--</literal>] <replaceable>file</replaceable> &#8230;
-awk [<replaceable>options</replaceable>] [<literal>--</literal>] '<replaceable>program</replaceable>' <replaceable>file</replaceable> &#8230;
-</screen>
-@end docbook
+@display
+@command{awk} [@var{options}] @option{-f} @var{progfile} [@option{--}] @var{file} @dots{}
+@command{awk} [@var{options}] [@option{--}] @code{'@var{program}'} @var{file} @dots{}
+@end display
@cindex GNU long options
@cindex long options
@@ -3296,8 +3579,8 @@ conventions.
@cindex @code{-} (hyphen), filenames beginning with
@cindex hyphen (@code{-}), filenames beginning with
-This is useful if you have file names that start with @samp{-},
-or in shell scripts, if you have file names that will be specified
+This is useful if you have @value{FN}s that start with @samp{-},
+or in shell scripts, if you have @value{FN}s that will be specified
by the user that could start with @samp{-}.
It is also useful for passing options on to the @command{awk}
program; see @ref{Getopt Function}.
@@ -3307,9 +3590,10 @@ program; see @ref{Getopt Function}.
The following list describes @command{gawk}-specific options:
-@table @code
-@item -b
-@itemx --characters-as-bytes
+@c Have to use @asis here to get docbook to come out right.
+@table @asis
+@item @option{-b}
+@itemx @option{--characters-as-bytes}
@cindex @option{-b} option
@cindex @option{--characters-as-bytes} option
Cause @command{gawk} to treat all input data as single-byte characters.
@@ -3317,33 +3601,36 @@ In addition, all output written with @code{print} or @code{printf}
is treated as single-byte characters.
Normally, @command{gawk} follows the POSIX standard and attempts to process
-its input data according to the current locale. This can often involve
+its input data according to the current locale (@pxref{Locales}). This can often involve
converting multibyte characters into wide characters (internally), and
can lead to problems or confusion if the input data does not contain valid
multibyte characters. This option is an easy way to tell @command{gawk}:
``hands off my data!''.
-@item -c
-@itemx --traditional
+@item @option{-c}
+@itemx @option{--traditional}
@cindex @option{-c} option
@cindex @option{--traditional} option
@cindex compatibility mode (@command{gawk}), specifying
Specify @dfn{compatibility mode}, in which the GNU extensions to
the @command{awk} language are disabled, so that @command{gawk} behaves just
-like Brian Kernighan's version @command{awk}.
+like BWK @command{awk}.
@xref{POSIX/GNU},
-which summarizes the extensions. Also see
+which summarizes the extensions.
+@ifclear FOR_PRINT
+Also see
@ref{Compatibility Mode}.
+@end ifclear
-@item -C
-@itemx --copyright
+@item @option{-C}
+@itemx @option{--copyright}
@cindex @option{-C} option
@cindex @option{--copyright} option
@cindex GPL (General Public License), printing
Print the short version of the General Public License and then exit.
-@item -d@r{[}@var{file}@r{]}
-@itemx --dump-variables@r{[}=@var{file}@r{]}
+@item @option{-d}[@var{file}]
+@itemx @option{--dump-variables}[@code{=}@var{file}]
@cindex @option{-d} option
@cindex @option{--dump-variables} option
@cindex dump all variables of a program
@@ -3365,21 +3652,21 @@ inadvertently use global variables that you meant to be local.
(This is a particularly easy mistake to make with simple variable
names like @code{i}, @code{j}, etc.)
-@item -D@r{[}@var{file}@r{]}
-@itemx --debug=@r{[}@var{file}@r{]}
+@item @option{-D}[@var{file}]
+@itemx @option{--debug}[@code{=}@var{file}]
@cindex @option{-D} option
@cindex @option{--debug} option
@cindex @command{awk} debugging, enabling
Enable debugging of @command{awk} programs
(@pxref{Debugging}).
-By default, the debugger reads commands interactively from the terminal.
+By default, the debugger reads commands interactively from the keyboard.
The optional @var{file} argument allows you to specify a file with a list
of commands for the debugger to execute non-interactively.
No space is allowed between the @option{-D} and @var{file}, if
@var{file} is supplied.
-@item -e @var{program-text}
-@itemx --source @var{program-text}
+@item @option{-e} @var{program-text}
+@itemx @option{--source} @var{program-text}
@cindex @option{-e} option
@cindex @option{--source} option
@cindex source code, mixing
@@ -3390,8 +3677,8 @@ This is particularly useful
when you have library functions that you want to use from your command-line
programs (@pxref{AWKPATH Variable}).
-@item -E @var{file}
-@itemx --exec @var{file}
+@item @option{-E} @var{file}
+@itemx @option{--exec} @var{file}
@cindex @option{-E} option
@cindex @option{--exec} option
@cindex @command{awk} programs, location of
@@ -3399,7 +3686,7 @@ programs (@pxref{AWKPATH Variable}).
Similar to @option{-f}, read @command{awk} program text from @var{file}.
There are two differences from @option{-f}:
-@itemize @bullet
+@itemize @value{BULLET}
@item
This option terminates option processing; anything
else on the command line is passed on directly to the @command{awk} program.
@@ -3412,7 +3699,7 @@ Command-line variable assignments of the form
This option is particularly necessary for World Wide Web CGI applications
that pass arguments through the URL; using this option prevents a malicious
(or other) user from passing in options, assignments, or @command{awk} source
-code (via @option{--source}) to the CGI application. This option should be used
+code (via @option{-e}) to the CGI application. This option should be used
with @samp{#!} scripts (@pxref{Executable Scripts}), like so:
@example
@@ -3421,20 +3708,20 @@ with @samp{#!} scripts (@pxref{Executable Scripts}), like so:
@var{awk program here @dots{}}
@end example
-@item -g
-@itemx --gen-pot
+@item @option{-g}
+@itemx @option{--gen-pot}
@cindex @option{-g} option
@cindex @option{--gen-pot} option
@cindex portable object files, generating
@cindex files, portable object, generating
Analyze the source program and
-generate a GNU @code{gettext} Portable Object Template file on standard
+generate a GNU @command{gettext} Portable Object Template file on standard
output for all string constants that have been marked for translation.
@xref{Internationalization},
for information about this option.
-@item -h
-@itemx --help
+@item @option{-h}
+@itemx @option{--help}
@cindex @option{-h} option
@cindex @option{--help} option
@cindex GNU long options, printing list of
@@ -3443,42 +3730,47 @@ for information about this option.
Print a ``usage'' message summarizing the short- and long-style options
that @command{gawk} accepts and then exit.
-@item -i @var{source-file}
-@itemx --include @var{source-file}
+@item @option{-i} @var{source-file}
+@itemx @option{--include} @var{source-file}
@cindex @option{-i} option
@cindex @option{--include} option
@cindex @command{awk} programs, location of
-Read @command{awk} source library from @var{source-file}. This option is
-completely equivalent to using the @samp{@@include} directive inside
-your program. This option is very
-similar to the @option{-f} option, but there are two important differences.
-First, when @option{-i} is used, the program source will not be loaded if it has
-been previously loaded, whereas the @option{-f} will always load the file.
+Read @command{awk} source library from @var{source-file}. This option
+is completely equivalent to using the @code{@@include} directive inside
+your program. This option is very similar to the @option{-f} option,
+but there are two important differences. First, when @option{-i} is
+used, the program source is not loaded if it has been previously
+loaded, whereas with @option{-f}, @command{gawk} always loads the file.
Second, because this option is intended to be used with code libraries,
@command{gawk} does not recognize such files as constituting main program
-input. Thus, after processing an @option{-i} argument, @command{gawk} still expects to
-find the main source code via the @option{-f} option or on the command-line.
+input. Thus, after processing an @option{-i} argument, @command{gawk}
+still expects to find the main source code via the @option{-f} option
+or on the command line.
-@item -l @var{lib}
-@itemx --load @var{lib}
+@item @option{-l} @var{ext}
+@itemx @option{--load} @var{ext}
@cindex @option{-l} option
@cindex @option{--load} option
-@cindex loading, library
-Load a shared library @var{lib}. This searches for the library using the @env{AWKLIBPATH}
+@cindex loading, extensions
+Load a dynamic extension named @var{ext}. Extensions
+are stored as system shared libraries.
+This option searches for the library using the @env{AWKLIBPATH}
environment variable. The correct library suffix for your platform will be
-supplied by default, so it need not be specified in the library name.
-The library initialization routine should be named @code{dl_load()}.
-An alternative is to use the @samp{@@load} keyword inside the program to load
-a shared library.
+supplied by default, so it need not be specified in the extension name.
+The extension initialization routine should be named @code{dl_load()}.
+An alternative is to use the @code{@@load} keyword inside the program to load
+a shared library. This feature is described in detail in @ref{Dynamic Extensions}.
-@item -L @r{[}value@r{]}
-@itemx --lint@r{[}=value@r{]}
+@item @option{-L}[@var{value}]
+@itemx @option{--lint}[@code{=}@var{value}]
@cindex @option{-L} option
@cindex @option{--lint} option
@cindex lint checking, issuing warnings
@cindex warnings, issuing
Warn about constructs that are dubious or nonportable to
other @command{awk} implementations.
+No space is allowed between the @option{-L} and @var{value}, if
+@var{value} is supplied.
Some warnings are issued when @command{gawk} first reads your program. Others
are issued at runtime, as your program executes.
With an optional argument of @samp{fatal},
@@ -3494,16 +3786,16 @@ when eliminating problems pointed out by @option{--lint}, you should take
care to search for all occurrences of each inappropriate construct. As
@command{awk} programs are usually short, doing so is not burdensome.
-@item -M
-@itemx --bignum
+@item @option{-M}
+@itemx @option{--bignum}
@cindex @option{-M} option
@cindex @option{--bignum} option
Force arbitrary precision arithmetic on numbers. This option has no effect
if @command{gawk} is not compiled to use the GNU MPFR and MP libraries
-(@pxref{Gawk and MPFR}).
+(@pxref{Arbitrary Precision Arithmetic}).
-@item -n
-@itemx --non-decimal-data
+@item @option{-n}
+@itemx @option{--non-decimal-data}
@cindex @option{-n} option
@cindex @option{--non-decimal-data} option
@cindex hexadecimal values@comma{} enabling interpretation of
@@ -3518,34 +3810,39 @@ This option can severely break old programs.
Use with care.
@end quotation
-@item -N
-@itemx --use-lc-numeric
+@item @option{-N}
+@itemx @option{--use-lc-numeric}
@cindex @option{-N} option
@cindex @option{--use-lc-numeric} option
Force the use of the locale's decimal point character
when parsing numeric input data (@pxref{Locales}).
-@item -o@r{[}@var{file}@r{]}
-@itemx --pretty-print@r{[}=@var{file}@r{]}
+@item @option{-o}[@var{file}]
+@itemx @option{--pretty-print}[@code{=}@var{file}]
@cindex @option{-o} option
@cindex @option{--pretty-print} option
Enable pretty-printing of @command{awk} programs.
-By default, output program is created in a file named @file{awkprof.out}.
+By default, the output program is created in a file named @file{awkprof.out}
+(@pxref{Profiling}).
The optional @var{file} argument allows you to specify a different
-file name for the output.
+@value{FN} for the output.
No space is allowed between the @option{-o} and @var{file}, if
@var{file} is supplied.
-@item -O
-@itemx --optimize
+@quotation NOTE
+In the past, this option would also execute your program.
+This is no longer the case.
+@end quotation
+
+@item @option{-O}
+@itemx @option{--optimize}
@cindex @option{--optimize} option
@cindex @option{-O} option
Enable some optimizations on the internal representation of the program.
-At the moment this includes just simple constant folding. The @command{gawk}
-maintainer hopes to add more optimizations over time.
+At the moment this includes just simple constant folding.
-@item -p@r{[}@var{file}@r{]}
-@itemx --profile@r{[}=@var{file}@r{]}
+@item @option{-p}[@var{file}]
+@itemx @option{--profile}[@code{=}@var{file}]
@cindex @option{-p} option
@cindex @option{--profile} option
@cindex @command{awk} profiling, enabling
@@ -3553,15 +3850,15 @@ Enable profiling of @command{awk} programs
(@pxref{Profiling}).
By default, profiles are created in a file named @file{awkprof.out}.
The optional @var{file} argument allows you to specify a different
-file name for the profile file.
+@value{FN} for the profile file.
No space is allowed between the @option{-p} and @var{file}, if
@var{file} is supplied.
The profile contains execution counts for each statement in the program
in the left margin, and function call counts for each function.
-@item -P
-@itemx --posix
+@item @option{-P}
+@itemx @option{--posix}
@cindex @option{-P} option
@cindex @option{--posix} option
@cindex POSIX mode
@@ -3575,7 +3872,7 @@ Also,
the following additional
restrictions apply:
-@itemize @bullet
+@itemize @value{BULLET}
@cindex newlines
@cindex whitespace, newlines as
@@ -3591,7 +3888,7 @@ Newlines are not allowed after @samp{?} or @samp{:}
@cindex @code{FS} variable, as TAB character
@item
-Specifying @samp{-Ft} on the command-line does not set the value
+Specifying @samp{-Ft} on the command line does not set the value
of @code{FS} to be a single TAB character
(@pxref{Field Separators}).
@@ -3608,10 +3905,10 @@ data (@pxref{Locales}).
@cindex @option{--posix} option, @code{--traditional} option and
If you supply both @option{--traditional} and @option{--posix} on the
command line, @option{--posix} takes precedence. @command{gawk}
-also issues a warning if both options are supplied.
+issues a warning if both options are supplied.
-@item -r
-@itemx --re-interval
+@item @option{-r}
+@itemx @option{--re-interval}
@cindex @option{-r} option
@cindex @option{--re-interval} option
@cindex regular expressions, interval expressions and
@@ -3620,10 +3917,10 @@ Allow interval expressions
in regexps.
This is now @command{gawk}'s default behavior.
Nevertheless, this option remains both for backward compatibility,
-and for use in combination with the @option{--traditional} option.
+and for use in combination with @option{--traditional}.
-@item -S
-@itemx --sandbox
+@item @option{-S}
+@itemx @option{--sandbox}
@cindex @option{-S} option
@cindex @option{--sandbox} option
@cindex sandbox mode
@@ -3633,18 +3930,18 @@ output redirections with @code{print} and @code{printf},
and dynamic extensions.
This is particularly useful when you want to run @command{awk} scripts
from questionable sources and need to make sure the scripts
-can't access your system (other than the specified input data file).
+can't access your system (other than the specified input @value{DF}).
-@item -t
-@itemx --lint-old
+@item @option{-t}
+@itemx @option{--lint-old}
@cindex @option{-t} option
@cindex @option{--lint-old} option
Warn about constructs that are not available in the original version of
@command{awk} from Version 7 Unix
(@pxref{V7/SVR3.1}).
-@item -V
-@itemx --version
+@item @option{-V}
+@itemx @option{--version}
@cindex @option{-V} option
@cindex @option{--version} option
@cindex @command{gawk}, versions of, information about@comma{} printing
@@ -3679,23 +3976,23 @@ of having to be included into each individual program.
function names must be unique.)
With standard @command{awk}, library functions can still be used, even
-if the program is entered at the terminal,
+if the program is entered at the keyboard,
by specifying @samp{-f /dev/tty}. After typing your program,
type @kbd{Ctrl-d} (the end-of-file character) to terminate it.
(You may also use @samp{-f -} to read program source from the standard
input but then you will not be able to also use the standard input as a
source of data.)
-Because it is clumsy using the standard @command{awk} mechanisms to mix source
-file and command-line @command{awk} programs, @command{gawk} provides the
-@option{--source} option. This does not require you to pre-empt the standard
-input for your source code; it allows you to easily mix command-line
-and library source code
-(@pxref{AWKPATH Variable}).
-The @option{--source} option may also be used multiple times on the command line.
+Because it is clumsy using the standard @command{awk} mechanisms to mix
+source file and command-line @command{awk} programs, @command{gawk}
+provides the @option{-e} option. This does not require you to
+pre-empt the standard input for your source code; it allows you to easily
+mix command-line and library source code (@pxref{AWKPATH Variable}).
+As with @option{-f}, the @option{-e} and @option{-i}
+options may also be used multiple times on the command line.
-@cindex @option{--source} option
-If no @option{-f} or @option{--source} option is specified, then @command{gawk}
+@cindex @option{-e} option
+If no @option{-f} or @option{-e} option is specified, then @command{gawk}
uses the first non-option command-line argument as the text of the
program source code.
@@ -3704,7 +4001,7 @@ program source code.
@cindex POSIX mode
If the environment variable @env{POSIXLY_CORRECT} exists,
then @command{gawk} behaves in strict POSIX mode, exactly as if
-you had supplied the @option{--posix} command-line option.
+you had supplied @option{--posix}.
Many GNU programs look for this environment variable to suppress
extensions that conflict with POSIX, but @command{gawk} behaves
differently: it suppresses all extensions, even those that do not
@@ -3763,11 +4060,16 @@ included. As each element of @code{ARGV} is processed, @command{gawk}
sets the variable @code{ARGIND} to the index in @code{ARGV} of the
current element.
+@c FIXME: One day, move the ARGC and ARGV node closer to here.
+Changing @code{ARGC} and @code{ARGV} in your @command{awk} program lets
+you control how @command{awk} processes the input files; this is described
+in more detail in @ref{ARGC and ARGV}.
+
@cindex input files, variable assignments and
@cindex variable assignments and input files
-The distinction between file name arguments and variable-assignment
+The distinction between @value{FN} arguments and variable-assignment
arguments is made when @command{awk} is about to open the next input file.
-At that point in execution, it checks the file name to see whether
+At that point in execution, it checks the @value{FN} to see whether
it is really a variable assignment; if so, @command{awk} sets the variable
instead of reading a file.
@@ -3783,8 +4085,8 @@ The variable values given on the command line are processed for escape
sequences (@pxref{Escape Sequences}).
@value{DARKCORNER}
-In some earlier implementations of @command{awk}, when a variable assignment
-occurred before any file names, the assignment would happen @emph{before}
+In some very early implementations of @command{awk}, when a variable assignment
+occurred before any @value{FN}s, the assignment would happen @emph{before}
the @code{BEGIN} rule was executed. @command{awk}'s behavior was thus
inconsistent; some command-line assignments were available inside the
@code{BEGIN} rule, while others were not. Unfortunately,
@@ -3795,8 +4097,8 @@ upon the old behavior.
The variable assignment feature is most useful for assigning to variables
such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and
-output formats before scanning the data files. It is also useful for
-controlling state if multiple passes are needed over a data file. For
+output formats, before scanning the @value{DF}s. It is also useful for
+controlling state if multiple passes are needed over a @value{DF}. For
example:
@cindex files, multiple passes over
@@ -3832,13 +4134,13 @@ You may also use @code{"-"} to name standard input when reading
files with @code{getline} (@pxref{Getline/File}).
In addition, @command{gawk} allows you to specify the special
-file name @file{/dev/stdin}, both on the command line and
+@value{FN} @file{/dev/stdin}, both on the command line and
with @code{getline}.
Some other versions of @command{awk} also support this, but it
is not standard.
(Some operating systems provide a @file{/dev/stdin} file
-in the file system, however, @command{gawk} always processes
-this file name itself.)
+in the filesystem; however, @command{gawk} always processes
+this @value{FN} itself.)
@node Environment Variables
@section The Environment Variables @command{gawk} Uses
@@ -3863,12 +4165,12 @@ behaves.
@cindex differences in @command{awk} and @command{gawk}, @code{AWKPATH} environment variable
@ifinfo
The previous @value{SECTION} described how @command{awk} program files can be named
-on the command-line with the @option{-f} option.
+on the command line with the @option{-f} option.
@end ifinfo
In most @command{awk}
implementations, you must supply a precise path name for each program
file, unless the file is in the current directory.
-But in @command{gawk}, if the file name supplied to the @option{-f}
+But in @command{gawk}, if the @value{FN} supplied to the @option{-f}
or @option{-i} options
does not contain a directory separator @samp{/}, then @command{gawk} searches a list of
directories (called the @dfn{search path}), one by one, looking for a
@@ -3885,13 +4187,13 @@ directory is the value of @samp{$(datadir)} generated when
@command{gawk} was configured. You probably don't need to worry about this,
though.}
-The search path feature is particularly useful for building libraries
+The search path feature is particularly helpful for building libraries
of useful @command{awk} functions. The library files can be placed in a
standard directory in the default path and then specified on
-the command line with a short file name. Otherwise, the full file name
+the command line with a short @value{FN}. Otherwise, the full @value{FN}
would have to be typed for each file.
-By using the @option{-i} option, or the @option{--source} and @option{-f} options, your command-line
+By using the @option{-i} option, or the @option{-e} and @option{-f} options, your command-line
@command{awk} programs can use facilities in @command{awk} library files
(@pxref{Library Functions}).
Path searching is not done if @command{gawk} is in compatibility mode.
@@ -3899,17 +4201,20 @@ This is true for both @option{--traditional} and @option{--posix}.
@xref{Options}.
If the source code is not found after the initial search, the path is searched
-again after adding the default @samp{.awk} suffix to the filename.
+again after adding the default @samp{.awk} suffix to the @value{FN}.
@quotation NOTE
+@c 4/2014:
+@c using @samp{.} to get quotes, since @file{} no longer supplies them.
To include
the current directory in the path, either place
-@file{.} explicitly in the path or write a null entry in the
+@samp{.} explicitly in the path or write a null entry in the
path. (A null entry is indicated by starting or ending the path with a
-colon or by placing two colons next to each other (@samp{::}).)
+colon or by placing two colons next to each other [@samp{::}].)
This path search mechanism is similar
to the shell's.
-@c someday, @cite{The Bourne Again Shell}....
+(See @uref{http://www.gnu.org/software/bash/manual/,
+@cite{The Bourne-Again SHell manual}}.)
However, @command{gawk} always looks in the current directory @emph{before}
searching @env{AWKPATH}, so there is no real reason to include
@@ -3921,7 +4226,7 @@ the current directory in the search path.
If @env{AWKPATH} is not defined in the
environment, @command{gawk} places its default search path into
@code{ENVIRON["AWKPATH"]}. This makes it easy to determine
-the actual search path that @command{gawk} will use
+the actual search path that @command{gawk} used
from within an @command{awk} program.
While you can change @code{ENVIRON["AWKPATH"]} within your @command{awk}
@@ -3933,18 +4238,18 @@ found, and @command{gawk} no longer needs to use @env{AWKPATH}.
@node AWKLIBPATH Variable
@subsection The @env{AWKLIBPATH} Environment Variable
@cindex @env{AWKLIBPATH} environment variable
-@cindex directories, searching for shared libraries
-@cindex search paths, for shared libraries
+@cindex directories, searching for loadable extensions
+@cindex search paths, for loadable extensions
@cindex differences in @command{awk} and @command{gawk}, @code{AWKLIBPATH} environment variable
The @env{AWKLIBPATH} environment variable is similar to the @env{AWKPATH}
-variable, but it is used to search for shared libraries specified
-with the @option{-l} option rather than for source files. If the library
-is not found, the path is searched again after adding the appropriate
-shared library suffix for the platform. For example, on GNU/Linux systems,
-the suffix @samp{.so} is used.
-The search path specified is also used for libraries loaded via the
-@samp{@@load} keyword (@pxref{Loading Shared Libraries}).
+variable, but it is used to search for loadable extensions (stored as
+system shared libraries) specified with the @option{-l} option rather
+than for source files. If the extension is not found, the path is
+searched again after adding the appropriate shared library suffix for
+the platform. For example, on GNU/Linux systems, the suffix @samp{.so}
+is used. The search path specified is also used for extensions loaded
+via the @code{@@load} keyword (@pxref{Loading Shared Libraries}).
@node Other Environment Variables
@subsection Other Environment Variables
@@ -3955,12 +4260,12 @@ list are meant to be used by regular users.
@table @env
@item POSIXLY_CORRECT
-Causes @command{gawk} to switch POSIX compatibility
+Causes @command{gawk} to switch to POSIX compatibility
mode, disabling all traditional and GNU extensions.
@xref{Options}.
@item GAWK_SOCK_RETRIES
-Controls the number of time @command{gawk} will attempt to
+Controls the number of times @command{gawk} attempts to
retry a two-way TCP/IP (socket) connection before giving up.
@xref{TCP/IP Networking}.
@@ -3981,9 +4286,18 @@ for use by the @command{gawk} developers for testing and tuning.
They are subject to change. The variables are:
@table @env
+@item AWKBUFSIZE
+This variable only affects @command{gawk} on POSIX-compliant systems.
+With a value of @samp{exact}, @command{gawk} uses the size of each input
+file as the size of the memory buffer to allocate for I/O. Otherwise,
+the value should be a number, and @command{gawk} uses that number as
+the size of the buffer to allocate. (When this variable is not set,
+@command{gawk} uses the smaller of the file's size and the ``default''
+blocksize, which is usually the filesystem's I/O blocksize.)
+
@item AWK_HASH
If this variable exists with a value of @samp{gst}, @command{gawk}
-will switch to using the hash function from GNU Smalltalk for
+switches to using the hash function from GNU Smalltalk for
managing arrays.
This function may be marginally faster than the standard function.
@@ -4052,13 +4366,16 @@ to @code{EXIT_FAILURE}.
This @value{SECTION} describes a feature that is specific to @command{gawk}.
-The @samp{@@include} keyword can be used to read external @command{awk} source
+@cindex @code{@@include} directive
+@cindex file inclusion, @code{@@include} directive
+@cindex including files, @code{@@include} directive
+The @code{@@include} keyword can be used to read external @command{awk} source
files. This gives you the ability to split large @command{awk} source files
into smaller, more manageable pieces, and also lets you reuse common @command{awk}
code from various @command{awk} scripts. In other words, you can group
together @command{awk} functions, used to carry out specific tasks,
into external files. These files can be used just like function libraries,
-using the @samp{@@include} keyword in conjunction with the @env{AWKPATH}
+using the @code{@@include} keyword in conjunction with the @env{AWKPATH}
environment variable. Note that source files may also be included
using the @option{-i} option.
@@ -4092,14 +4409,14 @@ $ @kbd{gawk -f test2}
@end example
@code{gawk} runs the @file{test2} script which includes @file{test1}
-using the @samp{@@include}
+using the @code{@@include}
keyword. So, to include external @command{awk} source files you just
-use @samp{@@include} followed by the name of the file to be included,
+use @code{@@include} followed by the name of the file to be included,
enclosed in double quotes.
@quotation NOTE
-Keep in mind that this is a language construct and the file name cannot
-be a string variable, but rather just a literal string in double quotes.
+Keep in mind that this is a language construct and the @value{FN} cannot
+be a string variable, but rather just a literal string constant in double quotes.
@end quotation
The files to be included may be nested; e.g., given a third
@@ -4123,7 +4440,7 @@ $ @kbd{gawk -f test3}
@print{} This is file test3.
@end example
-The file name can, of course, be a pathname. For example:
+The @value{FN} can, of course, be a pathname. For example:
@example
@@include "../io_funcs"
@@ -4138,49 +4455,53 @@ or:
@noindent
are valid. The @code{AWKPATH} environment variable can be of great
-value when using @samp{@@include}. The same rules for the use
+value when using @code{@@include}. The same rules for the use
of the @code{AWKPATH} variable in command-line file searches
(@pxref{AWKPATH Variable}) apply to
-@samp{@@include} also.
+@code{@@include} also.
This is very helpful in constructing @command{gawk} function libraries.
If you have a large script with useful, general purpose @command{awk}
functions, you can break it down into library files and put those files
in a special directory. You can then include those ``libraries,'' using
either the full pathnames of the files, or by setting the @code{AWKPATH}
-environment variable accordingly and then using @samp{@@include} with
+environment variable accordingly and then using @code{@@include} with
just the file part of the full pathname. Of course you can have more
than one directory to keep library files; the more complex the working
environment is, the more directories you may need to organize the files
to be included.
Given the ability to specify multiple @option{-f} options, the
-@samp{@@include} mechanism is not strictly necessary.
-However, the @samp{@@include} keyword
+@code{@@include} mechanism is not strictly necessary.
+However, the @code{@@include} keyword
can help you in constructing self-contained @command{gawk} programs,
thus reducing the need for writing complex and tedious command lines.
-In particular, @samp{@@include} is very useful for writing CGI scripts
+In particular, @code{@@include} is very useful for writing CGI scripts
to be run from web pages.
As mentioned in @ref{AWKPATH Variable}, the current directory is always
searched first for source files, before searching in @env{AWKPATH},
-and this also applies to files named with @samp{@@include}.
+and this also applies to files named with @code{@@include}.
@node Loading Shared Libraries
-@section Loading Shared Libraries Into Your Program
+@section Loading Dynamic Extensions Into Your Program
This @value{SECTION} describes a feature that is specific to @command{gawk}.
-The @samp{@@load} keyword can be used to read external @command{awk} shared
-libraries. This allows you to link in compiled code that may offer superior
+@cindex @code{@@load} directive
+@cindex loading extensions, @code{@@load} directive
+@cindex extensions, loading, @code{@@load} directive
+The @code{@@load} keyword can be used to read external @command{awk} extensions
+(stored as system shared libraries).
+This allows you to link in compiled code that may offer superior
performance and/or give you access to extended capabilities not supported
by the @command{awk} language. The @env{AWKLIBPATH} variable is used to
-search for the shared library. Using @samp{@@load} is completely equivalent
+search for the extension. Using @code{@@load} is completely equivalent
to using the @option{-l} command-line option.
-If the shared library is not initially found in @env{AWKLIBPATH}, another
+If the extension is not initially found in @env{AWKLIBPATH}, another
search is conducted after appending the platform's default shared library
-suffix to the filename. For example, on GNU/Linux systems, the suffix
+suffix to the @value{FN}. For example, on GNU/Linux systems, the suffix
@samp{.so} is used.
@example
@@ -4198,16 +4519,17 @@ $ @kbd{gawk -lordchr 'BEGIN @{print chr(65)@}'}
@noindent
For command-line usage, the @option{-l} option is more convenient,
-but @samp{@@load} is useful for embedding inside an @command{awk} source file
-that requires access to a shared library.
+but @code{@@load} is useful for embedding inside an @command{awk} source file
+that requires access to an extension.
@ref{Dynamic Extensions}, describes how to write extensions (in C or C++)
-that can be loaded with either @samp{@@load} or the @option{-l} option.
+that can be loaded with either @code{@@load} or the @option{-l} option.
@node Obsolete
@section Obsolete Options and/or Features
-@cindex features, advanced, See advanced features
+@c update this section for each release!
+
@cindex options, deprecated
@cindex features, deprecated
@cindex obsolete features
@@ -4216,11 +4538,9 @@ previous releases of @command{gawk} that are either not available in the
current version or that are still supported but deprecated (meaning that
they will @emph{not} be in the next release).
-@c update this section for each release!
-
The process-related special files @file{/dev/pid}, @file{/dev/ppid},
@file{/dev/pgrpid}, and @file{/dev/user} were deprecated in @command{gawk}
-3.1, but still worked. As of version 4.0, they are no longer
+3.1, but still worked. As of @value{PVERSION} 4.0, they are no longer
interpreted specially by @command{gawk}. (Use @code{PROCINFO} instead;
see @ref{Auto-set}.)
@@ -4300,6 +4620,58 @@ long-undocumented ``feature'' of Unix @code{awk}.
@end ignore
+@node Invoking Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Use either
+@samp{awk '@var{program}' @var{files}}
+or
+@samp{awk -f @var{program-file} @var{files}}
+to run @command{awk}.
+
+@item
+The three standard options for all versions of @command{awk} are
+@option{-f}, @option{-F} and @option{-v}. @command{gawk} supplies these
+and many others, as well as corresponding GNU-style long options.
+
+@item
+Non-option command-line arguments are usually treated as @value{FN}s,
+unless they have the form @samp{@var{var}=@var{value}}, in which case
+they are taken as variable assignments to be performed at that point
+in processing the input.
+
+@item
+All non-option command-line arguments, excluding the program text,
+are placed in the @code{ARGV} array. Adjusting @code{ARGC} and @code{ARGV}
+affects how @command{awk} processes input.
+
+@item
+You can use a single minus sign (@samp{-}) to refer to standard input
+on the command line.
+
+@item
+@command{gawk} pays attention to a number of environment variables.
+@env{AWKPATH}, @env{AWKLIBPATH}, and @env{POSIXLY_CORRECT} are the
+most important ones.
+
+@item
+@command{gawk}'s exit status conveys information to the program
+that invoked it. Use the @code{exit} statement from within
+an @command{awk} program to set the exit status.
+
+@item
+@command{gawk} allows you to include other @command{awk} source files into
+your program using the @code{@@include} statement and/or the @option{-i}
+and @option{-f} command-line options.
+
+@item
+@command{gawk} allows you to load additional functions written in C
+or C++ using the @code{@@load} statement and/or the @option{-l} option.
+(This advanced feature is described later on in @ref{Dynamic Extensions}.)
+@end itemize
+
@node Regexp
@chapter Regular Expressions
@cindex regexp
@@ -4320,7 +4692,7 @@ The simplest regular expression is a sequence of letters, numbers, or
both. Such a regexp matches any string that contains that sequence.
Thus, the regexp @samp{foo} matches any string containing @samp{foo}.
Therefore, the pattern @code{/foo/} matches any input record containing
-the three characters @samp{foo} @emph{anywhere} in the record. Other
+the three adjacent characters @samp{foo} @emph{anywhere} in the record. Other
kinds of regexps let you specify more complicated classes of strings.
@ifnotinfo
@@ -4334,10 +4706,11 @@ regular expressions work, we present more complicated instances.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
+* Regexp Summary:: Regular expressions summary.
@end menu
@node Regexp Usage
@@ -4348,8 +4721,8 @@ A regular expression can be used as a pattern by enclosing it in
slashes. Then the regular expression is tested against the
entire text of each record. (Normally, it only needs
to match some part of the text in order to succeed.) For example, the
-following prints the second field of each record that contains the string
-@samp{li} anywhere in it:
+following prints the second field of each record where the string
+@samp{li} appears anywhere in the record:
@example
$ @kbd{awk '/li/ @{ print $2 @}' mail-list}
@@ -4479,7 +4852,7 @@ A literal backslash, @samp{\}.
@cindex backslash (@code{\}), @code{\a} escape sequence
@item \a
The ``alert'' character, @kbd{Ctrl-g}, ASCII code 7 (BEL).
-(This usually makes some sort of audible noise.)
+(This often makes some sort of audible noise.)
@cindex @code{\} (backslash), @code{\b} escape sequence
@cindex backslash (@code{\}), @code{\b} escape sequence
@@ -4528,20 +4901,30 @@ between @samp{0} and @samp{7}. For example, the code for the ASCII ESC
@item \x@var{hh}@dots{}
The hexadecimal value @var{hh}, where @var{hh} stands for a sequence
of hexadecimal digits (@samp{0}--@samp{9}, and either @samp{A}--@samp{F}
-or @samp{a}--@samp{f}). Like the same construct
-in ISO C, the escape sequence continues until the first nonhexadecimal
-digit is seen. @value{COMMONEXT}
+or @samp{a}--@samp{f}). A maximum of two digits are allowed after
+the @samp{\x}. Any further hexadecimal digits are treated as simple
+letters or numbers. @value{COMMONEXT}
+
+@quotation CAUTION
+In ISO C, the escape sequence continues until the first nonhexadecimal
+digit is seen.
+@c FIXME: Add exact version here.
+For many years, @command{gawk} would continue incorporating
+hexadecimal digits into the value until a non-hexadecimal digit
+or the end of the string was encountered.
However, using more than two hexadecimal digits produces
-undefined results. (The @samp{\x} escape sequence is not allowed in
-POSIX @command{awk}.)
+undefined results.
+@end quotation
@cindex @code{\} (backslash), @code{\/} escape sequence
@cindex backslash (@code{\}), @code{\/} escape sequence
@item \/
A literal slash (necessary for regexp constants only).
This sequence is used when you want to write a regexp
-constant that contains a slash. Because the regexp is delimited by
-slashes, you need to escape the slash that is part of the pattern,
+constant that contains a slash
+(such as @code{/.*:\/home\/[[:alnum:]]+:.*/}; the @samp{[[:alnum:]]}
+notation is discussed shortly, in @ref{Bracket Expressions}).
+Because the regexp is delimited by
+slashes, you need to escape any slash that is part of the pattern,
in order to tell @command{awk} to keep processing the rest of the regexp.
@cindex @code{\} (backslash), @code{\"} escape sequence
@@ -4549,8 +4932,10 @@ in order to tell @command{awk} to keep processing the rest of the regexp.
@item \"
A literal double quote (necessary for string constants only).
This sequence is used when you want to write a string
-constant that contains a double quote. Because the string is delimited by
-double quotes, you need to escape the quote that is part of the string,
+constant that contains a double quote
+(such as @code{"He said \"hi!\" to her."}).
+Because the string is delimited by
+double quotes, you need to escape any quote that is part of the string,
in order to tell @command{awk} to keep processing the rest of the string.
@end table
@@ -4573,7 +4958,7 @@ shown in the previous list.
To summarize:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The escape sequences in the table above are always processed first,
for both string constants and regexp constants. This happens very early,
@@ -4606,7 +4991,7 @@ leaves what happens as undefined. There are two choices:
@cindex Brian Kernighan's @command{awk}
@table @asis
@item Strip the backslash out
-This is what Brian Kernighan's @command{awk} and @command{gawk} both do.
+This is what BWK @command{awk} and @command{gawk} both do.
For example, @code{"a\qc"} is the same as @code{"aqc"}.
(Because this is such an easy bug both to introduce and to miss,
@command{gawk} warns you about it.)
@@ -4659,7 +5044,7 @@ The escape sequences described
@ifnotinfo
earlier
@end ifnotinfo
-in @ref{Escape Sequences},
+in @DBREF{Escape Sequences}
are valid inside a regexp. They are introduced by a @samp{\} and
are recognized and converted into corresponding real characters as
the very first step in processing regexps.
@@ -4667,10 +5052,11 @@ the very first step in processing regexps.
Here is a list of metacharacters. All characters that are not escape
sequences and that are not listed in the table stand for themselves:
-@table @code
+@c Use @asis so the docbook comes out ok. Sigh.
+@table @asis
@cindex backslash (@code{\}), regexp operator
@cindex @code{\} (backslash), regexp operator
-@item \
+@item @code{\}
This is used to suppress the special meaning of a character when
matching. For example, @samp{\$}
matches the character @samp{$}.
@@ -4679,7 +5065,7 @@ matches the character @samp{$}.
@cindex Texinfo, chapter beginnings in files
@cindex @code{^} (caret), regexp operator
@cindex caret (@code{^}), regexp operator
-@item ^
+@item @code{^}
This matches the beginning of a string. For example, @samp{^@@chapter}
matches @samp{@@chapter} at the beginning of a string and can be used
to identify chapter beginnings in Texinfo source files.
@@ -4687,7 +5073,7 @@ The @samp{^} is known as an @dfn{anchor}, because it anchors the pattern to
match only at the beginning of the string.
It is important to realize that @samp{^} does not match the beginning of
-a line embedded in a string.
+a line (the point right after a @samp{\n} newline character) embedded in a string.
The condition is not true in the following example:
@example
@@ -4696,11 +5082,13 @@ if ("line1\nLINE 2" ~ /^L/) @dots{}
@cindex @code{$} (dollar sign), regexp operator
@cindex dollar sign (@code{$}), regexp operator
-@item $
+@item @code{$}
This is similar to @samp{^}, but it matches only at the end of a string.
For example, @samp{p$}
matches a record that ends with a @samp{p}. The @samp{$} is an anchor
-and does not match the end of a line embedded in a string.
+and does not match the end of a line
+(the point right before a @samp{\n} newline character)
+embedded in a string.
The condition in the following example is not true:
@example
@@ -4709,7 +5097,7 @@ if ("line1\nLINE 2" ~ /1$/) @dots{}
@cindex @code{.} (period), regexp operator
@cindex period (@code{.}), regexp operator
-@item . @r{(period)}
+@item @code{.} (period)
This matches any single character,
@emph{including} the newline character. For example, @samp{.P}
matches any single character followed by a @samp{P} in a string. Using
@@ -4719,10 +5107,10 @@ with @samp{A}.
@cindex POSIX @command{awk}, period (@code{.})@comma{} using
In strict POSIX mode (@pxref{Options}),
-@samp{.} does not match the @sc{nul}
+@samp{.} does not match the @value{NUL}
character, which is a character with all bits equal to zero.
-Otherwise, @sc{nul} is just another character. Other versions of @command{awk}
-may not be able to match the @sc{nul} character.
+Otherwise, @value{NUL} is just another character. Other versions of @command{awk}
+may not be able to match the @value{NUL} character.
@cindex @code{[]} (square brackets), regexp operator
@cindex square brackets (@code{[]}), regexp operator
@@ -4730,7 +5118,7 @@ may not be able to match the @sc{nul} character.
@cindex character sets, See Also bracket expressions
@cindex character lists, See bracket expressions
@cindex character classes, See bracket expressions
-@item [@dots{}]
+@item @code{[}@dots{}@code{]}
This is called a @dfn{bracket expression}.@footnote{In other literature,
you may see a bracket expression referred to as either a
@dfn{character set}, a @dfn{character class}, or a @dfn{character list}.}
@@ -4742,7 +5130,7 @@ is given in
@ref{Bracket Expressions}.
@cindex bracket expressions, complemented
-@item [^ @dots{}]
+@item @code{[^}@dots{}@code{]}
This is a @dfn{complemented bracket expression}. The first character after
the @samp{[} @emph{must} be a @samp{^}. It matches any characters
@emph{except} those in the square brackets. For example, @samp{[^awk]}
@@ -4751,20 +5139,19 @@ or @samp{k}.
@cindex @code{|} (vertical bar)
@cindex vertical bar (@code{|})
-@item |
+@item @code{|}
This is the @dfn{alternation operator} and it is used to specify
-alternatives.
-The @samp{|} has the lowest precedence of all the regular
-expression operators.
-For example, @samp{^P|[[:digit:]]}
-matches any string that matches either @samp{^P} or @samp{[[:digit:]]}. This
-means it matches any string that starts with @samp{P} or contains a digit.
+alternatives. The @samp{|} has the lowest precedence of all the regular
+expression operators. For example, @samp{^P|[aeiouy]} matches any string
+that matches either @samp{^P} or @samp{[aeiouy]}. This means it matches
+any string that starts with @samp{P} or contains (anywhere within it)
+a lowercase English vowel.
The alternation applies to the largest possible regexps on either side.
@cindex @code{()} (parentheses), regexp operator
@cindex parentheses @code{()}, regexp operator
-@item (@dots{})
+@item @code{(}@dots{}@code{)}
Parentheses are used for grouping in regular expressions, as in
arithmetic. They can be used to concatenate regular expressions
containing the alternation operator, @samp{|}. For example,
@@ -4775,47 +5162,42 @@ explained further on in this list.)
@cindex @code{*} (asterisk), @code{*} operator, as regexp operator
@cindex asterisk (@code{*}), @code{*} operator, as regexp operator
-@item *
+@item @code{*}
This symbol means that the preceding regular expression should be
repeated as many times as necessary to find a match. For example, @samp{ph*}
applies the @samp{*} symbol to the preceding @samp{h} and looks for matches
of one @samp{p} followed by any number of @samp{h}s. This also matches
just @samp{p} if no @samp{h}s are present.
-The @samp{*} repeats the @emph{smallest} possible preceding expression.
-(Use parentheses if you want to repeat a larger expression.) It finds
-as many repetitions as possible. For example,
-@samp{awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample}
-prints every record in @file{sample} containing a string of the form
-@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on.
-Notice the escaping of the parentheses by preceding them
-with backslashes.
+There are two subtle points to understand about how @samp{*} works.
+First, the @samp{*} applies only to the single preceding regular expression
+component (e.g., in @samp{ph*}, it applies just to the @samp{h}).
+To cause @samp{*} to apply to a larger sub-expression, use parentheses:
+@samp{(ph)*} matches @samp{ph}, @samp{phph}, @samp{phphph} and so on.
+
+Second, @samp{*} finds as many repetitions as possible. If the text
+to be matched is @samp{phhhhhhhhhhhhhhooey}, @samp{ph*} matches all of
+the @samp{h}s.
@cindex @code{+} (plus sign), regexp operator
@cindex plus sign (@code{+}), regexp operator
-@item +
+@item @code{+}
This symbol is similar to @samp{*}, except that the preceding expression must be
matched at least once. This means that @samp{wh+y}
would match @samp{why} and @samp{whhy}, but not @samp{wy}, whereas
-@samp{wh*y} would match all three of these strings.
-The following is a simpler
-way of writing the last @samp{*} example:
-
-@example
-awk '/\(c[ad]+r x\)/ @{ print @}' sample
-@end example
+@samp{wh*y} would match all three.
@cindex @code{?} (question mark), regexp operator
@cindex question mark (@code{?}), regexp operator
-@item ?
+@item @code{?}
This symbol is similar to @samp{*}, except that the preceding expression can be
matched either once or not at all. For example, @samp{fe?d}
matches @samp{fed} and @samp{fd}, but nothing else.
@cindex interval expressions, regexp operator
-@item @{@var{n}@}
-@itemx @{@var{n},@}
-@itemx @{@var{n},@var{m}@}
+@item @code{@{}@var{n}@code{@}}
+@itemx @code{@{}@var{n}@code{,@}}
+@itemx @code{@{}@var{n}@code{,}@var{m}@code{@}}
One or two numbers inside braces denote an @dfn{interval expression}.
If there is one number in the braces, the preceding regexp is repeated
@var{n} times.
@@ -4846,7 +5228,7 @@ constants,
@command{gawk} did @emph{not} match interval expressions
in regexps.
-However, beginning with version 4.0,
+However, beginning with @value{PVERSION} 4.0,
@command{gawk} does match interval expressions by default.
This is because compatibility with POSIX has become more
important to most @command{gawk} users than compatibility with
@@ -4898,7 +5280,7 @@ Within a bracket expression, a @dfn{range expression} consists of two
characters separated by a hyphen. It matches any single character that
sorts between the two characters, based upon the system's native character
set. For example, @samp{[0-9]} is equivalent to @samp{[0123456789]}.
-(See @ref{Ranges and Locales}, for an explanation of how the POSIX
+(See @DBREF{Ranges and Locales} for an explanation of how the POSIX
standard and @command{gawk} have changed over time. This is mainly
of historical interest.)
@@ -4917,6 +5299,9 @@ bracket expression, put a @samp{\} in front of it. For example:
@noindent
matches either @samp{d} or @samp{]}.
+Additionally, if you place @samp{]} right after the opening
+@samp{[}, the closing bracket is treated as one of the
+characters to be matched.
@cindex POSIX @command{awk}, bracket expressions and
@cindex Extended Regular Expressions (EREs)
@@ -4974,6 +5359,17 @@ With the POSIX character classes, you can write
@code{/[[:alnum:]]/} to match the alphabetic
and numeric characters in your character set.
+@c Thanks to
+@c Date: Tue, 01 Jul 2014 07:39:51 +0200
+@c From: Hermann Peifer <peifer@gmx.eu>
+Some utilities that match regular expressions provide a non-standard
+@code{[:ascii:]} character class; @command{awk} does not. However, you
+can simulate such a construct using @code{[\x00-\x7F]}. This matches
+all values numerically between zero and 127, which is the defined
+range of the ASCII character set. Use a complemented character list
+(@code{[^\x00-\x7F]}) to match any single-byte characters that are not
+in the ASCII range.
+
@cindex bracket expressions, collating elements
@cindex bracket expressions, non-ASCII
@cindex collating elements
@@ -5017,6 +5413,160 @@ they do not recognize collating symbols or equivalence classes.
@c maybe one day ...
@c ENDOFRANGE charlist
+@node Leftmost Longest
+@section How Much Text Matches?
+
+@cindex regular expressions, leftmost longest match
+@c @cindex matching, leftmost longest
+Consider the following:
+
+@example
+echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@end example
+
+This example uses the @code{sub()} function (which we haven't discussed yet;
+@pxref{String Functions})
+to make a change to the input record. Here, the regexp @code{/a+/}
+indicates ``one or more @samp{a} characters,'' and the replacement
+text is @samp{<A>}.
+
+The input contains four @samp{a} characters.
+@command{awk} (and POSIX) regular expressions always match
+the leftmost, @emph{longest} sequence of input characters that can
+match. Thus, all four @samp{a} characters are
+replaced with @samp{<A>} in this example:
+
+@example
+$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
+@print{} <A>bcd
+@end example
+
+For simple match/no-match tests, this is not so important. But when doing
+text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
+and @code{gensub()} functions, it is very important.
+@ifinfo
+@xref{String Functions},
+for more information on these functions.
+@end ifinfo
+Understanding this principle is also important for regexp-based record
+and field splitting (@pxref{Records},
+and also @pxref{Field Separators}).
+
+@node Computed Regexps
+@section Using Dynamic Regexps
+
+@c STARTOFRANGE dregexp
+@cindex regular expressions, computed
+@c STARTOFRANGE regexpd
+@cindex regular expressions, dynamic
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@c @cindex operators, @code{~}
+@c @cindex operators, @code{!~}
+The righthand side of a @samp{~} or @samp{!~} operator need not be a
+regexp constant (i.e., a string of characters between slashes). It may
+be any expression. The expression is evaluated and converted to a string
+if necessary; the contents of the string are then used as the
+regexp. A regexp computed in this way is called a @dfn{dynamic
+regexp} or a @dfn{computed regexp}:
+
+@example
+BEGIN @{ digits_regexp = "[[:digit:]]+" @}
+$0 ~ digits_regexp @{ print @}
+@end example
+
+@noindent
+This sets @code{digits_regexp} to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+@quotation NOTE
+When using the @samp{~} and @samp{!~}
+operators, there is a difference between a regexp constant
+enclosed in slashes and a string constant enclosed in double quotes.
+If you are going to use a string constant, you have to understand that
+the string is, in essence, scanned @emph{twice}: the first time when
+@command{awk} reads your program, and the second time when it goes to
+match the string on the lefthand side of the operator with the pattern
+on the right. This is true of any string-valued expression (such as
+@code{digits_regexp}, shown previously), not just string constants.
+@end quotation
+
+@cindex regexp constants, slashes vs.@: quotes
+@cindex @code{\} (backslash), in regexp constants
+@cindex backslash (@code{\}), in regexp constants
+@cindex @code{"} (double quote), in regexp constants
+@cindex double quote (@code{"}), in regexp constants
+What difference does it make if the string is
+scanned twice? The answer has to do with escape sequences, and particularly
+with backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
+Only one backslash is needed. To do the same thing with a string,
+you have to type @code{"\\*"}. The first backslash escapes the
+second one so that the string actually contains the
+two characters @samp{\} and @samp{*}.
+
+@cindex troubleshooting, regexp constants vs.@: string constants
+@cindex regexp constants, vs.@: string constants
+@cindex string constants, vs.@: regexp constants
+Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is ``regexp
+constants,'' for several reasons:
+
+@itemize @value{BULLET}
+@item
+String constants are more complicated to write and
+more difficult to read. Using regexp constants makes your programs
+less error-prone. Not understanding the difference between the two
+kinds of constants is a common source of errors.
+
+@item
+It is more efficient to use regexp constants. @command{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. When using a string constant,
+@command{awk} must first convert the string into this internal form and
+then perform the pattern matching.
+
+@item
+Using regexp constants is better form; it shows clearly that you
+intend a regexp match.
+@end itemize
+
+@sidebar Using @code{\n} in Bracket Expressions of Dynamic Regexps
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+@end sidebar
+@c ENDOFRANGE dregexp
+@c ENDOFRANGE regexpd
+
@node GNU Regexp Operators
@section @command{gawk}-Specific Regexp Operators
@@ -5149,9 +5699,6 @@ GNU operators, but this was deemed too confusing. The current
method of using @samp{\y} for the GNU @samp{\b} appears to be the
lesser of two evils.
-@c NOTE!!! Keep this in sync with the same table in the summary appendix!
-@c
-@c Should really do this with file inclusion.
@cindex regular expressions, @command{gawk}, command-line options
@cindex @command{gawk}, command-line options, and regular expressions
The various command-line options
@@ -5167,8 +5714,10 @@ previously described
GNU regexp operators.
@end ifnotinfo
@ifnottex
+@ifnotdocbook
GNU regexp operators described
in @ref{Regexp Operators}.
+@end ifnotdocbook
@end ifnottex
@item @code{--posix}
@@ -5181,7 +5730,7 @@ are allowed.
Traditional Unix @command{awk} regexps are matched. The GNU operators
are not special, and interval expressions are not available.
The POSIX character classes (@samp{[[:alnum:]]}, etc.) are supported,
-as Brian Kernighan's @command{awk} does support them.
+as BWK @command{awk} does support them.
Characters described by octal and hexadecimal escape sequences are
treated literally, even if they represent regexp metacharacters.
@@ -5238,10 +5787,12 @@ This works in any POSIX-compliant @command{awk}.
Another method, specific to @command{gawk}, is to set the variable
@code{IGNORECASE} to a nonzero value (@pxref{Built-in Variables}).
When @code{IGNORECASE} is not zero, @emph{all} regexp and string
-operations ignore case. Changing the value of
-@code{IGNORECASE} dynamically controls the case-sensitivity of the
-program as it runs. Case is significant by default because
-@code{IGNORECASE} (like most variables) is initialized to zero:
+operations ignore case.
+
+Changing the value of @code{IGNORECASE} dynamically controls the
+case-sensitivity of the program as it runs. Case is significant by
+default because @code{IGNORECASE} (like most variables) is initialized
+to zero:
@example
x = "aB"
@@ -5271,9 +5822,6 @@ case-sensitivity on or off for all the rules at once.
Setting @code{IGNORECASE} from the command line is a way to make
a program case-insensitive without having to edit it.
-Both regexp and string comparison
-operations are affected by @code{IGNORECASE}.
-
@c @cindex ISO 8859-1
@c @cindex ISO Latin-1
In multibyte locales,
@@ -5294,159 +5842,51 @@ Case is always significant in compatibility mode.
@c ENDOFRANGE csregexp
@c ENDOFRANGE regexpcs
-@node Leftmost Longest
-@section How Much Text Matches?
+@node Regexp Summary
+@section Summary
-@cindex regular expressions, leftmost longest match
-@c @cindex matching, leftmost longest
-Consider the following:
-
-@example
-echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
-@end example
-
-This example uses the @code{sub()} function (which we haven't discussed yet;
-@pxref{String Functions})
-to make a change to the input record. Here, the regexp @code{/a+/}
-indicates ``one or more @samp{a} characters,'' and the replacement
-text is @samp{<A>}.
-
-The input contains four @samp{a} characters.
-@command{awk} (and POSIX) regular expressions always match
-the leftmost, @emph{longest} sequence of input characters that can
-match. Thus, all four @samp{a} characters are
-replaced with @samp{<A>} in this example:
-
-@example
-$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
-@print{} <A>bcd
-@end example
-
-For simple match/no-match tests, this is not so important. But when doing
-text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
-and @code{gensub()} functions, it is very important.
-@ifinfo
-@xref{String Functions},
-for more information on these functions.
-@end ifinfo
-Understanding this principle is also important for regexp-based record
-and field splitting (@pxref{Records},
-and also @pxref{Field Separators}).
-
-@node Computed Regexps
-@section Using Dynamic Regexps
-
-@c STARTOFRANGE dregexp
-@cindex regular expressions, computed
-@c STARTOFRANGE regexpd
-@cindex regular expressions, dynamic
-@cindex @code{~} (tilde), @code{~} operator
-@cindex tilde (@code{~}), @code{~} operator
-@cindex @code{!} (exclamation point), @code{!~} operator
-@cindex exclamation point (@code{!}), @code{!~} operator
-@c @cindex operators, @code{~}
-@c @cindex operators, @code{!~}
-The righthand side of a @samp{~} or @samp{!~} operator need not be a
-regexp constant (i.e., a string of characters between slashes). It may
-be any expression. The expression is evaluated and converted to a string
-if necessary; the contents of the string are then used as the
-regexp. A regexp computed in this way is called a @dfn{dynamic
-regexp}:
-
-@example
-BEGIN @{ digits_regexp = "[[:digit:]]+" @}
-$0 ~ digits_regexp @{ print @}
-@end example
-
-@noindent
-This sets @code{digits_regexp} to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
-@quotation NOTE
-When using the @samp{~} and @samp{!~}
-operators, there is a difference between a regexp constant
-enclosed in slashes and a string constant enclosed in double quotes.
-If you are going to use a string constant, you have to understand that
-the string is, in essence, scanned @emph{twice}: the first time when
-@command{awk} reads your program, and the second time when it goes to
-match the string on the lefthand side of the operator with the pattern
-on the right. This is true of any string-valued expression (such as
-@code{digits_regexp}, shown previously), not just string constants.
-@end quotation
-
-@cindex regexp constants, slashes vs.@: quotes
-@cindex @code{\} (backslash), in regexp constants
-@cindex backslash (@code{\}), in regexp constants
-@cindex @code{"} (double quote), in regexp constants
-@cindex double quote (@code{"}), in regexp constants
-What difference does it make if the string is
-scanned twice? The answer has to do with escape sequences, and particularly
-with backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
-For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
-Only one backslash is needed. To do the same thing with a string,
-you have to type @code{"\\*"}. The first backslash escapes the
-second one so that the string actually contains the
-two characters @samp{\} and @samp{*}.
-
-@cindex troubleshooting, regexp constants vs.@: string constants
-@cindex regexp constants, vs.@: string constants
-@cindex string constants, vs.@: regexp constants
-Given that you can use both regexp and string constants to describe
-regular expressions, which should you use? The answer is ``regexp
-constants,'' for several reasons:
+@itemize @value{BULLET}
+@item
+Regular expressions describe sets of strings to be matched.
+In @command{awk}, regular expression constants are written enclosed
+between slashes: @code{/}@dots{}@code{/}.
-@itemize @bullet
@item
-String constants are more complicated to write and
-more difficult to read. Using regexp constants makes your programs
-less error-prone. Not understanding the difference between the two
-kinds of constants is a common source of errors.
+Regexp constants may be used standalone in patterns and
+in conditional expressions, or as part of matching expressions
+using the @samp{~} and @samp{!~} operators.
@item
-It is more efficient to use regexp constants. @command{awk} can note
-that you have supplied a regexp and store it internally in a form that
-makes pattern matching more efficient. When using a string constant,
-@command{awk} must first convert the string into this internal form and
-then perform the pattern matching.
+Escape sequences let you represent non-printable characters and
+also let you represent regexp metacharacters as literal characters
+to be matched.
@item
-Using regexp constants is better form; it shows clearly that you
-intend a regexp match.
-@end itemize
+Regexp operators provide grouping, alternation and repetition.
-@sidebar Using @code{\n} in Bracket Expressions of Dynamic Regexps
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
+@item
+Bracket expressions give you a shorthand for specifying sets
+of characters that can match at a particular point in a regexp.
+Within bracket expressions, POSIX character classes let you specify
+certain groups of characters in a locale-independent fashion.
-Some commercial versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
+@item
+@command{gawk}'s @code{IGNORECASE} variable lets you control the
+case sensitivity of regexp matching. In other @command{awk}
+versions, use @code{tolower()} or @code{toupper()}.
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
+@item
+Regular expressions match the leftmost longest text in the string being
+matched. This matters for cases where you need to know the extent of
+the match, such as for text substitution and when the record separator
+is a regexp.
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
+@item
+Matching expressions may use dynamic regexps, that is, string values
+treated as regular expressions.
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
+@end itemize
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-@end sidebar
-@c ENDOFRANGE dregexp
-@c ENDOFRANGE regexpd
@c ENDOFRANGE regexp
@node Reading Files
@@ -5494,8 +5934,10 @@ used with it do not have to be named on the @command{awk} command line
* Getline:: Reading files under explicit program control
using the @code{getline} function.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on the
+* Command-line directories:: What happens if you put a directory on the
command line.
+* Input Summary:: Input summary.
+* Input Exercises:: Exercises.
@end menu
@node Records
@@ -5507,16 +5949,21 @@ used with it do not have to be named on the @command{awk} command line
@cindex records, splitting input into
@cindex @code{NR} variable
@cindex @code{FNR} variable
-The @command{awk} utility divides the input for your @command{awk}
-program into records and fields.
-@command{awk} keeps track of the number of records that have
-been read
-so far
-from the current input file. This value is stored in a
-built-in variable called @code{FNR}. It is reset to zero when a new
-file is started. Another built-in variable, @code{NR}, records the total
-number of input records read so far from all data files. It starts at zero,
-but is never automatically reset to zero.
+@command{awk} divides the input for your program into records and fields.
+It keeps track of the number of records that have been read so far from
+the current input file. This value is stored in a built-in variable
+called @code{FNR}, which is reset to zero when a new file is started.
+Another built-in variable, @code{NR}, records the total number of input
+records read so far from all @value{DF}s. It starts at zero, but is
+never automatically reset to zero.
+
+@menu
+* awk split records:: How standard @command{awk} splits records.
+* gawk split records:: How @command{gawk} splits records.
+@end menu
+
+@node awk split records
+@subsection Record Splitting With Standard @command{awk}
@cindex separators, for records
@cindex record separators
@@ -5600,7 +6047,7 @@ $ @kbd{awk 'BEGIN @{ RS = "u" @}}
@noindent
Note that the entry for the name @samp{Bill} is not split.
-In the original data file
+In the original @value{DF}
(@pxref{Sample Data Files}),
the line looks like this:
@@ -5613,7 +6060,7 @@ It contains no @samp{u} so there is no reason to split the record,
unlike the others which have one or more occurrences of the @samp{u}.
In fact, this record is treated as part of the previous record;
the newline separating them in the output
-is the original newline in the data file, not the one added by
+is the original newline in the @value{DF}, not the one added by
@command{awk} when it printed the record!
@cindex record separators, changing
@@ -5681,6 +6128,9 @@ After the end of the record has been determined, @command{gawk}
sets the variable @code{RT} to the text in the input that matched
@code{RS}.
+@node gawk split records
+@subsection Record Splitting With @command{gawk}
+
@cindex common extensions, @code{RS} as a regexp
@cindex extensions, common@comma{} @code{RS} as a regexp
When using @command{gawk},
@@ -5712,17 +6162,17 @@ with optional leading and/or trailing whitespace:
@example
$ @kbd{echo record 1 AAAA record 2 BBBB record 3 |}
> @kbd{gawk 'BEGIN @{ RS = "\n|( *[[:upper:]]+ *)" @}}
-> @kbd{@{ print "Record =", $0, "and RT =", RT @}'}
-@print{} Record = record 1 and RT = AAAA
-@print{} Record = record 2 and RT = BBBB
-@print{} Record = record 3 and RT =
-@print{}
+> @kbd{@{ print "Record =", $0,"and RT = [" RT "]" @}'}
+@print{} Record = record 1 and RT = [ AAAA ]
+@print{} Record = record 2 and RT = [ BBBB ]
+@print{} Record = record 3 and RT = [
+@print{} ]
@end example
@noindent
-The final line of output has an extra blank line. This is because the
-value of @code{RT} is a newline, and the @code{print} statement
-supplies its own terminating newline.
+The square brackets delineate the contents of @code{RT}, letting you
+see the leading and trailing whitespace. The final value of
+@code{RT} is a newline.
@xref{Simple Sed}, for a more useful example
of @code{RS} as a regexp and @code{RT}.
@@ -5754,14 +6204,13 @@ In compatibility mode, only the first character of the value of
@sidebar @code{RS = "\0"} Is Not Portable
@cindex portability, data files as single record
-There are times when you might want to treat an entire data file as a
+There are times when you might want to treat an entire @value{DF} as a
single record. The only way to make this happen is to give @code{RS}
a value that you know doesn't occur in the input file. This is hard
to do in a general way, such that a program always works for arbitrary
input files.
-@c can you say `understatement' boys and girls?
-You might think that for text files, the @sc{nul} character, which
+You might think that for text files, the @value{NUL} character, which
consists of a character with all bits equal to zero, is a good
value to use for @code{RS} in this case:
@@ -5770,29 +6219,29 @@ BEGIN @{ RS = "\0" @} # whole file becomes one record?
@end example
@cindex differences in @command{awk} and @command{gawk}, strings, storing
-@command{gawk} in fact accepts this, and uses the @sc{nul}
+@command{gawk} in fact accepts this, and uses the @value{NUL}
character for the record separator.
+This works for certain special files, such as @file{/proc/environ} on
+GNU/Linux systems, where the @value{NUL} character is in fact the record separator.
However, this usage is @emph{not} portable
to most other @command{awk} implementations.
@cindex dark corner, strings, storing
Almost all other @command{awk} implementations@footnote{At least that we know
about.} store strings internally as C-style strings. C strings use the
-@sc{nul} character as the string terminator. In effect, this means that
+@value{NUL} character as the string terminator. In effect, this means that
@samp{RS = "\0"} is the same as @samp{RS = ""}.
@value{DARKCORNER}
-It happens that recent versions of @command{mawk} can use the @sc{nul}
+It happens that recent versions of @command{mawk} can use the @value{NUL}
character as a record separator. However, this is a special case:
-@command{mawk} does not allow embedded @sc{nul} characters in strings.
+@command{mawk} does not allow embedded @value{NUL} characters in strings.
@cindex records, treating files as
@cindex treating files, as single records
-The best way to treat a whole file as a single record is to
-simply read the file in, one record at a time, concatenating each
-record onto the end of the previous ones.
-
-@c @strong{FIXME}: Using @sc{nul} is good for @file{/proc/environ} etc.
+@xref{Readfile Function}, for an interesting, portable way to read
+whole files. If you are using @command{gawk}, see @ref{Extension Sample
+Readfile}, for another option.
@end sidebar
@c ENDOFRANGE inspl
@c ENDOFRANGE recspl
@@ -5828,7 +6277,7 @@ simple @command{awk} programs so powerful.
@cindex @code{$} (dollar sign), @code{$} field operator
@cindex dollar sign (@code{$}), @code{$} field operator
@cindex field operators@comma{} dollar sign as
-A dollar-sign (@samp{$}) is used
+You use a dollar-sign (@samp{$})
to refer to a field in an @command{awk} program,
followed by the number of the field you want. Thus, @code{$1}
refers to the first field, @code{$2} to the second, and so on.
@@ -5859,7 +6308,7 @@ one (such as @code{$8} when the record has only seven fields), you get
the empty string. (If used in a numeric operation, you get zero.)
The use of @code{$0}, which looks like a reference to the ``zero-th'' field, is
-a special case: it represents the whole input record
+a special case: it represents the whole input record. Use it
when you are not interested in specific fields.
Here are some more examples:
@@ -5895,7 +6344,7 @@ $ @kbd{awk '/li/ @{ print $1, $NF @}' mail-list}
@cindex fields, numbers
@cindex field numbers
-The number of a field does not need to be a constant. Any expression in
+A field number need not be a constant. Any expression in
the @command{awk} language can be used after a @samp{$} to refer to a
field. The value of the expression specifies the field number. If the
value is a string, rather than a number, it is converted to a number.
@@ -5922,7 +6371,11 @@ its value as the number of the field to print. The @samp{*} sign
represents multiplication, so the expression @samp{2*2} evaluates to four.
The parentheses are used so that the multiplication is done before the
@samp{$} operation; they are necessary whenever there is a binary
-operator in the field-number expression. This example, then, prints the
+operator@footnote{A @dfn{binary operator}, such as @samp{*} for
+multiplication, is one that takes two operands. The distinction
+is required, since @command{awk} also has unary (one-operand)
+and ternary (three-operand) operators.}
+in the field-number expression. This example, then, prints the
type of relationship (the fourth field) for every line of the file
@file{mail-list}. (All of the @command{awk} operators are listed, in
order of decreasing precedence, in
@@ -5972,7 +6425,7 @@ Then it prints the original and new values for field three.
(Someone in the warehouse made a consistent mistake while inventorying
the red boxes.)
-For this to work, the text in field @code{$3} must make sense
+For this to work, the text in @code{$3} must make sense
as a number; the string of characters must be converted to a number
for the computer to do arithmetic on it. The number resulting
from the subtraction is converted back to a string of characters that
@@ -6063,7 +6516,7 @@ $ @kbd{echo a b c d | awk '@{ OFS = ":"; $2 = ""}
@end example
@noindent
-The field is still there; it just has an empty value, denoted by
+The field is still there; it just has an empty value, delimited by
the two colons between @samp{a} and @samp{c}.
This example shows what happens if you create a new field:
@@ -6146,7 +6599,7 @@ with a statement such as @samp{$1 = $1}, as described earlier.
* Default Field Splitting:: How fields are normally separated.
* Regexp Field Splitting:: Using regexps as the field separator.
* Single Character Fields:: Making each character a separate field.
-* Command Line Field Separator:: Setting @code{FS} from the command-line.
+* Command Line Field Separator:: Setting @code{FS} from the command line.
* Full Line Fields:: Making the full line be a single field.
* Field Splitting Summary:: Some final points and a summary table.
@end menu
@@ -6315,7 +6768,7 @@ $ @kbd{echo ' a b c d ' | awk 'BEGIN @{ FS = "[ \t\n]+" @}}
@cindex null strings
@cindex strings, null
@cindex empty strings, See null strings
-In this case, the first field is @dfn{null} or empty.
+In this case, the first field is null, or empty.
The stripping of leading and trailing whitespace also comes into
play whenever @code{$0} is recomputed. For instance, study this pipeline:
@@ -6347,7 +6800,7 @@ should not rely on any specific behavior in your programs.
@value{DARKCORNER}
@cindex Brian Kernighan's @command{awk}
-As a point of information, Brian Kernighan's @command{awk} allows @samp{^}
+As a point of information, BWK @command{awk} allows @samp{^}
to match only at the beginning of the record. @command{gawk}
also works this way. For example:
@@ -6402,7 +6855,7 @@ behaves this way.
@node Command Line Field Separator
@subsection Setting @code{FS} from the Command Line
-@cindex @option{-F} option, command line
+@cindex @option{-F} option, command-line
@cindex field separator, on command line
@cindex command line, @code{FS} on@comma{} setting
@cindex @code{FS} variable, setting from command line
@@ -6452,6 +6905,8 @@ shell, without any quotes, the @samp{\} gets deleted, so @command{awk}
figures that you really want your fields to be separated with TABs and
not @samp{t}s. Use @samp{-v FS="t"} or @samp{-F"[t]"} on the command line
if you really do want to separate your fields with @samp{t}s.
+Use @samp{-F '\t'} when not in compatibility mode to specify that TABs
+separate fields.
As an example, let's use an @command{awk} program file called @file{edu.awk}
that contains the pattern @code{/edu/} and the action @samp{print $1}:
@@ -6465,7 +6920,6 @@ program on the file @file{mail-list}. The following command prints a
list of the names of the people that work at or attend a university, and
the first three digits of their phone numbers:
-@c tweaked to make the tex output look better in @smallbook
@example
$ @kbd{awk -F- -f edu.awk mail-list}
@print{} Fabius 555
@@ -6598,7 +7052,7 @@ root
@noindent
on an incorrect implementation of @command{awk}, while @command{gawk}
-prints something like:
+prints the full first line of the file, something like:
@example
root:nSijPlPhZZwgE:0:0:Root:/:
@@ -6698,7 +7152,7 @@ haven't been introduced yet.
BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @}
NR > 2 @{
idle = $4
- sub(/^ */, "", idle) # strip leading spaces
+ sub(/^ +/, "", idle) # strip leading spaces
if (idle == "")
idle = 0
if (idle ~ /:/) @{
@@ -6735,10 +7189,6 @@ program for processing such data could use the @code{FIELDWIDTHS} feature
to simplify reading the data. (Of course, getting @command{gawk} to run on
a system with card readers is another story!)
-@ignore
-Exercise: Write a ballot card reading program
-@end ignore
-
@cindex @command{gawk}, splitting fields and
Assigning a value to @code{FS} causes @command{gawk} to use
@code{FS} for field splitting again. Use @samp{FS = FS} to make this happen,
@@ -6755,7 +7205,7 @@ if (PROCINFO["FS"] == "FS")
else if (PROCINFO["FS"] == "FIELDWIDTHS")
@var{fixed-width field splitting} @dots{}
else
- @var{content-based field splitting} @dots{} (see next @value{SECTION})
+ @var{content-based field splitting} @dots{} @ii{(see next @value{SECTION})}
@end example
This information is useful when writing a function
@@ -6860,6 +7310,8 @@ if (substr($i, 1, 1) == "\"") @{
As with @code{FS}, the @code{IGNORECASE} variable (@pxref{User-modified})
affects field splitting with @code{FPAT}.
+Assigning a value to @code{FPAT} overrides field splitting
+with @code{FS} and with @code{FIELDWIDTHS}.
Similar to @code{FIELDWIDTHS}, the value of @code{PROCINFO["FS"]}
will be @code{"FPAT"} if content-based field splitting is being used.
@@ -6869,7 +7321,7 @@ the double quotes. @command{gawk} provides no way to deal with this.
Since there is no formal specification for CSV data, there isn't much
more to be done;
the @code{FPAT} mechanism provides an elegant solution for the majority
-of cases, and the @command{gawk} maintainer is satisfied with that.
+of cases, and the @command{gawk} developers are satisfied with that.
@end quotation
As written, the regexp used for @code{FPAT} requires that each field
@@ -6883,6 +7335,12 @@ FPAT = "([^,]*)|(\"[^\"]+\")"
Finally, the @code{patsplit()} function makes the same functionality
available for splitting regular strings (@pxref{String Functions}).
+To recap, @command{gawk} provides three independent methods
+to split input records into fields. @command{gawk} uses whichever
+mechanism was last chosen based on which of the three
+variables---@code{FS}, @code{FIELDWIDTHS}, and @code{FPAT}---was
+last assigned to.
+
@node Multiple Line
@section Multiple-Line Records
@@ -6931,9 +7389,9 @@ the first nonblank line that follows---no matter how many blank lines
appear in a row, they are considered one record separator.
@cindex dark corner, multiline records
-There is an important difference between @samp{RS = ""} and
+However, there is an important difference between @samp{RS = ""} and
@samp{RS = "\n\n+"}. In the first case, leading newlines in the input
-data file are ignored, and if a file ends without extra blank lines
+@value{DF} are ignored, and if a file ends without extra blank lines
after the last record, the final newline is removed from the record.
In the second case, this special processing is not done.
@value{DARKCORNER}
@@ -6969,7 +7427,7 @@ Another way to separate fields is to
put each field on a separate line: to do this, just set the
variable @code{FS} to the string @code{"\n"}. (This single
character separator matches a single newline.)
-A practical example of a data file organized this way might be a mailing
+A practical example of a @value{DF} organized this way might be a mailing
list, where each entry is separated by blank lines. Consider a mailing
list in a file named @file{addresses}, which looks like this:
@@ -7034,7 +7492,7 @@ value of
@table @code
@item RS == "\n"
Records are separated by the newline character (@samp{\n}). In effect,
-every line in the data file is a separate record, including blank lines.
+every line in the @value{DF} is a separate record, including blank lines.
This is the default.
@item RS == @var{any single character}
@@ -7073,7 +7531,7 @@ then @command{gawk} sets @code{RT} to the null string.
@c STARTOFRANGE inex
@cindex input, explicit
So far we have been getting our input data from @command{awk}'s main
-input stream---either the standard input (usually your terminal, sometimes
+input stream---either the standard input (usually your keyboard, sometimes
the output from another program) or from the
files specified on the command line. The @command{awk} language has a
special built-in command called @code{getline} that
@@ -7084,7 +7542,19 @@ The @code{getline} command is used in several different ways and should
The examples that follow the explanation of the @code{getline} command
include material that has not been covered yet. Therefore, come back
and study the @code{getline} command @emph{after} you have reviewed the
-rest of this @value{DOCUMENT} and have a good knowledge of how @command{awk} works.
+rest of
+@ifinfo
+this @value{DOCUMENT}
+@end ifinfo
+@ifhtml
+this @value{DOCUMENT}
+@end ifhtml
+@ifnotinfo
+@ifnothtml
+Parts I and II
+@end ifnothtml
+@end ifnotinfo
+and have a good knowledge of how @command{awk} works.
@cindex @command{gawk}, @code{ERRNO} variable in
@cindex @code{ERRNO} variable, with @command{getline} command
@@ -7092,7 +7562,7 @@ rest of this @value{DOCUMENT} and have a good knowledge of how @command{awk} wor
@cindex @code{getline} command, return values
@cindex @option{--sandbox} option, input redirection with @code{getline}
-The @code{getline} command returns one if it finds a record and zero if
+The @code{getline} command returns 1 if it finds a record and 0 if
it encounters the end of the file. If there is some error in getting
a record, such as a file that cannot be opened, then @code{getline}
returns @minus{}1. In this case, @command{gawk} sets the variable
@@ -7132,42 +7602,63 @@ finished processing the current record, but want to do some special
processing on the next record @emph{right now}. For example:
@example
+# Remove text between /* and */, inclusive
@{
- if ((t = index($0, "/*")) != 0) @{
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) @{
- if (getline <= 0) @{
+ if ((i = index($0, "/*")) != 0) @{
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) @{
+ rest = substr(rest, j + 2) # remove comment
+ @} else @{
+ while (j == 0) @{
+ # get more text
+ if (getline <= 0) @{
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- @}
- u = index($0, "*/")
- offset = 0
- @}
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- @}
- print $0
+ @}
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) @{
+ rest = substr(rest, j + 2)
+ break
+ @}
+ @}
+ @}
+ # build up the output line using string concatenation
+ $0 = out rest
+ @}
+ print $0
@}
@end example
+@c 8/2014: Here is some sample input:
+@ignore
+mon/*comment*/key
+rab/*commen
+t*/bit
+horse /*comment*/more text
+part 1 /*comment*/part 2 /*comment*/part 3
+no comment
+@end ignore
+
This @command{awk} program deletes C-style comments (@samp{/* @dots{}
-*/}) from the input. By replacing the @samp{print $0} with other
+*/}) from the input.
+It uses a number of features we haven't covered yet, including
+string concatenation
+(@pxref{Concatenation})
+and the @code{index()} and @code{substr()} built-in
+functions
+(@pxref{String Functions}).
+By replacing the @samp{print $0} with other
statements, you could perform more complicated processing on the
decommented input, such as searching for matches of a regular
expression. (This program has a subtle problem---it does not work if one
comment ends and another begins on the same line.)
-@ignore
-Exercise,
-write a program that does handle multiple comments on the line.
-@end ignore
-
This form of the @code{getline} command sets @code{NF},
@code{NR}, @code{FNR}, @code{RT}, and the value of @code{$0}.
@@ -7243,7 +7734,7 @@ the value of @code{NF} do not change.
@cindex operators, input/output
Use @samp{getline < @var{file}} to read the next record from @var{file}.
Here @var{file} is a string-valued expression that
-specifies the file name. @samp{< @var{file}} is called a @dfn{redirection}
+specifies the @value{FN}. @samp{< @var{file}} is called a @dfn{redirection}
because it directs input to come from a different place.
For example, the following
program reads its input record from the file @file{secondary.input} when it
@@ -7271,9 +7762,9 @@ changed, resulting in a new value of @code{NF}.
According to POSIX, @samp{getline < @var{expression}} is ambiguous if
@var{expression} contains unparenthesized operators other than
@samp{$}; for example, @samp{getline < dir "/" file} is ambiguous
-because the concatenation operator is not parenthesized. You should
-write it as @samp{getline < (dir "/" file)} if you want your program
-to be portable to all @command{awk} implementations.
+because the concatenation operator (not discussed yet; @pxref{Concatenation})
+is not parenthesized. You should write it as @samp{getline < (dir "/" file)} if
+you want your program to be portable to all @command{awk} implementations.
@node Getline/Variable/File
@subsection Using @code{getline} into a Variable from a File
@@ -7306,19 +7797,19 @@ Such a record is replaced by the contents of the file
Note here how the name of the extra input file is not built into
the program; it is taken directly from the data, specifically from the second field on
-the @samp{@@include} line.
+the @code{@@include} line.
The @code{close()} function is called to ensure that if two identical
-@samp{@@include} lines appear in the input, the entire specified file is
+@code{@@include} lines appear in the input, the entire specified file is
included twice.
@xref{Close Files And Pipes}.
One deficiency of this program is that it does not process nested
-@samp{@@include} statements
-(i.e., @samp{@@include} statements in included files)
+@code{@@include} statements
+(i.e., @code{@@include} statements in included files)
the way a true macro preprocessor would.
@xref{Igawk Program}, for a program
-that does handle nested @samp{@@include} statements.
+that does handle nested @code{@@include} statements.
@node Getline/Pipe
@subsection Using @code{getline} from a Pipe
@@ -7362,9 +7853,10 @@ The @code{close()} function is called to ensure that if two identical
@samp{@@execute} lines appear in the input, the command is run for
each one.
@ifnottex
+@ifnotdocbook
@xref{Close Files And Pipes}.
+@end ifnotdocbook
@end ifnottex
-@c Exercise!!
@c This example is unrealistic, since you could just use system
Given the input:
@@ -7418,7 +7910,7 @@ Unfortunately, @command{gawk} has not been consistent in its treatment
of a construct like @samp{@w{"echo "} "date" | getline}.
Most versions, including the current version, treat it as
@samp{@w{("echo "} "date") | getline}.
-(This how Brian Kernighan's @command{awk} behaves.)
+(This is how BWK @command{awk} behaves.)
Some versions changed and treated it as
@samp{@w{"echo "} ("date" | getline)}.
(This is how @command{mawk} behaves.)
@@ -7524,7 +8016,7 @@ where coprocesses are discussed in more detail.
Here are some miscellaneous points about @code{getline} that
you should bear in mind:
-@itemize @bullet
+@itemize @value{BULLET}
@item
When @code{getline} changes the value of @code{$0} and @code{NF},
@command{awk} does @emph{not} automatically jump to the start of the
@@ -7536,7 +8028,7 @@ However, the new record is tested against any subsequent rules.
@cindex @command{awk}, implementations, limits
@cindex @command{gawk}, implementation issues, limits
@item
-Many @command{awk} implementations limit the number of pipelines that an @command{awk}
+Some very old @command{awk} implementations limit the number of pipelines that an @command{awk}
program may have open to just one. In @command{gawk}, there is no such limit.
You can open as many pipelines (and coprocesses) as the underlying operating
system permits.
@@ -7549,10 +8041,10 @@ system permits.
@item
An interesting side effect occurs if you use @code{getline} without a
redirection inside a @code{BEGIN} rule. Because an unredirected @code{getline}
-reads from the command-line data files, the first @code{getline} command
+reads from the command-line @value{DF}s, the first @code{getline} command
causes @command{awk} to set the value of @code{FILENAME}. Normally,
@code{FILENAME} does not have a value inside @code{BEGIN} rules, because you
-have not yet started to process the command-line data files.
+have not yet started to process the command-line @value{DF}s.
@value{DARKCORNER}
(@xref{BEGIN/END},
also @pxref{Auto-set}.)
@@ -7568,13 +8060,14 @@ probably by accident, and you should reconsider what it is you're
trying to accomplish.
@item
-@ref{Getline Summary}, presents a table summarizing the
+@DBREF{Getline Summary} presents a table summarizing the
@code{getline} variants and which variables they can affect.
It is worth noting that those variants which do not use redirection
can cause @code{FILENAME} to be updated if they cause
@command{awk} to start reading a new input file.
@item
+@cindex Moore, Duncan
If the variable being assigned is an expression with side effects,
different versions of @command{awk} behave differently upon encountering
end-of-file. Some versions don't evaluate the expression; many versions
@@ -7599,7 +8092,7 @@ end of file is encountered, before the element in @code{a} is assigned?
@command{gawk} treats @code{getline} like a function call, and evaluates
the expression @samp{a[++c]} before attempting to read from @file{f}.
-Other versions of @command{awk} only evaluate the expression once they
+However, some versions of @command{awk} only evaluate the expression once they
know that there is a string value to be assigned. Caveat Emptor.
@end itemize
@@ -7635,10 +8128,13 @@ Note: for each variant, @command{gawk} sets the @code{RT} built-in variable.
@section Reading Input With A Timeout
@cindex timeout, reading input
-You may specify a timeout in milliseconds for reading input from a terminal,
-pipe or two-way communication including, TCP/IP sockets. This can be done
+@cindex differences in @command{awk} and @command{gawk}, read timeouts
+This @value{SECTION} describes a feature that is specific to @command{gawk}.
+
+You may specify a timeout in milliseconds for reading input from the keyboard,
+a pipe, or two-way communication, including TCP/IP sockets. This can be done
on a per input, command or connection basis, by setting a special element
-in the @code{PROCINFO} array:
+in the @code{PROCINFO} (@pxref{Auto-set}) array:
@example
PROCINFO["input_name", "READ_TIMEOUT"] = @var{timeout in milliseconds}
@@ -7658,8 +8154,8 @@ else if (ERRNO != "")
print ERRNO
@end example
-Here is how to read interactively from the terminal@footnote{This assumes
-that standard input is the keyboard} without waiting
+Here is how to read interactively from the user@footnote{This assumes
+that standard input is the keyboard.} without waiting
for more than five seconds:
@example
@@ -7668,13 +8164,13 @@ while ((getline < "/dev/stdin") > 0)
print $0
@end example
-@command{gawk} will terminate the read operation if input does not
-arrive after waiting for the timeout period, return failure
-and set the @code{ERRNO} variable to an appropriate string value.
+@command{gawk} terminates the read operation if input does not
+arrive after waiting for the timeout period, returns failure
+and sets the @code{ERRNO} variable to an appropriate string value.
A negative or zero value for the timeout is the same as specifying
no timeout at all.
-A timeout can also be set for reading from the terminal in the implicit
+A timeout can also be set for reading from the keyboard in the implicit
loop that reads input records and matches them against patterns,
like so:
@@ -7735,22 +8231,124 @@ a connection before it can start reading any data,
or the attempt to open a FIFO special file for reading can block
indefinitely until some other process opens it for writing.
-@node Command line directories
+@node Command-line directories
@section Directories On The Command Line
-@cindex differences in @command{awk} and @command{gawk}, command line directories
-@cindex directories, command line
+@cindex differences in @command{awk} and @command{gawk}, command-line directories
+@cindex directories, command-line
@cindex command line, directories on
According to the POSIX standard, files named on the @command{awk}
-command line must be text files. It is a fatal error if they are not.
+command line must be text files; it is a fatal error if they are not.
Most versions of @command{awk} treat a directory on the command line as
a fatal error.
By default, @command{gawk} produces a warning for a directory on the
-command line, but otherwise ignores it. If either of the @option{--posix}
+command line, but otherwise ignores it. This makes it easier to use
+shell wildcards with your @command{awk} program:
+
+@example
+$ @kbd{gawk -f whizprog.awk *} @ii{Directories could kill this program}
+@end example
+
+If either of the @option{--posix}
or @option{--traditional} options is given, then @command{gawk} reverts
to treating a directory on the command line as a fatal error.
+@xref{Extension Sample Readdir}, for a way to treat directories
+as usable data from an @command{awk} program.
+
+@node Input Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Input is split into records based on the value of @code{RS}.
+The possibilities are as follows:
+
+@multitable @columnfractions .25 .35 .40
+@headitem Value of @code{RS} @tab Records are split on @tab @command{awk} / @command{gawk}
+@item Any single character @tab That character @tab @command{awk}
+@item The empty string (@code{""}) @tab Runs of two or more newlines @tab @command{awk}
+@item A regexp @tab Text that matches the regexp @tab @command{gawk}
+@end multitable
+
+@item
+@command{gawk} sets @code{RT} to the text matched by @code{RS}.
+
+@item
+After splitting the input into records, @command{awk} further splits
+the record into individual fields, named @code{$1}, @code{$2} and so
+on. @code{$0} is the whole record, and @code{NF} indicates how many
+fields there are. The default way to split fields is between whitespace
+characters.
+
+@item
+Fields may be referenced using a variable, as in @samp{$NF}. Fields
+may also be assigned values, which causes the value of @code{$0} to be
+recomputed when it is later referenced. Assigning to a field with a number
+greater than @code{NF} creates the field and rebuilds the record, using
+@code{OFS} to separate the fields. Incrementing @code{NF} does the same
+thing. Decrementing @code{NF} throws away fields and rebuilds the record.
+
+@item
+Field splitting is more complicated than record splitting.
+
+@multitable @columnfractions .40 .40 .20
+@headitem Field separator value @tab Fields are split @dots{} @tab @command{awk} / @command{gawk}
+@item @code{FS == " "} @tab On runs of whitespace @tab @command{awk}
+@item @code{FS == @var{any single character}} @tab On that character @tab @command{awk}
+@item @code{FS == @var{regexp}} @tab On text matching the regexp @tab @command{awk}
+@item @code{FS == ""} @tab Each individual character is a separate field @tab @command{gawk}
+@item @code{FIELDWIDTHS == @var{list of columns}} @tab Based on character position @tab @command{gawk}
+@item @code{FPAT == @var{regexp}} @tab On text around text matching the regexp @tab @command{gawk}
+@end multitable
+
+Using @samp{FS = "\n"} causes the entire record to be a single field
+(assuming that newlines separate records).
+
+@item
+@code{FS} may be set from the command line using the @option{-F} option.
+This can also be done using command-line variable assignment.
+
+@item
+@code{PROCINFO["FS"]} can be used to see how fields are being split.
+
+@item
+Use @code{getline} in its various forms to read additional records,
+from the default input stream, from a file, or from a pipe or co-process.
+
+@item
+Use @code{PROCINFO[@var{file}, "READ_TIMEOUT"]} to cause reads to time out
+for @var{file}.
+
+@item
+Directories on the command line are fatal for standard @command{awk};
+@command{gawk} ignores them if not in POSIX mode.
+
+@end itemize
+
+@c EXCLUDE START
+@node Input Exercises
+@section Exercises
+
+@enumerate
+@item
+Using the @code{FIELDWIDTHS} variable (@pxref{Constant Size}),
+write a program to read election data, where each record represents
+one voter's votes. Come up with a way to define which columns are
+associated with each ballot item, and print the total votes,
+including abstentions, for each item.
+
+@item
+@ref{Plain Getline}, presented a program to remove C-style
+comments (@samp{/* @dots{} */}) from the input. That program
+does not work if one comment ends on one line and another one
+starts later on the same line.
+That can be fixed by making one simple change. What is it?
+
+@end enumerate
+@c EXCLUDE END
+
@node Printing
@chapter Printing Output
@@ -7775,7 +8373,7 @@ For printing with specifications, you need the @code{printf} statement
@cindex @code{printf} statement
Besides basic and formatted printing, this @value{CHAPTER}
also covers I/O redirections to files and pipes, introduces
-the special file names that @command{gawk} processes internally,
+the special @value{FN}s that @command{gawk} processes internally,
and discusses the @code{close()} built-in function.
@menu
@@ -7790,13 +8388,15 @@ and discusses the @code{close()} built-in function.
@command{gawk} allows access to inherited file
descriptors.
* Close Files And Pipes:: Closing Input and Output Files and Pipes.
+* Output Summary:: Output summary.
+* Output Exercises:: Exercises.
@end menu
@node Print
@section The @code{print} Statement
The @code{print} statement is used for producing output with simple, standardized
-formatting. Specify only the strings or numbers to print, in a
+formatting. You specify only the strings or numbers to print, in a
list separated by commas. They are output, separated by single spaces,
followed by a newline. The statement looks like this:
@@ -7826,6 +8426,10 @@ double-quote characters, your text is taken as an @command{awk}
expression, and you will probably get an error. Keep in mind that a
space is printed between any two items.
+Note that the @code{print} statement is a statement and not an
+expression---you can't use it in the pattern part of a pattern-action
+statement, for example.
+
@node Print Examples
@section @code{print} Statement Examples
@@ -7879,10 +8483,9 @@ $ @kbd{awk '@{ print $1 $2 @}' inventory-shipped}
To someone unfamiliar with the @file{inventory-shipped} file, neither
example's output makes much sense. A heading line at the beginning
would make it clearer. Let's add some headings to our table of months
-(@code{$1}) and green crates shipped (@code{$2}). We do this using the
-@code{BEGIN} pattern
-(@pxref{BEGIN/END})
-so that the headings are only printed once:
+(@code{$1}) and green crates shipped (@code{$2}). We do this using
+a @code{BEGIN} rule (@pxref{BEGIN/END}) so that the headings are only
+printed once:
@example
awk 'BEGIN @{ print "Month Crates"
@@ -7968,16 +8571,6 @@ The following example prints the first and second fields of each input
record, separated by a semicolon, with a blank line added after each
newline:
-@ignore
-Exercise,
-Rewrite the
-@example
-awk 'BEGIN @{ print "Month Crates"
- print "----- ------" @}
- @{ print $1, " ", $2 @}' inventory-shipped
-@end example
-program by using a new value of @code{OFS}.
-@end ignore
@example
$ @kbd{awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @}}
@@ -8134,8 +8727,9 @@ of value to print. The rest of the format specifier is made up of
optional @dfn{modifiers} that control @emph{how} to print the value, such as
the field width. Here is a list of the format-control letters:
-@table @code
-@item %c
+@c @asis for docbook to come out right
+@table @asis
+@item @code{%c}
Print a number as an ASCII character; thus, @samp{printf "%c",
65} outputs the letter @samp{A}. The output for a string value is
the first character of the string.
@@ -8143,16 +8737,6 @@ the first character of the string.
@cindex dark corner, format-control characters
@cindex @command{gawk}, format-control characters
@quotation NOTE
-@ignore
-The @samp{%c} format does @emph{not} handle values outside the range
-0--255. On most systems, values from 0--127 are within the range of
-ASCII and will yield an ASCII character. Values in the range 128--255
-may format as characters in some extended character set, or they may not.
-System 390 (IBM architecture mainframe) systems use 8-bit characters,
-and thus values from 0--255 yield the corresponding EBCDIC character.
-Any value above 255 is treated as modulo 255; i.e., the lowest eight bits
-of the value are used. The locale and character set are always ignored.
-@end ignore
The POSIX standard says the first character of a string is printed.
In locales with multibyte characters, @command{gawk} attempts to
convert the leading bytes of the string into a valid wide character
@@ -8160,6 +8744,8 @@ and then to print the multibyte encoding of that character.
Similarly, when printing a numeric value, @command{gawk} allows the
value to be within the numeric range of values that can be held
in a wide character.
+If the conversion to multibyte encoding fails, @command{gawk}
+uses the low eight bits of the value as the character to print.
Other @command{awk} versions generally restrict themselves to printing
the first byte of a string or to numeric values within the range of
@@ -8167,12 +8753,12 @@ a single byte (0--255).
@end quotation
-@item %d@r{,} %i
+@item @code{%d}, @code{%i}
Print a decimal integer.
The two control letters are equivalent.
(The @samp{%i} specification is for compatibility with ISO C.)
-@item %e@r{,} %E
+@item @code{%e}, @code{%E}
Print a number in scientific (exponential) notation;
for example:
@@ -8187,7 +8773,7 @@ which follow the decimal point.
discussed in the next @value{SUBSECTION}.)
@samp{%E} uses @samp{E} instead of @samp{e} in the output.
-@item %f
+@item @code{%f}
Print a number in floating-point notation.
For example:
@@ -8207,39 +8793,40 @@ infinity are formatted as
@samp{-inf} or @samp{-infinity},
and positive infinity as
@samp{inf} and @samp{infinity}.
-The special ``not a number'' value formats as @samp{-nan} or @samp{nan}.
+The special ``not a number'' value formats as @samp{-nan} or @samp{nan}
+(@pxref{Math Definitions}).
-@item %F
+@item @code{%F}
Like @samp{%f} but the infinity and ``not a number'' values are spelled
using uppercase letters.
The @samp{%F} format is a POSIX extension to ISO C; not all systems
support it. On those that don't, @command{gawk} uses @samp{%f} instead.
-@item %g@r{,} %G
+@item @code{%g}, @code{%G}
Print a number in either scientific notation or in floating-point
notation, whichever uses fewer characters; if the result is printed in
scientific notation, @samp{%G} uses @samp{E} instead of @samp{e}.
-@item %o
+@item @code{%o}
Print an unsigned octal integer
(@pxref{Nondecimal-numbers}).
-@item %s
+@item @code{%s}
Print a string.
-@item %u
+@item @code{%u}
Print an unsigned decimal integer.
(This format is of marginal use, because all numbers in @command{awk}
are floating-point; it is provided primarily for compatibility with C.)
-@item %x@r{,} %X
+@item @code{%x}, @code{%X}
Print an unsigned hexadecimal integer;
@samp{%X} uses the letters @samp{A} through @samp{F}
instead of @samp{a} through @samp{f}
(@pxref{Nondecimal-numbers}).
-@item %%
+@item @code{%%}
Print a single @samp{%}.
This does not consume an
argument and it ignores any modifiers.
@@ -8274,7 +8861,7 @@ which they may appear:
@table @code
@cindex differences in @command{awk} and @command{gawk}, @code{print}/@code{printf} statements
@cindex @code{printf} statement, positional specifiers
-@c the command does NOT start a secondary
+@c the code{} does NOT start a secondary
@cindex positional specifiers, @code{printf} statement
@item @var{N}$
An integer constant followed by a @samp{$} is a @dfn{positional specifier}.
@@ -8350,7 +8937,7 @@ For example:
$ @kbd{cat thousands.awk} @ii{Show source program}
@print{} BEGIN @{ printf "%'d\n", 1234567 @}
$ @kbd{LC_ALL=C gawk -f thousands.awk}
-@print{} 1234567 @ii{Results in "C" locale}
+@print{} 1234567 @ii{Results in} "C" @ii{locale}
$ @kbd{LC_ALL=en_US.UTF-8 gawk -f thousands.awk}
@print{} 1,234,567 @ii{Results in US English UTF locale}
@end example
@@ -8460,14 +9047,12 @@ This is not particularly easy to read but it does work.
@c @cindex lint checks
@cindex troubleshooting, fatal errors, @code{printf} format strings
@cindex POSIX @command{awk}, @code{printf} format strings and
-C programmers may be used to supplying additional
-@samp{l}, @samp{L}, and @samp{h}
-modifiers in @code{printf} format strings. These are not valid in @command{awk}.
-Most @command{awk} implementations silently ignore them.
-If @option{--lint} is provided on the command line
-(@pxref{Options}),
-@command{gawk} warns about their use. If @option{--posix} is supplied,
-their use is a fatal error.
+C programmers may be used to supplying additional modifiers (@samp{h},
+@samp{j}, @samp{l}, @samp{L}, @samp{t}, and @samp{z}) in @code{printf}
+format strings. These are not valid in @command{awk}. Most @command{awk}
+implementations silently ignore them. If @option{--lint} is provided
+on the command line (@pxref{Options}), @command{gawk} warns about their
+use. If @option{--posix} is supplied, their use is a fatal error.
@c ENDOFRANGE pfm
@node Printf Examples
@@ -8513,7 +9098,7 @@ they are last on their lines. They don't need to have spaces
after them.
The table could be made to look even nicer by adding headings to the
-tops of the columns. This is done using the @code{BEGIN} pattern
+tops of the columns. This is done using a @code{BEGIN} rule
(@pxref{BEGIN/END})
so that the headers are only printed once, at the beginning of
the @command{awk} program:
@@ -8549,12 +9134,6 @@ awk 'BEGIN @{ format = "%-10s %s\n"
@{ printf format, $1, $2 @}' mail-list
@end example
-@c !!! exercise
-At this point, it would be a worthwhile exercise to use the
-@code{printf} statement to line up the headings and table data for the
-@file{inventory-shipped} example that was covered earlier in the @value{SECTION}
-on the @code{print} statement
-(@pxref{Print}).
@c ENDOFRANGE printfs
@node Redirection
@@ -8585,7 +9164,7 @@ commands, except that they are written inside the @command{awk} program.
@cindex @code{printf} statement, See Also redirection@comma{} of output
There are four forms of output redirection: output to a file, output
appended to a file, output through a pipe to another command, and output
-to a coprocess. They are all shown for the @code{print} statement,
+to a coprocess. We show them all for the @code{print} statement,
but they work identically for @code{printf}:
@table @code
@@ -8594,9 +9173,9 @@ but they work identically for @code{printf}:
@cindex operators, input/output
@item print @var{items} > @var{output-file}
This redirection prints the items into the output file named
-@var{output-file}. The file name @var{output-file} can be any
+@var{output-file}. The @value{FN} @var{output-file} can be any
expression. Its value is changed to a string and then used as a
-file name (@pxref{Expressions}).
+@value{FN} (@pxref{Expressions}).
When this type of redirection is used, the @var{output-file} is erased
before the first output is written to it. Subsequent writes to the same
@@ -8690,7 +9269,7 @@ This example also illustrates the use of a variable to represent
a @var{file} or @var{command}---it is not necessary to always
use a string constant. Using a variable is generally a good idea,
because (if you mean to refer to that same file or command)
-@command{awk} requires that the string value be spelled identically
+@command{awk} requires that the string value be written identically
every time.
@cindex coprocesses
@@ -8749,7 +9328,9 @@ As mentioned earlier
many
@end ifnotinfo
@ifnottex
+@ifnotdocbook
Many
+@end ifnotdocbook
@end ifnottex
older
@command{awk} implementations limit the number of pipelines that an @command{awk}
@@ -8762,7 +9343,7 @@ open as many pipelines as the underlying operating system permits.
A particularly powerful way to use redirection is to build command lines
and pipe them into the shell, @command{sh}. For example, suppose you
-have a list of files brought over from a system where all the file names
+have a list of files brought over from a system where all the @value{FN}s
are stored in uppercase, and you wish to rename them to have names in
all lowercase. The following program is both simple and efficient:
@@ -8784,12 +9365,12 @@ It then sends the list to the shell for execution.
@c ENDOFRANGE reout
@node Special Files
-@section Special File Names in @command{gawk}
+@section Special @value{FFN}s in @command{gawk}
@c STARTOFRANGE gfn
@cindex @command{gawk}, file names in
-@command{gawk} provides a number of special file names that it interprets
-internally. These file names provide access to standard file descriptors
+@command{gawk} provides a number of special @value{FN}s that it interprets
+internally. These @value{FN}s provide access to standard file descriptors
and TCP/IP networking.
@menu
@@ -8831,7 +9412,8 @@ print "Serious error detected!" | "cat 1>&2"
@noindent
This works by opening a pipeline to a shell command that can access the
standard error stream that it inherits from the @command{awk} process.
-This is far from elegant, and it is also inefficient, because it requires a
+@c 8/2014: Mike Brennan says not to cite this as inefficient. So, fixed.
+This is far from elegant, and it also requires a
separate process. So people writing @command{awk} programs often
don't do this. Instead, they send the error messages to the
screen, like this:
@@ -8853,12 +9435,12 @@ that happens, writing to the screen is not correct. In fact, if
terminal at all.
Then opening @file{/dev/tty} fails.
-@command{gawk} provides special file names for accessing the three standard
-streams. @value{COMMONEXT}. It also provides syntax for accessing
-any other inherited open files. If the file name matches
+@command{gawk} provides special @value{FN}s for accessing the three standard
+streams. @value{COMMONEXT} It also provides syntax for accessing
+any other inherited open files. If the @value{FN} matches
one of these special names when @command{gawk} redirects input or output,
-then it directly uses the stream that the file name stands for.
-These special file names work for all operating systems that @command{gawk}
+then it directly uses the stream that the @value{FN} stands for.
+These special @value{FN}s work for all operating systems that @command{gawk}
has been ported to, not just those that are POSIX-compliant:
@cindex common extensions, @code{/dev/stdin} special file
@@ -8888,7 +9470,7 @@ the shell). Unless special pains are taken in the shell from which
@command{gawk} is invoked, only descriptors 0, 1, and 2 are available.
@end table
-The file names @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
+The @value{FN}s @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
are aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and @file{/dev/fd/2},
respectively. However, they are more self-explanatory.
The proper way to write an error message in a @command{gawk} program
@@ -8899,13 +9481,12 @@ print "Serious error detected!" > "/dev/stderr"
@end example
@cindex troubleshooting, quotes with file names
-Note the use of quotes around the file name.
+Note the use of quotes around the @value{FN}.
Like any other redirection, the value must be a string.
It is a common error to omit the quotes, which leads
to confusing results.
-@c Exercise: What does it do? :-)
-Finally, using the @code{close()} function on a file name of the
+Finally, using the @code{close()} function on a @value{FN} of the
form @code{"/dev/fd/@var{N}"}, for file descriptor numbers
above two, does actually close the given file descriptor.
@@ -8921,7 +9502,7 @@ versions of @command{awk}.
@command{gawk} programs
can open a two-way
TCP/IP connection, acting as either a client or a server.
-This is done using a special file name of the form:
+This is done using a special @value{FN} of the form:
@example
@file{/@var{net-type}/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}}
@@ -8931,7 +9512,7 @@ The @var{net-type} is one of @samp{inet}, @samp{inet4} or @samp{inet6}.
The @var{protocol} is one of @samp{tcp} or @samp{udp},
and the other fields represent the other essential pieces of information
for making a networking connection.
-These file names are used with the @samp{|&} operator for communicating
+These @value{FN}s are used with the @samp{|&} operator for communicating
with a coprocess
(@pxref{Two-way I/O}).
This is an advanced feature, mentioned here only for completeness.
@@ -8939,21 +9520,21 @@ Full discussion is delayed until
@ref{TCP/IP Networking}.
@node Special Caveats
-@subsection Special File Name Caveats
+@subsection Special @value{FFN} Caveats
Here is a list of things to bear in mind when using the
-special file names that @command{gawk} provides:
+special @value{FN}s that @command{gawk} provides:
-@itemize @bullet
+@itemize @value{BULLET}
@cindex compatibility mode (@command{gawk}), file names
@cindex file names, in compatibility mode
@item
-Recognition of these special file names is disabled if @command{gawk} is in
+Recognition of these special @value{FN}s is disabled if @command{gawk} is in
compatibility mode (@pxref{Options}).
@item
@command{gawk} @emph{always}
-interprets these special file names.
+interprets these special @value{FN}s.
For example, using @samp{/dev/fd/4}
for output actually writes on file descriptor 4, and not on a new
file descriptor that is @code{dup()}'ed from file descriptor 4. Most of
@@ -8976,7 +9557,7 @@ Doing so results in unpredictable behavior.
@cindex coprocesses, closing
@cindex @code{getline} command, coprocesses@comma{} using from
-If the same file name or the same shell command is used with @code{getline}
+If the same @value{FN} or the same shell command is used with @code{getline}
more than once during the execution of an @command{awk} program
(@pxref{Getline}),
the file is opened (or the command is executed) the first time only.
@@ -8985,7 +9566,7 @@ The next time the same file or command is used with @code{getline},
another record is read from it, and so on.
Similarly, when a file or pipe is opened for output, @command{awk} remembers
-the file name or command associated with it, and subsequent
+the @value{FN} or command associated with it, and subsequent
writes to the same file or command are appended to the previous writes.
The file or pipe stays open until @command{awk} exits.
@@ -9027,7 +9608,7 @@ file or command, or the next @code{print} or @code{printf} to that
file or command, reopens the file or reruns the command.
Because the expression that you use to close a file or pipeline must
exactly match the expression used to open the file or run the command,
-it is good practice to use a variable to store the file name or command.
+it is good practice to use a variable to store the @value{FN} or command.
The previous example becomes the following:
@example
@@ -9041,7 +9622,7 @@ close(sortcom)
This helps avoid hard-to-find typographical errors in your @command{awk}
programs. Here are some of the reasons for closing an output file:
-@itemize @bullet
+@itemize @value{BULLET}
@item
To write a file and read it back later on in the same @command{awk}
program. Close the file after writing it, then
@@ -9077,7 +9658,7 @@ a separate message.
@cindex @code{close()} function, portability
If you use more files than the system allows you to have open,
@command{gawk} attempts to multiplex the available open files among
-your data files. @command{gawk}'s ability to do this depends upon the
+your @value{DF}s. @command{gawk}'s ability to do this depends upon the
facilities of your operating system, so it may not always work. It is
therefore both good practice and good portability advice to always
use @code{close()} on your files when you are done with them.
@@ -9110,15 +9691,16 @@ more importantly, the file descriptor for the pipe
is not closed and released until @code{close()} is called or
@command{awk} exits.
-@code{close()} will silently do nothing if given an argument that
+@code{close()} silently does nothing if given an argument that
does not represent a file, pipe or coprocess that was opened with
-a redirection.
+a redirection. In such a case, it returns a negative value,
+indicating an error. In addition, @command{gawk} sets @code{ERRNO}
+to a string indicating the error.
-Note also that @samp{close(FILENAME)} has no
-``magic'' effects on the implicit loop that reads through the
-files named on the command line. It is, more likely, a close
-of a file that was never opened, so @command{awk} silently
-does nothing.
+Note also that @samp{close(FILENAME)} has no ``magic'' effects on the
+implicit loop that reads through the files named on the command line.
+It is, more likely, a close of a file that was never opened with a
+redirection, so @command{awk} silently does nothing.
@cindex @code{|} (vertical bar), @code{|&} operator (I/O), pipes@comma{} closing
When using the @samp{|&} operator to communicate with a coprocess,
@@ -9142,7 +9724,7 @@ which discusses it in more detail and gives an example.
@cindex differences in @command{awk} and @command{gawk}, @code{close()} function
@cindex Unix @command{awk}, @code{close()} function and
-In many versions of Unix @command{awk}, the @code{close()} function
+In many older versions of Unix @command{awk}, the @code{close()} function
is actually a statement. It is a syntax error to try to use the return
value from @code{close()}:
@value{DARKCORNER}
@@ -9187,6 +9769,69 @@ when closing a pipe.
@c ENDOFRANGE ofc
@c ENDOFRANGE pc
@c ENDOFRANGE cc
+
+@node Output Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+The @code{print} statement prints comma-separated expressions. The
+expressions are separated by the value of @code{OFS}, and the output
+is terminated by the value of @code{ORS}. @code{OFMT} provides the
+conversion format for numeric values for the @code{print} statement.
+
+@item
+The @code{printf} statement provides finer-grained control over output,
+with format control letters for different data types and various flags
+that modify the behavior of the format control letters.
+
+@item
+Output from both @code{print} and @code{printf} may be redirected to
+files, pipes, and coprocesses.
+
+@item
+@command{gawk} provides special file names for access to standard input,
+output and error, and for network communications.
+
+@item
+Use @code{close()} to close open file, pipe and coprocess redirections.
+For coprocesses, it is possible to close only one direction of the
+communications.
+
+@end itemize
+
+@c EXCLUDE START
+@node Output Exercises
+@section Exercises
+
+@enumerate
+@item
+Rewrite the program:
+
+@example
+awk 'BEGIN @{ print "Month Crates"
+ print "----- ------" @}
+ @{ print $1, " ", $2 @}' inventory-shipped
+@end example
+
+@noindent
+from @ref{Output Separators}, by using a new value of @code{OFS}.
+
+@item
+Use the @code{printf} statement to line up the headings and table data
+for the @file{inventory-shipped} example that was covered in @ref{Print}.
+
+@item
+What happens if you forget the double quotes when redirecting
+output, as follows:
+
+@example
+BEGIN @{ print "Serious error detected!" > /dev/stderr @}
+@end example
+
+@end enumerate
+@c EXCLUDE END
+
@c ENDOFRANGE prnt
@node Expressions
@@ -9213,6 +9858,7 @@ combinations of these with various operators.
* Function Calls:: A function call is an expression.
* Precedence:: How various operators nest.
* Locales:: How the locale affects things.
+* Expressions Summary:: Expressions summary.
@end menu
@node Values
@@ -9258,9 +9904,9 @@ have different forms, but are stored identically internally.
A @dfn{numeric constant} stands for a number. This number can be an
integer, a decimal fraction, or a number in scientific (exponential)
notation.@footnote{The internal representation of all numbers,
-including integers, uses double precision
-floating-point numbers.
-On most modern systems, these are in IEEE 754 standard format.}
+including integers, uses double precision floating-point numbers.
+On most modern systems, these are in IEEE 754 standard format.
+@xref{Arbitrary Precision Arithmetic}, for much more information.}
Here are some examples of numeric constants that all
have the same value:
@@ -9283,7 +9929,7 @@ double-quotation marks. For example:
@cindex strings, length limitations
represents the string whose contents are @samp{parrot}. Strings in
@command{gawk} can be of any length, and they can contain any of the possible
-eight-bit ASCII characters including ASCII @sc{nul} (character code zero).
+eight-bit ASCII characters including ASCII @value{NUL} (character code zero).
Other @command{awk}
implementations may have difficulty with some character codes.
@@ -9399,7 +10045,8 @@ A regexp constant is a regular expression description enclosed in
slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in
@command{awk} programs are constant, but the @samp{~} and @samp{!~}
matching operators can also match computed or dynamic regexps
-(which are just ordinary strings or variables that contain a regexp).
+(which are typically just ordinary strings or variables that contain a regexp,
+but could be a more complex expression).
@c ENDOFRANGE cnst
@node Using Constant Regexps
@@ -9473,7 +10120,7 @@ upon the contents of the current input record.
Constant regular expressions are also used as the first argument for
the @code{gensub()}, @code{sub()}, and @code{gsub()} functions, as the
second argument of the @code{match()} function,
-and as the third argument of the @code{patsplit()} function
+and as the third argument of the @code{split()} and @code{patsplit()} functions
(@pxref{String Functions}).
Modern implementations of @command{awk}, including @command{gawk}, allow
the third argument of @code{split()} to be a regexp constant, but some
@@ -9505,7 +10152,7 @@ function mysub(pat, repl, str, global)
@c @cindex automatic warnings
@c @cindex warnings, automatic
In this example, the programmer wants to pass a regexp constant to the
-user-defined function @code{mysub}, which in turn passes it on to
+user-defined function @code{mysub()}, which in turn passes it on to
either @code{sub()} or @code{gsub()}. However, what really happens is that
the @code{pat} parameter is either one or zero, depending upon whether
or not @code{$0} matches @code{/hi/}.
@@ -9526,7 +10173,7 @@ on the @command{awk} command line.
@menu
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line and a
+* Assignment Options:: Setting variables on the command line and a
summary of command-line syntax. This is an
advanced method of input.
@end menu
@@ -9537,7 +10184,11 @@ on the @command{awk} command line.
Variables let you give names to values and refer to them later. Variables
have already been used in many of the examples. The name of a variable
must be a sequence of letters, digits, or underscores, and it may not begin
-with a digit. Case is significant in variable names; @code{a} and @code{A}
+with a digit.
+Here, a @dfn{letter} is any one of the 52 upper- and lowercase
+English letters. Other characters that may be defined as letters
+in non-English locales are not valid in variable names.
+Case is significant in variable names; @code{a} and @code{A}
are distinct variables.
A variable name is a valid expression by itself; it represents the
@@ -9594,7 +10245,7 @@ as in the following:
@noindent
the variable is set at the very beginning, even before the
@code{BEGIN} rules execute. The @option{-v} option and its assignment
-must precede all the file name arguments, as well as the program text.
+must precede all the @value{FN} arguments, as well as the program text.
(@xref{Options}, for more information about
the @option{-v} option.)
Otherwise, the variable assignment is performed at a time determined by
@@ -9635,6 +10286,19 @@ sequences
@node Conversion
@subsection Conversion of Strings and Numbers
+Number-to-string and string-to-number conversions are generally
+straightforward. There can be subtleties to be aware of;
+this @value{SECTION} discusses this important facet of @command{awk}.
+
+@menu
+* Strings And Numbers:: How @command{awk} Converts Between Strings And
+ Numbers.
+* Locale influences conversions:: How the locale may affect conversions.
+@end menu
+
+@node Strings And Numbers
+@subsubsection How @command{awk} Converts Between Strings And Numbers
+
@cindex converting, strings to numbers
@cindex strings, converting
@cindex numbers, converting
@@ -9704,6 +10368,7 @@ b = a ""
@code{b} has the value @code{"12"}, not @code{"12.00"}.
@value{DARKCORNER}
+@sidebar Pre-POSIX @command{awk} Used @code{OFMT} For String Conversion
@cindex POSIX @command{awk}, @code{OFMT} variable and
@cindex @code{OFMT} variable
@cindex portability, new @command{awk} vs.@: old @command{awk}
@@ -9715,32 +10380,32 @@ specifies the output format to use when printing numbers with @code{print}.
conversion from the semantics of printing. Both @code{CONVFMT} and
@code{OFMT} have the same default value: @code{"%.6g"}. In the vast majority
of cases, old @command{awk} programs do not change their behavior.
-However, these semantics for @code{OFMT} are something to keep in mind if you must
-port your new-style program to older implementations of @command{awk}.
-We recommend
-that instead of changing your programs, just port @command{gawk} itself.
-@xref{Print},
-for more information on the @code{print} statement.
-
-And, once again, where you are can matter when it comes to converting
-between numbers and strings. In @ref{Locales}, we mentioned that
-the local character set and language (the locale) can affect how
-@command{gawk} matches characters. The locale also affects numeric
-formats. In particular, for @command{awk} programs, it affects the
-decimal point character. The @code{"C"} locale, and most English-language
-locales, use the period character (@samp{.}) as the decimal point.
-However, many (if not most) European and non-English locales use the comma
-(@samp{,}) as the decimal point character.
+@xref{Print}, for more information on the @code{print} statement.
+@end sidebar
+
+@node Locale influences conversions
+@subsubsection Locales Can Influence Conversion
+
+Where you are can matter when it comes to converting between numbers and
+strings. The local character set and language---the @dfn{locale}---can
+affect numeric formats. In particular, for @command{awk} programs,
+it affects the decimal point character and the thousands-separator
+character. The @code{"C"} locale, and most English-language locales,
+use the period character (@samp{.}) as the decimal point and don't
+have a thousands separator. However, many (if not most) European and
+non-English locales use the comma (@samp{,}) as the decimal point
+character. European locales often use either a space or a period as
+the thousands separator, if they have one.
@cindex dark corner, locale's decimal point character
The POSIX standard says that @command{awk} always uses the period as the decimal
-point when reading the @command{awk} program source code, and for command-line
-variable assignments (@pxref{Other Arguments}).
-However, when interpreting input data, for @code{print} and @code{printf} output,
-and for number to string conversion, the local decimal point character is used.
-@value{DARKCORNER}
-Here are some examples indicating the difference in behavior,
-on a GNU/Linux system:
+point when reading the @command{awk} program source code, and for
+command-line variable assignments (@pxref{Other Arguments}). However,
+when interpreting input data, for @code{print} and @code{printf} output,
+and for number-to-string conversion, the local decimal point character
+is used. @value{DARKCORNER} In all cases, numbers in source code and
+in input data cannot have a thousands separator. Here are some examples
+indicating the difference in behavior, on a GNU/Linux system:
@example
$ @kbd{export POSIXLY_CORRECT=1} @ii{Force POSIX behavior}
@@ -9755,7 +10420,7 @@ $ @kbd{echo 4,321 | LC_ALL=en_DK.utf-8 gawk '@{ print $1 + 1 @}'}
@end example
@noindent
-The @samp{en_DK.utf-8} locale is for English in Denmark, where the comma acts as
+The @code{en_DK.utf-8} locale is for English in Denmark, where the comma acts as
the decimal point separator. In the normal @code{"C"} locale, @command{gawk}
treats @samp{4,321} as @samp{4}, while in the Danish locale, it's treated
as the full number, 4.321.
@@ -9902,7 +10567,7 @@ b * int(a / b) + (a % b) == a
@end example
One possibly undesirable effect of this definition of remainder is that
-@code{@var{x} % @var{y}} is negative if @var{x} is negative. Thus:
+@samp{@var{x} % @var{y}} is negative if @var{x} is negative. Thus:
@example
-17 % 8 = -1
@@ -9910,7 +10575,7 @@ One possibly undesirable effect of this definition of remainder is that
In other @command{awk} implementations, the signedness of the remainder
may be machine-dependent.
-@c !!! what does posix say?
+@c FIXME !!! what does posix say?
@cindex portability, @code{**} operator and
@cindex @code{*} (asterisk), @code{**} operator
@@ -9970,7 +10635,7 @@ print "something meaningful" > file name
@cindex @command{mawk} utility
@noindent
This produces a syntax error with some versions of Unix
-@command{awk}.@footnote{It happens that Brian Kernighan's
+@command{awk}.@footnote{It happens that BWK
@command{awk}, @command{gawk} and @command{mawk} all ``get it right,''
but you should not rely on this.}
It is necessary to use the following:
@@ -9996,7 +10661,7 @@ BEGIN @{
@end example
@noindent
-It is not defined whether the assignment to @code{a} happens
+It is not defined whether the second assignment to @code{a} happens
before or after the value of @code{a} is retrieved for producing the
concatenated value. The result could be either @samp{don't panic},
or @samp{panic panic}.
@@ -10055,7 +10720,7 @@ Otherwise, it's parsed as follows:
@end display
As mentioned earlier,
-when doing concatenation, @emph{parenthesize}. Otherwise,
+when mixing concatenation with other operators, @emph{parenthesize}. Otherwise,
you're never quite sure what you'll get.
@node Assignment Ops
@@ -10118,8 +10783,8 @@ element. (Such values are called @dfn{rvalues}.)
@cindex variables, types of
It is important to note that variables do @emph{not} have permanent types.
-A variable's type is simply the type of whatever value it happens
-to hold at the moment. In the following program fragment, the variable
+A variable's type is simply the type of whatever value was last assigned
+to it. In the following program fragment, the variable
@code{foo} has a numeric value at first, and a string value later on:
@example
@@ -10220,6 +10885,7 @@ The indices of @code{bar} are practically guaranteed to be different, because
and see @ref{Numeric Functions}, for more information).
This example illustrates an important fact about assignment
operators: the lefthand expression is only evaluated @emph{once}.
+
It is up to the implementation as to which expression is evaluated
first, the lefthand or the righthand.
Consider this example:
@@ -10252,17 +10918,17 @@ to a number.
@caption{Arithmetic Assignment Operators}
@multitable @columnfractions .30 .70
@headitem Operator @tab Effect
-@item @var{lvalue} @code{+=} @var{increment} @tab Adds @var{increment} to the value of @var{lvalue}.
-@item @var{lvalue} @code{-=} @var{decrement} @tab Subtracts @var{decrement} from the value of @var{lvalue}.
-@item @var{lvalue} @code{*=} @var{coefficient} @tab Multiplies the value of @var{lvalue} by @var{coefficient}.
-@item @var{lvalue} @code{/=} @var{divisor} @tab Divides the value of @var{lvalue} by @var{divisor}.
-@item @var{lvalue} @code{%=} @var{modulus} @tab Sets @var{lvalue} to its remainder by @var{modulus}.
+@item @var{lvalue} @code{+=} @var{increment} @tab Add @var{increment} to the value of @var{lvalue}.
+@item @var{lvalue} @code{-=} @var{decrement} @tab Subtract @var{decrement} from the value of @var{lvalue}.
+@item @var{lvalue} @code{*=} @var{coefficient} @tab Multiply the value of @var{lvalue} by @var{coefficient}.
+@item @var{lvalue} @code{/=} @var{divisor} @tab Divide the value of @var{lvalue} by @var{divisor}.
+@item @var{lvalue} @code{%=} @var{modulus} @tab Set @var{lvalue} to its remainder by @var{modulus}.
@cindex common extensions, @code{**=} operator
@cindex extensions, common@comma{} @code{**=} operator
@cindex @command{awk} language, POSIX version
@cindex POSIX @command{awk}
@item @var{lvalue} @code{^=} @var{power} @tab
-@item @var{lvalue} @code{**=} @var{power} @tab Raises @var{lvalue} to the power @var{power}. @value{COMMONEXT}
+@item @var{lvalue} @code{**=} @var{power} @tab Raise @var{lvalue} to the power @var{power}. @value{COMMONEXT}
@end multitable
@end float
@@ -10307,10 +10973,8 @@ A workaround is:
awk '/[=]=/' /dev/null
@end example
-@command{gawk} does not have this problem,
-nor do the other
-freely available versions described in
-@ref{Other Versions}.
+@command{gawk} does not have this problem; BWK @command{awk}
+and @command{mawk} also do not (@pxref{Other Versions}).
@end sidebar
@c ENDOFRANGE exas
@c ENDOFRANGE opas
@@ -10334,11 +10998,10 @@ are convenient abbreviations for very common operations.
@cindex side effects, decrement/increment operators
The operator used for adding one is written @samp{++}. It can be used to increment
a variable either before or after taking its value.
-To pre-increment a variable @code{v}, write @samp{++v}. This adds
+To @dfn{pre-increment} a variable @code{v}, write @samp{++v}. This adds
one to the value of @code{v}---that new value is also the value of the
-expression. (The assignment expression @samp{v += 1} is completely
-equivalent.)
-Writing the @samp{++} after the variable specifies post-increment. This
+expression. (The assignment expression @samp{v += 1} is completely equivalent.)
+Writing the @samp{++} after the variable specifies @dfn{post-increment}. This
increments the variable value just the same; the difference is that the
value of the increment expression itself is the variable's @emph{old}
value. Thus, if @code{foo} has the value four, then the expression @samp{foo++}
@@ -10350,7 +11013,18 @@ The post-increment @samp{foo++} is nearly the same as writing @samp{(foo
+= 1) - 1}. It is not perfectly equivalent because all numbers in
@command{awk} are floating-point---in floating-point, @samp{foo + 1 - 1} does
not necessarily equal @code{foo}. But the difference is minute as
-long as you stick to numbers that are fairly small (less than 10e12).
+long as you stick to numbers that are fairly small (less than
+@iftex
+@math{10^{12}}).
+@end iftex
+@ifnottex
+@ifnotdocbook
+1e12).
+@end ifnotdocbook
+@end ifnottex
+@docbook
+10<superscript>12</superscript>). @c
+@end docbook
@cindex @code{$} (dollar sign), incrementing fields and arrays
@cindex dollar sign (@code{$}), incrementing fields and arrays
@@ -10532,20 +11206,16 @@ compares variables.
@cindex numeric, strings
@cindex strings, numeric
@cindex POSIX @command{awk}, numeric strings and
-The 1992 POSIX standard introduced
+The POSIX standard introduced
the concept of a @dfn{numeric string}, which is simply a string that looks
like a number---for example, @code{@w{" +2"}}. This concept is used
for determining the type of a variable.
The type of the variable is important because the types of two variables
determine how they are compared.
-The various versions of the POSIX standard did not get the rules
-quite right for several editions. Fortunately, as of at least the
-2008 standard (and possibly earlier), the standard has been fixed,
-and variable typing follows these rules:@footnote{@command{gawk} has
-followed these rules for many years,
-and it is gratifying that the POSIX standard is also now correct.}
+Variable typing follows these rules:
+
-@itemize @bullet
+@itemize @value{BULLET}
@item
A numeric constant or the result of a numeric operation has the @var{numeric}
attribute.
@@ -10631,6 +11301,7 @@ STRNUM &&string &numeric &numeric\cr
}}}
@end tex
@ifnottex
+@ifnotdocbook
@display
+----------------------------------------------
| STRING NUMERIC STRNUM
@@ -10643,7 +11314,51 @@ NUMERIC | string numeric numeric
STRNUM | string numeric numeric
--------+----------------------------------------------
@end display
+@end ifnotdocbook
@end ifnottex
+@docbook
+<informaltable>
+<tgroup cols="4">
+<colspec colname="1" align="left"/>
+<colspec colname="2" align="left"/>
+<colspec colname="3" align="left"/>
+<colspec colname="4" align="left"/>
+<thead>
+<row>
+<entry/>
+<entry>STRING</entry>
+<entry>NUMERIC</entry>
+<entry>STRNUM</entry>
+</row>
+</thead>
+
+<tbody>
+<row>
+<entry><emphasis role="bold">STRING</emphasis></entry>
+<entry>string</entry>
+<entry>string</entry>
+<entry>string</entry>
+</row>
+
+<row>
+<entry><emphasis role="bold">NUMERIC</emphasis></entry>
+<entry>string</entry>
+<entry>numeric</entry>
+<entry>numeric</entry>
+</row>
+
+<row>
+<entry><emphasis role="bold">STRNUM</emphasis></entry>
+<entry>string</entry>
+<entry>numeric</entry>
+<entry>numeric</entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
The basic idea is that user input that looks numeric---and @emph{only}
user input---should be treated as numeric, even though it is actually
@@ -10651,7 +11366,7 @@ made of characters and is therefore also a string.
Thus, for example, the string constant @w{@code{" +3.14"}},
when it appears in program source code,
is a string---even though it looks numeric---and
-is @emph{never} treated as number for comparison
+is @emph{never} treated as a number for comparison
purposes.
In short, when one operand is a ``pure'' string, such as a string
@@ -10662,8 +11377,8 @@ This point bears additional emphasis: All user input is made of characters,
and so is first and foremost of @var{string} type; input strings
that look numeric are additionally given the @var{strnum} attribute.
Thus, the six-character input string @w{@samp{ +3.14}} receives the
-@var{strnum} attribute. In contrast, the eight-character literal
-@w{@code{" +3.14"}} appearing in program text is a string constant.
+@var{strnum} attribute. In contrast, the eight characters
+@w{@code{" +3.14"}} appearing in program text comprise a string constant.
The following examples print @samp{1} when the comparison between
the two different constants is true, @samp{0} otherwise:
@@ -10824,7 +11539,7 @@ has the value one if @code{x} contains @samp{foo}, such as
@cindex @code{!} (exclamation point), @code{!~} operator
@cindex exclamation point (@code{!}), @code{!~} operator
The righthand operand of the @samp{~} and @samp{!~} operators may be
-either a regexp constant (@code{/@dots{}/}) or an ordinary
+either a regexp constant (@code{/}@dots{}@code{/}) or an ordinary
expression. In the latter case, the value of the expression as a string is used as a
dynamic regexp (@pxref{Regexp Usage}; also
@pxref{Computed Regexps}).
@@ -10849,7 +11564,9 @@ where this is discussed in more detail.
@subsubsection String Comparison With POSIX Rules
The POSIX standard says that string comparison is performed based
-on the locale's collating order. This is usually very different
+on the locale's @dfn{collating order}. This is the order in which
+characters sort, as defined by the locale (for more discussion,
+@pxref{Ranges and Locales}). This order is usually very different
from the results obtained when doing straight character-by-character
comparison.@footnote{Technically, string comparison is supposed
to behave the same way as if the strings are compared with the C
@@ -10857,7 +11574,7 @@ to behave the same way as if the strings are compared with the C
Because this behavior differs considerably from existing practice,
@command{gawk} only implements it when in POSIX mode (@pxref{Options}).
-Here is an example to illustrate the difference, in an @samp{en_US.UTF-8}
+Here is an example to illustrate the difference, in an @code{en_US.UTF-8}
locale:
@example
@@ -10966,7 +11683,7 @@ is ``short-circuited'' if the result can be determined part way through
its evaluation.
@cindex line continuations
-Statements that use @samp{&&} or @samp{||} can be continued simply
+Statements that end with @samp{&&} or @samp{||} can be continued simply
by putting a newline after them. But you cannot put a newline in front
of either of these operators without using backslash continuation
(@pxref{Statements/Lines}).
@@ -10985,7 +11702,7 @@ program is one way to print lines in between special bracketing lines:
@example
$1 == "START" @{ interested = ! interested; next @}
-interested == 1 @{ print @}
+interested @{ print @}
$1 == "END" @{ interested = ! interested; next @}
@end example
@@ -11005,6 +11722,16 @@ bogus input data, but the point is to illustrate the use of `!',
so we'll leave well enough alone.
@end ignore
+Most commonly, the @samp{!} operator is used in the conditions of
+@code{if} and @code{while} statements, where it often makes more
+sense to phrase the logic in the negative:
+
+@example
+if (! @var{some condition} || @var{some other condition}) @{
+ @var{@dots{} do whatever processing @dots{}}
+@}
+@end example
+
@cindex @code{next} statement
@quotation NOTE
The @code{next} statement is discussed in
@@ -11073,7 +11800,7 @@ However, putting a newline in front
of either character does not work without using backslash continuation
(@pxref{Statements/Lines}).
If @option{--posix} is specified
-(@pxref{Options}), then this extension is disabled.
+(@pxref{Options}), this extension is disabled.
@node Function Calls
@section Function Calls
@@ -11092,6 +11819,8 @@ functions and their descriptions. In addition, you can define
functions for use in your program.
@xref{User-defined},
for instructions on how to do this.
+Finally, @command{gawk} lets you write functions in C or C++
+that may be called from your program: see @ref{Dynamic Extensions}.
@cindex arguments, in function calls
The way to use a function is with a @dfn{function call} expression,
@@ -11133,7 +11862,9 @@ If those arguments are not supplied, the functions
use a reasonable default value.
@xref{Built-in}, for full details. If arguments
are omitted in calls to user-defined functions, then those arguments are
-treated as local variables and initialized to the empty string
+treated as local variables. Such local variables act like the
+empty string if referenced where a string value is required,
+and like zero if referenced where a numeric value is required
(@pxref{User-defined}).
As an advanced feature, @command{gawk} provides indirect function calls,
@@ -11142,12 +11873,12 @@ when you write the source code to your program. We defer discussion of
this feature until later; see @ref{Indirect Calls}.
@cindex side effects, function calls
-Like every other expression, the function call has a value, which is
-computed by the function based on the arguments you give it. In this
-example, the value of @samp{sqrt(@var{argument})} is the square root of
-@var{argument}.
-The following program reads numbers, one number per line, and prints the
-square root of each one:
+Like every other expression, the function call has a value, often
+called the @dfn{return value}, which is computed by the function
+based on the arguments you give it. In this example, the return value
+of @samp{sqrt(@var{argument})} is the square root of @var{argument}.
+The following program reads numbers, one number per line, and prints
+the square root of each one:
@example
$ @kbd{awk '@{ print "The square root of", $1, "is", sqrt($1) @}'}
@@ -11235,28 +11966,28 @@ expression because the first @samp{$} has higher precedence than the
This table presents @command{awk}'s operators, in order of highest
to lowest precedence:
-@c use @code in the items, looks better in TeX w/o all the quotes
-@table @code
-@item (@dots{})
+@c @asis for docbook to come out right
+@table @asis
+@item @code{(}@dots{}@code{)}
Grouping.
@cindex @code{$} (dollar sign), @code{$} field operator
@cindex dollar sign (@code{$}), @code{$} field operator
-@item $
+@item @code{$}
Field reference.
@cindex @code{+} (plus sign), @code{++} operator
@cindex plus sign (@code{+}), @code{++} operator
@cindex @code{-} (hyphen), @code{--} operator
@cindex hyphen (@code{-}), @code{--} operator
-@item ++ --
+@item @code{++ --}
Increment, decrement.
@cindex @code{^} (caret), @code{^} operator
@cindex caret (@code{^}), @code{^} operator
@cindex @code{*} (asterisk), @code{**} operator
@cindex asterisk (@code{*}), @code{**} operator
-@item ^ **
+@item @code{^ **}
Exponentiation. These operators group right-to-left.
@cindex @code{+} (plus sign), @code{+} operator
@@ -11265,7 +11996,7 @@ Exponentiation. These operators group right-to-left.
@cindex hyphen (@code{-}), @code{-} operator
@cindex @code{!} (exclamation point), @code{!} operator
@cindex exclamation point (@code{!}), @code{!} operator
-@item + - !
+@item @code{+ - !}
Unary plus, minus, logical ``not.''
@cindex @code{*} (asterisk), @code{*} operator, as multiplication operator
@@ -11274,17 +12005,17 @@ Unary plus, minus, logical ``not.''
@cindex forward slash (@code{/}), @code{/} operator
@cindex @code{%} (percent sign), @code{%} operator
@cindex percent sign (@code{%}), @code{%} operator
-@item * / %
+@item @code{* / %}
Multiplication, division, remainder.
@cindex @code{+} (plus sign), @code{+} operator
@cindex plus sign (@code{+}), @code{+} operator
@cindex @code{-} (hyphen), @code{-} operator
@cindex hyphen (@code{-}), @code{-} operator
-@item + -
+@item @code{+ -}
Addition, subtraction.
-@item @r{String Concatenation}
+@item String Concatenation
There is no special symbol for concatenation.
The operands are simply written side by side
(@pxref{Concatenation}).
@@ -11310,7 +12041,7 @@ The operands are simply written side by side
@cindex @code{|} (vertical bar), @code{|&} operator (I/O)
@cindex vertical bar (@code{|}), @code{|&} operator (I/O)
@cindex operators, input/output
-@item < <= == != > >= >> | |&
+@item @code{< <= == != > >= >> | |&}
Relational and redirection.
The relational operators and the redirections have the same precedence
level. Characters such as @samp{>} serve both as relationals and as
@@ -11331,26 +12062,26 @@ The correct way to write this statement is @samp{print foo > (a ? b : c)}.
@cindex tilde (@code{~}), @code{~} operator
@cindex @code{!} (exclamation point), @code{!~} operator
@cindex exclamation point (@code{!}), @code{!~} operator
-@item ~ !~
+@item @code{~ !~}
Matching, nonmatching.
@cindex @code{in} operator
-@item in
+@item @code{in}
Array membership.
@cindex @code{&} (ampersand), @code{&&} operator
@cindex ampersand (@code{&}), @code{&&} operator
-@item &&
+@item @code{&&}
Logical ``and.''
@cindex @code{|} (vertical bar), @code{||} operator
@cindex vertical bar (@code{|}), @code{||} operator
-@item ||
+@item @code{||}
Logical ``or.''
@cindex @code{?} (question mark), @code{?:} operator
@cindex question mark (@code{?}), @code{?:} operator
-@item ?:
+@item @code{?:}
Conditional. This operator groups right-to-left.
@cindex @code{+} (plus sign), @code{+=} operator
@@ -11367,7 +12098,7 @@ Conditional. This operator groups right-to-left.
@cindex percent sign (@code{%}), @code{%=} operator
@cindex @code{^} (caret), @code{^=} operator
@cindex caret (@code{^}), @code{^=} operator
-@item = += -= *= /= %= ^= **=
+@item @code{= += -= *= /= %= ^= **=}
Assignment. These operators group right-to-left.
@end table
@@ -11384,27 +12115,102 @@ For maximum portability, do not use them.
@section Where You Are Makes A Difference
@cindex locale, definition of
-Modern systems support the notion of @dfn{locales}: a way to tell
-the system about the local character set and language.
+Modern systems support the notion of @dfn{locales}: a way to tell the
+system about the local character set and language. The ISO C standard
+defines a default @code{"C"} locale, which is an environment that is
+typical of what many C programmers are used to.
Once upon a time, the locale setting used to affect regexp matching
(@pxref{Ranges and Locales}), but this is no longer true.
-Locales can affect record splitting.
-For the normal case of @samp{RS = "\n"}, the locale is largely irrelevant.
-For other single-character record separators, setting @samp{LC_ALL=C}
-in the environment
-will give you much better performance when reading records. Otherwise,
+Locales can affect record splitting. For the normal case of @samp{RS =
+"\n"}, the locale is largely irrelevant. For other single-character
+record separators, setting @samp{LC_ALL=C} in the environment will
+give you much better performance when reading records. Otherwise,
@command{gawk} has to make several function calls, @emph{per input
character}, to find the record terminator.
-According to POSIX, string comparison is also affected by locales
-(similar to regular expressions). The details are presented in
-@ref{POSIX String Comparison}.
+Locales can affect how dates and times are formatted (@pxref{Time
+Functions}). For example, a common way to abbreviate the date September
+4, 2015 in the United States is ``9/4/15.'' In many countries in
+Europe, however, it is abbreviated ``4.9.15.'' Thus, the @samp{%x}
+specification in a @code{"US"} locale might produce @samp{9/4/15},
+while in a @code{"EUROPE"} locale, it might produce @samp{4.9.15}.
+
+According to POSIX, string comparison is also affected by locales (similar
+to regular expressions). The details are presented in @ref{POSIX String
+Comparison}.
Finally, the locale affects the value of the decimal point character
-used when @command{gawk} parses input data. This is discussed in
-detail in @ref{Conversion}.
+used when @command{gawk} parses input data. This is discussed in detail
+in @ref{Conversion}.
+
+@node Expressions Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Expressions are the basic elements of computation in programs. They are
+built from constants, variables, function calls, and combinations of the
+various kinds of values with operators.
+
+@item
+@command{awk} supplies three kinds of constants: numeric, string, and
+regexp. @command{gawk} lets you specify numeric constants in octal
+and hexadecimal (bases 8 and 16) in addition to decimal (base 10).
+In certain contexts, a standalone regexp constant such as @code{/foo/}
+has the same meaning as @samp{$0 ~ /foo/}.
+
+@item
+Variables hold values between uses in computations. A number of built-in
+variables provide information to your @command{awk} program, and a number
+of others let you control how @command{awk} behaves.
+
+@item
+Numbers are automatically converted to strings, and strings to numbers,
+as needed by @command{awk}. Numeric values are converted as if they were
+formatted with @code{sprintf()} using the format in @code{CONVFMT}.
+Locales can influence the conversions.
+
+@item
+@command{awk} provides the usual arithmetic operators (addition,
+subtraction, multiplication, division, modulus), and unary plus and minus.
+It also provides comparison operators, boolean operators, and regexp
+matching operators. String concatenation is accomplished by placing
+two expressions next to each other; there is no explicit operator.
+The three-operand @samp{?:} operator provides an ``if-else'' test within
+expressions.
+
+@item
+Assignment operators provide convenient shorthands for common arithmetic
+operations.
+
+@item
+In @command{awk}, a value is considered to be true if it is nonzero
+@emph{or} non-null. Otherwise, the value is false.
+
+@item
+A value's type is set upon each assignment and may change over its
+lifetime. The type determines how it behaves in comparisons (string
+or numeric).
+
+@item
+Function calls return a value which may be used as part of a larger
+expression. Expressions used to pass parameter values are fully
+evaluated before the function is called. @command{awk} provides
+built-in and user-defined functions; this is described later on in this
+@value{DOCUMENT}.
+
+@item
+Operator precedence specifies the order in which operations are performed,
+unless explicitly overridden by parentheses. @command{awk}'s operator
+precedence is compatible with that of C.
+
+@item
+Locales can affect the format of data as output by an @command{awk}
+program, and occasionally the format for data read as input.
+
+@end itemize
@c ENDOFRANGE exps
@@ -11432,6 +12238,7 @@ building something useful.
* Statements:: Describes the various control statements in
detail.
* Built-in Variables:: Summarizes the built-in variables.
+* Pattern Action Summary:: Patterns and Actions summary.
@end menu
@node Pattern Overview
@@ -11462,10 +12269,10 @@ A single expression. It matches when its value
is nonzero (if a number) or non-null (if a string).
(@xref{Expression Patterns}.)
-@item @var{pat1}, @var{pat2}
+@item @var{begpat}, @var{endpat}
A pair of patterns separated by a comma, specifying a range of records.
-The range includes both the initial record that matches @var{pat1} and
-the final record that matches @var{pat2}.
+The range includes both the initial record that matches @var{begpat} and
+the final record that matches @var{endpat}.
(@xref{Ranges}.)
@item BEGIN
@@ -11477,7 +12284,7 @@ Special patterns for you to supply startup or cleanup actions for your
@item BEGINFILE
@itemx ENDFILE
Special patterns for you to supply startup or cleanup actions to be
-done on a per file basis.
+done on a per-file basis.
(@xref{BEGINFILE/ENDFILE}.)
@item @var{empty}
@@ -11545,7 +12352,7 @@ Contrast this with the following regular expression match, which
accepts any record with a first field that contains @samp{li}:
@example
-$ @kbd{awk '$1 ~ /foo/ @{ print $2 @}' mail-list}
+$ @kbd{awk '$1 ~ /li/ @{ print $2 @}' mail-list}
@print{} 555-5553
@print{} 555-6699
@end example
@@ -11638,7 +12445,7 @@ input record. When a record matches @var{begpat}, the range pattern is
@dfn{turned on} and the range pattern matches this record as well. As long as
the range pattern stays turned on, it automatically matches every input
record read. The range pattern also matches @var{endpat} against every
-input record; when this succeeds, the range pattern is turned off again
+input record; when this succeeds, the range pattern is @dfn{turned off} again
for the following record. Then the range pattern goes back to checking
@var{begpat} against each record.
@@ -11792,7 +12599,7 @@ rule checks the @code{FNR} and @code{NR} variables.
@subsubsection Input/Output from @code{BEGIN} and @code{END} Rules
@cindex input/output, from @code{BEGIN} and @code{END}
-There are several (sometimes subtle) points to remember when doing I/O
+There are several (sometimes subtle) points to be aware of when doing I/O
from a @code{BEGIN} or @code{END} rule.
The first has to do with the value of @code{$0} in a @code{BEGIN}
rule. Because @code{BEGIN} rules are executed before any input is read,
@@ -11817,7 +12624,7 @@ rule. It contains the number of fields from the last input record.
Most probably due to an oversight, the standard does not say that @code{$0}
is also preserved, although logically one would think that it should be.
In fact, @command{gawk} does preserve the value of @code{$0} for use in
-@code{END} rules. Be aware, however, that Brian Kernighan's @command{awk}, and possibly
+@code{END} rules. Be aware, however, that BWK @command{awk}, and possibly
other implementations, do not.
The third point follows from the first two. The meaning of @samp{print}
@@ -11853,8 +12660,19 @@ This @value{SECTION} describes a @command{gawk}-specific feature.
Two special kinds of rule, @code{BEGINFILE} and @code{ENDFILE}, give
you ``hooks'' into @command{gawk}'s command-line file processing loop.
-As with the @code{BEGIN} and @code{END} rules (@pxref{BEGIN/END}), all
-@code{BEGINFILE} rules in a program are merged, in the order they are
+As with the @code{BEGIN} and @code{END} rules
+@ifnottex
+@ifnotdocbook
+(@pxref{BEGIN/END}),
+@end ifnotdocbook
+@end ifnottex
+@iftex
+(see the previous section),
+@end iftex
+@ifdocbook
+(see the previous section),
+@end ifdocbook
+all @code{BEGINFILE} rules in a program are merged, in the order they are
read by @command{gawk}, and all @code{ENDFILE} rules are merged as well.
The body of the @code{BEGINFILE} rules is executed just before
@@ -11864,7 +12682,7 @@ is set to the name of the current file, and @code{FNR} is set to zero.
The @code{BEGINFILE} rule provides you the opportunity to accomplish two tasks
that would otherwise be difficult or impossible to perform:
-@itemize @bullet
+@itemize @value{BULLET}
@item
You can test if the file is readable. Normally, it is a fatal error if a
file named on the command line cannot be opened for reading. However,
@@ -11882,10 +12700,11 @@ the file entirely. Otherwise, @command{gawk} exits with the usual
fatal error.
@item
-If you have written extensions that modify the record handling (by inserting
-an ``input parser''), you can invoke them at this point, before @command{gawk}
-has started processing the file. (This is a @emph{very} advanced feature,
-currently used only by the @uref{http://gawkextlib.sourceforge.net, @code{gawkextlib} project}.)
+If you have written extensions that modify the record handling (by
+inserting an ``input parser,'' @pxref{Input Parsers}), you can invoke
+them at this point, before @command{gawk} has started processing the file.
+(This is a @emph{very} advanced feature, currently used only by the
+@uref{http://gawkextlib.sourceforge.net, @code{gawkextlib} project}.)
@end itemize
The @code{ENDFILE} rule is called when @command{gawk} has finished processing
@@ -11907,8 +12726,8 @@ statement (@pxref{Nextfile Statement}) is allowed only inside a
@cindex @code{getline} statement, @code{BEGINFILE}/@code{ENDFILE} patterns and
The @code{getline} statement (@pxref{Getline}) is restricted inside
-both @code{BEGINFILE} and @code{ENDFILE}. Only the @samp{getline
-@var{variable} < @var{file}} form is allowed.
+both @code{BEGINFILE} and @code{ENDFILE}: only redirected
+forms of @code{getline} are allowed.
@code{BEGINFILE} and @code{ENDFILE} are @command{gawk} extensions.
In most other @command{awk} implementations, or if @command{gawk} is in
@@ -11968,7 +12787,7 @@ into the body of the @command{awk} program.
@cindex shells, quoting
The most common method is to use shell quoting to substitute
the variable's value into the program inside the script.
-For example, in the following program:
+For example, consider the following program:
@example
printf "Enter search pattern: "
@@ -11978,7 +12797,7 @@ awk "/$pattern/ "'@{ nmatches++ @}
@end example
@noindent
-the @command{awk} program consists of two pieces of quoted text
+The @command{awk} program consists of two pieces of quoted text
that are concatenated together to form the program.
The first part is double-quoted, which allows substitution of
the @code{pattern} shell variable inside the quotes.
@@ -11992,8 +12811,8 @@ match up the quotes when reading the program.
A better method is to use @command{awk}'s variable assignment feature
(@pxref{Assignment Options})
-to assign the shell variable's value to an @command{awk} variable's
-value. Then use dynamic regexps to match the pattern
+to assign the shell variable's value to an @command{awk} variable.
+Then use dynamic regexps to match the pattern
(@pxref{Computed Regexps}).
The following shows how to redo the
previous example using this technique:
@@ -12031,13 +12850,13 @@ both) may be omitted. The purpose of the @dfn{action} is to tell
@command{awk} what to do once a match for the pattern is found. Thus,
in outline, an @command{awk} program generally looks like this:
-@example
-@r{[}@var{pattern}@r{]} @{ @var{action} @}
- @var{pattern} @r{[}@{ @var{action} @}@r{]}
+@display
+[@var{pattern}] @code{@{ @var{action} @}}
+ @var{pattern} [@code{@{ @var{action} @}}]
@dots{}
-function @var{name}(@var{args}) @{ @dots{} @}
+@code{function @var{name}(@var{args}) @{ @dots{} @}}
@dots{}
-@end example
+@end display
@cindex @code{@{@}} (braces), actions and
@cindex braces (@code{@{@}}), actions and
@@ -12046,11 +12865,11 @@ function @var{name}(@var{args}) @{ @dots{} @}
@cindex @code{;} (semicolon), separating statements in actions
@cindex semicolon (@code{;}), separating statements in actions
An action consists of one or more @command{awk} @dfn{statements}, enclosed
-in curly braces (@samp{@{@dots{}@}}). Each statement specifies one
+in braces (@samp{@{@r{@dots{}}@}}). Each statement specifies one
thing to do. The statements are separated by newlines or semicolons.
-The curly braces around an action must be used even if the action
+The braces around an action must be used even if the action
contains only one statement, or if it contains no statements at
-all. However, if you omit the action entirely, omit the curly braces as
+all. However, if you omit the action entirely, omit the braces as
well. An omitted action is equivalent to @samp{@{ print $0 @}}:
@example
@@ -12076,10 +12895,9 @@ programs. The @command{awk} language gives you C-like constructs
special ones (@pxref{Statements}).
@item Compound statements
-Consist of one or more statements enclosed in
-curly braces. A compound statement is used in order to put several
-statements together in the body of an @code{if}, @code{while}, @code{do},
-or @code{for} statement.
+Enclose one or more statements in braces. A compound statement
+is used in order to put several statements together in the body of an
+@code{if}, @code{while}, @code{do}, or @code{for} statement.
@item Input statements
Use the @code{getline} command
@@ -12125,7 +12943,7 @@ Many control statements contain other statements. For example, the
@code{if} statement contains another statement that may or may not be
executed. The contained statement is called the @dfn{body}.
To include more than one statement in the body, group them into a
-single @dfn{compound statement} with curly braces, separating them with
+single @dfn{compound statement} with braces, separating them with
newlines or semicolons.
@menu
@@ -12153,9 +12971,9 @@ newlines or semicolons.
The @code{if}-@code{else} statement is @command{awk}'s decision-making
statement. It looks like this:
-@example
-if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]}
-@end example
+@display
+@code{if (@var{condition}) @var{then-body}} [@code{else @var{else-body}}]
+@end display
@noindent
The @var{condition} is an expression that controls what the rest of the
@@ -12179,7 +12997,7 @@ if the value of @code{x} is evenly divisible by two), then the first
statement is executed.
If the @code{else} keyword appears on the same line as @var{then-body} and
@var{then-body} is not a compound statement (i.e., not surrounded by
-curly braces), then a semicolon must separate @var{then-body} from
+braces), then a semicolon must separate @var{then-body} from
the @code{else}.
To illustrate this, the previous example can be rewritten as:
@@ -12413,6 +13231,8 @@ for more information on this version of the @code{for} loop.
@cindex @code{default} keyword
This @value{SECTION} describes a @command{gawk}-specific feature.
+If @command{gawk} is in compatibility mode (@pxref{Options}),
+it is not available.
The @code{switch} statement allows the evaluation of an expression and
the execution of statements based on a @code{case} match. Case statements
@@ -12443,36 +13263,38 @@ case is made, the case statement bodies execute until a @code{break},
or the end of the @code{switch} statement itself. For example:
@example
-switch (NR * 2 + 1) @{
-case 3:
-case "11":
- print NR - 1
- break
-
-case /2[[:digit:]]+/:
- print NR
-
-default:
- print NR + 1
-
-case -1:
- print NR * -1
+while ((c = getopt(ARGC, ARGV, "aksx")) != -1) @{
+ switch (c) @{
+ case "a":
+ # report size of all files
+ all_files = TRUE;
+ break
+ case "k":
+ BLOCK_SIZE = 1024 # 1K block size
+ break
+ case "s":
+ # do sums only
+ sum_only = TRUE
+ break
+ case "x":
+ # don't cross filesystems
+ fts_flags = or(fts_flags, FTS_XDEV)
+ break
+ case "?":
+ default:
+ usage()
+ break
+ @}
@}
@end example
Note that if none of the statements specified above halt execution
of a matched @code{case} statement, execution falls through to the
-next @code{case} until execution halts. In the above example, for
-any case value starting with @samp{2} followed by one or more digits,
-the @code{print} statement is executed and then falls through into the
-@code{default} section, executing its @code{print} statement. In turn,
-the @minus{}1 case will also be executed since the @code{default} does
-not halt execution.
-
-This @code{switch} statement is a @command{gawk} extension.
-If @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not available.
+next @code{case} until execution halts. In the above example, the
+@code{case} for @code{"?"} falls through to the @code{default}
+case, which is to call a function named @code{usage()}.
+(The @code{getopt()} function being called here is
+described in @ref{Getopt Function}.)
@node Break Statement
@subsection The @code{break} Statement
@@ -12488,15 +13310,15 @@ numbers:
@example
# find smallest divisor of num
@{
- num = $1
- for (div = 2; div * div <= num; div++) @{
- if (num % div == 0)
- break
- @}
- if (num % div == 0)
- printf "Smallest divisor of %d is %d\n", num, div
- else
- printf "%d is prime\n", num
+ num = $1
+ for (div = 2; div * div <= num; div++) @{
+ if (num % div == 0)
+ break
+ @}
+ if (num % div == 0)
+ printf "Smallest divisor of %d is %d\n", num, div
+ else
+ printf "%d is prime\n", num
@}
@end example
@@ -12514,17 +13336,17 @@ an @code{if}:
@example
# find smallest divisor of num
@{
- num = $1
- for (div = 2; ; div++) @{
- if (num % div == 0) @{
- printf "Smallest divisor of %d is %d\n", num, div
- break
- @}
- if (div * div > num) @{
- printf "%d is prime\n", num
- break
+ num = $1
+ for (div = 2; ; div++) @{
+ if (num % div == 0) @{
+ printf "Smallest divisor of %d is %d\n", num, div
+ break
+ @}
+ if (div * div > num) @{
+ printf "%d is prime\n", num
+ break
+ @}
@}
- @}
@}
@end example
@@ -12546,7 +13368,7 @@ historical implementations of @command{awk} treated the @code{break}
statement outside of a loop as if it were a @code{next} statement
(@pxref{Next Statement}).
@value{DARKCORNER}
-Recent versions of Brian Kernighan's @command{awk} no longer allow this usage,
+Recent versions of BWK @command{awk} no longer allow this usage,
nor does @command{gawk}.
@node Continue Statement
@@ -12595,7 +13417,8 @@ BEGIN @{
@end example
@noindent
-This program loops forever once @code{x} reaches 5.
+This program loops forever once @code{x} reaches 5, since
+the increment (@samp{x++}) is never reached.
@c @cindex @code{continue}, outside of loops
@c @cindex historical features
@@ -12612,7 +13435,7 @@ statement outside a loop: as if it were a @code{next}
statement
(@pxref{Next Statement}).
@value{DARKCORNER}
-Recent versions of Brian Kernighan's @command{awk} no longer work this way, nor
+Recent versions of BWK @command{awk} no longer work this way, nor
does @command{gawk}.
@node Next Statement
@@ -12645,9 +13468,8 @@ the beginning, in the following manner:
@example
NF != 4 @{
- err = sprintf("%s:%d: skipped: NF != 4\n", FILENAME, FNR)
- print err > "/dev/stderr"
- next
+ printf("%s:%d: skipped: NF != 4\n", FILENAME, FNR) > "/dev/stderr"
+ next
@}
@end example
@@ -12673,16 +13495,14 @@ The @code{next} statement is not allowed inside @code{BEGINFILE} and
@cindex POSIX @command{awk}, @code{next}/@code{nextfile} statements and
@cindex @code{next} statement, user-defined functions and
@cindex functions, user-defined, @code{next}/@code{nextfile} statements and
-According to the POSIX standard, the behavior is undefined if
-the @code{next} statement is used in a @code{BEGIN} or @code{END} rule.
-@command{gawk} treats it as a syntax error.
-Although POSIX permits it,
-some other @command{awk} implementations don't allow the @code{next}
-statement inside function bodies
-(@pxref{User-defined}).
-Just as with any other @code{next} statement, a @code{next} statement inside a
-function body reads the next record and starts processing it with the
-first rule in the program.
+According to the POSIX standard, the behavior is undefined if the
+@code{next} statement is used in a @code{BEGIN} or @code{END} rule.
+@command{gawk} treats it as a syntax error. Although POSIX permits it,
+most other @command{awk} implementations don't allow the @code{next}
+statement inside function bodies (@pxref{User-defined}). Just as with any
+other @code{next} statement, a @code{next} statement inside a function
+body reads the next record and starts processing it with the first rule
+in the program.
@node Nextfile Statement
@subsection The @code{nextfile} Statement
@@ -12692,34 +13512,32 @@ The @code{nextfile} statement
is similar to the @code{next} statement.
However, instead of abandoning processing of the current record, the
@code{nextfile} statement instructs @command{awk} to stop processing the
-current data file.
+current @value{DF}.
Upon execution of the @code{nextfile} statement,
@code{FILENAME} is
-updated to the name of the next data file listed on the command line,
+updated to the name of the next @value{DF} listed on the command line,
@code{FNR} is reset to one,
and processing
starts over with the first rule in the program.
If the @code{nextfile} statement causes the end of the input to be reached,
then the code in any @code{END} rules is executed. An exception to this is
when @code{nextfile} is invoked during execution of any statement in an
-@code{END} rule; In this case, it causes the program to stop immediately. @xref{BEGIN/END}.
+@code{END} rule; in this case, it causes the program to stop immediately.
+@xref{BEGIN/END}.
-The @code{nextfile} statement is useful when there are many data files
+The @code{nextfile} statement is useful when there are many @value{DF}s
to process but it isn't necessary to process every record in every file.
Without @code{nextfile},
-in order to move on to the next data file, a program
+in order to move on to the next @value{DF}, a program
would have to continue scanning the unwanted records. The @code{nextfile}
statement accomplishes this much more efficiently.
In @command{gawk}, execution of @code{nextfile} causes additional things
-to happen:
-any @code{ENDFILE} rules are executed except in the case as
-mentioned below,
-@code{ARGIND} is incremented,
-and
-any @code{BEGINFILE} rules are executed.
-(@code{ARGIND} hasn't been introduced yet. @xref{Built-in Variables}.)
+to happen: any @code{ENDFILE} rules are executed if @command{gawk} is
+not currently in an @code{END} or @code{BEGINFILE} rule, @code{ARGIND} is
+incremented, and any @code{BEGINFILE} rules are executed. (@code{ARGIND}
+hasn't been introduced yet. @xref{Built-in Variables}.)
With @command{gawk}, @code{nextfile} is useful inside a @code{BEGINFILE}
rule to skip over a file that would otherwise cause @command{gawk}
@@ -12743,7 +13561,7 @@ See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@cindex @code{nextfile} statement, user-defined functions and
@cindex Brian Kernighan's @command{awk}
@cindex @command{mawk} utility
-The current version of the Brian Kernighan's @command{awk}, and @command{mawk} (@pxref{Other
+The current version of BWK @command{awk}, and @command{mawk} (@pxref{Other
Versions}) also support @code{nextfile}. However, they don't allow the
@code{nextfile} statement inside function bodies (@pxref{User-defined}).
@command{gawk} does; a @code{nextfile} inside a function body reads the
@@ -12758,9 +13576,9 @@ The @code{exit} statement causes @command{awk} to immediately stop
executing the current rule and to stop processing input; any remaining input
is ignored. The @code{exit} statement is written as follows:
-@example
-exit @r{[}@var{return code}@r{]}
-@end example
+@display
+@code{exit} [@var{return code}]
+@end display
@cindex @code{BEGIN} pattern, @code{exit} statement and
@cindex @code{END} pattern, @code{exit} statement and
@@ -12793,8 +13611,7 @@ status code for the @command{awk} process. If no argument is supplied,
In the case where an argument
is supplied to a first @code{exit} statement, and then @code{exit} is
called a second time from an @code{END} rule with no argument,
-@command{awk} uses the previously supplied exit value.
-@value{DARKCORNER}
+@command{awk} uses the previously supplied exit value. @value{DARKCORNER}
@xref{Exit Status}, for more information.
@cindex programming conventions, @code{exit} statement
@@ -12806,12 +13623,12 @@ in the following example:
@example
BEGIN @{
- if (("date" | getline date_now) <= 0) @{
- print "Can't get system date" > "/dev/stderr"
- exit 1
- @}
- print "current date is", date_now
- close("date")
+ if (("date" | getline date_now) <= 0) @{
+ print "Can't get system date" > "/dev/stderr"
+ exit 1
+ @}
+ print "current date is", date_now
+ close("date")
@}
@end example
@@ -12842,9 +13659,9 @@ automatically by @command{awk}, so that they carry information from the
internal workings of @command{awk} to your program.
@cindex @command{gawk}, built-in variables and
-This @value{SECTION} documents all the built-in variables of
-@command{gawk}, most of which are also documented in the chapters
-describing their areas of activity.
+This @value{SECTION} documents all of @command{gawk}'s built-in variables,
+most of which are also documented in the @value{CHAPTER}s describing
+their areas of activity.
@menu
* User-modified:: Built-in variables that you change to control
@@ -12862,44 +13679,38 @@ describing their areas of activity.
@cindex user-modifiable variables
The following is an alphabetical list of variables that you can change to
-control how @command{awk} does certain things. The variables that are
-specific to @command{gawk} are marked with a pound sign@w{ (@samp{#}).}
+control how @command{awk} does certain things.
+
+The variables that are specific to @command{gawk} are marked with a pound
+sign (@samp{#}). These variables are @command{gawk} extensions. In other
+@command{awk} implementations, or if @command{gawk} is in compatibility
+mode (@pxref{Options}), they are not special. (Any exceptions are noted
+in the description of each variable.)
@table @code
@cindex @code{BINMODE} variable
@cindex binary input/output
@cindex input/output, binary
-@item BINMODE #
-On non-POSIX systems, this variable specifies use of binary mode for all I/O.
-Numeric values of one, two, or three specify that input files, output files, or
-all files, respectively, should use binary I/O.
-A numeric value less than zero is treated as zero, and a numeric value greater than
-three is treated as three.
-Alternatively,
-string values of @code{"r"} or @code{"w"} specify that input files and
-output files, respectively, should use binary I/O.
-A string value of @code{"rw"} or @code{"wr"} indicates that all
-files should use binary I/O.
-Any other string value is treated the same as @code{"rw"},
-but causes @command{gawk}
-to generate a warning message.
-@code{BINMODE} is described in more detail in
-@ref{PC Using}.
-
@cindex differences in @command{awk} and @command{gawk}, @code{BINMODE} variable
-This variable is a @command{gawk} extension.
-In other @command{awk} implementations
-(except @command{mawk},
-@pxref{Other Versions}),
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
+@item BINMODE #
+On non-POSIX systems, this variable specifies use of binary mode
+for all I/O. Numeric values of one, two, or three specify that input
+files, output files, or all files, respectively, should use binary I/O.
+A numeric value less than zero is treated as zero, and a numeric value
+greater than three is treated as three. Alternatively, string values
+of @code{"r"} or @code{"w"} specify that input files and output files,
+respectively, should use binary I/O. A string value of @code{"rw"} or
+@code{"wr"} indicates that all files should use binary I/O. Any other
+string value is treated the same as @code{"rw"}, but causes @command{gawk}
+to generate a warning message. @code{BINMODE} is described in more
+detail in @ref{PC Using}. @command{mawk} (@pxref{Other Versions})
+also supports this variable, but only using numeric values.
@cindex @code{CONVFMT} variable
@cindex POSIX @command{awk}, @code{CONVFMT} variable and
@cindex numbers, converting, to strings
@cindex strings, converting, numbers to
-@item CONVFMT
+@item @code{CONVFMT}
This string controls conversion of numbers to
strings (@pxref{Conversion}).
It works by being passed, in effect, as the first argument to the
@@ -12914,40 +13725,29 @@ Its default value is @code{"%.6g"}.
@cindex field separators, @code{FIELDWIDTHS} variable and
@cindex separators, field, @code{FIELDWIDTHS} variable and
@item FIELDWIDTHS #
-This is a space-separated list of columns that tells @command{gawk}
+A space-separated list of columns that tells @command{gawk}
how to split input with fixed columnar boundaries.
Assigning a value to @code{FIELDWIDTHS}
overrides the use of @code{FS} and @code{FPAT} for field splitting.
@xref{Constant Size}, for more information.
-If @command{gawk} is in compatibility mode
-(@pxref{Options}), then @code{FIELDWIDTHS}
-has no special meaning, and field-splitting operations occur based
-exclusively on the value of @code{FS}.
-
@cindex @command{gawk}, @code{FPAT} variable in
@cindex @code{FPAT} variable
@cindex differences in @command{awk} and @command{gawk}, @code{FPAT} variable
@cindex field separators, @code{FPAT} variable and
@cindex separators, field, @code{FPAT} variable and
@item FPAT #
-This is a regular expression (as a string) that tells @command{gawk}
+A regular expression (as a string) that tells @command{gawk}
to create the fields based on text that matches the regular expression.
Assigning a value to @code{FPAT}
overrides the use of @code{FS} and @code{FIELDWIDTHS} for field splitting.
@xref{Splitting By Content}, for more information.
-If @command{gawk} is in compatibility mode
-(@pxref{Options}), then @code{FPAT}
-has no special meaning, and field-splitting operations occur based
-exclusively on the value of @code{FS}.
-
@cindex @code{FS} variable
@cindex separators, field
@cindex field separators
@item FS
-This is the input field separator
-(@pxref{Field Separators}).
+The input field separator (@pxref{Field Separators}).
The value is a single-character string or a multicharacter regular
expression that matches the separations between fields in an input
record. If the value is the null string (@code{""}), then each
@@ -12997,18 +13797,13 @@ and it does not affect field splitting when using a single-character
field separator.
@xref{Case-sensitivity}.
-If @command{gawk} is in compatibility mode
-(@pxref{Options}),
-then @code{IGNORECASE} has no special meaning. Thus, string
-and regexp operations are always case-sensitive.
-
@cindex @command{gawk}, @code{LINT} variable in
@cindex @code{LINT} variable
@cindex differences in @command{awk} and @command{gawk}, @code{LINT} variable
@cindex lint checking
@item LINT #
When this variable is true (nonzero or non-null), @command{gawk}
-behaves as if the @option{--lint} command-line option is in effect.
+behaves as if the @option{--lint} command-line option is in effect
(@pxref{Options}).
With a value of @code{"fatal"}, lint warnings become fatal errors.
With a value of @code{"invalid"}, only warnings about things that are
@@ -13029,7 +13824,7 @@ of @command{awk} being executed.
@cindex numbers, converting, to strings
@cindex strings, converting, numbers to
@item OFMT
-This string controls conversion of numbers to
+Controls conversion of numbers to
strings (@pxref{Conversion}) for
printing with the @code{print} statement. It works by being passed
as the first argument to the @code{sprintf()} function
@@ -13050,27 +13845,26 @@ default value is @w{@code{" "}}, a string consisting of a single space.
@cindex @code{ORS} variable
@item ORS
-This is the output record separator. It is output at the end of every
+The output record separator. It is output at the end of every
@code{print} statement. Its default value is @code{"\n"}, the newline
character. (@xref{Output Separators}.)
@cindex @code{PREC} variable
@item PREC #
The working precision of arbitrary precision floating-point numbers,
-53 bits by default (@pxref{Setting Precision}).
+53 bits by default (@pxref{Setting precision}).
@cindex @code{ROUNDMODE} variable
@item ROUNDMODE #
The rounding mode to use for arbitrary precision arithmetic on
numbers, by default @code{"N"} (@samp{roundTiesToEven} in
-the IEEE-754 standard)
-(@pxref{Setting Rounding Mode}).
+the IEEE 754 standard; @pxref{Setting the rounding mode}).
@cindex @code{RS} variable
@cindex separators, for records
@cindex record separators
-@item RS
-This is @command{awk}'s input record separator. Its default value is a string
+@item @code{RS}
+The input record separator. Its default value is a string
containing a single newline character, which means that an input record
consists of a single line of text.
It can also be the null string, in which case records are separated by
@@ -13089,8 +13883,8 @@ just the first character of @code{RS}'s value is used.
@cindex @code{SUBSEP} variable
@cindex separators, subscript
@cindex subscript separators
-@item SUBSEP
-This is the subscript separator. It has the default value of
+@item @code{SUBSEP}
+The subscript separator. It has the default value of
@code{"\034"} and is used to separate the parts of the indices of a
multidimensional array. Thus, the expression @code{@w{foo["A", "B"]}}
really accesses @code{foo["A\034B"]}
@@ -13101,18 +13895,12 @@ really accesses @code{foo["A\034B"]}
@cindex differences in @command{awk} and @command{gawk}, @code{TEXTDOMAIN} variable
@cindex internationalization, localization
@item TEXTDOMAIN #
-This variable is used for internationalization of programs at the
+Used for internationalization of programs at the
@command{awk} level. It sets the default text domain for specially
marked string constants in the source text, as well as for the
@code{dcgettext()}, @code{dcngettext()} and @code{bindtextdomain()} functions
(@pxref{Internationalization}).
The default value of @code{TEXTDOMAIN} is @code{"messages"}.
-
-This variable is a @command{gawk} extension.
-In other @command{awk} implementations,
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
@end table
@c ENDOFRANGE bvar
@c ENDOFRANGE varb
@@ -13128,14 +13916,19 @@ it is not special.
@cindex variables, built-in, conveying information
The following is an alphabetical list of variables that @command{awk}
sets automatically on certain occasions in order to provide
-information to your program. The variables that are specific to
-@command{gawk} are marked with a pound sign@w{ (@samp{#}).}
+information to your program.
-@table @code
+The variables that are specific to @command{gawk} are marked with a pound
+sign (@samp{#}). These variables are @command{gawk} extensions. In other
+@command{awk} implementations or if @command{gawk} is in compatibility
+mode (@pxref{Options}), they are not special.
+
+@c @asis for docbook
+@table @asis
@cindex @code{ARGC}/@code{ARGV} variables
@cindex arguments, command-line
@cindex command line, arguments
-@item ARGC@r{,} ARGV
+@item @code{ARGC}, @code{ARGV}
The command-line arguments available to @command{awk} programs are stored in
an array called @code{ARGV}. @code{ARGC} is the number of command-line
arguments present. @xref{Other Arguments}.
@@ -13175,36 +13968,30 @@ about how @command{awk} uses these variables.
@cindex @code{ARGIND} variable
@cindex differences in @command{awk} and @command{gawk}, @code{ARGIND} variable
-@item ARGIND #
+@item @code{ARGIND #}
The index in @code{ARGV} of the current file being processed.
-Every time @command{gawk} opens a new data file for processing, it sets
-@code{ARGIND} to the index in @code{ARGV} of the file name.
+Every time @command{gawk} opens a new @value{DF} for processing, it sets
+@code{ARGIND} to the index in @code{ARGV} of the @value{FN}.
When @command{gawk} is processing the input files,
@samp{FILENAME == ARGV[ARGIND]} is always true.
@cindex files, processing@comma{} @code{ARGIND} variable and
This variable is useful in file processing; it allows you to tell how far
-along you are in the list of data files as well as to distinguish between
-successive instances of the same file name on the command line.
+along you are in the list of @value{DF}s as well as to distinguish between
+successive instances of the same @value{FN} on the command line.
@cindex file names, distinguishing
While you can change the value of @code{ARGIND} within your @command{awk}
program, @command{gawk} automatically sets it to a new value when the
next file is opened.
-This variable is a @command{gawk} extension.
-In other @command{awk} implementations,
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
-
@cindex @code{ENVIRON} array
@cindex environment variables, in @code{ENVIRON} array
-@item ENVIRON
+@item @code{ENVIRON}
An associative array containing the values of the environment. The array
indices are the environment variable names; the elements are the values of
the particular environment variables. For example,
-@code{ENVIRON["HOME"]} might be @file{/home/arnold}.
+@code{ENVIRON["HOME"]} might be @code{/home/arnold}.
For POSIX @command{awk}, changing this array does not affect the
environment passed on to any programs that @command{awk} may spawn via
@@ -13219,69 +14006,55 @@ executable programs.
Some operating systems may not have environment variables.
On such systems, the @code{ENVIRON} array is empty (except for
-@w{@code{ENVIRON["AWKPATH"]}},
-@pxref{AWKPATH Variable} and
-@w{@code{ENVIRON["AWKLIBPATH"]}},
+@w{@code{ENVIRON["AWKPATH"]}} and
+@w{@code{ENVIRON["AWKLIBPATH"]}};
+@pxref{AWKPATH Variable}, and
@pxref{AWKLIBPATH Variable}).
@cindex @command{gawk}, @code{ERRNO} variable in
@cindex @code{ERRNO} variable
@cindex differences in @command{awk} and @command{gawk}, @code{ERRNO} variable
@cindex error handling, @code{ERRNO} variable and
-@item ERRNO #
-If a system error occurs during a redirection for @code{getline},
-during a read for @code{getline}, or during a @code{close()} operation,
-then @code{ERRNO} contains a string describing the error.
-
-In addition, @command{gawk} clears @code{ERRNO}
-before opening each command-line input file. This enables checking if
-the file is readable inside a @code{BEGINFILE} pattern (@pxref{BEGINFILE/ENDFILE}).
-
-Otherwise,
-@code{ERRNO} works similarly to the C variable @code{errno}.
-Except for the case just mentioned,
-@command{gawk} @emph{never} clears it (sets it
-to zero or @code{""}). Thus, you should only expect its value
-to be meaningful when an I/O operation returns a failure
-value, such as @code{getline} returning @minus{}1.
-You are, of course, free to clear it yourself before doing an
-I/O operation.
-
-This variable is a @command{gawk} extension.
-In other @command{awk} implementations,
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
+@item @code{ERRNO #}
+If a system error occurs during a redirection for @code{getline}, during
+a read for @code{getline}, or during a @code{close()} operation, then
+@code{ERRNO} contains a string describing the error.
+
+In addition, @command{gawk} clears @code{ERRNO} before opening each
+command-line input file. This enables checking if the file is readable
+inside a @code{BEGINFILE} pattern (@pxref{BEGINFILE/ENDFILE}).
+
+Otherwise, @code{ERRNO} works similarly to the C variable @code{errno}.
+Except for the case just mentioned, @command{gawk} @emph{never} clears
+it (sets it to zero or @code{""}). Thus, you should only expect its
+value to be meaningful when an I/O operation returns a failure value,
+such as @code{getline} returning @minus{}1. You are, of course, free
+to clear it yourself before doing an I/O operation.
@cindex @code{FILENAME} variable
@cindex dark corner, @code{FILENAME} variable
-@item FILENAME
-The name of the file that @command{awk} is currently reading.
-When no data files are listed on the command line, @command{awk} reads
-from the standard input and @code{FILENAME} is set to @code{"-"}.
-@code{FILENAME} is changed each time a new file is read
-(@pxref{Reading Files}).
-Inside a @code{BEGIN} rule, the value of @code{FILENAME} is
-@code{""}, since there are no input files being processed
-yet.@footnote{Some early implementations of Unix @command{awk} initialized
-@code{FILENAME} to @code{"-"}, even if there were data files to be
-processed. This behavior was incorrect and should not be relied
-upon in your programs.}
-@value{DARKCORNER}
-Note, though, that using @code{getline}
-(@pxref{Getline})
-inside a @code{BEGIN} rule can give
-@code{FILENAME} a value.
+@item @code{FILENAME}
+The name of the current input file. When no @value{DF}s are listed
+on the command line, @command{awk} reads from the standard input and
+@code{FILENAME} is set to @code{"-"}. @code{FILENAME} changes each
+time a new file is read (@pxref{Reading Files}). Inside a @code{BEGIN}
+rule, the value of @code{FILENAME} is @code{""}, since there are no input
+files being processed yet.@footnote{Some early implementations of Unix
+@command{awk} initialized @code{FILENAME} to @code{"-"}, even if there
+were @value{DF}s to be processed. This behavior was incorrect and should
+not be relied upon in your programs.} @value{DARKCORNER} Note, though,
+that using @code{getline} (@pxref{Getline}) inside a @code{BEGIN} rule
+can give @code{FILENAME} a value.
@cindex @code{FNR} variable
-@item FNR
+@item @code{FNR}
The current record number in the current file. @code{FNR} is
incremented each time a new record is read
(@pxref{Records}). It is reinitialized
to zero each time a new input file is started.
@cindex @code{NF} variable
-@item NF
+@item @code{NF}
The number of fields in the current input record.
@code{NF} is set each time a new record is read, when a new field is
created or when @code{$0} changes (@pxref{Fields}).
@@ -13295,18 +14068,18 @@ current record. @xref{Changing Fields}.
@cindex @code{FUNCTAB} array
@cindex @command{gawk}, @code{FUNCTAB} array in
@cindex differences in @command{awk} and @command{gawk}, @code{FUNCTAB} variable
-@item FUNCTAB #
+@item @code{FUNCTAB #}
An array whose indices and corresponding values are the names of all
-the user-defined or extension functions in the program.
+the built-in, user-defined and extension functions in the program.
@quotation NOTE
Attempting to use the @code{delete} statement with the @code{FUNCTAB}
-array will cause a fatal error. Any attempt to assign to an element of
-the @code{FUNCTAB} array will also cause a fatal error.
+array causes a fatal error. Any attempt to assign to an element of
+@code{FUNCTAB} also causes a fatal error.
@end quotation
@cindex @code{NR} variable
-@item NR
+@item @code{NR}
The number of input records @command{awk} has processed since
the beginning of the program's execution
(@pxref{Records}).
@@ -13315,7 +14088,7 @@ the beginning of the program's execution
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
@cindex differences in @command{awk} and @command{gawk}, @code{PROCINFO} array
-@item PROCINFO #
+@item @code{PROCINFO #}
The elements of this array provide access to information about the
running @command{awk} program.
The following elements (listed alphabetically)
@@ -13338,16 +14111,22 @@ or @code{"FPAT"} if field matching with @code{FPAT} is in effect.
@item PROCINFO["identifiers"]
@cindex program identifiers
-A subarray, indexed by the names of all identifiers used in the
-text of the AWK program. For each identifier, the value of the element is one of the following:
+A subarray, indexed by the names of all identifiers used in the text of
+the AWK program. An @dfn{identifier} is simply the name of a variable
+(be it scalar or array), built-in function, user-defined function, or
+extension function. For each identifier, the value of the element is
+one of the following:
@table @code
@item "array"
The identifier is an array.
+@item "builtin"
+The identifier is a built-in function.
+
@item "extension"
The identifier is an extension function loaded via
-@code{@@load}.
+@code{@@load} or @option{-l}.
@item "scalar"
The identifier is a scalar.
@@ -13384,7 +14163,7 @@ The parent process ID of the current process.
@item PROCINFO["sorted_in"]
If this element exists in @code{PROCINFO}, its value controls the
order in which array indices will be processed by
-@samp{for (index in array) @dots{}} loops.
+@samp{for (@var{index} in @var{array})} loops.
Since this is an advanced feature, we defer the
full description until later; see
@ref{Scanning an Array}.
@@ -13406,7 +14185,7 @@ The version of @command{gawk}.
The following additional elements in the array
are available to provide information about the MPFR and GMP libraries
if your version of @command{gawk} supports arbitrary precision numbers
-(@pxref{Gawk and MPFR}):
+(@pxref{Arbitrary Precision Arithmetic}):
@table @code
@cindex version of GNU MPFR library
@@ -13452,12 +14231,11 @@ to test for these elements
@cindex @code{PROCINFO} array, uses
The @code{PROCINFO} array has the following additional uses:
-@itemize @bullet
+@itemize @value{BULLET}
@item
-It may be
-used to cause coprocesses
-to communicate over pseudo-ttys instead of through two-way pipes;
-this is discussed further in @ref{Two-way I/O}.
+It may be used to cause coprocesses to communicate over pseudo-ttys
+instead of through two-way pipes; this is discussed further in
+@ref{Two-way I/O}.
@item
It may be used to provide a timeout when reading from any
@@ -13465,14 +14243,8 @@ open input file, pipe, or coprocess.
@xref{Read Timeout}, for more information.
@end itemize
-This array is a @command{gawk} extension.
-In other @command{awk} implementations,
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
-
@cindex @code{RLENGTH} variable
-@item RLENGTH
+@item @code{RLENGTH}
The length of the substring matched by the
@code{match()} function
(@pxref{String Functions}).
@@ -13480,7 +14252,7 @@ The length of the substring matched by the
is the length of the matched string, or @minus{}1 if no match is found.
@cindex @code{RSTART} variable
-@item RSTART
+@item @code{RSTART}
The start-index in characters of the substring that is matched by the
@code{match()} function
(@pxref{String Functions}).
@@ -13491,20 +14263,14 @@ if no match was found.
@cindex @command{gawk}, @code{RT} variable in
@cindex @code{RT} variable
@cindex differences in @command{awk} and @command{gawk}, @code{RT} variable
-@item RT #
-This is set each time a record is read. It contains the input text
-that matched the text denoted by @code{RS}, the record separator.
-
-This variable is a @command{gawk} extension.
-In other @command{awk} implementations,
-or if @command{gawk} is in compatibility mode
-(@pxref{Options}),
-it is not special.
+@item @code{RT #}
+The input text that matched the text denoted by @code{RS},
+the record separator. It is set every time a record is read.
@cindex @command{gawk}, @code{SYMTAB} array in
@cindex @code{SYMTAB} array
@cindex differences in @command{awk} and @command{gawk}, @code{SYMTAB} variable
-@item SYMTAB #
+@item @code{SYMTAB #}
An array whose indices are the names of all currently defined
global variables and arrays in the program. The array may be used
for indirect access to read or write the value of a variable:
@@ -13533,6 +14299,7 @@ This works as expected: in this case @code{SYMTAB} acts just like
a regular array. The only difference is that you can't then delete
@code{SYMTAB["xxx"]}.
+@cindex Schorr, Andrew
The @code{SYMTAB} array is more interesting than it looks. Andrew Schorr
points out that it effectively gives @command{awk} data pointers. Consider his
example:
@@ -13547,8 +14314,8 @@ function multiply(variable, amount)
@end example
@quotation NOTE
-In order to avoid severe time-travel paradoxes@footnote{Not to mention difficult
-implementation issues.}, neither @code{FUNCTAB} nor @code{SYMTAB}
+In order to avoid severe time-travel paradoxes,@footnote{Not to mention difficult
+implementation issues.} neither @code{FUNCTAB} nor @code{SYMTAB}
are available as elements within the @code{SYMTAB} array.
@end quotation
@end table
@@ -13593,7 +14360,7 @@ changed.
@cindex arguments, command-line
@cindex command line, arguments
-@ref{Auto-set},
+@DBREF{Auto-set}
presented the following program describing the information contained in @code{ARGC}
and @code{ARGV}:
@@ -13651,11 +14418,11 @@ additional files to be read.
If the value of @code{ARGC} is decreased, that eliminates input files
from the end of the list. By recording the old value of @code{ARGC}
elsewhere, a program can treat the eliminated arguments as
-something other than file names.
+something other than @value{FN}s.
To eliminate a file from the middle of the list, store the null string
(@code{""}) into @code{ARGV} in place of the file's name. As a
-special feature, @command{awk} ignores file names that have been
+special feature, @command{awk} ignores @value{FN}s that have been
replaced with the null string.
Another option is to
use the @code{delete} statement to remove elements from
@@ -13666,8 +14433,17 @@ before actual processing of the input begins.
@xref{Split Program}, and see
@ref{Tee Program}, for examples
of each way of removing elements from @code{ARGV}.
+
+To actually get options into an @command{awk} program,
+end the @command{awk} options with @option{--} and then supply
+the @command{awk} program's options, in the following manner:
+
+@example
+awk -f myprog.awk -- -v -q file1 file2 @dots{}
+@end example
+
The following fragment processes @code{ARGV} in order to examine, and
-then remove, command-line options:
+then remove, the above command-line options:
@example
BEGIN @{
@@ -13687,32 +14463,83 @@ BEGIN @{
@}
@end example
-To actually get the options into the @command{awk} program,
-end the @command{awk} options with @option{--} and then supply
-the @command{awk} program's options, in the following manner:
-
-@example
-awk -f myprog -- -v -q file1 file2 @dots{}
-@end example
-
@cindex differences in @command{awk} and @command{gawk}, @code{ARGC}/@code{ARGV} variables
-This is not necessary in @command{gawk}. Unless @option{--posix} has
+Ending the @command{awk} options with @option{--} isn't
+necessary in @command{gawk}. Unless @option{--posix} has
been specified, @command{gawk} silently puts any unrecognized options
into @code{ARGV} for the @command{awk} program to deal with. As soon
as it sees an unknown option, @command{gawk} stops looking for other
-options that it might otherwise recognize. The previous example with
+options that it might otherwise recognize. The previous command line with
@command{gawk} would be:
@example
-gawk -f myprog -q -v file1 file2 @dots{}
+gawk -f myprog.awk -q -v file1 file2 @dots{}
@end example
@noindent
-Because @option{-q} is not a valid @command{gawk} option,
-it and the following @option{-v}
-are passed on to the @command{awk} program.
-(@xref{Getopt Function}, for an @command{awk} library function
-that parses command-line options.)
+Because @option{-q} is not a valid @command{gawk} option, it and the
+following @option{-v} are passed on to the @command{awk} program.
+(@xref{Getopt Function}, for an @command{awk} library function that
+parses command-line options.)
+
+@node Pattern Action Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Pattern-action pairs make up the basic elements of an @command{awk}
+program. Patterns are either normal expressions, range expressions,
+regexp constants, one of the special keywords @code{BEGIN}, @code{END},
+@code{BEGINFILE}, @code{ENDFILE}, or empty. The action executes if
+the current record matches the pattern. Empty (missing) patterns match
+all records.
+
+@item
+I/O from @code{BEGIN} and @code{END} rules has certain constraints.
+This is also true, only more so, for @code{BEGINFILE} and @code{ENDFILE}
+rules. The latter two give you ``hooks'' into @command{gawk}'s file
+processing, allowing you to recover from a file that otherwise would
+cause a fatal error (such as a file that cannot be opened).
+
+@item
+Shell variables can be used in @command{awk} programs by careful
+use of shell quoting. It is easier to pass a shell variable into
+@command{awk} by using the @option{-v} option and an @command{awk}
+variable.
+
+@item
+Actions consist of statements enclosed in curly braces. Statements
+are built up from expressions, control statements, compound statements,
+input and output statements, and deletion statements.
+
+@item
+The control statements in @command{awk} are @code{if}-@code{else},
+@code{while}, @code{for}, and @code{do}-@code{while}. @command{gawk}
+adds the @code{switch} statement. There are two flavors of @code{for}
+statement: one for performing general looping, and the other for iterating
+through an array.
+
+@item
+@code{break} and @code{continue} let you exit early or start the next
+iteration of a loop (or get out of a @code{switch}).
+
+@item
+@code{next} and @code{nextfile} let you read the next record and start
+over at the top of your program, or skip to the next input file and
+start over, respectively.
+
+@item
+The @code{exit} statement terminates your program. When executed
+from an action (or function body) it transfers control to the
+@code{END} statements. From an @code{END} statement body, it exits
+immediately. You may pass an optional numeric value to be used
+as @command{awk}'s exit status.
+
+@item
+Some built-in variables provide control over @command{awk}, mainly for I/O.
+Other variables convey information from @command{awk} to your program.
+
+@end itemize
@node Arrays
@chapter Arrays in @command{awk}
@@ -13730,7 +14557,7 @@ It also describes how @command{awk} simulates multidimensional
arrays, as well as some of the less obvious points about array usage.
The @value{CHAPTER} moves on to discuss @command{gawk}'s facility
for sorting arrays, and ends with a brief description of @command{gawk}'s
-ability to support true multidimensional arrays.
+ability to support true arrays of arrays.
@cindex variables, names of
@cindex functions, names of
@@ -13753,6 +14580,7 @@ same @command{awk} program.
* Multidimensional:: Emulating multidimensional arrays in
@command{awk}.
* Arrays of Arrays:: True multidimensional arrays.
+* Arrays Summary:: Summary of arrays.
@end menu
@node Array Basics
@@ -13814,35 +14642,34 @@ the array is declared.)
A contiguous array of four elements might look like the following example,
conceptually, if the element values are 8, @code{"foo"},
-@code{""}, and 30:
+@code{""}, and 30
+@ifnotdocbook
+as shown in @ref{figure-array-elements}:
+@end ifnotdocbook
+@ifdocbook
+as shown in @inlineraw{docbook, <xref linkend="figure-array-elements"/>}:
+@end ifdocbook
-@c @strong{FIXME: NEXT ED:} Use real images here, and an @float
-@iftex
-@c from Karl Berry, much thanks for the help.
-@tex
-\bigskip % space above the table (about 1 linespace)
-\offinterlineskip
-\newdimen\width \width = 1.5cm
-\newdimen\hwidth \hwidth = 4\width \advance\hwidth by 2pt % 5 * 0.4pt
-\centerline{\vbox{
-\halign{\strut\hfil\ignorespaces#&&\vrule#&\hbox to\width{\hfil#\unskip\hfil}\cr
-\noalign{\hrule width\hwidth}
- &&{\tt 8} &&{\tt "foo"} &&{\tt ""} &&{\tt 30} &&\quad Value\cr
-\noalign{\hrule width\hwidth}
-\noalign{\smallskip}
- &\omit&0&\omit &1 &\omit&2 &\omit&3 &\omit&\quad Index\cr
-}
-}}
-@end tex
-@end iftex
-@ifnottex
-@example
-+---------+---------+--------+---------+
-| 8 | "foo" | "" | 30 | @r{Value}
-+---------+---------+--------+---------+
- 0 1 2 3 @r{Index}
-@end example
-@end ifnottex
+@ifnotdocbook
+@float Figure,figure-array-elements
+@caption{A Contiguous Array}
+@ifinfo
+@center @image{array-elements, , , A Contiguous Array, txt}
+@end ifinfo
+@ifnotinfo
+@center @image{array-elements, , , A Contiguous Array}
+@end ifnotinfo
+@end float
+@end ifnotdocbook
+
+@docbook
+<figure id="figure-array-elements" float="0">
+<title>A Contiguous Array</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="array-elements.png" format="PNG"/></imageobject>
+</mediaobject>
+</figure>
+@end docbook
@noindent
Only the values are stored; the indices are implicit from the order of
@@ -13859,20 +14686,65 @@ Arrays in @command{awk} are different---they are @dfn{associative}. This means
that each array is a collection of pairs: an index and its corresponding
array element value:
+@ifnotdocbook
@example
@r{Index} 3 @r{Value} 30
@r{Index} 1 @r{Value} "foo"
@r{Index} 0 @r{Value} 8
@r{Index} 2 @r{Value} ""
@end example
+@end ifnotdocbook
+
+@docbook
+<informaltable>
+<tgroup cols="2">
+<colspec colname="1" align="center"/>
+<colspec colname="2" align="center"/>
+<thead>
+<row>
+<entry>Index</entry>
+<entry>Value</entry>
+</row>
+</thead>
+
+<tbody>
+<row>
+<entry><literal>3</literal></entry>
+<entry><literal>30</literal></entry>
+</row>
+
+<row>
+<entry><literal>1</literal></entry>
+<entry><literal>"foo"</literal></entry>
+</row>
+
+<row>
+<entry><literal>0</literal></entry>
+<entry><literal>8</literal></entry>
+</row>
+
+<row>
+<entry><literal>2</literal></entry>
+<entry><literal>""</literal></entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
@noindent
-The pairs are shown in jumbled order because their order is irrelevant.
+The pairs are shown in jumbled order because their order is
+irrelevant.@footnote{The ordering will vary among @command{awk}
+implementations, which typically use hash tables to store array elements
+and values.}
One advantage of associative arrays is that new pairs can be added
at any time. For example, suppose a tenth element is added to the array
whose value is @w{@code{"number ten"}}. The result is:
+@ifnotdocbook
@example
@r{Index} 10 @r{Value} "number ten"
@r{Index} 3 @r{Value} 30
@@ -13880,6 +14752,51 @@ whose value is @w{@code{"number ten"}}. The result is:
@r{Index} 0 @r{Value} 8
@r{Index} 2 @r{Value} ""
@end example
+@end ifnotdocbook
+
+@docbook
+<informaltable>
+<tgroup cols="2">
+<colspec colname="1" align="center"/>
+<colspec colname="2" align="center"/>
+<thead>
+<row>
+<entry>Index</entry>
+<entry>Value</entry>
+</row>
+</thead>
+<tbody>
+
+<row>
+<entry><literal>10</literal></entry>
+<entry><literal>"number ten"</literal></entry>
+</row>
+
+<row>
+<entry><literal>3</literal></entry>
+<entry><literal>30</literal></entry>
+</row>
+
+<row>
+<entry><literal>1</literal></entry>
+<entry><literal>"foo"</literal></entry>
+</row>
+
+<row>
+<entry><literal>0</literal></entry>
+<entry><literal>8</literal></entry>
+</row>
+
+<row>
+<entry><literal>2</literal></entry>
+<entry><literal>""</literal></entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
@noindent
@cindex sparse arrays
@@ -13892,20 +14809,61 @@ have to be positive integers. Any number, or even a string, can be
an index. For example, the following is an array that translates words from
English to French:
+@ifnotdocbook
@example
@r{Index} "dog" @r{Value} "chien"
@r{Index} "cat" @r{Value} "chat"
@r{Index} "one" @r{Value} "un"
@r{Index} 1 @r{Value} "un"
@end example
+@end ifnotdocbook
+
+@docbook
+<informaltable>
+<tgroup cols="2">
+<colspec colname="1" align="center"/>
+<colspec colname="2" align="center"/>
+<thead>
+<row>
+<entry>Index</entry>
+<entry>Value</entry>
+</row>
+</thead>
+<tbody>
+<row>
+<entry><literal>"dog"</literal></entry>
+<entry><literal>"chien"</literal></entry>
+</row>
+
+<row>
+<entry><literal>"cat"</literal></entry>
+<entry><literal>"chat"</literal></entry>
+</row>
+
+<row>
+<entry><literal>"one"</literal></entry>
+<entry><literal>"un"</literal></entry>
+</row>
+
+<row>
+<entry><literal>1</literal></entry>
+<entry><literal>"un"</literal></entry>
+</row>
+
+</tbody>
+</tgroup>
+</informaltable>
+
+@end docbook
@noindent
Here we decided to translate the number one in both spelled-out and
numeric form---thus illustrating that a single array can have both
numbers and strings as indices.
-In fact, array subscripts are always strings; this is discussed
-in more detail in
-@ref{Numeric Array Subscripts}.
+(In fact, array subscripts are always strings.
+There are some subtleties to how numbers work when used as
+array subscripts; this is discussed in more detail in
+@ref{Numeric Array Subscripts}.)
Here, the number @code{1} isn't double-quoted, since @command{awk}
automatically converts it to a string.
@@ -13971,8 +14929,9 @@ if (a["foo"] != "") @dots{}
@end example
@noindent
-This is incorrect, since this will @emph{create} @code{a["foo"]}
-if it didn't exist before!
+This is incorrect for two reasons. First, it @emph{creates} @code{a["foo"]}
+if it didn't exist before! Second, it is valid (if a bit unusual) to set
+an array element equal to the empty string.
@end quotation
@c @cindex arrays, @code{in} operator and
@@ -13981,15 +14940,17 @@ To determine whether an element exists in an array at a certain index, use
the following expression:
@example
-@var{ind} in @var{array}
+@var{indx} in @var{array}
@end example
@cindex side effects, array indexing
@noindent
-This expression tests whether the particular index @var{ind} exists,
+This expression tests whether the particular index @var{indx} exists,
without the side effect of creating that element if it is not present.
-The expression has the value one (true) if @code{@var{array}[@var{ind}]}
+The expression has the value one (true) if @code{@var{array}[@var{indx}]}
exists and zero (false) if it does not exist.
+(We use @var{indx} here, since @samp{index} is the name of a built-in
+function.)
For example, this statement tests whether the array @code{frequencies}
contains the index @samp{2}:
@@ -14162,14 +15123,56 @@ for a more detailed example of this type.
@cindex @code{in} operator, order of array access
The order in which elements of the array are accessed by this statement
is determined by the internal arrangement of the array elements within
-@command{awk} and normally cannot be controlled or changed. This can lead to
-problems if new elements are added to @var{array} by statements in
-the loop body; it is not predictable whether the @code{for} loop will
-reach them. Similarly, changing @var{var} inside the loop may produce
-strange results. It is best to avoid such things.
+@command{awk} and in standard @command{awk} cannot be controlled
+or changed. This can lead to problems if new elements are added to
+@var{array} by statements in the loop body; it is not predictable whether
+the @code{for} loop will reach them. Similarly, changing @var{var} inside
+the loop may produce strange results. It is best to avoid such things.
+
+As a point of information, @command{gawk} sets up the list of elements
+to be iterated over before the loop starts, and does not change it.
+But not all @command{awk} versions do so. Consider this program, named
+@file{loopcheck.awk}:
+
+@example
+BEGIN @{
+ a["here"] = "here"
+ a["is"] = "is"
+ a["a"] = "a"
+ a["loop"] = "loop"
+ for (i in a) @{
+ j++
+ a[j] = j
+ print i
+ @}
+@}
+@end example
+
+Here is what happens when the program is run with @command{gawk}:
+
+@example
+$ @kbd{gawk -f loopcheck.awk}
+@print{} here
+@print{} loop
+@print{} a
+@print{} is
+@end example
+
+Contrast this to BWK @command{awk}:
+
+@example
+$ @kbd{nawk -f loopcheck.awk}
+@print{} loop
+@print{} here
+@print{} is
+@print{} a
+@print{} 1
+@end example
@node Controlling Scanning
-@subsection Using Predefined Array Scanning Orders
+@subsection Using Predefined Array Scanning Orders With @command{gawk}
+
+This @value{SUBSECTION} describes a feature that is specific to @command{gawk}.
By default, when a @code{for} loop traverses an array, the order
is undefined, meaning that the @command{awk} implementation
@@ -14184,7 +15187,7 @@ Often, though, you may wish to do something simple, such as
or ``traverse the array by comparing the values in descending order.''
@command{gawk} provides two mechanisms which give you this control.
-@itemize @bullet
+@itemize @value{BULLET}
@item
Set @code{PROCINFO["sorted_in"]} to one of a set of predefined values.
We describe this now.
@@ -14291,7 +15294,7 @@ order relative to each other is determined by their index strings.
Here are some additional things to bear in mind about sorted
array traversal.
-@itemize @bullet
+@itemize @value{BULLET}
@item
The value of @code{PROCINFO["sorted_in"]} is global. That is, it affects
all array traversal @code{for} loops. If you need to change it within your
@@ -14400,7 +15403,7 @@ using @code{delete} without a subscript was a @command{gawk} extension.
As of September, 2012, it was accepted for
inclusion into the POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
the Austin Group website}. This form of the @code{delete} statement is also supported
-by Brian Kernighan's @command{awk} and @command{mawk}, as well as
+by BWK @command{awk} and @command{mawk}, as well as
by a number of other implementations (@pxref{Other Versions}).
@end quotation
@@ -14516,14 +15519,14 @@ $ @kbd{echo 'line 1}
> @kbd{line 2}
> @kbd{line 3' | awk '@{ l[lines] = $0; ++lines @}}
> @kbd{END @{}
-> @kbd{for (i = lines-1; i >= 0; --i)}
+> @kbd{for (i = lines - 1; i >= 0; i--)}
> @kbd{print l[i]}
> @kbd{@}'}
@print{} line 3
@print{} line 2
@end example
-Unfortunately, the very first line of input data did not come out in the
+Unfortunately, the very first line of input data did not appear in the
output!
Upon first glance, we would think that this program should have worked.
@@ -14540,7 +15543,7 @@ The following version of the program works correctly:
@example
@{ l[lines++] = $0 @}
END @{
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
@}
@end example
@@ -14614,10 +15617,11 @@ used for single dimensional arrays. Write the whole sequence of indices
in parentheses, separated by commas, as the left operand:
@example
-(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array}
+if ((@var{subscript1}, @var{subscript2}, @dots{}) in @var{array})
+ @dots{}
@end example
-The following example treats its input as a two-dimensional array of
+Here is an example that treats its input as a two-dimensional array of
fields; it rotates this array 90 degrees clockwise and prints the
result. It assumes that all lines have the same number of
elements:
@@ -14721,7 +15725,7 @@ separate indices is recovered.
array access and provides true arrays of
arrays. Elements of a subarray are referred to by their own indices
enclosed in square brackets, just like the elements of the main array.
-For example, the following creates a two-element subarray at index @samp{1}
+For example, the following creates a two-element subarray at index @code{1}
of the main array @code{a}:
@example
@@ -14745,7 +15749,7 @@ Each subarray and the main array can be of different length. In fact, the
elements of an array or its subarray do not all have to have the same
type. This means that the main array and any of its subarrays can be
non-rectangular, or jagged in structure. One can assign a scalar value to
-the index @samp{4} of the main array @code{a}:
+the index @code{4} of the main array @code{a}:
@example
a[4] = "An element in a jagged array"
@@ -14766,7 +15770,7 @@ a[4][5][6][7] = "An element in a four-dimensional array"
@end example
@noindent
-This removes the scalar value from index @samp{4} and then inserts a
+This removes the scalar value from index @code{4} and then inserts a
subarray of subarray of subarray containing a scalar. You can also
delete an entire subarray or subarray of subarrays:
@@ -14867,6 +15871,63 @@ creating an arbitrary index:
$ @kbd{gawk 'BEGIN @{ b[1][1] = ""; split("a b c d", b[1]); print b[1][1] @}'}
@print{} a
@end example
+
+@node Arrays Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Standard @command{awk} provides one-dimensional associative arrays
+(arrays indexed by string values). All arrays are associative; numeric
+indices are converted automatically to strings.
+
+@item
+Array elements are referenced as @code{@var{array}[@var{indx}]}.
+Referencing an element creates it if it did not exist previously.
+
+@item
+The proper way to see if an array has an element with a given index
+is to use the @code{in} operator: @samp{@var{indx} in @var{array}}.
+
+@item
+Use @samp{for (@var{indx} in @var{array}) @dots{}} to scan through all the
+individual elements of an array. In the body of the loop, @var{indx} takes
+on the value of each element's index in turn.
+
+@item
+The order in which a @samp{for (@var{indx} in @var{array})} loop
+traverses an array is undefined in POSIX @command{awk} and varies among
+implementations. @command{gawk} lets you control the order by assigning
+special predefined values to @code{PROCINFO["sorted_in"]}.
+
+@item
+Use @samp{delete @var{array}[@var{indx}]} to delete an individual element.
+You may also use @samp{delete @var{array}} to delete all of the elements
+in the array. This latter feature has been a common extension for many
+years and is now standard, but may not be supported by all commercial
+versions of @command{awk}.
+
+@item
+Standard @command{awk} simulates multidimensional arrays by separating
+subscript values with a comma. The values are concatenated into a
+single string, separated by the value of @code{SUBSEP}. The fact
+that such a subscript was created in this way is not retained; thus
+changing @code{SUBSEP} may have unexpected consequences. You can use
+@samp{(@var{sub1}, @var{sub2}, @dots{}) in @var{array}} to see if such
+a multidimensional subscript exists in @var{array}.
+
+@item
+@command{gawk} provides true arrays of arrays. You use a separate
+set of square brackets for each dimension in such an array:
+@code{data[row][col]}, for example. An array element may thus be either
+a scalar value (number or string) or another array.
+
+@item
+Use the @code{isarray()} built-in function to determine if an array
+element is itself a subarray.
+
+@end itemize
+
@c ENDOFRANGE arrs
@node Functions
@@ -14891,6 +15952,7 @@ The second half of this @value{CHAPTER} describes these
* Built-in:: Summarizes the built-in functions.
* User-defined:: Describes User-defined functions in detail.
* Indirect Calls:: Choosing the function to call at runtime.
+* Functions Summary:: Summary of functions.
@end menu
@node Built-in
@@ -14981,26 +16043,45 @@ The following list describes all of
the built-in functions that work with numbers.
Optional parameters are enclosed in square brackets@w{ ([ ]):}
-@table @code
-@item atan2(@var{y}, @var{x})
+@c @asis for docbook
+@table @asis
+@item @code{atan2(@var{y}, @var{x})}
@cindexawkfunc{atan2}
@cindex arctangent
Return the arctangent of @code{@var{y} / @var{x}} in radians.
-You can use @samp{pi = atan2(0, -1)} to retrieve the value of @value{PI}.
+You can use @samp{pi = atan2(0, -1)} to retrieve the value of
+@value{PI}.
-@item cos(@var{x})
+@item @code{cos(@var{x})}
@cindexawkfunc{cos}
@cindex cosine
Return the cosine of @var{x}, with @var{x} in radians.
-@item exp(@var{x})
+@item @code{div(@var{numerator}, @var{denominator}, @var{result})}
+@cindexawkfunc{div}
+@cindex div
+Perform integer division, similar to the standard C function of the
+same name. First, truncate @var{numerator} and @var{denominator}
+towards zero, creating integer values. Clear the @var{result}
+array, and then set @code{@var{result}["quotient"]} to the result of
+@samp{@var{numerator} / @var{denominator}}, truncated towards zero to an integer,
+and set @code{@var{result}["remainder"]} to the result of @samp{@var{numerator} %
+@var{denominator}}, truncated towards zero to an integer. This function is
+primarily intended for use with arbitrary-length integers; it avoids
+creating MPFR arbitrary-precision floating-point values (@pxref{Arbitrary
+Precision Integers}).
+
+This function is a @command{gawk} extension. It is not available in
+compatibility mode (@pxref{Options}).
+
+@item @code{exp(@var{x})}
@cindexawkfunc{exp}
@cindex exponent
Return the exponential of @var{x} (@code{e ^ @var{x}}) or report
an error if @var{x} is out of range. The range of values @var{x} can have
depends on your machine's floating-point representation.
-@item int(@var{x})
+@item @code{int(@var{x})}
@cindexawkfunc{int}
@cindex round to nearest integer
Return the nearest integer to @var{x}, located between @var{x} and zero and
@@ -15009,13 +16090,15 @@ truncated toward zero.
For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)}
is @minus{}3, and @code{int(-3)} is @minus{}3 as well.
-@item log(@var{x})
+@item @code{log(@var{x})}
@cindexawkfunc{log}
@cindex logarithm
Return the natural logarithm of @var{x}, if @var{x} is positive;
-otherwise, report an error.
+otherwise, return @code{NaN} (``not a number'') on IEEE 754 systems.
+Additionally, @command{gawk} prints a warning message when @var{x}
+is negative.
-@item rand()
+@item @code{rand()}
@cindexawkfunc{rand}
@cindex random numbers, @code{rand()}/@code{srand()} functions
Return a random number. The values of @code{rand()} are
@@ -15073,19 +16156,19 @@ the seed to a value that is different in each run. To do this,
use @code{srand()}.
@end quotation
-@item sin(@var{x})
+@item @code{sin(@var{x})}
@cindexawkfunc{sin}
@cindex sine
Return the sine of @var{x}, with @var{x} in radians.
-@item sqrt(@var{x})
+@item @code{sqrt(@var{x})}
@cindexawkfunc{sqrt}
@cindex square root
Return the positive square root of @var{x}.
@command{gawk} prints a warning message
if @var{x} is negative. Thus, @code{sqrt(4)} is 2.
-@item srand(@r{[}@var{x}@r{]})
+@item @code{srand(}[@var{x}]@code{)}
@cindexawkfunc{srand}
Set the starting point, or seed,
for generating random numbers to the value @var{x}.
@@ -15112,6 +16195,9 @@ numbers that are truly unpredictable.
The return value of @code{srand()} is the previous seed. This makes it
easy to keep track of the seeds in case you need to consistently reproduce
sequences of random numbers.
+
+POSIX does not specify the initial seed; it differs among @command{awk}
+implementations.
@end table
@node String Functions
@@ -15129,12 +16215,23 @@ example, @code{length()} returns the number of characters in a string,
and not the number of bytes used to represent those characters. Similarly,
@code{index()} works with character indices, and not byte indices.
+@quotation CAUTION
+A number of functions deal with indices into strings. For these
+functions, the first character of a string is at position (index) one.
+This is different from C and the languages descended from it, where the
+first character is at position zero. You need to remember this when
+doing index calculations, particularly if you are used to C.
+@end quotation
+
In the following list, optional parameters are enclosed in square brackets@w{ ([ ]).}
Several functions perform string substitution; the full discussion is
provided in the description of the @code{sub()} function, which comes
towards the end since the list is presented in alphabetic order.
+
Those functions that are specific to @command{gawk} are marked with a
-pound sign@w{ (@samp{#}):}
+pound sign (@samp{#}). They are not available in compatibility mode
+(@pxref{Options}):
+
@menu
* Gory Details:: More than you want to know about @samp{\} and
@@ -15142,9 +16239,10 @@ pound sign@w{ (@samp{#}):}
@code{gensub()}.
@end menu
-@table @code
-@item asort(@var{source} @r{[}, @var{dest} @r{[}, @var{how} @r{]} @r{]}) #
-@itemx asorti(@var{source} @r{[}, @var{dest} @r{[}, @var{how} @r{]} @r{]}) #
+@c @asis for docbook
+@table @asis
+@item @code{asort(}@var{source} [@code{,} @var{dest} [@code{,} @var{how} ] ]@code{) #}
+@itemx @code{asorti(}@var{source} [@code{,} @var{dest} [@code{,} @var{how} ] ]@code{) #}
@cindexgawkfunc{asorti}
@cindex sort array
@cindex arrays, elements, retrieving number of
@@ -15208,10 +16306,7 @@ a[2] = "last"
a[3] = "middle"
@end example
-@code{asort()} and @code{asorti()} are @command{gawk} extensions; they
-are not available in compatibility mode (@pxref{Options}).
-
-@item gensub(@var{regexp}, @var{replacement}, @var{how} @r{[}, @var{target}@r{]}) #
+@item @code{gensub(@var{regexp}, @var{replacement}, @var{how}} [@code{, @var{target}}]@code{) #}
@cindexgawkfunc{gensub}
@cindex search and replace in strings
@cindex substitute in string
@@ -15273,10 +16368,7 @@ a warning message.
If @var{regexp} does not match @var{target}, @code{gensub()}'s return value
is the original unchanged value of @var{target}.
-@code{gensub()} is a @command{gawk} extension; it is not available
-in compatibility mode (@pxref{Options}).
-
-@item gsub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]})
+@item @code{gsub(@var{regexp}, @var{replacement}} [@code{, @var{target}}]@code{)}
@cindexawkfunc{gsub}
Search @var{target} for
@emph{all} of the longest, leftmost, @emph{nonoverlapping} matching
@@ -15298,7 +16390,7 @@ omitted, then the entire input record (@code{$0}) is used.
As in @code{sub()}, the characters @samp{&} and @samp{\} are special,
and the third argument must be assignable.
-@item index(@var{in}, @var{find})
+@item @code{index(@var{in}, @var{find})}
@cindexawkfunc{index}
@cindex search in string
@cindex find substring in string
@@ -15313,19 +16405,29 @@ $ @kbd{awk 'BEGIN @{ print index("peanut", "an") @}'}
@noindent
If @var{find} is not found, @code{index()} returns zero.
-(Remember that string indices in @command{awk} start at one.)
It is a fatal error to use a regexp constant for @var{find}.
-@item length(@r{[}@var{string}@r{]})
+@item @code{length(}[@var{string}]@code{)}
@cindexawkfunc{length}
@cindex string length
@cindex length of string
Return the number of characters in @var{string}. If
@var{string} is a number, the length of the digit string representing
that number is returned. For example, @code{length("abcde")} is five. By
-contrast, @code{length(15 * 35)} works out to three. In this example, 15 * 35 =
-525, and 525 is then converted to the string @code{"525"}, which has
+contrast, @code{length(15 * 35)} works out to three. In this example,
+@iftex
+@math{15 @cdot 35 = 525},
+@end iftex
+@ifnottex
+@ifnotdocbook
+15 * 35 = 525,
+@end ifnotdocbook
+@end ifnottex
+@docbook
+15 &sdot; 35 = 525, @c
+@end docbook
+and 525 is then converted to the string @code{"525"}, which has
three characters.
@cindex length of input record
@@ -15382,18 +16484,18 @@ If @option{--lint} is provided on the command line
If @option{--posix} is supplied, using an array argument is a fatal error
(@pxref{Arrays}).
-@item match(@var{string}, @var{regexp} @r{[}, @var{array}@r{]})
+@item @code{match(@var{string}, @var{regexp}} [@code{, @var{array}}]@code{)}
@cindexawkfunc{match}
@cindex string, regular expression match
@cindex match regexp in string
Search @var{string} for the
longest, leftmost substring matched by the regular expression,
-@var{regexp} and return the character position, or @dfn{index},
+@var{regexp}, and return the character position (index)
at which that substring begins (one, if it starts at the beginning of
@var{string}). If no match is found, return zero.
The @var{regexp} argument may be either a regexp constant
-(@code{/@dots{}/}) or a string constant (@code{"@dots{}"}).
+(@code{/}@dots{}@code{/}) or a string constant (@code{"}@dots{}@code{"}).
In the latter case, the string is treated as a regexp to be matched.
@xref{Computed Regexps}, for a
discussion of the difference between the two forms, and the
@@ -15499,7 +16601,7 @@ The @var{array} argument to @code{match()} is a
(@pxref{Options}),
using a third argument is a fatal error.
-@item patsplit(@var{string}, @var{array} @r{[}, @var{fieldpat} @r{[}, @var{seps} @r{]} @r{]}) #
+@item @code{patsplit(@var{string}, @var{array}} [@code{, @var{fieldpat}} [@code{, @var{seps}} ] ]@code{) #}
@cindexgawkfunc{patsplit}
@cindex split string into array
Divide
@@ -15525,13 +16627,7 @@ manner similar to the way input lines are split into fields using @code{FPAT}
Before splitting the string, @code{patsplit()} deletes any previously existing
elements in the arrays @var{array} and @var{seps}.
-@cindex troubleshooting, @code{patsplit()} function
-The @code{patsplit()} function is a
-@command{gawk} extension. In compatibility mode
-(@pxref{Options}),
-it is not available.
-
-@item split(@var{string}, @var{array} @r{[}, @var{fieldsep} @r{[}, @var{seps} @r{]} @r{]})
+@item @code{split(@var{string}, @var{array}} [@code{, @var{fieldsep}} [@code{, @var{seps}} ] ]@code{)}
@cindexawkfunc{split}
Divide @var{string} into pieces separated by @var{fieldsep}
and store the pieces in @var{array} and the separator strings in the
@@ -15616,7 +16712,9 @@ If @var{string} does not match @var{fieldsep} at all (but is not null),
@var{array} has one element only. The value of that element is the original
@var{string}.
-@item sprintf(@var{format}, @var{expression1}, @dots{})
+In POSIX mode (@pxref{Options}), the fourth argument is not allowed.
+
+@item @code{sprintf(@var{format}, @var{expression1}, @dots{})}
@cindexawkfunc{sprintf}
@cindex formatting strings
Return (without printing) the string that @code{printf} would
@@ -15633,7 +16731,7 @@ assigns the string @w{@samp{pi = 3.14 (approx.)}} to the variable @code{pival}.
@cindexgawkfunc{strtonum}
@cindex convert string to number
-@item strtonum(@var{str}) #
+@item @code{strtonum(@var{str}) #}
Examine @var{str} and return its numeric value. If @var{str}
begins with a leading @samp{0}, @code{strtonum()} assumes that @var{str}
is an octal number. If @var{str} begins with a leading @samp{0x} or
@@ -15655,10 +16753,7 @@ you use the @option{--non-decimal-data} option, which isn't recommended.
Note also that @code{strtonum()} uses the current locale's decimal point
for recognizing numbers (@pxref{Locales}).
-@code{strtonum()} is a @command{gawk} extension; it is not available
-in compatibility mode (@pxref{Options}).
-
-@item sub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]})
+@item @code{sub(@var{regexp}, @var{replacement}} [@code{, @var{target}}]@code{)}
@cindexawkfunc{sub}
@cindex replace in string
Search @var{target}, which is treated as a string, for the
@@ -15669,7 +16764,7 @@ The modified string becomes the new value of @var{target}.
Return the number of substitutions made (zero or one).
The @var{regexp} argument may be either a regexp constant
-(@code{/@dots{}/}) or a string constant (@code{"@dots{}"}).
+(@code{/}@dots{}@code{/}) or a string constant (@code{"}@dots{}@code{"}).
In the latter case, the string is treated as a regexp to be matched.
@xref{Computed Regexps}, for a
discussion of the difference between the two forms, and the
@@ -15759,7 +16854,7 @@ will not run.
Finally, if the @var{regexp} is not a regexp constant, it is converted into a
string, and then the value of that string is treated as the regexp to match.
-@item substr(@var{string}, @var{start} @r{[}, @var{length}@r{]})
+@item @code{substr(@var{string}, @var{start}} [@code{, @var{length}} ]@code{)}
@cindexawkfunc{substr}
@cindex substring
Return a @var{length}-character-long substring of @var{string},
@@ -15778,7 +16873,7 @@ in the string, counting from character @var{start}.
@cindex Brian Kernighan's @command{awk}
If @var{start} is less than one, @code{substr()} treats it as
if it was one. (POSIX doesn't specify what to do in this case:
-Brian Kernighan's @command{awk} acts this way, and therefore @command{gawk}
+BWK @command{awk} acts this way, and therefore @command{gawk}
does too.)
If @var{start} is greater than the number of characters
in the string, @code{substr()} returns the null string.
@@ -15819,7 +16914,7 @@ string = substr(string, 1, 2) "CDE" substr(string, 6)
@cindex case sensitivity, converting case
@cindex strings, converting letter case
-@item tolower(@var{string})
+@item @code{tolower(@var{string})}
@cindexawkfunc{tolower}
@cindex convert string to lower case
Return a copy of @var{string}, with each uppercase character
@@ -15827,7 +16922,7 @@ in the string replaced with its corresponding lowercase character.
Nonalphabetic characters are left unchanged. For example,
@code{tolower("MiXeD cAsE 123")} returns @code{"mixed case 123"}.
-@item toupper(@var{string})
+@item @code{toupper(@var{string})}
@cindexawkfunc{toupper}
@cindex convert string to upper case
Return a copy of @var{string}, with each lowercase character
@@ -15847,13 +16942,19 @@ Nonalphabetic characters are left unchanged. For example,
@cindex backslash (@code{\}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@cindex @code{&} (ampersand), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@cindex ampersand (@code{&}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
+
+@quotation CAUTION
+This section has been known to cause headaches.
+You might want to skip it upon first reading.
+@end quotation
+
When using @code{sub()}, @code{gsub()}, or @code{gensub()}, and trying to get literal
backslashes and ampersands into the replacement text, you need to remember
that there are several levels of @dfn{escape processing} going on.
First, there is the @dfn{lexical} level, which is when @command{awk} reads
your program
-and builds an internal copy of it that can be executed.
+and builds an internal copy of it to execute.
Then there is the runtime level, which is when @command{awk} actually scans the
replacement string to determine what to generate.
@@ -15864,7 +16965,7 @@ escape sequences listed in @ref{Escape Sequences}.
Thus, for every @samp{\} that @command{awk} processes at the runtime
level, you must type two backslashes at the lexical level.
When a character that is not valid for an escape sequence follows the
-@samp{\}, Brian Kernighan's @command{awk} and @command{gawk} both simply remove the initial
+@samp{\}, BWK @command{awk} and @command{gawk} both simply remove the initial
@samp{\} and put the next character into the string. Thus, for
example, @code{"a\qb"} is treated as @code{"aqb"}.
@@ -15889,26 +16990,26 @@ through unchanged. This is illustrated in @ref{table-sub-escapes}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
- @code{\&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\\&}! @code{\\&}!a literal @samp{\&}_cr
- @code{\\\\\&}! @code{\\&}!a literal @samp{\&}_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\\&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+ @code{\&}! @code{&}!The matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\\&}! @code{\\&}!A literal @samp{\&}_cr
+ @code{\\\\\&}! @code{\\&}!A literal @samp{\&}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\\&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
}
_bigskip}
@end tex
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\&}
-@item @code{\\\\\&} @tab @code{\\&} @tab a literal @samp{\&}
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\\&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\&} @tab @code{&} @tab The matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\&}
+@item @code{\\\\\&} @tab @code{\\&} @tab A literal @samp{\&}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\\&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@end multitable
@end ifdocbook
@ifnottex
@@ -15916,13 +17017,13 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
- @code{\&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\&} @code{\&} a literal @samp{&}
- @code{\\\\&} @code{\\&} a literal @samp{\&}
- @code{\\\\\&} @code{\\&} a literal @samp{\&}
-@code{\\\\\\&} @code{\\\&} a literal @samp{\\&}
- @code{\\q} @code{\q} a literal @samp{\q}
+ @code{\&} @code{&} The matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\\&} @code{\&} A literal @samp{&}
+ @code{\\\\&} @code{\\&} A literal @samp{\&}
+ @code{\\\\\&} @code{\\&} A literal @samp{\&}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\\&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@end display
@end ifnotdocbook
@end ifnottex
@@ -15938,86 +17039,19 @@ case of even numbers of backslashes entered at the lexical level.)
The problem with the historical approach is that there is no way to get
a literal @samp{\} followed by the matched text.
-@c @cindex @command{awk} language, POSIX version
-@cindex POSIX @command{awk}, functions and, @code{gsub()}/@code{sub()}
-The 1992 POSIX standard attempted to fix this problem. That standard
-says that @code{sub()} and @code{gsub()} look for either a @samp{\} or an @samp{&}
-after the @samp{\}. If either one follows a @samp{\}, that character is
-output literally. The interpretation of @samp{\} and @samp{&} then becomes
-as shown in @ref{table-sub-posix-92}.
-
-@float Table,table-sub-posix-92
-@caption{1992 POSIX Rules for @code{sub()} and @code{gsub()} Escape Sequence Processing}
-@c thanks to Karl Berry for formatting this table
-@tex
-\vbox{\bigskip
-% We need more characters for escape and tab ...
-\catcode`_ = 0
-\catcode`! = 4
-% ... since this table has lots of &'s and \'s, so we unspecialize them.
-\catcode`\& = \other \catcode`\\ = \other
-_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
- You type!@code{sub()} sees!@code{sub()} generates_cr
-_hrulefill!_hrulefill!_hrulefill_cr
- @code{&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-}
-_bigskip}
-@end tex
-@ifdocbook
-@multitable @columnfractions .20 .20 .60
-@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, then the matched text
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@end multitable
-@end ifdocbook
-@ifnottex
-@ifnotdocbook
-@display
- You type @code{sub()} sees @code{sub()} generates
- -------- ---------- ---------------
- @code{&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
-@end display
-@end ifnotdocbook
-@end ifnottex
-@end float
-
-@noindent
-This appears to solve the problem.
-Unfortunately, the phrasing of the standard is unusual. It
-says, in effect, that @samp{\} turns off the special meaning of any
-following character, but for anything other than @samp{\} and @samp{&},
-such special meaning is undefined. This wording leads to two problems:
+Several editions of the POSIX standard attempted to fix this problem
+but weren't successful. The details are irrelevant at this point in time.
-@itemize @bullet
-@item
-Backslashes must now be doubled in the @var{replacement} string, breaking
-historical @command{awk} programs.
-
-@item
-To make sure that an @command{awk} program is portable, @emph{every} character
-in the @var{replacement} string must be preceded with a
-backslash.@footnote{This consequence was certainly unintended.}
-@c I can say that, 'cause I was involved in making this change
-@end itemize
-
-Because of the problems just listed,
-in 1996, the @command{gawk} maintainer submitted
+At one point, the @command{gawk} maintainer submitted
proposed text for a revised standard that
reverts to rules that correspond more closely to the original existing
practice. The proposed rules have special cases that make it possible
-to produce a @samp{\} preceding the matched text. This is shown in
+to produce a @samp{\} preceding the matched text.
+This is shown in
@ref{table-sub-proposed}.
@float Table,table-sub-proposed
-@caption{Proposed Rules For @code{sub()} And Backslash}
+@caption{GNU @command{awk} Rules For @code{sub()} And Backslash}
@tex
\vbox{\bigskip
% We need more characters for escape and tab ...
@@ -16028,10 +17062,10 @@ to produce a @samp{\} preceding the matched text. This is shown in
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+@code{\\\\&}! @code{\\&}!A literal @samp{\}, followed by the matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
@code{\\\\}! @code{\\}!@code{\\}_cr
}
_bigskip}
@@ -16039,10 +17073,10 @@ _bigskip}
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, followed by the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, followed by the matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@item @code{\\\\} @tab @code{\\} @tab @code{\\}
@end multitable
@end ifdocbook
@@ -16051,10 +17085,10 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\q} @code{\q} a literal @samp{\q}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@code{\\\\} @code{\\} @code{\\}
@end display
@end ifnotdocbook
@@ -16067,13 +17101,13 @@ there was only one. However, as in the historical case, any @samp{\} that
is not part of one of these three sequences is not special and appears
in the output literally.
-@command{gawk} 3.0 and 3.1 follow these proposed POSIX rules for @code{sub()} and
-@code{gsub()}.
-@c As much as we think it's a lousy idea. You win some, you lose some. Sigh.
-The POSIX standard took much longer to be revised than was expected in 1996.
-The 2001 standard does not follow the above rules. Instead, the rules
-there are somewhat simpler. The results are similar except for one case.
+@command{gawk} 3.0 and 3.1 follow these rules for @code{sub()} and
+@code{gsub()}. The POSIX standard took much longer to be revised than
+was expected. In addition, the @command{gawk} maintainer's proposal was
+lost during the standardization process. The final rules are
+somewhat simpler. The results are similar except for one case.
+@cindex POSIX @command{awk}, functions and, @code{gsub()}/@code{sub()}
The POSIX rules state that @samp{\&} in the replacement string produces
a literal @samp{&}, @samp{\\} produces a literal @samp{\}, and @samp{\} followed
by anything else is not special; the @samp{\} is placed straight into the output.
@@ -16091,10 +17125,10 @@ These rules are presented in @ref{table-posix-sub}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+@code{\\\\&}! @code{\\&}!A literal @samp{\}, followed by the matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
@code{\\\\}! @code{\\}!@code{\}_cr
}
_bigskip}
@@ -16102,10 +17136,10 @@ _bigskip}
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, followed by the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, followed by the matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@item @code{\\\\} @tab @code{\\} @tab @code{\}
@end multitable
@end ifdocbook
@@ -16114,10 +17148,10 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\q} @code{\q} a literal @samp{\q}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@code{\\\\} @code{\\} @code{\}
@end display
@end ifnotdocbook
@@ -16127,17 +17161,17 @@ _bigskip}
The only case where the difference is noticeable is the last one: @samp{\\\\}
is seen as @samp{\\} and produces @samp{\} instead of @samp{\\}.
-Starting with version 3.1.4, @command{gawk} followed the POSIX rules
+Starting with @value{PVERSION} 3.1.4, @command{gawk} followed the POSIX rules
when @option{--posix} is specified (@pxref{Options}). Otherwise,
-it continued to follow the 1996 proposed rules, since
+it continued to follow the proposed rules, since
that had been its behavior for many years.
-When version 4.0.0 was released, the @command{gawk} maintainer
+When @value{PVERSION} 4.0.0 was released, the @command{gawk} maintainer
made the POSIX rules the default, breaking well over a decade's worth
of backwards compatibility.@footnote{This was rather naive of him, despite
there being a note in this section indicating that the next major version
would move to the POSIX rules.} Needless to say, this was a bad idea,
-and as of version 4.0.1, @command{gawk} resumed its historical
+and as of @value{PVERSION} 4.0.1, @command{gawk} resumed its historical
behavior, and only follows the POSIX rules when @option{--posix} is given.
The rules for @code{gensub()} are considerably simpler. At the runtime
@@ -16160,24 +17194,24 @@ as shown in @ref{table-gensub-escapes}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{gensub()} sees!@code{gensub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
- @code{&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\\}! @code{\\}!a literal @samp{\}_cr
- @code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
- @code{\\q}! @code{\q}!a literal @samp{q}_cr
+ @code{&}! @code{&}!The matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\\}! @code{\\}!A literal @samp{\}_cr
+ @code{\\\\&}! @code{\\&}!A literal @samp{\}, then the matched text_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{q}_cr
}
_bigskip}
@end tex
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{gensub()} sees @tab @code{gensub()} generates
-@item @code{&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\} @tab @code{\\} @tab a literal @samp{\}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, then the matched text
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{q}
+@item @code{&} @tab @code{&} @tab The matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\\} @tab @code{\\} @tab A literal @samp{\}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, then the matched text
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{q}
@end multitable
@end ifdocbook
@ifnottex
@@ -16185,12 +17219,12 @@ _bigskip}
@display
You type @code{gensub()} sees @code{gensub()} generates
-------- ------------- ------------------
- @code{&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\\} @code{\\} a literal @samp{\}
- @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\q} @code{\q} a literal @samp{q}
+ @code{&} @code{&} The matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\\\} @code{\\} A literal @samp{\}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, then the matched text
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\q} @code{\q} A literal @samp{q}
@end display
@end ifnotdocbook
@end ifnottex
@@ -16227,8 +17261,8 @@ Although this makes a certain amount of sense, it can be surprising.
The following functions relate to input/output (I/O).
Optional parameters are enclosed in square brackets ([ ]):
-@table @code
-@item close(@var{filename} @r{[}, @var{how}@r{]})
+@table @asis
+@item @code{close(}@var{filename} [@code{,} @var{how}]@code{)}
@cindexawkfunc{close}
@cindex files, closing
@cindex close file or coprocess
@@ -16247,7 +17281,10 @@ not matter.
@xref{Two-way I/O},
which discusses this feature in more detail and gives an example.
-@item fflush(@r{[}@var{filename}@r{]})
+Note that the second argument to @code{close()} is a @command{gawk}
+extension; it is not available in compatibility mode (@pxref{Options}).
+
+@item @code{fflush(}[@var{filename}]@code{)}
@cindexawkfunc{fflush}
@cindex flush buffered output
Flush any buffered output associated with @var{filename}, which is either a
@@ -16269,7 +17306,7 @@ buffers its output and the @code{fflush()} function forces
@cindex extensions, common@comma{} @code{fflush()} function
@cindex Brian Kernighan's @command{awk}
-@code{fflush()} was added to Brian Kernighan's version of @command{awk} in
+@code{fflush()} was added to BWK @command{awk} in
April of 1992. For two decades, it was not part of the POSIX standard.
As of December, 2012, it was accepted for inclusion into the POSIX
standard.
@@ -16281,7 +17318,7 @@ then @command{awk} flushes the buffers for @emph{all} open output files
and pipes.
@quotation NOTE
-Prior to version 4.0.2, @command{gawk}
+Prior to @value{PVERSION} 4.0.2, @command{gawk}
would flush only the standard output if there was no argument,
and flush all output files and pipes if the argument was the null
string. This was changed in order to be compatible with Brian
@@ -16297,7 +17334,7 @@ only the standard output.
@c @cindex warnings, automatic
@cindex troubleshooting, @code{fflush()} function
@code{fflush()} returns zero if the buffer is successfully flushed;
-otherwise, it returns non-zero (@command{gawk} returns @minus{}1).
+otherwise, it returns non-zero. (@command{gawk} returns @minus{}1.)
In the case where all buffers are flushed, the return value is zero
only if all buffers were flushed successfully. Otherwise, it is
@minus{}1, and @command{gawk} warns about the problem @var{filename}.
@@ -16307,7 +17344,7 @@ a file or pipe that was opened for reading (such as with @code{getline}),
or if @var{filename} is not an open file, pipe, or coprocess.
In such a case, @code{fflush()} returns @minus{}1, as well.
-@item system(@var{command})
+@item @code{system(@var{command})}
@cindexawkfunc{system}
@cindex invoke shell command
@cindex interacting with other programs
@@ -16473,8 +17510,13 @@ particular log record was written. Many programs log their timestamp
in the form returned by the @code{time()} system call, which is the
number of seconds since a particular epoch. On POSIX-compliant systems,
it is the number of seconds since
-1970-01-01 00:00:00 UTC, not counting leap seconds.@footnote{@xref{Glossary},
-especially the entries ``Epoch'' and ``UTC.''}
+1970-01-01 00:00:00 UTC, not counting leap
+@ifclear FOR_PRINT
+seconds.@footnote{@xref{Glossary}, especially the entries ``Epoch'' and ``UTC.''}
+@end ifclear
+@ifset FOR_PRINT
+seconds.
+@end ifset
All known POSIX-compliant systems support timestamps from 0 through
@iftex
@math{2^{31} - 1},
@@ -16504,8 +17546,9 @@ However, recent versions
of @command{mawk} (@pxref{Other Versions}) also support these functions.
Optional parameters are enclosed in square brackets ([ ]):
-@table @code
-@item mktime(@var{datespec})
+@c @asis for docbook
+@table @asis
+@item @code{mktime(@var{datespec})}
@cindexgawkfunc{mktime}
@cindex generate time values
Turn @var{datespec} into a timestamp in the same form
@@ -16535,7 +17578,7 @@ is out of range, @code{mktime()} returns @minus{}1.
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array
-@item strftime(@r{[}@var{format} @r{[}, @var{timestamp} @r{[}, @var{utc-flag}@r{]]]})
+@item @code{strftime(}[@var{format} [@code{,} @var{timestamp} [@code{,} @var{utc-flag}] ] ]@code{)}
@c STARTOFRANGE strf
@cindexgawkfunc{strftime}
@cindex format time string
@@ -16557,7 +17600,7 @@ output that is equivalent to that of the @command{date} utility.
You can assign a new value to @code{PROCINFO["strftime"]} to
change the default format; see below for the various format directives.
-@item systime()
+@item @code{systime()}
@cindexgawkfunc{systime}
@cindex timestamps
@cindex current system time
@@ -16632,10 +17675,10 @@ This is the ISO 8601 date format.
@item %g
The year modulo 100 of the ISO 8601 week number, as a decimal number (00--99).
-For example, January 1, 1993 is in week 53 of 1992. Thus, the year
-of its ISO 8601 week number is 1992, even though its year is 1993.
-Similarly, December 31, 1973 is in week 1 of 1974. Thus, the year
-of its ISO week number is 1974, even though its year is 1973.
+For example, January 1, 2012 is in week 52 of 2011. Thus, the year
+of its ISO 8601 week number is 2011, even though its year is 2012.
+Similarly, December 31, 2012 is in week 1 of 2013. Thus, the year
+of its ISO week number is 2013, even though its year is 2012.
@item %G
The full year of the ISO week number, as a decimal number.
@@ -16716,7 +17759,7 @@ The locale's ``appropriate'' time representation.
The year modulo 100 as a decimal number (00--99).
@item %Y
-The full year as a decimal number (e.g., 2011).
+The full year as a decimal number (e.g., 2015).
@c @cindex RFC 822
@c @cindex RFC 1036
@@ -16750,17 +17793,6 @@ uses the system's version of @code{strftime()} if it's there.
Typically, the conversion specifier either does not appear in the
returned string or appears literally.}
-@c @cindex locale, definition of
-Informally, a @dfn{locale} is the geographic place in which a program
-is meant to run. For example, a common way to abbreviate the date
-September 4, 2012 in the United States is ``9/4/12.''
-In many countries in Europe, however, it is abbreviated ``4.9.12.''
-Thus, the @samp{%x} specification in a @code{"US"} locale might produce
-@samp{9/4/12}, while in a @code{"EUROPE"} locale, it might produce
-@samp{4.9.12}. The ISO C standard defines a default @code{"C"}
-locale, which is an environment that is typical of what many C programmers
-are used to.
-
For systems that are not yet fully standards-compliant,
@command{gawk} supplies a copy of
@code{strftime()} from the GNU C Library.
@@ -16813,7 +17845,7 @@ the string. For example:
@example
$ date '+Today is %A, %B %d, %Y.'
-@print{} Today is Wednesday, March 30, 2011.
+@print{} Today is Monday, May 05, 2014.
@end example
Here is the @command{gawk} version of the @command{date} utility.
@@ -16833,7 +17865,7 @@ case $1 in
esac
gawk 'BEGIN @{
- format = "%a %b %e %H:%M:%S %Z %Y"
+ format = PROCINFO["strftime"]
exitval = 0
if (ARGC > 2)
@@ -16921,9 +17953,7 @@ Operands | 0 | 1 | 0 | 1 | 0 | 1
@end tex
@docbook
-<!-- FIXME: Fix ID and add xref in text. -->
-<table id="table-bitwise-ops">
-<title>Bitwise Operations</title>
+<informaltable>
<tgroup cols="7" colsep="1">
<colspec colname="c1"/>
@@ -16983,7 +18013,7 @@ Operands | 0 | 1 | 0 | 1 | 0 | 1
</tbody>
</tgroup>
-</table>
+</informaltable>
@end docbook
@end float
@@ -17019,32 +18049,32 @@ bitwise operations just described. They are:
@table @code
@cindexgawkfunc{and}
@cindex bitwise AND
-@item and(@var{v1}, @var{v2} @r{[}, @r{@dots{}]})
+@item @code{and(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
Return the bitwise AND of the arguments. There must be at least two.
@cindexgawkfunc{compl}
@cindex bitwise complement
-@item compl(@var{val})
+@item @code{compl(@var{val})}
Return the bitwise complement of @var{val}.
@cindexgawkfunc{lshift}
@cindex left shift
-@item lshift(@var{val}, @var{count})
+@item @code{lshift(@var{val}, @var{count})}
Return the value of @var{val}, shifted left by @var{count} bits.
@cindexgawkfunc{or}
@cindex bitwise OR
-@item or(@var{v1}, @var{v2} @r{[}, @r{@dots{}]})
+@item @code{or(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
Return the bitwise OR of the arguments. There must be at least two.
@cindexgawkfunc{rshift}
@cindex right shift
-@item rshift(@var{val}, @var{count})
+@item @code{rshift(@var{val}, @var{count})}
Return the value of @var{val}, shifted right by @var{count} bits.
@cindexgawkfunc{xor}
@cindex bitwise XOR
-@item xor(@var{v1}, @var{v2} @r{[}, @r{@dots{}]})
+@item @code{xor(@var{v1}, @var{v2}} [@code{,} @dots{}]@code{)}
Return the bitwise XOR of the arguments. There must be at least two.
@end table
@@ -17167,7 +18197,7 @@ results of the @code{compl()}, @code{lshift()}, and @code{rshift()} functions.
@command{gawk} provides a single function that lets you distinguish
an array from a scalar variable. This is necessary for writing code
-that traverses every element of a true multidimensional array
+that traverses every element of an array of arrays
(@pxref{Arrays of Arrays}).
@table @code
@@ -17205,10 +18235,10 @@ The descriptions here are purposely brief.
for the full story.
Optional parameters are enclosed in square brackets ([ ]):
-@table @code
+@table @asis
@cindexgawkfunc{bindtextdomain}
@cindex set directory of message catalogs
-@item bindtextdomain(@var{directory} @r{[}, @var{domain}@r{]})
+@item @code{bindtextdomain(@var{directory}} [@code{,} @var{domain}]@code{)}
Set the directory in which
@command{gawk} will look for message translation files, in case they
will not or cannot be placed in the ``standard'' locations
@@ -17222,14 +18252,14 @@ given @var{domain}.
@cindexgawkfunc{dcgettext}
@cindex translate string
-@item dcgettext(@var{string} @r{[}, @var{domain} @r{[}, @var{category}@r{]]})
+@item @code{dcgettext(@var{string}} [@code{,} @var{domain} [@code{,} @var{category}] ]@code{)}
Return the translation of @var{string} in
text domain @var{domain} for locale category @var{category}.
The default value for @var{domain} is the current value of @code{TEXTDOMAIN}.
The default value for @var{category} is @code{"LC_MESSAGES"}.
@cindexgawkfunc{dcngettext}
-@item dcngettext(@var{string1}, @var{string2}, @var{number} @r{[}, @var{domain} @r{[}, @var{category}@r{]]})
+@item @code{dcngettext(@var{string1}, @var{string2}, @var{number}} [@code{,} @var{domain} [@code{,} @var{category}] ]@code{)}
Return the plural form used for @var{number} of the
translation of @var{string1} and @var{string2} in text domain
@var{domain} for locale category @var{category}. @var{string1} is the
@@ -17265,6 +18295,12 @@ them, i.e., to tell @command{awk} what they should do.
@node Definition Syntax
@subsection Function Definition Syntax
+@quotation
+It's entirely fair to say that the @command{awk} syntax for local
+variable definitions is appallingly awful.
+@author Brian Kernighan
+@end quotation
+
@c STARTOFRANGE fdef
@cindex functions, defining
Definitions of functions can appear anywhere between the rules of an
@@ -17277,12 +18313,12 @@ entire program before starting to execute any of it.
The definition of a function named @var{name} looks like this:
-@example
-function @var{name}(@r{[}@var{parameter-list}@r{]})
-@{
+@display
+@code{function} @var{name}@code{(}[@var{parameter-list}]@code{)}
+@code{@{}
@var{body-of-function}
-@}
-@end example
+@code{@}}
+@end display
@cindex names, functions
@cindex functions, names of
@@ -17291,20 +18327,28 @@ function @var{name}(@r{[}@var{parameter-list}@r{]})
Here, @var{name} is the name of the function to define. A valid function
name is like a valid variable name: a sequence of letters, digits, and
underscores that doesn't start with a digit.
+Here too, only the 52 upper- and lowercase English letters may
+be used in a function name.
Within a single @command{awk} program, any particular name can only be
used as a variable, array, or function.
@var{parameter-list} is an optional list of the function's arguments and local
variable names, separated by commas. When the function is called,
the argument names are used to hold the argument values given in
-the call. The local variables are initialized to the empty string.
+the call.
+
A function cannot have two parameters with the same name, nor may it
have a parameter with the same name as the function itself.
+In addition, according to the POSIX standard, function parameters
+cannot have the same name as one of the special built-in variables
+(@pxref{Built-in Variables}). Not all versions of @command{awk} enforce
+this restriction.
-In addition, according to the POSIX standard, function parameters cannot have the same
-name as one of the special built-in variables
-(@pxref{Built-in Variables}. Not all versions of @command{awk}
-enforce this restriction.)
+Local variables act like the empty string if referenced where a string
+value is required, and like zero if referenced where a numeric value
+is required. This is the same as regular variables that have never been
+assigned a value. (There is more to understand about local variables;
+@pxref{Dynamic Typing}.)
The @var{body-of-function} consists of @command{awk} statements. It is the
most important part of the definition, because it says what the function
@@ -17428,7 +18472,8 @@ this program, using our function to format the results, prints:
21.2
@end example
-This function deletes all the elements in an array:
+This function deletes all the elements in an array (recall that the
+extra whitespace signifies the start of the local variable list):
@example
function delarray(a, i)
@@ -17451,17 +18496,18 @@ addition to the POSIX standard.)
The following is an example of a recursive function. It takes a string
as an input parameter and returns the string in backwards order.
Recursive functions must always have a test that stops the recursion.
-In this case, the recursion terminates when the starting position
-is zero, i.e., when there are no more characters left in the string.
+In this case, the recursion terminates when the input string is
+already empty.
+@c 8/2014: Thanks to Mike Brennan for the improved formulation
@cindex @code{rev()} user-defined function
@example
-function rev(str, start)
+function rev(str)
@{
- if (start == 0)
+ if (str == "")
return ""
- return (substr(str, start, 1) rev(str, start - 1))
+ return (rev(substr(str, 2)) substr(str, 1, 1))
@}
@end example
@@ -17470,7 +18516,7 @@ this way:
@example
$ @kbd{echo "Don't Panic!" |}
-> @kbd{gawk --source '@{ print rev($0, length($0)) @}' -f rev.awk}
+> @kbd{gawk -e '@{ print rev($0) @}' -f rev.awk}
@print{} !cinaP t'noD
@end example
@@ -17489,7 +18535,7 @@ to create an @command{awk} version of @code{ctime()}:
function ctime(ts, format)
@{
- format = "%a %b %e %H:%M:%S %Z %Y"
+ format = PROCINFO["strftime"]
if (ts == 0)
ts = systime() # use current time as default
return strftime(format, ts)
@@ -17541,7 +18587,8 @@ an error.
@cindex local variables, in a function
@cindex variables, local to a function
-There is no way to make a variable local to a @code{@{ @dots{} @}} block in
+Unlike many languages,
+there is no way to make a variable local to a @code{@{} @dots{} @code{@}} block in
@command{awk}, but you can make a variable local to a function. It is
good practice to do so whenever a variable is needed only in that
function.
@@ -17754,7 +18801,7 @@ BEGIN @{
@noindent
prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because
-@code{changeit} stores @code{"two"} in the second element of @code{a}.
+@code{changeit()} stores @code{"two"} in the second element of @code{a}.
@end quotation
@cindex undefined functions
@@ -17803,14 +18850,14 @@ This statement returns control to the calling part of the @command{awk} program.
can also be used to return a value for use in the rest of the @command{awk}
program. It looks like this:
-@example
-return @r{[}@var{expression}@r{]}
-@end example
+@display
+@code{return} [@var{expression}]
+@end display
The @var{expression} part is optional.
Due most likely to an oversight, POSIX does not define what the return
value is if you omit the @var{expression}. Technically speaking, this
-make the returned value undefined, and therefore, unpredictable.
+makes the returned value undefined, and therefore, unpredictable.
In practice, though, all versions of @command{awk} simply return the
null string, which acts like zero if used in a numeric context.
@@ -17913,9 +18960,9 @@ BEGIN @{
@end example
In this example, the first call to @code{foo()} generates
-a fatal error, so @command{gawk} will not report the second
-error. If you comment out that call, though, then @command{gawk}
-will report the second error.
+a fatal error, so @command{awk} will not report the second
+error. If you comment out that call, though, then @command{awk}
+does report the second error.
Usually, such things aren't a big issue, but it's worth
being aware of them.
@@ -17930,7 +18977,7 @@ being aware of them.
@cindex pointers to functions
@cindex differences in @command{awk} and @command{gawk}, indirect function calls
-This section describes a @command{gawk}-specific extension.
+This section describes an advanced, @command{gawk}-specific extension.
Often, you may wish to defer the choice of function to call until runtime.
For example, you may have different kinds of records, each of which
@@ -17976,8 +19023,11 @@ To process the data, you might write initially:
@noindent
This style of programming works, but can be awkward. With @dfn{indirect}
function calls, you tell @command{gawk} to use the @emph{value} of a
-variable as the name of the function to call.
+variable as the @emph{name} of the function to call.
+@cindex @code{@@}-notation for indirect function calls
+@cindex indirect function calls, @code{@@}-notation
+@cindex function calls, indirect, @code{@@}-notation for
The syntax is similar to that of a regular function call: an identifier
immediately followed by a left parenthesis, any arguments, and then
a closing right parenthesis, with the addition of a leading @samp{@@}
@@ -18035,7 +19085,6 @@ Otherwise they perform the expected computations and are not unusual.
@example
@c file eg/prog/indirectcall.awk
# For each record, print the class name and the requested statistics
-
@{
class_name = $1
gsub(/_/, " ", class_name) # Replace _ with spaces
@@ -18229,7 +19278,7 @@ function rsort(first, last)
@c endfile
@end example
-Here is an extended version of the data file:
+Here is an extended version of the @value{DF}:
@example
@c file eg/data/class_data2
@@ -18264,10 +19313,12 @@ $ @kbd{gawk -f quicksort.awk -f indirectcall.awk class_data2}
Remember that you must supply a leading @samp{@@} in front of an indirect function call.
-Unfortunately, indirect function calls cannot be used with the built-in functions. However,
-you can generally write ``wrapper'' functions which call the built-in ones, and those can
-be called indirectly. (Other than, perhaps, the mathematical functions, there is not a lot
-of reason to try to call the built-in functions indirectly.)
+Starting with @value{PVERSION} 4.1.2 of @command{gawk}, indirect function
+calls may also be used with built-in functions and with extension functions
+(@pxref{Dynamic Extensions}). The only thing you cannot do is pass a regular
+expression constant to a built-in function through an indirect function
+call.@footnote{This may change in a future version; recheck the documentation that
+comes with your version of @command{gawk} to see if it has.}
@command{gawk} does its best to make indirect function calls efficient.
For example, in the following case:
@@ -18278,23 +19329,84 @@ for (i = 1; i <= n; i++)
@end example
@noindent
-@code{gawk} will look up the actual function to call only once.
+@command{gawk} looks up the actual function to call only once.
+
+@node Functions Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+@command{awk} provides built-in functions and lets you define your own
+functions.
+
+@item
+POSIX @command{awk} provides three kinds of built-in functions: numeric,
+string, and I/O. @command{gawk} provides functions that work with values
+representing time, do bit manipulation, sort arrays, and internationalize
+and localize programs. @command{gawk} also provides several extensions to
+some of the standard functions, typically in the form of additional arguments.
+
+@item
+Functions accept zero or more arguments and return a value. The
+expressions that provide the argument values are completely evaluated
+before the function is called. Order of evaluation is not defined.
+The return value can be ignored.
+
+@item
+The handling of backslash in @code{sub()} and @code{gsub()} is not simple.
+It is more straightforward in @command{gawk}'s @code{gensub()} function,
+but that function still requires care in its use.
+
+@item
+User-defined functions provide important capabilities but come with
+some syntactic inelegancies. In a function call, there cannot be any
+space between the function name and the opening left parenthesis of the
+argument list. Also, there is no provision for local variables, so the
+convention is to add extra parameters, and to separate them visually
+from the real parameters by extra whitespace.
+
+@item
+User-defined functions may call other user-defined (and built-in)
+functions and may call themselves recursively. Function parameters
+``hide'' any global variables of the same names.
+You cannot use the name of a reserved variable (such as @code{ARGC})
+as the name of a parameter in user-defined functions.
+
+@item
+Scalar values are passed to user-defined functions by value. Array
+parameters are passed by reference; any changes made by the function to
+array parameters are thus visible after the function has returned.
+
+@item
+Use the @code{return} statement to return from a user-defined function.
+An optional expression becomes the function's return value. Only scalar
+values may be returned by a function.
+
+@item
+If a variable that has never been used is passed to a user-defined
+function, how that function treats the variable can set its nature:
+either scalar or array.
+
+@item
+@command{gawk} provides indirect function calls using a special syntax.
+By setting a variable to the name of a function, you can
+determine at runtime what function will be called at that point in the
+program. This is equivalent to function pointers in C and C++.
+
+@end itemize
@c ENDOFRANGE funcud
-@iftex
-@part Part II:@* Problem Solving With @command{awk}
-@end iftex
+@ifnotinfo
+@part @value{PART2}Problem Solving With @command{awk}
+@end ifnotinfo
-@ignore
@ifdocbook
-@part Part II:@* Problem Solving With @command{awk}
-
Part II shows how to use @command{awk} and @command{gawk} for problem solving.
There is lots of code here for you to read and learn from.
It contains the following chapters:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@ref{Library Functions}.
@@ -18302,7 +19414,6 @@ It contains the following chapters:
@ref{Sample Programs}.
@end itemize
@end ifdocbook
-@end ignore
@node Library Functions
@chapter A Library of @command{awk} Functions
@@ -18313,7 +19424,7 @@ It contains the following chapters:
@c STARTOFRANGE fudlib
@cindex functions, user-defined, library of
-@ref{User-defined}, describes how to write
+@DBREF{User-defined} describes how to write
your own @command{awk} functions. Writing functions is important, because
it allows you to encapsulate algorithms and program tasks in a single
place. It simplifies programming, making program development more
@@ -18337,25 +19448,26 @@ of good programs leads to better writing.
In fact, they felt this idea was so important that they placed this
statement on the cover of their book. Because we believe strongly
that their statement is correct, this @value{CHAPTER} and @ref{Sample
-Programs}, provide a good-sized body of code for you to read, and we hope,
+Programs}, provide a good-sized body of code for you to read and, we hope,
to learn from.
-@c 2e: USE TEXINFO-2 FUNCTION DEFINITION STUFF!!!!!!!!!!!!!
This @value{CHAPTER} presents a library of useful @command{awk} functions.
Many of the sample programs presented later in this @value{DOCUMENT}
use these functions.
The functions are presented here in a progression from simple to complex.
@cindex Texinfo
-@ref{Extract Program},
+@DBREF{Extract Program}
presents a program that you can use to extract the source code for
these example library functions and programs from the Texinfo source
for this @value{DOCUMENT}.
(This has already been done as part of the @command{gawk} distribution.)
+@ifclear FOR_PRINT
If you have written one or more useful, general-purpose @command{awk} functions
and would like to contribute them to the @command{awk} user community, see
@ref{How To Contribute}, for more information.
+@end ifclear
@cindex portability, example programs
The programs in this @value{CHAPTER} and in
@@ -18364,7 +19476,7 @@ freely use features that are @command{gawk}-specific.
Rewriting these programs for different implementations of @command{awk}
is pretty straightforward.
-@itemize @bullet
+@itemize @value{BULLET}
@item
Diagnostic error messages are sent to @file{/dev/stderr}.
Use @samp{| "cat 1>&2"} instead of @samp{> "/dev/stderr"} if your system
@@ -18408,6 +19520,8 @@ comparisons use only lowercase letters.
* Passwd Functions:: Functions for getting user information.
* Group Functions:: Functions for getting group information.
* Walking Arrays:: A function to walk arrays of arrays.
+* Library Functions Summary:: Summary of library functions.
+* Library Exercises:: Exercises.
@end menu
@node Library Names
@@ -18494,7 +19608,7 @@ A different convention, common in the Tcl community, is to use a single
associative array to hold the values needed by the library function(s), or
``package.'' This significantly decreases the number of actual global names
in use. For example, the functions described in
-@ref{Passwd Functions},
+@DBREF{Passwd Functions}
might have used array elements @code{@w{PW_data["inited"]}}, @code{@w{PW_data["total"]}},
@code{@w{PW_data["count"]}}, and @code{@w{PW_data["awklib"]}}, instead of
@code{@w{_pw_inited}}, @code{@w{_pw_awklib}}, @code{@w{_pw_total}},
@@ -18542,11 +19656,12 @@ provides an implementation for other versions of @command{awk}:
#
# Arnold Robbins, arnold@@skeeve.com, Public Domain
# February, 2004
+# Revised June, 2014
@c endfile
@end ignore
@c file eg/lib/strtonum.awk
-function mystrtonum(str, ret, chars, n, i, k, c)
+function mystrtonum(str, ret, n, i, k, c)
@{
if (str ~ /^0[0-7]*$/) @{
# octal
@@ -18554,12 +19669,13 @@ function mystrtonum(str, ret, chars, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
@}
- @} else if (str ~ /^0[xX][[:xdigit:]]+/) @{
+ @} else if (str ~ /^0[xX][[:xdigit:]]+$/) @{
# hexadecimal
str = substr(str, 3) # lop off leading 0x
n = length(str)
@@ -18567,10 +19683,9 @@ function mystrtonum(str, ret, chars, n, i, k, c)
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
c = tolower(c)
- if ((k = index("0123456789", c)) > 0)
- k-- # adjust for 1-basing in awk
- else if ((k = index("abcdef", c)) > 0)
- k += 9
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("123456789abcdef", c)
ret = ret * 16 + k
@}
@@ -18738,7 +19853,7 @@ An @code{END} rule is automatically added
to the program calling @code{assert()}. Normally, if a program consists
of just a @code{BEGIN} rule, the input files and/or standard input are
not read. However, now that the program has an @code{END} rule, @command{awk}
-attempts to read the input data files or standard input
+attempts to read the input @value{DF}s or standard input
(@pxref{Using BEGIN/END}),
most likely causing the program to hang as it waits for input.
@@ -18971,8 +20086,7 @@ function chr(c)
@c endfile
#### test code ####
-# BEGIN \
-# @{
+# BEGIN @{
# for (;;) @{
# printf("enter a character: ")
# if (getline var <= 0)
@@ -19057,7 +20171,7 @@ more difficult than they really need to be.}
@cindex timestamps, formatted
@cindex time, managing
The @code{systime()} and @code{strftime()} functions described in
-@ref{Time Functions},
+@DBREF{Time Functions}
provide the minimum functionality necessary for dealing with the time of day
in human readable form. While @code{strftime()} is extensive, the control
formats are not necessarily easy to remember or intuitively obvious when
@@ -19109,7 +20223,7 @@ function getlocaltime(time, ret, now, i)
now = systime()
# return date(1)-style output
- ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
+ ret = strftime(PROCINFO["strftime"], now)
# clear out target array
delete time
@@ -19143,7 +20257,7 @@ function getlocaltime(time, ret, now, i)
The string indices are easier to use and read than the various formats
required by @code{strftime()}. The @code{alarm} program presented in
-@ref{Alarm Program},
+@DBREF{Alarm Program}
uses this function.
A more general design for the @code{getlocaltime()} function would have
allowed the user to supply an optional timestamp value to use instead
@@ -19225,7 +20339,7 @@ This tests the result to see if it is empty or not. An equivalent
test would be @samp{contents == ""}.
@node Data File Management
-@section Data File Management
+@section @value{DDF} Management
@c STARTOFRANGE dataf
@cindex files, managing
@@ -19234,7 +20348,7 @@ test would be @samp{contents == ""}.
@c STARTOFRANGE flibdataf
@cindex functions, library, managing data files
This @value{SECTION} presents functions that are useful for managing
-command-line data files.
+command-line @value{DF}s.
@menu
* Filetrans Function:: A function for handling data file transitions.
@@ -19245,7 +20359,7 @@ command-line data files.
@end menu
@node Filetrans Function
-@subsection Noting Data File Boundaries
+@subsection Noting @value{DDF} Boundaries
@cindex files, managing, data file boundaries
@cindex files, initialization and cleanup
@@ -19253,8 +20367,8 @@ The @code{BEGIN} and @code{END} rules are each executed exactly once at
the beginning and end of your @command{awk} program, respectively
(@pxref{BEGIN/END}).
We (the @command{gawk} authors) once had a user who mistakenly thought that the
-@code{BEGIN} rule is executed at the beginning of each data file and the
-@code{END} rule is executed at the end of each data file.
+@code{BEGIN} rule is executed at the beginning of each @value{DF} and the
+@code{END} rule is executed at the end of each @value{DF}.
When informed
that this was not the case, the user requested that we add new special
@@ -19265,7 +20379,7 @@ Adding these special patterns to @command{gawk} wasn't necessary;
the job can be done cleanly in @command{awk} itself, as illustrated
by the following library program.
It arranges to call two user-supplied functions, @code{beginfile()} and
-@code{endfile()}, at the beginning and end of each data file.
+@code{endfile()}, at the beginning and end of each @value{DF}.
Besides solving the problem in only nine(!) lines of code, it does so
@emph{portably}; this works with any implementation of @command{awk}:
@@ -19296,17 +20410,17 @@ This file must be loaded before the user's ``main'' program, so that the
rule it supplies is executed first.
This rule relies on @command{awk}'s @code{FILENAME} variable that
-automatically changes for each new data file. The current file name is
+automatically changes for each new @value{DF}. The current @value{FN} is
saved in a private variable, @code{_oldfilename}. If @code{FILENAME} does
-not equal @code{_oldfilename}, then a new data file is being processed and
+not equal @code{_oldfilename}, then a new @value{DF} is being processed and
it is necessary to call @code{endfile()} for the old file. Because
@code{endfile()} should only be called if a file has been processed, the
program first checks to make sure that @code{_oldfilename} is not the null
-string. The program then assigns the current file name to
+string. The program then assigns the current @value{FN} to
@code{_oldfilename} and calls @code{beginfile()} for the file.
Because, like all @command{awk} variables, @code{_oldfilename} is
initialized to the null string, this rule executes correctly even for the
-first data file.
+first @value{DF}.
The program also supplies an @code{END} rule to do the final processing for
the last file. Because this @code{END} rule comes before any @code{END} rules
@@ -19315,7 +20429,7 @@ again the value of multiple @code{BEGIN} and @code{END} rules should be clear.
@cindex @code{beginfile()} user-defined function
@cindex @code{endfile()} user-defined function
-If the same data file occurs twice in a row on the command line, then
+If the same @value{DF} occurs twice in a row on the command line, then
@code{endfile()} and @code{beginfile()} are not executed at the end of the
first pass and at the beginning of the second pass.
The following version solves the problem:
@@ -19346,7 +20460,7 @@ END @{ endfile(_filename_) @}
@c endfile
@end example
-@ref{Wc Program},
+@DBREF{Wc Program}
shows how this library function can be used and
how it simplifies writing the main program.
@@ -19427,19 +20541,23 @@ to either update @code{ARGIND} on your own
or modify this code as appropriate.
The @code{rewind()} function also relies on the @code{nextfile} keyword
-(@pxref{Nextfile Statement}).
+(@pxref{Nextfile Statement}). Because of this, you should not call it
+from an @code{ENDFILE} rule. (This isn't necessary anyway, since as soon
+as an @code{ENDFILE} rule finishes, @command{gawk} goes to the next file!)
@node File Checking
-@subsection Checking for Readable Data Files
+@subsection Checking for Readable @value{DDF}s
@cindex troubleshooting, readable data files
@cindex readable data files@comma{} checking
@cindex files, skipping
-Normally, if you give @command{awk} a data file that isn't readable,
-it stops with a fatal error. There are times when you
-might want to just ignore such files and keep going. You can
-do this by prepending the following program to your @command{awk}
-program:
+Normally, if you give @command{awk} a @value{DF} that isn't readable,
+it stops with a fatal error. There are times when you might want to
+just ignore such files and keep going.@footnote{The @code{BEGINFILE}
+special pattern (@pxref{BEGINFILE/ENDFILE}) provides an alternative
+mechanism for dealing with files that can't be opened. However, the
+code here provides a portable solution.} You can do this by prepending
+the following program to your @command{awk} program:
@cindex @code{readable.awk} program
@example
@@ -19477,22 +20595,22 @@ skips the file (since it's no longer in the list).
See also @ref{ARGC and ARGV}.
@node Empty Files
-@subsection Checking For Zero-length Files
+@subsection Checking for Zero-length Files
All known @command{awk} implementations silently skip over zero-length files.
This is a by-product of @command{awk}'s implicit
read-a-record-and-match-against-the-rules loop: when @command{awk}
tries to read a record from an empty file, it immediately receives an
end of file indication, closes the file, and proceeds on to the next
-command-line data file, @emph{without} executing any user-level
+command-line @value{DF}, @emph{without} executing any user-level
@command{awk} program code.
Using @command{gawk}'s @code{ARGIND} variable
(@pxref{Built-in Variables}), it is possible to detect when an empty
-data file has been skipped. Similar to the library file presented
+@value{DF} has been skipped. Similar to the library file presented
in @ref{Filetrans Function}, the following library file calls a function named
@code{zerofile()} that the user must provide. The arguments passed are
-the file name and the position in @code{ARGV} where it was found:
+the @value{FN} and the position in @code{ARGV} where it was found:
@cindex @code{zerofile.awk} program
@example
@@ -19539,56 +20657,16 @@ the end of the command-line arguments. Note that the test in the
condition of the @code{for} loop uses the @samp{<=} operator,
not @samp{<}.
-As an exercise, you might consider whether this same problem can
-be solved without relying on @command{gawk}'s @code{ARGIND} variable.
-
-As a second exercise, revise this code to handle the case where
-an intervening value in @code{ARGV} is a variable assignment.
-
-@ignore
-# zerofile2.awk --- same thing, portably
-
-BEGIN @{
- ARGIND = Argind = 0
- for (i = 1; i < ARGC; i++)
- Fnames[ARGV[i]]++
-
-@}
-FNR == 1 @{
- while (ARGV[ARGIND] != FILENAME)
- ARGIND++
- Seen[FILENAME]++
- if (Seen[FILENAME] == Fnames[FILENAME])
- do
- ARGIND++
- while (ARGV[ARGIND] != FILENAME)
-@}
-ARGIND > Argind + 1 @{
- for (Argind++; Argind < ARGIND; Argind++)
- zerofile(ARGV[Argind], Argind)
-@}
-ARGIND != Argind @{
- Argind = ARGIND
-@}
-END @{
- if (ARGIND < ARGC - 1)
- ARGIND = ARGC - 1
- if (ARGIND > Argind)
- for (Argind++; Argind <= ARGIND; Argind++)
- zerofile(ARGV[Argind], Argind)
-@}
-@end ignore
-
@node Ignoring Assigns
-@subsection Treating Assignments as File Names
+@subsection Treating Assignments as @value{FFN}s
@cindex assignments as filenames
@cindex filenames, assignments as
Occasionally, you might not want @command{awk} to process command-line
variable assignments
(@pxref{Assignment Options}).
-In particular, if you have a file name that contains an @samp{=} character,
-@command{awk} treats the file name as an assignment, and does not process it.
+In particular, if you have a @value{FN} that contains an @samp{=} character,
+@command{awk} treats the @value{FN} as an assignment, and does not process it.
Some users have suggested an additional command-line option for @command{gawk}
to disable command-line assignments. However, some simple programming with
@@ -19632,7 +20710,7 @@ awk -v No_command_assign=1 -f noassign.awk -f yourprog.awk *
The function works by looping through the arguments.
It prepends @samp{./} to
any argument that matches the form
-of a variable assignment, turning that argument into a file name.
+of a variable assignment, turning that argument into a @value{FN}.
The use of @code{No_command_assign} allows you to disable command-line
assignments at invocation time, by giving the variable a true value.
@@ -19716,7 +20794,6 @@ application might want to print its own error message.)
@item optopt
The letter representing the command-line option.
-@c While not usually documented, most versions supply this variable.
@end table
The following C fragment shows how @code{getopt()} might process command-line
@@ -19767,7 +20844,6 @@ necessary for accessing individual characters
function was written before @command{gawk} acquired the ability to
split strings into single characters using @code{""} as the separator.
We have left it alone, since using @code{substr()} is more portable.}
-@c FIXME: could use split(str, a, "") to do it more easily.
The discussion that follows walks through the code a bit at a time:
@@ -19855,8 +20931,7 @@ it is not an option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) @{
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) @{
Optind++
_opti = 0
@@ -19950,7 +21025,7 @@ BEGIN @{
# test program
if (_getopt_test) @{
while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
- printf("c = <%c>, optarg = <%s>\n",
+ printf("c = <%c>, Optarg = <%s>\n",
_go_c, Optarg)
printf("non-option arguments:\n")
for (; Optind < ARGC; Optind++)
@@ -19966,32 +21041,31 @@ result of two sample runs of the test program:
@example
$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x}
-@print{} c = <a>, optarg = <>
-@print{} c = <c>, optarg = <>
-@print{} c = <b>, optarg = <ARG>
+@print{} c = <a>, Optarg = <>
+@print{} c = <c>, Optarg = <>
+@print{} c = <b>, Optarg = <ARG>
@print{} non-option arguments:
@print{} ARGV[3] = <bax>
@print{} ARGV[4] = <-x>
$ @kbd{awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc}
-@print{} c = <a>, optarg = <>
+@print{} c = <a>, Optarg = <>
@error{} x -- invalid option
-@print{} c = <?>, optarg = <>
+@print{} c = <?>, Optarg = <>
@print{} non-option arguments:
@print{} ARGV[4] = <xyz>
@print{} ARGV[5] = <abc>
@end example
-In both runs,
-the first @option{--} terminates the arguments to @command{awk}, so that it does
-not try to interpret the @option{-a}, etc., as its own options.
+In both runs, the first @option{--} terminates the arguments to
+@command{awk}, so that it does not try to interpret the @option{-a},
+etc., as its own options.
@quotation NOTE
-After @code{getopt()} is through, it is the responsibility of the user level
-code to
-clear out all the elements of @code{ARGV} from 1 to @code{Optind},
-so that @command{awk} does not try to process the command-line options
-as file names.
+After @code{getopt()} is through, it is the responsibility of the
+user-level code to clear out all the elements of @code{ARGV} from 1
+to @code{Optind}, so that @command{awk} does not try to process the
+command-line options as @value{FN}s.
@end quotation
Several of the sample programs presented in
@@ -20053,14 +21127,12 @@ no more entries, it returns @code{NULL}, the null pointer. When this
happens, the C program should call @code{endpwent()} to close the database.
Following is @command{pwcat}, a C program that ``cats'' the password database:
-@c Use old style function header for portability to old systems (SunOS, HP/UX).
-
@example
@c file eg/lib/pwcat.c
/*
* pwcat.c
*
- * Generate a printable version of the password database
+ * Generate a printable version of the password database.
*/
@c endfile
@ignore
@@ -20362,7 +21434,7 @@ once. If you are worried about squeezing every last cycle out of your
this is not necessary, since most @command{awk} programs are I/O-bound,
and such a change would clutter up the code.
-The @command{id} program in @ref{Id Program},
+The @command{id} program in @DBREF{Id Program}
uses these functions.
@c ENDOFRANGE libfudata
@c ENDOFRANGE flibudata
@@ -20388,7 +21460,7 @@ uses these functions.
@cindex group file
@cindex files, group
Much of the discussion presented in
-@ref{Passwd Functions},
+@DBREF{Passwd Functions}
applies to the group database as well. Although there has traditionally
been a well-known file (@file{/etc/group}) in a well-known format, the POSIX
standard only provides a set of C library routines
@@ -20406,7 +21478,7 @@ is as follows:
/*
* grcat.c
*
- * Generate a printable version of the group database
+ * Generate a printable version of the group database.
*/
@c endfile
@ignore
@@ -20493,7 +21565,7 @@ it is usually empty or set to @samp{*}.
@item Group ID Number
The group's numeric group ID number;
-this number must be unique within the file.
+the association of name to number must be unique within the file.
(On some systems it's a C @code{long}, and not an @code{int}. Thus
we cast it to @code{long} for all cases.)
@@ -20541,8 +21613,7 @@ There are several, modeled after the C library functions of the same names:
@c line break on _gr_init for smallbook
@c file eg/lib/groupawk.in
-BEGIN \
-@{
+BEGIN @{
# Change to suit your system
_gr_awklib = "/usr/local/libexec/awk/"
@}
@@ -20623,16 +21694,16 @@ database for the same group. This is common when a group has a large number
of members. A pair of such entries might look like the following:
@example
-tvpeople:*:101:johnny,jay,arsenio
+tvpeople:*:101:johnny,jay,arsenio
tvpeople:*:101:david,conan,tom,joan
@end example
For this reason, @code{_gr_init()} looks to see if a group name or
group ID number is already seen. If it is, then the user names are
-simply concatenated onto the previous list of users. (There is actually a
+simply concatenated onto the previous list of users.@footnote{There is actually a
subtle problem with the code just presented. Suppose that
the first time there were no names. This code adds the names with
-a leading comma. It also doesn't check that there is a @code{$4}.)
+a leading comma. It also doesn't check that there is a @code{$4}.}
Finally, @code{_gr_init()} closes the pipeline to @command{grcat}, restores
@code{FS} (and @code{FIELDWIDTHS} or @code{FPAT} if necessary), @code{RS}, and @code{$0},
@@ -20728,13 +21799,13 @@ Most of the work is in scanning the database and building the various
associative arrays. The functions that the user calls are themselves very
simple, relying on @command{awk}'s associative arrays to do work.
-The @command{id} program in @ref{Id Program},
+The @command{id} program in @DBREF{Id Program}
uses these functions.
@node Walking Arrays
@section Traversing Arrays of Arrays
-@ref{Arrays of Arrays}, described how @command{gawk}
+@DBREF{Arrays of Arrays} described how @command{gawk}
provides arrays of arrays. In particular, any element of
an array may be either a scalar, or another array. The
@code{isarray()} function (@pxref{Type Functions})
@@ -20792,24 +21863,123 @@ $ @kbd{gawk -f walk_array.awk}
@print{} a[3] = 3
@end example
-Walking an array and processing each element is a general-purpose
-operation. You might want to consider generalizing the @code{walk_array()}
-function by adding an additional parameter named @code{process}.
-
-Then, inside the loop, instead of simply printing the array element's
-index and value, use the indirect function call syntax
-(@pxref{Indirect Calls}) on @code{process}, passing it the index
-and the value.
-
-When calling @code{walk_array()}, you would pass the name of a user-defined
-function that expects to receive an index and a value, and then processes
-the element.
-
-
@c ENDOFRANGE libfgdata
@c ENDOFRANGE flibgdata
@c ENDOFRANGE gdatar
@c ENDOFRANGE libf
+
+@node Library Functions Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Reading programs is an excellent way to learn Good Programming.
+The functions provided in this @value{CHAPTER} and the next are intended
+to serve that purpose.
+
+@item
+When writing general-purpose library functions, put some thought into how
+to name any global variables so that they won't conflict with variables
+from a user's program.
+
+@item
+The functions presented here fit into the following categories:
+
+@c nested list
+@table @asis
+@item General problems
+String to number conversion, assertions, rounding, random number
+generation, converting characters to numbers, joining strings, getting
+easily usable time-of-day information, and reading a whole file in
+one shot.
+
+@item Managing @value{DF}s
+Noting @value{DF} boundaries, rereading the current file, checking for
+readable files, checking for zero-length files, and treating assignments
+as @value{FN}s.
+
+@item Processing command-line options
+An @command{awk} version of the standard C @code{getopt()} function.
+
+@item Reading the user and group databases
+Two sets of routines that parallel the C library versions.
+
+@item Traversing arrays of arrays
+A simple function to traverse an array of arrays to any depth.
+@end table
+@c end nested list
+
+@end itemize
+
+@c EXCLUDE START
+@node Library Exercises
+@section Exercises
+
+@enumerate
+@item
+In @ref{Empty Files}, we presented the @file{zerofile.awk} program,
+which made use of @command{gawk}'s @code{ARGIND} variable. Can this
+problem be solved without relying on @code{ARGIND}? If so, how?
+
+@ignore
+# zerofile2.awk --- same thing, portably
+
+BEGIN @{
+ ARGIND = Argind = 0
+ for (i = 1; i < ARGC; i++)
+ Fnames[ARGV[i]]++
+
+@}
+FNR == 1 @{
+ while (ARGV[ARGIND] != FILENAME)
+ ARGIND++
+ Seen[FILENAME]++
+ if (Seen[FILENAME] == Fnames[FILENAME])
+ do
+ ARGIND++
+ while (ARGV[ARGIND] != FILENAME)
+@}
+ARGIND > Argind + 1 @{
+ for (Argind++; Argind < ARGIND; Argind++)
+ zerofile(ARGV[Argind], Argind)
+@}
+ARGIND != Argind @{
+ Argind = ARGIND
+@}
+END @{
+ if (ARGIND < ARGC - 1)
+ ARGIND = ARGC - 1
+ if (ARGIND > Argind)
+ for (Argind++; Argind <= ARGIND; Argind++)
+ zerofile(ARGV[Argind], Argind)
+@}
+@end ignore
+
+@item
+As a related challenge, revise that code to handle the case where
+an intervening value in @code{ARGV} is a variable assignment.
+
+@item
+@DBREF{Walking Arrays} presented a function that walked a multidimensional
+array to print it out. However, walking an array and processing
+each element is a general-purpose operation. Generalize the
+@code{walk_array()} function by adding an additional parameter named
+@code{process}.
+
+Then, inside the loop, instead of printing the array element's index and
+value, use the indirect function call syntax (@pxref{Indirect Calls})
+on @code{process}, passing it the index and the value.
+
+When calling @code{walk_array()}, you would pass the name of a
+user-defined function that expects to receive an index and a value,
+and then processes the element.
+
+Test your new version by printing the array; you should end up with
+output identical to that of the original version.
+
+@end enumerate
+@c EXCLUDE END
+
@c ENDOFRANGE flib
@c ENDOFRANGE fudlib
@c ENDOFRANGE datagr
@@ -20819,11 +21989,13 @@ the element.
@c STARTOFRANGE awkpex
@cindex @command{awk} programs, examples of
+@c FULLXREF ON
@ref{Library Functions},
presents the idea that reading programs in a language contributes to
learning that language. This @value{CHAPTER} continues that theme,
presenting a potpourri of @command{awk} programs for your reading
enjoyment.
+@c FULLXREF OFF
@ifnotinfo
There are three sections.
The first describes how to run the programs presented
@@ -20850,6 +22022,8 @@ Many of these programs use library functions presented in
* Running Examples:: How to run these examples.
* Clones:: Clones of common utilities.
* Miscellaneous Programs:: Some interesting @command{awk} programs.
+* Programs Summary:: Summary of programs.
+* Programs Exercises:: Exercises.
@end menu
@node Running Examples
@@ -20864,7 +22038,7 @@ awk -f @var{program} -- @var{options} @var{files}
@noindent
Here, @var{program} is the name of the @command{awk} program (such as
@file{cut.awk}), @var{options} are any command-line options for the
-program that start with a @samp{-}, and @var{files} are the actual data files.
+program that start with a @samp{-}, and @var{files} are the actual @value{DF}s.
If your system supports the @samp{#!} executable interpreter mechanism
(@pxref{Executable Scripts}),
@@ -21002,13 +22176,7 @@ function usage( e1, e2)
@noindent
The variables @code{e1} and @code{e2} are used so that the function
-fits nicely on the
-@ifnotinfo
-page.
-@end ifnotinfo
-@ifnottex
-screen.
-@end ifnottex
+fits nicely on the @value{PAGE}.
@cindex @code{BEGIN} pattern, running @command{awk} programs and
@cindex @code{FS} variable, running @command{awk} programs and
@@ -21024,8 +22192,7 @@ string:
@example
@c file eg/prog/cut.awk
-BEGIN \
-@{
+BEGIN @{
FS = "\t" # default
OFS = FS
while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) @{
@@ -21038,7 +22205,7 @@ BEGIN \
OFS = ""
@} else if (c == "d") @{
if (length(Optarg) > 1) @{
- printf("Using first character of %s" \
+ printf("cut: using first character of %s" \
" for delimiter\n", Optarg) > "/dev/stderr"
Optarg = substr(Optarg, 1, 1)
@}
@@ -21047,7 +22214,7 @@ BEGIN \
if (FS == " ") # defeat awk semantics
FS = "[ ]"
@} else if (c == "s")
- suppress++
+ suppress = 1
else
usage()
@}
@@ -21069,7 +22236,7 @@ spaces. Also remember that after @code{getopt()} is through
we have to
clear out all the elements of @code{ARGV} from 1 to @code{Optind},
so that @command{awk} does not try to process the command-line options
-as file names.
+as @value{FN}s.
After dealing with the command-line options, the program verifies that the
options make sense. Only one or the other of @option{-c} and @option{-f}
@@ -21119,7 +22286,7 @@ function set_fieldlist( n, m, i, j, k, f, g)
m = split(f[i], g, "-")
@group
if (m != 2 || g[1] >= g[2]) @{
- printf("bad field list: %s\n",
+ printf("cut: bad field list: %s\n",
f[i]) > "/dev/stderr"
exit 1
@}
@@ -21166,7 +22333,7 @@ function set_charlist( field, i, j, f, g, n, m, t,
if (index(f[i], "-") != 0) @{ # range
m = split(f[i], g, "-")
if (m != 2 || g[1] >= g[2]) @{
- printf("bad character list: %s\n",
+ printf("cut: bad character list: %s\n",
f[i]) > "/dev/stderr"
exit 1
@}
@@ -21242,7 +22409,6 @@ of picking the input line apart by characters.
@c ENDOFRANGE ficut
@c ENDOFRANGE colcut
-@c Exercise: Rewrite using split with "".
@node Egrep Program
@subsection Searching for Regular Expressions in Files
@@ -21260,14 +22426,14 @@ expressions that are almost identical to those available in @command{awk}
(@pxref{Regexp}).
You invoke it as follows:
-@example
-egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{}
-@end example
+@display
+@command{egrep} [@var{options}] @code{'@var{pattern}'} @var{files} @dots{}
+@end display
The @var{pattern} is a regular expression. In typical usage, the regular
expression is quoted to prevent the shell from expanding any of the
-special characters as file name wildcards. Normally, @command{egrep}
-prints the lines that matched. If multiple file names are provided on
+special characters as @value{FN} wildcards. Normally, @command{egrep}
+prints the lines that matched. If multiple @value{FN}s are provided on
the command line, each output line is preceded by the name of the file
and a colon.
@@ -21358,7 +22524,7 @@ pattern is supplied with @option{-e}, the first nonoption on the
command line is used. The @command{awk} command-line arguments up to @code{ARGV[Optind]}
are cleared, so that @command{awk} won't try to process them as files. If no
files are specified, the standard input is used, and if multiple files are
-specified, we make sure to note this so that the file names can precede the
+specified, we make sure to note this so that the @value{FN}s can precede the
matched lines in the output:
@example
@@ -21392,8 +22558,6 @@ if a match happens, we output the translated line, not the original.}
The rule is
commented out since it is not necessary with @command{gawk}:
-@c Exercise: Fix this, w/array and new line as key to original line
-
@example
@c file eg/prog/egrep.awk
#@{
@@ -21444,6 +22608,11 @@ function endfile(file)
@c endfile
@end example
+The @code{BEGINFILE} and @code{ENDFILE} special patterns
+(@pxref{BEGINFILE/ENDFILE}) could be used, but then the program would be
+@command{gawk}-specific. Additionally, this example was written before
+@command{gawk} acquired @code{BEGINFILE} and @code{ENDFILE}.
+
The following rule does most of the work of matching lines. The variable
@code{matches} is true if the line matched the pattern. If the user
wants lines that did not match, the sense of @code{matches} is inverted
@@ -21456,9 +22625,9 @@ A number of additional tests are made, but they are only done if we
are not counting lines. First, if the user only wants exit status
(@code{no_print} is true), then it is enough to know that @emph{one}
line in this file matched, and we can skip on to the next file with
-@code{nextfile}. Similarly, if we are only printing file names, we can
-print the file name, and then skip to the next file with @code{nextfile}.
-Finally, each line is printed, with a leading file name and colon
+@code{nextfile}. Similarly, if we are only printing @value{FN}s, we can
+print the @value{FN}, and then skip to the next file with @code{nextfile}.
+Finally, each line is printed, with a leading @value{FN} and colon
if necessary:
@cindex @code{!} (exclamation point), @code{!} operator
@@ -21498,11 +22667,8 @@ there are no matches, the exit status is one; otherwise it is zero:
@example
@c file eg/prog/egrep.awk
-END \
-@{
- if (total == 0)
- exit 1
- exit 0
+END @{
+ exit (total == 0)
@}
@c endfile
@end example
@@ -21525,17 +22691,6 @@ function usage( e)
The variable @code{e} is used so that the function fits nicely
on the printed page.
-@cindex @code{END} pattern, backslash continuation and
-@cindex @code{\} (backslash), continuing lines and
-@cindex backslash (@code{\}), continuing lines and
-Just a note on programming style: you may have noticed that the @code{END}
-rule uses backslash continuation, with the open brace on a line by
-itself. This is so that it more closely resembles the way functions
-are written. Many of the examples
-in this @value{CHAPTER}
-use this style. You can decide for yourself if you like writing
-your @code{BEGIN} and @code{END} rules this way
-or not.
@c ENDOFRANGE regexps
@c ENDOFRANGE sfregexp
@c ENDOFRANGE fsregexp
@@ -21556,7 +22711,7 @@ corresponding user and group names. The output might look like this:
@example
$ @kbd{id}
-@print{} uid=500(arnold) gid=500(arnold) groups=6(disk),7(lp),19(floppy)
+@print{} uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo)
@end example
@cindex @code{PROCINFO} array, and user and group ID numbers
@@ -21592,6 +22747,7 @@ numbers:
# Arnold Robbins, arnold@@skeeve.com, Public Domain
# May 1993
# Revised February 1996
+# Revised May 2014
@c endfile
@end ignore
@@ -21601,8 +22757,7 @@ numbers:
# egid=5(blat) groups=9(nine),2(two),1(one)
@group
-BEGIN \
-@{
+BEGIN @{
uid = PROCINFO["uid"]
euid = PROCINFO["euid"]
gid = PROCINFO["gid"]
@@ -21611,34 +22766,26 @@ BEGIN \
printf("uid=%d", uid)
pw = getpwuid(uid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (euid != uid) @{
printf(" euid=%d", euid)
pw = getpwuid(euid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
@}
printf(" gid=%d", gid)
pw = getgrgid(gid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (egid != gid) @{
printf(" egid=%d", egid)
pw = getgrgid(egid)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
@}
for (i = 1; ("group" i) in PROCINFO; i++) @{
@@ -21647,16 +22794,20 @@ BEGIN \
group = PROCINFO["group" i]
printf("%d", group)
pw = getgrgid(group)
- if (pw != "") @{
- split(pw, a, ":")
- printf("(%s)", a[1])
- @}
+ if (pw != "")
+ pr_first_field(pw)
if (("group" (i+1)) in PROCINFO)
printf(",")
@}
print ""
@}
+
+function pr_first_field(str, a)
+@{
+ split(str, a, ":")
+ printf("(%s)", a[1])
+@}
@c endfile
@end example
@@ -21676,12 +22827,10 @@ The loop is also correct if there are @emph{no} supplementary
groups; then the condition is false the first time it's
tested, and the loop body never executes.
-@c exercise!!!
-@ignore
-The POSIX version of @command{id} takes arguments that control which
-information is printed. Modify this version to accept the same
-arguments and perform in the same way.
-@end ignore
+The @code{pr_first_field()} function simply factors out some
+code that is used repeatedly, making the whole program
+slightly shorter and cleaner.
+
@c ENDOFRANGE id
@node Split Program
@@ -21698,9 +22847,9 @@ Usage is as follows:@footnote{This is the traditional usage. The
POSIX usage is different, but not relevant for what the program
aims to demonstrate.}
-@example
-split @r{[}-@var{count}@r{]} file @r{[} @var{prefix} @r{]}
-@end example
+@display
+@command{split} [@code{-@var{count}}] [@var{file}] [@var{prefix}]
+@end display
By default,
the output files are named @file{xaa}, @file{xab}, and so on. Each file has
@@ -21709,7 +22858,7 @@ number of lines in each file, supply a number on the command line
preceded with a minus; e.g., @samp{-500} for files with 500 lines in them
instead of 1000. To change the name of the output files to something like
@file{myfileaa}, @file{myfileab}, and so on, supply an additional
-argument that specifies the file name prefix.
+argument that specifies the @value{FN} prefix.
Here is a version of @command{split} in @command{awk}. It uses the
@code{ord()} and @code{chr()} functions presented in
@@ -21719,8 +22868,8 @@ The program first sets its defaults, and then tests to make sure there are
not too many arguments. It then looks at each argument in turn. The
first argument could be a minus sign followed by a number. If it is, this happens
to look like a negative number, so it is made positive, and that is the
-count of lines. The data file name is skipped over and the final argument
-is used as the prefix for the output file names:
+count of lines. The @value{DF} name is skipped over and the final argument
+is used as the prefix for the output @value{FN}s:
@cindex @code{split.awk} program
@example
@@ -21734,11 +22883,12 @@ is used as the prefix for the output file names:
#
# Arnold Robbins, arnold@@skeeve.com, Public Domain
# May 1993
+# Revised slightly, May 2014
@c endfile
@end ignore
@c file eg/prog/split.awk
-# usage: split [-num] [file] [outname]
+# usage: split [-count] [file] [outname]
BEGIN @{
outfile = "x" # default
@@ -21747,7 +22897,7 @@ BEGIN @{
usage()
i = 1
- if (ARGV[i] ~ /^-[[:digit:]]+$/) @{
+ if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) @{
count = -ARGV[i]
ARGV[i] = ""
i++
@@ -21769,7 +22919,7 @@ BEGIN @{
The next rule does most of the work. @code{tcount} (temporary count) tracks
how many lines have been printed to the output file so far. If it is greater
than @code{count}, it is time to close the current file and start a new one.
-@code{s1} and @code{s2} track the current suffixes for the file name. If
+@code{s1} and @code{s2} track the current suffixes for the @value{FN}. If
they are both @samp{z}, the file is just too big. Otherwise, @code{s1}
moves to the next letter in the alphabet and @code{s2} starts over again at
@samp{a}:
@@ -21801,8 +22951,6 @@ moves to the next letter in the alphabet and @code{s2} starts over again at
@c endfile
@end example
-@c Exercise: do this with just awk builtin functions, index("abc..."), substr, etc.
-
@noindent
The @code{usage()} function simply prints an error message and exits:
@@ -21819,21 +22967,19 @@ function usage( e)
@noindent
The variable @code{e} is used so that the function
-fits nicely on the
-@ifinfo
-screen.
-@end ifinfo
-@ifnotinfo
-page.
-@end ifnotinfo
+fits nicely on the @value{PAGE}.
This program is a bit sloppy; it relies on @command{awk} to automatically close the last file
instead of doing it in an @code{END} rule.
It also assumes that letters are contiguous in the character set,
which isn't true for EBCDIC systems.
-@c Exercise: Fix these problems.
-@c BFD...
+@ifset FOR_PRINT
+You might want to consider how to eliminate the use of
+@code{ord()} and @code{chr()}; this can be done in such a
+way as to solve the EBCDIC issue as well.
+@end ifset
+
@c ENDOFRANGE filspl
@c ENDOFRANGE split
@@ -21848,9 +22994,9 @@ The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies
its standard input to its standard output and also duplicates it to the
files named on the command line. Its usage is as follows:
-@example
-tee @r{[}-a@r{]} file @dots{}
-@end example
+@display
+@command{tee} [@option{-a}] @var{file} @dots{}
+@end display
The @option{-a} option tells @code{tee} to append to the named files, instead of
truncating them and starting over.
@@ -21859,13 +23005,13 @@ The @code{BEGIN} rule first makes a copy of all the command-line arguments
into an array named @code{copy}.
@code{ARGV[0]} is not copied, since it is not needed.
@code{tee} cannot use @code{ARGV} directly, since @command{awk} attempts to
-process each file name in @code{ARGV} as input data.
+process each @value{FN} in @code{ARGV} as input data.
@cindex flag variables
If the first argument is @option{-a}, then the flag variable
@code{append} is set to true, and both @code{ARGV[1]} and
@code{copy[1]} are deleted. If @code{ARGC} is less than two, then no
-file names were supplied and @code{tee} prints a usage message and exits.
+@value{FN}s were supplied and @code{tee} prints a usage message and exits.
Finally, @command{awk} is forced to read the standard input by setting
@code{ARGV[1]} to @code{"-"} and @code{ARGC} to two:
@@ -21887,8 +23033,7 @@ Finally, @command{awk} is forced to read the standard input by setting
@c endfile
@end ignore
@c file eg/prog/tee.awk
-BEGIN \
-@{
+BEGIN @{
for (i = 1; i < ARGC; i++)
copy[i] = ARGV[i]
@@ -21950,8 +23095,7 @@ Finally, the @code{END} rule cleans up by closing all the output files:
@example
@c file eg/prog/tee.awk
-END \
-@{
+END @{
for (i in copy)
close(copy[i])
@}
@@ -21975,9 +23119,9 @@ input, and by default removes duplicate lines. In other words, it only
prints unique lines---hence the name. @command{uniq} has a number of
options. The usage is as follows:
-@example
-uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]}
-@end example
+@display
+@command{uniq} [@option{-udc} [@code{-@var{n}}]] [@code{+@var{n}}] [@var{inputfile} [@var{outputfile}]]
+@end display
The options for @command{uniq} are:
@@ -22001,11 +23145,11 @@ by runs of spaces and/or TABs.
Skip @var{n} characters before comparing lines. Any fields specified with
@samp{-@var{n}} are skipped first.
-@item @var{input file}
+@item @var{inputfile}
Data is read from the input file named on the command line, instead of from
the standard input.
-@item @var{output file}
+@item @var{outputfile}
The generated output is sent to the named output file, instead of to the
standard output.
@end table
@@ -22068,8 +23212,7 @@ function usage( e)
# -n skip n fields
# +n skip n characters, skip fields first
-BEGIN \
-@{
+BEGIN @{
count = 1
outputfile = "/dev/stdout"
opts = "udc0:1:2:3:4:5:6:7:8:9:"
@@ -22081,7 +23224,7 @@ BEGIN \
else if (c == "c")
do_count++
else if (index("0123456789", c) != 0) @{
- # getopt requires args to options
+ # getopt() requires args to options
# this messes us up for things like -5
if (Optarg ~ /^[[:digit:]]+$/)
fcount = (c Optarg) + 0
@@ -22218,6 +23361,22 @@ END @{
@}
@c endfile
@end example
+
+@ifset FOR_PRINT
+The logic for choosing which lines to print represents a @dfn{state
+machine}, which is ``a device that can be in one of a set number of stable
+conditions depending on its previous condition and on the present values
+of its inputs.''@footnote{This is the definition returned from entering
+@code{define: state machine} into Google.}
+Brian Kernighan suggests that
+``an alternative approach to state machines is to just read
+the input into an array, then use indexing. It's almost always
+easier code, and for most inputs where you would use this, just
+as fast.'' Consider how to rewrite the logic to follow this
+suggestion.
+@end ifset
+
+
@c ENDOFRANGE prunt
@c ENDOFRANGE tpul
@c ENDOFRANGE uniq
@@ -22242,9 +23401,9 @@ END @{
The @command{wc} (word count) utility counts lines, words, and characters in
one or more input files. Its usage is as follows:
-@example
-wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]}
-@end example
+@display
+@command{wc} [@option{-lwc}] [@var{files} @dots{}]
+@end display
If no files are specified on the command line, @command{wc} reads its standard
input. If there are multiple files, it also prints total counts for all
@@ -22331,7 +23490,7 @@ BEGIN @{
@end example
The @code{beginfile()} function is simple; it just resets the counts of lines,
-words, and characters to zero, and saves the current file name in
+words, and characters to zero, and saves the current @value{FN} in
@code{fname}:
@example
@@ -22344,18 +23503,10 @@ function beginfile(file)
@c endfile
@end example
-The @code{endfile()} function adds the current file's numbers to the running
-totals of lines, words, and characters.@footnote{@command{wc} can't just use the value of
-@code{FNR} in @code{endfile()}. If you examine
-the code in
-@ref{Filetrans Function},
-you will see that
-@code{FNR} has already been reset by the time
-@code{endfile()} is called.} It then prints out those numbers
-for the file that was just read. It relies on @code{beginfile()} to reset the
-numbers for the following data file:
-@c FIXME: ONE DAY: make the above footnote an exercise,
-@c instead of giving away the answer.
+The @code{endfile()} function adds the current file's numbers to the
+running totals of lines, words, and characters. It then prints out those
+numbers for the file that was just read. It relies on @code{beginfile()}
+to reset the numbers for the following @value{DF}:
@example
@c file eg/prog/wc.awk
@@ -22596,8 +23747,7 @@ Here is the program:
@c file eg/prog/alarm.awk
# usage: alarm time [ "message" [ count [ delay ] ] ]
-BEGIN \
-@{
+BEGIN @{
# Initial argument sanity checking
usage1 = "usage: alarm time ['message' [count [delay]]]"
usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
@@ -22672,7 +23822,7 @@ is how long to wait before setting off the alarm:
# how long to sleep for
naptime = target - current
if (naptime <= 0) @{
- print "time is in the past!" > "/dev/stderr"
+ print "alarm: time is in the past!" > "/dev/stderr"
exit 1
@}
@c endfile
@@ -22725,19 +23875,18 @@ often used to map uppercase letters into lowercase for further processing:
@end example
@command{tr} requires two lists of characters.@footnote{On some older
-systems,
-including Solaris,
-@command{tr} may require that the lists be written as
-range expressions enclosed in square brackets (@samp{[a-z]}) and quoted,
-to prevent the shell from attempting a file name expansion. This is
-not a feature.} When processing the input, the first character in the
-first list is replaced with the first character in the second list,
-the second character in the first list is replaced with the second
-character in the second list, and so on. If there are more characters
-in the ``from'' list than in the ``to'' list, the last character of the
-``to'' list is used for the remaining characters in the ``from'' list.
-
-Some time ago,
+systems, including Solaris, the system version of @command{tr} may require
+that the lists be written as range expressions enclosed in square brackets
+(@samp{[a-z]}) and quoted, to prevent the shell from attempting a file
+name expansion. This is not a feature.} When processing the input, the
+first character in the first list is replaced with the first character
+in the second list, the second character in the first list is replaced
+with the second character in the second list, and so on. If there are
+more characters in the ``from'' list than in the ``to'' list, the last
+character of the ``to'' list is used for the remaining characters in the
+``from'' list.
+
+Once upon a time,
@c early or mid-1989!
a user proposed that a transliteration function should
be added to @command{gawk}.
@@ -22753,9 +23902,8 @@ of standard @command{awk}: dealing with individual characters is very
painful, requiring repeated use of the @code{substr()}, @code{index()},
and @code{gsub()} built-in functions
(@pxref{String Functions}).@footnote{This
-program was written before @command{gawk} acquired the ability to
+program was also written before @command{gawk} acquired the ability to
split each character in a string into separate array elements.}
-@c Exercise: How might you use this new feature to simplify the program?
There are two functions. The first, @code{stranslate()}, takes three
arguments:
@@ -22851,18 +23999,23 @@ BEGIN @{
While it is possible to do character transliteration in a user-level
function, it is not necessarily efficient, and we (the @command{gawk}
authors) started to consider adding a built-in function. However,
-shortly after writing this program, we learned that the System V Release 4
-@command{awk} had added the @code{toupper()} and @code{tolower()} functions
-(@pxref{String Functions}).
-These functions handle the vast majority of the
-cases where character transliteration is necessary, and so we chose to
-simply add those functions to @command{gawk} as well and then leave well
-enough alone.
+shortly after writing this program, we learned that Brian Kernighan
+had added the @code{toupper()} and @code{tolower()} functions to his
+@command{awk} (@pxref{String Functions}). These functions handle the
+vast majority of the cases where character transliteration is necessary,
+and so we chose to simply add those functions to @command{gawk} as well
+and then leave well enough alone.
An obvious improvement to this program would be to set up the
@code{t_ar} array only once, in a @code{BEGIN} rule. However, this
assumes that the ``from'' and ``to'' lists
will never change throughout the lifetime of the program.
+
+Another obvious improvement is to enable the use of ranges,
+such as @samp{a-z}, as allowed by the @command{tr} utility.
+Look at the code for @file{cut.awk} (@pxref{Cut Program})
+for inspiration.
+
@c ENDOFRANGE chtra
@c ENDOFRANGE tr
@@ -22890,7 +24043,18 @@ The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that
@command{awk} splits records at blank lines
(@pxref{Records}).
It sets @code{MAXLINES} to 100, since 100 is the maximum number
-of lines on the page (20 * 5 = 100).
+of lines on the page
+@iftex
+(@math{20 @cdot 5 = 100}).
+@end iftex
+@ifnottex
+@ifnotdocbook
+(20 * 5 = 100).
+@end ifnotdocbook
+@end ifnottex
+@docbook
+(20 &sdot; 5 = 100). @c
+@end docbook
Most of the work is done in the @code{printpage()} function.
The label lines are stored sequentially in the @code{line} array. But they
@@ -22984,8 +24148,7 @@ function printpage( i, j)
Count++
@}
-END \
-@{
+END @{
printpage()
@}
@c endfile
@@ -23002,7 +24165,7 @@ END \
When working with large amounts of text, it can be interesting to know
how often different words appear. For example, an author may overuse
-certain words, in which case she might wish to find synonyms to substitute
+certain words, in which case he or she might wish to find synonyms to substitute
for words that appear too often. This @value{SUBSECTION} develops a
program for counting words and presenting the frequency information
in a useful format.
@@ -23032,7 +24195,7 @@ it prints the counts.
This program has several problems that prevent it from being
useful on real text files:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @command{awk} language considers upper- and lowercase characters to be
distinct. Therefore, ``bartender'' and ``Bartender'' are not treated
@@ -23080,6 +24243,10 @@ END @{
@}
@end example
+The regexp @samp{/[^[:alnum:]_[:blank:]]/} might have been written
+@samp{/[[:punct:]]/}, but then underscores would also be removed,
+and we want to keep them.
+
Assuming we have saved this program in a file named @file{wordfreq.awk},
and that the data is in @file{file1}, the following pipeline:
@@ -23132,7 +24299,7 @@ The @command{uniq} program
(@pxref{Uniq Program}),
removes duplicate lines from @emph{sorted} data.
-Suppose, however, you need to remove duplicate lines from a data file but
+Suppose, however, you need to remove duplicate lines from a @value{DF} but
that you want to preserve the order the lines are in. A good example of
this might be a shell history file. The history file keeps a copy of all
the commands you have entered, and it is not unusual to repeat a command
@@ -23191,6 +24358,7 @@ information. For example, using the following @code{print} statement in the
print data[lines[i]], lines[i]
@end example
+@noindent
This works because @code{data[$0]} is incremented each time a line is
seen.
@c ENDOFRANGE lidu
@@ -23237,7 +24405,7 @@ The Texinfo language is described fully, starting with
For our purposes, it is enough to know three things about Texinfo input
files:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The ``at'' symbol (@samp{@@}) is special in Texinfo, much as
the backslash (@samp{\}) is in C
@@ -23327,7 +24495,7 @@ BEGIN @{ IGNORECASE = 1 @}
/^@@c(omment)?[ \t]+system/ \
@{
if (NF < 3) @{
- e = (FILENAME ":" FNR)
+ e = ("extract: " FILENAME ":" FNR)
e = (e ": badly formed `system' line")
print e > "/dev/stderr"
next
@@ -23336,7 +24504,7 @@ BEGIN @{ IGNORECASE = 1 @}
$2 = ""
stat = system($0)
if (stat != 0) @{
- e = (FILENAME ":" FNR)
+ e = ("extract: " FILENAME ":" FNR)
e = (e ": warning: system returned " stat)
print e > "/dev/stderr"
@}
@@ -23346,16 +24514,10 @@ BEGIN @{ IGNORECASE = 1 @}
@noindent
The variable @code{e} is used so that the rule
-fits nicely on the
-@ifnotinfo
-page.
-@end ifnotinfo
-@ifnottex
-screen.
-@end ifnottex
+fits nicely on the @value{PAGE}.
The second rule handles moving data into files. It verifies that a
-file name is given in the directive. If the file named is not the
+@value{FN} is given in the directive. If the file named is not the
current file, then the current file is closed. Keeping the current file
open until a new file is encountered allows the use of the @samp{>}
redirection for printing the contents, keeping open file management
@@ -23379,12 +24541,11 @@ the array @code{a}, using the @code{split()} function
The @samp{@@} symbol is used as the separator character.
Each element of @code{a} that is empty indicates two successive @samp{@@}
symbols in the original line. For each two empty elements (@samp{@@@@} in
-the original file), we have to add a single @samp{@@} symbol back
-in.@footnote{This program was written before @command{gawk} had the
-@code{gensub()} function. Consider how you might use it to simplify the code.}
+the original file), we have to add a single @samp{@@} symbol back in.
When the processing of the array is finished, @code{join()} is called with the
-value of @code{SUBSEP}, to rejoin the pieces back into a single
+value of @code{SUBSEP} (@pxref{Multidimensional}),
+to rejoin the pieces back into a single
line. That line is then printed to the output file:
@example
@@ -23392,7 +24553,7 @@ line. That line is then printed to the output file:
/^@@c(omment)?[ \t]+file/ \
@{
if (NF != 3) @{
- e = (FILENAME ":" FNR ": badly formed `file' line")
+ e = ("extract: " FILENAME ":" FNR ": badly formed `file' line")
print e > "/dev/stderr"
next
@}
@@ -23437,20 +24598,19 @@ subsequent output is appended to the file
(@pxref{Redirection}).
This makes it easy to mix program text and explanatory prose for the same
sample source file (as has been done here!) without any hassle. The file is
-only closed when a new data file name is encountered or at the end of the
+only closed when a new @value{DF} name is encountered or at the end of the
input file.
Finally, the function @code{@w{unexpected_eof()}} prints an appropriate
error message and then exits.
The @code{END} rule handles the final cleanup, closing the open file:
-@c function lb put on same line for page breaking. sigh
@example
@c file eg/prog/extract.awk
@group
function unexpected_eof()
@{
- printf("%s:%d: unexpected EOF or error\n",
+ printf("extract: %s:%d: unexpected EOF or error\n",
FILENAME, FNR) > "/dev/stderr"
exit 1
@}
@@ -23490,7 +24650,7 @@ Here, @samp{s/old/new/g} tells @command{sed} to look for the regexp
The following program, @file{awksed.awk}, accepts at least two command-line
arguments: the pattern to look for and the text to replace it with. Any
-additional arguments are treated as data file names to process. If none
+additional arguments are treated as @value{DF} names to process. If none
are provided, the standard input is used:
@cindex Brennan, Michael
@@ -23564,33 +24724,13 @@ The @code{BEGIN} rule handles the setup, checking for the right number
of arguments and calling @code{usage()} if there is a problem. Then it sets
@code{RS} and @code{ORS} from the command-line arguments and sets
@code{ARGV[1]} and @code{ARGV[2]} to the null string, so that they are
-not treated as file names
+not treated as @value{FN}s
(@pxref{ARGC and ARGV}).
The @code{usage()} function prints an error message and exits.
Finally, the single rule handles the printing scheme outlined above,
using @code{print} or @code{printf} as appropriate, depending upon the
value of @code{RT}.
-
-@ignore
-Exercise, compare the performance of this version with the more
-straightforward:
-
-BEGIN {
- pat = ARGV[1]
- repl = ARGV[2]
- ARGV[1] = ARGV[2] = ""
-}
-
-{ gsub(pat, repl); print }
-
-Exercise: what are the advantages and disadvantages of this version versus sed?
- Advantage: egrep regexps
- speed (?)
- Disadvantage: no & in replacement text
-
-Others?
-@end ignore
@c ENDOFRANGE awksed
@node Igawk Program
@@ -23633,7 +24773,7 @@ BEGIN @{
The following program, @file{igawk.sh}, provides this service.
It simulates @command{gawk}'s searching of the @env{AWKPATH} variable
and also allows @dfn{nested} includes; i.e., a file that is included
-with @samp{@@include} can contain further @samp{@@include} statements.
+with @code{@@include} can contain further @code{@@include} statements.
@command{igawk} makes an effort to only include files once, so that nested
includes don't accidentally include a library function twice.
@@ -23659,11 +24799,11 @@ a shell variable that will be expanded. There are two cases:
@enumerate a
@item
-Literal text, provided with @option{--source} or @option{--source=}. This
+Literal text, provided with @option{-e} or @option{--source}. This
text is just appended directly.
@item
-Source file names, provided with @option{-f}. We use a neat trick and append
+Source @value{FN}s, provided with @option{-f}. We use a neat trick and append
@samp{@@include @var{filename}} to the shell variable's contents. Since the file-inclusion
program works the way @command{gawk} does, this gets the text
of the file included into the program at the correct point.
@@ -23671,12 +24811,12 @@ of the file included into the program at the correct point.
@item
Run an @command{awk} program (naturally) over the shell variable's contents to expand
-@samp{@@include} statements. The expanded program is placed in a second
+@code{@@include} statements. The expanded program is placed in a second
shell variable.
@item
Run the expanded program with @command{gawk} and any other original command-line
-arguments that the user supplied (such as the data file names).
+arguments that the user supplied (such as the @value{DF} names).
@end enumerate
This program uses shell variables extensively: for storing command-line arguments,
@@ -23691,24 +24831,25 @@ argument is @samp{debug}.
The next part loops through all the command-line arguments.
There are several cases of interest:
-@table @code
-@item --
+@c @asis for docbook
+@table @asis
+@item @option{--}
This ends the arguments to @command{igawk}. Anything else should be passed on
to the user's @command{awk} program without being evaluated.
-@item -W
+@item @option{-W}
This indicates that the next option is specific to @command{gawk}. To make
argument processing easier, the @option{-W} is appended to the front of the
remaining arguments and the loop continues. (This is an @command{sh}
programming trick. Don't worry about it if you are not familiar with
@command{sh}.)
-@item -v@r{,} -F
+@item @option{-v}, @option{-F}
These are saved and passed on to @command{gawk}.
-@item -f@r{,} --file@r{,} --file=@r{,} -Wfile=
-The file name is appended to the shell variable @code{program} with an
-@samp{@@include} statement.
+@item @option{-f}, @option{--file}, @option{--file=}, @option{-Wfile=}
+The @value{FN} is appended to the shell variable @code{program} with an
+@code{@@include} statement.
The @command{expr} utility is used to remove the leading option part of the
argument (e.g., @samp{--file=}).
(Typical @command{sh} usage would be to use the @command{echo} and @command{sed}
@@ -23716,10 +24857,10 @@ utilities to do this work. Unfortunately, some versions of @command{echo} evalu
escape sequences in their arguments, possibly mangling the program text.
Using @command{expr} avoids this problem.)
-@item --source@r{,} --source=@r{,} -Wsource=
+@item @option{--source}, @option{--source=}, @option{-Wsource=}
The source text is appended to @code{program}.
-@item --version@r{,} -Wversion
+@item @option{--version}, @option{-Wversion}
@command{igawk} prints its version number, runs @samp{gawk --version}
to get the @command{gawk} version information, and then exits.
@end table
@@ -23827,15 +24968,15 @@ fi
@c endfile
@end example
-The @command{awk} program to process @samp{@@include} directives
+The @command{awk} program to process @code{@@include} directives
is stored in the shell variable @code{expand_prog}. Doing this keeps
the shell script readable. The @command{awk} program
reads through the user's program, one line at a time, using @code{getline}
(@pxref{Getline}). The input
-file names and @samp{@@include} statements are managed using a stack.
-As each @samp{@@include} is encountered, the current file name is
-``pushed'' onto the stack and the file named in the @samp{@@include}
-directive becomes the current file name. As each file is finished,
+@value{FN}s and @code{@@include} statements are managed using a stack.
+As each @code{@@include} is encountered, the current @value{FN} is
+``pushed'' onto the stack and the file named in the @code{@@include}
+directive becomes the current @value{FN}. As each file is finished,
the stack is ``popped,'' and the previous input file becomes the current
input file again. The process is started by making the original file
the first one on the stack.
@@ -23844,16 +24985,16 @@ The @code{pathto()} function does the work of finding the full path to
a file. It simulates @command{gawk}'s behavior when searching the
@env{AWKPATH} environment variable
(@pxref{AWKPATH Variable}).
-If a file name has a @samp{/} in it, no path search is done.
-Similarly, if the file name is @code{"-"}, then that string is
+If a @value{FN} has a @samp{/} in it, no path search is done.
+Similarly, if the @value{FN} is @code{"-"}, then that string is
used as-is. Otherwise,
-the file name is concatenated with the name of each directory in
-the path, and an attempt is made to open the generated file name.
+the @value{FN} is concatenated with the name of each directory in
+the path, and an attempt is made to open the generated @value{FN}.
The only way to test if a file can be read in @command{awk} is to go
ahead and try to read it with @code{getline}; this is what @code{pathto()}
does.@footnote{On some very old versions of @command{awk}, the test
@samp{getline junk < t} can loop forever if the file exists but is empty.
-Caveat emptor.} If the file can be read, it is closed and the file name
+Caveat emptor.} If the file can be read, it is closed and the @value{FN}
is returned:
@ignore
@@ -23908,17 +25049,17 @@ BEGIN @{
@c endfile
@end example
-The stack is initialized with @code{ARGV[1]}, which will be @samp{/dev/stdin}.
+The stack is initialized with @code{ARGV[1]}, which will be @code{"/dev/stdin"}.
The main loop comes next. Input lines are read in succession. Lines that
-do not start with @samp{@@include} are printed verbatim.
-If the line does start with @samp{@@include}, the file name is in @code{$2}.
+do not start with @code{@@include} are printed verbatim.
+If the line does start with @code{@@include}, the @value{FN} is in @code{$2}.
@code{pathto()} is called to generate the full path. If it cannot, then the program
prints an error message and continues.
The next thing to check is if the file is included already. The
-@code{processed} array is indexed by the full file name of each included
+@code{processed} array is indexed by the full @value{FN} of each included
file and it tracks this information for us. If the file is
-seen again, a warning message is printed. Otherwise, the new file name is
+seen again, a warning message is printed. Otherwise, the new @value{FN} is
pushed onto the stack and processing continues.
Finally, when @code{getline} encounters the end of the input file, the file
@@ -23939,7 +25080,7 @@ the program is done:
fpath = pathto($2)
@group
if (fpath == "") @{
- printf("igawk:%s:%d: cannot find %s\n",
+ printf("igawk: %s:%d: cannot find %s\n",
input[stackptr], FNR, $2) > "/dev/stderr"
continue
@}
@@ -23979,7 +25120,7 @@ It's done in these steps:
@enumerate
@item
-Run @command{gawk} with the @samp{@@include}-processing program (the
+Run @command{gawk} with the @code{@@include}-processing program (the
value of the @code{expand_prog} shell variable) on standard input.
@item
@@ -23996,14 +25137,14 @@ options and command-line arguments that the user supplied.
@c this causes more problems than it solves, so leave it out.
@ignore
-The special file @file{/dev/null} is passed as a data file to @command{gawk}
+The special file @file{/dev/null} is passed as a @value{DF} to @command{gawk}
to handle an interesting case. Suppose that the user's program only has
-a @code{BEGIN} rule and there are no data files to read.
-The program should exit without reading any data files.
+a @code{BEGIN} rule and there are no @value{DF}s to read.
+The program should exit without reading any @value{DF}s.
However, suppose that an included library file defines an @code{END}
rule of its own. In this case, @command{gawk} will hang, reading standard
input. In order to avoid this, @file{/dev/null} is explicitly added to the
-command-line. Reading from @file{/dev/null} always returns an immediate
+command line. Reading from @file{/dev/null} always returns an immediate
end of file indication.
@c Hmm. Add /dev/null if $# is 0? Still messes up ARGV. Sigh.
@@ -24018,27 +25159,25 @@ eval gawk $opts -- '"$processed_program"' '"$@@"'
The @command{eval} command is a shell construct that reruns the shell's parsing
process. This keeps things properly quoted.
-This version of @command{igawk} represents my fifth version of this program.
+This version of @command{igawk} represents the fifth version of this program.
There are four key simplifications that make the program work better:
-@itemize @bullet
+@itemize @value{BULLET}
@item
-Using @samp{@@include} even for the files named with @option{-f} makes building
+Using @code{@@include} even for the files named with @option{-f} makes building
the initial collected @command{awk} program much simpler; all the
-@samp{@@include} processing can be done once.
+@code{@@include} processing can be done once.
@item
Not trying to save the line read with @code{getline}
in the @code{pathto()} function when testing for the
file's accessibility for use with the main program simplifies things
considerably.
-@c what problem does this engender though - exercise
-@c answer, reading from "-" or /dev/stdin
@item
Using a @code{getline} loop in the @code{BEGIN} rule does it all in one
place. It is not necessary to call out to a separate loop for processing
-nested @samp{@@include} statements.
+nested @code{@@include} statements.
@item
Instead of saving the expanded program in a temporary file, putting it in a shell variable
@@ -24058,40 +25197,9 @@ Finally, @command{igawk} shows that it is not always necessary to add new
features to a program; they can often be layered on top.
@ignore
With @command{igawk},
-there is no real reason to build @samp{@@include} processing into
+there is no real reason to build @code{@@include} processing into
@command{gawk} itself.
@end ignore
-
-@cindex search paths
-@cindex search paths, for source files
-@cindex source files@comma{} search path for
-@cindex files, source@comma{} search path for
-@cindex directories, searching
-As an additional example of this, consider the idea of having two
-files in a directory in the search path:
-
-@table @file
-@item default.awk
-This file contains a set of default library functions, such
-as @code{getopt()} and @code{assert()}.
-
-@item site.awk
-This file contains library functions that are specific to a site or
-installation; i.e., locally developed functions.
-Having a separate file allows @file{default.awk} to change with
-new @command{gawk} releases, without requiring the system administrator to
-update it each time by adding the local functions.
-@end table
-
-One user
-@c Karl Berry, karl@ileaf.com, 10/95
-suggested that @command{gawk} be modified to automatically read these files
-upon startup. Instead, it would be very simple to modify @command{igawk}
-to do this. Since @command{igawk} can process nested @samp{@@include}
-directives, @file{default.awk} could simply contain @samp{@@include}
-statements for the desired library functions.
-
-@c Exercise: make this change
@c ENDOFRANGE libfex
@c ENDOFRANGE flibex
@c ENDOFRANGE awkpex
@@ -24228,6 +25336,7 @@ babels beslab
babery yabber
@dots{}
@end example
+
@c ENDOFRANGE anagram
@node Signature Program
@@ -24259,7 +25368,10 @@ X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O,
O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O@}'
@end example
-We leave it to you to determine what the program does.
+@cindex Johansen, Chris
+We leave it to you to determine what the program does. (If you are
+truly desperate to understand it, see Chris Johansen's explanation,
+which is embedded in the Texinfo source file for this @value{DOCUMENT}.)
@ignore
To: "Arnold Robbins" <arnold@skeeve.com>
@@ -24339,19 +25451,193 @@ BEGIN {
}
@end ignore
-@iftex
-@part Part III:@* Moving Beyond Standard @command{awk} With @command{gawk}
-@end iftex
+@node Programs Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+The functions provided in this @value{CHAPTER} and the previous one
+continue on the theme that reading programs is an excellent way to learn
+Good Programming.
+
+@item
+Using @samp{#!} to make @command{awk} programs directly runnable makes
+them easier to use. Otherwise, invoke the program using @samp{awk
+-f @dots{}}.
+
+@item
+Reimplementing standard POSIX programs in @command{awk} is a pleasant
+exercise; @command{awk}'s expressive power lets you write such programs
+in relatively few lines of code, yet they are functionally complete
+and usable.
+
+@item
+One of standard @command{awk}'s weaknesses is working with individual
+characters. The ability to use @code{split()} with the empty string as
+the separator can considerably simplify such tasks.
+
+@item
+The library functions from @ref{Library Functions}, proved their
+usefulness for a number of real (if small) programs.
+
+@item
+Besides reinventing POSIX wheels, other programs solved a selection of
+interesting problems, such as finding duplicate words in text, printing
+mailing labels, and finding anagrams.
+
+@end itemize
+
+@c EXCLUDE START
+@node Programs Exercises
+@section Exercises
+
+@enumerate
+@item
+Rewrite @file{cut.awk} (@pxref{Cut Program})
+using @code{split()} with @code{""} as the separator.
+
+@item
+In @ref{Egrep Program}, we mentioned that @samp{egrep -i} could be
+simulated in versions of @command{awk} without @code{IGNORECASE} by
+using @code{tolower()} on the line and the pattern. In a footnote there,
+we also mentioned that this solution has a bug: the translated line is
+output, and not the original one. Fix this problem.
+@c Exercise: Fix this, w/array and new line as key to original line
+
+@item
+The POSIX version of @command{id} takes options that control which
+information is printed. Modify the @command{awk} version
+(@pxref{Id Program}) to accept the same arguments and perform in the
+same way.
+
+@item
+The @file{split.awk} program (@pxref{Split Program}) assumes
+that letters are contiguous in the character set,
+which isn't true for EBCDIC systems.
+Fix this problem.
+(Hint: Consider a different way to work through the alphabet,
+without relying on @code{ord()} and @code{chr()}.)
+
+@item
+In @file{uniq.awk} (@pxref{Uniq Program}), the
+logic for choosing which lines to print represents a @dfn{state
+machine}, which is ``a device that can be in one of a set number of stable
+conditions depending on its previous condition and on the present values
+of its inputs.''@footnote{This is the definition returned from entering
+@code{define: state machine} into Google.}
+Brian Kernighan suggests that
+``an alternative approach to state machines is to just read
+the input into an array, then use indexing. It's almost always
+easier code, and for most inputs where you would use this, just
+as fast.'' Rewrite the logic to follow this
+suggestion.
+
+
+@item
+Why can't the @file{wc.awk} program (@pxref{Wc Program}) just
+use the value of @code{FNR} in @code{endfile()}?
+Hint: Examine the code in @ref{Filetrans Function}.
@ignore
-@ifdocbook
+@command{wc} can't just use the value of @code{FNR} in
+@code{endfile()}. If you examine the code in @ref{Filetrans Function},
+you will see that @code{FNR} has already been reset by the time
+@code{endfile()} is called.
+@end ignore
+
+@item
+Manipulation of individual characters in the @command{translate} program
+(@pxref{Translate Program}) is painful using standard @command{awk}
+functions. Given that @command{gawk} can split strings into individual
+characters using @code{""} as the separator, how might you use this
+feature to simplify the program?
+
+@item
+The @file{extract.awk} program (@pxref{Extract Program}) was written
+before @command{gawk} had the @code{gensub()} function. Use it
+to simplify the code.
+
+@item
+Compare the performance of the @file{awksed.awk} program
+(@pxref{Simple Sed}) with the more straightforward:
+
+@example
+BEGIN @{
+ pat = ARGV[1]
+ repl = ARGV[2]
+ ARGV[1] = ARGV[2] = ""
+@}
+
+@{ gsub(pat, repl); print @}
+@end example
+
+@item
+What are the advantages and disadvantages of @file{awksed.awk} versus
+the real @command{sed} utility?
+
+@ignore
+ Advantage: egrep regexps
+ speed (?)
+ Disadvantage: no & in replacement text
-@part Part III:@* Moving Beyond Standard @command{awk} With @command{gawk}
+Others?
+@end ignore
+
+@item
+In @ref{Igawk Program}, we mentioned that not trying to save the line
+read with @code{getline} in the @code{pathto()} function when testing
+for the file's accessibility for use with the main program simplifies
+things considerably. What problem does this engender though?
+@c answer, reading from "-" or /dev/stdin
+
+@cindex search paths
+@cindex search paths, for source files
+@cindex source files@comma{} search path for
+@cindex files, source@comma{} search path for
+@cindex directories, searching
+@item
+As an additional example of the idea that it is not always necessary to
+add new features to a program, consider the idea of having two files in
+a directory in the search path:
+
+@table @file
+@item default.awk
+This file contains a set of default library functions, such
+as @code{getopt()} and @code{assert()}.
+@item site.awk
+This file contains library functions that are specific to a site or
+installation; i.e., locally developed functions.
+Having a separate file allows @file{default.awk} to change with
+new @command{gawk} releases, without requiring the system administrator to
+update it each time by adding the local functions.
+@end table
+
+One user
+@c Karl Berry, karl@ileaf.com, 10/95
+suggested that @command{gawk} be modified to automatically read these files
+upon startup. Instead, it would be very simple to modify @command{igawk}
+to do this. Since @command{igawk} can process nested @code{@@include}
+directives, @file{default.awk} could simply contain @code{@@include}
+statements for the desired library functions.
+Make this change.
+
+@item
+Modify @file{anagram.awk} (@pxref{Anagram Program}), to avoid
+the use of the external @command{sort} utility.
+
+@end enumerate
+@c EXCLUDE END
+
+@ifnotinfo
+@part @value{PART3}Moving Beyond Standard @command{awk} With @command{gawk}
+@end ifnotinfo
+
+@ifdocbook
Part III focuses on features specific to @command{gawk}.
It contains the following chapters:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@ref{Advanced Features}.
@@ -24368,13 +25654,9 @@ It contains the following chapters:
@ref{Dynamic Extensions}.
@end itemize
@end ifdocbook
-@end ignore
@node Advanced Features
@chapter Advanced Features of @command{gawk}
-@ifset WITH_NETWORK_CHAPTER
-@cindex advanced features, network connections, See Also networks@comma{} connections
-@end ifset
@c STARTOFRANGE gawadv
@cindex @command{gawk}, features, advanced
@c STARTOFRANGE advgaw
@@ -24387,6 +25669,8 @@ Contributed by: Peter Langston <pud!psl@bellcore.bellcore.com>
"Write documentation as if whoever reads it is a violent psychopath
who knows where you live."
@end ignore
+@cindex Langston, Peter
+@cindex English, Steve
@quotation
@i{Write documentation as if whoever reads it is
a violent psychopath who knows where you live.}
@@ -24406,10 +25690,11 @@ of TCP/IP networking. Finally, @command{gawk}
can @dfn{profile} an @command{awk} program, making it possible to tune
it for performance.
+@c FULLXREF ON
A number of advanced features require separate @value{CHAPTER}s of their
own:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@ref{Internationalization}, discusses how to internationalize
your @command{awk} programs, so that they can speak multiple
@@ -24428,6 +25713,7 @@ debugger for debugging @command{awk} programs.
discusses the ability to dynamically add new built-in functions to
@command{gawk}.
@end itemize
+@c FULLXREF OFF
@menu
* Nondecimal Data:: Allowing nondecimal input data.
@@ -24436,6 +25722,7 @@ discusses the ability to dynamically add new built-in functions to
* Two-way I/O:: Two-way communications with another process.
* TCP/IP Networking:: Using @command{gawk} for network programming.
* Profiling:: Profiling your @command{awk} programs.
+* Advanced Features Summary:: Summary of advanced features.
@end menu
@node Nondecimal Data
@@ -24468,7 +25755,7 @@ $ @kbd{echo 0123 123 0x123 | gawk '@{ print $1, $2, $3 @}'}
The @code{print} statement treats its expressions as strings.
Although the fields can act as numbers when necessary,
they are still strings, so @code{print} does not try to treat them
-numerically. You may need to add zero to a field to force it to
+numerically. You need to add zero to a field to force it to
be treated as a number. For example:
@example
@@ -24490,7 +25777,7 @@ disabled. If you want it, you must explicitly request it.
@emph{Use of this option is not recommended.}
It can break old programs very badly.
Instead, use the @code{strtonum()} function to convert your data
-(@pxref{Nondecimal-numbers}).
+(@pxref{String Functions}).
This makes your programs easier to write and easier to read, and
leads to less surprising results.
@end quotation
@@ -24522,9 +25809,9 @@ Often, though, it is desirable to be able to loop over the elements
in a particular order that you, the programmer, choose. @command{gawk}
lets you do this.
-@ref{Controlling Scanning}, describes how you can assign special,
+@DBREF{Controlling Scanning} describes how you can assign special,
pre-defined values to @code{PROCINFO["sorted_in"]} in order to
-control the order in which @command{gawk} will traverse an array
+control the order in which @command{gawk} traverses an array
during a @code{for} loop.
In addition, the value of @code{PROCINFO["sorted_in"]} can be a function name.
@@ -24848,9 +26135,9 @@ END @{
So far, so good. Now it starts to get interesting. Both @code{asort()}
and @code{asorti()} accept a third string argument to control comparison
-of array elements. In @ref{String Functions}, we ignored this third
-argument; however, the time has now come to describe how this argument
-affects these two functions.
+of array elements. When we introduced @code{asort()} and @code{asorti()}
+in @ref{String Functions}, we ignored this third argument; however,
+now is the time to describe how this argument affects these two functions.
Basically, the third argument specifies how the array is to be sorted.
There are two possibilities. As with @code{PROCINFO["sorted_in"]},
@@ -24891,6 +26178,9 @@ Caveat Emptor.
@node Two-way I/O
@section Two-Way Communications with Another Process
+
+@c 8/2014. Neither Mike nor BWK saw this as relevant. Commenting it out.
+@ignore
@cindex Brennan, Michael
@cindex programmers, attractiveness of
@smallexample
@@ -24920,6 +26210,7 @@ the scent of perl programmers.
Mike Brennan
@c brennan@@whidbey.com
@end smallexample
+@end ignore
@cindex advanced features, processes@comma{} communicating with
@cindex processes, two-way communications with
@@ -24946,7 +26237,10 @@ system("rm " tempfile)
This works, but not elegantly. Among other things, it requires that
the program be run in a directory that cannot be shared among users;
for example, @file{/tmp} will not do, as another user might happen
-to be using a temporary file with the same name.
+to be using a temporary file with the same name.@footnote{Michael
+Brennan suggests the use of @code{rand()} to generate unique
+@value{FN}s. This is a valid point; nevertheless, temporary files
+remain more difficult than two-way pipes.} @c 8/2014
@cindex coprocesses
@cindex input/output, two-way
@@ -24980,7 +26274,7 @@ the shell.
There are some cautionary items to be aware of:
-@itemize @bullet
+@itemize @value{BULLET}
@item
As the code inside @command{gawk} currently stands, the coprocess's
standard error goes to the same place that the parent @command{gawk}'s
@@ -25046,6 +26340,7 @@ has been read, @command{gawk} terminates the coprocess and exits.
As a side note, the assignment @samp{LC_ALL=C} in the @command{sort}
command ensures traditional Unix (ASCII) sorting from @command{sort}.
+This is not strictly necessary here, but it's good to know how to do this.
@cindex @command{gawk}, @code{PROCINFO} array in
@cindex @code{PROCINFO} array, and communications via ptys
@@ -25064,7 +26359,7 @@ print @dots{} |& command # start two-way pipe
@end example
@noindent
-Using ptys avoids the buffer deadlock issues described earlier, at some
+Using ptys usually avoids the buffer deadlock issues described earlier, at some
loss in performance. If your system does not have ptys, or if all the
system's ptys are in use, @command{gawk} automatically falls back to
using regular pipes.
@@ -25099,10 +26394,10 @@ another process on another system across an IP network connection.
You can think of this as just a @emph{very long} two-way pipeline to
a coprocess.
The way @command{gawk} decides that you want to use TCP/IP networking is
-by recognizing special file names that begin with one of @samp{/inet/},
-@samp{/inet4/} or @samp{/inet6}.
+by recognizing special @value{FN}s that begin with one of @samp{/inet/},
+@samp{/inet4/} or @samp{/inet6/}.
-The full syntax of the special file name is
+The full syntax of the special @value{FN} is
@file{/@var{net-type}/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}}.
The components are:
@@ -25168,7 +26463,9 @@ See
@inforef{Top, , General Introduction, gawkinet, TCP/IP Internetworking with @command{gawk}},
@end ifinfo
@ifnotinfo
-See @cite{TCP/IP Internetworking with @command{gawk}},
+See
+@uref{http://www.gnu.org/software/gawk/manual/gawkinet/,
+@cite{TCP/IP Internetworking with @command{gawk}}},
which comes as part of the @command{gawk} distribution,
@end ifnotinfo
for a much more complete introduction and discussion, as well as
@@ -25305,7 +26602,7 @@ in the morning to work.)
This example illustrates many of the basic features of profiling output.
They are as follows:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The program is printed in the order @code{BEGIN} rules,
@code{BEGINFILE} rules,
@@ -25364,7 +26661,6 @@ the body of an @code{if}, @code{else}, or loop is only a single statement.
@item
Parentheses are used only where needed, as indicated by the structure
of the program and the precedence rules.
-@c extra verbiage here satisfies the copyeditor. ugh.
For example, @samp{(3 + 5) * 4} means add three plus five, then multiply
the total by four. However, @samp{3 + 5 * 4} has no parentheses, and
means @samp{3 + (5 * 4)}.
@@ -25447,7 +26743,7 @@ As usual, the profiled version of the program is written to
@file{awkprof.out}, or to a different file if one is specified with
the @option{--profile} option.
-Along with the regular profile, as shown earlier, the profile
+Along with the regular profile, as shown earlier, the profile file
includes a trace of any active functions:
@example
@@ -25489,14 +26785,59 @@ When called this way, @command{gawk} ``pretty prints'' the program into
@file{awkprof.out}, without any execution counts.
@quotation NOTE
-The @option{--pretty-print} option still runs your program.
-This will change in the next major release.
+Once upon a time, the @option{--pretty-print} option would also run
+your program. This is no longer the case.
@end quotation
-@c ENDOFRANGE advgaw
-@c ENDOFRANGE gawadv
@c ENDOFRANGE awkp
@c ENDOFRANGE proawk
+@node Advanced Features Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+The @option{--non-decimal-data} option causes @command{gawk} to treat
+octal- and hexadecimal-looking input data as octal and hexadecimal.
+This option should be used with caution or not at all; use of @code{strtonum()}
+is preferable.
+
+@item
+You can take over complete control of sorting in @samp{for (@var{indx} in @var{array})}
+array traversal by setting @code{PROCINFO["sorted_in"]} to the name of a user-defined
+function that does the comparison of array elements based on index and value.
+
+@item
+Similarly, you can supply the name of a user-defined comparison function as the
+third argument to either @code{asort()} or @code{asorti()} to control how
+those functions sort arrays. Or you may provide one of the predefined control
+strings that work for @code{PROCINFO["sorted_in"]}.
+
+@item
+You can use the @samp{|&} operator to create a two-way pipe to a coprocess.
+You read from the coprocess with @code{getline} and write to it with @code{print}
+or @code{printf}. Use @code{close()} to close off the coprocess completely, or
+optionally, close off one side of the two-way communications.
+
+@item
+By using special ``@value{FN}s'' with the @samp{|&} operator, you can open a
+TCP/IP (or UDP/IP) connection to remote hosts in the Internet. @command{gawk}
+supports both IPv4 and IPv6.
+
+@item
+You can generate statement count profiles of your program. This can help you
+determine which parts of your program may be taking the most time and let
+you tune them more easily. Sending the @code{USR1} signal while profiling causes
+@command{gawk} to dump the profile and keep going, including a function call stack.
+
+@item
+You can also just ``pretty print'' the program. This once ran the
+program as well, but that is no longer the case.
+
+@end itemize
+
+@c ENDOFRANGE advgaw
+@c ENDOFRANGE gawadv
+
@node Internationalization
@chapter Internationalization with @command{gawk}
@@ -25525,11 +26866,12 @@ a requirement.
@menu
* I18N and L10N:: Internationalization and Localization.
-* Explaining gettext:: How GNU @code{gettext} works.
+* Explaining gettext:: How GNU @command{gettext} works.
* Programmer i18n:: Features for the programmer.
* Translator i18n:: Features for the translator.
* I18N Example:: A simple i18n example.
* Gawk I18N:: @command{gawk} is also internationalized.
+* I18N Summary:: Summary of I18N stuff.
@end menu
@node I18N and L10N
@@ -25549,20 +26891,22 @@ responses, and information related to how numerical and
monetary values are printed and read.
@node Explaining gettext
-@section GNU @code{gettext}
+@section GNU @command{gettext}
@cindex internationalizing a program
@c STARTOFRANGE gettex
-@cindex @code{gettext} library
-The facilities in GNU @code{gettext} focus on messages; strings printed
+@cindex @command{gettext} library
+@command{gawk} uses GNU @command{gettext} to provide its internationalization
+features.
+The facilities in GNU @command{gettext} focus on messages; strings printed
by a program, either directly or via formatting with @code{printf} or
@code{sprintf()}.@footnote{For some operating systems, the @command{gawk}
-port doesn't support GNU @code{gettext}.
+port doesn't support GNU @command{gettext}.
Therefore, these features are not available
if you are using one of those operating systems. Sorry.}
-@cindex portability, @code{gettext} library and
-When using GNU @code{gettext}, each application has its own
+@cindex portability, @command{gettext} library and
+When using GNU @command{gettext}, each application has its own
@dfn{text domain}. This is a unique name, such as @samp{kpilot} or @samp{gawk},
that identifies the application.
A complete application may have multiple components---programs written
@@ -25586,7 +26930,7 @@ language).
@cindex @code{textdomain()} function (C library)
@item
The programmer indicates the application's text domain
-(@code{"guide"}) to the @code{gettext} library,
+(@code{"guide"}) to the @command{gettext} library,
by calling the @code{textdomain()} function.
@cindex @code{.pot} files
@@ -25630,7 +26974,7 @@ are installed in a standard place.
@cindex @code{bindtextdomain()} function (C library)
@item
-For testing and development, it is possible to tell @code{gettext}
+For testing and development, it is possible to tell @command{gettext}
to use @file{.gmo} files in a different directory than the standard
one by using the @code{bindtextdomain()} function.
@@ -25663,7 +27007,7 @@ strings enclosed in calls to @code{gettext()}.
@cindex @code{_} (underscore), C macro
@cindex underscore (@code{_}), C macro
-The GNU @code{gettext} developers, recognizing that typing
+The GNU @command{gettext} developers, recognizing that typing
@samp{gettext(@dots{})} over and over again is both painful and ugly to look
at, use the macro @samp{_} (an underscore) to make things easier:
@@ -25676,7 +27020,7 @@ printf("%s", _("Don't Panic!\n"));
@end example
@cindex internationalization, localization, locale categories
-@cindex @code{gettext} library, locale categories
+@cindex @command{gettext} library, locale categories
@cindex locale categories
@noindent
This reduces the typing overhead to just three extra characters per string
@@ -25684,12 +27028,12 @@ and is considerably easier to read as well.
There are locale @dfn{categories}
for different types of locale-related information.
-The defined locale categories that @code{gettext} knows about are:
+The defined locale categories that @command{gettext} knows about are:
@table @code
@cindex @code{LC_MESSAGES} locale category
@item LC_MESSAGES
-Text messages. This is the default category for @code{gettext}
+Text messages. This is the default category for @command{gettext}
operations, but it is possible to supply a different one explicitly,
if necessary. (It is almost never necessary to supply a different category.)
@@ -25702,7 +27046,16 @@ and/or groups of characters sort in a given language.
@cindex @code{LC_CTYPE} locale category
@item LC_CTYPE
Character-type information (alphabetic, digit, upper- or lowercase, and
-so on).
+so on) as well as character encoding.
+@ignore
+In June 2001 Bruno Haible wrote:
+- Description of LC_CTYPE: It determines both
+ 1. character encoding,
+ 2. character type information.
+ (For example, in both KOI8-R and ISO-8859-5 the character type information
+ is the same - cyrillic letters could as 'alpha' - but the encoding is
+ different.)
+@end ignore
This information is accessed via the
POSIX character classes in regular expressions,
such as @code{/[[:alnum:]]/}
@@ -25723,11 +27076,6 @@ use a comma every three decimal places and a period for the decimal
point, while many Europeans do exactly the opposite:
1,234.56 versus 1.234,56.}
-@cindex @code{LC_RESPONSE} locale category
-@item LC_RESPONSE
-Response information, such as how ``yes'' and ``no'' appear in the
-local language, and possibly other information as well.
-
@cindex time, localization and
@cindex dates, information related to@comma{} localization
@cindex @code{LC_TIME} locale category
@@ -25737,7 +27085,7 @@ before or after the day in a date, local month abbreviations, and so on.
@cindex @code{LC_ALL} locale category
@item LC_ALL
-All of the above. (Not too useful in the context of @code{gettext}.)
+All of the above. (Not too useful in the context of @command{gettext}.)
@end table
@c ENDOFRANGE gettex
@@ -25753,7 +27101,7 @@ internationalization:
@cindex @code{TEXTDOMAIN} variable
@item TEXTDOMAIN
This variable indicates the application's text domain.
-For compatibility with GNU @code{gettext}, the default
+For compatibility with GNU @command{gettext}, the default
value is @code{"messages"}.
@cindex internationalization, localization, marked strings
@@ -25764,7 +27112,7 @@ are candidates for translation at runtime.
String constants without a leading underscore are not translated.
@cindexgawkfunc{dcgettext}
-@item dcgettext(@var{string} @r{[}, @var{domain} @r{[}, @var{category}@r{]]})
+@item @code{dcgettext(@var{string}} [@code{,} @var{domain} [@code{,} @var{category}]]@code{)}
Return the translation of @var{string} in
text domain @var{domain} for locale category @var{category}.
The default value for @var{domain} is the current value of @code{TEXTDOMAIN}.
@@ -25790,7 +27138,7 @@ default arguments.
@end quotation
@cindexgawkfunc{dcngettext}
-@item dcngettext(@var{string1}, @var{string2}, @var{number} @r{[}, @var{domain} @r{[}, @var{category}@r{]]})
+@item @code{dcngettext(@var{string1}, @var{string2}, @var{number}} [@code{,} @var{domain} [@code{,} @var{category}]]@code{)}
Return the plural form used for @var{number} of the
translation of @var{string1} and @var{string2} in text domain
@var{domain} for locale category @var{category}. @var{string1} is the
@@ -25806,9 +27154,9 @@ The same remarks about argument order as for the @code{dcgettext()} function app
@cindex message object files, specifying directory of
@cindex files, message object, specifying directory of
@cindexgawkfunc{bindtextdomain}
-@item bindtextdomain(@var{directory} @r{[}, @var{domain}@r{]})
+@item @code{bindtextdomain(@var{directory}} [@code{,} @var{domain} ]@code{)}
Change the directory in which
-@code{gettext} looks for @file{.gmo} files, in case they
+@command{gettext} looks for @file{.gmo} files, in case they
will not or cannot be placed in the standard locations
(e.g., during testing).
Return the directory in which @var{domain} is ``bound.''
@@ -25862,18 +27210,33 @@ printf(_"Number of users is %d\n", nusers)
@item
If you are creating strings dynamically, you can
still translate them, using the @code{dcgettext()}
-built-in function:
+built-in function:@footnote{Thanks to Bruno Haible for this
+example.}
@example
-message = nusers " users logged in"
-message = dcgettext(message, "adminprog")
-print message
+if (groggy)
+ message = dcgettext("%d customers disturbing me\n", "adminprog")
+else
+ message = dcgettext("enjoying %d customers\n", "adminprog")
+printf(message, ncustomers)
@end example
Here, the call to @code{dcgettext()} supplies a different
text domain (@code{"adminprog"}) in which to find the
message, but it uses the default @code{"LC_MESSAGES"} category.
+The previous example only works if @code{ncustomers} is greater than one.
+This example would be better done with @code{dcngettext()}:
+
+@example
+if (groggy)
+ message = dcngettext("%d customer disturbing me\n", "%d customers disturbing me\n", ncustomers, "adminprog")
+else
+ message = dcngettext("enjoying %d customer\n", "enjoying %d customers\n", ncustomers, "adminprog")
+printf(message, ncustomers)
+@end example
+
+
@cindex @code{LC_MESSAGES} locale category, @code{bindtextdomain()} function (@command{gawk})
@item
During development, you might want to put the @file{.gmo}
@@ -25947,12 +27310,15 @@ $ @kbd{gawk --gen-pot -f guide.awk > guide.pot}
@cindex @code{xgettext} utility
When run with @option{--gen-pot}, @command{gawk} does not execute your
program. Instead, it parses it as usual and prints all marked strings
-to standard output in the format of a GNU @code{gettext} Portable Object
+to standard output in the format of a GNU @command{gettext} Portable Object
file. Also included in the output are any constant strings that
appear as the first argument to @code{dcgettext()} or as the first and
second argument to @code{dcngettext()}.@footnote{The
@command{xgettext} utility that comes with GNU
-@code{gettext} can handle @file{.awk} files.}
+@command{gettext} can handle @file{.awk} files.}
+You should distribute the generated @file{.pot} file with
+your @command{awk} program; translators will eventually use it
+to provide you translations that you can also then distribute.
@xref{I18N Example},
for the full list of steps to go through to create and test
translations for @command{guide}.
@@ -25968,9 +27334,8 @@ Format strings for @code{printf} and @code{sprintf()}
(@pxref{Printf})
present a special problem for translation.
Consider the following:@footnote{This example is borrowed
-from the GNU @code{gettext} manual.}
+from the GNU @command{gettext} manual.}
-@c line broken here only for smallbook format
@example
printf(_"String `%s' has %d characters\n",
string, length(string))
@@ -26078,7 +27443,7 @@ As written, it won't work on other versions of @command{awk}.
However, it is actually almost portable, requiring very little
change:
-@itemize @bullet
+@itemize @value{BULLET}
@cindex @code{TEXTDOMAIN} variable, portability and
@item
Assignments to @code{TEXTDOMAIN} won't have any effect,
@@ -26218,33 +27583,33 @@ msgstr "Like, the scoop is"
@cindex Linux
@cindex GNU/Linux
The next step is to make the directory to hold the binary message object
-file and then to create the @file{guide.gmo} file.
-The directory layout shown here is standard for GNU @code{gettext} on
-GNU/Linux systems. Other versions of @code{gettext} may use a different
+file and then to create the @file{guide.mo} file.
+We pretend that our file is to be used in the @code{en_US.UTF-8} locale.
+The directory layout shown here is standard for GNU @command{gettext} on
+GNU/Linux systems. Other versions of @command{gettext} may use a different
layout:
@example
-$ @kbd{mkdir en_US en_US/LC_MESSAGES}
+$ @kbd{mkdir en_US.UTF-8 en_US.UTF-8/LC_MESSAGES}
@end example
-@cindex @code{.po} files, converting to @code{.gmo}
-@cindex files, @code{.po}, converting to @code{.gmo}
-@cindex @code{.gmo} files, converting from @code{.po}
-@cindex files, @code{.gmo}, converting from @code{.po}
+@cindex @code{.po} files, converting to @code{.mo}
+@cindex files, @code{.po}, converting to @code{.mo}
+@cindex @code{.mo} files, converting from @code{.po}
+@cindex files, @code{.mo}, converting from @code{.po}
@cindex portable object files, converting to message object files
@cindex files, portable object, converting to message object files
@cindex message object files, converting from portable object files
@cindex files, message object, converting from portable object files
@cindex @command{msgfmt} utility
The @command{msgfmt} utility does the conversion from human-readable
-@file{.po} file to machine-readable @file{.gmo} file.
+@file{.po} file to machine-readable @file{.mo} file.
By default, @command{msgfmt} creates a file named @file{messages}.
This file must be renamed and placed in the proper directory so that
@command{gawk} can find it:
@example
-$ @kbd{msgfmt guide-mellow.po}
-$ @kbd{mv messages en_US/LC_MESSAGES/guide.gmo}
+$ @kbd{msgfmt guide-mellow.po -o en_US.UTF-8/LC_MESSAGES/guide.mo}
@end example
Finally, we run the program to test it:
@@ -26273,30 +27638,71 @@ $ @kbd{gawk --posix -f guide.awk -f libintl.awk}
@section @command{gawk} Can Speak Your Language
@command{gawk} itself has been internationalized
-using the GNU @code{gettext} package.
-(GNU @code{gettext} is described in
+using the GNU @command{gettext} package.
+(GNU @command{gettext} is described in
complete detail in
@ifinfo
-@inforef{Top, , GNU @code{gettext} utilities, gettext, GNU gettext tools}.)
+@inforef{Top, , GNU @command{gettext} utilities, gettext, GNU gettext tools}.)
@end ifinfo
@ifnotinfo
-@cite{GNU gettext tools}.)
+@uref{http://www.gnu.org/software/gettext/manual/,
+@cite{GNU gettext tools}}.)
@end ifnotinfo
-As of this writing, the latest version of GNU @code{gettext} is
-@uref{ftp://ftp.gnu.org/gnu/gettext/gettext-0.18.2.1.tar.gz, version 0.18.2.1}.
+As of this writing, the latest version of GNU @command{gettext} is
+@uref{ftp://ftp.gnu.org/gnu/gettext/gettext-0.19.1.tar.gz,
+@value{PVERSION} 0.19.1}.
If a translation of @command{gawk}'s messages exists,
then @command{gawk} produces usage messages, warnings,
and fatal errors in the local language.
-@c ENDOFRANGE inloc
-@c The original text for this chapter was contributed by Efraim Yawitz.
-@c FIXME: Add more indexing.
+@node I18N Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Internationalization means writing a program such that it can use multiple
+languages without requiring source-code changes. Localization means
+providing the data necessary for an internationalized program to work
+in a particular language.
+
+@item
+@command{gawk} uses GNU @command{gettext} to let you internationalize
+and localize @command{awk} programs. A program's text domain identifies
+the program for grouping all messages and other data together.
+
+@item
+You mark a program's strings for translation by preceding them with
+an underscore. Once that is done, the strings are extracted into a
+@file{.pot} file. This file is copied for each language into a @file{.po}
+file, and the @file{.po} files are compiled into @file{.mo} files for
+use at runtime.
+
+@item
+You can use position specifications with @code{sprintf()} and
+@code{printf} to rearrange the placement of argument values in formatted
+strings and output. This is useful for the translations of format
+control strings.
+
+@item
+The internationalization features have been designed so that they
+can be easily worked around in a standard @command{awk}.
+
+@item
+@command{gawk} itself has been internationalized and ships with
+a number of translations for its messages.
+
+@end itemize
+
+@c ENDOFRANGE inloc
@node Debugger
@chapter Debugging @command{awk} Programs
@cindex debugging @command{awk} programs
+@c The original text for this chapter was contributed by Efraim Yawitz.
+@c FIXME: Add more indexing.
+
It would be nice if computer programs worked perfectly the first time they
were run, but in real life, this rarely happens for programs of
any complexity. Thus, most programming languages have facilities available
@@ -26313,10 +27719,11 @@ how to use @command{gawk} for debugging your program is easy.
* List of Debugger Commands:: Main debugger commands.
* Readline Support:: Readline support.
* Limitations:: Limitations and future plans.
+* Debugging Summary:: Debugging summary.
@end menu
@node Debugging
-@section Introduction to @command{gawk} Debugger
+@section Introduction to the @command{gawk} Debugger
This @value{SECTION} introduces debugging in general and begins
the discussion of debugging in @command{gawk}.
@@ -26341,7 +27748,7 @@ In that case, what can you expect from such a tool? The answer to that
depends on the language being debugged, but in general, you can expect at
least the following:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The ability to watch a program execute its instructions one by one,
giving you, the programmer, the opportunity to think about what is happening
@@ -26469,7 +27876,7 @@ to debug command-line programs, only programs contained in files.)
In our case, we invoke the debugger like this:
@example
-$ @kbd{gawk -D -f getopt.awk -f join.awk -f uniq.awk inputfile}
+$ @kbd{gawk -D -f getopt.awk -f join.awk -f uniq.awk -1 inputfile}
@end example
@noindent
@@ -26531,7 +27938,7 @@ the breakpoint, use the @code{b} (breakpoint) command:
@example
gawk> @kbd{b are_equal}
-@print{} Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 64
+@print{} Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 63
@end example
The debugger tells us the file and line number where the breakpoint is.
@@ -26543,8 +27950,8 @@ gawk> @kbd{r}
@print{} Starting program:
@print{} Stopping in Rule ...
@print{} Breakpoint 1, are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':64
-@print{} 64 if (fcount == 0 && charcount == 0)
+ at `awklib/eg/prog/uniq.awk':63
+@print{} 63 if (fcount == 0 && charcount == 0)
gawk>
@end example
@@ -26556,12 +27963,12 @@ listing of the current stack frames:
@example
gawk> @kbd{bt}
@print{} #0 are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':69
-@print{} #1 in main() at `awklib/eg/prog/uniq.awk':89
+ at `awklib/eg/prog/uniq.awk':68
+@print{} #1 in main() at `awklib/eg/prog/uniq.awk':88
@end example
This tells us that @code{are_equal()} was called by the main program at
-line 89 of @file{uniq.awk}. (This is not a big surprise, since this
+line 88 of @file{uniq.awk}. (This is not a big surprise, since this
is the only call to @code{are_equal()} in the program, but in more complex
programs, knowing who called a function and with what parameters can be
the key to finding the source of the problem.)
@@ -26585,7 +27992,7 @@ A more useful variable to display might be the current record:
@example
gawk> @kbd{p $0}
-@print{} $0 = string ("gawk is a wonderful program!")
+@print{} $0 = "gawk is a wonderful program!"
@end example
@noindent
@@ -26594,7 +28001,7 @@ our test input above. Let's look at @code{NR}:
@example
gawk> @kbd{p NR}
-@print{} NR = number (2)
+@print{} NR = 2
@end example
@noindent
@@ -26613,7 +28020,7 @@ OK, let's just check that that rule worked correctly:
@example
gawk> @kbd{p last}
-@print{} last = string ("awk is a wonderful program!")
+@print{} last = "awk is a wonderful program!"
@end example
Everything we have done so far has verified that the program has worked as
@@ -26624,29 +28031,23 @@ be inside this function. To investigate further, we must begin
@example
gawk> @kbd{n}
-@print{} 67 if (fcount > 0) @{
+@print{} 66 if (fcount > 0) @{
@end example
-This tells us that @command{gawk} is now ready to execute line 67, which
+This tells us that @command{gawk} is now ready to execute line 66, which
decides whether to give the lines the special ``field skipping'' treatment
-indicated by the @option{-f} command-line option. (Notice that we skipped
-from where we were before at line 64 to here, since the condition in line 64
-
-@example
-if (fcount == 0 && charcount == 0)
-@end example
-
-@noindent
-was false.)
+indicated by the @option{-1} command-line option. (Notice that we skipped
+from where we were before at line 63 to here, since the condition in line 63
+@samp{if (fcount == 0 && charcount == 0)} was false.)
Continuing to step, we now get to the splitting of the current and
last records:
@example
gawk> @kbd{n}
-@print{} 68 n = split(last, alast)
+@print{} 67 n = split(last, alast)
gawk> @kbd{n}
-@print{} 69 m = split($0, aline)
+@print{} 68 m = split($0, aline)
@end example
At this point, we should be curious to see what our records were split
@@ -26654,10 +28055,10 @@ into, so we try to look:
@example
gawk> @kbd{p n m alast aline}
-@print{} n = number (5)
-@print{} m = number (5)
+@print{} n = 5
+@print{} m = untyped variable
@print{} alast = array, 5 elements
-@print{} aline = array, 5 elements
+@print{} aline = untyped variable
@end example
@noindent
@@ -26665,7 +28066,9 @@ gawk> @kbd{p n m alast aline}
@command{awk}'s @code{print} statement.)
This is kind of disappointing, though. All we found out is that there
-are five elements in each of our arrays. Useful enough (we now know that
+are five elements in @code{alast}; @code{m} and @code{aline} don't have
+values since we are at line 68 but haven't executed it yet.
+This information is useful enough (we now know that
none of the words were accidentally left out), but what if we want to see
inside the array?
@@ -26681,7 +28084,7 @@ Oops!
@example
gawk> @kbd{p alast[1]}
-@print{} alast["1"] = string ("awk")
+@print{} alast["1"] = "awk"
@end example
This would be kind of slow for a 100-member array, though, so
@@ -26690,11 +28093,11 @@ not to be mentioned):
@example
gawk> @kbd{p @@alast}
-@print{} alast["1"] = string ("awk")
-@print{} alast["2"] = string ("is")
-@print{} alast["3"] = string ("a")
-@print{} alast["4"] = string ("wonderful")
-@print{} alast["5"] = string ("program!")
+@print{} alast["1"] = "awk"
+@print{} alast["2"] = "is"
+@print{} alast["3"] = "a"
+@print{} alast["4"] = "wonderful"
+@print{} alast["5"] = "program!"
@end example
It looks like we got this far OK. Let's take another step
@@ -26702,9 +28105,9 @@ or two:
@example
gawk> @kbd{n}
-@print{} 70 clast = join(alast, fcount, n)
+@print{} 69 clast = join(alast, fcount, n)
gawk> @kbd{n}
-@print{} 71 cline = join(aline, fcount, m)
+@print{} 70 cline = join(aline, fcount, m)
@end example
Well, here we are at our error (sorry to spoil the suspense). What we
@@ -26714,8 +28117,8 @@ this would work. Let's look at what we've got:
@example
gawk> @kbd{p cline clast}
-@print{} cline = string ("gawk is a wonderful program!")
-@print{} clast = string ("awk is a wonderful program!")
+@print{} cline = "gawk is a wonderful program!"
+@print{} clast = "awk is a wonderful program!"
@end example
Hey, those look pretty familiar! They're just our original, unaltered,
@@ -26746,7 +28149,7 @@ and problem solved!
The @command{gawk} debugger command set can be divided into the
following categories:
-@itemize @bullet{}
+@itemize @value{BULLET}
@item
Breakpoint control
@@ -26772,7 +28175,7 @@ In the following descriptions, commands which may be abbreviated
show the abbreviation on a second description line.
A debugger command name may also be truncated if that partial
name is unambiguous. The debugger has the built-in capability to
-automatically repeat the previous command when just hitting @key{Enter}.
+automatically repeat the previous command just by hitting @key{Enter}.
This works for the commands @code{list}, @code{next}, @code{nexti}, @code{step}, @code{stepi}
and @code{continue} executed without any argument.
@@ -27134,7 +28537,7 @@ No newline is printed unless one is specified.
@item @code{set} @var{var}@code{=}@var{value}
Assign a constant (number or string) value to an @command{awk} variable
or field.
-String values must be enclosed between double quotes (@code{"@dots{}"}).
+String values must be enclosed between double quotes (@code{"}@dots{}@code{"}).
You can also set special @command{awk} variables, such as @code{FS},
@code{NF}, @code{NR}, etc.
@@ -27195,7 +28598,7 @@ functions which called the one you are in. The commands for doing this are:
Print a backtrace of all function calls (stack frames), or innermost @var{count}
frames if @var{count} > 0. Print the outermost @var{count} frames if
@var{count} < 0. The backtrace displays the name and arguments to each
-function, the source file name, and the line number.
+function, the source @value{FN}, and the line number.
@cindex debugger commands, @code{down}
@cindex @code{down} debugger command
@@ -27209,10 +28612,11 @@ Then select and print the frame.
@cindex @code{f} debugger command (alias for @code{frame})
@item @code{frame} [@var{n}]
@itemx @code{f} [@var{n}]
-Select and print (frame number, function and argument names, source file,
-and the source line) stack frame @var{n}. Frame 0 is the currently executing,
-or @dfn{innermost}, frame (function call), frame 1 is the frame that called the
-innermost one. The highest numbered frame is the one for the main program.
+Select and print stack frame @var{n}. Frame 0 is the currently executing,
+or @dfn{innermost}, frame (function call), frame 1 is the frame that
+called the innermost one. The highest numbered frame is the one for the
+main program. The printed information consists of the frame number,
+function and argument names, source file, and the source line.
@cindex debugger commands, @code{up}
@cindex @code{up} debugger command
@@ -27260,7 +28664,7 @@ Description of the selected stack frame.
@item functions
@cindex list function definitions, in debugger
-List all function definitions including source file names and
+List all function definitions including source @value{FN}s and
line numbers.
@item locals
@@ -27310,44 +28714,45 @@ a new value to the named option.
The available options are:
@c nested table
-@table @code
-@item history_size
+@c asis for docbook
+@table @asis
+@item @code{history_size}
@cindex debugger history size
The maximum number of lines to keep in the history file @file{./.gawk_history}.
The default is 100.
-@item listsize
+@item @code{listsize}
@cindex debugger default list amount
The number of lines that @code{list} prints. The default is 15.
-@item outfile
+@item @code{outfile}
@cindex redirect @command{gawk} output, in debugger
Send @command{gawk} output to a file; debugger output still goes
to standard output. An empty string (@code{""}) resets output to
standard output.
-@item prompt
+@item @code{prompt}
@cindex debugger prompt
The debugger prompt. The default is @samp{@w{gawk> }}.
-@item save_history @r{[}on @r{|} off@r{]}
+@item @code{save_history} [@code{on} | @code{off}]
@cindex debugger history file
Save command history to file @file{./.gawk_history}.
The default is @code{on}.
-@item save_options @r{[}on @r{|} off@r{]}
+@item @code{save_options} [@code{on} | @code{off}]
@cindex save debugger options
Save current options to file @file{./.gawkrc} upon exit.
The default is @code{on}.
Options are read back in to the next session upon startup.
-@item trace @r{[}on @r{|} off@r{]}
+@item @code{trace} [@code{on} | @code{off}]
@cindex instruction tracing, in debugger
Turn instruction tracing on or off. The default is @code{off}.
@end table
@item @code{save} @var{filename}
-Save the commands from the current session to the given file name,
+Save the commands from the current session to the given @value{FN},
so that they can be replayed using the @command{source} command.
@item @code{source} @var{filename}
@@ -27387,7 +28792,7 @@ partial dump of Davide Brini's obfuscated code
@smallexample
gawk> @kbd{dump}
-@print{} # BEGIN
+@print{} # BEGIN
@print{}
@print{} [ 1:0xfcd340] Op_rule : [in_rule = BEGIN] [source_file = brini.awk]
@print{} [ 1:0xfcc240] Op_push_i : "~" [MALLOC|STRING|STRCUR]
@@ -27495,7 +28900,7 @@ running a program, the debugger warns you if you accidentally type
@cindex debugger commands, @code{trace}
@cindex @code{trace} debugger command
-@item @code{trace} @code{on} @r{|} @code{off}
+@item @code{trace} [@code{on} | @code{off}]
Turn on or off a continuous printing of instructions which are about to
be executed, along with printing the @command{awk} line which they
implement. The default is @code{off}.
@@ -27511,16 +28916,18 @@ fairly self-explanatory, and using @code{stepi} and @code{nexti} while
@cindex command completion, in debugger
@cindex history expansion, in debugger
-If @command{gawk} is compiled with the @code{readline} library, you
-can take advantage of that library's command completion and history expansion
-features. The following types of completion are available:
+If @command{gawk} is compiled with
+@uref{http://cnswww.cns.cwru.edu/php/chet/readline/readline.html,
+the @code{readline} library}, you can take advantage of that library's
+command completion and history expansion features. The following types
+of completion are available:
@table @asis
@item Command completion
Command names.
-@item Source file name completion
-Source file names. Relevant commands are
+@item Source @value{FN} completion
+Source @value{FN}s. Relevant commands are
@code{break},
@code{clear},
@code{list},
@@ -27550,7 +28957,7 @@ We hope you find the @command{gawk} debugger useful and enjoyable to work with,
but as with any program, especially in its early releases, it still has
some limitations. A few which are worth being aware of are:
-@itemize @bullet{}
+@itemize @value{BULLET}
@item
At this point, the debugger does not give a detailed explanation of
what you did wrong when you type in something it doesn't like. Rather, it just
@@ -27558,14 +28965,17 @@ responds @samp{syntax error}. When you do figure out what your mistake was,
though, you'll feel like a real guru.
@item
-If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands},
+@c NOTE: no comma after the ref{} on purpose, due to following
+@c parenthetical remark.
+If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands}
(or if you are already familiar with @command{gawk} internals),
you will realize that much of the internal manipulation of data
in @command{gawk}, as in many interpreters, is done on a stack.
@code{Op_push}, @code{Op_pop}, etc., are the ``bread and butter'' of
-most @command{gawk} code. Unfortunately, as of now, the @command{gawk}
-debugger does not allow you to examine the stack's contents.
+most @command{gawk} code.
+Unfortunately, as of now, the @command{gawk}
+debugger does not allow you to examine the stack's contents.
That is, the intermediate results of expression evaluation are on the
stack, but cannot be printed. Rather, only variables which are defined
in the program can be printed. Of course, a workaround for
@@ -27592,6 +29002,39 @@ The @command{gawk} debugger only accepts source supplied with the @option{-f} op
Look forward to a future release when these and other missing features may
be added, and of course feel free to try to add them yourself!
+@node Debugging Summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Programs rarely work correctly the first time. Finding bugs
+is @dfn{debugging} and a program that helps you find bugs is a
+@dfn{debugger}. @command{gawk} has a built-in debugger that works very
+similarly to the GNU Debugger, GDB.
+
+@item
+Debuggers let you step through your program one statement at a time,
+examine and change variable and array values, and do a number of other
+things that let you understand what your program is actually doing (as
+opposed to what it is supposed to do).
+
+@item
+Like most debuggers, the @command{gawk} debugger works in terms of stack
+frames, and lets you set both breakpoints (stop at a point in the code)
+and watchpoints (stop when a data value changes).
+
+@item
+The debugger command set is fairly complete, providing control over
+breakpoints, execution, viewing and changing data, working with the stack,
+getting information, and other tasks.
+
+@item
+If the @code{readline} library is available when @command{gawk} is
+compiled, it is used by the debugger to provide command-line history
+and editing.
+
+@end itemize
+
@node Arbitrary Precision Arithmetic
@chapter Arithmetic and Arbitrary Precision Arithmetic with @command{gawk}
@cindex arbitrary precision
@@ -27599,444 +29042,310 @@ be added, and of course feel free to try to add them yourself!
@cindex infinite precision
@cindex floating-point, numbers@comma{} arbitrary precision
-@cindex Knuth, Donald
-@quotation
-@i{There's a credibility gap: We don't know how much of the computer's answers
-to believe. Novice computer users solve this problem by implicitly trusting
-in the computer as an infallible authority; they tend to believe that all
-digits of a printed answer are significant. Disillusioned computer users have
-just the opposite approach; they are constantly afraid that their answers
-are almost meaningless.}@footnote{Donald E.@: Knuth.
-@cite{The Art of Computer Programming}. Volume 2,
-@cite{Seminumerical Algorithms}, third edition,
-1998, ISBN 0-201-89683-4, p.@: 229.}
-@author Donald Knuth
-@end quotation
-
-This @value{CHAPTER} discusses issues that you may encounter
-when performing arithmetic. It begins by discussing some of
-the general attributes of computer arithmetic, along with how
-this can influence what you see when running @command{awk} programs.
-This discussion applies to all versions of @command{awk}.
+This @value{CHAPTER} introduces some basic concepts relating to
+how computers do arithmetic and briefly lists the features in
+@command{gawk} for performing arbitrary precision floating point
+computations. It then proceeds to describe floating-point arithmetic,
+which is what @command{awk} uses for all its computations, including a
+discussion of arbitrary precision floating point arithmetic, which is
+a feature available only in @command{gawk}. It continues on to present
+arbitrary precision integers, and concludes with a description of some
+points where @command{gawk} and the POSIX standard are not quite in
+agreement.
-The @value{CHAPTER} then moves on to describe @dfn{arbitrary precision
-arithmetic}, a feature which is specific to @command{gawk}.
+@quotation NOTE
+Most users of @command{gawk} can safely skip this chapter.
+But if you want to do scientific calculations with @command{gawk},
+this is the place to be.
+@end quotation
@menu
-* General Arithmetic:: An introduction to computer arithmetic.
-* Floating-point Programming:: Effective Floating-point Programming.
-* Gawk and MPFR:: How @command{gawk} provides
- arbitrary-precision arithmetic.
-* Arbitrary Precision Floats:: Arbitrary Precision Floating-point Arithmetic
- with @command{gawk}.
-* Arbitrary Precision Integers:: Arbitrary Precision Integer Arithmetic with
- @command{gawk}.
+* Computer Arithmetic:: A quick intro to computer math.
+* Math Definitions:: Defining terms used.
+* MPFR features:: The MPFR features in @command{gawk}.
+* FP Math Caution:: Things to know.
+* Arbitrary Precision Integers:: Arbitrary Precision Integer Arithmetic with
+ @command{gawk}.
+* POSIX Floating Point Problems:: Standards Versus Existing Practice.
+* Floating point summary:: Summary of floating point discussion.
@end menu
-@node General Arithmetic
+@node Computer Arithmetic
@section A General Description of Computer Arithmetic
-@cindex integers
-@cindex floating-point, numbers
-@cindex numbers, floating-point
-Within computers, there are two kinds of numeric values: @dfn{integers}
-and @dfn{floating-point}.
-In school, integer values were referred to as ``whole'' numbers---that is,
-numbers without any fractional part, such as 1, 42, or @minus{}17.
+Until now, we have worked with data as either numbers or
+strings. Ultimately, however, computers represent everything in terms
+of @dfn{binary digits}, or @dfn{bits}. A decimal digit can take on any
+of 10 values: zero through nine. A binary digit can take on any of two
+values, zero or one. Using binary, computers (and computer software)
+can represent and manipulate numerical and character data. In general,
+the more bits you can use to represent a particular thing, the greater
+the range of possible values it can take on.
+
+Modern computers support at least two, and often more, ways to do
+arithmetic. Each kind of arithmetic uses a different representation
+(organization of the bits) for the numbers. The kinds of arithmetic
+that interest us are:
+
+@table @asis
+@item Decimal arithmetic
+This is the kind of arithmetic you learned in elementary school, using
+paper and pencil (and/or a calculator). In theory, numbers can have an
+arbitrary number of digits on either side (or both sides) of the decimal
+point, and the results of a computation are always exact.
+
+Some modern systems can do decimal arithmetic in hardware, but usually you
+need a special software library to provide access to these instructions.
+There are also libraries that do decimal arithmetic entirely in software.
+
+Despite the fact that some users expect @command{gawk} to be performing
+decimal arithmetic,@footnote{We don't know why they expect this, but
+they do.} it does not do so.
+
+@item Integer arithmetic
+In school, integer values were referred to as ``whole'' numbers---that
+is, numbers without any fractional part, such as 1, 42, or @minus{}17.
The advantage to integer numbers is that they represent values exactly.
-The disadvantage is that their range is limited. On most systems,
-this range is @minus{}2,147,483,648 to 2,147,483,647.
-However, many systems now support a range from
-@minus{}9,223,372,036,854,775,808 to 9,223,372,036,854,775,807.
+The disadvantage is that their range is limited.
@cindex unsigned integers
@cindex integers, unsigned
-Integer values come in two flavors: @dfn{signed} and @dfn{unsigned}.
-Signed values may be negative or positive, with the range of values just
-described.
-Unsigned values are always positive. On most systems,
-the range is from 0 to 4,294,967,295.
-However, many systems now support a range from
-0 to 18,446,744,073,709,551,615.
-
-@cindex double precision floating-point
-@cindex single precision floating-point
-Floating-point numbers represent what are called ``real'' numbers; i.e.,
-those that do have a fractional part, such as 3.1415927.
-The advantage to floating-point numbers is that they
-can represent a much larger range of values.
-The disadvantage is that there are numbers that they cannot represent
-exactly.
-@command{awk} uses @dfn{double precision} floating-point numbers, which
-can hold more digits than @dfn{single precision}
-floating-point numbers.
-@c Floating-point issues are discussed more fully in
-@c @ref{Floating Point Issues}.
-
-There a several important issues to be aware of, described next.
+In computers, integer values come in two flavors: @dfn{signed} and
+@dfn{unsigned}. Signed values may be negative or positive, whereas
+unsigned values are always positive (that is, greater than or equal
+to zero).
+
+In computer systems, integer arithmetic is exact, but the possible
+range of values is limited. Integer arithmetic is generally faster than
+floating point arithmetic.
+
+@item Floating point arithmetic
+Floating-point numbers represent what were called in school ``real''
+numbers; i.e., those that have a fractional part, such as 3.1415927.
+The advantage to floating-point numbers is that they can represent a
+much larger range of values than can integers. The disadvantage is that
+there are numbers that they cannot represent exactly.
+
+Modern systems support floating point arithmetic in hardware, with a
+limited range of values. There are software libraries that allow
+the use of arbitrary precision floating point calculations.
+
+POSIX @command{awk} uses @dfn{double precision} floating-point numbers, which
+can hold more digits than @dfn{single precision} floating-point numbers.
+@command{gawk} has facilities for performing arbitrary precision floating
+point arithmetic, which we describe in more detail shortly.
+@end table
-@menu
-* Floating Point Issues:: Stuff to know about floating-point numbers.
-* Integer Programming:: Effective integer programming.
-@end menu
+Computers work with integer and floating point values of different
+ranges. Integer values are usually either 32 or 64 bits in size. Single
+precision floating point values occupy 32 bits, whereas double precision
+floating point values occupy 64 bits. Floating point values are always
+signed. The possible ranges of values are shown in the following table.
+
+@multitable @columnfractions .34 .33 .33
+@headitem Numeric representation @tab Minimum value @tab Maximum value
+@item 32-bit signed integer @tab @minus{}2,147,483,648 @tab 2,147,483,647
+@item 32-bit unsigned integer @tab 0 @tab 4,294,967,295
+@item 64-bit signed integer @tab @minus{}9,223,372,036,854,775,808 @tab 9,223,372,036,854,775,807
+@item 64-bit unsigned integer @tab 0 @tab 18,446,744,073,709,551,615
+@item Single precision floating point (approximate) @tab @code{1.175494e-38} @tab @code{3.402823e+38}
+@item Double precision floating point (approximate) @tab @code{2.225074e-308} @tab @code{1.797693e+308}
+@end multitable
-@node Floating Point Issues
-@subsection Floating-Point Number Caveats
+@node Math Definitions
+@section Other Stuff to Know
-This @value{SECTION} describes some of the issues
-involved in using floating-point numbers.
+The rest of this @value{CHAPTER} uses a number of terms. Here are some
+informal definitions that should help you work your way through the material
+here.
-There is a very nice
-@uref{http://www.validlab.com/goldberg/paper.pdf, paper on floating-point arithmetic}
-by David Goldberg,
-``What Every Computer Scientist Should Know About Floating-point Arithmetic,''
-@cite{ACM Computing Surveys} @strong{23}, 1 (1991-03), 5-48.
-This is worth reading if you are interested in the details,
-but it does require a background in computer science.
+@table @dfn
+@item Accuracy
+A floating-point calculation's accuracy is how close it comes
+to the real (paper and pencil) value.
+
+@item Error
+The difference between what the result of a computation ``should be''
+and what it actually is. It is best to minimize error as much
+as possible.
+
+@item Exponent
+The order of magnitude of a value;
+some number of bits in a floating-point value store the exponent.
+
+@item Inf
+A special value representing infinity. Operations involving another
+number and infinity produce infinity.
+
+@item NaN
+``Not A Number.''@footnote{Thanks
+to Michael Brennan for this description, which I have paraphrased, and
+for the examples.}
+A special value that results from attempting a
+calculation that has no answer as a real number. In such a case,
+programs can either receive a floating-point exception, or get @code{NaN}
+back as the result. The IEEE 754 standard recommends that systems return
+@code{NaN}. Some examples:
-@menu
-* String Conversion Precision:: The String Value Can Lie.
-* Unexpected Results:: Floating Point Numbers Are Not Abstract
- Numbers.
-* POSIX Floating Point Problems:: Standards Versus Existing Practice.
-@end menu
+@table @code
+@item sqrt(-1)
+This makes sense in the range of complex numbers, but not in the
+range of real numbers, so the result is @code{NaN}.
-@node String Conversion Precision
-@subsubsection The String Value Can Lie
+@item log(-8)
+@minus{}8 is out of the domain of @code{log()}, so the result is @code{NaN}.
+@end table
-Internally, @command{awk} keeps both the numeric value
-(double precision floating-point) and the string value for a variable.
-Separately, @command{awk} keeps
-track of what type the variable has
-(@pxref{Typing and Comparison}),
-which plays a role in how variables are used in comparisons.
+@item Normalized
+How the significand (see later in this list) is usually stored. The
+value is adjusted so that the first bit is one, and then that leading
+one is assumed instead of physically stored. This provides one
+extra bit of precision.
-It is important to note that the string value for a number may not
-reflect the full value (all the digits) that the numeric value
-actually contains.
-The following program, @file{values.awk}, illustrates this:
+@item Precision
+The number of bits used to represent a floating-point number.
+The more bits, the more digits you can represent.
+Binary and decimal precisions are related approximately, according to the
+formula:
-@example
-@{
- sum = $1 + $2
- # see it for what it is
- printf("sum = %.12g\n", sum)
- # use CONVFMT
- a = "<" sum ">"
- print "a =", a
- # use OFMT
- print "sum =", sum
-@}
-@end example
+@display
+@iftex
+@math{prec = 3.322 @cdot dps}
+@end iftex
+@ifnottex
+@ifnotdocbook
+@var{prec} = 3.322 * @var{dps}
+@end ifnotdocbook
+@end ifnottex
+@docbook
+<emphasis>prec</emphasis> = 3.322 &sdot; <emphasis>dps</emphasis> @c
+@end docbook
+@end display
@noindent
-This program shows the full value of the sum of @code{$1} and @code{$2}
-using @code{printf}, and then prints the string values obtained
-from both automatic conversion (via @code{CONVFMT}) and
-from printing (via @code{OFMT}).
-
-Here is what happens when the program is run:
-
-@example
-$ @kbd{echo 3.654321 1.2345678 | awk -f values.awk}
-@print{} sum = 4.8888888
-@print{} a = <4.88889>
-@print{} sum = 4.88889
-@end example
+Here, @var{prec} denotes the binary precision
+(measured in bits) and @var{dps} (short for decimal places)
+is the number of decimal digits.
+
+@item Rounding mode
+How numbers are rounded up or down when necessary.
+More details are provided later.
+
+@item Significand
+A floating point value consists of the significand multiplied by 10
+to the power of the exponent. For example, in @code{1.2345e67},
+the significand is @code{1.2345}.
+
+@item Stability
+From @uref{http://en.wikipedia.org/wiki/Numerical_stability,
+the Wikipedia article on numerical stability}:
+``Calculations that can be proven not to magnify approximation errors
+are called @dfn{numerically stable}.''
+@end table
-This makes it clear that the full numeric value is different from
-what the default string representations show.
+See @uref{http://en.wikipedia.org/wiki/Accuracy_and_precision,
+the Wikipedia article on accuracy and precision} for more information
+on some of those terms.
-@code{CONVFMT}'s default value is @code{"%.6g"}, which yields a value with
-at most six significant digits. For some applications, you might want to
-change it to specify more precision.
-On most modern machines, most of the time,
-17 digits is enough to capture a floating-point number's
-value exactly.@footnote{Pathological cases can require up to
-752 digits (!), but we doubt that you need to worry about this.}
+On modern systems, floating-point hardware uses the representation and
+operations defined by the IEEE 754 standard.
+Three of the standard IEEE 754 types are 32-bit single precision,
+64-bit double precision and 128-bit quadruple precision.
+The standard also specifies extended precision formats
+to allow greater precisions and larger exponent ranges.
+(@command{awk} uses only the 64-bit double precision format.)
-@node Unexpected Results
-@subsubsection Floating Point Numbers Are Not Abstract Numbers
-
-@cindex floating-point, numbers
-Unlike numbers in the abstract sense (such as what you studied in high school
-or college arithmetic), numbers stored in computers are limited in certain ways.
-They cannot represent an infinite number of digits, nor can they always
-represent things exactly.
-In particular,
-floating-point numbers cannot
-always represent values exactly. Here is an example:
-
-@example
-$ @kbd{awk '@{ printf("%010d\n", $1 * 100) @}'}
-515.79
-@print{} 0000051579
-515.80
-@print{} 0000051579
-515.81
-@print{} 0000051580
-515.82
-@print{} 0000051582
-@kbd{Ctrl-d}
-@end example
+@ref{table-ieee-formats} lists the precision and exponent
+field values for the basic IEEE 754 binary formats:
-@noindent
-This shows that some values can be represented exactly,
-whereas others are only approximated. This is not a ``bug''
-in @command{awk}, but simply an artifact of how computers
-represent numbers.
+@float Table,table-ieee-formats
+@caption{Basic IEEE Format Context Values}
+@multitable @columnfractions .20 .20 .20 .20 .20
+@headitem Name @tab Total bits @tab Precision @tab emin @tab emax
+@item Single @tab 32 @tab 24 @tab @minus{}126 @tab +127
+@item Double @tab 64 @tab 53 @tab @minus{}1022 @tab +1023
+@item Quadruple @tab 128 @tab 113 @tab @minus{}16382 @tab +16383
+@end multitable
+@end float
@quotation NOTE
-It cannot be emphasized enough that the behavior just
-described is fundamental to modern computers. You will
-see this kind of thing happen in @emph{any} programming
-language using hardware floating-point numbers. It is @emph{not}
-a bug in @command{gawk}, nor is it something that can be ``just
-fixed.''
+The precision numbers include the implied leading one that gives them
+one extra bit of significand.
@end quotation
-@cindex negative zero
-@cindex positive zero
-@cindex zero@comma{} negative vs.@: positive
-Another peculiarity of floating-point numbers on modern systems
-is that they often have more than one representation for the number zero!
-In particular, it is possible to represent ``minus zero'' as well as
-regular, or ``positive'' zero.
-
-This example shows that negative and positive zero are distinct values
-when stored internally, but that they are in fact equal to each other,
-as well as to ``regular'' zero:
-
-@example
-$ @kbd{gawk 'BEGIN @{ mz = -0 ; pz = 0}
-> @kbd{printf "-0 = %g, +0 = %g, (-0 == +0) -> %d\n", mz, pz, mz == pz}
-> @kbd{printf "mz == 0 -> %d, pz == 0 -> %d\n", mz == 0, pz == 0}
-> @kbd{@}'}
-@print{} -0 = -0, +0 = 0, (-0 == +0) -> 1
-@print{} mz == 0 -> 1, pz == 0 -> 1
-@end example
-
-It helps to keep this in mind should you process numeric data
-that contains negative zero values; the fact that the zero is negative
-is noted and can affect comparisons.
-
-@node POSIX Floating Point Problems
-@subsubsection Standards Versus Existing Practice
-
-Historically, @command{awk} has converted any non-numeric looking string
-to the numeric value zero, when required. Furthermore, the original
-definition of the language and the original POSIX standards specified that
-@command{awk} only understands decimal numbers (base 10), and not octal
-(base 8) or hexadecimal numbers (base 16).
-
-Changes in the language of the
-2001 and 2004 POSIX standards can be interpreted to imply that @command{awk}
-should support additional features. These features are:
-
-@itemize @bullet
-@item
-Interpretation of floating point data values specified in hexadecimal
-notation (@samp{0xDEADBEEF}). (Note: data values, @emph{not}
-source code constants.)
-
-@item
-Support for the special IEEE 754 floating point values ``Not A Number''
-(NaN), positive Infinity (``inf'') and negative Infinity (``@minus{}inf'').
-In particular, the format for these values is as specified by the ISO 1999
-C standard, which ignores case and can allow machine-dependent additional
-characters after the @samp{nan} and allow either @samp{inf} or @samp{infinity}.
-@end itemize
-
-The first problem is that both of these are clear changes to historical
-practice:
+@node MPFR features
+@section Arbitrary Precision Arithmetic Features In @command{gawk}
-@itemize @bullet
-@item
-The @command{gawk} maintainer feels that supporting hexadecimal floating
-point values, in particular, is ugly, and was never intended by the
-original designers to be part of the language.
-
-@item
-Allowing completely alphabetic strings to have valid numeric
-values is also a very severe departure from historical practice.
-@end itemize
-
-The second problem is that the @code{gawk} maintainer feels that this
-interpretation of the standard, which requires a certain amount of
-``language lawyering'' to arrive at in the first place, was not even
-intended by the standard developers. In other words, ``we see how you
-got where you are, but we don't think that that's where you want to be.''
-
-Recognizing the above issues, but attempting to provide compatibility
-with the earlier versions of the standard,
-the 2008 POSIX standard added explicit wording to allow, but not require,
-that @command{awk} support hexadecimal floating point values and
-special values for ``Not A Number'' and infinity.
-
-Although the @command{gawk} maintainer continues to feel that
-providing those features is inadvisable,
-nevertheless, on systems that support IEEE floating point, it seems
-reasonable to provide @emph{some} way to support NaN and Infinity values.
-The solution implemented in @command{gawk} is as follows:
-
-@itemize @bullet
-@item
-With the @option{--posix} command-line option, @command{gawk} becomes
-``hands off.'' String values are passed directly to the system library's
-@code{strtod()} function, and if it successfully returns a numeric value,
-that is what's used.@footnote{You asked for it, you got it.}
-By definition, the results are not portable across
-different systems. They are also a little surprising:
+By default, @command{gawk} uses the double precision floating point values
+supplied by the hardware of the system it runs on. However, if it was
+compiled to do so, @command{gawk} uses the @uref{http://www.mpfr.org, GNU
+MPFR} and @uref{http://gmplib.org, GNU MP} (GMP) libraries for arbitrary
+precision arithmetic on numbers. You can see if MPFR support is available
+like so:
@example
-$ @kbd{echo nanny | gawk --posix '@{ print $1 + 0 @}'}
-@print{} nan
-$ @kbd{echo 0xDeadBeef | gawk --posix '@{ print $1 + 0 @}'}
-@print{} 3735928559
+$ @kbd{gawk --version}
+@print{} GNU Awk 4.1.1, API: 1.1 (GNU MPFR 3.1.0-p3, GNU MP 5.0.2)
+@print{} Copyright (C) 1989, 1991-2014 Free Software Foundation.
+@dots{}
@end example
-@item
-Without @option{--posix}, @command{gawk} interprets the four strings
-@samp{+inf},
-@samp{-inf},
-@samp{+nan},
-and
-@samp{-nan}
-specially, producing the corresponding special numeric values.
-The leading sign acts a signal to @command{gawk} (and the user)
-that the value is really numeric. Hexadecimal floating point is
-not supported (unless you also use @option{--non-decimal-data},
-which is @emph{not} recommended). For example:
+@noindent
+(You may see different version numbers than what's shown here. That's OK;
+what's important is to see that GNU MPFR and GNU MP are listed in
+the output.)
-@example
-$ @kbd{echo nanny | gawk '@{ print $1 + 0 @}'}
-@print{} 0
-$ @kbd{echo +nan | gawk '@{ print $1 + 0 @}'}
-@print{} nan
-$ @kbd{echo 0xDeadBeef | gawk '@{ print $1 + 0 @}'}
-@print{} 0
-@end example
+Additionally, there are a few elements available in the @code{PROCINFO}
+array to provide information about the MPFR and GMP libraries
+(@pxref{Auto-set}).
-@command{gawk} does ignore case in the four special values.
-Thus @samp{+nan} and @samp{+NaN} are the same.
-@end itemize
+The MPFR library provides precise control over precisions and rounding
+modes, and gives correctly rounded, reproducible, platform-independent
+results. With the @option{-M} command-line option,
+all floating-point arithmetic operators and numeric functions
+can yield results to any desired precision level supported by MPFR.
-@node Integer Programming
-@subsection Mixing Integers And Floating-point
-
-As has been mentioned already, @command{awk} uses hardware double
-precision with 64-bit IEEE binary floating-point representation
-for numbers on most systems. A large integer like 9,007,199,254,740,997
-has a binary representation that, although finite, is more than 53 bits long;
-it must also be rounded to 53 bits.
-The biggest integer that can be stored in a C @code{double} is usually the same
-as the largest possible value of a @code{double}. If your system @code{double}
-is an IEEE 64-bit @code{double}, this largest possible value is an integer and
-can be represented precisely. What more should one know about integers?
-
-If you want to know what is the largest integer, such that it and
-all smaller integers can be stored in 64-bit doubles without losing precision,
-then the answer is
-@iftex
-@math{2^{53}}.
-@end iftex
-@ifnottex
-@ifnotdocbook
-2^53.
-@end ifnotdocbook
-@end ifnottex
-@docbook
-2<superscript>53</superscript>. @c
-@end docbook
-The next representable number is the even number
-@iftex
-@math{2^{53} + 2},
-@end iftex
-@ifnottex
-@ifnotdocbook
-2^53 + 2,
-@end ifnotdocbook
-@end ifnottex
-@docbook
-2<superscript>53</superscript> &plus; 2, @c
-@end docbook
-meaning it is unlikely that you will be able to make
-@command{gawk} print
-@iftex
-@math{2^{53} + 1}
-@end iftex
-@ifnottex
-@ifnotdocbook
-2^53 + 1
-@end ifnotdocbook
-@end ifnottex
-@docbook
-2<superscript>53</superscript> &plus; 1 @c
-@end docbook
-in integer format.
-The range of integers exactly representable by a 64-bit double
-is
-@iftex
-@math{[-2^{53}, 2^{53}]}.
-@end iftex
-@ifnottex
-@ifnotdocbook
-[@minus{}2^53, 2^53].
-@end ifnotdocbook
-@end ifnottex
-@docbook
-[&minus;2<superscript>53</superscript>, 2<superscript>53</superscript>]. @c
-@end docbook
-If you ever see an integer outside this range in @command{awk}
-using 64-bit doubles, you have reason to be very suspicious about
-the accuracy of the output. Here is a simple program with erroneous output:
-
-@example
-$ @kbd{gawk 'BEGIN @{ i = 2^53 - 1; for (j = 0; j < 4; j++) print i + j @}'}
-@print{} 9007199254740991
-@print{} 9007199254740992
-@print{} 9007199254740992
-@print{} 9007199254740994
-@end example
+Two built-in variables, @code{PREC} and @code{ROUNDMODE},
+provide control over the working precision and the rounding mode.
+The precision and the rounding mode are set globally for every operation
+to follow.
+@xref{Auto-set}, for more information.
-The lesson is to not assume that any large integer printed by @command{awk}
-represents an exact result from your computation, especially if it wraps
-around on your screen.
+@node FP Math Caution
+@section Floating Point Arithmetic: Caveat Emptor!
-@node Floating-point Programming
-@section Understanding Floating-point Programming
+@quotation
+Math class is tough!
+@author Teen Talk Barbie, July 1992
+@end quotation
-Numerical programming is an extensive area; if you need to develop
-sophisticated numerical algorithms then @command{gawk} may not be
-the ideal tool, and this documentation may not be sufficient.
-It might require digesting a book or two@footnote{One recommended title is
-@cite{Numerical Computing with IEEE Floating Point Arithmetic}, Michael L.@:
-Overton, Society for Industrial and Applied Mathematics, 2004.
-ISBN: 0-89871-482-6, ISBN-13: 978-0-89871-482-1. See
-@uref{http://www.cs.nyu.edu/cs/faculty/overton/book}.}
-to really internalize how to compute
-with ideal accuracy and precision,
-and the result often depends on the particular application.
+This @value{SECTION} provides a high level overview of the issues
+involved when doing lots of floating-point arithmetic.@footnote{There
+is a very nice @uref{http://www.validlab.com/goldberg/paper.pdf,
+paper on floating-point arithmetic} by David Goldberg, ``What Every
+Computer Scientist Should Know About Floating-point Arithmetic,''
+@cite{ACM Computing Surveys} @strong{23}, 1 (1991-03), 5-48. This is
+worth reading if you are interested in the details, but it does require
+a background in computer science.}
+The discussion applies to both hardware and arbitrary-precision
+floating-point arithmetic.
-@quotation NOTE
-A floating-point calculation's @dfn{accuracy} is how close it comes
-to the real value. This is as opposed to the @dfn{precision}, which
-usually refers to the number of bits used to represent the number
-(see @uref{http://en.wikipedia.org/wiki/Accuracy_and_precision,
-the Wikipedia article} for more information).
+@quotation CAUTION
+The material here is purposely general. If you need to do serious
+computer arithmetic, you should do some research first, and not
+rely just on what we tell you.
@end quotation
-There are two options for doing floating-point calculations:
-hardware floating-point (as used by standard @command{awk} and
-the default for @command{gawk}), and @dfn{arbitrary-precision}
-floating-point, which is software based.
-From this point forward, this @value{CHAPTER}
-aims to provide enough information to understand both, and then
-will focus on @command{gawk}'s facilities for the latter.@footnote{If you
-are interested in other tools that perform arbitrary precision arithmetic,
-you may want to investigate the POSIX @command{bc} tool. See
-@uref{http://pubs.opengroup.org/onlinepubs/009695399/utilities/bc.html,
-the POSIX specification for it}, for more information.}
+@menu
+* Inexactness of computations:: Floating point math is not exact.
+* Getting Accuracy:: Getting more accuracy takes some work.
+* Try To Round:: Add digits and round.
+* Setting precision:: How to set the precision.
+* Setting the rounding mode:: How to set the rounding mode.
+@end menu
+
+@node Inexactness of computations
+@subsection Floating Point Arithmetic Is Not Exact
Binary floating-point representations and arithmetic are inexact.
Simple values like 0.1 cannot be precisely represented using
@@ -28048,7 +29357,16 @@ floating-point, you can set the precision before starting a computation,
but then you cannot be sure of the number of significant decimal places
in the final result.
-Sometimes, before you start to write any code, you should think more
+@menu
+* Inexact representation:: Numbers are not exactly represented.
+* Comparing FP Values:: How to compare floating point values.
+* Errors accumulate:: Errors get bigger as they go.
+@end menu
+
+@node Inexact representation
+@subsubsection Many Numbers Cannot Be Represented Exactly
+
+So, before you start to write any code, you should think
about what you really want and what's really happening. Consider the
two numbers in the following example:
@@ -28078,21 +29396,42 @@ you can always specify how much precision you would like in your output.
Usually this is a format string like @code{"%.15g"}, which when
used in the previous example, produces an output identical to the input.
+@node Comparing FP Values
+@subsubsection Be Careful Comparing Values
+
Because the underlying representation can be a little bit off from the exact value,
-comparing floating-point values to see if they are equal is generally not a good idea.
-Here is an example where it does not work like you expect:
+comparing floating-point values to see if they are exactly equal is generally a bad idea.
+Here is an example where it does not work like you would expect:
@example
$ @kbd{gawk 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
@print{} 0
@end example
-The loss of accuracy during a single computation with floating-point numbers
-usually isn't enough to worry about. However, if you compute a value
-which is the result of a sequence of floating point operations,
+The general wisdom when comparing floating-point values is to see if
+they are within some small range of each other (called a @dfn{delta},
+or @dfn{tolerance}).
+You have to decide how small a delta is important to you. Code to do
+this looks something like this:
+
+@example
+delta = 0.00001 # for example
+difference = abs(a - b) # subtract the two values
+if (difference < delta)
+ # all ok
+else
+ # not ok
+@end example
+
+@node Errors accumulate
+@subsubsection Errors Accumulate
+
+The loss of accuracy during a single computation with floating-point
+numbers usually isn't enough to worry about. However, if you compute a
+value which is the result of a sequence of floating point operations,
the error can accumulate and greatly affect the computation itself.
-Here is an attempt to compute the value of the constant
-@value{PI} using one of its many series representations:
+Here is an attempt to compute the value of @value{PI} using one of its
+many series representations:
@example
BEGIN @{
@@ -28106,8 +29445,8 @@ BEGIN @{
@}
@end example
-When run, the early errors propagating through later computations
-cause the loop to terminate prematurely after an attempt to divide by zero.
+When run, the early errors propagate through later computations,
+causing the loop to terminate prematurely after attempting to divide by zero:
@example
$ @kbd{gawk -f pi.awk}
@@ -28134,23 +29473,88 @@ $ @kbd{gawk 'BEGIN @{}
@print{} 4
@end example
-Can computation using arbitrary precision help with the previous examples?
-If you are impatient to know, see
-@ref{Exact Arithmetic}.
+@node Getting Accuracy
+@subsection Getting The Accuracy You Need
+
+Can arbitrary precision arithmetic give exact results? There are
+no easy answers. The standard rules of algebra often do not apply
+when using floating-point arithmetic.
+Among other things, the distributive and associative laws
+do not hold completely, and order of operation may be important
+for your computation. Rounding error, cumulative precision loss
+and underflow are often troublesome.
+
+When @command{gawk} tests the expressions @samp{0.1 + 12.2} and
+@samp{12.3} for equality using the machine double precision arithmetic,
+it decides that they are not equal! (@xref{Comparing FP Values}.)
+You can get the result you want by increasing the precision; 56 bits in
+this case does the job:
+
+@example
+$ @kbd{gawk -M -v PREC=56 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
+@print{} 1
+@end example
+
+If adding more bits is good, perhaps adding even more bits of
+precision is better?
+Here is what happens if we use an even larger value of @code{PREC}:
+
+@example
+$ @kbd{gawk -M -v PREC=201 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
+@print{} 0
+@end example
+
+This is not a bug in @command{gawk} or in the MPFR library.
+It is easy to forget that the finite number of bits used to store the value
+is often just an approximation after proper rounding.
+The test for equality succeeds if and only if @emph{all} bits in the two operands
+are exactly the same. Since this is not necessarily true after floating-point
+computations with a particular precision and effective rounding rule,
+a straight test for equality may not work. Instead, compare the
+two numbers to see if they are within the desirable delta of each other.
+
+In applications where 15 or fewer decimal places suffice,
+hardware double precision arithmetic can be adequate, and is usually much faster.
+But you need to keep in mind that every floating-point operation
+can suffer a new rounding error with catastrophic consequences as illustrated
+by our earlier attempt to compute the value of @value{PI}.
+Extra precision can greatly enhance the stability and the accuracy
+of your computation in such cases.
+
+Repeated addition is not necessarily equivalent to multiplication
+in floating-point arithmetic. In the example in
+@ref{Errors accumulate}:
+
+@example
+$ @kbd{gawk 'BEGIN @{}
+> @kbd{for (d = 1.1; d <= 1.5; d += 0.1) # loop five times (?)}
+> @kbd{i++}
+> @kbd{print i}
+> @kbd{@}'}
+@print{} 4
+@end example
+
+@noindent
+you may or may not succeed in getting the correct result by choosing
+an arbitrarily large value for @code{PREC}. Reformulation of
+the problem at hand is often the correct approach in such situations.
+
+@node Try To Round
+@subsection Try A Few Extra Bits of Precision and Rounding
Instead of arbitrary precision floating-point arithmetic,
often all you need is an adjustment of your logic
or a different order for the operations in your calculation.
-The stability and the accuracy of the computation of the constant @value{PI}
+The stability and the accuracy of the computation of @value{PI}
in the earlier example can be enhanced by using the following
simple algebraic transformation:
@example
-(sqrt(x * x + 1) - 1) / x = x / (sqrt(x * x + 1) + 1)
+(sqrt(x * x + 1) - 1) / x @equiv{} x / (sqrt(x * x + 1) + 1)
@end example
@noindent
-After making this, change the program does converge to
+After making this change, the program converges to
@value{PI} in under 30 iterations:
@example
@@ -28165,358 +29569,22 @@ $ @kbd{gawk -f pi2.awk}
@print{} 3.141592653589797
@end example
-There is no need to be unduly suspicious about the results from
-floating-point arithmetic. The lesson to remember is that
-floating-point arithmetic is always more complex than arithmetic using
-pencil and paper. In order to take advantage of the power
-of computer floating-point, you need to know its limitations
-and work within them. For most casual use of floating-point arithmetic,
-you will often get the expected result in the end if you simply round
-the display of your final results to the correct number of significant
-decimal digits.
-
-As general advice, avoid presenting numerical data in a manner that
-implies better precision than is actually the case.
-
-@menu
-* Floating-point Representation:: Binary floating-point representation.
-* Floating-point Context:: Floating-point context.
-* Rounding Mode:: Floating-point rounding mode.
-@end menu
-
-@node Floating-point Representation
-@subsection Binary Floating-point Representation
-@cindex IEEE-754 format
-
-Although floating-point representations vary from machine to machine,
-the most commonly encountered representation is that defined by the
-IEEE 754 Standard. An IEEE-754 format value has three components:
-
-@itemize @bullet
-@item
-A sign bit telling whether the number is positive or negative.
-
-@item
-An @dfn{exponent}, @var{e}, giving its order of magnitude.
-
-@item
-A @dfn{significand}, @var{s},
-specifying the actual digits of the number.
-@end itemize
-
-The value of the
-number is then
-@iftex
-@math{s @cdot 2^e}.
-@end iftex
-@ifnottex
-@ifnotdocbook
-@var{s * 2^e}.
-@end ifnotdocbook
-@end ifnottex
-@docbook
-<emphasis>s &sdot; 2<superscript>e</superscript></emphasis>. @c
-@end docbook
-The first bit of a non-zero binary significand
-is always one, so the significand in an IEEE-754 format only includes the
-fractional part, leaving the leading one implicit.
-The significand is stored in @dfn{normalized} format,
-which means that the first bit is always a one.
-
-Three of the standard IEEE-754 types are 32-bit single precision,
-64-bit double precision and 128-bit quadruple precision.
-The standard also specifies extended precision formats
-to allow greater precisions and larger exponent ranges.
-
-@node Floating-point Context
-@subsection Floating-point Context
-@cindex context, floating-point
-
-A floating-point @dfn{context} defines the environment for arithmetic operations.
-It governs precision, sets rules for rounding, and limits the range for exponents.
-The context has the following primary components:
-
-@table @dfn
-@item Precision
-Precision of the floating-point format in bits.
-
-@item emax
-Maximum exponent allowed for the format.
-
-@item emin
-Minimum exponent allowed for the format.
-
-@item Underflow behavior
-The format may or may not support gradual underflow.
-
-@item Rounding
-The rounding mode of the context.
-@end table
-
-@ref{table-ieee-formats} lists the precision and exponent
-field values for the basic IEEE-754 binary formats:
-
-@float Table,table-ieee-formats
-@caption{Basic IEEE Format Context Values}
-@multitable @columnfractions .20 .20 .20 .20 .20
-@headitem Name @tab Total bits @tab Precision @tab emin @tab emax
-@item Single @tab 32 @tab 24 @tab @minus{}126 @tab +127
-@item Double @tab 64 @tab 53 @tab @minus{}1022 @tab +1023
-@item Quadruple @tab 128 @tab 113 @tab @minus{}16382 @tab +16383
-@end multitable
-@end float
-
-@quotation NOTE
-The precision numbers include the implied leading one that gives them
-one extra bit of significand.
-@end quotation
-
-A floating-point context can also determine which signals are treated
-as exceptions, and can set rules for arithmetic with special values.
-Please consult the IEEE-754 standard or other resources for details.
-
-@command{gawk} ordinarily uses the hardware double precision
-representation for numbers. On most systems, this is IEEE-754
-floating-point format, corresponding to 64-bit binary with 53 bits
-of precision.
-
-@quotation NOTE
-In case an underflow occurs, the standard allows, but does not require,
-the result from an arithmetic operation to be a number smaller than
-the smallest nonzero normalized number. Such numbers do
-not have as many significant digits as normal numbers, and are called
-@dfn{denormals} or @dfn{subnormals}. The alternative, simply returning a zero,
-is called @dfn{flush to zero}. The basic IEEE-754 binary formats
-support subnormal numbers.
-@end quotation
-
-@node Rounding Mode
-@subsection Floating-point Rounding Mode
-@cindex rounding mode, floating-point
-
-The @dfn{rounding mode} specifies the behavior for the results of numerical
-operations when discarding extra precision. Each rounding mode indicates
-how the least significant returned digit of a rounded result is to
-be calculated.
-@ref{table-rounding-modes} lists the IEEE-754 defined
-rounding modes:
-
-@float Table,table-rounding-modes
-@caption{IEEE 754 Rounding Modes}
-@multitable @columnfractions .45 .55
-@headitem Rounding Mode @tab IEEE Name
-@item Round to nearest, ties to even @tab @code{roundTiesToEven}
-@item Round toward plus Infinity @tab @code{roundTowardPositive}
-@item Round toward negative Infinity @tab @code{roundTowardNegative}
-@item Round toward zero @tab @code{roundTowardZero}
-@item Round to nearest, ties away from zero @tab @code{roundTiesToAway}
-@end multitable
-@end float
-
-The default mode @code{roundTiesToEven} is the most preferred,
-but the least intuitive. This method does the obvious thing for most values,
-by rounding them up or down to the nearest digit.
-For example, rounding 1.132 to two digits yields 1.13,
-and rounding 1.157 yields 1.16.
-
-However, when it comes to rounding a value that is exactly halfway between,
-things do not work the way you probably learned in school.
-In this case, the number is rounded to the nearest even digit.
-So rounding 0.125 to two digits rounds down to 0.12,
-but rounding 0.6875 to three digits rounds up to 0.688.
-You probably have already encountered this rounding mode when
-using @code{printf} to format floating-point numbers.
-For example:
-
-@example
-BEGIN @{
- x = -4.5
- for (i = 1; i < 10; i++) @{
- x += 1.0
- printf("%4.1f => %2.0f\n", x, x)
- @}
-@}
-@end example
-
-@noindent
-produces the following output when run on the author's system:@footnote{It
-is possible for the output to be completely different if the
-C library in your system does not use the IEEE-754 even-rounding
-rule to round halfway cases for @code{printf}.}
-
-@example
--3.5 => -4
--2.5 => -2
--1.5 => -2
--0.5 => 0
- 0.5 => 0
- 1.5 => 2
- 2.5 => 2
- 3.5 => 4
- 4.5 => 4
-@end example
-
-The theory behind the rounding mode @code{roundTiesToEven} is that
-it more or less evenly distributes upward and downward rounds
-of exact halves, which might cause any round-off error
-to cancel itself out. This is the default rounding mode used
-in IEEE-754 computing functions and operators.
-
-The other rounding modes are rarely used.
-Round toward positive infinity (@code{roundTowardPositive})
-and round toward negative infinity (@code{roundTowardNegative})
-are often used to implement interval arithmetic,
-where you adjust the rounding mode to calculate upper and lower bounds
-for the range of output. The @code{roundTowardZero}
-mode can be used for converting floating-point numbers to integers.
-The rounding mode @code{roundTiesToAway} rounds the result to the
-nearest number and selects the number with the larger magnitude
-if a tie occurs.
-
-Some numerical analysts will tell you that your choice of rounding style
-has tremendous impact on the final outcome, and advise you to wait until
-final output for any rounding. Instead, you can often avoid round-off error problems by
-setting the precision initially to some value sufficiently larger than
-the final desired precision, so that the accumulation of round-off error
-does not influence the outcome.
-If you suspect that results from your computation are
-sensitive to accumulation of round-off error,
-one way to be sure is to look for a significant difference in output
-when you change the rounding mode.
-
-@node Gawk and MPFR
-@section @command{gawk} + MPFR = Powerful Arithmetic
-@cindex MPFR
-@cindex GMP
-
-The rest of this @value{CHAPTER} describes how to use the arbitrary precision
-(also known as @dfn{multiple precision} or @dfn{infinite precision}) numeric
-capabilities in @command{gawk} to produce maximally accurate results
-when you need it.
-
-But first you should check if your version of
-@command{gawk} supports arbitrary precision arithmetic.
-The easiest way to find out is to look at the output of
-the following command:
-
-@example
-$ @kbd{./gawk --version}
-@print{} GNU Awk 4.1.1, API: 1.1 (GNU MPFR 3.1.0-p3, GNU MP 5.0.2)
-@print{} Copyright (C) 1989, 1991-2014 Free Software Foundation.
-@dots{}
-@end example
-
-@noindent
-(You may see different version numbers than what's shown here. That's OK;
-what's important is to see that GNU MPFR and GNU MP are listed in
-the output.)
-
-@command{gawk} uses the
-@uref{http://www.mpfr.org, GNU MPFR}
-and
-@uref{http://gmplib.org, GNU MP} (GMP)
-libraries for arbitrary precision
-arithmetic on numbers. So if you do not see the names of these libraries
-in the output, then your version of @command{gawk} does not support
-arbitrary precision arithmetic.
-
-Additionally,
-there are a few elements available in the @code{PROCINFO} array
-to provide information about the MPFR and GMP libraries.
-@xref{Auto-set}, for more information.
-
-@ignore
-Even if you aren't interested in arbitrary precision arithmetic, you
-may still benefit from knowing about how @command{gawk} handles numbers
-in general, and the limitations of doing arithmetic with ordinary
-@command{gawk} numbers.
-@end ignore
-
-
-@node Arbitrary Precision Floats
-@section Arbitrary Precision Floating-point Arithmetic with @command{gawk}
-
-@command{gawk} uses the GNU MPFR library
-for arbitrary precision floating-point arithmetic. The MPFR library
-provides precise control over precisions and rounding modes, and gives
-correctly rounded, reproducible, platform-independent results. With one
-of the command-line options @option{--bignum} or @option{-M},
-all floating-point arithmetic operators and numeric functions can yield
-results to any desired precision level supported by MPFR.
-Two built-in variables, @code{PREC} and @code{ROUNDMODE},
-provide control over the working precision and the rounding mode
-(@pxref{Setting Precision}, and
-@pxref{Setting Rounding Mode}).
-The precision and the rounding mode are set globally for every operation
-to follow.
-
-The default working precision for arbitrary precision floating-point values is
-53 bits, and the default value for @code{ROUNDMODE} is @code{"N"},
-which selects the IEEE-754 @code{roundTiesToEven} rounding mode
-(@pxref{Rounding Mode}).@footnote{The
-default precision is 53 bits, since according to the MPFR documentation,
-the library should be able to exactly reproduce all computations with
-double-precision machine floating-point numbers (@code{double} type
-in C), except the default exponent range is much wider and subnormal
-numbers are not implemented.}
-@command{gawk} uses the default exponent range in MPFR
-@iftex
-(@math{emax = 2^{30} - 1, emin = -emax})
-@end iftex
-@ifnottex
-@ifnotdocbook
-(@var{emax} = 2^30 @minus{} 1, @var{emin} = @minus{}@var{emax})
-@end ifnotdocbook
-@end ifnottex
-@docbook
-(<emphasis>emax</emphasis> = 2<superscript>30</superscript> &minus; 1, <emphasis>emin</emphasis> = &minus;<emphasis>emax</emphasis>) @c
-@end docbook
-for all floating-point contexts.
-There is no explicit mechanism to adjust the exponent range.
-MPFR does not implement subnormal numbers by default,
-and this behavior cannot be changed in @command{gawk}.
-
-@quotation NOTE
-When emulating an IEEE-754 format (@pxref{Setting Precision}),
-@command{gawk} internally adjusts the exponent range
-to the value defined for the format and also performs computations needed for
-gradual underflow (subnormal numbers).
-@end quotation
-
-@quotation NOTE
-MPFR numbers are variable-size entities, consuming only as much space as
-needed to store the significant digits. Since the performance using MPFR
-numbers pales in comparison to doing arithmetic using the underlying machine
-types, you should consider using only as much precision as needed by
-your program.
-@end quotation
-
-@menu
-* Setting Precision:: Setting the working precision.
-* Setting Rounding Mode:: Setting the rounding mode.
-* Floating-point Constants:: Representing floating-point constants.
-* Changing Precision:: Changing the precision of a number.
-* Exact Arithmetic:: Exact arithmetic with floating-point numbers.
-@end menu
-
-@node Setting Precision
-@subsection Setting the Working Precision
-@cindex @code{PREC} variable
-@cindex setting working precision
+@node Setting precision
+@subsection Setting The Precision
@command{gawk} uses a global working precision; it does not keep track of
the precision or accuracy of individual numbers. Performing an arithmetic
operation or calling a built-in function rounds the result to the current
-working precision. The default working precision is 53 bits, which can be
-modified using the built-in variable @code{PREC}. You can also set the
-value to one of the pre-defined case-insensitive strings
+working precision. The default working precision is 53 bits, which you can
+modify using the built-in variable @code{PREC}. You can also set the
+value to one of the predefined case-insensitive strings
shown in @ref{table-predefined-precision-strings},
-to emulate an IEEE-754 binary format.
+to emulate an IEEE 754 binary format.
@float Table,table-predefined-precision-strings
-@caption{Predefined precision strings for @code{PREC}}
+@caption{Predefined Precision Strings For @code{PREC}}
@multitable {@code{"double"}} {12345678901234567890123456789012345}
-@headitem @code{PREC} @tab IEEE-754 Binary Format
+@headitem @code{PREC} @tab IEEE 754 Binary Format
@item @code{"half"} @tab 16-bit half-precision.
@item @code{"single"} @tab Basic 32-bit single precision.
@item @code{"double"} @tab Basic 64-bit double precision.
@@ -28535,57 +29603,34 @@ $ @kbd{gawk -M -v PREC=100 'BEGIN @{ x = 1.0e-400; print x + 0}
@print{} 0
@end example
-Binary and decimal precisions are related approximately, according to the
-formula:
+@quotation CAUTION
+Be wary of floating-point constants! When reading a floating-point
+constant from program source code, @command{gawk} uses the default
+precision (that of a C @code{double}), unless overridden by an assignment
+to the special variable @code{PREC} on the command line, to store it
+internally as an MPFR number. Changing the precision using @code{PREC}
+in the program text does @emph{not} change the precision of a constant.
+
+If you need to represent a floating-point constant at a higher precision
+than the default and cannot use a command-line assignment to @code{PREC},
+you should either specify the constant as a string, or as a rational
+number, whenever possible. The following example illustrates the
+differences among various ways to print a floating-point constant:
+@end quotation
-@iftex
-@math{prec = 3.322 @cdot dps}
-@end iftex
-@ifnottex
-@ifnotdocbook
-@var{prec} = 3.322 * @var{dps}
-@end ifnotdocbook
-@end ifnottex
-@docbook
-<para>
-<emphasis>prec</emphasis> = 3.322 &sdot; <emphasis>dps</emphasis> @c
-</para>
-@end docbook
+@example
+$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", 0.1) @}'}
+@print{} 0.1000000000000000055511151
+$ @kbd{gawk -M -v PREC=113 'BEGIN @{ printf("%0.25f\n", 0.1) @}'}
+@print{} 0.1000000000000000000000000
+$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", "0.1") @}'}
+@print{} 0.1000000000000000000000000
+$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", 1/10) @}'}
+@print{} 0.1000000000000000000000000
+@end example
-@noindent
-Here, @var{prec} denotes the binary precision
-(measured in bits) and @var{dps} (short for decimal places)
-is the decimal digits. We can easily calculate how many decimal
-digits the 53-bit significand of an IEEE double is equivalent to:
-53 / 3.322 which is equal to about 15.95.
-But what does 15.95 digits actually mean? It depends whether you are
-concerned about how many digits you can rely on, or how many digits
-you need.
-
-It is important to know how many bits it takes to uniquely identify
-a double-precision value (the C type @code{double}). If you want to
-convert from @code{double} to decimal and back to @code{double} (e.g.,
-saving a @code{double} representing an intermediate result to a file, and
-later reading it back to restart the computation), then a few more decimal
-digits are required. 17 digits is generally enough for a @code{double}.
-
-It can also be important to know what decimal numbers can be uniquely
-represented with a @code{double}. If you want to convert
-from decimal to @code{double} and back again, 15 digits is the most that
-you can get. Stated differently, you should not present
-the numbers from your floating-point computations with more than 15
-significant digits in them.
-
-Conversely, it takes a precision of 332 bits to hold an approximation
-of the constant @value{PI} that is accurate to 100 decimal places.
-
-You should always add some extra bits in order to avoid the confusing round-off
-issues that occur because numbers are stored internally in binary.
-
-@node Setting Rounding Mode
-@subsection Setting the Rounding Mode
-@cindex @code{ROUNDMODE} variable
-@cindex setting rounding mode
+@node Setting the rounding mode
+@subsection Setting The Rounding Mode
The @code{ROUNDMODE} variable provides
program level control over the rounding mode.
@@ -28604,184 +29649,91 @@ rounding modes is shown in @ref{table-gawk-rounding-modes}.
@end multitable
@end float
-@code{ROUNDMODE} has the default value @code{"N"},
-which selects the IEEE-754 rounding mode @code{roundTiesToEven}.
-In @ref{table-gawk-rounding-modes}, @code{"A"} is listed to select the IEEE-754 mode
-@code{roundTiesToAway}. This is only available
-if your version of the MPFR library supports it; otherwise setting
-@code{ROUNDMODE} to this value has no effect. @xref{Rounding Mode},
-for the meanings of the various rounding modes.
-
-Here is an example of how to change the default rounding behavior of
-@code{printf}'s output:
-
-@example
-$ @kbd{gawk -M -v ROUNDMODE="Z" 'BEGIN @{ printf("%.2f\n", 1.378) @}'}
-@print{} 1.37
-@end example
-
-@node Floating-point Constants
-@subsection Representing Floating-point Constants
-@cindex constants, floating-point
-
-Be wary of floating-point constants! When reading a floating-point constant
-from program source code, @command{gawk} uses the default precision,
-unless overridden
-by an assignment to the special variable @code{PREC} on the command
-line, to store it internally as a MPFR number.
-Changing the precision using @code{PREC} in the program text does
-@emph{not} change the precision of a constant. If you need to
-represent a floating-point constant at a higher precision than the
-default and cannot use a command line assignment to @code{PREC},
-you should either specify the constant as a string, or
-as a rational number, whenever possible. The following example
-illustrates the differences among various ways to
-print a floating-point constant:
-
-@example
-$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", 0.1) @}'}
-@print{} 0.1000000000000000055511151
-$ @kbd{gawk -M -v PREC=113 'BEGIN @{ printf("%0.25f\n", 0.1) @}'}
-@print{} 0.1000000000000000000000000
-$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", "0.1") @}'}
-@print{} 0.1000000000000000000000000
-$ @kbd{gawk -M 'BEGIN @{ PREC = 113; printf("%0.25f\n", 1/10) @}'}
-@print{} 0.1000000000000000000000000
-@end example
+@code{ROUNDMODE} has the default value @code{"N"}, which
+selects the IEEE 754 rounding mode @code{roundTiesToEven}.
+In @ref{table-gawk-rounding-modes}, the value @code{"A"} selects
+@code{roundTiesToAway}. This is only available if your version of the
+MPFR library supports it; otherwise setting @code{ROUNDMODE} to @code{"A"}
+has no effect.
-In the first case, the number is stored with the default precision of 53 bits.
-
-@node Changing Precision
-@subsection Changing the Precision of a Number
-@cindex changing precision of a number
-
-@cindex Laurie, Dirk
-@quotation
-@i{The point is that in any variable-precision package,
-a decision is made on how to treat numbers given as data,
-or arising in intermediate results, which are represented in
-floating-point format to a precision lower than working precision.
-Do we promote them to full membership of the high-precision club,
-or do we treat them and all their associates as second-class citizens?
-Sometimes the first course is proper, sometimes the second, and it takes
-careful analysis to tell which.}@footnote{Dirk Laurie.
-@cite{Variable-precision Arithmetic Considered Perilous --- A Detective Story}.
-Electronic Transactions on Numerical Analysis. Volume 28, pp. 168-173, 2008.}
-@author Dirk Laurie
-@end quotation
+The default mode @code{roundTiesToEven} is the most preferred,
+but the least intuitive. This method does the obvious thing for most values,
+by rounding them up or down to the nearest digit.
+For example, rounding 1.132 to two digits yields 1.13,
+and rounding 1.157 yields 1.16.
-@command{gawk} does not implicitly modify the precision of any previously
-computed results when the working precision is changed with an assignment
-to @code{PREC}. The precision of a number is always the one that was
-used at the time of its creation, and there is no way for the user
-to explicitly change it afterwards. However, since the result of a
-floating-point arithmetic operation is always an arbitrary precision
-floating-point value---with a precision set by the value of @code{PREC}---one of the
-following workarounds effectively accomplishes the desired behavior:
+However, when it comes to rounding a value that is exactly halfway between,
+things do not work the way you probably learned in school.
+In this case, the number is rounded to the nearest even digit.
+So rounding 0.125 to two digits rounds down to 0.12,
+but rounding 0.6875 to three digits rounds up to 0.688.
+You probably have already encountered this rounding mode when
+using @code{printf} to format floating-point numbers.
+For example:
@example
-x = x + 0.0
+BEGIN @{
+ x = -4.5
+ for (i = 1; i < 10; i++) @{
+ x += 1.0
+ printf("%4.1f => %2.0f\n", x, x)
+ @}
+@}
@end example
@noindent
-or:
-
-@example
-x += 0.0
-@end example
-
-@node Exact Arithmetic
-@subsection Exact Arithmetic with Floating-point Numbers
-
-@quotation CAUTION
-Never depend on the exactness of floating-point arithmetic,
-even for apparently simple expressions!
-@end quotation
-
-Can arbitrary precision arithmetic give exact results? There are
-no easy answers. The standard rules of algebra often do not apply
-when using floating-point arithmetic.
-Among other things, the distributive and associative laws
-do not hold completely, and order of operation may be important
-for your computation. Rounding error, cumulative precision loss
-and underflow are often troublesome.
-
-When @command{gawk} tests the expressions @samp{0.1 + 12.2} and @samp{12.3}
-for equality
-using the machine double precision arithmetic, it decides that they
-are not equal!
-(@xref{Floating-point Programming}.)
-You can get the result you want by increasing the precision;
-56 bits in this case will get the job done:
-
-@example
-$ @kbd{gawk -M -v PREC=56 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
-@print{} 1
-@end example
-
-If adding more bits is good, perhaps adding even more bits of
-precision is better?
-Here is what happens if we use an even larger value of @code{PREC}:
-
-@example
-$ @kbd{gawk -M -v PREC=201 'BEGIN @{ print (0.1 + 12.2 == 12.3) @}'}
-@print{} 0
-@end example
-
-This is not a bug in @command{gawk} or in the MPFR library.
-It is easy to forget that the finite number of bits used to store the value
-is often just an approximation after proper rounding.
-The test for equality succeeds if and only if @emph{all} bits in the two operands
-are exactly the same. Since this is not necessarily true after floating-point
-computations with a particular precision and effective rounding rule,
-a straight test for equality may not work.
-
-So, don't assume that floating-point values can be compared for equality.
-You should also exercise caution when using other forms of comparisons.
-The standard way to compare between floating-point numbers is to determine
-how much error (or @dfn{tolerance}) you will allow in a comparison and
-check to see if one value is within this error range of the other.
-
-In applications where 15 or fewer decimal places suffice,
-hardware double precision arithmetic can be adequate, and is usually much faster.
-But you do need to keep in mind that every floating-point operation
-can suffer a new rounding error with catastrophic consequences as illustrated
-by our earlier attempt to compute the value of the constant @value{PI}
-(@pxref{Floating-point Programming}).
-Extra precision can greatly enhance the stability and the accuracy
-of your computation in such cases.
-
-Repeated addition is not necessarily equivalent to multiplication
-in floating-point arithmetic. In the example in
-@ref{Floating-point Programming}:
+produces the following output when run on the author's system:@footnote{It
+is possible for the output to be completely different if the
+C library in your system does not use the IEEE 754 even-rounding
+rule to round halfway cases for @code{printf}.}
@example
-$ @kbd{gawk 'BEGIN @{}
-> @kbd{for (d = 1.1; d <= 1.5; d += 0.1) # loop five times (?)}
-> @kbd{i++}
-> @kbd{print i}
-> @kbd{@}'}
-@print{} 4
+-3.5 => -4
+-2.5 => -2
+-1.5 => -2
+-0.5 => 0
+ 0.5 => 0
+ 1.5 => 2
+ 2.5 => 2
+ 3.5 => 4
+ 4.5 => 4
@end example
-@noindent
-you may or may not succeed in getting the correct result by choosing
-an arbitrarily large value for @code{PREC}. Reformulation of
-the problem at hand is often the correct approach in such situations.
+The theory behind @code{roundTiesToEven} is that it more or less evenly
+distributes upward and downward rounds of exact halves, which might
+cause any accumulating round-off error to cancel itself out. This is the
+default rounding mode for IEEE 754 computing functions and operators.
+
+The other rounding modes are rarely used. Round toward positive infinity
+(@code{roundTowardPositive}) and round toward negative infinity
+(@code{roundTowardNegative}) are often used to implement interval
+arithmetic, where you adjust the rounding mode to calculate upper and
+lower bounds for the range of output. The @code{roundTowardZero} mode can
+be used for converting floating-point numbers to integers. The rounding
+mode @code{roundTiesToAway} rounds the result to the nearest number and
+selects the number with the larger magnitude if a tie occurs.
+
+Some numerical analysts will tell you that your choice of rounding
+style has tremendous impact on the final outcome, and advise you to
+wait until final output for any rounding. Instead, you can often avoid
+round-off error problems by setting the precision initially to some
+value sufficiently larger than the final desired precision, so that
+the accumulation of round-off error does not influence the outcome.
+If you suspect that results from your computation are sensitive to
+accumulation of round-off error, one way to be sure is to look for a
+significant difference in output when you change the rounding mode.
@node Arbitrary Precision Integers
@section Arbitrary Precision Integer Arithmetic with @command{gawk}
@cindex integers, arbitrary precision
@cindex arbitrary precision integers
-If one of the options @option{--bignum} or @option{-M} is specified,
-@command{gawk} performs all
-integer arithmetic using GMP arbitrary precision integers.
-Any number that looks like an integer in a program source or data file
-is stored as an arbitrary precision integer.
-The size of the integer is limited only by your computer's memory.
-The current floating-point context has no effect on operations involving integers.
-For example, the following computes
+When given the @option{-M} option,
+@command{gawk} performs all integer arithmetic using GMP arbitrary
+precision integers. Any number that looks like an integer in a source
+or @value{DF} is stored as an arbitrary precision integer. The size
+of the integer is limited only by the available memory. For example,
+the following computes
@iftex
@math{5^{4^{3^{2}}}},
@end iftex
@@ -28794,7 +29746,7 @@ For example, the following computes
5<superscript>4<superscript>3<superscript>2</superscript></superscript></superscript>, @c
@end docbook
the result of which is beyond the
-limits of ordinary @command{gawk} numbers:
+limits of ordinary hardware double-precision floating point values:
@example
$ @kbd{gawk -M 'BEGIN @{}
@@ -28806,9 +29758,9 @@ $ @kbd{gawk -M 'BEGIN @{}
@print{} 62060698786608744707 ... 92256259918212890625
@end example
-If you were to compute the same value using arbitrary precision
-floating-point values instead, the precision needed for correct output
-(using the formula
+If instead you were to compute the same value using arbitrary precision
+floating-point values, the precision needed for correct output (using
+the formula
@iftex
@math{prec = 3.322 @cdot dps}),
would be @math{3.322 @cdot 183231},
@@ -28830,8 +29782,8 @@ The result from an arithmetic operation with an integer and a floating-point val
is a floating-point value with a precision equal to the working precision.
The following program calculates the eighth term in
Sylvester's sequence@footnote{Weisstein, Eric W.
-@cite{Sylvester's Sequence}. From MathWorld---A Wolfram Web Resource.
-@url{http://mathworld.wolfram.com/SylvestersSequence.html}}
+@cite{Sylvester's Sequence}. From MathWorld---A Wolfram Web Resource
+@w{(@url{http://mathworld.wolfram.com/SylvestersSequence.html}).}}
using a recurrence:
@example
@@ -28851,15 +29803,15 @@ floating-point results exactly. You can either increase the precision
@samp{2.0} with an integer, to perform all computations using integer
arithmetic to get the correct output.
-It will sometimes be necessary for @command{gawk} to implicitly convert an
-arbitrary precision integer into an arbitrary precision floating-point value.
-This is primarily because the MPFR library does not always provide the
-relevant interface to process arbitrary precision integers or mixed-mode
-numbers as needed by an operation or function.
-In such a case, the precision is set to the minimum value necessary
-for exact conversion, and the working precision is not used for this purpose.
-If this is not what you need or want, you can employ a subterfuge
-like this:
+Sometimes @command{gawk} must implicitly convert an arbitrary precision
+integer into an arbitrary precision floating-point value. This is
+primarily because the MPFR library does not always provide the relevant
+interface to process arbitrary precision integers or mixed-mode numbers
+as needed by an operation or function. In such a case, the precision is
+set to the minimum value necessary for exact conversion, and the working
+precision is not used for this purpose. If this is not what you need or
+want, you can employ a subterfuge, and convert the integer to floating
+point first, like this:
@example
gawk -M 'BEGIN @{ n = 13; print (n + 0.0) % 2.0 @}'
@@ -28879,6 +29831,215 @@ to just use the following:
gawk -M 'BEGIN @{ n = 13; print n % 2 @}'
@end example
+When dividing two arbitrary precision integers with either
+@samp{/} or @samp{%}, the result is typically an arbitrary
+precision floating point value (unless the denominator evenly
+divides into the numerator). In order to do integer division
+or remainder with arbitrary precision integers, use the built-in
+@code{div()} function (@pxref{Numeric Functions}).
+
+You can simulate the @code{div()} function in standard @command{awk}
+using this user-defined function:
+
+@example
+@c file eg/lib/div.awk
+# div --- do integer division
+
+@c endfile
+@ignore
+@c file eg/lib/div.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# July, 2014
+
+@c endfile
+
+@end ignore
+@c file eg/lib/div.awk
+function div(numerator, denominator, result, i)
+@{
+ split("", result)
+
+ numerator = int(numerator)
+ denominator = int(denominator)
+ result["quotient"] = int(numerator / denominator)
+ result["remainder"] = int(numerator % denominator)
+
+ return 0.0
+@}
+@c endfile
+@end example
+
+@node POSIX Floating Point Problems
+@section Standards Versus Existing Practice
+
+Historically, @command{awk} has converted any non-numeric looking string
+to the numeric value zero, when required. Furthermore, the original
+definition of the language and the original POSIX standards specified that
+@command{awk} only understands decimal numbers (base 10), and not octal
+(base 8) or hexadecimal numbers (base 16).
+
+Changes in the language of the
+2001 and 2004 POSIX standards can be interpreted to imply that @command{awk}
+should support additional features. These features are:
+
+@itemize @value{BULLET}
+@item
+Interpretation of floating point data values specified in hexadecimal
+notation (e.g., @code{0xDEADBEEF}). (Note: data values, @emph{not}
+source code constants.)
+
+@item
+Support for the special IEEE 754 floating point values ``Not A Number''
+(NaN), positive Infinity (``inf'') and negative Infinity (``@minus{}inf'').
+In particular, the format for these values is as specified by the ISO 1999
+C standard, which ignores case and can allow implementation-dependent additional
+characters after the @samp{nan} and allow either @samp{inf} or @samp{infinity}.
+@end itemize
+
+The first problem is that both of these are clear changes to historical
+practice:
+
+@itemize @value{BULLET}
+@item
+The @command{gawk} maintainer feels that supporting hexadecimal floating
+point values, in particular, is ugly, and was never intended by the
+original designers to be part of the language.
+
+@item
+Allowing completely alphabetic strings to have valid numeric
+values is also a very severe departure from historical practice.
+@end itemize
+
+The second problem is that the @command{gawk} maintainer feels that this
+interpretation of the standard, which requires a certain amount of
+``language lawyering'' to arrive at in the first place, was not even
+intended by the standard developers. In other words, ``we see how you
+got where you are, but we don't think that that's where you want to be.''
+
+Recognizing the above issues, but attempting to provide compatibility
+with the earlier versions of the standard,
+the 2008 POSIX standard added explicit wording to allow, but not require,
+that @command{awk} support hexadecimal floating point values and
+special values for ``Not A Number'' and infinity.
+
+Although the @command{gawk} maintainer continues to feel that
+providing those features is inadvisable,
+nevertheless, on systems that support IEEE floating point, it seems
+reasonable to provide @emph{some} way to support NaN and Infinity values.
+The solution implemented in @command{gawk} is as follows:
+
+@itemize @value{BULLET}
+@item
+With the @option{--posix} command-line option, @command{gawk} becomes
+``hands off.'' String values are passed directly to the system library's
+@code{strtod()} function, and if it successfully returns a numeric value,
+that is what's used.@footnote{You asked for it, you got it.}
+By definition, the results are not portable across
+different systems. They are also a little surprising:
+
+@example
+$ @kbd{echo nanny | gawk --posix '@{ print $1 + 0 @}'}
+@print{} nan
+$ @kbd{echo 0xDeadBeef | gawk --posix '@{ print $1 + 0 @}'}
+@print{} 3735928559
+@end example
+
+@item
+Without @option{--posix}, @command{gawk} interprets the four strings
+@samp{+inf},
+@samp{-inf},
+@samp{+nan},
+and
+@samp{-nan}
+specially, producing the corresponding special numeric values.
+The leading sign acts as a signal to @command{gawk} (and the user)
+that the value is really numeric. Hexadecimal floating point is
+not supported (unless you also use @option{--non-decimal-data},
+which is @emph{not} recommended). For example:
+
+@example
+$ @kbd{echo nanny | gawk '@{ print $1 + 0 @}'}
+@print{} 0
+$ @kbd{echo +nan | gawk '@{ print $1 + 0 @}'}
+@print{} nan
+$ @kbd{echo 0xDeadBeef | gawk '@{ print $1 + 0 @}'}
+@print{} 0
+@end example
+
+@command{gawk} ignores case in the four special values.
+Thus @samp{+nan} and @samp{+NaN} are the same.
+@end itemize
+
+@node Floating point summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+Most computer arithmetic is done using either integers or floating-point
+values. The default for @command{awk} is to use double-precision
+floating-point values.
+
+@item
+In the early 1990s, Barbie mistakenly said ``Math class is tough!''
+While math isn't tough, floating-point arithmetic isn't the same
+as pencil and paper math, and care must be taken:
+
+@c nested list
+@itemize @value{MINUS}
+@item
+Not all numbers can be represented exactly.
+
+@item
+Comparing values should use a delta, instead of being done directly
+with @samp{==} and @samp{!=}.
+
+@item
+Errors accumulate.
+
+@item
+Operations are not always truly associative or distributive.
+@end itemize
+
+@item
+Increasing the accuracy can help, but it is not a panacea.
+
+@item
+Often, increasing the accuracy and then rounding to the desired
+number of digits produces reasonable results.
+
+@item
+Use @option{-M} (or @option{--bignum}) to enable MPFR
+arithmetic. Use @code{PREC} to set the precision in bits, and
+@code{ROUNDMODE} to set the IEEE 754 rounding mode.
+
+@item
+With @option{-M}, @command{gawk} performs
+arbitrary precision integer arithmetic using the GMP library.
+This is faster and more space efficient than using MPFR for
+the same calculations.
+
+@item
+There are several ``dark corners'' with respect to floating-point
+numbers where @command{gawk} disagrees with the POSIX standard.
+It pays to be aware of them.
+
+@item
+Overall, there is no need to be unduly suspicious about the results from
+floating-point arithmetic. The lesson to remember is that floating-point
+arithmetic is always more complex than arithmetic using pencil and
+paper. In order to take advantage of the power of computer floating-point,
+you need to know its limitations and work within them. For most casual
+use of floating-point arithmetic, you will often get the expected result
+if you simply round the display of your final results to the correct number
+of significant decimal digits.
+
+@item
+As general advice, avoid presenting numerical data in a manner that
+implies better precision than is actually the case.
+
+@end itemize
+
@node Dynamic Extensions
@chapter Writing Extensions for @command{gawk}
@cindex dynamically loaded extensions
@@ -28911,6 +30072,8 @@ When @option{--sandbox} is specified, extensions are disabled
* Extension Samples:: The sample extensions that ship with
@code{gawk}.
* gawkextlib:: The @code{gawkextlib} project.
+* Extension summary:: Extension summary.
+* Extension Exercises:: Exercises.
@end menu
@node Extension Intro
@@ -28936,8 +30099,15 @@ the facilities that the API provides and how to use
them, and presents a small sample extension. In addition, it documents
the sample extensions included in the @command{gawk} distribution,
and describes the @code{gawkextlib} project.
+@ifclear FOR_PRINT
@xref{Extension Design}, for a discussion of the extension mechanism
goals and design.
+@end ifclear
+@ifset FOR_PRINT
+See @uref{http://www.gnu.org/software/gawk/manual/html_node/Extension-Design.html}
+for a discussion of the extension mechanism
+goals and design.
+@end ifset
@node Plugin License
@section Extension Licensing
@@ -28963,31 +30133,33 @@ Communication between
is loaded, it is passed a pointer to a @code{struct} whose fields are
function pointers.
@ifnotdocbook
-This is shown in @ref{load-extension}.
+This is shown in @ref{figure-load-extension}.
@end ifnotdocbook
@ifdocbook
-This is shown in @inlineraw{docbook, <xref linkend="load-extension"/>}.
+This is shown in @inlineraw{docbook, <xref linkend="figure-load-extension"/>}.
@end ifdocbook
@ifnotdocbook
-@float Figure,load-extension
+@float Figure,figure-load-extension
@caption{Loading The Extension}
@c FIXME: One day, it should not be necessary to have two cases,
@c but rather just the one without the "txt" final argument.
@c This applies to the other figures as well.
@ifinfo
-@center @image{api-figure1, , , Loading the extension, txt}
+@center @image{api-figure1, , , Loading The Extension, txt}
@end ifinfo
@ifnotinfo
-@center @image{api-figure1, , , Loading the extension}
+@center @image{api-figure1, , , Loading The Extension}
@end ifnotinfo
@end float
@end ifnotdocbook
@docbook
-<figure id="load-extension">
-<title>Loading the extension</title>
-<graphic fileref="api-figure1.eps"/>
+<figure id="figure-load-extension" float="0">
+<title>Loading The Extension</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="api-figure1.png" format="PNG"/></imageobject>
+</mediaobject>
</figure>
@end docbook
@@ -28996,28 +30168,30 @@ function pointers, at runtime, without needing (link-time) access
to @command{gawk}'s symbols. One of these function pointers is to a
function for ``registering'' new built-in functions.
@ifnotdocbook
-This is shown in @ref{load-new-function}.
+This is shown in @ref{figure-load-new-function}.
@end ifnotdocbook
@ifdocbook
-This is shown in @inlineraw{docbook, <xref linkend="load-new-function"/>}.
+This is shown in @inlineraw{docbook, <xref linkend="figure-load-new-function"/>}.
@end ifdocbook
@ifnotdocbook
-@float Figure,load-new-function
+@float Figure,figure-load-new-function
@caption{Loading The New Function}
@ifinfo
-@center @image{api-figure2, , , Loading the new function, txt}
+@center @image{api-figure2, , , Loading The New Function, txt}
@end ifinfo
@ifnotinfo
-@center @image{api-figure2, , , Loading the new function}
+@center @image{api-figure2, , , Loading The New Function}
@end ifnotinfo
@end float
@end ifnotdocbook
@docbook
-<figure id="load-new-function">
-<title>Loading the new function</title>
-<graphic fileref="api-figure2.eps"/>
+<figure id="figure-load-new-function" float="0">
+<title>Loading The New Function</title>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="api-figure2.png" format="PNG"/></imageobject>
+</mediaobject>
</figure>
@end docbook
@@ -29027,14 +30201,14 @@ provide the new feature (@code{do_chdir()}, for example). @command{gawk}
associates the function pointer with a name and can then call it, using a
defined calling convention.
@ifnotdocbook
-This is shown in @ref{call-new-function}.
+This is shown in @ref{figure-call-new-function}.
@end ifnotdocbook
@ifdocbook
-This is shown in @inlineraw{docbook, <xref linkend="call-new-function"/>}.
+This is shown in @inlineraw{docbook, <xref linkend="figure-call-new-function"/>}.
@end ifdocbook
@ifnotdocbook
-@float Figure,call-new-function
+@float Figure,figure-call-new-function
@caption{Calling The New Function}
@ifinfo
@center @image{api-figure3, , , Calling the new function, txt}
@@ -29046,9 +30220,11 @@ This is shown in @inlineraw{docbook, <xref linkend="call-new-function"/>}.
@end ifnotdocbook
@docbook
-<figure id="call-new-function">
+<figure id="figure-call-new-function" float="0">
<title>Calling The New Function</title>
-<graphic fileref="api-figure3.eps"/>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="api-figure3.png" format="PNG"/></imageobject>
+</mediaobject>
</figure>
@end docbook
@@ -29056,9 +30232,9 @@ The @code{do_@var{xxx}()} function, in turn, then uses the function
pointers in the API @code{struct} to do its work, such as updating
variables or arrays, printing messages, setting @code{ERRNO}, and so on.
-Convenience macros in the @file{gawkapi.h} header file make calling
-through the function pointers look like regular function calls so that
-extension code is quite readable and understandable.
+Convenience macros make calling through the function pointers look
+like regular function calls so that extension code is quite readable
+and understandable.
Although all of this sounds somewhat complicated, the result is that
extension code is quite straightforward to write and to read. You can
@@ -29067,10 +30243,10 @@ Example}) and also the @file{testext.c} code for testing the APIs.
Some other bits and pieces:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The API provides access to @command{gawk}'s @code{do_@var{xxx}} values,
-reflecting command line options, like @code{do_lint}, @code{do_profiling}
+reflecting command-line options, like @code{do_lint}, @code{do_profiling}
and so on (@pxref{Extension API Variables}).
These are informational: an extension cannot affect their values
inside @command{gawk}. In addition, attempting to assign to them
@@ -29089,6 +30265,9 @@ happen, but we all know how @emph{that} goes.)
@section API Description
@cindex extension API
+C or C++ code for an extension must include the header file
+@file{gawkapi.h}, which declares the functions and defines the data
+types used to communicate with @command{gawk}.
This (rather large) @value{SECTION} describes the API in detail.
@menu
@@ -29117,10 +30296,10 @@ by calling through function pointers passed into your extension.
API function pointers are provided for the following kinds of operations:
-@itemize @bullet
+@itemize @value{BULLET}
@item
-Registrations functions. You may register:
-@itemize @minus
+Registration functions. You may register:
+@itemize @value{MINUS}
@item
extension functions,
@item
@@ -29161,7 +30340,7 @@ can be a big performance win.
@item
Manipulating arrays:
-@itemize @minus
+@itemize @value{MINUS}
@item
Retrieving, adding, deleting, and modifying elements
@@ -29181,7 +30360,7 @@ Flattening an array for easy C style looping over all its indices and elements
Some points about using the API:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The following types and/or macros and/or functions are referenced
in @file{gawkapi.h}. For correct use, you must therefore include the
@@ -29190,6 +30369,7 @@ corresponding standard header file @emph{before} including @file{gawkapi.h}:
@multitable {@code{memset()}, @code{memcpy()}} {@code{<sys/types.h>}}
@headitem C Entity @tab Header File
@item @code{EOF} @tab @code{<stdio.h>}
+@item Values for @code{errno} @tab @code{<errno.h>}
@item @code{FILE} @tab @code{<stdio.h>}
@item @code{NULL} @tab @code{<stddef.h>}
@item @code{memcpy()} @tab @code{<string.h>}
@@ -29205,9 +30385,6 @@ is necessary in order to keep @file{gawkapi.h} clean, instead of becoming
a portability hodge-podge as can be seen in some parts of
the @command{gawk} source code.
-To pass reasonable integer values for @code{ERRNO}, you will also need to
-include @code{<errno.h>}.
-
@item
The @file{gawkapi.h} file may be included more than once without ill effect.
Doing so, however, is poor coding practice.
@@ -29220,7 +30397,7 @@ does not support this keyword, you should either place
@file{config.h} file in your extensions.
@item
-All pointers filled in by @command{gawk} are to memory
+All pointers filled in by @command{gawk} point to memory
managed by @command{gawk} and should be treated by the extension as
read-only. Memory for @emph{all} strings passed into @command{gawk}
from the extension @emph{must} come from calling the API-provided function
@@ -29231,7 +30408,7 @@ and is managed by @command{gawk} from then on.
The API defines several simple @code{struct}s that map values as seen
from @command{awk}. A value can be a @code{double}, a string, or an
array (as in multidimensional arrays, or when creating a new array).
-String values maintain both pointer and length since embedded @code{NUL}
+String values maintain both pointer and length since embedded @value{NUL}
characters are allowed.
@quotation NOTE
@@ -29363,7 +30540,7 @@ Scalar values in @command{awk} are either numbers or strings. The
indicates what is in the @code{union}.
Representing numbers is easy---the API uses a C @code{double}. Strings
-require more work. Since @command{gawk} allows embedded @code{NUL} bytes
+require more work. Since @command{gawk} allows embedded @value{NUL} bytes
in string values, a string must be represented as a pair containing a
data-pointer and length. This is the @code{awk_string_t} type.
@@ -29393,8 +30570,11 @@ reading and/or changing the value of one or more scalar variables, you
can obtain a @dfn{scalar cookie}@footnote{See
@uref{http://catb.org/jargon/html/C/cookie.html, the ``cookie'' entry in the Jargon file} for a
definition of @dfn{cookie}, and @uref{http://catb.org/jargon/html/M/magic-cookie.html,
-the ``magic cookie'' entry in the Jargon file} for a nice example. See
-also the entry for ``Cookie'' in the @ref{Glossary}.}
+the ``magic cookie'' entry in the Jargon file} for a nice example.
+@ifclear FOR_PRINT
+See also the entry for ``Cookie'' in the @ref{Glossary}.
+@end ifclear
+}
object for that variable, and then use
the cookie for getting the variable's value or for changing the variable's
value.
@@ -29425,9 +30605,9 @@ value type, as appropriate. This behavior is summarized in
@ref{table-value-types-returned}.
@c FIXME: Try to do this with spans...
-@ifdocbook
-@anchor{table-value-types-returned}
-@end ifdocbook
+
+@float Table,table-value-types-returned
+@caption{API Value Types Returned}
@docbook
<informaltable>
<tgroup cols="2">
@@ -29512,8 +30692,6 @@ value type, as appropriate. This behavior is summarized in
@ifnotplaintext
@ifnotdocbook
-@float Table,table-value-types-returned
-@caption{Value Types Returned}
@multitable @columnfractions .50 .50
@headitem @tab Type of Actual Value:
@end multitable
@@ -29526,12 +30704,9 @@ value type, as appropriate. This behavior is summarized in
@item @tab @b{Undefined} @tab String @tab Number @tab Array @tab Undefined
@item @tab @b{Value Cookie} @tab false @tab false @tab false @tab false
@end multitable
-@end float
@end ifnotdocbook
@end ifnotplaintext
@ifplaintext
-@float Table,table-value-types-returned
-@caption{Value Types Returned}
@example
+-------------------------------------------------+
| Type of Actual Value: |
@@ -29555,8 +30730,8 @@ value type, as appropriate. This behavior is summarized in
| | Cookie | | | | |
+-----------+-----------+------------+------------+-----------+-----------+
@end example
-@end float
@end ifplaintext
+@end float
@node Memory Allocation Functions
@subsection Memory Allocation Functions and Convenience Macros
@@ -29603,6 +30778,7 @@ procedure calls that do not return a value.
@table @code
@item #define emalloc(pointer, type, size, message) @dots{}
The arguments to this macro are as follows:
+
@c nested table
@table @code
@item pointer
@@ -29754,9 +30930,9 @@ empty string (@code{""}). The @code{func} pointer is the address of a
An @dfn{exit callback} function is a function that
@command{gawk} calls before it exits.
-Such functions are useful if you have general ``clean up'' tasks
-that should be performed in your extension (such as closing data
-base connections or other resource deallocations).
+Such functions are useful if you have general ``cleanup'' tasks
+that should be performed in your extension (such as closing database
+connections or other resource deallocations).
You can register such
a function with @command{gawk} using the following function.
@@ -29764,6 +30940,7 @@ a function with @command{gawk} using the following function.
@item void awk_atexit(void (*funcp)(void *data, int exit_status),
@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ void *arg0);
The parameters are:
+
@c nested table
@table @code
@item funcp
@@ -29857,8 +31034,9 @@ A pointer to your @code{@var{XXX}_can_take_file()} function.
A pointer to your @code{@var{XXX}_take_control_of()} function.
@item awk_const struct input_parser *awk_const next;
-This pointer is used by @command{gawk}.
-The extension cannot modify it.
+This is for use by @command{gawk};
+therefore it is marked @code{awk_const} so that the extension cannot
+modify it.
@end table
The steps are as follows:
@@ -29905,7 +31083,7 @@ open the file, then @code{fd} will @emph{not} be equal to
@code{INVALID_HANDLE}. Otherwise, it will.
@item struct stat sbuf;
-If file descriptor is valid, then @command{gawk} will have filled
+If the file descriptor is valid, then @command{gawk} will have filled
in this structure via a call to the @code{fstat()} system call.
@end table
@@ -29998,8 +31176,8 @@ need to test for a @code{NULL} value. @command{gawk} sets @code{*errcode}
to zero, so there is no need to set it unless an error occurs.
If an error does occur, the function should return @code{EOF} and set
-@code{*errcode} to a non-zero value. In that case, if @code{*errcode}
-does not equal @minus{}1, @command{gawk} automatically updates
+@code{*errcode} to a value greater than zero. In that case, if @code{*errcode}
+does not equal zero, @command{gawk} automatically updates
the @code{ERRNO} variable based on the value of @code{*errcode}.
(In general, setting @samp{*errcode = errno} should do the right thing.)
@@ -30086,8 +31264,8 @@ as described below, and return true if successful, false otherwise.
@item awk_const struct output_wrapper *awk_const next;
This is for use by @command{gawk};
-therefore they are marked @code{awk_const} so that the extension cannot
-modify them.
+therefore it is marked @code{awk_const} so that the extension cannot
+modify it.
@end table
The @code{awk_output_buf_t} structure looks like this:
@@ -30149,7 +31327,7 @@ The @code{@var{XXX}_can_take_file()} function should make a decision based
upon the @code{name} and @code{mode} fields, and any additional state
(such as @command{awk} variable values) that is appropriate.
-When @command{gawk} calls @code{@var{XXX}_take_control_of()}, it should fill
+When @command{gawk} calls @code{@var{XXX}_take_control_of()}, that function should fill
in the other fields, as appropriate, except for @code{fp}, which it should just
use normally.
@@ -30190,7 +31368,7 @@ The fields are as follows:
The name of the two-way processor.
@item awk_bool_t (*can_take_two_way)(const char *name);
-This function returns true if it wants to take over two-way I/O for this filename.
+This function returns true if it wants to take over two-way I/O for this @value{FN}.
It should not change any state (variable
values, etc.) within @command{gawk}.
@@ -30203,8 +31381,8 @@ This function should fill in the @code{awk_input_buf_t} and
@item awk_const struct two_way_processor *awk_const next;
This is for use by @command{gawk};
-therefore they are marked @code{awk_const} so that the extension cannot
-modify them.
+therefore it is marked @code{awk_const} so that the extension cannot
+modify it.
@end table
As with the input parser and output processor, you provide
@@ -30370,7 +31548,7 @@ Return false if the value cannot be retrieved.
@item awk_bool_t sym_update_scalar(awk_scalar_t cookie, awk_value_t *value);
Update the value associated with a scalar cookie. Return false if
-the new value is not one of @code{AWK_STRING} or @code{AWK_NUMBER}.
+the new value is not of type @code{AWK_STRING} or @code{AWK_NUMBER}.
Here too, the built-in variables may not be updated.
@end table
@@ -30488,7 +31666,7 @@ is what the routines in this section let you do. The functions are as follows:
@item awk_bool_t create_value(awk_value_t *value, awk_value_cookie_t *result);
Create a cached string or numeric value from @code{value} for efficient later
assignment.
-Only @code{AWK_NUMBER} and @code{AWK_STRING} values are allowed. Any other type
+Only values of type @code{AWK_NUMBER} and @code{AWK_STRING} are allowed. Any other type
is rejected. While @code{AWK_UNDEFINED} could be allowed, doing so would
result in inferior performance.
@@ -30549,13 +31727,13 @@ What happens if @command{awk} code assigns a new value to @code{VAR1},
are all the others changed too?''
That's a great question. The answer is that no, it's not a problem.
-Internally, @command{gawk} uses reference-counted strings. This means
+Internally, @command{gawk} uses @dfn{reference-counted strings}. This means
that many variables can share the same string value, and @command{gawk}
keeps track of the usage. When a variable's value changes, @command{gawk}
simply decrements the reference count on the old value and updates
the variable to use the new value.
-Finally, as part of your clean up action (@pxref{Exit Callback Functions})
+Finally, as part of your cleanup action (@pxref{Exit Callback Functions})
you should release any cached values that you created, using
@code{release_value()}.
@@ -30681,7 +31859,8 @@ the string value of @code{index} must come from the API-provided functions @code
@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ const@ awk_value_t *const value);
In the array represented by @code{a_cookie}, create or modify
the element whose index is given by @code{index}.
-The @code{ARGV} and @code{ENVIRON} arrays may not be changed.
+The @code{ARGV} and @code{ENVIRON} arrays may not be changed,
+although the @code{PROCINFO} array can be.
@item awk_bool_t set_array_element_by_elem(awk_array_t a_cookie,
@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ awk_element_t element);
@@ -30952,7 +32131,7 @@ you must add the new array to its parent before adding any elements to it.
Thus, the correct way to build an array is to work ``top down.'' Create
the array, and immediately install it in @command{gawk}'s symbol table
using @code{sym_update()}, or install it as an element in a previously
-existing array using @code{set_element()}. We show example code shortly.
+existing array using @code{set_array_element()}. We show example code shortly.
@item
Due to @command{gawk} internals, after using @code{sym_update()} to install an array
@@ -30978,7 +32157,7 @@ of the array cookie after the call to @code{set_element()}.
@end enumerate
The following C code is a simple test extension to create an array
-with two regular elements and with a subarray. The leading @samp{#include}
+with two regular elements and with a subarray. The leading @code{#include}
directives and boilerplate variable declarations are omitted for brevity.
The first step is to create a new array and then install it
in the symbol table:
@@ -31204,12 +32383,15 @@ whether the corresponding command-line options were enabled when
@command{gawk} was invoked. The variables are:
@table @code
+@item do_debug
+This variable is true if @command{gawk} was invoked with the @option{--debug} option.
+
@item do_lint
This variable is true if @command{gawk} was invoked with the @option{--lint} option
(@pxref{Options}).
-@item do_traditional
-This variable is true if @command{gawk} was invoked with @option{--traditional} option.
+@item do_mpfr
+This variable is true if @command{gawk} was invoked with the @option{--bignum} option.
@item do_profile
This variable is true if @command{gawk} was invoked with the @option{--profile} option.
@@ -31217,11 +32399,8 @@ This variable is true if @command{gawk} was invoked with @option{--profile} opti
@item do_sandbox
This variable is true if @command{gawk} was invoked with the @option{--sandbox} option.
-@item do_debug
-This variable is true if @command{gawk} was invoked with @option{--debug} option.
-
-@item do_mpfr
-This variable is true if @command{gawk} was invoked with @option{--bignum} option.
+@item do_traditional
+This variable is true if @command{gawk} was invoked with the @option{--traditional} option.
@end table
The value of @code{do_lint} can change if @command{awk} code
@@ -31272,8 +32451,14 @@ These variables and functions are as follows:
@table @code
@item int plugin_is_GPL_compatible;
-This asserts that the extension is compatible with the GNU GPL
-(@pxref{Copying}). If your extension does not have this, @command{gawk}
+This asserts that the extension is compatible with
+@ifclear FOR_PRINT
+the GNU GPL (@pxref{Copying}).
+@end ifclear
+@ifset FOR_PRINT
+the GNU GPL.
+@end ifset
+If your extension does not have this, @command{gawk}
will not load it (@pxref{Plugin License}).
@item static gawk_api_t *const api;
@@ -31297,8 +32482,9 @@ as described earlier (@pxref{Extension Functions}).
It can then be looped over for multiple calls to
@code{add_ext_func()}.
+@c Use @var{OR} for docbook
@item static awk_bool_t (*init_func)(void) = NULL;
-@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @r{OR}
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @var{OR}
@itemx static awk_bool_t init_my_module(void) @{ @dots{} @}
@itemx static awk_bool_t (*init_func)(void) = init_my_module;
If you need to do some initialization work, you should define a
@@ -31537,7 +32723,6 @@ Those are followed by the necessary variable declarations
to make use of the API macros and boilerplate code
(@pxref{Extension API Boilerplate}).
-@c break line for page breaking
@example
#ifdef HAVE_CONFIG_H
#include <config.h>
@@ -31624,7 +32809,6 @@ The @code{stat()} extension is more involved. First comes a function
that turns a numeric mode into a printable representation
(e.g., 644 becomes @samp{-rw-r--r--}). This is omitted here for brevity:
-@c break line for page breaking
@example
/* format_mode --- turn a stat mode field into something readable */
@@ -31898,7 +33082,9 @@ structures for loading each function into @command{gawk}:
static awk_ext_func_t func_table[] = @{
@{ "chdir", do_chdir, 1 @},
@{ "stat", do_stat, 2 @},
+#ifndef __MINGW32__
@{ "fts", do_fts, 3 @},
+#endif
@};
@end example
@@ -31912,9 +33098,7 @@ everything that needs to be loaded. It is simplest to use the
dl_load_func(func_table, filefuncs, "")
@end example
-And that's it! As an exercise, consider adding functions to
-implement system calls such as @code{chown()}, @code{chmod()},
-and @code{umask()}.
+And that's it!
@node Using Internal File Ops
@subsection Integrating The Extensions
@@ -31926,7 +33110,7 @@ code must be compiled. Assuming that the functions are in
a file named @file{filefuncs.c}, and @var{idir} is the location
of the @file{gawkapi.h} header file,
the following steps@footnote{In practice, you would probably want to
-use the GNU Autotools---Automake, Autoconf, Libtool, and Gettext---to
+use the GNU Autotools---Automake, Autoconf, Libtool, and @command{gettext}---to
configure and build your libraries. Instructions for doing so are beyond
the scope of this @value{DOCUMENT}. @xref{gawkextlib}, for WWW links to
the tools.} create a GNU/Linux shared library:
@@ -31968,7 +33152,7 @@ BEGIN @{
@end example
The @env{AWKLIBPATH} environment variable tells
-@command{gawk} where to find shared libraries (@pxref{Finding Extensions}).
+@command{gawk} where to find extensions (@pxref{Finding Extensions}).
We set it to the current directory and run the program:
@example
@@ -32031,19 +33215,19 @@ Others mainly provide example code that shows how to use the extension API.
The @code{filefuncs} extension provides three different functions, as follows:
The usage is:
-@table @code
+@table @asis
@item @@load "filefuncs"
This is how you load the extension.
@cindex @code{chdir()} extension function
-@item result = chdir("/some/directory")
+@item @code{result = chdir("/some/directory")}
The @code{chdir()} function is a direct hook to the @code{chdir()}
system call to change the current directory. It returns zero
upon success or less than zero upon error. In the latter case it updates
@code{ERRNO}.
@cindex @code{stat()} extension function
-@item result = stat("/some/path", statdata @r{[}, follow@r{]})
+@item @code{result = stat("/some/path", statdata} [@code{, follow}]@code{)}
The @code{stat()} function provides a hook into the
@code{stat()} system call.
It returns zero upon success or less than zero upon error.
@@ -32056,69 +33240,27 @@ In all cases, it clears the @code{statdata} array.
When the call is successful, @code{stat()} fills the @code{statdata}
array with information retrieved from the filesystem, as follows:
-@c nested table
-@multitable @columnfractions .25 .60
-@item @code{statdata["name"]} @tab
-The name of the file.
-
-@item @code{statdata["dev"]} @tab
-Corresponds to the @code{st_dev} field in the @code{struct stat}.
-
-@item @code{statdata["ino"]} @tab
-Corresponds to the @code{st_ino} field in the @code{struct stat}.
-
-@item @code{statdata["mode"]} @tab
-Corresponds to the @code{st_mode} field in the @code{struct stat}.
-
-@item @code{statdata["nlink"]} @tab
-Corresponds to the @code{st_nlink} field in the @code{struct stat}.
-
-@item @code{statdata["uid"]} @tab
-Corresponds to the @code{st_uid} field in the @code{struct stat}.
-
-@item @code{statdata["gid"]} @tab
-Corresponds to the @code{st_gid} field in the @code{struct stat}.
-
-@item @code{statdata["size"]} @tab
-Corresponds to the @code{st_size} field in the @code{struct stat}.
-
-@item @code{statdata["atime"]} @tab
-Corresponds to the @code{st_atime} field in the @code{struct stat}.
-
-@item @code{statdata["mtime"]} @tab
-Corresponds to the @code{st_mtime} field in the @code{struct stat}.
-
-@item @code{statdata["ctime"]} @tab
-Corresponds to the @code{st_ctime} field in the @code{struct stat}.
-
-@item @code{statdata["rdev"]} @tab
-Corresponds to the @code{st_rdev} field in the @code{struct stat}.
-This element is only present for device files.
-
-@item @code{statdata["major"]} @tab
-Corresponds to the @code{st_major} field in the @code{struct stat}.
-This element is only present for device files.
-
-@item @code{statdata["minor"]} @tab
-Corresponds to the @code{st_minor} field in the @code{struct stat}.
-This element is only present for device files.
-
-@item @code{statdata["blksize"]} @tab
-Corresponds to the @code{st_blksize} field in the @code{struct stat},
-if this field is present on your system.
-(It is present on all modern systems that we know of.)
-
-@item @code{statdata["pmode"]} @tab
-A human-readable version of the mode value, such as printed by
-@command{ls}. For example, @code{"-rwxr-xr-x"}.
-
-@item @code{statdata["linkval"]} @tab
-If the named file is a symbolic link, this element will exist
-and its value is the value of the symbolic link (where the
-symbolic link points to).
-
-@item @code{statdata["type"]} @tab
-The type of the file as a string. One of
+@multitable @columnfractions .15 .50 .20
+@headitem Subscript @tab Field in @code{struct stat} @tab File type
+@item @code{"name"} @tab The @value{FN} @tab All
+@item @code{"dev"} @tab @code{st_dev} @tab All
+@item @code{"ino"} @tab @code{st_ino} @tab All
+@item @code{"mode"} @tab @code{st_mode} @tab All
+@item @code{"nlink"} @tab @code{st_nlink} @tab All
+@item @code{"uid"} @tab @code{st_uid} @tab All
+@item @code{"gid"} @tab @code{st_gid} @tab All
+@item @code{"size"} @tab @code{st_size} @tab All
+@item @code{"atime"} @tab @code{st_atime} @tab All
+@item @code{"mtime"} @tab @code{st_mtime} @tab All
+@item @code{"ctime"} @tab @code{st_ctime} @tab All
+@item @code{"rdev"} @tab @code{st_rdev} @tab Device files
+@item @code{"major"} @tab @code{st_major} @tab Device files
+@item @code{"minor"} @tab @code{st_minor} @tab Device files
+@item @code{"blksize"} @tab @code{st_blksize} @tab All
+@item @code{"pmode"} @tab A human-readable version of the mode value, such as printed by
+@command{ls}. For example, @code{"-rwxr-xr-x"} @tab All
+@item @code{"linkval"} @tab The value of the symbolic link @tab Symbolic links
+@item @code{"type"} @tab The type of the file as a string. One of
@code{"file"},
@code{"blockdev"},
@code{"chardev"},
@@ -32129,12 +33271,12 @@ The type of the file as a string. One of
@code{"door"},
or
@code{"unknown"}.
-Not all systems support all file types.
+Not all systems support all file types. @tab All
@end multitable
@cindex @code{fts()} extension function
-@item flags = or(FTS_PHYSICAL, ...)
-@itemx result = fts(pathlist, flags, filedata)
+@item @code{flags = or(FTS_PHYSICAL, ...)}
+@itemx @code{result = fts(pathlist, flags, filedata)}
Walk the file trees provided in @code{pathlist} and fill in the
@code{filedata} array as described below. @code{flags} is the bitwise
OR of several predefined constant values, also described below.
@@ -32151,7 +33293,7 @@ The arguments are as follows:
@table @code
@item pathlist
-An array of filenames. The element values are used; the index values are ignored.
+An array of @value{FN}s. The element values are used; the index values are ignored.
@item flags
This should be the bitwise OR of one or more of the following
@@ -32275,10 +33417,10 @@ The arguments to @code{fnmatch()} are:
@table @code
@item pattern
-The filename wildcard to match.
+The @value{FN} wildcard to match.
@item string
-The filename string.
+The @value{FN} string.
@item flag
Either zero, or the bitwise OR of one or more of the
@@ -32288,23 +33430,13 @@ flags in the @code{FNM} array.
The flags are as follows:
@multitable @columnfractions .25 .75
-@item @code{FNM["CASEFOLD"]} @tab
-Corresponds to the @code{FNM_CASEFOLD} flag as defined in @code{fnmatch()}.
-
-@item @code{FNM["FILE_NAME"]} @tab
-Corresponds to the @code{FNM_FILE_NAME} flag as defined in @code{fnmatch()}.
-
-@item @code{FNM["LEADING_DIR"]} @tab
-Corresponds to the @code{FNM_LEADING_DIR} flag as defined in @code{fnmatch()}.
-
-@item @code{FNM["NOESCAPE"]} @tab
-Corresponds to the @code{FNM_NOESCAPE} flag as defined in @code{fnmatch()}.
-
-@item @code{FNM["PATHNAME"]} @tab
-Corresponds to the @code{FNM_PATHNAME} flag as defined in @code{fnmatch()}.
-
-@item @code{FNM["PERIOD"]} @tab
-Corresponds to the @code{FNM_PERIOD} flag as defined in @code{fnmatch()}.
+@headitem Array element @tab Corresponding flag defined by @code{fnmatch()}
+@item @code{FNM["CASEFOLD"]} @tab @code{FNM_CASEFOLD}
+@item @code{FNM["FILE_NAME"]} @tab @code{FNM_FILE_NAME}
+@item @code{FNM["LEADING_DIR"]} @tab @code{FNM_LEADING_DIR}
+@item @code{FNM["NOESCAPE"]} @tab @code{FNM_NOESCAPE}
+@item @code{FNM["PATHNAME"]} @tab @code{FNM_PATHNAME}
+@item @code{FNM["PERIOD"]} @tab @code{FNM_PERIOD}
@end multitable
Here is an example:
@@ -32395,8 +33527,8 @@ standard output to a temporary file configured to have the same owner
and permissions as the original. After the file has been processed,
the extension restores standard output to its original destination.
If @code{INPLACE_SUFFIX} is not an empty string, the original file is
-linked to a backup filename created by appending that suffix. Finally,
-the temporary file is renamed to the original filename.
+linked to a backup @value{FN} created by appending that suffix. Finally,
+the temporary file is renamed to the original @value{FN}.
If any error occurs, the extension issues a fatal error to terminate
processing immediately without damaging the original file.
@@ -32414,9 +33546,6 @@ $ @kbd{gawk -i inplace -v INPLACE_SUFFIX=.bak '@{ gsub(/foo/, "bar") @}}
> @kbd{@{ print @}' file1 file2 file3}
@end example
-We leave it as an exercise to write a wrapper script that presents an
-interface similar to @samp{sed -i}.
-
@node Extension Sample Ord
@subsection Character and Numeric values: @code{ord()} and @code{chr()}
@@ -32462,11 +33591,14 @@ on the command line (or with @code{getline}),
they are read, with each entry returned as a record.
The record consists of three fields. The first two are the inode number and the
-filename, separated by a forward slash character.
+@value{FN}, separated by a forward slash character.
On systems where the directory entry contains the file type, the record
has a third field (also separated by a slash) which is a single letter
-indicating the type of the file:
+indicating the type of the file. The letters and file types are shown
+in @ref{table-readdir-file-types}.
+@float Table,table-readdir-file-types
+@caption{File Types Returned By @code{readdir()}}
@multitable @columnfractions .1 .9
@headitem Letter @tab File Type
@item @code{b} @tab Block device
@@ -32478,6 +33610,7 @@ indicating the type of the file:
@item @code{s} @tab Socket
@item @code{u} @tab Anything else (unknown)
@end multitable
+@end float
On systems without the file type information, the third field is always
@samp{u}.
@@ -32512,12 +33645,12 @@ Here is an example:
BEGIN @{
REVOUT = 1
- print "hello, world" > "/dev/stdout"
+ print "don't panic" > "/dev/stdout"
@}
@end example
The output from this program is:
-@samp{dlrow ,olleh}.
+@samp{cinap t'nod}.
@node Extension Sample Rev2way
@subsection Two-Way I/O Example
@@ -32534,13 +33667,22 @@ The following example shows how to use it:
BEGIN @{
cmd = "/magic/mirror"
- print "hello, world" |& cmd
+ print "don't panic" |& cmd
cmd |& getline result
print result
close(cmd)
@}
@end example
+The output from this program
+@ifnotinfo
+also is:
+@end ifnotinfo
+@ifinfo
+is:
+@end ifinfo
+@samp{cinap t'nod}.
+
@node Extension Sample Read write array
@subsection Dumping and Restoring An Array
@@ -32551,8 +33693,8 @@ named @code{writea()} and @code{reada()}, as follows:
@cindex @code{writea()} extension function
@item ret = writea(file, array)
This function takes a string argument, which is the name of the file
-to which dump the array, and the array itself as the second argument.
-@code{writea()} understands multidimensional arrays. It returns one on
+to which to dump the array, and the array itself as the second argument.
+@code{writea()} understands arrays of arrays. It returns one on
success, or zero upon failure.
@cindex @code{reada()} extension function
@@ -32637,9 +33779,8 @@ for more information.
@node Extension Sample Time
@subsection Extension Time Functions
-These functions can be used either by invoking @command{gawk}
-with a command-line argument of @samp{-l time} or by
-inserting @samp{@@load "time"} in your script.
+The @code{time} extension adds two functions, named @code{gettimeofday()}
+and @code{sleep()}, as follows:
@table @code
@item @@load "time"
@@ -32652,7 +33793,7 @@ floating point value. If the time is unavailable on this platform, return
@minus{}1 and set @code{ERRNO}. The returned time should have sub-second
precision, but the actual precision may vary based on the platform.
If the standard C @code{gettimeofday()} system call is available on this
-platform, then it simply returns the value. Otherwise, if on Windows,
+platform, then it simply returns the value. Otherwise, if on MS-Windows,
it tries to use @code{GetSystemTimeAsFileTime()}.
@cindex @code{sleep()} extension function
@@ -32678,7 +33819,7 @@ processing XML files. This is the evolution of the original @command{xgawk}
As of this writing, there are five extensions:
-@itemize @bullet
+@itemize @value{BULLET}
@item
XML parser extension, using the @uref{http://expat.sourceforge.net, Expat}
XML parsing library.
@@ -32704,7 +33845,7 @@ main @command{gawk} distribution.
@cindex @command{git} utility
You can check out the code for the @code{gawkextlib} project
-using the @uref{http://git-scm.com, GIT} distributed source
+using the @uref{http://git-scm.com, Git} distributed source
code control system. The command is as follows:
@example
@@ -32720,7 +33861,7 @@ In addition, you must have the GNU Autotools installed
@uref{http://www.gnu.org/software/automake, Automake},
@uref{http://www.gnu.org/software/libtool, Libtool},
and
-@uref{http://www.gnu.org/software/gettext, Gettext}).
+@uref{http://www.gnu.org/software/gettext, GNU @command{gettext}}).
The simple recipe for building and testing @code{gawkextlib} is as follows.
First, build and install @command{gawk}:
@@ -32754,26 +33895,171 @@ If you write an extension that you wish to share with other
@code{gawkextlib} project.
See the project's web site for more information.
-@iftex
-@part Part IV:@* Appendices
-@end iftex
+@node Extension summary
+@section Summary
+
+@itemize @value{BULLET}
+@item
+You can write extensions (sometimes called plug-ins) for @command{gawk}
+in C or C++ using the Application Programming Interface (API) defined
+by the @command{gawk} developers.
+
+@item
+Extensions must have a license compatible with the GNU General Public
+License (GPL), and they must assert that fact by declaring a variable
+named @code{plugin_is_GPL_compatible}.
+
+@item
+Communication between @command{gawk} and an extension is two-way.
+@command{gawk} passes a @code{struct} to the extension which contains
+various data fields and function pointers. The extension can then call
+into @command{gawk} via the supplied function pointers to accomplish
+certain tasks.
+
+@item
+One of these tasks is to ``register'' the name and implementation of
+a new @command{awk}-level function with @command{gawk}. The implementation
+takes the form of a C function pointer with a defined signature.
+By convention, implementation functions are named @code{do_@var{XXXX}()}
+for some @command{awk}-level function @code{@var{XXXX}()}.
+
+@item
+The API is defined in a header file named @file{gawkapi.h}. You must include
+a number of standard header files @emph{before} including it in your source file.
+
+@item
+API function pointers are provided for the following kinds of operations:
+
+@itemize @value{BULLET}
+@item
+Registration functions. You may register
+extension functions,
+exit callbacks,
+a version string,
+input parsers,
+output wrappers,
+and two-way processors.
+
+@item
+Printing fatal, warning, and ``lint'' warning messages.
+
+@item
+Updating @code{ERRNO}, or unsetting it.
+
+@item
+Accessing parameters, including converting an undefined parameter into
+an array.
+
+@item
+Symbol table access: retrieving a global variable, creating one,
+or changing one.
+
+@item
+Allocating, reallocating, and releasing memory.
+
+@item
+Creating and releasing cached values; this provides an
+efficient way to use values for multiple variables and
+can be a big performance win.
+
+@item
+Manipulating arrays:
+retrieving, adding, deleting, and modifying elements;
+getting the count of elements in an array;
+creating a new array;
+clearing an array;
+and
+flattening an array for easy C-style looping over all its indices and elements.
+@end itemize
+
+@item
+The API defines a number of standard data types for representing
+@command{awk} values, array elements, and arrays.
+
+@item
+The API provides convenience functions for constructing values.
+It also provides memory management functions to ensure compatibility
+between memory allocated by @command{gawk} and memory allocated by an
+extension.
+
+@item
+@emph{All} memory passed from @command{gawk} to an extension must be
+treated as read-only by the extension.
+
+@item
+@emph{All} memory passed from an extension to @command{gawk} must come from
+the API's memory allocation functions. @command{gawk} takes responsibility for
+the memory and will release it when appropriate.
+
+@item
+The API provides information about the running version of @command{gawk} so
+that an extension can make sure it is compatible with the @command{gawk}
+that loaded it.
+
+@item
+It is easiest to start a new extension by copying the boilerplate code
+described in this @value{CHAPTER}. Macros in the @file{gawkapi.h} header file make
+this easier to do.
+
+@item
+The @command{gawk} distribution includes a number of small but useful
+sample extensions. The @code{gawkextlib} project includes several more,
+larger, extensions. If you wish to write an extension and contribute it
+to the community of @command{gawk} users, the @code{gawkextlib} project
+should be the place to do so.
+
+@end itemize
+
+@c EXCLUDE START
+@node Extension Exercises
+@section Exercises
+
+@enumerate
+@item
+Add functions to implement system calls such as @code{chown()},
+@code{chmod()}, and @code{umask()} to the file operations extension
+presented in @ref{Internal File Ops}.
+
+@item
+(Hard.)
+How would you provide namespaces in @command{gawk}, so that the
+names of functions in different extensions don't conflict with each other?
+If you come up with a really good scheme, contact the @command{gawk}
+maintainer to tell him about it.
+
+@item
+Write a wrapper script that provides an interface similar to
+@samp{sed -i} for the ``inplace'' extension presented in
+@ref{Extension Sample Inplace}.
+
+@end enumerate
+@c EXCLUDE END
+
+@ifnotinfo
+@part @value{PART4}Appendices
+@end ifnotinfo
-@ignore
@ifdocbook
-@part Part IV:@* Appendices
+@ifclear FOR_PRINT
+Part IV contains the appendices (including the two licenses that cover
+the @command{gawk} source code and this @value{DOCUMENT}, respectively)
+and the Glossary:
+@end ifclear
-Part IV provides the appendices, the Glossary, and two licenses that cover
-the @command{gawk} source code and this @value{DOCUMENT}, respectively.
-It contains the following appendices:
+@ifset FOR_PRINT
+Part IV contains two appendices and the license that
+covers the @command{gawk} source code:
+@end ifset
-@itemize @bullet
+@itemize @value{BULLET}
@item
@ref{Language History}.
@item
@ref{Installation}.
+@ifclear FOR_PRINT
@item
@ref{Notes}.
@@ -32782,30 +34068,39 @@ It contains the following appendices:
@item
@ref{Glossary}.
+@end ifclear
@item
@ref{Copying}.
+@ifclear FOR_PRINT
@item
@ref{GNU Free Documentation License}.
+@end ifclear
@end itemize
@end ifdocbook
-@end ignore
@node Language History
@appendix The Evolution of the @command{awk} Language
-This @value{DOCUMENT} describes the GNU implementation of @command{awk}, which follows
-the POSIX specification.
-Many long-time @command{awk} users learned @command{awk} programming
-with the original @command{awk} implementation in Version 7 Unix.
-(This implementation was the basis for @command{awk} in Berkeley Unix,
-through 4.3-Reno. Subsequent versions of Berkeley Unix, and some systems
-derived from 4.4BSD-Lite, use various versions of @command{gawk}
-for their @command{awk}.)
-This @value{CHAPTER} briefly describes the
-evolution of the @command{awk} language, with cross-references to other parts
-of the @value{DOCUMENT} where you can find more information.
+This @value{DOCUMENT} describes the GNU implementation of @command{awk},
+which follows the POSIX specification. Many long-time @command{awk}
+users learned @command{awk} programming with the original @command{awk}
+implementation in Version 7 Unix. (This implementation was the basis for
+@command{awk} in Berkeley Unix, through 4.3-Reno. Subsequent versions
+of Berkeley Unix, and some systems derived from 4.4BSD-Lite, used various
+versions of @command{gawk} for their @command{awk}.) This @value{CHAPTER}
+briefly describes the evolution of the @command{awk} language, with
+cross-references to other parts of the @value{DOCUMENT} where you can
+find more information.
+
+@ifset FOR_PRINT
+To save space, we have omitted
+information on the history of features in @command{gawk} from this
+edition. You can find it in the
+@uref{http://www.gnu.org/software/gawk/manual/html_node/Feature-History.html,
+online documentation}.
+@end ifset
@menu
* V7/SVR3.1:: The major changes between V7 and System V
@@ -32821,6 +34116,7 @@ of the @value{DOCUMENT} where you can find more information.
* Common Extensions:: Common Extensions Summary.
* Ranges and Locales:: How locales used to affect regexp ranges.
* Contributors:: The major contributors to @command{gawk}.
+* History summary:: History summary.
@end menu
@node V7/SVR3.1
@@ -32835,7 +34131,7 @@ Version 7 Unix (1978) and the new version that was first made generally availabl
System V Release 3.1 (1987). This @value{SECTION} summarizes the changes, with
cross-references to further details:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The requirement for @samp{;} to separate rules on a line
(@pxref{Statements/Lines}).
@@ -32926,7 +34222,7 @@ Multidimensional arrays
The System V Release 4 (1989) version of Unix @command{awk} added these features
(some of which originated in @command{gawk}):
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @code{ENVIRON} array (@pxref{Built-in Variables}).
@c gawk and MKS awk
@@ -32986,7 +34282,7 @@ Processing of escape sequences inside command-line variable assignments
The POSIX Command Language and Utilities standard for @command{awk} (1992)
introduced the following changes into the language:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The use of @option{-W} for implementation-specific options
(@pxref{Options}).
@@ -33011,7 +34307,7 @@ features of the language.
In 2012, a number of extensions that had been commonly available for
many years were finally added to POSIX. They are:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @code{fflush()} built-in function for flushing buffered output
(@pxref{I/O Functions}).
@@ -33048,7 +34344,7 @@ has made his version available via his home page
This @value{SECTION} describes common extensions that
originally appeared in his version of @command{awk}.
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @samp{**} and @samp{**=} operators
(@pxref{Arithmetic Ops}
@@ -33066,7 +34362,7 @@ The @code{fflush()} built-in function for flushing buffered output
@ignore
@item
The @code{SYMTAB} array, that allows access to @command{awk}'s internal symbol
-table. This feature is not documented, largely because
+table. This feature was never documented for his @command{awk}, largely because
it is somewhat shakily implemented. For instance, you cannot access arrays
or array elements through it.
@end ignore
@@ -33093,12 +34389,12 @@ A number of features have come and gone over the years. This @value{SECTION}
summarizes the additional features over POSIX @command{awk} that are
in the current version of @command{gawk}.
-@itemize @bullet
+@itemize @value{BULLET}
@item
Additional built-in variables:
-@itemize @minus
+@itemize @value{MINUS}
@item
The
@code{ARGIND}
@@ -33119,10 +34415,10 @@ variables
@item
Special files in I/O redirections:
-@itemize @minus{}
+@itemize @value{MINUS}
@item
The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr} and
-@file{/dev/fd/@var{N}} special file names
+@file{/dev/fd/@var{N}} special @value{FN}s
(@pxref{Special Files}).
@item
@@ -33135,7 +34431,7 @@ IP protocol to use.
@item
Changes and/or additions to the language:
-@itemize @minus{}
+@itemize @value{MINUS}
@item
The @samp{\x} escape sequence
(@pxref{Escape Sequences}).
@@ -33168,13 +34464,13 @@ Indirect function calls
@item
Directories on the command line produce a warning and are skipped
-(@pxref{Command line directories}).
+(@pxref{Command-line directories}).
@end itemize
@item
New keywords:
-@itemize @minus{}
+@itemize @value{MINUS}
@item
The @code{BEGINFILE} and @code{ENDFILE} special patterns.
(@pxref{BEGINFILE/ENDFILE}).
@@ -33195,7 +34491,7 @@ The @code{switch} statement
@item
Changes to standard @command{awk} functions:
-@itemize @minus
+@itemize @value{MINUS}
@item
The optional second argument to @code{close()} that allows closing one end
of a two-way pipe to a coprocess
@@ -33228,7 +34524,7 @@ argument which is an array to hold the text of the field separators.
@item
Additional functions only in @command{gawk}:
-@itemize @minus
+@itemize @value{MINUS}
@item
The
@code{and()},
@@ -33252,8 +34548,7 @@ functions for internationalization
(@pxref{Programmer i18n}).
@item
-The @code{fflush()} function from Brian Kernighan's
-version of @command{awk}
+The @code{fflush()} function from BWK @command{awk}
(@pxref{I/O Functions}).
@item
@@ -33271,7 +34566,7 @@ functions for working with timestamps
@item
Changes and/or additions in the command-line options:
-@itemize @minus
+@itemize @value{MINUS}
@item
The @env{AWKPATH} environment variable for specifying a path search for
the @option{-f} command-line option
@@ -33317,7 +34612,7 @@ and the
@option{--copyright},
@option{--debug},
@option{--dump-variables},
-@option{--execle},
+@option{--exec},
@option{--field-separator},
@option{--file},
@option{--gen-pot},
@@ -33346,10 +34641,10 @@ long options
@item
Support for the following obsolete systems was removed from the code
-and the documentation for @command{gawk} version 4.0:
+and the documentation for @command{gawk} @value{PVERSION} 4.0:
@c nested table
-@itemize @minus
+@itemize @value{MINUS}
@item
Amiga
@@ -33388,6 +34683,20 @@ GCC for VAX and Alpha has not been tested for a while.
@end itemize
+@item
+Support for the following obsolete systems was removed from the code
+and the documentation for @command{gawk} @value{PVERSION} 4.1:
+
+@c nested table
+@itemize @value{MINUS}
+@item
+Ultrix
+@end itemize
+
+@item
+@c FIXME: Verify the version here.
+Support for MirBSD was removed at @command{gawk} @value{PVERSION} 4.2.
+
@end itemize
@c XXX ADD MORE STUFF HERE
@@ -33396,6 +34705,8 @@ GCC for VAX and Alpha has not been tested for a while.
@c ENDOFRANGE exgnot
@c ENDOFRANGE posnot
+@c This does not need to be in the formal book.
+@ifclear FOR_PRINT
@node Feature History
@appendixsec History of @command{gawk} Features
@@ -33425,7 +34736,7 @@ in the order they were added to @command{gawk}.
Version 2.10 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @env{AWKPATH} environment variable for specifying a path search for
the @option{-f} command-line option
@@ -33437,13 +34748,13 @@ The @code{IGNORECASE} variable and its effects
@item
The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr} and
-@file{/dev/fd/@var{N}} special file names
+@file{/dev/fd/@var{N}} special @value{FN}s
(@pxref{Special Files}).
@end itemize
Version 2.13 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The @code{FIELDWIDTHS} variable and its effects
(@pxref{Constant Size}).
@@ -33457,7 +34768,7 @@ and printing timestamps
Additional command-line options
(@pxref{Options}):
-@itemize @minus
+@itemize @value{MINUS}
@item
The @option{-W lint} option to provide error and portability checking
for both the source code and at runtime.
@@ -33472,19 +34783,19 @@ The @option{-W posix} option for full POSIX compliance.
Version 2.14 of @command{gawk} introduced the following feature:
-@itemize @bullet
+@itemize @value{BULLET}
@item
-The @code{next file} statement for skipping to the next data file
+The @code{next file} statement for skipping to the next @value{DF}
(@pxref{Nextfile Statement}).
@end itemize
Version 2.15 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
New variables (@pxref{Built-in Variables}):
-@itemize @minus
+@itemize @value{MINUS}
@item
@code{ARGIND}, which tracks the movement of @code{FILENAME}
through @code{ARGV}.
@@ -33496,17 +34807,17 @@ through @code{ARGV}.
@item
The @file{/dev/pid}, @file{/dev/ppid}, @file{/dev/pgrpid}, and
-@file{/dev/user} special file names. These have since been removed.
+@file{/dev/user} special @value{FN}s. These have since been removed.
@item
The ability to delete all of an array at once with @samp{delete @var{array}}
(@pxref{Delete}).
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
-@itemize @minus
+@itemize @value{MINUS}
@item
The ability to use GNU-style long-named options that start with @option{--}.
@@ -33518,11 +34829,11 @@ source code.
Version 3.0 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
New or changed variables:
-@itemize @minus
+@itemize @value{MINUS}
@item
@code{IGNORECASE} changed, now applying to string comparison as well
as regexp operations
@@ -33560,14 +34871,15 @@ The @code{next file} statement became @code{nextfile}
(@pxref{Nextfile Statement}).
@item
-The @code{fflush()} function from the
-Bell Laboratories research version of @command{awk}
-(@pxref{I/O Functions}).
+The @code{fflush()} function from
+BWK @command{awk}
+(then at Bell Laboratories;
+@pxref{I/O Functions}).
@item
-New command line options:
+New command-line options:
-@itemize @minus
+@itemize @value{MINUS}
@item
The @option{--lint-old} option to
warn about constructs that are not available in
@@ -33575,9 +34887,9 @@ the original Version 7 Unix version of @command{awk}
(@pxref{V7/SVR3.1}).
@item
-The @option{-m} option from the
-Bell Laboratories research version of @command{awk}
-This was later removed.
+The @option{-m} option from BWK @command{awk}. (Brian was
+still at Bell Laboratories at the time.) This was later removed from
+both his @command{awk} and from @command{gawk}.
@item
The @option{--re-interval} option to provide interval expressions in regexps
@@ -33594,17 +34906,18 @@ The use of GNU Autoconf to control the configuration process
@item
Amiga support.
+This has since been removed.
@end itemize
Version 3.1 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
New variables
(@pxref{Built-in Variables}):
-@itemize @minus
+@itemize @value{MINUS}
@item
@code{BINMODE}, for non-POSIX systems,
which allows binary I/O for input and/or output files
@@ -33652,7 +34965,7 @@ making translations easier
@item
A number of new built-in functions:
-@itemize @minus
+@itemize @value{MINUS}
@item
The @code{asort()} and @code{asorti()} functions for sorting arrays
(@pxref{Array Sorting}).
@@ -33683,10 +34996,10 @@ The support for @samp{next file} as two words was removed completely
(@pxref{Nextfile Statement}).
@item
-Additional commnd line options
+Additional command-line options
(@pxref{Options}):
-@itemize @minus
+@itemize @value{MINUS}
@item
The @option{--dump-variables} option to print a list of all global variables.
@@ -33720,7 +35033,7 @@ The use of GNU Automake to help in standardizing the configuration process
(@pxref{Quick Installation}).
@item
-The use of GNU @code{gettext} for @command{gawk}'s own message output
+The use of GNU @command{gettext} for @command{gawk}'s own message output
(@pxref{Gawk I18N}).
@item
@@ -33730,7 +35043,8 @@ BeOS support. This was later removed.
Tandem support. This was later removed.
@item
-The Atari port became officially unsupported.
+The Atari port became officially unsupported and was
+later removed entirely.
@item
The source code changed to use ISO C standard-style function definitions.
@@ -33752,12 +35066,12 @@ enable printing times as UTC
Version 4.0 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Variable additions:
-@itemize @minus
+@itemize @value{MINUS}
@item
@code{FPAT}, which allows you to specify a regexp that matches
the fields, instead of matching the field separator
@@ -33815,7 +35129,7 @@ An optional third argument to
(@pxref{String Functions}).
@item
-The behavior of @code{fflush()} changed to match Brian Kernighan's @command{awk}
+The behavior of @code{fflush()} changed to match BWK @command{awk}
and for POSIX; now both @samp{fflush()} and @samp{fflush("")}
flush all open output redirections
(@pxref{I/O Functions}).
@@ -33823,7 +35137,7 @@ flush all open output redirections
@item
The @code{isarray()}
function which distinguishes if an item is an array
-or not, to make it possible to traverse multidimensional arrays
+or not, to make it possible to traverse arrays of arrays
(@pxref{Type Functions}).
@item
@@ -33853,10 +35167,10 @@ Indirect function calls
(@pxref{Switch Statement}).
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
-@itemize @minus
+@itemize @value{MINUS}
@item
The @option{-b} and @option{--characters-as-bytes} options
which prevent @command{gawk} from treating input as a multibyte string.
@@ -33878,7 +35192,7 @@ All long options acquired corresponding short options, for use in @samp{#!} scri
@item
Directories named on the command line now produce a warning, not a fatal
error, unless @option{--posix} or @option{--traditional} are used
-(@pxref{Command line directories}).
+(@pxref{Command-line directories}).
@item
The @command{gawk} internals were rewritten, bringing the @command{dgawk}
@@ -33907,7 +35221,7 @@ C locale, no matter what kind of regexp is being used, and even if
@item
Support was removed for the following systems:
-@itemize @minus
+@itemize @value{MINUS}
@item
Atari
@@ -33945,7 +35259,7 @@ Prestandard VAX C compiler for VAX/VMS
Version 4.1 of @command{gawk} introduced the following features:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Three new arrays:
@@ -33954,13 +35268,13 @@ Three new arrays:
@item
The three executables @command{gawk}, @command{pgawk}, and @command{dgawk}, were merged into
-one, named just @command{gawk}. As a result the command line options changed.
+one, named just @command{gawk}. As a result the command-line options changed.
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
-@itemize @minus
+@itemize @value{MINUS}
@item
The @option{-D} option invokes the debugger.
@@ -33986,7 +35300,7 @@ The @option{-R} option was removed.
@item
Support for high precision arithmetic with MPFR.
-(@pxref{Gawk and MPFR}).
+(@pxref{Arbitrary Precision Arithmetic}).
@item
The @code{and()}, @code{or()} and @code{xor()} functions
@@ -34001,6 +35315,7 @@ The dynamic extension interface was completely redone
@end itemize
@c XXX ADD MORE STUFF HERE
+@end ifclear
@node Common Extensions
@appendixsec Common Extensions Summary
@@ -34099,7 +35414,7 @@ it on your system).
@cindex Unicode
Similar considerations apply to other ranges. For example, @samp{["-/]}
is perfectly valid in ASCII, but is not valid in many Unicode locales,
-such as @samp{en_US.UTF-8}.
+such as @code{en_US.UTF-8}.
Early versions of @command{gawk} used regexp matching code that was not
locale aware, so ranges had their traditional interpretation.
@@ -34114,7 +35429,7 @@ like ``why does @samp{[A-Z]} match lowercase letters?!?''
This situation existed for close to 10 years, if not more, and
the @command{gawk} maintainer grew weary of trying to explain that
@command{gawk} was being nicely standards-compliant, and that the issue
-was in the user's locale. During the development of version 4.0,
+was in the user's locale. During the development of @value{PVERSION} 4.0,
he modified @command{gawk} to always treat ranges in the original,
pre-POSIX fashion, unless @option{--posix} was used (@pxref{Options}).@footnote{And
thus was born the Campaign for Rational Range Interpretation (or
@@ -34147,7 +35462,7 @@ cases: the default regexp matching; with @option{--traditional} and with
This @value{SECTION} names the major contributors to @command{gawk}
and/or this @value{DOCUMENT}, in approximate chronological order:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@cindex Aho, Alfred
@cindex Weinberger, Peter
@@ -34227,8 +35542,8 @@ provided the initial port to OS/2 and its documentation.
Michal Jaegermann
provided the port to Atari systems and its documentation.
(This port is no longer supported.)
-He continues to provide portability checking with DEC Alpha
-systems, and has done a lot of work to make sure @command{gawk}
+He continues to provide portability checking,
+and has done a lot of work to make sure @command{gawk}
works on non-32-bit systems.
@item
@@ -34299,7 +35614,7 @@ provided the port to BeOS and its documentation.
@cindex Peters, Arno
Arno Peters
did the initial work to convert @command{gawk} to use
-GNU Automake and GNU @code{gettext}.
+GNU Automake and GNU @command{gettext}.
@item
@cindex Broder, Alan J.@:
@@ -34341,14 +35656,13 @@ Assaf Gordon contributed the code to implement the
@cindex Haque, John
John Haque made the following contributions:
-@itemize @minus
+@itemize @value{MINUS}
@item
The modifications to convert @command{gawk}
into a byte-code interpreter, including the debugger.
@item
-The addition of true multidimensional arrays.
-@ref{Arrays of Arrays}.
+The addition of true arrays of arrays.
@item
The additional modifications for support of arbitrary precision arithmetic.
@@ -34369,6 +35683,10 @@ The improved array sorting features were driven by John together
with Pat Rankin.
@end itemize
+@item
+@cindex Papadopoulos, Panos
+Panos Papadopoulos contributed the original text for @ref{Include Files}.
+
@item
@cindex Yawitz, Efraim
Efraim Yawitz contributed the original text for @ref{Debugger}.
@@ -34392,6 +35710,41 @@ has been working on @command{gawk} since 1988, at first
helping David Trueman, and as the primary maintainer since around 1994.
@end itemize
+@node History summary
+@appendixsec Summary
+
+@itemize @value{BULLET}
+@item
+The @command{awk} language has evolved over time. The first release
+was with V7 Unix, circa 1978. In 1987, for System V Release 3.1,
+major additions, including user-defined functions, were made to the language.
+Additional changes were made for System V Release 4, in 1989.
+Since then, further minor changes have happened under the auspices of the
+POSIX standard.
+
+@item
+Brian Kernighan's @command{awk} provides a small number of extensions
+that are implemented in common with other versions of @command{awk}.
+
+@item
+@command{gawk} provides a large number of extensions over POSIX @command{awk}.
+They can be disabled with either the @option{--traditional} or @option{--posix}
+options.
+
+@item
+The interaction of POSIX locales and regexp matching in @command{gawk} has been confusing over
+the years. Today, @command{gawk} implements Rational Range Interpretation, where
+ranges of the form @samp{[a-z]} match @emph{only} the characters numerically between
+@samp{a} through @samp{z} in the machine's native character set. Usually this is ASCII,
+but it can be EBCDIC on IBM S/390 systems.
+
+@item
+Many people have contributed to @command{gawk} development over the years.
+We hope that the list provided in this @value{CHAPTER} is complete and gives
+the appropriate credit where credit is due.
+
+@end itemize
+
@node Installation
@appendix Installing @command{gawk}
@@ -34417,6 +35770,7 @@ the respective ports.
* Bugs:: Reporting Problems and Bugs.
* Other Versions:: Other freely available @command{awk}
implementations.
+* Installation summary:: Summary of installation.
@end menu
@node Gawk Distribution
@@ -34436,9 +35790,9 @@ subdirectories.
@node Getting
@appendixsubsec Getting the @command{gawk} Distribution
@cindex @command{gawk}, source code@comma{} obtaining
-There are three ways to get GNU software:
+There are two ways to get GNU software:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Copy it from someone else who already has it.
@@ -34477,7 +35831,6 @@ file and then use @code{tar} to extract it. You can use the following
pipeline to produce the @command{gawk} distribution:
@example
-# Under System V, add 'o' to the tar options
gzip -d -c gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz | tar -xvpf -
@end example
@@ -34493,7 +35846,7 @@ Extracting the archive
creates a directory named @file{gawk-@value{VERSION}.@value{PATCHLEVEL}}
in the current directory.
-The distribution file name is of the form
+The distribution @value{FN} is of the form
@file{gawk-@var{V}.@var{R}.@var{P}.tar.gz}.
The @var{V} represents the major version of @command{gawk},
the @var{R} represents the current release of version @var{V}, and
@@ -34625,6 +35978,8 @@ The generated Info file for
The @command{troff} source for a manual page describing the @command{igawk}
program presented in
@ref{Igawk Program}.
+(Since @command{gawk} can do its own @code{@@include} processing,
+neither @command{igawk} nor @file{igawk.1} is installed.)
@item doc/Makefile.in
The input file used during the configuration process to generate the
@@ -34632,8 +35987,8 @@ actual @file{Makefile} for creating the documentation.
@item Makefile.am
@itemx */Makefile.am
-Files used by the GNU @command{automake} software for generating
-the @file{Makefile.in} files used by @command{autoconf} and
+Files used by the GNU Automake software for generating
+the @file{Makefile.in} files used by Autoconf and
@command{configure}.
@item Makefile.in
@@ -34669,8 +36024,6 @@ source file for this @value{DOCUMENT}. It also contains a @file{Makefile.in} fil
@file{Makefile.am} is used by GNU Automake to create @file{Makefile.in}.
The library functions from
@ref{Library Functions},
-and the @command{igawk} program from
-@ref{Igawk Program},
are included as ready-to-use files in the @command{gawk} distribution.
They are installed as part of the installation process.
The rest of the programs in this @value{DOCUMENT} are available in appropriate
@@ -34685,11 +36038,14 @@ the sample extensions included with @command{gawk}.
Files needed for building @command{gawk} on POSIX-compliant systems.
@item pc/*
-Files needed for building @command{gawk} under MS-Windows and OS/2
+Files needed for building @command{gawk} under MS-Windows
+@ifclear FOR_PRINT
+and OS/2
+@end ifclear
(@pxref{PC Installation}, for details).
@item vms/*
-Files needed for building @command{gawk} under VMS
+Files needed for building @command{gawk} under VAX/VMS and OpenVMS
(@pxref{VMS Installation}, for details).
@item test/*
@@ -34726,9 +36082,9 @@ to @file{gawk-@value{VERSION}.@value{PATCHLEVEL}}. Like most GNU software,
@command{gawk} is configured
automatically for your system by running the @command{configure} program.
This program is a Bourne shell script that is generated automatically using
-GNU @command{autoconf}.
+GNU Autoconf.
@ifnotinfo
-(The @command{autoconf} software is
+(The Autoconf software is
described fully in
@cite{Autoconf---Generating Automatic Configuration Scripts},
which can be found online at
@@ -34736,7 +36092,7 @@ which can be found online at
the Free Software Foundation's web site}.)
@end ifnotinfo
@ifinfo
-(The @command{autoconf} software is described fully starting with
+(The Autoconf software is described fully starting with
@inforef{Top, , Autoconf, autoconf,Autoconf---Generating Automatic Configuration Scripts}.)
@end ifinfo
@@ -34839,7 +36195,7 @@ improvement.
@cindex @option{--with-whiny-user-strftime} configuration option
@cindex configuration option, @code{--with-whiny-user-strftime}
@item --with-whiny-user-strftime
-Force use of the included version of the @code{strftime()}
+Force use of the included version of the C @code{strftime()}
function for deficient systems.
@end table
@@ -34886,9 +36242,9 @@ should not have. @file{custom.h} is automatically included by
@file{config.h}.
It is also possible that the @command{configure} program generated by
-@command{autoconf} will not work on your system in some other fashion.
+Autoconf will not work on your system in some other fashion.
If you do have a problem, the file @file{configure.ac} is the input for
-@command{autoconf}. You may be able to change this file and generate a
+Autoconf. You may be able to change this file and generate a
new version of @command{configure} that works on your system
(@pxref{Bugs},
for information on how to report problems in configuring @command{gawk}).
@@ -34916,16 +36272,21 @@ various non-Unix systems.
@cindex PC operating systems@comma{} @command{gawk} on, installing
@cindex operating systems, PC@comma{} @command{gawk} on, installing
This @value{SECTION} covers installation and usage of @command{gawk} on x86 machines
+@ifclear FOR_PRINT
running MS-DOS, any version of MS-Windows, or OS/2.
+@end ifclear
+@ifset FOR_PRINT
+running MS-DOS and any version of MS-Windows.
+@end ifset
In this @value{SECTION}, the term ``Windows32''
-refers to any of Microsoft Windows-95/98/ME/NT/2000/XP/Vista/7.
+refers to any of Microsoft Windows-95/98/ME/NT/2000/XP/Vista/7/8.
-The limitations of MS-DOS (and MS-DOS shells under Windows32 or OS/2) has meant
-that various ``DOS extenders'' are often used with programs such as
-@command{gawk}. The varying capabilities of Microsoft Windows 3.1
-and Windows32 can add to the confusion. For an overview of the
-considerations, please refer to @file{README_d/README.pc} in the
-distribution.
+The limitations of MS-DOS (and MS-DOS shells under the other operating
+systems) have meant that various ``DOS extenders'' are often used with
+programs such as @command{gawk}. The varying capabilities of Microsoft
+Windows 3.1 and Windows32 can add to the confusion. For an overview
+of the considerations, please refer to @file{README_d/README.pc} in
+the distribution.
@menu
* PC Binary Installation:: Installing a prepared distribution.
@@ -34939,6 +36300,7 @@ distribution.
* MSYS:: Using @command{gawk} In The MSYS Environment.
@end menu
+@ifclear FOR_PRINT
@node PC Binary Installation
@appendixsubsubsec Installing a Prepared Distribution for PC Systems
@@ -34977,13 +36339,21 @@ install-info --info-dir=x:/usr/info x:/usr/info/gawkinet.info
The binary distribution may contain a separate file containing additional
or more detailed installation instructions.
+@end ifclear
@node PC Compiling
@appendixsubsubsec Compiling @command{gawk} for PC Operating Systems
+@ifclear FOR_PRINT
@command{gawk} can be compiled for MS-DOS, Windows32, and OS/2 using the GNU
-development tools from DJ Delorie (DJGPP: MS-DOS only) or Eberhard
-Mattes (EMX: MS-DOS, Windows32 and OS/2). The file
+development tools from DJ Delorie (DJGPP: MS-DOS only), MinGW (Windows32) or Eberhard
+Mattes (EMX: MS-DOS, Windows32 and OS/2).
+@end ifclear
+@ifset FOR_PRINT
+@command{gawk} can be compiled for MS-DOS and Windows32 using the GNU
+development tools from DJ Delorie (DJGPP: MS-DOS only) or MinGW (Windows32).
+@end ifset
+The file
@file{README_d/README.pc} in the @command{gawk} distribution contains
additional notes, and @file{pc/Makefile} contains important information on
compilation options.
@@ -35005,6 +36375,7 @@ build @command{gawk} using the DJGPP tools, enter @samp{make djgpp}.
@uref{ftp://ftp.delorie.com/pub/djgpp/current/v2gnu/}.) To build a
native MS-Windows binary of @command{gawk}, type @samp{make mingw32}.
+@ifclear FOR_PRINT
@cindex compiling @command{gawk} with EMX for OS/2
The 32 bit EMX version of @command{gawk} works ``out of the box'' under OS/2.
However, it is highly recommended to use GCC 2.95.3 for the compilation.
@@ -35039,7 +36410,7 @@ and @option{--libexecdir=c:/usr/lib}.
@end ignore
@ignore
-The internal @code{gettext} library tends to be problematic. It is therefore recommended
+The internal @command{gettext} library tends to be problematic. It is therefore recommended
to use either an external one (@option{--without-included-gettext}) or to disable
NLS entirely (@option{--disable-nls}).
@end ignore
@@ -35076,8 +36447,11 @@ Ancient OS/2 ports of GNU @command{make} are not able to handle
the Makefiles of this package. If you encounter any problems with
@command{make}, try GNU Make 3.79.1 or later versions. You should
find the latest version on
-@uref{ftp://hobbes.nmsu.edu/pub/os2/}.
+@uref{ftp://hobbes.nmsu.edu/pub/os2/}.@footnote{As of May, 2014,
+this site is still there, but the author could not find a package
+for GNU Make.}
@end quotation
+@end ifclear
@node PC Testing
@appendixsubsubsec Testing @command{gawk} on PC Operating Systems
@@ -35089,6 +36463,7 @@ be converted so that they have the usual MS-DOS-style end-of-line markers.
Alternatively, run @command{make check CMP="diff -a"} to use GNU @command{diff}
in text mode instead of @command{cmp} to compare the resulting files.
+@ifclear FOR_PRINT
Most
of the tests work properly with Stewartson's shell along with the
companion utilities or appropriate GNU utilities. However, some editing of
@@ -35101,7 +36476,7 @@ On OS/2 the @code{pid} test fails because @code{spawnl()} is used instead of
@code{fork()}/@code{execl()} to start child processes.
Also the @code{mbfw1} and @code{mbprintf1} tests fail because the needed
multibyte functionality is not available.
-
+@end ifclear
@node PC Using
@appendixsubsubsec Using @command{gawk} on PC Operating Systems
@@ -35113,11 +36488,12 @@ multibyte functionality is not available.
Under MS-DOS and MS-Windows, the Cygwin and MinGW environments support
both the @samp{|&} operator and TCP/IP networking
(@pxref{TCP/IP Networking}).
+@ifclear FOR_PRINT
EMX (OS/2 only) supports at least the @samp{|&} operator.
+@end ifclear
@cindex search paths
@cindex search paths, for source files
-@cindex @command{gawk}, OS/2 version of
@cindex @command{gawk}, MS-DOS version of
@cindex @command{gawk}, MS-Windows version of
@cindex @code{;} (semicolon), @code{AWKPATH} variable and
@@ -35128,36 +36504,50 @@ program files as described in @ref{AWKPATH Variable}. However,
semicolons (rather than colons) separate elements in the @env{AWKPATH}
variable. If @env{AWKPATH} is not set or is empty, then the default
search path for MS-Windows and MS-DOS versions is
-@code{@w{".;c:/lib/awk;c:/gnu/lib/awk"}}.
+@samp{@w{.;c:/lib/awk;c:/gnu/lib/awk}}.
+@ifclear FOR_PRINT
+@cindex @command{gawk}, OS/2 version of
@cindex @code{UNIXROOT} variable, on OS/2 systems
The search path for OS/2 (32 bit, EMX) is determined by the prefix directory
(most likely @file{/usr} or @file{c:/usr}) that has been specified as an option of
-the @command{configure} script like it is the case for the Unix versions.
+the @command{configure} script as is the case for the Unix versions.
If @file{c:/usr} is the prefix directory then the default search path contains @file{.}
and @file{c:/usr/share/awk}.
Additionally, to support binary distributions of @command{gawk} for OS/2
-systems whose drive @samp{c:} might not support long file names or might not exist
+systems whose drive @samp{c:} might not support long @value{FN}s or might not exist
at all, there is a special environment variable. If @env{UNIXROOT} specifies
a drive then this specific drive is also searched for program files.
E.g., if @env{UNIXROOT} is set to @file{e:} the complete default search path is
-@code{@w{".;c:/usr/share/awk;e:/usr/share/awk"}}.
+@samp{@w{.;c:/usr/share/awk;e:/usr/share/awk}}.
An @command{sh}-like shell (as opposed to @command{command.com} under MS-DOS
or @command{cmd.exe} under MS-Windows or OS/2) may be useful for @command{awk} programming.
The DJGPP collection of tools includes an MS-DOS port of Bash,
and several shells are available for OS/2, including @command{ksh}.
+@end ifclear
+@ifset FOR_PRINT
+An @command{sh}-like shell (as opposed to @command{command.com} under MS-DOS
+or @command{cmd.exe} under MS-Windows) may be useful for @command{awk} programming.
+The DJGPP collection of tools includes an MS-DOS port of Bash.
+@end ifset
@cindex common extensions, @code{BINMODE} variable
@cindex extensions, common@comma{} @code{BINMODE} variable
@cindex differences in @command{awk} and @command{gawk}, @code{BINMODE} variable
@cindex @code{BINMODE} variable
-Under MS-Windows, OS/2 and MS-DOS, @command{gawk} (and many other text programs) silently
-translate end-of-line @code{"\r\n"} to @code{"\n"} on input and @code{"\n"}
-to @code{"\r\n"} on output. A special @code{BINMODE} variable @value{COMMONEXT}
+@ifclear FOR_PRINT
+Under MS-Windows, OS/2 and MS-DOS,
+@end ifclear
+@ifset FOR_PRINT
+Under MS-Windows and MS-DOS,
+@end ifset
+@command{gawk} (and many other text programs) silently
+translate end-of-line @samp{\r\n} to @samp{\n} on input and @samp{\n}
+to @samp{\r\n} on output. A special @code{BINMODE} variable @value{COMMONEXT}
allows control over these translations and is interpreted as follows:
-@itemize @bullet
+@itemize @value{BULLET}
@item
If @code{BINMODE} is @code{"r"}, or one,
then
@@ -35195,7 +36585,7 @@ The name @code{BINMODE} was chosen to match @command{mawk}
@command{mawk} adds a @samp{-W BINMODE=@var{N}} option and an environment
variable that can set @code{BINMODE}, @code{RS}, and @code{ORS}. The
files @file{binmode[1-3].awk} (under @file{gnu/lib/awk} in some of the
-prepared distributions) have been chosen to match @command{mawk}'s @samp{-W
+prepared binary distributions) have been chosen to match @command{mawk}'s @samp{-W
BINMODE=@var{N}} option. These can be changed or discarded; in particular,
the setting of @code{RS} giving the fewest ``surprises'' is open to debate.
@command{mawk} uses @samp{RS = "\r\n"} if binary mode is set on read, which is
@@ -35223,7 +36613,7 @@ The following changes the record separator to @code{"\r\n"} and sets binary
mode on reads, but does not affect the mode on standard input:
@example
-gawk -v RS="\r\n" --source "BEGIN @{ BINMODE = 1 @}" @dots{}
+gawk -v RS="\r\n" -e "BEGIN @{ BINMODE = 1 @}" @dots{}
@end example
@noindent
@@ -35271,7 +36661,7 @@ been ported to MS-Windows that expect @command{gawk} to do automatic
translation of @code{"\r\n"}, since it won't. Caveat Emptor!
@node VMS Installation
-@appendixsubsec How to Compile and Install @command{gawk} on VMS
+@appendixsubsec How to Compile and Install @command{gawk} on Vax/VMS and OpenVMS
@c based on material from Pat Rankin <rankin@eql.caltech.edu>
@c now rankin@pactechdata.com
@@ -35319,11 +36709,11 @@ or:
$ @kbd{MMK/DESCRIPTION=[.vms]descrip.mms gawk}
@end example
-@code{MMK} is an open source, free, near-clone of @code{MMS} and
-can better handle @code{ODS-5} volumes with upper- and lowercase filenames.
-@code{MMK} is available from @uref{https://github.com/endlesssoftware/mmk}.
+@command{MMK} is an open source, free, near-clone of @command{MMS} and
+can better handle ODS-5 volumes with upper- and lowercase @value{FN}s.
+@command{MMK} is available from @uref{https://github.com/endlesssoftware/mmk}.
-With @code{ODS-5} volumes and extended parsing enabled, the case of the target
+With ODS-5 volumes and extended parsing enabled, the case of the target
parameter may need to be exact.
@command{gawk} has been tested under VAX/VMS 7.3 and Alpha/VMS 7.3-1
@@ -35332,8 +36722,8 @@ The most recent builds used HP C V7.3 on Alpha VMS 8.3 and both
Alpha and IA64 VMS 8.4 used HP C 7.3.@footnote{The IA64 architecture
is also known as ``Itanium.''}
-The @file{[.vms]gawk_build_steps.txt} provides information on how to build
-@command{gawk} into a PCSI kit that is compatible with the GNV product.
+@xref{VMS GNV}, for information on building
+@command{gawk} as a PCSI kit that is compatible with the GNV product.
@node VMS Dynamic Extensions
@appendixsubsubsec Compiling @command{gawk} Dynamic Extensions on VMS
@@ -35451,11 +36841,11 @@ provides information about both the @command{gawk} implementation and the
The logical name @samp{AWK_LIBRARY} can designate a default location
for @command{awk} program files. For the @option{-f} option, if the specified
-file name has no device or directory path information in it, @command{gawk}
+@value{FN} has no device or directory path information in it, @command{gawk}
looks in the current directory first, then in the directory specified
by the translation of @samp{AWK_LIBRARY} if the file is not found.
If, after searching in both directories, the file still is not found,
-@command{gawk} appends the suffix @samp{.awk} to the filename and retries
+@command{gawk} appends the suffix @samp{.awk} to the @value{FN} and retries
the file search. If @samp{AWK_LIBRARY} has no definition, a default value
of @samp{SYS$LIBRARY:} is used for it.
@@ -35484,7 +36874,7 @@ One side effect of dual command-line parsing is that if there is only a
single parameter (as in the quoted string program above), the command
becomes ambiguous. To work around this, the normally optional @option{--}
flag is required to force Unix-style parsing rather than @code{DCL} parsing. If any
-other dash-type options (or multiple parameters such as data files to
+other dash-type options (or multiple parameters such as @value{DF}s to
process) are present, there is no ambiguity and @option{--} can be omitted.
@cindex exit status, of VMS
@@ -35538,7 +36928,7 @@ The VMS GNV package provides a build environment similar to POSIX with ports
of a collection of open source tools. The @command{gawk} found in the GNV
base kit is an older port. Currently the GNV project is being reorganized
to supply individual PCSI packages for each component.
-See @uref{https://sourceforge.net/p/gnv/wiki/InstallingGNVPackages/}.
+See @w{@uref{https://sourceforge.net/p/gnv/wiki/InstallingGNVPackages/}.}
The normal build procedure for @command{gawk} produces a program that
is suitable for use with GNV.
@@ -35593,7 +36983,7 @@ define a symbol, as follows:
$ @kbd{gawk :== $sys$common:[syshlp.examples.tcpip.snmp]gawk.exe}
@end example
-This is apparently version 2.15.6, which is extremely old. We
+This is apparently @value{PVERSION} 2.15.6, which is extremely old. We
recommend compiling and using the current version.
@c ENDOFRANGE opgawx
@@ -35622,8 +37012,8 @@ what you're trying to do. If it's not clear whether you should be able
to do something or not, report that too; it's a bug in the documentation!
Before reporting a bug or trying to fix it yourself, try to isolate it
-to the smallest possible @command{awk} program and input data file that
-reproduces the problem. Then send us the program and data file,
+to the smallest possible @command{awk} program and input @value{DF} that
+reproduces the problem. Then send us the program and @value{DF},
some idea of what kind of Unix system you're using,
the compiler you used to compile @command{gawk}, and the exact results
@command{gawk} gave you. Also say what you expected to occur; this helps
@@ -35639,12 +37029,14 @@ Once you have a precise problem, send email to
@EMAIL{bug-gawk@@gnu.org,bug-gawk at gnu dot org}.
@cindex Robbins, Arnold
-Using this address automatically sends a copy of your
-mail to me. If necessary, I can be reached directly at
+The @command{gawk} maintainers subscribe to this address and
+thus they will receive your bug report.
+If necessary, the primary maintainer can be reached directly at
@EMAIL{arnold@@skeeve.com,arnold at skeeve dot com}.
The bug reporting address is preferred since the
email list is archived at the GNU Project.
-@emph{All email should be in English, since that is my native language.}
+@emph{All email should be in English. This is the only language
+understood in common by all the maintainers.}
@cindex @code{comp.lang.awk} newsgroup
@quotation CAUTION
@@ -35692,11 +37084,13 @@ as follows:
@cindex Rankin, Pat
@cindex Malmberg, John
@cindex Pitts, Dave
-@multitable {MS-Windows with MINGW} {123456789012345678901234567890123456789001234567890}
+@multitable {MS-Windows with MinGW} {123456789012345678901234567890123456789001234567890}
@item MS-DOS with DJGPP @tab Scott Deifik, @EMAIL{scottd.mail@@sbcglobal.net,scottd dot mail at sbcglobal dot net}.
-@item MS-Windows with MINGW @tab Eli Zaretskii, @EMAIL{eliz@@gnu.org,eliz at gnu dot org}.
+@item MS-Windows with MinGW @tab Eli Zaretskii, @EMAIL{eliz@@gnu.org,eliz at gnu dot org}.
+@c Leave this in the print version on purpose.
+@c OS/2 is not mentioned anywhere else in the print version though.
@item OS/2 @tab Andreas Buening, @EMAIL{andreas.buening@@nexgo.de,andreas dot buening at nexgo dot de}.
@item VMS @tab Pat Rankin, @EMAIL{r.pat.rankin@@gmail.com,r dot pat dot rankin at gmail dot com}, and
@@ -35780,8 +37174,13 @@ for a list of extensions in this @command{awk} that are not in POSIX @command{aw
@cindex source code, @command{mawk}
@item @command{mawk}
Michael Brennan wrote an independent implementation of @command{awk},
-called @command{mawk}. It is available under the GPL
-(@pxref{Copying}),
+called @command{mawk}. It is available under the
+@ifclear FOR_PRINT
+GPL (@pxref{Copying}),
+@end ifclear
+@ifset FOR_PRINT
+GPL,
+@end ifset
just as @command{gawk} is.
The original distribution site for the @command{mawk} source code
@@ -35827,7 +37226,7 @@ since approximately 2003.
@cindex source code, @command{pawk}
@item @command{pawk}
Nelson H.F.@: Beebe at the University of Utah has modified
-Brian Kernighan's @command{awk} to provide timing and profiling information.
+BWK @command{awk} to provide timing and profiling information.
It is different from @command{gawk} with the @option{--profile} option
(@pxref{Profiling}),
in that it uses CPU-based profiling, not line-count
@@ -35850,10 +37249,10 @@ information, see the @uref{http://busybox.net, project's home page}.
@cindex Solaris, POSIX-compliant @command{awk}
@cindex source code, Solaris @command{awk}
@item The OpenSolaris POSIX @command{awk}
-The version of @command{awk} in @file{/usr/xpg4/bin} on Solaris is
-more-or-less POSIX-compliant. It is based on the @command{awk} from
-Mortice Kern Systems for PCs.
-This author was able to make it compile and work under GNU/Linux
+The versions of @command{awk} in @file{/usr/xpg4/bin} and
+@file{/usr/xpg6/bin} on Solaris are more-or-less POSIX-compliant.
+They are based on the @command{awk} from Mortice Kern Systems for PCs.
+This author was able to make this code compile and work under GNU/Linux
with 1--2 hours of work. Making it more generally portable (using
GNU Autoconf and/or Automake) would take more work, and this
has not been done, at least to our knowledge.
@@ -35890,8 +37289,7 @@ This is an embeddable @command{awk} interpreter derived from
This is a Python module that claims to bring @command{awk}-like
features to Python. See @uref{https://github.com/alecthomas/pawk}
for more information. (This is not related to Nelson Beebe's
-modified version of Brian Kernighan's @command{awk},
-described earlier.)
+modified version of BWK @command{awk}, described earlier.)
@item @w{QSE Awk}
@cindex QSE Awk
@@ -35908,15 +37306,56 @@ under the GPL. It has a large number of extensions over standard
See @uref{http://www.quiktrim.org/QTawk.html} for more information,
including the manual and a download link.
+The project may also be frozen; no new code changes have been made
+since approximately 2008.
+
@item Other Versions
See also the @uref{http://en.wikipedia.org/wiki/Awk_language#Versions_and_implementations,
Wikipedia article}, for information on additional versions.
@end table
+@c ENDOFRANGE awkim
+
+@node Installation summary
+@appendixsec Summary
+
+@itemize @value{BULLET}
+@item
+The @command{gawk} distribution is available from the GNU Project's main
+distribution site, @code{ftp.gnu.org}. The canonical build recipe is:
+
+@example
+wget http://ftp.gnu.org/gnu/gawk/gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz
+tar -xvpzf gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz
+cd gawk-@value{VERSION}.@value{PATCHLEVEL}
+./configure && make && make check
+@end example
+
+@item
+@command{gawk} may be built on non-POSIX systems as well. The currently
+supported systems are MS-Windows using DJGPP, MSYS, MinGW and Cygwin,
+@ifclear FOR_PRINT
+OS/2 using EMX,
+@end ifclear
+and both Vax/VMS and OpenVMS.
+Instructions for each system are included in this @value{CHAPTER}.
+
+@item
+Bug reports should be sent via email to @email{bug-gawk@@gnu.org}.
+Bug reports should be in English, and should include the version of @command{gawk},
+how it was compiled, and a short program and @value{DF} which demonstrate
+the problem.
+
+@item
+There are a number of other freely available @command{awk}
+implementations. Many are POSIX-compliant; others are less so.
+
+@end itemize
+
@c ENDOFRANGE gligawk
@c ENDOFRANGE ingawk
-@c ENDOFRANGE awkim
+@ifclear FOR_PRINT
@node Notes
@appendix Implementation Notes
@c STARTOFRANGE gawii
@@ -35936,6 +37375,7 @@ maintainers of @command{gawk}. Everything in it applies specifically to
* Implementation Limitations:: Some limitations of the implementation.
* Extension Design:: Design notes about the extension API.
* Old Extension Mechanism:: Some compatibility for old extensions.
+* Notes summary:: Summary of implementation notes.
@end menu
@node Compatibility Mode
@@ -35956,7 +37396,7 @@ is one more option available on the command line:
@table @code
@item -Y
@itemx --parsedebug
-Prints out the parse stack information as the program is being parsed.
+Print out the parse stack information as the program is being parsed.
@end table
This option is intended only for serious @command{gawk} developers
@@ -35980,15 +37420,15 @@ as well as any considerations you should bear in mind.
@command{gawk}.
* New Ports:: Porting @command{gawk} to a new operating
system.
-* Derived Files:: Why derived files are kept in the
- @command{git} repository.
+* Derived Files:: Why derived files are kept in the Git
+ repository.
@end menu
@node Accessing The Source
@appendixsubsec Accessing The @command{gawk} Git Repository
As @command{gawk} is Free Software, the source code is always available.
-@ref{Gawk Distribution}, describes how to get and build the formal,
+@DBREF{Gawk Distribution} describes how to get and build the formal,
released versions of @command{gawk}.
@cindex @command{git} utility
@@ -36005,8 +37445,8 @@ git clone git://git.savannah.gnu.org/gawk.git
@end example
@noindent
-This will clone the @command{gawk} repository. If you are behind a
-firewall that will not allow you to use the Git native protocol, you
+This clones the @command{gawk} repository. If you are behind a
+firewall that does not allow you to use the Git native protocol, you
can still access the repository using:
@example
@@ -36034,7 +37474,7 @@ that has a Git plug-in for working with Git repositories.
You are free to add any new features you like to @command{gawk}.
However, if you want your changes to be incorporated into the @command{gawk}
distribution, there are several steps that you need to take in order to
-make it possible to include your changes:
+make it possible to include them:
@enumerate 1
@item
@@ -36056,8 +37496,9 @@ or @EMAIL{assign@@gnu.org,assign at gnu dot org}.
@item
Get the latest version.
It is much easier for me to integrate changes if they are relative to
-the most recent distributed version of @command{gawk}. If your version of
-@command{gawk} is very old, I may not be able to integrate them at all.
+the most recent distributed version of @command{gawk}, or better yet,
+relative to the latest code in the Git repository. If your version of
+@command{gawk} is very old, I may not be able to integrate your changes at all.
(@xref{Getting},
for information on getting the latest version of @command{gawk}.)
@@ -36084,7 +37525,7 @@ using the traditional ``K&R'' style, particularly as regards to the placement
of braces and the use of TABs. In brief, the coding rules for @command{gawk}
are as follows:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Use ANSI/ISO style (prototype) function headers when defining functions.
@@ -36188,6 +37629,7 @@ not do so, particularly if there are lots of changes.
Include an entry for the @file{ChangeLog} file with your submission.
This helps further minimize the amount of work I have to do,
making it easier for me to accept patches.
+It is simplest if you just make this part of your diff.
@end enumerate
Although this sounds like a lot of work, please remember that while you
@@ -36245,10 +37687,39 @@ A number of the files that come with @command{gawk} are maintained by other
people. Thus, you should not change them
unless it is for a very good reason; i.e., changes are not out of the
question, but changes to these files are scrutinized extra carefully.
-The files are @file{dfa.c}, @file{dfa.h}, @file{getopt1.c}, @file{getopt.c},
-@file{getopt.h}, @file{install-sh}, @file{mkinstalldirs}, @file{regcomp.c},
-@file{regex.c}, @file{regexec.c}, @file{regexex.c}, @file{regex.h},
-@file{regex_internal.c}, and @file{regex_internal.h}.
+The files are
+@file{dfa.c},
+@file{dfa.h},
+@file{getopt.c},
+@file{getopt.h},
+@file{getopt1.c},
+@file{getopt_int.h},
+@file{gettext.h},
+@file{regcomp.c},
+@file{regex.c},
+@file{regex.h},
+@file{regex_internal.c},
+@file{regex_internal.h},
+and
+@file{regexec.c}.
+
+@item
+A number of other files are provided by the GNU
+Autotools (Autoconf, Automake, and GNU @command{gettext}).
+You should not change them either, unless it is for a very
+good reason. The files are
+@file{ABOUT-NLS},
+@file{config.guess},
+@file{config.rpath},
+@file{config.sub},
+@file{depcomp},
+@file{INSTALL},
+@file{install-sh},
+@file{missing},
+@file{mkinstalldirs},
+@file{xalloc.h},
+and
+@file{ylwrap}.
@item
Be willing to continue to maintain the port.
@@ -36299,23 +37770,23 @@ In the code that you supply and maintain, feel free to use a
coding style and brace layout that suits your taste.
@node Derived Files
-@appendixsubsec Why Generated Files Are Kept In @command{git}
+@appendixsubsec Why Generated Files Are Kept In Git
@c STARTOFRANGE gawkgit
-@cindex @command{git}, use of for @command{gawk} source code
+@cindex Git, use of for @command{gawk} source code
@c From emails written March 22, 2012, to the gawk developers list.
-If you look at the @command{gawk} source in the @command{git}
+If you look at the @command{gawk} source in the Git
repository, you will notice that it includes files that are automatically
generated by GNU infrastructure tools, such as @file{Makefile.in} from
-@command{automake} and even @file{configure} from @command{autoconf}.
+Automake and even @file{configure} from Autoconf.
This is different from many Free Software projects that do not store
the derived files, because that keeps the repository less cluttered,
and it is easier to see the substantive changes when comparing versions
and trying to understand what changed between commits.
-However, there are two reasons why the @command{gawk} maintainer
+However, there are several reasons why the @command{gawk} maintainer
likes to have everything in the repository.
First, because it is then easy to reproduce any given version completely,
@@ -36334,11 +37805,10 @@ there a guarantee that we could find that @command{bison} version? Or that
@emph{it} would build?)
If the repository has all the generated files, then it's easy to just check
-them out and build. (Or @emph{easier}, depending upon how far back we go.
-@code{:-)})
+them out and build. (Or @emph{easier}, depending upon how far back we go.)
And that brings us to the second (and stronger) reason why all the files
-really need to be in @command{git}. It boils down to who do you cater
+really need to be in Git. It boils down to who do you cater
to---the @command{gawk} developer(s), or the user who just wants to check
out a version and try it out?
@@ -36347,10 +37817,10 @@ wants it to be possible for any interested @command{awk} user in the
world to just clone the repository, check out the branch of interest and
build it. Without their having to have the correct version(s) of the
autotools.@footnote{There is one GNU program that is (in our opinion)
-severely difficult to bootstrap from the @command{git} repository. For
-example, on the author's old (but still working) PowerPC macintosh with
+severely difficult to bootstrap from the Git repository. For
+example, on the author's old (but still working) PowerPC Macintosh with
Mac OS X 10.5, it was necessary to bootstrap a ton of software, starting
-with @command{git} itself, in order to try to work with the latest code.
+with Git itself, in order to try to work with the latest code.
It's not pleasant, and especially on older systems, it's a big waste
of time.
@@ -36373,18 +37843,26 @@ This is extremely important for the @code{master} and
Further, the @command{gawk} maintainer would argue that it's also
important for the @command{gawk} developers. When he tried to check out
-the @code{xgawk} branch@footnote{A branch created by one of the other
+the @code{xgawk} branch@footnote{A branch (since removed) created by one of the other
developers that did not include the generated files.} to build it, he
couldn't. (No @file{ltmain.sh} file, and he had no idea how to create it,
and that was not the only problem.)
He felt @emph{extremely} frustrated. With respect to that branch,
the maintainer is no different than Jane User who wants to try to build
-@code{gawk-4.0-stable} or @code{master} from the repository.
+@code{gawk-4.1-stable} or @code{master} from the repository.
Thus, the maintainer thinks that it's not just important, but critical,
that for any given branch, the above incantation @emph{just works}.
+@c Added 9/2014:
+A third reason to have all the files is that without them, using @samp{git
+bisect} to try to find the commit that introduced a bug is exceedingly
+difficult. The maintainer tried to do that on another project that
+requires running bootstrapping scripts just to create @command{configure}
+and so on; it was really painful. When the repository is self-contained,
+using @command{git bisect} in it is very easy.
+
@c So - that's my reasoning and philosophy.
What are some of the consequences and/or actions to take?
@@ -36400,29 +37878,29 @@ It's the maintainer's job to merge them and he will deal with it.
@item
He is really good at @samp{git diff x y > /tmp/diff1 ; gvim /tmp/diff1} to
-remove the diffs that aren't of interest in order to review code. @code{:-)}
+remove the diffs that aren't of interest in order to review code.
@end enumerate
@item
It would certainly help if everyone used the same versions of the GNU tools
as he does, which in general are the latest released versions of
-@command{automake},
-@command{autoconf},
+Automake,
+Autoconf,
@command{bison},
and
-@command{gettext}.
+GNU @command{gettext}.
@ignore
-If it would help if I sent out an "I just upgraded to version x.y
-of tool Z" kind of message to this list, I can do that. Up until
+If it would help if I sent out an ``I just upgraded to version x.y
+of tool Z'' kind of message to this list, I can do that. Up until
now it hasn't been a real issue since I'm the only one who's been
dorking with the configuration machinery.
@end ignore
-@enumerate A
-@item
+@c @enumerate A
+@c @item
Installing from source is quite easy. It's how the maintainer worked for years
-under Fedora.
+(and still works).
He had @file{/usr/local/bin} at the front of his @env{PATH} and just did:
@example
@@ -36433,10 +37911,11 @@ cd @var{package}-@var{x}.@var{y}.@var{z}
make install # as root
@end example
-@item
+@c @item
+@ignore
These days the maintainer uses Ubuntu 12.04 which is medium current, but
-he is already doing the above for @command{autoconf}, @command{automake}
-and @command{bison}.
+he is already doing the above for Automake, Autoconf, and @command{bison}.
+@end ignore
@ignore
(C. Rant: Recent Linux versions with GNOME 3 really suck. What
@@ -36444,7 +37923,7 @@ and @command{bison}.
me to Ubuntu, but Ubuntu 11.04 and 11.10 are totally unusable from
a UI perspective. Bleah.)
@end ignore
-@end enumerate
+@c @end enumerate
@ignore
@item
@@ -36460,7 +37939,7 @@ the "real" changes and the second with "everything else needed for
Most of the above was originally written by the maintainer to other
@command{gawk} developers. It raised the objection from one of
the developers ``@dots{} that anybody pulling down the source from
-@command{git} is not an end user.''
+Git is not an end user.''
However, this is not true. There are ``power @command{awk} users''
who can build @command{gawk} (using the magic incantation shown previously)
@@ -36470,10 +37949,10 @@ kept buildable all the time.
It was then suggested that there be a @command{cron} job to create
nightly tarballs of ``the source.'' Here, the problem is that there
are source trees, corresponding to the various branches! So,
-nightly tar balls aren't the answer, especially as the repository can go
+nightly tarballs aren't the answer, especially as the repository can go
for weeks without significant change being introduced.
-Fortunately, the @command{git} server can meet this need. For any given
+Fortunately, the Git server can meet this need. For any given
branch named @var{branchname}, use:
@example
@@ -36533,9 +38012,10 @@ Larry
@author Larry Wall
@end quotation
-The @file{TODO} file in the @command{gawk} Git repository lists possible
-future enhancements. Some of these relate to the source code, and others
-to possible new features. Please see that file for the list.
+The @file{TODO} file in the @code{master} branch of the @command{gawk}
+Git repository lists possible future enhancements. Some of these relate
+to the source code, and others to possible new features. Please see
+that file for the list.
@xref{Additions},
if you are interested in tackling any of the projects listed there.
@@ -36549,7 +38029,7 @@ different limits.
@multitable @columnfractions .40 .60
@headitem Item @tab Limit
@item Characters in a character class @tab 2^(number of bits per byte)
-@item Length of input record @tab @code{MAX_INT }
+@item Length of input record @tab @code{MAX_INT}
@item Length of output record @tab Unlimited
@item Length of source line @tab Unlimited
@item Number of fields in a record @tab @code{MAX_LONG}
@@ -36558,9 +38038,9 @@ different limits.
@item Number of input records total @tab @code{MAX_LONG}
@item Number of pipe redirections @tab min(number of processes per user, number of open files)
@item Numeric values @tab Double-precision floating point (if not using MPFR)
-@item Size of a field @tab @code{MAX_INT }
-@item Size of a literal string @tab @code{MAX_INT }
-@item Size of a printf string @tab @code{MAX_INT }
+@item Size of a field @tab @code{MAX_INT}
+@item Size of a literal string @tab @code{MAX_INT}
+@item Size of a printf string @tab @code{MAX_INT}
@end multitable
@node Extension Design
@@ -36595,7 +38075,7 @@ mechanism was bolted onto the side and was not really well thought out.
The old extension mechanism had several problems:
-@itemize @bullet
+@itemize @value{BULLET}
@item
It depended heavily upon @command{gawk} internals. Any time the
@code{NODE} structure@footnote{A critical central data structure
@@ -36607,8 +38087,8 @@ documentation in this @value{DOCUMENT}, but it was quite minimal.
@item
Being able to call into @command{gawk} from an extension required linker
facilities that are common on Unix-derived systems but that did
-not work on Windows systems; users wanting extensions on Windows
-had to statically link them into @command{gawk}, even though Windows supports
+not work on MS-Windows systems; users wanting extensions on MS-Windows
+had to statically link them into @command{gawk}, even though MS-Windows supports
dynamic loading of shared objects.
@item
@@ -36631,7 +38111,7 @@ project is provided in @ref{gawkextlib}.
Some goals for the new API were:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The API should be independent of @command{gawk} internals. Changes in
@command{gawk} internals should not be visible to the writer of an
@@ -36646,7 +38126,7 @@ The API should enable extensions written in C or C++ to have roughly the
same ``appearance'' to @command{awk}-level code as @command{awk}
functions do. This means that extensions should have:
-@itemize @minus
+@itemize @value{MINUS}
@item
The ability to access function parameters.
@@ -36662,13 +38142,13 @@ in order to loop over all the element in an easy fashion for C code.
@item
The ability to create arrays (including @command{gawk}'s true
-multidimensional arrays).
+arrays of arrays).
@end itemize
@end itemize
Some additional important goals were:
-@itemize @bullet
+@itemize @value{BULLET}
@item
The API should use only features in ISO C 90, so that extensions
can be written using the widest range of C and C++ compilers. The header
@@ -36683,15 +38163,15 @@ The API mechanism should not require access to @command{gawk}'s
symbols@footnote{The @dfn{symbols} are the variables and functions
defined inside @command{gawk}. Access to these symbols by code
external to @command{gawk} loaded dynamically at runtime is
-problematic on Windows.} by the compile-time or dynamic linker,
-in order to enable creation of extensions that also work on Windows.
+problematic on MS-Windows.} by the compile-time or dynamic linker,
+in order to enable creation of extensions that also work on MS-Windows.
@end itemize
During development, it became clear that there were other features
that should be available to extensions, which were also subsequently
provided:
-@itemize @bullet
+@itemize @value{BULLET}
@item
Extensions should have the ability to hook into @command{gawk}'s
I/O redirection mechanism. In particular, the @command{xgawk}
@@ -36702,7 +38182,7 @@ two-way I/O.
@item
An extension should be able to provide a ``call back'' function
-to perform clean up actions when @command{gawk} exits.
+to perform cleanup actions when @command{gawk} exits.
@item
An extension should be able to provide a version string so that
@@ -36772,7 +38252,7 @@ to provide a minimal yet powerful set of features for creating extensions.
The API can later be expanded, in two ways:
-@itemize @bullet
+@itemize @value{BULLET}
@item
@command{gawk} passes an ``extension id'' into the extension when it
first loads the extension. The extension then passes this id back
@@ -36795,12 +38275,12 @@ to any of the above.
@ref{Dynamic Extensions}, describes the supported API and mechanisms
for writing extensions for @command{gawk}. This API was introduced
-in version 4.1. However, for many years @command{gawk}
+in @value{PVERSION} 4.1. However, for many years @command{gawk}
provided an extension mechanism that required knowledge of @command{gawk}
internals and that was not as well designed.
-In order to provide a transition period, @command{gawk} version
-4.1 continues to support the original extension mechanism.
+In order to provide a transition period, @command{gawk} @value{PVERSION} 4.1
+continues to support the original extension mechanism.
This will be true for the life of exactly one major release. This support
will be withdrawn, and removed from the source code, at the next major
release.
@@ -36826,6 +38306,42 @@ The @command{gawk} development team strongly recommends that you
convert any old extensions that you may have to use the new API
described in @ref{Dynamic Extensions}.
+@node Notes summary
+@appendixsec Summary
+
+@itemize @value{BULLET}
+@item
+@command{gawk}'s extensions can be disabled with either the
+@option{--traditional} option or with the @option{--posix} option.
+The @option{--parsedebug} option is available if @command{gawk} is
+compiled with @samp{-DDEBUG}.
+
+@item
+The source code for @command{gawk} is maintained in a publicly
+accessible Git repository. Anyone may check it out and view the source.
+
+@item
+Contributions to @command{gawk} are welcome. Following the steps
+outlined in this @value{CHAPTER} will make it easier to integrate
+your contributions into the code base.
+This applies both to new feature contributions and to ports to
+additional operating systems.
+
+@item
+@command{gawk} has some limits---generally those that are imposed by
+the machine architecture.
+
+@item
+The extension API design was intended to solve a number of problems
+with the previous extension mechanism, enable features needed by
+the @code{xgawk} project, and provide binary compatibility going forward.
+
+@item
+The previous extension mechanism is still supported in @value{PVERSION} 4.1
+of @command{gawk}, but it @emph{will} be removed in the next major release.
+
+@end itemize
+
@c ENDOFRANGE impis
@c ENDOFRANGE gawii
@@ -36874,9 +38390,11 @@ See @inlineraw{docbook, <xref linkend="figure-general-flow"/>}.
@end ifnotdocbook
@docbook
-<figure id="figure-general-flow">
+<figure id="figure-general-flow" float="0">
<title>General Program Flow</title>
-<graphic fileref="general-program.eps"/>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="general-program.png" format="PNG"/></imageobject>
+</mediaobject>
</figure>
@end docbook
@@ -36899,7 +38417,7 @@ of the following, very basic set of steps,
as shown in @ref{figure-process-flow}:
@end ifnotdocbook
@ifdocbook
-as shown in @inlineraw{docbook <xref linkend="figure-process-flow"/>}:
+as shown in @inlineraw{docbook, <xref linkend="figure-process-flow"/>}:
@end ifdocbook
@ifnotdocbook
@@ -36915,9 +38433,11 @@ as shown in @inlineraw{docbook <xref linkend="figure-process-flow"/>}:
@end ifnotdocbook
@docbook
-<figure id="figure-process-flow">
+<figure id="figure-process-flow" float="0">
<title>Basic Program Stages</title>
-<graphic fileref="process-flow.eps"/>
+<mediaobject>
+<imageobject role="web"><imagedata fileref="process-flow.png" format="PNG"/></imageobject>
+</mediaobject>
</figure>
@end docbook
@@ -37015,7 +38535,7 @@ Individual variables, as well as numeric and string variables, are
referred to as @dfn{scalar} values.
Groups of values, such as arrays, are not scalars.
-@ref{General Arithmetic}, provided a basic introduction to numeric
+@ref{Computer Arithmetic}, provided a basic introduction to numeric
types (integer and floating-point) and how they are used in a computer.
Please review that information, including a number of caveats that
were presented.
@@ -37031,14 +38551,14 @@ like this: @code{""}.
Humans are used to working in decimal; i.e., base 10. In base 10,
numbers go from 0 to 9, and then ``roll over'' into the next
-column. (Remember grade school? 42 is 4 times 10 plus 2.)
+column. (Remember grade school? 42 = 4 x 10 + 2.)
There are other number bases though. Computers commonly use base 2
or @dfn{binary}, base 8 or @dfn{octal}, and base 16 or @dfn{hexadecimal}.
In binary, each column represents two times the value in the column to
its right. Each column may contain either a 0 or a 1.
-Thus, binary 1010 represents 1 times 8, plus 0 times 4, plus 1 times 2,
-plus 0 times 1, or decimal 10.
+Thus, binary 1010 represents (1 x 8) + (0 x 4) + (1 x 2)
++ (0 x 1), or decimal 10.
Octal and hexadecimal are discussed more in
@ref{Nondecimal-numbers}.
@@ -37075,7 +38595,7 @@ Where it makes sense, POSIX @command{awk} is compatible with 1999 ISO C.
@item Action
A series of @command{awk} statements attached to a rule. If the rule's
pattern matches an input record, @command{awk} executes the
-rule's action. Actions are always enclosed in curly braces.
+rule's action. Actions are always enclosed in braces.
(@xref{Action Overview}.)
@cindex Spencer, Henry
@@ -37180,7 +38700,7 @@ Named after the English mathematician Boole. See also ``Logical Expression.''
@item Bourne Shell
The standard shell (@file{/bin/sh}) on Unix and Unix-like systems,
-originally written by Steven R.@: Bourne.
+originally written by Steven R.@: Bourne at Bell Laboratories.
Many shells (Bash, @command{ksh}, @command{pdksh}, @command{zsh}) are
generally upwardly compatible with the Bourne shell.
@@ -37230,7 +38750,9 @@ Changing some of them affects @command{awk}'s running environment.
(@xref{Built-in Variables}.)
@item Braces
-See ``Curly Braces.''
+The characters @samp{@{} and @samp{@}}. Braces are used in
+@command{awk} for delimiting actions, compound statements, and function
+bodies.
@item C
The system programming language that most GNU software is written in. The
@@ -37255,7 +38777,7 @@ or place. The most common character set in use today is ASCII (American
Standard Code for Information Interchange). Many European
countries use an extension of ASCII known as ISO-8859-1 (ISO Latin-1).
The @uref{http://www.unicode.org, Unicode character set} is
-becoming increasingly popular and standard, and is particularly
+increasingly popular and standard, and is particularly
widely used on GNU/Linux systems.
@cindex Kernighan, Brian
@@ -37268,10 +38790,11 @@ It was written in @command{awk}
by Brian Kernighan and Jon Bentley, and is available from
@uref{http://netlib.sandia.gov/netlib/typesetting/chem.gz}.
+@cindex McIlroy, Doug
@cindex cookie
@item Cookie
A peculiar goodie, token, saying or remembrance
-produced by or presented to a program. (With thanks to Doug McIlroy.)
+produced by or presented to a program. (With thanks to Professor Doug McIlroy.)
@ignore
From: Doug McIlroy <doug@cs.dartmouth.edu>
Date: Sat, 13 Oct 2012 19:55:25 -0400
@@ -37349,9 +38872,7 @@ statements, and in patterns to select which input records to process.
(@xref{Typing and Comparison}.)
@item Curly Braces
-The characters @samp{@{} and @samp{@}}. Curly braces are used in
-@command{awk} for delimiting actions, compound statements, and function
-bodies.
+See ``Braces.''
@cindex dark corner
@item Dark Corner
@@ -37396,7 +38917,7 @@ ordinary expression. It could be a string constant, such as
(@xref{Computed Regexps}.)
@item Environment
-A collection of strings, of the form @var{name}@code{=}@code{val}, that each
+A collection of strings, of the form @samp{@var{name}=@var{val}}, that each
program has available to it. Users generally place values into the
environment in order to provide information to various programs. Typical
examples are the environment variables @env{HOME} and @env{PATH}.
@@ -37450,8 +38971,8 @@ this is just a number that can have a fractional part.
See also ``Double Precision'' and ``Single Precision.''
@item Format
-Format strings are used to control the appearance of output in the
-@code{strftime()} and @code{sprintf()} functions, and are used in the
+Format strings control the appearance of output in the
+@code{strftime()} and @code{sprintf()} functions, and in the
@code{printf} statement as well. Also, data conversions from numbers to strings
are controlled by the format strings contained in the built-in variables
@code{CONVFMT} and @code{OFMT}. (@xref{Control Letters}.)
@@ -37520,7 +39041,7 @@ Base 16 notation, where the digits are @code{0}--@code{9} and
@code{A}--@code{F}, with @samp{A}
representing 10, @samp{B} representing 11, and so on, up to @samp{F} for 15.
Hexadecimal numbers are written in C using a leading @samp{0x},
-to indicate their base. Thus, @code{0x12} is 18 (1 times 16 plus 2).
+to indicate their base. Thus, @code{0x12} is 18 ((1 x 16) + 2).
@xref{Nondecimal-numbers}.
@item I/O
@@ -37594,8 +39115,8 @@ meaning. Keywords are reserved and may not be used as variable names.
@code{function},
@code{func},
@code{if},
-@code{nextfile},
@code{next},
+@code{nextfile},
@code{switch},
and
@code{while}.
@@ -37656,13 +39177,9 @@ Ancient @command{awk} implementations used single precision floating-point.
@item Octal
Base-eight notation, where the digits are @code{0}--@code{7}.
Octal numbers are written in C using a leading @samp{0},
-to indicate their base. Thus, @code{013} is 11 (one times 8 plus 3).
+to indicate their base. Thus, @code{013} is 11 ((1 x 8) + 3).
@xref{Nondecimal-numbers}.
-@cindex P1003.1 POSIX standard
-@item P1003.1
-See ``POSIX.''
-
@item Pattern
Patterns tell @command{awk} which input records are interesting to which
rules.
@@ -37703,8 +39220,8 @@ specify single lines. (@xref{Pattern Overview}.)
@item Recursion
When a function calls itself, either directly or indirectly.
-As long as this is not clear, refer to the entry for ``recursion.''
If this is clear, stop, and proceed to the next entry.
+Otherwise, refer to the entry for ``recursion.''
@item Redirection
Redirection means performing input from something other than the standard input
@@ -37783,14 +39300,14 @@ expressions, and function calls have side effects.
An internal representation of numbers that can have fractional parts.
Single precision numbers keep track of fewer digits than do double precision
numbers, but operations on them are sometimes less expensive in terms of CPU time.
-This is the type used by some very old versions of @command{awk} to store
+This is the type used by some ancient versions of @command{awk} to store
numeric values. It is the C type @code{float}.
@item Space
The character generated by hitting the space bar on the keyboard.
@item Special File
-A file name interpreted internally by @command{gawk}, instead of being handed
+A @value{FN} interpreted internally by @command{gawk}, instead of being handed
directly to the underlying operating system---for example, @file{/dev/stderr}.
(@xref{Special Files}.)
@@ -37820,7 +39337,7 @@ into the local language.
A value in the ``seconds since the epoch'' format used by Unix
and POSIX systems. Used for the @command{gawk} functions
@code{mktime()}, @code{strftime()}, and @code{systime()}.
-See also ``Epoch'' and ``UTC.''
+See also ``Epoch,'' ``GMT,'' and ``UTC.''
@cindex Linux
@cindex GNU/Linux
@@ -37849,6 +39366,8 @@ A sequence of space, TAB, or newline characters occurring inside an input
record or a string.
@end table
+@end ifclear
+
@c The GNU General Public License.
@node Copying
@unnumbered GNU General Public License
@@ -38578,7 +40097,7 @@ applications with the library. If this is what you want to do, use
the GNU Lesser General Public License instead of this License. But
first, please read @url{http://www.gnu.org/philosophy/why-not-lgpl.html}.
-
+@ifclear FOR_PRINT
@c The GNU Free Documentation License.
@node GNU Free Documentation License
@unnumbered GNU Free Documentation License
@@ -39093,9 +40612,7 @@ recommend releasing these examples in parallel under your choice of
free software license, such as the GNU General Public License,
to permit their use in free software.
-@c Local Variables:
-@c ispell-local-pdict: "ispell-dict"
-@c End:
+@end ifclear
@ifnotdocbook
@node Index
@@ -39139,13 +40656,14 @@ Consistency issues:
Use "zeros" instead of "zeroes".
Use "nonzero" not "non-zero".
Use "runtime" not "run time" or "run-time".
- Use "command-line" not "command line".
+ Use "command-line" as an adjective and "command line" as a noun.
Use "online" not "on-line".
Use "whitespace" not "white space".
Use "Input/Output", not "input/output". Also "I/O", not "i/o".
Use "lefthand"/"righthand", not "left-hand"/"right-hand".
Use "workaround", not "work-around".
Use "startup"/"cleanup", not "start-up"/"clean-up"
+ Use "filesystem", not "file system"
Use @code{do}, and not @code{do}-@code{while}, except where
actually discussing the do-while.
Use "versus" in text and "vs." in index entries
@@ -39160,8 +40678,6 @@ Consistency issues:
The numbers zero through ten should be spelled out, except when
talking about file descriptor numbers. > 10 and < 0, it's
ok to use numbers.
- In tables, put command-line options in @code, while in the text,
- put them in @option.
For most cases, do NOT put a comma before "and", "or" or "but".
But exercise taste with this rule.
Don't show the awk command with a program in quotes when it's
@@ -39208,9 +40724,6 @@ ORA uses filename, thus the macro.
Suggestions:
------------
-% Next edition:
-% 1. Standardize the error messages from the functions and programs
-% in the two sample code chapters.
Better sidebars can almost sort of be done with:
@@ -39242,3 +40755,6 @@ But to use it you have to say
}
which sorta sucks.
+
+TODO:
+-----
diff --git a/double.c b/double.c
index 6390443f..cfb2fbf9 100644
--- a/double.c
+++ b/double.c
@@ -141,6 +141,7 @@ awknum_init(bltin_t **numbr_bltins)
{ "atan2", do_atan2 },
{ "compl", do_compl },
{ "cos", do_cos },
+ { "div", do_div },
{ "exp", do_exp },
{ "int", do_int },
{ "log", do_log },
@@ -1340,6 +1341,72 @@ do_strtonum(int nargs)
return make_awknum(d);
}
+/* do_div --- do integer division, return quotient and remainder in dest array */
+
+/*
+ * We define the semantics as:
+ * numerator = int(numerator)
+ * denominator = int(denominator)
+ * quotient = int(numerator / denominator)
+ * remainder = int(numerator % denominator)
+ */
+
+NODE *
+do_div(int nargs) /* nargs is not referenced in this body */
+{
+ NODE *numerator, *denominator, *result;
+ double num, denom, quotient, remainder;
+ NODE *sub, **lhs;
+
+ result = POP_PARAM(); /* popped first: the third argument, the destination array */
+ if (result->type != Node_var_array)
+ fatal(_("div: third argument is not an array"));
+ assoc_clear(result); /* presumably empties the destination before it is filled below */
+
+ denominator = POP_SCALAR(); /* second argument */
+ numerator = POP_SCALAR(); /* first argument */
+
+ if (do_lint) {
+ if ((numerator->flags & (NUMCUR|NUMBER)) == 0)
+ lintwarn(_("div: received non-numeric first argument"));
+ if ((denominator->flags & (NUMCUR|NUMBER)) == 0)
+ lintwarn(_("div: received non-numeric second argument"));
+ }
+
+ (void) force_number(numerator);
+ (void) force_number(denominator);
+ num = double_to_int(get_number_d(numerator)); /* the int() step from the header comment */
+ denom = double_to_int(get_number_d(denominator));
+
+ if (denom == 0.0) /* tested after the int() conversion of the denominator */
+ fatal(_("div: division by zero attempted"));
+
+ quotient = double_to_int(num / denom);
+ /*
+ * FIXME: This code is duplicated, factor it out to a
+ * separate function.
+ */
+#ifdef HAVE_FMOD
+ remainder = fmod(num, denom);
+#else /* ! HAVE_FMOD */
+ (void) modf(num / denom, & remainder); /* remainder briefly holds the integral part of num / denom */
+ remainder = num - remainder * denom;
+#endif /* ! HAVE_FMOD */
+ remainder = double_to_int(remainder);
+
+ sub = make_string("quotient", 8); /* 8 == strlen("quotient") */
+ lhs = assoc_lookup(result, sub);
+ unref(*lhs); /* release whatever value the slot held before overwriting it */
+ *lhs = make_number((AWKNUM) quotient);
+
+ sub = make_string("remainder", 9); /* 9 == strlen("remainder") */
+ lhs = assoc_lookup(result, sub);
+ unref(*lhs);
+ *lhs = make_number((AWKNUM) remainder);
+
+ return make_number((AWKNUM) 0.0); /* always 0; the results are delivered through the array */
+}
+
/* format_awknum_printf --- format a number for (s)printf */
static int
diff --git a/eval.c b/eval.c
index 978941fb..7bca21a4 100644
--- a/eval.c
+++ b/eval.c
@@ -217,6 +217,7 @@ load_casetable(void)
return;
#ifndef ZOS_USS
+ /* use of isalpha is ok here (see is_alpha in awkgram.y) */
for (i = 0200; i <= 0377; i++) {
if (isalpha(i) && islower(i) && i != toupper(i))
casetable[i] = toupper(i);
@@ -242,6 +243,7 @@ static const char *const nodetypes[] = {
"Node_func",
"Node_ext_func",
"Node_old_ext_func",
+ "Node_builtin_func",
"Node_array_ref",
"Node_array_tree",
"Node_array_leaf",
@@ -799,9 +801,35 @@ set_BINMODE()
void
set_OFS()
{
+ static bool first = true;
+ size_t new_ofs_len;
+
+ if (first) /* true when called from init_vars() in main() */
+ first = false;
+ else {
+ /* rebuild $0 using OFS that was current when $0 changed */
+ if (! field0_valid) {
+ get_field(UNLIMITED - 1, NULL);
+ rebuild_record();
+ }
+ }
+
+ /*
+ * Save OFS value for use in building record and in printing.
+ * Can't just have OFS point into the OFS_node since it's
+ * already updated when we come into this routine, and we need
+ * the old value to rebuild the record (see above).
+ */
OFS_node->var_value = force_string(OFS_node->var_value);
- OFS = OFS_node->var_value->stptr;
- OFSlen = OFS_node->var_value->stlen;
+ new_ofs_len = OFS_node->var_value->stlen;
+
+ if (OFS == NULL)
+ emalloc(OFS, char *, new_ofs_len + 2, "set_OFS");
+ else if (OFSlen < new_ofs_len)
+ erealloc(OFS, char *, new_ofs_len + 2, "set_OFS");
+
+ memcpy(OFS, OFS_node->var_value->stptr, OFS_node->var_value->stlen);
+ OFSlen = new_ofs_len;
OFS[OFSlen] = '\0';
}
diff --git a/ext.c b/ext.c
index 09e10164..afb8d715 100644
--- a/ext.c
+++ b/ext.c
@@ -46,33 +46,9 @@ extern SRCFILE *srcfiles;
static bool
is_letter(unsigned char c)
{
- switch (c) {
- case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
- case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
- case 's': case 't': case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
- case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
- case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
- case '_':
- return true;
- default:
- return false;
- }
+ return (is_alpha(c) || c == '_');
}
-/* is_identifier_char --- return true if a character can be used in an identifier */
-
-static bool
-is_identifier_char(unsigned char c)
-{
- return (is_letter(c) || isdigit(c));
-}
-
-
#define INIT_FUNC "dl_load"
/* load_ext --- load an external library */
@@ -224,7 +200,7 @@ make_builtin(const awk_ext_func_t *funcinfo)
return awk_false;
for (sp++; (c = *sp++) != '\0';) {
- if (! is_identifier_char(c))
+ if (! is_identchar(c))
return awk_false;
}
@@ -279,7 +255,7 @@ make_old_builtin(const char *name, NODE *(*func)(int), int count) /* temporary *
fatal(_("extension: illegal character `%c' in function name `%s'"), *sp, name);
for (sp++; (c = *sp++) != '\0';) {
- if (! is_identifier_char(c))
+ if (! is_identchar(c))
fatal(_("extension: illegal character `%c' in function name `%s'"), c, name);
}
diff --git a/extension/CMakeLists.txt b/extension/CMakeLists.txt
new file mode 100644
index 00000000..1bb4ceb1
--- /dev/null
+++ b/extension/CMakeLists.txt
@@ -0,0 +1,84 @@
+#
+# extension/CMakeLists.txt --- CMake input file for gawk
+#
+# Copyright (C) 2013
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+# Remove the definition of GAWK because of gawkapi.h.
+remove_definitions(-DGAWK)
+
+MACRO(BuildExtension name sources) # build one extension as a MODULE library and install it
+ add_library (${name} MODULE ${sources} ${ARGN}) # ${ARGN} carries any additional source files
+ target_link_libraries(${name} ${EXTRA_LIBS})
+ set_target_properties(${name} PROPERTIES PREFIX "") # empty prefix: file is ${name}<suffix>, as the install line expects
+ install(PROGRAMS ${CMAKE_BINARY_DIR}/extension/${name}${CMAKE_SHARED_LIBRARY_SUFFIX} DESTINATION lib)
+ENDMACRO(BuildExtension)
+
+if (${HAVE_STRUCT_STAT_ST_BLKSIZE})
+ BuildExtension(filefuncs filefuncs.c stack.c gawkfts.c)
+else()
+ message(STATUS "extension filefuncs cannot be built because HAVE_STRUCT_STAT_ST_BLKSIZE is missing")
+endif()
+
+if (HAVE_FNMATCH AND HAVE_FNMATCH_H)
+ BuildExtension(fnmatch fnmatch.c)
+else()
+ message(STATUS "extension fnmatch cannot be built because function fnmatch or fnmatch.h is missing")
+endif()
+
+if (${HAVE_SYS_WAIT_H})
+ BuildExtension(fork fork.c)
+else()
+ message(STATUS "extension fork cannot be built because HAVE_SYS_WAIT_H is missing")
+endif()
+
+if (${HAVE_MKSTEMP})
+ BuildExtension(inplace inplace.c)
+else()
+ message(STATUS "extension inplace cannot be built because HAVE_MKSTEMP is missing")
+endif()
+
+BuildExtension(ordchr ordchr.c)
+
+if (HAVE_DIRENT_H AND HAVE_DIRFD) # both the header and dirfd() are required
+ BuildExtension(readdir readdir.c)
+else()
+ message(STATUS "extension readdir cannot be built because dirent.h or function dirfd is missing")
+endif()
+
+BuildExtension(readfile readfile.c)
+
+BuildExtension(revoutput revoutput.c)
+
+if (${HAVE_GETDTABLESIZE})
+ BuildExtension(revtwoway revtwoway.c)
+else()
+ message(STATUS "extension revtwoway cannot be built because function getdtablesize is missing")
+endif()
+
+BuildExtension(rwarray rwarray.c)
+
+BuildExtension(time time.c)
+
+BuildExtension(testext testext.c)
+
diff --git a/extension/ChangeLog b/extension/ChangeLog
index f3a1c7a8..f324bdeb 100644
--- a/extension/ChangeLog
+++ b/extension/ChangeLog
@@ -1,3 +1,17 @@
+2014-08-12 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (RM): Define for makes that don't have it,
+ such as on OpenBSD. Thanks to Jeremie Courreges-Anglas
+ <jca@wxcvbn.org> for the report.
+
+2014-06-13 Paul Gortmaker <paul.gortmaker@windriver.com>
+
+ * Makefile.am (uninstall-so): Came across below bug while cross
+ compiling, and changed both install-data-hook and uninstall-so
+ to use $(DESTDIR) on v4.1.1 before seeing most of the fix in
+ gawk-4.1.1-3-g976f73ab0356; here we ensure uninstall-so also
+ uses the $(DESTDIR) prefix on its use of pkgextensiondir.
+
2014-04-11 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.am (install-data-hook): Use $(DESTDIR) when removing
diff --git a/extension/Makefile.am b/extension/Makefile.am
index 11826e2b..e6678c54 100644
--- a/extension/Makefile.am
+++ b/extension/Makefile.am
@@ -29,6 +29,9 @@ AM_CPPFLAGS = -I$(srcdir)/..
# correctly after changing configure.ac
ACLOCAL_AMFLAGS = -I m4
+# For some make's, e.g. OpenBSD, that don't define this
+RM = rm -f
+
# Note: rwarray does not currently compile.
pkgextension_LTLIBRARIES = \
@@ -105,7 +108,7 @@ install-data-hook:
# Keep the uninstall check working:
uninstall-so:
- $(RM) $(pkgextensiondir)/*.so
+ $(RM) $(DESTDIR)$(pkgextensiondir)/*.so
uninstall-recursive: uninstall-so
diff --git a/extension/Makefile.in b/extension/Makefile.in
index d81b1696..46168e4e 100644
--- a/extension/Makefile.in
+++ b/extension/Makefile.in
@@ -513,6 +513,9 @@ AM_CPPFLAGS = -I$(srcdir)/..
# correctly after changing configure.ac
ACLOCAL_AMFLAGS = -I m4
+# For some make's, e.g. OpenBSD, that don't define this
+RM = rm -f
+
# Note: rwarray does not currently compile.
pkgextension_LTLIBRARIES = \
filefuncs.la \
@@ -1236,7 +1239,7 @@ install-data-hook:
# Keep the uninstall check working:
uninstall-so:
- $(RM) $(pkgextensiondir)/*.so
+ $(RM) $(DESTDIR)$(pkgextensiondir)/*.so
uninstall-recursive: uninstall-so
diff --git a/field.c b/field.c
index 67a34fcd..ffbfc6de 100644
--- a/field.c
+++ b/field.c
@@ -40,7 +40,6 @@ typedef void (* Setfunc)(long, char *, long, NODE *);
static long (*parse_field)(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
-static void rebuild_record(void);
static long re_parse_field(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
static long def_parse_field(long, char **, int, NODE *,
@@ -140,7 +139,7 @@ set_field(long num,
/* rebuild_record --- Someone assigned a value to $(something).
Fix up $0 to be right */
-static void
+void
rebuild_record()
{
/*
@@ -148,9 +147,7 @@ rebuild_record()
* a size_t isn't big enough.
*/
unsigned long tlen;
- unsigned long ofslen;
NODE *tmp;
- NODE *ofs;
char *ops;
char *cops;
long i;
@@ -158,14 +155,12 @@ rebuild_record()
assert(NF != -1);
tlen = 0;
- ofs = force_string(OFS_node->var_value);
- ofslen = ofs->stlen;
for (i = NF; i > 0; i--) {
tmp = fields_arr[i];
tmp = force_string(tmp);
tlen += tmp->stlen;
}
- tlen += (NF - 1) * ofslen;
+ tlen += (NF - 1) * OFSlen;
if ((long) tlen < 0)
tlen = 0;
emalloc(ops, char *, tlen + 2, "rebuild_record");
@@ -183,11 +178,11 @@ rebuild_record()
}
/* copy OFS */
if (i != NF) {
- if (ofslen == 1)
- *cops++ = ofs->stptr[0];
- else if (ofslen != 0) {
- memcpy(cops, ofs->stptr, ofslen);
- cops += ofslen;
+ if (OFSlen == 1)
+ *cops++ = *OFS;
+ else if (OFSlen != 0) {
+ memcpy(cops, OFS, OFSlen);
+ cops += OFSlen;
}
}
}
@@ -227,7 +222,7 @@ rebuild_record()
fields_arr[i] = n;
assert((n->flags & WSTRCUR) == 0);
}
- cops += fields_arr[i]->stlen + ofslen;
+ cops += fields_arr[i]->stlen + OFSlen;
}
unref(fields_arr[0]);
diff --git a/format.c b/format.c
index c1c16f43..2322d563 100644
--- a/format.c
+++ b/format.c
@@ -601,13 +601,29 @@ check_pos:
size_t count;
memset(& mbs, 0, sizeof(mbs));
+
+ /* handle systems with too small wchar_t */
+ if (sizeof(wchar_t) < 4 && uval > 0xffff) {
+ if (do_lint)
+ lintwarn(
+ _("[s]printf: value %g is too big for %%c format"),
+ arg->numbr);
+
+ goto out0;
+ }
+
wc = uval;
count = wcrtomb(buf, wc, & mbs);
if (count == 0
- || count == (size_t)-1
- || count == (size_t)-2)
+ || count == (size_t) -1) {
+ if (do_lint)
+ lintwarn(
+ _("[s]printf: value %g is not a valid wide character"),
+ arg->numbr);
+
goto out0;
+ }
memcpy(CPBUF, buf, count);
spec.prec = count;
@@ -619,10 +635,6 @@ out0:
/* else,
fall through */
#endif
- if (do_lint && uval > 255) {
- lintwarn("[s]printf: value %g is too big for %%c format",
- get_number_d(arg));
- }
CPBUF[0] = uval;
spec.prec = 1;
cp = CPBUF;
@@ -647,7 +659,7 @@ out0:
memset(& state, 0, sizeof(state));
count = mbrlen(cp, arg->stlen, & state);
- if (count > 0) {
+ if (count != (size_t) -1 && count != (size_t) -2 && count > 0) {
spec.prec = count;
/* may need to increase fw so that padding happens, see pr_tail code */
if (spec.fw > 0)
diff --git a/gawkapi.h b/gawkapi.h
index 5ccadc21..090cf797 100644
--- a/gawkapi.h
+++ b/gawkapi.h
@@ -142,8 +142,8 @@ typedef struct awk_input {
* so there is no need to set it unless an error occurs.
*
* If an error does occur, the function should return EOF and set
- * *errcode to a non-zero value. In that case, if *errcode does not
- * equal -1, gawk will automatically update the ERRNO variable based
+ * *errcode to a positive value. In that case, if *errcode is greater
+ * than zero, gawk will automatically update the ERRNO variable based
* on the value of *errcode (e.g., setting *errcode = errno should do
* the right thing).
*/
diff --git a/helpers/ChangeLog b/helpers/ChangeLog
index 17624c3f..a5bbafb1 100644
--- a/helpers/ChangeLog
+++ b/helpers/ChangeLog
@@ -1,3 +1,11 @@
+2014-09-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * chlistref.awk: New file. Finds @ref{} to non-chapters.
+
+2014-06-08 Arnold D. Robbins <arnold@skeeve.com>
+
+ * testdfa.c: Minor improvements.
+
2014-04-08 Arnold D. Robbins <arnold@skeeve.com>
* 4.1.1: Release tar ball made.
diff --git a/helpers/chlistref.awk b/helpers/chlistref.awk
new file mode 100644
index 00000000..49f63f59
--- /dev/null
+++ b/helpers/chlistref.awk
@@ -0,0 +1,31 @@
+BEGIN {
+ chapters["Getting Started"]++
+ chapters["Invoking Gawk"]++
+ chapters["Regexp"]++
+ chapters["Reading Files"]++
+ chapters["Printing"]++
+ chapters["Expressions"]++
+ chapters["Patterns and Actions"]++
+ chapters["Arrays"]++
+ chapters["Functions"]++
+ chapters["Library Functions"]++
+ chapters["Sample Programs"]++
+ chapters["Advanced Features"]++
+ chapters["Internationalization"]++
+ chapters["Debugger"]++
+ chapters["Arbitrary Precision Arithmetic"]++
+ chapters["Dynamic Extensions"]++
+ chapters["Language History"]++
+ chapters["Installation"]++
+ chapters["Notes"]++
+ chapters["Basic Concepts"]++
+
+ Pattern = ".*@ref\\{([^}]+)\\},.*"
+}
+
+$0 ~ Pattern {
+ ref = gensub(Pattern, "\\1", 1, $0)
+ if (! (ref in chapters))
+ printf("%s:%d: %s\n", FILENAME, FNR, $0)
+}
+
diff --git a/helpers/testdfa.c b/helpers/testdfa.c
index 813acaab..25a229a2 100644
--- a/helpers/testdfa.c
+++ b/helpers/testdfa.c
@@ -40,14 +40,16 @@
#include <sys/stat.h>
+#undef _Noreturn
#define _Noreturn
+#define _GL_ATTRIBUTE_PURE
#include "dfa.h"
const char *regexflags2str(int flags);
char *databuf(int fd);
const char * reflags2str(int flagval);
int parse_escape(const char **string_ptr);
-char *setup_pattern(const char *pattern, size_t len);
+char *setup_pattern(const char *pattern, size_t *len);
char casetable[];
reg_syntax_t syn;
@@ -126,10 +128,10 @@ int main(int argc, char **argv)
printf("Ignorecase: %s\nSyntax: %s\n",
(ignorecase ? "true" : "false"),
reflags2str(syn));
- printf("Pattern: /%s/\n", pattern);
+ printf("Pattern: /%s/, len = %d\n", pattern, len);
- pattern = setup_pattern(pattern, len);
- len = strlen(pattern);
+ pattern = setup_pattern(pattern, & len);
+ printf("After setup_pattern(), len = %d\n", len);
pat.fastmap = (char *) malloc(256);
if (pat.fastmap == NULL) {
@@ -191,7 +193,10 @@ int main(int argc, char **argv)
&count, &try_backref);
data[len] = save;
- printf("dfaexec returned %p (%.3s)\n", place, place);
+ if (place == NULL)
+ printf("dfaexec returned NULL\n");
+ else
+ printf("dfaexec returned %d (%.3s)\n", place - data, place);
/* release storage */
regfree(& pat);
@@ -363,7 +368,7 @@ r_fatal(const char *mesg, ...)
/* setup_pattern --- do what gawk does with the pattern string */
char *
-setup_pattern(const char *pattern, size_t len)
+setup_pattern(const char *pattern, size_t *len)
{
size_t is_multibyte = 0;
int c, c2;
@@ -377,7 +382,7 @@ setup_pattern(const char *pattern, size_t len)
memset(& mbs, 0, sizeof(mbs));
src = pattern;
- end = pattern + len;
+ end = pattern + *len;
/* Handle escaped characters first. */
@@ -387,19 +392,19 @@ setup_pattern(const char *pattern, size_t len)
* from that.
*/
if (buf == NULL) {
- buf = (char *) malloc(len + 2);
+ buf = (char *) malloc(*len + 2);
if (buf == NULL) {
fprintf(stderr, "%s: malloc failed\n", __func__);
exit(EXIT_FAILURE);
}
- buflen = len;
- } else if (len > buflen) {
- buf = (char *) realloc(buf, len + 2);
+ buflen = *len;
+ } else if (*len > buflen) {
+ buf = (char *) realloc(buf, *len + 2);
if (buf == NULL) {
fprintf(stderr, "%s: realloc failed\n", __func__);
exit(EXIT_FAILURE);
}
- buflen = len;
+ buflen = *len;
}
dest = buf;
@@ -487,7 +492,7 @@ setup_pattern(const char *pattern, size_t len)
} /* while */
*dest = '\0';
- len = dest - buf;
+ *len = dest - buf;
return buf;
}
diff --git a/interpret.h b/interpret.h
index 6cb39699..e84c7b36 100644
--- a/interpret.h
+++ b/interpret.h
@@ -972,10 +972,44 @@ match_re:
f = lookup(t1->stptr);
}
- if (f == NULL || f->type != Node_func) {
- if (f->type == Node_ext_func || f->type == Node_old_ext_func)
- fatal(_("cannot (yet) call extension functions indirectly"));
- else
+ if (f == NULL) {
+ fatal(_("`%s' is not a function, so it cannot be called indirectly"),
+ t1->stptr);
+ } else if (f->type == Node_builtin_func) {
+ int arg_count = (pc + 1)->expr_count;
+ builtin_func_t the_func = lookup_builtin(t1->stptr);
+
+ assert(the_func != NULL);
+
+ /* call it */
+ r = the_func(arg_count);
+ PUSH(r);
+ break;
+ } else if (f->type != Node_func) {
+ if ( f->type == Node_ext_func
+ || f->type == Node_old_ext_func) {
+ /* code copied from below, keep in sync */
+ INSTRUCTION *bc;
+ char *fname = pc->func_name;
+ int arg_count = (pc + 1)->expr_count;
+ static INSTRUCTION npc[2];
+
+ npc[0] = *pc;
+
+ bc = f->code_ptr;
+ assert(bc->opcode == Op_symbol);
+ if (f->type == Node_ext_func)
+ npc[0].opcode = Op_ext_builtin; /* self modifying code */
+ else
+ npc[0].opcode = Op_old_ext_builtin; /* self modifying code */
+ npc[0].extfunc = bc->extfunc;
+ npc[0].expr_count = arg_count; /* actual argument count */
+ npc[1] = pc[1];
+ npc[1].func_name = fname; /* name of the builtin */
+ npc[1].expr_count = bc->expr_count; /* defined max # of arguments */
+ ni = npc;
+ JUMPTO(ni);
+ } else
fatal(_("function called indirectly through `%s' does not exist"),
pc->func_name);
}
@@ -999,6 +1033,7 @@ match_re:
}
if (f->type == Node_ext_func || f->type == Node_old_ext_func) {
+ /* keep in sync with indirect call code */
INSTRUCTION *bc;
char *fname = pc->func_name;
int arg_count = (pc + 1)->expr_count;
@@ -1032,10 +1067,6 @@ match_re:
JUMPTO(ni);
case Op_K_getline_redir:
- if ((currule == BEGINFILE || currule == ENDFILE)
- && pc->into_var == false
- && pc->redir_type == redirect_input)
- fatal(_("`getline' invalid inside `%s' rule"), ruletab[currule]);
r = do_getline_redir(pc->into_var, pc->redir_type);
PUSH(r);
break;
@@ -1129,10 +1160,13 @@ match_re:
JUMPTO(ni);
}
- if (inrec(curfile, & errcode) != 0) {
- if (errcode > 0 && (do_traditional || ! pc->has_endfile))
- fatal(_("error reading input file `%s': %s"),
+ if (! inrec(curfile, & errcode)) {
+ if (errcode > 0) {
+ update_ERRNO_int(errcode);
+ if (do_traditional || ! pc->has_endfile)
+ fatal(_("error reading input file `%s': %s"),
curfile->public.name, strerror(errcode));
+ }
JUMPTO(ni);
} /* else
diff --git a/io.c b/io.c
index 3d128fad..feef47eb 100644
--- a/io.c
+++ b/io.c
@@ -198,6 +198,7 @@ typedef enum { CLOSE_ALL, CLOSE_TO, CLOSE_FROM } two_way_close_type;
#define at_eof(iop) (((iop)->flag & IOP_AT_EOF) != 0)
#define has_no_data(iop) ((iop)->dataend == NULL)
#define no_data_left(iop) ((iop)->off >= (iop)->dataend)
+#define buffer_has_all_data(iop) ((iop)->dataend - (iop)->off == (iop)->public.sbuf.st_size)
/*
* The key point to the design is to split out the code that searches through
@@ -271,7 +272,23 @@ static RECVALUE (*matchrec)(IOBUF *iop, struct recmatch *recm, SCANSTATE *state)
static int get_a_record(char **out, IOBUF *iop, int *errcode);
static void free_rp(struct redirect *rp);
-static int inetfile(const char *str, int *length, int *family);
+
+struct inet_socket_info {
+ int family; /* AF_UNSPEC, AF_INET, or AF_INET6 */
+ int protocol; /* SOCK_STREAM or SOCK_DGRAM */
+ /*
+ * N.B. If we used 'char *' or 'const char *' pointers to the
+ * substrings, it would trigger compiler warnings about the casts
+ * in either inetfile() or devopen(). So we use offset/len to
+ * avoid that.
+ */
+ struct {
+ int offset;
+ int len;
+ } localport, remotehost, remoteport;
+};
+
+static bool inetfile(const char *str, struct inet_socket_info *isn);
static NODE *in_PROCINFO(const char *pidx1, const char *pidx2, NODE **full_idx);
static long get_read_timeout(IOBUF *iop);
@@ -538,12 +555,12 @@ set_NR()
/* inrec --- This reads in a record from the input file */
-int
+bool
inrec(IOBUF *iop, int *errcode)
{
char *begin;
int cnt;
- int retval = 0;
+ bool retval = true;
if (at_eof(iop) && no_data_left(iop))
cnt = EOF;
@@ -553,9 +570,7 @@ inrec(IOBUF *iop, int *errcode)
cnt = get_a_record(& begin, iop, errcode);
if (cnt == EOF) {
- retval = 1;
- if (*errcode > 0)
- update_ERRNO_int(*errcode);
+ retval = false;
} else {
#if 0
/* XXX: looser if AWKNUM is long double */
@@ -569,6 +584,8 @@ inrec(IOBUF *iop, int *errcode)
FNR = numbr_hndlr->increment_var(FNR_node, FNR);
}
set_record(begin, cnt);
+ if (*errcode > 0)
+ retval = false;
}
return retval;
@@ -703,7 +720,9 @@ redirect(NODE *redir_exp, int redirtype, int *errflg)
int fd;
const char *what = NULL;
bool new_rp = false;
- int len; /* used with /inet */
+#ifdef HAVE_SOCKETS
+ struct inet_socket_info isi;
+#endif
static struct redirect *save_rp = NULL; /* hold onto rp that should
* be freed for reuse
*/
@@ -762,9 +781,9 @@ redirect(NODE *redir_exp, int redirtype, int *errflg)
* Use /inet4 to force IPv4, /inet6 to force IPv6, and plain
* /inet will be whatever we get back from the system.
*/
- if (inetfile(str, & len, NULL)) {
+ if (inetfile(str, & isi)) {
tflag |= RED_SOCKET;
- if (strncmp(str + len, "tcp/", 4) == 0)
+ if (isi.protocol == SOCK_STREAM)
tflag |= RED_TCP; /* use shutdown when closing */
}
#endif /* HAVE_SOCKETS */
@@ -891,7 +910,7 @@ redirect(NODE *redir_exp, int redirtype, int *errflg)
direction = "to/from";
if (! two_way_open(str, rp)) {
#ifdef HAVE_SOCKETS
- if (inetfile(str, NULL, NULL)) {
+ if (inetfile(str, NULL)) {
*errflg = errno;
/* do not free rp, saving it for reuse (save_rp = rp) */
return NULL;
@@ -1528,8 +1547,7 @@ devopen(const char *name, const char *mode)
char *cp;
char *ptr;
int flag = 0;
- int len;
- int family;
+ struct inet_socket_info isi;
if (strcmp(name, "-") == 0)
return fileno(stdin);
@@ -1566,74 +1584,14 @@ devopen(const char *name, const char *mode)
/* do not set close-on-exec for inherited fd's */
if (openfd != INVALID_HANDLE)
return openfd;
- } else if (inetfile(name, & len, & family)) {
+ } else if (inetfile(name, & isi)) {
#ifdef HAVE_SOCKETS
- /* /inet/protocol/localport/hostname/remoteport */
- int protocol;
- char *hostname;
- char *hostnameslastcharp;
- char *localpname;
- char *localpnamelastcharp;
-
- cp = (char *) name + len;
- /* which protocol? */
- if (strncmp(cp, "tcp/", 4) == 0)
- protocol = SOCK_STREAM;
- else if (strncmp(cp, "udp/", 4) == 0)
- protocol = SOCK_DGRAM;
- else {
- protocol = SOCK_STREAM; /* shut up the compiler */
- fatal(_("no (known) protocol supplied in special filename `%s'"),
- name);
- }
- cp += 4;
-
- /* which localport? */
- localpname = cp;
- while (*cp != '/' && *cp != '\0')
- cp++;
- /*
- * Require a port, let them explicitly put 0 if
- * they don't care.
- */
- if (*cp != '/' || cp == localpname)
- fatal(_("special file name `%s' is incomplete"), name);
-
- /*
- * We change the special file name temporarily because we
- * need a 0-terminated string here for conversion with atoi().
- * By using atoi() the use of decimal numbers is enforced.
- */
- *cp = '\0';
- localpnamelastcharp = cp;
+ cp = (char *) name;
- /* which hostname? */
- cp++;
- hostname = cp;
- while (*cp != '/' && *cp != '\0')
- cp++;
- if (*cp != '/' || cp == hostname) {
- *localpnamelastcharp = '/';
- fatal(_("must supply a remote hostname to `/inet'"));
- }
- *cp = '\0';
- hostnameslastcharp = cp;
-
- /* which remoteport? */
- cp++;
- /*
- * The remote port ends the special file name.
- * This means there already is a '\0' at the end of the string.
- * Therefore no need to patch any string ending.
- *
- * Here too, require a port, let them explicitly put 0 if
- * they don't care.
- */
- if (*cp == '\0') {
- *localpnamelastcharp = '/';
- *hostnameslastcharp = '/';
- fatal(_("must supply a remote port to `/inet'"));
- }
+ /* socketopen requires NUL-terminated strings */
+ cp[isi.localport.offset+isi.localport.len] = '\0';
+ cp[isi.remotehost.offset+isi.remotehost.len] = '\0';
+ /* remoteport comes last, so already NUL-terminated */
{
#define DEFAULT_RETRIES 20
@@ -1670,13 +1628,14 @@ devopen(const char *name, const char *mode)
retries = def_retries;
do {
- openfd = socketopen(family, protocol, localpname, cp, hostname);
+ openfd = socketopen(isi.family, isi.protocol, name+isi.localport.offset, name+isi.remoteport.offset, name+isi.remotehost.offset);
retries--;
} while (openfd == INVALID_HANDLE && retries > 0 && usleep(msleep) == 0);
}
- *localpnamelastcharp = '/';
- *hostnameslastcharp = '/';
+ /* restore original name string */
+ cp[isi.localport.offset+isi.localport.len] = '/';
+ cp[isi.remotehost.offset+isi.remotehost.len] = '/';
#else /* ! HAVE_SOCKETS */
fatal(_("TCP/IP communications are not supported"));
#endif /* HAVE_SOCKETS */
@@ -1690,9 +1649,8 @@ strictopen:
/* On OS/2 and Windows directory access via open() is
not permitted. */
struct stat buf;
- int l, f;
- if (!inetfile(name, &l, &f)
+ if (!inetfile(name, NULL)
&& stat(name, & buf) == 0 && S_ISDIR(buf.st_mode))
errno = EISDIR;
}
@@ -1714,7 +1672,7 @@ two_way_open(const char *str, struct redirect *rp)
#ifdef HAVE_SOCKETS
/* case 1: socket */
- if (inetfile(str, NULL, NULL)) {
+ if (inetfile(str, NULL)) {
int fd, newfd;
fd = devopen(str, "rw");
@@ -2111,6 +2069,7 @@ use_pipes:
|| close(ctop[0]) == -1 || close(ctop[1]) == -1)
fatal(_("close of pipe failed (%s)"), strerror(errno));
/* stderr does NOT get dup'ed onto child's stdout */
+ signal(SIGPIPE, SIG_DFL);
execl("/bin/sh", "sh", "-c", str, NULL);
_exit(errno == ENOENT ? 127 : 126);
}
@@ -2304,6 +2263,7 @@ gawk_popen(const char *cmd, struct redirect *rp)
fatal(_("moving pipe to stdout in child failed (dup: %s)"), strerror(errno));
if (close(p[0]) == -1 || close(p[1]) == -1)
fatal(_("close of pipe failed (%s)"), strerror(errno));
+ signal(SIGPIPE, SIG_DFL);
execl("/bin/sh", "sh", "-c", cmd, NULL);
_exit(errno == ENOENT ? 127 : 126);
}
@@ -3512,9 +3472,18 @@ get_a_record(char **out, /* pointer to pointer to data */
ret = (*matchrec)(iop, & recm, & state);
iop->flag &= ~IOP_AT_START;
+ /* found the record, we're done, break the loop */
if (ret == REC_OK)
break;
+ /*
+ * Likely found the record; if there's no more data
+ * to be had (like from a tiny regular file), break the
+ * loop. Otherwise, see if we can read more.
+ */
+ if (ret == TERMNEAREND && buffer_has_all_data(iop))
+ break;
+
/* need to add more data to buffer */
/* shift data down in buffer */
dataend_off = iop->dataend - iop->off;
@@ -3564,10 +3533,14 @@ get_a_record(char **out, /* pointer to pointer to data */
break;
} else if (iop->count == 0) {
/*
- * hit EOF before matching RS, so end
- * the record and set RT to ""
+ * Hit EOF before being certain that we've matched
+ * the end of the record. If ret is TERMNEAREND,
+ * we need to pull out what we've got in the buffer.
+ * Eventually we'll come back here and see the EOF,
+ * end the record and set RT to "".
*/
- iop->flag |= IOP_AT_EOF;
+ if (ret != TERMNEAREND)
+ iop->flag |= IOP_AT_EOF;
break;
} else
iop->dataend += iop->count;
@@ -3749,35 +3722,92 @@ free_rp(struct redirect *rp)
/* inetfile --- return true for a /inet special file, set other values */
-static int
-inetfile(const char *str, int *length, int *family)
+static bool
+inetfile(const char *str, struct inet_socket_info *isi)
{
- bool ret = false;
-
- if (strncmp(str, "/inet/", 6) == 0) {
- ret = true;
- if (length != NULL)
- *length = 6;
- if (family != NULL)
- *family = AF_UNSPEC;
- } else if (strncmp(str, "/inet4/", 7) == 0) {
- ret = true;
- if (length != NULL)
- *length = 7;
- if (family != NULL)
- *family = AF_INET;
- } else if (strncmp(str, "/inet6/", 7) == 0) {
- ret = true;
- if (length != NULL)
- *length = 7;
- if (family != NULL)
- *family = AF_INET6;
+#ifndef HAVE_SOCKETS
+ return false;
+#else
+ const char *cp = str;
+ struct inet_socket_info buf;
+
+ /* syntax: /inet/protocol/localport/hostname/remoteport */
+ if (strncmp(cp, "/inet", 5) != 0)
+ /* quick exit */
+ return false;
+ if (! isi)
+ isi = & buf;
+ cp += 5;
+ switch (*cp) {
+ case '/':
+ isi->family = AF_UNSPEC;
+ break;
+ case '4':
+ if (*++cp != '/')
+ return false;
+ isi->family = AF_INET;
+ break;
+ case '6':
+ if (*++cp != '/')
+ return false;
+ isi->family = AF_INET6;
+ break;
+ default:
+ return false;
+ }
+ cp++; /* skip past '/' */
+
+ /* which protocol? */
+ if (strncmp(cp, "tcp/", 4) == 0)
+ isi->protocol = SOCK_STREAM;
+ else if (strncmp(cp, "udp/", 4) == 0)
+ isi->protocol = SOCK_DGRAM;
+ else
+ return false;
+ cp += 4;
+
+ /* which localport? */
+ isi->localport.offset = cp-str;
+ while (*cp != '/' && *cp != '\0')
+ cp++;
+ /*
+ * Require a port, let them explicitly put 0 if
+ * they don't care.
+ */
+ if (*cp != '/' || ((isi->localport.len = (cp-str)-isi->localport.offset) == 0))
+ return false;
+
+ /* which hostname? */
+ cp++;
+ isi->remotehost.offset = cp-str;
+ while (*cp != '/' && *cp != '\0')
+ cp++;
+ if (*cp != '/' || ((isi->remotehost.len = (cp-str)-isi->remotehost.offset) == 0))
+ return false;
+
+ /* which remoteport? */
+ cp++;
+ /*
+ * The remote port ends the special file name.
+ * This means there already is a '\0' at the end of the string.
+ * Therefore no need to patch any string ending.
+ *
+ * Here too, require a port, let them explicitly put 0 if
+ * they don't care.
+ */
+ isi->remoteport.offset = cp-str;
+ while (*cp != '/' && *cp != '\0')
+ cp++;
+ if (*cp != '\0' || ((isi->remoteport.len = (cp-str)-isi->remoteport.offset) == 0))
+ return false;
+
#ifndef HAVE_GETADDRINFO
+ /* final check for IPv6: */
+ if (isi->family == AF_INET6)
fatal(_("IPv6 communication is not supported"));
#endif
- }
-
- return ret;
+ return true;
+#endif /* HAVE_SOCKETS */
}
/*
diff --git a/main.c b/main.c
index b3c41a01..4a31459e 100644
--- a/main.c
+++ b/main.c
@@ -33,6 +33,16 @@
#include <mcheck.h>
#endif
+#ifdef HAVE_LIBSIGSEGV
+#include <sigsegv.h>
+#else
+typedef void *stackoverflow_context_t;
+/* the argument to this macro is purposely not used */
+#define sigsegv_install_handler(catchsegv) signal(SIGSEGV, catchsig)
+/* define as 0 rather than empty so that (void) cast on it works */
+#define stackoverflow_install_handler(catchstackoverflow, extra_stack, STACK_SIZE) 0
+#endif
+
#define DEFAULT_PROFILE "awkprof.out" /* where to put profile */
#define DEFAULT_VARFILE "awkvars.out" /* where to put vars */
@@ -269,17 +279,6 @@ main(int argc, char **argv)
*/
gawk_mb_cur_max = MB_CUR_MAX;
/* Without MBS_SUPPORT, gawk_mb_cur_max is 1. */
-#ifdef LIBC_IS_BORKED
-{
- const char *env_lc;
-
- env_lc = getenv("LC_ALL");
- if (env_lc == NULL)
- env_lc = getenv("LANG");
- if (env_lc != NULL && env_lc[1] == '\0' && tolower(env_lc[0]) == 'c')
- gawk_mb_cur_max = 1;
-}
-#endif
/* init the cache for checking bytes if they're characters */
init_btowc_cache();
@@ -471,7 +470,11 @@ main(int argc, char **argv)
break;
case 'M':
+#ifdef HAVE_MPFR
numbr_hndlr = & mpfp_hndlr;
+#else
+ warning(_("-M ignored: MPFR/GMP support not compiled in"));
+#endif
break;
case 'P':
@@ -698,6 +701,8 @@ out:
if (do_intl)
exit(EXIT_SUCCESS);
+ install_builtins();
+
if (do_lint)
shadow_funcs();
@@ -734,9 +739,8 @@ out:
if (do_debug)
debug_prog(code_block);
- else if (do_pretty_print && ! do_debug && getenv("GAWK_NO_PP_RUN") != NULL)
- /* hack to run pretty printer only. need a better solution */
- ;
+ else if (do_pretty_print && ! do_profile)
+ ; /* run pretty printer only. */
else
interpret(code_block);
@@ -814,10 +818,10 @@ usage(int exitval, FILE *fp)
fputs(_("\t-h\t\t\t--help\n"), fp);
fputs(_("\t-i includefile\t\t--include=includefile\n"), fp);
fputs(_("\t-l library\t\t--load=library\n"), fp);
- fputs(_("\t-L [fatal]\t\t--lint[=fatal]\n"), fp);
- fputs(_("\t-n\t\t\t--non-decimal-data\n"), fp);
+ fputs(_("\t-L[fatal|invalid]\t--lint[=fatal|invalid]\n"), fp);
fputs(_("\t-M\t\t\t--bignum\n"), fp);
fputs(_("\t-N\t\t\t--use-lc-numeric\n"), fp);
+ fputs(_("\t-n\t\t\t--non-decimal-data\n"), fp);
fputs(_("\t-o[file]\t\t--pretty-print[=file]\n"), fp);
fputs(_("\t-O\t\t\t--optimize\n"), fp);
fputs(_("\t-p[file]\t\t--profile[=file]\n"), fp);
@@ -1313,11 +1317,11 @@ arg_assign(char *arg, bool initing)
/* first check that the variable name has valid syntax */
badvar = false;
- if (! isalpha((unsigned char) arg[0]) && arg[0] != '_')
+ if (! is_alpha((unsigned char) arg[0]) && arg[0] != '_')
badvar = true;
else
for (cp2 = arg+1; *cp2; cp2++)
- if (! isalnum((unsigned char) *cp2) && *cp2 != '_') {
+ if (! is_identchar((unsigned char) *cp2)) {
badvar = true;
break;
}
diff --git a/mpfr.c b/mpfr.c
index 1008745f..d7d22b07 100644
--- a/mpfr.c
+++ b/mpfr.c
@@ -84,6 +84,7 @@ static NODE *do_mpfp_and(int);
static NODE *do_mpfp_atan2(int);
static NODE *do_mpfp_compl(int);
static NODE *do_mpfp_cos(int);
+static NODE *do_mpfp_div(int);
static NODE *do_mpfp_exp(int);
static NODE *do_mpfp_int(int);
static NODE *do_mpfp_log(int);
@@ -195,6 +196,7 @@ mpfp_init(bltin_t **numbr_bltins)
{ "atan2", do_mpfp_atan2 },
{ "compl", do_mpfp_compl },
{ "cos", do_mpfp_cos },
+ { "div", do_mpfp_div },
{ "exp", do_mpfp_exp },
{ "int", do_mpfp_int },
{ "log", do_mpfp_log },
@@ -1020,22 +1022,42 @@ do_mpfp_atan2(int nargs)
return res;
}
+/* do_mpfp_func --- run an MPFR function - not inline, for debugging */
-#define MPFPFUNC(X) \
-NODE *t1, *res; \
-mpfr_ptr p1; \
-int tval; \
-t1 = POP_SCALAR(); \
-if (do_lint && (t1->flags & (NUMCUR|NUMBER)) == 0) \
- lintwarn(_("%s: received non-numeric argument"), #X); \
-t1 = force_number(t1); \
-p1 = mpfp_tofloat(t1, _mp1); \
-res = mpfp_float(); \
-tval = mpfr_##X(res->qnumbr, p1, ROUND_MODE); \
-IEEE_FMT(res->qnumbr, tval); \
-DEREF(t1); \
-return res
+static inline NODE *
+do_mpfp_func(const char *name,
+ int (*mpfr_func)(), /* putting argument types just gets the compiler confused */
+ int nargs)
+{
+ NODE *t1, *res;
+ mpfr_ptr p1;
+ int tval;
+ int prec;
+
+ t1 = POP_SCALAR();
+ if (do_lint && (t1->flags & (NUMCUR|NUMBER)) == 0)
+ lintwarn(_("%s: received non-numeric argument"), name);
+
+ force_number(t1);
+
+ if (is_mpfp_integer(t1))
+ p1 = mpfp_tofloat(t1, _mp1);
+ else
+ p1 = MPFR_T(t1);
+ res = mpfp_float();
+ prec = mpfr_get_prec(p1);
+ mpfr_set_prec(res->qnumbr, prec); /* needed at least for sqrt() */
+ tval = mpfr_func(res->qnumbr, p1, ROUND_MODE);
+ IEEE_FMT(res->qnumbr, tval);
+ DEREF(t1);
+ return res;
+}
+
+#define MPFPFUNC(X) \
+NODE *result; \
+result = do_mpfp_func(#X, mpfr_##X, nargs); \
+return result
/* do_mpfp_sin --- do the sin function */
@@ -1501,6 +1523,131 @@ do_mpfp_srand(int nargs)
return res;
}
+/* do_mpfp_div --- do integer division, return quotient and remainder in dest array */
+
+/*
+ * We define the semantics as:
+ * numerator = int(numerator)
+ * denominator = int(denominator)
+ * quotient = int(numerator / denominator)
+ * remainder = int(numerator % denominator)
+ */
+
+static NODE *
+do_mpfp_div(int nargs)
+{
+ NODE *numerator, *denominator, *result;
+ NODE *num, *denom;
+ NODE *quotient, *remainder;
+ NODE *sub, **lhs;
+
+ result = POP_PARAM();
+ if (result->type != Node_var_array)
+ fatal(_("div: third argument is not an array"));
+ assoc_clear(result);
+
+ denominator = POP_SCALAR();
+ numerator = POP_SCALAR();
+
+ if (do_lint) {
+ if ((numerator->flags & (NUMCUR|NUMBER)) == 0)
+ lintwarn(_("div: received non-numeric first argument"));
+ if ((denominator->flags & (NUMCUR|NUMBER)) == 0)
+ lintwarn(_("div: received non-numeric second argument"));
+ }
+
+ (void) force_number(numerator);
+ (void) force_number(denominator);
+
+ /* convert numerator and denominator to integer */
+ if (is_mpfp_integer(numerator)) {
+ num = mpfp_integer();
+ mpz_set(num->qnumbr, numerator->qnumbr);
+ } else {
+ if (! mpfr_number_p(numerator->qnumbr)) {
+ /* [+-]inf or NaN */
+ return numerator;
+ }
+
+ num = mpfp_integer();
+ mpfr_get_z(num->qnumbr, numerator->qnumbr, MPFR_RNDZ);
+ }
+
+ if (is_mpfp_integer(denominator)) {
+ denom = mpfp_integer();
+ mpz_set(denom->qnumbr, denominator->qnumbr);
+ } else {
+ if (! mpfr_number_p(denominator->qnumbr)) {
+ /* [+-]inf or NaN */
+ return denominator;
+ }
+
+ denom = mpfp_integer();
+ mpfr_get_z(denom->qnumbr, denominator->qnumbr, MPFR_RNDZ);
+ }
+
+ if (mpz_sgn(MPZ_T(denom->qnumbr)) == 0)
+ fatal(_("div: division by zero attempted"));
+
+ quotient = mpfp_integer();
+ remainder = mpfp_integer();
+
+ /* do the division */
+ mpz_tdiv_qr(quotient->qnumbr, remainder->qnumbr, num->qnumbr, denom->qnumbr);
+ unref(num);
+ unref(denom);
+ unref(numerator);
+ unref(denominator);
+
+ sub = make_string("quotient", 8);
+ lhs = assoc_lookup(result, sub);
+ unref(*lhs);
+ *lhs = quotient;
+
+ sub = make_string("remainder", 9);
+ lhs = assoc_lookup(result, sub);
+ unref(*lhs);
+ *lhs = remainder;
+
+ return make_number((AWKNUM) 0.0);
+}
+
+/*
+ * mpg_tofloat --- convert an arbitrary-precision integer operand to
+ * a float without loss of precision. It is assumed that the
+ * MPFR variable has already been initialized.
+ */
+
+static inline mpfr_ptr
+mpg_tofloat(mpfr_ptr mf, mpz_ptr mz)
+{
+ size_t prec;
+
+ /*
+ * When implicitly converting a GMP integer operand to an MPFR float, use
+ * a precision sufficiently large to hold the converted value exactly.
+ *
+ * $ ./gawk -M 'BEGIN { print 13 % 2 }'
+ * 1
+ * If the user-specified precision is used to convert the integer 13 to a
+ * float, one will get:
+ * $ ./gawk -M 'BEGIN { PREC=2; print 13 % 2.0 }'
+ * 0
+ */
+
+ prec = mpz_sizeinbase(mz, 2); /* most significant 1 bit position starting at 1 */
+ if (prec > PRECISION_MIN) {
+ prec -= (size_t) mpz_scan1(mz, 0); /* least significant 1 bit index starting at 0 */
+ if (prec > MPFR_PREC_MAX)
+ prec = MPFR_PREC_MAX;
+ if (prec > PRECISION_MIN)
+ mpfr_set_prec(mf, prec);
+ }
+
+ mpfr_set_z(mf, mz, ROUND_MODE);
+ return mf;
+}
+
/* mpfp_add --- add arbitrary-precision numbers */
@@ -1656,8 +1803,27 @@ mpfp_mod(const NODE *t1, const NODE *t2)
int tval;
if (is_mpfp_integer(t1) && is_mpfp_integer(t2)) {
+ /*
+ * 8/2014: Originally, this was just
+ *
+ * r = mpg_integer();
+ * mpz_mod(r->mpg_i, t1->mpg_i, t2->mpg_i);
+ *
+ * But that gave very strange results with negative numerator:
+ *
+ * $ ./gawk -M 'BEGIN { print -15 % 7 }'
+ * 6
+ *
+ * So instead we use mpz_tdiv_qr() to get the correct result
+ * and just throw away the quotient. We could not find any
+ * reason why mpz_mod() wasn't working correctly.
+ */
+ NODE *dummy_quotient;
+
r = mpfp_integer();
- mpz_mod(r->qnumbr, t1->qnumbr, t2->qnumbr);
+ dummy_quotient = mpfp_integer();
+ mpz_tdiv_qr(dummy_quotient->qnumbr, r->qnumbr, t1->qnumbr, t2->qnumbr);
+ unref(dummy_quotient);
} else {
mpfr_ptr p1, p2;
diff --git a/node.c b/node.c
index ed034c99..81a1e55d 100644
--- a/node.c
+++ b/node.c
@@ -39,7 +39,6 @@ AWKNUM (*get_number_d)(const NODE *);
uintmax_t (*get_number_uj)(const NODE *);
int (*sgn_number)(const NODE *);
-
/* r_dupnode --- duplicate a node */
NODE *
@@ -291,9 +290,8 @@ parse_escape(const char **string_ptr)
warning(_("no hex digits in `\\x' escape sequence"));
return ('x');
}
- i = j = 0;
start = *string_ptr;
- for (;; j++) {
+ for (i = j = 0; j < 2; j++) {
/* do outside test to avoid multiple side effects */
c = *(*string_ptr)++;
if (isxdigit(c)) {
diff --git a/pc/ChangeLog b/pc/ChangeLog
index d8e60c98..a66edae9 100644
--- a/pc/ChangeLog
+++ b/pc/ChangeLog
@@ -1,3 +1,7 @@
+2014-04-17 Scott Deifik <scottd.mail@sbcglobal.net>
+
+ * Makefile.tst: Add readfile2 test.
+
2014-04-08 Arnold D. Robbins <arnold@skeeve.com>
* 4.1.1: Release tar ball made.
diff --git a/pc/Makefile.tst b/pc/Makefile.tst
index 866b4692..610704e4 100644
--- a/pc/Makefile.tst
+++ b/pc/Makefile.tst
@@ -141,8 +141,7 @@ BASIC_TESTS = \
arrayref arrymem1 arryref2 arryref3 arryref4 arryref5 arynasty \
arynocls aryprm1 aryprm2 aryprm3 aryprm4 aryprm5 aryprm6 aryprm7 \
aryprm8 arysubnm asgext awkpath \
- back89 backgsub \
- badassign1 \
+ back89 backgsub badassign1 \
childin clobber closebad clsflnam compare compare2 concat1 concat2 \
concat3 concat4 convfmt \
datanonl defref delargv delarpm2 delarprm delfunc dfamb1 dfastress dynlj \
@@ -211,7 +210,7 @@ LOCALE_CHARSET_TESTS = \
SHLIB_TESTS = \
fnmatch filefuncs fork fork2 fts functab4 inplace1 inplace2 inplace3 \
- ordchr ordchr2 readdir readfile revout revtwoway rwarray testext time
+ ordchr ordchr2 readdir readfile readfile2 revout revtwoway rwarray testext time
# List of the tests which should be run with --lint option:
NEED_LINT = \
@@ -950,6 +949,11 @@ readfile::
@$(AWK) -l readfile 'BEGIN {printf "%s", readfile("Makefile")}' >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) Makefile _$@ && rm -f _$@ || cp -p Makefile $@.ok
+readfile2::
+ @echo $@
+ @$(AWK) -f "$(srcdir)"/$@.awk "$(srcdir)"/$@.awk "$(srcdir)"/readdir.awk > _$@ || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
include2::
@echo $@
@AWKPATH="$(srcdir)" $(AWK) --include inclib 'BEGIN {print sandwich("a", "b", "c")}' >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/po/CMakeLists.txt b/po/CMakeLists.txt
new file mode 100644
index 00000000..cd930077
--- /dev/null
+++ b/po/CMakeLists.txt
@@ -0,0 +1,133 @@
+# Most of this copied from the repository of Stellarium
+# http://sourceforge.net/projects/stellarium/
+
+# Special targets for translations:
+#
+# translations
+# Converts all PO files to GMO files. Note that it does *not* update
+# the PO files or the PO templates -- in fact, these files are never
+# updated automatically.
+#
+# generate-pot
+# Re-creates all POT files unconditionally.
+#
+# update-po
+# Updates all PO files unconditionally. Note that it takes care of
+# updating the POT files.
+#
+# translations-<DOMAIN>
+# generate-pot-<DOMAIN>
+# update-po-<DOMAIN>
+# Same as above, but only affect the files in the corresponding
+# po/<DOMAIN> directory. (DOMAIN is actually the base name of the POT
+# file in the subdirectory, but that should match the directory name
+# anyway.)
+
+ADD_CUSTOM_TARGET(translations)
+ADD_CUSTOM_TARGET(generate-pot)
+ADD_CUSTOM_TARGET(update-po)
+
+# GETTEXT_CREATE_TRANSLATIONS(domain [DEFAULT_TARGET] lang1 ... langN)
+#
+# Creates custom build rules to create and install (G)MO files for the
+# specified languages. If the DEFAULT_TARGET option is used, the
+# translations will also be created when building the default target.
+#
+# "domain" is the translation domain, eg. "gawk". A POT file
+# with the name ${domain}.pot must exist in the directory of the
+# CMakeLists.txt file invoking the macro.
+#
+# This macro also creates the "translations-${domain}" and
+# "update-po-${domain}" targets (see above for an explanation).
+#
+MACRO(GETTEXT_CREATE_TRANSLATIONS _domain _firstLang)
+
+ SET(_gmoFiles)
+ GET_FILENAME_COMPONENT(_absPotFile ${_domain}.pot ABSOLUTE)
+
+ # Update these PO files when building the "update-po-<DOMAIN>" and
+ # "update-po" targets.
+ ADD_CUSTOM_TARGET(update-po-${_domain})
+ ADD_DEPENDENCIES(update-po update-po-${_domain})
+
+ # Make sure the POT file is updated before updating the PO files.
+ ADD_DEPENDENCIES(update-po-${_domain} generate-pot-${_domain})
+
+ SET(_addToAll)
+ IF(${_firstLang} STREQUAL "DEFAULT_TARGET")
+ SET(_addToAll "ALL")
+ SET(_firstLang)
+ ENDIF(${_firstLang} STREQUAL "DEFAULT_TARGET")
+
+ FOREACH (_lang ${ARGN})
+ GET_FILENAME_COMPONENT(_absFile ${_lang}.po ABSOLUTE)
+ FILE(RELATIVE_PATH _relFile ${PROJECT_SOURCE_DIR} ${_absFile})
+ SET(_gmoFile ${CMAKE_CURRENT_BINARY_DIR}/${_lang}.gmo)
+
+ # Convert a PO file into a GMO file.
+ ADD_CUSTOM_COMMAND(
+ OUTPUT ${_gmoFile}
+ COMMAND ${GETTEXT_MSGFMT_EXECUTABLE} -o ${_gmoFile} ${_absFile}
+ DEPENDS ${_absFile}
+ )
+
+ # Update the PO file unconditionally when building the
+ # "update-po-<DOMAIN>" target. Note that to see the file being
+ # processed, we have to run "cmake -E echo", because the
+ # COMMENT is not displayed by cmake...
+ ADD_CUSTOM_COMMAND(
+ TARGET update-po-${_domain}
+ POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E echo "** Updating ${_relFile}"
+ COMMAND ${GETTEXT_MSGMERGE_EXECUTABLE}
+ --quiet --update -m --backup=none -s
+ ${_absFile} ${_absPotFile}
+ VERBATIM
+ )
+
+ INSTALL(FILES ${_gmoFile} DESTINATION share/locale/${_lang}/LC_MESSAGES RENAME ${_domain}.mo)
+ SET(_gmoFiles ${_gmoFiles} ${_gmoFile})
+
+ ENDFOREACH (_lang)
+
+ # Create the GMO files when building the "translations-<DOMAIN>" and
+ # "translations" targets.
+ ADD_CUSTOM_TARGET(translations-${_domain} ${_addToAll} DEPENDS ${_gmoFiles})
+ ADD_DEPENDENCIES(translations translations-${_domain})
+
+ENDMACRO(GETTEXT_CREATE_TRANSLATIONS )
+
+SET(gawk_DOMAIN gawk)
+SET(gawk_POT ${gawk_DOMAIN}.pot)
+
+file(READ LINGUAS linguas)
+string(REGEX REPLACE "\n" ";" linguas ${linguas})
+GETTEXT_CREATE_TRANSLATIONS(${gawk_DOMAIN} DEFAULT_TARGET ${linguas})
+
+ADD_CUSTOM_TARGET(
+ generate-pot-${gawk_DOMAIN}
+ ${GETTEXT_XGETTEXT_EXECUTABLE}
+ -o ${CMAKE_CURRENT_SOURCE_DIR}/${gawk_POT}
+ -C
+ --keyword=_
+ --keyword=N_
+ --keyword=q_
+ --keyword=translate:2
+ --add-comments=TRANSLATORS:
+ --directory=${CMAKE_BINARY_DIR}
+ --directory=${CMAKE_SOURCE_DIR}
+ --output-dir=${CMAKE_BINARY_DIR}
+ --files-from=${CMAKE_CURRENT_SOURCE_DIR}/POTFILES.in
+ --copyright-holder=FSF
+ WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+ COMMENT "Generating ${gawk_POT}"
+ VERBATIM
+)
+# TODO: It would be nice to just depend on the exact files in POTFILES.in
+#file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${gawk_POT} UiHeaders)
+#ADD_DEPENDENCIES(generate-pot-${gawk_DOMAIN} UiHeaders)
+#ADD_DEPENDENCIES(generate-pot-${gawk_DOMAIN} gawk_UIS_H)
+# Make sure the UI headers are created first.
+ADD_DEPENDENCIES(generate-pot-${gawk_DOMAIN} StelGuiLib) # ??? FIXME
+# Generate this POT file when building the "generate-pot" target.
+ADD_DEPENDENCIES(generate-pot generate-pot-${gawk_DOMAIN})
diff --git a/po/id.po b/po/id.po
index 5fa3b23a..d1b97f73 100644
--- a/po/id.po
+++ b/po/id.po
@@ -1,14 +1,14 @@
# Pesan bahasa indonesia untuk gawk.
# Copyright (C) 2008 Free Software Foundation, Inc.
# This file is distributed under the same license as the gawk package.
-# Arif E. Nugroho <arif_endro@yahoo.com>, 2008, 2009.
+# Arif E. Nugroho <arif_endro@yahoo.com>, 2008, 2009, 2010, 2011, 2012, 2013, 2014.
#
msgid ""
msgstr ""
-"Project-Id-Version: gawk 3.1.6e\n"
+"Project-Id-Version: gawk 4.1.0b\n"
"Report-Msgid-Bugs-To: arnold@skeeve.com\n"
-"POT-Creation-Date: 2011-03-18 12:00+0200\n"
-"PO-Revision-Date: 2009-07-11 14:00+0700\n"
+"POT-Creation-Date: 2014-01-14 22:23+0200\n"
+"PO-Revision-Date: 2014-08-03 07:30+0700\n"
"Last-Translator: Arif E. Nugroho <arif_endro@yahoo.com>\n"
"Language-Team: Indonesian <translation-team-id@lists.sourceforge.net>\n"
"Language: id\n"
@@ -16,494 +16,473 @@ msgstr ""
"Content-Type: text/plain; charset=ISO-8859-1\n"
"Content-Transfer-Encoding: 8bit\n"
-#: array.c:103
+#: array.c:256
#, c-format
msgid "from %s"
msgstr "dari %s"
-#: array.c:267
-#, fuzzy
+#: array.c:357
msgid "attempt to use a scalar value as array"
-msgstr "mencoba untuk menggunakan skalar `%s' sebagai sebuah array"
-
-#: array.c:270
-#, c-format
-msgid "attempt to use function `%s' as an array"
-msgstr "mencoba untuk menggunakan fungsi `%s' sebagai sebuah array"
+msgstr "mencoba untuk menggunakan skalar sebagai sebuah array"
-#: array.c:273
+#: array.c:359
#, c-format
msgid "attempt to use scalar parameter `%s' as an array"
msgstr "mencoba untuk menggunakan parameter `%s' sebagai sebuah array"
-#: array.c:276 eval.c:2013
-#, fuzzy, c-format
+#: array.c:362
+#, c-format
msgid "attempt to use scalar `%s' as an array"
msgstr "mencoba untuk menggunakan skalar `%s' sebagai sebuah array"
-#: array.c:321 array.c:648 builtin.c:75 builtin.c:555 builtin.c:597
-#: builtin.c:610 builtin.c:1016 builtin.c:1028 eval.c:1381 eval.c:1385
-#: eval.c:1710 eval.c:1958 eval.c:2026 eval.c:2274
+#: array.c:409 array.c:576 builtin.c:85 builtin.c:1599 builtin.c:1645
+#: builtin.c:1658 builtin.c:2086 builtin.c:2100 eval.c:1122 eval.c:1126
+#: eval.c:1531
#, c-format
msgid "attempt to use array `%s' in a scalar context"
msgstr "mencoba menggunakan array `%s' dalam sebuah konteks skalar"
-#: array.c:570
-#, fuzzy, c-format
-msgid "reference to uninitialized element `%s[\"%.*s\"]'"
-msgstr "referensi ke elemen tidak terinisialisasi `%s[\"%s\"]'"
-
-#: array.c:576
-#, c-format
-msgid "subscript of array `%s' is null string"
-msgstr "subscript dari array `%s' adalah string null"
-
-#: array.c:684
+#: array.c:583
#, c-format
msgid "delete: index `%s' not in array `%s'"
msgstr "delete: indeks `%s' tidak dalam array `%s'"
-#: array.c:708
-#, fuzzy, c-format
+#: array.c:597
+#, c-format
msgid "attempt to use scalar `%s[\"%.*s\"]' as an array"
-msgstr "mencoba untuk menggunakan skalar `%s' sebagai sebuah array"
+msgstr "mencoba untuk menggunakan skalar `%s[\"%.*s\"]' sebagai sebuah array"
-#: array.c:871
-#, c-format
-msgid "%s: empty (null)\n"
-msgstr "%s: kosong (null)\n"
+#: array.c:776
+msgid "adump: first argument not an array"
+msgstr "adump: argumen pertama bukan sebuah array"
-#: array.c:876
-#, c-format
-msgid "%s: empty (zero)\n"
-msgstr "%s: kosong (nol)\n"
+#: array.c:815
+msgid "asort: second argument not an array"
+msgstr "asort: argumen kedua bukan sebuah array"
-#: array.c:880
-#, c-format
-msgid "%s: table_size = %d, array_size = %d\n"
-msgstr "%s: table_size = %d, array_size = %d\n"
+#: array.c:816
+msgid "asorti: second argument not an array"
+msgstr "asorti: argumen kedua bukan sebuah array"
-#: array.c:915
-#, c-format
-msgid "%s: is parameter\n"
-msgstr "%s: adalah parameter\n"
+#: array.c:823
+msgid "asort: first argument not an array"
+msgstr "asort: argumen pertama bukan sebuah array"
-#: array.c:919
-#, c-format
-msgid "%s: array_ref to %s\n"
-msgstr "%s: array_ref ke %s\n"
+#: array.c:824
+msgid "asorti: first argument not an array"
+msgstr "asorti: argumen pertama bukan sebuah array"
-#: array.c:924
-#, fuzzy
-msgid "adump: argument not an array"
-msgstr "match: argumen ketiga bukan sebuah array"
+#: array.c:831
+msgid "asort: cannot use a subarray of first arg for second arg"
+msgstr "asort: cannot use a subarray of first arg for second arg"
-#: array.c:1142
-#, fuzzy
-msgid "attempt to use array in a scalar context"
-msgstr "mencoba menggunakan array `%s' dalam sebuah konteks skalar"
+#: array.c:832
+msgid "asorti: cannot use a subarray of first arg for second arg"
+msgstr "asorti: cannot use a subarray of first arg for second arg"
-#: array.c:1239
-#, fuzzy
-msgid "asort: second argument not an array"
-msgstr "split: argumen kedua bukan sebuah array"
+#: array.c:837
+msgid "asort: cannot use a subarray of second arg for first arg"
+msgstr "asort: cannot use a subarray of second arg for first arg"
-#: array.c:1240
-#, fuzzy
-msgid "asorti: second argument not an array"
-msgstr "split: argumen kedua bukan sebuah array"
+#: array.c:838
+msgid "asorti: cannot use a subarray of second arg for first arg"
+msgstr "asorti: cannot use a subarray of second arg for first arg"
-#: array.c:1247
-#, fuzzy
-msgid "asort: first argument not an array"
-msgstr "match: argumen ketiga bukan sebuah array"
+#: array.c:1314
+#, c-format
+msgid "`%s' is invalid as a function name"
+msgstr "`%s' tidak valid sebagai nama fungsi"
-#: array.c:1248
-#, fuzzy
-msgid "asorti: first argument not an array"
-msgstr "match: argumen ketiga bukan sebuah array"
+#: array.c:1318
+#, c-format
+msgid "sort comparison function `%s' is not defined"
+msgstr "fungsi perbandingan pengurutan `%s' tidak didefinisikan"
-#: awkgram.y:249
+#: awkgram.y:233
#, c-format
msgid "%s blocks must have an action part"
msgstr "%s blok harus memiliki sebuah bagian aksi"
-#: awkgram.y:252
+#: awkgram.y:236
msgid "each rule must have a pattern or an action part"
msgstr "setiap aturan harus memiliki sebuah pola atau sebuah bagian aksi"
-#: awkgram.y:323 awkgram.y:334
+#: awkgram.y:325 awkgram.y:336
msgid "old awk does not support multiple `BEGIN' or `END' rules"
msgstr "awk lama tidak mendukung multiple aturan `BEGIN' atau `END'"
-#: awkgram.y:371
+#: awkgram.y:373
#, c-format
msgid "`%s' is a built-in function, it cannot be redefined"
msgstr "`%s' adalah sebuah fungsi bawaan, ini tidak dapat di redefinisi"
-#: awkgram.y:432
+#: awkgram.y:419
msgid "regexp constant `//' looks like a C++ comment, but is not"
msgstr "konstanta regexp `//' tampak seperti sebuah komentar C++, tetapi bukan"
-#: awkgram.y:436
+#: awkgram.y:423
#, c-format
msgid "regexp constant `/%s/' looks like a C comment, but is not"
msgstr "konstanta regexp `/%s/' tampak seperti sebuah komentar C, tetapi bukan"
-#: awkgram.y:528
+#: awkgram.y:515
#, c-format
msgid "duplicate case values in switch body: %s"
msgstr "duplikasi nilai case dalam tubuh switch: %s"
-#: awkgram.y:549
-#, fuzzy
+#: awkgram.y:536
msgid "duplicate `default' detected in switch body"
msgstr "Duplikasi `default' terdeteksi dalam tubuh switch"
-#: awkgram.y:811
-#, fuzzy
+#: awkgram.y:796 awkgram.y:3699
msgid "`break' is not allowed outside a loop or switch"
msgstr "`break' diluar sebuah loop tidak diijinkan"
-#: awkgram.y:820
-#, fuzzy
+#: awkgram.y:805 awkgram.y:3691
msgid "`continue' is not allowed outside a loop"
msgstr "`continue' diluar sebuah loop tidak diijinkan"
-#: awkgram.y:829
-#, fuzzy, c-format
+#: awkgram.y:815
+#, c-format
msgid "`next' used in %s action"
-msgstr "`%s' digunakan dalam aksi %s"
-
-#: awkgram.y:837
-msgid "`nextfile' is a gawk extension"
-msgstr "`nextfile' adalah sebuah ekstensi gawk"
+msgstr "`next' digunakan dalam aksi %s"
-#: awkgram.y:840
-#, fuzzy, c-format
+#: awkgram.y:824
+#, c-format
msgid "`nextfile' used in %s action"
-msgstr "`%s' digunakan dalam aksi %s"
+msgstr "`nextfile' digunakan dalam aksi %s"
-#: awkgram.y:863
+#: awkgram.y:848
msgid "`return' used outside function context"
msgstr "`return' digunakan diluar konteks fungsi"
-#: awkgram.y:923
+#: awkgram.y:922
msgid "plain `print' in BEGIN or END rule should probably be `print \"\"'"
-msgstr ""
-"plain `print' dalam aturan BEGIN atau AKHIR seharusnya berupa `print \"\"'"
+msgstr "plain `print' dalam aturan BEGIN atau AKHIR seharusnya berupa `print \"\"'"
+
+#: awkgram.y:988 awkgram.y:1037
+msgid "`delete' is not allowed with SYMTAB"
+msgstr "`delete' is not allowed with SYMTAB"
-#: awkgram.y:993 awkgram.y:997 awkgram.y:1021
-msgid "`delete array' is a gawk extension"
-msgstr "`delete array' adalah sebuah ekstensi gawk"
+#: awkgram.y:990 awkgram.y:1039
+msgid "`delete' is not allowed with FUNCTAB"
+msgstr "`delete' is not allowed with FUNCTAB"
-#: awkgram.y:1017
+#: awkgram.y:1024 awkgram.y:1028
msgid "`delete(array)' is a non-portable tawk extension"
msgstr "`delete(array)' adalah sebuah ekstensi tidak portabel tawk"
-#: awkgram.y:1133
+#: awkgram.y:1149
msgid "multistage two-way pipelines don't work"
msgstr "multi tahap dua jalur pipe lines tidak bekerja"
-#: awkgram.y:1236
+#: awkgram.y:1264
msgid "regular expression on right of assignment"
msgstr "ekspresi regular di penempatan kanan"
-#: awkgram.y:1247
+#: awkgram.y:1275
msgid "regular expression on left of `~' or `!~' operator"
msgstr "ekspresi regular di kiri dari operator `~' atau `!~'"
-#: awkgram.y:1263 awkgram.y:1417
+#: awkgram.y:1291 awkgram.y:1442
msgid "old awk does not support the keyword `in' except after `for'"
msgstr "awk lama tidak mendukung kata kunci `in' kecuali setelah `for'"
-#: awkgram.y:1273
+#: awkgram.y:1301
msgid "regular expression on right of comparison"
msgstr "ekspresi regular di kanan dari perbandingan"
-#: awkgram.y:1392
+#: awkgram.y:1417
#, c-format
msgid "`getline var' invalid inside `%s' rule"
-msgstr ""
+msgstr "`getline var' invalid inside `%s' rule"
-#: awkgram.y:1395 eval.c:2649
+#: awkgram.y:1420
#, c-format
msgid "`getline' invalid inside `%s' rule"
-msgstr ""
+msgstr "`getline' invalid inside `%s' rule"
-#: awkgram.y:1400
+#: awkgram.y:1425
msgid "non-redirected `getline' undefined inside END action"
msgstr "tidak terdireksi `getline' tidak terdefinisi didalam aksi END"
-#: awkgram.y:1419
+#: awkgram.y:1444
msgid "old awk does not support multidimensional arrays"
msgstr "awk lama tidak mendukung array multi dimensi"
-#: awkgram.y:1515
+#: awkgram.y:1541
msgid "call of `length' without parentheses is not portable"
msgstr "panggilan dari `length' tanpa tanda kurung tidak portabel"
-#: awkgram.y:1578
-#, fuzzy
+#: awkgram.y:1607
msgid "indirect function calls are a gawk extension"
-msgstr "`nextfile' adalah sebuah ekstensi gawk"
+msgstr "panggilan fungsi tidak langsung adalah sebuah ekstensi gawk"
-#: awkgram.y:1591
-#, fuzzy, c-format
+#: awkgram.y:1620
+#, c-format
msgid "can not use special variable `%s' for indirect function call"
-msgstr ""
-"fungsi `%s': tidak dapat menggunakan variabel `%s' sebagai fungsi parameter"
+msgstr "tidak dapat menggunakan variabel spesial `%s' untuk panggilan fungsi tidak langsung"
-#: awkgram.y:1669
+#: awkgram.y:1698
msgid "invalid subscript expression"
msgstr "ekspresi subscript tidak valid"
-#: awkgram.y:1709
-msgid "use of non-array as array"
-msgstr "penggunaan dari bukan array sebagai array"
-
-#: awkgram.y:1973 awkgram.y:1993 msg.c:98
+#: awkgram.y:2024 awkgram.y:2044 gawkapi.c:206 gawkapi.c:224 msg.c:126
msgid "warning: "
msgstr "peringatan: "
-#: awkgram.y:1991 msg.c:130
+#: awkgram.y:2042 gawkapi.c:192 gawkapi.c:221 msg.c:158
msgid "fatal: "
msgstr "fatal: "
-#: awkgram.y:2041
+#: awkgram.y:2092
msgid "unexpected newline or end of string"
msgstr "tidak terduga baris baru atau akhir dari string"
-#: awkgram.y:2297 awkgram.y:2355 awkgram.y:2539
+#: awkgram.y:2359 awkgram.y:2435 awkgram.y:2658 debug.c:523 debug.c:539
+#: debug.c:2812 debug.c:5055
#, c-format
msgid "can't open source file `%s' for reading (%s)"
msgstr "tidak dapat membuka berkas sumber `%s' untuk pembacaan (%s)"
-#: awkgram.y:2298 awkgram.y:2356 builtin.c:119
+#: awkgram.y:2360 awkgram.y:2485
+#, c-format
+msgid "can't open shared library `%s' for reading (%s)"
+msgstr "tidak dapat membuka shared library `%s' untuk pembacaan (%s)"
+
+#: awkgram.y:2362 awkgram.y:2436 awkgram.y:2486 builtin.c:135 debug.c:5206
msgid "reason unknown"
msgstr "alasan tidak diketahui"
-#: awkgram.y:2314
-#, fuzzy, c-format
+#: awkgram.y:2371 awkgram.y:2395
+#, c-format
+msgid "can't include `%s' and use it as a program file"
+msgstr "can't include `%s' and use it as a program file"
+
+#: awkgram.y:2384
+#, c-format
msgid "already included source file `%s'"
-msgstr "tidak dapat membaca berkas sumber `%s' (%s)"
+msgstr "berkas sumber `%s' telah disertakan"
+
+#: awkgram.y:2385
+#, c-format
+msgid "already loaded shared library `%s'"
+msgstr "already loaded shared library `%s'"
-#: awkgram.y:2340
-#, fuzzy
+#: awkgram.y:2420
msgid "@include is a gawk extension"
-msgstr "`nextfile' adalah sebuah ekstensi gawk"
+msgstr "@include adalah sebuah ekstensi gawk"
-#: awkgram.y:2346
+#: awkgram.y:2426
msgid "empty filename after @include"
-msgstr ""
+msgstr "empty filename after @include"
+
+#: awkgram.y:2470
+msgid "@load is a gawk extension"
+msgstr "@load adalah sebuah ekstensi gawk"
-#: awkgram.y:2491
+#: awkgram.y:2476
+msgid "empty filename after @load"
+msgstr "empty filename after @load"
+
+#: awkgram.y:2610
msgid "empty program text on command line"
msgstr "aplikasi teks kosong di baris perintah"
-#: awkgram.y:2606
+#: awkgram.y:2725
#, c-format
msgid "can't read sourcefile `%s' (%s)"
msgstr "tidak dapat membaca berkas sumber `%s' (%s)"
-#: awkgram.y:2617
+#: awkgram.y:2736
#, c-format
msgid "source file `%s' is empty"
msgstr "berkas sumber `%s' kosong"
-#: awkgram.y:2802
+#: awkgram.y:2913
msgid "source file does not end in newline"
msgstr "berkas sumber tidak berakhir dalam baris baru"
-#: awkgram.y:2879
+#: awkgram.y:3018
msgid "unterminated regexp ends with `\\' at end of file"
msgstr "tidak terakhiri regexp akhir denga `\\' diakhir dari berkas"
-#: awkgram.y:2903
+#: awkgram.y:3042
#, c-format
msgid "%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"
msgstr "%s: %d: tawk regex pemodifikasi `/.../%c' tidak bekerja dalam gawk"
-#: awkgram.y:2907
+#: awkgram.y:3046
#, c-format
msgid "tawk regex modifier `/.../%c' doesn't work in gawk"
msgstr "tawk regex pemodifikasi `/.../%c' tidak bekerja dalam gawk"
-#: awkgram.y:2914
+#: awkgram.y:3053
msgid "unterminated regexp"
msgstr "tidak terselesaikan regexp"
-#: awkgram.y:2918
+#: awkgram.y:3057
msgid "unterminated regexp at end of file"
msgstr "tidak terselesaikan di akhir dari berkas"
-#: awkgram.y:2977
+#: awkgram.y:3116
msgid "use of `\\ #...' line continuation is not portable"
msgstr "penggunaan dari `\\ #...' kelanjutan baris tidak portabel"
-#: awkgram.y:2993
+#: awkgram.y:3132
msgid "backslash not last character on line"
msgstr "backslash bukan karakter terakhir di baris"
-#: awkgram.y:3054
+#: awkgram.y:3193
msgid "POSIX does not allow operator `**='"
msgstr "POSIX tidak mengijinkan operator `**='"
-#: awkgram.y:3056
+#: awkgram.y:3195
msgid "old awk does not support operator `**='"
msgstr "awk lama tidak mendukung operator `**='"
-#: awkgram.y:3065
+#: awkgram.y:3204
msgid "POSIX does not allow operator `**'"
msgstr "POSIX tidak mengijinkan operator `**'"
-#: awkgram.y:3067
+#: awkgram.y:3206
msgid "old awk does not support operator `**'"
msgstr "awk lama tidak mendukung operator `**'"
-#: awkgram.y:3102
+#: awkgram.y:3241
msgid "operator `^=' is not supported in old awk"
msgstr "operator `^=' tidak didukung dalam awk lama"
-#: awkgram.y:3110
+#: awkgram.y:3249
msgid "operator `^' is not supported in old awk"
msgstr "operator `^' tidak didukung dalam awk lama"
-#: awkgram.y:3203 awkgram.y:3219
+#: awkgram.y:3342 awkgram.y:3358 command.y:1178
msgid "unterminated string"
msgstr "string tidak terselesaikan"
-#: awkgram.y:3415
+#: awkgram.y:3579
#, c-format
msgid "invalid char '%c' in expression"
msgstr "karakter '%c' tidak valid dalam ekspresi"
-#: awkgram.y:3462
+#: awkgram.y:3626
#, c-format
msgid "`%s' is a gawk extension"
msgstr "`%s' adalah sebuah ekstensi gawk"
-#: awkgram.y:3467
-#, c-format
-msgid "`%s' is a Bell Labs extension"
-msgstr "`%s' adalah sebuah ekstensi Bell Labs"
-
-#: awkgram.y:3472
+#: awkgram.y:3631
#, c-format
msgid "POSIX does not allow `%s'"
msgstr "POSIX tidak mengijinkan `%s'"
-#: awkgram.y:3480
+#: awkgram.y:3639
#, c-format
msgid "`%s' is not supported in old awk"
msgstr "`%s' tidak didukung dalam awk lama"
-#: awkgram.y:3550
+#: awkgram.y:3729
msgid "`goto' considered harmful!\n"
msgstr "`goto' dipertimbangkan berbahaya!\n"
-#: awkgram.y:3602
+#: awkgram.y:3763
#, c-format
msgid "%d is invalid as number of arguments for %s"
msgstr "%d tidak valid sebagai jumlah dari argumen untuk %s"
-#: awkgram.y:3637 awkgram.y:3640
-msgid "match: third argument is a gawk extension"
-msgstr "cocok: argumen ketiga adalah sebuah ekstensi gawk"
-
-#: awkgram.y:3668
+#: awkgram.y:3798
#, c-format
msgid "%s: string literal as last arg of substitute has no effect"
-msgstr ""
-"%s: literal string sebagai argumen terakhir dari pergantian tidak memiliki "
-"efek"
+msgstr "%s: literal string sebagai argumen terakhir dari pergantian tidak memiliki efek"
-#: awkgram.y:3673
+#: awkgram.y:3803
#, c-format
msgid "%s third parameter is not a changeable object"
msgstr "%s parameter ketika bukan sebuah objek yang dapat diubah"
-#: awkgram.y:3759 awkgram.y:3762
+#: awkgram.y:3886 awkgram.y:3889
+msgid "match: third argument is a gawk extension"
+msgstr "cocok: argumen ketiga adalah sebuah ekstensi gawk"
+
+#: awkgram.y:3943 awkgram.y:3946
msgid "close: second argument is a gawk extension"
msgstr "tutup: argumen kedua adalah sebuah ekstensi gawk"
-#: awkgram.y:3774
+#: awkgram.y:3958
msgid "use of dcgettext(_\"...\") is incorrect: remove leading underscore"
-msgstr ""
-"penggunaan dari dcgettext(_\"...\") adalah tidak benar: hapus garis bawah "
-"yang mengawali"
+msgstr "penggunaan dari dcgettext(_\"...\") adalah tidak benar: hapus garis bawah yang mengawali"
-#: awkgram.y:3789
+#: awkgram.y:3973
msgid "use of dcngettext(_\"...\") is incorrect: remove leading underscore"
-msgstr ""
-"penggunaan dari dcngettext(_\"...\") adalah tidak benar: hapus garis bawah "
-"yang mengawali"
+msgstr "penggunaan dari dcngettext(_\"...\") adalah tidak benar: hapus garis bawah yang mengawali"
-#: awkgram.y:3881
-#, c-format
-msgid "function `%s': parameter #%d, `%s', duplicates parameter #%d"
-msgstr "fungsi `%s': parameter #%d, `%s', duplikasi paramter #%d"
+#: awkgram.y:3992
+msgid "index: regexp constant as second argument is not allowed"
+msgstr "index: konstanta regexp sebagai argumen kedua tidak diijinkan"
-#: awkgram.y:3923
+#: awkgram.y:4045
#, c-format
msgid "function `%s': parameter `%s' shadows global variable"
msgstr "fungsi `%s': parameter `%s' bayangan variabel global"
-#: awkgram.y:4081
+#: awkgram.y:4102 debug.c:4041 debug.c:4084 debug.c:5204
#, c-format
msgid "could not open `%s' for writing (%s)"
msgstr "tidak dapat membuka `%s' untuk menulis (%s)"
-#: awkgram.y:4082 profile.c:85
-msgid "sending profile to standard error"
+#: awkgram.y:4103
+msgid "sending variable list to standard error"
msgstr "mengirim profile ke standar error"
-#: awkgram.y:4088
+#: awkgram.y:4111
#, c-format
msgid "%s: close failed (%s)"
msgstr "%s: tutup gagal (%s)"
-#: awkgram.y:4140
+#: awkgram.y:4136
msgid "shadow_funcs() called twice!"
msgstr "shadow_funcs() dipanggil dua kali!"
-#: awkgram.y:4146
+#: awkgram.y:4144
msgid "there were shadowed variables."
msgstr "disana tidak ada variabel bayangan."
-#: awkgram.y:4176
+#: awkgram.y:4215
+#, c-format
+msgid "function name `%s' previously defined"
+msgstr "nama fungsi `%s' sebelumnya telah didefinisikan"
+
+#: awkgram.y:4261
#, c-format
msgid "function `%s': can't use function name as parameter name"
-msgstr ""
-"fungsi `%s': tidak dapat menggunakan nama fungsi sebagai nama parameter"
+msgstr "fungsi `%s': tidak dapat menggunakan nama fungsi sebagai nama parameter"
-#: awkgram.y:4180
+#: awkgram.y:4264
#, c-format
msgid "function `%s': can't use special variable `%s' as a function parameter"
-msgstr ""
-"fungsi `%s': tidak dapat menggunakan variabel `%s' sebagai fungsi parameter"
+msgstr "fungsi `%s': tidak dapat menggunakan variabel `%s' sebagai fungsi parameter"
-#: awkgram.y:4196
+#: awkgram.y:4272
#, c-format
-msgid "function name `%s' previously defined"
-msgstr "nama fungsi `%s' sebelumnya telah didefinisikan"
+msgid "function `%s': parameter #%d, `%s', duplicates parameter #%d"
+msgstr "fungsi `%s': parameter #%d, `%s', duplikasi paramter #%d"
-#: awkgram.y:4364 awkgram.y:4370
+#: awkgram.y:4359 awkgram.y:4365
#, c-format
msgid "function `%s' called but never defined"
msgstr "fungsi `%s' dipanggil tetapi tidak pernah didefinisikan"
-#: awkgram.y:4373
-#, fuzzy, c-format
+#: awkgram.y:4369
+#, c-format
msgid "function `%s' defined but never called directly"
msgstr "fungsi `%s' didefinisikan tetapi tidak pernah dipanggil"
-#: awkgram.y:4405
+#: awkgram.y:4401
#, c-format
msgid "regexp constant for parameter #%d yields boolean value"
msgstr "konstanta regexp untuk parameter #%d menghasilkan nilai boolean"
-#: awkgram.y:4514
+#: awkgram.y:4460
#, c-format
msgid ""
"function `%s' called with space between name and `(',\n"
@@ -512,363 +491,1306 @@ msgstr ""
"fungsi `%s' dipanggil dengan spasi diantara nama dan `(',\n"
"atau gunakan sebagai sebuah variabel atau sebuah array"
-#: awkgram.y:4761 eval.c:2206
+#: awkgram.y:4696
msgid "division by zero attempted"
msgstr "pembagian dengan nol telah dicoba"
-#: awkgram.y:4770 eval.c:2222
+#: awkgram.y:4705
#, c-format
msgid "division by zero attempted in `%%'"
msgstr "pembagian dengan nol dicoba dalam `%%'"
-#: builtin.c:117
+#: awkgram.y:5025
+msgid "cannot assign a value to the result of a field post-increment expression"
+msgstr "cannot assign a value to the result of a field post-increment expression"
+
+#: awkgram.y:5028
+#, c-format
+msgid "invalid target of assignment (opcode %s)"
+msgstr "target penempatan tidak valid (opcode %s)"
+
+#: builtin.c:133
#, c-format
msgid "%s to \"%s\" failed (%s)"
msgstr "%s ke \"%s\" gagal (%s)"
-#: builtin.c:118
+#: builtin.c:134
msgid "standard output"
msgstr "standar keluaran"
-#: builtin.c:132
+#: builtin.c:148
msgid "exp: received non-numeric argument"
msgstr "exp: diterima argumen bukan-numerik"
-#: builtin.c:138
+#: builtin.c:154
#, c-format
msgid "exp: argument %g is out of range"
msgstr "exp: argumen %g diluar dari jangkauan"
-#: builtin.c:197
+#: builtin.c:229
#, c-format
msgid "fflush: cannot flush: pipe `%s' opened for reading, not writing"
-msgstr ""
-"fflush: tidak dapat flush: pipe `%s' dibuka untuk dibaca, bukan ditulis"
+msgstr "fflush: tidak dapat flush: pipe `%s' dibuka untuk dibaca, bukan ditulis"
-#: builtin.c:200
+#: builtin.c:232
#, c-format
msgid "fflush: cannot flush: file `%s' opened for reading, not writing"
-msgstr ""
-"fflush: tidak dapat flush: berkas `%s' dibuka untuk dibaca, bukan ditulis"
+msgstr "fflush: tidak dapat flush: berkas `%s' dibuka untuk dibaca, bukan ditulis"
-#: builtin.c:212
+#: builtin.c:244
#, c-format
msgid "fflush: `%s' is not an open file, pipe or co-process"
msgstr "fflush: `%s' bukan sebuah berkas terbuka, pipe atau co-proses"
-#: builtin.c:330
+#: builtin.c:362
msgid "index: received non-string first argument"
msgstr "indeks: diterima argumen pertama bukan string"
-#: builtin.c:332
+#: builtin.c:364
msgid "index: received non-string second argument"
msgstr "indeks: diterima argumen kedua bukan string"
-#: builtin.c:454
+#: builtin.c:488 mpfr.c:757
msgid "int: received non-numeric argument"
msgstr "int: diterima argumen bukan numerik"
-#: builtin.c:490
-#, fuzzy
+#: builtin.c:525
msgid "length: received array argument"
msgstr "length: diterima argumen bukan-string"
-#: builtin.c:493
-#, fuzzy
+#: builtin.c:528
msgid "`length(array)' is a gawk extension"
msgstr "`length(array)' adalah sebuah ekstensi gawk"
-#: builtin.c:501
+#: builtin.c:544
msgid "length: received non-string argument"
msgstr "length: diterima argumen bukan-string"
-#: builtin.c:532
+#: builtin.c:575
msgid "log: received non-numeric argument"
msgstr "log: diterima argumen bukan numerik"
-#: builtin.c:535
+#: builtin.c:578
#, c-format
msgid "log: received negative argument %g"
msgstr "log: diterima argumen negatif %g"
-#: builtin.c:593 builtin.c:604
+#: builtin.c:776 builtin.c:781
+msgid "fatal: must use `count$' on all formats or none"
+msgstr "harus menggunakan `count$' di semua format atau tidak sama sekali"
+
+#: builtin.c:851
+#, c-format
+msgid "field width is ignored for `%%' specifier"
+msgstr "lebar daerah diabaikan untuk penspesifikasi `%%'"
+
+#: builtin.c:853
+#, c-format
+msgid "precision is ignored for `%%' specifier"
+msgstr "ketepatan diabaikan untuk penspesifikasi `%%'"
+
+#: builtin.c:855
+#, c-format
+msgid "field width and precision are ignored for `%%' specifier"
+msgstr "lebar daerah dan presisi diabaikan untuk penspesifikasi `%%'"
+
+#: builtin.c:906
+msgid "fatal: `$' is not permitted in awk formats"
+msgstr "`$' tidak diijinkan dalam format awk"
+
+#: builtin.c:915
+msgid "fatal: arg count with `$' must be > 0"
+msgstr "arg count dengan `$' harus > 0"
+
+#: builtin.c:919
+#, c-format
+msgid "fatal: arg count %ld greater than total number of supplied arguments"
+msgstr "arg count %ld lebih besar dari jumlah total dari argumen yang diberikan"
+
+#: builtin.c:923
+msgid "fatal: `$' not permitted after period in format"
+msgstr "`$' tidak diijinkan setelah periode dalam format"
+
+#: builtin.c:939
+msgid "fatal: no `$' supplied for positional field width or precision"
+msgstr "tidak ada `$' yang diberikan untuk posisional field width atau presisi"
+
+#: builtin.c:1011
+msgid "`l' is meaningless in awk formats; ignored"
+msgstr "`l' tidak berarti dalam format awk; diabaikan"
+
+#: builtin.c:1015
+msgid "fatal: `l' is not permitted in POSIX awk formats"
+msgstr "`l' tidak diijinkan dalam format POSIX awk"
+
+#: builtin.c:1028
+msgid "`L' is meaningless in awk formats; ignored"
+msgstr "`L' tidak berarti dalam format awk; diabaikan"
+
+#: builtin.c:1032
+msgid "fatal: `L' is not permitted in POSIX awk formats"
+msgstr "`L' tidak diijinkan dalam format awk POSIX"
+
+#: builtin.c:1045
+msgid "`h' is meaningless in awk formats; ignored"
+msgstr "`h' tidak berarti dalam format awk; diabaikan"
+
+#: builtin.c:1049
+msgid "fatal: `h' is not permitted in POSIX awk formats"
+msgstr "`h' tidak diijinkan dalam format awk POSIX"
+
+#: builtin.c:1447
+#, c-format
+msgid "[s]printf: value %g is out of range for `%%%c' format"
+msgstr "[s]printf: nilai %g diluar dari jangkauan untuk format `%%%c'"
+
+#: builtin.c:1545
+#, c-format
+msgid "ignoring unknown format specifier character `%c': no argument converted"
+msgstr "mengabaikan format tidak dikenal karakter penspesifikasi `%c': tidak ada argumen yang diubah"
+
+#: builtin.c:1550
+msgid "fatal: not enough arguments to satisfy format string"
+msgstr "tidak cukup argumen untuk memuaskan format string"
+
+#: builtin.c:1552
+msgid "^ ran out for this one"
+msgstr "^ kehabisan untuk yang ini"
+
+#: builtin.c:1559
+msgid "[s]printf: format specifier does not have control letter"
+msgstr "[s]printf: penspesifikasi format tidak memiliki pengontrol huruf"
+
+#: builtin.c:1562
+msgid "too many arguments supplied for format string"
+msgstr "terlalu banyak argumen diberikan untuk format string"
+
+#: builtin.c:1618
+msgid "sprintf: no arguments"
+msgstr "sprintf: tidak ada argumen"
+
+#: builtin.c:1641 builtin.c:1652
msgid "printf: no arguments"
msgstr "printf: tidak ada argumen"
-#: builtin.c:645
+#: builtin.c:1695
msgid "sqrt: received non-numeric argument"
msgstr "sqrt: diterima argumen bukan numerik"
-#: builtin.c:649
+#: builtin.c:1699
#, c-format
msgid "sqrt: called with negative argument %g"
msgstr "sqrt: dipanggil dengan argumen %g negatif"
-#: builtin.c:673
+#: builtin.c:1730
#, c-format
msgid "substr: length %g is not >= 1"
msgstr "substr: panjang %g tidak >= 1"
-#: builtin.c:675
+#: builtin.c:1732
#, c-format
msgid "substr: length %g is not >= 0"
msgstr "substr: panjang %g tidak >= 0"
-#: builtin.c:682
+#: builtin.c:1739
#, c-format
msgid "substr: non-integer length %g will be truncated"
msgstr "substr: panjang bukan integer %g akan dipotong"
-#: builtin.c:687
+#: builtin.c:1744
#, c-format
msgid "substr: length %g too big for string indexing, truncating to %g"
-msgstr ""
-"substr: panjang %g terlalu besar untuk pengindeksan string, dipotong ke %g"
+msgstr "substr: panjang %g terlalu besar untuk pengindeksan string, dipotong ke %g"
-#: builtin.c:699
+#: builtin.c:1756
#, c-format
msgid "substr: start index %g is invalid, using 1"
msgstr "substr: awal indeks %g tidak valid, menggunakan 1"
-#: builtin.c:704
+#: builtin.c:1761
#, c-format
msgid "substr: non-integer start index %g will be truncated"
msgstr "substr: awal indeks %g bukan integer akan dipotong"
-#: builtin.c:729
+#: builtin.c:1786
msgid "substr: source string is zero length"
msgstr "substr: sumber string memiliki panjang nol"
-#: builtin.c:745
+#: builtin.c:1802
#, c-format
msgid "substr: start index %g is past end of string"
msgstr "substr: awal indeks %g melewati akhir dari string"
-#: builtin.c:753
+#: builtin.c:1810
#, c-format
-msgid ""
-"substr: length %g at start index %g exceeds length of first argument (%lu)"
-msgstr ""
-"substr: panjang %g di awal indeks %g melewati panjang dari argumen pertama "
-"(%lu)"
+msgid "substr: length %g at start index %g exceeds length of first argument (%lu)"
+msgstr "substr: panjang %g di awal indeks %g melewati panjang dari argumen pertama (%lu)"
-#: builtin.c:826
+#: builtin.c:1884
msgid "strftime: format value in PROCINFO[\"strftime\"] has numeric type"
-msgstr ""
+msgstr "strftime: format value in PROCINFO[\"strftime\"] has numeric type"
-#: builtin.c:840
+#: builtin.c:1907
msgid "strftime: received non-numeric second argument"
msgstr "strftime: diterima argumen kedua bukan numerik"
-#: builtin.c:847
+#: builtin.c:1911
+msgid "strftime: second argument less than 0 or too big for time_t"
+msgstr "strftime: second argument less than 0 or too big for time_t"
+
+#: builtin.c:1918
msgid "strftime: received non-string first argument"
msgstr "strftime: diterima argumen pertama bukan string"
-#: builtin.c:853
+#: builtin.c:1925
msgid "strftime: received empty format string"
msgstr "strftime: diterima format string kosong"
-#: builtin.c:919
+#: builtin.c:1991
msgid "mktime: received non-string argument"
msgstr "mktime: diterima argumen bukan string"
-#: builtin.c:936
+#: builtin.c:2008
msgid "mktime: at least one of the values is out of the default range"
-msgstr ""
+msgstr "mktime: at least one of the values is out of the default range"
-#: builtin.c:971
+#: builtin.c:2043
msgid "'system' function not allowed in sandbox mode"
-msgstr ""
+msgstr "'system' function not allowed in sandbox mode"
-#: builtin.c:976
+#: builtin.c:2048
msgid "system: received non-string argument"
msgstr "system: diterima argumen bukan string"
-#: builtin.c:1031 eval.c:1411 eval.c:1936 eval.c:1949
-#, c-format
-msgid "reference to uninitialized variable `%s'"
-msgstr "referensi ke variabel `%s' tidak terinisialisasi"
-
-#: builtin.c:1098
+#: builtin.c:2168
#, c-format
msgid "reference to uninitialized field `$%d'"
msgstr "referensi ke field tidak terinisialisasi `$%d'"
-#: builtin.c:1185
+#: builtin.c:2255
msgid "tolower: received non-string argument"
msgstr "tolower: diterima argumen bukan string"
-#: builtin.c:1219
+#: builtin.c:2289
msgid "toupper: received non-string argument"
msgstr "toupper: diterima argumen bukan string"
-#: builtin.c:1255
+#: builtin.c:2325 mpfr.c:672
msgid "atan2: received non-numeric first argument"
msgstr "atan2: diterima argumen pertama bukan numerik"
-#: builtin.c:1257
+#: builtin.c:2327 mpfr.c:674
msgid "atan2: received non-numeric second argument"
msgstr "atan2: diterima argumen kedua bukan numerik"
-#: builtin.c:1276
+#: builtin.c:2346
msgid "sin: received non-numeric argument"
msgstr "sin: diterima argumen bukan numerik"
-#: builtin.c:1292
+#: builtin.c:2362
msgid "cos: received non-numeric argument"
msgstr "cos: diterima argumen bukan numerik"
-#: builtin.c:1345
+#: builtin.c:2415 mpfr.c:1156
msgid "srand: received non-numeric argument"
msgstr "srand: diterima argumen bukan numerik"
-#: builtin.c:1376
+#: builtin.c:2446
msgid "match: third argument is not an array"
msgstr "match: argumen ketiga bukan sebuah array"
-#: builtin.c:1883
+#: builtin.c:2718
msgid "gensub: third argument of 0 treated as 1"
msgstr "gensub: argumen ketiga dari 0 diperlakukan sebagai 1"
-#: builtin.c:1923
+#: builtin.c:3014
msgid "lshift: received non-numeric first argument"
msgstr "lshift: diterima argumen pertama bukan numerik"
-#: builtin.c:1925
+#: builtin.c:3016
msgid "lshift: received non-numeric second argument"
msgstr "lshift: diterima argumen kedua bukan numerik"
-#: builtin.c:1931
+#: builtin.c:3022
#, c-format
-msgid "lshift(%lf, %lf): negative values will give strange results"
-msgstr "lshift(%lf, %lf): nilai negatif akan memberikan hasil aneh"
+msgid "lshift(%f, %f): negative values will give strange results"
+msgstr "lshift(%f, %f): nilai negatif akan memberikan hasil aneh"
-#: builtin.c:1933
+#: builtin.c:3024
#, c-format
-msgid "lshift(%lf, %lf): fractional values will be truncated"
-msgstr "lshift(%lf, %lf): nilai pecahan akan dipotong"
+msgid "lshift(%f, %f): fractional values will be truncated"
+msgstr "lshift(%f, %f): nilai pecahan akan dipotong"
-#: builtin.c:1935
+#: builtin.c:3026
#, c-format
-msgid "lshift(%lf, %lf): too large shift value will give strange results"
-msgstr "lshift(%lf, %lf): nilai shift terlalu besar akan memberikan hasil aneh"
+msgid "lshift(%f, %f): too large shift value will give strange results"
+msgstr "lshift(%f, %f): nilai shift terlalu besar akan memberikan hasil aneh"
-#: builtin.c:1960
+#: builtin.c:3051
msgid "rshift: received non-numeric first argument"
msgstr "rshift: diterima argumen pertama bukan numerik"
-#: builtin.c:1962
+#: builtin.c:3053
msgid "rshift: received non-numeric second argument"
msgstr "rshift: diterima argumen kedua bukan-numerik"
-#: builtin.c:1968
+#: builtin.c:3059
#, c-format
-msgid "rshift(%lf, %lf): negative values will give strange results"
-msgstr "rshift(%lf. %lf): nilai negatif akan memberikan hasil aneh"
+msgid "rshift(%f, %f): negative values will give strange results"
+msgstr "rshift(%f, %f): nilai negatif akan memberikan hasil aneh"
-#: builtin.c:1970
+#: builtin.c:3061
#, c-format
-msgid "rshift(%lf, %lf): fractional values will be truncated"
-msgstr "rshift(%lf, %lf): nilai pecahan akan dipotong"
+msgid "rshift(%f, %f): fractional values will be truncated"
+msgstr "rshift(%f, %f): nilai pecahan akan dipotong"
-#: builtin.c:1972
+#: builtin.c:3063
#, c-format
-msgid "rshift(%lf, %lf): too large shift value will give strange results"
-msgstr "rshift(%lf, %lf): nilai shift terlalu besar akan memberikan hasil aneh"
-
-#: builtin.c:1997
-msgid "and: received non-numeric first argument"
-msgstr "and: diterima argumen pertama tidak numerik"
+msgid "rshift(%f, %f): too large shift value will give strange results"
+msgstr "rshift(%f, %f): nilai shift terlalu besar akan memberikan hasil aneh"
-#: builtin.c:1999
-msgid "and: received non-numeric second argument"
-msgstr "and: diterima argumen kedua bukan numerik"
+#: builtin.c:3088 mpfr.c:968
+msgid "and: called with less than two arguments"
+msgstr "and: dipanggil dengan kurang dari dua argumen"
-#: builtin.c:2005
+#: builtin.c:3093
#, c-format
-msgid "and(%lf, %lf): negative values will give strange results"
-msgstr "and(%lf, %lf): nilai negatif akan memberikan hasil aneh"
+msgid "and: argument %d is non-numeric"
+msgstr "and: argumen %d bukan numerik"
-#: builtin.c:2007
+#: builtin.c:3097
#, c-format
-msgid "and(%lf, %lf): fractional values will be truncated"
-msgstr "and(%lf, %lf): nilai pecahan akan dipotong"
+msgid "and: argument %d negative value %g will give strange results"
+msgstr "and: argumen %d nilai negatif %g akan memberikan hasil aneh"
-#: builtin.c:2032
-msgid "or: received non-numeric first argument"
-msgstr "or: diterima argumen pertama bukan numerik"
+#: builtin.c:3120 mpfr.c:1000
+msgid "or: called with less than two arguments"
+msgstr "or: dipanggil dengan kurang dari dua argumen"
-#: builtin.c:2034
-msgid "or: received non-numeric second argument"
-msgstr "or: diterima argumen kedua bukan numerik"
-
-#: builtin.c:2040
+#: builtin.c:3125
#, c-format
-msgid "or(%lf, %lf): negative values will give strange results"
-msgstr "or(%lf, %lf): nilai negatif akan memberikan hasil aneh"
+msgid "or: argument %d is non-numeric"
+msgstr "or: argumen %d bukan numerik"
-#: builtin.c:2042
+#: builtin.c:3129
#, c-format
-msgid "or(%lf, %lf): fractional values will be truncated"
-msgstr "or(%lf, %lf): nilai pecahan akan dipotong"
-
-#: builtin.c:2070
-msgid "xor: received non-numeric first argument"
-msgstr "xor: diterima argumen pertama bukan numerik"
+msgid "or: argument %d negative value %g will give strange results"
+msgstr "or: argumen %d nilai negatif %g akan memberikan hasil aneh"
-#: builtin.c:2072
-msgid "xor: received non-numeric second argument"
-msgstr "xor: diterima argumen kedua bukan numerik"
+#: builtin.c:3151 mpfr.c:1031
+msgid "xor: called with less than two arguments"
+msgstr "xor: dipanggil dengan kurang dari dua argumen"
-#: builtin.c:2078
+#: builtin.c:3157
#, c-format
-msgid "xor(%lf, %lf): negative values will give strange results"
-msgstr "xor(%lf, %lf): nilai negatif akan memberikan hasil aneh"
+msgid "xor: argument %d is non-numeric"
+msgstr "xor: argumen %d bukan numerik"
-#: builtin.c:2080
+#: builtin.c:3161
#, c-format
-msgid "xor(%lf, %lf): fractional values will be truncated"
-msgstr "xor(%lf, %lf): nilai pecahan akan dipotong"
+msgid "xor: argument %d negative value %g will give strange results"
+msgstr "xor: argumen %d nilai negatif %g akan memberikan hasil aneh"
-#: builtin.c:2104 builtin.c:2110
+#: builtin.c:3186 mpfr.c:787
msgid "compl: received non-numeric argument"
msgstr "compl: diterima argumen bukan numerik"
-#: builtin.c:2112
+#: builtin.c:3192
#, c-format
-msgid "compl(%lf): negative value will give strange results"
-msgstr "compl(%lf): nilai negatif akan memberikan hasil aneh"
+msgid "compl(%f): negative value will give strange results"
+msgstr "compl(%f): nilai negatif akan memberikan hasil aneh"
-#: builtin.c:2114
+#: builtin.c:3194
#, c-format
-msgid "compl(%lf): fractional value will be truncated"
-msgstr "compl(%lf): nilai pecahan akan dipotong"
+msgid "compl(%f): fractional value will be truncated"
+msgstr "compl(%f): nilai pecahan akan dipotong"
-#: builtin.c:2283
+#: builtin.c:3363
#, c-format
msgid "dcgettext: `%s' is not a valid locale category"
msgstr "dcgettext: `%s' bukan sebuah kategori lokal yang valid"
-#: eval.c:410
+#: command.y:225
+#, c-format
+msgid "Type (g)awk statement(s). End with the command \"end\"\n"
+msgstr "Type (g)awk statement(s). End with the command \"end\"\n"
+
+#: command.y:289
+#, c-format
+msgid "invalid frame number: %d"
+msgstr "nomor frame tidak valid: %d"
+
+#: command.y:295
+#, c-format
+msgid "info: invalid option - \"%s\""
+msgstr "info: pilihan tidak valid - \"%s\""
+
+#: command.y:321
+#, c-format
+msgid "source \"%s\": already sourced."
+msgstr "source \"%s\": already sourced."
+
+#: command.y:326
+#, c-format
+msgid "save \"%s\": command not permitted."
+msgstr "save \"%s\": command not permitted."
+
+#: command.y:339
+msgid "Can't use command `commands' for breakpoint/watchpoint commands"
+msgstr "Can't use command `commands' for breakpoint/watchpoint commands"
+
+#: command.y:341
+msgid "no breakpoint/watchpoint has been set yet"
+msgstr "no breakpoint/watchpoint has been set yet"
+
+#: command.y:343
+msgid "invalid breakpoint/watchpoint number"
+msgstr "invalid breakpoint/watchpoint number"
+
+#: command.y:348
+#, c-format
+msgid "Type commands for when %s %d is hit, one per line.\n"
+msgstr "Type commands for when %s %d is hit, one per line.\n"
+
+#: command.y:350
+#, c-format
+msgid "End with the command \"end\"\n"
+msgstr "End with the command \"end\"\n"
+
+#: command.y:357
+msgid "`end' valid only in command `commands' or `eval'"
+msgstr "`end' valid only in command `commands' or `eval'"
+
+#: command.y:367
+msgid "`silent' valid only in command `commands'"
+msgstr "`silent' valid only in command `commands'"
+
+#: command.y:373
+#, c-format
+msgid "trace: invalid option - \"%s\""
+msgstr "trace: pilihan tidak valid - \"%s\""
+
+#: command.y:387
+msgid "condition: invalid breakpoint/watchpoint number"
+msgstr "condition: invalid breakpoint/watchpoint number"
+
+#: command.y:449
+msgid "argument not a string"
+msgstr "argumen bukan sebuah string"
+
+#: command.y:459 command.y:464
+#, c-format
+msgid "option: invalid parameter - \"%s\""
+msgstr "option: invalid parameter - \"%s\""
+
+#: command.y:474
+#, c-format
+msgid "no such function - \"%s\""
+msgstr "no such function - \"%s\""
+
+#: command.y:531
+#, c-format
+msgid "enable: invalid option - \"%s\""
+msgstr "enable: pilihan tidak valid - \"%s\""
+
+#: command.y:597
+#, c-format
+msgid "invalid range specification: %d - %d"
+msgstr "spesifikasi jangkauan tidak valid: %d - %d"
+
+#: command.y:659
+msgid "non-numeric value for field number"
+msgstr "non-numeric value for field number"
+
+#: command.y:680 command.y:687
+msgid "non-numeric value found, numeric expected"
+msgstr "non-numeric value found, numeric expected"
+
+#: command.y:712 command.y:718
+msgid "non-zero integer value"
+msgstr "non-zero integer value"
+
+#: command.y:817
+msgid "backtrace [N] - print trace of all or N innermost (outermost if N < 0) frames."
+msgstr "backtrace [N] - print trace of all or N innermost (outermost if N < 0) frames."
+
+#: command.y:819
+msgid "break [[filename:]N|function] - set breakpoint at the specified location."
+msgstr "break [[filename:]N|function] - set breakpoint at the specified location."
+
+#: command.y:821
+msgid "clear [[filename:]N|function] - delete breakpoints previously set."
+msgstr "clear [[filename:]N|function] - delete breakpoints previously set."
+
+#: command.y:823
+msgid "commands [num] - starts a list of commands to be executed at a breakpoint(watchpoint) hit."
+msgstr "commands [num] - starts a list of commands to be executed at a breakpoint(watchpoint) hit."
+
+#: command.y:825
+msgid "condition num [expr] - set or clear breakpoint or watchpoint condition."
+msgstr "condition num [expr] - set or clear breakpoint or watchpoint condition."
+
+#: command.y:827
+msgid "continue [COUNT] - continue program being debugged."
+msgstr "continue [COUNT] - continue program being debugged."
+
+#: command.y:829
+msgid "delete [breakpoints] [range] - delete specified breakpoints."
+msgstr "delete [breakpoints] [range] - delete specified breakpoints."
+
+#: command.y:831
+msgid "disable [breakpoints] [range] - disable specified breakpoints."
+msgstr "disable [breakpoints] [range] - disable specified breakpoints."
+
+#: command.y:833
+msgid "display [var] - print value of variable each time the program stops."
+msgstr "display [var] - print value of variable each time the program stops."
+
+#: command.y:835
+msgid "down [N] - move N frames down the stack."
+msgstr "down [N] - move N frames down the stack."
+
+#: command.y:837
+msgid "dump [filename] - dump instructions to file or stdout."
+msgstr "dump [filename] - dump instructions to file or stdout."
+
+#: command.y:839
+msgid "enable [once|del] [breakpoints] [range] - enable specified breakpoints."
+msgstr "enable [once|del] [breakpoints] [range] - enable specified breakpoints."
+
+#: command.y:841
+msgid "end - end a list of commands or awk statements."
+msgstr "end - end a list of commands or awk statements."
+
+#: command.y:843
+msgid "eval stmt|[p1, p2, ...] - evaluate awk statement(s)."
+msgstr "eval stmt|[p1, p2, ...] - evaluate awk statement(s)."
+
+#: command.y:845
+msgid "finish - execute until selected stack frame returns."
+msgstr "finish - execute until selected stack frame returns."
+
+#: command.y:847
+msgid "frame [N] - select and print stack frame number N."
+msgstr "frame [N] - select and print stack frame number N."
+
+#: command.y:849
+msgid "help [command] - print list of commands or explanation of command."
+msgstr "help [command] - print list of commands or explanation of command."
+
+#: command.y:851
+msgid "ignore N COUNT - set ignore-count of breakpoint number N to COUNT."
+msgstr "ignore N COUNT - set ignore-count of breakpoint number N to COUNT."
+
+#: command.y:853
+msgid "info topic - source|sources|variables|functions|break|frame|args|locals|display|watch."
+msgstr "info topic - source|sources|variables|functions|break|frame|args|locals|display|watch."
+
+#: command.y:855
+msgid "list [-|+|[filename:]lineno|function|range] - list specified line(s)."
+msgstr "list [-|+|[filename:]lineno|function|range] - list specified line(s)."
+
+#: command.y:857
+msgid "next [COUNT] - step program, proceeding through subroutine calls."
+msgstr "next [COUNT] - step program, proceeding through subroutine calls."
+
+#: command.y:859
+msgid "nexti [COUNT] - step one instruction, but proceed through subroutine calls."
+msgstr "nexti [COUNT] - step one instruction, but proceed through subroutine calls."
+
+#: command.y:861
+msgid "option [name[=value]] - set or display debugger option(s)."
+msgstr "option [name[=value]] - set or display debugger option(s)."
+
+#: command.y:863
+msgid "print var [var] - print value of a variable or array."
+msgstr "print var [var] - print value of a variable or array."
+
+#: command.y:865
+msgid "printf format, [arg], ... - formatted output."
+msgstr "printf format, [arg], ... - formatted output."
+
+#: command.y:867
+msgid "quit - exit debugger."
+msgstr "quit - exit debugger."
+
+#: command.y:869
+msgid "return [value] - make selected stack frame return to its caller."
+msgstr "return [value] - make selected stack frame return to its caller."
+
+#: command.y:871
+msgid "run - start or restart executing program."
+msgstr "run - start or restart executing program."
+
+#: command.y:874
+msgid "save filename - save commands from the session to file."
+msgstr "save filename - save commands from the session to file."
+
+#: command.y:877
+msgid "set var = value - assign value to a scalar variable."
+msgstr "set var = value - assign value to a scalar variable."
+
+#: command.y:879
+msgid "silent - suspends usual message when stopped at a breakpoint/watchpoint."
+msgstr "silent - suspends usual message when stopped at a breakpoint/watchpoint."
+
+#: command.y:881
+msgid "source file - execute commands from file."
+msgstr "source file - execute commands from file."
+
+#: command.y:883
+msgid "step [COUNT] - step program until it reaches a different source line."
+msgstr "step [COUNT] - step program until it reaches a different source line."
+
+#: command.y:885
+msgid "stepi [COUNT] - step one instruction exactly."
+msgstr "stepi [COUNT] - step one instruction exactly."
+
+#: command.y:887
+msgid "tbreak [[filename:]N|function] - set a temporary breakpoint."
+msgstr "tbreak [[filename:]N|function] - set a temporary breakpoint."
+
+#: command.y:889
+msgid "trace on|off - print instruction before executing."
+msgstr "trace on|off - print instruction before executing."
+
+#: command.y:891
+msgid "undisplay [N] - remove variable(s) from automatic display list."
+msgstr "undisplay [N] - remove variable(s) from automatic display list."
+
+#: command.y:893
+msgid "until [[filename:]N|function] - execute until program reaches a different line or line N within current frame."
+msgstr "until [[filename:]N|function] - execute until program reaches a different line or line N within current frame."
+
+#: command.y:895
+msgid "unwatch [N] - remove variable(s) from watch list."
+msgstr "unwatch [N] - remove variable(s) from watch list."
+
+#: command.y:897
+msgid "up [N] - move N frames up the stack."
+msgstr "up [N] - move N frames up the stack."
+
+#: command.y:899
+msgid "watch var - set a watchpoint for a variable."
+msgstr "watch var - set a watchpoint for a variable."
+
+#: command.y:1011 debug.c:401 msg.c:135
+#, c-format
+msgid "error: "
+msgstr "error: "
+
+#: command.y:1051
+#, c-format
+msgid "can't read command (%s)\n"
+msgstr "tidak dapat membaca perintah (%s)\n"
+
+#: command.y:1065
+#, c-format
+msgid "can't read command (%s)"
+msgstr "tidak dapat membaca perintah (%s)"
+
+#: command.y:1116
+msgid "invalid character in command"
+msgstr "karakter tidak valid dalam perintah"
+
+#: command.y:1152
+#, c-format
+msgid "unknown command - \"%.*s\", try help"
+msgstr "unknown command - \"%.*s\", try help"
+
+#: command.y:1222
+#, c-format
+msgid "%s"
+msgstr "%s"
+
+#: command.y:1284
+msgid "invalid character"
+msgstr "karakter tidak valid"
+
+#: command.y:1455
+#, c-format
+msgid "undefined command: %s\n"
+msgstr "undefined command: %s\n"
+
+#: debug.c:252
+msgid "set or show the number of lines to keep in history file."
+msgstr "set or show the number of lines to keep in history file."
+
+#: debug.c:254
+msgid "set or show the list command window size."
+msgstr "set or show the list command window size."
+
+#: debug.c:256
+msgid "set or show gawk output file."
+msgstr "set or show gawk output file."
+
+#: debug.c:258
+msgid "set or show debugger prompt."
+msgstr "set or show debugger prompt."
+
+#: debug.c:260
+msgid "(un)set or show saving of command history (value=on|off)."
+msgstr "(un)set or show saving of command history (value=on|off)."
+
+#: debug.c:262
+msgid "(un)set or show saving of options (value=on|off)."
+msgstr "(un)set or show saving of options (value=on|off)."
+
+#: debug.c:264
+msgid "(un)set or show instruction tracing (value=on|off)."
+msgstr "(un)set or show instruction tracing (value=on|off)."
+
+#: debug.c:345
+msgid "program not running."
+msgstr "program not running."
+
+#: debug.c:448 debug.c:606
+#, c-format
+msgid "can't read source file `%s' (%s)"
+msgstr "tidak dapat membaca berkas sumber `%s' (%s)"
+
+#: debug.c:453
+#, c-format
+msgid "source file `%s' is empty.\n"
+msgstr "berkas sumber `%s' kosong.\n"
+
+#: debug.c:480
+msgid "no current source file."
+msgstr "no current source file."
+
+#: debug.c:505
+#, c-format
+msgid "cannot find source file named `%s' (%s)"
+msgstr "tidak dapat menemukan berkas sumber bernama `%s' (%s)"
+
+#: debug.c:529
+#, c-format
+msgid "WARNING: source file `%s' modified since program compilation.\n"
+msgstr "WARNING: source file `%s' modified since program compilation.\n"
+
+#: debug.c:551
+#, c-format
+msgid "line number %d out of range; `%s' has %d lines"
+msgstr "line number %d out of range; `%s' has %d lines"
+
+#: debug.c:611
+#, c-format
+msgid "unexpected eof while reading file `%s', line %d"
+msgstr "akhir berkas tidak terduga ketika membaca berkas `%s', baris %d"
+
+#: debug.c:620
+#, c-format
+msgid "source file `%s' modified since start of program execution"
+msgstr "source file `%s' modified since start of program execution"
+
+#: debug.c:732
+#, c-format
+msgid "Current source file: %s\n"
+msgstr "Current source file: %s\n"
+
+#: debug.c:733
+#, c-format
+msgid "Number of lines: %d\n"
+msgstr "Number of lines: %d\n"
+
+#: debug.c:740
+#, c-format
+msgid "Source file (lines): %s (%d)\n"
+msgstr "Source file (lines): %s (%d)\n"
+
+#: debug.c:754
+msgid ""
+"Number Disp Enabled Location\n"
+"\n"
+msgstr ""
+"Number Disp Enabled Location\n"
+"\n"
+
+#: debug.c:765
+#, c-format
+msgid "\tno of hits = %ld\n"
+msgstr "\tno of hits = %ld\n"
+
+#: debug.c:767
+#, c-format
+msgid "\tignore next %ld hit(s)\n"
+msgstr "\tignore next %ld hit(s)\n"
+
+#: debug.c:769 debug.c:909
+#, c-format
+msgid "\tstop condition: %s\n"
+msgstr "\tstop condition: %s\n"
+
+#: debug.c:771 debug.c:911
+msgid "\tcommands:\n"
+msgstr "\tcommands:\n"
+
+#: debug.c:793
+#, c-format
+msgid "Current frame: "
+msgstr "Current frame: "
+
+#: debug.c:796
+#, c-format
+msgid "Called by frame: "
+msgstr "Called by frame: "
+
+#: debug.c:800
+#, c-format
+msgid "Caller of frame: "
+msgstr "Caller of frame: "
+
+#: debug.c:818
+#, c-format
+msgid "None in main().\n"
+msgstr "None in main().\n"
+
+#: debug.c:848
+msgid "No arguments.\n"
+msgstr "Tidak ada argumen.\n"
+
+#: debug.c:849
+msgid "No locals.\n"
+msgstr "No locals.\n"
+
+#: debug.c:857
+msgid ""
+"All defined variables:\n"
+"\n"
+msgstr ""
+"All defined variables:\n"
+"\n"
+
+#: debug.c:867
+msgid ""
+"All defined functions:\n"
+"\n"
+msgstr ""
+"All defined functions:\n"
+"\n"
+
+#: debug.c:886
+msgid ""
+"Auto-display variables:\n"
+"\n"
+msgstr ""
+"Auto-display variables:\n"
+"\n"
+
+#: debug.c:889
+msgid ""
+"Watch variables:\n"
+"\n"
+msgstr ""
+"Watch variables:\n"
+"\n"
+
+#: debug.c:1029
+#, c-format
+msgid "no symbol `%s' in current context\n"
+msgstr "no symbol `%s' in current context\n"
+
+#: debug.c:1041 debug.c:1427
+#, c-format
+msgid "`%s' is not an array\n"
+msgstr "`%s' bukan sebuah array\n"
+
+#: debug.c:1055
+#, c-format
+msgid "$%ld = uninitialized field\n"
+msgstr "$%ld = field tidak terinisialisasi\n"
+
+#: debug.c:1076
+#, c-format
+msgid "array `%s' is empty\n"
+msgstr "array `%s' kosong\n"
+
+#: debug.c:1119 debug.c:1171
+#, c-format
+msgid "[\"%s\"] not in array `%s'\n"
+msgstr "indeks [\"%s\"] tidak dalam array `%s'\n"
+
+#: debug.c:1175
+#, c-format
+msgid "`%s[\"%s\"]' is not an array\n"
+msgstr "`%s[\"%s\"]' is not an array\n"
+
+#: debug.c:1236 debug.c:4964
+#, c-format
+msgid "`%s' is not a scalar variable"
+msgstr "`%s' bukan sebuah variabel skalar"
+
+#: debug.c:1258 debug.c:4994
+#, c-format
+msgid "attempt to use array `%s[\"%s\"]' in a scalar context"
+msgstr "mencoba menggunakan array `%s[\"%s\"]' dalam sebuah konteks skalar"
+
+#: debug.c:1280 debug.c:5005
+#, c-format
+msgid "attempt to use scalar `%s[\"%s\"]' as array"
+msgstr "mencoba untuk menggunakan skalar `%s[\"%s\"]' sebagai sebuah array"
+
+#: debug.c:1423
+#, c-format
+msgid "`%s' is a function"
+msgstr "`%s' adalah sebuah fungsi"
+
+#: debug.c:1465
+#, c-format
+msgid "watchpoint %d is unconditional\n"
+msgstr "watchpoint %d is unconditional\n"
+
+#: debug.c:1499
+#, c-format
+msgid "No display item numbered %ld"
+msgstr "No display item numbered %ld"
+
+#: debug.c:1502
+#, c-format
+msgid "No watch item numbered %ld"
+msgstr "No watch item numbered %ld"
+
+#: debug.c:1528
+#, c-format
+msgid "%d: [\"%s\"] not in array `%s'\n"
+msgstr "%d: indeks [\"%s\"] tidak dalam array `%s'\n"
+
+#: debug.c:1767
+msgid "attempt to use scalar value as array"
+msgstr "mencoba untuk menggunakan skalar sebagai sebuah array"
+
+#: debug.c:1856
+#, c-format
+msgid "Watchpoint %d deleted because parameter is out of scope.\n"
+msgstr "Watchpoint %d deleted because parameter is out of scope.\n"
+
+#: debug.c:1867
+#, c-format
+msgid "Display %d deleted because parameter is out of scope.\n"
+msgstr "Display %d deleted because parameter is out of scope.\n"
+
+#: debug.c:1900
+#, c-format
+msgid " in file `%s', line %d\n"
+msgstr " in file `%s', line %d\n"
+
+#: debug.c:1921
+#, c-format
+msgid " at `%s':%d"
+msgstr " at `%s':%d"
+
+#: debug.c:1937 debug.c:2000
+#, c-format
+msgid "#%ld\tin "
+msgstr "#%ld\tin "
+
+#: debug.c:1974
+#, c-format
+msgid "More stack frames follow ...\n"
+msgstr "More stack frames follow ...\n"
+
+#: debug.c:2017
+msgid "invalid frame number"
+msgstr "nomor frame tidak valid"
+
+#: debug.c:2200
+#, c-format
+msgid "Note: breakpoint %d (enabled, ignore next %ld hits), also set at %s:%d"
+msgstr "Note: breakpoint %d (enabled, ignore next %ld hits), also set at %s:%d"
+
+#: debug.c:2207
+#, c-format
+msgid "Note: breakpoint %d (enabled), also set at %s:%d"
+msgstr "Note: breakpoint %d (enabled), also set at %s:%d"
+
+#: debug.c:2214
+#, c-format
+msgid "Note: breakpoint %d (disabled, ignore next %ld hits), also set at %s:%d"
+msgstr "Note: breakpoint %d (disabled, ignore next %ld hits), also set at %s:%d"
+
+#: debug.c:2221
+#, c-format
+msgid "Note: breakpoint %d (disabled), also set at %s:%d"
+msgstr "Note: breakpoint %d (disabled), also set at %s:%d"
+
+#: debug.c:2238
+#, c-format
+msgid "Breakpoint %d set at file `%s', line %d\n"
+msgstr "Breakpoint %d set at file `%s', line %d\n"
+
+#: debug.c:2340
+#, c-format
+msgid "Can't set breakpoint in file `%s'\n"
+msgstr "Can't set breakpoint in file `%s'\n"
+
+#: debug.c:2369 debug.c:2492 debug.c:3350
+#, c-format
+msgid "line number %d in file `%s' out of range"
+msgstr "nomor baris %d dalam berkas `%s' diluar dari jangkauan"
+
+#: debug.c:2373
+#, c-format
+msgid "Can't find rule!!!\n"
+msgstr "Can't find rule!!!\n"
+
+#: debug.c:2375
+#, c-format
+msgid "Can't set breakpoint at `%s':%d\n"
+msgstr "Can't set breakpoint at `%s':%d\n"
+
+#: debug.c:2387
+#, c-format
+msgid "Can't set breakpoint in function `%s'\n"
+msgstr "Can't set breakpoint in function `%s'\n"
+
+#: debug.c:2403
+#, c-format
+msgid "breakpoint %d set at file `%s', line %d is unconditional\n"
+msgstr "breakpoint %d set at file `%s', line %d is unconditional\n"
+
+#: debug.c:2508 debug.c:2530
+#, c-format
+msgid "Deleted breakpoint %d"
+msgstr "Deleted breakpoint %d"
+
+#: debug.c:2514
+#, c-format
+msgid "No breakpoint(s) at entry to function `%s'\n"
+msgstr "No breakpoint(s) at entry to function `%s'\n"
+
+#: debug.c:2541
+#, c-format
+msgid "No breakpoint at file `%s', line #%d\n"
+msgstr "Tidak ada breakpoint di berkas `%s', baris #%d\n"
+
+#: debug.c:2596 debug.c:2637 debug.c:2657 debug.c:2700
+msgid "invalid breakpoint number"
+msgstr "invalid breakpoint number"
+
+#: debug.c:2612
+msgid "Delete all breakpoints? (y or n) "
+msgstr "Delete all breakpoints? (y or n) "
+
+#: debug.c:2613 debug.c:2923 debug.c:2976
+msgid "y"
+msgstr "y"
+
+#: debug.c:2662
+#, c-format
+msgid "Will ignore next %ld crossing(s) of breakpoint %d.\n"
+msgstr "Will ignore next %ld crossing(s) of breakpoint %d.\n"
+
+#: debug.c:2666
+#, c-format
+msgid "Will stop next time breakpoint %d is reached.\n"
+msgstr "Will stop next time breakpoint %d is reached.\n"
+
+#: debug.c:2783
+#, c-format
+msgid "Can only debug programs provided with the `-f' option.\n"
+msgstr "Can only debug programs provided with the `-f' option.\n"
+
+#: debug.c:2908
+#, c-format
+msgid "Failed to restart debugger"
+msgstr "Failed to restart debugger"
+
+#: debug.c:2922
+msgid "Program already running. Restart from beginning (y/n)? "
+msgstr "Program already running. Restart from beginning (y/n)? "
+
+#: debug.c:2926
+#, c-format
+msgid "Program not restarted\n"
+msgstr "Program not restarted\n"
+
+#: debug.c:2936
+#, c-format
+msgid "error: cannot restart, operation not allowed\n"
+msgstr "error: cannot restart, operation not allowed\n"
+
+#: debug.c:2942
+#, c-format
+msgid "error (%s): cannot restart, ignoring rest of the commands\n"
+msgstr "error (%s): cannot restart, ignoring rest of the commands\n"
+
+#: debug.c:2950
+#, c-format
+msgid "Starting program: \n"
+msgstr "Starting program: \n"
+
+#: debug.c:2959
+#, c-format
+msgid "Program exited %s with exit value: %d\n"
+msgstr "Program exited %s with exit value: %d\n"
+
+#: debug.c:2975
+msgid "The program is running. Exit anyway (y/n)? "
+msgstr "The program is running. Exit anyway (y/n)? "
+
+#: debug.c:3010
+#, c-format
+msgid "Not stopped at any breakpoint; argument ignored.\n"
+msgstr "Not stopped at any breakpoint; argument ignored.\n"
+
+#: debug.c:3015
+#, c-format
+msgid "invalid breakpoint number %d."
+msgstr "invalid breakpoint number %d."
+
+#: debug.c:3020
+#, c-format
+msgid "Will ignore next %ld crossings of breakpoint %d.\n"
+msgstr "Will ignore next %ld crossings of breakpoint %d.\n"
+
+#: debug.c:3207
+#, c-format
+msgid "'finish' not meaningful in the outermost frame main()\n"
+msgstr "'finish' not meaningful in the outermost frame main()\n"
+
+#: debug.c:3212
+#, c-format
+msgid "Run till return from "
+msgstr "Run till return from "
+
+#: debug.c:3255
+#, c-format
+msgid "'return' not meaningful in the outermost frame main()\n"
+msgstr "'return' not meaningful in the outermost frame main()\n"
+
+#: debug.c:3369
+#, c-format
+msgid "Can't find specified location in function `%s'\n"
+msgstr "Can't find specified location in function `%s'\n"
+
+#: debug.c:3377
+#, c-format
+msgid "invalid source line %d in file `%s'"
+msgstr "invalid source line %d in file `%s'"
+
+#: debug.c:3392
+#, c-format
+msgid "Can't find specified location %d in file `%s'\n"
+msgstr "Can't find specified location %d in file `%s'\n"
+
+#: debug.c:3424
+#, c-format
+msgid "element not in array\n"
+msgstr "elemen tidak dalam array\n"
+
+#: debug.c:3424
+#, c-format
+msgid "untyped variable\n"
+msgstr "untyped variable\n"
+
+#: debug.c:3466
+#, c-format
+msgid "Stopping in %s ...\n"
+msgstr "Stopping in %s ...\n"
+
+#: debug.c:3543
+#, c-format
+msgid "'finish' not meaningful with non-local jump '%s'\n"
+msgstr "'finish' not meaningful with non-local jump '%s'\n"
+
+#: debug.c:3550
+#, c-format
+msgid "'until' not meaningful with non-local jump '%s'\n"
+msgstr "'until' not meaningful with non-local jump '%s'\n"
+
+#: debug.c:4185
+msgid "\t------[Enter] to continue or q [Enter] to quit------"
+msgstr "\t------[Enter] to continue or q [Enter] to quit------"
+
+#: debug.c:4186
+msgid "q"
+msgstr "q"
+
+#: debug.c:5001
+#, c-format
+msgid "[\"%s\"] not in array `%s'"
+msgstr "[\"%s\"] tidak dalam array `%s'"
+
+#: debug.c:5207
+#, c-format
+msgid "sending output to stdout\n"
+msgstr "sending output to stdout\n"
+
+#: debug.c:5247
+msgid "invalid number"
+msgstr "invalid number"
+
+#: debug.c:5381
+#, c-format
+msgid "`%s' not allowed in current context; statement ignored"
+msgstr "`%s' not allowed in current context; statement ignored"
+
+#: debug.c:5389
+msgid "`return' not allowed in current context; statement ignored"
+msgstr "`return' not allowed in current context; statement ignored"
+
+#: debug.c:5590
+#, c-format
+msgid "No symbol `%s' in current context"
+msgstr "No symbol `%s' in current context"
+
+#: dfa.c:998 dfa.c:1001 dfa.c:1021 dfa.c:1031 dfa.c:1043 dfa.c:1094 dfa.c:1103
+#: dfa.c:1106 dfa.c:1111 dfa.c:1124 dfa.c:1191
+msgid "unbalanced ["
+msgstr "unbalanced ["
+
+#: dfa.c:1052
+msgid "invalid character class"
+msgstr "nama kelas karakter tidak valid"
+
+#: dfa.c:1228
+msgid "character class syntax is [[:space:]], not [:space:]"
+msgstr "character class syntax is [[:space:]], not [:space:]"
+
+#: dfa.c:1280
+msgid "unfinished \\ escape"
+msgstr "unfinished \\ escape"
+
+#: dfa.c:1427 regcomp.c:161
+msgid "Invalid content of \\{\\}"
+msgstr "Isi dari \\{\\} tidak valid"
+
+#: dfa.c:1430 regcomp.c:176
+msgid "Regular expression too big"
+msgstr "Ekspresi regular terlalu besar"
+
+#: dfa.c:1847
+msgid "unbalanced ("
+msgstr "unbalanced ("
+
+#: dfa.c:1973
+msgid "no syntax specified"
+msgstr "no syntax specified"
+
+#: dfa.c:1981
+msgid "unbalanced )"
+msgstr "unbalanced )"
+
+#: eval.c:394
#, c-format
msgid "unknown nodetype %d"
msgstr "tipe titik %d tidak diketahui"
-#: eval.c:421 eval.c:435
-#, fuzzy, c-format
+#: eval.c:405 eval.c:419
+#, c-format
msgid "unknown opcode %d"
msgstr "tipe titik %d tidak diketahui"
-#: eval.c:432
+#: eval.c:416
#, c-format
msgid "opcode %s not an operator or keyword"
-msgstr ""
+msgstr "opcode %s not an operator or keyword"
-#: eval.c:485
+#: eval.c:472
msgid "buffer overflow in genflags2str"
msgstr "buffer overflow dalam genflags2str"
-#: eval.c:696
+#: eval.c:675
#, c-format
msgid ""
"\n"
@@ -879,835 +1801,1208 @@ msgstr ""
"\t# Fungsi Call Stack:\n"
"\n"
-#: eval.c:723
+#: eval.c:704
msgid "`IGNORECASE' is a gawk extension"
msgstr "`IGNORECASE' adalah ekstensi gawk"
-#: eval.c:752
+#: eval.c:736
msgid "`BINMODE' is a gawk extension"
msgstr "`BINMODE' adalah ekstensi gawk"
-#: eval.c:810
+#: eval.c:794
#, c-format
msgid "BINMODE value `%s' is invalid, treated as 3"
msgstr "BINMODE nilai `%s' tidak valid, diperlakukan sebagai 3"
-#: eval.c:900
+#: eval.c:885
#, c-format
msgid "bad `%sFMT' specification `%s'"
msgstr "buruk `%sFMT' spesifikasi `%s'"
-#: eval.c:978
+#: eval.c:969
msgid "turning off `--lint' due to assignment to `LINT'"
-msgstr "menonaktifkan `--lint' karena assignmen ke `LINT'"
-
-#: eval.c:1247
-#, fuzzy
-msgid "sorted array traversal is a gawk extension"
-msgstr "`delete array' adalah sebuah ekstensi gawk"
-
-#: eval.c:1291
-msgid "`PROCINFO[\"sorted_in\"]' value is not recognized"
-msgstr ""
+msgstr "menonaktifkan `--lint' karena penempatan ke `LINT'"
-#: eval.c:1373 eval.c:1923
-#, c-format
-msgid "can't use function name `%s' as variable or array"
-msgstr ""
-"tidak dapat menggunakan nama fungsi `%s' sebagai sebuah variabel atau array"
-
-#: eval.c:1401
-msgid "assignment is not allowed to result of builtin function"
-msgstr "assignmen tidak diijinkan untuk menghasilkan fungsi bawaan"
-
-#: eval.c:1410 eval.c:1935 eval.c:1948
+#: eval.c:1147
#, c-format
msgid "reference to uninitialized argument `%s'"
msgstr "referensi ke argumen `%s' tidak terinisialisasi"
-#: eval.c:1429
+#: eval.c:1148
+#, c-format
+msgid "reference to uninitialized variable `%s'"
+msgstr "referensi ke variabel `%s' tidak terinisialisasi"
+
+#: eval.c:1166
msgid "attempt to field reference from non-numeric value"
msgstr "mencoba untuk mereferensi field dari nilai bukan numerik"
-#: eval.c:1431
-#, fuzzy
+#: eval.c:1168
msgid "attempt to field reference from null string"
msgstr "mencoba untuk mereferensi field dari null string"
-#: eval.c:1437
-#, fuzzy, c-format
+#: eval.c:1176
+#, c-format
msgid "attempt to access field %ld"
-msgstr "mencoba untuk mengakses field %d"
+msgstr "mencoba untuk mengakses field %ld"
-#: eval.c:1446
-#, fuzzy, c-format
+#: eval.c:1185
+#, c-format
msgid "reference to uninitialized field `$%ld'"
-msgstr "referensi ke field tidak terinisialisasi `$%d'"
+msgstr "referensi ke field tidak terinisialisasi `$%ld'"
-#: eval.c:1508
+#: eval.c:1272
#, c-format
msgid "function `%s' called with more arguments than declared"
msgstr "fungsi `%s' dipanggil argumen lebih dari yang dideklarasikan"
-#: eval.c:1663
+#: eval.c:1473
#, c-format
msgid "unwind_stack: unexpected type `%s'"
-msgstr ""
+msgstr "unwind_stack: unexpected type `%s'"
-#: eval.c:1747
+#: eval.c:1569
msgid "division by zero attempted in `/='"
msgstr "pembagian dengan nol dicoba dalam `/='"
-#: eval.c:1754
+#: eval.c:1576
#, c-format
msgid "division by zero attempted in `%%='"
msgstr "pembagian dengan nol dicoba dalam `%%='"
-#: eval.c:2057
-msgid "assignment used in conditional context"
-msgstr "assignment digunakan dalam konteks kondisional"
+#: ext.c:89 ext.c:171
+msgid "extensions are not allowed in sandbox mode"
+msgstr "extensions are not allowed in sandbox mode"
+
+#: ext.c:92
+msgid "-l / @load are gawk extensions"
+msgstr "-l / @load adalah ekstensi gawk"
-#: eval.c:2061
-msgid "statement has no effect"
-msgstr "pernyataan tidak memiliki efek"
+#: ext.c:95
+msgid "load_ext: received NULL lib_name"
+msgstr "load_ext: received NULL lib_name"
-#: eval.c:2473
+#: ext.c:98
#, c-format
-msgid "for loop: array `%s' changed size from %ld to %ld during loop execution"
-msgstr ""
-"for loop: array `%s' berubah ukuran dari %ld ke %ld selama eksekusi loop"
+msgid "load_ext: cannot open library `%s' (%s)\n"
+msgstr "load_ext: tidak dapat membuka `%s' (%s)\n"
-#: eval.c:2583
+#: ext.c:104
#, c-format
-msgid "function called indirectly through `%s' does not exist"
-msgstr ""
+msgid "load_ext: library `%s': does not define `plugin_is_GPL_compatible' (%s)\n"
+msgstr "load_ext: library `%s': does not define `plugin_is_GPL_compatible' (%s)\n"
-#: eval.c:2595
+#: ext.c:110
#, c-format
-msgid "function `%s' not defined"
-msgstr "fungsi `%s' tidak didefinisikan"
+msgid "load_ext: library `%s': cannot call function `%s' (%s)\n"
+msgstr "load_ext: perpustakaan `%s': tidak dapat memanggil fungsi `%s' (%s)\n"
-#: eval.c:2656
-#, fuzzy, c-format
-msgid "non-redirected `getline' invalid inside `%s' rule"
-msgstr "tidak terdireksi `getline' tidak terdefinisi didalam aksi END"
+#: ext.c:114
+#, c-format
+msgid "load_ext: library `%s' initialization routine `%s' failed\n"
+msgstr "load_ext: library `%s' initialization routine `%s' failed\n"
-#: eval.c:2717
-#, fuzzy, c-format
-msgid "`nextfile' cannot be called from a `%s' rule"
-msgstr "`nextfile' tidak dapat dipanggil dari sebuah aturan END"
+#: ext.c:174
+msgid "`extension' is a gawk extension"
+msgstr "`extension' adalah sebuah ekstensi gawk"
-#: eval.c:2767
-#, fuzzy, c-format
-msgid "`next' cannot be called from a `%s' rule"
-msgstr "`next' tidak dapat dipanggil dari sebuah aturan END"
+#: ext.c:177
+msgid "extension: received NULL lib_name"
+msgstr "extension: received NULL lib_name"
-#: eval.c:2834
+#: ext.c:180
#, c-format
-msgid "Sorry, don't know how to interpret `%s'"
-msgstr ""
+msgid "extension: cannot open library `%s' (%s)"
+msgstr "extension: tidak dapat membuka `%s' (%s)"
-#: ext.c:64
-msgid "extensions are not allowed in sandbox mode"
-msgstr ""
+#: ext.c:186
+#, c-format
+msgid "extension: library `%s': does not define `plugin_is_GPL_compatible' (%s)"
+msgstr "extension: perpustakaan `%s': tidak mendefinisikan `plugin_is_GPL_compatible' (%s)"
-#: ext.c:70 ext.c:75
-msgid "`extension' is a gawk extension"
-msgstr "`extension' adalah sebuah ekstensi gawk"
+#: ext.c:190
+#, c-format
+msgid "extension: library `%s': cannot call function `%s' (%s)"
+msgstr "extension: perpustakaan `%s': tidak dapat memanggil fungsi `%s' (%s)"
-#: ext.c:85
-#, fuzzy, c-format
-msgid "fatal: extension: cannot open `%s' (%s)\n"
-msgstr "extension: tidak dapat membuka `%s' (%s)\n"
+#: ext.c:221
+msgid "make_builtin: missing function name"
+msgstr "make_builtin: hilang nama fungsi"
-#: ext.c:94
-#, fuzzy, c-format
-msgid ""
-"fatal: extension: library `%s': does not define "
-"`plugin_is_GPL_compatible' (%s)\n"
-msgstr "extension: perpustakaan `%s': tidak dapat memanggil fungsi `%s' (%s)\n"
+#: ext.c:236
+#, c-format
+msgid "make_builtin: can't redefine function `%s'"
+msgstr "make_builtin: tidak dapat meredefinisi fungsi `%s'"
-#: ext.c:103
-#, fuzzy, c-format
-msgid "fatal: extension: library `%s': cannot call function `%s' (%s)\n"
-msgstr "extension: perpustakaan `%s': tidak dapat memanggil fungsi `%s' (%s)\n"
+#: ext.c:240
+#, c-format
+msgid "make_builtin: function `%s' already defined"
+msgstr "make_builtin: fungsi `%s' telah didefinisikan"
+
+#: ext.c:244
+#, c-format
+msgid "make_builtin: function name `%s' previously defined"
+msgstr "make_builtin: nama fungsi `%s' telah didefinisikan sebelumnya"
-#: ext.c:137
+#: ext.c:246
+#, c-format
+msgid "make_builtin: can't use gawk built-in `%s' as function name"
+msgstr "make_builtin: tidak dapat menggunakan gawk bawaan `%s' sebagai nama fungsi"
+
+#: ext.c:249 ext.c:304
+#, c-format
+msgid "make_builtin: negative argument count for function `%s'"
+msgstr "make_builtin: negative argument count for function `%s'"
+
+#: ext.c:276
msgid "extension: missing function name"
msgstr "extension: hilang nama fungsi"
-#: ext.c:142
+#: ext.c:279 ext.c:283
#, c-format
msgid "extension: illegal character `%c' in function name `%s'"
msgstr "extension: karakter `%c' tidak legal dalam nama fungsi `%s'"
-#: ext.c:151
+#: ext.c:291
#, c-format
msgid "extension: can't redefine function `%s'"
msgstr "extension: tidak dapat meredefinisi fungsi `%s'"
-#: ext.c:155
+#: ext.c:295
#, c-format
msgid "extension: function `%s' already defined"
msgstr "extension: fungsi `%s' telah didefinisikan"
-#: ext.c:160
+#: ext.c:299
#, c-format
msgid "extension: function name `%s' previously defined"
msgstr "extension: nama fungsi `%s' telah didefinisikan sebelumnya"
-#: ext.c:162
+#: ext.c:301
#, c-format
msgid "extension: can't use gawk built-in `%s' as function name"
-msgstr ""
-"extension: tidak dapat menggunakan gawk bawaan `%s' sebagai nama fungsi"
+msgstr "extension: tidak dapat menggunakan gawk bawaan `%s' sebagai nama fungsi"
-#: ext.c:166
+#: ext.c:375
#, c-format
-msgid "make_builtin: negative argument count for function `%s'"
-msgstr ""
-
-#: ext.c:269
-#, fuzzy, c-format
msgid "function `%s' defined to take no more than %d argument(s)"
msgstr "fungsi `%s' didefinisikan untuk mengambil tidak lebih dari %d argumen"
-#: ext.c:272
+#: ext.c:378
#, c-format
msgid "function `%s': missing argument #%d"
msgstr "fungsi `%s': hilang argumen #%d"
-#: ext.c:282
+#: ext.c:395
#, c-format
msgid "function `%s': argument #%d: attempt to use scalar as an array"
-msgstr ""
-"fungsi `%s': argumen #%d: mencoba menggunaka skalar sebagai sebuah array"
+msgstr "fungsi `%s': argumen #%d: mencoba menggunakan skalar sebagai sebuah array"
-#: ext.c:286
+#: ext.c:399
#, c-format
msgid "function `%s': argument #%d: attempt to use array as a scalar"
-msgstr ""
-"fungsi `%s': argumen #%d: mencoba untuk menggunakan array sebagai sebuah "
-"skalar"
+msgstr "fungsi `%s': argumen #%d: mencoba untuk menggunakan array sebagai sebuah skalar"
-#: ext.c:299
-msgid "Operation Not Supported"
-msgstr "Operasi Tidak Didukung"
+#: ext.c:413
+msgid "dynamic loading of library not supported"
+msgstr "dynamic loading of library not supported"
+
+#: extension/filefuncs.c:159
+msgid "chdir: called with incorrect number of arguments, expecting 1"
+msgstr "chdir: dipanggil dengan jumlah argumen yang salah, mengharapkan 1"
+
+#: extension/filefuncs.c:439
+#, c-format
+msgid "stat: unable to read symbolic link `%s'"
+msgstr "stat: unable to read symbolic link `%s'"
+
+#: extension/filefuncs.c:472
+msgid "stat: called with wrong number of arguments"
+msgstr "stat: dipanggil dengan argumen negatif"
+
+#: extension/filefuncs.c:479
+msgid "stat: bad parameters"
+msgstr "stat: adalah parameter"
+
+#: extension/filefuncs.c:533
+#, c-format
+msgid "fts init: could not create variable %s"
+msgstr "fts init: could not create variable %s"
+
+#: extension/filefuncs.c:554
+msgid "fts is not supported on this system"
+msgstr "fts tidak didukung pada sistem ini"
+
+#: extension/filefuncs.c:573
+msgid "fill_stat_element: could not create array"
+msgstr "fill_stat_element: could not create array"
+
+#: extension/filefuncs.c:582
+msgid "fill_stat_element: could not set element"
+msgstr "fill_stat_element: could not set element"
+
+#: extension/filefuncs.c:597
+msgid "fill_path_element: could not set element"
+msgstr "fill_path_element: could not set element"
+
+#: extension/filefuncs.c:613
+msgid "fill_error_element: could not set element"
+msgstr "fill_error_element: could not set element"
+
+#: extension/filefuncs.c:660 extension/filefuncs.c:707
+msgid "fts-process: could not create array"
+msgstr "fts-process: could not create array"
+
+#: extension/filefuncs.c:670 extension/filefuncs.c:717
+#: extension/filefuncs.c:735
+msgid "fts-process: could not set element"
+msgstr "fts-process: could not set element"
+
+#: extension/filefuncs.c:784
+msgid "fts: called with incorrect number of arguments, expecting 3"
+msgstr "fts: dipanggil dengan argumen negatif"
+
+#: extension/filefuncs.c:787
+msgid "fts: bad first parameter"
+msgstr "fts: adalah parameter"
+
+#: extension/filefuncs.c:793
+msgid "fts: bad second parameter"
+msgstr "fts: adalah parameter"
+
+#: extension/filefuncs.c:799
+msgid "fts: bad third parameter"
+msgstr "fts: adalah parameter"
+
+#: extension/filefuncs.c:806
+msgid "fts: could not flatten array\n"
+msgstr "fts: could not flatten array\n"
-#: field.c:328
+#: extension/filefuncs.c:824
+msgid "fts: ignoring sneaky FTS_NOSTAT flag. nyah, nyah, nyah."
+msgstr "fts: ignoring sneaky FTS_NOSTAT flag. nyah, nyah, nyah."
+
+#: extension/filefuncs.c:841
+msgid "fts: clear_array() failed\n"
+msgstr "fts: clear_array() failed\n"
+
+#: extension/fnmatch.c:112
+msgid "fnmatch: called with less than three arguments"
+msgstr "fnmatch: dipanggil dengan argumen negatif"
+
+#: extension/fnmatch.c:115
+msgid "fnmatch: called with more than three arguments"
+msgstr "fnmatch: dipanggil dengan argumen negatif"
+
+#: extension/fnmatch.c:118
+msgid "fnmatch: could not get first argument"
+msgstr "fnmatch: diterima argumen pertama bukan string"
+
+#: extension/fnmatch.c:123
+msgid "fnmatch: could not get second argument"
+msgstr "fnmatch: diterima argumen kedua bukan string"
+
+#: extension/fnmatch.c:128
+msgid "fnmatch: could not get third argument"
+msgstr "fnmatch: could not get third argument"
+
+#: extension/fnmatch.c:141
+msgid "fnmatch is not implemented on this system\n"
+msgstr "fnmatch is not implemented on this system\n"
+
+#: extension/fnmatch.c:173
+msgid "fnmatch init: could not add FNM_NOMATCH variable"
+msgstr "fnmatch init: could not add FNM_NOMATCH variable"
+
+#: extension/fnmatch.c:183
+#, c-format
+msgid "fnmatch init: could not set array element %s"
+msgstr "fnmatch init: could not set array element %s"
+
+#: extension/fnmatch.c:193
+msgid "fnmatch init: could not install FNM array"
+msgstr "fnmatch init: could not install FNM array"
+
+#: extension/fork.c:81
+msgid "fork: called with too many arguments"
+msgstr "fork: dipanggil dengan argumen negatif"
+
+#: extension/fork.c:94
+msgid "fork: PROCINFO is not an array!"
+msgstr "fork: PROCINFO is not an array!"
+
+#: extension/fork.c:118
+msgid "waitpid: called with too many arguments"
+msgstr "waitpid: dipanggil dengan argumen negatif"
+
+#: extension/fork.c:126
+msgid "wait: called with no arguments"
+msgstr "wait: dipanggil dengan argumen negatif"
+
+#: extension/fork.c:143
+msgid "wait: called with too many arguments"
+msgstr "wait: dipanggil dengan argumen negatif"
+
+#: extension/inplace.c:130
+msgid "inplace_begin: in-place editing already active"
+msgstr "inplace_begin: in-place editing already active"
+
+#: extension/inplace.c:133 extension/inplace.c:207
+#, c-format
+msgid "inplace_begin: expects 2 arguments but called with %d"
+msgstr "inplace_begin: expects 2 arguments but called with %d"
+
+#: extension/inplace.c:136
+msgid "inplace_begin: cannot retrieve 1st argument as a string filename"
+msgstr "inplace_begin: cannot retrieve 1st argument as a string filename"
+
+#: extension/inplace.c:144
+#, c-format
+msgid "inplace_begin: disabling in-place editing for invalid FILENAME `%s'"
+msgstr "inplace_begin: disabling in-place editing for invalid FILENAME `%s'"
+
+#: extension/inplace.c:151
+#, c-format
+msgid "inplace_begin: Cannot stat `%s' (%s)"
+msgstr "inplace_begin: tidak dapat membuka `%s' (%s)"
+
+#: extension/inplace.c:158
+#, c-format
+msgid "inplace_begin: `%s' is not a regular file"
+msgstr "inplace_begin: `%s' is not a regular file"
+
+#: extension/inplace.c:169
+#, c-format
+msgid "inplace_begin: mkstemp(`%s') failed (%s)"
+msgstr "inplace_begin: mkstemp(`%s') failed (%s)"
+
+#: extension/inplace.c:178
+#, c-format
+msgid "inplace_begin: chmod failed (%s)"
+msgstr "inplace_begin: tutup gagal (%s)"
+
+#: extension/inplace.c:185
+#, c-format
+msgid "inplace_begin: dup(stdout) failed (%s)"
+msgstr "inplace_begin: dup(stdout) failed (%s)"
+
+#: extension/inplace.c:188
+#, c-format
+msgid "inplace_begin: dup2(%d, stdout) failed (%s)"
+msgstr "inplace_begin: dup2(%d, stdout) failed (%s)"
+
+#: extension/inplace.c:191
+#, c-format
+msgid "inplace_begin: close(%d) failed (%s)"
+msgstr "inplace_begin: tutup(%d) gagal (%s)"
+
+#: extension/inplace.c:210
+msgid "inplace_end: cannot retrieve 1st argument as a string filename"
+msgstr "inplace_end: cannot retrieve 1st argument as a string filename"
+
+#: extension/inplace.c:217
+msgid "inplace_end: in-place editing not active"
+msgstr "inplace_end: in-place editing not active"
+
+#: extension/inplace.c:223
+#, c-format
+msgid "inplace_end: dup2(%d, stdout) failed (%s)"
+msgstr "inplace_end: dup2(%d, stdout) failed (%s)"
+
+#: extension/inplace.c:226
+#, c-format
+msgid "inplace_end: close(%d) failed (%s)"
+msgstr "inplace_end: tutup(%d) gagal (%s)"
+
+#: extension/inplace.c:230
+#, c-format
+msgid "inplace_end: fsetpos(stdout) failed (%s)"
+msgstr "inplace_end: fsetpos(stdout) failed (%s)"
+
+#: extension/inplace.c:243
+#, c-format
+msgid "inplace_end: link(`%s', `%s') failed (%s)"
+msgstr "inplace_end: link(`%s', `%s') gagal (%s)"
+
+#: extension/inplace.c:253
+#, c-format
+msgid "inplace_end: rename(`%s', `%s') failed (%s)"
+msgstr "inplace_end: rename(`%s', `%s') gagal (%s)"
+
+#: extension/ordchr.c:69
+msgid "ord: called with too many arguments"
+msgstr "ord: dipanggil dengan argumen negatif"
+
+#: extension/ordchr.c:75
+msgid "ord: called with no arguments"
+msgstr "ord: dipanggil dengan argumen negatif"
+
+#: extension/ordchr.c:77
+msgid "ord: called with inappropriate argument(s)"
+msgstr "ord: dipanggil dengan argumen negatif"
+
+#: extension/ordchr.c:99
+msgid "chr: called with too many arguments"
+msgstr "chr: dipanggil dengan argumen negatif"
+
+#: extension/ordchr.c:109
+msgid "chr: called with no arguments"
+msgstr "chr: dipanggil dengan argumen negatif"
+
+#: extension/ordchr.c:111
+msgid "chr: called with inappropriate argument(s)"
+msgstr "chr: dipanggil dengan argumen negatif"
+
+#: extension/readdir.c:277
+#, c-format
+msgid "dir_take_control_of: opendir/fdopendir failed: %s"
+msgstr "dir_take_control_of: opendir/fdopendir failed: %s"
+
+#: extension/readfile.c:84
+msgid "readfile: called with too many arguments"
+msgstr "readfile: dipanggil dengan argumen negatif"
+
+#: extension/readfile.c:118
+msgid "readfile: called with no arguments"
+msgstr "readfile: dipanggil dengan argumen negatif"
+
+#: extension/rwarray.c:124
+msgid "writea: called with too many arguments"
+msgstr "writea: dipanggil dengan argumen negatif"
+
+#: extension/rwarray.c:131
+#, c-format
+msgid "do_writea: argument 0 is not a string\n"
+msgstr "do_writea: argumen 0 bukan sebuah string\n"
+
+#: extension/rwarray.c:137
+#, c-format
+msgid "do_writea: argument 1 is not an array\n"
+msgstr "do_writea: argumen kedua bukan sebuah array\n"
+
+#: extension/rwarray.c:184
+#, c-format
+msgid "write_array: could not flatten array\n"
+msgstr "write_array: could not flatten array\n"
+
+#: extension/rwarray.c:198
+#, c-format
+msgid "write_array: could not release flattened array\n"
+msgstr "write_array: could not release flattened array\n"
+
+#: extension/rwarray.c:280
+msgid "reada: called with too many arguments"
+msgstr "reada: dipanggil dengan argumen negatif"
+
+#: extension/rwarray.c:287
+#, c-format
+msgid "do_reada: argument 0 is not a string\n"
+msgstr "do_reada: argumen 0 bukan sebuah string\n"
+
+#: extension/rwarray.c:293
+#, c-format
+msgid "do_reada: argument 1 is not an array\n"
+msgstr "do_reada: argumen 1 bukan sebuah array\n"
+
+#: extension/rwarray.c:337
+#, c-format
+msgid "do_reada: clear_array failed\n"
+msgstr "do_reada: clear_array failed\n"
+
+#: extension/rwarray.c:374
+#, c-format
+msgid "read_array: set_array_element failed\n"
+msgstr "read_array: set_array_element failed\n"
+
+#: extension/time.c:106
+msgid "gettimeofday: ignoring arguments"
+msgstr "gettimeofday: diterima argumen bukan string"
+
+#: extension/time.c:137
+msgid "gettimeofday: not supported on this platform"
+msgstr "gettimeofday: not supported on this platform"
+
+#: extension/time.c:158
+msgid "sleep: called with too many arguments"
+msgstr "sleep: dipanggil dengan argumen negatif"
+
+#: extension/time.c:161
+msgid "sleep: missing required numeric argument"
+msgstr "sleep: diterima argumen bukan-numerik"
+
+#: extension/time.c:167
+msgid "sleep: argument is negative"
+msgstr "sleep: argumen diluar dari jangkauan"
+
+#: extension/time.c:201
+msgid "sleep: not supported on this platform"
+msgstr "sleep: not supported on this platform"
+
+#: field.c:345
msgid "NF set to negative value"
msgstr "NF set ke nilai negatif"
-#: field.c:939 field.c:946 field.c:950
-#, fuzzy
+#: field.c:971 field.c:978 field.c:982
msgid "split: fourth argument is a gawk extension"
-msgstr "cocok: argumen ketiga adalah sebuah ekstensi gawk"
+msgstr "split: argumen keempat adalah sebuah ekstensi gawk"
-#: field.c:943
-#, fuzzy
+#: field.c:975
msgid "split: fourth argument is not an array"
msgstr "split: argumen keempat bukan sebuah array"
-#: field.c:957
+#: field.c:989
msgid "split: second argument is not an array"
msgstr "split: argumen kedua bukan sebuah array"
-#: field.c:962
-msgid "split: can not use the same array for second and fourth args"
-msgstr ""
+#: field.c:993
+msgid "split: cannot use the same array for second and fourth args"
+msgstr "split: cannot use the same array for second and fourth args"
-#: field.c:990
+#: field.c:998
+msgid "split: cannot use a subarray of second arg for fourth arg"
+msgstr "split: cannot use a subarray of second arg for fourth arg"
+
+#: field.c:1001
+msgid "split: cannot use a subarray of fourth arg for second arg"
+msgstr "split: cannot use a subarray of fourth arg for second arg"
+
+#: field.c:1032
msgid "split: null string for third arg is a gawk extension"
msgstr "split: null string untuk arg ketiga adalah sebuah ekstensi gawk"
-#: field.c:1031
-#, fuzzy
+#: field.c:1072
msgid "patsplit: fourth argument is not an array"
-msgstr "split: argumen kedua bukan sebuah array"
+msgstr "patsplit: argumen keempat bukan sebuah array"
-#: field.c:1036
-#, fuzzy
+#: field.c:1077
msgid "patsplit: second argument is not an array"
-msgstr "split: argumen kedua bukan sebuah array"
+msgstr "patsplit: argumen kedua bukan sebuah array"
-#: field.c:1054
-#, fuzzy
+#: field.c:1083
msgid "patsplit: third argument must be non-null"
-msgstr "match: argumen ketiga bukan sebuah array"
+msgstr "patsplit: argumen ketiga tidak boleh null"
-#: field.c:1059
-msgid "patsplit: can not use the same array for second and fourth args"
-msgstr ""
+#: field.c:1087
+msgid "patsplit: cannot use the same array for second and fourth args"
+msgstr "patsplit: cannot use the same array for second and fourth args"
-#: field.c:1089
+#: field.c:1092
+msgid "patsplit: cannot use a subarray of second arg for fourth arg"
+msgstr "patsplit: cannot use a subarray of second arg for fourth arg"
+
+#: field.c:1095
+msgid "patsplit: cannot use a subarray of fourth arg for second arg"
+msgstr "patsplit: cannot use a subarray of fourth arg for second arg"
+
+#: field.c:1133
msgid "`FIELDWIDTHS' is a gawk extension"
msgstr "`FIELDWIDTHS' adalah sebuah ekstensi gawk"
-#: field.c:1152
+#: field.c:1197
#, c-format
msgid "invalid FIELDWIDTHS value, near `%s'"
msgstr "nilai FIELDWIDTHS tidak valid, didekat `%s'"
-#: field.c:1225
+#: field.c:1270
msgid "null string for `FS' is a gawk extension"
msgstr "null string untuk `FS' adalah sebuah ekstensi gawk"
-#: field.c:1229
+#: field.c:1274
msgid "old awk does not support regexps as value of `FS'"
msgstr "awk lama tidak mendukung regexps sebagai nilai dari `FS'"
-#: field.c:1348
-#, fuzzy
+#: field.c:1393
msgid "`FPAT' is a gawk extension"
-msgstr "`%s' adalah sebuah ekstensi gawk"
+msgstr "`FPAT' adalah sebuah ekstensi gawk"
+
+#: gawkapi.c:146
+msgid "awk_value_to_node: received null retval"
+msgstr "awk_value_to_node: received null retval"
+
+#: gawkapi.c:384
+msgid "node_to_awk_value: received null node"
+msgstr "node_to_awk_value: received null node"
+
+#: gawkapi.c:387
+msgid "node_to_awk_value: received null val"
+msgstr "node_to_awk_value: received null val"
+
+#: gawkapi.c:808
+msgid "remove_element: received null array"
+msgstr "remove_element: received null array"
+
+#: gawkapi.c:811
+msgid "remove_element: received null subscript"
+msgstr "remove_element: received null subscript"
+
+#: gawkapi.c:948
+#, c-format
+msgid "api_flatten_array: could not convert index %d\n"
+msgstr "api_flatten_array: could not convert index %d\n"
+
+#: gawkapi.c:953
+#, c-format
+msgid "api_flatten_array: could not convert value %d\n"
+msgstr "api_flatten_array: could not convert value %d\n"
-#: getopt.c:574 getopt.c:590
+#: getopt.c:604 getopt.c:633
#, c-format
-msgid "%s: option '%s' is ambiguous\n"
-msgstr "%s: pilihan '%s' adalah ambigu\n"
+msgid "%s: option '%s' is ambiguous; possibilities:"
+msgstr "%s: pilihan '%s' adalah ambigu; kemungkinan:"
-#: getopt.c:623 getopt.c:627
+#: getopt.c:679 getopt.c:683
#, c-format
msgid "%s: option '--%s' doesn't allow an argument\n"
msgstr "%s: pilihan '--%s' tidak mengijinkan sebuah argumen\n"
-#: getopt.c:636 getopt.c:641
+#: getopt.c:692 getopt.c:697
#, c-format
msgid "%s: option '%c%s' doesn't allow an argument\n"
msgstr "%s: pilihan '%c%s' tidak mengijinkan sebuah argumen\n"
-#: getopt.c:684 getopt.c:703
-#, fuzzy, c-format
+#: getopt.c:740 getopt.c:759
+#, c-format
msgid "%s: option '--%s' requires an argument\n"
-msgstr "%s: pilihan '%s' membutuhkan sebuah argumen\n"
+msgstr "%s: pilihan '--%s' membutuhkan sebuah argumen\n"
-#: getopt.c:741 getopt.c:744
+#: getopt.c:797 getopt.c:800
#, c-format
msgid "%s: unrecognized option '--%s'\n"
msgstr "%s: pilihan tidak dikenal '--%s'\n"
-#: getopt.c:752 getopt.c:755
+#: getopt.c:808 getopt.c:811
#, c-format
msgid "%s: unrecognized option '%c%s'\n"
msgstr "%s: pilihan tidak dikenal '%c%s'\n"
-#: getopt.c:804 getopt.c:807
+#: getopt.c:860 getopt.c:863
#, c-format
msgid "%s: invalid option -- '%c'\n"
msgstr "%s: pilihan tidak valid -- '%c'\n"
-#: getopt.c:857 getopt.c:874 getopt.c:1082 getopt.c:1100
+#: getopt.c:916 getopt.c:933 getopt.c:1143 getopt.c:1161
#, c-format
msgid "%s: option requires an argument -- '%c'\n"
msgstr "%s: pilihan membutuhkan sebuah argumen -- '%c'\n"
-#: getopt.c:930 getopt.c:946
+#: getopt.c:989 getopt.c:1005
#, c-format
msgid "%s: option '-W %s' is ambiguous\n"
msgstr "%s: pilihan '-W %s' adalah ambigu\n"
-#: getopt.c:970 getopt.c:988
+#: getopt.c:1029 getopt.c:1047
#, c-format
msgid "%s: option '-W %s' doesn't allow an argument\n"
msgstr "%s: pilihan '-W %s' tidak mengijinkan sebuah argumen\n"
-#: getopt.c:1009 getopt.c:1027
-#, fuzzy, c-format
+#: getopt.c:1068 getopt.c:1086
+#, c-format
msgid "%s: option '-W %s' requires an argument\n"
-msgstr "%s: pilihan '%s' membutuhkan sebuah argumen\n"
+msgstr "%s: pilihan '-W %s' membutuhkan sebuah argumen\n"
-#: io.c:282
+#: io.c:392
#, c-format
msgid "command line argument `%s' is a directory: skipped"
-msgstr ""
+msgstr "command line argument `%s' is a directory: skipped"
-#: io.c:285 io.c:382
+#: io.c:395 io.c:513
#, c-format
msgid "cannot open file `%s' for reading (%s)"
msgstr "tidak dapat membuka berkas `%s' untuk membaca (%s)"
-#: io.c:429
-#, c-format
-msgid "error reading input file `%s': %s"
-msgstr "error membaca berkas masukan `%s': %s"
-
-#: io.c:498
+#: io.c:640
#, c-format
msgid "close of fd %d (`%s') failed (%s)"
msgstr "penutupan dari fd %d (`%s') gagal (%s)"
-#: io.c:575
+#: io.c:716
msgid "redirection not allowed in sandbox mode"
-msgstr ""
+msgstr "redirection not allowed in sandbox mode"
-#: io.c:609
+#: io.c:750
#, c-format
msgid "expression in `%s' redirection only has numeric value"
msgstr "ekspresi dalam `%s' redireksi hanya memiliki nilai numerik"
-#: io.c:615
+#: io.c:756
#, c-format
msgid "expression for `%s' redirection has null string value"
msgstr "ekspresi untuk `%s' redireksi hanya memiliki nilai string null"
-#: io.c:621
+#: io.c:761
#, c-format
msgid "filename `%s' for `%s' redirection may be result of logical expression"
-msgstr ""
-"nama berkas `%s' untuk `%s' redireksi hanya menghasilkan ekspresi logikal"
+msgstr "nama berkas `%s' untuk `%s' redireksi hanya menghasilkan ekspresi logikal"
-#: io.c:664
+#: io.c:809
#, c-format
msgid "unnecessary mixing of `>' and `>>' for file `%.*s'"
msgstr "pencampuran tidak perlu dari `>' dan `>>' untuk berkas `%.*s'"
-#: io.c:717
+#: io.c:863
#, c-format
msgid "can't open pipe `%s' for output (%s)"
msgstr "tidak dapat membuka pipe `%s' untuk keluaran (%s)"
-#: io.c:727
+#: io.c:873
#, c-format
msgid "can't open pipe `%s' for input (%s)"
msgstr "tidak dapat membuka pipe `%s' untuk masukan (%s)"
-#: io.c:749
+#: io.c:904
#, c-format
msgid "can't open two way pipe `%s' for input/output (%s)"
msgstr "tidak dapat membuka pipe dua arah `%s' untuk input/output (%s)"
-#: io.c:831
+#: io.c:986
#, c-format
msgid "can't redirect from `%s' (%s)"
msgstr "tidak dapat redirek dari `%s' (%s)"
-#: io.c:834
+#: io.c:989
#, c-format
msgid "can't redirect to `%s' (%s)"
msgstr "tidak dapat redirek ke `%s' (%s)"
-#: io.c:883
-msgid ""
-"reached system limit for open files: starting to multiplex file descriptors"
-msgstr ""
-"batas sistem tercapi untuk berkas terbuka: mulai untuk multiplex berkas "
-"deskripsi"
+#: io.c:1040
+msgid "reached system limit for open files: starting to multiplex file descriptors"
+msgstr "batas sistem tercapai untuk berkas terbuka: mulai untuk multiplex berkas deskripsi"
-#: io.c:899
+#: io.c:1056
#, c-format
msgid "close of `%s' failed (%s)."
msgstr "penutupan dari `%s' gagal (%s)."
-#: io.c:907
+#: io.c:1064
msgid "too many pipes or input files open"
msgstr "terlalu banyak pipes atau berkas masukan terbuka"
-#: io.c:929
+#: io.c:1086
msgid "close: second argument must be `to' or `from'"
msgstr "close: argumen kedua harus berupa `to' atau `from'"
-#: io.c:946
+#: io.c:1103
#, c-format
msgid "close: `%.*s' is not an open file, pipe or co-process"
msgstr "close: `%.*s' bukan sebuah berkas terbuka, pipe atau co-proses"
-#: io.c:951
+#: io.c:1108
msgid "close of redirection that was never opened"
msgstr "penutupan dari redireksi yang tidak pernah terbuka"
-#: io.c:1048
+#: io.c:1205
#, c-format
msgid "close: redirection `%s' not opened with `|&', second argument ignored"
-msgstr ""
-"close: redireksi `%s' tidak dibuka dengan `|&', argumen kedua diabaikan"
+msgstr "close: redireksi `%s' tidak dibuka dengan `|&', argumen kedua diabaikan"
-#: io.c:1064
+#: io.c:1222
#, c-format
msgid "failure status (%d) on pipe close of `%s' (%s)"
msgstr "status gagal (%d) di tutup pipe dari `%s' (%s)"
-#: io.c:1067
+#: io.c:1225
#, c-format
msgid "failure status (%d) on file close of `%s' (%s)"
msgstr "status gagal (%d) di tutup berkas dari `%s' (%s)"
-#: io.c:1087
+#: io.c:1245
#, c-format
msgid "no explicit close of socket `%s' provided"
msgstr "tidak ada eksplisit tutup dari socket `%s' yang disediakan"
-#: io.c:1090
+#: io.c:1248
#, c-format
msgid "no explicit close of co-process `%s' provided"
msgstr "tidak ada eksplisit tutup dari co-proses `%s' yang disediakan"
-#: io.c:1093
+#: io.c:1251
#, c-format
msgid "no explicit close of pipe `%s' provided"
msgstr "tidak ada eksplisit tutup dari pipe `%s' disediakan"
-#: io.c:1096
+#: io.c:1254
#, c-format
msgid "no explicit close of file `%s' provided"
msgstr "tidak ada eksplisit close dari berkas `%s' disediakan"
-#: io.c:1124 io.c:1179 main.c:809 main.c:851
+#: io.c:1284 io.c:1342 main.c:864 main.c:906
#, c-format
msgid "error writing standard output (%s)"
msgstr "error menulis standar keluaran (%s)"
-#: io.c:1128 io.c:1184
+#: io.c:1289 io.c:1348 main.c:866
#, c-format
msgid "error writing standard error (%s)"
msgstr "error menulis standar error (%s)"
-#: io.c:1136
+#: io.c:1297
#, c-format
msgid "pipe flush of `%s' failed (%s)."
msgstr "pipe flush dari `%s' gagal (%s)."
-#: io.c:1139
+#: io.c:1300
#, c-format
msgid "co-process flush of pipe to `%s' failed (%s)."
msgstr "co-proses flush dari pipe ke `%s' gagal (%s)."
-#: io.c:1142
+#: io.c:1303
#, c-format
msgid "file flush of `%s' failed (%s)."
msgstr "file flush dari `%s' gagal (%s)."
-#: io.c:1257
+#: io.c:1420
#, c-format
msgid "local port %s invalid in `/inet'"
msgstr "lokal port %s tidak valid dalam `/inet'"
-#: io.c:1274
+#: io.c:1438
#, c-format
msgid "remote host and port information (%s, %s) invalid"
msgstr "remote host dan informasi port (%s, %s) tidak valid"
-#: io.c:1426
+#: io.c:1590
#, c-format
msgid "no (known) protocol supplied in special filename `%s'"
-msgstr ""
-"tidak (diketahui) protokol yang diberikan dalam nama berkas spesial `%s'"
+msgstr "tidak (diketahui) protokol yang diberikan dalam nama berkas spesial `%s'"
-#: io.c:1440
+#: io.c:1604
#, c-format
msgid "special file name `%s' is incomplete"
msgstr "nama berkas spesial `%s' tidak lengkap"
-#: io.c:1457
+#: io.c:1621
msgid "must supply a remote hostname to `/inet'"
msgstr "harus memberikan sebuah remote hostname ke `/inet'"
-#: io.c:1475
+#: io.c:1639
msgid "must supply a remote port to `/inet'"
msgstr "harus memberikan sebuah remote port ke `/inet'"
-#: io.c:1521
+#: io.c:1685
msgid "TCP/IP communications are not supported"
msgstr "komunikasi TCP/IP tidak didukung"
-#: io.c:1688
+#: io.c:1867
#, c-format
msgid "could not open `%s', mode `%s'"
msgstr "tidak dapat membuka `%s', mode `%s'"
-#: io.c:1739
+#: io.c:1917
#, c-format
msgid "close of master pty failed (%s)"
msgstr "penutupan dari master pty gagal (%s)"
-#: io.c:1741 io.c:1909 io.c:2066
+#: io.c:1919 io.c:2105 io.c:2305
#, c-format
msgid "close of stdout in child failed (%s)"
msgstr "penutupan dari stdout dalam child gagal (%s)"
-#: io.c:1744
+#: io.c:1922
#, c-format
msgid "moving slave pty to stdout in child failed (dup: %s)"
msgstr "memindahkan slave pty ke stdout dalam child gagal (dup: %s)"
-#: io.c:1746 io.c:1914
+#: io.c:1924 io.c:2110
#, c-format
msgid "close of stdin in child failed (%s)"
msgstr "penutupan dari stdin dalam anak gagal (%s)"
-#: io.c:1749
+#: io.c:1927
#, c-format
msgid "moving slave pty to stdin in child failed (dup: %s)"
msgstr "memindahkan slave pty ke stdin dalam anak gagal (dup: %s)"
-#: io.c:1751 io.c:1772
+#: io.c:1929 io.c:1951
#, c-format
msgid "close of slave pty failed (%s)"
msgstr "penutupan dari pty budak gagal (%s)"
-#: io.c:1850 io.c:1912 io.c:2044 io.c:2069
+#: io.c:2040 io.c:2108 io.c:2276 io.c:2308
#, c-format
msgid "moving pipe to stdout in child failed (dup: %s)"
msgstr "memindahkan pipe ke stdout dalam anak gagal (dup: %s)"
-#: io.c:1857 io.c:1917
+#: io.c:2047 io.c:2113
#, c-format
msgid "moving pipe to stdin in child failed (dup: %s)"
msgstr "memindahkan pipe ke stdin dalam anak gagal (dup: %s)"
-#: io.c:1877 io.c:2059
+#: io.c:2073 io.c:2298
msgid "restoring stdout in parent process failed\n"
msgstr "mengembalikan stdout dalam proses orang tua gagal\n"
-#: io.c:1885
+#: io.c:2081
msgid "restoring stdin in parent process failed\n"
msgstr "mengembalikan stdin dalam proses orang tua gagal\n"
-#: io.c:1920 io.c:2071 io.c:2085
+#: io.c:2116 io.c:2310 io.c:2324
#, c-format
msgid "close of pipe failed (%s)"
msgstr "penutupan dari pipe gagal (%s)"
-#: io.c:1965
+#: io.c:2174
msgid "`|&' not supported"
msgstr "`|&' tidak didukung"
-#: io.c:2031
+#: io.c:2261
#, c-format
msgid "cannot open pipe `%s' (%s)"
msgstr "tidak dapat membuka pipe `%s' (%s)"
-#: io.c:2079
+#: io.c:2318
#, c-format
msgid "cannot create child process for `%s' (fork: %s)"
msgstr "tidak dapat membuat proses anak untuk `%s' (fork: %s)"
-#: io.c:2569
+#: io.c:2790
+msgid "register_input_parser: received NULL pointer"
+msgstr "register_input_parser: received NULL pointer"
+
+#: io.c:2818
+#, c-format
+msgid "input parser `%s' conflicts with previously installed input parser `%s'"
+msgstr "input parser `%s' conflicts with previously installed input parser `%s'"
+
+#: io.c:2825
+#, c-format
+msgid "input parser `%s' failed to open `%s'"
+msgstr "input parser `%s' failed to open `%s'"
+
+#: io.c:2845
+msgid "register_output_wrapper: received NULL pointer"
+msgstr "register_output_wrapper: received NULL pointer"
+
+#: io.c:2873
+#, c-format
+msgid "output wrapper `%s' conflicts with previously installed output wrapper `%s'"
+msgstr "output wrapper `%s' conflicts with previously installed output wrapper `%s'"
+
+#: io.c:2880
+#, c-format
+msgid "output wrapper `%s' failed to open `%s'"
+msgstr "output wrapper `%s' failed to open `%s'"
+
+#: io.c:2901
+msgid "register_output_processor: received NULL pointer"
+msgstr "register_output_processor: received NULL pointer"
+
+#: io.c:2930
+#, c-format
+msgid "two-way processor `%s' conflicts with previously installed two-way processor `%s'"
+msgstr "two-way processor `%s' conflicts with previously installed two-way processor `%s'"
+
+#: io.c:2939
+#, c-format
+msgid "two way processor `%s' failed to open `%s'"
+msgstr "two way processor `%s' failed to open `%s'"
+
+#: io.c:3064
#, c-format
msgid "data file `%s' is empty"
msgstr "berkas data `%s' kosong"
-#: io.c:2610 io.c:2618
+#: io.c:3106 io.c:3114
msgid "could not allocate more input memory"
msgstr "tidak dapat mengalokasikan lebih dari masukan memori"
-#: io.c:3171
+#: io.c:3682
msgid "multicharacter value of `RS' is a gawk extension"
msgstr "nilai multi karakter dari `RS' adalah sebuah ekstensi gawk"
-#: io.c:3276
-#, fuzzy
+#: io.c:3771
msgid "IPv6 communication is not supported"
-msgstr "komunikasi TCP/IP tidak didukung"
-
-#: main.c:307
-msgid "out of memory"
-msgstr "kehabisan memori"
+msgstr "komunikasi IPv6 tidak didukung"
-#: main.c:384
-msgid "`-m[fr]' option irrelevant in gawk"
-msgstr "pilihan `-m[fr]' tidak relevan dalam gawk"
-
-#: main.c:386
-msgid "-m option usage: `-m[fr] nnn'"
-msgstr "penggunaan pilihan -m: `-m[fr] nnn'"
-
-#: main.c:409
-#, fuzzy
+#: main.c:405
msgid "empty argument to `-e/--source' ignored"
-msgstr "argumen kosong ke `--source' diabaikan"
+msgstr "argumen kosong ke `-e/--source' diabaikan"
-#: main.c:475
+#: main.c:495
#, c-format
msgid "%s: option `-W %s' unrecognized, ignored\n"
msgstr "%s: pilihan `-W %s' tidak dikenal, diabaikan\n"
-#: main.c:528
+#: main.c:541
#, c-format
msgid "%s: option requires an argument -- %c\n"
msgstr "%s: pilihan membutuhkan sebuah argumen -- %c\n"
-#: main.c:549
+#: main.c:562
msgid "environment variable `POSIXLY_CORRECT' set: turning on `--posix'"
msgstr "variabel lingkungan `POSIXLY_CORRECT' set: mengaktifkan `--posix'"
-#: main.c:555
+#: main.c:568
msgid "`--posix' overrides `--traditional'"
msgstr "`--posix' overrides `--traditional'"
-#: main.c:566
+#: main.c:579
msgid "`--posix'/`--traditional' overrides `--non-decimal-data'"
msgstr "`--posix'/`--traditional' overrides `--non-decimal-data'"
-#: main.c:570
+#: main.c:583
#, c-format
msgid "running %s setuid root may be a security problem"
msgstr "menjalankan %s setuid root mungkin sebuah masalah keamanan"
-#: main.c:575
-#, fuzzy
-msgid "`--posix' overrides `--binary'"
-msgstr "`--posix' overrides `--traditional'"
+#: main.c:588
+msgid "`--posix' overrides `--characters-as-bytes'"
+msgstr "`--posix' overrides `--characters-as-bytes'"
-#: main.c:626
+#: main.c:647
#, c-format
msgid "can't set binary mode on stdin (%s)"
msgstr "tidak dapat menset mode binari di stdin (%s)"
-#: main.c:629
+#: main.c:650
#, c-format
msgid "can't set binary mode on stdout (%s)"
msgstr "tidak dapat menset mode binari di stdout (%s)"
-#: main.c:631
+#: main.c:652
#, c-format
msgid "can't set binary mode on stderr (%s)"
msgstr "tidak dapat menset mode binari di stderr (%s)"
-#: main.c:670
+#: main.c:710
msgid "no program text at all!"
msgstr "tidak ada teks aplikasi apapun!"
-#: main.c:749
+#: main.c:799
#, c-format
msgid "Usage: %s [POSIX or GNU style options] -f progfile [--] file ...\n"
-msgstr ""
-"Penggunaan: %s [pilihan POSIX atau gaya GNU] -f progfile [--] berkas ...\n"
+msgstr "Penggunaan: %s [pilihan POSIX atau gaya GNU] -f progfile [--] berkas ...\n"
-#: main.c:751
+#: main.c:801
#, c-format
msgid "Usage: %s [POSIX or GNU style options] [--] %cprogram%c file ...\n"
-msgstr ""
-"Penggunaan: %s[ pilihan POSIX atau gaya GNU] [--] %cprogram%c berkas ...\n"
+msgstr "Penggunaan: %s[ pilihan POSIX atau gaya GNU] [--] %cprogram%c berkas ...\n"
-#: main.c:756
-#, fuzzy
+#: main.c:806
msgid "POSIX options:\t\tGNU long options: (standard)\n"
msgstr "pilihan POSIX:\t\tpilihan panjang GNU: (standar)\n"
-#: main.c:757
+#: main.c:807
msgid "\t-f progfile\t\t--file=progfile\n"
msgstr "\t-f progfile\t\t--file=progfile\n"
-#: main.c:758
+#: main.c:808
msgid "\t-F fs\t\t\t--field-separator=fs\n"
msgstr "\t-F fs\t\t\t--field-separator=fs\n"
-#: main.c:759
+#: main.c:809
msgid "\t-v var=val\t\t--assign=var=val\n"
msgstr "\t-v var=val\t\t--assign=var=val\n"
-#: main.c:760
-#, fuzzy
+#: main.c:810
msgid "Short options:\t\tGNU long options: (extensions)\n"
msgstr "pilihan pendek:\t\tpilihan panjang GNU: (ekstensi)\n"
-#: main.c:761
+#: main.c:811
msgid "\t-b\t\t\t--characters-as-bytes\n"
-msgstr ""
+msgstr "\t-b\t\t\t--characters-as-bytes\n"
-#: main.c:762
-#, fuzzy
+#: main.c:812
msgid "\t-c\t\t\t--traditional\n"
-msgstr "\t-W tradisional\t\t--traditional\n"
+msgstr "\t-c\t\t\t--traditional\n"
-#: main.c:763
-#, fuzzy
+#: main.c:813
msgid "\t-C\t\t\t--copyright\n"
-msgstr "\t-W hak cipta\t\t--copyright\n"
+msgstr "\t-C hak cipta\t\t--copyright\n"
-#: main.c:764
-#, fuzzy
-msgid "\t-d [file]\t\t--dump-variables[=file]\n"
-msgstr "\t-W tampilkan variabel[=berkas]\t--dump-variables[=berkas]\n"
+#: main.c:814
+msgid "\t-d[file]\t\t--dump-variables[=file]\n"
+msgstr "\t-d tampilkan variabel[=berkas]\t\t--dump-variables[=berkas]\n"
-#: main.c:765
-#, fuzzy
+#: main.c:815
+msgid "\t-D[file]\t\t--debug[=file]\n"
+msgstr "\t-D[berkas]\t\t--debug[=berkas]\n"
+
+#: main.c:816
msgid "\t-e 'program-text'\t--source='program-text'\n"
-msgstr "\t-W sumber=teks-program\t--source=teks-program\n"
+msgstr "\t-e sumber=teks-program\t\t--source=teks-program\n"
-#: main.c:766
-#, fuzzy
+#: main.c:817
msgid "\t-E file\t\t\t--exec=file\n"
-msgstr "\t-W exec=berkas\t\t--exec=berkas\n"
+msgstr "\t-E exec=berkas\t\t\t--exec=berkas\n"
-#: main.c:767
-#, fuzzy
+#: main.c:818
msgid "\t-g\t\t\t--gen-pot\n"
-msgstr "\t-W gen-po\t\t--gen-po\n"
+msgstr "\t-g\t\t\t--gen-pot\n"
-#: main.c:768
-#, fuzzy
+#: main.c:819
msgid "\t-h\t\t\t--help\n"
-msgstr "\t-W bantuan\t\t\t--help\n"
+msgstr "\t-h bantuan\t\t\t--help\n"
+
+#: main.c:820
+msgid "\t-i includefile\t\t--include=includefile\n"
+msgstr "\t-i includefile\t\t--include=includefile\n"
+
+#: main.c:821
+msgid "\t-l library\t\t--load=library\n"
+msgstr "\t-l library\t\t--load=library\n"
-#: main.c:769
-#, fuzzy
+#: main.c:822
msgid "\t-L [fatal]\t\t--lint[=fatal]\n"
-msgstr "\t-W lint[=fatal]\t\t--lint[=fatal]\n"
+msgstr "\t-L lint[=fatal]\t\t--lint[=fatal]\n"
-#: main.c:770
-#, fuzzy
+#: main.c:823
msgid "\t-n\t\t\t--non-decimal-data\n"
-msgstr "\t-W non-decimal-data\t--non-decimal-data\n"
+msgstr "\t-n non-decimal-data\t\t\t--non-decimal-data\n"
+
+#: main.c:824
+msgid "\t-M\t\t\t--bignum\n"
+msgstr "\t-M\t\t\t--bignum\n"
-#: main.c:771
-#, fuzzy
+#: main.c:825
msgid "\t-N\t\t\t--use-lc-numeric\n"
-msgstr "\t-W use-lc-numeric\t--use-lc-numeric\n"
+msgstr "\t-N use-lc-numeric\t\t\t--use-lc-numeric\n"
-#: main.c:772
+#: main.c:826
+msgid "\t-o[file]\t\t--pretty-print[=file]\n"
+msgstr "\t-o[berkas]\t\t--pretty-print[=berkas]\n"
+
+#: main.c:827
msgid "\t-O\t\t\t--optimize\n"
msgstr "\t-O\t\t\t--optimize\n"
-#: main.c:773
-#, fuzzy
-msgid "\t-p [file]\t\t--profile[=file]\n"
-msgstr "\t-W profile[=file]\t--profile[=file]\n"
+#: main.c:828
+msgid "\t-p[file]\t\t--profile[=file]\n"
+msgstr "\t-p profile[=file]\t\t--profile[=file]\n"
-#: main.c:774
-#, fuzzy
+#: main.c:829
msgid "\t-P\t\t\t--posix\n"
-msgstr "\t-W posix\t\t--posix\n"
+msgstr "\t-P posix\t\t\t--posix\n"
-#: main.c:775
-#, fuzzy
+#: main.c:830
msgid "\t-r\t\t\t--re-interval\n"
-msgstr "\t-W re-interval\t\t--re-interval\n"
+msgstr "\t-r re-interval\t\t\t--re-interval\n"
-#: main.c:777
-#, fuzzy
-msgid "\t-R file\t\t\t--command=file\n"
-msgstr "\t-W exec=berkas\t\t--exec=berkas\n"
-
-#: main.c:778
+#: main.c:831
msgid "\t-S\t\t\t--sandbox\n"
-msgstr ""
+msgstr "\t-S\t\t\t--sandbox\n"
-#: main.c:779
-#, fuzzy
+#: main.c:832
msgid "\t-t\t\t\t--lint-old\n"
-msgstr "\t-W lint-old\t\t--lint-old\n"
+msgstr "\t-t lint-old\t\t\t--lint-old\n"
-#: main.c:780
-#, fuzzy
+#: main.c:833
msgid "\t-V\t\t\t--version\n"
-msgstr "\t-W versi\t\t--version\n"
+msgstr "\t-V versi\t\t\t--version\n"
-#: main.c:782
+#: main.c:835
msgid "\t-W nostalgia\t\t--nostalgia\n"
msgstr "\t-W nostalgia\t\t--nostalgia\n"
-#: main.c:785
-#, fuzzy
+#: main.c:838
msgid "\t-Y\t\t--parsedebug\n"
-msgstr "\t-W parsedebug\t\t--parsedebug\n"
+msgstr "\t-Y parsedebug\t\t--parsedebug\n"
#. TRANSLATORS: --help output 5 (end)
#. TRANSLATORS: the placeholder indicates the bug-reporting address
#. for this application. Please add _another line_ with the
#. address for translation bugs.
#. no-wrap
-#: main.c:794
+#: main.c:847
msgid ""
"\n"
"To report bugs, see node `Bugs' in `gawk.info', which is\n"
@@ -1719,7 +3014,7 @@ msgstr ""
"daerah `Reporting Problems and Bugs' dalam versi tercetak.\n"
"\n"
-#: main.c:798
+#: main.c:851
msgid ""
"gawk is a pattern scanning and processing language.\n"
"By default it reads standard input and writes standard output.\n"
@@ -1729,7 +3024,7 @@ msgstr ""
"Secara baku ini membaca standar masukan dan menulis standar keluaran.\n"
"\n"
-#: main.c:802
+#: main.c:855
msgid ""
"Examples:\n"
"\tgawk '{ sum += $1 }; END { print sum }' file\n"
@@ -1739,7 +3034,7 @@ msgstr ""
"\tgawk '{ sum += $1 }; END { print sum }' berkas\n"
"\tgawk -F: '{ print $1 }' /etc/passwd\n"
-#: main.c:822
+#: main.c:880
#, c-format
msgid ""
"Copyright (C) 1989, 1991-%d Free Software Foundation.\n"
@@ -1752,15 +3047,13 @@ msgid ""
msgstr ""
"Hak Cipta (C) 1989, 1991-%d Free Software Foundation.\n"
"\n"
-"Aplikasi ini adalah aplikasi bebas; anda dapat meredistribusikannya dan/atau "
-"memodifikasinya\n"
-"dibawah ketentuan dari GNU General Public License seperti dipublikasikan "
-"oleh\n"
+"Aplikasi ini adalah aplikasi bebas; anda dapat meredistribusikannya dan/atau memodifikasinya\n"
+"dibawah ketentuan dari GNU General Public License seperti dipublikasikan oleh\n"
"Free Software Foundation; baik versi 3 dari Lisensi, atau\n"
"(di pilihan anda) untuk versi selanjutnya.\n"
"\n"
-#: main.c:830
+#: main.c:888
msgid ""
"This program is distributed in the hope that it will be useful,\n"
"but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
@@ -1769,31 +3062,29 @@ msgid ""
"\n"
msgstr ""
"Aplikasi ini didistribusikan dengan harapan ini akan berguna,\n"
-"tetapi TANPA GARANSI APAPUN; bahkan tanpa garansi yang diimplisikasikan "
-"dari\n"
+"tetapi TANPA GARANSI APAPUN; bahkan tanpa garansi yang diimplisikasikan dari\n"
"PERDAGANGAN atau KESESUAIAN UNTUK SEBUAH TUJUAN TERTENTU. Lihat\n"
"GNU General Public License untuk lebih lengkapnya.\n"
"\n"
-#: main.c:841
+#: main.c:894
msgid ""
"You should have received a copy of the GNU General Public License\n"
"along with this program. If not, see http://www.gnu.org/licenses/.\n"
msgstr ""
"Anda seharusnya menerima salinan dari GNU General Public License\n"
-"bersama dengan aplikasi ini. Jika tidak, lihat http://www.gnu.org/"
-"licenses/.\n"
+"bersama dengan aplikasi ini. Jika tidak, lihat http://www.gnu.org/licenses/.\n"
-#: main.c:876
+#: main.c:931
msgid "-Ft does not set FS to tab in POSIX awk"
msgstr "-Ft tidak menset FS ke tab dalam POSIX awk"
-#: main.c:1110
+#: main.c:1208
#, c-format
msgid "unknown value for field spec: %d\n"
-msgstr ""
+msgstr "unknown value for field spec: %d\n"
-#: main.c:1170
+#: main.c:1306
#, c-format
msgid ""
"%s: `%s' argument to `-v' not in `var=value' form\n"
@@ -1802,119 +3093,168 @@ msgstr ""
"%s: `%s' argumen ke `-v' tidak dalam bentuk `var=value'\n"
"\n"
-#: main.c:1190
+#: main.c:1332
#, c-format
msgid "`%s' is not a legal variable name"
msgstr "`%s' bukan sebuah nama variabel legal"
-#: main.c:1193
+#: main.c:1335
#, c-format
msgid "`%s' is not a variable name, looking for file `%s=%s'"
msgstr "`%s' bukan sebuah nama variabel, pencarian untuk berkas `%s=%s'"
-#: main.c:1246
+#: main.c:1339
+#, c-format
+msgid "cannot use gawk builtin `%s' as variable name"
+msgstr "tidak dapat menggunakan gawk bawaan `%s' sebagai nama variabel"
+
+#: main.c:1344
+#, c-format
+msgid "cannot use function `%s' as variable name"
+msgstr "tidak dapat menggunakan fungsi `%s' sebagai nama variabel"
+
+#: main.c:1397
msgid "floating point exception"
msgstr "eksepsi titik pecahan"
-#: main.c:1253
+#: main.c:1404
msgid "fatal error: internal error"
msgstr "fatal error: internal error"
-#: main.c:1268
+#: main.c:1419
msgid "fatal error: internal error: segfault"
msgstr "fatal error: internal error: segfault"
-#: main.c:1280
+#: main.c:1431
msgid "fatal error: internal error: stack overflow"
msgstr "fatal error: internal error: stack overflow"
-#: main.c:1330
+#: main.c:1490
#, c-format
msgid "no pre-opened fd %d"
msgstr "tidak ada pre-opened fd %d"
-#: main.c:1337
+#: main.c:1497
#, c-format
msgid "could not pre-open /dev/null for fd %d"
msgstr "tidak dapat pre-open /dev/null untuk fd %d"
-#: main.c:1360 main.c:1369
+#: mpfr.c:550
#, c-format
-msgid "could not find groups: %s"
-msgstr "tidak dapat menemukan grup: %s"
+msgid "PREC value `%.*s' is invalid"
+msgstr "nilai PREC `%.*s' tidak valid"
-#: msg.c:63
+#: mpfr.c:608
+#, c-format
+msgid "RNDMODE value `%.*s' is invalid"
+msgstr "nilai RNDMODE `%.*s' tidak valid"
+
+#: mpfr.c:698
+#, c-format
+msgid "%s: received non-numeric argument"
+msgstr "%s: diterima argumen bukan numerik"
+
+#: mpfr.c:800
+msgid "compl(%Rg): negative value will give strange results"
+msgstr "compl(%Rg): nilai negatif akan memberikan hasil aneh"
+
+#: mpfr.c:804
+msgid "comp(%Rg): fractional value will be truncated"
+msgstr "compl(%Rg): nilai pecahan akan dipotong"
+
+#: mpfr.c:816
+#, c-format
+msgid "cmpl(%Zd): negative values will give strange results"
+msgstr "compl(%Zd): nilai negatif akan memberikan hasil aneh"
+
+#: mpfr.c:835
+#, c-format
+msgid "%s: received non-numeric argument #%d"
+msgstr "%s: diterima argumen bukan numerik #%d"
+
+#: mpfr.c:845
+msgid "%s: argument #%d has invalid value %Rg, using 0"
+msgstr "%s: argument #%d has invalid value %Rg, using 0"
+
+#: mpfr.c:857
+msgid "%s: argument #%d negative value %Rg will give strange results"
+msgstr "%s: argumen #%d nilai negatif %Rg akan memberikan hasil aneh"
+
+#: mpfr.c:863
+msgid "%s: argument #%d fractional value %Rg will be truncated"
+msgstr "%s: argumen #%d nilai pecahan %Rg akan dipotong"
+
+#: mpfr.c:878
+#, c-format
+msgid "%s: argument #%d negative value %Zd will give strange results"
+msgstr "%s: argumen #%d nilai negatif %Zd akan memberikan hasil aneh"
+
+#: msg.c:68
#, c-format
msgid "cmd. line:"
msgstr "cmd. baris:"
-#: msg.c:107
-msgid "error: "
-msgstr "error: "
-
-#: node.c:401
+#: node.c:421
msgid "backslash at end of string"
msgstr "backslash di akhir dari string"
-#: node.c:502
+#: node.c:500
#, c-format
msgid "old awk does not support the `\\%c' escape sequence"
msgstr "awk lama tidak mendukung escape sequence `\\%c'"
-#: node.c:553
+#: node.c:551
msgid "POSIX does not allow `\\x' escapes"
msgstr "POSIX tidak mengijinkan escapes `\\x'"
-#: node.c:559
+#: node.c:557
msgid "no hex digits in `\\x' escape sequence"
msgstr "tidak ada digit heksa dalam escape sequence `\\x'"
-#: node.c:581
+#: node.c:579
#, c-format
-msgid ""
-"hex escape \\x%.*s of %d characters probably not interpreted the way you "
-"expect"
-msgstr ""
-"hex escape \\x%.*s dari karakter %d mungkin tidak dapat diinterpretrasikan "
-"seperti yang anda kira"
+msgid "hex escape \\x%.*s of %d characters probably not interpreted the way you expect"
+msgstr "hex escape \\x%.*s dari karakter %d mungkin tidak dapat diinterpretrasikan seperti yang anda kira"
-#: node.c:596
+#: node.c:594
#, c-format
msgid "escape sequence `\\%c' treated as plain `%c'"
msgstr "escape sequence `\\%c' diperlakukan sebagai plain `%c'"
-#: node.c:735
-msgid ""
-"Invalid multibyte data detected. There may be a mismatch between your data "
-"and your locale."
-msgstr ""
+#: node.c:739
+msgid "Invalid multibyte data detected. There may be a mismatch between your data and your locale."
+msgstr "Invalid multibyte data detected. There may be a mismatch between your data and your locale."
-#: posix/gawkmisc.c:175
-#, fuzzy, c-format
+#: posix/gawkmisc.c:177
+#, c-format
msgid "%s %s `%s': could not get fd flags: (fcntl F_GETFD: %s)"
msgstr "%s %s `%s': tidak dapat memperoleh flag fd: (fcntl F_GETFD: %s)"
-#: posix/gawkmisc.c:187
-#, fuzzy, c-format
+#: posix/gawkmisc.c:189
+#, c-format
msgid "%s %s `%s': could not set close-on-exec: (fcntl F_SETFD: %s)"
msgstr "%s %s `%s': tidak dapat menset close-on-exec: (fcntl F_SETFD: %s)"
-#: profile.c:83
+#: profile.c:71
#, c-format
msgid "could not open `%s' for writing: %s"
msgstr "tidak dapat membuka `%s' untuk penulisan: %s"
-#: profile.c:203
-#, fuzzy, c-format
+#: profile.c:73
+msgid "sending profile to standard error"
+msgstr "mengirim profile ke standar error"
+
+#: profile.c:193
+#, c-format
msgid ""
"\t# %s block(s)\n"
"\n"
msgstr ""
+"\t# %s blok\n"
+"\t# %s END blok\n"
"\n"
-#: profile.c:208
-#, fuzzy, c-format
+#: profile.c:198
+#, c-format
msgid ""
"\t# Rule(s)\n"
"\n"
@@ -1922,17 +3262,30 @@ msgstr ""
"\t# Aturan\n"
"\n"
-#: profile.c:279
+#: profile.c:272
#, c-format
msgid "internal error: %s with null vname"
msgstr "internal error: %s dengan null vname"
-#: profile.c:938
+#: profile.c:537
+msgid "internal error: builtin with null fname"
+msgstr "internal error: builtin dengan null fname"
+
+#: profile.c:949
+#, c-format
+msgid ""
+"\t# Loaded extensions (-l and/or @load)\n"
+"\n"
+msgstr ""
+"\t# Loaded extensions (-l and/or @load)\n"
+"\n"
+
+#: profile.c:972
#, c-format
msgid "\t# gawk profile, created %s\n"
msgstr "\t# gawk profile, dibuat %s\n"
-#: profile.c:1317
+#: profile.c:1475
#, c-format
msgid ""
"\n"
@@ -1941,271 +3294,256 @@ msgstr ""
"\n"
"\t# Fungsi, terdaftar secara alphabet\n"
-#: profile.c:1356
+#: profile.c:1513
#, c-format
msgid "redir2str: unknown redirection type %d"
-msgstr ""
+msgstr "redir2str: unknown redirection type %d"
-#: re.c:589
-#, c-format
-msgid "range of the form `[%c-%c]' is locale dependant"
-msgstr ""
-
-#: re.c:611
+#: re.c:607
#, c-format
msgid "regexp component `%.*s' should probably be `[%.*s]'"
-msgstr ""
+msgstr "regexp component `%.*s' should probably be `[%.*s]'"
-#: regcomp.c:132
+#: regcomp.c:131
msgid "Success"
msgstr "Sukses"
-#: regcomp.c:135
+#: regcomp.c:134
msgid "No match"
msgstr "Tidak cocok"
-#: regcomp.c:138
+#: regcomp.c:137
msgid "Invalid regular expression"
msgstr "Ekspresi regular tidak valid"
-#: regcomp.c:141
+#: regcomp.c:140
msgid "Invalid collation character"
msgstr "Karakter kolasi tidak valid"
-#: regcomp.c:144
+#: regcomp.c:143
msgid "Invalid character class name"
msgstr "nama kelas karakter tidak valid"
-#: regcomp.c:147
+#: regcomp.c:146
msgid "Trailing backslash"
msgstr "Akhiran backslash"
-#: regcomp.c:150
+#: regcomp.c:149
msgid "Invalid back reference"
msgstr "Referensi balik tidak valid"
-#: regcomp.c:153
+#: regcomp.c:152
msgid "Unmatched [ or [^"
msgstr "Tidak cocok [ atau [^"
-#: regcomp.c:156
+#: regcomp.c:155
msgid "Unmatched ( or \\("
msgstr "Tidak cocok ( atau \\("
-#: regcomp.c:159
+#: regcomp.c:158
msgid "Unmatched \\{"
msgstr "Tidak cocok \\{"
-#: regcomp.c:162
-msgid "Invalid content of \\{\\}"
-msgstr "Isi dari \\{\\} tidak valid"
-
-#: regcomp.c:165
+#: regcomp.c:164
msgid "Invalid range end"
msgstr "Akhir jangkauan tidak valid"
-#: regcomp.c:168
+#: regcomp.c:167
msgid "Memory exhausted"
msgstr "Kehabisan memori"
-#: regcomp.c:171
+#: regcomp.c:170
msgid "Invalid preceding regular expression"
msgstr "Ekspresi regular yang mengawali tidak valid"
-#: regcomp.c:174
+#: regcomp.c:173
msgid "Premature end of regular expression"
msgstr "Akhir dari ekspresi regular prematur"
-#: regcomp.c:177
-msgid "Regular expression too big"
-msgstr "Ekspresi regular terlalu besar"
-
-#: regcomp.c:180
+#: regcomp.c:179
msgid "Unmatched ) or \\)"
msgstr "Tidak cocok ) atau \\)"
-#: regcomp.c:701
+#: regcomp.c:704
msgid "No previous regular expression"
msgstr "Tidak ada ekspresi regular sebelumnya"
-#~ msgid "statement may have no effect"
-#~ msgstr "pernyataan mungkin tidak memiliki pengaruh"
+#: symbol.c:741
+msgid "can not pop main context"
+msgstr "can not pop main context"
-#~ msgid "attempt to use scalar `%s' as array"
-#~ msgstr "mencoba untuk menggunakan skalar `%s' sebagai sebuah array"
+#~ msgid "attempt to use function `%s' as an array"
+#~ msgstr "mencoba untuk menggunakan fungsi `%s' sebagai sebuah array"
-#, fuzzy
-#~ msgid "attempt to use array `%s' in scalar context"
-#~ msgstr "mencoba menggunakan array `%s' dalam sebuah konteks skalar"
+#~ msgid "reference to uninitialized element `%s[\"%s\"]'"
+#~ msgstr "referensi ke elemen tidak terinisialisasi `%s[\"%s\"]'"
-#~ msgid "`continue' outside a loop is not allowed"
-#~ msgstr "`continue' diluar sebuah loop tidak diijinkan"
+#~ msgid "subscript of array `%s' is null string"
+#~ msgstr "subscript dari array `%s' adalah string null"
-#, fuzzy
-#~ msgid "`break' outside a loop is not allowed"
-#~ msgstr "`break' diluar sebuah loop tidak diijinkan"
+#~ msgid "%s: empty (null)\n"
+#~ msgstr "%s: kosong (null)\n"
-#~ msgid "/inet/raw client not ready yet, sorry"
-#~ msgstr "/inet/raw client belum siap, maaf"
+#~ msgid "%s: empty (zero)\n"
+#~ msgstr "%s: kosong (nol)\n"
-#~ msgid "only root may use `/inet/raw'."
-#~ msgstr "hanya root yang boleh menggunakan `/inet/raw'."
+#~ msgid "%s: table_size = %d, array_size = %d\n"
+#~ msgstr "%s: table_size = %d, array_size = %d\n"
-#~ msgid "/inet/raw server not ready yet, sorry"
-#~ msgstr "/inet/raw server belum siap, maaf"
+#~ msgid "%s: array_ref to %s\n"
+#~ msgstr "%s: array_ref ke %s\n"
-#~ msgid "\t-m[fr] val\n"
-#~ msgstr "\t-m[fr] val\n"
+#~ msgid "statement may have no effect"
+#~ msgstr "pernyataan mungkin tidak memiliki pengaruh"
+
+#~ msgid "`delete array' is a gawk extension"
+#~ msgstr "`delete array' adalah sebuah ekstensi gawk"
#~ msgid "call of `length' without parentheses is deprecated by POSIX"
-#~ msgstr ""
-#~ "panggilan dari `length' tanpa tanda kurung sudah ditinggalkan oleh POSIX"
+#~ msgstr "panggilan dari `length' tanpa tanda kurung sudah ditinggalkan oleh POSIX"
-#, fuzzy
-#~ msgid "reference to uninitialized field `$%s'"
-#~ msgstr "referensi ke field tidak terinisialisasi `$%d'"
+#~ msgid "use of non-array as array"
+#~ msgstr "penggunaan dari bukan array sebagai array"
-#~ msgid "can't convert string to float"
-#~ msgstr "tidak dapat mengubah string ke float"
+#~ msgid "`%s' is a Bell Labs extension"
+#~ msgstr "`%s' adalah sebuah ekstensi Bell Labs"
-#~ msgid "`continue' outside a loop is not portable"
-#~ msgstr "`continue' diluar sebuah loop tidak portabel"
+#~ msgid "length: untyped argument will be forced to scalar"
+#~ msgstr "length: argument tidak terketik akan dipaksa ke skalar"
-#~ msgid "`break' outside a loop is not portable"
-#~ msgstr "`break' diluar sebuah loop adalah tidak portabel"
+#~ msgid "and: received non-numeric first argument"
+#~ msgstr "and: diterima argumen pertama tidak numerik"
-#~ msgid "`nextfile' cannot be called from a BEGIN rule"
-#~ msgstr "`nextfile' tidak dapat dipanggil dari sebuah aturan BEGIN"
+#~ msgid "and: received non-numeric second argument"
+#~ msgstr "and: diterima argumen kedua bukan numerik"
-#~ msgid "`next' cannot be called from a BEGIN rule"
-#~ msgstr "`next' tidak dapat dipanggil dari sebuah aturan BEGIN"
+#~ msgid "or: received non-numeric first argument"
+#~ msgstr "or: diterima argumen pertama bukan numerik"
-#~ msgid "file `%s' is a directory"
-#~ msgstr "berkas `%s' adalah sebuah direktori"
+#~ msgid "or: received non-numeric second argument"
+#~ msgstr "or: diterima argumen kedua bukan numerik"
-#~ msgid "use `PROCINFO[\"%s\"]' instead of `%s'"
-#~ msgstr "lebih baik gunakan `PROCINFO[\"%s\"]' daripada `%s'"
+#~ msgid "or(%lf, %lf): negative values will give strange results"
+#~ msgstr "or(%lf, %lf): nilai negatif akan memberikan hasil aneh"
-#~ msgid "use `PROCINFO[...]' instead of `/dev/user'"
-#~ msgstr "lebih baik gunakan `PROCINFO[...]' daripada `/dev/user'"
+#~ msgid "or(%lf, %lf): fractional values will be truncated"
+#~ msgstr "or(%lf, %lf): nilai pecahan akan dipotong"
-#~ msgid "\t-W compat\t\t--compat\n"
-#~ msgstr "\t-W compabilitas\t\t--compat\n"
+#~ msgid "xor: received non-numeric first argument"
+#~ msgstr "xor: diterima argumen pertama bukan numerik"
-#~ msgid "\t-W copyleft\t\t--copyleft\n"
-#~ msgstr "\t-W copyleft\t\t--copyleft\n"
+#~ msgid "xor: received non-numeric second argument"
+#~ msgstr "xor: diterima argumen kedua bukan numerik"
-#~ msgid "\t-W usage\t\t--usage\n"
-#~ msgstr "\t-W penggunaan\t\t--usage\n"
+#~ msgid "xor(%lf, %lf): fractional values will be truncated"
+#~ msgstr "xor(%lf, %lf): nilai pecahan akan dipotong"
-#~ msgid ""
-#~ "\t# BEGIN block(s)\n"
-#~ "\n"
-#~ msgstr ""
-#~ "\t # BEGIN blok\n"
-#~ "\n"
+#~ msgid "for loop: array `%s' changed size from %ld to %ld during loop execution"
+#~ msgstr "for loop: array `%s' berubah ukuran dari %ld ke %ld selama eksekusi loop"
-#~ msgid "must use `count$' on all formats or none"
-#~ msgstr "harus menggunakan `count$' di semua format atau tidak sama sekali"
+#~ msgid "`break' outside a loop is not portable"
+#~ msgstr "`break' diluar sebuah loop adalah tidak portabel"
-#~ msgid "field width is ignored for `%%%%' specifier"
-#~ msgstr "lebar daerah diabaikan untuk penspesifikasi `%%%%'"
+#~ msgid "`continue' outside a loop is not portable"
+#~ msgstr "`continue' diluar sebuah loop tidak portabel"
-#~ msgid "precision is ignored for `%%%%' specifier"
-#~ msgstr "ketepatan diabaikan untuk penspesifikasi `%%%%'"
+#~ msgid "`next' cannot be called from a BEGIN rule"
+#~ msgstr "`next' tidak dapat dipanggil dari sebuah aturan BEGIN"
-#~ msgid "field width and precision are ignored for `%%%%' specifier"
-#~ msgstr "lebar daerah dan presisi diabaikan untuk penspesifikasi `%%%%'"
+#~ msgid "`next' cannot be called from an END rule"
+#~ msgstr "`next' tidak dapat dipanggil dari sebuah aturan END"
-#~ msgid "`$' is not permitted in awk formats"
-#~ msgstr "`$' tidak diijinkan dalam format awk"
+#~ msgid "`nextfile' cannot be called from a BEGIN rule"
+#~ msgstr "`nextfile' tidak dapat dipanggil dari sebuah aturan BEGIN"
-#~ msgid "arg count with `$' must be > 0"
-#~ msgstr "arg count dengan `$' harus > 0"
+#~ msgid "`nextfile' cannot be called from an END rule"
+#~ msgstr "`nextfile' tidak dapat dipanggil dari sebuah aturan END"
-#~ msgid "arg count %ld greater than total number of supplied arguments"
-#~ msgstr ""
-#~ "arg count %ld lebih besar dari jumlah total dari argumen yang diberikan"
+#~ msgid "statement has no effect"
+#~ msgstr "pernyataan tidak memiliki efek"
-#~ msgid "`$' not permitted after period in format"
-#~ msgstr "`$' tidak diijinkan setelah periode dalam format"
+#~ msgid "concatenation: side effects in one expression have changed the length of another!"
+#~ msgstr "concatenation: efek samping dalam satu ekspresi telah mengubah panjang dari yang lain!"
-#~ msgid "no `$' supplied for positional field width or precision"
-#~ msgstr ""
-#~ "tidak ada `$' yang diberikan untuk posisional field width atau presisi"
+#~ msgid "assignment used in conditional context"
+#~ msgstr "penempatan digunakan dalam konteks kondisional"
-#~ msgid "`l' is meaningless in awk formats; ignored"
-#~ msgstr "`l' tidak berarti dalam format awk; diabaikan"
+#~ msgid "illegal type (%s) in tree_eval"
+#~ msgstr "tipe (%s) tidak legal dalam tree_eval"
-#~ msgid "`l' is not permitted in POSIX awk formats"
-#~ msgstr "`l' tidak diijinkan dalam format POSIX awk"
+#~ msgid "\t# -- main --\n"
+#~ msgstr "\t# -- main --\n"
-#~ msgid "`L' is meaningless in awk formats; ignored"
-#~ msgstr "`L' tidak berarti dalam format awk; diabaikan"
+#~ msgid "assignment is not allowed to result of builtin function"
+#~ msgstr "penempatan tidak diijinkan untuk menghasilkan fungsi bawaan"
-#~ msgid "`L' is not permitted in POSIX awk formats"
-#~ msgstr "`L' tidak diijinkan dalam format awk POSIX"
+#~ msgid "Operation Not Supported"
+#~ msgstr "Operasi Tidak Didukung"
-#~ msgid "`h' is meaningless in awk formats; ignored"
-#~ msgstr "`h' tidak berarti dalam format awk; diabaikan"
+#~ msgid "invalid tree type %s in redirect()"
+#~ msgstr "tipe tree %s tidak valid dalam redirect()"
-#~ msgid "`h' is not permitted in POSIX awk formats"
-#~ msgstr "`h' tidak diijinkan dalam format awk POSIX"
+#~ msgid "can't open two way socket `%s' for input/output (%s)"
+#~ msgstr "tidak dapat membuka socket dua arah `%s' untuk input/output (%s)"
-#~ msgid "[s]printf: value %g is out of range for `%%%c' format"
-#~ msgstr "[s]printf: nilai %g diluar dari jangkauan untuk format `%%%c'"
+#~ msgid "/inet/raw client not ready yet, sorry"
+#~ msgstr "/inet/raw client belum siap, maaf"
-#~ msgid ""
-#~ "ignoring unknown format specifier character `%c': no argument converted"
-#~ msgstr ""
-#~ "mengabaikan format tidak dikenal karakter penspesifikasi `%c': tidak ada "
-#~ "argumen yang diubah"
+#~ msgid "only root may use `/inet/raw'."
+#~ msgstr "hanya root yang boleh menggunakan `/inet/raw'."
-#~ msgid "not enough arguments to satisfy format string"
-#~ msgstr "tidak cukup argumen untuk memuaskan format string"
+#~ msgid "/inet/raw server not ready yet, sorry"
+#~ msgstr "/inet/raw server belum siap, maaf"
-#~ msgid "^ ran out for this one"
-#~ msgstr "^ kehabisan untuk yang ini"
+#~ msgid "file `%s' is a directory"
+#~ msgstr "berkas `%s' adalah sebuah direktori"
-#~ msgid "[s]printf: format specifier does not have control letter"
-#~ msgstr "[s]printf: penspesifikasi format tidak memiliki pengontrol huruf"
+#~ msgid "use `PROCINFO[\"%s\"]' instead of `%s'"
+#~ msgstr "lebih baik gunakan `PROCINFO[\"%s\"]' daripada `%s'"
-#~ msgid "too many arguments supplied for format string"
-#~ msgstr "terlalu banyak argumen diberikan untuk format string"
+#~ msgid "use `PROCINFO[...]' instead of `/dev/user'"
+#~ msgstr "lebih baik gunakan `PROCINFO[...]' daripada `/dev/user'"
-#, fuzzy
-#~ msgid "attempt to use array parameter `%s' in a scalar context"
-#~ msgstr "mencoba menggunakan array `%s' dalam sebuah konteks skalar"
+#~ msgid "out of memory"
+#~ msgstr "kehabisan memori"
-#~ msgid "can't open two way socket `%s' for input/output (%s)"
-#~ msgstr "tidak dapat membuka socket dua arah `%s' untuk input/output (%s)"
+#~ msgid "`-m[fr]' option irrelevant in gawk"
+#~ msgstr "pilihan `-m[fr]' tidak relevan dalam gawk"
-#~ msgid "length: untyped argument will be forced to scalar"
-#~ msgstr "length: argument tidak terketik akan dipaksa ke skalar"
+#~ msgid "-m option usage: `-m[fr] nnn'"
+#~ msgstr "penggunaan pilihan -m: `-m[fr] nnn'"
-#~ msgid ""
-#~ "concatenation: side effects in one expression have changed the length of "
-#~ "another!"
-#~ msgstr ""
-#~ "concatenation: efek samping dalam satu ekspresi telah mengubah panjang "
-#~ "dari yang lain!"
+#~ msgid "\t-m[fr] val\n"
+#~ msgstr "\t-m[fr] val\n"
-#~ msgid "illegal type (%s) in tree_eval"
-#~ msgstr "tipe (%s) tidak legal dalam tree_eval"
+#~ msgid "\t-W compat\t\t--compat\n"
+#~ msgstr "\t-W compabilitas\t\t--compat\n"
-#~ msgid "\t# -- main --\n"
-#~ msgstr "\t# -- main --\n"
+#~ msgid "\t-W copyleft\t\t--copyleft\n"
+#~ msgstr "\t-W copyleft\t\t--copyleft\n"
-#~ msgid "invalid tree type %s in redirect()"
-#~ msgstr "tipe tree %s tidak valid dalam redirect()"
+#~ msgid "\t-W usage\t\t--usage\n"
+#~ msgstr "\t-W penggunaan\t\t--usage\n"
+
+#~ msgid "could not find groups: %s"
+#~ msgstr "tidak dapat menemukan grup: %s"
+
+#~ msgid "can't convert string to float"
+#~ msgstr "tidak dapat mengubah string ke float"
-#, fuzzy
#~ msgid "# treated internally as `delete'"
-#~ msgstr "# diperlakukan secara internal sebagai 'delete'"
+#~ msgstr "# diperlakukan secara internal sebagai `delete'"
#~ msgid "# this is a dynamically loaded extension function"
#~ msgstr "# ini adalah sebuah fungsi yang secara dinamis diload ekstensi"
+#~ msgid ""
+#~ "\t# BEGIN block(s)\n"
+#~ "\n"
+#~ msgstr ""
+#~ "\t # BEGIN blok\n"
+#~ "\n"
+
#~ msgid "unexpected type %s in prec_level"
#~ msgstr "tipe %s tidak terduga dalam prec_level"
diff --git a/po/it.po b/po/it.po
index 68f2d7f0..a5578723 100644
--- a/po/it.po
+++ b/po/it.po
@@ -1,5 +1,5 @@
# Italian messages for GNU Awk
-# Copyright (C) 2002-2013 Free Software Foundation, Inc.
+# Copyright (C) 2002-2014 Free Software Foundation, Inc.
# Antonio Colombo <azc100@gmail.com>.
#
msgid ""
@@ -7,7 +7,7 @@ msgstr ""
"Project-Id-Version: GNU Awk 4.0.73, API: 0.0\n"
"Report-Msgid-Bugs-To: arnold@skeeve.com\n"
"POT-Creation-Date: 2014-04-08 19:23+0300\n"
-"PO-Revision-Date: 2014-01-15 10:39+0100\n"
+"PO-Revision-Date: 2014-06-14 17:50+0100\n"
"Last-Translator: Antonio Colombo <azc100@gmail.com>\n"
"Language-Team: Italian <it@li.org>\n"
"Language: it\n"
@@ -211,7 +211,7 @@ msgstr "`getline' invalida all'interno della regola `%s'"
#: awkgram.y:1425
msgid "non-redirected `getline' undefined inside END action"
-msgstr "`getline' non re-diretta indefinita dentro `azione' END"
+msgstr "`getline' non ri-diretta indefinita dentro `azione' END"
#: awkgram.y:1444
msgid "old awk does not support multidimensional arrays"
@@ -1141,7 +1141,7 @@ msgstr ""
#: command.y:855
msgid "list [-|+|[filename:]lineno|function|range] - list specified line(s)."
msgstr ""
-"list [-|+|[nome-file:]num_linea|funzione|intervallo] - elenca riga/he "
+"list [-|+|[nome-file:]num_riga|funzione|intervallo] - elenca riga/he "
"richiesta/e."
#: command.y:857
@@ -1229,7 +1229,7 @@ msgid ""
"line or line N within current frame."
msgstr ""
"until [[nome-file:]N|funzione] - esegui finché il programma arriva una "
-"rigadifferente, o alla riga N nell'elemento di stack corrente."
+"riga differente, o alla riga N nell'elemento di stack corrente."
#: command.y:895
msgid "unwatch [N] - remove variable(s) from watch list."
@@ -2615,23 +2615,23 @@ msgstr "chiusura di fd %d (`%s') non riuscita (%s)"
#: io.c:716
msgid "redirection not allowed in sandbox mode"
-msgstr "re-direzione non consentita in modo `sandbox'"
+msgstr "ri-direzione non consentita in modo `sandbox'"
#: io.c:750
#, c-format
msgid "expression in `%s' redirection only has numeric value"
-msgstr "espressione nella re-direzione `%s' ha solo un valore numerico"
+msgstr "espressione nella ri-direzione `%s' ha solo un valore numerico"
#: io.c:756
#, c-format
msgid "expression for `%s' redirection has null string value"
-msgstr "espressione nella re-direzione `%s' ha per valore la stringa nulla"
+msgstr "espressione nella ri-direzione `%s' ha per valore la stringa nulla"
#: io.c:761
#, c-format
msgid "filename `%s' for `%s' redirection may be result of logical expression"
msgstr ""
-"nome-file `%s' per la re-direzione `%s' può essere il risultato di una "
+"nome-file `%s' per la ri-direzione `%s' può essere il risultato di una "
"espressione logica"
#: io.c:809
@@ -2658,12 +2658,12 @@ msgstr ""
#: io.c:986
#, c-format
msgid "can't redirect from `%s' (%s)"
-msgstr "non riesco a re-dirigere da `%s' (%s)"
+msgstr "non riesco a ri-dirigere da `%s' (%s)"
#: io.c:989
#, c-format
msgid "can't redirect to `%s' (%s)"
-msgstr "non riesco a re-dirigere a `%s' (%s)"
+msgstr "non riesco a ri-dirigere a `%s' (%s)"
#: io.c:1040
msgid ""
@@ -2692,12 +2692,12 @@ msgstr "close: `%.*s' non è un file aperto, una `pipe' o un co-processo"
#: io.c:1108
msgid "close of redirection that was never opened"
-msgstr "chiusura di una re-direzione mai aperta"
+msgstr "chiusura di una ri-direzione mai aperta"
#: io.c:1205
#, c-format
msgid "close: redirection `%s' not opened with `|&', second argument ignored"
-msgstr "close: re-direzione `%s' non aperta con `|&', ignoro secondo argomento"
+msgstr "close: ri-direzione `%s' non aperta con `|&', ignoro secondo argomento"
#: io.c:1222
#, c-format
@@ -3313,7 +3313,7 @@ msgstr "%s: argomento #%d con valore negativo %Zd, darà risultati strani"
#: msg.c:68
#, c-format
msgid "cmd. line:"
-msgstr "linea com.:"
+msgstr "riga com.:"
#: node.c:421
msgid "backslash at end of string"
@@ -3427,7 +3427,7 @@ msgstr ""
#: profile.c:1513
#, c-format
msgid "redir2str: unknown redirection type %d"
-msgstr "redir2str: tipo di re-direzione non noto %d"
+msgstr "redir2str: tipo di ri-direzione non noto %d"
#: re.c:607
#, c-format
@@ -3503,5 +3503,3 @@ msgstr "Nessuna espressione regolare precedente"
msgid "can not pop main context"
msgstr "non posso salire più in alto nello stack"
-#~ msgid "range of the form `[%c-%c]' is locale dependent"
-#~ msgstr "intervallo della forma `[%c-%c]' dipende da `locale'"
diff --git a/profile.c b/profile.c
index 28c5349c..6cb4c6a4 100644
--- a/profile.c
+++ b/profile.c
@@ -711,20 +711,28 @@ cleanup:
ip = pc + 1;
indent(ip->forloop_body->exec_count);
fprintf(prof_fp, "%s (", op2str(pc->opcode));
- pprint(pc->nexti, ip->forloop_cond, true);
- fprintf(prof_fp, "; ");
- if (ip->forloop_cond->opcode == Op_no_op &&
- ip->forloop_cond->nexti == ip->forloop_body)
+ /* If empty for loop header, print it a little more nicely. */
+ if ( pc->nexti->opcode == Op_no_op
+ && ip->forloop_cond == pc->nexti
+ && pc->target_continue->opcode == Op_jmp) {
+ fprintf(prof_fp, ";;");
+ } else {
+ pprint(pc->nexti, ip->forloop_cond, true);
fprintf(prof_fp, "; ");
- else {
- pprint(ip->forloop_cond, ip->forloop_body, true);
- t1 = pp_pop();
- fprintf(prof_fp, "%s; ", t1->pp_str);
- pp_free(t1);
- }
- pprint(pc->target_continue, pc->target_break, true);
+ if (ip->forloop_cond->opcode == Op_no_op &&
+ ip->forloop_cond->nexti == ip->forloop_body)
+ fprintf(prof_fp, "; ");
+ else {
+ pprint(ip->forloop_cond, ip->forloop_body, true);
+ t1 = pp_pop();
+ fprintf(prof_fp, "%s; ", t1->pp_str);
+ pp_free(t1);
+ }
+
+ pprint(pc->target_continue, pc->target_break, true);
+ }
fprintf(prof_fp, ") {\n");
indent_in();
pprint(ip->forloop_body->nexti, pc->target_continue, false);
diff --git a/regcomp.c b/regcomp.c
index 776b7134..1f6d978a 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -856,10 +856,6 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
#ifndef _LIBC
char *codeset_name;
#endif
-#if defined(GAWK) && defined(LIBC_IS_BORKED)
- /* Needed for brain damaged systems */
- extern int gawk_mb_cur_max;
-#endif
memset (dfa, '\0', sizeof (re_dfa_t));
@@ -881,11 +877,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
dfa->state_hash_mask = table_size - 1;
-#if defined(GAWK) && defined(LIBC_IS_BORKED)
- dfa->mb_cur_max = gawk_mb_cur_max;
-#else
dfa->mb_cur_max = MB_CUR_MAX;
-#endif
#ifdef _LIBC
if (dfa->mb_cur_max == 6
&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
@@ -907,24 +899,9 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
codeset_name = strchr (codeset_name, '.') + 1;
# endif
- /* strcasecmp isn't a standard interface. brute force check */
-#ifndef GAWK
if (strcasecmp (codeset_name, "UTF-8") == 0
|| strcasecmp (codeset_name, "UTF8") == 0)
dfa->is_utf8 = 1;
-#else
- if ( (codeset_name[0] == 'U' || codeset_name[0] == 'u')
- && (codeset_name[1] == 'T' || codeset_name[1] == 't')
- && (codeset_name[2] == 'F' || codeset_name[2] == 'f')
- && (codeset_name[3] == '-'
- ? codeset_name[4] == '8' && codeset_name[5] == '\0'
- : codeset_name[3] == '8' && codeset_name[4] == '\0'))
- dfa->is_utf8 = 1;
-#if defined(GAWK) && defined(LIBC_IS_BORKED)
- if (gawk_mb_cur_max == 1)
- dfa->is_utf8 = 0;
-#endif /* defined(GAWK) && defined(LIBC_IS_BORKED) */
-#endif
/* We check exhaustively in the loop below if this charset is a
superset of ASCII. */
@@ -2215,7 +2192,11 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
{
branch = parse_branch (regexp, preg, token, syntax, nest, err);
if (BE (*err != REG_NOERROR && branch == NULL, 0))
- return NULL;
+ {
+ if (tree != NULL)
+ postorder (tree, free_tree, NULL);
+ return NULL;
+ }
}
else
branch = NULL;
@@ -2476,14 +2457,21 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
|| token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
{
- tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
- if (BE (*err != REG_NOERROR && tree == NULL, 0))
- return NULL;
+ bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
+ if (BE (*err != REG_NOERROR && dup_tree == NULL, 0))
+ {
+ if (tree != NULL)
+ postorder (tree, free_tree, NULL);
+ return NULL;
+ }
+ tree = dup_tree;
/* In BRE consecutive duplications are not allowed. */
if ((syntax & RE_CONTEXT_INVALID_DUP)
&& (token->type == OP_DUP_ASTERISK
|| token->type == OP_OPEN_DUP_NUM))
{
+ if (tree != NULL)
+ postorder (tree, free_tree, NULL);
*err = REG_BADRPT;
return NULL;
}
@@ -2632,6 +2620,8 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
/* Duplicate ELEM before it is marked optional. */
elem = duplicate_tree (elem, dfa);
+ if (BE (elem == NULL, 0))
+ goto parse_dup_op_espace;
old_tree = tree;
}
else
@@ -3128,8 +3118,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
if (BE (sbcset == NULL, 0))
#endif /* RE_ENABLE_I18N */
{
-#ifdef RE_ENABLE_I18N
re_free (sbcset);
+#ifdef RE_ENABLE_I18N
re_free (mbcset);
#endif
*err = REG_ESPACE;
diff --git a/regex.h b/regex.h
index 56602961..3d26a606 100644
--- a/regex.h
+++ b/regex.h
@@ -470,7 +470,7 @@ typedef struct
#ifdef __USE_GNU
/* Sets the current default syntax to SYNTAX, and return the old syntax.
You can also simply assign to the `re_syntax_options' variable. */
-extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
+extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);
/* Compile the regular expression PATTERN, with length LENGTH
and syntax given by the global `re_syntax_options', into the buffer
@@ -480,14 +480,14 @@ extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
Note that the translate table must either have been initialised by
`regcomp', with a malloc'ed value, or set to NULL before calling
`regfree'. */
-extern const char *re_compile_pattern (const char *__pattern, size_t __length,
- struct re_pattern_buffer *__buffer);
+extern const char *re_compile_pattern (const char *pattern, size_t length,
+ struct re_pattern_buffer *buffer);
/* Compile a fastmap for the compiled pattern in BUFFER; used to
accelerate searches. Return 0 if successful and -2 if was an
internal error. */
-extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
+extern int re_compile_fastmap (struct re_pattern_buffer *buffer);
/* Search in the string STRING (with length LENGTH) for the pattern
@@ -495,30 +495,30 @@ extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
characters. Return the starting position of the match, -1 for no
match, or -2 for an internal error. Also return register
information in REGS (if REGS and BUFFER->no_sub are nonzero). */
-extern int re_search (struct re_pattern_buffer *__buffer, const char *__cstring,
- int __length, int __start, int __range,
- struct re_registers *__regs);
+extern int re_search (struct re_pattern_buffer *buffer, const char *c_string,
+ int length, int start, int range,
+ struct re_registers *regs);
/* Like `re_search', but search in the concatenation of STRING1 and
STRING2. Also, stop searching at index START + STOP. */
-extern int re_search_2 (struct re_pattern_buffer *__buffer,
- const char *__string1, int __length1,
- const char *__string2, int __length2, int __start,
- int __range, struct re_registers *__regs, int __stop);
+extern int re_search_2 (struct re_pattern_buffer *buffer,
+ const char *string1, int length1,
+ const char *string2, int length2, int start,
+ int range, struct re_registers *regs, int stop);
/* Like `re_search', but return how many characters in STRING the regexp
in BUFFER matched, starting at position START. */
-extern int re_match (struct re_pattern_buffer *__buffer, const char *__cstring,
- int __length, int __start, struct re_registers *__regs);
+extern int re_match (struct re_pattern_buffer *buffer, const char *c_string,
+ int length, int start, struct re_registers *regs);
/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
-extern int re_match_2 (struct re_pattern_buffer *__buffer,
- const char *__string1, int __length1,
- const char *__string2, int __length2, int __start,
- struct re_registers *__regs, int __stop);
+extern int re_match_2 (struct re_pattern_buffer *buffer,
+ const char *string1, int length1,
+ const char *string2, int length2, int start,
+ struct re_registers *regs, int stop);
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
@@ -533,10 +533,10 @@ extern int re_match_2 (struct re_pattern_buffer *__buffer,
Unless this function is called, the first search or match using
PATTERN_BUFFER will allocate its own register data, without
freeing the old data. */
-extern void re_set_registers (struct re_pattern_buffer *__buffer,
- struct re_registers *__regs,
- unsigned int __num_regs,
- regoff_t *__starts, regoff_t *__ends);
+extern void re_set_registers (struct re_pattern_buffer *buffer,
+ struct re_registers *regs,
+ unsigned int num_regs,
+ regoff_t *starts, regoff_t *ends);
#endif /* Use GNU */
#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_MISC)
@@ -569,19 +569,19 @@ extern int re_exec (const char *);
#endif
/* POSIX compatibility. */
-extern int regcomp (regex_t *__restrict __preg,
- const char *__restrict __pattern,
- int __cflags);
+extern int regcomp (regex_t *__restrict preg,
+ const char *__restrict pattern,
+ int cflags);
-extern int regexec (const regex_t *__restrict __preg,
- const char *__restrict __cstring, size_t __nmatch,
- regmatch_t __pmatch[__restrict_arr],
- int __eflags);
+extern int regexec (const regex_t *__restrict preg,
+ const char *__restrict c_string, size_t nmatch,
+ regmatch_t pmatch[__restrict_arr],
+ int eflags);
-extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
- char *__restrict __errbuf, size_t __errbuf_size);
+extern size_t regerror (int errcode, const regex_t *__restrict preg,
+ char *__restrict errbuf, size_t errbuf_size);
-extern void regfree (regex_t *__preg);
+extern void regfree (regex_t *preg);
#ifdef __cplusplus
diff --git a/regex_internal.c b/regex_internal.c
index 056cff3d..9e427081 100644
--- a/regex_internal.c
+++ b/regex_internal.c
@@ -545,7 +545,10 @@ build_upper_buffer (re_string_t *pstr)
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
if (BE (pstr->trans != NULL, 0))
ch = pstr->trans[ch];
- pstr->mbs[char_idx] = toupper (ch);
+ if (islower (ch))
+ pstr->mbs[char_idx] = toupper (ch);
+ else
+ pstr->mbs[char_idx] = ch;
}
pstr->valid_len = char_idx;
pstr->valid_raw_len = char_idx;
@@ -683,7 +686,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
pstr->valid_len - offset);
pstr->valid_len -= offset;
pstr->valid_raw_len -= offset;
-#if DEBUG
+#if defined DEBUG && DEBUG
assert (pstr->valid_len > 0);
#endif
}
@@ -940,7 +943,7 @@ re_string_context_at (const re_string_t *input, int idx, int eflags)
int wc_idx = idx;
while(input->wcs[wc_idx] == WEOF)
{
-#ifdef DEBUG
+#if defined DEBUG && DEBUG
/* It must not happen. */
assert (wc_idx >= 0);
#endif
diff --git a/replace.c b/replace.c
index 559de014..71a8dc51 100644
--- a/replace.c
+++ b/replace.c
@@ -50,7 +50,7 @@
#include "missing_d/memmove.c"
#endif /* HAVE_MEMMOVE */
-#ifndef HAVE_STRNCASECMP
+#if !defined(HAVE_STRNCASECMP) || !defined(HAVE_STRCASECMP)
#include "missing_d/strncasecmp.c"
#endif /* HAVE_STRCASE */
diff --git a/symbol.c b/symbol.c
index fe297d22..e89214c0 100644
--- a/symbol.c
+++ b/symbol.c
@@ -35,8 +35,8 @@ static int var_count; /* total number of global variables and functions */
static NODE *symbol_list;
static void (*install_func)(NODE *) = NULL;
-static NODE *make_symbol(char *name, NODETYPE type);
-static NODE *install(char *name, NODE *parm, NODETYPE type);
+static NODE *make_symbol(const char *name, NODETYPE type);
+static NODE *install(const char *name, NODE *parm, NODETYPE type);
static void free_bcpool(INSTRUCTION *pl);
static AWK_CONTEXT *curr_ctxt = NULL;
@@ -75,7 +75,7 @@ init_symbol_table()
*/
NODE *
-install_symbol(char *name, NODETYPE type)
+install_symbol(const char *name, NODETYPE type)
{
return install(name, NULL, type);
}
@@ -112,14 +112,12 @@ lookup(const char *name)
continue;
n = in_array(tables[i], tmp);
- if (n != NULL) {
- unref(tmp);
- return n;
- }
+ if (n != NULL)
+ break;
}
unref(tmp);
- return n; /* NULL */
+ return n; /* NULL or new place */
}
/* make_params --- allocate function parameters for the symbol table */
@@ -155,11 +153,13 @@ install_params(NODE *func)
if (func == NULL)
return;
+
assert(func->type == Node_func);
- if ((pcount = func->param_cnt) <= 0
- || (parms = func->fparms) == NULL
- )
+
+ if ( (pcount = func->param_cnt) <= 0
+ || (parms = func->fparms) == NULL)
return;
+
for (i = 0; i < pcount; i++)
(void) install(parms[i].param, parms + i, Node_param_list);
}
@@ -177,10 +177,11 @@ remove_params(NODE *func)
if (func == NULL)
return;
+
assert(func->type == Node_func);
- if ((pcount = func->param_cnt) <= 0
- || (parms = func->fparms) == NULL
- )
+
+ if ( (pcount = func->param_cnt) <= 0
+ || (parms = func->fparms) == NULL)
return;
for (i = pcount - 1; i >= 0; i--) {
@@ -191,11 +192,11 @@ remove_params(NODE *func)
assert(p->type == Node_param_list);
tmp = make_string(p->vname, strlen(p->vname));
tmp2 = in_array(param_table, tmp);
- if (tmp2 != NULL && tmp2->dup_ent != NULL) {
+ if (tmp2 != NULL && tmp2->dup_ent != NULL)
tmp2->dup_ent = tmp2->dup_ent->dup_ent;
- } else {
+ else
(void) assoc_remove(param_table, tmp);
- }
+
unref(tmp);
}
@@ -274,7 +275,7 @@ destroy_symbol(NODE *r)
/* make_symbol --- allocates a global symbol for the symbol table. */
static NODE *
-make_symbol(char *name, NODETYPE type)
+make_symbol(const char *name, NODETYPE type)
{
NODE *r;
@@ -284,7 +285,7 @@ make_symbol(char *name, NODETYPE type)
null_array(r);
else if (type == Node_var)
r->var_value = dupnode(Nnull_string);
- r->vname = name;
+ r->vname = (char *) name;
r->type = type;
return r;
@@ -293,7 +294,7 @@ make_symbol(char *name, NODETYPE type)
/* install --- install a global name or function parameter in the symbol table */
static NODE *
-install(char *name, NODE *parm, NODETYPE type)
+install(const char *name, NODE *parm, NODETYPE type)
{
NODE *r;
NODE **aptr;
@@ -306,20 +307,22 @@ install(char *name, NODE *parm, NODETYPE type)
if (type == Node_param_list) {
table = param_table;
- } else if (type == Node_func || type == Node_ext_func) {
+ } else if ( type == Node_func
+ || type == Node_ext_func
+ || type == Node_builtin_func) {
table = func_table;
} else if (installing_specials) {
table = global_table;
}
- if (parm != NULL) {
+ if (parm != NULL)
r = parm;
- } else {
+ else {
/* global symbol */
r = make_symbol(name, type);
if (type == Node_func)
func_count++;
- if (type != Node_ext_func && table != global_table)
+ if (type != Node_ext_func && type != Node_builtin_func && table != global_table)
var_count++; /* total, includes Node_func */
}
@@ -344,7 +347,6 @@ simple:
return r;
}
-
/* comp_symbol --- compare two (variable or function) names */
static int
@@ -393,7 +395,7 @@ get_symbols(SYMBOL_TYPE what, bool sort)
for (i = count = 0; i < max; i += 2) {
r = list[i+1];
- if (r->type == Node_ext_func)
+ if (r->type == Node_ext_func || r->type == Node_builtin_func)
continue;
assert(r->type == Node_func);
table[count++] = r;
@@ -508,7 +510,7 @@ append_symbol(NODE *r)
symbol_list->rnode = p;
}
-/* release_symbol --- free symbol list and optionally remove symbol from symbol table */
+/* release_symbols --- free symbol list and optionally remove symbol from symbol table */
void
release_symbols(NODE *symlist, int keep_globals)
@@ -517,7 +519,8 @@ release_symbols(NODE *symlist, int keep_globals)
for (p = symlist->rnode; p != NULL; p = next) {
if (! keep_globals) {
- /* destroys globals, function, and params
+ /*
+ * destroys globals, function, and params
* if still in symbol table
*/
destroy_symbol(p->lnode);
@@ -538,7 +541,7 @@ load_symbols()
NODE *sym_array;
NODE **aptr;
long i, j, max;
- NODE *user, *extension, *untyped, *scalar, *array;
+ NODE *user, *extension, *untyped, *scalar, *array, *built_in;
NODE **list;
NODE *tables[4];
@@ -569,6 +572,7 @@ load_symbols()
scalar = make_string("scalar", 6);
untyped = make_string("untyped", 7);
array = make_string("array", 5);
+ built_in = make_string("builtin", 7);
for (i = 0; tables[i] != NULL; i++) {
list = assoc_list(tables[i], "@unsorted", ASORTI);
@@ -579,6 +583,7 @@ load_symbols()
r = list[j+1];
if ( r->type == Node_ext_func
|| r->type == Node_func
+ || r->type == Node_builtin_func
|| r->type == Node_var
|| r->type == Node_var_array
|| r->type == Node_var_new) {
@@ -593,6 +598,9 @@ load_symbols()
case Node_func:
*aptr = dupnode(user);
break;
+ case Node_builtin_func:
+ *aptr = dupnode(built_in);
+ break;
case Node_var:
*aptr = dupnode(scalar);
break;
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 00000000..fee5eeca
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,90 @@
+#
+# test/CMakeLists.txt --- CMake input file for gawk
+#
+# Copyright (C) 2013
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+if(WIN32)
+ set(SHELL_PREFIX "C:\\MinGW\\msys\\1.0\\bin\\sh")
+endif()
+
+# Find the names of the groups of tests in Makefile.am.
+file(READ ${CMAKE_CURRENT_SOURCE_DIR}/Makefile.am ALL_GROUPS)
+string(REGEX MATCHALL "[A-Z_]*_TESTS " ALL_GROUPS "${ALL_GROUPS}")
+string(REGEX REPLACE "_TESTS " ";" ALL_GROUPS "${ALL_GROUPS}")
+# For each group of test cases, search through Makefile.am and find the test cases.
+foreach(testgroup ${ALL_GROUPS} )
+ file(READ ${CMAKE_CURRENT_SOURCE_DIR}/Makefile.am ONE_GROUP)
+ string(REGEX MATCH "${testgroup}_TESTS = [a-z0-9_ \\\n\t]*" ONE_GROUP "${ONE_GROUP}")
+ string(REGEX REPLACE "${testgroup}_TESTS = " "" ONE_GROUP "${ONE_GROUP}")
+ string(REGEX REPLACE "[\\\n\t]" "" ONE_GROUP "${ONE_GROUP}")
+ string(REGEX REPLACE " " ";" ONE_GROUP "${ONE_GROUP}")
+ # Use each name of a test case to start a script that executes the test case.
+ foreach(testcase ${ONE_GROUP} )
+ add_test("${testgroup}.${testcase}" ${SHELL_PREFIX} ${CMAKE_SOURCE_DIR}/cmake/basictest ${CMAKE_BINARY_DIR}/gawk${CMAKE_EXECUTABLE_SUFFIX} ${testcase})
+ endforeach(testcase)
+endforeach(testgroup)
+
+# Create an empty configuration file for customizing test execution.
+set(CTestCustom ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
+file(WRITE ${CTestCustom} "# DO NOT EDIT, THIS FILE WILL BE OVERWRITTEN\n" )
+# Test case SHLIB.filefuncs needs a file named gawkapi.o in source directory.
+file(APPEND ${CTestCustom} "file(COPY ${CMAKE_SOURCE_DIR}/README DESTINATION ${CMAKE_SOURCE_DIR}/gawkapi.o)\n")
+# Exclude test cases from execution that make no sense on a certain platform.
+file(APPEND ${CTestCustom} "set(CTEST_CUSTOM_TESTS_IGNORE\n")
+if(WIN32)
+ file(APPEND ${CTestCustom} " BASIC.exitval2\n")
+ file(APPEND ${CTestCustom} " BASIC.hsprint\n")
+ file(APPEND ${CTestCustom} " BASIC.rstest4\n")
+ file(APPEND ${CTestCustom} " BASIC.rstest5\n")
+ file(APPEND ${CTestCustom} " UNIX.getlnhd\n")
+ file(APPEND ${CTestCustom} " UNIX.pid\n")
+ file(APPEND ${CTestCustom} " GAWK_EXT.beginfile1\n")
+ file(APPEND ${CTestCustom} " GAWK_EXT.beginfile2\n")
+ file(APPEND ${CTestCustom} " GAWK_EXT.clos1way\n")
+ file(APPEND ${CTestCustom} " GAWK_EXT.devfd\n")
+ file(APPEND ${CTestCustom} " GAWK_EXT.devfd1\n")
+ file(APPEND ${CTestCustom} " GAWK_EXT.devfd2\n")
+ file(APPEND ${CTestCustom} " GAWK_EXT.getlndir\n")
+ file(APPEND ${CTestCustom} " GAWK_EXT.posix\n")
+ file(APPEND ${CTestCustom} " GAWK_EXT.pty1\n")
+ file(APPEND ${CTestCustom} " INET.inetdayu\n")
+ file(APPEND ${CTestCustom} " INET.inetdayt\n")
+ file(APPEND ${CTestCustom} " INET.inetechu\n")
+ file(APPEND ${CTestCustom} " INET.inetecht\n")
+ file(APPEND ${CTestCustom} " MACHINE.double2\n")
+ file(APPEND ${CTestCustom} " LOCALE_CHARSET.fmttest\n")
+ file(APPEND ${CTestCustom} " LOCALE_CHARSET.lc_num1\n")
+ file(APPEND ${CTestCustom} " LOCALE_CHARSET.mbfw1\n")
+ file(APPEND ${CTestCustom} " SHLIB.filefuncs\n")
+ file(APPEND ${CTestCustom} " SHLIB.fnmatch\n")
+ file(APPEND ${CTestCustom} " SHLIB.fork\n")
+ file(APPEND ${CTestCustom} " SHLIB.fork2\n")
+ file(APPEND ${CTestCustom} " SHLIB.fts\n")
+ file(APPEND ${CTestCustom} " SHLIB.functab4\n")
+ file(APPEND ${CTestCustom} " SHLIB.readdir\n")
+ file(APPEND ${CTestCustom} " SHLIB.revtwoway\n")
+ file(APPEND ${CTestCustom} " SHLIB.rwarray\n")
+endif()
+file(APPEND ${CTestCustom} ")\n")
+
diff --git a/test/ChangeLog b/test/ChangeLog
index 6d4cd1c2..68cc18e6 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,92 @@
+2014-09-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ * functab4.awk: Changed to use stat instead of chdir since
+ /tmp isn't /tmp on all systems (e.g. Mac OS X). Thanks to
+ Hermann Peifer for the report.
+
+ Sort of related:
+
+ * indirectcall2.awk, indirectcall2.ok: New files.
+ * id.ok: Updated.
+
+2014-09-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * profile2.ok: Update after code improvement in profiler.
+ * functab4.ok: Update after making indirect calls of
+ extension functions work. :-)
+
+2014-08-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * badargs.ok: Adjust after revising text for -L option.
+
+2014-08-12 Arnold D. Robbins <arnold@skeeve.com>
+
+ * ofs1.ok: Updated to match corrected behavior in gawk.
+
+2014-08-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (mpfrsqrt): New test.
+ * mpfrsqrt.awk, mpfrsqrt.ok: New files.
+ Test from Katie Wasserman <katie@wass.net>.
+
+2014-07-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * printhuge.awk: Add a newline to output.
+ * printhuge.ok: Adjust.
+
+2014-07-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * badargs.ok: Adjust after correctly alphabetizing options.
+
+2014-07-10 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (printhuge): New test.
+ * printhuge.awk, printhuge.ok: New files.
+ Test from mail.green.fox@gmail.com.
+
+2014-06-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (profile1, profile4, profile5): Adjust for change to
+ --pretty-print option.
+
+2014-06-19 Michael Forney <forney@google.com>
+
+ * Makefile.am (poundbang): Fix relative path of AWKPROG.
+
+2014-06-08 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (dbugeval): Add leading @ to recipe. Ooops.
+
+2014-05-30 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (regnul1, regnul2): New tests.
+ * regnul1.awk, regnul1.ok, regnul2.awk, regnul2.ok: New files.
+
+2014-05-22 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * lintwarn.ok: Updated.
+
+2014-05-13 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (EXTRA_DIST): Forgot dbugeval.ok. Ooops.
+
+2014-05-11 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (dbugeval): New test.
+ * dbugeval.in, dbugeval.ok: New files.
+
+2014-05-10 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * Makefile.am (rsglstdin): New test.
+ * rsglstdin.ok: New file.
+
+2014-05-09 Andrew J. Schorr <aschorr@telemetry-investments.com>
+
+ * Makefile.am (rebuf): Force buffer size to 4096 via AWKBUFSIZE
+ environment variable.
+ (rsgetline): New test.
+ * rsgetline.awk, rsgetline.in, rsgetline.ok: New files.
+
2014-04-11 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.am (charset-msg-start): Add a warning message that tests
diff --git a/test/Makefile.am b/test/Makefile.am
index 238f2ed7..3be8b7cb 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -167,6 +167,8 @@ EXTRA_DIST = \
datanonl.awk \
datanonl.in \
datanonl.ok \
+ dbugeval.in \
+ dbugeval.ok \
defref.awk \
defref.ok \
delargv.awk \
@@ -419,6 +421,8 @@ EXTRA_DIST = \
indirectcall.awk \
indirectcall.in \
indirectcall.ok \
+ indirectcall2.awk \
+ indirectcall2.ok \
inftest.awk \
inftest.ok \
inplace.in \
@@ -527,10 +531,14 @@ EXTRA_DIST = \
mpfrnr.awk \
mpfrnr.in \
mpfrnr.ok \
+ mpfrrem.awk \
+ mpfrrem.ok \
mpfrrnd.awk \
mpfrrnd.ok \
mpfrsort.awk \
mpfrsort.ok \
+ mpfrsqrt.awk \
+ mpfrsqrt.ok \
mtchi18n.awk \
mtchi18n.in \
mtchi18n.ok \
@@ -680,6 +688,8 @@ EXTRA_DIST = \
printfbad3.awk \
printfbad3.ok \
printfloat.awk \
+ printhuge.awk \
+ printhuge.ok \
printlang.awk \
prmarscl.awk \
prmarscl.ok \
@@ -729,6 +739,10 @@ EXTRA_DIST = \
regexprange.ok \
reginttrad.awk \
reginttrad.ok \
+ regnul1.awk \
+ regnul1.ok \
+ regnul2.awk \
+ regnul2.ok \
regrange.awk \
regrange.ok \
regtest.sh \
@@ -759,6 +773,10 @@ EXTRA_DIST = \
rs.awk \
rs.in \
rs.ok \
+ rsgetline.awk \
+ rsgetline.in \
+ rsgetline.ok \
+ rsglstdin.ok \
rsnul1nl.awk \
rsnul1nl.in \
rsnul1nl.ok \
@@ -988,19 +1006,19 @@ UNIX_TESTS = \
GAWK_EXT_TESTS = \
aadelete1 aadelete2 aarray1 aasort aasorti argtest arraysort \
backw badargs beginfile1 beginfile2 binmode1 charasbytes \
- colonwarn clos1way delsub devfd devfd1 devfd2 dumpvars exit \
+ colonwarn clos1way dbugeval delsub devfd devfd1 devfd2 dumpvars exit \
fieldwdth fpat1 fpat2 fpat3 fpatnull fsfwfs funlen \
functab1 functab2 functab3 fwtest fwtest2 fwtest3 \
gensub gensub2 getlndir gnuops2 gnuops3 gnureops \
icasefs icasers id igncdym igncfs ignrcas2 ignrcase \
incdupe incdupe2 incdupe3 incdupe4 incdupe5 incdupe6 incdupe7 \
- include include2 indirectcall \
+ include include2 indirectcall indirectcall2 \
lint lintold lintwarn \
manyfiles match1 match2 match3 mbstr1 \
nastyparm next nondec nondec2 \
- patsplit posix printfbad1 printfbad2 printfbad3 procinfs \
+ patsplit posix printfbad1 printfbad2 printfbad3 printhuge procinfs \
profile1 profile2 profile3 profile4 profile5 pty1 \
- rebuf regx8bit reginttrad reint reint2 rsstart1 \
+ rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin rsstart1 \
rsstart2 rsstart3 rstest6 shadow sortfor sortu split_after_fpat \
splitarg4 strftime \
strtonum switch2 symtab1 symtab2 symtab3 symtab4 symtab5 symtab6 \
@@ -1012,8 +1030,8 @@ INET_TESTS = inetdayu inetdayt inetechu inetecht
MACHINE_TESTS = double1 double2 fmtspcl intformat
-MPFR_TESTS = mpfrnr mpfrnegzero mpfrrnd mpfrieee mpfrexprange \
- mpfrsort mpfrbigint
+MPFR_TESTS = mpfrnr mpfrnegzero mpfrrem mpfrrnd mpfrieee mpfrexprange \
+ mpfrsort mpfrsqrt mpfrbigint
LOCALE_CHARSET_TESTS = \
asort asorti backbigs1 backsmalls1 backsmalls2 \
@@ -1185,7 +1203,7 @@ poundbang::
@if ./_pbd.awk "$(srcdir)"/poundbang.awk > _`basename $@` ; \
then : ; \
else \
- sed "s;/tmp/gawk;../$(AWKPROG);" < "$(srcdir)"/poundbang.awk > ./_pbd.awk ; \
+ sed "s;/tmp/gawk;$(AWKPROG);" < "$(srcdir)"/poundbang.awk > ./_pbd.awk ; \
chmod +x ./_pbd.awk ; \
LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} ./_pbd.awk "$(srcdir)"/poundbang.awk > _`basename $@`; \
fi
@@ -1329,6 +1347,16 @@ fmtspcl: fmtspcl.ok
$(CMP) "$(srcdir)"/$@-mpfr.ok _$@ && rm -f _$@ ; \
fi
+rebuf::
+ @echo $@
+ @AWKBUFSIZE=4096 AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+rsglstdin::
+ @echo $@
+ @cat "$(srcdir)"/rsgetline.in | AWKPATH="$(srcdir)" $(AWK) -f rsgetline.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
reint::
@echo $@
@$(AWK) --re-interval -f "$(srcdir)"/reint.awk "$(srcdir)"/reint.in >_$@
@@ -1639,10 +1667,11 @@ dumpvars::
profile1:
@echo $@
- @$(AWK) --pretty-print=ap-$@.out -f "$(srcdir)"/xref.awk "$(srcdir)"/dtdgport.awk > _$@.out1
+ @$(AWK) -f "$(srcdir)"/xref.awk "$(srcdir)"/dtdgport.awk > _$@.out1
+ @$(AWK) --pretty-print=ap-$@.out -f "$(srcdir)"/xref.awk
@$(AWK) -f ap-$@.out "$(srcdir)"/dtdgport.awk > _$@.out2 ; rm ap-$@.out
@$(CMP) _$@.out1 _$@.out2 && rm _$@.out[12] || { echo EXIT CODE: $$? >>_$@ ; \
- cp "$(srcdir)"/dtdgport.awk > $@.ok ; }
+ cp "$(srcdir)"/dtdgport.awk $@.ok ; }
profile2:
@@ -1659,13 +1688,13 @@ profile3:
profile4:
@echo $@
- @GAWK_NO_PP_RUN=1 $(AWK) --profile=ap-$@.out -f "$(srcdir)"/$@.awk > /dev/null
+ @$(AWK) --pretty-print=ap-$@.out -f "$(srcdir)"/$@.awk > /dev/null
@sed 1,2d < ap-$@.out > _$@; rm ap-$@.out
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
profile5:
@echo $@
- @GAWK_NO_PP_RUN=1 $(AWK) --profile=ap-$@.out -f "$(srcdir)"/$@.awk > /dev/null
+ @$(AWK) --pretty-print=ap-$@.out -f "$(srcdir)"/$@.awk > /dev/null
@sed 1,2d < ap-$@.out > _$@; rm ap-$@.out
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
@@ -1725,6 +1754,16 @@ mpfrbigint:
@$(AWK) -M -f "$(srcdir)"/$@.awk > _$@ 2>&1
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+mpfrsqrt:
+ @echo $@
+ @$(AWK) -M -f "$(srcdir)"/$@.awk > _$@ 2>&1
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+mpfrrem:
+ @echo $@
+ @$(AWK) -M -f "$(srcdir)"/$@.awk > _$@ 2>&1
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
jarebug::
@echo $@
@"$(srcdir)"/$@.sh "$(AWKPROG)" "$(srcdir)"/$@.awk "$(srcdir)"/$@.in "_$@"
@@ -1922,6 +1961,17 @@ backsmalls2:
AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+dbugeval::
+ @echo $@
+ @$(AWK) --debug -f /dev/null < "$(srcdir)"/$@.in > _$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+printhuge::
+ @echo $@
+ @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
+ AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
# Targets generated for other tests:
include Maketests
diff --git a/test/Makefile.in b/test/Makefile.in
index 0831c597..b9753369 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -413,6 +413,8 @@ EXTRA_DIST = \
datanonl.awk \
datanonl.in \
datanonl.ok \
+ dbugeval.in \
+ dbugeval.ok \
defref.awk \
defref.ok \
delargv.awk \
@@ -665,6 +667,8 @@ EXTRA_DIST = \
indirectcall.awk \
indirectcall.in \
indirectcall.ok \
+ indirectcall2.awk \
+ indirectcall2.ok \
inftest.awk \
inftest.ok \
inplace.in \
@@ -773,10 +777,14 @@ EXTRA_DIST = \
mpfrnr.awk \
mpfrnr.in \
mpfrnr.ok \
+ mpfrrem.awk \
+ mpfrrem.ok \
mpfrrnd.awk \
mpfrrnd.ok \
mpfrsort.awk \
mpfrsort.ok \
+ mpfrsqrt.awk \
+ mpfrsqrt.ok \
mtchi18n.awk \
mtchi18n.in \
mtchi18n.ok \
@@ -926,6 +934,8 @@ EXTRA_DIST = \
printfbad3.awk \
printfbad3.ok \
printfloat.awk \
+ printhuge.awk \
+ printhuge.ok \
printlang.awk \
prmarscl.awk \
prmarscl.ok \
@@ -975,6 +985,10 @@ EXTRA_DIST = \
regexprange.ok \
reginttrad.awk \
reginttrad.ok \
+ regnul1.awk \
+ regnul1.ok \
+ regnul2.awk \
+ regnul2.ok \
regrange.awk \
regrange.ok \
regtest.sh \
@@ -1005,6 +1019,10 @@ EXTRA_DIST = \
rs.awk \
rs.in \
rs.ok \
+ rsgetline.awk \
+ rsgetline.in \
+ rsgetline.ok \
+ rsglstdin.ok \
rsnul1nl.awk \
rsnul1nl.in \
rsnul1nl.ok \
@@ -1233,19 +1251,19 @@ UNIX_TESTS = \
GAWK_EXT_TESTS = \
aadelete1 aadelete2 aarray1 aasort aasorti argtest arraysort \
backw badargs beginfile1 beginfile2 binmode1 charasbytes \
- colonwarn clos1way delsub devfd devfd1 devfd2 dumpvars exit \
+ colonwarn clos1way dbugeval delsub devfd devfd1 devfd2 dumpvars exit \
fieldwdth fpat1 fpat2 fpat3 fpatnull fsfwfs funlen \
functab1 functab2 functab3 fwtest fwtest2 fwtest3 \
gensub gensub2 getlndir gnuops2 gnuops3 gnureops \
icasefs icasers id igncdym igncfs ignrcas2 ignrcase \
incdupe incdupe2 incdupe3 incdupe4 incdupe5 incdupe6 incdupe7 \
- include include2 indirectcall \
+ include include2 indirectcall indirectcall2 \
lint lintold lintwarn \
manyfiles match1 match2 match3 mbstr1 \
nastyparm next nondec nondec2 \
- patsplit posix printfbad1 printfbad2 printfbad3 procinfs \
+ patsplit posix printfbad1 printfbad2 printfbad3 printhuge procinfs \
profile1 profile2 profile3 profile4 profile5 pty1 \
- rebuf regx8bit reginttrad reint reint2 rsstart1 \
+ rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin rsstart1 \
rsstart2 rsstart3 rstest6 shadow sortfor sortu split_after_fpat \
splitarg4 strftime \
strtonum switch2 symtab1 symtab2 symtab3 symtab4 symtab5 symtab6 \
@@ -1254,8 +1272,8 @@ GAWK_EXT_TESTS = \
EXTRA_TESTS = inftest regtest
INET_TESTS = inetdayu inetdayt inetechu inetecht
MACHINE_TESTS = double1 double2 fmtspcl intformat
-MPFR_TESTS = mpfrnr mpfrnegzero mpfrrnd mpfrieee mpfrexprange \
- mpfrsort mpfrbigint
+MPFR_TESTS = mpfrnr mpfrnegzero mpfrrem mpfrrnd mpfrieee mpfrexprange \
+ mpfrsort mpfrsqrt mpfrbigint
LOCALE_CHARSET_TESTS = \
asort asorti backbigs1 backsmalls1 backsmalls2 \
@@ -1610,7 +1628,7 @@ poundbang::
@if ./_pbd.awk "$(srcdir)"/poundbang.awk > _`basename $@` ; \
then : ; \
else \
- sed "s;/tmp/gawk;../$(AWKPROG);" < "$(srcdir)"/poundbang.awk > ./_pbd.awk ; \
+ sed "s;/tmp/gawk;$(AWKPROG);" < "$(srcdir)"/poundbang.awk > ./_pbd.awk ; \
chmod +x ./_pbd.awk ; \
LC_ALL=$${GAWKLOCALE:-C} LANG=$${GAWKLOCALE:-C} ./_pbd.awk "$(srcdir)"/poundbang.awk > _`basename $@`; \
fi
@@ -1754,6 +1772,16 @@ fmtspcl: fmtspcl.ok
$(CMP) "$(srcdir)"/$@-mpfr.ok _$@ && rm -f _$@ ; \
fi
+rebuf::
+ @echo $@
+ @AWKBUFSIZE=4096 AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+rsglstdin::
+ @echo $@
+ @cat "$(srcdir)"/rsgetline.in | AWKPATH="$(srcdir)" $(AWK) -f rsgetline.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
reint::
@echo $@
@$(AWK) --re-interval -f "$(srcdir)"/reint.awk "$(srcdir)"/reint.in >_$@
@@ -2064,10 +2092,11 @@ dumpvars::
profile1:
@echo $@
- @$(AWK) --pretty-print=ap-$@.out -f "$(srcdir)"/xref.awk "$(srcdir)"/dtdgport.awk > _$@.out1
+ @$(AWK) -f "$(srcdir)"/xref.awk "$(srcdir)"/dtdgport.awk > _$@.out1
+ @$(AWK) --pretty-print=ap-$@.out -f "$(srcdir)"/xref.awk
@$(AWK) -f ap-$@.out "$(srcdir)"/dtdgport.awk > _$@.out2 ; rm ap-$@.out
@$(CMP) _$@.out1 _$@.out2 && rm _$@.out[12] || { echo EXIT CODE: $$? >>_$@ ; \
- cp "$(srcdir)"/dtdgport.awk > $@.ok ; }
+ cp "$(srcdir)"/dtdgport.awk $@.ok ; }
profile2:
@echo $@
@@ -2083,13 +2112,13 @@ profile3:
profile4:
@echo $@
- @GAWK_NO_PP_RUN=1 $(AWK) --profile=ap-$@.out -f "$(srcdir)"/$@.awk > /dev/null
+ @$(AWK) --pretty-print=ap-$@.out -f "$(srcdir)"/$@.awk > /dev/null
@sed 1,2d < ap-$@.out > _$@; rm ap-$@.out
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
profile5:
@echo $@
- @GAWK_NO_PP_RUN=1 $(AWK) --profile=ap-$@.out -f "$(srcdir)"/$@.awk > /dev/null
+ @$(AWK) --pretty-print=ap-$@.out -f "$(srcdir)"/$@.awk > /dev/null
@sed 1,2d < ap-$@.out > _$@; rm ap-$@.out
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
@@ -2149,6 +2178,16 @@ mpfrbigint:
@$(AWK) -M -f "$(srcdir)"/$@.awk > _$@ 2>&1
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+mpfrsqrt:
+ @echo $@
+ @$(AWK) -M -f "$(srcdir)"/$@.awk > _$@ 2>&1
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+mpfrrem:
+ @echo $@
+ @$(AWK) -M -f "$(srcdir)"/$@.awk > _$@ 2>&1
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
jarebug::
@echo $@
@"$(srcdir)"/$@.sh "$(AWKPROG)" "$(srcdir)"/$@.awk "$(srcdir)"/$@.in "_$@"
@@ -2344,6 +2383,17 @@ backsmalls2:
@[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
AWKPATH="$(srcdir)" $(AWK) -f $@.awk "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+dbugeval::
+ @echo $@
+ @$(AWK) --debug -f /dev/null < "$(srcdir)"/$@.in > _$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+printhuge::
+ @echo $@
+ @[ -z "$$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
+ AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
Gt-dummy:
# file Maketests, generated from Makefile.am by the Gentests program
addcomma:
@@ -3428,6 +3478,11 @@ indirectcall:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+indirectcall2:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
lint:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@@ -3498,9 +3553,14 @@ pty1:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
-rebuf:
+regnul1:
@echo $@
- @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+regnul2:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
regx8bit:
@@ -3508,6 +3568,11 @@ regx8bit:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+rsgetline:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
rstest6:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index b9b713c4..5c34af9e 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -1082,6 +1082,11 @@ indirectcall:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+indirectcall2:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
lint:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@@ -1152,9 +1157,14 @@ pty1:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
-rebuf:
+regnul1:
@echo $@
- @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
+regnul2:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
regx8bit:
@@ -1162,6 +1172,11 @@ regx8bit:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+rsgetline:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
rstest6:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/badargs.ok b/test/badargs.ok
index 1be81ec3..1664ec1c 100644
--- a/test/badargs.ok
+++ b/test/badargs.ok
@@ -17,10 +17,10 @@ Short options: GNU long options: (extensions)
-h --help
-i includefile --include=includefile
-l library --load=library
- -L [fatal] --lint[=fatal]
- -n --non-decimal-data
+ -L[fatal|invalid] --lint[=fatal|invalid]
-M --bignum
-N --use-lc-numeric
+ -n --non-decimal-data
-o[file] --pretty-print[=file]
-O --optimize
-p[file] --profile[=file]
diff --git a/test/dbugeval.in b/test/dbugeval.in
new file mode 100644
index 00000000..6a3c2459
--- /dev/null
+++ b/test/dbugeval.in
@@ -0,0 +1,2 @@
+eval ""
+eval ""
diff --git a/test/dbugeval.ok b/test/dbugeval.ok
new file mode 100644
index 00000000..284f2abb
--- /dev/null
+++ b/test/dbugeval.ok
@@ -0,0 +1 @@
+EXIT CODE: 2
diff --git a/test/functab4.awk b/test/functab4.awk
index 0d9d4267..196fcc6d 100644
--- a/test/functab4.awk
+++ b/test/functab4.awk
@@ -6,9 +6,25 @@ function foo()
}
BEGIN {
- x = FUNCTAB["chdir"]
- print "x =", x
- @x("/tmp")
- printf "we are now in --> "
- system("/bin/pwd || /usr/bin/pwd")
+ f = FUNCTAB["foo"]  # look the function name up in FUNCTAB, then call it indirectly
+ @f()
+
+ ret1 = stat(".", data1)  # direct call; fills data1
+ print "ret1 =", ret1
+
+ f = "stat"
+ ret2 = @f(".", data2)  # same call made indirectly; fills data2
+ print "ret2 =", ret2
+
+ problems = 0  # number of elements that differ between data1 and data2
+ for (i in data1) {
+ if (! isarray(data1[i])) {  # only scalar elements are compared
+# print i, data1[i]
+ if (! (i in data2) || data1[i] != data2[i]) {
+ printf("mismatch element \"%s\"\n", i)
+ problems++
+ }
+ }
+ }
+ print(problems ? (problems+0) " encountered" : "no problems encountered")
}
diff --git a/test/functab4.ok b/test/functab4.ok
index 70a520b7..2b76cd88 100644
--- a/test/functab4.ok
+++ b/test/functab4.ok
@@ -1,3 +1,4 @@
-x = chdir
-gawk: functab4.awk:11: fatal: cannot (yet) call extension functions indirectly
-EXIT CODE: 2
+foo!
+ret1 = 0
+ret2 = 0
+no problems encountered
diff --git a/test/id.ok b/test/id.ok
index d31573de..4cb39b32 100644
--- a/test/id.ok
+++ b/test/id.ok
@@ -1,32 +1,73 @@
-FUNCTAB -> array
-ARGV -> array
-SYMTAB -> array
-ORS -> scalar
-ROUNDMODE -> scalar
-i -> untyped
OFS -> scalar
+rand -> builtin
+ARGC -> scalar
+dcgettext -> builtin
+gsub -> builtin
+PREC -> scalar
+match -> builtin
+ARGIND -> scalar
+int -> builtin
ERRNO -> scalar
+ARGV -> array
+log -> builtin
+sprintf -> builtin
+ROUNDMODE -> scalar
+strftime -> builtin
+systime -> builtin
+and -> builtin
+srand -> builtin
FNR -> scalar
+asort -> builtin
+atan2 -> builtin
+cos -> builtin
+TEXTDOMAIN -> scalar
+ORS -> scalar
+split -> builtin
+div -> builtin
+RSTART -> scalar
+compl -> builtin
+bindtextdomain -> builtin
+exp -> builtin
+or -> builtin
+fflush -> builtin
+gensub -> builtin
LINT -> scalar
+dcngettext -> builtin
+index -> builtin
IGNORECASE -> scalar
-NR -> scalar
-function1 -> user
-ARGIND -> scalar
-NF -> scalar
-TEXTDOMAIN -> scalar
+system -> builtin
CONVFMT -> scalar
+sqrt -> builtin
+rshift -> builtin
+tolower -> builtin
+FS -> scalar
+BINMODE -> scalar
+sin -> builtin
+asorti -> builtin
FIELDWIDTHS -> scalar
-ARGC -> scalar
+function1 -> user
+FILENAME -> scalar
+close -> builtin
+mktime -> builtin
+FUNCTAB -> array
+NF -> scalar
+isarray -> builtin
an_array -> untyped
-PROCINFO -> array
-PREC -> scalar
+patsplit -> builtin
+NR -> scalar
SUBSEP -> scalar
-FPAT -> scalar
-RS -> scalar
-FS -> scalar
+extension -> builtin
+i -> untyped
+sub -> builtin
OFMT -> scalar
RLENGTH -> scalar
+substr -> builtin
+FPAT -> scalar
+RS -> scalar
+xor -> builtin
RT -> scalar
-BINMODE -> scalar
-FILENAME -> scalar
-RSTART -> scalar
+PROCINFO -> array
+lshift -> builtin
+SYMTAB -> array
+strtonum -> builtin
+toupper -> builtin
diff --git a/test/indirectcall2.awk b/test/indirectcall2.awk
new file mode 100644
index 00000000..8f3c9483
--- /dev/null
+++ b/test/indirectcall2.awk
@@ -0,0 +1,11 @@
+BEGIN {
+ Quarter_pi = 3.1415927 / 4
+ print sin(Quarter_pi)
+
+ f = "sin"
+ print @f(Quarter_pi)
+
+ print substr("abcdefgh", 2, 3)
+ f = "substr"
+ print @f("abcdefgh", 2, 3)
+}
diff --git a/test/indirectcall2.ok b/test/indirectcall2.ok
new file mode 100644
index 00000000..05bee4b1
--- /dev/null
+++ b/test/indirectcall2.ok
@@ -0,0 +1,4 @@
+0.707107
+0.707107
+bcd
+bcd
diff --git a/test/lintwarn.ok b/test/lintwarn.ok
index ec87612f..bc5226e6 100644
--- a/test/lintwarn.ok
+++ b/test/lintwarn.ok
@@ -1,6 +1,6 @@
gawk: lintwarn.awk:2: warning: `BEGINFILE' is a gawk extension
-gawk: lintwarn.awk:3: error: `getline var' invalid inside `BEGINFILE' rule
-gawk: lintwarn.awk:4: error: `getline' invalid inside `BEGINFILE' rule
+gawk: lintwarn.awk:3: error: non-redirected `getline' invalid inside `BEGINFILE' rule
+gawk: lintwarn.awk:4: error: non-redirected `getline' invalid inside `BEGINFILE' rule
gawk: lintwarn.awk:8: warning: statement may have no effect
gawk: lintwarn.awk:9: warning: plain `print' in BEGIN or END rule should probably be `print ""'
gawk: lintwarn.awk:10: error: `nextfile' used in BEGIN action
diff --git a/test/mpfrrem.awk b/test/mpfrrem.awk
new file mode 100644
index 00000000..fd8bc4d5
--- /dev/null
+++ b/test/mpfrrem.awk
@@ -0,0 +1,6 @@
+BEGIN {
+ print "15 % 7 =", 15 % 7
+ print "15 % -7 =", 15 % -7
+ print "-15 % 7 =", -15 % 7
+ print "-15 % -7 =", -15 % -7
+}
diff --git a/test/mpfrrem.ok b/test/mpfrrem.ok
new file mode 100644
index 00000000..91010457
--- /dev/null
+++ b/test/mpfrrem.ok
@@ -0,0 +1,4 @@
+15 % 7 = 1
+15 % -7 = 1
+-15 % 7 = -1
+-15 % -7 = -1
diff --git a/test/mpfrsqrt.awk b/test/mpfrsqrt.awk
new file mode 100644
index 00000000..23a15c92
--- /dev/null
+++ b/test/mpfrsqrt.awk
@@ -0,0 +1,82 @@
+# Date: Sat, 02 Aug 2014 12:27:00 -0400
+# To: bug-gawk@gnu.org
+# From: Katherine Wasserman <katie@wass.net>
+# Message-ID: <E1XDc9F-0007BX-O1@eggs.gnu.org>
+# Subject: [bug-gawk] GAWK 4.1 SQRT() bug
+#
+# In version 4.1.60 of GAWK the sqrt() function does not work correctly on bignums.
+# Here's a demo of the problem along with, a function that does work correctly.
+#
+# Running this program (sqrt-bug.awk):
+# --------------------------------------------------------------------
+BEGIN {
+a=11111111111111111111111111111111111111111111111111111111111
+print sqrt(a^2)
+#print sq_root(a^2)
+
+# ADR: Added for gawk-4.1-stable which doesn't have built-in div() function
+if (PROCINFO["version"] < "4.1.60")
+ print sq_root2(a^2)
+else
+ print sq_root(a^2)
+}
+
+
+function sq_root(x, temp,r,z)  # integer square root of x; temp, r, z are locals
+{ temp=substr(x,1,length(x)/2) + 0 # a good first guess
+ z=0  # previous estimate
+ while (abs(z-temp)>1)  # stop once two successive estimates differ by at most 1
+ { z=temp
+ div(x,temp,r)  # r["quotient"] = x / temp (integer division)
+ temp=r["quotient"] + temp
+ div(temp,2,r)  # halve the sum: temp = (x/temp + temp) / 2
+ temp=r["quotient"]
+ }
+ return temp
+}
+
+function sq_root2(x, temp,r,z)  # same iteration as sq_root(), but using awk_div() instead of div()
+{ temp=substr(x,1,length(x)/2) + 0 # a good first guess
+ z=0  # previous estimate
+ while (abs(z-temp)>1)  # stop once two successive estimates differ by at most 1
+ { z=temp
+ awk_div(x,temp,r)  # r["quotient"] = int(x / temp)
+ temp=r["quotient"] + temp
+ awk_div(temp,2,r)  # halve the sum: temp = (x/temp + temp) / 2
+ temp=r["quotient"]
+ }
+ return temp
+}
+
+function abs(x)
+{ return (x<0 ? -x : x)
+}
+#
+# --------------------------------------------------------------------
+# gawk -M -f sqrt-bug.awk
+#
+# results in:
+# 11111111111111111261130863809439559987542611609749437808640
+# 11111111111111111111111111111111111111111111111111111111111
+#
+# Thanks,
+# Katie
+#
+
+# awk_div --- do integer division; stores result["quotient"] and result["remainder"], always returns 0.0
+
+function awk_div(numerator, denominator, result, i, save_PREC)  # i is declared as a local but never used
+{
+ save_PREC = PREC  # PREC is changed below and restored before returning
+ PREC = 400 # good enough for this test
+
+ split("", result)  # empty the result array
+
+ numerator = int(numerator)
+ denominator = int(denominator)
+ result["quotient"] = int(numerator / denominator)
+ result["remainder"] = int(numerator % denominator)
+
+ PREC = save_PREC
+ return 0.0
+}
diff --git a/test/mpfrsqrt.ok b/test/mpfrsqrt.ok
new file mode 100644
index 00000000..16217c78
--- /dev/null
+++ b/test/mpfrsqrt.ok
@@ -0,0 +1,2 @@
+11111111111111111111111111111111111111111111111111111111111
+11111111111111111111111111111111111111111111111111111111111
diff --git a/test/ofs1.ok b/test/ofs1.ok
index a3a8ca7b..d01fa161 100644
--- a/test/ofs1.ok
+++ b/test/ofs1.ok
@@ -1,7 +1,7 @@
-a:x:c
+a x c
a x c
a x c
a
-a:x:c
a x c
-a:x:c
+a x c
+a x c
diff --git a/test/printhuge.awk b/test/printhuge.awk
new file mode 100644
index 00000000..1de27ecc
--- /dev/null
+++ b/test/printhuge.awk
@@ -0,0 +1,3 @@
+BEGIN {
+ printf("%c\n", sprintf("%c", (0xffffff00+255)))
+}
diff --git a/test/printhuge.ok b/test/printhuge.ok
new file mode 100644
index 00000000..29e181eb
--- /dev/null
+++ b/test/printhuge.ok
@@ -0,0 +1 @@
+ÿ
diff --git a/test/profile2.ok b/test/profile2.ok
index fe76a2c9..50c7e190 100644
--- a/test/profile2.ok
+++ b/test/profile2.ok
@@ -7,7 +7,7 @@
1 asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" "do:else:exit:exp:for:getline:gsub:if:in:index:int:" "length:log:match:next:print:printf:rand:return:sin:" "split:sprintf:sqrt:srand:sub:substr:system:while", keywords, ":")
1 split("00:00:00:00:00:00:00:00:00:00:" "20:10:10:12:12:11:07:00:00:00:" "08:08:08:08:08:33:08:00:00:00:" "08:44:08:36:08:08:08:00:00:00:" "08:44:45:42:42:41:08", machine, ":")
1 state = 1
- 571 for (; ; ) {
+ 571 for (;;) {
571 symb = lex()
571 nextstate = substr(machine[state symb], 1, 1)
571 act = substr(machine[state symb], 2, 1)
@@ -109,7 +109,7 @@
571 function lex()
{
- 1702 for (; ; ) {
+ 1702 for (;;) {
1702 if (tok == "(eof)") {
return 7
}
diff --git a/test/regnul1.awk b/test/regnul1.awk
new file mode 100644
index 00000000..2a35d176
--- /dev/null
+++ b/test/regnul1.awk
@@ -0,0 +1,84 @@
+# From denis@gissoft.eu Thu May 29 09:07:56 IDT 2014
+# Article: 8400 of comp.lang.awk
+# X-Received: by 10.236.81.99 with SMTP id l63mr3912466yhe.3.1401224812642;
+# Tue, 27 May 2014 14:06:52 -0700 (PDT)
+# X-Received: by 10.140.37.148 with SMTP id r20mr578874qgr.0.1401224812310; Tue,
+# 27 May 2014 14:06:52 -0700 (PDT)
+# Path: eternal-september.org!news.eternal-september.org!feeder.eternal-september.org!v102.xanadu-bbs.net!xanadu-bbs.net!news.glorb.com!hl10no6493021igb.0!news-out.google.com!gi6ni15574igc.0!nntp.google.com!hl10no6493018igb.0!postnews.google.com!glegroupsg2000goo.googlegroups.com!not-for-mail
+# Newsgroups: comp.lang.awk
+# Date: Tue, 27 May 2014 14:06:52 -0700 (PDT)
+# Complaints-To: groups-abuse@google.com
+# Injection-Info: glegroupsg2000goo.googlegroups.com; posting-host=85.253.50.165;
+# posting-account=zNhVLgoAAACsg-WfVe_or2VV7loUhx8H
+# NNTP-Posting-Host: 85.253.50.165
+# User-Agent: G2/1.0
+# MIME-Version: 1.0
+# Message-ID: <3112e356-d2e1-45cd-ba55-2f939ee50105@googlegroups.com>
+# Subject: \0 character can't be implement inside regexp in some cases?
+# From: denis@gissoft.eu
+# Injection-Date: Tue, 27 May 2014 21:06:52 +0000
+# Content-Type: text/plain; charset=ISO-8859-1
+# Xref: news.eternal-september.org comp.lang.awk:8400
+#
+# Hello,
+#
+# while doing some experiments with the gawk(4.1.1) i was found problem in implementing character \x00 inside regexp for two cases:
+#
+# str~/\0/
+#
+# and
+#
+# switch ( str ) { case /\0/: ... }
+#
+# the following code try to match given string(=="\x00") with the regexp /^\0$/ using different ways provided by gawk:
+#
+func _chm(t) {
+ _ch("match()",match(t,/^\0$/))
+ _ch("split()",split(t,A,/^\0$/)>1)
+ _ch("patsplit()",patsplit(t,A,/^\0$/))
+ _ch("gsub()",gsub(/^\0$/,"&",t))
+ _ch("sub()",sub(/^\0$/,"&",t))
+ _ch("gensub()",!gensub(/^\0$/,"","G",t))
+ _ch("str~/rexp/",t~/^\0$/)
+ a=0; switch ( t ) { case /^\0$/: a=1 }; _ch("switch-case //",a) }
+
+func _ch(fn,bool) {
+ print substr(fn ": ",1,16) (bool ? "+" : "-") }
+
+BEGIN{ _chm("\000") }
+#
+# output:
+#
+# > gawk -f _null.gwk
+# match(): +
+# split(): +
+# patsplit(): +
+# gsub(): +
+# sub(): +
+# gensub(): +
+# str~/rexp/: -
+# switch-case //: -
+#
+# can someone explain me:
+#
+# why in case using match(), split(), patsplit(), gsub(), sub() and gensub() the given string "\x00" matches with the /^\0$/
+#
+# but in cases:
+#
+# "\x00"~/^\0$/
+#
+# and
+#
+# switch ( "\x00" ) { case /^\0$/: doesn't match? }
+#
+#
+# thank You
+#
+#
+# GNU Awk 4.1.1, API: 1.1 (GNU MPFR 3.1.0-p8, GNU MP 5.0.2)
+# Copyright (C) 1989, 1991-2014 Free Software Foundation.
+# downloaded from ezwinports
+#
+# windows 7x64; cmd
+#
+#
diff --git a/test/regnul1.ok b/test/regnul1.ok
new file mode 100644
index 00000000..2ba0e1da
--- /dev/null
+++ b/test/regnul1.ok
@@ -0,0 +1,8 @@
+match(): +
+split(): +
+patsplit(): +
+gsub(): +
+sub(): +
+gensub(): +
+str~/rexp/: +
+switch-case //: +
diff --git a/test/regnul2.awk b/test/regnul2.awk
new file mode 100644
index 00000000..3d93df41
--- /dev/null
+++ b/test/regnul2.awk
@@ -0,0 +1,112 @@
+# From denis@gissoft.eu Thu May 29 09:10:18 IDT 2014
+# Article: 8408 of comp.lang.awk
+# X-Received: by 10.182.128.166 with SMTP id np6mr93689obb.16.1401289466734;
+# Wed, 28 May 2014 08:04:26 -0700 (PDT)
+# X-Received: by 10.140.36.6 with SMTP id o6mr4939qgo.26.1401289466607; Wed, 28
+# May 2014 08:04:26 -0700 (PDT)
+# Path: eternal-september.org!news.eternal-september.org!feeder.eternal-september.org!news.glorb.com!c1no19185457igq.0!news-out.google.com!qf4ni13600igc.0!nntp.google.com!c1no19185454igq.0!postnews.google.com!glegroupsg2000goo.googlegroups.com!not-for-mail
+# Newsgroups: comp.lang.awk
+# Date: Wed, 28 May 2014 08:04:26 -0700 (PDT)
+# In-Reply-To: <lm4rra$4u9$1@dont-email.me>
+# Complaints-To: groups-abuse@google.com
+# Injection-Info: glegroupsg2000goo.googlegroups.com; posting-host=82.131.35.51; posting-account=zNhVLgoAAACsg-WfVe_or2VV7loUhx8H
+# NNTP-Posting-Host: 82.131.35.51
+# References: <3112e356-d2e1-45cd-ba55-2f939ee50105@googlegroups.com>
+# <lm34d7$tb4$1@news.m-online.net> <f666871f-a94c-4505-9677-8711d656433c@googlegroups.com>
+# <lm4rra$4u9$1@dont-email.me>
+# User-Agent: G2/1.0
+# MIME-Version: 1.0
+# Message-ID: <79828a24-d265-4e88-8de1-e61ecbaa6701@googlegroups.com>
+# Subject: Re: \0 character can't be implement inside regexp in some cases?
+# From: Denis Shirokov <denis@gissoft.eu>
+# Injection-Date: Wed, 28 May 2014 15:04:26 +0000
+# Content-Type: text/plain; charset=ISO-8859-1
+# Xref: news.eternal-september.org comp.lang.awk:8408
+#
+#
+# All of the other use-cases just cluttered up your posting.
+#
+# oh, really?
+#
+# 1. where in the Janis code the case with the `switch-case'?
+# 2. how do you know about that there is only two cases? may be you know it because my code contains the other test cases?
+# 3. fine. do you know what situation with the dynamic regexps? no?
+# 4. do you know what situation with RS,FS and /.../ in the middle-area? how you can say that there is only two cases if you absolutely do not know it?
+#
+# i'm asking: WHO will perform testing other cases? You? gawk-team? the God?
+# what is that point of view: that it will be enough to say:
+# Oh! my match(t,/^\0$/) is matching "\x00" but t~/^\0$/ is not. why oh why?
+#
+# where is the test cover? or you think that other peoples will doing its instead of You? instead of Me?
+#
+# and the second point: guys you are screaming about two levels of stack. really, you kidding? =)
+#
+# however, i'm attaching some additional information about dynrexp:
+#
+func _chmd(t,r) {
+ _ch("match()",match(t,r))
+ _ch("split()",split(t,A,r)>1)
+ _ch("patsplit()",patsplit(t,A,r))
+ _ch("gsub()",gsub(r,"&",t))
+ t2=t; _ch("sub()",sub(r,"&",t2))
+ _ch("gensub()",!gensub(r,"","G",t))
+ _ch("str~/rexp/",t~r)
+ # switch-case is not applicable with dynrxp
+ _conline() }
+
+func _ch(fn,bool) {
+ print substr(fn ": ",1,16) (bool ? "+" : "-") }
+
+func _conline() {
+ print "__________________________"; print }
+
+BEGIN{ _chmd("\x01","^\1$") # sanity check: every test should match
+ _chmd("\x00","^\1$") # sanity check: no test should match
+ _chmd("\x00","^\0$") # testing dynamic regexp containing NUL
+}
+#
+# output:
+#
+# match(): +
+# split(): +
+# patsplit(): +
+# gsub(): +
+# sub(): +
+# gensub(): +
+# str~/rexp/: +
+# __________________________
+#
+# match(): -
+# split(): -
+# patsplit(): -
+# gsub(): -
+# sub(): -
+# gensub(): -
+# str~/rexp/: -
+# __________________________
+#
+# match(): +
+# split(): +
+# patsplit(): +
+# gsub(): +
+# sub(): +
+# gensub(): +
+# str~/rexp/: -
+#
+# it's looks like with the dynamic regexp the same story.
+#
+# i found another one moment that is possible near with the reason of this issue:
+#
+# i was testing what characters can be present in doublestring and regexp "directly" (just as the character) and what characters must be present as escape sequence (\qqq)
+#
+# so, i found the following:
+#
+# t="abc"
+# if ( match(t,/^abc[NUL]def/) ) ... - where [NUL] is the character \x00
+#
+# it's seems that in that case the regular expression is processed until [NUL]character and the other part is ignored because the example above gives TRUE
+#
+# friendship
+# Denis Shirokov
+#
+#
diff --git a/test/regnul2.ok b/test/regnul2.ok
new file mode 100644
index 00000000..6b3cecab
--- /dev/null
+++ b/test/regnul2.ok
@@ -0,0 +1,27 @@
+match(): +
+split(): +
+patsplit(): +
+gsub(): +
+sub(): +
+gensub(): +
+str~/rexp/: +
+__________________________
+
+match(): -
+split(): -
+patsplit(): -
+gsub(): -
+sub(): -
+gensub(): -
+str~/rexp/: -
+__________________________
+
+match(): +
+split(): +
+patsplit(): +
+gsub(): +
+sub(): +
+gensub(): +
+str~/rexp/: +
+__________________________
+
diff --git a/test/rsgetline.awk b/test/rsgetline.awk
new file mode 100644
index 00000000..fa327fcf
--- /dev/null
+++ b/test/rsgetline.awk
@@ -0,0 +1,23 @@
+# Date: Sun, 4 May 2014 18:09:01 +0200
+# From: Davide Brini <dave_br@gmx.com>
+# To: bug-gawk@gnu.org
+# Subject: Re: [bug-gawk] Computed regex and getline bug / issue
+#
+# I have been able to reduce the behavior to these simple test cases, which
+# (unless I'm missing something obvious) should behave identically but don't:
+#
+# $ printf '1,2,' | gawk 'BEGIN{RS="[,]+"}{print; a = getline; print "-"a"-"; print}'
+# 1
+# -0-
+# 1
+
+BEGIN {
+ RS = "[,]+"
+}
+
+{
+ printf "[%s] [%s]\n", $0, RT  # show the current record and its RT value
+ a = getline  # plain getline; a holds its return value
+ print "-"a"-"
+ printf "[%s] [%s]\n", $0, RT  # show $0 and RT again after the getline
+}
diff --git a/test/rsgetline.in b/test/rsgetline.in
new file mode 100644
index 00000000..f1782346
--- /dev/null
+++ b/test/rsgetline.in
@@ -0,0 +1 @@
+1,2, \ No newline at end of file
diff --git a/test/rsgetline.ok b/test/rsgetline.ok
new file mode 100644
index 00000000..1388369a
--- /dev/null
+++ b/test/rsgetline.ok
@@ -0,0 +1,3 @@
+[1] [,]
+-1-
+[2] [,]
diff --git a/test/rsglstdin.ok b/test/rsglstdin.ok
new file mode 100644
index 00000000..1388369a
--- /dev/null
+++ b/test/rsglstdin.ok
@@ -0,0 +1,3 @@
+[1] [,]
+-1-
+[2] [,]
diff --git a/vms/ChangeLog b/vms/ChangeLog
index 1bb34475..9e055cf4 100644
--- a/vms/ChangeLog
+++ b/vms/ChangeLog
@@ -1,3 +1,7 @@
+2014-04-18 John E. Malmberg <wb8tyw@qsl.net>
+
+ * gawk_alias_setup.com: Fix problem with file links on Vax/VMS.
+
2014-04-08 Arnold D. Robbins <arnold@skeeve.com>
* 4.1.1: Release tar ball made.
diff --git a/vms/gawk_alias_setup.com b/vms/gawk_alias_setup.com
index 73f70d15..a7f13f4b 100644
--- a/vms/gawk_alias_setup.com
+++ b/vms/gawk_alias_setup.com
@@ -59,14 +59,43 @@ $ alias = "gnv$gnu:''p4'''p3'"
$ endif
$ if f$search(file) .nes. ""
$ then
+$ fid = ""
+$ mess = f$environment("message")
+$ if f$search(alias) .nes. ""
+$ then
+$ on warn then goto fix_link
+$ set message/nofac/nosev/noident/notext
+$ fid = f$file_attributes(alias, "FID")
+$ endif
+$ goto fix_link_end
+$fix_link:
+$ set file/remove 'alias';
+$fix_link_end:
+$ set message'mess'
$ if f$search(alias) .eqs. ""
$ then
$ set file/enter='alias' 'file'
$ endif
$ alias1 = alias + "exe"
-$ if (ftype .eqs. ".") .and. (f$search(alias1) .eqs. "")
+$ if (ftype .eqs. ".")
$ then
+$ fid = ""
+$ mess = f$environment("message")
+$ if f$search(alias1) .nes. ""
+$ then
+$ on warn then goto fix_link1
+$ set message/nofac/nosev/noident/notext
+$ fid = f$file_attributes(alias1, "FID")
+$ endif
+$ goto fix_link_end1
+$fix_link1:
+$ set file/remove 'alias1';
+$fix_link_end1:
+$ set message'mess'
+$ if (f$search(alias1) .eqs. "")
+$ then
$ set file/enter='alias1' 'file'
+$ endif
$ endif
$ endif
$ exit
@@ -78,7 +107,7 @@ $ if p4 .eqs. "" then p4 = p2
$ ftype = f$element(1, ".", p1)
$ if ftype .eqs. "."
$ then
-$ file = "gnv$gnu:''p2'''p1'.EXE"
+$ file = "gnv$gnu:''p2'gnv$''p1'.EXE"
$ alias = "gnv$gnu:''p4'''p3'."
$ else
$ file = "gnv$gnu:''p2'''p1'"
diff --git a/xalloc.h b/xalloc.h
index eb0ef1a0..0d169cf9 100644
--- a/xalloc.h
+++ b/xalloc.h
@@ -136,6 +136,8 @@ xnmalloc (size_t n, size_t s)
#ifdef GAWK
#include <errno.h>
+extern void r_fatal(const char *msg, ...) ATTRIBUTE_NORETURN ;
+
/* Allocate an array of N objects, each with S bytes of memory,
dynamically, with error checking. S must be nonzero.
Clear the contents afterwards. */
@@ -165,8 +167,6 @@ xrealloc(void *p, size_t size)
void
xalloc_die (void)
{
- extern void r_fatal(const char *msg, ...) ATTRIBUTE_NORETURN ;
-
r_fatal(_("xalloc: malloc failed: %s"), strerror(errno));
}
@@ -179,6 +179,22 @@ xmemdup (void const *p, size_t s)
{
return memcpy (xmalloc (s), p, s);
}
+
+/* xstrdup --- strdup and die if fails */
+char *xstrdup(const char *s)
+{
+ char *p;
+ int l;
+
+ if (s == NULL)
+ r_fatal(_("xstrdup: null parameter"));
+
+ l = strlen(s);
+ p = xmemdup(s, l + 1);
+ p[l] = '\0';
+
+ return p;
+}
#endif
/* Change the size of an allocated block of memory P to an array of N
@@ -260,7 +276,7 @@ x2nrealloc (void *p, size_t *pn, size_t s)
requests, when the invoking code specifies an old size of
zero. 64 bytes is the largest "small" request for the
GNU C library malloc. */
- enum { DEFAULT_MXFAST = 64 };
+ enum { DEFAULT_MXFAST = 64 * sizeof (size_t) / 4 };
n = DEFAULT_MXFAST / s;
n += !n;
@@ -274,7 +290,7 @@ x2nrealloc (void *p, size_t *pn, size_t s)
worth the trouble. */
if ((size_t) -1 / 3 * 2 / s <= n)
xalloc_die ();
- n += (n + 1) / 2;
+ n += n / 2 + 1;
}
*pn = n;