summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Davies <sdavies@sdc.com.au>2014-09-11 16:26:34 +0930
committerStephen Davies <sdavies@sdc.com.au>2014-09-11 16:26:34 +0930
commit33acaac51cd9087eb7a9b7e73c11f58da396df9a (patch)
tree31f31fcbddca399940865007322bdeea81f2f6f7
parent9b21de890a81179e951ffa5bea2099673f584b16 (diff)
parentca9f23d6c33c4b5cb3786d480948a42988ca99ac (diff)
downloadgawk-33acaac51cd9087eb7a9b7e73c11f58da396df9a.tar.gz
Merge branch 'master' of ssh://git.sv.gnu.org/srv/git/gawk
Conflicts: doc/gawk.info
-rw-r--r--CMakeLists.txt108
-rw-r--r--ChangeLog132
-rw-r--r--Makefile.am7
-rw-r--r--Makefile.in7
-rw-r--r--NEWS12
-rw-r--r--README_d/ChangeLog4
-rw-r--r--README_d/README.cmake95
-rw-r--r--TODO4
-rw-r--r--awk.h16
-rw-r--r--awkgram.c37
-rw-r--r--awkgram.y37
-rw-r--r--awklib/eg/lib/div.awk17
-rw-r--r--awklib/eg/lib/getopt.awk3
-rw-r--r--awklib/eg/lib/groupawk.in3
-rw-r--r--awklib/eg/lib/strtonum.awk7
-rw-r--r--awklib/eg/prog/alarm.awk3
-rw-r--r--awklib/eg/prog/cut.awk3
-rw-r--r--awklib/eg/prog/egrep.awk3
-rw-r--r--awklib/eg/prog/id.awk3
-rw-r--r--awklib/eg/prog/labels.awk3
-rw-r--r--awklib/eg/prog/tee.awk6
-rw-r--r--awklib/eg/prog/uniq.awk5
-rw-r--r--builtin.c25
-rw-r--r--cmake/Toolchain_clang.cmake19
-rw-r--r--cmake/Toolchain_generic.cmake21
-rw-r--r--cmake/Toolchain_mingw32.cmake23
-rw-r--r--cmake/Toolchain_s390.cmake20
-rw-r--r--cmake/auk.icobin0 -> 5190 bytes
-rwxr-xr-xcmake/basictest553
-rwxr-xr-xcmake/configure58
-rw-r--r--cmake/configure.cmake300
-rwxr-xr-xcmake/docmaker100
-rw-r--r--cmake/package.cmake54
-rw-r--r--configh.in6
-rwxr-xr-xconfigure10
-rw-r--r--configure.ac9
-rw-r--r--dfa.c12
-rw-r--r--doc/CMakeLists.txt95
-rw-r--r--doc/ChangeLog91
-rw-r--r--doc/gawk.113
-rw-r--r--doc/gawk.info3493
-rw-r--r--doc/gawk.texi1845
-rw-r--r--doc/gawktexi.in1726
-rw-r--r--eval.c31
-rw-r--r--extension/CMakeLists.txt84
-rw-r--r--extension/ChangeLog6
-rw-r--r--extension/Makefile.am3
-rw-r--r--extension/Makefile.in3
-rw-r--r--field.c21
-rw-r--r--helpers/ChangeLog4
-rw-r--r--helpers/chlistref.awk31
-rw-r--r--interpret.h43
-rw-r--r--main.c25
-rw-r--r--node.c3
-rw-r--r--po/CMakeLists.txt133
-rw-r--r--profile.c30
-rw-r--r--regcomp.c36
-rw-r--r--regex.h62
-rw-r--r--regex_internal.c9
-rw-r--r--replace.c2
-rw-r--r--symbol.c64
-rw-r--r--test/CMakeLists.txt90
-rw-r--r--test/ChangeLog25
-rw-r--r--test/Makefile.am4
-rw-r--r--test/Makefile.in9
-rw-r--r--test/Maketests5
-rw-r--r--test/badargs.ok2
-rw-r--r--test/functab4.awk26
-rw-r--r--test/functab4.ok7
-rw-r--r--test/id.ok81
-rw-r--r--test/indirectcall2.awk11
-rw-r--r--test/indirectcall2.ok4
-rw-r--r--test/ofs1.ok6
-rw-r--r--test/profile2.ok4
74 files changed, 7800 insertions, 2052 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000..154d2afb
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,108 @@
+#
+# CMakeLists.txt --- CMake input file for gawk
+#
+# Copyright (C) 2013
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## Run CMake on this file to generate the build files (Makefile) for gawk.
+
+cmake_minimum_required (VERSION 2.6)
+project (gawk C)
+
+include(cmake/configure.cmake)  # presumably where HAVE_* and DYNAMIC get set -- TODO confirm
+
+set (EXTRA_LIBS "")  # optional libraries, passed to target_link_libraries further down
+
+if (${HAVE_MPFR})
+  list (APPEND EXTRA_LIBS mpfr gmp)
+endif ()
+if (${HAVE_LIBREADLINE})
+  list (APPEND EXTRA_LIBS readline)
+endif ()
+if (${DYNAMIC})
+  list (APPEND EXTRA_LIBS ${CMAKE_DL_LIBS})
+endif ()
+
+include_directories(${CMAKE_SOURCE_DIR})  # put the top-level source directory on the include path
+
+if(WIN32 OR "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
+  # Windows builds, native or cross-compiled. These settings suffice for
+  # MinGW on Windows itself and for a cross-compiler on OpenSuSE 12.2.
+  # Ubuntu 12.04 additionally needs gawk's source code patched:
+  #   - add #include <windows.h> at the top of awk.h
+  #   - delete the execvp function from pc/gawkmisc.pc
+  DefineConfigHValue(HAVE_SETENV 1)
+  DefineConfigHValue(HAVE_USLEEP 1)
+  DefineConfigHValue(STDC_HEADERS 1)
+  DefineConfigHValue(HAVE_STRINGIZE 1)
+  include_directories(${CMAKE_SOURCE_DIR}/missing_d)
+  DefineConfigHValue(HAVE_MKSTEMP 1)
+  list (APPEND EXTRA_LIBS ws2_32)
+  # TODO: Eli Zaretskii reminded me that the settings generated
+  # into config.h ought to match those in pc/config.h.
+  # With the settings here plus DYNAMIC=1, functions in
+  # dynamic libraries (extensions) appear to be callable
+  # on Windows.
+  DefineConfigHValue(HAVE_GETSYSTEMTIMEASFILETIME 1)
+  list (APPEND GAWK_SOURCES regex.c pc/getid.c pc/gawkmisc.pc pc/popen.c)
+  include_directories(${CMAKE_SOURCE_DIR}/pc)
+endif()
+
+list (APPEND GAWK_SOURCES  # common sources; entries already in the list are kept in front
+  array.c
+  builtin.c
+  cint_array.c
+  command.c
+  debug.c
+  dfa.c
+  eval.c
+  ext.c
+  field.c
+  floatcomp.c
+  gawkapi.c
+  gawkmisc.c
+  int_array.c
+  io.c
+  main.c
+  mpfr.c
+  msg.c
+  node.c
+  profile.c
+  random.c
+  re.c
+  replace.c
+  str_array.c
+  symbol.c
+  version.c
+)
+
+add_executable (gawk ${GAWK_SOURCES} ${BISON_awkgram_OUTPUTS})  # listed sources plus BISON_awkgram_OUTPUTS (presumably the generated parser)
+target_link_libraries (gawk m ${EXTRA_LIBS})  # libm plus the optional libraries collected in EXTRA_LIBS
+install(TARGETS gawk RUNTIME DESTINATION bin)  # install the target itself instead of guessing where the binary was written
+
+# Beware: before building the extension, -DGAWK gets undefined.
+add_subdirectory(extension)
+enable_testing()
+add_subdirectory(test)
+add_subdirectory(doc)
+include(InstallRequiredSystemLibraries)
+set(CPACK_PACKAGING_INSTALL_PREFIX /usr)  # install prefix used inside generated packages
+include(cmake/package.cmake)  # presumably the CPack packaging setup -- TODO confirm
diff --git a/ChangeLog b/ChangeLog
index 6d734c98..eab657c5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,135 @@
+2014-09-07 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awk.h: Move libsigsegv stuff to ...
+ * main.c: here. Thanks to Yehezkel Bernat for motivating
+ the cleanup.
+ * symbol.c (make_symbol, install, install_symbol): Add const to
+ first parameter. Adjust decls and fix up uses.
+
+2014-09-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ Add builtin functions to FUNCTAB for consistency.
+
+ * awk.h (Node_builtin_func): New node type.
+ (install_builtins): Declare new function.
+ * awkgram.y [DEBUG_USE]: New flag value for debug functions; they
+ don't go into FUNCTAB.
+ (install_builtins): New function.
+ * eval.c (nodetypes): Add Node_builtin_func.
+ * interpret.h (r_interpret): Rework indirect calls of built-ins
+ since they're now in the symbol table.
+ * main.c (main): Call `install_builtins'.
+ * symbol.c (install): Adjust for Node_builtin_func.
+ (load_symbols): Ditto.
+
+2014-09-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * profile.c (pprint): Case Op_K_for: Improve printing of
+ empty for loop header.
+
+ Unrelated: Make indirect function calls work for built-in and
+ extension functions.
+
+ * awkgram.y (lookup_builtin): New function.
+ * awk.h (builtin_func_t): New typedef.
+ (lookup_builtin): Declare it.
+ * interpret.h (r_interpret): For indirect calls, add code to
+ find and call builtin functions, and call extension functions.
+
+2014-09-01 Arnold D. Robbins <arnold@skeeve.com>
+
+ * builtin.c (do_substr): Return "" instead of null string in case
+ result is passed to length() with --lint. Based on discussions in
+ comp.lang.awk.
+
+ Unrelated:
+
+ * interpret.h (r_interpret): For indirect function call, separate
+ error message if lookup returned NULL. Otherwise got a core dump.
+ Thanks to "Kenny McKormack" for the report in comp.lang.awk.
+
+2014-08-27 Arnold D. Robbins <arnold@skeeve.com>
+
+ * configure.ac: Add test for strcasecmp.
+ * regcomp.c: Remove special case code around use of strcasecmp().
+ * replace.c: Include missing/strncasecmp.c if either strcasecmp()
+ or strncasecmp() aren't available.
+
+2014-08-26 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regcomp.c, regex_internal.c: Sync with GLIBC. Why not.
+
+ Unrelated:
+
+ Remove support for MirBSD. It uglified the code too much
+ for no discernable gain.
+
+ * configure.ac: Remove check for MirBSD and define of
+ LIBC_IS_BORKED.
+ * dfa.c: Remove code depending on LIBC_IS_BORKED.
+ * main.c: Ditto.
+ * regcomp.c: Ditto.
+ * NEWS: Updated.
+
+2014-08-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * regex.h: Remove underscores in names of parameters in function
+ declarations. Tweak names as needed.
+
+2014-08-20 Arnold D. Robbins <arnold@skeeve.com>
+
+ * node.c (parse_escape): Max of 2 digits after \x.
+
+2014-08-18 Arnold D. Robbins <arnold@skeeve.com>
+
+ * symbol.c: General formatting cleanup.
+
+2014-08-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * main.c (usage): Adjust whitespace for -L and add "invalid"
+ as a possible value for it. Report from Robert P. J. Day
+ <rpjday@crashcourse.ca>.
+
+2014-08-14 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (SUBDIRS): Put awklib after doc so that examples
+ get extracted when the doc changes.
+
+2014-08-13 Arnold D. Robbins <arnold@skeeve.com>
+
+ * builtin.c (do_sub): Move initial allocation of the replacement
+ string down towards code to do the replacement, with a (we hope)
+ better guesstimate of how much to initially allocate. The idea
+ is to avoid unnecessary realloc() calls by making a better guess
+ at how much to allocate. This came up in an email discussion
+ with Tom Dickey about mawk's gsub().
+
+2014-08-12 Juergen Kahrs <jkahrs@users.sourceforge.net>
+
+ * cmake/configure.cmake:
+ * cmake/package.cmake: Copyright update.
+ * README.cmake:
+ * README_d/README.cmake: Moved file.
+
+2014-08-12 Arnold D. Robbins <arnold@skeeve.com>
+
+ OFS being set should rebuild $0 using previous OFS if $0
+ needs to be rebuilt. Thanks to Mike Brennan for pointing this out.
+
+ * awk.h (rebuild_record): Declare.
+ * eval.c (set_OFS): If not being called from var_init(), check
+ if $0 needs rebuilding. If so, parse the record fully and rebuild it.
+ Make OFS point to a separate copy of the new OFS for next time, since
+ OFS_node->var_value->stptr was already updated at this point.
+ * field.c (rebuild_record): Is now extern instead of static.
+ Use OFS and OFSlen instead of the value of OFS_node.
+
+ Unrelated:
+
+ * Makefile.am (RM): Define for makes that don't have it,
+ such as on OpenBSD. Thanks to Jeremie Courreges-Anglas
+ <jca@wxcvbn.org> for the report.
+
2014-08-05 Arnold D. Robbins <arnold@skeeve.com>
Bug fix: For MPFR sqrt(), need to set precision of result to be
diff --git a/Makefile.am b/Makefile.am
index 6e5715d7..3d1c8837 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -61,10 +61,12 @@ EXTRA_DIST = \
# The order to do things in.
# Build explicitly in "." in order to build gawk first, so
# that `make check' without a prior `make' works.
+# Build in awklib after doc, since we want to extract
+# sample files if doc/gawk.texi changed.
SUBDIRS = \
. \
- awklib \
doc \
+ awklib \
po \
extension \
test
@@ -144,6 +146,9 @@ CLEANFILES = core core.*
# We want hard links for install-exec-hook, below
LN= ln
+# For some make's, e.g. OpenBSD, that don't define this
+RM = rm -f
+
# First, add a link from gawk to gawk-X.Y.Z.
#
# For GNU systems where gawk is awk, add a link to awk.
diff --git a/Makefile.in b/Makefile.in
index 2abf5b46..5c2a7f11 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -467,10 +467,12 @@ EXTRA_DIST = \
# The order to do things in.
# Build explicitly in "." in order to build gawk first, so
# that `make check' without a prior `make' works.
+# Build in awklib after doc, since we want to extract
+# sample files if doc/gawk.texi changed.
SUBDIRS = \
. \
- awklib \
doc \
+ awklib \
po \
extension \
test
@@ -542,6 +544,9 @@ CLEANFILES = core core.*
# We want hard links for install-exec-hook, below
LN = ln
+
+# For some make's, e.g. OpenBSD, that don't define this
+RM = rm -f
all: config.h
$(MAKE) $(AM_MAKEFLAGS) all-recursive
diff --git a/NEWS b/NEWS
index 584a41f2..37cb5f95 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,15 @@ Changes from 4.1.x to 4.2.0
primarily useful for the -M option to avoid MPFR division when all
values involved are integers.
+6. Gawk can now be built with CMake. This is an alternative build
+ system for those who may want it; gawk is not going to switch off
+ use of the autotools anytime soon, if ever.
+
+7. Gawk now processes a maximum of two hexadecimal digits in \x
+ escape sequences inside strings.
+
+8. MirBSD is no longer supported.
+
Changes from 4.1.1 to 4.1.2
---------------------------
@@ -41,6 +50,9 @@ Changes from 4.1.1 to 4.1.2
4. A number of bugs have been fixed in the MPFR code.
+5. Indirect function calls now work for both built-in and
+ extension functions.
+
XX. A number of bugs have been fixed. See the ChangeLog.
Changes from 4.1.0 to 4.1.1
diff --git a/README_d/ChangeLog b/README_d/ChangeLog
index 6bc828d9..8c19cfdb 100644
--- a/README_d/ChangeLog
+++ b/README_d/ChangeLog
@@ -1,3 +1,7 @@
+2014-08-12 Juergen Kahrs <jkahrs@users.sourceforge.net>
+
+ * README.cmake: Moved file from top-level to here.
+
2014-04-08 Arnold D. Robbins <arnold@skeeve.com>
* 4.1.1: Release tar ball made.
diff --git a/README_d/README.cmake b/README_d/README.cmake
new file mode 100644
index 00000000..b291d1be
--- /dev/null
+++ b/README_d/README.cmake
@@ -0,0 +1,95 @@
+CMake is a build automation system
+ http://en.wikipedia.org/wiki/Cmake
+
+We try to use it as a replacement for the established GNU build system.
+This attempt is currently only experimental. If you wonder why anyone
+should do this, read
+
+ Why the KDE project switched to CMake -- and how
+ http://lwn.net/Articles/188693/
+ Escape from GNU Autohell!
+ http://www.shlomifish.org/open-source/anti/autohell
+
+- How can I get GNU Awk compiled with CMake as fast as possible ?
+ git clone git://git.savannah.gnu.org/gawk.git
+ cd gawk
+ git checkout cmake
+ mkdir build
+ cd build
+ cmake ..
+ make
+ ./gawk --version
+ make test
+Notice that this git-checkout allows you to read the source code,
+track the cmake branch and get updates. You will not be able to
+commit anything.
+
+- How can I use git to contribute source code ?
+You need an account at Savannah. Read this to understand the first steps:
+ http://savannah.gnu.org/maintenance/UsingGit
+ README.git
+Use your account there to register your public ssh key at Savannah.
+Then you are ready to checkout. Remember that (when cloning) you are
+setting up your own local repository and make sure you configure it
+properly.
+ git clone ssh://my_account_name@git.sv.gnu.org/srv/git/gawk.git
+ git config --global user.name "first-name last-name"
+ git config --global user.email First.Last@email.com
+ git config --global color.ui auto
+
+- What is the current status of the cmake branch ?
+It has just begun and is pre-alpha; it is unclear if it will ever be taken up
+by the maintainer. We want to study whether using CMake with such a
+basic tool as gawk is feasible and whether it is easier to use than
+the GNU build system.
+
+- Where can I find a tutorial on CMake basics ?
+Use the "official tutorial":
+ http://www.cmake.org/cmake/help/cmake_tutorial.html
+
+- Where is the reference of all commands and variables ?
+Depending on the CMake version you use, select one of these:
+ http://www.cmake.org/cmake/help/v2.8.10/cmake.html
+
+- How can I cross-compile ?
+Proceed in the same way as explained above for native compilation,
+but use a different build directory. When using CMake, do this:
+ cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchain_mingw32.cmake ..
+Write a new Toolchain file for your cross-compiler and use it.
+
+- How can I build an installable file ?
+Use "make package". The exact kind of installable file depends on your
+operating system and defaults to TGZ.
+
+- Can I build an executable that runs on any Win32 platform ?
+Yes, there are two ways of doing this.
+In both cases you need a MinGW compiler and the NSIS package builder
+installed on the host that shall do the build.
+ http://sourceforge.net/projects/mingw
+ http://sourceforge.net/projects/nsis
+When installed properly, the NSIS tool can even build an installer file
+(a single .exe file that unpacks, registers and installs the gawk executable
+and several other files).
+First way: native build on a Win32 platform
+ http://www.cmake.org/cmake/help/runningcmake.html
+ After clicking "Configure", select the MinGW option with the default native compiler.
+ In the build directory, the command "mingw32-make" will build gawk.exe.
+ The command "mingw32-make package" will build the installer file.
+Second way: build with a cross-compiler on a Linux platform like Ubuntu 12.04 LTS
+ Proceed as described above for cross-compilers.
+ The command "make ; make package" will build gawk.exe and the installer file.
+
+- How can I run test cases ?
+You can run all the test cases that are defined in test/Makefile.am.
+These test case scripts were not changed, but the way they are invoked has
+been adapted to CMake habits.
+See http://cmake.org/Wiki/CMake/Testing_With_CTest#Simple_Testing
+ cmake ..
+ make
+ make test # run all test cases
+ ctest -N # list all test cases but don't run them
+ ctest -R BASIC # run all test cases belonging to group BASIC
+ ctest -R MPFR # run all test cases belonging to group MPFR
+ ctest -E SHLIB.filefunc # run all tests, except the SHLIB.filefunc test case
+Remember that running test cases is possible only after a native build.
+
diff --git a/TODO b/TODO
index 35b327bc..66c8cc91 100644
--- a/TODO
+++ b/TODO
@@ -1,4 +1,4 @@
-Wed Jun 25 22:28:05 IDT 2014
+Sun Aug 24 20:00:53 IDT 2014
============================
There were too many files tracking different thoughts and ideas for
@@ -33,8 +33,6 @@ Minor Cleanups and Code Improvements
Look at function order within files.
- regex.h - remove underscores in param names
-
Consider removing use of and/or need for the protos.h file.
Recheck if gnulib regex can be dropped in
diff --git a/awk.h b/awk.h
index c964ad19..0b76aea2 100644
--- a/awk.h
+++ b/awk.h
@@ -193,15 +193,6 @@ extern void *memset_ulong(void *dest, int val, unsigned long l);
#define memset memset_ulong
#endif
-#ifdef HAVE_LIBSIGSEGV
-#include <sigsegv.h>
-#else
-typedef void *stackoverflow_context_t;
-#define sigsegv_install_handler(catchsegv) signal(SIGSEGV, catchsig)
-/* define as 0 rather than empty so that (void) cast on it works */
-#define stackoverflow_install_handler(catchstackoverflow, extra_stack, STACK_SIZE) 0
-#endif
-
#if defined(__EMX__) || defined(__MINGW32__)
#include "nonposix.h"
#endif /* defined(__EMX__) || defined(__MINGW32__) */
@@ -297,6 +288,7 @@ typedef enum nodevals {
Node_func, /* lnode is param. list, rnode is body */
Node_ext_func, /* extension function, code_ptr is builtin code */
Node_old_ext_func, /* extension function, code_ptr is builtin code */
+ Node_builtin_func, /* built-in function, main use is for FUNCTAB */
Node_array_ref, /* array passed by ref as parameter */
Node_array_tree, /* Hashed array tree (HAT) */
@@ -1378,6 +1370,9 @@ extern void register_deferred_variable(const char *name, NODE *(*load_func)(void
extern int files_are_same(char *path, SRCFILE *src);
extern void valinfo(NODE *n, Func_print print_func, FILE *fp);
extern void negate_num(NODE *n);
+typedef NODE *(*builtin_func_t)(int); /* function that implements a built-in */
+extern builtin_func_t lookup_builtin(const char *name);
+extern void install_builtins(void);
/* builtin.c */
extern double double_to_int(double d);
extern NODE *do_exp(int nargs);
@@ -1473,6 +1468,7 @@ extern NODE *get_actual_argument(int, bool, bool);
extern void init_fields(void);
extern void set_record(const char *buf, int cnt);
extern void reset_record(void);
+extern void rebuild_record(void);
extern void set_NF(void);
extern NODE **get_field(long num, Func_ptr *assign);
extern NODE *do_split(int nargs);
@@ -1640,7 +1636,7 @@ extern void load_symbols();
extern void init_symbol_table();
extern NODE *symbol_table;
extern NODE *func_table;
-extern NODE *install_symbol(char *name, NODETYPE type);
+extern NODE *install_symbol(const char *name, NODETYPE type);
extern NODE *remove_symbol(NODE *r);
extern void destroy_symbol(NODE *r);
extern void release_symbols(NODE *symlist, int keep_globals);
diff --git a/awkgram.c b/awkgram.c
index 1c5e952d..34099a0b 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -4144,6 +4144,7 @@ struct token {
# define GAWKX 0x0400 /* gawk extension */
# define BREAK 0x0800 /* break allowed inside */
# define CONTINUE 0x1000 /* continue allowed inside */
+# define DEBUG_USE 0x2000 /* for use by developers */
NODE *(*ptr)(int); /* function that implements this keyword */
NODE *(*ptr2)(int); /* alternate arbitrary-precision function */
@@ -4182,7 +4183,7 @@ static const struct token tokentab[] = {
{"END", Op_rule, LEX_END, 0, 0, 0},
{"ENDFILE", Op_rule, LEX_ENDFILE, GAWKX, 0, 0},
#ifdef ARRAYDEBUG
-{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_adump, 0},
+{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|DEBUG_USE, do_adump, 0},
#endif
{"and", Op_builtin, LEX_BUILTIN, GAWKX, do_and, MPF(and)},
{"asort", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asort, 0},
@@ -4242,7 +4243,7 @@ static const struct token tokentab[] = {
{"sqrt", Op_builtin, LEX_BUILTIN, A(1), do_sqrt, MPF(sqrt)},
{"srand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand, MPF(srand)},
#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
-{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0), stopme, 0},
+{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|DEBUG_USE, stopme, 0},
#endif
{"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime, 0},
{"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum, MPF(strtonum)},
@@ -8054,3 +8055,35 @@ one_line_close(int fd)
}
+/* lookup_builtin --- return the function implementing built-in `name', or NULL if `name' is not a built-in function */
+
+builtin_func_t
+lookup_builtin(const char *name)
+{
+	int mid = check_special(name);	/* index into tokentab, -1 if not found */
+
+	if (mid == -1 || tokentab[mid].class != LEX_BUILTIN)
+		return NULL;
+#ifdef HAVE_MPFR
+	if (do_mpfr && tokentab[mid].ptr2 != NULL)	/* several built-ins have no MPFR variant (ptr2 is 0) */
+		return tokentab[mid].ptr2;
+#endif
+	/* the regular implementation; also the fallback when no arbitrary-precision one exists */
+	return tokentab[mid].ptr;
+}
+
+/* install_builtins --- enter every non-debug built-in function into FUNCTAB */
+
+void
+install_builtins(void)
+{
+	int idx;
+	const int count = sizeof(tokentab) / sizeof(tokentab[0]);
+
+	for (idx = 0; idx < count; idx++) {
+		/* skip entries that are not built-in functions, and developer-only ones */
+		if (tokentab[idx].class != LEX_BUILTIN || (tokentab[idx].flags & DEBUG_USE) != 0)
+			continue;
+		(void) install_symbol(tokentab[idx].operator, Node_builtin_func);
+	}
+}
diff --git a/awkgram.y b/awkgram.y
index 2ceb88e0..0f57b452 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -1805,6 +1805,7 @@ struct token {
# define GAWKX 0x0400 /* gawk extension */
# define BREAK 0x0800 /* break allowed inside */
# define CONTINUE 0x1000 /* continue allowed inside */
+# define DEBUG_USE 0x2000 /* for use by developers */
NODE *(*ptr)(int); /* function that implements this keyword */
NODE *(*ptr2)(int); /* alternate arbitrary-precision function */
@@ -1843,7 +1844,7 @@ static const struct token tokentab[] = {
{"END", Op_rule, LEX_END, 0, 0, 0},
{"ENDFILE", Op_rule, LEX_ENDFILE, GAWKX, 0, 0},
#ifdef ARRAYDEBUG
-{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_adump, 0},
+{"adump", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|DEBUG_USE, do_adump, 0},
#endif
{"and", Op_builtin, LEX_BUILTIN, GAWKX, do_and, MPF(and)},
{"asort", Op_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_asort, 0},
@@ -1903,7 +1904,7 @@ static const struct token tokentab[] = {
{"sqrt", Op_builtin, LEX_BUILTIN, A(1), do_sqrt, MPF(sqrt)},
{"srand", Op_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand, MPF(srand)},
#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
-{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0), stopme, 0},
+{"stopme", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|DEBUG_USE, stopme, 0},
#endif
{"strftime", Op_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2)|A(3), do_strftime, 0},
{"strtonum", Op_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum, MPF(strtonum)},
@@ -5715,3 +5716,35 @@ one_line_close(int fd)
}
+/* lookup_builtin --- return the function implementing built-in `name', or NULL if `name' is not a built-in function */
+
+builtin_func_t
+lookup_builtin(const char *name)
+{
+	int mid = check_special(name);	/* index into tokentab, -1 if not found */
+
+	if (mid == -1 || tokentab[mid].class != LEX_BUILTIN)
+		return NULL;
+#ifdef HAVE_MPFR
+	if (do_mpfr && tokentab[mid].ptr2 != NULL)	/* several built-ins have no MPFR variant (ptr2 is 0) */
+		return tokentab[mid].ptr2;
+#endif
+	/* the regular implementation; also the fallback when no arbitrary-precision one exists */
+	return tokentab[mid].ptr;
+}
+
+/* install_builtins --- enter every non-debug built-in function into FUNCTAB */
+
+void
+install_builtins(void)
+{
+	int idx;
+	const int count = sizeof(tokentab) / sizeof(tokentab[0]);
+
+	for (idx = 0; idx < count; idx++) {
+		/* skip entries that are not built-in functions, and developer-only ones */
+		if (tokentab[idx].class != LEX_BUILTIN || (tokentab[idx].flags & DEBUG_USE) != 0)
+			continue;
+		(void) install_symbol(tokentab[idx].operator, Node_builtin_func);
+	}
+}
diff --git a/awklib/eg/lib/div.awk b/awklib/eg/lib/div.awk
new file mode 100644
index 00000000..9d919288
--- /dev/null
+++ b/awklib/eg/lib/div.awk
@@ -0,0 +1,17 @@
+# div --- do integer division
+
+#
+# Arnold Robbins, arnold@skeeve.com, Public Domain
+# July, 2014
+
+function div(numerator, denominator, result, i)
+{
+ split("", result)
+ # result is now empty; truncate both operands to integers before dividing
+ numerator = int(numerator)
+ denominator = int(denominator)
+ result["quotient"] = int(numerator / denominator)
+ result["remainder"] = int(numerator % denominator)
+ # the answer is delivered through result[]; the return value is always 0.0 (i is unused)
+ return 0.0
+}
diff --git a/awklib/eg/lib/getopt.awk b/awklib/eg/lib/getopt.awk
index db957ceb..6b1f4c50 100644
--- a/awklib/eg/lib/getopt.awk
+++ b/awklib/eg/lib/getopt.awk
@@ -38,8 +38,7 @@ function getopt(argc, argv, options, thisopt, i)
i = index(options, thisopt)
if (i == 0) {
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) {
Optind++
_opti = 0
diff --git a/awklib/eg/lib/groupawk.in b/awklib/eg/lib/groupawk.in
index 0917b923..9382bce8 100644
--- a/awklib/eg/lib/groupawk.in
+++ b/awklib/eg/lib/groupawk.in
@@ -5,8 +5,7 @@
# Revised October 2000
# Revised December 2010
-BEGIN \
-{
+BEGIN {
# Change to suit your system
_gr_awklib = "/usr/local/libexec/awk/"
}
diff --git a/awklib/eg/lib/strtonum.awk b/awklib/eg/lib/strtonum.awk
index 9342e789..5e20626b 100644
--- a/awklib/eg/lib/strtonum.awk
+++ b/awklib/eg/lib/strtonum.awk
@@ -13,8 +13,9 @@ function mystrtonum(str, ret, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
}
@@ -26,6 +27,8 @@ function mystrtonum(str, ret, n, i, k, c)
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
diff --git a/awklib/eg/prog/alarm.awk b/awklib/eg/prog/alarm.awk
index 63cf64a4..59630ea8 100644
--- a/awklib/eg/prog/alarm.awk
+++ b/awklib/eg/prog/alarm.awk
@@ -8,8 +8,7 @@
# usage: alarm time [ "message" [ count [ delay ] ] ]
-BEGIN \
-{
+BEGIN {
# Initial argument sanity checking
usage1 = "usage: alarm time ['message' [count [delay]]]"
usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
diff --git a/awklib/eg/prog/cut.awk b/awklib/eg/prog/cut.awk
index 04d9bc11..56e35e71 100644
--- a/awklib/eg/prog/cut.awk
+++ b/awklib/eg/prog/cut.awk
@@ -20,8 +20,7 @@ function usage( e1, e2)
print e2 > "/dev/stderr"
exit 1
}
-BEGIN \
-{
+BEGIN {
FS = "\t" # default
OFS = FS
while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) {
diff --git a/awklib/eg/prog/egrep.awk b/awklib/eg/prog/egrep.awk
index 86b3cfda..094bdea5 100644
--- a/awklib/eg/prog/egrep.awk
+++ b/awklib/eg/prog/egrep.awk
@@ -88,8 +88,7 @@ function endfile(file)
print
}
}
-END \
-{
+END {
exit (total == 0)
}
function usage( e)
diff --git a/awklib/eg/prog/id.awk b/awklib/eg/prog/id.awk
index cf744447..992fa57c 100644
--- a/awklib/eg/prog/id.awk
+++ b/awklib/eg/prog/id.awk
@@ -11,8 +11,7 @@
# uid=12(foo) euid=34(bar) gid=3(baz) \
# egid=5(blat) groups=9(nine),2(two),1(one)
-BEGIN \
-{
+BEGIN {
uid = PROCINFO["uid"]
euid = PROCINFO["euid"]
gid = PROCINFO["gid"]
diff --git a/awklib/eg/prog/labels.awk b/awklib/eg/prog/labels.awk
index abf53c3b..3195809b 100644
--- a/awklib/eg/prog/labels.awk
+++ b/awklib/eg/prog/labels.awk
@@ -48,7 +48,6 @@ function printpage( i, j)
Count++
}
-END \
-{
+END {
printpage()
}
diff --git a/awklib/eg/prog/tee.awk b/awklib/eg/prog/tee.awk
index 639b9f80..fd9985f1 100644
--- a/awklib/eg/prog/tee.awk
+++ b/awklib/eg/prog/tee.awk
@@ -7,8 +7,7 @@
# May 1993
# Revised December 1995
-BEGIN \
-{
+BEGIN {
for (i = 1; i < ARGC; i++)
copy[i] = ARGV[i]
@@ -35,8 +34,7 @@ BEGIN \
print > copy[i]
print
}
-END \
-{
+END {
for (i in copy)
close(copy[i])
}
diff --git a/awklib/eg/prog/uniq.awk b/awklib/eg/prog/uniq.awk
index 990387ac..2a2cf63e 100644
--- a/awklib/eg/prog/uniq.awk
+++ b/awklib/eg/prog/uniq.awk
@@ -18,8 +18,7 @@ function usage( e)
# -n skip n fields
# +n skip n characters, skip fields first
-BEGIN \
-{
+BEGIN {
count = 1
outputfile = "/dev/stdout"
opts = "udc0:1:2:3:4:5:6:7:8:9:"
@@ -31,7 +30,7 @@ BEGIN \
else if (c == "c")
do_count++
else if (index("0123456789", c) != 0) {
- # getopt requires args to options
+ # getopt() requires args to options
# this messes us up for things like -5
if (Optarg ~ /^[[:digit:]]+$/)
fcount = (c Optarg) + 0
diff --git a/builtin.c b/builtin.c
index 778760d9..1c794be7 100644
--- a/builtin.c
+++ b/builtin.c
@@ -1759,7 +1759,14 @@ do_substr(int nargs)
else if (do_lint == DO_LINT_INVALID && ! (d_length >= 0))
lintwarn(_("substr: length %g is not >= 0"), d_length);
DEREF(t1);
- return dupnode(Nnull_string);
+ /*
+ * Return explicit null string instead of doing
+ * dupnode(Nnull_string) so that if the result
+ * is checked with the combination of length()
+ * and lint, no error is reported about using
+ * an uninitialized value. Same thing later, too.
+ */
+ return make_string("", 0);
}
if (do_lint) {
if (double_to_int(d_length) != d_length)
@@ -1813,7 +1820,7 @@ do_substr(int nargs)
if (do_lint && (do_lint == DO_LINT_ALL || ((indx | length) != 0)))
lintwarn(_("substr: source string is zero length"));
DEREF(t1);
- return dupnode(Nnull_string);
+ return make_string("", 0);
}
/* get total len of input string, for following checks */
@@ -1830,7 +1837,7 @@ do_substr(int nargs)
lintwarn(_("substr: start index %g is past end of string"),
d_index);
DEREF(t1);
- return dupnode(Nnull_string);
+ return make_string("", 0);
}
if (length > src_len - indx) {
if (do_lint)
@@ -2847,16 +2854,11 @@ set_how_many:
text = t->stptr;
textlen = t->stlen;
- buflen = textlen + 2;
repl = s->stptr;
replend = repl + s->stlen;
repllen = replend - repl;
- emalloc(buf, char *, buflen + 2, "do_sub");
- buf[buflen] = '\0';
- buf[buflen + 1] = '\0';
-
ampersands = 0;
/*
@@ -2915,6 +2917,13 @@ set_how_many:
}
lastmatchnonzero = false;
+
+ /* guesstimate how much room to allocate; +2 forces > 0 */
+ buflen = textlen + (ampersands + 1) * repllen + 2;
+ emalloc(buf, char *, buflen + 2, "do_sub");
+ buf[buflen] = '\0';
+ buf[buflen + 1] = '\0';
+
bp = buf;
for (current = 1;; current++) {
matches++;
diff --git a/cmake/Toolchain_clang.cmake b/cmake/Toolchain_clang.cmake
new file mode 100644
index 00000000..89353570
--- /dev/null
+++ b/cmake/Toolchain_clang.cmake
@@ -0,0 +1,19 @@
+# http://www.cmake.org/Wiki/CmakeMingw
+# http://www.cmake.org/Wiki/CMake_Cross_Compiling#The_toolchain_file
+
+# Name of the operating system the build targets.
+set(CMAKE_SYSTEM_NAME Generic)
+
+# C compiler to use (no C++ compiler is configured here).
+set(CMAKE_C_COMPILER /usr/bin/clang)
+
+# Root of the target environment.
+set(CMAKE_FIND_ROOT_PATH /usr/lib64/clang/3.1)
+
+# Change how the find_xxx() commands search: headers and
+# libraries are looked up only in the target environment,
+# programs only in the host environment.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
diff --git a/cmake/Toolchain_generic.cmake b/cmake/Toolchain_generic.cmake
new file mode 100644
index 00000000..91ddc6e7
--- /dev/null
+++ b/cmake/Toolchain_generic.cmake
@@ -0,0 +1,21 @@
+# Background: http://www.cmake.org/Wiki/CmakeMingw
+# and http://www.cmake.org/Wiki/CMake_Cross_Compiling#The_toolchain_file
+
+# Name of the target operating system.
+set(CMAKE_SYSTEM_NAME Generic)
+
+# C compiler to use (this toolchain does not configure a C++ compiler).
+# The path matches Ubuntu 12.04.1 LTS.
+set(CMAKE_C_COMPILER /usr/bin/gcc)
+
+# Root directory of the target environment.
+# The path matches Ubuntu 12.04.1 LTS.
+set(CMAKE_FIND_ROOT_PATH /usr/)
+
+# Adjust the default behaviour of the find_xxx() commands:
+# headers and libraries are searched only in the target environment,
+# programs are searched only in the host environment.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
diff --git a/cmake/Toolchain_mingw32.cmake b/cmake/Toolchain_mingw32.cmake
new file mode 100644
index 00000000..bb885f2f
--- /dev/null
+++ b/cmake/Toolchain_mingw32.cmake
@@ -0,0 +1,23 @@
+# Background: http://www.cmake.org/Wiki/CmakeMingw
+# and http://www.cmake.org/Wiki/CMake_Cross_Compiling#The_toolchain_file
+
+# Name of the target operating system.
+set(CMAKE_SYSTEM_NAME Windows)
+
+# Cross compilers for C and C++, plus the Windows resource compiler.
+# The paths match Ubuntu 12.04.1 LTS.
+set(CMAKE_C_COMPILER /usr/bin/i686-w64-mingw32-gcc)
+set(CMAKE_CXX_COMPILER /usr/bin/i686-w64-mingw32-g++)
+set(CMAKE_RC_COMPILER /usr/bin/i686-w64-mingw32-windres)
+
+# Root directory of the target environment.
+# The path matches Ubuntu 12.04.1 LTS.
+set(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32)
+
+# Adjust the default behaviour of the find_xxx() commands:
+# headers and libraries are searched only in the target environment,
+# programs are searched only in the host environment.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
diff --git a/cmake/Toolchain_s390.cmake b/cmake/Toolchain_s390.cmake
new file mode 100644
index 00000000..e1cdcfff
--- /dev/null
+++ b/cmake/Toolchain_s390.cmake
@@ -0,0 +1,20 @@
+# Background: http://www.cmake.org/Wiki/CmakeMingw
+# and http://www.cmake.org/Wiki/CMake_Cross_Compiling#The_toolchain_file
+# Cross toolchain binaries: http://wiki.debian.org/EmdebianToolchain#Get_the_binaries
+
+# Name of the target operating system.
+set(CMAKE_SYSTEM_NAME Generic)
+
+# C cross compiler to use (this toolchain does not configure a C++ compiler).
+set(CMAKE_C_COMPILER /usr/bin/s390-linux-gnu-gcc-4.4)
+
+# Root directory of the target environment.
+set(CMAKE_FIND_ROOT_PATH /usr/s390-linux-gnu/)
+
+# Adjust the default behaviour of the find_xxx() commands:
+# headers and libraries are searched only in the target environment,
+# programs are searched only in the host environment.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
diff --git a/cmake/auk.ico b/cmake/auk.ico
new file mode 100644
index 00000000..795ef1d9
--- /dev/null
+++ b/cmake/auk.ico
Binary files differ
diff --git a/cmake/basictest b/cmake/basictest
new file mode 100755
index 00000000..210ed224
--- /dev/null
+++ b/cmake/basictest
@@ -0,0 +1,553 @@
+#!/bin/bash
+# bash is required: this script uses "function", "local" and "type -t".
+# Use this for debugging the test cases.
+# The resulting textual output will not destroy the test cases.
+set -x
+# After test case execution, the output can be found in
+# build/Testing/Temporary/LastTest.log
+
+export PATH=$PATH:/c/MinGW/msys/1.0/bin
+export GAWKEXE=$1 # first argument: the gawk executable to test
+export TESTCASE=$2 # second argument: the name of the test case
+TOPSRCDIR=$(dirname ${0})/..
+SRCDIR=${TOPSRCDIR}/test
+export AWKPATH=${SRCDIR}
+export AWKLIBPATH=$(dirname ${GAWKEXE})/extension/
+export LANG=C
+# Is this shell running in a native MinGW shell (MSYS) ?
+if test -n "$COMSPEC"; then
+ # Ignore all differences in white space.
+ COMPARE="diff -w"
+ PATH_SEPARATOR="\\"
+else
+ # This is a shell running in Unix environment.
+ COMPARE="cmp"
+ PATH_SEPARATOR="/"
+fi
+
+# This is the central function for executing a standard test case.
+# Many of the more specialized test cases rely on this function.
+function simple_test_case() {
+  local gawk_opts=$1   # options passed to the gawk executable
+  local script_args=$2 # parameters passed to the test case script
+  cd ${SRCDIR}
+  # An existing .in file is fed to gawk on standard input.
+  # The output redirection is attached to the gawk command itself so that
+  # the "set -x" trace does not mix with the captured test case output.
+  if test -r ${TESTCASE}.in
+  then
+    ${pregawk} $GAWKEXE ${gawk_opts} -f ${TESTCASE}.awk ${script_args} < ${TESTCASE}.in ${postgawk} > _${TESTCASE} 2>&1
+  else
+    ${pregawk} $GAWKEXE ${gawk_opts} -f ${TESTCASE}.awk ${script_args} ${postgawk} > _${TESTCASE} 2>&1
+  fi || echo EXIT CODE: $? >> _${TESTCASE}
+  # Compare the expected output (.ok) with the actual output (_ prefix).
+  # On a match the actual output is removed; otherwise it is kept for analysis.
+  ${COMPARE} ${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+  # The status of the compare/remove chain is the status of this function.
+}
+
+# Each test case that cannot be handled in the "standard way" shall
+# be implemented as a function here.
+
+function lintold() { simple_test_case "--lint-old" "" ; }
+function defref() { simple_test_case "--lint" "" ; }
+# fmtspcl generates its own .ok file; see the full fmtspcl function in this script.
+function lintwarn() { simple_test_case "--lint" "" ; }
+function noeffect() { simple_test_case "--lint" "" ; }
+function nofmtch() { simple_test_case "--lint" "" ; }
+function shadow() { simple_test_case "--lint" "" ; }
+function uninit2() { simple_test_case "--lint" "" ; }
+function uninit3() { simple_test_case "--lint" "" ; }
+function uninit4() { simple_test_case "--lint" "" ; }
+function uninit5() { simple_test_case "--lint" "" ; }
+function uninitialized() { simple_test_case "--lint" "" ; }
+
+function regtest() {
+ echo 'Some of the output from regtest is very system specific, do not'
+ echo 'be distressed if your output differs from that distributed.'
+ echo 'Manual inspection is called for.'
+ AWK=$GAWKEXE ${SRCDIR}/regtest.sh
+}
+
+function compare() { simple_test_case "" "0 1" ; }
+
+function inftest() {
+ echo This test is very machine specific...
+ $GAWKEXE -f ${SRCDIR}/inftest.awk | sed "s/inf/Inf/g" >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function getline2() { simple_test_case "" "getline2.awk getline2.awk" ; }
+
+function awkpath() { # run awkpath.awk with AWKPATH pointing at ${SRCDIR}/lib
+ AWKPATH="${SRCDIR}/lib" $GAWKEXE -f awkpath.awk >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function argtest() { simple_test_case "" "-x -y abc" ; }
+
+function badargs() {
+ $GAWKEXE -f 2>&1 | grep -v patchlevel >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function nonl() { simple_test_case "--lint" "/dev/null" ; }
+
+function poundbang() {
+# The original poundbang test case looks a bit non-deterministic.
+# This is a shortened version.
+ sed "s;/tmp/gawk;$GAWKEXE;" < ${SRCDIR}/poundbang.awk > ./_pbd.awk
+ chmod +x ./_pbd.awk
+ ./_pbd.awk ${SRCDIR}/poundbang.awk > _`basename ${TESTCASE}` ;
+ ${COMPARE} ${SRCDIR}/poundbang.awk _`basename ${TESTCASE}` && rm -f _`basename ${TESTCASE}` _pbd.awk
+}
+
+function messages() {
+ $GAWKEXE -f ${SRCDIR}/messages.awk >_out2 2>_out3
+ ${COMPARE} ${SRCDIR}/out1.ok _out1 && ${COMPARE} ${SRCDIR}/out2.ok _out2 && ${COMPARE} ${SRCDIR}/out3.ok _out3 && rm -f _out1 _out2 _out3
+}
+
+function argarray() {
+ case ${SRCDIR} in
+ .) : ;;
+ *) cp ${SRCDIR}/argarray.in . ;;
+ esac
+ TEST=test echo just a test | $GAWKEXE -f ${SRCDIR}/argarray.awk ./argarray.in - >_${TESTCASE}
+ case ${SRCDIR} in
+ .) : ;;
+ *) rm -f ./argarray.in ;;
+ esac
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+
+function localenl() {
+ ${SRCDIR}/${TESTCASE}.sh >_${TESTCASE} 2>/dev/null
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function mbprintf1() {
+ GAWKLOCALE=en_US.UTF-8 ; export GAWKLOCALE
+ LANG=en_US.UTF-8
+ $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >> _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function mbfw1() {
+ GAWKLOCALE=en_US.UTF-8 ; export GAWKLOCALE
+ LANG=en_US.UTF-8
+ $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >> _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function printfbad2() {
+ $GAWKEXE --lint -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in 2>&1 | sed "s;$SRCDIR/;;g" >_${TESTCASE} || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function beginfile1() {
+ AWKPATH=${SRCDIR} $GAWKEXE -f ${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.awk . ./no/such/file Makefile >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function beginfile2() {
+ # This differs from the original, the pwd part is new.
+ # The re-direction is now bound to the .sh file.
+ # This way the output of "set -x" is not written to the script's output file.
+ ( cd ${SRCDIR} && LC_ALL=C AWK="$GAWKEXE" ${SRCDIR}/${TESTCASE}.sh ${SRCDIR}/${TESTCASE}.in > `pwd`/_${TESTCASE} 2>&1 )
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok ${SRCDIR}/_${TESTCASE} && rm -f ${SRCDIR}/_${TESTCASE}
+}
+
+function dumpvars() {
+ AWKPATH=${SRCDIR} $GAWKEXE --dump-variables 1 < ${SRCDIR}/${TESTCASE}.in >/dev/null 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ mv awkvars.out _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function manyfiles() {
+ rm -rf junk
+ mkdir junk
+ $GAWKEXE 'BEGIN { for (i = 1; i <= 1030; i++) print i, i}' >_${TESTCASE}
+ $GAWKEXE -f ${SRCDIR}/manyfiles.awk _${TESTCASE} _${TESTCASE}
+ wc -l junk/* | $GAWKEXE '$1 != 2' | wc -l | sed "s/ *//g" > _${TESTCASE}
+ rm -rf junk
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function exitval1() {
+ $GAWKEXE -f ${SRCDIR}/exitval1.awk >_${TESTCASE} 2>&1; echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function fsspcoln() {
+ $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk 'FS=[ :]+' ${SRCDIR}/${TESTCASE}.in >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rsstart2() {
+ $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/rsstart1.in >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rsstart3() {
+ head ${SRCDIR}/rsstart1.in | $GAWKEXE -f ${SRCDIR}/rsstart2.awk >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function strftime() {
+ echo This test could fail on slow machines or on a minute boundary,
+ echo so if it does, double check the actual results:
+ export GAWKLOCALE=C TZ=GMT0
+ (LC_ALL=C date) | $GAWKEXE -v OUTPUT=_${TESTCASE} -f ${SRCDIR}/strftime.awk
+ # Never report a failure. "return" is used because this script defines an exit() test function.
+ ${COMPARE} strftime.ok _${TESTCASE} && rm -f _${TESTCASE} strftime.ok || return 0
+}
+
+function inplace1() {
+ cp ${SRCDIR}/inplace.1.in _${TESTCASE}.1
+ cp ${SRCDIR}/inplace.2.in _${TESTCASE}.2
+ AWKPATH=${SRCDIR}/../awklib/eg/lib $GAWKEXE -i inplace 'BEGIN {print "before"} {gsub(/foo/, "bar"); print} END {print "after"}' _${TESTCASE}.1 - _${TESTCASE}.2 < ${SRCDIR}/inplace.in >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.1.ok _${TESTCASE}.1 && rm -f _${TESTCASE}.1
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.2.ok _${TESTCASE}.2 && rm -f _${TESTCASE}.2
+}
+
+function inplace2() {
+ cp ${SRCDIR}/inplace.1.in _${TESTCASE}.1
+ cp ${SRCDIR}/inplace.2.in _${TESTCASE}.2
+ AWKPATH=${SRCDIR}/../awklib/eg/lib $GAWKEXE -i inplace -v INPLACE_SUFFIX=.bak 'BEGIN {print "before"} {gsub(/foo/, "bar"); print} END {print "after"}' _${TESTCASE}.1 - _${TESTCASE}.2 < ${SRCDIR}/inplace.in >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.1.ok _${TESTCASE}.1 && rm -f _${TESTCASE}.1
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.1.bak.ok _${TESTCASE}.1.bak && rm -f _${TESTCASE}.1.bak
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.2.ok _${TESTCASE}.2 && rm -f _${TESTCASE}.2
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.2.bak.ok _${TESTCASE}.2.bak && rm -f _${TESTCASE}.2.bak
+}
+
+function inplace3() {
+ cp ${SRCDIR}/inplace.1.in _${TESTCASE}.1
+ cp ${SRCDIR}/inplace.2.in _${TESTCASE}.2
+ AWKPATH=${SRCDIR}/../awklib/eg/lib $GAWKEXE -i inplace -v INPLACE_SUFFIX=.bak 'BEGIN {print "before"} {gsub(/foo/, "bar"); print} END {print "after"}' _${TESTCASE}.1 - _${TESTCASE}.2 < ${SRCDIR}/inplace.in >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ AWKPATH=${SRCDIR}/../awklib/eg/lib $GAWKEXE -i inplace -v INPLACE_SUFFIX=.bak 'BEGIN {print "Before"} {gsub(/bar/, "foo"); print} END {print "After"}' _${TESTCASE}.1 - _${TESTCASE}.2 < ${SRCDIR}/inplace.in >>_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.1.ok _${TESTCASE}.1 && rm -f _${TESTCASE}.1
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.1.bak.ok _${TESTCASE}.1.bak && rm -f _${TESTCASE}.1.bak
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.2.ok _${TESTCASE}.2 && rm -f _${TESTCASE}.2
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.2.bak.ok _${TESTCASE}.2.bak && rm -f _${TESTCASE}.2.bak
+}
+
+function testext() {
+ $GAWKEXE ' /^(@load|BEGIN)/,/^}/' ${SRCDIR}/../extension/testext.c > testext.awk
+ $GAWKEXE -f ${TESTCASE}.awk > ${SRCDIR}/_${TESTCASE} 2>&1 || echo EXIT CODE: $? >> ${SRCDIR}/_${TESTCASE}
+ rm -f testext.awk
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok ${SRCDIR}/_${TESTCASE} && rm -f ${SRCDIR}/_${TESTCASE}
+}
+
+function readdir() {
+ if [ "`uname`" = Linux ] && [ "`stat -f . 2>/dev/null | awk 'NR == 2 { print $NF }'`" = nfs ]; then
+ echo This test may fail on GNU/Linux systems when run on an NFS filesystem.;
+ echo If it does, try rerunning on an ext'[234]' filesystem. ;
+ fi
+ $GAWKEXE -f ${TESTCASE}.awk ${SRCDIR}/.. > ${SRCDIR}/_${TESTCASE} 2>&1
+ ls -afli ${TOPSRCDIR} | sed 1d | $GAWKEXE -f ${SRCDIR}/readdir0.awk -v extout=${SRCDIR}/_${TESTCASE} > ${SRCDIR}/${TESTCASE}.ok
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok ${SRCDIR}/_${TESTCASE} && rm -f ${SRCDIR}/_${TESTCASE} ${SRCDIR}/${TESTCASE}.ok
+}
+
+function ordchr2() {
+ $GAWKEXE -l ordchr 'BEGIN {print chr(ord("z"))}' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function include2() {
+ AWKPATH=${SRCDIR} $GAWKEXE -i inclib 'BEGIN {print sandwich("a", "b", "c")}' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -i inclib -i inclib.awk 'BEGIN {print sandwich("a", "b", "c")}' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe2() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -f inclib -f inclib.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe3() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -f hello -f hello.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe4() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -f hello -i hello.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe5() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -i hello -f hello.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe6() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -i inchello -f hello.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function incdupe7() {
+ AWKPATH=${SRCDIR} $GAWKEXE --lint -f hello -i inchello >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+# TODO: The compare operation passes even when there are diffs.
+function readfile() {
+ $GAWKEXE -l readfile 'BEGIN {printf "%s", readfile("Makefile")}' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} Makefile _${TESTCASE} && rm -f _${TESTCASE} || cp -p Makefile ${TESTCASE}.ok
+}
+
+function fts() {
+ if [ "`uname`" = IRIX ]; then \
+ echo This test may fail on IRIX systems when run on an NFS filesystem.; \
+ echo If it does, try rerunning on an xfs filesystem. ; \
+ fi
+ simple_test_case "" ""
+}
+
+function charasbytes() {
+ [ -z "$GAWKLOCALE" ] && GAWKLOCALE=en_US.UTF-8; \
+ AWKPATH=${SRCDIR} $GAWKEXE -b -v BINMODE=2 -f ${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in | \
+ od -c -t x1 | sed -e 's/ */ /g' -e 's/ *$//' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function symtab6() {
+ $GAWKEXE -d__${TESTCASE} -f ${SRCDIR}/${TESTCASE}.awk
+ grep -v '^ENVIRON' __${TESTCASE} | grep -v '^PROCINFO' > _${TESTCASE} ; rm __${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function symtab8() {
+ $GAWKEXE -d__${TESTCASE} -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in >_${TESTCASE}
+ grep -v '^ENVIRON' __${TESTCASE} | grep -v '^PROCINFO' | grep -v '^FILENAME' >> _${TESTCASE} ; rm __${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function colonwarn() {
+ for i in 1 2 3 ; \
+ do $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk $i < ${SRCDIR}/${TESTCASE}.in ; \
+ done > _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function litoct() {
+ echo ab | $GAWKEXE --traditional -f ${SRCDIR}/litoct.awk >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function devfd() {
+ $GAWKEXE 1 /dev/fd/4 /dev/fd/5 4<${SRCDIR}/devfd.in4 5<${SRCDIR}/devfd.in5 >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >> _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function fflush() {
+ ${SRCDIR}/fflush.sh >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function mmap8k() {
+ $GAWKEXE '{ print }' ${SRCDIR}/mmap8k.in >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/mmap8k.in _${TESTCASE} && rm -f _${TESTCASE} || cp ${SRCDIR}/${TESTCASE}.in ${TESTCASE}.ok
+}
+
+function pid() {
+ AWKPATH=${SRCDIR} AWK=$GAWKEXE ${SHELL} ${SRCDIR}/pid.sh $$ > _${TESTCASE} ; :
+ ${COMPARE} ${SRCDIR}/pid.ok _`basename ${TESTCASE}` && rm -f _${TESTCASE}
+}
+
+function strftlng() {
+ TZ=UTC; export TZ; $GAWKEXE -f ${SRCDIR}/strftlng.awk >_${TESTCASE}
+ if ${COMPARE} ${SRCDIR}/strftlng.ok _${TESTCASE} >/dev/null 2>&1 ; then : ; else \
+ TZ=UTC0; export TZ; $GAWKEXE -f ${SRCDIR}/strftlng.awk >_${TESTCASE} ; \
+ fi
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function nors() {
+ echo A B C D E | tr -d '\12\15' | $GAWKEXE '{ print $NF }' - ${SRCDIR}/nors.in > _${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function fmtspcl() {
+ $GAWKEXE -v "sd=${SRCDIR}" 'BEGIN {pnan = sprintf("%g",sqrt(-1)); nnan = sprintf("%g",-sqrt(-1)); pinf = sprintf("%g",-log(0)); ninf = sprintf("%g",log(0))} {sub(/positive_nan/,pnan); sub(/negative_nan/,nnan); sub(/positive_infinity/,pinf); sub(/negative_infinity/,ninf); sub(/fmtspcl/,(sd"/fmtspcl")); print}' < ${SRCDIR}/fmtspcl.tok > ${TESTCASE}.ok 2>/dev/null
+ $GAWKEXE $AWKFLAGS -f ${SRCDIR}/fmtspcl.awk --lint >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ if test -z "$AWKFLAGS" ; then
+ ${COMPARE} ${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+ else
+ ${COMPARE} ${SRCDIR}/${TESTCASE}-mpfr.ok _${TESTCASE} && rm -f _${TESTCASE}
+ fi
+}
+
+function pipeio2() { simple_test_case "-v SRCDIR=${SRCDIR}" "" ; }
+
+function arynocls() {
+ AWKPATH=${SRCDIR} $GAWKEXE -v INPUT=${SRCDIR}/arynocls.in -f arynocls.awk >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function inetechu() {
+ echo This test is for establishing UDP connections
+ $GAWKEXE 'BEGIN {print "" |& "/inet/udp/0/127.0.0.1/9"}'
+}
+
+function inetecht() {
+ echo This test is for establishing TCP connections
+ $GAWKEXE 'BEGIN {print "" |& "/inet/tcp/0/127.0.0.1/9"}'
+}
+
+function inetdayu() {
+ echo This test is for bidirectional UDP transmission
+ $GAWKEXE 'BEGIN { print "" |& "/inet/udp/0/127.0.0.1/13"; \
+ "/inet/udp/0/127.0.0.1/13" |& getline; print $0}'
+}
+
+function inetdayt() {
+ echo This test is for bidirectional TCP transmission
+ $GAWKEXE 'BEGIN { print "" |& "/inet/tcp/0/127.0.0.1/13"; \
+ "/inet/tcp/0/127.0.0.1/13" |& getline; print $0}'
+}
+
+function redfilnm() { simple_test_case "" "srcdir=${SRCDIR}" ; }
+
+function leaddig() { simple_test_case "-v x=2E" "" ; }
+function longwrds() { simple_test_case "-vSORT=sort" "" ; }
+
+function gsubtst3() {
+ $GAWKEXE --re-interval -f ${SRCDIR}/${TESTCASE}.awk ${SRCDIR}/${TESTCASE}.in >_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function space() {
+ $GAWKEXE -f ' ' ${SRCDIR}/space.awk >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rsnulbig() {
+ # Suppose that block size for pipe is at most 128kB:
+ $GAWKEXE 'BEGIN { for (i = 1; i <= 128*64+1; i++) print "abcdefgh123456\n" }' 2>&1 | \
+ $GAWKEXE 'BEGIN { RS = ""; ORS = "\n\n" }; { print }' 2>&1 | \
+ $GAWKEXE '/^[^a]/; END{ print NR }' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rsnulbig2() {
+ $GAWKEXE 'BEGIN { ORS = ""; n = "\n"; for (i = 1; i <= 10; i++) n = (n n); \
+ for (i = 1; i <= 128; i++) print n; print "abc\n" }' 2>&1 | \
+ $GAWKEXE 'BEGIN { RS = ""; ORS = "\n\n" };{ print }' 2>&1 | \
+ $GAWKEXE '/^[^a]/; END { print NR }' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function printf0() { simple_test_case "--posix" "" ; }
+
+function profile1() { # the outputs of the two gawk runs below must be identical
+ $GAWKEXE --pretty-print=ap-${TESTCASE}.out -f ${SRCDIR}/xref.awk ${SRCDIR}/dtdgport.awk > _${TESTCASE}.out1
+ $GAWKEXE -f ap-${TESTCASE}.out ${SRCDIR}/dtdgport.awk > _${TESTCASE}.out2 ; rm ap-${TESTCASE}.out
+ ${COMPARE} _${TESTCASE}.out1 _${TESTCASE}.out2 && rm _${TESTCASE}.out[12] || { echo EXIT CODE: $? >>_${TESTCASE} ; \
+ cp ${SRCDIR}/dtdgport.awk ${TESTCASE}.ok ; return 1 ; } # keep a failing status on mismatch
+}
+
+function profile2() {
+ $GAWKEXE --profile=ap-${TESTCASE}.out -v sortcmd=sort -f ${SRCDIR}/xref.awk ${SRCDIR}/dtdgport.awk > /dev/null
+ sed 1,2d < ap-${TESTCASE}.out > _${TESTCASE}; rm ap-${TESTCASE}.out
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function profile3() {
+ $GAWKEXE --profile=ap-${TESTCASE}.out -f ${SRCDIR}/${TESTCASE}.awk > /dev/null
+ sed 1,2d < ap-${TESTCASE}.out > _${TESTCASE}; rm ap-${TESTCASE}.out
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function profile4() {
+ GAWK_NO_PP_RUN=1 $GAWKEXE --profile=ap-${TESTCASE}.out -f ${SRCDIR}/${TESTCASE}.awk > /dev/null
+ sed 1,2d < ap-${TESTCASE}.out > _${TESTCASE}; rm ap-${TESTCASE}.out
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function profile5() {
+ GAWK_NO_PP_RUN=1 $GAWKEXE --profile=ap-${TESTCASE}.out -f ${SRCDIR}/${TESTCASE}.awk > /dev/null
+ sed 1,2d < ap-${TESTCASE}.out > _${TESTCASE}; rm ap-${TESTCASE}.out
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function posix2008sub() {
+ $GAWKEXE --posix -f ${SRCDIR}/${TESTCASE}.awk > _${TESTCASE} 2>&1
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function next() {
+ LC_ALL=${GAWKLOCALE:-C} LANG=${GAWKLOCALE:-C} AWK="$GAWKEXE" ${SRCDIR}/${TESTCASE}.sh > _${TESTCASE} 2>&1
+ LC_ALL=C ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function exit() {
+ AWK="$GAWKEXE" ${SRCDIR}/${TESTCASE}.sh > _${TESTCASE} 2>&1
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function mpfrexprange() { simple_test_case "-M -vPREC=53 " "" ; }
+function mpfrrnd() { simple_test_case "-M -vPREC=53 " "" ; }
+function mpfrnr() { simple_test_case "-M -vPREC=113" "" ; }
+function mpfrbigint() { simple_test_case "-M " "" ; }
+
+function jarebug() {
+ ${SRCDIR}/${TESTCASE}.sh "$GAWKEXE" "${SRCDIR}/${TESTCASE}.awk" "${SRCDIR}/${TESTCASE}.in" "_${TESTCASE}"
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rtlen() {
+ ${SRCDIR}/${TESTCASE}.sh >_${TESTCASE} || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rtlen01() {
+ ${SRCDIR}/${TESTCASE}.sh >_${TESTCASE} || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function rtlenmb() {
+ GAWKLOCALE=en_US.UTF-8 ; export GAWKLOCALE
+ ${SRCDIR}/rtlen.sh >_${TESTCASE} || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function nondec2() { simple_test_case "--non-decimal-data -v a=0x1" "" ; }
+
+function nofile() {
+ $GAWKEXE '{}' no/such/file >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function binmode1() {
+ $GAWKEXE -v BINMODE=3 'BEGIN { print BINMODE }' >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function devfd1() {
+ $GAWKEXE -f ${SRCDIR}/${TESTCASE}.awk 4< ${SRCDIR}/devfd.in1 5< ${SRCDIR}/devfd.in2 >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+function devfd2() {
+ # The program text is the '1' which will print each record. How compact can you get?
+ $GAWKEXE 1 /dev/fd/4 /dev/fd/5 4< ${SRCDIR}/devfd.in1 5< ${SRCDIR}/devfd.in2 >_${TESTCASE} 2>&1 || echo EXIT CODE: $? >>_${TESTCASE}
+ ${COMPARE} ${SRCDIR}/${TESTCASE}.ok _${TESTCASE} && rm -f _${TESTCASE}
+}
+
+# Is this test case implemented as a function ?
+if [ "$( type -t $TESTCASE )" = "function" ]
+then
+ $TESTCASE
+else
+ # If no function exists, then treat the test case in standard way.
+ simple_test_case "" ""
+fi
+
diff --git a/cmake/configure b/cmake/configure
new file mode 100755
index 00000000..d375a81c
--- /dev/null
+++ b/cmake/configure
@@ -0,0 +1,58 @@
+#!/bin/sh
+# On 2013-05-14 Arnold wrote in an e-mail:
+
+# <QUOTE>
+# I think that using CMake would be more palatable if there is also a simple
+# configure wrapper that can be used by people who build distributions. This would
+# mean things like
+#
+# configure CC=XXXX # XXXX in { gcc, clang, tcc } or native platform cc
+# configure --prefix=/path/to/install
+#
+# And the few other current configure options like --with-whiny-user-strftime,
+# --disable-nls, etc. I don't know if we need all the standard configure options,
+# but I do want the ones I've added in configure.ac.
+# </QUOTE>
+
+
+# Anyone using this script still needs an out-of-source build directory.
+if [ -f CMakeLists.txt ] ; then
+ echo "Your current working directory contains a file CMakeLists.txt, indicating"
+ echo "that this is a source directory. Create a new directory elsewhere, change into"
+ echo "this empty directory and try again."
+ echo " mkdir build"
+ echo " cd build"
+ echo " ../$0"
+ exit 1
+fi
+
+# TODO: Evaluate all the options and translate the options into CMake variables.
+CC=$( which cc )
+PREFIX=""
+SRCDIR=".."
+WHINY=""
+
+for p in $@
+do
+ if [ ${p:0:3} = "CC=" ]; then CC=${p:3}; fi
+ if [ ${p:0:9} = "--prefix=" ]; then PREFIX=-DCMAKE_INSTALL_PREFIX=${p:9}; fi
+ if [ ${p:0:9} = "--srcdir=" ]; then SRCDIR=${p:9}; fi
+ if [ ${p:0:26} = "--with-whiny-user-strftime" ]; then WHINY=-DUSE_INCLUDED_STRFTIME=1; fi
+done
+CC=$( which $CC )
+
+rm -f Toolchain.cmake
+(
+ echo "set(CMAKE_C_COMPILER $CC)"
+ echo "set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)"
+ echo "set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)"
+ echo "set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)"
+) > Toolchain.cmake
+
+if ! [ -f ${SRCDIR}/CMakeLists.txt ] ; then
+ echo "The source directory (${SRCDIR}) does not contain a file CMakeLists.txt."
+ exit 1
+fi
+
+cmake ${PREFIX} ${WHINY} -DCMAKE_TOOLCHAIN_FILE=Toolchain.cmake ${SRCDIR}
+
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
new file mode 100644
index 00000000..7dbe841c
--- /dev/null
+++ b/cmake/configure.cmake
@@ -0,0 +1,300 @@
+#
+# cmake/configure --- CMake input file for gawk
+#
+# Copyright (C) 2013-2014
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+# USE_CONFIG_H chooses where feature macros end up: appended to config.h (ON)
+# or passed as compiler definitions (OFF).  Either way config.h is rewritten
+# here so that it starts out with nothing but a header comment.
+option(USE_CONFIG_H "Generate a file config.h for inclusion into C source code" ON)
+if (NOT USE_CONFIG_H)
+  file(WRITE config.h "/* empty file, all settings defined by CMake. */")
+else()
+  add_definitions(-D HAVE_CONFIG_H)
+  file(WRITE config.h "/* all settings defined by CMake. */\n\n")
+  # Alternative that is not in use: configure a header template instead, see
+  # http://www.cmake.org/cmake/help/v2.8.8/cmake.html#command:configure_file
+  # CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/config.cmake.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h IMMEDIATE )
+endif()
+
+include(CheckIncludeFiles)
+include(CheckIncludeFile)
+include(CheckSymbolExists)
+include(CheckFunctionExists)
+include(CheckLibraryExists)
+include(CheckTypeSize)
+include(CheckStructHasMember)
+INCLUDE(CheckCSourceCompiles)
+include(CheckPrototypeDefinition)
+
+# DefineConfigH(feature)
+#   Publish the variable named by `feature` to the C sources, but only when
+#   that variable currently evaluates as true.
+#   With USE_CONFIG_H enabled a "#define <feature> <value>" line is appended
+#   to config.h; otherwise -D<feature>=<value> is added to the compile
+#   definitions.
+macro(DefineConfigH feature)
+  if (${feature})
+    # Test the option's truth value rather than comparing its text with the
+    # literal ON: a user may enable it as 1, TRUE or YES, and the block that
+    # creates config.h already decides with a plain if (USE_CONFIG_H).
+    if (USE_CONFIG_H)
+      file(APPEND config.h "#define ${feature} ${${feature}}\n")
+    else()
+      add_definitions(-D${feature}=${${feature}})
+    endif()
+  endif()
+endmacro()
+
+# Thin wrappers: each one runs a single CMake check (or a plain set) that
+# stores its result in the variable named by `feature`, then forwards that
+# name to DefineConfigH.  They stay macros because the variable has to be
+# visible in the caller's scope.
+
+# Assign `value` to `feature` and publish it.
+macro(DefineConfigHValue feature value)
+  set(${feature} ${value})
+  DefineConfigH(${feature})
+endmacro()
+
+# Publish `feature` if the function `func` can be linked.
+macro(DefineFunctionIfAvailable func feature)
+  check_function_exists("${func}" "${feature}")
+  DefineConfigH(${feature})
+endmacro()
+
+# Publish `feature` if the header `hfile` can be included.
+macro(DefineHFileIfAvailable hfile feature)
+  check_include_file("${hfile}" "${feature}")
+  DefineConfigH(${feature})
+endmacro()
+
+# Publish `feature` with the result check_type_size stores for `type`.
+macro(DefineTypeIfAvailable type feature)
+  check_type_size("${type}" "${feature}")
+  DefineConfigH(${feature})
+endmacro()
+
+# Publish `feature` if `symbol` is available after including `hfile`.
+macro(DefineSymbolIfAvailable symbol hfile feature)
+  check_symbol_exists("${symbol}" "${hfile}" "${feature}")
+  DefineConfigH(${feature})
+endmacro()
+
+# Publish `feature` if `struct`, as declared in `hfile`, has a field `member`.
+macro(DefineStructHasMemberIfAvailable struct member hfile feature)
+  check_struct_has_member("${struct}" "${member}" "${hfile}" "${feature}")
+  DefineConfigH(${feature})
+endmacro()
+
+# Publish `feature` if library `lib` (searched in `location`) provides `func`.
+macro(DefineLibraryIfAvailable lib func location feature)
+  check_library_exists("${lib}" "${func}" "${location}" "${feature}")
+  DefineConfigH(${feature})
+endmacro()
+
+# Publish `feature` if the C program text `source` compiles and links.
+macro(DefineIfSourceCompiles source feature)
+  check_c_source_compiles("${source}" "${feature}")
+  DefineConfigH(${feature})
+endmacro()
+
+# Read the version number from the AC_INIT line of configure.ac.
+# The three components are captured by the match itself and read back from
+# CMAKE_MATCH_<n>.  The earlier follow-up pattern ".*([0-9]+)\\." let the
+# greedy ".*" swallow all but the last digit of the first component, so a
+# major version of 10 or more would have been reported as a single digit.
+file(READ configure.ac CONFIG_AUTOMAKE)
+string(REGEX MATCH "AC_INIT\\(\\[GNU Awk\\], ([0-9]+)\\.([0-9]+)\\.([0-9]+)" GAWK_AUTOMAKE_LINE_VERSION "${CONFIG_AUTOMAKE}")
+set(GAWK_MAJOR_VERSION "${CMAKE_MATCH_1}")
+set(GAWK_MINOR_VERSION "${CMAKE_MATCH_2}")
+set(GAWK_BUGFIX_VERSION "${CMAKE_MATCH_3}")
+
+# The definition of the symbol GAWK cannot be passed in config.h
+# because the extensions will fail to build.
+add_definitions(-DGAWK)
+DefineConfigHValue(_GL_ATTRIBUTE_PURE "__attribute__ ((__pure__))")
+# GAWK_VERSION is assembled from the three components parsed out of configure.ac.
+DefineConfigHValue(GAWK_VERSION "${GAWK_MAJOR_VERSION}.${GAWK_MINOR_VERSION}.${GAWK_BUGFIX_VERSION}")
+# Package identity strings.  The \\" sequences are presumably there so that
+# the quotes survive the macro's argument substitution and reach the C code
+# as string literals -- TODO confirm against a generated config.h.
+DefineConfigHValue(VERSION \\"${GAWK_VERSION}\\")
+DefineConfigHValue(PACKAGE \\"gawk\\")
+DefineConfigHValue(PACKAGE_STRING \\"GNU Awk ${GAWK_VERSION}\\")
+DefineConfigHValue(PACKAGE_TARNAME \\"gawk\\")
+DefineConfigHValue(PACKAGE_URL \\"http://www.gnu.org/software/gawk/\\")
+DefineConfigHValue(PACKAGE_VERSION \\"${GAWK_VERSION}\\")
+# Both default search paths are set to directories below the build tree
+# (CMAKE_BINARY_DIR).
+DefineConfigHValue(DEFPATH \\"${CMAKE_BINARY_DIR}/awk\\")
+DefineConfigHValue(DEFLIBPATH \\"${CMAKE_BINARY_DIR}/lib\\")
+# Report whether the platform names a library for dynamic loading.
+if (NOT CMAKE_DL_LIBS)
+  message(STATUS "Found no CMAKE_DL_LIBS")
+else()
+  message(STATUS "Found CMAKE_DL_LIBS:${CMAKE_DL_LIBS}")
+endif()
+# When the platform has a shared-library suffix, define DYNAMIC and SHLIBEXT.
+# SHLIBEXT is the suffix with its leading dot removed.
+if (NOT CMAKE_SHARED_LIBRARY_SUFFIX)
+  message(STATUS "Found no SHLIBEXT")
+else()
+  DefineConfigHValue(DYNAMIC 1)
+  string(REGEX REPLACE "^(\\.)([a-zA-Z0-9])" "\\2" SHLIBEXT "${CMAKE_SHARED_LIBRARY_SUFFIX}")
+  DefineConfigHValue(SHLIBEXT \\"${SHLIBEXT}\\")
+  message(STATUS "Found SHLIBEXT: ${SHLIBEXT}")
+endif()
+# Record the result of check_type_size for two integer types.
+DefineTypeIfAvailable("unsigned int" SIZEOF_UNSIGNED_INT)
+DefineTypeIfAvailable("unsigned long" SIZEOF_UNSIGNED_LONG)
+# The next two symbols are defined unconditionally; no probe is run for them.
+#/* Define to 1 if *printf supports %F format */
+add_definitions(-D PRINTF_HAS_F_FORMAT)
+#/* Define as the return type of signal handlers (`int' or `void'). */
+add_definitions(-D RETSIGTYPE=void)
+#add_definitions(-D PIPES_SIMULATED)
+# GETPGRP_VOID is published when unistd.h declares getpgrp with the prototype
+# "pid_t getpgrp(void)".
+check_prototype_definition(getpgrp "pid_t getpgrp(void)" "NULL" "unistd.h" GETPGRP_VOID)
+DefineConfigH(GETPGRP_VOID)
+#add_definitions(-D YYPARSE_PARAM)
+
+# Probe for basic functions and headers; each available item is published
+# under the HAVE_* name given as the second argument.
+DefineFunctionIfAvailable(snprintf HAVE_SNPRINTF)
+DefineFunctionIfAvailable(vprintf HAVE_VPRINTF)
+DefineHFileIfAvailable(sys/types.h HAVE_SYS_TYPES_H)
+DefineHFileIfAvailable(sys/stat.h HAVE_SYS_STAT_H)
+DefineHFileIfAvailable(string.h HAVE_STRING_H)
+DefineHFileIfAvailable(memory.h HAVE_MEMORY_H)
+DefineHFileIfAvailable(strings.h HAVE_STRINGS_H)
+DefineHFileIfAvailable(stdint.h HAVE_STDINT_H)
+DefineHFileIfAvailable(inttypes.h HAVE_INTTYPES_H)
+DefineHFileIfAvailable(stdlib.h HAVE_STDLIB_H)
+DefineHFileIfAvailable(unistd.h HAVE_UNISTD_H)
+# Look for libintl.h and a matching library; the results are stored in
+# INTL_INCLUDE_DIR and INTL_LIBRARIES.
+FIND_PATH(INTL_INCLUDE_DIR libintl.h PATHS /usr/include /usr/local/include)
+FIND_LIBRARY(INTL_LIBRARIES intl c PATHS /usr/lib/ /usr/local/lib)
+# Locale-related symbols and the C99 _Bool type.
+DefineSymbolIfAvailable("CODESET" "langinfo.h" HAVE_LANGINFO_CODESET)
+DefineSymbolIfAvailable("LC_MESSAGES" "locale.h" HAVE_LC_MESSAGES)
+DefineTypeIfAvailable("_Bool" HAVE__BOOL)
+# Enable native-language support only when all four probes succeeded.
+# The variables are named bare so that if() dereferences them itself.  Written
+# as ${VAR}, an empty probe result leaves a hole in the argument list, which
+# if() can reject as "Unknown arguments specified".
+# NOTE(review): HAVE_GETTEXT and HAVE_DCGETTEXT are probed further down in
+# this file, after find_package(Gettext).  On a first configure run with an
+# empty cache they are therefore still unset here and NLS stays disabled --
+# confirm whether this block should move below those probes.
+if (HAVE_GETTEXT AND HAVE_DCGETTEXT AND HAVE_LANGINFO_CODESET AND HAVE_LC_MESSAGES)
+  add_definitions(-D LOCALEDIR=\\"/usr/share/locale\\")
+  add_definitions(-D ENABLE_NLS)
+  add_subdirectory(po)
+endif()
+# Header probes.  Each HAVE_* name mirrors the header path it stands for
+# (sys/ioctl.h -> HAVE_SYS_IOCTL_H).
+DefineHFileIfAvailable(stdbool.h HAVE_STDBOOL_H)
+DefineHFileIfAvailable(sys/wait.h HAVE_SYS_WAIT_H)
+DefineHFileIfAvailable(arpa/inet.h HAVE_ARPA_INET_H)
+DefineHFileIfAvailable(fcntl.h HAVE_FCNTL_H)
+DefineHFileIfAvailable(limits.h HAVE_LIMITS_H)
+DefineHFileIfAvailable(locale.h HAVE_LOCALE_H)
+DefineHFileIfAvailable(libintl.h HAVE_LIBINTL_H)
+DefineHFileIfAvailable(mcheck.h HAVE_MCHECK_H)
+DefineHFileIfAvailable(netdb.h HAVE_NETDB_H)
+DefineHFileIfAvailable(netinet/in.h HAVE_NETINET_IN_H)
+DefineHFileIfAvailable(stdarg.h HAVE_STDARG_H)
+DefineHFileIfAvailable(stddef.h HAVE_STDDEF_H)
+DefineHFileIfAvailable(sys/ioctl.h HAVE_SYS_IOCTL_H)
+DefineHFileIfAvailable(sys/param.h HAVE_SYS_PARAM_H)
+DefineHFileIfAvailable(sys/socket.h HAVE_SYS_SOCKET_H)
+# HAVE_TERMIOS_H stands for <termios.h>, so that is the header to probe.
+# Probing sys/termios.h, as before, ties the macro to a different header that
+# a system may lack even though termios.h is present.
+DefineHFileIfAvailable(termios.h HAVE_TERMIOS_H)
+DefineHFileIfAvailable(stropts.h HAVE_STROPTS_H)
+DefineHFileIfAvailable(wchar.h HAVE_WCHAR_H)
+DefineHFileIfAvailable(wctype.h HAVE_WCTYPE_H)
+# Type probes; the variable named second receives the check_type_size result.
+DefineTypeIfAvailable("long long int" HAVE_LONG_LONG_INT)
+DefineTypeIfAvailable("unsigned long long int" HAVE_UNSIGNED_LONG_LONG_INT)
+DefineTypeIfAvailable(intmax_t INTMAX_T)
+DefineTypeIfAvailable(uintmax_t UINTMAX_T)
+DefineTypeIfAvailable("time_t" TIME_T_IN_SYS_TYPES_H)
+# wctype_t and wint_t are probed with wctype.h added to the test program;
+# the extra include list is emptied again afterwards.
+set(CMAKE_EXTRA_INCLUDE_FILES wctype.h)
+DefineTypeIfAvailable("wctype_t" HAVE_WCTYPE_T)
+DefineTypeIfAvailable("wint_t" HAVE_WINT_T)
+set(CMAKE_EXTRA_INCLUDE_FILES)
+
+# Struct member probes.  st_blksize and tm_zone are each published under two
+# different macro names.
+DefineStructHasMemberIfAvailable("struct sockaddr_storage" ss_family sys/socket.h HAVE_SOCKADDR_STORAGE)
+DefineStructHasMemberIfAvailable("struct stat" st_blksize sys/stat.h HAVE_STRUCT_STAT_ST_BLKSIZE)
+DefineStructHasMemberIfAvailable("struct stat" st_blksize sys/stat.h HAVE_ST_BLKSIZE)
+DefineStructHasMemberIfAvailable("struct tm" tm_zone time.h HAVE_TM_ZONE)
+DefineStructHasMemberIfAvailable("struct tm" tm_zone time.h HAVE_STRUCT_TM_TM_ZONE)
+
+DefineHFileIfAvailable(sys/time.h HAVE_SYS_TIME_H)
+DefineFunctionIfAvailable(alarm HAVE_ALARM)
+# NOTE(review): tzname is probed with the function check although the macro
+# name says "DECL" -- confirm that a symbol/declaration check is not what is
+# wanted here.
+DefineFunctionIfAvailable(tzname HAVE_DECL_TZNAME)
+DefineFunctionIfAvailable(mktime HAVE_MKTIME)
+DefineFunctionIfAvailable(getaddrinfo HAVE_GETADDRINFO)
+DefineFunctionIfAvailable(atexit HAVE_ATEXIT)
+DefineFunctionIfAvailable(btowc HAVE_BTOWC)
+DefineFunctionIfAvailable(fmod HAVE_FMOD)
+DefineFunctionIfAvailable(isinf HAVE_ISINF)
+# NOTE(review): "ismod" does not look like a standard C library function --
+# verify the intended name.
+DefineFunctionIfAvailable(ismod HAVE_ISMOD)
+DefineFunctionIfAvailable(getgrent HAVE_GETGRENT)
+DefineSymbolIfAvailable("getgroups" "unistd.h" HAVE_GETGROUPS)
+# If getgroups exists, GETGROUPS_T is set to gid_t when unistd.h declares it
+# with a gid_t array parameter, and to int otherwise.
+if (${HAVE_GETGROUPS})
+ check_prototype_definition(getgroups "int getgroups(int size, gid_t list[])" "NULL" "unistd.h" GETGROUPS_T)
+ if (${GETGROUPS_T})
+ DefineConfigHValue(GETGROUPS_T gid_t)
+ else()
+ DefineConfigHValue(GETGROUPS_T int)
+ endif()
+endif()
+
+# Two more type probes, then one function probe per line; every function
+# found is published under the HAVE_* name given as the second argument.
+DefineTypeIfAvailable("pid_t" PID_T)
+DefineTypeIfAvailable("intmax_t" HAVE_INTMAX_T)
+DefineFunctionIfAvailable(grantpt HAVE_GRANTPT)
+DefineFunctionIfAvailable(isascii HAVE_ISASCII)
+DefineFunctionIfAvailable(iswctype HAVE_ISWCTYPE)
+DefineFunctionIfAvailable(iswlower HAVE_ISWLOWER)
+DefineFunctionIfAvailable(iswupper HAVE_ISWUPPER)
+DefineFunctionIfAvailable(mbrlen HAVE_MBRLEN)
+DefineFunctionIfAvailable(memcmp HAVE_MEMCMP)
+DefineFunctionIfAvailable(memcpy HAVE_MEMCPY)
+DefineFunctionIfAvailable(memmove HAVE_MEMMOVE)
+DefineFunctionIfAvailable(memset HAVE_MEMSET)
+DefineFunctionIfAvailable(mkstemp HAVE_MKSTEMP)
+DefineFunctionIfAvailable(posix_openpt HAVE_POSIX_OPENPT)
+DefineFunctionIfAvailable(setenv HAVE_SETENV)
+DefineFunctionIfAvailable(setlocale HAVE_SETLOCALE)
+DefineFunctionIfAvailable(setsid HAVE_SETSID)
+DefineFunctionIfAvailable(strchr HAVE_STRCHR)
+DefineFunctionIfAvailable(strerror HAVE_STRERROR)
+DefineFunctionIfAvailable(strftime HAVE_STRFTIME)
+DefineFunctionIfAvailable(strncasecmp HAVE_STRNCASECMP)
+DefineFunctionIfAvailable(strcoll HAVE_STRCOLL)
+DefineFunctionIfAvailable(strtod HAVE_STRTOD)
+DefineFunctionIfAvailable(strtoul HAVE_STRTOUL)
+DefineFunctionIfAvailable(system HAVE_SYSTEM)
+DefineFunctionIfAvailable(tmpfile HAVE_TMPFILE)
+DefineFunctionIfAvailable(towlower HAVE_TOWLOWER)
+DefineFunctionIfAvailable(towupper HAVE_TOWUPPER)
+DefineFunctionIfAvailable(tzset HAVE_TZSET)
+DefineFunctionIfAvailable(usleep HAVE_USLEEP)
+DefineFunctionIfAvailable(wcrtomb HAVE_WCRTOMB)
+DefineFunctionIfAvailable(wcscoll HAVE_WCSCOLL)
+DefineFunctionIfAvailable(wctype HAVE_WCTYPE)
+DefineFunctionIfAvailable(mbrtowc HAVE_MBRTOWC)
+
+# Defined unconditionally, without a probe.  _Noreturn is defined to nothing.
+add_definitions(-D HAVE_STRINGIZE)
+add_definitions(-D _Noreturn=)
+
+# Use bison to regenerate awkgram.c from awkgram.y when bison is installed;
+# otherwise fall back to the awkgram.c that ships in the source tree.
+# BISON_FOUND is tested for truth, not compared as text with "TRUE": that
+# accepts any true value and keeps the if() well-formed should the variable
+# expand to nothing.
+find_package(BISON QUIET)
+if (BISON_FOUND)
+  BISON_TARGET(awkgram awkgram.y ${CMAKE_SOURCE_DIR}/awkgram.c)
+else()
+  set(BISON_awkgram_OUTPUTS ${CMAKE_SOURCE_DIR}/awkgram.c)
+endif()
+
+# Gettext is mandatory: stop the configuration when it was not found,
+# otherwise probe for the two functions gawk calls.
+find_package(Gettext REQUIRED)
+if (NOT GETTEXT_FOUND STREQUAL "TRUE")
+  message(FATAL_ERROR "Gettext not found")
+else()
+  include_directories(${GETTEXT_INCLUDE_DIR})
+  DefineFunctionIfAvailable(gettext HAVE_GETTEXT)
+  DefineFunctionIfAvailable(dcgettext HAVE_DCGETTEXT)
+endif()
+
+# Optional LaTeX tools, plus two stock CMake modules.
+find_package(LATEX)
+include(GNUInstallDirs)
+include(GetPrerequisites)
+
+# For some unknown reason the defines for the extensions
+# are written into config.h only if they are implemented
+# here and not in extension/CMakeLists.txt.
+# Library probes: is the named function found in the named library?
+DefineLibraryIfAvailable(m sin "" HAVE_LIBM)
+DefineLibraryIfAvailable(mpfr mpfr_add_si "" HAVE_MPFR)
+DefineLibraryIfAvailable(c socket "" HAVE_SOCKETS)
+DefineLibraryIfAvailable(readline readline "" HAVE_LIBREADLINE)
+# Function and header probes made on behalf of the extensions (see above).
+DefineFunctionIfAvailable(fnmatch HAVE_FNMATCH)
+DefineHFileIfAvailable(fnmatch.h HAVE_FNMATCH_H)
+DefineHFileIfAvailable(dirent.h HAVE_DIRENT_H)
+DefineFunctionIfAvailable(dirfd HAVE_DIRFD)
+DefineFunctionIfAvailable(getdtablesize HAVE_GETDTABLESIZE)
+DefineFunctionIfAvailable(select HAVE_SELECT)
+DefineFunctionIfAvailable(gettimeofday HAVE_GETTIMEOFDAY)
+DefineHFileIfAvailable(sys/select.h HAVE_SYS_SELECT_H)
+DefineFunctionIfAvailable(nanosleep HAVE_NANOSLEEP)
+DefineHFileIfAvailable(time.h HAVE_TIME_H)
+DefineFunctionIfAvailable(GetSystemTimeAsFileTime HAVE_GETSYSTEMTIMEASFILETIME)
+
diff --git a/cmake/docmaker b/cmake/docmaker
new file mode 100755
index 00000000..4af7cee1
--- /dev/null
+++ b/cmake/docmaker
@@ -0,0 +1,100 @@
+#!/bin/sh
+
+# The first parameter is the target, the file to be built.
+# All remaining parameters are dependencies (file names).
+if [ $# -lt 1 ] ; then
+ echo " $0: Incorrect number ($#) of parameters passed: $*"
+ exit 1
+fi
+OUTFILE=$1
+shift 1
+INFILES="$@"
+
+MAKEINFO="makeinfo --no-split --force"
+TROFF="groff -t -Tps -U"
+SEDME="sed -e \"s/^level0 restore/level0 restore flashme 100 72 moveto (Copyright `date '+%m-%d-%y %T'`, FSF, Inc. (all)) show/\" -e \"s/^\/level0 save def/\/level0 save def 30 -48 translate/\""
+SEDME2="sed '/%%Page: 10 10/,/0 Cg EP/d'"
+
+function BuildTarget()
+{
+ local OUTFILE=$1
+ local INFILE=""
+ local COMMAND=""
+
+ FILEBASE=${OUTFILE%.*}
+ case $OUTFILE in
+ *\.in | *\.1 | macros | cardfonts | colors | ad.block | setter.outline | \
+ gawkinet.texi | rflashlight.eps | api-figure1.fig | api-figure2.fig | api-figure3.fig | \
+ general-program.fig | process-flow.fig | statist.eps)
+ INFILE=$OUTFILE
+ ;;
+ *\.texi)
+ if [ $FILEBASE = gawk ] ; then
+ INFILE=gawktexi.in
+ else
+ INFILE=$OUTFILE.in
+ fi
+ COMMAND="awk -f sidebar.awk < $INFILE > $OUTFILE"
+ ;;
+ *\.dvi)
+ INFILE=$FILEBASE.texi
+ COMMAND="texi2dvi -q --clean $INFILE"
+ ;;
+ *\.info)
+ INFILE=$FILEBASE.texi
+ COMMAND="${MAKEINFO} $INFILE"
+ ;;
+ *\.ps)
+ if [ $FILEBASE = awkcard ] ; then
+ INFILE=awkcard.in
+ COMMAND="${TROFF} $* | ${SEDME} | cat setter.outline - | ${SEDME2} > awkcard.ps"
+ elif [ $FILEBASE = gawk.1 -o $FILEBASE = igawk.1 ] ; then
+ INFILE=$FILEBASE
+ COMMAND="groff -z -man $INFILE > $OUTFILE"
+ else
+ INFILE=$FILEBASE.dvi
+ COMMAND="dvips -q -o $OUTFILE $INFILE"
+ fi
+ ;;
+ *\.pdf)
+ INFILE=$FILEBASE.ps
+ COMMAND="ps2pdf -q $INFILE $OUTFILE"
+ ;;
+ *\.tr)
+ INFILE=$FILEBASE.in
+ COMMAND="sed 's:SRCDIR:.:' < $INFILE > $OUTFILE"
+ ;;
+ *\.nc)
+ INFILE=$FILEBASE.in
+ COMMAND="sed 's:SRCDIR:.:' < $INFILE > $OUTFILE"
+ COMMAND="${TROFF} $* | ${SEDME} | cat setter.outline - | ${SEDME2} > $FILEBASE.ps && touch $OUTFILE"
+ ;;
+ *)
+ echo " unknwon target $OUTFILE"
+ exit 1
+ esac
+
+ if [ ! -r "$INFILE" ] ; then
+ echo " $0: Cannot read input file $INFILE"
+ exit 1
+ fi
+
+ if [ -f "$OUTFILE" ] ; then
+ if [ "$INFILE" -ot "$OUTFILE" ] ; then
+ #printf " Target %15s is up-to-date\n" $OUTFILE
+ COMMAND=""
+ fi
+ fi
+ #echo " Generating $OUTFILE from $INFILE"
+ echo $COMMAND | sh -x
+ #echo "COMMAND=$COMMAND"
+}
+
+# Build all dependencies first, then build the target.
+for dep in $INFILES
+do
+ #echo $OUTFILE depends on $dep
+ BuildTarget $dep
+done
+BuildTarget $OUTFILE
+
diff --git a/cmake/package.cmake b/cmake/package.cmake
new file mode 100644
index 00000000..203a8c3b
--- /dev/null
+++ b/cmake/package.cmake
@@ -0,0 +1,54 @@
+#
+# cmake/package --- CMake input file for gawk
+#
+# Copyright (C) 2013-2014
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+# Package metadata used by every CPack generator.
+set(CPACK_PACKAGE_NAME "gawk")
+set(CPACK_PACKAGE_VENDOR "GNU Project - Free Software Foundation (FSF)")
+set(CPACK_PACKAGE_CONTACT "bug-gawk@gnu.org")
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "This is GNU Awk ${GAWK_VERSION}")
+set(CPACK_PACKAGE_VERSION "${GAWK_VERSION}")
+set(CPACK_PACKAGE_VERSION_MAJOR "${GAWK_MAJOR_VERSION}")
+set(CPACK_PACKAGE_VERSION_MINOR "${GAWK_MINOR_VERSION}")
+set(CPACK_PACKAGE_VERSION_PATCH "${GAWK_BUGFIX_VERSION}")
+set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING")
+set(CPACK_RESOURCE_FILE_README "${CMAKE_SOURCE_DIR}/README")
+
+# Generator choice: an NSIS installer on Windows; elsewhere a /usr-prefixed
+# package whose generator defaults to TGZ when none was selected.
+if (NOT WIN32)
+  set(CPACK_PACKAGING_INSTALL_PREFIX /usr)
+  if (NOT CPACK_GENERATOR)
+    set(CPACK_GENERATOR "TGZ")
+  endif()
+  message(STATUS "CPACK_GENERATOR set to ${CPACK_GENERATOR}")
+else()
+  set(CPACK_GENERATOR "NSIS")
+  set(CPACK_NSIS_INSTALL_ROOT "C:")
+  set(CPACK_NSIS_MENU_LINKS "http://www.gnu.org/software/gawk" "GNU Awk")
+  set(CPACK_NSIS_MUI_ICON "${CMAKE_SOURCE_DIR}/cmake/auk.ico")
+  set(CPACK_NSIS_MUI_UNIICON "${CMAKE_SOURCE_DIR}/cmake/auk.ico")
+  set(CPACK_NSIS_CONTACT "bug-gawk@gnu.org")
+  set(CPACK_NSIS_DISPLAY_NAME "GNU Awk")
+endif()
+
+include(CPack)
diff --git a/configh.in b/configh.in
index bfffd853..1ca2946a 100644
--- a/configh.in
+++ b/configh.in
@@ -195,6 +195,9 @@
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
+/* Define to 1 if you have the `strcasecmp' function. */
+#undef HAVE_STRCASECMP
+
/* Define to 1 if you have the `strchr' function. */
#undef HAVE_STRCHR
@@ -317,9 +320,6 @@
/* Define to 1 if the system has the type `_Bool'. */
#undef HAVE__BOOL
-/* libc is broken for regex handling */
-#undef LIBC_IS_BORKED
-
/* disable lint checks */
#undef NO_LINT
diff --git a/configure b/configure
index 4dc9ce8d..038e2081 100755
--- a/configure
+++ b/configure
@@ -5999,14 +5999,6 @@ then
CFLAGS="$CFLAGS -D_SYSV3"
fi
-case $host_os in
-mirbsd*)
-
-$as_echo "#define LIBC_IS_BORKED 1" >>confdefs.h
-
- ;;
-esac
-
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -10007,7 +9999,7 @@ for ac_func in atexit btowc fmod getgrent getgroups grantpt \
isascii iswctype iswlower iswupper mbrlen \
memcmp memcpy memcpy_ulong memmove memset \
memset_ulong mkstemp posix_openpt setenv setlocale setsid snprintf strchr \
- strerror strftime strncasecmp strcoll strtod strtoul \
+ strerror strftime strcasecmp strncasecmp strcoll strtod strtoul \
system tmpfile towlower towupper tzset usleep wcrtomb \
wcscoll wctype
do :
diff --git a/configure.ac b/configure.ac
index e7e2d5f6..8b4f188e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -119,13 +119,6 @@ dnl need -D_SYSV3 for ISC
CFLAGS="$CFLAGS -D_SYSV3"
fi
-dnl check for systems where libc is borked for regex handling
-case $host_os in
-mirbsd*)
- AC_DEFINE([LIBC_IS_BORKED], 1, [libc is broken for regex handling])
- ;;
-esac
-
dnl Set the programming language for checks. Fortunately,
dnl this only needs to be set once, since everything is in C.
AC_LANG([C])
@@ -275,7 +268,7 @@ AC_CHECK_FUNCS(atexit btowc fmod getgrent getgroups grantpt \
isascii iswctype iswlower iswupper mbrlen \
memcmp memcpy memcpy_ulong memmove memset \
memset_ulong mkstemp posix_openpt setenv setlocale setsid snprintf strchr \
- strerror strftime strncasecmp strcoll strtod strtoul \
+ strerror strftime strcasecmp strncasecmp strcoll strtod strtoul \
system tmpfile towlower towupper tzset usleep wcrtomb \
wcscoll wctype)
dnl this check is for both mbrtowc and the mbstate_t type, which is good
diff --git a/dfa.c b/dfa.c
index 1a153785..2d0e7f20 100644
--- a/dfa.c
+++ b/dfa.c
@@ -77,14 +77,6 @@ is_blank (int c)
}
#endif /* GAWK */
-#ifdef LIBC_IS_BORKED
-extern int gawk_mb_cur_max;
-#undef MB_CUR_MAX
-#define MB_CUR_MAX gawk_mb_cur_max
-#undef mbrtowc
-#define mbrtowc(a, b, c, d) (-1)
-#endif
-
/* HPUX defines these as macros in sys/param.h. */
#ifdef setbit
# undef setbit
@@ -828,10 +820,6 @@ using_utf8 (void)
wchar_t wc;
mbstate_t mbs = { 0 };
utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
-#ifdef LIBC_IS_BORKED
- if (gawk_mb_cur_max == 1)
- utf8 = 0;
-#endif
}
return utf8;
}
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
new file mode 100644
index 00000000..e12f5de0
--- /dev/null
+++ b/doc/CMakeLists.txt
@@ -0,0 +1,95 @@
+#
+# doc/CMakeLists.txt --- CMake input file for gawk
+#
+# Copyright (C) 2013
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+# DocDependency(outfile [dependency...])
+#   Create a custom target named `outfile` that runs cmake/docmaker in this
+#   source directory, with `outfile` and the listed dependencies as its
+#   arguments, and make the `doc` target depend on it.
+macro(DocDependency outfile)
+  add_dependencies(doc ${outfile})
+  add_custom_target(${outfile}
+    COMMAND ${CMAKE_SOURCE_DIR}/cmake/docmaker ${outfile} ${ARGN}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    DEPENDS ${ARGN}
+  )
+endmacro()
+
+# Wire up the documentation targets.  What can be produced depends on the
+# tools found: texi2dvi is required for anything at all, dvips adds the
+# PostScript manuals, ps2pdf adds the PDF files and the install rules.
+find_program(TEXI2DVI_CONVERTER texi2dvi)
+if (TEXI2DVI_CONVERTER)
+  add_custom_target(doc)
+  DocDependency(gawk.texi gawktexi.in rflashlight.eps api-figure1.fig api-figure2.fig api-figure3.fig general-program.fig process-flow.fig)
+  DocDependency(rflashlight.eps)
+  DocDependency(api-figure1.fig)
+  DocDependency(api-figure2.fig)
+  DocDependency(api-figure3.fig)
+  DocDependency(general-program.fig)
+  DocDependency(process-flow.fig)
+  DocDependency(gawk.dvi gawk.texi)
+  DocDependency(gawk.info gawk.texi)
+  DocDependency(gawkinet.dvi gawkinet.texi)
+  DocDependency(gawkinet.info gawkinet.texi)
+  DocDependency(gawkinet.texi statist.eps)
+  DocDependency(gawk.1.ps gawk.1)
+  DocDependency(igawk.1.ps igawk.1)
+  find_program(DVIPS_CONVERTER dvips)
+  if (DVIPS_CONVERTER)
+    DocDependency(gawk.ps gawk.dvi)
+    DocDependency(gawkinet.ps gawkinet.dvi)
+    find_program(PS2PDF_CONVERTER ps2pdf)
+    if (PS2PDF_CONVERTER)
+      DocDependency(gawk.1.pdf gawk.1.ps)
+      DocDependency(igawk.1.pdf igawk.1.ps)
+      DocDependency(gawk.pdf gawk.ps)
+      DocDependency(gawkinet.pdf gawkinet.ps)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.1.pdf DESTINATION doc)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/igawk.1.pdf DESTINATION doc)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.info DESTINATION doc)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawk.pdf DESTINATION doc)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawkinet.info DESTINATION doc)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/gawkinet.pdf DESTINATION doc)
+
+      # Reference card.
+      set(CARDSRC macros cardfonts colors awkcard.tr)
+      set(CARDSRC_N macros cardfonts no.colors awkcard.tr)
+      set(CARDFILES ${CARDSRC} ad.block awkcard.in setter.outline)
+      DocDependency(awkcard.tr awkcard.in)
+      DocDependency(awkcard.nc ${CARDFILES})
+      DocDependency(awkcard.ps ${CARDFILES})
+      DocDependency(awkcard.pdf awkcard.ps)
+      install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/awkcard.pdf DESTINATION doc)
+
+    else()
+      message(WARNING "Found no ps2pdf tool; no doc will be generated")
+      install(CODE "MESSAGE(\"doc generated only in .ps files\")")
+    endif()
+  else()
+    message(WARNING "Found no dvips tool; no doc will be generated")
+    install(CODE "MESSAGE(\"doc generated only in .dvi files and man pages in .ps files\")")
+  endif()
+else()
+  message(WARNING "Found no texi2dvi tool; no doc will be generated")
+  # No `doc` target has been created on this path, so a command cannot be
+  # attached to one with add_custom_command(TARGET doc ...).  Create the
+  # target here instead, so that building `doc` still prints the explanation.
+  add_custom_target(
+    doc
+    COMMAND echo no doc generated because of missing texi2dvi
+  )
+endif()
+
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 0500b6c9..ab2b26c7 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,94 @@
+2014-09-08 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Remove text that won't get used.
+
+2014-09-07 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Minor cleanups.
+
+2014-09-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Document builtin functions in FUNCTAB and in
+ PROCINFO["identifiers"].
+ * gawk.1: Ditto.
+
+ Unrelated:
+
+ * gawktexi.in: More stuff from reviewer comments.
+
+2014-09-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Document that indirect calls now work on built-in
+ and extension functions.
+ * gawk.1: Same.
+
+2014-09-03 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Further fixes from reviews and bug reports.
+
+2014-09-02 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Corrections to walkthrough in debugger chapter.
+ Thanks to David Ward <dlward134@gmail.com> for the problem report.
+
+2014-09-01 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Add index entry for @ - @load, @include,
+ and indirect function calls. Thanks to "Kenny McKormack" in
+ comp.lang.awk.
+
+2014-08-29 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments, and other
+ bug fixes, miscellaneous improvements.
+
+2014-08-26 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Use a different mechanism to exclude
+ exercises. Remove use of LC_ALL in an example; doesn't seem
+ to be needed anymore.
+
+ Unrelated:
+
+ * gawktexi.in: Document that MirBSD is no longer supported.
+
+2014-08-25 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Exercises are excluded from print edition.
+
+2014-08-24 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-22 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-20 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-16 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Continuing on reviewer comments.
+
+2014-08-13 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Starting on reviewer comments.
+ Update acknowledgements.
+
+2014-08-12 Arnold D. Robbins <arnold@skeeve.com>
+
+ * gawktexi.in: Cause div.awk to get into the example files.
+
2014-08-06 Arnold D. Robbins <arnold@skeeve.com>
* gawktexi.in: Misc minor additions.
diff --git a/doc/gawk.1 b/doc/gawk.1
index d5756f08..a4d66720 100644
--- a/doc/gawk.1
+++ b/doc/gawk.1
@@ -13,7 +13,7 @@
. if \w'\(rq' .ds rq "\(rq
. \}
.\}
-.TH GAWK 1 "Jul 10 2014" "Free Software Foundation" "Utility Commands"
+.TH GAWK 1 "Aug 03 2014" "Free Software Foundation" "Utility Commands"
.SH NAME
gawk \- pattern scanning and processing language
.SH SYNOPSIS
@@ -1132,9 +1132,14 @@ For each identifier, the value of the element is one of the following:
\fB"array"\fR
The identifier is an array.
.TP
+\fB"builtin"\fR
+The identifier is a built-in function.
+.TP
\fB"extension"\fR
The identifier is an extension function loaded via
-.BR @load .
+.B @load
+or
+.BR \-l .
.TP
\fB"scalar"\fR
The identifier is a scalar.
@@ -3287,7 +3292,7 @@ sign, like so:
.RS
.ft B
.nf
-function myfunc()
+function myfunc()
{
print "myfunc called"
\&.\|.\|.
@@ -3301,6 +3306,8 @@ function myfunc()
.fi
.ft R
.RE
+As of version 4.1.2, this works with user-defined functions,
+built-in functions, and extension functions.
.PP
If
.B \-\^\-lint
diff --git a/doc/gawk.info b/doc/gawk.info
index ec16647a..65ae1509 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -169,11 +169,17 @@ in (a) below. A copy of the license is included in the section entitled
* Regexp Usage:: How to Use Regular Expressions.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
+<<<<<<< HEAD
* Bracket Expressions:: What can go between '[...]'.
* GNU Regexp Operators:: Operators specific to GNU software.
* Case-sensitivity:: How to do case-insensitive matching.
+=======
+* Bracket Expressions:: What can go between `[...]'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
@@ -189,8 +195,13 @@ in (a) below. A copy of the license is included in the section entitled
* Regexp Field Splitting:: Using regexps as the field separator.
* Single Character Fields:: Making each character a separate
field.
+<<<<<<< HEAD
* Command Line Field Separator:: Setting 'FS' from the
command-line.
+=======
+* Command Line Field Separator:: Setting `FS' from the
+ command line.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Full Line Fields:: Making the full line be a single
field.
* Field Splitting Summary:: Some final points and a summary table.
@@ -216,7 +227,7 @@ in (a) below. A copy of the license is included in the section entitled
'getline'.
* Getline Summary:: Summary of 'getline' Variants.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on
+* Command-line directories:: What happens if you put a directory on
the command line.
* Input Summary:: Input summary.
* Input Exercises:: Exercises.
@@ -244,7 +255,7 @@ in (a) below. A copy of the license is included in the section entitled
* Close Files And Pipes:: Closing Input and Output Files and
Pipes.
* Output Summary:: Output summary.
-* Output exercises:: Exercises.
+* Output Exercises:: Exercises.
* Values:: Constants, Variables, and Regular
Expressions.
* Constants:: String, numeric and regexp constants.
@@ -255,7 +266,7 @@ in (a) below. A copy of the license is included in the section entitled
* Variables:: Variables give names to values for
later use.
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line
+* Assignment Options:: Setting variables on the command line
and a summary of command-line syntax.
This is an advanced method of input.
* Conversion:: The conversion of strings to numbers
@@ -431,7 +442,7 @@ in (a) below. A copy of the license is included in the section entitled
information.
* Walking Arrays:: A function to walk arrays of arrays.
* Library Functions Summary:: Summary of library functions.
-* Library exercises:: Exercises.
+* Library Exercises:: Exercises.
* Running Examples:: How to run these examples.
* Clones:: Clones of common utilities.
* Cut Program:: The 'cut' utility.
@@ -770,6 +781,7 @@ Preface
Several kinds of tasks occur repeatedly when working with text files.
You might want to extract certain lines and discard the rest. Or you
may need to make changes wherever certain patterns appear, but leave the
+<<<<<<< HEAD
rest of the file alone. Writing single-use programs for these tasks in
languages such as C, C++, or Java is time-consuming and inconvenient.
Such jobs are often easier with 'awk'. The 'awk' utility interprets a
@@ -777,6 +789,13 @@ special-purpose programming language that makes it easy to handle simple
data-reformatting jobs.
The GNU implementation of 'awk' is called 'gawk'; if you invoke it
+=======
+rest of the file alone. Such jobs are often easy with `awk'. The
+`awk' utility interprets a special-purpose programming language that
+makes it easy to handle simple data-reformatting jobs.
+
+ The GNU implementation of `awk' is called `gawk'; if you invoke it
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
with the proper options or environment variables (*note Options::), it
is fully compatible with the POSIX(1) specification of the 'awk'
language and with the Unix version of 'awk' maintained by Brian
@@ -816,12 +835,21 @@ such as input/output (I/O) redirection and pipes.
Implementations of the 'awk' language are available for many
different computing environments. This Info file, while describing the
+<<<<<<< HEAD
'awk' language in general, also describes the particular implementation
of 'awk' called 'gawk' (which stands for "GNU 'awk'"). 'gawk' runs on a
broad range of Unix systems, ranging from Intel(R)-architecture PC-based
computers up through large-scale systems. 'gawk' has also been ported
to Mac OS X, Microsoft Windows (all versions) and OS/2 PCs, and OpenVMS.
(Some other, obsolete systems to which 'gawk' was once ported are no
+=======
+`awk' language in general, also describes the particular implementation
+of `awk' called `gawk' (which stands for "GNU `awk'"). `gawk' runs on
+a broad range of Unix systems, ranging from Intel-architecture PC-based
+computers up through large-scale systems. `gawk' has also been ported
+to Mac OS X, Microsoft Windows (all versions) and OS/2 PCs, and OpenVMS.
+(Some other, obsolete systems to which `gawk' was once ported are no
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
longer supported and the code for those systems has been removed.)
* Menu:
@@ -839,7 +867,12 @@ longer supported and the code for those systems has been removed.)
---------- Footnotes ----------
+<<<<<<< HEAD
(1) The 2008 POSIX standard is accessible online at <http://www.opengroup.org/onlinepubs/9699919799/>.
+=======
+ (1) The 2008 POSIX standard is accessible online at
+`http://www.opengroup.org/onlinepubs/9699919799/'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(2) These commands are available on POSIX-compliant systems, as well
as on traditional Unix-based systems. If you are using some other
@@ -878,11 +911,19 @@ designers provided feedback for the POSIX specification.
Paul Rubin wrote the GNU implementation, 'gawk', in 1986. Jay
Fenlason completed it, with advice from Richard Stallman. John Woods
+<<<<<<< HEAD
contributed parts of the code as well. In 1988 and 1989, David Trueman,
with help from me, thoroughly reworked 'gawk' for compatibility with the
newer 'awk'. Circa 1994, I became the primary maintainer. Current
development focuses on bug fixes, performance improvements, standards
compliance, and occasionally, new features.
+=======
+contributed parts of the code as well. In 1988 and 1989, David
+Trueman, with help from me, thoroughly reworked `gawk' for compatibility
+with the newer `awk'. Circa 1994, I became the primary maintainer.
+Current development focuses on bug fixes, performance improvements,
+standards compliance and, occasionally, new features.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
In May of 1997, Ju"rgen Kahrs felt the need for network access from
'awk', and with a little help from me, set about adding features to do
@@ -906,6 +947,7 @@ A Rose by Any Other Name
The 'awk' language has evolved over the years. Full details are
provided in *note Language History::. The language described in this
+<<<<<<< HEAD
Info file is often referred to as "new 'awk'" ('nawk').
For some time after new 'awk' was introduced, there were systems with
@@ -923,6 +965,21 @@ systems.) All other modern systems use some version of new 'awk'.(1)
system, which is what you should use when running your programs. (Of
course, if you're reading this Info file, chances are good that you have
'gawk'!)
+=======
+Info file is often referred to as "new `awk'". By analogy, the
+original version of `awk' is referred to as "old `awk'."
+
+ Today, on most systems, when you run the `awk' utility, you get some
+version of new `awk'.(1) If your system's standard `awk' is the old
+one, you will see something like this if you try the test program:
+
+ $ awk 1 /dev/null
+ error--> awk: syntax error near line 1
+ error--> awk: bailing out near line 1
+
+In this case, you should find a version of new `awk', or just install
+`gawk'!
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Throughout this Info file, whenever we refer to a language feature
that should be available in any complete implementation of POSIX 'awk',
@@ -931,7 +988,13 @@ specific to the GNU implementation, we use the term 'gawk'.
---------- Footnotes ----------
+<<<<<<< HEAD
(1) Many of these systems use 'gawk' for their 'awk' implementation!
+=======
+ (1) Only Solaris systems still use an old `awk' for the default
+`awk' utility. A more modern `awk' lives in `/usr/xpg6/bin' on these
+systems.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: This Manual, Next: Conventions, Prev: Names, Up: Preface
@@ -1084,12 +1147,21 @@ language. A single Texinfo source file is used to produce both the
printed and online versions of the documentation. This minor node
briefly documents the typographical conventions used in Texinfo.
+<<<<<<< HEAD
Examples you would type at the command line are preceded by the
common shell primary and secondary prompts, '$' and '>'. Input that you
type is shown 'like this'. Output from the command is preceded by the
glyph "-|". This typically represents the command's standard output.
Error messages, and other output on the command's standard error, are
preceded by the glyph "error->". For example:
+=======
+ Examples you would type at the command line are preceded by the
+common shell primary and secondary prompts, `$' and `>'. Input that
+you type is shown `like this'. Output from the command is preceded by
+the glyph "-|". This typically represents the command's standard
+output. Error messages, and other output on the command's standard
+error, are preceded by the glyph "error-->". For example:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
$ echo hi on stdout
-| hi on stdout
@@ -1103,6 +1175,10 @@ key and another key, at the same time. For example, a 'Ctrl-d' is typed
by first pressing and holding the 'CONTROL' key, next pressing the 'd'
key and finally releasing both keys.
+ For the sake of brevity, throughout this Info file, we refer to
+Brian Kernighan's version of `awk' as "BWK `awk'." (*Note Other
+Versions::, for information on his and other versions.)
+
Dark Corners
------------
@@ -1153,6 +1229,7 @@ released but remains in an early stage of development.
Until the GNU operating system is more fully developed, you should
consider using GNU/Linux, a freely distributable, Unix-like operating
+<<<<<<< HEAD
system for Intel(R), Power Architecture, Sun SPARC, IBM S/390, and other
systems.(2) Many GNU/Linux distributions are available for download
from the Internet.
@@ -1164,6 +1241,12 @@ use recent versions of 'gawk' for their versions of 'awk'. NetBSD
(http://www.openbsd.org) are three of the most popular ones, but there
are others.)
+=======
+system for Intel, Power Architecture, Sun SPARC, IBM S/390, and other
+systems.(2) Many GNU/Linux distributions are available for download
+from the Internet.
+
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
The Info file itself has gone through a number of previous editions.
Paul Rubin wrote the very first draft of 'The GAWK Manual'; it was
around 40 pages in size. Diane Close and Richard Stallman improved it,
@@ -1298,16 +1381,30 @@ a pleasure working with this team of fine people.
Notable code and documentation contributions were made by a number of
people. *Note Contributors::, for the full list.
+<<<<<<< HEAD
Thanks to Patrice Dumas for the new 'makeinfo' program. Thanks to
+=======
+ Thanks to Patrice Dumas for the new `makeinfo' program. Thanks to
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Karl Berry who continues to work to keep the Texinfo markup language
sane.
+ Robert P.J. Day, Michael Brennan and Brian Kernighan kindly acted as
+reviewers for the 2015 edition of this Info file. Their feedback helped
+improve the final work.
+
I would like to thank Brian Kernighan for invaluable assistance
during the testing and debugging of 'gawk', and for ongoing help and
advice in clarifying numerous points about the language. We could not
have done nearly as good a job on either 'gawk' or its documentation
without his help.
+ Brian is in a class by himself as a programmer and technical author.
+I have to thank him (yet again) for his ongoing friendship and the role
+model he has been for me for close to 30 years! Having him as a
+reviewer is an exciting privilege. It has also been extremely
+humbling...
+
I must thank my wonderful wife, Miriam, for her patience through the
many versions of this project, for her proofreading, and for sharing me
with the computer. I would like to thank my parents for their love, and
@@ -1449,17 +1546,38 @@ end-of-file character may be different. For example, on OS/2, it is
'Ctrl-z'.)
As an example, the following program prints a friendly piece of
+<<<<<<< HEAD
advice (from Douglas Adams's 'The Hitchhiker's Guide to the Galaxy'), to
keep you from worrying about the complexities of computer programming(1)
('BEGIN' is a feature we haven't discussed yet):
+=======
+advice (from Douglas Adams's `The Hitchhiker's Guide to the Galaxy'),
+to keep you from worrying about the complexities of computer
+programming:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
- $ awk "BEGIN { print \"Don't Panic!\" }"
+ $ awk 'BEGIN { print "Don\47t Panic!" }'
-| Don't Panic!
+<<<<<<< HEAD
This program does not read any input. The '\' before each of the
inner double quotes is necessary because of the shell's quoting
rules--in particular because it mixes both single quotes and double
quotes.(2)
+=======
+ `awk' executes statements associated with `BEGIN' before reading any
+input. If there are no other statements in your program, as is the
+case here, `awk' just stops, instead of trying to read input it doesn't
+know how to process. The `\47' is a magic way of getting a single
+quote into the program, without having to engage in ugly shell quoting
+tricks.
+
+ NOTE: As a side note, if you use Bash as your shell, you should
+ execute the command `set +H' before running this program
+ interactively, to disable the C shell-style command history, which
+ treats `!' as a special character. We recommend putting this
+ command into your personal startup file.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
This next simple 'awk' program emulates the 'cat' utility; it copies
whatever you type on the keyboard to its standard output (why this works
@@ -1476,6 +1594,7 @@ is explained shortly).
-| What, me worry?
Ctrl-d
+<<<<<<< HEAD
---------- Footnotes ----------
(1) If you use Bash as your shell, you should execute the command
@@ -1487,6 +1606,8 @@ We recommend putting this command into your personal startup file.
the program text, double quotes are needed here in order to put the
single quote into the message.
+=======
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Long, Next: Executable Scripts, Prev: Read Terminal, Up: Running gawk
@@ -1499,9 +1620,15 @@ tell 'awk' to use that file for its program, you type:
awk -f SOURCE-FILE INPUT-FILE1 INPUT-FILE2 ...
+<<<<<<< HEAD
The '-f' instructs the 'awk' utility to get the 'awk' program from
the file SOURCE-FILE. Any file name can be used for SOURCE-FILE. For
example, you could put the program:
+=======
+ The `-f' instructs the `awk' utility to get the `awk' program from
+the file SOURCE-FILE (*note Options::). Any file name can be used for
+SOURCE-FILE. For example, you could put the program:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
BEGIN { print "Don't Panic!" }
@@ -1539,9 +1666,15 @@ like this:
BEGIN { print "Don't Panic!" }
+<<<<<<< HEAD
After making this file executable (with the 'chmod' utility), simply
type 'advice' at the shell and the system arranges to run 'awk'(2) as if
you had typed 'awk -f advice':
+=======
+After making this file executable (with the `chmod' utility), simply
+type `advice' at the shell and the system arranges to run `awk' as if
+you had typed `awk -f advice':
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
$ chmod +x advice
$ advice
@@ -1555,7 +1688,28 @@ at the shell.)
program that users can invoke without their having to know that the
program is written in 'awk'.
+<<<<<<< HEAD
Portability Issues with '#!'
+=======
+ Understanding `#!'
+
+ `awk' is an "interpreted" language. This means that the `awk'
+utility reads your program and then processes your data according to
+the instructions in your program. (This is different from a "compiled"
+language such as C, where your program is first compiled into machine
+code that is executed directly by your system's hardware.) The `awk'
+utility is thus termed an "interpreter". Many modern languages are
+interpreted.
+
+ The line beginning with `#!' lists the full file name of an
+interpreter to run and a single optional initial command-line argument
+to pass to that interpreter. The operating system then runs the
+interpreter with the given argument and the full argument list of the
+executed program. The first argument in the list is the full file name
+of the `awk' program. The rest of the argument list contains either
+options to `awk', or data files, or both. Note that on many systems
+`awk' may be found in `/usr/bin' instead of in `/bin'. Caveat Emptor.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Some systems limit the length of the interpreter name to 32
characters. Often, this can be dealt with by using a symbolic link.
@@ -1577,6 +1731,7 @@ the name of your script ('advice'). (d.c.) Don't rely on the value of
(1) The '#!' mechanism works on GNU/Linux systems, BSD-based systems
and commercial Unix systems.
+<<<<<<< HEAD
(2) The line beginning with '#!' lists the full file name of an
interpreter to run and an optional initial command-line argument to pass
to that interpreter. The operating system then runs the interpreter
@@ -1586,6 +1741,8 @@ program. The first argument in the list is the full file name of the
'awk', or data files, or both. Note that on many systems 'awk' may be
found in '/usr/bin' instead of in '/bin'. Caveat Emptor.
+=======
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Comments, Next: Quoting, Prev: Executable Scripts, Up: Running gawk
@@ -1713,9 +1870,15 @@ the quoting rules.
Note that the single quote is not special within double quotes.
* Null strings are removed when they occur as part of a non-null
+<<<<<<< HEAD
command-line argument, while explicit null objects are kept.
For example, to specify that the field separator 'FS' should be set
to the null string, use:
+=======
+ command-line argument, while explicit null objects are kept. For
+ example, to specify that the field separator `FS' should be set to
+ the null string, use:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
awk -F "" 'PROGRAM' FILES # correct
@@ -1808,10 +1971,17 @@ one "record".
In the data file 'mail-list', each record contains the name of a
person, his/her phone number, his/her email-address, and a code for
+<<<<<<< HEAD
their relationship with the author of the list. An 'A' in the last
column means that the person is an acquaintance. An 'F' in the last
column means that the person is a friend. An 'R' means that the person
is a relative:
+=======
+their relationship with the author of the list. The columns are
+aligned using spaces. An `A' in the last column means that the person
+is an acquaintance. An `F' in the last column means that the person is
+a friend. An `R' means that the person is a relative:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Amelia 555-5553 amelia.zodiacusque@gmail.com F
Anthony 555-3412 anthony.asserturo@hotmail.com A
@@ -1830,7 +2000,8 @@ shipments during the year. Each record contains the month, the number
of green crates shipped, the number of red boxes shipped, the number of
orange bags shipped, and the number of blue packages shipped,
respectively. There are 16 entries, covering the 12 months of last year
-and the first four months of the current year.
+and the first four months of the current year. An empty line separates
+the data for the two years.
Jan 13 25 15 115
Feb 15 32 24 226
@@ -1910,11 +2081,6 @@ often more than one way to do things in 'awk'. At some point, you may
want to look back at these examples and see if you can come up with
different ways to do the same things shown here:
- * Print the length of the longest input line:
-
- awk '{ if (length($0) > max) max = length($0) }
- END { print max }' data
-
* Print every line that is longer than 80 characters:
awk 'length($0) > 80' data
@@ -1922,14 +2088,33 @@ different ways to do the same things shown here:
The sole rule has a relational expression as its pattern and it has
no action--so it uses the default action, printing the record.
+<<<<<<< HEAD
* Print the length of the longest line in 'data':
+=======
+ * Print the length of the longest input line:
- expand data | awk '{ if (x < length()) x = length() }
+ awk '{ if (length($0) > max) max = length($0) }
+ END { print max }' data
+
+ The code associated with `END' executes after all input has been
+ read; it's the other side of the coin to `BEGIN'.
+
+ * Print the length of the longest line in `data':
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
+
+ expand data | awk '{ if (x < length($0)) x = length($0) }
END { print "maximum line length is " x }'
+<<<<<<< HEAD
The input is processed by the 'expand' utility to change TABs into
spaces, so the widths compared are actually the right-margin
columns.
+=======
+ This example differs slightly from the previous one: The input is
+ processed by the `expand' utility to change TABs into spaces, so
+ the widths compared are actually the right-margin columns, as
+ opposed to the number of input characters on each line.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Print every line that has at least one field:
@@ -2026,8 +2211,8 @@ summarize, select, and rearrange the output of another utility. It uses
features that haven't been covered yet, so don't worry if you don't
understand all the details:
- LC_ALL=C ls -l | awk '$6 == "Nov" { sum += $5 }
- END { print sum }'
+ ls -l | awk '$6 == "Nov" { sum += $5 }
+ END { print sum }'
This command prints the total number of bytes in all the files in the
current directory that were last modified in November (of any year).
@@ -2204,9 +2389,15 @@ built-in functions for working with timestamps, performing bit
manipulation, for runtime string translation (internationalization),
determining the type of a variable, and array sorting.
+<<<<<<< HEAD
As we develop our presentation of the 'awk' language, we introduce
most of the variables and many of the functions. They are described
systematically in *note Built-in Variables::, and *note Built-in::.
+=======
+ As we develop our presentation of the `awk' language, we introduce
+most of the variables and many of the functions. They are described
+systematically in *note Built-in Variables::, and in *note Built-in::.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: When, Next: Intro Summary, Prev: Other Features, Up: Getting Started
@@ -2232,14 +2423,20 @@ edit-compile-test-debug cycle of software development.
Complex programs have been written in 'awk', including a complete
retargetable assembler for eight-bit microprocessors (*note Glossary::,
for more information), and a microcode assembler for a special-purpose
+<<<<<<< HEAD
Prolog computer. While the original 'awk''s capabilities were strained
by tasks of such complexity, modern versions are more capable. Even
Brian Kernighan's version of 'awk' has fewer predefined limits, and
those that it has are much larger than they used to be.
+=======
+Prolog computer. While the original `awk''s capabilities were strained
+by tasks of such complexity, modern versions are more capable.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
If you find yourself writing 'awk' scripts of more than, say, a few
hundred lines, you might consider using a different programming
language. The shell is good at string and pattern matching; in
+<<<<<<< HEAD
addition, it allows powerful use of the system utilities. More
conventional languages, such as C, C++, and Java, offer better
facilities for system programming and for managing the complexity of
@@ -2248,6 +2445,15 @@ programming and access to system facilities. Programs in these
languages may require more lines of source code than the equivalent
'awk' programs, but they are easier to maintain and usually run more
efficiently.
+=======
+addition, it allows powerful use of the system utilities. Python
+offers a nice balance between high-level ease of programming and access
+to system facilities.(1)
+
+ ---------- Footnotes ----------
+
+ (1) Other popular scripting languages include Ruby and Perl.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Intro Summary, Prev: When, Up: Getting Started
@@ -2257,8 +2463,16 @@ File: gawk.info, Node: Intro Summary, Prev: When, Up: Getting Started
* Programs in 'awk' consist of PATTERN-ACTION pairs.
+<<<<<<< HEAD
* Use either 'awk 'PROGRAM' FILES' or 'awk -f PROGRAM-FILE FILES' to
run 'awk'.
+=======
+ * An ACTION without a PATTERN always runs. The default ACTION for a
+ pattern without one is `{ print $0 }'.
+
+ * Use either `awk 'PROGRAM' FILES' or `awk -f PROGRAM-FILE FILES' to
+ run `awk'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* You may use the special '#!' header line to create 'awk' programs
that are directly executable.
@@ -2411,9 +2625,15 @@ The following list describes options mandated by the POSIX standard:
'-c'
'--traditional'
Specify "compatibility mode", in which the GNU extensions to the
+<<<<<<< HEAD
'awk' language are disabled, so that 'gawk' behaves just like Brian
Kernighan's version of 'awk'. *Note POSIX/GNU::, which summarizes the
extensions. Also see *note Compatibility Mode::.
+=======
+ `awk' language are disabled, so that `gawk' behaves just like BWK
+ `awk'. *Note POSIX/GNU::, which summarizes the extensions. Also
+ see *note Compatibility Mode::.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
'-C'
'--copyright'
@@ -2463,10 +2683,17 @@ The following list describes options mandated by the POSIX standard:
disallowed.
This option is particularly necessary for World Wide Web CGI
+<<<<<<< HEAD
applications that pass arguments through the URL; using this option
prevents a malicious (or other) user from passing in options,
assignments, or 'awk' source code (via '--source') to the CGI
application. This option should be used with '#!' scripts (*note
+=======
+ applications that pass arguments through the URL; using this
+ option prevents a malicious (or other) user from passing in
+ options, assignments, or `awk' source code (via `-e') to the CGI
+ application. This option should be used with `#!' scripts (*note
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Executable Scripts::), like so:
#! /usr/local/bin/gawk -E
@@ -2483,6 +2710,7 @@ The following list describes options mandated by the POSIX standard:
'-h'
'--help'
Print a "usage" message summarizing the short and long style
+<<<<<<< HEAD
options that 'gawk' accepts and then exit.
'-i' SOURCE-FILE
@@ -2501,6 +2729,27 @@ The following list describes options mandated by the POSIX standard:
'-l' EXT
'--load' EXT
Load a dynamic extension named EXT. Extensions are stored as
+=======
+ options that `gawk' accepts and then exit.
+
+`-i' SOURCE-FILE
+`--include' SOURCE-FILE
+ Read `awk' source library from SOURCE-FILE. This option is
+ completely equivalent to using the `@include' directive inside
+ your program. This option is very similar to the `-f' option, but
+ there are two important differences. First, when `-i' is used,
+ the program source is not loaded if it has been previously loaded,
+ whereas with `-f', `gawk' always loads the file. Second, because
+ this option is intended to be used with code libraries, `gawk'
+ does not recognize such files as constituting main program input.
+ Thus, after processing an `-i' argument, `gawk' still expects to
+ find the main source code via the `-f' option or on the command
+ line.
+
+`-l' EXT
+`--load' EXT
+ Load a dynamic extension named EXT. Extensions are stored as
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
system shared libraries. This option searches for the library
using the 'AWKLIBPATH' environment variable. The correct library
suffix for your platform will be supplied by default, so it need
@@ -2513,8 +2762,13 @@ The following list describes options mandated by the POSIX standard:
'-L'[VALUE]
'--lint'['='VALUE]
Warn about constructs that are dubious or nonportable to other
+<<<<<<< HEAD
'awk' implementations. No space is allowed between the '-L' and
VALUE, if VALUE is supplied. Some warnings are issued when 'gawk'
+=======
+ `awk' implementations. No space is allowed between the `-L' and
+ VALUE, if VALUE is supplied. Some warnings are issued when `gawk'
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
first reads your program. Others are issued at runtime, as your
program executes. With an optional argument of 'fatal', lint
warnings become fatal errors. This may be drastic, but its use
@@ -2590,8 +2844,14 @@ The following list describes options mandated by the POSIX standard:
* Newlines are not allowed after '?' or ':' (*note Conditional
Exp::).
+<<<<<<< HEAD
* Specifying '-Ft' on the command line does not set the value of
'FS' to be a single TAB character (*note Field Separators::).
+=======
+ * Specifying `-Ft' on the command line does not set the value
+ of `FS' to be a single TAB character (*note Field
+ Separators::).
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* The locale's decimal point character is used for parsing input
data (*note Locales::).
@@ -2651,6 +2911,7 @@ terminate it. (You may also use '-f -' to read program source from the
standard input but then you will not be able to also use the standard
input as a source of data.)
+<<<<<<< HEAD
Because it is clumsy using the standard 'awk' mechanisms to mix
source file and command-line 'awk' programs, 'gawk' provides the
'--source' option. This does not require you to pre-empt the standard
@@ -2662,6 +2923,17 @@ the command line.
If no '-f' or '--source' option is specified, then 'gawk' uses the
first non-option command-line argument as the text of the program source
code.
+=======
+ Because it is clumsy using the standard `awk' mechanisms to mix
+source file and command-line `awk' programs, `gawk' provides the `-e'
+option. This does not require you to pre-empt the standard input for
+your source code; it allows you to easily mix command-line and library
+source code (*note AWKPATH Variable::). As with `-f', the `-e' and `-i'
+options may also be used multiple times on the command line.
+
+ If no `-f' or `-e' option is specified, then `gawk' uses the first
+non-option command-line argument as the text of the program source code.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
If the environment variable 'POSIXLY_CORRECT' exists, then 'gawk'
behaves in strict POSIX mode, exactly as if you had supplied '--posix'.
@@ -2710,6 +2982,10 @@ including variable assignments, are included. As each element of 'ARGV'
is processed, 'gawk' sets the variable 'ARGIND' to the index in 'ARGV'
of the current element.
+ Changing `ARGC' and `ARGV' in your `awk' program lets you control
+how `awk' processes the input files; this is described in more detail
+in *note ARGC and ARGV::.
+
The distinction between file name arguments and variable-assignment
arguments is made when 'awk' is about to open the next input file. At
that point in execution, it checks the file name to see whether it is
@@ -2768,11 +3044,19 @@ SOME_COMMAND, and finally it reads 'file2'.
You may also use '"-"' to name standard input when reading files with
'getline' (*note Getline/File::).
+<<<<<<< HEAD
In addition, 'gawk' allows you to specify the special file name
'/dev/stdin', both on the command line and with 'getline'. Some other
versions of 'awk' also support this, but it is not standard. (Some
operating systems provide a '/dev/stdin' file in the file system;
however, 'gawk' always processes this file name itself.)
+=======
+ In addition, `gawk' allows you to specify the special file name
+`/dev/stdin', both on the command line and with `getline'. Some other
+versions of `awk' also support this, but it is not standard. (Some
+operating systems provide a `/dev/stdin' file in the filesystem;
+however, `gawk' always processes this file name itself.)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Environment Variables, Next: Exit Status, Prev: Naming Standard Input, Up: Invoking Gawk
@@ -2796,8 +3080,13 @@ File: gawk.info, Node: AWKPATH Variable, Next: AWKLIBPATH Variable, Up: Envir
2.5.1 The 'AWKPATH' Environment Variable
----------------------------------------
+<<<<<<< HEAD
The previous minor node described how 'awk' program files can be named
on the command line with the '-f' option. In most 'awk'
+=======
+The previous minor node described how `awk' program files can be named
+on the command line with the `-f' option. In most `awk'
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
implementations, you must supply a precise path name for each program
file, unless the file is in the current directory. But in 'gawk', if
the file name supplied to the '-f' or '-i' options does not contain a
@@ -2816,11 +3105,19 @@ a standard directory in the default path and then specified on the
command line with a short file name. Otherwise, the full file name
would have to be typed for each file.
+<<<<<<< HEAD
By using the '-i' option, or the '--source' and '-f' options, your
command-line 'awk' programs can use facilities in 'awk' library files
(*note Library Functions::). Path searching is not done if 'gawk' is in
compatibility mode. This is true for both '--traditional' and
'--posix'. *Note Options::.
+=======
+ By using the `-i' option, or the `-e' and `-f' options, your
+command-line `awk' programs can use facilities in `awk' library files
+(*note Library Functions::). Path searching is not done if `gawk' is
+in compatibility mode. This is true for both `--traditional' and
+`--posix'. *Note Options::.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
If the source code is not found after the initial search, the path is
searched again after adding the default '.awk' suffix to the file name.
@@ -2881,8 +3178,13 @@ A number of other environment variables affect 'gawk''s behavior, but
they are more specialized. Those in the following list are meant to be
used by regular users.
+<<<<<<< HEAD
'POSIXLY_CORRECT'
Causes 'gawk' to switch to POSIX compatibility mode, disabling all
+=======
+`POSIXLY_CORRECT'
+ Causes `gawk' to switch to POSIX compatibility mode, disabling all
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
traditional and GNU extensions. *Note Options::.
'GAWK_SOCK_RETRIES'
@@ -2900,6 +3202,7 @@ used by regular users.
before returning with an error. *Note Read Timeout::.
The environment variables in the following list are meant for use by
+<<<<<<< HEAD
the 'gawk' developers for testing and tuning. They are subject to
change. The variables are:
@@ -2914,6 +3217,22 @@ change. The variables are:
'AWK_HASH'
If this variable exists with a value of 'gst', 'gawk' switches to
+=======
+the `gawk' developers for testing and tuning. They are subject to
+change. The variables are:
+
+`AWKBUFSIZE'
+ This variable only affects `gawk' on POSIX-compliant systems.
+ With a value of `exact', `gawk' uses the size of each input file
+ as the size of the memory buffer to allocate for I/O. Otherwise,
+ the value should be a number, and `gawk' uses that number as the
+ size of the buffer to allocate. (When this variable is not set,
+ `gawk' uses the smaller of the file's size and the "default"
+ blocksize, which is usually the filesystem's I/O blocksize.)
+
+`AWK_HASH'
+ If this variable exists with a value of `gst', `gawk' switches to
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
using the hash function from GNU Smalltalk for managing arrays.
This function may be marginally faster than the standard function.
@@ -3139,9 +3458,15 @@ File: gawk.info, Node: Invoking Summary, Prev: Undocumented, Up: Invoking Gaw
* Use either 'awk 'PROGRAM' FILES' or 'awk -f PROGRAM-FILE FILES' to
run 'awk'.
+<<<<<<< HEAD
* The three standard 'awk' options are '-f', '-F' and '-v'. 'gawk'
supplies these and many others, as well as corresponding GNU-style
long options.
+=======
+ * The three standard options for all versions of `awk' are `-f',
+ `-F' and `-v'. `gawk' supplies these and many others, as well as
+ corresponding GNU-style long options.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Non-option command-line arguments are usually treated as file
names, unless they have the form 'VAR=VALUE', in which case they
@@ -3184,22 +3509,36 @@ strings. Because regular expressions are such a fundamental part of
A regular expression enclosed in slashes ('/') is an 'awk' pattern
that matches every input record whose text belongs to that set. The
simplest regular expression is a sequence of letters, numbers, or both.
+<<<<<<< HEAD
Such a regexp matches any string that contains that sequence. Thus, the
regexp 'foo' matches any string containing 'foo'. Therefore, the
pattern '/foo/' matches any input record containing the three characters
'foo' _anywhere_ in the record. Other kinds of regexps let you specify
more complicated classes of strings.
+=======
+Such a regexp matches any string that contains that sequence. Thus,
+the regexp `foo' matches any string containing `foo'. Therefore, the
+pattern `/foo/' matches any input record containing the three adjacent
+characters `foo' _anywhere_ in the record. Other kinds of regexps let
+you specify more complicated classes of strings.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Menu:
* Regexp Usage:: How to Use Regular Expressions.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
+<<<<<<< HEAD
* Bracket Expressions:: What can go between '[...]'.
* GNU Regexp Operators:: Operators specific to GNU software.
* Case-sensitivity:: How to do case-insensitive matching.
+=======
+* Bracket Expressions:: What can go between `[...]'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.

@@ -3326,15 +3665,30 @@ apply to both string constants and regexp constants:
'\xHH...'
The hexadecimal value HH, where HH stands for a sequence of
+<<<<<<< HEAD
hexadecimal digits ('0'-'9', and either 'A'-'F' or 'a'-'f'). Like
the same construct in ISO C, the escape sequence continues until
the first nonhexadecimal digit is seen. (c.e.) However, using
more than two hexadecimal digits produces undefined results. (The
'\x' escape sequence is not allowed in POSIX 'awk'.)
+=======
+ hexadecimal digits (`0'-`9', and either `A'-`F' or `a'-`f'). A
+ maximum of two digits are allowed after the `\x'. Any further
+ hexadecimal digits are treated as simple letters or numbers.
+ (c.e.)
+
+ CAUTION: In ISO C, the escape sequence continues until the
+ first nonhexadecimal digit is seen. For many years, `gawk'
+ would continue incorporating hexadecimal digits into the
+ value until a non-hexadecimal digit or the end of the string
+ was encountered. However, using more than two hexadecimal
+ digits produces undefined results.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
'\/'
A literal slash (necessary for regexp constants only). This
sequence is used when you want to write a regexp constant that
+<<<<<<< HEAD
contains a slash. Because the regexp is delimited by slashes, you
need to escape the slash that is part of the pattern, in order to
tell 'awk' to keep processing the rest of the regexp.
@@ -3345,6 +3699,21 @@ apply to both string constants and regexp constants:
contains a double quote. Because the string is delimited by double
quotes, you need to escape the quote that is part of the string, in
order to tell 'awk' to keep processing the rest of the string.
+=======
+ contains a slash (such as `/.*:\/home\/[[:alnum:]]+:.*/'; the
+ `[[:alnum:]]' notation is discussed shortly, in *note Bracket
+ Expressions::). Because the regexp is delimited by slashes, you
+ need to escape any slash that is part of the pattern, in order to
+ tell `awk' to keep processing the rest of the regexp.
+
+`\"'
+ A literal double quote (necessary for string constants only).
+ This sequence is used when you want to write a string constant
+ that contains a double quote (such as `"He said \"hi!\" to her."').
+ Because the string is delimited by double quotes, you need to
+ escape any quote that is part of the string, in order to tell
+ `awk' to keep processing the rest of the string.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
In 'gawk', a number of additional two-character sequences that begin
with a backslash have special meaning in regexps. *Note GNU Regexp
@@ -3379,12 +3748,21 @@ is not one of the characters previously listed, POSIX 'awk' purposely
leaves what happens as undefined. There are two choices:
Strip the backslash out
+<<<<<<< HEAD
This is what Brian Kernighan's 'awk' and 'gawk' both do. For
example, '"a\qc"' is the same as '"aqc"'. (Because this is such an
easy bug both to introduce and to miss, 'gawk' warns you about it.)
Consider 'FS = "[ \t]+\|[ \t]+"' to use vertical bars surrounded by
whitespace as the field separator. There should be two backslashes
in the string: 'FS = "[ \t]+\\|[ \t]+"'.)
+=======
+ This is what BWK `awk' and `gawk' both do. For example, `"a\qc"'
+ is the same as `"aqc"'. (Because this is such an easy bug both to
+ introduce and to miss, `gawk' warns you about it.) Consider `FS =
+ "[ \t]+\|[ \t]+"' to use vertical bars surrounded by whitespace as
+ the field separator. There should be two backslashes in the
+ string: `FS = "[ \t]+\\|[ \t]+"'.)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Leave the backslash alone
Some other 'awk' implementations do this. In such implementations,
@@ -3475,10 +3853,18 @@ sequences and that are not listed in the table stand for themselves:
'|'
This is the "alternation operator" and it is used to specify
+<<<<<<< HEAD
alternatives. The '|' has the lowest precedence of all the regular
expression operators. For example, '^P|[[:digit:]]' matches any
string that matches either '^P' or '[[:digit:]]'. This means it
matches any string that starts with 'P' or contains a digit.
+=======
+ alternatives. The `|' has the lowest precedence of all the regular
+ expression operators. For example, `^P|[aeiouy]' matches any
+ string that matches either `^P' or `[aeiouy]'. This means it
+ matches any string that starts with `P' or contains (anywhere
+ within it) a lowercase English vowel.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
The alternation applies to the largest possible regexps on either
side.
@@ -3494,6 +3880,7 @@ sequences and that are not listed in the table stand for themselves:
'*'
This symbol means that the preceding regular expression should be
repeated as many times as necessary to find a match. For example,
+<<<<<<< HEAD
'ph*' applies the '*' symbol to the preceding 'h' and looks for
matches of one 'p' followed by any number of 'h's. This also
matches just 'p' if no 'h's are present.
@@ -3514,6 +3901,26 @@ sequences and that are not listed in the table stand for themselves:
example:
awk '/\(c[ad]+r x\)/ { print }' sample
+=======
+ `ph*' applies the `*' symbol to the preceding `h' and looks for
+ matches of one `p' followed by any number of `h's. This also
+ matches just `p' if no `h's are present.
+
+ There are two subtle points to understand about how `*' works.
+ First, the `*' applies only to the single preceding regular
+ expression component (e.g., in `ph*', it applies just to the `h').
+ To cause `*' to apply to a larger sub-expression, use parentheses:
+ `(ph)*' matches `ph', `phph', `phphph' and so on.
+
+ Second, `*' finds as many repetitions as possible. If the text to
+ be matched is `phhhhhhhhhhhhhhooey', `ph*' matches all of the `h's.
+
+`+'
+ This symbol is similar to `*', except that the preceding
+ expression must be matched at least once. This means that `wh+y'
+ would match `why' and `whhy', but not `wy', whereas `wh*y' would
+ match all three.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
'?'
This symbol is similar to '*', except that the preceding expression
@@ -3582,7 +3989,7 @@ as either a "character set", a "character class", or a "character list".
regexp operator or function.

-File: gawk.info, Node: Bracket Expressions, Next: GNU Regexp Operators, Prev: Regexp Operators, Up: Regexp
+File: gawk.info, Node: Bracket Expressions, Next: Leftmost Longest, Prev: Regexp Operators, Up: Regexp
3.4 Using Bracket Expressions
=============================
@@ -3603,7 +4010,13 @@ expression, put a '\' in front of it. For example:
[d\]]
+<<<<<<< HEAD
matches either 'd' or ']'.
+=======
+matches either `d' or `]'. Additionally, if you place `]' right after
+the opening `[', the closing bracket is treated as one of the
+characters to be matched.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
This treatment of '\' in bracket expressions is compatible with other
'awk' implementations and is also mandated by POSIX. The regular
@@ -3684,9 +4097,122 @@ Equivalence classes
classes.

-File: gawk.info, Node: GNU Regexp Operators, Next: Case-sensitivity, Prev: Bracket Expressions, Up: Regexp
+File: gawk.info, Node: Leftmost Longest, Next: Computed Regexps, Prev: Bracket Expressions, Up: Regexp
+
+3.5 How Much Text Matches?
+==========================
+
+Consider the following:
+
+ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
+
+ This example uses the `sub()' function (which we haven't discussed
+yet; *note String Functions::) to make a change to the input record.
+Here, the regexp `/a+/' indicates "one or more `a' characters," and the
+replacement text is `<A>'.
+
+ The input contains four `a' characters. `awk' (and POSIX) regular
+expressions always match the leftmost, _longest_ sequence of input
+characters that can match. Thus, all four `a' characters are replaced
+with `<A>' in this example:
+
+ $ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
+ -| <A>bcd
+
+ For simple match/no-match tests, this is not so important. But when
+doing text matching and substitutions with the `match()', `sub()',
+`gsub()', and `gensub()' functions, it is very important. *Note String
+Functions::, for more information on these functions. Understanding
+this principle is also important for regexp-based record and field
+splitting (*note Records::, and also *note Field Separators::).
+
+
+File: gawk.info, Node: Computed Regexps, Next: GNU Regexp Operators, Prev: Leftmost Longest, Up: Regexp
+
+3.6 Using Dynamic Regexps
+=========================
+
+The righthand side of a `~' or `!~' operator need not be a regexp
+constant (i.e., a string of characters between slashes). It may be any
+expression. The expression is evaluated and converted to a string if
+necessary; the contents of the string are then used as the regexp. A
+regexp computed in this way is called a "dynamic regexp" or a "computed
+regexp":
+
+ BEGIN { digits_regexp = "[[:digit:]]+" }
+ $0 ~ digits_regexp { print }
+
+This sets `digits_regexp' to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+ NOTE: When using the `~' and `!~' operators, there is a difference
+ between a regexp constant enclosed in slashes and a string
+ constant enclosed in double quotes. If you are going to use a
+ string constant, you have to understand that the string is, in
+ essence, scanned _twice_: the first time when `awk' reads your
+ program, and the second time when it goes to match the string on
+ the lefthand side of the operator with the pattern on the right.
+ This is true of any string-valued expression (such as
+ `digits_regexp', shown previously), not just string constants.
+
+ What difference does it make if the string is scanned twice? The
+answer has to do with escape sequences, and particularly with
+backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+ For example, `/\*/' is a regexp constant for a literal `*'. Only
+one backslash is needed. To do the same thing with a string, you have
+to type `"\\*"'. The first backslash escapes the second one so that
+the string actually contains the two characters `\' and `*'.
+
+ Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is "regexp
+constants," for several reasons:
+
+ * String constants are more complicated to write and more difficult
+ to read. Using regexp constants makes your programs less
+ error-prone. Not understanding the difference between the two
+ kinds of constants is a common source of errors.
+
+ * It is more efficient to use regexp constants. `awk' can note that
+ you have supplied a regexp and store it internally in a form that
+ makes pattern matching more efficient. When using a string
+ constant, `awk' must first convert the string into this internal
+ form and then perform the pattern matching.
+
+ * Using regexp constants is better form; it shows clearly that you
+ intend a regexp match.
+
+ Using `\n' in Bracket Expressions of Dynamic Regexps
+
+ Some versions of `awk' do not allow the newline character to be used
+inside a bracket expression for a dynamic regexp:
+
+ $ awk '$0 ~ "[ \t\n]"'
+ error--> awk: newline in character class [
+ error--> ]...
+ error--> source line number 1
+ error--> context is
+ error--> >>> <<<
+
+ But a newline in a regexp constant works with no problem:
+
+ $ awk '$0 ~ /[ \t\n]/'
+ here is a sample line
+ -| here is a sample line
+ Ctrl-d
+
+ `gawk' does not have this problem, and it isn't likely to occur
+often in practice, but it's worth noting for future reference.
+
+
+File: gawk.info, Node: GNU Regexp Operators, Next: Case-sensitivity, Prev: Computed Regexps, Up: Regexp
+<<<<<<< HEAD
3.5 'gawk'-Specific Regexp Operators
+=======
+3.7 `gawk'-Specific Regexp Operators
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
====================================
GNU software that deals with regular expressions provides a number of
@@ -3766,6 +4292,7 @@ No options
(e.g., '\w' matches a literal 'w'). Interval expressions are
allowed.
+<<<<<<< HEAD
'--traditional'
Traditional Unix 'awk' regexps are matched. The GNU operators are
not special, and interval expressions are not available. The POSIX
@@ -3773,6 +4300,15 @@ No options
Kernighan's 'awk' does support them. Characters described by octal
and hexadecimal escape sequences are treated literally, even if
they represent regexp metacharacters.
+=======
+`--traditional'
+ Traditional Unix `awk' regexps are matched. The GNU operators are
+ not special, and interval expressions are not available. The
+ POSIX character classes (`[[:alnum:]]', etc.) are supported, as
+ BWK `awk' does support them. Characters described by octal and
+ hexadecimal escape sequences are treated literally, even if they
+ represent regexp metacharacters.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
'--re-interval'
Allow interval expressions in regexps, if '--traditional' has been
@@ -3780,9 +4316,9 @@ No options
default.

-File: gawk.info, Node: Case-sensitivity, Next: Leftmost Longest, Prev: GNU Regexp Operators, Up: Regexp
+File: gawk.info, Node: Case-sensitivity, Next: Regexp Summary, Prev: GNU Regexp Operators, Up: Regexp
-3.6 Case Sensitivity in Matching
+3.8 Case Sensitivity in Matching
================================
Case is normally significant in regular expressions, both when matching
@@ -3855,6 +4391,7 @@ and we don't recommend it.
that 'gawk' does the right thing.

+<<<<<<< HEAD
File: gawk.info, Node: Leftmost Longest, Next: Computed Regexps, Prev: Case-sensitivity, Up: Regexp
3.7 How Much Text Matches?
@@ -3965,6 +4502,9 @@ in practice, but it's worth noting for future reference.

File: gawk.info, Node: Regexp Summary, Prev: Computed Regexps, Up: Regexp
+=======
+File: gawk.info, Node: Regexp Summary, Prev: Case-sensitivity, Up: Regexp
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
3.9 Summary
===========
@@ -3973,7 +4513,7 @@ File: gawk.info, Node: Regexp Summary, Prev: Computed Regexps, Up: Regexp
'awk', regular expression constants are written enclosed between
slashes: '/'...'/'.
- * Regexp constants may be used by standalone in patterns and in
+ * Regexp constants may be used standalone in patterns and in
conditional expressions, or as part of matching expressions using
the '~' and '!~' operators.
@@ -3997,8 +4537,8 @@ File: gawk.info, Node: Regexp Summary, Prev: Computed Regexps, Up: Regexp
extent of the match, such as for text substitution and when the
record separator is a regexp.
- * Matching expressions may use dynamic regexps; that is string values
- treated as regular expressions.
+ * Matching expressions may use dynamic regexps, that is, string
+ values treated as regular expressions.

File: gawk.info, Node: Reading Files, Next: Printing, Prev: Regexp, Up: Top
@@ -4038,7 +4578,7 @@ to be named on the 'awk' command line (*note Getline::).
* Getline:: Reading files under explicit program control
using the 'getline' function.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on the
+* Command-line directories:: What happens if you put a directory on the
command line.
* Input Summary:: Input summary.
* Input Exercises:: Exercises.
@@ -4049,6 +4589,7 @@ File: gawk.info, Node: Records, Next: Fields, Up: Reading Files
4.1 How Input Is Split into Records
===================================
+<<<<<<< HEAD
The 'awk' utility divides the input for your 'awk' program into records
and fields. 'awk' keeps track of the number of records that have been
read so far from the current input file. This value is stored in a
@@ -4056,6 +4597,15 @@ built-in variable called 'FNR'. It is reset to zero when a new file is
started. Another built-in variable, 'NR', records the total number of
input records read so far from all data files. It starts at zero, but
is never automatically reset to zero.
+=======
+`awk' divides the input for your program into records and fields. It
+keeps track of the number of records that have been read so far from
+the current input file. This value is stored in a built-in variable
+called `FNR' which is reset to zero when a new file is started.
+Another built-in variable, `NR', records the total number of input
+records read so far from all data files. It starts at zero, but is
+never automatically reset to zero.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Menu:
@@ -4219,12 +4769,13 @@ trailing whitespace:
$ echo record 1 AAAA record 2 BBBB record 3 |
> gawk 'BEGIN { RS = "\n|( *[[:upper:]]+ *)" }
- > { print "Record =", $0, "and RT =", RT }'
- -| Record = record 1 and RT = AAAA
- -| Record = record 2 and RT = BBBB
- -| Record = record 3 and RT =
- -|
+ > { print "Record =", $0,"and RT = [" RT "]" }'
+ -| Record = record 1 and RT = [ AAAA ]
+ -| Record = record 2 and RT = [ BBBB ]
+ -| Record = record 3 and RT = [
+ -| ]
+<<<<<<< HEAD
The final line of output has an extra blank line. This is because the
value of 'RT' is a newline, and the 'print' statement supplies its own
terminating newline. *Note Simple Sed::, for a more useful example of
@@ -4248,6 +4799,31 @@ never happen.
The use of 'RS' as a regular expression and the 'RT' variable are
'gawk' extensions; they are not available in compatibility mode (*note
+=======
+The square brackets delineate the contents of `RT', letting you see the
+leading and trailing whitespace. The final value of `RT' is a
+newline. *Note Simple Sed::, for a more useful example of `RS' as a
+regexp and `RT'.
+
+ If you set `RS' to a regular expression that allows optional
+trailing text, such as `RS = "abc(XYZ)?"', it is possible, due to
+implementation constraints, that `gawk' may match the leading part of
+the regular expression, but not the trailing part, particularly if the
+input text that could match the trailing part is fairly long. `gawk'
+attempts to avoid this problem, but currently, there's no guarantee
+that this will never happen.
+
+ NOTE: Remember that in `awk', the `^' and `$' anchor
+ metacharacters match the beginning and end of a _string_, and not
+ the beginning and end of a _line_. As a result, something like
+ `RS = "^[[:upper:]]"' can only match at the beginning of a file.
+ This is because `gawk' views the input file as one long string
+ that happens to contain newline characters in it. It is thus best
+ to avoid anchor characters in the value of `RS'.
+
+ The use of `RS' as a regular expression and the `RT' variable are
+`gawk' extensions; they are not available in compatibility mode (*note
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Options::). In compatibility mode, only the first character of the
value of 'RS' is used to determine the end of the record.
@@ -4567,7 +5143,11 @@ File: gawk.info, Node: Field Separators, Next: Constant Size, Prev: Changing
* Default Field Splitting:: How fields are normally separated.
* Regexp Field Splitting:: Using regexps as the field separator.
* Single Character Fields:: Making each character a separate field.
+<<<<<<< HEAD
* Command Line Field Separator:: Setting 'FS' from the command-line.
+=======
+* Command Line Field Separator:: Setting `FS' from the command line.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Full Line Fields:: Making the full line be a single field.
* Field Splitting Summary:: Some final points and a summary table.
@@ -4708,9 +5288,14 @@ field separator a new string? It turns out that different 'awk'
versions answer this question differently, and you should not rely on
any specific behavior in your programs. (d.c.)
+<<<<<<< HEAD
As a point of information, Brian Kernighan's 'awk' allows '^' to
match only at the beginning of the record. 'gawk' also works this way.
For example:
+=======
+ As a point of information, BWK `awk' allows `^' to match only at the
+beginning of the record. `gawk' also works this way. For example:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
$ echo 'xxAA xxBxx C' |
> gawk -F '(^x+)|( +)' '{ for (i = 1; i <= NF; i++)
@@ -4779,11 +5364,20 @@ Sequences::), finally yielding a single '\' to use for the field
separator.
As a special case, in compatibility mode (*note Options::), if the
+<<<<<<< HEAD
argument to '-F' is 't', then 'FS' is set to the TAB character. If you
type '-F\t' at the shell, without any quotes, the '\' gets deleted, so
'awk' figures that you really want your fields to be separated with TABs
and not 't's. Use '-v FS="t"' or '-F"[t]"' on the command line if you
really do want to separate your fields with 't's.
+=======
+argument to `-F' is `t', then `FS' is set to the TAB character. If you
+type `-F\t' at the shell, without any quotes, the `\' gets deleted, so
+`awk' figures that you really want your fields to be separated with
+TABs and not `t's. Use `-v FS="t"' or `-F"[t]"' on the command line if
+you really do want to separate your fields with `t's. Use `-F '\t''
+when not in compatibility mode to specify that TABs separate fields.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
As an example, let's use an 'awk' program file called 'edu.awk' that
contains the pattern '/edu/' and the action 'print $1':
@@ -4902,8 +5496,13 @@ which usually prints:
root
+<<<<<<< HEAD
on an incorrect implementation of 'awk', while 'gawk' prints something
like:
+=======
+on an incorrect implementation of `awk', while `gawk' prints the full
+first line of the file, something like:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
root:nSijPlPhZZwgE:0:0:Root:/:
@@ -4982,7 +5581,7 @@ calculated idle time:
BEGIN { FIELDWIDTHS = "9 6 10 6 7 7 35" }
NR > 2 {
idle = $4
- sub(/^ */, "", idle) # strip leading spaces
+ sub(/^ +/, "", idle) # strip leading spaces
if (idle == "")
idle = 0
if (idle ~ /:/) {
@@ -5113,8 +5712,15 @@ would be to remove the quotes when they occur, with something like this:
As with 'FS', the 'IGNORECASE' variable (*note User-modified::)
affects field splitting with 'FPAT'.
+<<<<<<< HEAD
Similar to 'FIELDWIDTHS', the value of 'PROCINFO["FS"]' will be
'"FPAT"' if content-based field splitting is being used.
+=======
+ Assigning a value to `FPAT' overrides field splitting with `FS' and
+with `FIELDWIDTHS'. Similar to `FIELDWIDTHS', the value of
+`PROCINFO["FS"]' will be `"FPAT"' if content-based field splitting is
+being used.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
NOTE: Some programs export CSV data that contains embedded newlines
between the double quotes. 'gawk' provides no way to deal with
@@ -5132,6 +5738,11 @@ the first '+' to '*') allows fields to be empty:
Finally, the 'patsplit()' function makes the same functionality
available for splitting regular strings (*note String Functions::).
+ To recap, `gawk' provides three independent methods to split input
+records into fields. `gawk' uses whichever mechanism was last chosen
+based on which of the three variables--`FS', `FIELDWIDTHS', and
+`FPAT'--was last assigned to.
+
---------- Footnotes ----------
(1) At least, we don't know of one.
@@ -5284,6 +5895,7 @@ File: gawk.info, Node: Getline, Next: Read Timeout, Prev: Multiple Line, Up:
So far we have been getting our input data from 'awk''s main input
stream--either the standard input (usually your keyboard, sometimes the
output from another program) or from the files specified on the command
+<<<<<<< HEAD
line. The 'awk' language has a special built-in command called
'getline' that can be used to read input under your explicit control.
@@ -5299,6 +5911,23 @@ encounters the end of the file. If there is some error in getting a
record, such as a file that cannot be opened, then 'getline' returns -1.
In this case, 'gawk' sets the variable 'ERRNO' to a string describing
the error that occurred.
+=======
+line. The `awk' language has a special built-in command called
+`getline' that can be used to read input under your explicit control.
+
+ The `getline' command is used in several different ways and should
+_not_ be used by beginners. The examples that follow the explanation
+of the `getline' command include material that has not been covered
+yet. Therefore, come back and study the `getline' command _after_ you
+have reviewed the rest of this Info file and have a good knowledge of
+how `awk' works.
+
+ The `getline' command returns 1 if it finds a record and 0 if it
+encounters the end of the file. If there is some error in getting a
+record, such as a file that cannot be opened, then `getline' returns
+-1. In this case, `gawk' sets the variable `ERRNO' to a string
+describing the error that occurred.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
In the following examples, COMMAND stands for a string value that
represents a shell command.
@@ -5334,35 +5963,56 @@ record and split it up into fields. This is useful if you've finished
processing the current record, but want to do some special processing on
the next record _right now_. For example:
+ # Remove text between /* and */, inclusive
{
- if ((t = index($0, "/*")) != 0) {
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) {
- if (getline <= 0) {
+ if ((i = index($0, "/*")) != 0) {
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) {
+ rest = substr(rest, j + 2) # remove comment
+ } else {
+ while (j == 0) {
+ # get more text
+ if (getline <= 0) {
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- }
- u = index($0, "*/")
- offset = 0
- }
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- }
- print $0
+ }
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) {
+ rest = substr(rest, j + 2)
+ break
+ }
+ }
+ }
+ # build up the output line using string concatenation
+ $0 = out rest
+ }
+ print $0
}
+<<<<<<< HEAD
This 'awk' program deletes C-style comments ('/* ... */') from the
input. By replacing the 'print $0' with other statements, you could
perform more complicated processing on the decommented input, such as
searching for matches of a regular expression. (This program has a
subtle problem--it does not work if one comment ends and another begins
on the same line.)
+=======
+ This `awk' program deletes C-style comments (`/* ... */') from the
+input. It uses a number of features we haven't covered yet, including
+string concatenation (*note Concatenation::) and the `index()' and
+`substr()' built-in functions (*note String Functions::). By replacing
+the `print $0' with other statements, you could perform more
+complicated processing on the decommented input, such as searching for
+matches of a regular expression. (This program has a subtle
+problem--it does not work if one comment ends and another begins on the
+same line.)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
This form of the 'getline' command sets 'NF', 'NR', 'FNR', 'RT', and
the value of '$0'.
@@ -5544,6 +6194,7 @@ Notice that this program ran the command 'who' and printed the previous
result. (If you try this program yourself, you will of course get
different results, depending upon who is logged in on your system.)
+<<<<<<< HEAD
This variation of 'getline' splits the record into fields, sets the
value of 'NF', and recomputes the value of '$0'. The values of 'NR' and
'FNR' are not changed. 'RT' is set.
@@ -5561,6 +6212,26 @@ you want your program to be portable to all 'awk' implementations.
Some versions changed and treated it as '"echo " ("date" |
getline)'. (This is how 'mawk' behaves.) In short, _always_ use
explicit parentheses, and then you won't have to worry.
+=======
+ This variation of `getline' splits the record into fields, sets the
+value of `NF', and recomputes the value of `$0'. The values of `NR'
+and `FNR' are not changed. `RT' is set.
+
+ According to POSIX, `EXPRESSION | getline' is ambiguous if
+EXPRESSION contains unparenthesized operators other than `$'--for
+example, `"echo " "date" | getline' is ambiguous because the
+concatenation operator is not parenthesized. You should write it as
+`("echo " "date") | getline' if you want your program to be portable to
+all `awk' implementations.
+
+ NOTE: Unfortunately, `gawk' has not been consistent in its
+ treatment of a construct like `"echo " "date" | getline'. Most
+ versions, including the current version, treat it as `("echo "
+ "date") | getline'. (This is how BWK `awk' behaves.) Some versions
+ changed and treated it as `"echo " ("date" | getline)'. (This is
+ how `mawk' behaves.) In short, _always_ use explicit parentheses,
+ and then you won't have to worry.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Getline/Variable/Pipe, Next: Getline/Coprocess, Prev: Getline/Pipe, Up: Getline
@@ -5727,7 +6398,7 @@ VAR
Table 4.1: 'getline' Variants and What They Set

-File: gawk.info, Node: Read Timeout, Next: Command line directories, Prev: Getline, Up: Reading Files
+File: gawk.info, Node: Read Timeout, Next: Command-line directories, Prev: Getline, Up: Reading Files
4.10 Reading Input With A Timeout
=================================
@@ -5820,7 +6491,7 @@ can block indefinitely until some other process opens it for writing.
(1) This assumes that standard input is the keyboard.

-File: gawk.info, Node: Command line directories, Next: Input Summary, Prev: Read Timeout, Up: Reading Files
+File: gawk.info, Node: Command-line directories, Next: Input Summary, Prev: Read Timeout, Up: Reading Files
4.11 Directories On The Command Line
====================================
@@ -5843,7 +6514,7 @@ error.
usable data from an 'awk' program.

-File: gawk.info, Node: Input Summary, Next: Input Exercises, Prev: Command line directories, Up: Reading Files
+File: gawk.info, Node: Input Summary, Next: Input Exercises, Prev: Command-line directories, Up: Reading Files
4.12 Summary
============
@@ -5922,10 +6593,18 @@ File: gawk.info, Node: Input Exercises, Prev: Input Summary, Up: Reading File
including abstentions, for each item.
2. *note Plain Getline::, presented a program to remove C-style
+<<<<<<< HEAD
comments ('/* ... */') from the input. That program does not work
if one comment ends on one line and another one starts later on the
same line. Write a program that does handle multiple comments on
the line.
+=======
+ comments (`/* ... */') from the input. That program does not work
+ if one comment ends on one line and another one starts later on
+ the same line. That can be fixed by making one simple change.
+ What is it?
+
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Printing, Next: Expressions, Prev: Reading Files, Up: Top
@@ -5962,7 +6641,7 @@ function.
descriptors.
* Close Files And Pipes:: Closing Input and Output Files and Pipes.
* Output Summary:: Output summary.
-* Output exercises:: Exercises.
+* Output Exercises:: Exercises.

File: gawk.info, Node: Print, Next: Print Examples, Up: Printing
@@ -5994,6 +6673,10 @@ you forget to use the double-quote characters, your text is taken as an
'awk' expression, and you will probably get an error. Keep in mind that
a space is printed between any two items.
+ Note that the `print' statement is a statement and not an
+expression--you can't use it in the pattern part of a pattern-action
+statement, for example.
+

File: gawk.info, Node: Print Examples, Next: Output Separators, Prev: Print, Up: Printing
@@ -6711,8 +7394,13 @@ rename the files. It then sends the list to the shell for execution.

File: gawk.info, Node: Special Files, Next: Close Files And Pipes, Prev: Redirection, Up: Printing
+<<<<<<< HEAD
5.7 Special File Name in 'gawk'
===============================
+=======
+5.7 Special File Names in `gawk'
+================================
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
'gawk' provides a number of special file names that it interprets
internally. These file names provide access to standard file
@@ -6746,10 +7434,17 @@ message to standard error in an 'awk' program is as follows:
print "Serious error detected!" | "cat 1>&2"
This works by opening a pipeline to a shell command that can access the
+<<<<<<< HEAD
standard error stream that it inherits from the 'awk' process. This is
far from elegant, and it is also inefficient, because it requires a
separate process. So people writing 'awk' programs often don't do this.
Instead, they send the error messages to the screen, like this:
+=======
+standard error stream that it inherits from the `awk' process. This is
+far from elegant, and it also requires a separate process. So people
+writing `awk' programs often don't do this. Instead, they send the
+error messages to the screen, like this:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
print "Serious error detected!" > "/dev/tty"
@@ -7014,7 +7709,7 @@ call. See the system manual pages for information on how to decode this
value.

-File: gawk.info, Node: Output Summary, Next: Output exercises, Prev: Close Files And Pipes, Up: Printing
+File: gawk.info, Node: Output Summary, Next: Output Exercises, Prev: Close Files And Pipes, Up: Printing
5.9 Summary
===========
@@ -7039,7 +7734,7 @@ File: gawk.info, Node: Output Summary, Next: Output exercises, Prev: Close Fi
communications.

-File: gawk.info, Node: Output exercises, Prev: Output Summary, Up: Printing
+File: gawk.info, Node: Output Exercises, Prev: Output Summary, Up: Printing
5.10 Exercises
==============
@@ -7238,10 +7933,18 @@ File: gawk.info, Node: Regexp Constants, Prev: Nondecimal-numbers, Up: Consta
....................................
A regexp constant is a regular expression description enclosed in
+<<<<<<< HEAD
slashes, such as '/^beginning and end$/'. Most regexps used in 'awk'
programs are constant, but the '~' and '!~' matching operators can also
match computed or dynamic regexps (which are just ordinary strings or
variables that contain a regexp).
+=======
+slashes, such as `/^beginning and end$/'. Most regexps used in `awk'
+programs are constant, but the `~' and `!~' matching operators can also
+match computed or dynamic regexps (which are typically just ordinary
+strings or variables that contain a regexp, but could be a more complex
+expression).
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Using Constant Regexps, Next: Variables, Prev: Constants, Up: Values
@@ -7313,12 +8016,21 @@ For example:
}
In this example, the programmer wants to pass a regexp constant to
+<<<<<<< HEAD
the user-defined function 'mysub', which in turn passes it on to either
'sub()' or 'gsub()'. However, what really happens is that the 'pat'
parameter is either one or zero, depending upon whether or not '$0'
matches '/hi/'. 'gawk' issues a warning when it sees a regexp constant
used as a parameter to a user-defined function, since passing a truth
value in this way is probably not what was intended.
+=======
+the user-defined function `mysub()', which in turn passes it on to
+either `sub()' or `gsub()'. However, what really happens is that the
+`pat' parameter is either one or zero, depending upon whether or not
+`$0' matches `/hi/'. `gawk' issues a warning when it sees a regexp
+constant used as a parameter to a user-defined function, since passing
+a truth value in this way is probably not what was intended.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Variables, Next: Conversion, Prev: Using Constant Regexps, Up: Values
@@ -7334,7 +8046,7 @@ on the 'awk' command line.
* Menu:
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line and a
+* Assignment Options:: Setting variables on the command line and a
summary of command-line syntax. This is an
advanced method of input.
@@ -7757,13 +8469,19 @@ it's parsed as follows:
=> -12 (-24)
=> -12-24
- As mentioned earlier, when doing concatenation, _parenthesize_.
-Otherwise, you're never quite sure what you'll get.
+ As mentioned earlier, when mixing concatenation with other
+operators, _parenthesize_. Otherwise, you're never quite sure what
+you'll get.
---------- Footnotes ----------
+<<<<<<< HEAD
(1) It happens that Brian Kernighan's 'awk', 'gawk' and 'mawk' all
"get it right," but you should not rely on this.
+=======
+ (1) It happens that BWK `awk', `gawk' and `mawk' all "get it right,"
+but you should not rely on this.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Assignment Ops, Next: Increment Ops, Prev: Concatenation, Up: All Operators
@@ -7928,8 +8646,13 @@ A workaround is:
awk '/[=]=/' /dev/null
+<<<<<<< HEAD
'gawk' does not have this problem; Brian Kernighan's 'awk' and 'mawk'
also do not (*note Other Versions::).
+=======
+ `gawk' does not have this problem; BWK `awk' and `mawk' also do not
+(*note Other Versions::).
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Increment Ops, Prev: Assignment Ops, Up: All Operators
@@ -8094,6 +8817,7 @@ File: gawk.info, Node: Variable Typing, Next: Comparison Operators, Up: Typin
6.3.2.1 String Type Versus Numeric Type
.......................................
+<<<<<<< HEAD
The 1992 POSIX standard introduced the concept of a "numeric string",
which is simply a string that looks like a number--for example, '" +2"'.
This concept is used for determining the type of a variable. The type
@@ -8104,6 +8828,13 @@ determine how they are compared.
quite right for several editions. Fortunately, as of at least the 2008
standard (and possibly earlier), the standard has been fixed, and
variable typing follows these rules:(1)
+=======
+The POSIX standard introduced the concept of a "numeric string", which
+is simply a string that looks like a number--for example, `" +2"'.
+This concept is used for determining the type of a variable. The type
+of the variable is important because the types of two variables
+determine how they are compared. Variable typing follows these rules:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* A numeric constant or the result of a numeric operation has the
NUMERIC attribute.
@@ -8145,12 +8876,21 @@ operands, according to the following symmetric matrix:
STRNUM | string numeric numeric
-----+-------------------------------
+<<<<<<< HEAD
The basic idea is that user input that looks numeric--and _only_ user
input--should be treated as numeric, even though it is actually made of
characters and is therefore also a string. Thus, for example, the
string constant '" +3.14"', when it appears in program source code, is a
string--even though it looks numeric--and is _never_ treated as a number
for comparison purposes.
+=======
+ The basic idea is that user input that looks numeric--and _only_
+user input--should be treated as numeric, even though it is actually
+made of characters and is therefore also a string. Thus, for example,
+the string constant `" +3.14"', when it appears in program source code,
+is a string--even though it looks numeric--and is _never_ treated as a
+number for comparison purposes.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
In short, when one operand is a "pure" string, such as a string
constant, then a string comparison is performed. Otherwise, a numeric
@@ -8181,11 +8921,14 @@ comparison between the two different constants is true, '0' otherwise:
$ echo ' +3.14' | gawk '{ print $1 == 3.14 }' True
-| 1
+<<<<<<< HEAD
---------- Footnotes ----------
(1) 'gawk' has followed these rules for many years, and it is
gratifying that the POSIX standard is also now correct.
+=======
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Comparison Operators, Next: POSIX String Comparison, Prev: Variable Typing, Up: Typing and Comparison
@@ -8389,9 +9132,15 @@ because of the way they work. Evaluation of the full expression is
"short-circuited" if the result can be determined part way through its
evaluation.
+<<<<<<< HEAD
Statements that use '&&' or '||' can be continued simply by putting a
newline after them. But you cannot put a newline in front of either of
these operators without using backslash continuation (*note
+=======
+ Statements that end with `&&' or `||' can be continued simply by
+putting a newline after them. But you cannot put a newline in front of
+either of these operators without using backslash continuation (*note
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Statements/Lines::).
The actual value of an expression using the '!' operator is either
@@ -8402,7 +9151,7 @@ following program is one way to print lines in between special
bracketing lines:
$1 == "START" { interested = ! interested; next }
- interested == 1 { print }
+ interested { print }
$1 == "END" { interested = ! interested; next }
The variable 'interested', as with all 'awk' variables, starts out
@@ -8412,8 +9161,21 @@ using '!'. The next rule prints lines as long as 'interested' is true.
When a line is seen whose first field is 'END', 'interested' is toggled
back to false.(1)
+<<<<<<< HEAD
NOTE: The 'next' statement is discussed in *note Next Statement::.
'next' tells 'awk' to skip the rest of the rules, get the next
+=======
+ Most commonly, the `!' operator is used in the conditions of `if'
+and `while' statements, where it often makes more sense to phrase the
+logic in the negative:
+
+ if (! SOME CONDITION || SOME OTHER CONDITION) {
+ ... DO WHATEVER PROCESSING ...
+ }
+
+ NOTE: The `next' statement is discussed in *note Next Statement::.
+ `next' tells `awk' to skip the rest of the rules, get the next
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
record, and start processing the rules over again at the top. The
reason it's there is to avoid printing the bracketing 'START' and
'END' lines.
@@ -8865,7 +9627,7 @@ precisely 'li':
'li'.) Contrast this with the following regular expression match, which
accepts any record with a first field that contains 'li':
- $ awk '$1 ~ /foo/ { print $2 }' mail-list
+ $ awk '$1 ~ /li/ { print $2 }' mail-list
-| 555-5553
-| 555-6699
@@ -9080,11 +9842,18 @@ direction. Traditionally, due largely to implementation issues, '$0'
and 'NF' were _undefined_ inside an 'END' rule. The POSIX standard
specifies that 'NF' is available in an 'END' rule. It contains the
number of fields from the last input record. Most probably due to an
+<<<<<<< HEAD
oversight, the standard does not say that '$0' is also preserved,
although logically one would think that it should be. In fact, 'gawk'
does preserve the value of '$0' for use in 'END' rules. Be aware,
however, that Brian Kernighan's 'awk', and possibly other
implementations, do not.
+=======
+oversight, the standard does not say that `$0' is also preserved,
+although logically one would think that it should be. In fact, `gawk'
+does preserve the value of `$0' for use in `END' rules. Be aware,
+however, that BWK `awk', and possibly other implementations, do not.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
The third point follows from the first two. The meaning of 'print'
inside a 'BEGIN' or 'END' rule is the same as always: 'print $0'. If
@@ -9564,22 +10333,31 @@ match to a given case is made, the case statement bodies execute until a
'break', 'continue', 'next', 'nextfile' or 'exit' is encountered, or the
end of the 'switch' statement itself. For example:
- switch (NR * 2 + 1) {
- case 3:
- case "11":
- print NR - 1
- break
-
- case /2[[:digit:]]+/:
- print NR
-
- default:
- print NR + 1
-
- case -1:
- print NR * -1
+ while ((c = getopt(ARGC, ARGV, "aksx")) != -1) {
+ switch (c) {
+ case "a":
+ # report size of all files
+ all_files = TRUE;
+ break
+ case "k":
+ BLOCK_SIZE = 1024 # 1K block size
+ break
+ case "s":
+ # do sums only
+ sum_only = TRUE
+ break
+ case "x":
+ # don't cross filesystems
+ fts_flags = or(fts_flags, FTS_XDEV)
+ break
+ case "?":
+ default:
+ usage()
+ break
+ }
}
+<<<<<<< HEAD
Note that if none of the statements specified above halt execution of
a matched 'case' statement, execution falls through to the next 'case'
until execution halts. In the above example, for any case value
@@ -9587,6 +10365,14 @@ starting with '2' followed by one or more digits, the 'print' statement
is executed and then falls through into the 'default' section, executing
its 'print' statement. In turn, the -1 case will also be executed since
the 'default' does not halt execution.
+=======
+ Note that if none of the statements specified above halt execution
+of a matched `case' statement, execution falls through to the next
+`case' until execution halts. In the above example, the `case' for
+`"?"' falls through to the `default' case, which is to call a function
+named `usage()'. (The `getopt()' function being called here is
+described in *note Getopt Function::.)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Break Statement, Next: Continue Statement, Prev: Switch Statement, Up: Statements
@@ -9639,12 +10425,21 @@ Statement::.)
The 'break' statement is also used to break out of the 'switch'
statement. This is discussed in *note Switch Statement::.
+<<<<<<< HEAD
The 'break' statement has no meaning when used outside the body of a
loop or 'switch'. However, although it was never documented, historical
implementations of 'awk' treated the 'break' statement outside of a loop
as if it were a 'next' statement (*note Next Statement::). (d.c.)
Recent versions of Brian Kernighan's 'awk' no longer allow this usage,
nor does 'gawk'.
+=======
+ The `break' statement has no meaning when used outside the body of a
+loop or `switch'. However, although it was never documented,
+historical implementations of `awk' treated the `break' statement
+outside of a loop as if it were a `next' statement (*note Next
+Statement::). (d.c.) Recent versions of BWK `awk' no longer allow
+this usage, nor does `gawk'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Continue Statement, Next: Next Statement, Prev: Break Statement, Up: Statements
@@ -9687,6 +10482,7 @@ the previous example with the following 'while' loop:
print ""
}
+<<<<<<< HEAD
This program loops forever once 'x' reaches 5.
The 'continue' statement has no special meaning with respect to the
@@ -9696,6 +10492,18 @@ statement outside a loop the same way they treated a 'break' statement
outside a loop: as if it were a 'next' statement (*note Next
Statement::). (d.c.) Recent versions of Brian Kernighan's 'awk' no
longer work this way, nor does 'gawk'.
+=======
+This program loops forever once `x' reaches 5, since the increment
+(`x++') is never reached.
+
+ The `continue' statement has no special meaning with respect to the
+`switch' statement, nor does it have any meaning when used outside the
+body of a loop. Historical versions of `awk' treated a `continue'
+statement outside a loop the same way they treated a `break' statement
+outside a loop: as if it were a `next' statement (*note Next
+Statement::). (d.c.) Recent versions of BWK `awk' no longer work this
+way, nor does `gawk'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Next Statement, Next: Nextfile Statement, Prev: Continue Statement, Up: Statements
@@ -9763,10 +10571,17 @@ statement instructs 'awk' to stop processing the current data file.
Upon execution of the 'nextfile' statement, 'FILENAME' is updated to
the name of the next data file listed on the command line, 'FNR' is
reset to one, and processing starts over with the first rule in the
+<<<<<<< HEAD
program. If the 'nextfile' statement causes the end of the input to be
reached, then the code in any 'END' rules is executed. An exception to
this is when 'nextfile' is invoked during execution of any statement in
an 'END' rule; in this case, it causes the program to stop immediately.
+=======
+program. If the `nextfile' statement causes the end of the input to be
+reached, then the code in any `END' rules is executed. An exception to
+this is when `nextfile' is invoked during execution of any statement in
+an `END' rule; in this case, it causes the program to stop immediately.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
*Note BEGIN/END::.
The 'nextfile' statement is useful when there are many data files to
@@ -9775,10 +10590,18 @@ Without 'nextfile', in order to move on to the next data file, a program
would have to continue scanning the unwanted records. The 'nextfile'
statement accomplishes this much more efficiently.
+<<<<<<< HEAD
In 'gawk', execution of 'nextfile' causes additional things to
happen: any 'ENDFILE' rules are executed except in the case as mentioned
below, 'ARGIND' is incremented, and any 'BEGINFILE' rules are executed.
('ARGIND' hasn't been introduced yet. *Note Built-in Variables::.)
+=======
+ In `gawk', execution of `nextfile' causes additional things to
+happen: any `ENDFILE' rules are executed if `gawk' is not currently in
+an `END' or `BEGINFILE' rule, `ARGIND' is incremented, and any
+`BEGINFILE' rules are executed. (`ARGIND' hasn't been introduced yet.
+*Note Built-in Variables::.)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
With 'gawk', 'nextfile' is useful inside a 'BEGINFILE' rule to skip
over a file that would otherwise cause 'gawk' to exit with a fatal
@@ -9796,12 +10619,21 @@ in 'ARGV'.
standard. See the Austin Group website
(http://austingroupbugs.net/view.php?id=607).
+<<<<<<< HEAD
The current version of Brian Kernighan's 'awk', and 'mawk' (*note
Other Versions::) also support 'nextfile'. However, they don't allow
the 'nextfile' statement inside function bodies (*note User-defined::).
'gawk' does; a 'nextfile' inside a function body reads the next record
and starts processing it with the first rule in the program, just as any
other 'nextfile' statement.
+=======
+ The current version of BWK `awk', and `mawk' (*note Other
+Versions::) also support `nextfile'. However, they don't allow the
+`nextfile' statement inside function bodies (*note User-defined::).
+`gawk' does; a `nextfile' inside a function body reads the next record
+and starts processing it with the first rule in the program, just as
+any other `nextfile' statement.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Exit Statement, Prev: Nextfile Statement, Up: Statements
@@ -10172,7 +11004,8 @@ they are not special.
'FUNCTAB #'
An array whose indices and corresponding values are the names of
- all the user-defined or extension functions in the program.
+ all the built-in, user-defined and extension functions in the
+ program.
NOTE: Attempting to use the 'delete' statement with the
'FUNCTAB' array causes a fatal error. Any attempt to assign
@@ -10208,9 +11041,18 @@ they are not special.
'"array"'
The identifier is an array.
+<<<<<<< HEAD
'"extension"'
The identifier is an extension function loaded via
'@load'.
+=======
+ `"builtin"'
+ The identifier is a built-in function.
+
+ `"extension"'
+ The identifier is an extension function loaded via
+ `@load' or `-l'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
'"scalar"'
The identifier is a scalar.
@@ -10447,8 +11289,21 @@ elements from 'ARGV' (*note Delete::).
All of these actions are typically done in the 'BEGIN' rule, before
actual processing of the input begins. *Note Split Program::, and see
*note Tee Program::, for examples of each way of removing elements from
+<<<<<<< HEAD
'ARGV'. The following fragment processes 'ARGV' in order to examine,
and then remove, command-line options:
+=======
+`ARGV'.
+
+ To actually get options into an `awk' program, end the `awk' options
+with `--' and then supply the `awk' program's options, in the following
+manner:
+
+ awk -f myprog.awk -- -v -q file1 file2 ...
+
+ The following fragment processes `ARGV' in order to examine, and
+then remove, the above command-line options:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
BEGIN {
for (i = 1; i < ARGC; i++) {
@@ -10466,6 +11321,7 @@ and then remove, command-line options:
}
}
+<<<<<<< HEAD
To actually get the options into the 'awk' program, end the 'awk'
options with '--' and then supply the 'awk' program's options, in the
following manner:
@@ -10479,6 +11335,16 @@ the 'awk' program to deal with. As soon as it sees an unknown option,
recognize. The previous example with 'gawk' would be:
gawk -f myprog -q -v file1 file2 ...
+=======
+ Ending the `awk' options with `--' isn't necessary in `gawk'. Unless
+`--posix' has been specified, `gawk' silently puts any unrecognized
+options into `ARGV' for the `awk' program to deal with. As soon as it
+sees an unknown option, `gawk' stops looking for other options that it
+might otherwise recognize. The previous command line with `gawk' would
+be:
+
+ gawk -f myprog.awk -q -v file1 file2 ...
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Because '-q' is not a valid 'gawk' option, it and the following '-v' are
passed on to the 'awk' program. (*Note Getopt Function::, for an 'awk'
@@ -10649,7 +11515,8 @@ array element value:
Index 0 Value 8
Index 2 Value ""
-The pairs are shown in jumbled order because their order is irrelevant.
+The pairs are shown in jumbled order because their order is
+irrelevant.(1)
One advantage of associative arrays is that new pairs can be added at
any time. For example, suppose a tenth element is added to the array
@@ -10678,9 +11545,16 @@ from English to French:
Here we decided to translate the number one in both spelled-out and
numeric form--thus illustrating that a single array can have both
numbers and strings as indices. (In fact, array subscripts are always
+<<<<<<< HEAD
strings; this is discussed in more detail in *note Numeric Array
Subscripts::.) Here, the number '1' isn't double-quoted, since 'awk'
automatically converts it to a string.
+=======
+strings. There are some subtleties to how numbers work when used as
+array subscripts; this is discussed in more detail in *note Numeric
+Array Subscripts::.) Here, the number `1' isn't double-quoted, since
+`awk' automatically converts it to a string.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
The value of 'IGNORECASE' has no effect upon array subscripting. The
identical string value used to store an array element must be used to
@@ -10691,6 +11565,11 @@ starting at one. (*Note String Functions::.)
'awk''s arrays are efficient--the time to access an element is
independent of the number of elements in the array.
+ ---------- Footnotes ----------
+
+ (1) The ordering will vary among `awk' implementations, which
+typically use hash tables to store array elements and values.
+

File: gawk.info, Node: Reference to Elements, Next: Assigning Elements, Prev: Array Intro, Up: Array Basics
@@ -10725,8 +11604,14 @@ been assigned any value as well as elements that have been deleted
# Check if "foo" exists in a: Incorrect!
if (a["foo"] != "") ...
+<<<<<<< HEAD
This is incorrect, since this will _create_ 'a["foo"]' if it didn't
exist before!
+=======
+ This is incorrect for two reasons. First, it _creates_ `a["foo"]'
+ if it didn't exist before! Second, it is valid (if a bit unusual)
+ to set an array element equal to the empty string.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
To determine whether an element exists in an array at a certain
index, use the following expression:
@@ -10735,9 +11620,16 @@ index, use the following expression:
This expression tests whether the particular index INDX exists, without
the side effect of creating that element if it is not present. The
+<<<<<<< HEAD
expression has the value one (true) if 'ARRAY[INDX]' exists and zero
(false) if it does not exist. For example, this statement tests whether
the array 'frequencies' contains the index '2':
+=======
+expression has the value one (true) if `ARRAY[INDX]' exists and zero
+(false) if it does not exist. (We use INDX here, since `index' is the
+name of a built-in function.) For example, this statement tests
+whether the array `frequencies' contains the index `2':
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
if (2 in frequencies)
print "Subscript 2 is present."
@@ -10900,7 +11792,11 @@ all 'awk' versions do so. Consider this program, named 'loopcheck.awk':
-| a
-| is
+<<<<<<< HEAD
Contrast this to Brian Kernighan's 'awk':
+=======
+ Contrast this to BWK `awk':
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
$ nawk -f loopcheck.awk
-| loop
@@ -11106,9 +12002,15 @@ a time.
'gawk' extension. As of September, 2012, it was accepted for
inclusion into the POSIX standard. See the Austin Group website
(http://austingroupbugs.net/view.php?id=544). This form of the
+<<<<<<< HEAD
'delete' statement is also supported by Brian Kernighan's 'awk' and
'mawk', as well as by a number of other implementations (*note
Other Versions::).
+=======
+ `delete' statement is also supported by BWK `awk' and `mawk', as
+ well as by a number of other implementations (*note Other
+ Versions::).
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
The following statement provides a portable but nonobvious way to
clear out an array:(1)
@@ -11197,7 +12099,7 @@ might look like this:
> line 2
> line 3' | awk '{ l[lines] = $0; ++lines }
> END {
- > for (i = lines-1; i >= 0; --i)
+ > for (i = lines - 1; i >= 0; i--)
> print l[i]
> }'
-| line 3
@@ -11218,7 +12120,7 @@ following version of the program works correctly:
{ l[lines++] = $0 }
END {
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
}
@@ -11277,10 +12179,11 @@ multidimensional array, use the same operator ('in') that is used for
single dimensional arrays. Write the whole sequence of indices in
parentheses, separated by commas, as the left operand:
- (SUBSCRIPT1, SUBSCRIPT2, ...) in ARRAY
+ if ((SUBSCRIPT1, SUBSCRIPT2, ...) in ARRAY)
+ ...
- The following example treats its input as a two-dimensional array of
-fields; it rotates this array 90 degrees clockwise and prints the
+ Here is an example that treats its input as a two-dimensional array
+of fields; it rotates this array 90 degrees clockwise and prints the
result. It assumes that all lines have the same number of elements:
{
@@ -11660,7 +12563,8 @@ with numbers. Optional parameters are enclosed in square brackets ([ ]):
'log(X)'
Return the natural logarithm of X, if X is positive; otherwise,
- report an error.
+ return `NaN' ("not a number") on IEEE 754 systems. Additionally,
+ `gawk' prints a warning message when `x' is negative.
'rand()'
Return a random number. The values of 'rand()' are uniformly
@@ -11731,6 +12635,9 @@ with numbers. Optional parameters are enclosed in square brackets ([ ]):
easy to keep track of the seeds in case you need to consistently
reproduce sequences of random numbers.
+ POSIX does not specify the initial seed; it differs among `awk'
+ implementations.
+
---------- Footnotes ----------
(1) The C version of 'rand()' on many Unix systems is known to
@@ -12235,12 +13142,21 @@ Options::):
also returned if LENGTH is greater than the number of characters
remaining in the string, counting from character START.
+<<<<<<< HEAD
If START is less than one, 'substr()' treats it as if it was one.
(POSIX doesn't specify what to do in this case: Brian Kernighan's
'awk' acts this way, and therefore 'gawk' does too.) If START is
greater than the number of characters in the string, 'substr()'
returns the null string. Similarly, if LENGTH is present but less
than or equal to zero, the null string is returned.
+=======
+ If START is less than one, `substr()' treats it as if it was one.
+ (POSIX doesn't specify what to do in this case: BWK `awk' acts
+ this way, and therefore `gawk' does too.) If START is greater
+ than the number of characters in the string, `substr()' returns
+ the null string. Similarly, if LENGTH is present but less than or
+ equal to zero, the null string is returned.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
The string returned by 'substr()' _cannot_ be assigned. Thus, it
is a mistake to attempt to change a portion of a string, as shown
@@ -12296,9 +13212,19 @@ File: gawk.info, Node: Gory Details, Up: String Functions
9.1.3.1 More About '\' and '&' with 'sub()', 'gsub()', and 'gensub()'
.....................................................................
+<<<<<<< HEAD
When using 'sub()', 'gsub()', or 'gensub()', and trying to get literal
backslashes and ampersands into the replacement text, you need to
remember that there are several levels of "escape processing" going on.
+=======
+ CAUTION: This section has been known to cause headaches. You
+ might want to skip it upon first reading.
+
+ When using `sub()', `gsub()', or `gensub()', and trying to get
+literal backslashes and ampersands into the replacement text, you need
+to remember that there are several levels of "escape processing" going
+on.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
First, there is the "lexical" level, which is when 'awk' reads your
program and builds an internal copy of it to execute. Then there is the
@@ -12310,6 +13236,7 @@ come after a backslash. At the lexical level, it looks for the escape
sequences listed in *note Escape Sequences::. Thus, for every '\' that
'awk' processes at the runtime level, you must type two backslashes at
the lexical level. When a character that is not valid for an escape
+<<<<<<< HEAD
sequence follows the '\', Brian Kernighan's 'awk' and 'gawk' both simply
remove the initial '\' and put the next character into the string.
Thus, for example, '"a\qb"' is treated as '"aqb"'.
@@ -12334,6 +13261,32 @@ is illustrated in *note Table 9.1: table-sub-escapes.
Table 9.1: Historical Escape Sequence Processing for 'sub()' and
'gsub()'
+=======
+sequence follows the `\', BWK `awk' and `gawk' both simply remove the
+initial `\' and put the next character into the string. Thus, for
+example, `"a\qb"' is treated as `"aqb"'.
+
+ At the runtime level, the various functions handle sequences of `\'
+and `&' differently. The situation is (sadly) somewhat complex.
+Historically, the `sub()' and `gsub()' functions treated the two
+character sequence `\&' specially; this sequence was replaced in the
+generated text with a single `&'. Any other `\' within the REPLACEMENT
+string that did not precede an `&' was passed through unchanged. This
+is illustrated in *note table-sub-escapes::.
+
+ You type `sub()' sees `sub()' generates
+ ------- --------- --------------
+ `\&' `&' The matched text
+ `\\&' `\&' A literal `&'
+ `\\\&' `\&' A literal `&'
+ `\\\\&' `\\&' A literal `\&'
+ `\\\\\&' `\\&' A literal `\&'
+ `\\\\\\&' `\\\&' A literal `\\&'
+ `\\q' `\q' A literal `\q'
+
+Table 9.1: Historical Escape Sequence Processing for `sub()' and
+`gsub()'
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
This table shows both the lexical-level processing, where an odd number
of backslashes becomes an even number at the runtime level, as well as
@@ -12342,6 +13295,7 @@ the rest of the following tables only show the case of even numbers of
backslashes entered at the lexical level.)
The problem with the historical approach is that there is no way to
+<<<<<<< HEAD
get a literal '\' followed by the matched text.
The 1992 POSIX standard attempted to fix this problem. That standard
@@ -12388,6 +13342,29 @@ table-sub-proposed.
'\\\\' '\\' '\\'
Table 9.3: Proposed Rules For 'sub()' And Backslash
+=======
+get a literal `\' followed by the matched text.
+
+ Several editions of the POSIX standard attempted to fix this problem
+but weren't successful. The details are irrelevant at this point in
+time.
+
+ At one point, the `gawk' maintainer submitted proposed text for a
+revised standard that reverts to rules that correspond more closely to
+the original existing practice. The proposed rules have special cases
+that make it possible to produce a `\' preceding the matched text.
+This is shown in *note table-sub-proposed::.
+
+ You type `sub()' sees `sub()' generates
+ ------- --------- --------------
+ `\\\\\\&' `\\\&' A literal `\&'
+ `\\\\&' `\\&' A literal `\', followed by the matched text
+ `\\&' `\&' A literal `&'
+ `\\q' `\q' A literal `\q'
+ `\\\\' `\\' `\\'
+
+Table 9.2: GNU `awk' Rules For `sub()' And Backslash
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
In a nutshell, at the runtime level, there are now three special
sequences of characters ('\\\&', '\\&' and '\&') whereas historically
@@ -12395,17 +13372,26 @@ there was only one. However, as in the historical case, any '\' that is
not part of one of these three sequences is not special and appears in
the output literally.
+<<<<<<< HEAD
'gawk' 3.0 and 3.1 follow these proposed POSIX rules for 'sub()' and
'gsub()'. The POSIX standard took much longer to be revised than was
expected in 1996. The 2001 standard does not follow the above rules.
Instead, the rules there are somewhat simpler. The results are similar
except for one case.
+=======
+ `gawk' 3.0 and 3.1 follow these rules for `sub()' and `gsub()'. The
+POSIX standard took much longer to be revised than was expected. In
+addition, the `gawk' maintainer's proposal was lost during the
+standardization process. The final rules are somewhat simpler. The
+results are similar except for one case.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
The POSIX rules state that '\&' in the replacement string produces a
literal '&', '\\' produces a literal '\', and '\' followed by anything
else is not special; the '\' is placed straight into the output. These
rules are presented in *note Table 9.4: table-posix-sub.
+<<<<<<< HEAD
You type 'sub()' sees 'sub()' generates
----- ------- ----------
'\\\\\\&' '\\\&' a literal '\&'
@@ -12415,10 +13401,22 @@ rules are presented in *note Table 9.4: table-posix-sub.
'\\\\' '\\' '\'
Table 9.4: POSIX Rules For 'sub()' And 'gsub()'
+=======
+ You type `sub()' sees `sub()' generates
+ ------- --------- --------------
+ `\\\\\\&' `\\\&' A literal `\&'
+ `\\\\&' `\\&' A literal `\', followed by the matched text
+ `\\&' `\&' A literal `&'
+ `\\q' `\q' A literal `\q'
+ `\\\\' `\\' `\'
+
+Table 9.3: POSIX Rules For `sub()' And `gsub()'
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
The only case where the difference is noticeable is the last one:
'\\\\' is seen as '\\' and produces '\' instead of '\\'.
+<<<<<<< HEAD
Starting with version 3.1.4, 'gawk' followed the POSIX rules when
'--posix' is specified (*note Options::). Otherwise, it continued to
follow the 1996 proposed rules, since that had been its behavior for
@@ -12429,6 +13427,18 @@ rules the default, breaking well over a decade's worth of backwards
compatibility.(2) Needless to say, this was a bad idea, and as of
version 4.0.1, 'gawk' resumed its historical behavior, and only follows
the POSIX rules when '--posix' is given.
+=======
+ Starting with version 3.1.4, `gawk' followed the POSIX rules when
+`--posix' is specified (*note Options::). Otherwise, it continued to
+follow the proposed rules, since that had been its behavior for many
+years.
+
+ When version 4.0.0 was released, the `gawk' maintainer made the
+POSIX rules the default, breaking well over a decade's worth of
+backwards compatibility.(1) Needless to say, this was a bad idea, and
+as of version 4.0.1, `gawk' resumed its historical behavior, and only
+follows the POSIX rules when `--posix' is given.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
The rules for 'gensub()' are considerably simpler. At the runtime
level, whenever 'gawk' sees a '\', if the following character is a
@@ -12437,6 +13447,7 @@ subexpression is placed in the generated output. Otherwise, no matter
what character follows the '\', it appears in the generated text and the
'\' does not, as shown in *note Table 9.5: table-gensub-escapes.
+<<<<<<< HEAD
You type 'gensub()' sees 'gensub()' generates
----- --------- ------------
'&' '&' the matched text
@@ -12447,6 +13458,18 @@ what character follows the '\', it appears in the generated text and the
'\\q' '\q' a literal 'q'
Table 9.5: Escape Sequence Processing For 'gensub()'
+=======
+ You type `gensub()' sees `gensub()' generates
+ ------- ------------ -----------------
+ `&' `&' The matched text
+ `\\&' `\&' A literal `&'
+ `\\\\' `\\' A literal `\'
+ `\\\\&' `\\&' A literal `\', then the matched text
+ `\\\\\\&' `\\\&' A literal `\&'
+ `\\q' `\q' A literal `q'
+
+Table 9.4: Escape Sequence Processing For `gensub()'
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Because of the complexity of the lexical and runtime level processing
and the special cases for 'sub()' and 'gsub()', we recommend the use of
@@ -12465,9 +13488,7 @@ Although this makes a certain amount of sense, it can be surprising.
---------- Footnotes ----------
- (1) This consequence was certainly unintended.
-
- (2) This was rather naive of him, despite there being a note in this
+ (1) This was rather naive of him, despite there being a note in this
section indicating that the next major version would move to the POSIX
rules.
@@ -12514,10 +13535,17 @@ parameters are enclosed in square brackets ([ ]):
function--'gawk' also buffers its output and the 'fflush()'
function forces 'gawk' to flush its buffers.
+<<<<<<< HEAD
'fflush()' was added to Brian Kernighan's 'awk' in April of 1992.
For two decades, it was not part of the POSIX standard. As of
December, 2012, it was accepted for inclusion into the POSIX
standard. See the Austin Group website
+=======
+ `fflush()' was added to BWK `awk' in April of 1992. For two
+ decades, it was not part of the POSIX standard. As of December,
+ 2012, it was accepted for inclusion into the POSIX standard. See
+ the Austin Group website
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(http://austingroupbugs.net/view.php?id=634).
POSIX standardizes 'fflush()' as follows: If there is no argument,
@@ -12994,7 +14022,7 @@ bitwise AND, OR, and XOR. The operations are described in *note Table
0 | 0 0 | 0 1 | 0 1
1 | 0 1 | 1 1 | 1 0
-Table 9.6: Bitwise Operations
+Table 9.5: Bitwise Operations
As you can see, the result of an AND operation is 1 only when _both_
bits are 1. The result of an OR operation is 1 if _either_ bit is 1.
@@ -13192,8 +14220,16 @@ File: gawk.info, Node: Definition Syntax, Next: Function Example, Up: User-de
9.2.1 Function Definition Syntax
--------------------------------
+<<<<<<< HEAD
Definitions of functions can appear anywhere between the rules of an
'awk' program. Thus, the general form of an 'awk' program is extended
+=======
+ It's entirely fair to say that the `awk' syntax for local variable
+ definitions is appallingly awful. -- Brian Kernighan
+
+ Definitions of functions can appear anywhere between the rules of an
+`awk' program. Thus, the general form of an `awk' program is extended
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
to include sequences of rules _and_ user-defined function definitions.
There is no need to put the definition of a function before all uses of
the function. This is because 'awk' reads the entire program before
@@ -13221,8 +14257,13 @@ call.
have a parameter with the same name as the function itself. In
addition, according to the POSIX standard, function parameters cannot
have the same name as one of the special built-in variables (*note
+<<<<<<< HEAD
Built-in Variables::). Not all versions of 'awk' enforce this
restriction.)
+=======
+Built-in Variables::). Not all versions of `awk' enforce this
+restriction.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Local variables act like the empty string if referenced where a
string value is required, and like zero if referenced where a numeric
@@ -13328,7 +14369,8 @@ this program, using our function to format the results, prints:
5.6
21.2
- This function deletes all the elements in an array:
+ This function deletes all the elements in an array (recall that the
+extra whitespace signifies the start of the local variable list):
function delarray(a, i)
{
@@ -13347,22 +14389,22 @@ standard.)
The following is an example of a recursive function. It takes a
string as an input parameter and returns the string in backwards order.
Recursive functions must always have a test that stops the recursion.
-In this case, the recursion terminates when the starting position is
-zero, i.e., when there are no more characters left in the string.
+In this case, the recursion terminates when the input string is already
+empty.
- function rev(str, start)
+ function rev(str)
{
- if (start == 0)
+ if (str == "")
return ""
- return (substr(str, start, 1) rev(str, start - 1))
+ return (rev(substr(str, 2)) substr(str, 1, 1))
}
If this function is in a file named 'rev.awk', it can be tested this
way:
$ echo "Don't Panic!" |
- > gawk --source '{ print rev($0, length($0)) }' -f rev.awk
+ > gawk -e '{ print rev($0) }' -f rev.awk
-| !cinaP t'noD
The C 'ctime()' function takes a timestamp and returns it in a
@@ -13611,8 +14653,13 @@ function _are_ visible outside that function.
a[1], a[2], a[3]
}
+<<<<<<< HEAD
prints 'a[1] = 1, a[2] = two, a[3] = 3', because 'changeit' stores
'"two"' in the second element of 'a'.
+=======
+ prints `a[1] = 1, a[2] = two, a[3] = 3', because `changeit()'
+ stores `"two"' in the second element of `a'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Some 'awk' implementations allow you to call a function that has not
been defined. They only report a problem at runtime when the program
@@ -13760,7 +14807,11 @@ File: gawk.info, Node: Indirect Calls, Next: Functions Summary, Prev: User-de
9.3 Indirect Function Calls
===========================
+<<<<<<< HEAD
This section describes a 'gawk'-specific extension.
+=======
+This section describes an advanced, `gawk'-specific extension.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Often, you may wish to defer the choice of function to call until
runtime. For example, you may have different kinds of records, each of
@@ -13798,8 +14849,13 @@ your test scores:
}
This style of programming works, but can be awkward. With "indirect"
+<<<<<<< HEAD
function calls, you tell 'gawk' to use the _value_ of a variable as the
name of the function to call.
+=======
+function calls, you tell `gawk' to use the _value_ of a variable as the
+_name_ of the function to call.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
The syntax is similar to that of a regular function call: an
identifier immediately followed by a left parenthesis, any arguments,
@@ -13841,7 +14897,6 @@ using indirect function calls.
Otherwise they perform the expected computations and are not unusual.
# For each record, print the class name and the requested statistics
-
{
class_name = $1
gsub(/_/, " ", class_name) # Replace _ with spaces
@@ -14028,11 +15083,19 @@ names of the two comparison functions:
Remember that you must supply a leading '@' in front of an indirect
function call.
+<<<<<<< HEAD
Unfortunately, indirect function calls cannot be used with the
built-in functions. However, you can generally write "wrapper"
functions which call the built-in ones, and those can be called
indirectly. (Other than, perhaps, the mathematical functions, there is
not a lot of reason to try to call the built-in functions indirectly.)
+=======
+ Starting with version 4.1.2 of `gawk', indirect function calls may
+also be used with built-in functions and with extension functions
+(*note Dynamic Extensions::). The only thing you cannot do is pass a
+regular expression constant to a built-in function through an indirect
+function call.(1)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
'gawk' does its best to make indirect function calls efficient. For
example, in the following case:
@@ -14040,7 +15103,16 @@ example, in the following case:
for (i = 1; i <= n; i++)
@the_func()
+<<<<<<< HEAD
'gawk' will look up the actual function to call only once.
+=======
+`gawk' looks up the actual function to call only once.
+
+ ---------- Footnotes ----------
+
+ (1) This may change in a future version; recheck the documentation
+that comes with your version of `gawk' to see if it has.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Functions Summary, Prev: Indirect Calls, Up: Functions
@@ -14076,8 +15148,15 @@ File: gawk.info, Node: Functions Summary, Prev: Indirect Calls, Up: Functions
whitespace.
* User-defined functions may call other user-defined (and built-in)
+<<<<<<< HEAD
functions and may call themselves recursively. Function parameters
"hide" any global variables of the same names.
+=======
+ functions and may call themselves recursively. Function parameters
+ "hide" any global variables of the same names. You cannot use the
+ name of a reserved variable (such as `ARGC') as the name of a
+ parameter in user-defined functions.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Scalar values are passed to user-defined functions by value. Array
parameters are passed by reference; any changes made by the
@@ -14092,10 +15171,18 @@ File: gawk.info, Node: Functions Summary, Prev: Indirect Calls, Up: Functions
function, how that function treats the variable can set its nature:
either scalar or array.
+<<<<<<< HEAD
* 'gawk' provides indirect function calls using a special syntax. By
setting a variable to the name of a user-defined function, you can
determine at runtime what function will be called at that point in
the program. This is equivalent to function pointers in C and C++.
+=======
+ * `gawk' provides indirect function calls using a special syntax.
+ By setting a variable to the name of a function, you can determine
+ at runtime what function will be called at that point in the
+ program. This is equivalent to function pointers in C and C++.
+
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Library Functions, Next: Sample Programs, Prev: Functions, Up: Top
@@ -14121,9 +15208,15 @@ P.J. Plauger wrote:
In fact, they felt this idea was so important that they placed this
statement on the cover of their book. Because we believe strongly that
+<<<<<<< HEAD
their statement is correct, this major node and *note Sample Programs::,
provide a good-sized body of code for you to read, and we hope, to learn
from.
+=======
+their statement is correct, this major node and *note Sample
+Programs::, provide a good-sized body of code for you to read and, we
+hope, to learn from.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
This major node presents a library of useful 'awk' functions. Many
of the sample programs presented later in this Info file use these
@@ -14174,7 +15267,7 @@ for different implementations of 'awk' is pretty straightforward.
* Group Functions:: Functions for getting group information.
* Walking Arrays:: A function to walk arrays of arrays.
* Library Functions Summary:: Summary of library functions.
-* Library exercises:: Exercises.
+* Library Exercises:: Exercises.
---------- Footnotes ----------
@@ -14309,8 +15402,9 @@ versions of 'awk':
ret = 0
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
}
@@ -14322,6 +15416,8 @@ versions of 'awk':
for (i = 1; i <= n; i++) {
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
@@ -14601,8 +15697,7 @@ use on some older systems, they are not really worth worrying about:
}
#### test code ####
- # BEGIN \
- # {
+ # BEGIN {
# for (;;) {
# printf("enter a character: ")
# if (getline var <= 0)
@@ -15268,8 +16363,7 @@ option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) {
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) {
Optind++
_opti = 0
@@ -15723,8 +16817,7 @@ the same names:
# group.awk --- functions for dealing with the group file
- BEGIN \
- {
+ BEGIN {
# Change to suit your system
_gr_awklib = "/usr/local/libexec/awk/"
}
@@ -15928,7 +17021,7 @@ value. Here is a main program to demonstrate:
-| a[3] = 3

-File: gawk.info, Node: Library Functions Summary, Next: Library exercises, Prev: Walking Arrays, Up: Library Functions
+File: gawk.info, Node: Library Functions Summary, Next: Library Exercises, Prev: Walking Arrays, Up: Library Functions
10.8 Summary
============
@@ -15964,7 +17057,7 @@ File: gawk.info, Node: Library Functions Summary, Next: Library exercises, Pr
A simple function to traverse an array of arrays to any depth.

-File: gawk.info, Node: Library exercises, Prev: Library Functions Summary, Up: Library Functions
+File: gawk.info, Node: Library Exercises, Prev: Library Functions Summary, Up: Library Functions
10.9 Exercises
==============
@@ -16145,8 +17238,7 @@ through the command-line options. Exactly one of the variables
should be done by fields or by characters, respectively. When cutting
by characters, the output field separator is set to the null string:
- BEGIN \
- {
+ BEGIN {
FS = "\t" # default
OFS = FS
while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) {
@@ -16530,8 +17622,7 @@ line is printed, with a leading file name and colon if necessary:
The 'END' rule takes care of producing the correct exit status. If
there are no matches, the exit status is one; otherwise it is zero:
- END \
- {
+ END {
exit (total == 0)
}
@@ -16549,6 +17640,7 @@ options, and then exits:
The variable 'e' is used so that the function fits nicely on the
printed page.
+<<<<<<< HEAD
Just a note on programming style: you may have noticed that the 'END'
rule uses backslash continuation, with the open brace on a line by
itself. This is so that it more closely resembles the way functions are
@@ -16556,6 +17648,8 @@ written. Many of the examples in this major node use this style. You
can decide for yourself if you like writing your 'BEGIN' and 'END' rules
this way or not.
+=======
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
---------- Footnotes ----------
(1) It also introduces a subtle bug; if a match happens, we output
@@ -16598,8 +17692,7 @@ the group numbers:
# uid=12(foo) euid=34(bar) gid=3(baz) \
# egid=5(blat) groups=9(nine),2(two),1(one)
- BEGIN \
- {
+ BEGIN {
uid = PROCINFO["uid"]
euid = PROCINFO["euid"]
gid = PROCINFO["gid"]
@@ -16806,8 +17899,7 @@ by setting 'ARGV[1]' to '"-"' and 'ARGC' to two:
# Copy standard input to all named output files.
# Append content if -a option is supplied.
#
- BEGIN \
- {
+ BEGIN {
for (i = 1; i < ARGC; i++)
copy[i] = ARGV[i]
@@ -16857,8 +17949,7 @@ statements, while the second executes N'*'M 'if' statements.
Finally, the 'END' rule cleans up by closing all the output files:
- END \
- {
+ END {
for (i in copy)
close(copy[i])
}
@@ -16945,8 +18036,7 @@ standard output, '/dev/stdout':
# -n skip n fields
# +n skip n characters, skip fields first
- BEGIN \
- {
+ BEGIN {
count = 1
outputfile = "/dev/stdout"
opts = "udc0:1:2:3:4:5:6:7:8:9:"
@@ -16958,7 +18048,7 @@ standard output, '/dev/stdout':
else if (c == "c")
do_count++
else if (index("0123456789", c) != 0) {
- # getopt requires args to options
+ # getopt() requires args to options
# this messes us up for things like -5
if (Optarg ~ /^[[:digit:]]+$/)
fcount = (c Optarg) + 0
@@ -17334,8 +18424,7 @@ Statement::), but the processing could be done with a series of
# Requires getlocaltime() library function
# usage: alarm time [ "message" [ count [ delay ] ] ]
- BEGIN \
- {
+ BEGIN {
# Initial argument sanity checking
usage1 = "usage: alarm time ['message' [count [delay]]]"
usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
@@ -17548,6 +18637,10 @@ array only once, in a 'BEGIN' rule. However, this assumes that the
"from" and "to" lists will never change throughout the lifetime of the
program.
+ Another obvious improvement is to enable the use of ranges, such as
+`a-z', as allowed by the `tr' utility. Look at the code for `cut.awk'
+(*note Cut Program::) for inspiration.
+
---------- Footnotes ----------
(1) On some older systems, including Solaris, the system version of
@@ -17555,8 +18648,13 @@ program.
in square brackets ('[a-z]') and quoted, to prevent the shell from
attempting a file name expansion. This is not a feature.
+<<<<<<< HEAD
(2) This program was written before 'gawk' acquired the ability to
split each character in a string into separate array elements.
+=======
+ (2) This program was also written before `gawk' acquired the ability
+to split each character in a string into separate array elements.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Labels Program, Next: Word Sorting, Prev: Translate Program, Up: Miscellaneous Programs
@@ -17655,8 +18753,7 @@ not have been an even multiple of 20 labels in the data:
Count++
}
- END \
- {
+ END {
printpage()
}
@@ -18148,8 +19245,13 @@ language.(1) It works as follows:
2. For any arguments that do represent 'awk' text, put the arguments
into a shell variable that will be expanded. There are two cases:
+<<<<<<< HEAD
a. Literal text, provided with '--source' or '--source='. This
text is just appended directly.
+=======
+ a. Literal text, provided with `-e' or `--source'. This text is
+ just appended directly.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
b. Source file names, provided with '-f'. We use a neat trick
and append '@include FILENAME' to the shell variable's
@@ -18638,14 +19740,30 @@ File: gawk.info, Node: Programs Exercises, Prev: Programs Summary, Up: Sample
Program::) to accept the same arguments and perform in the same
way.
+<<<<<<< HEAD
4. The 'split.awk' program (*note Split Program::) uses the 'chr()'
and 'ord()' functions to move through the letters of the alphabet.
Modify the program to instead use only the 'awk' built-in
functions, such as 'index()' and 'substr()'.
5. The 'split.awk' program (*note Split Program::) assumes that
+=======
+ 4. The `split.awk' program (*note Split Program::) assumes that
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
letters are contiguous in the character set, which isn't true for
- EBCDIC systems. Fix this problem.
+ EBCDIC systems. Fix this problem. (Hint: Consider a different
+ way to work through the alphabet, without relying on `ord()' and
+ `chr()'.)
+
+ 5. In `uniq.awk' (*note Uniq Program::), the logic for choosing which
+ lines to print represents a "state machine", which is "a device
+ that can be in one of a set number of stable conditions depending
+ on its previous condition and on the present values of its
+ inputs."(1) Brian Kernighan suggests that "an alternative approach
+ to state machines is to just read the input into an array, then
+ use indexing. It's almost always easier code, and for most inputs
+ where you would use this, just as fast." Rewrite the logic to
+ follow this suggestion.
6. Why can't the 'wc.awk' program (*note Wc Program::) just use the
value of 'FNR' in 'endfile()'? Hint: Examine the code in *note
@@ -18706,6 +19824,11 @@ File: gawk.info, Node: Programs Exercises, Prev: Programs Summary, Up: Sample
13. Modify 'anagram.awk' (*note Anagram Program::), to avoid the use
of the external 'sort' utility.
+ ---------- Footnotes ----------
+
+ (1) This is the definition returned from entering `define: state
+machine' into Google.
+

File: gawk.info, Node: Advanced Features, Next: Internationalization, Prev: Sample Programs, Up: Top
@@ -19144,24 +20267,7 @@ File: gawk.info, Node: Two-way I/O, Next: TCP/IP Networking, Prev: Array Sort
12.3 Two-Way Communications with Another Process
================================================
- From: brennan@whidbey.com (Mike Brennan)
- Newsgroups: comp.lang.awk
- Subject: Re: Learn the SECRET to Attract Women Easily
- Date: 4 Aug 1997 17:34:46 GMT
- Message-ID: <5s53rm$eca@news.whidbey.com>
-
- On 3 Aug 1997 13:17:43 GMT, Want More Dates???
- <tracy78@kilgrona.com> wrote:
- >Learn the SECRET to Attract Women Easily
- >
- >The SCENT(tm) Pheromone Sex Attractant For Men to Attract Women
-
- The scent of awk programmers is a lot more attractive to women than
- the scent of perl programmers.
- --
- Mike Brennan
-
- It is often useful to be able to send data to a separate program for
+It is often useful to be able to send data to a separate program for
processing and then read the result. This can always be done with
temporary files:
@@ -19177,6 +20283,7 @@ temporary files:
close(tempfile)
system("rm " tempfile)
+<<<<<<< HEAD
This works, but not elegantly. Among other things, it requires that the
program be run in a directory that cannot be shared among users; for
example, '/tmp' will not do, as another user might happen to be using a
@@ -19186,6 +20293,16 @@ temporary file with the same name.
another process. The second process is termed a "coprocess", since it
runs in parallel with 'gawk'. The two-way connection is created using
the '|&' operator (borrowed from the Korn shell, 'ksh'):(1)
+=======
+This works, but not elegantly. Among other things, it requires that
+the program be run in a directory that cannot be shared among users;
+for example, `/tmp' will not do, as another user might happen to be
+using a temporary file with the same name.(1) However, with `gawk', it
+is possible to open a _two-way_ pipe to another process. The second
+process is termed a "coprocess", since it runs in parallel with `gawk'.
+The two-way connection is created using the `|&' operator (borrowed
+from the Korn shell, `ksh'):(2)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
do {
print DATA |& "subprogram"
@@ -19271,7 +20388,11 @@ using regular pipes.
---------- Footnotes ----------
- (1) This is very different from the same operator in the C shell and
+ (1) Michael Brennan suggests the use of `rand()' to generate unique
+file names. This is a valid point; nevertheless, temporary files remain
+more difficult than two-way pipes.
+
+ (2) This is very different from the same operator in the C shell and
in Bash.

@@ -19294,7 +20415,11 @@ network connection.
You can think of this as just a _very long_ two-way pipeline to a
coprocess. The way 'gawk' decides that you want to use TCP/IP
networking is by recognizing special file names that begin with one of
+<<<<<<< HEAD
'/inet/', '/inet4/' or '/inet6/'.
+=======
+`/inet/', `/inet4/' or `/inet6/'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
The full syntax of the special file name is
'/NET-TYPE/PROTOCOL/LOCAL-PORT/REMOTE-HOST/REMOTE-PORT'. The components
@@ -19763,11 +20888,19 @@ are:
Text-collation information; i.e., how different characters and/or
groups of characters sort in a given language.
+<<<<<<< HEAD
'LC_CTYPE'
Character-type information (alphabetic, digit, upper- or lowercase,
and so on). This information is accessed via the POSIX character
classes in regular expressions, such as '/[[:alnum:]]/' (*note
Regexp Operators::).
+=======
+`LC_CTYPE'
+ Character-type information (alphabetic, digit, upper- or
+ lowercase, and so on) as well as character encoding. This
+ information is accessed via the POSIX character classes in regular
+ expressions, such as `/[[:alnum:]]/' (*note Regexp Operators::).
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
'LC_MONETARY'
Monetary information, such as the currency symbol, and whether the
@@ -19777,11 +20910,15 @@ are:
Numeric information, such as which characters to use for the
decimal point and the thousands separator.(2)
+<<<<<<< HEAD
'LC_RESPONSE'
Response information, such as how "yes" and "no" appear in the
local language, and possibly other information as well.
'LC_TIME'
+=======
+`LC_TIME'
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Time- and date-related information, such as 12- or 24-hour clock,
month printed before or after the day in a date, local month
abbreviations, and so on.
@@ -19877,17 +21014,36 @@ outlined in *note Explaining gettext::, like so:
printf(_"Number of users is %d\n", nusers)
3. If you are creating strings dynamically, you can still translate
+<<<<<<< HEAD
them, using the 'dcgettext()' built-in function:
+=======
+ them, using the `dcgettext()' built-in function:(1)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
- message = nusers " users logged in"
- message = dcgettext(message, "adminprog")
- print message
+ if (groggy)
+ message = dcgettext("%d customers disturbing me\n", "adminprog")
+ else
+ message = dcgettext("enjoying %d customers\n", "adminprog")
+ printf(message, ncustomers)
Here, the call to 'dcgettext()' supplies a different text domain
('"adminprog"') in which to find the message, but it uses the
default '"LC_MESSAGES"' category.
+<<<<<<< HEAD
4. During development, you might want to put the '.gmo' file in a
+=======
+ The previous example only works if `ncustomers' is greater than
+ one. This example would be better done with `dcngettext()':
+
+ if (groggy)
+ message = dcngettext("%d customer disturbing me\n", "%d customers disturbing me\n", "adminprog")
+ else
+ message = dcngettext("enjoying %d customer\n", "enjoying %d customers\n", "adminprog")
+ printf(message, ncustomers)
+
+ 4. During development, you might want to put the `.gmo' file in a
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
private directory for testing. This is done with the
'bindtextdomain()' built-in function:
@@ -19905,6 +21061,10 @@ outlined in *note Explaining gettext::, like so:
*Note I18N Example::, for an example program showing the steps to
create and use translations from 'awk'.
+ ---------- Footnotes ----------
+
+ (1) Thanks to Bruno Haible for this example.
+

File: gawk.info, Node: Translator i18n, Next: I18N Example, Prev: Programmer i18n, Up: Internationalization
@@ -19939,6 +21099,7 @@ create the initial '.pot' file:
$ gawk --gen-pot -f guide.awk > guide.pot
+<<<<<<< HEAD
When run with '--gen-pot', 'gawk' does not execute your program.
Instead, it parses it as usual and prints all marked strings to standard
output in the format of a GNU 'gettext' Portable Object file. Also
@@ -19946,6 +21107,18 @@ included in the output are any constant strings that appear as the first
argument to 'dcgettext()' or as the first and second argument to
'dcngettext()'.(1) *Note I18N Example::, for the full list of steps to
go through to create and test translations for 'guide'.
+=======
+ When run with `--gen-pot', `gawk' does not execute your program.
+Instead, it parses it as usual and prints all marked strings to
+standard output in the format of a GNU `gettext' Portable Object file.
+Also included in the output are any constant strings that appear as the
+first argument to `dcgettext()' or as the first and second argument to
+`dcngettext()'.(1) You should distribute the generated `.pot' file with
+your `awk' program; translators will eventually use it to provide you
+translations that you can also then distribute. *Note I18N Example::,
+for the full list of steps to go through to create and test
+translations for `guide'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
---------- Footnotes ----------
@@ -20162,8 +21335,7 @@ file to machine-readable '.mo' file. By default, 'msgfmt' creates a
file named 'messages'. This file must be renamed and placed in the
proper directory so that 'gawk' can find it:
- $ msgfmt guide-mellow.po
- $ mv messages en_US.UTF-8/LC_MESSAGES/guide.mo
+ $ msgfmt guide-mellow.po -o en_US.UTF-8/LC_MESSAGES/guide.mo
Finally, we run the program to test it:
@@ -20415,7 +21587,7 @@ are given on the command line as arguments to one or more '-f' options.
('gawk' is not designed to debug command-line programs, only programs
contained in files.) In our case, we invoke the debugger like this:
- $ gawk -D -f getopt.awk -f join.awk -f uniq.awk inputfile
+ $ gawk -D -f getopt.awk -f join.awk -f uniq.awk -1 inputfile
where both 'getopt.awk' and 'uniq.awk' are in '$AWKPATH'. (Experienced
users of GDB or similar debuggers should note that this syntax is
@@ -20467,7 +21639,7 @@ for a breakpoint in 'uniq.awk' is at the beginning of the function
To set the breakpoint, use the 'b' (breakpoint) command:
gawk> b are_equal
- -| Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 64
+ -| Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 63
The debugger tells us the file and line number where the breakpoint
is. Now type 'r' or 'run' and the program runs until it hits the
@@ -20477,8 +21649,8 @@ breakpoint for the first time:
-| Starting program:
-| Stopping in Rule ...
-| Breakpoint 1, are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':64
- -| 64 if (fcount == 0 && charcount == 0)
+ at `awklib/eg/prog/uniq.awk':63
+ -| 63 if (fcount == 0 && charcount == 0)
gawk>
Now we can look at what's going on inside our program. First of all,
@@ -20488,14 +21660,22 @@ current stack frames:
gawk> bt
-| #0 are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':69
- -| #1 in main() at `awklib/eg/prog/uniq.awk':89
+ at `awklib/eg/prog/uniq.awk':68
+ -| #1 in main() at `awklib/eg/prog/uniq.awk':88
+<<<<<<< HEAD
This tells us that 'are_equal()' was called by the main program at
line 89 of 'uniq.awk'. (This is not a big surprise, since this is the
only call to 'are_equal()' in the program, but in more complex programs,
knowing who called a function and with what parameters can be the key to
finding the source of the problem.)
+=======
+ This tells us that `are_equal()' was called by the main program at
+line 88 of `uniq.awk'. (This is not a big surprise, since this is the
+only call to `are_equal()' in the program, but in more complex
+programs, knowing who called a function and with what parameters can be
+the key to finding the source of the problem.)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Now that we're in 'are_equal()', we can start looking at the values
of some variables. Let's say we type 'p n' ('p' is short for "print").
@@ -20511,13 +21691,13 @@ was called without arguments (*note Function Calls::).
A more useful variable to display might be the current record:
gawk> p $0
- -| $0 = string ("gawk is a wonderful program!")
+ -| $0 = "gawk is a wonderful program!"
This might be a bit puzzling at first since this is the second line of
our test input above. Let's look at 'NR':
gawk> p NR
- -| NR = number (2)
+ -| NR = 2
So we can see that 'are_equal()' was only called for the second record
of the file. Of course, this is because our program contains a rule for
@@ -20531,7 +21711,7 @@ of the file. Of course, this is because our program contains a rule for
OK, let's just check that that rule worked correctly:
gawk> p last
- -| last = string ("awk is a wonderful program!")
+ -| last = "awk is a wonderful program!"
Everything we have done so far has verified that the program has
worked as planned, up to and including the call to 'are_equal()', so the
@@ -20540,38 +21720,47 @@ begin "stepping through" the lines of 'are_equal()'. We start by typing
'n' (for "next"):
gawk> n
- -| 67 if (fcount > 0) {
+ -| 66 if (fcount > 0) {
+<<<<<<< HEAD
This tells us that 'gawk' is now ready to execute line 67, which
decides whether to give the lines the special "field skipping" treatment
indicated by the '-f' command-line option. (Notice that we skipped from
where we were before at line 64 to here, since the condition in line 64
'if (fcount == 0 && charcount == 0)' was false.)
+=======
+ This tells us that `gawk' is now ready to execute line 66, which
+decides whether to give the lines the special "field skipping" treatment
+indicated by the `-1' command-line option. (Notice that we skipped
+from where we were before at line 63 to here, since the condition in
+line 63 `if (fcount == 0 && charcount == 0)' was false.)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Continuing to step, we now get to the splitting of the current and
last records:
gawk> n
- -| 68 n = split(last, alast)
+ -| 67 n = split(last, alast)
gawk> n
- -| 69 m = split($0, aline)
+ -| 68 m = split($0, aline)
At this point, we should be curious to see what our records were
split into, so we try to look:
gawk> p n m alast aline
- -| n = number (5)
- -| m = number (5)
+ -| n = 5
+ -| m = untyped variable
-| alast = array, 5 elements
- -| aline = array, 5 elements
+ -| aline = untyped variable
(The 'p' command can take more than one argument, similar to 'awk''s
'print' statement.)
This is kind of disappointing, though. All we found out is that
-there are five elements in each of our arrays. Useful enough (we now
-know that none of the words were accidentally left out), but what if we
-want to see inside the array?
+there are five elements in `alast'; `m' and `aline' don't have values
+yet since we are at line 68 but haven't executed it yet. This
+information is useful enough (we now know that none of the words were
+accidentally left out), but what if we want to see inside the array?
The first choice would be to use subscripts:
@@ -20581,25 +21770,25 @@ want to see inside the array?
Oops!
gawk> p alast[1]
- -| alast["1"] = string ("awk")
+ -| alast["1"] = "awk"
This would be kind of slow for a 100-member array, though, so 'gawk'
provides a shortcut (reminiscent of another language not to be
mentioned):
gawk> p @alast
- -| alast["1"] = string ("awk")
- -| alast["2"] = string ("is")
- -| alast["3"] = string ("a")
- -| alast["4"] = string ("wonderful")
- -| alast["5"] = string ("program!")
+ -| alast["1"] = "awk"
+ -| alast["2"] = "is"
+ -| alast["3"] = "a"
+ -| alast["4"] = "wonderful"
+ -| alast["5"] = "program!"
It looks like we got this far OK. Let's take another step or two:
gawk> n
- -| 70 clast = join(alast, fcount, n)
+ -| 69 clast = join(alast, fcount, n)
gawk> n
- -| 71 cline = join(aline, fcount, m)
+ -| 70 cline = join(aline, fcount, m)
Well, here we are at our error (sorry to spoil the suspense). What
we had in mind was to join the fields starting from the second one to
@@ -20607,8 +21796,8 @@ make the virtual record to compare, and if the first field was numbered
zero, this would work. Let's look at what we've got:
gawk> p cline clast
- -| cline = string ("gawk is a wonderful program!")
- -| clast = string ("awk is a wonderful program!")
+ -| cline = "gawk is a wonderful program!"
+ -| clast = "awk is a wonderful program!"
Hey, those look pretty familiar! They're just our original,
unaltered, input records. A little thinking (the human brain is still
@@ -21243,7 +22432,11 @@ some limitations. A few which are worth being aware of are:
what your mistake was, though, you'll feel like a real guru.
* If you perused the dump of opcodes in *note Miscellaneous Debugger
+<<<<<<< HEAD
Commands::, (or if you are already familiar with 'gawk' internals),
+=======
+ Commands:: (or if you are already familiar with `gawk' internals),
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
you will realize that much of the internal manipulation of data in
'gawk', as in many interpreters, is done on a stack. 'Op_push',
'Op_pop', etc., are the "bread and butter" of most 'gawk' code.
@@ -21286,9 +22479,15 @@ File: gawk.info, Node: Debugging Summary, Prev: Limitations, Up: Debugger
Debugger, GDB.
* Debuggers let you step through your program one statement at a
+<<<<<<< HEAD
time, examine and change variable and array values, and do a number
of other things that let you understand what your program is actually
doing (as opposed to what it is supposed to do).
+=======
+ time, examine and change variable and array values, and do a
+ number of other things that let you understand what your program
+ is actually doing (as opposed to what it is supposed to do).
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Like most debuggers, the 'gawk' debugger works in terms of stack
frames, and lets you set both breakpoints (stop at a point in the
@@ -21318,6 +22517,10 @@ continues on to present arbitrary precision integers, and concludes with
a description of some points where 'gawk' and the POSIX standard are not
quite in agreement.
+ NOTE: Most users of `gawk' can safely skip this chapter. But if
+ you want to do scientific calculations with `gawk', this is the
+ place to be.
+
* Menu:
* Computer Arithmetic:: A quick intro to computer math.
@@ -21451,9 +22654,24 @@ material here.
another number and infinity produce infinity.
"NaN"
+<<<<<<< HEAD
"Not A Number." A special value indicating a result that can't
happen in real math, but that can happen in floating-point
computations.
+=======
+ "Not A Number."(1) A special value that results from attempting a
+ calculation that has no answer as a real number. In such a case,
+ programs can either receive a floating-point exception, or get
+ `NaN' back as the result. The IEEE 754 standard recommends that
+ systems return `NaN'. Some examples:
+
+ `sqrt(-1)'
+ This makes sense in the range of complex numbers, but not in
+ the range of real numbers, so the result is `NaN'.
+
+ `log(-8)'
+ -8 is out of the domain of `log()', so the result is `NaN'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
"Normalized"
How the significand (see later in this list) is usually stored.
@@ -21511,6 +22729,11 @@ Table 15.1: Basic IEEE Format Context Values
NOTE: The precision numbers include the implied leading one that
gives them one extra bit of significand.
+ ---------- Footnotes ----------
+
+ (1) Thanks to Michael Brennan for this description, which I have
+paraphrased, and for the examples.
+

File: gawk.info, Node: MPFR features, Next: FP Math Caution, Prev: Math Definitions, Up: Arbitrary Precision Arithmetic
@@ -21539,10 +22762,16 @@ Auto-set::).
The MPFR library provides precise control over precisions and
rounding modes, and gives correctly rounded, reproducible,
+<<<<<<< HEAD
platform-independent results. With either of the command-line options
'--bignum' or '-M', all floating-point arithmetic operators and numeric
functions can yield results to any desired precision level supported by
MPFR.
+=======
+platform-independent results. With the `-M' command-line option, all
+floating-point arithmetic operators and numeric functions can yield
+results to any desired precision level supported by MPFR.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Two built-in variables, 'PREC' and 'ROUNDMODE', provide control over
the working precision and the rounding mode. The precision and the
@@ -21555,8 +22784,12 @@ File: gawk.info, Node: FP Math Caution, Next: Arbitrary Precision Integers, P
15.4 Floating Point Arithmetic: Caveat Emptor!
==============================================
+<<<<<<< HEAD
Math class is tough!
-- _Late 1980's Barbie_
+=======
+ Math class is tough! -- Teen Talk Barbie, July 1992
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
This minor node provides a high level overview of the issues involved
when doing lots of floating-point arithmetic.(1) The discussion applies
@@ -21838,10 +23071,17 @@ on arithmetic operations:
constant.
If you need to represent a floating-point constant at a higher
+<<<<<<< HEAD
precision than the default and cannot use a command line assignment
to 'PREC', you should either specify the constant as a string, or
as a rational number, whenever possible. The following example
illustrates the differences among various ways to print a
+=======
+ precision than the default and cannot use a command-line
+ assignment to `PREC', you should either specify the constant as a
+ string, or as a rational number, whenever possible. The following
+ example illustrates the differences among various ways to print a
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
floating-point constant:
$ gawk -M 'BEGIN { PREC = 113; printf("%0.25f\n", 0.1) }'
@@ -21950,6 +23190,7 @@ File: gawk.info, Node: Arbitrary Precision Integers, Next: POSIX Floating Poin
15.5 Arbitrary Precision Integer Arithmetic with 'gawk'
=======================================================
+<<<<<<< HEAD
When given one of the options '--bignum' or '-M', 'gawk' performs all
integer arithmetic using GMP arbitrary precision integers. Any number
that looks like an integer in a source or data file is stored as an
@@ -21957,6 +23198,15 @@ arbitrary precision integer. The size of the integer is limited only by
the available memory. For example, the following computes 5^4^3^2, the
result of which is beyond the limits of ordinary hardware
double-precision floating point values:
+=======
+When given the `-M' option, `gawk' performs all integer arithmetic
+using GMP arbitrary precision integers. Any number that looks like an
+integer in a source or data file is stored as an arbitrary precision
+integer. The size of the integer is limited only by the available
+memory. For example, the following computes 5^4^3^2, the result of
+which is beyond the limits of ordinary hardware double-precision
+floating point values:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
$ gawk -M 'BEGIN {
> x = 5^4^3^2
@@ -22138,9 +23388,15 @@ File: gawk.info, Node: Floating point summary, Prev: POSIX Floating Point Prob
floating-point values. The default for 'awk' is to use
double-precision floating-point values.
+<<<<<<< HEAD
* In the 1980's, Barbie mistakenly said "Math class is tough!" While
math isn't tough, floating-point arithmetic isn't the same as
pencil and paper math, and care must be taken:
+=======
+ * In the early 1990's, Barbie mistakenly said "Math class is tough!"
+ While math isn't tough, floating-point arithmetic isn't the same
+ as pencil and paper math, and care must be taken:
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
- Not all numbers can be represented exactly.
@@ -22156,6 +23412,7 @@ File: gawk.info, Node: Floating point summary, Prev: POSIX Floating Point Prob
* Often, increasing the accuracy and then rounding to the desired
number of digits produces reasonable results.
+<<<<<<< HEAD
* Use either '-M' or '--bignum' to enable MPFR arithmetic. Use
'PREC' to set the precision in bits, and 'ROUNDMODE' to set the
IEEE 754 rounding mode.
@@ -22163,6 +23420,15 @@ File: gawk.info, Node: Floating point summary, Prev: POSIX Floating Point Prob
* With '-M' or '--bignum', 'gawk' performs arbitrary precision
integer arithmetic using the GMP library. This is faster and more
space efficient than using MPFR for the same calculations.
+=======
+ * Use `-M' (or `--bignum') to enable MPFR arithmetic. Use `PREC' to
+ set the precision in bits, and `ROUNDMODE' to set the IEEE 754
+ rounding mode.
+
+ * With `-M', `gawk' performs arbitrary precision integer arithmetic
+ using the GMP library. This is faster and more space efficient
+ than using MPFR for the same calculations.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* There are several "dark corners" with respect to floating-point
numbers where 'gawk' disagrees with the POSIX standard. It pays to
@@ -22356,8 +23622,13 @@ Example::) and also the 'testext.c' code for testing the APIs.
Some other bits and pieces:
+<<<<<<< HEAD
* The API provides access to 'gawk''s 'do_XXX' values, reflecting
command line options, like 'do_lint', 'do_profiling' and so on
+=======
+ * The API provides access to `gawk''s `do_XXX' values, reflecting
+ command-line options, like `do_lint', `do_profiling' and so on
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(*note Extension API Variables::). These are informational: an
extension cannot affect their values inside 'gawk'. In addition,
attempting to assign to them produces a compile-time error.
@@ -22479,7 +23750,11 @@ operations:
place '-Dinline=''' on your command line, or use the GNU Autotools
and include a 'config.h' file in your extensions.
+<<<<<<< HEAD
* All pointers filled in by 'gawk' are to memory managed by 'gawk'
+=======
+ * All pointers filled in by `gawk' point to memory managed by `gawk'
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
and should be treated by the extension as read-only. Memory for
_all_ strings passed into 'gawk' from the extension _must_ come
from calling the API-provided function pointers 'api_malloc()',
@@ -22885,7 +24160,7 @@ File: gawk.info, Node: Exit Callback Functions, Next: Extension Version String
An "exit callback" function is a function that 'gawk' calls before it
exits. Such functions are useful if you have general "cleanup" tasks
-that should be performed in your extension (such as closing data base
+that should be performed in your extension (such as closing database
connections or other resource deallocations). You can register such a
function with 'gawk' using the following function.
@@ -25930,7 +27205,7 @@ current version of 'gawk'.
- Indirect function calls (*note Indirect Calls::).
- Directories on the command line produce a warning and are
- skipped (*note Command line directories::).
+ skipped (*note Command-line directories::).
* New keywords:
@@ -25979,8 +27254,13 @@ current version of 'gawk'.
- The 'bindtextdomain()', 'dcgettext()' and 'dcngettext()'
functions for internationalization (*note Programmer i18n::).
+<<<<<<< HEAD
- The 'fflush()' function from Brian Kernighan's version of
'awk' (*note I/O Functions::).
+=======
+ - The `fflush()' function from BWK `awk' (*note I/O
+ Functions::).
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
- The 'gensub()', 'patsplit()', and 'strtonum()' functions for
more powerful text manipulation (*note String Functions::).
@@ -25990,6 +27270,7 @@ current version of 'gawk'.
* Changes and/or additions in the command-line options:
+<<<<<<< HEAD
- The 'AWKPATH' environment variable for specifying a path
search for the '-f' command-line option (*note Options::).
@@ -26008,6 +27289,26 @@ current version of 'gawk'.
'--pretty-print', '--profile', '--re-interval', '--sandbox',
'--source', '--traditional', '--use-lc-numeric', and
'--version' long options (*note Options::).
+=======
+ - The `AWKPATH' environment variable for specifying a path
+ search for the `-f' command-line option (*note Options::).
+
+ - The `AWKLIBPATH' environment variable for specifying a path
+ search for the `-l' command-line option (*note Options::).
+
+ - The `-b', `-c', `-C', `-d', `-D', `-e', `-E', `-g', `-h',
+ `-i', `-l', `-L', `-M', `-n', `-N', `-o', `-O', `-p', `-P',
+ `-r', `-S', `-t', and `-V' short options. Also, the ability
+ to use GNU-style long-named options that start with `--' and
+ the `--assign', `--bignum', `--characters-as-bytes',
+ `--copyright', `--debug', `--dump-variables', `--exec',
+ `--field-separator', `--file', `--gen-pot', `--help',
+ `--include', `--lint', `--lint-old', `--load',
+ `--non-decimal-data', `--optimize', `--posix',
+ `--pretty-print', `--profile', `--re-interval', `--sandbox',
+ `--source', `--traditional', `--use-lc-numeric', and
+ `--version' long options (*note Options::).
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Support for the following obsolete systems was removed from the
code and the documentation for 'gawk' version 4.0:
@@ -26041,6 +27342,12 @@ current version of 'gawk'.
- Ultrix
+<<<<<<< HEAD
+=======
+ * Support for MirBSD was removed at `gawk' version 4.2.
+
+
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

File: gawk.info, Node: Feature History, Next: Common Extensions, Prev: POSIX/GNU, Up: Language History
@@ -26098,7 +27405,7 @@ POSIX 'awk', in the order they were added to 'gawk'.
* The ability to delete all of an array at once with 'delete ARRAY'
(*note Delete::).
- * Command line option changes (*note Options::):
+ * Command-line option changes (*note Options::):
- The ability to use GNU-style long-named options that start
with '--'.
@@ -26132,18 +27439,29 @@ POSIX 'awk', in the order they were added to 'gawk'.
* The 'next file' statement became 'nextfile' (*note Nextfile
Statement::).
+<<<<<<< HEAD
* The 'fflush()' function from Brian Kernighan's 'awk' (then at Bell
Laboratories; *note I/O Functions::).
+=======
+ * The `fflush()' function from BWK `awk' (then at Bell Laboratories;
+ *note I/O Functions::).
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
- * New command line options:
+ * New command-line options:
- The '--lint-old' option to warn about constructs that are not
available in the original Version 7 Unix version of 'awk'
(*note V7/SVR3.1::).
+<<<<<<< HEAD
- The '-m' option from Brian Kernighan's 'awk'. (He was still
at Bell Laboratories at the time.) This was later removed
from both his 'awk' and from 'gawk'.
+=======
+ - The `-m' option from BWK `awk'. (Brian was still at Bell
+ Laboratories at the time.) This was later removed from both
+ his `awk' and from `gawk'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
- The '--re-interval' option to provide interval expressions in
regexps (*note Regexp Operators::).
@@ -26302,9 +27620,15 @@ POSIX 'awk', in the order they were added to 'gawk'.
* An optional third argument to 'asort()' and 'asorti()', specifying
how to sort (*note String Functions::).
+<<<<<<< HEAD
* The behavior of 'fflush()' changed to match Brian Kernighan's 'awk'
and for POSIX; now both 'fflush()' and 'fflush("")' flush all open
output redirections (*note I/O Functions::).
+=======
+ * The behavior of `fflush()' changed to match BWK `awk' and for
+ POSIX; now both `fflush()' and `fflush("")' flush all open output
+ redirections (*note I/O Functions::).
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* The 'isarray()' function which distinguishes if an item is an array
or not, to make it possible to traverse arrays of arrays (*note
@@ -26327,7 +27651,7 @@ POSIX 'awk', in the order they were added to 'gawk'.
* 'switch' / 'case' are enabled by default (*note Switch
Statement::).
- * Command line option changes (*note Options::):
+ * Command-line option changes (*note Options::):
- The '-b' and '--characters-as-bytes' options which prevent
'gawk' from treating input as a multibyte string.
@@ -26344,8 +27668,13 @@ POSIX 'awk', in the order they were added to 'gawk'.
in '#!' scripts.
* Directories named on the command line now produce a warning, not a
+<<<<<<< HEAD
fatal error, unless '--posix' or '--traditional' are used (*note
Command line directories::).
+=======
+ fatal error, unless `--posix' or `--traditional' are used (*note
+ Command-line directories::).
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* The 'gawk' internals were rewritten, bringing the 'dgawk' debugger
and possibly improved performance (*note Debugger::).
@@ -26393,11 +27722,16 @@ POSIX 'awk', in the order they were added to 'gawk'.
* Three new arrays: 'SYMTAB', 'FUNCTAB', and
'PROCINFO["identifiers"]' (*note Auto-set::).
+<<<<<<< HEAD
* The three executables 'gawk', 'pgawk', and 'dgawk', were merged
into one, named just 'gawk'. As a result the command line options
+=======
+ * The three executables `gawk', `pgawk', and `dgawk', were merged
+ into one, named just `gawk'. As a result the command-line options
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
changed.
- * Command line option changes (*note Options::):
+ * Command-line option changes (*note Options::):
- The '-D' option invokes the debugger.
@@ -27442,7 +28776,7 @@ These give the same result as the '-W BINMODE=2' option in 'mawk'. The
following changes the record separator to '"\r\n"' and sets binary mode
on reads, but does not affect the mode on standard input:
- gawk -v RS="\r\n" --source "BEGIN { BINMODE = 1 }" ...
+ gawk -v RS="\r\n" -e "BEGIN { BINMODE = 1 }" ...
or:
@@ -27909,6 +29243,7 @@ Unix 'awk'
The project seems to be frozen; no new code changes have been made
since approximately 2003.
+<<<<<<< HEAD
'pawk'
Nelson H.F. Beebe at the University of Utah has modified Brian
Kernighan's 'awk' to provide timing and profiling information. It
@@ -27917,6 +29252,16 @@ Unix 'awk'
profiling. You may find it at either
<ftp://ftp.math.utah.edu/pub/pawk/pawk-20030606.tar.gz> or
<http://www.math.utah.edu/pub/pawk/pawk-20030606.tar.gz>.
+=======
+`pawk'
+ Nelson H.F. Beebe at the University of Utah has modified BWK `awk'
+ to provide timing and profiling information. It is different from
+ `gawk' with the `--profile' option (*note Profiling::), in that
+ it uses CPU-based profiling, not line-count profiling. You may
+ find it at either
+ `ftp://ftp.math.utah.edu/pub/pawk/pawk-20030606.tar.gz' or
+ `http://www.math.utah.edu/pub/pawk/pawk-20030606.tar.gz'.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
Busybox Awk
Busybox is a GPL-licensed program providing small versions of many
@@ -27955,11 +29300,19 @@ Libmawk
This is an embeddable 'awk' interpreter derived from 'mawk'. For
more information see <http://repo.hu/projects/libmawk/>.
+<<<<<<< HEAD
'pawk'
This is a Python module that claims to bring 'awk'-like features to
Python. See <https://github.com/alecthomas/pawk> for more
information. (This is not related to Nelson Beebe's modified
version of Brian Kernighan's 'awk', described earlier.)
+=======
+`pawk'
+ This is a Python module that claims to bring `awk'-like features
+ to Python. See `https://github.com/alecthomas/pawk' for more
+ information. (This is not related to Nelson Beebe's modified
+ version of BWK `awk', described earlier.)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
QSE Awk
This is an embeddable 'awk' interpreter. For more information see
@@ -28779,8 +30132,13 @@ C.7 Summary
option or with the '--posix' option. The '--parsedebug' option is
available if 'gawk' is compiled with '-DDEBUG'.
+<<<<<<< HEAD
* The source code for 'gawk' is maintained in a publicly accessible
Git repository. Anyone may check it out and view the source.
+=======
+ * The source code for `gawk' is maintained in a publicly accessible
+ Git repository. Anyone may check it out and view the source.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Contributions to 'gawk' are welcome. Following the steps outlined
in this major node will make it easier to integrate your
@@ -30831,6 +32189,7 @@ Index
(line 6)
* '!' (exclamation point), '!~' operator <4>: Comparison Operators.
(line 11)
+<<<<<<< HEAD
* '!' (exclamation point), '!~' operator <5>: Comparison Operators.
(line 98)
* '!' (exclamation point), '!~' operator <6>: Precedence. (line 79)
@@ -30840,6 +32199,15 @@ Index
* '"' (double quote), in regexp constants: Computed Regexps. (line 29)
* '"' (double quote), in shell commands: Quoting. (line 54)
* '#' (number sign), '#!' (executable scripts): Executable Scripts.
+=======
+* ! (exclamation point), !~ operator <4>: Regexp Constants. (line 6)
+* ! (exclamation point), !~ operator <5>: Case-sensitivity. (line 26)
+* ! (exclamation point), !~ operator <6>: Computed Regexps. (line 6)
+* ! (exclamation point), !~ operator: Regexp Usage. (line 19)
+* " (double quote), in regexp constants: Computed Regexps. (line 29)
+* " (double quote), in shell commands: Quoting. (line 54)
+* # (number sign), #! (executable scripts): Executable Scripts.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 6)
* '#' (number sign), commenting: Comments. (line 6)
* '$' (dollar sign), '$' field operator: Fields. (line 19)
@@ -30854,6 +32222,7 @@ Index
* '&' (ampersand), '&&' operator <1>: Precedence. (line 85)
* '&' (ampersand), 'gsub()'/'gensub()'/'sub()' functions and: Gory Details.
(line 6)
+<<<<<<< HEAD
* ''' (single quote): One-shot. (line 15)
* ''' (single quote) in 'gawk' command lines: Long. (line 33)
* ''' (single quote), in shell commands: Quoting. (line 48)
@@ -30896,6 +32265,47 @@ Index
* '--copyright' option: Options. (line 89)
* '--debug' option: Options. (line 108)
* '--disable-extensions' configuration option: Additional Configuration Options.
+=======
+* ' (single quote): One-shot. (line 15)
+* ' (single quote) in gawk command lines: Long. (line 33)
+* ' (single quote), in shell commands: Quoting. (line 48)
+* ' (single quote), vs. apostrophe: Comments. (line 27)
+* ' (single quote), with double quotes: Quoting. (line 70)
+* () (parentheses), in a profile: Profiling. (line 146)
+* () (parentheses), regexp operator: Regexp Operators. (line 81)
+* * (asterisk), * operator, as multiplication operator: Precedence.
+ (line 55)
+* * (asterisk), * operator, as regexp operator: Regexp Operators.
+ (line 89)
+* * (asterisk), * operator, null strings, matching: Gory Details.
+ (line 143)
+* * (asterisk), ** operator <1>: Precedence. (line 49)
+* * (asterisk), ** operator: Arithmetic Ops. (line 81)
+* * (asterisk), **= operator <1>: Precedence. (line 95)
+* * (asterisk), **= operator: Assignment Ops. (line 130)
+* * (asterisk), *= operator <1>: Precedence. (line 95)
+* * (asterisk), *= operator: Assignment Ops. (line 130)
+* + (plus sign), + operator: Precedence. (line 52)
+* + (plus sign), ++ operator <1>: Precedence. (line 46)
+* + (plus sign), ++ operator: Increment Ops. (line 11)
+* + (plus sign), += operator <1>: Precedence. (line 95)
+* + (plus sign), += operator: Assignment Ops. (line 82)
+* + (plus sign), regexp operator: Regexp Operators. (line 105)
+* , (comma), in range patterns: Ranges. (line 6)
+* - (hyphen), - operator: Precedence. (line 52)
+* - (hyphen), -- operator <1>: Precedence. (line 46)
+* - (hyphen), -- operator: Increment Ops. (line 48)
+* - (hyphen), -= operator <1>: Precedence. (line 95)
+* - (hyphen), -= operator: Assignment Ops. (line 130)
+* - (hyphen), filenames beginning with: Options. (line 59)
+* - (hyphen), in bracket expressions: Bracket Expressions. (line 17)
+* --assign option: Options. (line 32)
+* --bignum option: Options. (line 205)
+* --characters-as-bytes option: Options. (line 68)
+* --copyright option: Options. (line 88)
+* --debug option: Options. (line 108)
+* --disable-extensions configuration option: Additional Configuration Options.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 9)
* '--disable-lint' configuration option: Additional Configuration Options.
(line 15)
@@ -30942,6 +32352,7 @@ Index
* '--version' option: Options. (line 295)
* '--with-whiny-user-strftime' configuration option: Additional Configuration Options.
(line 35)
+<<<<<<< HEAD
* '-b' option: Options. (line 69)
* '-c' option: Options. (line 82)
* '-C' option: Options. (line 89)
@@ -30954,6 +32365,21 @@ Index
* '-f' option <1>: Options. (line 25)
* '-F' option, '-Ft' sets 'FS' to TAB: Options. (line 303)
* '-F' option, command line: Command Line Field Separator.
+=======
+* -b option: Options. (line 68)
+* -C option: Options. (line 88)
+* -c option: Options. (line 81)
+* -D option: Options. (line 108)
+* -d option: Options. (line 93)
+* -e option: Options. (line 333)
+* -E option: Options. (line 125)
+* -e option: Options. (line 117)
+* -f option: Options. (line 25)
+* -F option: Options. (line 21)
+* -f option: Long. (line 12)
+* -F option, -Ft sets FS to TAB: Options. (line 306)
+* -F option, command-line: Command Line Field Separator.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 6)
* '-f' option, multiple uses: Options. (line 308)
* '-g' option: Options. (line 147)
@@ -31021,6 +32447,7 @@ Index
* '>' (right angle bracket), '>' operator (I/O): Redirection. (line 22)
* '>' (right angle bracket), '>=' operator: Comparison Operators.
(line 11)
+<<<<<<< HEAD
* '>' (right angle bracket), '>=' operator <1>: Precedence. (line 64)
* '>' (right angle bracket), '>>' operator (I/O): Redirection.
(line 50)
@@ -31054,6 +32481,72 @@ Index
* '\' (backslash), regexp operator: Regexp Operators. (line 18)
* '\' (backslash), '\"' escape sequence: Escape Sequences. (line 76)
* '\' (backslash), '\'' operator ('gawk'): GNU Regexp Operators.
+=======
+* > (right angle bracket), >> operator (I/O) <1>: Precedence. (line 65)
+* > (right angle bracket), >> operator (I/O): Redirection. (line 50)
+* ? (question mark), ?: operator: Precedence. (line 92)
+* ? (question mark), regexp operator <1>: GNU Regexp Operators.
+ (line 59)
+* ? (question mark), regexp operator: Regexp Operators. (line 111)
+* @-notation for indirect function calls: Indirect Calls. (line 47)
+* @include directive: Include Files. (line 8)
+* @load directive: Loading Shared Libraries.
+ (line 8)
+* [] (square brackets), regexp operator: Regexp Operators. (line 56)
+* \ (backslash): Comments. (line 50)
+* \ (backslash), \" escape sequence: Escape Sequences. (line 84)
+* \ (backslash), \' operator (gawk): GNU Regexp Operators.
+ (line 56)
+* \ (backslash), \/ escape sequence: Escape Sequences. (line 75)
+* \ (backslash), \< operator (gawk): GNU Regexp Operators.
+ (line 30)
+* \ (backslash), \> operator (gawk): GNU Regexp Operators.
+ (line 34)
+* \ (backslash), \` operator (gawk): GNU Regexp Operators.
+ (line 54)
+* \ (backslash), \a escape sequence: Escape Sequences. (line 34)
+* \ (backslash), \b escape sequence: Escape Sequences. (line 38)
+* \ (backslash), \B operator (gawk): GNU Regexp Operators.
+ (line 43)
+* \ (backslash), \f escape sequence: Escape Sequences. (line 41)
+* \ (backslash), \n escape sequence: Escape Sequences. (line 44)
+* \ (backslash), \NNN escape sequence: Escape Sequences. (line 56)
+* \ (backslash), \r escape sequence: Escape Sequences. (line 47)
+* \ (backslash), \S operator (gawk): GNU Regexp Operators.
+ (line 17)
+* \ (backslash), \s operator (gawk): GNU Regexp Operators.
+ (line 13)
+* \ (backslash), \t escape sequence: Escape Sequences. (line 50)
+* \ (backslash), \v escape sequence: Escape Sequences. (line 53)
+* \ (backslash), \W operator (gawk): GNU Regexp Operators.
+ (line 26)
+* \ (backslash), \w operator (gawk): GNU Regexp Operators.
+ (line 21)
+* \ (backslash), \x escape sequence: Escape Sequences. (line 61)
+* \ (backslash), \y operator (gawk): GNU Regexp Operators.
+ (line 38)
+* \ (backslash), as field separator: Command Line Field Separator.
+ (line 27)
+* \ (backslash), continuing lines and: Statements/Lines. (line 19)
+* \ (backslash), continuing lines and, comments and: Statements/Lines.
+ (line 76)
+* \ (backslash), continuing lines and, in csh: Statements/Lines.
+ (line 44)
+* \ (backslash), gsub()/gensub()/sub() functions and: Gory Details.
+ (line 6)
+* \ (backslash), in bracket expressions: Bracket Expressions. (line 17)
+* \ (backslash), in escape sequences: Escape Sequences. (line 6)
+* \ (backslash), in escape sequences, POSIX and: Escape Sequences.
+ (line 120)
+* \ (backslash), in regexp constants: Computed Regexps. (line 29)
+* \ (backslash), in shell commands: Quoting. (line 48)
+* \ (backslash), regexp operator: Regexp Operators. (line 18)
+* ^ (caret), ^ operator: Precedence. (line 49)
+* ^ (caret), ^= operator <1>: Precedence. (line 95)
+* ^ (caret), ^= operator: Assignment Ops. (line 130)
+* ^ (caret), in bracket expressions: Bracket Expressions. (line 17)
+* ^ (caret), in FS: Regexp Field Splitting.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 59)
* '\' (backslash), '\/' escape sequence: Escape Sequences. (line 69)
* '\' (backslash), '\<' operator ('gawk'): GNU Regexp Operators.
@@ -31141,7 +32634,7 @@ Index
* advanced features, network programming: TCP/IP Networking. (line 6)
* advanced features, nondecimal input data: Nondecimal Data. (line 6)
* advanced features, processes, communicating with: Two-way I/O.
- (line 23)
+ (line 6)
* advanced features, specifying field content: Splitting By Content.
(line 10)
* Aho, Alfred: History. (line 17)
@@ -31151,6 +32644,7 @@ Index
* algorithms: Basic High Level. (line 57)
* allocating memory for extensions: Memory Allocation Functions.
(line 6)
+<<<<<<< HEAD
* Alpha (DEC): Manual History. (line 28)
* amazing 'awk' assembler ('aaa'): Glossary. (line 11)
* amazingly workable formatter ('awf'): Glossary. (line 24)
@@ -31159,6 +32653,15 @@ Index
* ampersand ('&'), '&&' operator: Boolean Ops. (line 57)
* ampersand ('&'), '&&' operator <1>: Precedence. (line 85)
* ampersand ('&'), 'gsub()'/'gensub()'/'sub()' functions and: Gory Details.
+=======
+* amazing awk assembler (aaa): Glossary. (line 11)
+* amazingly workable formatter (awf): Glossary. (line 24)
+* ambiguity, syntactic: /= operator vs. /=.../ regexp constant: Assignment Ops.
+ (line 148)
+* ampersand (&), && operator <1>: Precedence. (line 86)
+* ampersand (&), && operator: Boolean Ops. (line 57)
+* ampersand (&), gsub()/gensub()/sub() functions and: Gory Details.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 6)
* 'anagram.awk' program: Anagram Program. (line 22)
* anagrams, finding: Anagram Program. (line 6)
@@ -31179,11 +32682,20 @@ Index
* 'ARGC'/'ARGV' variables: Auto-set. (line 15)
* 'ARGC'/'ARGV' variables, command-line arguments: Other Arguments.
(line 12)
+<<<<<<< HEAD
* 'ARGC'/'ARGV' variables, how to use: ARGC and ARGV. (line 6)
* 'ARGC'/'ARGV' variables, portability and: Executable Scripts.
(line 42)
* 'ARGIND' variable: Auto-set. (line 44)
* 'ARGIND' variable, command-line arguments: Other Arguments. (line 12)
+=======
+* ARGC/ARGV variables, how to use: ARGC and ARGV. (line 6)
+* ARGC/ARGV variables, portability and: Executable Scripts. (line 59)
+* ARGIND variable: Auto-set. (line 44)
+* ARGIND variable, command-line arguments: Other Arguments. (line 12)
+* arguments, command-line <1>: ARGC and ARGV. (line 6)
+* arguments, command-line <2>: Auto-set. (line 15)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* arguments, command-line: Other Arguments. (line 6)
* arguments, command-line <1>: Auto-set. (line 15)
* arguments, command-line <2>: ARGC and ARGV. (line 6)
@@ -31201,7 +32713,11 @@ Index
* arrays: Arrays. (line 6)
* arrays of arrays: Arrays of Arrays. (line 6)
* arrays, an example of using: Array Example. (line 6)
+<<<<<<< HEAD
* arrays, and 'IGNORECASE' variable: Array Intro. (line 91)
+=======
+* arrays, and IGNORECASE variable: Array Intro. (line 94)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* arrays, as parameters to functions: Pass By Value/Reference.
(line 46)
* arrays, associative: Array Intro. (line 48)
@@ -31229,7 +32745,11 @@ Index
(line 6)
* arrays, sorting, and 'IGNORECASE' variable: Array Sorting Functions.
(line 83)
+<<<<<<< HEAD
* arrays, sparse: Array Intro. (line 70)
+=======
+* arrays, sparse: Array Intro. (line 72)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* arrays, subscripts, uninitialized variables as: Uninitialized Subscripts.
(line 6)
* arrays, unassigned elements: Reference to Elements.
@@ -31257,6 +32777,7 @@ Index
* assignment operators, evaluation order: Assignment Ops. (line 110)
* assignment operators, lvalues/rvalues: Assignment Ops. (line 31)
* assignments as filenames: Ignoring Assigns. (line 6)
+<<<<<<< HEAD
* associative arrays: Array Intro. (line 48)
* asterisk ('*'), '*' operator, as multiplication operator: Precedence.
(line 54)
@@ -31323,12 +32844,77 @@ Index
* 'awk', versions of <1>: V7/SVR3.1. (line 6)
* 'awk', versions of, changes between SVR3.1 and SVR4: SVR4. (line 6)
* 'awk', versions of, changes between SVR4 and POSIX 'awk': POSIX.
+=======
+* associative arrays: Array Intro. (line 50)
+* asterisk (*), * operator, as multiplication operator: Precedence.
+ (line 55)
+* asterisk (*), * operator, as regexp operator: Regexp Operators.
+ (line 89)
+* asterisk (*), * operator, null strings, matching: Gory Details.
+ (line 143)
+* asterisk (*), ** operator <1>: Precedence. (line 49)
+* asterisk (*), ** operator: Arithmetic Ops. (line 81)
+* asterisk (*), **= operator <1>: Precedence. (line 95)
+* asterisk (*), **= operator: Assignment Ops. (line 130)
+* asterisk (*), *= operator <1>: Precedence. (line 95)
+* asterisk (*), *= operator: Assignment Ops. (line 130)
+* atan2: Numeric Functions. (line 11)
+* automatic displays, in debugger: Debugger Info. (line 24)
+* awf (amazingly workable formatter) program: Glossary. (line 24)
+* awk debugging, enabling: Options. (line 108)
+* awk language, POSIX version: Assignment Ops. (line 137)
+* awk profiling, enabling: Options. (line 240)
+* awk programs <1>: Two Rules. (line 6)
+* awk programs <2>: Executable Scripts. (line 6)
+* awk programs: Getting Started. (line 12)
+* awk programs, complex: When. (line 27)
+* awk programs, documenting <1>: Library Names. (line 6)
+* awk programs, documenting: Comments. (line 6)
+* awk programs, examples of: Sample Programs. (line 6)
+* awk programs, execution of: Next Statement. (line 16)
+* awk programs, internationalizing <1>: Programmer i18n. (line 6)
+* awk programs, internationalizing: I18N Functions. (line 6)
+* awk programs, lengthy: Long. (line 6)
+* awk programs, lengthy, assertions: Assert Function. (line 6)
+* awk programs, location of: Options. (line 25)
+* awk programs, one-line examples: Very Simple. (line 45)
+* awk programs, profiling: Profiling. (line 6)
+* awk programs, running <1>: Long. (line 6)
+* awk programs, running: Running gawk. (line 6)
+* awk programs, running, from shell scripts: One-shot. (line 22)
+* awk programs, running, without input files: Read Terminal. (line 17)
+* awk programs, shell variables in: Using Shell Variables.
+ (line 6)
+* awk, function of: Getting Started. (line 6)
+* awk, gawk and <1>: This Manual. (line 14)
+* awk, gawk and: Preface. (line 21)
+* awk, history of: History. (line 17)
+* awk, implementation issues, pipes: Redirection. (line 135)
+* awk, implementations: Other Versions. (line 6)
+* awk, implementations, limits: Getline Notes. (line 14)
+* awk, invoking: Command Line. (line 6)
+* awk, new vs. old: Names. (line 6)
+* awk, new vs. old, OFMT variable: Strings And Numbers. (line 57)
+* awk, POSIX and: Preface. (line 21)
+* awk, POSIX and, See Also POSIX awk: Preface. (line 21)
+* awk, regexp constants and: Comparison Operators.
+ (line 102)
+* awk, See Also gawk: Preface. (line 34)
+* awk, terms describing: This Manual. (line 6)
+* awk, uses for <1>: When. (line 6)
+* awk, uses for <2>: Getting Started. (line 12)
+* awk, uses for: Preface. (line 21)
+* awk, versions of: V7/SVR3.1. (line 6)
+* awk, versions of, changes between SVR3.1 and SVR4: SVR4. (line 6)
+* awk, versions of, changes between SVR4 and POSIX awk: POSIX.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 6)
* 'awk', versions of, changes between V7 and SVR3.1: V7/SVR3.1.
(line 6)
* 'awk', versions of, See Also Brian Kernighan's 'awk': BTL. (line 6)
* 'awk', versions of, See Also Brian Kernighan's 'awk' <1>: Other Versions.
(line 13)
+<<<<<<< HEAD
* 'awka' compiler for 'awk': Other Versions. (line 64)
* 'AWKLIBPATH' environment variable: AWKLIBPATH Variable. (line 6)
* 'AWKPATH' environment variable: AWKPATH Variable. (line 6)
@@ -31347,6 +32933,31 @@ Index
* backslash ('\'), continuing lines and, comments and: Statements/Lines.
(line 75)
* backslash ('\'), continuing lines and, in 'csh': Statements/Lines.
+=======
+* awk, versions of, See Also Brian Kernighan's awk: BTL. (line 6)
+* awka compiler for awk: Other Versions. (line 64)
+* AWKLIBPATH environment variable: AWKLIBPATH Variable. (line 6)
+* AWKPATH environment variable <1>: PC Using. (line 10)
+* AWKPATH environment variable: AWKPATH Variable. (line 6)
+* awkprof.out file: Profiling. (line 6)
+* awksed.awk program: Simple Sed. (line 25)
+* awkvars.out file: Options. (line 93)
+* b debugger command (alias for break): Breakpoint Control. (line 11)
+* backslash (\): Comments. (line 50)
+* backslash (\), \" escape sequence: Escape Sequences. (line 84)
+* backslash (\), \' operator (gawk): GNU Regexp Operators.
+ (line 56)
+* backslash (\), \/ escape sequence: Escape Sequences. (line 75)
+* backslash (\), \< operator (gawk): GNU Regexp Operators.
+ (line 30)
+* backslash (\), \> operator (gawk): GNU Regexp Operators.
+ (line 34)
+* backslash (\), \` operator (gawk): GNU Regexp Operators.
+ (line 54)
+* backslash (\), \a escape sequence: Escape Sequences. (line 34)
+* backslash (\), \b escape sequence: Escape Sequences. (line 38)
+* backslash (\), \B operator (gawk): GNU Regexp Operators.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 43)
* backslash ('\'), 'gsub()'/'gensub()'/'sub()' functions and: Gory Details.
(line 6)
@@ -31377,6 +32988,7 @@ Index
* backslash ('\'), '\r' escape sequence: Escape Sequences. (line 47)
* backslash ('\'), '\s' operator ('gawk'): GNU Regexp Operators.
(line 13)
+<<<<<<< HEAD
* backslash ('\'), '\S' operator ('gawk'): GNU Regexp Operators.
(line 17)
* backslash ('\'), '\t' escape sequence: Escape Sequences. (line 50)
@@ -31391,6 +33003,35 @@ Index
* backslash ('\'), '\`' operator ('gawk'): GNU Regexp Operators.
(line 57)
* 'backtrace' debugger command: Execution Stack. (line 13)
+=======
+* backslash (\), \t escape sequence: Escape Sequences. (line 50)
+* backslash (\), \v escape sequence: Escape Sequences. (line 53)
+* backslash (\), \W operator (gawk): GNU Regexp Operators.
+ (line 26)
+* backslash (\), \w operator (gawk): GNU Regexp Operators.
+ (line 21)
+* backslash (\), \x escape sequence: Escape Sequences. (line 61)
+* backslash (\), \y operator (gawk): GNU Regexp Operators.
+ (line 38)
+* backslash (\), as field separator: Command Line Field Separator.
+ (line 27)
+* backslash (\), continuing lines and: Statements/Lines. (line 19)
+* backslash (\), continuing lines and, comments and: Statements/Lines.
+ (line 76)
+* backslash (\), continuing lines and, in csh: Statements/Lines.
+ (line 44)
+* backslash (\), gsub()/gensub()/sub() functions and: Gory Details.
+ (line 6)
+* backslash (\), in bracket expressions: Bracket Expressions. (line 17)
+* backslash (\), in escape sequences: Escape Sequences. (line 6)
+* backslash (\), in escape sequences, POSIX and: Escape Sequences.
+ (line 120)
+* backslash (\), in regexp constants: Computed Regexps. (line 29)
+* backslash (\), in shell commands: Quoting. (line 48)
+* backslash (\), regexp operator: Regexp Operators. (line 18)
+* backtrace debugger command: Execution Stack. (line 13)
+* Beebe, Nelson H.F. <1>: Other Versions. (line 78)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Beebe, Nelson H.F.: Acknowledgments. (line 60)
* Beebe, Nelson H.F. <1>: Other Versions. (line 78)
* 'BEGIN' pattern: Field Separators. (line 44)
@@ -31407,7 +33048,13 @@ Index
(line 36)
* 'BEGIN' pattern, 'next'/'nextfile' statements and <1>: Next Statement.
(line 45)
+<<<<<<< HEAD
* 'BEGIN' pattern, 'OFS'/'ORS' variables, assigning values to: Output Separators.
+=======
+* BEGIN pattern, next/nextfile statements and: I/O And BEGIN/END.
+ (line 36)
+* BEGIN pattern, OFS/ORS variables, assigning values to: Output Separators.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 20)
* 'BEGIN' pattern, operators and: Using BEGIN/END. (line 17)
* 'BEGIN' pattern, 'print' statement and: I/O And BEGIN/END. (line 15)
@@ -31453,8 +33100,9 @@ Index
* bracket expressions: Regexp Operators. (line 56)
* bracket expressions <1>: Bracket Expressions. (line 6)
* bracket expressions, character classes: Bracket Expressions.
- (line 30)
+ (line 32)
* bracket expressions, collating elements: Bracket Expressions.
+<<<<<<< HEAD
(line 76)
* bracket expressions, collating symbols: Bracket Expressions.
(line 83)
@@ -31462,6 +33110,15 @@ Index
* bracket expressions, equivalence classes: Bracket Expressions.
(line 89)
* bracket expressions, non-ASCII: Bracket Expressions. (line 76)
+=======
+ (line 79)
+* bracket expressions, collating symbols: Bracket Expressions.
+ (line 86)
+* bracket expressions, complemented: Regexp Operators. (line 64)
+* bracket expressions, equivalence classes: Bracket Expressions.
+ (line 92)
+* bracket expressions, non-ASCII: Bracket Expressions. (line 79)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* bracket expressions, range expressions: Bracket Expressions.
(line 6)
* 'break' debugger command: Breakpoint Control. (line 11)
@@ -31474,6 +33131,7 @@ Index
* breakpoint, delete by number: Breakpoint Control. (line 64)
* breakpoint, how to disable or enable: Breakpoint Control. (line 69)
* breakpoint, setting: Breakpoint Control. (line 11)
+<<<<<<< HEAD
* Brennan, Michael: Foreword. (line 83)
* Brennan, Michael <1>: Delete. (line 56)
* Brennan, Michael <2>: Simple Sed. (line 25)
@@ -31499,6 +33157,31 @@ Index
* Brian Kernighan's 'awk' <14>: I/O Functions. (line 43)
* Brian Kernighan's 'awk', extensions: BTL. (line 6)
* Brian Kernighan's 'awk', source code: Other Versions. (line 13)
+=======
+* Brennan, Michael <1>: Other Versions. (line 6)
+* Brennan, Michael <2>: Simple Sed. (line 25)
+* Brennan, Michael <3>: Delete. (line 56)
+* Brennan, Michael <4>: Acknowledgments. (line 76)
+* Brennan, Michael: Foreword. (line 83)
+* Brian Kernighan's awk <1>: I/O Functions. (line 43)
+* Brian Kernighan's awk <2>: Gory Details. (line 19)
+* Brian Kernighan's awk <3>: String Functions. (line 490)
+* Brian Kernighan's awk <4>: Delete. (line 48)
+* Brian Kernighan's awk <5>: Nextfile Statement. (line 47)
+* Brian Kernighan's awk <6>: Continue Statement. (line 44)
+* Brian Kernighan's awk <7>: Break Statement. (line 51)
+* Brian Kernighan's awk <8>: I/O And BEGIN/END. (line 16)
+* Brian Kernighan's awk <9>: Concatenation. (line 36)
+* Brian Kernighan's awk <10>: Getline/Pipe. (line 62)
+* Brian Kernighan's awk <11>: Regexp Field Splitting.
+ (line 67)
+* Brian Kernighan's awk <12>: GNU Regexp Operators.
+ (line 83)
+* Brian Kernighan's awk <13>: Escape Sequences. (line 124)
+* Brian Kernighan's awk: When. (line 21)
+* Brian Kernighan's awk, extensions: BTL. (line 6)
+* Brian Kernighan's awk, source code: Other Versions. (line 13)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Brini, Davide: Signature Program. (line 6)
* Brink, Jeroen: DOS Quoting. (line 10)
* Broder, Alan J.: Contributors. (line 89)
@@ -31507,11 +33190,17 @@ Index
* 'bt' debugger command (alias for 'backtrace'): Execution Stack.
(line 13)
* Buening, Andreas: Acknowledgments. (line 60)
+<<<<<<< HEAD
* Buening, Andreas <1>: Contributors. (line 93)
* Buening, Andreas <2>: Bugs. (line 71)
* buffering, input/output: I/O Functions. (line 139)
* buffering, input/output <1>: Two-way I/O. (line 70)
* buffering, interactive vs. noninteractive: I/O Functions. (line 108)
+=======
+* buffering, input/output <1>: Two-way I/O. (line 52)
+* buffering, input/output: I/O Functions. (line 140)
+* buffering, interactive vs. noninteractive: I/O Functions. (line 109)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* buffers, flushing: I/O Functions. (line 32)
* buffers, flushing <1>: I/O Functions. (line 139)
* buffers, operators for: GNU Regexp Operators.
@@ -31525,6 +33214,7 @@ Index
* built-in variables, conveying information: Auto-set. (line 6)
* built-in variables, user-modifiable: User-modified. (line 6)
* Busybox Awk: Other Versions. (line 88)
+* c.e., See common extensions: Conventions. (line 51)
* call by reference: Pass By Value/Reference.
(line 46)
* call by value: Pass By Value/Reference.
@@ -31540,8 +33230,13 @@ Index
* 'case' keyword: Switch Statement. (line 6)
* case sensitivity, and regexps: User-modified. (line 76)
* case sensitivity, and string comparisons: User-modified. (line 76)
+<<<<<<< HEAD
* case sensitivity, array indices and: Array Intro. (line 91)
* case sensitivity, converting case: String Functions. (line 519)
+=======
+* case sensitivity, array indices and: Array Intro. (line 94)
+* case sensitivity, converting case: String Functions. (line 520)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* case sensitivity, example programs: Library Functions. (line 53)
* case sensitivity, 'gawk': Case-sensitivity. (line 26)
* case sensitivity, regexps and: Case-sensitivity. (line 6)
@@ -31578,12 +33273,22 @@ Index
(line 81)
* 'close()' function, return value: Close Files And Pipes.
(line 131)
+<<<<<<< HEAD
* 'close()' function, two-way pipes and: Two-way I/O. (line 77)
* Close, Diane: Manual History. (line 41)
* Close, Diane <1>: Contributors. (line 21)
* Collado, Manuel: Acknowledgments. (line 60)
* collating elements: Bracket Expressions. (line 76)
* collating symbols: Bracket Expressions. (line 83)
+=======
+* close() function, two-way pipes and: Two-way I/O. (line 59)
+* Close, Diane <1>: Contributors. (line 20)
+* Close, Diane: Manual History. (line 34)
+* Collado, Manuel: Acknowledgments. (line 60)
+* collating elements: Bracket Expressions. (line 79)
+* collating symbols: Bracket Expressions. (line 86)
+* Colombo, Antonio <1>: Contributors. (line 137)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Colombo, Antonio: Acknowledgments. (line 60)
* Colombo, Antonio <1>: Contributors. (line 138)
* columns, aligning: Print Examples. (line 69)
@@ -31591,9 +33296,13 @@ Index
* comma (','), in range patterns: Ranges. (line 6)
* command completion, in debugger: Readline Support. (line 6)
* command line, arguments: Other Arguments. (line 6)
+<<<<<<< HEAD
* command line, arguments <1>: Auto-set. (line 15)
* command line, arguments <2>: ARGC and ARGV. (line 6)
* command line, directories on: Command line directories.
+=======
+* command line, directories on: Command-line directories.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 6)
* command line, formats: Running gawk. (line 12)
* command line, 'FS' on, setting: Command Line Field Separator.
@@ -31610,6 +33319,7 @@ Index
* commands to execute at breakpoint: Debugger Execution Control.
(line 10)
* commenting: Comments. (line 6)
+<<<<<<< HEAD
* commenting, backslash continuation and: Statements/Lines. (line 75)
* common extensions, '**' operator: Arithmetic Ops. (line 30)
* common extensions, '**=' operator: Assignment Ops. (line 138)
@@ -31621,6 +33331,19 @@ Index
(line 39)
* common extensions, 'func' keyword: Definition Syntax. (line 89)
* common extensions, 'length()' applied to an array: String Functions.
+=======
+* commenting, backslash continuation and: Statements/Lines. (line 76)
+* common extensions, ** operator: Arithmetic Ops. (line 30)
+* common extensions, **= operator: Assignment Ops. (line 137)
+* common extensions, /dev/stderr special file: Special FD. (line 46)
+* common extensions, /dev/stdin special file: Special FD. (line 46)
+* common extensions, /dev/stdout special file: Special FD. (line 46)
+* common extensions, \x escape sequence: Escape Sequences. (line 61)
+* common extensions, BINMODE variable: PC Using. (line 33)
+* common extensions, delete to delete entire arrays: Delete. (line 39)
+* common extensions, func keyword: Definition Syntax. (line 92)
+* common extensions, length() applied to an array: String Functions.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 197)
* common extensions, 'RS' as a regexp: gawk split records. (line 6)
* common extensions, single character fields: Single Character Fields.
@@ -31686,6 +33409,10 @@ Index
* 'CONVFMT' variable, and array subscripts: Numeric Array Subscripts.
(line 6)
* cookie: Glossary. (line 149)
+<<<<<<< HEAD
+=======
+* coprocesses <1>: Two-way I/O. (line 25)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* coprocesses: Redirection. (line 102)
* coprocesses <1>: Two-way I/O. (line 44)
* coprocesses, closing: Close Files And Pipes.
@@ -31694,11 +33421,18 @@ Index
* 'cos': Numeric Functions. (line 14)
* cosine: Numeric Functions. (line 14)
* counting: Wc Program. (line 6)
+<<<<<<< HEAD
* 'csh' utility: Statements/Lines. (line 43)
* 'csh' utility, 'POSIXLY_CORRECT' environment variable: Options.
(line 350)
* 'csh' utility, '|&' operator, comparison with: Two-way I/O. (line 44)
* 'ctime()' user-defined function: Function Example. (line 73)
+=======
+* csh utility: Statements/Lines. (line 44)
+* csh utility, POSIXLY_CORRECT environment variable: Options. (line 351)
+* csh utility, |& operator, comparison with: Two-way I/O. (line 25)
+* ctime() user-defined function: Function Example. (line 74)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* currency symbols, localization: Explaining gettext. (line 104)
* current system time: Time Functions. (line 65)
* 'custom.h' file: Configuration Philosophy.
@@ -31706,6 +33440,7 @@ Index
* customized input parser: Input Parsers. (line 6)
* customized output wrapper: Output Wrappers. (line 6)
* customized two-way processor: Two-way processors. (line 6)
+<<<<<<< HEAD
* 'cut' utility: Cut Program. (line 6)
* 'cut' utility <1>: Cut Program. (line 6)
* 'cut.awk' program: Cut Program. (line 45)
@@ -31717,12 +33452,26 @@ Index
* dark corner, '"0"' is actually true: Truth Values. (line 24)
* dark corner, '/=' operator vs. '/=.../' regexp constant: Assignment Ops.
(line 149)
+=======
+* cut utility: Cut Program. (line 6)
+* cut.awk program: Cut Program. (line 45)
+* d debugger command (alias for delete): Breakpoint Control. (line 64)
+* d.c., See dark corner: Conventions. (line 42)
+* dark corner <1>: Glossary. (line 188)
+* dark corner: Conventions. (line 42)
+* dark corner, "0" is actually true: Truth Values. (line 24)
+* dark corner, /= operator vs. /=.../ regexp constant: Assignment Ops.
+ (line 148)
+* dark corner, ^, in FS: Regexp Field Splitting.
+ (line 59)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* dark corner, array subscripts: Uninitialized Subscripts.
(line 43)
* dark corner, 'break' statement: Break Statement. (line 51)
* dark corner, 'close()' function: Close Files And Pipes.
(line 131)
* dark corner, command-line arguments: Assignment Options. (line 43)
+<<<<<<< HEAD
* dark corner, 'continue' statement: Continue Statement. (line 43)
* dark corner, 'CONVFMT' variable: Strings And Numbers. (line 39)
* dark corner, escape sequences: Other Arguments. (line 31)
@@ -31734,6 +33483,19 @@ Index
* dark corner, 'FILENAME' variable: Getline Notes. (line 19)
* dark corner, 'FILENAME' variable <1>: Auto-set. (line 98)
* dark corner, 'FNR'/'NR' variables: Auto-set. (line 309)
+=======
+* dark corner, continue statement: Continue Statement. (line 44)
+* dark corner, CONVFMT variable: Strings And Numbers. (line 40)
+* dark corner, escape sequences: Other Arguments. (line 35)
+* dark corner, escape sequences, for metacharacters: Escape Sequences.
+ (line 142)
+* dark corner, exit statement: Exit Statement. (line 30)
+* dark corner, field separators: Field Splitting Summary.
+ (line 46)
+* dark corner, FILENAME variable <1>: Auto-set. (line 98)
+* dark corner, FILENAME variable: Getline Notes. (line 19)
+* dark corner, FNR/NR variables: Auto-set. (line 313)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* dark corner, format-control characters: Control Letters. (line 18)
* dark corner, format-control characters <1>: Control Letters.
(line 93)
@@ -31766,23 +33528,42 @@ Index
* 'date' utility, POSIX: Time Functions. (line 252)
* dates, converting to timestamps: Time Functions. (line 75)
* dates, information related to, localization: Explaining gettext.
+<<<<<<< HEAD
(line 116)
* Davies, Stephen: Acknowledgments. (line 60)
* Davies, Stephen <1>: Contributors. (line 75)
* 'dcgettext': I18N Functions. (line 21)
* 'dcgettext' <1>: Programmer i18n. (line 19)
* 'dcgettext()' function ('gawk'), portability and: I18N Portability.
+=======
+ (line 112)
+* Davies, Stephen <1>: Contributors. (line 74)
+* Davies, Stephen: Acknowledgments. (line 60)
+* Day, Robert P.J.: Acknowledgments. (line 76)
+* dcgettext <1>: Programmer i18n. (line 19)
+* dcgettext: I18N Functions. (line 22)
+* dcgettext() function (gawk), portability and: I18N Portability.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 33)
* 'dcngettext': I18N Functions. (line 27)
* 'dcngettext' <1>: Programmer i18n. (line 36)
* 'dcngettext()' function ('gawk'), portability and: I18N Portability.
(line 33)
+<<<<<<< HEAD
* deadlocks: Two-way I/O. (line 70)
* debugger commands, 'b' ('break'): Breakpoint Control. (line 11)
* debugger commands, 'backtrace': Execution Stack. (line 13)
* debugger commands, 'break': Breakpoint Control. (line 11)
* debugger commands, 'bt' ('backtrace'): Execution Stack. (line 13)
* debugger commands, 'c' ('continue'): Debugger Execution Control.
+=======
+* deadlocks: Two-way I/O. (line 52)
+* debugger commands, b (break): Breakpoint Control. (line 11)
+* debugger commands, backtrace: Execution Stack. (line 13)
+* debugger commands, break: Breakpoint Control. (line 11)
+* debugger commands, bt (backtrace): Execution Stack. (line 13)
+* debugger commands, c (continue): Debugger Execution Control.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 33)
* debugger commands, 'clear': Breakpoint Control. (line 36)
* debugger commands, 'commands': Debugger Execution Control.
@@ -31900,12 +33681,20 @@ Index
* deleting entire arrays: Delete. (line 39)
* Demaille, Akim: Acknowledgments. (line 60)
* describe call stack frame, in debugger: Debugger Info. (line 27)
+<<<<<<< HEAD
* differences between 'gawk' and 'awk': String Functions. (line 197)
* differences in 'awk' and 'gawk', 'ARGC'/'ARGV' variables: ARGC and ARGV.
(line 88)
* differences in 'awk' and 'gawk', 'ARGIND' variable: Auto-set.
(line 44)
* differences in 'awk' and 'gawk', array elements, deleting: Delete.
+=======
+* differences between gawk and awk: String Functions. (line 197)
+* differences in awk and gawk, ARGC/ARGV variables: ARGC and ARGV.
+ (line 90)
+* differences in awk and gawk, ARGIND variable: Auto-set. (line 44)
+* differences in awk and gawk, array elements, deleting: Delete.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 39)
* differences in 'awk' and 'gawk', 'AWKLIBPATH' environment variable: AWKLIBPATH Variable.
(line 6)
@@ -31921,9 +33710,13 @@ Index
(line 33)
* differences in 'awk' and 'gawk', 'close()' function: Close Files And Pipes.
(line 81)
+<<<<<<< HEAD
* differences in 'awk' and 'gawk', 'close()' function <1>: Close Files And Pipes.
(line 131)
* differences in 'awk' and 'gawk', command line directories: Command line directories.
+=======
+* differences in awk and gawk, command-line directories: Command-line directories.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 6)
* differences in 'awk' and 'gawk', 'ERRNO' variable: Auto-set.
(line 82)
@@ -31959,6 +33752,7 @@ Index
(line 260)
* differences in 'awk' and 'gawk', 'print'/'printf' statements: Format Modifiers.
(line 13)
+<<<<<<< HEAD
* differences in 'awk' and 'gawk', 'PROCINFO' array: Auto-set.
(line 136)
* differences in 'awk' and 'gawk', read timeouts: Read Timeout.
@@ -31966,11 +33760,19 @@ Index
* differences in 'awk' and 'gawk', record separators: awk split records.
(line 123)
* differences in 'awk' and 'gawk', regexp constants: Using Constant Regexps.
+=======
+* differences in awk and gawk, PROCINFO array: Auto-set. (line 137)
+* differences in awk and gawk, read timeouts: Read Timeout. (line 6)
+* differences in awk and gawk, record separators: awk split records.
+ (line 124)
+* differences in awk and gawk, regexp constants: Using Constant Regexps.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 43)
* differences in 'awk' and 'gawk', regular expressions: Case-sensitivity.
(line 26)
* differences in 'awk' and 'gawk', 'RS'/'RT' variables: gawk split records.
(line 58)
+<<<<<<< HEAD
* differences in 'awk' and 'gawk', 'RT' variable: Auto-set. (line 265)
* differences in 'awk' and 'gawk', single-character fields: Single Character Fields.
(line 6)
@@ -31982,12 +33784,24 @@ Index
* differences in 'awk' and 'gawk', 'SYMTAB' variable: Auto-set.
(line 269)
* differences in 'awk' and 'gawk', 'TEXTDOMAIN' variable: User-modified.
+=======
+* differences in awk and gawk, RT variable: Auto-set. (line 269)
+* differences in awk and gawk, single-character fields: Single Character Fields.
+ (line 6)
+* differences in awk and gawk, split() function: String Functions.
+ (line 347)
+* differences in awk and gawk, strings: Scalar Constants. (line 20)
+* differences in awk and gawk, strings, storing: gawk split records.
+ (line 77)
+* differences in awk and gawk, SYMTAB variable: Auto-set. (line 273)
+* differences in awk and gawk, TEXTDOMAIN variable: User-modified.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 152)
* differences in 'awk' and 'gawk', trunc-mod operation: Arithmetic Ops.
(line 66)
-* directories, command line: Command line directories.
+* directories, command-line: Command-line directories.
(line 6)
-* directories, searching: Programs Exercises. (line 63)
+* directories, searching: Programs Exercises. (line 70)
* directories, searching for loadable extensions: AWKLIBPATH Variable.
(line 6)
* directories, searching for source files: AWKPATH Variable. (line 6)
@@ -32008,11 +33822,18 @@ Index
* dollar sign ('$'), '$' field operator <1>: Precedence. (line 42)
* dollar sign ('$'), incrementing fields and arrays: Increment Ops.
(line 30)
+<<<<<<< HEAD
* dollar sign ('$'), regexp operator: Regexp Operators. (line 35)
* double quote ('"') in shell commands: Read Terminal. (line 25)
* double quote ('"'), in regexp constants: Computed Regexps. (line 29)
* double quote ('"'), in shell commands: Quoting. (line 54)
* 'down' debugger command: Execution Stack. (line 20)
+=======
+* dollar sign ($), regexp operator: Regexp Operators. (line 35)
+* double quote ("), in regexp constants: Computed Regexps. (line 29)
+* double quote ("), in shell commands: Quoting. (line 54)
+* down debugger command: Execution Stack. (line 21)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Drepper, Ulrich: Acknowledgments. (line 52)
* dump all variables of a program: Options. (line 94)
* 'dump' debugger command: Miscellaneous Debugger Commands.
@@ -32023,11 +33844,19 @@ Index
* 'e' debugger command (alias for 'enable'): Breakpoint Control.
(line 73)
* EBCDIC: Ordinal Functions. (line 45)
+<<<<<<< HEAD
* effective group ID of 'gawk' user: Auto-set. (line 141)
* effective user ID of 'gawk' user: Auto-set. (line 145)
* 'egrep' utility: Bracket Expressions. (line 24)
* 'egrep' utility <1>: Egrep Program. (line 6)
* 'egrep.awk' program: Egrep Program. (line 53)
+=======
+* effective group ID of gawk user: Auto-set. (line 142)
+* effective user ID of gawk user: Auto-set. (line 146)
+* egrep utility <1>: Egrep Program. (line 6)
+* egrep utility: Bracket Expressions. (line 26)
+* egrep.awk program: Egrep Program. (line 54)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* elements in arrays, assigning values: Assigning Elements. (line 6)
* elements in arrays, deleting: Delete. (line 6)
* elements in arrays, order of access by 'in' operator: Scanning an Array.
@@ -32052,6 +33881,7 @@ Index
* 'END' pattern, and profiling: Profiling. (line 62)
* 'END' pattern, 'assert()' user-defined function and: Assert Function.
(line 75)
+<<<<<<< HEAD
* 'END' pattern, backslash continuation and: Egrep Program. (line 222)
* 'END' pattern, Boolean patterns and: Expression Patterns. (line 70)
* 'END' pattern, 'exit' statement and: Exit Statement. (line 12)
@@ -32069,6 +33899,23 @@ Index
* 'endgrent()' user-defined function: Group Functions. (line 216)
* 'endpwent()' function (C library): Passwd Functions. (line 210)
* 'endpwent()' user-defined function: Passwd Functions. (line 213)
+=======
+* END pattern, Boolean patterns and: Expression Patterns. (line 70)
+* END pattern, exit statement and: Exit Statement. (line 12)
+* END pattern, next/nextfile statements and <1>: Next Statement.
+ (line 45)
+* END pattern, next/nextfile statements and: I/O And BEGIN/END.
+ (line 36)
+* END pattern, operators and: Using BEGIN/END. (line 17)
+* END pattern, print statement and: I/O And BEGIN/END. (line 16)
+* ENDFILE pattern: BEGINFILE/ENDFILE. (line 6)
+* ENDFILE pattern, Boolean patterns and: Expression Patterns. (line 70)
+* endfile() user-defined function: Filetrans Function. (line 62)
+* endgrent() function (C library): Group Functions. (line 212)
+* endgrent() user-defined function: Group Functions. (line 215)
+* endpwent() function (C library): Passwd Functions. (line 210)
+* endpwent() user-defined function: Passwd Functions. (line 213)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* English, Steve: Advanced Features. (line 6)
* 'ENVIRON' array: Auto-set. (line 59)
* environment variables used by 'gawk': Environment Variables.
@@ -32078,6 +33925,7 @@ Index
* equals sign ('='), '=' operator: Assignment Ops. (line 6)
* equals sign ('='), '==' operator: Comparison Operators.
(line 11)
+<<<<<<< HEAD
* equals sign ('='), '==' operator <1>: Precedence. (line 64)
* EREs (Extended Regular Expressions): Bracket Expressions. (line 24)
* 'ERRNO' variable: Auto-set. (line 82)
@@ -32085,6 +33933,13 @@ Index
* 'ERRNO' variable, with 'BEGINFILE' pattern: BEGINFILE/ENDFILE.
(line 26)
* 'ERRNO' variable, with 'close()' function: Close Files And Pipes.
+=======
+* EREs (Extended Regular Expressions): Bracket Expressions. (line 26)
+* ERRNO variable <1>: TCP/IP Networking. (line 54)
+* ERRNO variable: Auto-set. (line 82)
+* ERRNO variable, with BEGINFILE pattern: BEGINFILE/ENDFILE. (line 26)
+* ERRNO variable, with close() function: Close Files And Pipes.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 139)
* 'ERRNO' variable, with 'getline' command: Getline. (line 19)
* error handling: Special FD. (line 16)
@@ -32116,6 +33971,7 @@ Index
(line 6)
* exclamation point ('!'), '!~' operator <4>: Comparison Operators.
(line 11)
+<<<<<<< HEAD
* exclamation point ('!'), '!~' operator <5>: Comparison Operators.
(line 98)
* exclamation point ('!'), '!~' operator <6>: Precedence. (line 79)
@@ -32128,6 +33984,19 @@ Index
(line 99)
* 'exp': Numeric Functions. (line 32)
* 'expand' utility: Very Simple. (line 69)
+=======
+* exclamation point (!), !~ operator <4>: Regexp Constants. (line 6)
+* exclamation point (!), !~ operator <5>: Case-sensitivity. (line 26)
+* exclamation point (!), !~ operator <6>: Computed Regexps. (line 6)
+* exclamation point (!), !~ operator: Regexp Usage. (line 19)
+* exit statement: Exit Statement. (line 6)
+* exit status, of gawk: Exit Status. (line 6)
+* exit status, of VMS: VMS Running. (line 29)
+* exit the debugger: Miscellaneous Debugger Commands.
+ (line 99)
+* exp: Numeric Functions. (line 33)
+* expand utility: Very Simple. (line 72)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Expat XML parser library: gawkextlib. (line 35)
* exponent: Numeric Functions. (line 32)
* expressions: Expressions. (line 6)
@@ -32140,14 +34009,14 @@ Index
* expressions, matching, See comparison expressions: Typing and Comparison.
(line 9)
* expressions, selecting: Conditional Exp. (line 6)
-* Extended Regular Expressions (EREs): Bracket Expressions. (line 24)
+* Extended Regular Expressions (EREs): Bracket Expressions. (line 26)
* extension API: Extension API Description.
(line 6)
* extension API informational variables: Extension API Informational Variables.
(line 6)
* extension API version: Extension Versioning.
(line 6)
-* extension API, version number: Auto-set. (line 232)
+* extension API, version number: Auto-set. (line 236)
* extension example: Extension Example. (line 6)
* extension registration: Registration Functions.
(line 6)
@@ -32155,6 +34024,7 @@ Index
* extensions distributed with 'gawk': Extension Samples. (line 6)
* extensions, allocating memory: Memory Allocation Functions.
(line 6)
+<<<<<<< HEAD
* extensions, Brian Kernighan's 'awk': BTL. (line 6)
* extensions, Brian Kernighan's 'awk' <1>: Common Extensions. (line 6)
* extensions, common, '**' operator: Arithmetic Ops. (line 30)
@@ -32168,13 +34038,35 @@ Index
* extensions, common, 'fflush()' function: I/O Functions. (line 43)
* extensions, common, 'func' keyword: Definition Syntax. (line 89)
* extensions, common, 'length()' applied to an array: String Functions.
+=======
+* extensions, Brian Kernighan's awk <1>: Common Extensions. (line 6)
+* extensions, Brian Kernighan's awk: BTL. (line 6)
+* extensions, common, ** operator: Arithmetic Ops. (line 30)
+* extensions, common, **= operator: Assignment Ops. (line 137)
+* extensions, common, /dev/stderr special file: Special FD. (line 46)
+* extensions, common, /dev/stdin special file: Special FD. (line 46)
+* extensions, common, /dev/stdout special file: Special FD. (line 46)
+* extensions, common, \x escape sequence: Escape Sequences. (line 61)
+* extensions, common, BINMODE variable: PC Using. (line 33)
+* extensions, common, delete to delete entire arrays: Delete. (line 39)
+* extensions, common, fflush() function: I/O Functions. (line 43)
+* extensions, common, func keyword: Definition Syntax. (line 92)
+* extensions, common, length() applied to an array: String Functions.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 197)
* extensions, common, 'RS' as a regexp: gawk split records. (line 6)
* extensions, common, single character fields: Single Character Fields.
(line 6)
+<<<<<<< HEAD
* extensions, common, '\x' escape sequence: Escape Sequences. (line 61)
* extensions, in 'gawk', not in POSIX 'awk': POSIX/GNU. (line 6)
* extensions, 'mawk': Common Extensions. (line 6)
+=======
+* extensions, in gawk, not in POSIX awk: POSIX/GNU. (line 6)
+* extensions, loading, @load directive: Loading Shared Libraries.
+ (line 8)
+* extensions, mawk: Common Extensions. (line 6)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* extensions, where to find: gawkextlib. (line 6)
* 'extract.awk' program: Extract Program. (line 79)
* extraction, of marked strings (internationalization): String Extraction.
@@ -32208,8 +34100,16 @@ Index
* field separators, regular expressions as: Field Separators. (line 50)
* field separators, regular expressions as <1>: Regexp Field Splitting.
(line 6)
+<<<<<<< HEAD
* field separators, See Also 'OFS': Changing Fields. (line 64)
* field separators, spaces as: Cut Program. (line 109)
+=======
+* field separators, regular expressions as: Field Separators. (line 51)
+* field separators, See Also OFS: Changing Fields. (line 64)
+* field separators, spaces as: Cut Program. (line 108)
+* fields <1>: Basic High Level. (line 73)
+* fields <2>: Fields. (line 6)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* fields: Reading Files. (line 14)
* fields <1>: Fields. (line 6)
* fields <2>: Basic High Level. (line 62)
@@ -32227,7 +34127,12 @@ Index
* 'FIELDWIDTHS' variable: Constant Size. (line 23)
* 'FIELDWIDTHS' variable <1>: User-modified. (line 37)
* file descriptors: Special FD. (line 6)
+<<<<<<< HEAD
* file names, distinguishing: Auto-set. (line 55)
+=======
+* file inclusion, @include directive: Include Files. (line 8)
+* file names, distinguishing: Auto-set. (line 56)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* file names, in compatibility mode: Special Caveats. (line 9)
* file names, standard streams in 'gawk': Special FD. (line 46)
* 'FILENAME' variable: Reading Files. (line 6)
@@ -32265,9 +34170,13 @@ Index
(line 63)
* files, message object, specifying directory of: Explaining gettext.
(line 54)
+<<<<<<< HEAD
* files, message object, specifying directory of <1>: Programmer i18n.
(line 47)
* files, multiple passes over: Other Arguments. (line 49)
+=======
+* files, multiple passes over: Other Arguments. (line 53)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* files, multiple, duplicating output into: Tee Program. (line 6)
* files, output, See output files: Close Files And Pipes.
(line 6)
@@ -32283,7 +34192,7 @@ Index
* files, reading, multiline records: Multiple Line. (line 6)
* files, searching for regular expressions: Egrep Program. (line 6)
* files, skipping: File Checking. (line 6)
-* files, source, search path for: Programs Exercises. (line 63)
+* files, source, search path for: Programs Exercises. (line 70)
* files, splitting: Split Program. (line 6)
* files, Texinfo, extracting programs from: Extract Program. (line 6)
* find substring in string: String Functions. (line 155)
@@ -32300,12 +34209,21 @@ Index
* flush buffered output: I/O Functions. (line 28)
* 'fnmatch()' extension function: Extension Sample Fnmatch.
(line 12)
+<<<<<<< HEAD
* 'FNR' variable: Records. (line 6)
* 'FNR' variable <1>: Auto-set. (line 108)
* 'FNR' variable, changing: Auto-set. (line 309)
* 'for' statement: For Statement. (line 6)
* 'for' statement, looping over arrays: Scanning an Array. (line 20)
* 'fork()' extension function: Extension Sample Fork.
+=======
+* FNR variable <1>: Auto-set. (line 107)
+* FNR variable: Records. (line 6)
+* FNR variable, changing: Auto-set. (line 313)
+* for statement: For Statement. (line 6)
+* for statement, looping over arrays: Scanning an Array. (line 20)
+* fork() extension function: Extension Sample Fork.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 11)
* format specifiers: Basic Printf. (line 15)
* format specifiers, mixing regular with positional specifiers: Printf Ordering.
@@ -32357,6 +34275,7 @@ Index
* 'FUNCTAB' array: Auto-set. (line 123)
* function calls: Function Calls. (line 6)
* function calls, indirect: Indirect Calls. (line 6)
+* function calls, indirect, @-notation for: Indirect Calls. (line 47)
* function definition example: Function Example. (line 6)
* function pointers: Indirect Calls. (line 6)
* functions, arrays as parameters to: Pass By Value/Reference.
@@ -32364,7 +34283,7 @@ Index
* functions, built-in: Function Calls. (line 10)
* functions, built-in <1>: Functions. (line 6)
* functions, built-in, evaluation order: Calling Built-in. (line 30)
-* functions, defining: Definition Syntax. (line 6)
+* functions, defining: Definition Syntax. (line 9)
* functions, library: Library Functions. (line 6)
* functions, library, assertions: Assert Function. (line 6)
* functions, library, associative arrays and: Library Names. (line 57)
@@ -32387,9 +34306,15 @@ Index
* functions, library, rounding numbers: Round Function. (line 6)
* functions, library, user database, reading: Passwd Functions.
(line 6)
+<<<<<<< HEAD
* functions, names of: Arrays. (line 18)
* functions, names of <1>: Definition Syntax. (line 20)
* functions, recursive: Definition Syntax. (line 79)
+=======
+* functions, names of <1>: Definition Syntax. (line 23)
+* functions, names of: Arrays. (line 18)
+* functions, recursive: Definition Syntax. (line 82)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* functions, string-translation: I18N Functions. (line 6)
* functions, undefined: Pass By Value/Reference.
(line 70)
@@ -32399,6 +34324,7 @@ Index
* functions, user-defined, library of: Library Functions. (line 6)
* functions, user-defined, 'next'/'nextfile' statements and: Next Statement.
(line 45)
+<<<<<<< HEAD
* functions, user-defined, 'next'/'nextfile' statements and <1>: Nextfile Statement.
(line 46)
* G-d: Acknowledgments. (line 82)
@@ -32418,15 +34344,40 @@ Index
* 'gawk', comparison operators and: Comparison Operators.
(line 51)
* 'gawk', configuring: Configuration Philosophy.
+=======
+* G-d: Acknowledgments. (line 92)
+* Garfinkle, Scott: Contributors. (line 34)
+* gawk program, dynamic profiling: Profiling. (line 179)
+* gawk version: Auto-set. (line 211)
+* gawk, ARGIND variable in: Other Arguments. (line 12)
+* gawk, awk and <1>: This Manual. (line 14)
+* gawk, awk and: Preface. (line 21)
+* gawk, bitwise operations in: Bitwise Functions. (line 39)
+* gawk, break statement in: Break Statement. (line 51)
+* gawk, built-in variables and: Built-in Variables. (line 14)
+* gawk, character classes and: Bracket Expressions. (line 100)
+* gawk, coding style in: Adding Code. (line 39)
+* gawk, command-line options, and regular expressions: GNU Regexp Operators.
+ (line 70)
+* gawk, comparison operators and: Comparison Operators.
+ (line 50)
+* gawk, configuring: Configuration Philosophy.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 6)
* 'gawk', configuring, options: Additional Configuration Options.
(line 6)
+<<<<<<< HEAD
* 'gawk', 'continue' statement in: Continue Statement. (line 43)
* 'gawk', distribution: Distribution contents.
+=======
+* gawk, continue statement in: Continue Statement. (line 44)
+* gawk, distribution: Distribution contents.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 6)
* 'gawk', 'ERRNO' variable in: Getline. (line 19)
* 'gawk', 'ERRNO' variable in <1>: Close Files And Pipes.
(line 139)
+<<<<<<< HEAD
* 'gawk', 'ERRNO' variable in <2>: BEGINFILE/ENDFILE. (line 26)
* 'gawk', 'ERRNO' variable in <3>: Auto-set. (line 82)
* 'gawk', 'ERRNO' variable in <4>: TCP/IP Networking. (line 53)
@@ -32441,6 +34392,20 @@ Index
* 'gawk', format-control characters: Control Letters. (line 18)
* 'gawk', format-control characters <1>: Control Letters. (line 93)
* 'gawk', 'FPAT' variable in: Splitting By Content.
+=======
+* gawk, ERRNO variable in: Getline. (line 19)
+* gawk, escape sequences: Escape Sequences. (line 132)
+* gawk, extensions, disabling: Options. (line 252)
+* gawk, features, adding: Adding Code. (line 6)
+* gawk, features, advanced: Advanced Features. (line 6)
+* gawk, field separators and: User-modified. (line 71)
+* gawk, FIELDWIDTHS variable in <1>: User-modified. (line 37)
+* gawk, FIELDWIDTHS variable in: Constant Size. (line 23)
+* gawk, file names in: Special Files. (line 6)
+* gawk, format-control characters: Control Letters. (line 18)
+* gawk, FPAT variable in <1>: User-modified. (line 43)
+* gawk, FPAT variable in: Splitting By Content.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 27)
* 'gawk', 'FPAT' variable in <1>: User-modified. (line 43)
* 'gawk', 'FUNCTAB' array in: Auto-set. (line 123)
@@ -32452,10 +34417,20 @@ Index
* 'gawk', 'IGNORECASE' variable in <3>: String Functions. (line 58)
* 'gawk', 'IGNORECASE' variable in <4>: Array Sorting Functions.
(line 83)
+<<<<<<< HEAD
* 'gawk', implementation issues: Notes. (line 6)
* 'gawk', implementation issues, debugging: Compatibility Mode.
(line 6)
* 'gawk', implementation issues, downward compatibility: Compatibility Mode.
+=======
+* gawk, IGNORECASE variable in <2>: String Functions. (line 58)
+* gawk, IGNORECASE variable in <3>: Array Intro. (line 94)
+* gawk, IGNORECASE variable in <4>: User-modified. (line 76)
+* gawk, IGNORECASE variable in: Case-sensitivity. (line 26)
+* gawk, implementation issues: Notes. (line 6)
+* gawk, implementation issues, debugging: Compatibility Mode. (line 6)
+* gawk, implementation issues, downward compatibility: Compatibility Mode.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 6)
* 'gawk', implementation issues, limits: Getline Notes. (line 14)
* 'gawk', implementation issues, pipes: Redirection. (line 134)
@@ -32464,6 +34439,7 @@ Index
(line 13)
* 'gawk', interpreter, adding code to: Using Internal File Ops.
(line 6)
+<<<<<<< HEAD
* 'gawk', interval expressions and: Regexp Operators. (line 140)
* 'gawk', line continuation in: Conditional Exp. (line 34)
* 'gawk', 'LINT' variable in: User-modified. (line 88)
@@ -32478,11 +34454,27 @@ Index
* 'gawk', 'PROCINFO' array in <2>: Time Functions. (line 47)
* 'gawk', 'PROCINFO' array in <3>: Two-way I/O. (line 116)
* 'gawk', regexp constants and: Using Constant Regexps.
+=======
+* gawk, interval expressions and: Regexp Operators. (line 139)
+* gawk, line continuation in: Conditional Exp. (line 34)
+* gawk, LINT variable in: User-modified. (line 88)
+* gawk, list of contributors to: Contributors. (line 6)
+* gawk, MS-DOS version of: PC Using. (line 10)
+* gawk, MS-Windows version of: PC Using. (line 10)
+* gawk, newlines in: Statements/Lines. (line 12)
+* gawk, octal numbers and: Nondecimal-numbers. (line 42)
+* gawk, OS/2 version of: PC Using. (line 16)
+* gawk, PROCINFO array in <1>: Two-way I/O. (line 99)
+* gawk, PROCINFO array in <2>: Time Functions. (line 47)
+* gawk, PROCINFO array in: Auto-set. (line 137)
+* gawk, regexp constants and: Using Constant Regexps.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 28)
* 'gawk', regular expressions, case sensitivity: Case-sensitivity.
(line 26)
* 'gawk', regular expressions, operators: GNU Regexp Operators.
(line 6)
+<<<<<<< HEAD
* 'gawk', regular expressions, precedence: Regexp Operators. (line 162)
* 'gawk', 'RT' variable in: awk split records. (line 123)
* 'gawk', 'RT' variable in <1>: Multiple Line. (line 129)
@@ -32502,10 +34494,32 @@ Index
* 'gawkextlib': gawkextlib. (line 6)
* 'gawkextlib' project: gawkextlib. (line 6)
* General Public License (GPL): Glossary. (line 303)
+=======
+* gawk, regular expressions, precedence: Regexp Operators. (line 161)
+* gawk, RT variable in <1>: Auto-set. (line 269)
+* gawk, RT variable in <2>: Multiple Line. (line 129)
+* gawk, RT variable in: awk split records. (line 124)
+* gawk, See Also awk: Preface. (line 34)
+* gawk, source code, obtaining: Getting. (line 6)
+* gawk, splitting fields and: Constant Size. (line 88)
+* gawk, string-translation functions: I18N Functions. (line 6)
+* gawk, SYMTAB array in: Auto-set. (line 273)
+* gawk, TEXTDOMAIN variable in: User-modified. (line 152)
+* gawk, timestamps: Time Functions. (line 6)
+* gawk, uses for: Preface. (line 34)
+* gawk, versions of, information about, printing: Options. (line 298)
+* gawk, VMS version of: VMS Installation. (line 6)
+* gawk, word-boundary operator: GNU Regexp Operators.
+ (line 63)
+* gawkextlib: gawkextlib. (line 6)
+* gawkextlib project: gawkextlib. (line 6)
+* General Public License (GPL): Glossary. (line 305)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* General Public License, See GPL: Manual History. (line 11)
* generate time values: Time Functions. (line 25)
* 'gensub': Using Constant Regexps.
(line 43)
+<<<<<<< HEAD
* 'gensub' <1>: String Functions. (line 89)
* 'gensub()' function ('gawk'), escape processing: Gory Details.
(line 6)
@@ -32522,9 +34536,27 @@ Index
* 'getgruser()' function, user-defined: Group Functions. (line 196)
* 'getline' command: Reading Files. (line 20)
* 'getline' command, coprocesses, using from: Getline/Coprocess.
+=======
+* gensub() function (gawk), escape processing: Gory Details. (line 6)
+* getaddrinfo() function (C library): TCP/IP Networking. (line 38)
+* getgrent() function (C library): Group Functions. (line 6)
+* getgrent() user-defined function: Group Functions. (line 6)
+* getgrgid() function (C library): Group Functions. (line 183)
+* getgrgid() user-defined function: Group Functions. (line 186)
+* getgrnam() function (C library): Group Functions. (line 172)
+* getgrnam() user-defined function: Group Functions. (line 177)
+* getgruser() function (C library): Group Functions. (line 192)
+* getgruser() function, user-defined: Group Functions. (line 195)
+* getline command: Reading Files. (line 20)
+* getline command, _gr_init() user-defined function: Group Functions.
+ (line 83)
+* getline command, _pw_init() function: Passwd Functions. (line 154)
+* getline command, coprocesses, using from <1>: Close Files And Pipes.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 6)
* 'getline' command, coprocesses, using from <1>: Close Files And Pipes.
(line 6)
+<<<<<<< HEAD
* 'getline' command, deadlock and: Two-way I/O. (line 70)
* 'getline' command, explicit input with: Getline. (line 6)
* 'getline' command, 'FILENAME' variable and: Getline Notes. (line 19)
@@ -32536,6 +34568,16 @@ Index
* 'getline' from a file: Getline/File. (line 6)
* 'getline' into a variable: Getline/Variable. (line 6)
* 'getline' statement, 'BEGINFILE'/'ENDFILE' patterns and: BEGINFILE/ENDFILE.
+=======
+* getline command, deadlock and: Two-way I/O. (line 52)
+* getline command, explicit input with: Getline. (line 6)
+* getline command, FILENAME variable and: Getline Notes. (line 19)
+* getline command, return values: Getline. (line 19)
+* getline command, variants: Getline Summary. (line 6)
+* getline from a file: Getline/File. (line 6)
+* getline into a variable: Getline/Variable. (line 6)
+* getline statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 54)
* 'getlocaltime()' user-defined function: Getlocaltime Function.
(line 16)
@@ -32562,7 +34604,11 @@ Index
* 'git' utility <3>: Adding Code. (line 112)
* Git, use of for 'gawk' source code: Derived Files. (line 6)
* GNITS mailing list: Acknowledgments. (line 52)
+<<<<<<< HEAD
* GNU 'awk', See 'gawk': Preface. (line 53)
+=======
+* GNU awk, See gawk: Preface. (line 51)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* GNU Free Documentation License: GNU Free Documentation License.
(line 8)
* GNU General Public License: Glossary. (line 303)
@@ -32583,7 +34629,11 @@ Index
* Grigera, Juan: Contributors. (line 58)
* group database, reading: Group Functions. (line 6)
* group file: Group Functions. (line 6)
+<<<<<<< HEAD
* group ID of 'gawk' user: Auto-set. (line 180)
+=======
+* group ID of gawk user: Auto-set. (line 184)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* groups, information about: Group Functions. (line 6)
* 'gsub': Using Constant Regexps.
(line 43)
@@ -32622,10 +34672,17 @@ Index
* 'if' statement, use of regexps in: Regexp Usage. (line 19)
* 'igawk.sh' program: Igawk Program. (line 124)
* ignore breakpoint: Breakpoint Control. (line 87)
+<<<<<<< HEAD
* 'ignore' debugger command: Breakpoint Control. (line 87)
* 'IGNORECASE' variable: User-modified. (line 76)
* 'IGNORECASE' variable, and array indices: Array Intro. (line 91)
* 'IGNORECASE' variable, and array sorting functions: Array Sorting Functions.
+=======
+* ignore debugger command: Breakpoint Control. (line 87)
+* IGNORECASE variable: User-modified. (line 76)
+* IGNORECASE variable, and array indices: Array Intro. (line 94)
+* IGNORECASE variable, and array sorting functions: Array Sorting Functions.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 83)
* 'IGNORECASE' variable, in example programs: Library Functions.
(line 53)
@@ -32640,6 +34697,7 @@ Index
* implementation issues, 'gawk', limits <1>: Redirection. (line 134)
* 'in' operator: Comparison Operators.
(line 11)
+<<<<<<< HEAD
* 'in' operator <1>: Precedence. (line 82)
* 'in' operator <2>: For Statement. (line 75)
* 'in' operator, index existence in multidimensional arrays: Multidimensional.
@@ -32648,10 +34706,20 @@ Index
* 'in' operator, testing if array element exists: Reference to Elements.
(line 37)
* 'in' operator, use in loops: Scanning an Array. (line 17)
+=======
+* in operator, index existence in multidimensional arrays: Multidimensional.
+ (line 43)
+* in operator, order of array access: Scanning an Array. (line 48)
+* in operator, testing if array element exists: Reference to Elements.
+ (line 38)
+* in operator, use in loops: Scanning an Array. (line 17)
+* including files, @include directive: Include Files. (line 8)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* increment operators: Increment Ops. (line 6)
* 'index': String Functions. (line 155)
* indexing arrays: Array Intro. (line 48)
* indirect function calls: Indirect Calls. (line 6)
+* indirect function calls, @-notation: Indirect Calls. (line 47)
* infinite precision: Arbitrary Precision Arithmetic.
(line 6)
* 'info' debugger command: Debugger Info. (line 13)
@@ -32664,10 +34732,16 @@ Index
* input files, counting elements in: Wc Program. (line 6)
* input files, examples: Sample Data Files. (line 6)
* input files, reading: Reading Files. (line 6)
+<<<<<<< HEAD
* input files, running 'awk' without: Read Terminal. (line 6)
* input files, running 'awk' without <1>: Read Terminal. (line 17)
* input files, variable assignments and: Other Arguments. (line 19)
* input pipeline: Getline/Pipe. (line 10)
+=======
+* input files, running awk without: Read Terminal. (line 6)
+* input files, variable assignments and: Other Arguments. (line 23)
+* input pipeline: Getline/Pipe. (line 9)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* input record, length of: String Functions. (line 174)
* input redirection: Getline/File. (line 6)
* input, data, nondecimal: Nondecimal Data. (line 6)
@@ -32679,8 +34753,13 @@ Index
* input, standard <1>: Special FD. (line 6)
* input/output functions: I/O Functions. (line 6)
* input/output, binary: User-modified. (line 15)
+<<<<<<< HEAD
* input/output, from 'BEGIN' and 'END': I/O And BEGIN/END. (line 6)
* input/output, two-way: Two-way I/O. (line 44)
+=======
+* input/output, from BEGIN and END: I/O And BEGIN/END. (line 6)
+* input/output, two-way: Two-way I/O. (line 25)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* insomnia, cure for: Alarm Program. (line 6)
* installation, VMS: VMS Installation. (line 6)
* installing 'gawk': Installation. (line 6)
@@ -32699,8 +34778,13 @@ Index
* internationalization, localization <1>: Internationalization.
(line 13)
* internationalization, localization, character classes: Bracket Expressions.
+<<<<<<< HEAD
(line 97)
* internationalization, localization, 'gawk' and: Internationalization.
+=======
+ (line 100)
+* internationalization, localization, gawk and: Internationalization.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 13)
* internationalization, localization, locale categories: Explaining gettext.
(line 81)
@@ -32709,6 +34793,7 @@ Index
* internationalization, localization, portability and: I18N Portability.
(line 6)
* internationalizing a program: Explaining gettext. (line 6)
+<<<<<<< HEAD
* interpreted programs: Basic High Level. (line 13)
* interpreted programs <1>: Glossary. (line 352)
* interval expressions, regexp operator: Regexp Operators. (line 117)
@@ -32716,6 +34801,15 @@ Index
* invoke shell command: I/O Functions. (line 76)
* 'isarray': Type Functions. (line 11)
* ISO: Glossary. (line 363)
+=======
+* interpreted programs <1>: Glossary. (line 356)
+* interpreted programs: Basic High Level. (line 15)
+* interval expressions, regexp operator: Regexp Operators. (line 116)
+* inventory-shipped file: Sample Data Files. (line 32)
+* invoke shell command: I/O Functions. (line 75)
+* isarray: Type Functions. (line 11)
+* ISO: Glossary. (line 367)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* ISO 8859-1: Glossary. (line 133)
* ISO Latin-1: Glossary. (line 133)
* Jacobs, Andrew: Passwd Functions. (line 90)
@@ -32731,6 +34825,19 @@ Index
* Kahrs, Ju"rgen <1>: Contributors. (line 71)
* Kasal, Stepan: Acknowledgments. (line 60)
* Kenobi, Obi-Wan: Undocumented. (line 6)
+<<<<<<< HEAD
+=======
+* Kernighan, Brian <1>: Glossary. (line 143)
+* Kernighan, Brian <2>: Basic Data Typing. (line 54)
+* Kernighan, Brian <3>: Other Versions. (line 13)
+* Kernighan, Brian <4>: Contributors. (line 11)
+* Kernighan, Brian <5>: BTL. (line 6)
+* Kernighan, Brian <6>: Library Functions. (line 12)
+* Kernighan, Brian <7>: Concatenation. (line 6)
+* Kernighan, Brian <8>: Getline/Pipe. (line 6)
+* Kernighan, Brian <9>: Acknowledgments. (line 76)
+* Kernighan, Brian <10>: Conventions. (line 38)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Kernighan, Brian: History. (line 17)
* Kernighan, Brian <1>: Conventions. (line 34)
* Kernighan, Brian <2>: Acknowledgments. (line 76)
@@ -32749,6 +34856,7 @@ Index
(line 72)
* 'labels.awk' program: Labels Program. (line 51)
* Langston, Peter: Advanced Features. (line 6)
+<<<<<<< HEAD
* languages, data-driven: Basic High Level. (line 74)
* 'LC_ALL' locale category: Explaining gettext. (line 121)
* 'LC_COLLATE' locale category: Explaining gettext. (line 94)
@@ -32761,6 +34869,20 @@ Index
* 'LC_RESPONSE' locale category: Explaining gettext. (line 112)
* 'LC_TIME' locale category: Explaining gettext. (line 116)
* left angle bracket ('<'), '<' operator: Comparison Operators.
+=======
+* languages, data-driven: Basic High Level. (line 85)
+* LC_ALL locale category: Explaining gettext. (line 117)
+* LC_COLLATE locale category: Explaining gettext. (line 94)
+* LC_CTYPE locale category: Explaining gettext. (line 98)
+* LC_MESSAGES locale category: Explaining gettext. (line 88)
+* LC_MESSAGES locale category, bindtextdomain() function (gawk): Programmer i18n.
+ (line 99)
+* LC_MONETARY locale category: Explaining gettext. (line 104)
+* LC_NUMERIC locale category: Explaining gettext. (line 108)
+* LC_TIME locale category: Explaining gettext. (line 112)
+* left angle bracket (<), < operator <1>: Precedence. (line 65)
+* left angle bracket (<), < operator: Comparison Operators.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 11)
* left angle bracket ('<'), '<' operator <1>: Precedence. (line 64)
* left angle bracket ('<'), '<' operator (I/O): Getline/File. (line 6)
@@ -32813,9 +34935,15 @@ Index
* lint checking, array subscripts: Uninitialized Subscripts.
(line 43)
* lint checking, empty programs: Command Line. (line 16)
+<<<<<<< HEAD
* lint checking, issuing warnings: Options. (line 184)
* lint checking, 'POSIXLY_CORRECT' environment variable: Options.
(line 335)
+=======
+* lint checking, issuing warnings: Options. (line 185)
+* lint checking, POSIXLY_CORRECT environment variable: Options.
+ (line 336)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* lint checking, undefined functions: Pass By Value/Reference.
(line 87)
* 'LINT' variable: User-modified. (line 88)
@@ -32826,7 +34954,13 @@ Index
* 'list' debugger command: Miscellaneous Debugger Commands.
(line 72)
* list function definitions, in debugger: Debugger Info. (line 30)
+<<<<<<< HEAD
* loading, extensions: Options. (line 172)
+=======
+* loading extensions, @load directive: Loading Shared Libraries.
+ (line 8)
+* loading, extensions: Options. (line 173)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* local variables, in a function: Variable Scope. (line 6)
* locale categories: Explaining gettext. (line 81)
* locale decimal point character: Options. (line 265)
@@ -32871,6 +35005,7 @@ Index
* matching, expressions, See comparison expressions: Typing and Comparison.
(line 9)
* matching, leftmost longest: Multiple Line. (line 26)
+<<<<<<< HEAD
* matching, null strings: Gory Details. (line 164)
* 'mawk' utility: Escape Sequences. (line 123)
* 'mawk' utility <1>: Getline/Pipe. (line 62)
@@ -32878,6 +35013,15 @@ Index
* 'mawk' utility <3>: Nextfile Statement. (line 46)
* 'mawk' utility <4>: Other Versions. (line 44)
* maximum precision supported by MPFR library: Auto-set. (line 221)
+=======
+* matching, null strings: Gory Details. (line 143)
+* mawk utility <1>: Other Versions. (line 44)
+* mawk utility <2>: Nextfile Statement. (line 47)
+* mawk utility <3>: Concatenation. (line 36)
+* mawk utility <4>: Getline/Pipe. (line 62)
+* mawk utility: Escape Sequences. (line 132)
+* maximum precision supported by MPFR library: Auto-set. (line 225)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* McIlroy, Doug: Glossary. (line 149)
* McPhee, Patrick: Contributors. (line 101)
* message object files: Explaining gettext. (line 42)
@@ -32889,9 +35033,15 @@ Index
(line 47)
* messages from extensions: Printing Messages. (line 6)
* metacharacters in regular expressions: Regexp Operators. (line 6)
+<<<<<<< HEAD
* metacharacters, escape sequences for: Escape Sequences. (line 129)
* minimum precision supported by MPFR library: Auto-set. (line 224)
* 'mktime': Time Functions. (line 25)
+=======
+* metacharacters, escape sequences for: Escape Sequences. (line 138)
+* minimum precision supported by MPFR library: Auto-set. (line 228)
+* mktime: Time Functions. (line 25)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* modifiers, in format specifiers: Format Modifiers. (line 6)
* monetary information, localization: Explaining gettext. (line 104)
* Moore, Duncan: Getline Notes. (line 40)
@@ -32902,6 +35052,7 @@ Index
* 'n' debugger command (alias for 'next'): Debugger Execution Control.
(line 43)
* names, arrays/variables: Arrays. (line 18)
+<<<<<<< HEAD
* names, arrays/variables <1>: Library Names. (line 6)
* names, functions: Definition Syntax. (line 20)
* names, functions <1>: Library Names. (line 6)
@@ -32910,6 +35061,14 @@ Index
* namespace issues, functions: Definition Syntax. (line 20)
* 'nawk' utility: Names. (line 10)
* NetBSD: Glossary. (line 607)
+=======
+* names, functions <1>: Library Names. (line 6)
+* names, functions: Definition Syntax. (line 23)
+* namespace issues <1>: Library Names. (line 6)
+* namespace issues: Arrays. (line 18)
+* namespace issues, functions: Definition Syntax. (line 23)
+* NetBSD: Glossary. (line 611)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* networks, programming: TCP/IP Networking. (line 6)
* networks, support for: Special Network. (line 6)
* newlines: Statements/Lines. (line 6)
@@ -32926,6 +35085,7 @@ Index
* newlines, separating statements in actions <1>: Statements. (line 10)
* 'next' debugger command: Debugger Execution Control.
(line 43)
+<<<<<<< HEAD
* 'next file' statement: Feature History. (line 168)
* 'next' statement: Boolean Ops. (line 85)
* 'next' statement <1>: Next Statement. (line 6)
@@ -32939,6 +35099,19 @@ Index
* 'nextfile' statement, 'BEGIN'/'END' patterns and: I/O And BEGIN/END.
(line 36)
* 'nextfile' statement, 'BEGINFILE'/'ENDFILE' patterns and: BEGINFILE/ENDFILE.
+=======
+* next file statement: Feature History. (line 169)
+* next statement <1>: Next Statement. (line 6)
+* next statement: Boolean Ops. (line 93)
+* next statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 36)
+* next statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE.
+ (line 49)
+* next statement, user-defined functions and: Next Statement. (line 45)
+* nextfile statement: Nextfile Statement. (line 6)
+* nextfile statement, BEGIN/END patterns and: I/O And BEGIN/END.
+ (line 36)
+* nextfile statement, BEGINFILE/ENDFILE patterns and: BEGINFILE/ENDFILE.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 26)
* 'nextfile' statement, user-defined functions and: Nextfile Statement.
(line 46)
@@ -32953,11 +35126,20 @@ Index
* non-existent array elements: Reference to Elements.
(line 23)
* not Boolean-logic operator: Boolean Ops. (line 6)
+<<<<<<< HEAD
* 'NR' variable: Records. (line 6)
* 'NR' variable <1>: Auto-set. (line 131)
* 'NR' variable, changing: Auto-set. (line 309)
* null strings: awk split records. (line 113)
* null strings <1>: Regexp Field Splitting.
+=======
+* NR variable <1>: Auto-set. (line 132)
+* NR variable: Records. (line 6)
+* NR variable, changing: Auto-set. (line 313)
+* null strings <1>: Basic Data Typing. (line 26)
+* null strings <2>: Truth Values. (line 6)
+* null strings <3>: Regexp Field Splitting.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 43)
* null strings <2>: Truth Values. (line 6)
* null strings <3>: Basic Data Typing. (line 26)
@@ -32967,8 +35149,13 @@ Index
(line 43)
* null strings, converting numbers to strings: Strings And Numbers.
(line 21)
+<<<<<<< HEAD
* null strings, matching: Gory Details. (line 164)
* number as string of bits: Bitwise Functions. (line 110)
+=======
+* null strings, matching: Gory Details. (line 143)
+* number as string of bits: Bitwise Functions. (line 109)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* number of array elements: String Functions. (line 197)
* number sign ('#'), '#!' (executable scripts): Executable Scripts.
(line 6)
@@ -32989,8 +35176,12 @@ Index
* numeric functions: Numeric Functions. (line 6)
* numeric, output format: OFMT. (line 6)
* numeric, strings: Variable Typing. (line 6)
+<<<<<<< HEAD
* 'o' debugger command (alias for 'option'): Debugger Info. (line 57)
* 'oawk' utility: Names. (line 10)
+=======
+* o debugger command (alias for option): Debugger Info. (line 57)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* obsolete features: Obsolete. (line 6)
* octal numbers: Nondecimal-numbers. (line 6)
* octal values, enabling interpretation of: Options. (line 209)
@@ -33074,12 +35265,21 @@ Index
* output, printing, See printing: Printing. (line 6)
* output, records: Output Separators. (line 20)
* output, standard: Special FD. (line 6)
+<<<<<<< HEAD
* 'p' debugger command (alias for 'print'): Viewing And Changing Data.
(line 35)
* Papadopoulos, Panos: Contributors. (line 129)
* parent process ID of 'gawk' process: Auto-set. (line 189)
* parentheses '()', in a profile: Profiling. (line 146)
* parentheses '()', regexp operator: Regexp Operators. (line 80)
+=======
+* p debugger command (alias for print): Viewing And Changing Data.
+ (line 36)
+* Papadopoulos, Panos: Contributors. (line 128)
+* parent process ID of gawk process: Auto-set. (line 193)
+* parentheses (), in a profile: Profiling. (line 146)
+* parentheses (), regexp operator: Regexp Operators. (line 81)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* password file: Passwd Functions. (line 16)
* 'patsplit': String Functions. (line 294)
* patterns: Patterns and Actions.
@@ -33113,6 +35313,7 @@ Index
* Pitts, Dave <1>: Bugs. (line 71)
* Plauger, P.J.: Library Functions. (line 12)
* plug-in: Extension Intro. (line 6)
+<<<<<<< HEAD
* plus sign ('+'), '+' operator: Precedence. (line 51)
* plus sign ('+'), '+' operator <1>: Precedence. (line 57)
* plus sign ('+'), '++' operator: Increment Ops. (line 11)
@@ -33131,14 +35332,38 @@ Index
* portability, backslash in escape sequences: Escape Sequences.
(line 111)
* portability, 'close()' function and: Close Files And Pipes.
+=======
+* plus sign (+), + operator: Precedence. (line 52)
+* plus sign (+), ++ operator <1>: Precedence. (line 46)
+* plus sign (+), ++ operator: Increment Ops. (line 11)
+* plus sign (+), += operator <1>: Precedence. (line 95)
+* plus sign (+), += operator: Assignment Ops. (line 82)
+* plus sign (+), regexp operator: Regexp Operators. (line 105)
+* pointers to functions: Indirect Calls. (line 6)
+* portability: Escape Sequences. (line 102)
+* portability, #! (executable scripts): Executable Scripts. (line 33)
+* portability, ** operator and: Arithmetic Ops. (line 81)
+* portability, **= operator and: Assignment Ops. (line 143)
+* portability, ARGV variable: Executable Scripts. (line 59)
+* portability, backslash continuation and: Statements/Lines. (line 30)
+* portability, backslash in escape sequences: Escape Sequences.
+ (line 120)
+* portability, close() function and: Close Files And Pipes.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 81)
* portability, data files as single record: gawk split records.
(line 65)
* portability, deleting array elements: Delete. (line 56)
* portability, example programs: Library Functions. (line 42)
+<<<<<<< HEAD
* portability, functions, defining: Definition Syntax. (line 104)
* portability, 'gawk': New Ports. (line 6)
* portability, 'gettext' library and: Explaining gettext. (line 11)
+=======
+* portability, functions, defining: Definition Syntax. (line 108)
+* portability, gawk: New Ports. (line 6)
+* portability, gettext library and: Explaining gettext. (line 11)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* portability, internationalization and: I18N Portability. (line 6)
* portability, 'length()' function: String Functions. (line 176)
* portability, new 'awk' vs. old 'awk': Strings And Numbers. (line 56)
@@ -33146,10 +35371,17 @@ Index
(line 90)
* portability, 'NF' variable, decrementing: Changing Fields. (line 115)
* portability, operators: Increment Ops. (line 60)
+<<<<<<< HEAD
* portability, operators, not in POSIX 'awk': Precedence. (line 97)
* portability, 'POSIXLY_CORRECT' environment variable: Options.
(line 355)
* portability, 'substr()' function: String Functions. (line 509)
+=======
+* portability, operators, not in POSIX awk: Precedence. (line 98)
+* portability, POSIXLY_CORRECT environment variable: Options. (line 356)
+* portability, substr() function: String Functions. (line 510)
+* portable object files <1>: Translator i18n. (line 6)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* portable object files: Explaining gettext. (line 37)
* portable object files <1>: Translator i18n. (line 6)
* portable object files, converting to message object files: I18N Example.
@@ -33163,6 +35395,7 @@ Index
(line 6)
* positional specifiers, 'printf' statement, mixing with regular formats: Printf Ordering.
(line 57)
+<<<<<<< HEAD
* POSIX 'awk': This Manual. (line 14)
* POSIX 'awk' <1>: Assignment Ops. (line 138)
* POSIX 'awk', '**' operator and: Precedence. (line 97)
@@ -33221,6 +35454,61 @@ Index
* 'print' statement, line continuations and: Print Examples. (line 75)
* 'print' statement, 'OFMT' variable and: User-modified. (line 113)
* 'print' statement, See Also redirection, of output: Redirection.
+=======
+* POSIX awk <1>: Assignment Ops. (line 137)
+* POSIX awk: This Manual. (line 14)
+* POSIX awk, ** operator and: Precedence. (line 98)
+* POSIX awk, **= operator and: Assignment Ops. (line 143)
+* POSIX awk, < operator and: Getline/File. (line 26)
+* POSIX awk, arithmetic operators and: Arithmetic Ops. (line 30)
+* POSIX awk, backslashes in string constants: Escape Sequences.
+ (line 120)
+* POSIX awk, BEGIN/END patterns: I/O And BEGIN/END. (line 16)
+* POSIX awk, bracket expressions and: Bracket Expressions. (line 26)
+* POSIX awk, bracket expressions and, character classes: Bracket Expressions.
+ (line 32)
+* POSIX awk, break statement and: Break Statement. (line 51)
+* POSIX awk, changes in awk versions: POSIX. (line 6)
+* POSIX awk, continue statement and: Continue Statement. (line 44)
+* POSIX awk, CONVFMT variable and: User-modified. (line 30)
+* POSIX awk, date utility and: Time Functions. (line 254)
+* POSIX awk, field separators and <1>: Field Splitting Summary.
+ (line 40)
+* POSIX awk, field separators and: Fields. (line 6)
+* POSIX awk, FS variable and: User-modified. (line 60)
+* POSIX awk, function keyword in: Definition Syntax. (line 92)
+* POSIX awk, functions and, gsub()/sub(): Gory Details. (line 90)
+* POSIX awk, functions and, length(): String Functions. (line 176)
+* POSIX awk, GNU long options and: Options. (line 15)
+* POSIX awk, interval expressions in: Regexp Operators. (line 135)
+* POSIX awk, next/nextfile statements and: Next Statement. (line 45)
+* POSIX awk, numeric strings and: Variable Typing. (line 6)
+* POSIX awk, OFMT variable and <1>: Strings And Numbers. (line 57)
+* POSIX awk, OFMT variable and: OFMT. (line 27)
+* POSIX awk, period (.), using: Regexp Operators. (line 51)
+* POSIX awk, printf format strings and: Format Modifiers. (line 159)
+* POSIX awk, regular expressions and: Regexp Operators. (line 161)
+* POSIX awk, timestamps and: Time Functions. (line 6)
+* POSIX awk, | I/O operator and: Getline/Pipe. (line 55)
+* POSIX mode: Options. (line 252)
+* POSIX, awk and: Preface. (line 21)
+* POSIX, gawk extensions not included in: POSIX/GNU. (line 6)
+* POSIX, programs, implementing in awk: Clones. (line 6)
+* POSIXLY_CORRECT environment variable: Options. (line 336)
+* PREC variable: User-modified. (line 124)
+* precedence <1>: Precedence. (line 6)
+* precedence: Increment Ops. (line 60)
+* precedence, regexp operators: Regexp Operators. (line 156)
+* print debugger command: Viewing And Changing Data.
+ (line 36)
+* print statement: Printing. (line 16)
+* print statement, BEGIN/END patterns and: I/O And BEGIN/END. (line 16)
+* print statement, commas, omitting: Print Examples. (line 31)
+* print statement, I/O operators in: Precedence. (line 71)
+* print statement, line continuations and: Print Examples. (line 76)
+* print statement, OFMT variable and: User-modified. (line 114)
+* print statement, See Also redirection, of output: Redirection.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 17)
* 'print' statement, 'sprintf()' function and: Round Function.
(line 6)
@@ -33253,6 +35541,7 @@ Index
* printing, unduplicated lines of text: Uniq Program. (line 6)
* printing, user information: Id Program. (line 6)
* private variables: Library Names. (line 11)
+<<<<<<< HEAD
* process group ID of 'gawk' process: Auto-set. (line 183)
* process ID of 'gawk' process: Auto-set. (line 186)
* processes, two-way communications with: Two-way I/O. (line 23)
@@ -33275,6 +35564,28 @@ Index
* program, definition of: Getting Started. (line 21)
* programmers, attractiveness of: Two-way I/O. (line 6)
* programming conventions, '--non-decimal-data' option: Nondecimal Data.
+=======
+* process group ID of gawk process: Auto-set. (line 187)
+* process ID of gawk process: Auto-set. (line 190)
+* processes, two-way communications with: Two-way I/O. (line 6)
+* processing data: Basic High Level. (line 6)
+* PROCINFO array <1>: Passwd Functions. (line 6)
+* PROCINFO array <2>: Time Functions. (line 47)
+* PROCINFO array: Auto-set. (line 137)
+* PROCINFO array, and communications via ptys: Two-way I/O. (line 99)
+* PROCINFO array, and group membership: Group Functions. (line 6)
+* PROCINFO array, and user and group ID numbers: Id Program. (line 15)
+* PROCINFO array, testing the field splitting: Passwd Functions.
+ (line 161)
+* PROCINFO array, uses: Auto-set. (line 246)
+* PROCINFO, values of sorted_in: Controlling Scanning.
+ (line 26)
+* profiling awk programs: Profiling. (line 6)
+* profiling awk programs, dynamically: Profiling. (line 179)
+* program identifiers: Auto-set. (line 155)
+* program, definition of: Getting Started. (line 21)
+* programming conventions, --non-decimal-data option: Nondecimal Data.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 36)
* programming conventions, 'ARGC'/'ARGV' variables: Auto-set. (line 35)
* programming conventions, 'exit' statement: Exit Statement. (line 38)
@@ -33283,9 +35594,15 @@ Index
* programming conventions, functions, calling: Calling Built-in.
(line 10)
* programming conventions, functions, writing: Definition Syntax.
+<<<<<<< HEAD
(line 61)
* programming conventions, 'gawk' extensions: Internal File Ops.
(line 44)
+=======
+ (line 64)
+* programming conventions, gawk extensions: Internal File Ops.
+ (line 45)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* programming conventions, private variable names: Library Names.
(line 23)
* programming language, recipe for: History. (line 6)
@@ -33301,10 +35618,17 @@ Index
(line 99)
* QSE Awk: Other Versions. (line 131)
* Quanstrom, Erik: Alarm Program. (line 8)
+<<<<<<< HEAD
* question mark ('?'), '?:' operator: Precedence. (line 91)
* question mark ('?'), regexp operator: Regexp Operators. (line 112)
* question mark ('?'), regexp operator <1>: GNU Regexp Operators.
(line 62)
+=======
+* question mark (?), ?: operator: Precedence. (line 92)
+* question mark (?), regexp operator <1>: GNU Regexp Operators.
+ (line 59)
+* question mark (?), regexp operator: Regexp Operators. (line 111)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* QuikTrim Awk: Other Versions. (line 135)
* 'quit' debugger command: Miscellaneous Debugger Commands.
(line 99)
@@ -33316,6 +35640,7 @@ Index
(line 62)
* Rakitzis, Byron: History Sorting. (line 25)
* Ramey, Chet: Acknowledgments. (line 60)
+<<<<<<< HEAD
* Ramey, Chet <1>: General Data Types. (line 6)
* 'rand': Numeric Functions. (line 48)
* random numbers, Cliff: Cliff Random Function.
@@ -33323,6 +35648,14 @@ Index
* random numbers, 'rand()'/'srand()' functions: Numeric Functions.
(line 48)
* random numbers, seed of: Numeric Functions. (line 78)
+=======
+* rand: Numeric Functions. (line 50)
+* random numbers, Cliff: Cliff Random Function.
+ (line 6)
+* random numbers, rand()/srand() functions: Numeric Functions.
+ (line 50)
+* random numbers, seed of: Numeric Functions. (line 80)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* range expressions (regexps): Bracket Expressions. (line 6)
* range patterns: Ranges. (line 6)
* range patterns, line continuation and: Ranges. (line 64)
@@ -33352,10 +35685,17 @@ Index
* records, multiline: Multiple Line. (line 6)
* records, printing: Print. (line 22)
* records, splitting input into: Records. (line 6)
+<<<<<<< HEAD
* records, terminating: awk split records. (line 123)
* records, treating files as: gawk split records. (line 91)
* recursive functions: Definition Syntax. (line 79)
* redirect 'gawk' output, in debugger: Debugger Info. (line 72)
+=======
+* records, terminating: awk split records. (line 124)
+* records, treating files as: gawk split records. (line 92)
+* recursive functions: Definition Syntax. (line 82)
+* redirect gawk output, in debugger: Debugger Info. (line 72)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* redirection of input: Getline/File. (line 6)
* redirection of output: Redirection. (line 6)
* reference counting, sorting arrays: Array Sorting Functions.
@@ -33405,7 +35745,7 @@ Index
* regular expressions, operators, 'gawk': GNU Regexp Operators.
(line 6)
* regular expressions, operators, precedence of: Regexp Operators.
- (line 157)
+ (line 156)
* regular expressions, searching for: Egrep Program. (line 6)
* relational operators, See comparison operators: Typing and Comparison.
(line 9)
@@ -33416,8 +35756,13 @@ Index
(line 6)
* return value, 'close()' function: Close Files And Pipes.
(line 131)
+<<<<<<< HEAD
* 'rev()' user-defined function: Function Example. (line 53)
* 'revoutput' extension: Extension Sample Revout.
+=======
+* rev() user-defined function: Function Example. (line 54)
+* revoutput extension: Extension Sample Revout.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 11)
* 'revtwoway' extension: Extension Sample Rev2way.
(line 12)
@@ -33436,6 +35781,7 @@ Index
* right shift: Bitwise Functions. (line 53)
* right shift, bitwise: Bitwise Functions. (line 32)
* Ritchie, Dennis: Basic Data Typing. (line 54)
+<<<<<<< HEAD
* 'RLENGTH' variable: Auto-set. (line 252)
* 'RLENGTH' variable, 'match()' function and: String Functions.
(line 224)
@@ -33469,6 +35815,40 @@ Index
* 'RT' variable: awk split records. (line 123)
* 'RT' variable <1>: Multiple Line. (line 129)
* 'RT' variable <2>: Auto-set. (line 265)
+=======
+* RLENGTH variable: Auto-set. (line 256)
+* RLENGTH variable, match() function and: String Functions. (line 224)
+* Robbins, Arnold <1>: Future Extensions. (line 6)
+* Robbins, Arnold <2>: Bugs. (line 32)
+* Robbins, Arnold <3>: Contributors. (line 141)
+* Robbins, Arnold <4>: General Data Types. (line 6)
+* Robbins, Arnold <5>: Alarm Program. (line 6)
+* Robbins, Arnold <6>: Passwd Functions. (line 90)
+* Robbins, Arnold <7>: Getline/Pipe. (line 39)
+* Robbins, Arnold: Command Line Field Separator.
+ (line 74)
+* Robbins, Bill: Getline/Pipe. (line 39)
+* Robbins, Harry: Acknowledgments. (line 92)
+* Robbins, Jean: Acknowledgments. (line 92)
+* Robbins, Miriam <1>: Passwd Functions. (line 90)
+* Robbins, Miriam <2>: Getline/Pipe. (line 39)
+* Robbins, Miriam: Acknowledgments. (line 92)
+* Rommel, Kai Uwe: Contributors. (line 42)
+* round to nearest integer: Numeric Functions. (line 38)
+* round() user-defined function: Round Function. (line 16)
+* rounding numbers: Round Function. (line 6)
+* ROUNDMODE variable: User-modified. (line 128)
+* RS variable <1>: User-modified. (line 133)
+* RS variable: awk split records. (line 12)
+* RS variable, multiline records and: Multiple Line. (line 17)
+* rshift: Bitwise Functions. (line 52)
+* RSTART variable: Auto-set. (line 262)
+* RSTART variable, match() function and: String Functions. (line 224)
+* RT variable <1>: Auto-set. (line 269)
+* RT variable <2>: Multiple Line. (line 129)
+* RT variable: awk split records. (line 124)
+* Rubin, Paul <1>: Contributors. (line 15)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Rubin, Paul: History. (line 30)
* Rubin, Paul <1>: Contributors. (line 16)
* rule, definition of: Getting Started. (line 21)
@@ -33485,6 +35865,11 @@ Index
* scalar values: Basic Data Typing. (line 13)
* scanning arrays: Scanning an Array. (line 6)
* scanning multidimensional arrays: Multiscanning. (line 11)
+<<<<<<< HEAD
+=======
+* Schorr, Andrew <1>: Contributors. (line 133)
+* Schorr, Andrew <2>: Auto-set. (line 296)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Schorr, Andrew: Acknowledgments. (line 60)
* Schorr, Andrew <1>: Auto-set. (line 292)
* Schorr, Andrew <2>: Contributors. (line 134)
@@ -33492,16 +35877,27 @@ Index
* Schreiber, Rita: Acknowledgments. (line 38)
* search and replace in strings: String Functions. (line 89)
* search in string: String Functions. (line 155)
+<<<<<<< HEAD
* search paths: Programs Exercises. (line 63)
* search paths <1>: PC Using. (line 10)
* search paths <2>: VMS Running. (line 57)
* search paths, for loadable extensions: AWKLIBPATH Variable. (line 6)
+=======
+* search paths <1>: VMS Running. (line 58)
+* search paths <2>: PC Using. (line 10)
+* search paths: Programs Exercises. (line 70)
+* search paths, for loadable extensions: AWKLIBPATH Variable. (line 6)
+* search paths, for source files <1>: VMS Running. (line 58)
+* search paths, for source files <2>: PC Using. (line 10)
+* search paths, for source files <3>: Programs Exercises. (line 70)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* search paths, for source files: AWKPATH Variable. (line 6)
* search paths, for source files <1>: Programs Exercises. (line 63)
* search paths, for source files <2>: PC Using. (line 10)
* search paths, for source files <3>: VMS Running. (line 57)
* searching, files for regular expressions: Egrep Program. (line 6)
* searching, for words: Dupword Program. (line 6)
+<<<<<<< HEAD
* 'sed' utility: Field Splitting Summary.
(line 45)
* 'sed' utility <1>: Simple Sed. (line 6)
@@ -33511,6 +35907,17 @@ Index
* semicolon (';'), separating statements in actions: Statements/Lines.
(line 90)
* semicolon (';'), separating statements in actions <1>: Action Overview.
+=======
+* sed utility <1>: Glossary. (line 11)
+* sed utility <2>: Simple Sed. (line 6)
+* sed utility: Field Splitting Summary.
+ (line 46)
+* seeding random number generator: Numeric Functions. (line 80)
+* semicolon (;), AWKPATH variable and: PC Using. (line 10)
+* semicolon (;), separating statements in actions <1>: Statements.
+ (line 10)
+* semicolon (;), separating statements in actions <2>: Action Overview.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 19)
* semicolon (';'), separating statements in actions <2>: Statements.
(line 10)
@@ -33532,9 +35939,14 @@ Index
(line 58)
* set directory of message catalogs: I18N Functions. (line 11)
* set watchpoint: Viewing And Changing Data.
+<<<<<<< HEAD
(line 66)
* shadowing of variable values: Definition Syntax. (line 67)
* shell quoting, double quote: Read Terminal. (line 25)
+=======
+ (line 67)
+* shadowing of variable values: Definition Syntax. (line 70)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* shell quoting, rules for: Quoting. (line 6)
* shells, piping commands into: Redirection. (line 141)
* shells, quoting: Using Shell Variables.
@@ -33559,8 +35971,13 @@ Index
* side effects <1>: Increment Ops. (line 11)
* side effects <2>: Increment Ops. (line 75)
* side effects, array indexing: Reference to Elements.
+<<<<<<< HEAD
(line 42)
* side effects, 'asort()' function: Array Sorting Functions.
+=======
+ (line 43)
+* side effects, asort() function: Array Sorting Functions.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 24)
* side effects, assignment expressions: Assignment Ops. (line 22)
* side effects, Boolean operators: Boolean Ops. (line 30)
@@ -33572,6 +35989,7 @@ Index
* sidebar, A Constant's Base Does Not Affect Its Value: Nondecimal-numbers.
(line 63)
* sidebar, Backslash Before Regular Characters: Escape Sequences.
+<<<<<<< HEAD
(line 109)
* sidebar, Changing 'FS' Does Not Affect the Fields: Field Splitting Summary.
(line 37)
@@ -33590,14 +36008,43 @@ Index
* sidebar, Portability Issues with '#!': Executable Scripts. (line 31)
* sidebar, Pre-POSIX 'awk' Used 'OFMT' For String Conversion: Strings And Numbers.
(line 54)
+=======
+ (line 118)
+* sidebar, Changing FS Does Not Affect the Fields: Field Splitting Summary.
+ (line 38)
+* sidebar, Changing NR and FNR: Auto-set. (line 311)
+* sidebar, Controlling Output Buffering with system(): I/O Functions.
+ (line 138)
+* sidebar, Escape Sequences for Metacharacters: Escape Sequences.
+ (line 136)
+* sidebar, FS and IGNORECASE: Field Splitting Summary.
+ (line 64)
+* sidebar, Interactive Versus Noninteractive Buffering: I/O Functions.
+ (line 107)
+* sidebar, Matching the Null String: Gory Details. (line 141)
+* sidebar, Operator Evaluation Order: Increment Ops. (line 58)
+* sidebar, Piping into sh: Redirection. (line 140)
+* sidebar, Pre-POSIX awk Used OFMT For String Conversion: Strings And Numbers.
+ (line 55)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* sidebar, Recipe For A Programming Language: History. (line 6)
* sidebar, 'RS = "\0"' Is Not Portable: gawk split records. (line 63)
* sidebar, So Why Does 'gawk' have 'BEGINFILE' and 'ENDFILE'?: Filetrans Function.
(line 83)
+<<<<<<< HEAD
* sidebar, Syntactic Ambiguities Between '/=' and Regular Expressions: Assignment Ops.
(line 147)
* sidebar, Understanding '$0': Changing Fields. (line 134)
* sidebar, Using 'close()''s Return Value: Close Files And Pipes.
+=======
+* sidebar, Syntactic Ambiguities Between /= and Regular Expressions: Assignment Ops.
+ (line 146)
+* sidebar, Understanding #!: Executable Scripts. (line 31)
+* sidebar, Understanding $0: Changing Fields. (line 134)
+* sidebar, Using \n in Bracket Expressions of Dynamic Regexps: Computed Regexps.
+ (line 57)
+* sidebar, Using close()'s Return Value: Close Files And Pipes.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 129)
* sidebar, Using '\n' in Bracket Expressions of Dynamic Regexps: Computed Regexps.
(line 57)
@@ -33612,6 +36059,7 @@ Index
* 'SIGUSR1' signal, for dynamic profiling: Profiling. (line 187)
* 'silent' debugger command: Debugger Execution Control.
(line 10)
+<<<<<<< HEAD
* 'sin': Numeric Functions. (line 89)
* sine: Numeric Functions. (line 89)
* single quote ('''): One-shot. (line 15)
@@ -33619,21 +36067,40 @@ Index
* single quote ('''), in shell commands: Quoting. (line 48)
* single quote ('''), vs. apostrophe: Comments. (line 27)
* single quote ('''), with double quotes: Quoting. (line 70)
+=======
+* sin: Numeric Functions. (line 91)
+* sine: Numeric Functions. (line 91)
+* single quote ('): One-shot. (line 15)
+* single quote (') in gawk command lines: Long. (line 33)
+* single quote ('), in shell commands: Quoting. (line 48)
+* single quote ('), vs. apostrophe: Comments. (line 27)
+* single quote ('), with double quotes: Quoting. (line 70)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* single-character fields: Single Character Fields.
(line 6)
* single-step execution, in the debugger: Debugger Execution Control.
(line 43)
* Skywalker, Luke: Undocumented. (line 6)
+<<<<<<< HEAD
* 'sleep' utility: Alarm Program. (line 110)
* 'sleep()' extension function: Extension Sample Time.
+=======
+* sleep utility: Alarm Program. (line 110)
+* sleep() extension function: Extension Sample Time.
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
(line 22)
* Solaris, POSIX-compliant 'awk': Other Versions. (line 96)
* sort array: String Functions. (line 42)
* sort array indices: String Functions. (line 42)
* sort function, arrays, sorting: Array Sorting Functions.
(line 6)
+<<<<<<< HEAD
* 'sort' utility: Word Sorting. (line 50)
* 'sort' utility, coprocesses and: Two-way I/O. (line 83)
+=======
+* sort utility: Word Sorting. (line 50)
+* sort utility, coprocesses and: Two-way I/O. (line 65)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* sorting characters in different languages: Explaining gettext.
(line 94)
* source code, 'awka': Other Versions. (line 64)
@@ -33649,9 +36116,15 @@ Index
* source code, 'pawk' (Python version): Other Versions. (line 125)
* source code, QSE Awk: Other Versions. (line 131)
* source code, QuikTrim Awk: Other Versions. (line 135)
+<<<<<<< HEAD
* source code, Solaris 'awk': Other Versions. (line 96)
* source files, search path for: Programs Exercises. (line 63)
* sparse arrays: Array Intro. (line 70)
+=======
+* source code, Solaris awk: Other Versions. (line 96)
+* source files, search path for: Programs Exercises. (line 70)
+* sparse arrays: Array Intro. (line 72)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Spencer, Henry: Glossary. (line 11)
* 'split': String Functions. (line 313)
* split string into array: String Functions. (line 294)
@@ -33663,10 +36136,17 @@ Index
* 'sprintf()' function, 'OFMT' variable and: User-modified. (line 113)
* 'sprintf()' function, 'print'/'printf' statements and: Round Function.
(line 6)
+<<<<<<< HEAD
* 'sqrt': Numeric Functions. (line 92)
* square brackets ('[]'), regexp operator: Regexp Operators. (line 56)
* square root: Numeric Functions. (line 92)
* 'srand': Numeric Functions. (line 96)
+=======
+* sqrt: Numeric Functions. (line 94)
+* square brackets ([]), regexp operator: Regexp Operators. (line 56)
+* square root: Numeric Functions. (line 94)
+* srand: Numeric Functions. (line 98)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* stack frame: Debugging Terms. (line 10)
* Stallman, Richard: Manual History. (line 6)
* Stallman, Richard <1>: Acknowledgments. (line 18)
@@ -33739,6 +36219,7 @@ Index
* 'substr': String Functions. (line 478)
* substring: String Functions. (line 478)
* Sumner, Andrew: Other Versions. (line 64)
+<<<<<<< HEAD
* supplementary groups of 'gawk' process: Auto-set. (line 237)
* 'switch' statement: Switch Statement. (line 6)
* 'SYMTAB' array: Auto-set. (line 269)
@@ -33749,6 +36230,17 @@ Index
* 't' debugger command (alias for 'tbreak'): Breakpoint Control.
(line 90)
* 'tbreak' debugger command: Breakpoint Control. (line 90)
+=======
+* supplementary groups of gawk process: Auto-set. (line 241)
+* switch statement: Switch Statement. (line 6)
+* SYMTAB array: Auto-set. (line 273)
+* syntactic ambiguity: /= operator vs. /=.../ regexp constant: Assignment Ops.
+ (line 148)
+* system: I/O Functions. (line 75)
+* systime: Time Functions. (line 66)
+* t debugger command (alias for tbreak): Breakpoint Control. (line 90)
+* tbreak debugger command: Breakpoint Control. (line 90)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Tcl: Library Names. (line 57)
* TCP/IP: TCP/IP Networking. (line 6)
* TCP/IP, support for: Special Network. (line 6)
@@ -33783,13 +36275,20 @@ Index
* tilde ('~'), '~' operator <3>: Regexp Constants. (line 6)
* tilde ('~'), '~' operator <4>: Comparison Operators.
(line 11)
+<<<<<<< HEAD
* tilde ('~'), '~' operator <5>: Comparison Operators.
(line 98)
* tilde ('~'), '~' operator <6>: Precedence. (line 79)
* tilde ('~'), '~' operator <7>: Expression Patterns. (line 24)
+=======
+* tilde (~), ~ operator <4>: Regexp Constants. (line 6)
+* tilde (~), ~ operator <5>: Case-sensitivity. (line 26)
+* tilde (~), ~ operator <6>: Computed Regexps. (line 6)
+* tilde (~), ~ operator: Regexp Usage. (line 19)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* time functions: Time Functions. (line 6)
* time, alarm clock example program: Alarm Program. (line 11)
-* time, localization and: Explaining gettext. (line 116)
+* time, localization and: Explaining gettext. (line 112)
* time, managing: Getlocaltime Function.
(line 6)
* time, retrieving: Time Functions. (line 17)
@@ -33814,7 +36313,11 @@ Index
* troubleshooting, 'awk' uses 'FS' not 'IFS': Field Separators.
(line 29)
* troubleshooting, backslash before nonspecial character: Escape Sequences.
+<<<<<<< HEAD
(line 111)
+=======
+ (line 120)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* troubleshooting, division: Arithmetic Ops. (line 44)
* troubleshooting, fatal errors, field widths, specifying: Constant Size.
(line 23)
@@ -33867,6 +36370,7 @@ Index
* Unicode <2>: Glossary. (line 133)
* uninitialized variables, as array subscripts: Uninitialized Subscripts.
(line 6)
+<<<<<<< HEAD
* 'uniq' utility: Uniq Program. (line 6)
* 'uniq.awk' program: Uniq Program. (line 65)
* Unix: Glossary. (line 607)
@@ -33878,6 +36382,19 @@ Index
(line 64)
* Unix, 'awk' scripts and: Executable Scripts. (line 6)
* 'UNIXROOT' variable, on OS/2 systems: PC Using. (line 16)
+=======
+* uniq utility: Uniq Program. (line 6)
+* uniq.awk program: Uniq Program. (line 65)
+* Unix: Glossary. (line 611)
+* Unix awk, backslashes in escape sequences: Escape Sequences.
+ (line 132)
+* Unix awk, close() function and: Close Files And Pipes.
+ (line 131)
+* Unix awk, password files, field separators and: Command Line Field Separator.
+ (line 65)
+* Unix, awk scripts and: Executable Scripts. (line 6)
+* UNIXROOT variable, on OS/2 systems: PC Using. (line 16)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* unsigned integers: Computer Arithmetic. (line 41)
* 'until' debugger command: Debugger Execution Control.
(line 83)
@@ -33894,7 +36411,7 @@ Index
* 'USR1' signal, for dynamic profiling: Profiling. (line 187)
* values, numeric: Basic Data Typing. (line 13)
* values, string: Basic Data Typing. (line 13)
-* variable assignments and input files: Other Arguments. (line 19)
+* variable assignments and input files: Other Arguments. (line 23)
* variable typing: Typing and Comparison.
(line 9)
* variables: Other Features. (line 6)
@@ -33919,13 +36436,19 @@ Index
* variables, names of: Arrays. (line 18)
* variables, private: Library Names. (line 11)
* variables, setting: Options. (line 32)
+<<<<<<< HEAD
* variables, shadowing: Definition Syntax. (line 67)
* variables, types of: Assignment Ops. (line 39)
+=======
+* variables, shadowing: Definition Syntax. (line 70)
+* variables, types of: Assignment Ops. (line 40)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* variables, types of, comparison expressions and: Typing and Comparison.
(line 9)
* variables, uninitialized, as array subscripts: Uninitialized Subscripts.
(line 6)
* variables, user-defined: Variables. (line 6)
+<<<<<<< HEAD
* version of 'gawk': Auto-set. (line 207)
* version of 'gawk' extension API: Auto-set. (line 232)
* version of GNU MP library: Auto-set. (line 218)
@@ -33938,6 +36461,20 @@ Index
* vertical bar ('|'), '|&' operator (I/O) <2>: Two-way I/O. (line 44)
* vertical bar ('|'), '||' operator: Boolean Ops. (line 57)
* vertical bar ('|'), '||' operator <1>: Precedence. (line 88)
+=======
+* version of gawk: Auto-set. (line 211)
+* version of gawk extension API: Auto-set. (line 236)
+* version of GNU MP library: Auto-set. (line 222)
+* version of GNU MPFR library: Auto-set. (line 218)
+* vertical bar (|): Regexp Operators. (line 70)
+* vertical bar (|), | operator (I/O) <1>: Precedence. (line 65)
+* vertical bar (|), | operator (I/O): Getline/Pipe. (line 9)
+* vertical bar (|), |& operator (I/O) <1>: Two-way I/O. (line 25)
+* vertical bar (|), |& operator (I/O) <2>: Precedence. (line 65)
+* vertical bar (|), |& operator (I/O): Getline/Coprocess. (line 6)
+* vertical bar (|), || operator <1>: Precedence. (line 89)
+* vertical bar (|), || operator: Boolean Ops. (line 57)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac
* Vinschen, Corinna: Acknowledgments. (line 60)
* 'w' debugger command (alias for 'watch'): Viewing And Changing Data.
(line 66)
@@ -33984,14 +36521,43 @@ Index
* XOR bitwise operation: Bitwise Functions. (line 6)
* Yawitz, Efraim: Contributors. (line 132)
* Zaretskii, Eli: Acknowledgments. (line 60)
+<<<<<<< HEAD
* Zaretskii, Eli <1>: Contributors. (line 56)
* Zaretskii, Eli <2>: Bugs. (line 71)
* 'zerofile.awk' program: Empty Files. (line 20)
* Zoulas, Christos: Contributors. (line 67)
+=======
+* zerofile.awk program: Empty Files. (line 21)
+* Zoulas, Christos: Contributors. (line 66)
+* {} (braces): Profiling. (line 142)
+* {} (braces), actions and: Action Overview. (line 19)
+* {} (braces), statements, grouping: Statements. (line 10)
+* | (vertical bar): Regexp Operators. (line 70)
+* | (vertical bar), | operator (I/O) <1>: Precedence. (line 65)
+* | (vertical bar), | operator (I/O) <2>: Redirection. (line 57)
+* | (vertical bar), | operator (I/O): Getline/Pipe. (line 9)
+* | (vertical bar), |& operator (I/O) <1>: Two-way I/O. (line 25)
+* | (vertical bar), |& operator (I/O) <2>: Precedence. (line 65)
+* | (vertical bar), |& operator (I/O) <3>: Redirection. (line 102)
+* | (vertical bar), |& operator (I/O): Getline/Coprocess. (line 6)
+* | (vertical bar), |& operator (I/O), pipes, closing: Close Files And Pipes.
+ (line 119)
+* | (vertical bar), || operator <1>: Precedence. (line 89)
+* | (vertical bar), || operator: Boolean Ops. (line 57)
+* ~ (tilde), ~ operator <1>: Expression Patterns. (line 24)
+* ~ (tilde), ~ operator <2>: Precedence. (line 80)
+* ~ (tilde), ~ operator <3>: Comparison Operators.
+ (line 11)
+* ~ (tilde), ~ operator <4>: Regexp Constants. (line 6)
+* ~ (tilde), ~ operator <5>: Case-sensitivity. (line 26)
+* ~ (tilde), ~ operator <6>: Computed Regexps. (line 6)
+* ~ (tilde), ~ operator: Regexp Usage. (line 19)
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

Tag Table:
+<<<<<<< HEAD
Node: Top1200
Node: Foreword41842
Node: Preface46186
@@ -34540,5 +37106,556 @@ Node: Glossary1171890
Node: Copying1197041
Node: GNU Free Documentation License1234579
Node: Index1259696
+=======
+Node: Top1204
+Node: Foreword41858
+Node: Preface46203
+Ref: Preface-Footnote-149226
+Ref: Preface-Footnote-249333
+Node: History49565
+Node: Names51939
+Ref: Names-Footnote-153033
+Node: This Manual53179
+Ref: This Manual-Footnote-158958
+Node: Conventions59058
+Node: Manual History61403
+Ref: Manual History-Footnote-164479
+Ref: Manual History-Footnote-264520
+Node: How To Contribute64594
+Node: Acknowledgments65833
+Node: Getting Started70581
+Node: Running gawk73015
+Node: One-shot74205
+Node: Read Terminal75430
+Node: Long77455
+Node: Executable Scripts78849
+Ref: Executable Scripts-Footnote-181650
+Node: Comments81752
+Node: Quoting84225
+Node: DOS Quoting89538
+Node: Sample Data Files90213
+Node: Very Simple92820
+Node: Two Rules97705
+Node: More Complex99599
+Ref: More Complex-Footnote-1102513
+Node: Statements/Lines102598
+Ref: Statements/Lines-Footnote-1107054
+Node: Other Features107319
+Node: When108250
+Ref: When-Footnote-1110006
+Node: Intro Summary110071
+Node: Invoking Gawk110954
+Node: Command Line112469
+Node: Options113260
+Ref: Options-Footnote-1128907
+Node: Other Arguments128932
+Node: Naming Standard Input131760
+Node: Environment Variables132853
+Node: AWKPATH Variable133411
+Ref: AWKPATH Variable-Footnote-1136277
+Ref: AWKPATH Variable-Footnote-2136322
+Node: AWKLIBPATH Variable136582
+Node: Other Environment Variables137341
+Node: Exit Status140793
+Node: Include Files141468
+Node: Loading Shared Libraries145046
+Node: Obsolete146430
+Node: Undocumented147127
+Node: Invoking Summary147394
+Node: Regexp148994
+Node: Regexp Usage150453
+Node: Escape Sequences152486
+Node: Regexp Operators158557
+Ref: Regexp Operators-Footnote-1165988
+Ref: Regexp Operators-Footnote-2166135
+Node: Bracket Expressions166233
+Ref: table-char-classes168251
+Node: Leftmost Longest171191
+Node: Computed Regexps172395
+Node: GNU Regexp Operators175773
+Node: Case-sensitivity179479
+Ref: Case-sensitivity-Footnote-1182369
+Ref: Case-sensitivity-Footnote-2182604
+Node: Regexp Summary182712
+Node: Reading Files184181
+Node: Records186273
+Node: awk split records186995
+Node: gawk split records191853
+Ref: gawk split records-Footnote-1196374
+Node: Fields196411
+Ref: Fields-Footnote-1199375
+Node: Nonconstant Fields199461
+Ref: Nonconstant Fields-Footnote-1201691
+Node: Changing Fields201893
+Node: Field Separators207847
+Node: Default Field Splitting210549
+Node: Regexp Field Splitting211666
+Node: Single Character Fields214993
+Node: Command Line Field Separator216052
+Node: Full Line Fields219478
+Ref: Full Line Fields-Footnote-1219986
+Node: Field Splitting Summary220032
+Ref: Field Splitting Summary-Footnote-1223164
+Node: Constant Size223265
+Node: Splitting By Content227871
+Ref: Splitting By Content-Footnote-1231944
+Node: Multiple Line231984
+Ref: Multiple Line-Footnote-1237840
+Node: Getline238019
+Node: Plain Getline240230
+Node: Getline/Variable242936
+Node: Getline/File244083
+Node: Getline/Variable/File245467
+Ref: Getline/Variable/File-Footnote-1247066
+Node: Getline/Pipe247153
+Node: Getline/Variable/Pipe249839
+Node: Getline/Coprocess250946
+Node: Getline/Variable/Coprocess252198
+Node: Getline Notes252935
+Node: Getline Summary255739
+Ref: table-getline-variants256147
+Node: Read Timeout257059
+Ref: Read Timeout-Footnote-1260886
+Node: Command-line directories260944
+Node: Input Summary261848
+Node: Input Exercises264985
+Node: Printing265713
+Node: Print267435
+Node: Print Examples268928
+Node: Output Separators271707
+Node: OFMT273723
+Node: Printf275081
+Node: Basic Printf275987
+Node: Control Letters277526
+Node: Format Modifiers281517
+Node: Printf Examples287544
+Node: Redirection290008
+Node: Special Files296980
+Node: Special FD297513
+Ref: Special FD-Footnote-1301110
+Node: Special Network301184
+Node: Special Caveats302034
+Node: Close Files And Pipes302830
+Ref: Close Files And Pipes-Footnote-1309991
+Ref: Close Files And Pipes-Footnote-2310139
+Node: Output Summary310289
+Node: Output Exercises311286
+Node: Expressions311966
+Node: Values313151
+Node: Constants313827
+Node: Scalar Constants314507
+Ref: Scalar Constants-Footnote-1315366
+Node: Nondecimal-numbers315616
+Node: Regexp Constants318616
+Node: Using Constant Regexps319141
+Node: Variables322213
+Node: Using Variables322868
+Node: Assignment Options324592
+Node: Conversion326467
+Node: Strings And Numbers326991
+Ref: Strings And Numbers-Footnote-1330053
+Node: Locale influences conversions330162
+Ref: table-locale-affects332879
+Node: All Operators333467
+Node: Arithmetic Ops334097
+Node: Concatenation336602
+Ref: Concatenation-Footnote-1339421
+Node: Assignment Ops339527
+Ref: table-assign-ops344510
+Node: Increment Ops345813
+Node: Truth Values and Conditions349251
+Node: Truth Values350334
+Node: Typing and Comparison351383
+Node: Variable Typing352176
+Node: Comparison Operators355828
+Ref: table-relational-ops356238
+Node: POSIX String Comparison359788
+Ref: POSIX String Comparison-Footnote-1360872
+Node: Boolean Ops361010
+Ref: Boolean Ops-Footnote-1365349
+Node: Conditional Exp365440
+Node: Function Calls367167
+Node: Precedence371047
+Node: Locales374716
+Node: Expressions Summary376347
+Node: Patterns and Actions378888
+Node: Pattern Overview380004
+Node: Regexp Patterns381681
+Node: Expression Patterns382224
+Node: Ranges386004
+Node: BEGIN/END389110
+Node: Using BEGIN/END389872
+Ref: Using BEGIN/END-Footnote-1392608
+Node: I/O And BEGIN/END392714
+Node: BEGINFILE/ENDFILE394985
+Node: Empty397916
+Node: Using Shell Variables398233
+Node: Action Overview400516
+Node: Statements402843
+Node: If Statement404691
+Node: While Statement406189
+Node: Do Statement408233
+Node: For Statement409389
+Node: Switch Statement412541
+Node: Break Statement414929
+Node: Continue Statement416970
+Node: Next Statement418795
+Node: Nextfile Statement421185
+Node: Exit Statement423842
+Node: Built-in Variables426246
+Node: User-modified427373
+Ref: User-modified-Footnote-1435062
+Node: Auto-set435124
+Ref: Auto-set-Footnote-1448143
+Ref: Auto-set-Footnote-2448348
+Node: ARGC and ARGV448404
+Node: Pattern Action Summary452308
+Node: Arrays454531
+Node: Array Basics456080
+Node: Array Intro456906
+Ref: figure-array-elements458879
+Ref: Array Intro-Footnote-1461403
+Node: Reference to Elements461531
+Node: Assigning Elements463981
+Node: Array Example464472
+Node: Scanning an Array466204
+Node: Controlling Scanning469205
+Ref: Controlling Scanning-Footnote-1474378
+Node: Delete474694
+Ref: Delete-Footnote-1477445
+Node: Numeric Array Subscripts477502
+Node: Uninitialized Subscripts479685
+Node: Multidimensional481312
+Node: Multiscanning484425
+Node: Arrays of Arrays486014
+Node: Arrays Summary490677
+Node: Functions492782
+Node: Built-in493655
+Node: Calling Built-in494733
+Node: Numeric Functions496721
+Ref: Numeric Functions-Footnote-1501557
+Ref: Numeric Functions-Footnote-2501914
+Ref: Numeric Functions-Footnote-3501962
+Node: String Functions502231
+Ref: String Functions-Footnote-1525228
+Ref: String Functions-Footnote-2525357
+Ref: String Functions-Footnote-3525605
+Node: Gory Details525692
+Ref: table-sub-escapes527465
+Ref: table-sub-proposed528985
+Ref: table-posix-sub530349
+Ref: table-gensub-escapes531889
+Ref: Gory Details-Footnote-1533065
+Node: I/O Functions533216
+Ref: I/O Functions-Footnote-1540326
+Node: Time Functions540473
+Ref: Time Functions-Footnote-1550937
+Ref: Time Functions-Footnote-2551005
+Ref: Time Functions-Footnote-3551163
+Ref: Time Functions-Footnote-4551274
+Ref: Time Functions-Footnote-5551386
+Ref: Time Functions-Footnote-6551613
+Node: Bitwise Functions551879
+Ref: table-bitwise-ops552441
+Ref: Bitwise Functions-Footnote-1556686
+Node: Type Functions556870
+Node: I18N Functions558012
+Node: User-defined559657
+Node: Definition Syntax560461
+Ref: Definition Syntax-Footnote-1565774
+Node: Function Example565843
+Ref: Function Example-Footnote-1568483
+Node: Function Caveats568505
+Node: Calling A Function569023
+Node: Variable Scope569978
+Node: Pass By Value/Reference572966
+Node: Return Statement576476
+Node: Dynamic Typing579460
+Node: Indirect Calls580389
+Ref: Indirect Calls-Footnote-1590105
+Node: Functions Summary590233
+Node: Library Functions592883
+Ref: Library Functions-Footnote-1596501
+Ref: Library Functions-Footnote-2596644
+Node: Library Names596815
+Ref: Library Names-Footnote-1600288
+Ref: Library Names-Footnote-2600508
+Node: General Functions600594
+Node: Strtonum Function601622
+Node: Assert Function604496
+Node: Round Function607822
+Node: Cliff Random Function609363
+Node: Ordinal Functions610379
+Ref: Ordinal Functions-Footnote-1613444
+Ref: Ordinal Functions-Footnote-2613696
+Node: Join Function613907
+Ref: Join Function-Footnote-1615678
+Node: Getlocaltime Function615878
+Node: Readfile Function619614
+Node: Data File Management621453
+Node: Filetrans Function622085
+Node: Rewind Function626154
+Node: File Checking627712
+Ref: File Checking-Footnote-1628844
+Node: Empty Files629045
+Node: Ignoring Assigns631024
+Node: Getopt Function632578
+Ref: Getopt Function-Footnote-1643842
+Node: Passwd Functions644045
+Ref: Passwd Functions-Footnote-1653024
+Node: Group Functions653112
+Ref: Group Functions-Footnote-1661043
+Node: Walking Arrays661256
+Node: Library Functions Summary662859
+Node: Library Exercises664247
+Node: Sample Programs665527
+Node: Running Examples666297
+Node: Clones667025
+Node: Cut Program668249
+Node: Egrep Program678107
+Ref: Egrep Program-Footnote-1685694
+Node: Id Program685804
+Node: Split Program689458
+Ref: Split Program-Footnote-1692996
+Node: Tee Program693124
+Node: Uniq Program695911
+Node: Wc Program703334
+Ref: Wc Program-Footnote-1707599
+Node: Miscellaneous Programs707691
+Node: Dupword Program708904
+Node: Alarm Program710935
+Node: Translate Program715739
+Ref: Translate Program-Footnote-1720312
+Ref: Translate Program-Footnote-2720582
+Node: Labels Program720721
+Ref: Labels Program-Footnote-1724082
+Node: Word Sorting724166
+Node: History Sorting728209
+Node: Extract Program730045
+Node: Simple Sed737581
+Node: Igawk Program740643
+Ref: Igawk Program-Footnote-1754947
+Ref: Igawk Program-Footnote-2755148
+Node: Anagram Program755286
+Node: Signature Program758354
+Node: Programs Summary759601
+Node: Programs Exercises760816
+Ref: Programs Exercises-Footnote-1764947
+Node: Advanced Features765038
+Node: Nondecimal Data766986
+Node: Array Sorting768563
+Node: Controlling Array Traversal769260
+Node: Array Sorting Functions777540
+Ref: Array Sorting Functions-Footnote-1781447
+Node: Two-way I/O781641
+Ref: Two-way I/O-Footnote-1786585
+Ref: Two-way I/O-Footnote-2786764
+Node: TCP/IP Networking786846
+Node: Profiling789691
+Node: Advanced Features Summary797242
+Node: Internationalization799106
+Node: I18N and L10N800586
+Node: Explaining gettext801272
+Ref: Explaining gettext-Footnote-1806298
+Ref: Explaining gettext-Footnote-2806482
+Node: Programmer i18n806647
+Ref: Programmer i18n-Footnote-1811441
+Node: Translator i18n811490
+Node: String Extraction812284
+Ref: String Extraction-Footnote-1813417
+Node: Printf Ordering813503
+Ref: Printf Ordering-Footnote-1816285
+Node: I18N Portability816349
+Ref: I18N Portability-Footnote-1818798
+Node: I18N Example818861
+Ref: I18N Example-Footnote-1821567
+Node: Gawk I18N821639
+Node: I18N Summary822277
+Node: Debugger823616
+Node: Debugging824638
+Node: Debugging Concepts825079
+Node: Debugging Terms826935
+Node: Awk Debugging829532
+Node: Sample Debugging Session830424
+Node: Debugger Invocation830944
+Node: Finding The Bug832280
+Node: List of Debugger Commands838759
+Node: Breakpoint Control840091
+Node: Debugger Execution Control843755
+Node: Viewing And Changing Data847115
+Node: Execution Stack850473
+Node: Debugger Info851986
+Node: Miscellaneous Debugger Commands855980
+Node: Readline Support861164
+Node: Limitations862056
+Node: Debugging Summary864329
+Node: Arbitrary Precision Arithmetic865497
+Node: Computer Arithmetic866984
+Ref: Computer Arithmetic-Footnote-1871371
+Node: Math Definitions871428
+Ref: table-ieee-formats874717
+Ref: Math Definitions-Footnote-1875257
+Node: MPFR features875360
+Node: FP Math Caution876977
+Ref: FP Math Caution-Footnote-1878027
+Node: Inexactness of computations878396
+Node: Inexact representation879344
+Node: Comparing FP Values880699
+Node: Errors accumulate881663
+Node: Getting Accuracy883096
+Node: Try To Round885755
+Node: Setting precision886654
+Ref: table-predefined-precision-strings887336
+Node: Setting the rounding mode889129
+Ref: table-gawk-rounding-modes889493
+Ref: Setting the rounding mode-Footnote-1892947
+Node: Arbitrary Precision Integers893126
+Ref: Arbitrary Precision Integers-Footnote-1896899
+Node: POSIX Floating Point Problems897048
+Ref: POSIX Floating Point Problems-Footnote-1900924
+Node: Floating point summary900962
+Node: Dynamic Extensions903166
+Node: Extension Intro904718
+Node: Plugin License905983
+Node: Extension Mechanism Outline906668
+Ref: figure-load-extension907092
+Ref: figure-load-new-function908577
+Ref: figure-call-new-function909579
+Node: Extension API Description911563
+Node: Extension API Functions Introduction913013
+Node: General Data Types917880
+Ref: General Data Types-Footnote-1923573
+Node: Requesting Values923872
+Ref: table-value-types-returned924609
+Node: Memory Allocation Functions925567
+Ref: Memory Allocation Functions-Footnote-1928314
+Node: Constructor Functions928410
+Node: Registration Functions930168
+Node: Extension Functions930853
+Node: Exit Callback Functions933155
+Node: Extension Version String934403
+Node: Input Parsers935053
+Node: Output Wrappers944867
+Node: Two-way processors949383
+Node: Printing Messages951587
+Ref: Printing Messages-Footnote-1952664
+Node: Updating `ERRNO'952816
+Node: Accessing Parameters953555
+Node: Symbol Table Access954785
+Node: Symbol table by name955299
+Node: Symbol table by cookie957275
+Ref: Symbol table by cookie-Footnote-1961408
+Node: Cached values961471
+Ref: Cached values-Footnote-1964975
+Node: Array Manipulation965066
+Ref: Array Manipulation-Footnote-1966164
+Node: Array Data Types966203
+Ref: Array Data Types-Footnote-1968906
+Node: Array Functions968998
+Node: Flattening Arrays972872
+Node: Creating Arrays979724
+Node: Extension API Variables984455
+Node: Extension Versioning985091
+Node: Extension API Informational Variables986992
+Node: Extension API Boilerplate988078
+Node: Finding Extensions991882
+Node: Extension Example992442
+Node: Internal File Description993172
+Node: Internal File Ops997263
+Ref: Internal File Ops-Footnote-11008695
+Node: Using Internal File Ops1008835
+Ref: Using Internal File Ops-Footnote-11011182
+Node: Extension Samples1011450
+Node: Extension Sample File Functions1012974
+Node: Extension Sample Fnmatch1020542
+Node: Extension Sample Fork1022024
+Node: Extension Sample Inplace1023237
+Node: Extension Sample Ord1024912
+Node: Extension Sample Readdir1025748
+Ref: table-readdir-file-types1026604
+Node: Extension Sample Revout1027403
+Node: Extension Sample Rev2way1027994
+Node: Extension Sample Read write array1028735
+Node: Extension Sample Readfile1030614
+Node: Extension Sample API Tests1031714
+Node: Extension Sample Time1032239
+Node: gawkextlib1033554
+Node: Extension summary1036367
+Node: Extension Exercises1040060
+Node: Language History1040782
+Node: V7/SVR3.11042425
+Node: SVR41044745
+Node: POSIX1046187
+Node: BTL1047573
+Node: POSIX/GNU1048307
+Node: Feature History1054083
+Node: Common Extensions1067174
+Node: Ranges and Locales1068486
+Ref: Ranges and Locales-Footnote-11073103
+Ref: Ranges and Locales-Footnote-21073130
+Ref: Ranges and Locales-Footnote-31073364
+Node: Contributors1073585
+Node: History summary1079010
+Node: Installation1080379
+Node: Gawk Distribution1081330
+Node: Getting1081814
+Node: Extracting1082638
+Node: Distribution contents1084280
+Node: Unix Installation1090050
+Node: Quick Installation1090667
+Node: Additional Configuration Options1093109
+Node: Configuration Philosophy1094847
+Node: Non-Unix Installation1097198
+Node: PC Installation1097656
+Node: PC Binary Installation1098967
+Node: PC Compiling1100815
+Ref: PC Compiling-Footnote-11103814
+Node: PC Testing1103919
+Node: PC Using1105095
+Node: Cygwin1109247
+Node: MSYS1110056
+Node: VMS Installation1110570
+Node: VMS Compilation1111366
+Ref: VMS Compilation-Footnote-11112588
+Node: VMS Dynamic Extensions1112646
+Node: VMS Installation Details1114019
+Node: VMS Running1116271
+Node: VMS GNV1119105
+Node: VMS Old Gawk1119828
+Node: Bugs1120298
+Node: Other Versions1124302
+Node: Installation summary1130529
+Node: Notes1131585
+Node: Compatibility Mode1132450
+Node: Additions1133232
+Node: Accessing The Source1134157
+Node: Adding Code1135593
+Node: New Ports1141771
+Node: Derived Files1146252
+Ref: Derived Files-Footnote-11151333
+Ref: Derived Files-Footnote-21151367
+Ref: Derived Files-Footnote-31151963
+Node: Future Extensions1152077
+Node: Implementation Limitations1152683
+Node: Extension Design1153931
+Node: Old Extension Problems1155085
+Ref: Old Extension Problems-Footnote-11156602
+Node: Extension New Mechanism Goals1156659
+Ref: Extension New Mechanism Goals-Footnote-11160019
+Node: Extension Other Design Decisions1160208
+Node: Extension Future Growth1162314
+Node: Old Extension Mechanism1163150
+Node: Notes summary1164912
+Node: Basic Concepts1166098
+Node: Basic High Level1166779
+Ref: figure-general-flow1167051
+Ref: figure-process-flow1167650
+Ref: Basic High Level-Footnote-11170879
+Node: Basic Data Typing1171064
+Node: Glossary1174392
+Node: Copying1199544
+Node: GNU Free Documentation License1237100
+Node: Index1262236
+>>>>>>> ca9f23d6c33c4b5cb3786d480948a42988ca99ac

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index f2b455b4..a809bd0d 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -51,7 +51,7 @@
@c applies to and all the info about who's publishing this edition
@c These apply across the board.
-@set UPDATE-MONTH July, 2014
+@set UPDATE-MONTH August, 2014
@set VERSION 4.1
@set PATCHLEVEL 1
@@ -165,6 +165,19 @@
@end macro
@end ifdocbook
+@c hack for docbook, where comma shouldn't always follow an @ref{}
+@ifdocbook
+@macro DBREF{text}
+@ref{\text\}
+@end macro
+@end ifdocbook
+
+@ifnotdocbook
+@macro DBREF{text}
+@ref{\text\},
+@end macro
+@end ifnotdocbook
+
@ifclear FOR_PRINT
@set FN file name
@set FFN File Name
@@ -526,10 +539,10 @@ particular records in a file and perform operations upon them.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
@@ -546,7 +559,7 @@ particular records in a file and perform operations upon them.
* Single Character Fields:: Making each character a separate
field.
* Command Line Field Separator:: Setting @code{FS} from the
- command-line.
+ command line.
* Full Line Fields:: Making the full line be a single
field.
* Field Splitting Summary:: Some final points and a summary table.
@@ -572,7 +585,7 @@ particular records in a file and perform operations upon them.
@code{getline}.
* Getline Summary:: Summary of @code{getline} Variants.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on
+* Command-line directories:: What happens if you put a directory on
the command line.
* Input Summary:: Input summary.
* Input Exercises:: Exercises.
@@ -600,7 +613,7 @@ particular records in a file and perform operations upon them.
* Close Files And Pipes:: Closing Input and Output Files and
Pipes.
* Output Summary:: Output summary.
-* Output exercises:: Exercises.
+* Output Exercises:: Exercises.
* Values:: Constants, Variables, and Regular
Expressions.
* Constants:: String, numeric and regexp constants.
@@ -611,7 +624,7 @@ particular records in a file and perform operations upon them.
* Variables:: Variables give names to values for
later use.
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line
+* Assignment Options:: Setting variables on the command line
and a summary of command-line syntax.
This is an advanced method of input.
* Conversion:: The conversion of strings to numbers
@@ -787,7 +800,7 @@ particular records in a file and perform operations upon them.
information.
* Walking Arrays:: A function to walk arrays of arrays.
* Library Functions Summary:: Summary of library functions.
-* Library exercises:: Exercises.
+* Library Exercises:: Exercises.
* Running Examples:: How to run these examples.
* Clones:: Clones of common utilities.
* Cut Program:: The @command{cut} utility.
@@ -1211,23 +1224,18 @@ March, 2001
</prefaceinfo>
@end docbook
-Several kinds of tasks occur repeatedly
-when working with text files.
-You might want to extract certain lines and discard the rest.
-Or you may need to make changes wherever certain patterns appear,
-but leave the rest of the file alone.
-Writing single-use programs for these tasks in languages such as C, C++,
-or Java is time-consuming and inconvenient.
-Such jobs are often easier with @command{awk}.
-The @command{awk} utility interprets a special-purpose programming language
-that makes it easy to handle simple data-reformatting jobs.
+Several kinds of tasks occur repeatedly when working with text files.
+You might want to extract certain lines and discard the rest. Or you
+may need to make changes wherever certain patterns appear, but leave the
+rest of the file alone. Such jobs are often easy with @command{awk}.
+The @command{awk} utility interprets a special-purpose programming
+language that makes it easy to handle simple data-reformatting jobs.
-@cindex Brian Kernighan's @command{awk}
The GNU implementation of @command{awk} is called @command{gawk}; if you
invoke it with the proper options or environment variables
(@pxref{Options}), it is fully
compatible with
-the POSIX@footnote{The 2008 POSIX standard is accessable online at
+the POSIX@footnote{The 2008 POSIX standard is accessible online at
@w{@url{http://www.opengroup.org/onlinepubs/9699919799/}.}}
specification of the @command{awk} language
and with the Unix version of @command{awk} maintained
@@ -1301,7 +1309,7 @@ different computing environments. This @value{DOCUMENT}, while describing
the @command{awk} language in general, also describes the particular
implementation of @command{awk} called @command{gawk} (which stands for
``GNU @command{awk}''). @command{gawk} runs on a broad range of Unix systems,
-ranging from Intel@registeredsymbol{}-architecture PC-based computers
+ranging from Intel-architecture PC-based computers
up through large-scale systems.
@command{gawk} has also been ported to Mac OS X,
Microsoft Windows
@@ -1404,7 +1412,7 @@ help from me, thoroughly reworked @command{gawk} for compatibility
with the newer @command{awk}.
Circa 1994, I became the primary maintainer.
Current development focuses on bug fixes,
-performance improvements, standards compliance, and occasionally, new features.
+performance improvements, standards compliance and, occasionally, new features.
In May of 1997, J@"urgen Kahrs felt the need for network access
from @command{awk}, and with a little help from me, set about adding
@@ -1429,29 +1437,27 @@ for a complete list of those who made important contributions to @command{gawk}.
The @command{awk} language has evolved over the years. Full details are
provided in @ref{Language History}.
The language described in this @value{DOCUMENT}
-is often referred to as ``new @command{awk}'' (@command{nawk}).
+is often referred to as ``new @command{awk}''.
+By analogy, the original version of @command{awk} is
+referred to as ``old @command{awk}.''
-@cindex @command{awk}, versions of
-@cindex @command{nawk} utility
-@cindex @command{oawk} utility
-For some time after new @command{awk} was introduced, there were
-systems with multiple versions of @command{awk}. Some systems had
-an @command{awk} utility that implemented the original version of the
-@command{awk} language and a @command{nawk} utility for the new version.
-Others had an @command{oawk} version for the ``old @command{awk}''
-language and plain @command{awk} for the new one. Still others only
-had one version, which is usually the new one.
-
-Today, only Solaris systems still use an old @command{awk} for the
-default @command{awk} utility. (A more modern @command{awk} lives in
-@file{/usr/xpg6/bin} on these systems.) All other modern systems use
-some version of new @command{awk}.@footnote{Many of these systems use
-@command{gawk} for their @command{awk} implementation!}
-
-It is likely that you already have some version of new @command{awk} on
-your system, which is what you should use when running your programs.
-(Of course, if you're reading this @value{DOCUMENT}, chances are good
-that you have @command{gawk}!)
+Today, on most systems, when you run the @command{awk} utility,
+you get some version of new @command{awk}.@footnote{Only
+Solaris systems still use an old @command{awk} for the
+default @command{awk} utility. A more modern @command{awk} lives in
+@file{/usr/xpg6/bin} on these systems.} If your system's standard
+@command{awk} is the old one, you will see something like this
+if you try the test program:
+
+@example
+$ @kbd{awk 1 /dev/null}
+@error{} awk: syntax error near line 1
+@error{} awk: bailing out near line 1
+@end example
+
+@noindent
+In this case, you should find a version of new @command{awk},
+or just install @command{gawk}!
Throughout this @value{DOCUMENT}, whenever we refer to a language feature
that should be available in any complete implementation of POSIX @command{awk},
@@ -1502,7 +1508,9 @@ There are sidebars
scattered throughout the @value{DOCUMENT}.
They add a more complete explanation of points that are relevant, but not likely
to be of interest on first reading.
+@ifclear FOR_PRINT
All appear in the index, under the heading ``sidebar.''
+@end ifclear
Most of the time, the examples use complete @command{awk} programs.
Some of the more advanced sections show only the part of the @command{awk}
@@ -1657,6 +1665,9 @@ try looking them up here.
@uref{http://www.gnu.org/software/gawk/manual/html_node/GNU-Free-Documentation-License.html,
The GNU FDL}
is the license that covers this @value{DOCUMENT}.
+
+Some of the chapters have exercise sections; these have also been
+omitted from the print edition.
@end ifset
@ifclear FOR_PRINT
@@ -1697,11 +1708,18 @@ are slightly different than in other books you may have read.
This @value{SECTION} briefly documents the typographical conventions used in Texinfo.
@end ifinfo
-Examples you would type at the command-line are preceded by the common
+Examples you would type at the command line are preceded by the common
shell primary and secondary prompts, @samp{$} and @samp{>}.
Input that you type is shown @kbd{like this}.
+@c 8/2014: @print{} is stripped from the texi to make docbook.
+@ifclear FOR_PRINT
Output from the command is preceded by the glyph ``@print{}''.
This typically represents the command's standard output.
+@end ifclear
+@ifset FOR_PRINT
+Output from the command, usually its standard output, appears
+@code{like this}.
+@end ifset
Error messages, and other output on the command's standard error, are preceded
by the glyph ``@error{}''. For example:
@@ -1731,6 +1749,10 @@ another key, at the same time. For example, a @kbd{Ctrl-d} is typed
by first pressing and holding the @kbd{CONTROL} key, next
pressing the @kbd{d} key and finally releasing both keys.
+For the sake of brevity, throughout this @value{DOCUMENT}, we refer to
+Brian Kernighan's version of @command{awk} as ``BWK @command{awk}.''
+(@xref{Other Versions}, for information on his and other versions.)
+
@ifset FOR_PRINT
@quotation NOTE
Notes of interest look like this.
@@ -1770,6 +1792,7 @@ They also appear in the index under the heading ``dark corner.''
As noted by the opening quote, though, any coverage of dark corners is,
by definition, incomplete.
+@cindex c.e., See common extensions
Extensions to the standard @command{awk} language that are supported by
more than one @command{awk} implementation are marked
@ifclear FOR_PRINT
@@ -1777,7 +1800,7 @@ more than one @command{awk} implementation are marked
and ``extensions, common.''
@end ifclear
@ifset FOR_PRINT
-``@value{COMMONEXT}.''
+``@value{COMMONEXT}'' for ``common extension.''
@end ifset
@node Manual History
@@ -1816,6 +1839,7 @@ see @uref{http://www.gnu.org, the GNU Project's home page}.
This @value{DOCUMENT} may also be read from
@uref{http://www.gnu.org/software/gawk/manual/, their web site}.
+@ifclear FOR_PRINT
A shell, an editor (Emacs), highly portable optimizing C, C++, and
Objective-C compilers, a symbolic debugger and dozens of large and
small utilities (such as @command{gawk}), have all been completed and are
@@ -1826,32 +1850,16 @@ stage of development.
@cindex Linux
@cindex GNU/Linux
@cindex operating systems, BSD-based
-@cindex Alpha (DEC)
Until the GNU operating system is more fully developed, you should
consider using GNU/Linux, a freely distributable, Unix-like operating
-system for Intel@registeredsymbol{},
+system for Intel,
Power Architecture,
Sun SPARC, IBM S/390, and other
-@ifclear FOR_PRINT
systems.@footnote{The terminology ``GNU/Linux'' is explained
in the @ref{Glossary}.}
-@end ifclear
-@ifset FOR_PRINT
-systems.
-@end ifset
Many GNU/Linux distributions are
available for download from the Internet.
-
-(There are numerous other freely available, Unix-like operating systems
-based on the
-Berkeley Software Distribution, and some of them use recent versions
-of @command{gawk} for their versions of @command{awk}.
-@uref{http://www.netbsd.org, NetBSD},
-@uref{http://www.freebsd.org, FreeBSD},
-and
-@uref{http://www.openbsd.org, OpenBSD}
-are three of the most popular ones, but there
-are others.)
+@end ifclear
@ifnotinfo
The @value{DOCUMENT} you are reading is actually free---at least, the
@@ -2095,17 +2103,29 @@ people.
Notable code and documentation contributions were made by
a number of people. @xref{Contributors}, for the full list.
-Thanks to Patrice Dumas for the new @command{makeinfo} program.
+Thanks to Patrice Dumas for the new @command{makeinfo} program.
Thanks to Karl Berry who continues to work to keep
the Texinfo markup language sane.
@cindex Kernighan, Brian
+@cindex Brennan, Michael
+@cindex Day, Robert P.J.@:
+Robert P.J.@: Day, Michael Brennan and Brian Kernighan kindly acted as
+reviewers for the 2015 edition of this @value{DOCUMENT}. Their feedback
+helped improve the final work.
+
I would like to thank Brian Kernighan for invaluable assistance during the
testing and debugging of @command{gawk}, and for ongoing
help and advice in clarifying numerous points about the language.
We could not have done nearly as good a job on either @command{gawk}
or its documentation without his help.
+Brian is in a class by himself as a programmer and technical
+author. I have to thank him (yet again) for his ongoing friendship
+and the role model he has been for me for close to 30 years!
+Having him as a reviewer is an exciting privilege. It has also
+been extremely humbling@enddots{}
+
@cindex Robbins, Miriam
@cindex Robbins, Jean
@cindex Robbins, Harry
@@ -2340,29 +2360,27 @@ For example, on OS/2, it is @kbd{Ctrl-z}.)
As an example, the following program prints a friendly piece of advice
(from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}),
to keep you from worrying about the complexities of computer
-programming@footnote{If you use Bash as your shell, you should execute
-the command @samp{set +H} before running this program interactively,
-to disable the C shell-style command history, which treats
-@samp{!} as a special character. We recommend putting this command into
-your personal startup file.}
-(@code{BEGIN} is a feature we haven't discussed yet):
+programming:
@example
-$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"}
+$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'}
@print{} Don't Panic!
@end example
-@cindex shell quoting, double quote
-@cindex double quote (@code{"}) in shell commands
-@cindex @code{"} (double quote) in shell commands
-@cindex @code{\} (backslash) in shell commands
-@cindex backslash (@code{\}) in shell commands
-This program does not read any input. The @samp{\} before each of the
-inner double quotes is necessary because of the shell's quoting
-rules---in particular because it mixes both single quotes and
-double quotes.@footnote{Although we generally recommend the use of single
-quotes around the program text, double quotes are needed here in order to
-put the single quote into the message.}
+@command{awk} executes statements associated with @code{BEGIN} before
+reading any input. If there are no other statements in your program,
+as is the case here, @command{awk} just stops, instead of trying to read
+input it doesn't know how to process.
+The @samp{\47} is a magic way of getting a single quote into
+the program, without having to engage in ugly shell quoting tricks.
+
+@quotation NOTE
+As a side note, if you use Bash as your shell, you should execute the
+command @samp{set +H} before running this program interactively, to
+disable the C shell-style command history, which treats @samp{!} as a
+special character. We recommend putting this command into your personal
+startup file.
+@end quotation
This next simple @command{awk} program
emulates the @command{cat} utility; it copies whatever you type on the
@@ -2397,9 +2415,10 @@ awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{}
@cindex @option{-f} option
@cindex command line, option @option{-f}
-The @option{-f} instructs the @command{awk} utility to get the @command{awk} program
-from the file @var{source-file}. Any @value{FN} can be used for
-@var{source-file}. For example, you could put the program:
+The @option{-f} instructs the @command{awk} utility to get the
+@command{awk} program from the file @var{source-file} (@pxref{Options}).
+Any @value{FN} can be used for @var{source-file}. For example, you
+could put the program:
@example
BEGIN @{ print "Don't Panic!" @}
@@ -2460,16 +2479,7 @@ BEGIN @{ print "Don't Panic!" @}
@noindent
After making this file executable (with the @command{chmod} utility),
simply type @samp{advice}
-at the shell and the system arranges to run @command{awk}@footnote{The
-line beginning with @samp{#!} lists the full @value{FN} of an interpreter
-to run and an optional initial command-line argument to pass to that
-interpreter. The operating system then runs the interpreter with the given
-argument and the full argument list of the executed program. The first argument
-in the list is the full @value{FN} of the @command{awk} program.
-The rest of the
-argument list contains either options to @command{awk}, or @value{DF}s,
-or both. Note that on many systems @command{awk} may be found in
-@file{/usr/bin} instead of in @file{/bin}. Caveat Emptor.} as if you had
+at the shell and the system arranges to run @command{awk} as if you had
typed @samp{awk -f advice}:
@example
@@ -2487,14 +2497,32 @@ Self-contained @command{awk} scripts are useful when you want to write a
program that users can invoke without their having to know that the program is
written in @command{awk}.
-@cindex sidebar, Portability Issues with @samp{#!}
+@cindex sidebar, Understanding @samp{#!}
@ifdocbook
@docbook
-<sidebar><title>Portability Issues with @samp{#!}</title>
+<sidebar><title>Understanding @samp{#!}</title>
@end docbook
@cindex portability, @code{#!} (executable scripts)
+@command{awk} is an @dfn{interpreted} language. This means that the
+@command{awk} utility reads your program and then processes your data
+according to the instructions in your program. (This is different
+from a @dfn{compiled} language such as C, where your program is first
+compiled into machine code that is executed directly by your system's
+hardware.) The @command{awk} utility is thus termed an @dfn{interpreter}.
+Many modern languages are interpreted.
+
+The line beginning with @samp{#!} lists the full @value{FN} of an
+interpreter to run and a single optional initial command-line argument
+to pass to that interpreter. The operating system then runs the
+interpreter with the given argument and the full argument list of the
+executed program. The first argument in the list is the full @value{FN}
+of the @command{awk} program. The rest of the argument list contains
+either options to @command{awk}, or @value{DF}s, or both. Note that on
+many systems @command{awk} may be found in @file{/usr/bin} instead of
+in @file{/bin}. Caveat Emptor.
+
Some systems limit the length of the interpreter name to 32 characters.
Often, this can be dealt with by using a symbolic link.
@@ -2506,8 +2534,7 @@ of some sort from @command{awk}.
@cindex @code{ARGC}/@code{ARGV} variables, portability and
@cindex portability, @code{ARGV} variable
-Finally,
-the value of @code{ARGV[0]}
+Finally, the value of @code{ARGV[0]}
(@pxref{Built-in Variables})
varies depending upon your operating system.
Some systems put @samp{awk} there, some put the full pathname
@@ -2523,11 +2550,29 @@ to provide your script name.
@ifnotdocbook
@cartouche
-@center @b{Portability Issues with @samp{#!}}
+@center @b{Understanding @samp{#!}}
@cindex portability, @code{#!} (executable scripts)
+@command{awk} is an @dfn{interpreted} language. This means that the
+@command{awk} utility reads your program and then processes your data
+according to the instructions in your program. (This is different
+from a @dfn{compiled} language such as C, where your program is first
+compiled into machine code that is executed directly by your system's
+hardware.) The @command{awk} utility is thus termed an @dfn{interpreter}.
+Many modern languages are interpreted.
+
+The line beginning with @samp{#!} lists the full @value{FN} of an
+interpreter to run and a single optional initial command-line argument
+to pass to that interpreter. The operating system then runs the
+interpreter with the given argument and the full argument list of the
+executed program. The first argument in the list is the full @value{FN}
+of the @command{awk} program. The rest of the argument list contains
+either options to @command{awk}, or @value{DF}s, or both. Note that on
+many systems @command{awk} may be found in @file{/usr/bin} instead of
+in @file{/bin}. Caveat Emptor.
+
Some systems limit the length of the interpreter name to 32 characters.
Often, this can be dealt with by using a symbolic link.
@@ -2539,8 +2584,7 @@ of some sort from @command{awk}.
@cindex @code{ARGC}/@code{ARGV} variables, portability and
@cindex portability, @code{ARGV} variable
-Finally,
-the value of @code{ARGV[0]}
+Finally, the value of @code{ARGV[0]}
(@pxref{Built-in Variables})
varies depending upon your operating system.
Some systems put @samp{awk} there, some put the full pathname
@@ -2720,7 +2764,7 @@ Note that the single quote is not special within double quotes.
@item
Null strings are removed when they occur as part of a non-null
-command-line argument, while explicit non-null objects are kept.
+command-line argument, while explicit null objects are kept.
For example, to specify that the field separator @code{FS} should
be set to the null string, use:
@@ -2867,7 +2911,9 @@ each line is considered to be one @dfn{record}.
In the @value{DF} @file{mail-list}, each record contains the name of a person,
his/her phone number, his/her email-address, and a code for their relationship
-with the author of the list. An @samp{A} in the last column
+with the author of the list.
+The columns are aligned using spaces.
+An @samp{A} in the last column
means that the person is an acquaintance. An @samp{F} in the last
column means that the person is a friend.
An @samp{R} means that the person is a relative:
@@ -2901,6 +2947,7 @@ of green crates shipped, the number of red boxes shipped, the number of
orange bags shipped, and the number of blue packages shipped,
respectively. There are 16 entries, covering the 12 months of last year
and the first four months of the current year.
+An empty line separates the data for the two years.
@example
@c file eg/data/inventory-shipped
@@ -2996,34 +3043,39 @@ you can come up with different ways to do the same things shown here:
@itemize @value{BULLET}
@item
-Print the length of the longest input line:
+Print every line that is longer than 80 characters:
@example
-awk '@{ if (length($0) > max) max = length($0) @}
- END @{ print max @}' data
+awk 'length($0) > 80' data
@end example
+The sole rule has a relational expression as its pattern and it has no
+action---so it uses the default action, printing the record.
+
@item
-Print every line that is longer than 80 characters:
+Print the length of the longest input line:
@example
-awk 'length($0) > 80' data
+awk '@{ if (length($0) > max) max = length($0) @}
+ END @{ print max @}' data
@end example
-The sole rule has a relational expression as its pattern and it has no
-action---so it uses the default action, printing the record.
+The code associated with @code{END} executes after all
+input has been read; it's the other side of the coin to @code{BEGIN}.
@cindex @command{expand} utility
@item
Print the length of the longest line in @file{data}:
@example
-expand data | awk '@{ if (x < length()) x = length() @}
+expand data | awk '@{ if (x < length($0)) x = length($0) @}
END @{ print "maximum line length is " x @}'
@end example
+This example differs slightly from the previous one:
The input is processed by the @command{expand} utility to change TABs
-into spaces, so the widths compared are actually the right-margin columns.
+into spaces, so the widths compared are actually the right-margin columns,
+as opposed to the number of input characters on each line.
@item
Print every line that has at least one field:
@@ -3150,8 +3202,8 @@ features that haven't been covered yet, so don't worry if you don't
understand all the details:
@example
-LC_ALL=C ls -l | awk '$6 == "Nov" @{ sum += $5 @}
- END @{ print sum @}'
+ls -l | awk '$6 == "Nov" @{ sum += $5 @}
+ END @{ print sum @}'
@end example
@cindex @command{ls} utility
@@ -3369,7 +3421,7 @@ and array sorting.
As we develop our presentation of the @command{awk} language, we introduce
most of the variables and many of the functions. They are described
-systematically in @ref{Built-in Variables}, and
+systematically in @ref{Built-in Variables}, and in
@ref{Built-in}.
@node When
@@ -3404,33 +3456,30 @@ eight-bit microprocessors,
and a microcode assembler for a special-purpose Prolog
computer.
While the original @command{awk}'s capabilities were strained by tasks
-of such complexity, modern versions are more capable. Even Brian Kernighan's
-version of @command{awk} has fewer predefined limits, and those
-that it has are much larger than they used to be.
+of such complexity, modern versions are more capable.
@cindex @command{awk} programs, complex
-If you find yourself writing @command{awk} scripts of more than, say, a few
-hundred lines, you might consider using a different programming
-language.
-The shell is good at string and
-pattern matching; in addition, it allows powerful use of the system
-utilities. More conventional languages, such as C, C++, and Java, offer
-better facilities for system programming and for managing the complexity
-of large programs.
-Python offers a nice balance between high-level ease of programming and
-access to system facilities.
-Programs in these languages may require more lines
-of source code than the equivalent @command{awk} programs, but they are
-easier to maintain and usually run more efficiently.
+If you find yourself writing @command{awk} scripts of more than, say,
+a few hundred lines, you might consider using a different programming
+language. The shell is good at string and pattern matching; in addition,
+it allows powerful use of the system utilities. Python offers a nice
+balance between high-level ease of programming and access to system
+facilities.@footnote{Other popular scripting languages include Ruby
+and Perl.}
@node Intro Summary
@section Summary
+@c FIXME: Review this chapter for summary of builtin functions called.
@itemize @value{BULLET}
@item
Programs in @command{awk} consist of @var{pattern}-@var{action} pairs.
@item
+An @var{action} without a @var{pattern} always runs. The default
+@var{action} for a pattern without one is @samp{@{ print $0 @}}.
+
+@item
Use either
@samp{awk '@var{program}' @var{files}}
or
@@ -3652,7 +3701,7 @@ multibyte characters. This option is an easy way to tell @command{gawk}:
@cindex compatibility mode (@command{gawk}), specifying
Specify @dfn{compatibility mode}, in which the GNU extensions to
the @command{awk} language are disabled, so that @command{gawk} behaves just
-like Brian Kernighan's version @command{awk}.
+like BWK @command{awk}.
@xref{POSIX/GNU},
which summarizes the extensions.
@ifclear FOR_PRINT
@@ -3737,7 +3786,7 @@ Command-line variable assignments of the form
This option is particularly necessary for World Wide Web CGI applications
that pass arguments through the URL; using this option prevents a malicious
(or other) user from passing in options, assignments, or @command{awk} source
-code (via @option{--source}) to the CGI application. This option should be used
+code (via @option{-e}) to the CGI application. This option should be used
with @samp{#!} scripts (@pxref{Executable Scripts}), like so:
@example
@@ -3783,7 +3832,7 @@ Second, because this option is intended to be used with code libraries,
@command{gawk} does not recognize such files as constituting main program
input. Thus, after processing an @option{-i} argument, @command{gawk}
still expects to find the main source code via the @option{-f} option
-or on the command-line.
+or on the command line.
@item @option{-l} @var{ext}
@itemx @option{--load} @var{ext}
@@ -3807,7 +3856,7 @@ a shared library. This feature is described in detail in @ref{Dynamic Extension
@cindex warnings, issuing
Warn about constructs that are dubious or nonportable to
other @command{awk} implementations.
-No space is allowed between the @option{-D} and @var{value}, if
+No space is allowed between the @option{-L} and @var{value}, if
@var{value} is supplied.
Some warnings are issued when @command{gawk} first reads your program. Others
are issued at runtime, as your program executes.
@@ -3926,7 +3975,7 @@ Newlines are not allowed after @samp{?} or @samp{:}
@cindex @code{FS} variable, as TAB character
@item
-Specifying @samp{-Ft} on the command-line does not set the value
+Specifying @samp{-Ft} on the command line does not set the value
of @code{FS} to be a single TAB character
(@pxref{Field Separators}).
@@ -4023,14 +4072,14 @@ source of data.)
Because it is clumsy using the standard @command{awk} mechanisms to mix
source file and command-line @command{awk} programs, @command{gawk}
-provides the @option{--source} option. This does not require you to
+provides the @option{-e} option. This does not require you to
pre-empt the standard input for your source code; it allows you to easily
mix command-line and library source code (@pxref{AWKPATH Variable}).
-As with @option{-f}, the @option{--source} and @option{--include}
+As with @option{-f}, the @option{-e} and @option{-i}
options may also be used multiple times on the command line.
-@cindex @option{--source} option
-If no @option{-f} or @option{--source} option is specified, then @command{gawk}
+@cindex @option{-e} option
+If no @option{-f} or @option{-e} option is specified, then @command{gawk}
uses the first non-option command-line argument as the text of the
program source code.
@@ -4098,6 +4147,11 @@ included. As each element of @code{ARGV} is processed, @command{gawk}
sets the variable @code{ARGIND} to the index in @code{ARGV} of the
current element.
+@c FIXME: One day, move the ARGC and ARGV node closer to here.
+Changing @code{ARGC} and @code{ARGV} in your @command{awk} program lets
+you control how @command{awk} processes the input files; this is described
+in more detail in @ref{ARGC and ARGV}.
+
@cindex input files, variable assignments and
@cindex variable assignments and input files
The distinction between @value{FN} arguments and variable-assignment
@@ -4172,7 +4226,7 @@ with @code{getline}.
Some other versions of @command{awk} also support this, but it
is not standard.
(Some operating systems provide a @file{/dev/stdin} file
-in the file system; however, @command{gawk} always processes
+in the filesystem; however, @command{gawk} always processes
this @value{FN} itself.)
@node Environment Variables
@@ -4198,7 +4252,7 @@ behaves.
@cindex differences in @command{awk} and @command{gawk}, @code{AWKPATH} environment variable
@ifinfo
The previous @value{SECTION} described how @command{awk} program files can be named
-on the command-line with the @option{-f} option.
+on the command line with the @option{-f} option.
@end ifinfo
In most @command{awk}
implementations, you must supply a precise path name for each program
@@ -4226,7 +4280,7 @@ standard directory in the default path and then specified on
the command line with a short @value{FN}. Otherwise, the full @value{FN}
would have to be typed for each file.
-By using the @option{-i} option, or the @option{--source} and @option{-f} options, your command-line
+By using the @option{-i} option, or the @option{-e} and @option{-f} options, your command-line
@command{awk} programs can use facilities in @command{awk} library files
(@pxref{Library Functions}).
Path searching is not done if @command{gawk} is in compatibility mode.
@@ -4293,7 +4347,7 @@ list are meant to be used by regular users.
@table @env
@item POSIXLY_CORRECT
-Causes @command{gawk} to switch POSIX compatibility
+Causes @command{gawk} to switch to POSIX compatibility
mode, disabling all traditional and GNU extensions.
@xref{Options}.
@@ -4326,7 +4380,7 @@ file as the size of the memory buffer to allocate for I/O. Otherwise,
the value should be a number, and @command{gawk} uses that number as
the size of the buffer to allocate. (When this variable is not set,
@command{gawk} uses the smaller of the file's size and the ``default''
-blocksize, which is usually the file systems I/O blocksize.)
+blocksize, which is usually the filesystem's I/O blocksize.)
@item AWK_HASH
If this variable exists with a value of @samp{gst}, @command{gawk}
@@ -4399,6 +4453,9 @@ to @code{EXIT_FAILURE}.
This @value{SECTION} describes a feature that is specific to @command{gawk}.
+@cindex @code{@@include} directive
+@cindex file inclusion, @code{@@include} directive
+@cindex including files, @code{@@include} directive
The @code{@@include} keyword can be used to read external @command{awk} source
files. This gives you the ability to split large @command{awk} source files
into smaller, more manageable pieces, and also lets you reuse common @command{awk}
@@ -4518,6 +4575,9 @@ and this also applies to files named with @code{@@include}.
This @value{SECTION} describes a feature that is specific to @command{gawk}.
+@cindex @code{@@load} directive
+@cindex loading extensions, @code{@@load} directive
+@cindex extensions, loading, @code{@@load} directive
The @code{@@load} keyword can be used to read external @command{awk} extensions
(stored as system shared libraries).
This allows you to link in compiled code that may offer superior
@@ -4659,9 +4719,9 @@ or
to run @command{awk}.
@item
-The three standard @command{awk} options are @option{-f}, @option{-F}
-and @option{-v}. @command{gawk} supplies these and many others, as well
-as corresponding GNU-style long options.
+The three standard options for all versions of @command{awk} are
+@option{-f}, @option{-F} and @option{-v}. @command{gawk} supplies these
+and many others, as well as corresponding GNU-style long options.
@item
Non-option command-line arguments are usually treated as @value{FN}s,
@@ -4719,7 +4779,7 @@ The simplest regular expression is a sequence of letters, numbers, or
both. Such a regexp matches any string that contains that sequence.
Thus, the regexp @samp{foo} matches any string containing @samp{foo}.
Therefore, the pattern @code{/foo/} matches any input record containing
-the three characters @samp{foo} @emph{anywhere} in the record. Other
+the three adjacent characters @samp{foo} @emph{anywhere} in the record. Other
kinds of regexps let you specify more complicated classes of strings.
@ifnotinfo
@@ -4733,10 +4793,10 @@ regular expressions work, we present more complicated instances.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
@end menu
@@ -4928,20 +4988,30 @@ between @samp{0} and @samp{7}. For example, the code for the ASCII ESC
@item \x@var{hh}@dots{}
The hexadecimal value @var{hh}, where @var{hh} stands for a sequence
of hexadecimal digits (@samp{0}--@samp{9}, and either @samp{A}--@samp{F}
-or @samp{a}--@samp{f}). Like the same construct
-in ISO C, the escape sequence continues until the first nonhexadecimal
-digit is seen. @value{COMMONEXT}
+or @samp{a}--@samp{f}). A maximum of two digits are allowed after
+the @samp{\x}. Any further hexadecimal digits are treated as simple
+letters or numbers. @value{COMMONEXT}
+
+@quotation CAUTION
+In ISO C, the escape sequence continues until the first nonhexadecimal
+digit is seen.
+@c FIXME: Add exact version here.
+For many years, @command{gawk} would continue incorporating
+hexadecimal digits into the value until a non-hexadecimal digit
+or the end of the string was encountered.
However, using more than two hexadecimal digits produces undefined results.
-undefined results. (The @samp{\x} escape sequence is not allowed in
-POSIX @command{awk}.)
+@end quotation
@cindex @code{\} (backslash), @code{\/} escape sequence
@cindex backslash (@code{\}), @code{\/} escape sequence
@item \/
A literal slash (necessary for regexp constants only).
This sequence is used when you want to write a regexp
-constant that contains a slash. Because the regexp is delimited by
-slashes, you need to escape the slash that is part of the pattern,
+constant that contains a slash
+(such as @code{/.*:\/home\/[[:alnum:]]+:.*/}; the @samp{[[:alnum:]]}
+notation is discussed shortly, in @ref{Bracket Expressions}).
+Because the regexp is delimited by
+slashes, you need to escape any slash that is part of the pattern,
in order to tell @command{awk} to keep processing the rest of the regexp.
@cindex @code{\} (backslash), @code{\"} escape sequence
@@ -4949,8 +5019,10 @@ in order to tell @command{awk} to keep processing the rest of the regexp.
@item \"
A literal double quote (necessary for string constants only).
This sequence is used when you want to write a string
-constant that contains a double quote. Because the string is delimited by
-double quotes, you need to escape the quote that is part of the string,
+constant that contains a double quote
+(such as @code{"He said \"hi!\" to her."}).
+Because the string is delimited by
+double quotes, you need to escape any quote that is part of the string,
in order to tell @command{awk} to keep processing the rest of the string.
@end table
@@ -5011,7 +5083,7 @@ leaves what happens as undefined. There are two choices:
@cindex Brian Kernighan's @command{awk}
@table @asis
@item Strip the backslash out
-This is what Brian Kernighan's @command{awk} and @command{gawk} both do.
+This is what BWK @command{awk} and @command{gawk} both do.
For example, @code{"a\qc"} is the same as @code{"aqc"}.
(Because this is such an easy bug both to introduce and to miss,
@command{gawk} warns you about it.)
@@ -5054,7 +5126,7 @@ leaves what happens as undefined. There are two choices:
@cindex Brian Kernighan's @command{awk}
@table @asis
@item Strip the backslash out
-This is what Brian Kernighan's @command{awk} and @command{gawk} both do.
+This is what BWK @command{awk} and @command{gawk} both do.
For example, @code{"a\qc"} is the same as @code{"aqc"}.
(Because this is such an easy bug both to introduce and to miss,
@command{gawk} warns you about it.)
@@ -5142,7 +5214,7 @@ The escape sequences described
@ifnotinfo
earlier
@end ifnotinfo
-in @ref{Escape Sequences},
+in @DBREF{Escape Sequences}
are valid inside a regexp. They are introduced by a @samp{\} and
are recognized and converted into corresponding real characters as
the very first step in processing regexps.
@@ -5239,12 +5311,11 @@ or @samp{k}.
@cindex vertical bar (@code{|})
@item @code{|}
This is the @dfn{alternation operator} and it is used to specify
-alternatives.
-The @samp{|} has the lowest precedence of all the regular
-expression operators.
-For example, @samp{^P|[[:digit:]]}
-matches any string that matches either @samp{^P} or @samp{[[:digit:]]}. This
-means it matches any string that starts with @samp{P} or contains a digit.
+alternatives. The @samp{|} has the lowest precedence of all the regular
+expression operators. For example, @samp{^P|[aeiouy]} matches any string
+that matches either @samp{^P} or @samp{[aeiouy]}. This means it matches
+any string that starts with @samp{P} or contains (anywhere within it)
+a lowercase English vowel.
The alternation applies to the largest possible regexps on either side.
@@ -5268,14 +5339,15 @@ applies the @samp{*} symbol to the preceding @samp{h} and looks for matches
of one @samp{p} followed by any number of @samp{h}s. This also matches
just @samp{p} if no @samp{h}s are present.
-The @samp{*} repeats the @emph{smallest} possible preceding expression.
-(Use parentheses if you want to repeat a larger expression.) It finds
-as many repetitions as possible. For example,
-@samp{awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample}
-prints every record in @file{sample} containing a string of the form
-@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on.
-Notice the escaping of the parentheses by preceding them
-with backslashes.
+There are two subtle points to understand about how @samp{*} works.
+First, the @samp{*} applies only to the single preceding regular expression
+component (e.g., in @samp{ph*}, it applies just to the @samp{h}).
+To cause @samp{*} to apply to a larger sub-expression, use parentheses:
+@samp{(ph)*} matches @samp{ph}, @samp{phph}, @samp{phphph} and so on.
+
+Second, @samp{*} finds as many repetitions as possible. If the text
+to be matched is @samp{phhhhhhhhhhhhhhooey}, @samp{ph*} matches all of
+the @samp{h}s.
@cindex @code{+} (plus sign), regexp operator
@cindex plus sign (@code{+}), regexp operator
@@ -5284,12 +5356,6 @@ This symbol is similar to @samp{*}, except that the preceding expression must be
matched at least once. This means that @samp{wh+y}
would match @samp{why} and @samp{whhy}, but not @samp{wy}, whereas
@samp{wh*y} would match all three.
-The following is a simpler
-way of writing the last @samp{*} example:
-
-@example
-awk '/\(c[ad]+r x\)/ @{ print @}' sample
-@end example
@cindex @code{?} (question mark), regexp operator
@cindex question mark (@code{?}), regexp operator
@@ -5384,7 +5450,7 @@ Within a bracket expression, a @dfn{range expression} consists of two
characters separated by a hyphen. It matches any single character that
sorts between the two characters, based upon the system's native character
set. For example, @samp{[0-9]} is equivalent to @samp{[0123456789]}.
-(See @ref{Ranges and Locales}, for an explanation of how the POSIX
+(See @DBREF{Ranges and Locales} for an explanation of how the POSIX
standard and @command{gawk} have changed over time. This is mainly
of historical interest.)
@@ -5403,6 +5469,9 @@ bracket expression, put a @samp{\} in front of it. For example:
@noindent
matches either @samp{d} or @samp{]}.
+Additionally, if you place @samp{]} right after the opening
+@samp{[}, the closing bracket is treated as one of the
+characters to be matched.
@cindex POSIX @command{awk}, bracket expressions and
@cindex Extended Regular Expressions (EREs)
@@ -5514,6 +5583,204 @@ they do not recognize collating symbols or equivalence classes.
@c maybe one day ...
@c ENDOFRANGE charlist
+@node Leftmost Longest
+@section How Much Text Matches?
+
+@cindex regular expressions, leftmost longest match
+@c @cindex matching, leftmost longest
+Consider the following:
+
+@example
+echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@end example
+
+This example uses the @code{sub()} function (which we haven't discussed yet;
+@pxref{String Functions})
+to make a change to the input record. Here, the regexp @code{/a+/}
+indicates ``one or more @samp{a} characters,'' and the replacement
+text is @samp{<A>}.
+
+The input contains four @samp{a} characters.
+@command{awk} (and POSIX) regular expressions always match
+the leftmost, @emph{longest} sequence of input characters that can
+match. Thus, all four @samp{a} characters are
+replaced with @samp{<A>} in this example:
+
+@example
+$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
+@print{} <A>bcd
+@end example
+
+For simple match/no-match tests, this is not so important. But when doing
+text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
+and @code{gensub()} functions, it is very important.
+@ifinfo
+@xref{String Functions},
+for more information on these functions.
+@end ifinfo
+Understanding this principle is also important for regexp-based record
+and field splitting (@pxref{Records},
+and also @pxref{Field Separators}).
+
+@node Computed Regexps
+@section Using Dynamic Regexps
+
+@c STARTOFRANGE dregexp
+@cindex regular expressions, computed
+@c STARTOFRANGE regexpd
+@cindex regular expressions, dynamic
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@c @cindex operators, @code{~}
+@c @cindex operators, @code{!~}
+The righthand side of a @samp{~} or @samp{!~} operator need not be a
+regexp constant (i.e., a string of characters between slashes). It may
+be any expression. The expression is evaluated and converted to a string
+if necessary; the contents of the string are then used as the
+regexp. A regexp computed in this way is called a @dfn{dynamic
+regexp} or a @dfn{computed regexp}:
+
+@example
+BEGIN @{ digits_regexp = "[[:digit:]]+" @}
+$0 ~ digits_regexp @{ print @}
+@end example
+
+@noindent
+This sets @code{digits_regexp} to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+@quotation NOTE
+When using the @samp{~} and @samp{!~}
+operators, there is a difference between a regexp constant
+enclosed in slashes and a string constant enclosed in double quotes.
+If you are going to use a string constant, you have to understand that
+the string is, in essence, scanned @emph{twice}: the first time when
+@command{awk} reads your program, and the second time when it goes to
+match the string on the lefthand side of the operator with the pattern
+on the right. This is true of any string-valued expression (such as
+@code{digits_regexp}, shown previously), not just string constants.
+@end quotation
+
+@cindex regexp constants, slashes vs.@: quotes
+@cindex @code{\} (backslash), in regexp constants
+@cindex backslash (@code{\}), in regexp constants
+@cindex @code{"} (double quote), in regexp constants
+@cindex double quote (@code{"}), in regexp constants
+What difference does it make if the string is
+scanned twice? The answer has to do with escape sequences, and particularly
+with backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
+Only one backslash is needed. To do the same thing with a string,
+you have to type @code{"\\*"}. The first backslash escapes the
+second one so that the string actually contains the
+two characters @samp{\} and @samp{*}.
+
+@cindex troubleshooting, regexp constants vs.@: string constants
+@cindex regexp constants, vs.@: string constants
+@cindex string constants, vs.@: regexp constants
+Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is ``regexp
+constants,'' for several reasons:
+
+@itemize @value{BULLET}
+@item
+String constants are more complicated to write and
+more difficult to read. Using regexp constants makes your programs
+less error-prone. Not understanding the difference between the two
+kinds of constants is a common source of errors.
+
+@item
+It is more efficient to use regexp constants. @command{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. When using a string constant,
+@command{awk} must first convert the string into this internal form and
+then perform the pattern matching.
+
+@item
+Using regexp constants is better form; it shows clearly that you
+intend a regexp match.
+@end itemize
+
+@cindex sidebar, Using @code{\n} in Bracket Expressions of Dynamic Regexps
+@ifdocbook
+@docbook
+<sidebar><title>Using @code{\n} in Bracket Expressions of Dynamic Regexps</title>
+@end docbook
+
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+
+@docbook
+</sidebar>
+@end docbook
+@end ifdocbook
+
+@ifnotdocbook
+@cartouche
+@center @b{Using @code{\n} in Bracket Expressions of Dynamic Regexps}
+
+
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+@end cartouche
+@end ifnotdocbook
+@c ENDOFRANGE dregexp
+@c ENDOFRANGE regexpd
+
@node GNU Regexp Operators
@section @command{gawk}-Specific Regexp Operators
@@ -5677,7 +5944,7 @@ are allowed.
Traditional Unix @command{awk} regexps are matched. The GNU operators
are not special, and interval expressions are not available.
The POSIX character classes (@samp{[[:alnum:]]}, etc.) are supported,
-as Brian Kernighan's @command{awk} does support them.
+as BWK @command{awk} does support them.
Characters described by octal and hexadecimal escape sequences are
treated literally, even if they represent regexp metacharacters.
@@ -5789,204 +6056,6 @@ Case is always significant in compatibility mode.
@c ENDOFRANGE csregexp
@c ENDOFRANGE regexpcs
-@node Leftmost Longest
-@section How Much Text Matches?
-
-@cindex regular expressions, leftmost longest match
-@c @cindex matching, leftmost longest
-Consider the following:
-
-@example
-echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
-@end example
-
-This example uses the @code{sub()} function (which we haven't discussed yet;
-@pxref{String Functions})
-to make a change to the input record. Here, the regexp @code{/a+/}
-indicates ``one or more @samp{a} characters,'' and the replacement
-text is @samp{<A>}.
-
-The input contains four @samp{a} characters.
-@command{awk} (and POSIX) regular expressions always match
-the leftmost, @emph{longest} sequence of input characters that can
-match. Thus, all four @samp{a} characters are
-replaced with @samp{<A>} in this example:
-
-@example
-$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
-@print{} <A>bcd
-@end example
-
-For simple match/no-match tests, this is not so important. But when doing
-text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
-and @code{gensub()} functions, it is very important.
-@ifinfo
-@xref{String Functions},
-for more information on these functions.
-@end ifinfo
-Understanding this principle is also important for regexp-based record
-and field splitting (@pxref{Records},
-and also @pxref{Field Separators}).
-
-@node Computed Regexps
-@section Using Dynamic Regexps
-
-@c STARTOFRANGE dregexp
-@cindex regular expressions, computed
-@c STARTOFRANGE regexpd
-@cindex regular expressions, dynamic
-@cindex @code{~} (tilde), @code{~} operator
-@cindex tilde (@code{~}), @code{~} operator
-@cindex @code{!} (exclamation point), @code{!~} operator
-@cindex exclamation point (@code{!}), @code{!~} operator
-@c @cindex operators, @code{~}
-@c @cindex operators, @code{!~}
-The righthand side of a @samp{~} or @samp{!~} operator need not be a
-regexp constant (i.e., a string of characters between slashes). It may
-be any expression. The expression is evaluated and converted to a string
-if necessary; the contents of the string are then used as the
-regexp. A regexp computed in this way is called a @dfn{dynamic
-regexp} or a @dfn{computed regexp}:
-
-@example
-BEGIN @{ digits_regexp = "[[:digit:]]+" @}
-$0 ~ digits_regexp @{ print @}
-@end example
-
-@noindent
-This sets @code{digits_regexp} to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
-@quotation NOTE
-When using the @samp{~} and @samp{!~}
-operators, there is a difference between a regexp constant
-enclosed in slashes and a string constant enclosed in double quotes.
-If you are going to use a string constant, you have to understand that
-the string is, in essence, scanned @emph{twice}: the first time when
-@command{awk} reads your program, and the second time when it goes to
-match the string on the lefthand side of the operator with the pattern
-on the right. This is true of any string-valued expression (such as
-@code{digits_regexp}, shown previously), not just string constants.
-@end quotation
-
-@cindex regexp constants, slashes vs.@: quotes
-@cindex @code{\} (backslash), in regexp constants
-@cindex backslash (@code{\}), in regexp constants
-@cindex @code{"} (double quote), in regexp constants
-@cindex double quote (@code{"}), in regexp constants
-What difference does it make if the string is
-scanned twice? The answer has to do with escape sequences, and particularly
-with backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
-For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
-Only one backslash is needed. To do the same thing with a string,
-you have to type @code{"\\*"}. The first backslash escapes the
-second one so that the string actually contains the
-two characters @samp{\} and @samp{*}.
-
-@cindex troubleshooting, regexp constants vs.@: string constants
-@cindex regexp constants, vs.@: string constants
-@cindex string constants, vs.@: regexp constants
-Given that you can use both regexp and string constants to describe
-regular expressions, which should you use? The answer is ``regexp
-constants,'' for several reasons:
-
-@itemize @value{BULLET}
-@item
-String constants are more complicated to write and
-more difficult to read. Using regexp constants makes your programs
-less error-prone. Not understanding the difference between the two
-kinds of constants is a common source of errors.
-
-@item
-It is more efficient to use regexp constants. @command{awk} can note
-that you have supplied a regexp and store it internally in a form that
-makes pattern matching more efficient. When using a string constant,
-@command{awk} must first convert the string into this internal form and
-then perform the pattern matching.
-
-@item
-Using regexp constants is better form; it shows clearly that you
-intend a regexp match.
-@end itemize
-
-@cindex sidebar, Using @code{\n} in Bracket Expressions of Dynamic Regexps
-@ifdocbook
-@docbook
-<sidebar><title>Using @code{\n} in Bracket Expressions of Dynamic Regexps</title>
-@end docbook
-
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
-
-Some versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
-
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
-
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
-
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
-
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-
-@docbook
-</sidebar>
-@end docbook
-@end ifdocbook
-
-@ifnotdocbook
-@cartouche
-@center @b{Using @code{\n} in Bracket Expressions of Dynamic Regexps}
-
-
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
-
-Some versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
-
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
-
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
-
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
-
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-@end cartouche
-@end ifnotdocbook
-@c ENDOFRANGE dregexp
-@c ENDOFRANGE regexpd
-
@node Regexp Summary
@section Summary
@@ -5997,7 +6066,7 @@ In @command{awk}, regular expression constants are written enclosed
between slashes: @code{/}@dots{}@code{/}.
@item
-Regexp constants may be used by standalone in patterns and
+Regexp constants may be used standalone in patterns and
in conditional expressions, or as part of matching expressions
using the @samp{~} and @samp{!~} operators.
@@ -6027,7 +6096,7 @@ the match, such as for text substitution and when the record separator
is a regexp.
@item
-Matching expressions may use dynamic regexps; that is string values
+Matching expressions may use dynamic regexps, that is, string values
treated as regular expressions.
@end itemize
@@ -6079,7 +6148,7 @@ used with it do not have to be named on the @command{awk} command line
* Getline:: Reading files under explicit program control
using the @code{getline} function.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on the
+* Command-line directories:: What happens if you put a directory on the
command line.
* Input Summary:: Input summary.
* Input Exercises:: Exercises.
@@ -6094,16 +6163,13 @@ used with it do not have to be named on the @command{awk} command line
@cindex records, splitting input into
@cindex @code{NR} variable
@cindex @code{FNR} variable
-The @command{awk} utility divides the input for your @command{awk}
-program into records and fields.
-@command{awk} keeps track of the number of records that have
-been read
-so far
-from the current input file. This value is stored in a
-built-in variable called @code{FNR}. It is reset to zero when a new
-file is started. Another built-in variable, @code{NR}, records the total
-number of input records read so far from all @value{DF}s. It starts at zero,
-but is never automatically reset to zero.
+@command{awk} divides the input for your program into records and fields.
+It keeps track of the number of records that have been read so far from
+the current input file. This value is stored in a built-in variable
+called @code{FNR} which is reset to zero when a new file is started.
+Another built-in variable, @code{NR}, records the total number of input
+records read so far from all @value{DF}s. It starts at zero, but is
+never automatically reset to zero.
@menu
* awk split records:: How standard @command{awk} splits records.
@@ -6310,17 +6376,17 @@ with optional leading and/or trailing whitespace:
@example
$ @kbd{echo record 1 AAAA record 2 BBBB record 3 |}
> @kbd{gawk 'BEGIN @{ RS = "\n|( *[[:upper:]]+ *)" @}}
-> @kbd{@{ print "Record =", $0, "and RT =", RT @}'}
-@print{} Record = record 1 and RT = AAAA
-@print{} Record = record 2 and RT = BBBB
-@print{} Record = record 3 and RT =
-@print{}
+> @kbd{@{ print "Record =", $0,"and RT = [" RT "]" @}'}
+@print{} Record = record 1 and RT = [ AAAA ]
+@print{} Record = record 2 and RT = [ BBBB ]
+@print{} Record = record 3 and RT = [
+@print{} ]
@end example
@noindent
-The final line of output has an extra blank line. This is because the
-value of @code{RT} is a newline, and the @code{print} statement
-supplies its own terminating newline.
+The square brackets delineate the contents of @code{RT}, letting you
+see the leading and trailing whitespace. The final value of @code{RT}
+is a newline.
@xref{Simple Sed}, for a more useful example
of @code{RS} as a regexp and @code{RT}.
@@ -6834,7 +6900,7 @@ with a statement such as @samp{$1 = $1}, as described earlier.
* Default Field Splitting:: How fields are normally separated.
* Regexp Field Splitting:: Using regexps as the field separator.
* Single Character Fields:: Making each character a separate field.
-* Command Line Field Separator:: Setting @code{FS} from the command-line.
+* Command Line Field Separator:: Setting @code{FS} from the command line.
* Full Line Fields:: Making the full line be a single field.
* Field Splitting Summary:: Some final points and a summary table.
@end menu
@@ -7035,7 +7101,7 @@ should not rely on any specific behavior in your programs.
@value{DARKCORNER}
@cindex Brian Kernighan's @command{awk}
-As a point of information, Brian Kernighan's @command{awk} allows @samp{^}
+As a point of information, BWK @command{awk} allows @samp{^}
to match only at the beginning of the record. @command{gawk}
also works this way. For example:
@@ -7090,7 +7156,7 @@ behaves this way.
@node Command Line Field Separator
@subsection Setting @code{FS} from the Command Line
-@cindex @option{-F} option, command line
+@cindex @option{-F} option, command-line
@cindex field separator, on command line
@cindex command line, @code{FS} on@comma{} setting
@cindex @code{FS} variable, setting from command line
@@ -7140,6 +7206,8 @@ shell, without any quotes, the @samp{\} gets deleted, so @command{awk}
figures that you really want your fields to be separated with TABs and
not @samp{t}s. Use @samp{-v FS="t"} or @samp{-F"[t]"} on the command line
if you really do want to separate your fields with @samp{t}s.
+Use @samp{-F '\t'} when not in compatibility mode to specify that TABs
+separate fields.
As an example, let's use an @command{awk} program file called @file{edu.awk}
that contains the pattern @code{/edu/} and the action @samp{print $1}:
@@ -7290,7 +7358,7 @@ root
@noindent
on an incorrect implementation of @command{awk}, while @command{gawk}
-prints something like:
+prints the full first line of the file, something like:
@example
root:nSijPlPhZZwgE:0:0:Root:/:
@@ -7343,7 +7411,7 @@ root
@noindent
on an incorrect implementation of @command{awk}, while @command{gawk}
-prints something like:
+prints the full first line of the file, something like:
@example
root:nSijPlPhZZwgE:0:0:Root:/:
@@ -7480,7 +7548,7 @@ haven't been introduced yet.
BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @}
NR > 2 @{
idle = $4
- sub(/^ */, "", idle) # strip leading spaces
+ sub(/^ +/, "", idle) # strip leading spaces
if (idle == "")
idle = 0
if (idle ~ /:/) @{
@@ -7638,6 +7706,8 @@ if (substr($i, 1, 1) == "\"") @{
As with @code{FS}, the @code{IGNORECASE} variable (@pxref{User-modified})
affects field splitting with @code{FPAT}.
+Assigning a value to @code{FPAT} overrides field splitting
+with @code{FS} and with @code{FIELDWIDTHS}.
Similar to @code{FIELDWIDTHS}, the value of @code{PROCINFO["FS"]}
will be @code{"FPAT"} if content-based field splitting is being used.
@@ -7661,6 +7731,12 @@ FPAT = "([^,]*)|(\"[^\"]+\")"
Finally, the @code{patsplit()} function makes the same functionality
available for splitting regular strings (@pxref{String Functions}).
+To recap, @command{gawk} provides three independent methods
+to split input records into fields. @command{gawk} uses whichever
+mechanism was last chosen based on which of the three
+variables---@code{FS}, @code{FIELDWIDTHS}, and @code{FPAT}---was
+last assigned to.
+
@node Multiple Line
@section Multiple-Line Records
@@ -7882,7 +7958,7 @@ and have a good knowledge of how @command{awk} works.
@cindex @code{getline} command, return values
@cindex @option{--sandbox} option, input redirection with @code{getline}
-The @code{getline} command returns one if it finds a record and zero if
+The @code{getline} command returns 1 if it finds a record and 0 if
it encounters the end of the file. If there is some error in getting
a record, such as a file that cannot be opened, then @code{getline}
returns @minus{}1. In this case, @command{gawk} sets the variable
@@ -7922,32 +7998,58 @@ finished processing the current record, but want to do some special
processing on the next record @emph{right now}. For example:
@example
+# Remove text between /* and */, inclusive
@{
- if ((t = index($0, "/*")) != 0) @{
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) @{
- if (getline <= 0) @{
+ if ((i = index($0, "/*")) != 0) @{
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) @{
+ rest = substr(rest, j + 2) # remove comment
+ @} else @{
+ while (j == 0) @{
+ # get more text
+ if (getline <= 0) @{
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- @}
- u = index($0, "*/")
- offset = 0
- @}
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- @}
- print $0
+ @}
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) @{
+ rest = substr(rest, j + 2)
+ break
+ @}
+ @}
+ @}
+ # build up the output line using string concatenation
+ $0 = out rest
+ @}
+ print $0
@}
@end example
+@c 8/2014: Here is some sample input:
+@ignore
+mon/*comment*/key
+rab/*commen
+t*/bit
+horse /*comment*/more text
+part 1 /*comment*/part 2 /*comment*/part 3
+no comment
+@end ignore
+
This @command{awk} program deletes C-style comments (@samp{/* @dots{}
-*/}) from the input. By replacing the @samp{print $0} with other
+*/}) from the input.
+It uses a number of features we haven't covered yet, including
+string concatenation
+(@pxref{Concatenation})
+and the @code{index()} and @code{substr()} built-in
+functions
+(@pxref{String Functions}).
+By replacing the @samp{print $0} with other
statements, you could perform more complicated processing on the
decommented input, such as searching for matches of a regular
expression. (This program has a subtle problem---it does not work if one
@@ -8204,7 +8306,7 @@ Unfortunately, @command{gawk} has not been consistent in its treatment
of a construct like @samp{@w{"echo "} "date" | getline}.
Most versions, including the current version, treat it as
@samp{@w{("echo "} "date") | getline}.
-(This how Brian Kernighan's @command{awk} behaves.)
+(This is how BWK @command{awk} behaves.)
Some versions changed and treated it as
@samp{@w{"echo "} ("date" | getline)}.
(This is how @command{mawk} behaves.)
@@ -8354,7 +8456,7 @@ probably by accident, and you should reconsider what it is you're
trying to accomplish.
@item
-@ref{Getline Summary}, presents a table summarizing the
+@DBREF{Getline Summary} presents a table summarizing the
@code{getline} variants and which variables they can affect.
It is worth noting that those variants which do not use redirection
can cause @code{FILENAME} to be updated if they cause
@@ -8525,10 +8627,10 @@ a connection before it can start reading any data,
or the attempt to open a FIFO special file for reading can block
indefinitely until some other process opens it for writing.
-@node Command line directories
+@node Command-line directories
@section Directories On The Command Line
-@cindex differences in @command{awk} and @command{gawk}, command line directories
-@cindex directories, command line
+@cindex differences in @command{awk} and @command{gawk}, command-line directories
+@cindex directories, command-line
@cindex command line, directories on
According to the POSIX standard, files named on the @command{awk}
@@ -8621,6 +8723,7 @@ Directories on the command line are fatal for standard @command{awk};
@end itemize
+@c EXCLUDE START
@node Input Exercises
@section Exercises
@@ -8637,9 +8740,10 @@ including abstentions, for each item.
comments (@samp{/* @dots{} */}) from the input. That program
does not work if one comment ends on one line and another one
starts later on the same line.
-Write a program that does handle multiple comments on the line.
+That can be fixed by making one simple change. What is it?
@end enumerate
+@c EXCLUDE END
@node Printing
@chapter Printing Output
@@ -8681,7 +8785,7 @@ and discusses the @code{close()} built-in function.
descriptors.
* Close Files And Pipes:: Closing Input and Output Files and Pipes.
* Output Summary:: Output summary.
-* Output exercises:: Exercises.
+* Output Exercises:: Exercises.
@end menu
@node Print
@@ -8718,6 +8822,10 @@ double-quote characters, your text is taken as an @command{awk}
expression, and you will probably get an error. Keep in mind that a
space is printed between any two items.
+Note that the @code{print} statement is a statement and not an
+expression---you can't use it in the pattern part of a pattern-action
+statement, for example.
+
@node Print Examples
@section @code{print} Statement Examples
@@ -9691,7 +9799,7 @@ It then sends the list to the shell for execution.
@c ENDOFRANGE reout
@node Special Files
-@section Special @value{FFN} in @command{gawk}
+@section Special @value{FFN}s in @command{gawk}
@c STARTOFRANGE gfn
@cindex @command{gawk}, file names in
@@ -9738,7 +9846,8 @@ print "Serious error detected!" | "cat 1>&2"
@noindent
This works by opening a pipeline to a shell command that can access the
standard error stream that it inherits from the @command{awk} process.
-This is far from elegant, and it is also inefficient, because it requires a
+@c 8/2014: Mike Brennan says not to cite this as inefficient. So, fixed.
+This is far from elegant, and it also requires a
separate process. So people writing @command{awk} programs often
don't do this. Instead, they send the error messages to the
screen, like this:
@@ -10187,7 +10296,8 @@ communications.
@end itemize
-@node Output exercises
+@c EXCLUDE START
+@node Output Exercises
@section Exercises
@enumerate
@@ -10216,6 +10326,7 @@ BEGIN @{ print "Serious error detected!" > /dev/stderr @}
@end example
@end enumerate
+@c EXCLUDE END
@c ENDOFRANGE prnt
@@ -10459,7 +10570,8 @@ A regexp constant is a regular expression description enclosed in
slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in
@command{awk} programs are constant, but the @samp{~} and @samp{!~}
matching operators can also match computed or dynamic regexps
-(which are just ordinary strings or variables that contain a regexp).
+(which are typically just ordinary strings or variables that contain a regexp,
+but could be a more complex expression).
@c ENDOFRANGE cnst
@node Using Constant Regexps
@@ -10565,7 +10677,7 @@ function mysub(pat, repl, str, global)
@c @cindex automatic warnings
@c @cindex warnings, automatic
In this example, the programmer wants to pass a regexp constant to the
-user-defined function @code{mysub}, which in turn passes it on to
+user-defined function @code{mysub()}, which in turn passes it on to
either @code{sub()} or @code{gsub()}. However, what really happens is that
the @code{pat} parameter is either one or zero, depending upon whether
or not @code{$0} matches @code{/hi/}.
@@ -10586,7 +10698,7 @@ on the @command{awk} command line.
@menu
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line and a
+* Assignment Options:: Setting variables on the command line and a
summary of command-line syntax. This is an
advanced method of input.
@end menu
@@ -11073,7 +11185,7 @@ print "something meaningful" > file name
@cindex @command{mawk} utility
@noindent
This produces a syntax error with some versions of Unix
-@command{awk}.@footnote{It happens that Brian Kernighan's
+@command{awk}.@footnote{It happens that BWK
@command{awk}, @command{gawk} and @command{mawk} all ``get it right,''
but you should not rely on this.}
It is necessary to use the following:
@@ -11158,7 +11270,7 @@ Otherwise, it's parsed as follows:
@end display
As mentioned earlier,
-when doing concatenation, @emph{parenthesize}. Otherwise,
+when mixing concatenation with other operators, @emph{parenthesize}. Otherwise,
you're never quite sure what you'll get.
@node Assignment Ops
@@ -11416,7 +11528,7 @@ A workaround is:
awk '/[=]=/' /dev/null
@end example
-@command{gawk} does not have this problem; Brian Kernighan's @command{awk}
+@command{gawk} does not have this problem; BWK @command{awk}
and @command{mawk} also do not (@pxref{Other Versions}).
@docbook
@@ -11462,7 +11574,7 @@ A workaround is:
awk '/[=]=/' /dev/null
@end example
-@command{gawk} does not have this problem; Brian Kernighan's @command{awk}
+@command{gawk} does not have this problem; BWK @command{awk}
and @command{mawk} also do not (@pxref{Other Versions}).
@end cartouche
@end ifnotdocbook
@@ -11754,19 +11866,14 @@ compares variables.
@cindex numeric, strings
@cindex strings, numeric
@cindex POSIX @command{awk}, numeric strings and
-The 1992 POSIX standard introduced
+The POSIX standard introduced
the concept of a @dfn{numeric string}, which is simply a string that looks
like a number---for example, @code{@w{" +2"}}. This concept is used
for determining the type of a variable.
The type of the variable is important because the types of two variables
determine how they are compared.
+Variable typing follows these rules:
-The various versions of the POSIX standard did not get the rules
-quite right for several editions. Fortunately, as of at least the
-2008 standard (and possibly earlier), the standard has been fixed,
-and variable typing follows these rules:@footnote{@command{gawk} has
-followed these rules for many years,
-and it is gratifying that the POSIX standard is also now correct.}
@itemize @value{BULLET}
@item
@@ -11919,7 +12026,7 @@ made of characters and is therefore also a string.
Thus, for example, the string constant @w{@code{" +3.14"}},
when it appears in program source code,
is a string---even though it looks numeric---and
-is @emph{never} treated as number for comparison
+is @emph{never} treated as a number for comparison
purposes.
In short, when one operand is a ``pure'' string, such as a string
@@ -12236,7 +12343,7 @@ is ``short-circuited'' if the result can be determined part way through
its evaluation.
@cindex line continuations
-Statements that use @samp{&&} or @samp{||} can be continued simply
+Statements that end with @samp{&&} or @samp{||} can be continued simply
by putting a newline after them. But you cannot put a newline in front
of either of these operators without using backslash continuation
(@pxref{Statements/Lines}).
@@ -12255,7 +12362,7 @@ program is one way to print lines in between special bracketing lines:
@example
$1 == "START" @{ interested = ! interested; next @}
-interested == 1 @{ print @}
+interested @{ print @}
$1 == "END" @{ interested = ! interested; next @}
@end example
@@ -12275,6 +12382,16 @@ bogus input data, but the point is to illustrate the use of `!',
so we'll leave well enough alone.
@end ignore
+Most commonly, the @samp{!} operator is used in the conditions of
+@code{if} and @code{while} statements, where it often makes more
+sense to phrase the logic in the negative:
+
+@example
+if (! @var{some condition} || @var{some other condition}) @{
+ @var{@dots{} do whatever processing @dots{}}
+@}
+@end example
+
@cindex @code{next} statement
@quotation NOTE
The @code{next} statement is discussed in
@@ -12895,7 +13012,7 @@ Contrast this with the following regular expression match, which
accepts any record with a first field that contains @samp{li}:
@example
-$ @kbd{awk '$1 ~ /foo/ @{ print $2 @}' mail-list}
+$ @kbd{awk '$1 ~ /li/ @{ print $2 @}' mail-list}
@print{} 555-5553
@print{} 555-6699
@end example
@@ -13167,7 +13284,7 @@ rule. It contains the number of fields from the last input record.
Most probably due to an oversight, the standard does not say that @code{$0}
is also preserved, although logically one would think that it should be.
In fact, @command{gawk} does preserve the value of @code{$0} for use in
-@code{END} rules. Be aware, however, that Brian Kernighan's @command{awk}, and possibly
+@code{END} rules. Be aware, however, that BWK @command{awk}, and possibly
other implementations, do not.
The third point follows from the first two. The meaning of @samp{print}
@@ -13806,31 +13923,38 @@ case is made, the case statement bodies execute until a @code{break},
or the end of the @code{switch} statement itself. For example:
@example
-switch (NR * 2 + 1) @{
-case 3:
-case "11":
- print NR - 1
- break
-
-case /2[[:digit:]]+/:
- print NR
-
-default:
- print NR + 1
-
-case -1:
- print NR * -1
+while ((c = getopt(ARGC, ARGV, "aksx")) != -1) @{
+ switch (c) @{
+ case "a":
+ # report size of all files
+ all_files = TRUE;
+ break
+ case "k":
+ BLOCK_SIZE = 1024 # 1K block size
+ break
+ case "s":
+ # do sums only
+ sum_only = TRUE
+ break
+ case "x":
+ # don't cross filesystems
+ fts_flags = or(fts_flags, FTS_XDEV)
+ break
+ case "?":
+ default:
+ usage()
+ break
+ @}
@}
@end example
Note that if none of the statements specified above halt execution
of a matched @code{case} statement, execution falls through to the
-next @code{case} until execution halts. In the above example, for
-any case value starting with @samp{2} followed by one or more digits,
-the @code{print} statement is executed and then falls through into the
-@code{default} section, executing its @code{print} statement. In turn,
-the @minus{}1 case will also be executed since the @code{default} does
-not halt execution.
+next @code{case} until execution halts. In the above example, the
+@code{case} for @code{"?"} falls through to the @code{default}
+case, which is to call a function named @code{usage()}.
+(The @code{getopt()} function being called here is
+described in @ref{Getopt Function}.)
@node Break Statement
@subsection The @code{break} Statement
@@ -13904,7 +14028,7 @@ historical implementations of @command{awk} treated the @code{break}
statement outside of a loop as if it were a @code{next} statement
(@pxref{Next Statement}).
@value{DARKCORNER}
-Recent versions of Brian Kernighan's @command{awk} no longer allow this usage,
+Recent versions of BWK @command{awk} no longer allow this usage,
nor does @command{gawk}.
@node Continue Statement
@@ -13953,7 +14077,8 @@ BEGIN @{
@end example
@noindent
-This program loops forever once @code{x} reaches 5.
+This program loops forever once @code{x} reaches 5, since
+the increment (@samp{x++}) is never reached.
@c @cindex @code{continue}, outside of loops
@c @cindex historical features
@@ -13970,7 +14095,7 @@ statement outside a loop: as if it were a @code{next}
statement
(@pxref{Next Statement}).
@value{DARKCORNER}
-Recent versions of Brian Kernighan's @command{awk} no longer work this way, nor
+Recent versions of BWK @command{awk} no longer work this way, nor
does @command{gawk}.
@node Next Statement
@@ -14059,7 +14184,8 @@ starts over with the first rule in the program.
If the @code{nextfile} statement causes the end of the input to be reached,
then the code in any @code{END} rules is executed. An exception to this is
when @code{nextfile} is invoked during execution of any statement in an
-@code{END} rule; In this case, it causes the program to stop immediately. @xref{BEGIN/END}.
+@code{END} rule; in this case, it causes the program to stop immediately.
+@xref{BEGIN/END}.
The @code{nextfile} statement is useful when there are many @value{DF}s
to process but it isn't necessary to process every record in every file.
@@ -14069,13 +14195,10 @@ would have to continue scanning the unwanted records. The @code{nextfile}
statement accomplishes this much more efficiently.
In @command{gawk}, execution of @code{nextfile} causes additional things
-to happen:
-any @code{ENDFILE} rules are executed except in the case as
-mentioned below,
-@code{ARGIND} is incremented,
-and
-any @code{BEGINFILE} rules are executed.
-(@code{ARGIND} hasn't been introduced yet. @xref{Built-in Variables}.)
+to happen: any @code{ENDFILE} rules are executed if @command{gawk} is
+not currently in an @code{END} or @code{BEGINFILE} rule, @code{ARGIND} is
+incremented, and any @code{BEGINFILE} rules are executed. (@code{ARGIND}
+hasn't been introduced yet. @xref{Built-in Variables}.)
With @command{gawk}, @code{nextfile} is useful inside a @code{BEGINFILE}
rule to skip over a file that would otherwise cause @command{gawk}
@@ -14099,7 +14222,7 @@ See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@cindex @code{nextfile} statement, user-defined functions and
@cindex Brian Kernighan's @command{awk}
@cindex @command{mawk} utility
-The current version of the Brian Kernighan's @command{awk}, and @command{mawk} (@pxref{Other
+The current versions of BWK @command{awk} and @command{mawk} (@pxref{Other
Versions}) also support @code{nextfile}. However, they don't allow the
@code{nextfile} statement inside function bodies (@pxref{User-defined}).
@command{gawk} does; a @code{nextfile} inside a function body reads the
@@ -14608,7 +14731,7 @@ current record. @xref{Changing Fields}.
@cindex differences in @command{awk} and @command{gawk}, @code{FUNCTAB} variable
@item @code{FUNCTAB #}
An array whose indices and corresponding values are the names of all
-the user-defined or extension functions in the program.
+the built-in, user-defined and extension functions in the program.
@quotation NOTE
Attempting to use the @code{delete} statement with the @code{FUNCTAB}
@@ -14656,9 +14779,12 @@ text of the AWK program. For each identifier, the value of the element is one o
@item "array"
The identifier is an array.
+@item "builtin"
+The identifier is a built-in function.
+
@item "extension"
The identifier is an extension function loaded via
-@code{@@load}.
+@code{@@load} or @option{-l}.
@item "scalar"
The identifier is a scalar.
@@ -14938,7 +15064,7 @@ changed.
@cindex arguments, command-line
@cindex command line, arguments
-@ref{Auto-set},
+@DBREF{Auto-set}
presented the following program describing the information contained in @code{ARGC}
and @code{ARGV}:
@@ -15011,8 +15137,17 @@ before actual processing of the input begins.
@xref{Split Program}, and see
@ref{Tee Program}, for examples
of each way of removing elements from @code{ARGV}.
+
+To actually get options into an @command{awk} program,
+end the @command{awk} options with @option{--} and then supply
+the @command{awk} program's options, in the following manner:
+
+@example
+awk -f myprog.awk -- -v -q file1 file2 @dots{}
+@end example
+
The following fragment processes @code{ARGV} in order to examine, and
-then remove, command-line options:
+then remove, the above command-line options:
@example
BEGIN @{
@@ -15032,32 +15167,24 @@ BEGIN @{
@}
@end example
-To actually get the options into the @command{awk} program,
-end the @command{awk} options with @option{--} and then supply
-the @command{awk} program's options, in the following manner:
-
-@example
-awk -f myprog -- -v -q file1 file2 @dots{}
-@end example
-
@cindex differences in @command{awk} and @command{gawk}, @code{ARGC}/@code{ARGV} variables
-This is not necessary in @command{gawk}. Unless @option{--posix} has
+Ending the @command{awk} options with @option{--} isn't
+necessary in @command{gawk}. Unless @option{--posix} has
been specified, @command{gawk} silently puts any unrecognized options
into @code{ARGV} for the @command{awk} program to deal with. As soon
as it sees an unknown option, @command{gawk} stops looking for other
-options that it might otherwise recognize. The previous example with
+options that it might otherwise recognize. The previous command line with
@command{gawk} would be:
@example
-gawk -f myprog -q -v file1 file2 @dots{}
+gawk -f myprog.awk -q -v file1 file2 @dots{}
@end example
@noindent
-Because @option{-q} is not a valid @command{gawk} option,
-it and the following @option{-v}
-are passed on to the @command{awk} program.
-(@xref{Getopt Function}, for an @command{awk} library function
-that parses command-line options.)
+Because @option{-q} is not a valid @command{gawk} option, it and the
+following @option{-v} are passed on to the @command{awk} program.
+(@xref{Getopt Function}, for an @command{awk} library function that
+parses command-line options.)
@node Pattern Action Summary
@section Summary
@@ -15312,7 +15439,10 @@ array element value:
@end docbook
@noindent
-The pairs are shown in jumbled order because their order is irrelevant.
+The pairs are shown in jumbled order because their order is
+irrelevant.@footnote{The ordering will vary among @command{awk}
+implementations, which typically use hash tables to store array elements
+and values.}
One advantage of associative arrays is that new pairs can be added
at any time. For example, suppose a tenth element is added to the array
@@ -15434,8 +15564,9 @@ English to French:
Here we decided to translate the number one in both spelled-out and
numeric form---thus illustrating that a single array can have both
numbers and strings as indices.
-(In fact, array subscripts are always strings; this is discussed
-in more detail in
+(In fact, array subscripts are always strings.
+There are some subtleties to how numbers work when used as
+array subscripts; this is discussed in more detail in
@ref{Numeric Array Subscripts}.)
Here, the number @code{1} isn't double-quoted, since @command{awk}
automatically converts it to a string.
@@ -15502,8 +15633,9 @@ if (a["foo"] != "") @dots{}
@end example
@noindent
-This is incorrect, since this will @emph{create} @code{a["foo"]}
-if it didn't exist before!
+This is incorrect for two reasons. First, it @emph{creates} @code{a["foo"]}
+if it didn't exist before! Second, it is valid (if a bit unusual) to set
+an array element equal to the empty string.
@end quotation
@c @cindex arrays, @code{in} operator and
@@ -15521,6 +15653,8 @@ This expression tests whether the particular index @var{indx} exists,
without the side effect of creating that element if it is not present.
The expression has the value one (true) if @code{@var{array}[@var{indx}]}
exists and zero (false) if it does not exist.
+(We use @var{indx} here, since @samp{index} is the name of a built-in
+function.)
For example, this statement tests whether the array @code{frequencies}
contains the index @samp{2}:
@@ -15728,7 +15862,7 @@ $ @kbd{gawk -f loopcheck.awk}
@print{} is
@end example
-Contrast this to Brian Kernighan's @command{awk}:
+Contrast this to BWK @command{awk}:
@example
$ @kbd{nawk -f loopcheck.awk}
@@ -15973,7 +16107,7 @@ using @code{delete} without a subscript was a @command{gawk} extension.
As of September, 2012, it was accepted for
inclusion into the POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
the Austin Group website}. This form of the @code{delete} statement is also supported
-by Brian Kernighan's @command{awk} and @command{mawk}, as well as
+by BWK @command{awk} and @command{mawk}, as well as
by a number of other implementations (@pxref{Other Versions}).
@end quotation
@@ -16089,7 +16223,7 @@ $ @kbd{echo 'line 1}
> @kbd{line 2}
> @kbd{line 3' | awk '@{ l[lines] = $0; ++lines @}}
> @kbd{END @{}
-> @kbd{for (i = lines-1; i >= 0; --i)}
+> @kbd{for (i = lines - 1; i >= 0; i--)}
> @kbd{print l[i]}
> @kbd{@}'}
@print{} line 3
@@ -16113,7 +16247,7 @@ The following version of the program works correctly:
@example
@{ l[lines++] = $0 @}
END @{
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
@}
@end example
@@ -16187,10 +16321,11 @@ used for single dimensional arrays. Write the whole sequence of indices
in parentheses, separated by commas, as the left operand:
@example
-(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array}
+if ((@var{subscript1}, @var{subscript2}, @dots{}) in @var{array})
+ @dots{}
@end example
-The following example treats its input as a two-dimensional array of
+Here is an example that treats its input as a two-dimensional array of
fields; it rotates this array 90 degrees clockwise and prints the
result. It assumes that all lines have the same number of
elements:
@@ -16663,7 +16798,9 @@ is @minus{}3, and @code{int(-3)} is @minus{}3 as well.
@cindexawkfunc{log}
@cindex logarithm
Return the natural logarithm of @var{x}, if @var{x} is positive;
-otherwise, report an error.
+otherwise, return @code{NaN} (``not a number'') on IEEE 754 systems.
+Additionally, @command{gawk} prints a warning message when @var{x}
+is negative.
@item @code{rand()}
@cindexawkfunc{rand}
@@ -16762,6 +16899,9 @@ numbers that are truly unpredictable.
The return value of @code{srand()} is the previous seed. This makes it
easy to keep track of the seeds in case you need to consistently reproduce
sequences of random numbers.
+
+POSIX does not specify the initial seed; it differs among @command{awk}
+implementations.
@end table
@node String Functions
@@ -17437,7 +17577,7 @@ in the string, counting from character @var{start}.
@cindex Brian Kernighan's @command{awk}
If @var{start} is less than one, @code{substr()} treats it as
if it was one. (POSIX doesn't specify what to do in this case:
-Brian Kernighan's @command{awk} acts this way, and therefore @command{gawk}
+BWK @command{awk} acts this way, and therefore @command{gawk}
does too.)
If @var{start} is greater than the number of characters
in the string, @code{substr()} returns the null string.
@@ -17506,6 +17646,12 @@ Nonalphabetic characters are left unchanged. For example,
@cindex backslash (@code{\}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@cindex @code{&} (ampersand), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@cindex ampersand (@code{&}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
+
+@quotation CAUTION
+This section has been known to cause headaches.
+You might want to skip it upon first reading.
+@end quotation
+
When using @code{sub()}, @code{gsub()}, or @code{gensub()}, and trying to get literal
backslashes and ampersands into the replacement text, you need to remember
that there are several levels of @dfn{escape processing} going on.
@@ -17523,7 +17669,7 @@ escape sequences listed in @ref{Escape Sequences}.
Thus, for every @samp{\} that @command{awk} processes at the runtime
level, you must type two backslashes at the lexical level.
When a character that is not valid for an escape sequence follows the
-@samp{\}, Brian Kernighan's @command{awk} and @command{gawk} both simply remove the initial
+@samp{\}, BWK @command{awk} and @command{gawk} both simply remove the initial
@samp{\} and put the next character into the string. Thus, for
example, @code{"a\qb"} is treated as @code{"aqb"}.
@@ -17548,26 +17694,26 @@ through unchanged. This is illustrated in @ref{table-sub-escapes}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
- @code{\&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\\&}! @code{\\&}!a literal @samp{\&}_cr
- @code{\\\\\&}! @code{\\&}!a literal @samp{\&}_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\\&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+ @code{\&}! @code{&}!The matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\\&}! @code{\\&}!A literal @samp{\&}_cr
+ @code{\\\\\&}! @code{\\&}!A literal @samp{\&}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\\&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
}
_bigskip}
@end tex
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\&}
-@item @code{\\\\\&} @tab @code{\\&} @tab a literal @samp{\&}
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\\&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\&} @tab @code{&} @tab The matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\&}
+@item @code{\\\\\&} @tab @code{\\&} @tab A literal @samp{\&}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\\&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@end multitable
@end ifdocbook
@ifnottex
@@ -17575,13 +17721,13 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
- @code{\&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\&} @code{\&} a literal @samp{&}
- @code{\\\\&} @code{\\&} a literal @samp{\&}
- @code{\\\\\&} @code{\\&} a literal @samp{\&}
-@code{\\\\\\&} @code{\\\&} a literal @samp{\\&}
- @code{\\q} @code{\q} a literal @samp{\q}
+ @code{\&} @code{&} The matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\\&} @code{\&} A literal @samp{&}
+ @code{\\\\&} @code{\\&} A literal @samp{\&}
+ @code{\\\\\&} @code{\\&} A literal @samp{\&}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\\&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@end display
@end ifnotdocbook
@end ifnottex
@@ -17597,86 +17743,19 @@ case of even numbers of backslashes entered at the lexical level.)
The problem with the historical approach is that there is no way to get
a literal @samp{\} followed by the matched text.
-@c @cindex @command{awk} language, POSIX version
-@cindex POSIX @command{awk}, functions and, @code{gsub()}/@code{sub()}
-The 1992 POSIX standard attempted to fix this problem. That standard
-says that @code{sub()} and @code{gsub()} look for either a @samp{\} or an @samp{&}
-after the @samp{\}. If either one follows a @samp{\}, that character is
-output literally. The interpretation of @samp{\} and @samp{&} then becomes
-as shown in @ref{table-sub-posix-92}.
-
-@float Table,table-sub-posix-92
-@caption{1992 POSIX Rules for @code{sub()} and @code{gsub()} Escape Sequence Processing}
-@c thanks to Karl Berry for formatting this table
-@tex
-\vbox{\bigskip
-% We need more characters for escape and tab ...
-\catcode`_ = 0
-\catcode`! = 4
-% ... since this table has lots of &'s and \'s, so we unspecialize them.
-\catcode`\& = \other \catcode`\\ = \other
-_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
- You type!@code{sub()} sees!@code{sub()} generates_cr
-_hrulefill!_hrulefill!_hrulefill_cr
- @code{&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-}
-_bigskip}
-@end tex
-@ifdocbook
-@multitable @columnfractions .20 .20 .60
-@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, then the matched text
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@end multitable
-@end ifdocbook
-@ifnottex
-@ifnotdocbook
-@display
- You type @code{sub()} sees @code{sub()} generates
- -------- ---------- ---------------
- @code{&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
-@end display
-@end ifnotdocbook
-@end ifnottex
-@end float
-
-@noindent
-This appears to solve the problem.
-Unfortunately, the phrasing of the standard is unusual. It
-says, in effect, that @samp{\} turns off the special meaning of any
-following character, but for anything other than @samp{\} and @samp{&},
-such special meaning is undefined. This wording leads to two problems:
-
-@itemize @value{BULLET}
-@item
-Backslashes must now be doubled in the @var{replacement} string, breaking
-historical @command{awk} programs.
-
-@item
-To make sure that an @command{awk} program is portable, @emph{every} character
-in the @var{replacement} string must be preceded with a
-backslash.@footnote{This consequence was certainly unintended.}
-@c I can say that, 'cause I was involved in making this change
-@end itemize
+Several editions of the POSIX standard attempted to fix this problem
+but weren't successful. The details are irrelevant at this point in time.
-Because of the problems just listed,
-in 1996, the @command{gawk} maintainer submitted
+At one point, the @command{gawk} maintainer submitted
proposed text for a revised standard that
reverts to rules that correspond more closely to the original existing
practice. The proposed rules have special cases that make it possible
-to produce a @samp{\} preceding the matched text. This is shown in
+to produce a @samp{\} preceding the matched text.
+This is shown in
@ref{table-sub-proposed}.
@float Table,table-sub-proposed
-@caption{Proposed Rules For @code{sub()} And Backslash}
+@caption{GNU @command{awk} Rules For @code{sub()} And Backslash}
@tex
\vbox{\bigskip
% We need more characters for escape and tab ...
@@ -17687,10 +17766,10 @@ to produce a @samp{\} preceding the matched text. This is shown in
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+@code{\\\\&}! @code{\\&}!A literal @samp{\}, followed by the matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
@code{\\\\}! @code{\\}!@code{\\}_cr
}
_bigskip}
@@ -17698,10 +17777,10 @@ _bigskip}
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, followed by the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, followed by the matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@item @code{\\\\} @tab @code{\\} @tab @code{\\}
@end multitable
@end ifdocbook
@@ -17710,10 +17789,10 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\q} @code{\q} a literal @samp{\q}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@code{\\\\} @code{\\} @code{\\}
@end display
@end ifnotdocbook
@@ -17726,13 +17805,13 @@ there was only one. However, as in the historical case, any @samp{\} that
is not part of one of these three sequences is not special and appears
in the output literally.
-@command{gawk} 3.0 and 3.1 follow these proposed POSIX rules for @code{sub()} and
-@code{gsub()}.
-@c As much as we think it's a lousy idea. You win some, you lose some. Sigh.
-The POSIX standard took much longer to be revised than was expected in 1996.
-The 2001 standard does not follow the above rules. Instead, the rules
-there are somewhat simpler. The results are similar except for one case.
+@command{gawk} 3.0 and 3.1 follow these rules for @code{sub()} and
+@code{gsub()}. The POSIX standard took much longer to be revised than
+was expected. In addition, the @command{gawk} maintainer's proposal was
+lost during the standardization process. The final rules are
+somewhat simpler. The results are similar except for one case.
+@cindex POSIX @command{awk}, functions and, @code{gsub()}/@code{sub()}
The POSIX rules state that @samp{\&} in the replacement string produces
a literal @samp{&}, @samp{\\} produces a literal @samp{\}, and @samp{\} followed
by anything else is not special; the @samp{\} is placed straight into the output.
@@ -17750,10 +17829,10 @@ These rules are presented in @ref{table-posix-sub}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+@code{\\\\&}! @code{\\&}!A literal @samp{\}, followed by the matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
@code{\\\\}! @code{\\}!@code{\}_cr
}
_bigskip}
@@ -17761,10 +17840,10 @@ _bigskip}
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, followed by the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, followed by the matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@item @code{\\\\} @tab @code{\\} @tab @code{\}
@end multitable
@end ifdocbook
@@ -17773,10 +17852,10 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\q} @code{\q} a literal @samp{\q}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@code{\\\\} @code{\\} @code{\}
@end display
@end ifnotdocbook
@@ -17788,7 +17867,7 @@ is seen as @samp{\\} and produces @samp{\} instead of @samp{\\}.
Starting with @value{PVERSION} 3.1.4, @command{gawk} followed the POSIX rules
when @option{--posix} is specified (@pxref{Options}). Otherwise,
-it continued to follow the 1996 proposed rules, since
+it continued to follow the proposed rules, since
that had been its behavior for many years.
When @value{PVERSION} 4.0.0 was released, the @command{gawk} maintainer
@@ -17819,24 +17898,24 @@ as shown in @ref{table-gensub-escapes}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{gensub()} sees!@code{gensub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
- @code{&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\\}! @code{\\}!a literal @samp{\}_cr
- @code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
- @code{\\q}! @code{\q}!a literal @samp{q}_cr
+ @code{&}! @code{&}!The matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\\}! @code{\\}!A literal @samp{\}_cr
+ @code{\\\\&}! @code{\\&}!A literal @samp{\}, then the matched text_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{q}_cr
}
_bigskip}
@end tex
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{gensub()} sees @tab @code{gensub()} generates
-@item @code{&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\} @tab @code{\\} @tab a literal @samp{\}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, then the matched text
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{q}
+@item @code{&} @tab @code{&} @tab The matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\\} @tab @code{\\} @tab A literal @samp{\}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, then the matched text
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{q}
@end multitable
@end ifdocbook
@ifnottex
@@ -17844,12 +17923,12 @@ _bigskip}
@display
You type @code{gensub()} sees @code{gensub()} generates
-------- ------------- ------------------
- @code{&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\\} @code{\\} a literal @samp{\}
- @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\q} @code{\q} a literal @samp{q}
+ @code{&} @code{&} The matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\\\} @code{\\} A literal @samp{\}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, then the matched text
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\q} @code{\q} A literal @samp{q}
@end display
@end ifnotdocbook
@end ifnottex
@@ -17964,7 +18043,7 @@ buffers its output and the @code{fflush()} function forces
@cindex extensions, common@comma{} @code{fflush()} function
@cindex Brian Kernighan's @command{awk}
-@code{fflush()} was added to Brian Kernighan's @command{awk} in
+@code{fflush()} was added to BWK @command{awk} in
April of 1992. For two decades, it was not part of the POSIX standard.
As of December, 2012, it was accepted for inclusion into the POSIX
standard.
@@ -19081,6 +19160,12 @@ them, i.e., to tell @command{awk} what they should do.
@node Definition Syntax
@subsection Function Definition Syntax
+@quotation
+It's entirely fair to say that the @command{awk} syntax for local
+variable definitions is appallingly awful.
+@author Brian Kernighan
+@end quotation
+
@c STARTOFRANGE fdef
@cindex functions, defining
Definitions of functions can appear anywhere between the rules of an
@@ -19120,7 +19205,7 @@ have a parameter with the same name as the function itself.
In addition, according to the POSIX standard, function parameters
cannot have the same name as one of the special built-in variables
(@pxref{Built-in Variables}). Not all versions of @command{awk} enforce
-this restriction.)
+this restriction.
Local variables act like the empty string if referenced where a string
value is required, and like zero if referenced where a numeric value
@@ -19250,7 +19335,8 @@ this program, using our function to format the results, prints:
21.2
@end example
-This function deletes all the elements in an array:
+This function deletes all the elements in an array (recall that the
+extra whitespace signifies the start of the local variable list):
@example
function delarray(a, i)
@@ -19273,17 +19359,18 @@ addition to the POSIX standard.)
The following is an example of a recursive function. It takes a string
as an input parameter and returns the string in backwards order.
Recursive functions must always have a test that stops the recursion.
-In this case, the recursion terminates when the starting position
-is zero, i.e., when there are no more characters left in the string.
+In this case, the recursion terminates when the input string is
+already empty.
+@c 8/2014: Thanks to Mike Brennan for the improved formulation
@cindex @code{rev()} user-defined function
@example
-function rev(str, start)
+function rev(str)
@{
- if (start == 0)
+ if (str == "")
return ""
- return (substr(str, start, 1) rev(str, start - 1))
+ return (rev(substr(str, 2)) substr(str, 1, 1))
@}
@end example
@@ -19292,7 +19379,7 @@ this way:
@example
$ @kbd{echo "Don't Panic!" |}
-> @kbd{gawk --source '@{ print rev($0, length($0)) @}' -f rev.awk}
+> @kbd{gawk -e '@{ print rev($0) @}' -f rev.awk}
@print{} !cinaP t'noD
@end example
@@ -19577,7 +19664,7 @@ BEGIN @{
@noindent
prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because
-@code{changeit} stores @code{"two"} in the second element of @code{a}.
+@code{changeit()} stores @code{"two"} in the second element of @code{a}.
@end quotation
@cindex undefined functions
@@ -19753,7 +19840,7 @@ being aware of them.
@cindex pointers to functions
@cindex differences in @command{awk} and @command{gawk}, indirect function calls
-This section describes a @command{gawk}-specific extension.
+This section describes an advanced, @command{gawk}-specific extension.
Often, you may wish to defer the choice of function to call until runtime.
For example, you may have different kinds of records, each of which
@@ -19799,8 +19886,11 @@ To process the data, you might write initially:
@noindent
This style of programming works, but can be awkward. With @dfn{indirect}
function calls, you tell @command{gawk} to use the @emph{value} of a
-variable as the name of the function to call.
+variable as the @emph{name} of the function to call.
+@cindex @code{@@}-notation for indirect function calls
+@cindex indirect function calls, @code{@@}-notation
+@cindex function calls, indirect, @code{@@}-notation for
The syntax is similar to that of a regular function call: an identifier
immediately followed by a left parenthesis, any arguments, and then
a closing right parenthesis, with the addition of a leading @samp{@@}
@@ -19858,7 +19948,6 @@ Otherwise they perform the expected computations and are not unusual.
@example
@c file eg/prog/indirectcall.awk
# For each record, print the class name and the requested statistics
-
@{
class_name = $1
gsub(/_/, " ", class_name) # Replace _ with spaces
@@ -20087,10 +20176,12 @@ $ @kbd{gawk -f quicksort.awk -f indirectcall.awk class_data2}
Remember that you must supply a leading @samp{@@} in front of an indirect function call.
-Unfortunately, indirect function calls cannot be used with the built-in functions. However,
-you can generally write ``wrapper'' functions which call the built-in ones, and those can
-be called indirectly. (Other than, perhaps, the mathematical functions, there is not a lot
-of reason to try to call the built-in functions indirectly.)
+Starting with @value{PVERSION} 4.1.2 of @command{gawk}, indirect function
+calls may also be used with built-in functions and with extension functions
+(@pxref{Dynamic Extensions}). The only thing you cannot do is pass a regular
+expression constant to a built-in function through an indirect function
+call.@footnote{This may change in a future version; recheck the documentation that
+comes with your version of @command{gawk} to see if it has.}
@command{gawk} does its best to make indirect function calls efficient.
For example, in the following case:
@@ -20101,7 +20192,7 @@ for (i = 1; i <= n; i++)
@end example
@noindent
-@code{gawk} will look up the actual function to call only once.
+@command{gawk} looks up the actual function to call only once.
@node Functions Summary
@section Summary
@@ -20141,6 +20232,8 @@ from the real parameters by extra whitespace.
User-defined functions may call other user-defined (and built-in)
functions and may call themselves recursively. Function parameters
``hide'' any global variables of the same names.
+You cannot use the name of a reserved variable (such as @code{ARGC})
+as the name of a parameter in user-defined functions.
@item
Scalar values are passed to user-defined functions by value. Array
@@ -20159,7 +20252,7 @@ either scalar or array.
@item
@command{gawk} provides indirect function calls using a special syntax.
-By setting a variable to the name of a user-defined function, you can
+By setting a variable to the name of a function, you can
determine at runtime what function will be called at that point in the
program. This is equivalent to function pointers in C and C++.
@@ -20194,7 +20287,7 @@ It contains the following chapters:
@c STARTOFRANGE fudlib
@cindex functions, user-defined, library of
-@ref{User-defined}, describes how to write
+@DBREF{User-defined} describes how to write
your own @command{awk} functions. Writing functions is important, because
it allows you to encapsulate algorithms and program tasks in a single
place. It simplifies programming, making program development more
@@ -20218,7 +20311,7 @@ of good programs leads to better writing.
In fact, they felt this idea was so important that they placed this
statement on the cover of their book. Because we believe strongly
that their statement is correct, this @value{CHAPTER} and @ref{Sample
-Programs}, provide a good-sized body of code for you to read, and we hope,
+Programs}, provide a good-sized body of code for you to read and, we hope,
to learn from.
This @value{CHAPTER} presents a library of useful @command{awk} functions.
@@ -20227,7 +20320,7 @@ use these functions.
The functions are presented here in a progression from simple to complex.
@cindex Texinfo
-@ref{Extract Program},
+@DBREF{Extract Program}
presents a program that you can use to extract the source code for
these example library functions and programs from the Texinfo source
for this @value{DOCUMENT}.
@@ -20291,7 +20384,7 @@ comparisons use only lowercase letters.
* Group Functions:: Functions for getting group information.
* Walking Arrays:: A function to walk arrays of arrays.
* Library Functions Summary:: Summary of library functions.
-* Library exercises:: Exercises.
+* Library Exercises:: Exercises.
@end menu
@node Library Names
@@ -20378,7 +20471,7 @@ A different convention, common in the Tcl community, is to use a single
associative array to hold the values needed by the library function(s), or
``package.'' This significantly decreases the number of actual global names
in use. For example, the functions described in
-@ref{Passwd Functions},
+@DBREF{Passwd Functions}
might have used array elements @code{@w{PW_data["inited"]}}, @code{@w{PW_data["total"]}},
@code{@w{PW_data["count"]}}, and @code{@w{PW_data["awklib"]}}, instead of
@code{@w{_pw_inited}}, @code{@w{_pw_awklib}}, @code{@w{_pw_total}},
@@ -20439,8 +20532,9 @@ function mystrtonum(str, ret, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
@}
@@ -20452,6 +20546,8 @@ function mystrtonum(str, ret, n, i, k, c)
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
@@ -20853,8 +20949,7 @@ function chr(c)
@c endfile
#### test code ####
-# BEGIN \
-# @{
+# BEGIN @{
# for (;;) @{
# printf("enter a character: ")
# if (getline var <= 0)
@@ -20939,7 +21034,7 @@ more difficult than they really need to be.}
@cindex timestamps, formatted
@cindex time, managing
The @code{systime()} and @code{strftime()} functions described in
-@ref{Time Functions},
+@DBREF{Time Functions}
provide the minimum functionality necessary for dealing with the time of day
in human readable form. While @code{strftime()} is extensive, the control
formats are not necessarily easy to remember or intuitively obvious when
@@ -21025,7 +21120,7 @@ function getlocaltime(time, ret, now, i)
The string indices are easier to use and read than the various formats
required by @code{strftime()}. The @code{alarm} program presented in
-@ref{Alarm Program},
+@DBREF{Alarm Program}
uses this function.
A more general design for the @code{getlocaltime()} function would have
allowed the user to supply an optional timestamp value to use instead
@@ -21228,7 +21323,7 @@ END @{ endfile(_filename_) @}
@c endfile
@end example
-@ref{Wc Program},
+@DBREF{Wc Program}
shows how this library function can be used and
how it simplifies writing the main program.
@@ -21728,8 +21823,7 @@ it is not an option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) @{
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) @{
Optind++
_opti = 0
@@ -22232,7 +22326,7 @@ once. If you are worried about squeezing every last cycle out of your
this is not necessary, since most @command{awk} programs are I/O-bound,
and such a change would clutter up the code.
-The @command{id} program in @ref{Id Program},
+The @command{id} program in @DBREF{Id Program}
uses these functions.
@c ENDOFRANGE libfudata
@c ENDOFRANGE flibudata
@@ -22258,7 +22352,7 @@ uses these functions.
@cindex group file
@cindex files, group
Much of the discussion presented in
-@ref{Passwd Functions},
+@DBREF{Passwd Functions}
applies to the group database as well. Although there has traditionally
been a well-known file (@file{/etc/group}) in a well-known format, the POSIX
standard only provides a set of C library routines
@@ -22411,8 +22505,7 @@ There are several, modeled after the C library functions of the same names:
@c line break on _gr_init for smallbook
@c file eg/lib/groupawk.in
-BEGIN \
-@{
+BEGIN @{
# Change to suit your system
_gr_awklib = "/usr/local/libexec/awk/"
@}
@@ -22598,13 +22691,13 @@ Most of the work is in scanning the database and building the various
associative arrays. The functions that the user calls are themselves very
simple, relying on @command{awk}'s associative arrays to do work.
-The @command{id} program in @ref{Id Program},
+The @command{id} program in @DBREF{Id Program}
uses these functions.
@node Walking Arrays
@section Traversing Arrays of Arrays
-@ref{Arrays of Arrays}, described how @command{gawk}
+@DBREF{Arrays of Arrays} described how @command{gawk}
provides arrays of arrays. In particular, any element of
an array may be either a scalar, or another array. The
@code{isarray()} function (@pxref{Type Functions})
@@ -22710,7 +22803,8 @@ A simple function to traverse an array of arrays to any depth.
@end itemize
-@node Library exercises
+@c EXCLUDE START
+@node Library Exercises
@section Exercises
@enumerate
@@ -22758,7 +22852,7 @@ As a related challenge, revise that code to handle the case where
an intervening value in @code{ARGV} is a variable assignment.
@item
-@ref{Walking Arrays}, presented a function that walked a multidimensional
+@DBREF{Walking Arrays} presented a function that walked a multidimensional
array to print it out. However, walking an array and processing
each element is a general-purpose operation. Generalize the
@code{walk_array()} function by adding an additional parameter named
@@ -22776,6 +22870,7 @@ Test your new version by printing the array; you should end up with
output identical to that of the original version.
@end enumerate
+@c EXCLUDE END
@c ENDOFRANGE flib
@c ENDOFRANGE fudlib
@@ -22989,8 +23084,7 @@ string:
@example
@c file eg/prog/cut.awk
-BEGIN \
-@{
+BEGIN @{
FS = "\t" # default
OFS = FS
while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) @{
@@ -23465,8 +23559,7 @@ there are no matches, the exit status is one; otherwise it is zero:
@example
@c file eg/prog/egrep.awk
-END \
-@{
+END @{
exit (total == 0)
@}
@c endfile
@@ -23490,17 +23583,6 @@ function usage( e)
The variable @code{e} is used so that the function fits nicely
on the printed page.
-@cindex @code{END} pattern, backslash continuation and
-@cindex @code{\} (backslash), continuing lines and
-@cindex backslash (@code{\}), continuing lines and
-Just a note on programming style: you may have noticed that the @code{END}
-rule uses backslash continuation, with the open brace on a line by
-itself. This is so that it more closely resembles the way functions
-are written. Many of the examples
-in this @value{CHAPTER}
-use this style. You can decide for yourself if you like writing
-your @code{BEGIN} and @code{END} rules this way
-or not.
@c ENDOFRANGE regexps
@c ENDOFRANGE sfregexp
@c ENDOFRANGE fsregexp
@@ -23567,8 +23649,7 @@ numbers:
# egid=5(blat) groups=9(nine),2(two),1(one)
@group
-BEGIN \
-@{
+BEGIN @{
uid = PROCINFO["uid"]
euid = PROCINFO["euid"]
gid = PROCINFO["gid"]
@@ -23785,6 +23866,12 @@ instead of doing it in an @code{END} rule.
It also assumes that letters are contiguous in the character set,
which isn't true for EBCDIC systems.
+@ifset FOR_PRINT
+You might want to consider how to eliminate the use of
+@code{ord()} and @code{chr()}; this can be done in such a
+way as to solve the EBCDIC issue as well.
+@end ifset
+
@c ENDOFRANGE filspl
@c ENDOFRANGE split
@@ -23838,8 +23925,7 @@ Finally, @command{awk} is forced to read the standard input by setting
@c endfile
@end ignore
@c file eg/prog/tee.awk
-BEGIN \
-@{
+BEGIN @{
for (i = 1; i < ARGC; i++)
copy[i] = ARGV[i]
@@ -23901,8 +23987,7 @@ Finally, the @code{END} rule cleans up by closing all the output files:
@example
@c file eg/prog/tee.awk
-END \
-@{
+END @{
for (i in copy)
close(copy[i])
@}
@@ -24019,8 +24104,7 @@ function usage( e)
# -n skip n fields
# +n skip n characters, skip fields first
-BEGIN \
-@{
+BEGIN @{
count = 1
outputfile = "/dev/stdout"
opts = "udc0:1:2:3:4:5:6:7:8:9:"
@@ -24032,7 +24116,7 @@ BEGIN \
else if (c == "c")
do_count++
else if (index("0123456789", c) != 0) @{
- # getopt requires args to options
+ # getopt() requires args to options
# this messes us up for things like -5
if (Optarg ~ /^[[:digit:]]+$/)
fcount = (c Optarg) + 0
@@ -24169,6 +24253,22 @@ END @{
@}
@c endfile
@end example
+
+@ifset FOR_PRINT
+The logic for choosing which lines to print represents a @dfn{state
+machine}, which is ``a device that can be in one of a set number of stable
+conditions depending on its previous condition and on the present values
+of its inputs.''@footnote{This is the definition returned from entering
+@code{define: state machine} into Google.}
+Brian Kernighan suggests that
+``an alternative approach to state machines is to just read
+the input into an array, then use indexing. It's almost always
+easier code, and for most inputs where you would use this, just
+as fast.'' Consider how to rewrite the logic to follow this
+suggestion.
+@end ifset
+
+
@c ENDOFRANGE prunt
@c ENDOFRANGE tpul
@c ENDOFRANGE uniq
@@ -24539,8 +24639,7 @@ Here is the program:
@c file eg/prog/alarm.awk
# usage: alarm time [ "message" [ count [ delay ] ] ]
-BEGIN \
-@{
+BEGIN @{
# Initial argument sanity checking
usage1 = "usage: alarm time ['message' [count [delay]]]"
usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
@@ -24695,7 +24794,7 @@ of standard @command{awk}: dealing with individual characters is very
painful, requiring repeated use of the @code{substr()}, @code{index()},
and @code{gsub()} built-in functions
(@pxref{String Functions}).@footnote{This
-program was written before @command{gawk} acquired the ability to
+program was also written before @command{gawk} acquired the ability to
split each character in a string into separate array elements.}
There are two functions. The first, @code{stranslate()}, takes three
arguments:
@@ -24803,6 +24902,12 @@ An obvious improvement to this program would be to set up the
@code{t_ar} array only once, in a @code{BEGIN} rule. However, this
assumes that the ``from'' and ``to'' lists
will never change throughout the lifetime of the program.
+
+Another obvious improvement is to enable the use of ranges,
+such as @samp{a-z}, as allowed by the @command{tr} utility.
+Look at the code for @file{cut.awk} (@pxref{Cut Program})
+for inspiration.
+
@c ENDOFRANGE chtra
@c ENDOFRANGE tr
@@ -24935,8 +25040,7 @@ function printpage( i, j)
Count++
@}
-END \
-@{
+END @{
printpage()
@}
@c endfile
@@ -25587,7 +25691,7 @@ a shell variable that will be expanded. There are two cases:
@enumerate a
@item
-Literal text, provided with @option{--source} or @option{--source=}. This
+Literal text, provided with @option{-e} or @option{--source}. This
text is just appended directly.
@item
@@ -25932,7 +26036,7 @@ The program should exit without reading any @value{DF}s.
However, suppose that an included library file defines an @code{END}
rule of its own. In this case, @command{gawk} will hang, reading standard
input. In order to avoid this, @file{/dev/null} is explicitly added to the
-command-line. Reading from @file{/dev/null} always returns an immediate
+command line. Reading from @file{/dev/null} always returns an immediate
end of file indication.
@c Hmm. Add /dev/null if $# is 0? Still messes up ARGV. Sigh.
@@ -26275,6 +26379,7 @@ mailing labels, and finding anagrams.
@end itemize
+@c EXCLUDE START
@node Programs Exercises
@section Exercises
@@ -26298,17 +26403,27 @@ information is printed. Modify the @command{awk} version
same way.
@item
-The @code{split.awk} program (@pxref{Split Program}) uses
-the @code{chr()} and @code{ord()} functions to move through the
-letters of the alphabet.
-Modify the program to instead use only the @command{awk}
-built-in functions, such as @code{index()} and @code{substr()}.
-
-@item
The @code{split.awk} program (@pxref{Split Program}) assumes
that letters are contiguous in the character set,
which isn't true for EBCDIC systems.
Fix this problem.
+(Hint: Consider a different way to work through the alphabet,
+without relying on @code{ord()} and @code{chr()}.)
+
+@item
+In @file{uniq.awk} (@pxref{Uniq Program}), the
+logic for choosing which lines to print represents a @dfn{state
+machine}, which is ``a device that can be in one of a set number of stable
+conditions depending on its previous condition and on the present values
+of its inputs.''@footnote{This is the definition returned from entering
+@code{define: state machine} into Google.}
+Brian Kernighan suggests that
+``an alternative approach to state machines is to just read
+the input into an array, then use indexing. It's almost always
+easier code, and for most inputs where you would use this, just
+as fast.'' Rewrite the logic to follow this
+suggestion.
+
@item
Why can't the @file{wc.awk} program (@pxref{Wc Program}) just
@@ -26404,6 +26519,7 @@ Modify @file{anagram.awk} (@pxref{Anagram Program}), to avoid
the use of the external @command{sort} utility.
@end enumerate
+@c EXCLUDE END
@ifnotinfo
@part @value{PART3}Moving Beyond Standard @command{awk} With @command{gawk}
@@ -26585,7 +26701,7 @@ Often, though, it is desirable to be able to loop over the elements
in a particular order that you, the programmer, choose. @command{gawk}
lets you do this.
-@ref{Controlling Scanning}, describes how you can assign special,
+@DBREF{Controlling Scanning} describes how you can assign special,
pre-defined values to @code{PROCINFO["sorted_in"]} in order to
control the order in which @command{gawk} traverses an array
during a @code{for} loop.
@@ -26954,6 +27070,9 @@ Caveat Emptor.
@node Two-way I/O
@section Two-Way Communications with Another Process
+
+@c 8/2014. Neither Mike nor BWK saw this as relevant. Commenting it out.
+@ignore
@cindex Brennan, Michael
@cindex programmers, attractiveness of
@smallexample
@@ -26983,6 +27102,7 @@ the scent of perl programmers.
Mike Brennan
@c brennan@@whidbey.com
@end smallexample
+@end ignore
@cindex advanced features, processes@comma{} communicating with
@cindex processes, two-way communications with
@@ -27009,7 +27129,10 @@ system("rm " tempfile)
This works, but not elegantly. Among other things, it requires that
the program be run in a directory that cannot be shared among users;
for example, @file{/tmp} will not do, as another user might happen
-to be using a temporary file with the same name.
+to be using a temporary file with the same name.@footnote{Michael
+Brennan suggests the use of @code{rand()} to generate unique
+@value{FN}s. This is a valid point; nevertheless, temporary files
+remain more difficult than two-way pipes.} @c 8/2014
@cindex coprocesses
@cindex input/output, two-way
@@ -27164,7 +27287,7 @@ You can think of this as just a @emph{very long} two-way pipeline to
a coprocess.
The way @command{gawk} decides that you want to use TCP/IP networking is
by recognizing special @value{FN}s that begin with one of @samp{/inet/},
-@samp{/inet4/} or @samp{/inet6}.
+@samp{/inet4/} or @samp{/inet6/}.
The full syntax of the special @value{FN} is
@file{/@var{net-type}/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}}.
@@ -27815,7 +27938,16 @@ and/or groups of characters sort in a given language.
@cindex @code{LC_CTYPE} locale category
@item LC_CTYPE
Character-type information (alphabetic, digit, upper- or lowercase, and
-so on).
+so on) as well as character encoding.
+@ignore
+In June 2001 Bruno Haible wrote:
+- Description of LC_CTYPE: It determines both
+ 1. character encoding,
+ 2. character type information.
+ (For example, in both KOI8-R and ISO-8859-5 the character type information
+ is the same - cyrillic letters count as 'alpha' - but the encoding is
+ different.)
+@end ignore
This information is accessed via the
POSIX character classes in regular expressions,
such as @code{/[[:alnum:]]/}
@@ -27836,11 +27968,6 @@ use a comma every three decimal places and a period for the decimal
point, while many Europeans do exactly the opposite:
1,234.56 versus 1.234,56.}
-@cindex @code{LC_RESPONSE} locale category
-@item LC_RESPONSE
-Response information, such as how ``yes'' and ``no'' appear in the
-local language, and possibly other information as well.
-
@cindex time, localization and
@cindex dates, information related to@comma{} localization
@cindex @code{LC_TIME} locale category
@@ -27975,18 +28102,33 @@ printf(_"Number of users is %d\n", nusers)
@item
If you are creating strings dynamically, you can
still translate them, using the @code{dcgettext()}
-built-in function:
+built-in function:@footnote{Thanks to Bruno Haible for this
+example.}
@example
-message = nusers " users logged in"
-message = dcgettext(message, "adminprog")
-print message
+if (groggy)
+ message = dcgettext("%d customers disturbing me\n", "adminprog")
+else
+ message = dcgettext("enjoying %d customers\n", "adminprog")
+printf(message, ncustomers)
@end example
Here, the call to @code{dcgettext()} supplies a different
text domain (@code{"adminprog"}) in which to find the
message, but it uses the default @code{"LC_MESSAGES"} category.
+The previous example only works if @code{ncustomers} is greater than one.
+This example would be better done with @code{dcngettext()}:
+
+@example
+if (groggy)
+ message = dcngettext("%d customer disturbing me\n", "%d customers disturbing me\n", ncustomers, "adminprog")
+else
+ message = dcngettext("enjoying %d customer\n", "enjoying %d customers\n", ncustomers, "adminprog")
+printf(message, ncustomers)
+@end example
+
+
@cindex @code{LC_MESSAGES} locale category, @code{bindtextdomain()} function (@command{gawk})
@item
During development, you might want to put the @file{.gmo}
@@ -28066,6 +28208,9 @@ appear as the first argument to @code{dcgettext()} or as the first and
second argument to @code{dcngettext()}.@footnote{The
@command{xgettext} utility that comes with GNU
@command{gettext} can handle @file{.awk} files.}
+You should distribute the generated @file{.pot} file with
+your @command{awk} program; translators will eventually use it
+to provide you translations that you can also then distribute.
@xref{I18N Example},
for the full list of steps to go through to create and test
translations for @command{guide}.
@@ -28356,8 +28501,7 @@ This file must be renamed and placed in the proper directory so that
@command{gawk} can find it:
@example
-$ @kbd{msgfmt guide-mellow.po}
-$ @kbd{mv messages en_US.UTF-8/LC_MESSAGES/guide.mo}
+$ @kbd{msgfmt guide-mellow.po -o en_US.UTF-8/LC_MESSAGES/guide.mo}
@end example
Finally, we run the program to test it:
@@ -28624,7 +28768,7 @@ to debug command-line programs, only programs contained in files.)
In our case, we invoke the debugger like this:
@example
-$ @kbd{gawk -D -f getopt.awk -f join.awk -f uniq.awk inputfile}
+$ @kbd{gawk -D -f getopt.awk -f join.awk -f uniq.awk -1 inputfile}
@end example
@noindent
@@ -28686,7 +28830,7 @@ the breakpoint, use the @code{b} (breakpoint) command:
@example
gawk> @kbd{b are_equal}
-@print{} Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 64
+@print{} Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 63
@end example
The debugger tells us the file and line number where the breakpoint is.
@@ -28698,8 +28842,8 @@ gawk> @kbd{r}
@print{} Starting program:
@print{} Stopping in Rule ...
@print{} Breakpoint 1, are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':64
-@print{} 64 if (fcount == 0 && charcount == 0)
+ at `awklib/eg/prog/uniq.awk':63
+@print{} 63 if (fcount == 0 && charcount == 0)
gawk>
@end example
@@ -28711,12 +28855,12 @@ listing of the current stack frames:
@example
gawk> @kbd{bt}
@print{} #0 are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':69
-@print{} #1 in main() at `awklib/eg/prog/uniq.awk':89
+ at `awklib/eg/prog/uniq.awk':68
+@print{} #1 in main() at `awklib/eg/prog/uniq.awk':88
@end example
This tells us that @code{are_equal()} was called by the main program at
-line 89 of @file{uniq.awk}. (This is not a big surprise, since this
+line 88 of @file{uniq.awk}. (This is not a big surprise, since this
is the only call to @code{are_equal()} in the program, but in more complex
programs, knowing who called a function and with what parameters can be
the key to finding the source of the problem.)
@@ -28740,7 +28884,7 @@ A more useful variable to display might be the current record:
@example
gawk> @kbd{p $0}
-@print{} $0 = string ("gawk is a wonderful program!")
+@print{} $0 = "gawk is a wonderful program!"
@end example
@noindent
@@ -28749,7 +28893,7 @@ our test input above. Let's look at @code{NR}:
@example
gawk> @kbd{p NR}
-@print{} NR = number (2)
+@print{} NR = 2
@end example
@noindent
@@ -28768,7 +28912,7 @@ OK, let's just check that that rule worked correctly:
@example
gawk> @kbd{p last}
-@print{} last = string ("awk is a wonderful program!")
+@print{} last = "awk is a wonderful program!"
@end example
Everything we have done so far has verified that the program has worked as
@@ -28779,13 +28923,13 @@ be inside this function. To investigate further, we must begin
@example
gawk> @kbd{n}
-@print{} 67 if (fcount > 0) @{
+@print{} 66 if (fcount > 0) @{
@end example
-This tells us that @command{gawk} is now ready to execute line 67, which
+This tells us that @command{gawk} is now ready to execute line 66, which
decides whether to give the lines the special ``field skipping'' treatment
-indicated by the @option{-f} command-line option. (Notice that we skipped
-from where we were before at line 64 to here, since the condition in line 64
+indicated by the @option{-1} command-line option. (Notice that we skipped
+from where we were before at line 63 to here, since the condition in line 63
@samp{if (fcount == 0 && charcount == 0)} was false.)
Continuing to step, we now get to the splitting of the current and
@@ -28793,9 +28937,9 @@ last records:
@example
gawk> @kbd{n}
-@print{} 68 n = split(last, alast)
+@print{} 67 n = split(last, alast)
gawk> @kbd{n}
-@print{} 69 m = split($0, aline)
+@print{} 68 m = split($0, aline)
@end example
At this point, we should be curious to see what our records were split
@@ -28803,10 +28947,10 @@ into, so we try to look:
@example
gawk> @kbd{p n m alast aline}
-@print{} n = number (5)
-@print{} m = number (5)
+@print{} n = 5
+@print{} m = untyped variable
@print{} alast = array, 5 elements
-@print{} aline = array, 5 elements
+@print{} aline = untyped variable
@end example
@noindent
@@ -28814,7 +28958,9 @@ gawk> @kbd{p n m alast aline}
@command{awk}'s @code{print} statement.)
This is kind of disappointing, though. All we found out is that there
-are five elements in each of our arrays. Useful enough (we now know that
+are five elements in @code{alast}; @code{m} and @code{aline} don't have
+values because we are at line 68 but haven't executed it yet.
+This information is useful enough (we now know that
none of the words were accidentally left out), but what if we want to see
inside the array?
@@ -28830,7 +28976,7 @@ Oops!
@example
gawk> @kbd{p alast[1]}
-@print{} alast["1"] = string ("awk")
+@print{} alast["1"] = "awk"
@end example
This would be kind of slow for a 100-member array, though, so
@@ -28839,11 +28985,11 @@ not to be mentioned):
@example
gawk> @kbd{p @@alast}
-@print{} alast["1"] = string ("awk")
-@print{} alast["2"] = string ("is")
-@print{} alast["3"] = string ("a")
-@print{} alast["4"] = string ("wonderful")
-@print{} alast["5"] = string ("program!")
+@print{} alast["1"] = "awk"
+@print{} alast["2"] = "is"
+@print{} alast["3"] = "a"
+@print{} alast["4"] = "wonderful"
+@print{} alast["5"] = "program!"
@end example
It looks like we got this far OK. Let's take another step
@@ -28851,9 +28997,9 @@ or two:
@example
gawk> @kbd{n}
-@print{} 70 clast = join(alast, fcount, n)
+@print{} 69 clast = join(alast, fcount, n)
gawk> @kbd{n}
-@print{} 71 cline = join(aline, fcount, m)
+@print{} 70 cline = join(aline, fcount, m)
@end example
Well, here we are at our error (sorry to spoil the suspense). What we
@@ -28863,8 +29009,8 @@ this would work. Let's look at what we've got:
@example
gawk> @kbd{p cline clast}
-@print{} cline = string ("gawk is a wonderful program!")
-@print{} clast = string ("awk is a wonderful program!")
+@print{} cline = "gawk is a wonderful program!"
+@print{} clast = "awk is a wonderful program!"
@end example
Hey, those look pretty familiar! They're just our original, unaltered,
@@ -29711,7 +29857,9 @@ responds @samp{syntax error}. When you do figure out what your mistake was,
though, you'll feel like a real guru.
@item
-If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands},
+@c NOTE: no comma after the ref{} on purpose, due to following
+@c parenthetical remark.
+If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands}
(or if you are already familiar with @command{gawk} internals),
you will realize that much of the internal manipulation of data
in @command{gawk}, as in many interpreters, is done on a stack.
@@ -29759,7 +29907,7 @@ similarly to the GNU Debugger, GDB.
@item
Debuggers let you step through your program one statement at a time,
examine and change variable and array values, and do a number of other
-things that let understand what your program is actually doing (as
+things that let you understand what your program is actually doing (as
opposed to what it is supposed to do).
@item
@@ -29797,6 +29945,12 @@ arbitrary precision integers, and concludes with a description of some
points where @command{gawk} and the POSIX standard are not quite in
agreement.
+@quotation NOTE
+Most users of @command{gawk} can safely skip this chapter.
+But if you want to do scientific calculations with @command{gawk},
+this is the place to be.
+@end quotation
+
@menu
* Computer Arithmetic:: A quick intro to computer math.
* Math Definitions:: Defining terms used.
@@ -29916,8 +30070,23 @@ A special value representing infinity. Operations involving another
number and infinity produce infinity.
@item NaN
-``Not A Number.'' A special value indicating a result that can't
-happen in real math, but that can happen in floating-point computations.
+``Not A Number.''@footnote{Thanks
+to Michael Brennan for this description, which I have paraphrased, and
+for the examples.}
+A special value that results from attempting a
+calculation that has no answer as a real number. In such a case,
+programs can either receive a floating-point exception, or get @code{NaN}
+back as the result. The IEEE 754 standard recommends that systems return
+@code{NaN}. Some examples:
+
+@table @code
+@item sqrt(-1)
+This makes sense in the range of complex numbers, but not in the
+range of real numbers, so the result is @code{NaN}.
+
+@item log(-8)
+@minus{}8 is out of the domain of @code{log()}, so the result is @code{NaN}.
+@end table
@item Normalized
How the significand (see later in this list) is usually stored. The
@@ -30024,8 +30193,8 @@ array to provide information about the MPFR and GMP libraries
The MPFR library provides precise control over precisions and rounding
modes, and gives correctly rounded, reproducible, platform-independent
-results. With either of the command-line options @option{--bignum} or
-@option{-M}, all floating-point arithmetic operators and numeric functions
+results. With the @option{-M} command-line option,
+all floating-point arithmetic operators and numeric functions
can yield results to any desired precision level supported by MPFR.
Two built-in variables, @code{PREC} and @code{ROUNDMODE},
@@ -30039,7 +30208,7 @@ to follow.
@quotation
Math class is tough!
-@author Late 1980's Barbie
+@author Teen Talk Barbie, July 1992
@end quotation
This @value{SECTION} provides a high level overview of the issues
@@ -30335,7 +30504,7 @@ internally as a MPFR number. Changing the precision using @code{PREC}
in the program text does @emph{not} change the precision of a constant.
If you need to represent a floating-point constant at a higher precision
-than the default and cannot use a command line assignment to @code{PREC},
+than the default and cannot use a command-line assignment to @code{PREC},
you should either specify the constant as a string, or as a rational
number, whenever possible. The following example illustrates the
differences among various ways to print a floating-point constant:
@@ -30451,7 +30620,7 @@ output when you change the rounding mode to be sure.
@cindex integers, arbitrary precision
@cindex arbitrary precision integers
-When given one of the options @option{--bignum} or @option{-M},
+When given the @option{-M} option,
@command{gawk} performs all integer arithmetic using GMP arbitrary
precision integers. Any number that looks like an integer in a source
or @value{DF} is stored as an arbitrary precision integer. The size
@@ -30565,8 +30734,20 @@ You can simulate the @code{div()} function in standard @command{awk}
using this user-defined function:
@example
+@c file eg/lib/div.awk
# div --- do integer division
+@c endfile
+@ignore
+@c file eg/lib/div.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# July, 2014
+
+@c endfile
+
+@end ignore
+@c file eg/lib/div.awk
function div(numerator, denominator, result, i)
@{
split("", result)
@@ -30578,6 +30759,7 @@ function div(numerator, denominator, result, i)
return 0.0
@}
+@c endfile
@end example
@node POSIX Floating Point Problems
@@ -30691,7 +30873,7 @@ values. The default for @command{awk} is to use double-precision
floating-point values.
@item
-In the 1980's, Barbie mistakenly said ``Math class is tough!''
+In the early 1990's, Barbie mistakenly said ``Math class is tough!''
While math isn't tough, floating-point arithmetic isn't the same
as pencil and paper math, and care must be taken:
@@ -30719,12 +30901,12 @@ Often, increasing the accuracy and then rounding to the desired
number of digits produces reasonable results.
@item
-Use either @option{-M} or @option{--bignum} to enable MPFR
+Use @option{-M} (or @option{--bignum}) to enable MPFR
arithmetic. Use @code{PREC} to set the precision in bits, and
@code{ROUNDMODE} to set the IEEE 754 rounding mode.
@item
-With @option{-M} or @option{--bignum}, @command{gawk} performs
+With @option{-M}, @command{gawk} performs
arbitrary precision integer arithmetic using the GMP library.
This is faster and more space efficient than using MPFR for
the same calculations.
@@ -30956,7 +31138,7 @@ Some other bits and pieces:
@itemize @value{BULLET}
@item
The API provides access to @command{gawk}'s @code{do_@var{xxx}} values,
-reflecting command line options, like @code{do_lint}, @code{do_profiling}
+reflecting command-line options, like @code{do_lint}, @code{do_profiling}
and so on (@pxref{Extension API Variables}).
These are informational: an extension cannot affect their values
inside @command{gawk}. In addition, attempting to assign to them
@@ -31107,7 +31289,7 @@ does not support this keyword, you should either place
@file{config.h} file in your extensions.
@item
-All pointers filled in by @command{gawk} are to memory
+All pointers filled in by @command{gawk} point to memory
managed by @command{gawk} and should be treated by the extension as
read-only. Memory for @emph{all} strings passed into @command{gawk}
from the extension @emph{must} come from calling the API-provided function
@@ -31641,8 +31823,8 @@ empty string (@code{""}). The @code{func} pointer is the address of a
An @dfn{exit callback} function is a function that
@command{gawk} calls before it exits.
Such functions are useful if you have general ``cleanup'' tasks
-that should be performed in your extension (such as closing data
-base connections or other resource deallocations).
+that should be performed in your extension (such as closing database
+connections or other resource deallocations).
You can register such
a function with @command{gawk} using the following function.
@@ -34720,6 +34902,7 @@ should be the place to do so.
@end itemize
+@c EXCLUDE START
@node Extension Exercises
@section Exercises
@@ -34742,6 +34925,7 @@ Write a wrapper script that provides an interface similar to
@ref{Extension Sample Inplace}.
@end enumerate
+@c EXCLUDE END
@ifnotinfo
@part @value{PART4}Appendices
@@ -35172,7 +35356,7 @@ Indirect function calls
@item
Directories on the command line produce a warning and are skipped
-(@pxref{Command line directories}).
+(@pxref{Command-line directories}).
@end itemize
@item
@@ -35256,8 +35440,7 @@ functions for internationalization
(@pxref{Programmer i18n}).
@item
-The @code{fflush()} function from Brian Kernighan's
-version of @command{awk}
+The @code{fflush()} function from BWK @command{awk}
(@pxref{I/O Functions}).
@item
@@ -35321,7 +35504,7 @@ and the
@option{--copyright},
@option{--debug},
@option{--dump-variables},
-@option{--execle},
+@option{--exec},
@option{--field-separator},
@option{--file},
@option{--gen-pot},
@@ -35402,6 +35585,10 @@ and the documentation for @command{gawk} @value{PVERSION} 4.1:
Ultrix
@end itemize
+@item
+@c FIXME: Verify the version here.
+Support for MirBSD was removed at @command{gawk} @value{PVERSION} 4.2.
+
@end itemize
@c XXX ADD MORE STUFF HERE
@@ -35519,7 +35706,7 @@ The ability to delete all of an array at once with @samp{delete @var{array}}
(@pxref{Delete}).
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
@itemize @value{MINUS}
@@ -35577,12 +35764,12 @@ The @code{next file} statement became @code{nextfile}
@item
The @code{fflush()} function from
-Brian Kernighan's @command{awk}
+BWK @command{awk}
(then at Bell Laboratories;
@pxref{I/O Functions}).
@item
-New command line options:
+New command-line options:
@itemize @value{MINUS}
@item
@@ -35592,7 +35779,7 @@ the original Version 7 Unix version of @command{awk}
(@pxref{V7/SVR3.1}).
@item
-The @option{-m} option from Brian Kernighan's @command{awk}. (He was
+The @option{-m} option from BWK @command{awk}. (Brian was
still at Bell Laboratories at the time.) This was later removed from
both his @command{awk} and from @command{gawk}.
@@ -35834,7 +36021,7 @@ An optional third argument to
(@pxref{String Functions}).
@item
-The behavior of @code{fflush()} changed to match Brian Kernighan's @command{awk}
+The behavior of @code{fflush()} changed to match BWK @command{awk}
and for POSIX; now both @samp{fflush()} and @samp{fflush("")}
flush all open output redirections
(@pxref{I/O Functions}).
@@ -35872,7 +36059,7 @@ Indirect function calls
(@pxref{Switch Statement}).
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
@itemize @value{MINUS}
@@ -35897,7 +36084,7 @@ All long options acquired corresponding short options, for use in @samp{#!} scri
@item
Directories named on the command line now produce a warning, not a fatal
error, unless @option{--posix} or @option{--traditional} are used
-(@pxref{Command line directories}).
+(@pxref{Command-line directories}).
@item
The @command{gawk} internals were rewritten, bringing the @command{dgawk}
@@ -35973,10 +36160,10 @@ Three new arrays:
@item
The three executables @command{gawk}, @command{pgawk}, and @command{dgawk}, were merged into
-one, named just @command{gawk}. As a result the command line options changed.
+one, named just @command{gawk}. As a result the command-line options changed.
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
@itemize @value{MINUS}
@@ -37318,7 +37505,7 @@ The following changes the record separator to @code{"\r\n"} and sets binary
mode on reads, but does not affect the mode on standard input:
@example
-gawk -v RS="\r\n" --source "BEGIN @{ BINMODE = 1 @}" @dots{}
+gawk -v RS="\r\n" -e "BEGIN @{ BINMODE = 1 @}" @dots{}
@end example
@noindent
@@ -37931,7 +38118,7 @@ since approximately 2003.
@cindex source code, @command{pawk}
@item @command{pawk}
Nelson H.F.@: Beebe at the University of Utah has modified
-Brian Kernighan's @command{awk} to provide timing and profiling information.
+BWK @command{awk} to provide timing and profiling information.
It is different from @command{gawk} with the @option{--profile} option
(@pxref{Profiling}),
in that it uses CPU-based profiling, not line-count
@@ -37994,8 +38181,7 @@ This is an embeddable @command{awk} interpreter derived from
This is a Python module that claims to bring @command{awk}-like
features to Python. See @uref{https://github.com/alecthomas/pawk}
for more information. (This is not related to Nelson Beebe's
-modified version of Brian Kernighan's @command{awk},
-described earlier.)
+modified version of BWK @command{awk}, described earlier.)
@item @w{QSE Awk}
@cindex QSE Awk
@@ -38134,7 +38320,7 @@ as well as any considerations you should bear in mind.
@appendixsubsec Accessing The @command{gawk} Git Repository
As @command{gawk} is Free Software, the source code is always available.
-@ref{Gawk Distribution}, describes how to get and build the formal,
+@DBREF{Gawk Distribution} describes how to get and build the formal,
released versions of @command{gawk}.
@cindex @command{git} utility
@@ -39016,7 +39202,7 @@ compiled with @samp{-DDEBUG}.
@item
The source code for @command{gawk} is maintained in a publicly
-accessable Git repository. Anyone may check it out and view the source.
+accessible Git repository. Anyone may check it out and view the source.
@item
Contributions to @command{gawk} are welcome. Following the steps
@@ -41354,13 +41540,14 @@ Consistency issues:
Use "zeros" instead of "zeroes".
Use "nonzero" not "non-zero".
Use "runtime" not "run time" or "run-time".
- Use "command-line" not "command line".
+ Use "command-line" as an adjective and "command line" as a noun.
Use "online" not "on-line".
Use "whitespace" not "white space".
Use "Input/Output", not "input/output". Also "I/O", not "i/o".
Use "lefthand"/"righthand", not "left-hand"/"right-hand".
Use "workaround", not "work-around".
Use "startup"/"cleanup", not "start-up"/"clean-up"
+ Use "filesystem", not "file system"
Use @code{do}, and not @code{do}-@code{while}, except where
actually discussing the do-while.
Use "versus" in text and "vs." in index entries
@@ -41375,8 +41562,6 @@ Consistency issues:
The numbers zero through ten should be spelled out, except when
talking about file descriptor numbers. > 10 and < 0, it's
ok to use numbers.
- In tables, put command-line options in @code, while in the text,
- put them in @option.
For most cases, do NOT put a comma before "and", "or" or "but".
But exercise taste with this rule.
Don't show the awk command with a program in quotes when it's
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index bb94aa1f..679073bf 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -46,7 +46,7 @@
@c applies to and all the info about who's publishing this edition
@c These apply across the board.
-@set UPDATE-MONTH July, 2014
+@set UPDATE-MONTH August, 2014
@set VERSION 4.1
@set PATCHLEVEL 1
@@ -160,6 +160,19 @@
@end macro
@end ifdocbook
+@c hack for docbook, where comma shouldn't always follow an @ref{}
+@ifdocbook
+@macro DBREF{text}
+@ref{\text\}
+@end macro
+@end ifdocbook
+
+@ifnotdocbook
+@macro DBREF{text}
+@ref{\text\},
+@end macro
+@end ifnotdocbook
+
@ifclear FOR_PRINT
@set FN file name
@set FFN File Name
@@ -521,10 +534,10 @@ particular records in a file and perform operations upon them.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
* Records:: Controlling how data is split into
records.
@@ -541,7 +554,7 @@ particular records in a file and perform operations upon them.
* Single Character Fields:: Making each character a separate
field.
* Command Line Field Separator:: Setting @code{FS} from the
- command-line.
+ command line.
* Full Line Fields:: Making the full line be a single
field.
* Field Splitting Summary:: Some final points and a summary table.
@@ -567,7 +580,7 @@ particular records in a file and perform operations upon them.
@code{getline}.
* Getline Summary:: Summary of @code{getline} Variants.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on
+* Command-line directories:: What happens if you put a directory on
the command line.
* Input Summary:: Input summary.
* Input Exercises:: Exercises.
@@ -595,7 +608,7 @@ particular records in a file and perform operations upon them.
* Close Files And Pipes:: Closing Input and Output Files and
Pipes.
* Output Summary:: Output summary.
-* Output exercises:: Exercises.
+* Output Exercises:: Exercises.
* Values:: Constants, Variables, and Regular
Expressions.
* Constants:: String, numeric and regexp constants.
@@ -606,7 +619,7 @@ particular records in a file and perform operations upon them.
* Variables:: Variables give names to values for
later use.
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line
+* Assignment Options:: Setting variables on the command line
and a summary of command-line syntax.
This is an advanced method of input.
* Conversion:: The conversion of strings to numbers
@@ -782,7 +795,7 @@ particular records in a file and perform operations upon them.
information.
* Walking Arrays:: A function to walk arrays of arrays.
* Library Functions Summary:: Summary of library functions.
-* Library exercises:: Exercises.
+* Library Exercises:: Exercises.
* Running Examples:: How to run these examples.
* Clones:: Clones of common utilities.
* Cut Program:: The @command{cut} utility.
@@ -1206,23 +1219,18 @@ March, 2001
</prefaceinfo>
@end docbook
-Several kinds of tasks occur repeatedly
-when working with text files.
-You might want to extract certain lines and discard the rest.
-Or you may need to make changes wherever certain patterns appear,
-but leave the rest of the file alone.
-Writing single-use programs for these tasks in languages such as C, C++,
-or Java is time-consuming and inconvenient.
-Such jobs are often easier with @command{awk}.
-The @command{awk} utility interprets a special-purpose programming language
-that makes it easy to handle simple data-reformatting jobs.
+Several kinds of tasks occur repeatedly when working with text files.
+You might want to extract certain lines and discard the rest. Or you
+may need to make changes wherever certain patterns appear, but leave the
+rest of the file alone. Such jobs are often easy with @command{awk}.
+The @command{awk} utility interprets a special-purpose programming
+language that makes it easy to handle simple data-reformatting jobs.
-@cindex Brian Kernighan's @command{awk}
The GNU implementation of @command{awk} is called @command{gawk}; if you
invoke it with the proper options or environment variables
(@pxref{Options}), it is fully
compatible with
-the POSIX@footnote{The 2008 POSIX standard is accessable online at
+the POSIX@footnote{The 2008 POSIX standard is accessible online at
@w{@url{http://www.opengroup.org/onlinepubs/9699919799/}.}}
specification of the @command{awk} language
and with the Unix version of @command{awk} maintained
@@ -1296,7 +1304,7 @@ different computing environments. This @value{DOCUMENT}, while describing
the @command{awk} language in general, also describes the particular
implementation of @command{awk} called @command{gawk} (which stands for
``GNU @command{awk}''). @command{gawk} runs on a broad range of Unix systems,
-ranging from Intel@registeredsymbol{}-architecture PC-based computers
+ranging from Intel-architecture PC-based computers
up through large-scale systems.
@command{gawk} has also been ported to Mac OS X,
Microsoft Windows
@@ -1371,7 +1379,7 @@ help from me, thoroughly reworked @command{gawk} for compatibility
with the newer @command{awk}.
Circa 1994, I became the primary maintainer.
Current development focuses on bug fixes,
-performance improvements, standards compliance, and occasionally, new features.
+performance improvements, standards compliance and, occasionally, new features.
In May of 1997, J@"urgen Kahrs felt the need for network access
from @command{awk}, and with a little help from me, set about adding
@@ -1396,29 +1404,27 @@ for a complete list of those who made important contributions to @command{gawk}.
The @command{awk} language has evolved over the years. Full details are
provided in @ref{Language History}.
The language described in this @value{DOCUMENT}
-is often referred to as ``new @command{awk}'' (@command{nawk}).
+is often referred to as ``new @command{awk}.''
+By analogy, the original version of @command{awk} is
+referred to as ``old @command{awk}.''
-@cindex @command{awk}, versions of
-@cindex @command{nawk} utility
-@cindex @command{oawk} utility
-For some time after new @command{awk} was introduced, there were
-systems with multiple versions of @command{awk}. Some systems had
-an @command{awk} utility that implemented the original version of the
-@command{awk} language and a @command{nawk} utility for the new version.
-Others had an @command{oawk} version for the ``old @command{awk}''
-language and plain @command{awk} for the new one. Still others only
-had one version, which is usually the new one.
-
-Today, only Solaris systems still use an old @command{awk} for the
-default @command{awk} utility. (A more modern @command{awk} lives in
-@file{/usr/xpg6/bin} on these systems.) All other modern systems use
-some version of new @command{awk}.@footnote{Many of these systems use
-@command{gawk} for their @command{awk} implementation!}
-
-It is likely that you already have some version of new @command{awk} on
-your system, which is what you should use when running your programs.
-(Of course, if you're reading this @value{DOCUMENT}, chances are good
-that you have @command{gawk}!)
+Today, on most systems, when you run the @command{awk} utility,
+you get some version of new @command{awk}.@footnote{Only
+Solaris systems still use an old @command{awk} for the
+default @command{awk} utility. A more modern @command{awk} lives in
+@file{/usr/xpg6/bin} on these systems.} If your system's standard
+@command{awk} is the old one, you will see something like this
+if you try the test program:
+
+@example
+$ @kbd{awk 1 /dev/null}
+@error{} awk: syntax error near line 1
+@error{} awk: bailing out near line 1
+@end example
+
+@noindent
+In this case, you should find a version of new @command{awk},
+or just install @command{gawk}!
Throughout this @value{DOCUMENT}, whenever we refer to a language feature
that should be available in any complete implementation of POSIX @command{awk},
@@ -1469,7 +1475,9 @@ There are sidebars
scattered throughout the @value{DOCUMENT}.
They add a more complete explanation of points that are relevant, but not likely
to be of interest on first reading.
+@ifclear FOR_PRINT
All appear in the index, under the heading ``sidebar.''
+@end ifclear
Most of the time, the examples use complete @command{awk} programs.
Some of the more advanced sections show only the part of the @command{awk}
@@ -1624,6 +1632,9 @@ try looking them up here.
@uref{http://www.gnu.org/software/gawk/manual/html_node/GNU-Free-Documentation-License.html,
The GNU FDL}
is the license that covers this @value{DOCUMENT}.
+
+Some of the chapters have exercise sections; these have also been
+omitted from the print edition.
@end ifset
@ifclear FOR_PRINT
@@ -1664,11 +1675,18 @@ are slightly different than in other books you may have read.
This @value{SECTION} briefly documents the typographical conventions used in Texinfo.
@end ifinfo
-Examples you would type at the command-line are preceded by the common
+Examples you would type at the command line are preceded by the common
shell primary and secondary prompts, @samp{$} and @samp{>}.
Input that you type is shown @kbd{like this}.
+@c 8/2014: @print{} is stripped from the texi to make docbook.
+@ifclear FOR_PRINT
Output from the command is preceded by the glyph ``@print{}''.
This typically represents the command's standard output.
+@end ifclear
+@ifset FOR_PRINT
+Output from the command, usually its standard output, appears
+@code{like this}.
+@end ifset
Error messages, and other output on the command's standard error, are preceded
by the glyph ``@error{}''. For example:
@@ -1698,6 +1716,10 @@ another key, at the same time. For example, a @kbd{Ctrl-d} is typed
by first pressing and holding the @kbd{CONTROL} key, next
pressing the @kbd{d} key and finally releasing both keys.
+For the sake of brevity, throughout this @value{DOCUMENT}, we refer to
+Brian Kernighan's version of @command{awk} as ``BWK @command{awk}.''
+(@xref{Other Versions}, for information on his and other versions.)
+
@ifset FOR_PRINT
@quotation NOTE
Notes of interest look like this.
@@ -1737,6 +1759,7 @@ They also appear in the index under the heading ``dark corner.''
As noted by the opening quote, though, any coverage of dark corners is,
by definition, incomplete.
+@cindex c.e., See common extensions
Extensions to the standard @command{awk} language that are supported by
more than one @command{awk} implementation are marked
@ifclear FOR_PRINT
@@ -1744,7 +1767,7 @@ more than one @command{awk} implementation are marked
and ``extensions, common.''
@end ifclear
@ifset FOR_PRINT
-``@value{COMMONEXT}.''
+``@value{COMMONEXT}'' for ``common extension.''
@end ifset
@node Manual History
@@ -1783,6 +1806,7 @@ see @uref{http://www.gnu.org, the GNU Project's home page}.
This @value{DOCUMENT} may also be read from
@uref{http://www.gnu.org/software/gawk/manual/, their web site}.
+@ifclear FOR_PRINT
A shell, an editor (Emacs), highly portable optimizing C, C++, and
Objective-C compilers, a symbolic debugger and dozens of large and
small utilities (such as @command{gawk}), have all been completed and are
@@ -1793,32 +1817,16 @@ stage of development.
@cindex Linux
@cindex GNU/Linux
@cindex operating systems, BSD-based
-@cindex Alpha (DEC)
Until the GNU operating system is more fully developed, you should
consider using GNU/Linux, a freely distributable, Unix-like operating
-system for Intel@registeredsymbol{},
+system for Intel,
Power Architecture,
Sun SPARC, IBM S/390, and other
-@ifclear FOR_PRINT
systems.@footnote{The terminology ``GNU/Linux'' is explained
in the @ref{Glossary}.}
-@end ifclear
-@ifset FOR_PRINT
-systems.
-@end ifset
Many GNU/Linux distributions are
available for download from the Internet.
-
-(There are numerous other freely available, Unix-like operating systems
-based on the
-Berkeley Software Distribution, and some of them use recent versions
-of @command{gawk} for their versions of @command{awk}.
-@uref{http://www.netbsd.org, NetBSD},
-@uref{http://www.freebsd.org, FreeBSD},
-and
-@uref{http://www.openbsd.org, OpenBSD}
-are three of the most popular ones, but there
-are others.)
+@end ifclear
@ifnotinfo
The @value{DOCUMENT} you are reading is actually free---at least, the
@@ -2062,17 +2070,29 @@ people.
Notable code and documentation contributions were made by
a number of people. @xref{Contributors}, for the full list.
-Thanks to Patrice Dumas for the new @command{makeinfo} program.
+Thanks to Patrice Dumas for the new @command{makeinfo} program.
Thanks to Karl Berry who continues to work to keep
the Texinfo markup language sane.
@cindex Kernighan, Brian
+@cindex Brennan, Michael
+@cindex Day, Robert P.J.@:
+Robert P.J.@: Day, Michael Brennan and Brian Kernighan kindly acted as
+reviewers for the 2015 edition of this @value{DOCUMENT}. Their feedback
+helped improve the final work.
+
I would like to thank Brian Kernighan for invaluable assistance during the
testing and debugging of @command{gawk}, and for ongoing
help and advice in clarifying numerous points about the language.
We could not have done nearly as good a job on either @command{gawk}
or its documentation without his help.
+Brian is in a class by himself as a programmer and technical
+author. I have to thank him (yet again) for his ongoing friendship
+and the role model he has been for me for close to 30 years!
+Having him as a reviewer is an exciting privilege. It has also
+been extremely humbling@enddots{}
+
@cindex Robbins, Miriam
@cindex Robbins, Jean
@cindex Robbins, Harry
@@ -2307,29 +2327,27 @@ For example, on OS/2, it is @kbd{Ctrl-z}.)
As an example, the following program prints a friendly piece of advice
(from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}),
to keep you from worrying about the complexities of computer
-programming@footnote{If you use Bash as your shell, you should execute
-the command @samp{set +H} before running this program interactively,
-to disable the C shell-style command history, which treats
-@samp{!} as a special character. We recommend putting this command into
-your personal startup file.}
-(@code{BEGIN} is a feature we haven't discussed yet):
+programming:
@example
-$ @kbd{awk "BEGIN @{ print \"Don't Panic!\" @}"}
+$ @kbd{awk 'BEGIN @{ print "Don\47t Panic!" @}'}
@print{} Don't Panic!
@end example
-@cindex shell quoting, double quote
-@cindex double quote (@code{"}) in shell commands
-@cindex @code{"} (double quote) in shell commands
-@cindex @code{\} (backslash) in shell commands
-@cindex backslash (@code{\}) in shell commands
-This program does not read any input. The @samp{\} before each of the
-inner double quotes is necessary because of the shell's quoting
-rules---in particular because it mixes both single quotes and
-double quotes.@footnote{Although we generally recommend the use of single
-quotes around the program text, double quotes are needed here in order to
-put the single quote into the message.}
+@command{awk} executes statements associated with @code{BEGIN} before
+reading any input. If there are no other statements in your program,
+as is the case here, @command{awk} just stops, instead of trying to read
+input it doesn't know how to process.
+The @samp{\47} is a magic way of getting a single quote into
+the program, without having to engage in ugly shell quoting tricks.
+
+@quotation NOTE
+As a side note, if you use Bash as your shell, you should execute the
+command @samp{set +H} before running this program interactively, to
+disable the C shell-style command history, which treats @samp{!} as a
+special character. We recommend putting this command into your personal
+startup file.
+@end quotation
This next simple @command{awk} program
emulates the @command{cat} utility; it copies whatever you type on the
@@ -2364,9 +2382,10 @@ awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{}
@cindex @option{-f} option
@cindex command line, option @option{-f}
-The @option{-f} instructs the @command{awk} utility to get the @command{awk} program
-from the file @var{source-file}. Any @value{FN} can be used for
-@var{source-file}. For example, you could put the program:
+The @option{-f} instructs the @command{awk} utility to get the
+@command{awk} program from the file @var{source-file} (@pxref{Options}).
+Any @value{FN} can be used for @var{source-file}. For example, you
+could put the program:
@example
BEGIN @{ print "Don't Panic!" @}
@@ -2427,16 +2446,7 @@ BEGIN @{ print "Don't Panic!" @}
@noindent
After making this file executable (with the @command{chmod} utility),
simply type @samp{advice}
-at the shell and the system arranges to run @command{awk}@footnote{The
-line beginning with @samp{#!} lists the full @value{FN} of an interpreter
-to run and an optional initial command-line argument to pass to that
-interpreter. The operating system then runs the interpreter with the given
-argument and the full argument list of the executed program. The first argument
-in the list is the full @value{FN} of the @command{awk} program.
-The rest of the
-argument list contains either options to @command{awk}, or @value{DF}s,
-or both. Note that on many systems @command{awk} may be found in
-@file{/usr/bin} instead of in @file{/bin}. Caveat Emptor.} as if you had
+at the shell and the system arranges to run @command{awk} as if you had
typed @samp{awk -f advice}:
@example
@@ -2454,9 +2464,27 @@ Self-contained @command{awk} scripts are useful when you want to write a
program that users can invoke without their having to know that the program is
written in @command{awk}.
-@sidebar Portability Issues with @samp{#!}
+@sidebar Understanding @samp{#!}
@cindex portability, @code{#!} (executable scripts)
+@command{awk} is an @dfn{interpreted} language. This means that the
+@command{awk} utility reads your program and then processes your data
+according to the instructions in your program. (This is different
+from a @dfn{compiled} language such as C, where your program is first
+compiled into machine code that is executed directly by your system's
+hardware.) The @command{awk} utility is thus termed an @dfn{interpreter}.
+Many modern languages are interpreted.
+
+The line beginning with @samp{#!} lists the full @value{FN} of an
+interpreter to run and a single optional initial command-line argument
+to pass to that interpreter. The operating system then runs the
+interpreter with the given argument and the full argument list of the
+executed program. The first argument in the list is the full @value{FN}
+of the @command{awk} program. The rest of the argument list contains
+either options to @command{awk}, or @value{DF}s, or both. Note that on
+many systems @command{awk} may be found in @file{/usr/bin} instead of
+in @file{/bin}. Caveat Emptor.
+
Some systems limit the length of the interpreter name to 32 characters.
Often, this can be dealt with by using a symbolic link.
@@ -2468,8 +2496,7 @@ of some sort from @command{awk}.
@cindex @code{ARGC}/@code{ARGV} variables, portability and
@cindex portability, @code{ARGV} variable
-Finally,
-the value of @code{ARGV[0]}
+Finally, the value of @code{ARGV[0]}
(@pxref{Built-in Variables})
varies depending upon your operating system.
Some systems put @samp{awk} there, some put the full pathname
@@ -2648,7 +2675,7 @@ Note that the single quote is not special within double quotes.
@item
Null strings are removed when they occur as part of a non-null
-command-line argument, while explicit non-null objects are kept.
+command-line argument, while explicit null objects are kept.
For example, to specify that the field separator @code{FS} should
be set to the null string, use:
@@ -2795,7 +2822,9 @@ each line is considered to be one @dfn{record}.
In the @value{DF} @file{mail-list}, each record contains the name of a person,
his/her phone number, his/her email-address, and a code for their relationship
-with the author of the list. An @samp{A} in the last column
+with the author of the list.
+The columns are aligned using spaces.
+An @samp{A} in the last column
means that the person is an acquaintance. An @samp{F} in the last
column means that the person is a friend.
An @samp{R} means that the person is a relative:
@@ -2829,6 +2858,7 @@ of green crates shipped, the number of red boxes shipped, the number of
orange bags shipped, and the number of blue packages shipped,
respectively. There are 16 entries, covering the 12 months of last year
and the first four months of the current year.
+An empty line separates the data for the two years.
@example
@c file eg/data/inventory-shipped
@@ -2924,34 +2954,39 @@ you can come up with different ways to do the same things shown here:
@itemize @value{BULLET}
@item
-Print the length of the longest input line:
+Print every line that is longer than 80 characters:
@example
-awk '@{ if (length($0) > max) max = length($0) @}
- END @{ print max @}' data
+awk 'length($0) > 80' data
@end example
+The sole rule has a relational expression as its pattern and it has no
+action---so it uses the default action, printing the record.
+
@item
-Print every line that is longer than 80 characters:
+Print the length of the longest input line:
@example
-awk 'length($0) > 80' data
+awk '@{ if (length($0) > max) max = length($0) @}
+ END @{ print max @}' data
@end example
-The sole rule has a relational expression as its pattern and it has no
-action---so it uses the default action, printing the record.
+The code associated with @code{END} executes after all
+input has been read; it's the other side of the coin to @code{BEGIN}.
@cindex @command{expand} utility
@item
Print the length of the longest line in @file{data}:
@example
-expand data | awk '@{ if (x < length()) x = length() @}
+expand data | awk '@{ if (x < length($0)) x = length($0) @}
END @{ print "maximum line length is " x @}'
@end example
+This example differs slightly from the previous one:
The input is processed by the @command{expand} utility to change TABs
-into spaces, so the widths compared are actually the right-margin columns.
+into spaces, so the widths compared are actually the right-margin columns,
+as opposed to the number of input characters on each line.
@item
Print every line that has at least one field:
@@ -3078,8 +3113,8 @@ features that haven't been covered yet, so don't worry if you don't
understand all the details:
@example
-LC_ALL=C ls -l | awk '$6 == "Nov" @{ sum += $5 @}
- END @{ print sum @}'
+ls -l | awk '$6 == "Nov" @{ sum += $5 @}
+ END @{ print sum @}'
@end example
@cindex @command{ls} utility
@@ -3297,7 +3332,7 @@ and array sorting.
As we develop our presentation of the @command{awk} language, we introduce
most of the variables and many of the functions. They are described
-systematically in @ref{Built-in Variables}, and
+systematically in @ref{Built-in Variables}, and in
@ref{Built-in}.
@node When
@@ -3332,33 +3367,30 @@ eight-bit microprocessors,
and a microcode assembler for a special-purpose Prolog
computer.
While the original @command{awk}'s capabilities were strained by tasks
-of such complexity, modern versions are more capable. Even Brian Kernighan's
-version of @command{awk} has fewer predefined limits, and those
-that it has are much larger than they used to be.
+of such complexity, modern versions are more capable.
@cindex @command{awk} programs, complex
-If you find yourself writing @command{awk} scripts of more than, say, a few
-hundred lines, you might consider using a different programming
-language.
-The shell is good at string and
-pattern matching; in addition, it allows powerful use of the system
-utilities. More conventional languages, such as C, C++, and Java, offer
-better facilities for system programming and for managing the complexity
-of large programs.
-Python offers a nice balance between high-level ease of programming and
-access to system facilities.
-Programs in these languages may require more lines
-of source code than the equivalent @command{awk} programs, but they are
-easier to maintain and usually run more efficiently.
+If you find yourself writing @command{awk} scripts of more than, say,
+a few hundred lines, you might consider using a different programming
+language. The shell is good at string and pattern matching; in addition,
+it allows powerful use of the system utilities. Python offers a nice
+balance between high-level ease of programming and access to system
+facilities.@footnote{Other popular scripting languages include Ruby
+and Perl.}
@node Intro Summary
@section Summary
+@c FIXME: Review this chapter for summary of builtin functions called.
@itemize @value{BULLET}
@item
Programs in @command{awk} consist of @var{pattern}-@var{action} pairs.
@item
+An @var{action} without a @var{pattern} always runs. The default
+@var{action} for a pattern without one is @samp{@{ print $0 @}}.
+
+@item
Use either
@samp{awk '@var{program}' @var{files}}
or
@@ -3580,7 +3612,7 @@ multibyte characters. This option is an easy way to tell @command{gawk}:
@cindex compatibility mode (@command{gawk}), specifying
Specify @dfn{compatibility mode}, in which the GNU extensions to
the @command{awk} language are disabled, so that @command{gawk} behaves just
-like Brian Kernighan's version @command{awk}.
+like BWK @command{awk}.
@xref{POSIX/GNU},
which summarizes the extensions.
@ifclear FOR_PRINT
@@ -3665,7 +3697,7 @@ Command-line variable assignments of the form
This option is particularly necessary for World Wide Web CGI applications
that pass arguments through the URL; using this option prevents a malicious
(or other) user from passing in options, assignments, or @command{awk} source
-code (via @option{--source}) to the CGI application. This option should be used
+code (via @option{-e}) to the CGI application. This option should be used
with @samp{#!} scripts (@pxref{Executable Scripts}), like so:
@example
@@ -3711,7 +3743,7 @@ Second, because this option is intended to be used with code libraries,
@command{gawk} does not recognize such files as constituting main program
input. Thus, after processing an @option{-i} argument, @command{gawk}
still expects to find the main source code via the @option{-f} option
-or on the command-line.
+or on the command line.
@item @option{-l} @var{ext}
@itemx @option{--load} @var{ext}
@@ -3735,7 +3767,7 @@ a shared library. This feature is described in detail in @ref{Dynamic Extension
@cindex warnings, issuing
Warn about constructs that are dubious or nonportable to
other @command{awk} implementations.
-No space is allowed between the @option{-D} and @var{value}, if
+No space is allowed between the @option{-L} and @var{value}, if
@var{value} is supplied.
Some warnings are issued when @command{gawk} first reads your program. Others
are issued at runtime, as your program executes.
@@ -3854,7 +3886,7 @@ Newlines are not allowed after @samp{?} or @samp{:}
@cindex @code{FS} variable, as TAB character
@item
-Specifying @samp{-Ft} on the command-line does not set the value
+Specifying @samp{-Ft} on the command line does not set the value
of @code{FS} to be a single TAB character
(@pxref{Field Separators}).
@@ -3951,14 +3983,14 @@ source of data.)
Because it is clumsy using the standard @command{awk} mechanisms to mix
source file and command-line @command{awk} programs, @command{gawk}
-provides the @option{--source} option. This does not require you to
+provides the @option{-e} option. This does not require you to
pre-empt the standard input for your source code; it allows you to easily
mix command-line and library source code (@pxref{AWKPATH Variable}).
-As with @option{-f}, the @option{--source} and @option{--include}
+As with @option{-f}, the @option{-e} and @option{-i}
options may also be used multiple times on the command line.
-@cindex @option{--source} option
-If no @option{-f} or @option{--source} option is specified, then @command{gawk}
+@cindex @option{-e} option
+If no @option{-f} or @option{-e} option is specified, then @command{gawk}
uses the first non-option command-line argument as the text of the
program source code.
@@ -4026,6 +4058,11 @@ included. As each element of @code{ARGV} is processed, @command{gawk}
sets the variable @code{ARGIND} to the index in @code{ARGV} of the
current element.
+@c FIXME: One day, move the ARGC and ARGV node closer to here.
+Changing @code{ARGC} and @code{ARGV} in your @command{awk} program lets
+you control how @command{awk} processes the input files; this is described
+in more detail in @ref{ARGC and ARGV}.
+
@cindex input files, variable assignments and
@cindex variable assignments and input files
The distinction between @value{FN} arguments and variable-assignment
@@ -4100,7 +4137,7 @@ with @code{getline}.
Some other versions of @command{awk} also support this, but it
is not standard.
(Some operating systems provide a @file{/dev/stdin} file
-in the file system; however, @command{gawk} always processes
+in the filesystem; however, @command{gawk} always processes
this @value{FN} itself.)
@node Environment Variables
@@ -4126,7 +4163,7 @@ behaves.
@cindex differences in @command{awk} and @command{gawk}, @code{AWKPATH} environment variable
@ifinfo
The previous @value{SECTION} described how @command{awk} program files can be named
-on the command-line with the @option{-f} option.
+on the command line with the @option{-f} option.
@end ifinfo
In most @command{awk}
implementations, you must supply a precise path name for each program
@@ -4154,7 +4191,7 @@ standard directory in the default path and then specified on
the command line with a short @value{FN}. Otherwise, the full @value{FN}
would have to be typed for each file.
-By using the @option{-i} option, or the @option{--source} and @option{-f} options, your command-line
+By using the @option{-i} option, or the @option{-e} and @option{-f} options, your command-line
@command{awk} programs can use facilities in @command{awk} library files
(@pxref{Library Functions}).
Path searching is not done if @command{gawk} is in compatibility mode.
@@ -4221,7 +4258,7 @@ list are meant to be used by regular users.
@table @env
@item POSIXLY_CORRECT
-Causes @command{gawk} to switch POSIX compatibility
+Causes @command{gawk} to switch to POSIX compatibility
mode, disabling all traditional and GNU extensions.
@xref{Options}.
@@ -4254,7 +4291,7 @@ file as the size of the memory buffer to allocate for I/O. Otherwise,
the value should be a number, and @command{gawk} uses that number as
the size of the buffer to allocate. (When this variable is not set,
@command{gawk} uses the smaller of the file's size and the ``default''
-blocksize, which is usually the file systems I/O blocksize.)
+blocksize, which is usually the filesystem's I/O blocksize.)
@item AWK_HASH
If this variable exists with a value of @samp{gst}, @command{gawk}
@@ -4327,6 +4364,9 @@ to @code{EXIT_FAILURE}.
This @value{SECTION} describes a feature that is specific to @command{gawk}.
+@cindex @code{@@include} directive
+@cindex file inclusion, @code{@@include} directive
+@cindex including files, @code{@@include} directive
The @code{@@include} keyword can be used to read external @command{awk} source
files. This gives you the ability to split large @command{awk} source files
into smaller, more manageable pieces, and also lets you reuse common @command{awk}
@@ -4446,6 +4486,9 @@ and this also applies to files named with @code{@@include}.
This @value{SECTION} describes a feature that is specific to @command{gawk}.
+@cindex @code{@@load} directive
+@cindex loading extensions, @code{@@load} directive
+@cindex extensions, loading, @code{@@load} directive
The @code{@@load} keyword can be used to read external @command{awk} extensions
(stored as system shared libraries).
This allows you to link in compiled code that may offer superior
@@ -4587,9 +4630,9 @@ or
to run @command{awk}.
@item
-The three standard @command{awk} options are @option{-f}, @option{-F}
-and @option{-v}. @command{gawk} supplies these and many others, as well
-as corresponding GNU-style long options.
+The three standard options for all versions of @command{awk} are
+@option{-f}, @option{-F} and @option{-v}. @command{gawk} supplies these
+and many others, as well as corresponding GNU-style long options.
@item
Non-option command-line arguments are usually treated as @value{FN}s,
@@ -4647,7 +4690,7 @@ The simplest regular expression is a sequence of letters, numbers, or
both. Such a regexp matches any string that contains that sequence.
Thus, the regexp @samp{foo} matches any string containing @samp{foo}.
Therefore, the pattern @code{/foo/} matches any input record containing
-the three characters @samp{foo} @emph{anywhere} in the record. Other
+the three adjacent characters @samp{foo} @emph{anywhere} in the record. Other
kinds of regexps let you specify more complicated classes of strings.
@ifnotinfo
@@ -4661,10 +4704,10 @@ regular expressions work, we present more complicated instances.
* Escape Sequences:: How to write nonprinting characters.
* Regexp Operators:: Regular Expression Operators.
* Bracket Expressions:: What can go between @samp{[...]}.
-* GNU Regexp Operators:: Operators specific to GNU software.
-* Case-sensitivity:: How to do case-insensitive matching.
* Leftmost Longest:: How much text matches.
* Computed Regexps:: Using Dynamic Regexps.
+* GNU Regexp Operators:: Operators specific to GNU software.
+* Case-sensitivity:: How to do case-insensitive matching.
* Regexp Summary:: Regular expressions summary.
@end menu
@@ -4856,20 +4899,30 @@ between @samp{0} and @samp{7}. For example, the code for the ASCII ESC
@item \x@var{hh}@dots{}
The hexadecimal value @var{hh}, where @var{hh} stands for a sequence
of hexadecimal digits (@samp{0}--@samp{9}, and either @samp{A}--@samp{F}
-or @samp{a}--@samp{f}). Like the same construct
-in ISO C, the escape sequence continues until the first nonhexadecimal
-digit is seen. @value{COMMONEXT}
+or @samp{a}--@samp{f}). A maximum of two digits are allowed after
+the @samp{\x}. Any further hexadecimal digits are treated as simple
+letters or numbers. @value{COMMONEXT}
+
+@quotation CAUTION
+In ISO C, the escape sequence continues until the first nonhexadecimal
+digit is seen.
+@c FIXME: Add exact version here.
+For many years, @command{gawk} would continue incorporating
+hexadecimal digits into the value until a nonhexadecimal digit
+or the end of the string was encountered.
However, using more than two hexadecimal digits produces
-undefined results. (The @samp{\x} escape sequence is not allowed in
-POSIX @command{awk}.)
+undefined results.
+@end quotation
@cindex @code{\} (backslash), @code{\/} escape sequence
@cindex backslash (@code{\}), @code{\/} escape sequence
@item \/
A literal slash (necessary for regexp constants only).
This sequence is used when you want to write a regexp
-constant that contains a slash. Because the regexp is delimited by
-slashes, you need to escape the slash that is part of the pattern,
+constant that contains a slash
+(such as @code{/.*:\/home\/[[:alnum:]]+:.*/}; the @samp{[[:alnum:]]}
+notation is discussed shortly, in @ref{Bracket Expressions}).
+Because the regexp is delimited by
+slashes, you need to escape any slash that is part of the pattern,
in order to tell @command{awk} to keep processing the rest of the regexp.
@cindex @code{\} (backslash), @code{\"} escape sequence
@@ -4877,8 +4930,10 @@ in order to tell @command{awk} to keep processing the rest of the regexp.
@item \"
A literal double quote (necessary for string constants only).
This sequence is used when you want to write a string
-constant that contains a double quote. Because the string is delimited by
-double quotes, you need to escape the quote that is part of the string,
+constant that contains a double quote
+(such as @code{"He said \"hi!\" to her."}).
+Because the string is delimited by
+double quotes, you need to escape any quote that is part of the string,
in order to tell @command{awk} to keep processing the rest of the string.
@end table
@@ -4934,7 +4989,7 @@ leaves what happens as undefined. There are two choices:
@cindex Brian Kernighan's @command{awk}
@table @asis
@item Strip the backslash out
-This is what Brian Kernighan's @command{awk} and @command{gawk} both do.
+This is what BWK @command{awk} and @command{gawk} both do.
For example, @code{"a\qc"} is the same as @code{"aqc"}.
(Because this is such an easy bug both to introduce and to miss,
@command{gawk} warns you about it.)
@@ -4987,7 +5042,7 @@ The escape sequences described
@ifnotinfo
earlier
@end ifnotinfo
-in @ref{Escape Sequences},
+in @DBREF{Escape Sequences}
are valid inside a regexp. They are introduced by a @samp{\} and
are recognized and converted into corresponding real characters as
the very first step in processing regexps.
@@ -5084,12 +5139,11 @@ or @samp{k}.
@cindex vertical bar (@code{|})
@item @code{|}
This is the @dfn{alternation operator} and it is used to specify
-alternatives.
-The @samp{|} has the lowest precedence of all the regular
-expression operators.
-For example, @samp{^P|[[:digit:]]}
-matches any string that matches either @samp{^P} or @samp{[[:digit:]]}. This
-means it matches any string that starts with @samp{P} or contains a digit.
+alternatives. The @samp{|} has the lowest precedence of all the regular
+expression operators. For example, @samp{^P|[aeiouy]} matches any string
+that matches either @samp{^P} or @samp{[aeiouy]}. This means it matches
+any string that starts with @samp{P} or contains (anywhere within it)
+a lowercase English vowel.
The alternation applies to the largest possible regexps on either side.
@@ -5113,14 +5167,15 @@ applies the @samp{*} symbol to the preceding @samp{h} and looks for matches
of one @samp{p} followed by any number of @samp{h}s. This also matches
just @samp{p} if no @samp{h}s are present.
-The @samp{*} repeats the @emph{smallest} possible preceding expression.
-(Use parentheses if you want to repeat a larger expression.) It finds
-as many repetitions as possible. For example,
-@samp{awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample}
-prints every record in @file{sample} containing a string of the form
-@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on.
-Notice the escaping of the parentheses by preceding them
-with backslashes.
+There are two subtle points to understand about how @samp{*} works.
+First, the @samp{*} applies only to the single preceding regular expression
+component (e.g., in @samp{ph*}, it applies just to the @samp{h}).
+To cause @samp{*} to apply to a larger sub-expression, use parentheses:
+@samp{(ph)*} matches @samp{ph}, @samp{phph}, @samp{phphph} and so on.
+
+Second, @samp{*} finds as many repetitions as possible. If the text
+to be matched is @samp{phhhhhhhhhhhhhhooey}, @samp{ph*} matches all of
+the @samp{h}s.
@cindex @code{+} (plus sign), regexp operator
@cindex plus sign (@code{+}), regexp operator
@@ -5129,12 +5184,6 @@ This symbol is similar to @samp{*}, except that the preceding expression must be
matched at least once. This means that @samp{wh+y}
would match @samp{why} and @samp{whhy}, but not @samp{wy}, whereas
@samp{wh*y} would match all three.
-The following is a simpler
-way of writing the last @samp{*} example:
-
-@example
-awk '/\(c[ad]+r x\)/ @{ print @}' sample
-@end example
@cindex @code{?} (question mark), regexp operator
@cindex question mark (@code{?}), regexp operator
@@ -5229,7 +5278,7 @@ Within a bracket expression, a @dfn{range expression} consists of two
characters separated by a hyphen. It matches any single character that
sorts between the two characters, based upon the system's native character
set. For example, @samp{[0-9]} is equivalent to @samp{[0123456789]}.
-(See @ref{Ranges and Locales}, for an explanation of how the POSIX
+(See @DBREF{Ranges and Locales} for an explanation of how the POSIX
standard and @command{gawk} have changed over time. This is mainly
of historical interest.)
@@ -5248,6 +5297,9 @@ bracket expression, put a @samp{\} in front of it. For example:
@noindent
matches either @samp{d} or @samp{]}.
+Additionally, if you place @samp{]} right after the opening
+@samp{[}, the closing bracket is treated as one of the
+characters to be matched.
@cindex POSIX @command{awk}, bracket expressions and
@cindex Extended Regular Expressions (EREs)
@@ -5359,6 +5411,160 @@ they do not recognize collating symbols or equivalence classes.
@c maybe one day ...
@c ENDOFRANGE charlist
+@node Leftmost Longest
+@section How Much Text Matches?
+
+@cindex regular expressions, leftmost longest match
+@c @cindex matching, leftmost longest
+Consider the following:
+
+@example
+echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
+@end example
+
+This example uses the @code{sub()} function (which we haven't discussed yet;
+@pxref{String Functions})
+to make a change to the input record. Here, the regexp @code{/a+/}
+indicates ``one or more @samp{a} characters,'' and the replacement
+text is @samp{<A>}.
+
+The input contains four @samp{a} characters.
+@command{awk} (and POSIX) regular expressions always match
+the leftmost, @emph{longest} sequence of input characters that can
+match. Thus, all four @samp{a} characters are
+replaced with @samp{<A>} in this example:
+
+@example
+$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
+@print{} <A>bcd
+@end example
+
+For simple match/no-match tests, this is not so important. But when doing
+text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
+and @code{gensub()} functions, it is very important.
+@ifinfo
+@xref{String Functions},
+for more information on these functions.
+@end ifinfo
+Understanding this principle is also important for regexp-based record
+and field splitting (@pxref{Records},
+and also @pxref{Field Separators}).
+
+@node Computed Regexps
+@section Using Dynamic Regexps
+
+@c STARTOFRANGE dregexp
+@cindex regular expressions, computed
+@c STARTOFRANGE regexpd
+@cindex regular expressions, dynamic
+@cindex @code{~} (tilde), @code{~} operator
+@cindex tilde (@code{~}), @code{~} operator
+@cindex @code{!} (exclamation point), @code{!~} operator
+@cindex exclamation point (@code{!}), @code{!~} operator
+@c @cindex operators, @code{~}
+@c @cindex operators, @code{!~}
+The righthand side of a @samp{~} or @samp{!~} operator need not be a
+regexp constant (i.e., a string of characters between slashes). It may
+be any expression. The expression is evaluated and converted to a string
+if necessary; the contents of the string are then used as the
+regexp. A regexp computed in this way is called a @dfn{dynamic
+regexp} or a @dfn{computed regexp}:
+
+@example
+BEGIN @{ digits_regexp = "[[:digit:]]+" @}
+$0 ~ digits_regexp @{ print @}
+@end example
+
+@noindent
+This sets @code{digits_regexp} to a regexp that describes one or more digits,
+and tests whether the input record matches this regexp.
+
+@quotation NOTE
+When using the @samp{~} and @samp{!~}
+operators, there is a difference between a regexp constant
+enclosed in slashes and a string constant enclosed in double quotes.
+If you are going to use a string constant, you have to understand that
+the string is, in essence, scanned @emph{twice}: the first time when
+@command{awk} reads your program, and the second time when it goes to
+match the string on the lefthand side of the operator with the pattern
+on the right. This is true of any string-valued expression (such as
+@code{digits_regexp}, shown previously), not just string constants.
+@end quotation
+
+@cindex regexp constants, slashes vs.@: quotes
+@cindex @code{\} (backslash), in regexp constants
+@cindex backslash (@code{\}), in regexp constants
+@cindex @code{"} (double quote), in regexp constants
+@cindex double quote (@code{"}), in regexp constants
+What difference does it make if the string is
+scanned twice? The answer has to do with escape sequences, and particularly
+with backslashes. To get a backslash into a regular expression inside a
+string, you have to type two backslashes.
+
+For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
+Only one backslash is needed. To do the same thing with a string,
+you have to type @code{"\\*"}. The first backslash escapes the
+second one so that the string actually contains the
+two characters @samp{\} and @samp{*}.
+
+@cindex troubleshooting, regexp constants vs.@: string constants
+@cindex regexp constants, vs.@: string constants
+@cindex string constants, vs.@: regexp constants
+Given that you can use both regexp and string constants to describe
+regular expressions, which should you use? The answer is ``regexp
+constants,'' for several reasons:
+
+@itemize @value{BULLET}
+@item
+String constants are more complicated to write and
+more difficult to read. Using regexp constants makes your programs
+less error-prone. Not understanding the difference between the two
+kinds of constants is a common source of errors.
+
+@item
+It is more efficient to use regexp constants. @command{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient. When using a string constant,
+@command{awk} must first convert the string into this internal form and
+then perform the pattern matching.
+
+@item
+Using regexp constants is better form; it shows clearly that you
+intend a regexp match.
+@end itemize
+
+@sidebar Using @code{\n} in Bracket Expressions of Dynamic Regexps
+@cindex regular expressions, dynamic, with embedded newlines
+@cindex newlines, in dynamic regexps
+
+Some versions of @command{awk} do not allow the newline
+character to be used inside a bracket expression for a dynamic regexp:
+
+@example
+$ @kbd{awk '$0 ~ "[ \t\n]"'}
+@error{} awk: newline in character class [
+@error{} ]...
+@error{} source line number 1
+@error{} context is
+@error{} >>> <<<
+@end example
+
+@cindex newlines, in regexp constants
+But a newline in a regexp constant works with no problem:
+
+@example
+$ @kbd{awk '$0 ~ /[ \t\n]/'}
+@kbd{here is a sample line}
+@print{} here is a sample line
+@kbd{Ctrl-d}
+@end example
+
+@command{gawk} does not have this problem, and it isn't likely to
+occur often in practice, but it's worth noting for future reference.
+@end sidebar
+@c ENDOFRANGE dregexp
+@c ENDOFRANGE regexpd
+
@node GNU Regexp Operators
@section @command{gawk}-Specific Regexp Operators
@@ -5522,7 +5728,7 @@ are allowed.
Traditional Unix @command{awk} regexps are matched. The GNU operators
are not special, and interval expressions are not available.
The POSIX character classes (@samp{[[:alnum:]]}, etc.) are supported,
-as Brian Kernighan's @command{awk} does support them.
+as BWK @command{awk} does support them.
Characters described by octal and hexadecimal escape sequences are
treated literally, even if they represent regexp metacharacters.
@@ -5634,160 +5840,6 @@ Case is always significant in compatibility mode.
@c ENDOFRANGE csregexp
@c ENDOFRANGE regexpcs
-@node Leftmost Longest
-@section How Much Text Matches?
-
-@cindex regular expressions, leftmost longest match
-@c @cindex matching, leftmost longest
-Consider the following:
-
-@example
-echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'
-@end example
-
-This example uses the @code{sub()} function (which we haven't discussed yet;
-@pxref{String Functions})
-to make a change to the input record. Here, the regexp @code{/a+/}
-indicates ``one or more @samp{a} characters,'' and the replacement
-text is @samp{<A>}.
-
-The input contains four @samp{a} characters.
-@command{awk} (and POSIX) regular expressions always match
-the leftmost, @emph{longest} sequence of input characters that can
-match. Thus, all four @samp{a} characters are
-replaced with @samp{<A>} in this example:
-
-@example
-$ @kbd{echo aaaabcd | awk '@{ sub(/a+/, "<A>"); print @}'}
-@print{} <A>bcd
-@end example
-
-For simple match/no-match tests, this is not so important. But when doing
-text matching and substitutions with the @code{match()}, @code{sub()}, @code{gsub()},
-and @code{gensub()} functions, it is very important.
-@ifinfo
-@xref{String Functions},
-for more information on these functions.
-@end ifinfo
-Understanding this principle is also important for regexp-based record
-and field splitting (@pxref{Records},
-and also @pxref{Field Separators}).
-
-@node Computed Regexps
-@section Using Dynamic Regexps
-
-@c STARTOFRANGE dregexp
-@cindex regular expressions, computed
-@c STARTOFRANGE regexpd
-@cindex regular expressions, dynamic
-@cindex @code{~} (tilde), @code{~} operator
-@cindex tilde (@code{~}), @code{~} operator
-@cindex @code{!} (exclamation point), @code{!~} operator
-@cindex exclamation point (@code{!}), @code{!~} operator
-@c @cindex operators, @code{~}
-@c @cindex operators, @code{!~}
-The righthand side of a @samp{~} or @samp{!~} operator need not be a
-regexp constant (i.e., a string of characters between slashes). It may
-be any expression. The expression is evaluated and converted to a string
-if necessary; the contents of the string are then used as the
-regexp. A regexp computed in this way is called a @dfn{dynamic
-regexp} or a @dfn{computed regexp}:
-
-@example
-BEGIN @{ digits_regexp = "[[:digit:]]+" @}
-$0 ~ digits_regexp @{ print @}
-@end example
-
-@noindent
-This sets @code{digits_regexp} to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
-@quotation NOTE
-When using the @samp{~} and @samp{!~}
-operators, there is a difference between a regexp constant
-enclosed in slashes and a string constant enclosed in double quotes.
-If you are going to use a string constant, you have to understand that
-the string is, in essence, scanned @emph{twice}: the first time when
-@command{awk} reads your program, and the second time when it goes to
-match the string on the lefthand side of the operator with the pattern
-on the right. This is true of any string-valued expression (such as
-@code{digits_regexp}, shown previously), not just string constants.
-@end quotation
-
-@cindex regexp constants, slashes vs.@: quotes
-@cindex @code{\} (backslash), in regexp constants
-@cindex backslash (@code{\}), in regexp constants
-@cindex @code{"} (double quote), in regexp constants
-@cindex double quote (@code{"}), in regexp constants
-What difference does it make if the string is
-scanned twice? The answer has to do with escape sequences, and particularly
-with backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
-For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
-Only one backslash is needed. To do the same thing with a string,
-you have to type @code{"\\*"}. The first backslash escapes the
-second one so that the string actually contains the
-two characters @samp{\} and @samp{*}.
-
-@cindex troubleshooting, regexp constants vs.@: string constants
-@cindex regexp constants, vs.@: string constants
-@cindex string constants, vs.@: regexp constants
-Given that you can use both regexp and string constants to describe
-regular expressions, which should you use? The answer is ``regexp
-constants,'' for several reasons:
-
-@itemize @value{BULLET}
-@item
-String constants are more complicated to write and
-more difficult to read. Using regexp constants makes your programs
-less error-prone. Not understanding the difference between the two
-kinds of constants is a common source of errors.
-
-@item
-It is more efficient to use regexp constants. @command{awk} can note
-that you have supplied a regexp and store it internally in a form that
-makes pattern matching more efficient. When using a string constant,
-@command{awk} must first convert the string into this internal form and
-then perform the pattern matching.
-
-@item
-Using regexp constants is better form; it shows clearly that you
-intend a regexp match.
-@end itemize
-
-@sidebar Using @code{\n} in Bracket Expressions of Dynamic Regexps
-@cindex regular expressions, dynamic, with embedded newlines
-@cindex newlines, in dynamic regexps
-
-Some versions of @command{awk} do not allow the newline
-character to be used inside a bracket expression for a dynamic regexp:
-
-@example
-$ @kbd{awk '$0 ~ "[ \t\n]"'}
-@error{} awk: newline in character class [
-@error{} ]...
-@error{} source line number 1
-@error{} context is
-@error{} >>> <<<
-@end example
-
-@cindex newlines, in regexp constants
-But a newline in a regexp constant works with no problem:
-
-@example
-$ @kbd{awk '$0 ~ /[ \t\n]/'}
-@kbd{here is a sample line}
-@print{} here is a sample line
-@kbd{Ctrl-d}
-@end example
-
-@command{gawk} does not have this problem, and it isn't likely to
-occur often in practice, but it's worth noting for future reference.
-@end sidebar
-@c ENDOFRANGE dregexp
-@c ENDOFRANGE regexpd
-
@node Regexp Summary
@section Summary
@@ -5798,7 +5850,7 @@ In @command{awk}, regular expression constants are written enclosed
between slashes: @code{/}@dots{}@code{/}.
@item
-Regexp constants may be used by standalone in patterns and
+Regexp constants may be used standalone in patterns and
in conditional expressions, or as part of matching expressions
using the @samp{~} and @samp{!~} operators.
@@ -5828,7 +5880,7 @@ the match, such as for text substitution and when the record separator
is a regexp.
@item
-Matching expressions may use dynamic regexps; that is string values
+Matching expressions may use dynamic regexps, that is, string values
treated as regular expressions.
@end itemize
@@ -5880,7 +5932,7 @@ used with it do not have to be named on the @command{awk} command line
* Getline:: Reading files under explicit program control
using the @code{getline} function.
* Read Timeout:: Reading input with a timeout.
-* Command line directories:: What happens if you put a directory on the
+* Command-line directories:: What happens if you put a directory on the
command line.
* Input Summary:: Input summary.
* Input Exercises:: Exercises.
@@ -5895,16 +5947,13 @@ used with it do not have to be named on the @command{awk} command line
@cindex records, splitting input into
@cindex @code{NR} variable
@cindex @code{FNR} variable
-The @command{awk} utility divides the input for your @command{awk}
-program into records and fields.
-@command{awk} keeps track of the number of records that have
-been read
-so far
-from the current input file. This value is stored in a
-built-in variable called @code{FNR}. It is reset to zero when a new
-file is started. Another built-in variable, @code{NR}, records the total
-number of input records read so far from all @value{DF}s. It starts at zero,
-but is never automatically reset to zero.
+@command{awk} divides the input for your program into records and fields.
+It keeps track of the number of records that have been read so far from
+the current input file. This value is stored in a built-in variable
+called @code{FNR}, which is reset to zero when a new file is started.
+Another built-in variable, @code{NR}, records the total number of input
+records read so far from all @value{DF}s. It starts at zero, but is
+never automatically reset to zero.
@menu
* awk split records:: How standard @command{awk} splits records.
@@ -6111,17 +6160,17 @@ with optional leading and/or trailing whitespace:
@example
$ @kbd{echo record 1 AAAA record 2 BBBB record 3 |}
> @kbd{gawk 'BEGIN @{ RS = "\n|( *[[:upper:]]+ *)" @}}
-> @kbd{@{ print "Record =", $0, "and RT =", RT @}'}
-@print{} Record = record 1 and RT = AAAA
-@print{} Record = record 2 and RT = BBBB
-@print{} Record = record 3 and RT =
-@print{}
+> @kbd{@{ print "Record =", $0,"and RT = [" RT "]" @}'}
+@print{} Record = record 1 and RT = [ AAAA ]
+@print{} Record = record 2 and RT = [ BBBB ]
+@print{} Record = record 3 and RT = [
+@print{} ]
@end example
@noindent
-The final line of output has an extra blank line. This is because the
-value of @code{RT} is a newline, and the @code{print} statement
-supplies its own terminating newline.
+The square brackets delineate the contents of @code{RT}, letting you
+see the leading and trailing whitespace. The final value of
+@code{RT} is a newline.
@xref{Simple Sed}, for a more useful example
of @code{RS} as a regexp and @code{RT}.
@@ -6548,7 +6597,7 @@ with a statement such as @samp{$1 = $1}, as described earlier.
* Default Field Splitting:: How fields are normally separated.
* Regexp Field Splitting:: Using regexps as the field separator.
* Single Character Fields:: Making each character a separate field.
-* Command Line Field Separator:: Setting @code{FS} from the command-line.
+* Command Line Field Separator:: Setting @code{FS} from the command line.
* Full Line Fields:: Making the full line be a single field.
* Field Splitting Summary:: Some final points and a summary table.
@end menu
@@ -6749,7 +6798,7 @@ should not rely on any specific behavior in your programs.
@value{DARKCORNER}
@cindex Brian Kernighan's @command{awk}
-As a point of information, Brian Kernighan's @command{awk} allows @samp{^}
+As a point of information, BWK @command{awk} allows @samp{^}
to match only at the beginning of the record. @command{gawk}
also works this way. For example:
@@ -6804,7 +6853,7 @@ behaves this way.
@node Command Line Field Separator
@subsection Setting @code{FS} from the Command Line
-@cindex @option{-F} option, command line
+@cindex @option{-F} option, command-line
@cindex field separator, on command line
@cindex command line, @code{FS} on@comma{} setting
@cindex @code{FS} variable, setting from command line
@@ -6854,6 +6903,8 @@ shell, without any quotes, the @samp{\} gets deleted, so @command{awk}
figures that you really want your fields to be separated with TABs and
not @samp{t}s. Use @samp{-v FS="t"} or @samp{-F"[t]"} on the command line
if you really do want to separate your fields with @samp{t}s.
+Use @samp{-F '\t'} when not in compatibility mode to specify that TABs
+separate fields.
As an example, let's use an @command{awk} program file called @file{edu.awk}
that contains the pattern @code{/edu/} and the action @samp{print $1}:
@@ -6999,7 +7050,7 @@ root
@noindent
on an incorrect implementation of @command{awk}, while @command{gawk}
-prints something like:
+prints the full first line of the file, something like:
@example
root:nSijPlPhZZwgE:0:0:Root:/:
@@ -7099,7 +7150,7 @@ haven't been introduced yet.
BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @}
NR > 2 @{
idle = $4
- sub(/^ */, "", idle) # strip leading spaces
+ sub(/^ +/, "", idle) # strip leading spaces
if (idle == "")
idle = 0
if (idle ~ /:/) @{
@@ -7257,6 +7308,8 @@ if (substr($i, 1, 1) == "\"") @{
As with @code{FS}, the @code{IGNORECASE} variable (@pxref{User-modified})
affects field splitting with @code{FPAT}.
+Assigning a value to @code{FPAT} overrides field splitting
+with @code{FS} and with @code{FIELDWIDTHS}.
Similar to @code{FIELDWIDTHS}, the value of @code{PROCINFO["FS"]}
will be @code{"FPAT"} if content-based field splitting is being used.
@@ -7280,6 +7333,12 @@ FPAT = "([^,]*)|(\"[^\"]+\")"
Finally, the @code{patsplit()} function makes the same functionality
available for splitting regular strings (@pxref{String Functions}).
+To recap, @command{gawk} provides three independent methods
+to split input records into fields. @command{gawk} uses whichever
+mechanism was last chosen based on which of the three
+variables---@code{FS}, @code{FIELDWIDTHS}, and @code{FPAT}---was
+last assigned to.
+
@node Multiple Line
@section Multiple-Line Records
@@ -7501,7 +7560,7 @@ and have a good knowledge of how @command{awk} works.
@cindex @code{getline} command, return values
@cindex @option{--sandbox} option, input redirection with @code{getline}
-The @code{getline} command returns one if it finds a record and zero if
+The @code{getline} command returns 1 if it finds a record and 0 if
it encounters the end of the file. If there is some error in getting
a record, such as a file that cannot be opened, then @code{getline}
returns @minus{}1. In this case, @command{gawk} sets the variable
@@ -7541,32 +7600,58 @@ finished processing the current record, but want to do some special
processing on the next record @emph{right now}. For example:
@example
+# Remove text between /* and */, inclusive
@{
- if ((t = index($0, "/*")) != 0) @{
- # value of `tmp' will be "" if t is 1
- tmp = substr($0, 1, t - 1)
- u = index(substr($0, t + 2), "*/")
- offset = t + 2
- while (u == 0) @{
- if (getline <= 0) @{
+ if ((i = index($0, "/*")) != 0) @{
+ out = substr($0, 1, i - 1) # leading part of the string
+ rest = substr($0, i + 2) # ... */ ...
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j > 0) @{
+ rest = substr(rest, j + 2) # remove comment
+ @} else @{
+ while (j == 0) @{
+ # get more text
+ if (getline <= 0) @{
m = "unexpected EOF or error"
m = (m ": " ERRNO)
print m > "/dev/stderr"
exit
- @}
- u = index($0, "*/")
- offset = 0
- @}
- # substr() expression will be "" if */
- # occurred at end of line
- $0 = tmp substr($0, offset + u + 2)
- @}
- print $0
+ @}
+ # build up the line using string concatenation
+ rest = rest $0
+ j = index(rest, "*/") # is */ in trailing part?
+ if (j != 0) @{
+ rest = substr(rest, j + 2)
+ break
+ @}
+ @}
+ @}
+ # build up the output line using string concatenation
+ $0 = out rest
+ @}
+ print $0
@}
@end example
+@c 8/2014: Here is some sample input:
+@ignore
+mon/*comment*/key
+rab/*commen
+t*/bit
+horse /*comment*/more text
+part 1 /*comment*/part 2 /*comment*/part 3
+no comment
+@end ignore
+
This @command{awk} program deletes C-style comments (@samp{/* @dots{}
-*/}) from the input. By replacing the @samp{print $0} with other
+*/}) from the input.
+It uses a number of features we haven't covered yet, including
+string concatenation
+(@pxref{Concatenation})
+and the @code{index()} and @code{substr()} built-in
+functions
+(@pxref{String Functions}).
+By replacing the @samp{print $0} with other
statements, you could perform more complicated processing on the
decommented input, such as searching for matches of a regular
expression. (This program has a subtle problem---it does not work if one
@@ -7823,7 +7908,7 @@ Unfortunately, @command{gawk} has not been consistent in its treatment
of a construct like @samp{@w{"echo "} "date" | getline}.
Most versions, including the current version, treat it as
@samp{@w{("echo "} "date") | getline}.
-(This how Brian Kernighan's @command{awk} behaves.)
+(This is how BWK @command{awk} behaves.)
Some versions changed and treated it as
@samp{@w{"echo "} ("date" | getline)}.
(This is how @command{mawk} behaves.)
@@ -7973,7 +8058,7 @@ probably by accident, and you should reconsider what it is you're
trying to accomplish.
@item
-@ref{Getline Summary}, presents a table summarizing the
+@DBREF{Getline Summary} presents a table summarizing the
@code{getline} variants and which variables they can affect.
It is worth noting that those variants which do not use redirection
can cause @code{FILENAME} to be updated if they cause
@@ -8144,10 +8229,10 @@ a connection before it can start reading any data,
or the attempt to open a FIFO special file for reading can block
indefinitely until some other process opens it for writing.
-@node Command line directories
+@node Command-line directories
@section Directories On The Command Line
-@cindex differences in @command{awk} and @command{gawk}, command line directories
-@cindex directories, command line
+@cindex differences in @command{awk} and @command{gawk}, command-line directories
+@cindex directories, command-line
@cindex command line, directories on
According to the POSIX standard, files named on the @command{awk}
@@ -8240,6 +8325,7 @@ Directories on the command line are fatal for standard @command{awk};
@end itemize
+@c EXCLUDE START
@node Input Exercises
@section Exercises
@@ -8256,9 +8342,10 @@ including abstentions, for each item.
comments (@samp{/* @dots{} */}) from the input. That program
does not work if one comment ends on one line and another one
starts later on the same line.
-Write a program that does handle multiple comments on the line.
+That can be fixed by making one simple change. What is it?
@end enumerate
+@c EXCLUDE END
@node Printing
@chapter Printing Output
@@ -8300,7 +8387,7 @@ and discusses the @code{close()} built-in function.
descriptors.
* Close Files And Pipes:: Closing Input and Output Files and Pipes.
* Output Summary:: Output summary.
-* Output exercises:: Exercises.
+* Output Exercises:: Exercises.
@end menu
@node Print
@@ -8337,6 +8424,10 @@ double-quote characters, your text is taken as an @command{awk}
expression, and you will probably get an error. Keep in mind that a
space is printed between any two items.
+Note that the @code{print} statement is a statement and not an
+expression---you can't use it in the pattern part of a pattern-action
+statement, for example.
+
@node Print Examples
@section @code{print} Statement Examples
@@ -9272,7 +9363,7 @@ It then sends the list to the shell for execution.
@c ENDOFRANGE reout
@node Special Files
-@section Special @value{FFN} in @command{gawk}
+@section Special @value{FFN}s in @command{gawk}
@c STARTOFRANGE gfn
@cindex @command{gawk}, file names in
@@ -9319,7 +9410,8 @@ print "Serious error detected!" | "cat 1>&2"
@noindent
This works by opening a pipeline to a shell command that can access the
standard error stream that it inherits from the @command{awk} process.
-This is far from elegant, and it is also inefficient, because it requires a
+@c 8/2014: Mike Brennan says not to cite this as inefficient. So, fixed.
+This is far from elegant, and it also requires a
separate process. So people writing @command{awk} programs often
don't do this. Instead, they send the error messages to the
screen, like this:
@@ -9706,7 +9798,8 @@ communications.
@end itemize
-@node Output exercises
+@c EXCLUDE START
+@node Output Exercises
@section Exercises
@enumerate
@@ -9735,6 +9828,7 @@ BEGIN @{ print "Serious error detected!" > /dev/stderr @}
@end example
@end enumerate
+@c EXCLUDE END
@c ENDOFRANGE prnt
@@ -9949,7 +10043,8 @@ A regexp constant is a regular expression description enclosed in
slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in
@command{awk} programs are constant, but the @samp{~} and @samp{!~}
matching operators can also match computed or dynamic regexps
-(which are just ordinary strings or variables that contain a regexp).
+(which are typically just ordinary strings or variables that contain a regexp,
+but could be a more complex expression).
@c ENDOFRANGE cnst
@node Using Constant Regexps
@@ -10055,7 +10150,7 @@ function mysub(pat, repl, str, global)
@c @cindex automatic warnings
@c @cindex warnings, automatic
In this example, the programmer wants to pass a regexp constant to the
-user-defined function @code{mysub}, which in turn passes it on to
+user-defined function @code{mysub()}, which in turn passes it on to
either @code{sub()} or @code{gsub()}. However, what really happens is that
the @code{pat} parameter is either one or zero, depending upon whether
or not @code{$0} matches @code{/hi/}.
@@ -10076,7 +10171,7 @@ on the @command{awk} command line.
@menu
* Using Variables:: Using variables in your programs.
-* Assignment Options:: Setting variables on the command-line and a
+* Assignment Options:: Setting variables on the command line and a
summary of command-line syntax. This is an
advanced method of input.
@end menu
@@ -10534,7 +10629,7 @@ print "something meaningful" > file name
@cindex @command{mawk} utility
@noindent
This produces a syntax error with some versions of Unix
-@command{awk}.@footnote{It happens that Brian Kernighan's
+@command{awk}.@footnote{It happens that BWK
@command{awk}, @command{gawk} and @command{mawk} all ``get it right,''
but you should not rely on this.}
It is necessary to use the following:
@@ -10619,7 +10714,7 @@ Otherwise, it's parsed as follows:
@end display
As mentioned earlier,
-when doing concatenation, @emph{parenthesize}. Otherwise,
+when mixing concatenation with other operators, @emph{parenthesize}. Otherwise,
you're never quite sure what you'll get.
@node Assignment Ops
@@ -10872,7 +10967,7 @@ A workaround is:
awk '/[=]=/' /dev/null
@end example
-@command{gawk} does not have this problem; Brian Kernighan's @command{awk}
+@command{gawk} does not have this problem; BWK @command{awk}
and @command{mawk} also do not (@pxref{Other Versions}).
@end sidebar
@c ENDOFRANGE exas
@@ -11105,19 +11200,14 @@ compares variables.
@cindex numeric, strings
@cindex strings, numeric
@cindex POSIX @command{awk}, numeric strings and
-The 1992 POSIX standard introduced
+The POSIX standard introduced
the concept of a @dfn{numeric string}, which is simply a string that looks
like a number---for example, @code{@w{" +2"}}. This concept is used
for determining the type of a variable.
The type of the variable is important because the types of two variables
determine how they are compared.
+Variable typing follows these rules:
-The various versions of the POSIX standard did not get the rules
-quite right for several editions. Fortunately, as of at least the
-2008 standard (and possibly earlier), the standard has been fixed,
-and variable typing follows these rules:@footnote{@command{gawk} has
-followed these rules for many years,
-and it is gratifying that the POSIX standard is also now correct.}
@itemize @value{BULLET}
@item
@@ -11270,7 +11360,7 @@ made of characters and is therefore also a string.
Thus, for example, the string constant @w{@code{" +3.14"}},
when it appears in program source code,
is a string---even though it looks numeric---and
-is @emph{never} treated as number for comparison
+is @emph{never} treated as a number for comparison
purposes.
In short, when one operand is a ``pure'' string, such as a string
@@ -11587,7 +11677,7 @@ is ``short-circuited'' if the result can be determined part way through
its evaluation.
@cindex line continuations
-Statements that use @samp{&&} or @samp{||} can be continued simply
+Statements that end with @samp{&&} or @samp{||} can be continued simply
by putting a newline after them. But you cannot put a newline in front
of either of these operators without using backslash continuation
(@pxref{Statements/Lines}).
@@ -11606,7 +11696,7 @@ program is one way to print lines in between special bracketing lines:
@example
$1 == "START" @{ interested = ! interested; next @}
-interested == 1 @{ print @}
+interested @{ print @}
$1 == "END" @{ interested = ! interested; next @}
@end example
@@ -11626,6 +11716,16 @@ bogus input data, but the point is to illustrate the use of `!',
so we'll leave well enough alone.
@end ignore
+Most commonly, the @samp{!} operator is used in the conditions of
+@code{if} and @code{while} statements, where it often makes more
+sense to phrase the logic in the negative:
+
+@example
+if (! @var{some condition} || @var{some other condition}) @{
+ @var{@dots{} do whatever processing @dots{}}
+@}
+@end example
+
@cindex @code{next} statement
@quotation NOTE
The @code{next} statement is discussed in
@@ -12246,7 +12346,7 @@ Contrast this with the following regular expression match, which
accepts any record with a first field that contains @samp{li}:
@example
-$ @kbd{awk '$1 ~ /foo/ @{ print $2 @}' mail-list}
+$ @kbd{awk '$1 ~ /li/ @{ print $2 @}' mail-list}
@print{} 555-5553
@print{} 555-6699
@end example
@@ -12518,7 +12618,7 @@ rule. It contains the number of fields from the last input record.
Most probably due to an oversight, the standard does not say that @code{$0}
is also preserved, although logically one would think that it should be.
In fact, @command{gawk} does preserve the value of @code{$0} for use in
-@code{END} rules. Be aware, however, that Brian Kernighan's @command{awk}, and possibly
+@code{END} rules. Be aware, however, that BWK @command{awk}, and possibly
other implementations, do not.
The third point follows from the first two. The meaning of @samp{print}
@@ -13157,31 +13257,38 @@ case is made, the case statement bodies execute until a @code{break},
or the end of the @code{switch} statement itself. For example:
@example
-switch (NR * 2 + 1) @{
-case 3:
-case "11":
- print NR - 1
- break
-
-case /2[[:digit:]]+/:
- print NR
-
-default:
- print NR + 1
-
-case -1:
- print NR * -1
+while ((c = getopt(ARGC, ARGV, "aksx")) != -1) @{
+ switch (c) @{
+ case "a":
+ # report size of all files
+ all_files = TRUE;
+ break
+ case "k":
+ BLOCK_SIZE = 1024 # 1K block size
+ break
+ case "s":
+ # do sums only
+ sum_only = TRUE
+ break
+ case "x":
+ # don't cross filesystems
+ fts_flags = or(fts_flags, FTS_XDEV)
+ break
+ case "?":
+ default:
+ usage()
+ break
+ @}
@}
@end example
Note that if none of the statements specified above halt execution
of a matched @code{case} statement, execution falls through to the
-next @code{case} until execution halts. In the above example, for
-any case value starting with @samp{2} followed by one or more digits,
-the @code{print} statement is executed and then falls through into the
-@code{default} section, executing its @code{print} statement. In turn,
-the @minus{}1 case will also be executed since the @code{default} does
-not halt execution.
+next @code{case} until execution halts. In the above example, the
+@code{case} for @code{"?"} falls through to the @code{default}
+case, which is to call a function named @code{usage()}.
+(The @code{getopt()} function being called here is
+described in @ref{Getopt Function}.)
@node Break Statement
@subsection The @code{break} Statement
@@ -13255,7 +13362,7 @@ historical implementations of @command{awk} treated the @code{break}
statement outside of a loop as if it were a @code{next} statement
(@pxref{Next Statement}).
@value{DARKCORNER}
-Recent versions of Brian Kernighan's @command{awk} no longer allow this usage,
+Recent versions of BWK @command{awk} no longer allow this usage,
nor does @command{gawk}.
@node Continue Statement
@@ -13304,7 +13411,8 @@ BEGIN @{
@end example
@noindent
-This program loops forever once @code{x} reaches 5.
+This program loops forever once @code{x} reaches 5, since
+the increment (@samp{x++}) is never reached.
@c @cindex @code{continue}, outside of loops
@c @cindex historical features
@@ -13321,7 +13429,7 @@ statement outside a loop: as if it were a @code{next}
statement
(@pxref{Next Statement}).
@value{DARKCORNER}
-Recent versions of Brian Kernighan's @command{awk} no longer work this way, nor
+Recent versions of BWK @command{awk} no longer work this way, nor
does @command{gawk}.
@node Next Statement
@@ -13410,7 +13518,8 @@ starts over with the first rule in the program.
If the @code{nextfile} statement causes the end of the input to be reached,
then the code in any @code{END} rules is executed. An exception to this is
when @code{nextfile} is invoked during execution of any statement in an
-@code{END} rule; In this case, it causes the program to stop immediately. @xref{BEGIN/END}.
+@code{END} rule; in this case, it causes the program to stop immediately.
+@xref{BEGIN/END}.
The @code{nextfile} statement is useful when there are many @value{DF}s
to process but it isn't necessary to process every record in every file.
@@ -13420,13 +13529,10 @@ would have to continue scanning the unwanted records. The @code{nextfile}
statement accomplishes this much more efficiently.
In @command{gawk}, execution of @code{nextfile} causes additional things
-to happen:
-any @code{ENDFILE} rules are executed except in the case as
-mentioned below,
-@code{ARGIND} is incremented,
-and
-any @code{BEGINFILE} rules are executed.
-(@code{ARGIND} hasn't been introduced yet. @xref{Built-in Variables}.)
+to happen: any @code{ENDFILE} rules are executed if @command{gawk} is
+not currently in an @code{END} or @code{BEGINFILE} rule, @code{ARGIND} is
+incremented, and any @code{BEGINFILE} rules are executed. (@code{ARGIND}
+hasn't been introduced yet. @xref{Built-in Variables}.)
With @command{gawk}, @code{nextfile} is useful inside a @code{BEGINFILE}
rule to skip over a file that would otherwise cause @command{gawk}
@@ -13450,7 +13556,7 @@ See @uref{http://austingroupbugs.net/view.php?id=607, the Austin Group website}.
@cindex @code{nextfile} statement, user-defined functions and
@cindex Brian Kernighan's @command{awk}
@cindex @command{mawk} utility
-The current version of the Brian Kernighan's @command{awk}, and @command{mawk} (@pxref{Other
+The current version of BWK @command{awk}, and @command{mawk} (@pxref{Other
Versions}) also support @code{nextfile}. However, they don't allow the
@code{nextfile} statement inside function bodies (@pxref{User-defined}).
@command{gawk} does; a @code{nextfile} inside a function body reads the
@@ -13959,7 +14065,7 @@ current record. @xref{Changing Fields}.
@cindex differences in @command{awk} and @command{gawk}, @code{FUNCTAB} variable
@item @code{FUNCTAB #}
An array whose indices and corresponding values are the names of all
-the user-defined or extension functions in the program.
+the built-in, user-defined and extension functions in the program.
@quotation NOTE
Attempting to use the @code{delete} statement with the @code{FUNCTAB}
@@ -14007,9 +14113,12 @@ text of the AWK program. For each identifier, the value of the element is one o
@item "array"
The identifier is an array.
+@item "builtin"
+The identifier is a built-in function.
+
@item "extension"
The identifier is an extension function loaded via
-@code{@@load}.
+@code{@@load} or @option{-l}.
@item "scalar"
The identifier is a scalar.
@@ -14243,7 +14352,7 @@ changed.
@cindex arguments, command-line
@cindex command line, arguments
-@ref{Auto-set},
+@DBREF{Auto-set}
presented the following program describing the information contained in @code{ARGC}
and @code{ARGV}:
@@ -14316,8 +14425,17 @@ before actual processing of the input begins.
@xref{Split Program}, and see
@ref{Tee Program}, for examples
of each way of removing elements from @code{ARGV}.
+
+To actually get options into an @command{awk} program,
+end the @command{awk} options with @option{--} and then supply
+the @command{awk} program's options, in the following manner:
+
+@example
+awk -f myprog.awk -- -v -q file1 file2 @dots{}
+@end example
+
The following fragment processes @code{ARGV} in order to examine, and
-then remove, command-line options:
+then remove, the above command-line options:
@example
BEGIN @{
@@ -14337,32 +14455,24 @@ BEGIN @{
@}
@end example
-To actually get the options into the @command{awk} program,
-end the @command{awk} options with @option{--} and then supply
-the @command{awk} program's options, in the following manner:
-
-@example
-awk -f myprog -- -v -q file1 file2 @dots{}
-@end example
-
@cindex differences in @command{awk} and @command{gawk}, @code{ARGC}/@code{ARGV} variables
-This is not necessary in @command{gawk}. Unless @option{--posix} has
+Ending the @command{awk} options with @option{--} isn't
+necessary in @command{gawk}. Unless @option{--posix} has
been specified, @command{gawk} silently puts any unrecognized options
into @code{ARGV} for the @command{awk} program to deal with. As soon
as it sees an unknown option, @command{gawk} stops looking for other
-options that it might otherwise recognize. The previous example with
+options that it might otherwise recognize. The previous command line with
@command{gawk} would be:
@example
-gawk -f myprog -q -v file1 file2 @dots{}
+gawk -f myprog.awk -q -v file1 file2 @dots{}
@end example
@noindent
-Because @option{-q} is not a valid @command{gawk} option,
-it and the following @option{-v}
-are passed on to the @command{awk} program.
-(@xref{Getopt Function}, for an @command{awk} library function
-that parses command-line options.)
+Because @option{-q} is not a valid @command{gawk} option, it and the
+following @option{-v} are passed on to the @command{awk} program.
+(@xref{Getopt Function}, for an @command{awk} library function that
+parses command-line options.)
@node Pattern Action Summary
@section Summary
@@ -14617,7 +14727,10 @@ array element value:
@end docbook
@noindent
-The pairs are shown in jumbled order because their order is irrelevant.
+The pairs are shown in jumbled order because their order is
+irrelevant.@footnote{The ordering will vary among @command{awk}
+implementations, which typically use hash tables to store array elements
+and values.}
One advantage of associative arrays is that new pairs can be added
at any time. For example, suppose a tenth element is added to the array
@@ -14739,8 +14852,9 @@ English to French:
Here we decided to translate the number one in both spelled-out and
numeric form---thus illustrating that a single array can have both
numbers and strings as indices.
-(In fact, array subscripts are always strings; this is discussed
-in more detail in
+(In fact, array subscripts are always strings.
+There are some subtleties to how numbers work when used as
+array subscripts; this is discussed in more detail in
@ref{Numeric Array Subscripts}.)
Here, the number @code{1} isn't double-quoted, since @command{awk}
automatically converts it to a string.
@@ -14807,8 +14921,9 @@ if (a["foo"] != "") @dots{}
@end example
@noindent
-This is incorrect, since this will @emph{create} @code{a["foo"]}
-if it didn't exist before!
+This is incorrect for two reasons. First, it @emph{creates} @code{a["foo"]}
+if it didn't exist before! Second, it is valid (if a bit unusual) to set
+an array element equal to the empty string.
@end quotation
@c @cindex arrays, @code{in} operator and
@@ -14826,6 +14941,8 @@ This expression tests whether the particular index @var{indx} exists,
without the side effect of creating that element if it is not present.
The expression has the value one (true) if @code{@var{array}[@var{indx}]}
exists and zero (false) if it does not exist.
+(We use @var{indx} here, since @samp{index} is the name of a built-in
+function.)
For example, this statement tests whether the array @code{frequencies}
contains the index @samp{2}:
@@ -15033,7 +15150,7 @@ $ @kbd{gawk -f loopcheck.awk}
@print{} is
@end example
-Contrast this to Brian Kernighan's @command{awk}:
+Contrast this to BWK @command{awk}:
@example
$ @kbd{nawk -f loopcheck.awk}
@@ -15278,7 +15395,7 @@ using @code{delete} without a subscript was a @command{gawk} extension.
As of September, 2012, it was accepted for
inclusion into the POSIX standard. See @uref{http://austingroupbugs.net/view.php?id=544,
the Austin Group website}. This form of the @code{delete} statement is also supported
-by Brian Kernighan's @command{awk} and @command{mawk}, as well as
+by BWK @command{awk} and @command{mawk}, as well as
by a number of other implementations (@pxref{Other Versions}).
@end quotation
@@ -15394,7 +15511,7 @@ $ @kbd{echo 'line 1}
> @kbd{line 2}
> @kbd{line 3' | awk '@{ l[lines] = $0; ++lines @}}
> @kbd{END @{}
-> @kbd{for (i = lines-1; i >= 0; --i)}
+> @kbd{for (i = lines - 1; i >= 0; i--)}
> @kbd{print l[i]}
> @kbd{@}'}
@print{} line 3
@@ -15418,7 +15535,7 @@ The following version of the program works correctly:
@example
@{ l[lines++] = $0 @}
END @{
- for (i = lines - 1; i >= 0; --i)
+ for (i = lines - 1; i >= 0; i--)
print l[i]
@}
@end example
@@ -15492,10 +15609,11 @@ used for single dimensional arrays. Write the whole sequence of indices
in parentheses, separated by commas, as the left operand:
@example
-(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array}
+if ((@var{subscript1}, @var{subscript2}, @dots{}) in @var{array})
+ @dots{}
@end example
-The following example treats its input as a two-dimensional array of
+Here is an example that treats its input as a two-dimensional array of
fields; it rotates this array 90 degrees clockwise and prints the
result. It assumes that all lines have the same number of
elements:
@@ -15968,7 +16086,9 @@ is @minus{}3, and @code{int(-3)} is @minus{}3 as well.
@cindexawkfunc{log}
@cindex logarithm
Return the natural logarithm of @var{x}, if @var{x} is positive;
-otherwise, report an error.
+otherwise, return @code{NaN} (``not a number'') on IEEE 754 systems.
+Additionally, @command{gawk} prints a warning message when @var{x}
+is negative.
@item @code{rand()}
@cindexawkfunc{rand}
@@ -16067,6 +16187,9 @@ numbers that are truly unpredictable.
The return value of @code{srand()} is the previous seed. This makes it
easy to keep track of the seeds in case you need to consistently reproduce
sequences of random numbers.
+
+POSIX does not specify the initial seed; it differs among @command{awk}
+implementations.
@end table
@node String Functions
@@ -16742,7 +16865,7 @@ in the string, counting from character @var{start}.
@cindex Brian Kernighan's @command{awk}
If @var{start} is less than one, @code{substr()} treats it as
if it was one. (POSIX doesn't specify what to do in this case:
-Brian Kernighan's @command{awk} acts this way, and therefore @command{gawk}
+BWK @command{awk} acts this way, and therefore @command{gawk}
does too.)
If @var{start} is greater than the number of characters
in the string, @code{substr()} returns the null string.
@@ -16811,6 +16934,12 @@ Nonalphabetic characters are left unchanged. For example,
@cindex backslash (@code{\}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@cindex @code{&} (ampersand), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
@cindex ampersand (@code{&}), @code{gsub()}/@code{gensub()}/@code{sub()} functions and
+
+@quotation CAUTION
+This section has been known to cause headaches.
+You might want to skip it upon first reading.
+@end quotation
+
When using @code{sub()}, @code{gsub()}, or @code{gensub()}, and trying to get literal
backslashes and ampersands into the replacement text, you need to remember
that there are several levels of @dfn{escape processing} going on.
@@ -16828,7 +16957,7 @@ escape sequences listed in @ref{Escape Sequences}.
Thus, for every @samp{\} that @command{awk} processes at the runtime
level, you must type two backslashes at the lexical level.
When a character that is not valid for an escape sequence follows the
-@samp{\}, Brian Kernighan's @command{awk} and @command{gawk} both simply remove the initial
+@samp{\}, BWK @command{awk} and @command{gawk} both simply remove the initial
@samp{\} and put the next character into the string. Thus, for
example, @code{"a\qb"} is treated as @code{"aqb"}.
@@ -16853,26 +16982,26 @@ through unchanged. This is illustrated in @ref{table-sub-escapes}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
- @code{\&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\\&}! @code{\\&}!a literal @samp{\&}_cr
- @code{\\\\\&}! @code{\\&}!a literal @samp{\&}_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\\&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+ @code{\&}! @code{&}!The matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\\&}! @code{\\&}!A literal @samp{\&}_cr
+ @code{\\\\\&}! @code{\\&}!A literal @samp{\&}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\\&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
}
_bigskip}
@end tex
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\&}
-@item @code{\\\\\&} @tab @code{\\&} @tab a literal @samp{\&}
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\\&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\&} @tab @code{&} @tab The matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\&}
+@item @code{\\\\\&} @tab @code{\\&} @tab A literal @samp{\&}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\\&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@end multitable
@end ifdocbook
@ifnottex
@@ -16880,13 +17009,13 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
- @code{\&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\&} @code{\&} a literal @samp{&}
- @code{\\\\&} @code{\\&} a literal @samp{\&}
- @code{\\\\\&} @code{\\&} a literal @samp{\&}
-@code{\\\\\\&} @code{\\\&} a literal @samp{\\&}
- @code{\\q} @code{\q} a literal @samp{\q}
+ @code{\&} @code{&} The matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\\&} @code{\&} A literal @samp{&}
+ @code{\\\\&} @code{\\&} A literal @samp{\&}
+ @code{\\\\\&} @code{\\&} A literal @samp{\&}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\\&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@end display
@end ifnotdocbook
@end ifnottex
@@ -16902,86 +17031,19 @@ case of even numbers of backslashes entered at the lexical level.)
The problem with the historical approach is that there is no way to get
a literal @samp{\} followed by the matched text.
-@c @cindex @command{awk} language, POSIX version
-@cindex POSIX @command{awk}, functions and, @code{gsub()}/@code{sub()}
-The 1992 POSIX standard attempted to fix this problem. That standard
-says that @code{sub()} and @code{gsub()} look for either a @samp{\} or an @samp{&}
-after the @samp{\}. If either one follows a @samp{\}, that character is
-output literally. The interpretation of @samp{\} and @samp{&} then becomes
-as shown in @ref{table-sub-posix-92}.
-
-@float Table,table-sub-posix-92
-@caption{1992 POSIX Rules for @code{sub()} and @code{gsub()} Escape Sequence Processing}
-@c thanks to Karl Berry for formatting this table
-@tex
-\vbox{\bigskip
-% We need more characters for escape and tab ...
-\catcode`_ = 0
-\catcode`! = 4
-% ... since this table has lots of &'s and \'s, so we unspecialize them.
-\catcode`\& = \other \catcode`\\ = \other
-_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
- You type!@code{sub()} sees!@code{sub()} generates_cr
-_hrulefill!_hrulefill!_hrulefill_cr
- @code{&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-}
-_bigskip}
-@end tex
-@ifdocbook
-@multitable @columnfractions .20 .20 .60
-@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, then the matched text
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@end multitable
-@end ifdocbook
-@ifnottex
-@ifnotdocbook
-@display
- You type @code{sub()} sees @code{sub()} generates
- -------- ---------- ---------------
- @code{&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
-@end display
-@end ifnotdocbook
-@end ifnottex
-@end float
-
-@noindent
-This appears to solve the problem.
-Unfortunately, the phrasing of the standard is unusual. It
-says, in effect, that @samp{\} turns off the special meaning of any
-following character, but for anything other than @samp{\} and @samp{&},
-such special meaning is undefined. This wording leads to two problems:
+Several editions of the POSIX standard attempted to fix this problem
+but weren't successful. The details are irrelevant at this point in time.
-@itemize @value{BULLET}
-@item
-Backslashes must now be doubled in the @var{replacement} string, breaking
-historical @command{awk} programs.
-
-@item
-To make sure that an @command{awk} program is portable, @emph{every} character
-in the @var{replacement} string must be preceded with a
-backslash.@footnote{This consequence was certainly unintended.}
-@c I can say that, 'cause I was involved in making this change
-@end itemize
-
-Because of the problems just listed,
-in 1996, the @command{gawk} maintainer submitted
+At one point, the @command{gawk} maintainer submitted
proposed text for a revised standard that
reverts to rules that correspond more closely to the original existing
practice. The proposed rules have special cases that make it possible
-to produce a @samp{\} preceding the matched text. This is shown in
+to produce a @samp{\} preceding the matched text.
+This is shown in
@ref{table-sub-proposed}.
@float Table,table-sub-proposed
-@caption{Proposed Rules For @code{sub()} And Backslash}
+@caption{GNU @command{awk} Rules For @code{sub()} And Backslash}
@tex
\vbox{\bigskip
% We need more characters for escape and tab ...
@@ -16992,10 +17054,10 @@ to produce a @samp{\} preceding the matched text. This is shown in
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+@code{\\\\&}! @code{\\&}!A literal @samp{\}, followed by the matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
@code{\\\\}! @code{\\}!@code{\\}_cr
}
_bigskip}
@@ -17003,10 +17065,10 @@ _bigskip}
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, followed by the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, followed by the matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@item @code{\\\\} @tab @code{\\} @tab @code{\\}
@end multitable
@end ifdocbook
@@ -17015,10 +17077,10 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\q} @code{\q} a literal @samp{\q}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@code{\\\\} @code{\\} @code{\\}
@end display
@end ifnotdocbook
@@ -17031,13 +17093,13 @@ there was only one. However, as in the historical case, any @samp{\} that
is not part of one of these three sequences is not special and appears
in the output literally.
-@command{gawk} 3.0 and 3.1 follow these proposed POSIX rules for @code{sub()} and
-@code{gsub()}.
-@c As much as we think it's a lousy idea. You win some, you lose some. Sigh.
-The POSIX standard took much longer to be revised than was expected in 1996.
-The 2001 standard does not follow the above rules. Instead, the rules
-there are somewhat simpler. The results are similar except for one case.
+@command{gawk} 3.0 and 3.1 follow these rules for @code{sub()} and
+@code{gsub()}. The POSIX standard took much longer to be revised than
+was expected. In addition, the @command{gawk} maintainer's proposal was
+lost during the standardization process. The final rules are
+somewhat simpler. The results are similar except for one case.
+@cindex POSIX @command{awk}, functions and, @code{gsub()}/@code{sub()}
The POSIX rules state that @samp{\&} in the replacement string produces
a literal @samp{&}, @samp{\\} produces a literal @samp{\}, and @samp{\} followed
by anything else is not special; the @samp{\} is placed straight into the output.
@@ -17055,10 +17117,10 @@ These rules are presented in @ref{table-posix-sub}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{sub()} sees!@code{sub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
-@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\q}! @code{\q}!a literal @samp{\q}_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+@code{\\\\&}! @code{\\&}!A literal @samp{\}, followed by the matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{\q}_cr
@code{\\\\}! @code{\\}!@code{\}_cr
}
_bigskip}
@@ -17066,10 +17128,10 @@ _bigskip}
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{sub()} sees @tab @code{sub()} generates
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, followed by the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{\q}
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, followed by the matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{\q}
@item @code{\\\\} @tab @code{\\} @tab @code{\}
@end multitable
@end ifdocbook
@@ -17078,10 +17140,10 @@ _bigskip}
@display
You type @code{sub()} sees @code{sub()} generates
-------- ---------- ---------------
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\q} @code{\q} a literal @samp{\q}
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, followed by the matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\q} @code{\q} A literal @samp{\q}
@code{\\\\} @code{\\} @code{\}
@end display
@end ifnotdocbook
@@ -17093,7 +17155,7 @@ is seen as @samp{\\} and produces @samp{\} instead of @samp{\\}.
Starting with @value{PVERSION} 3.1.4, @command{gawk} followed the POSIX rules
when @option{--posix} is specified (@pxref{Options}). Otherwise,
-it continued to follow the 1996 proposed rules, since
+it continued to follow the proposed rules, since
that had been its behavior for many years.
When @value{PVERSION} 4.0.0 was released, the @command{gawk} maintainer
@@ -17124,24 +17186,24 @@ as shown in @ref{table-gensub-escapes}.
_halign{_hfil#!_qquad_hfil#!_qquad#_hfil_cr
You type!@code{gensub()} sees!@code{gensub()} generates_cr
_hrulefill!_hrulefill!_hrulefill_cr
- @code{&}! @code{&}!the matched text_cr
- @code{\\&}! @code{\&}!a literal @samp{&}_cr
- @code{\\\\}! @code{\\}!a literal @samp{\}_cr
- @code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text_cr
-@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}_cr
- @code{\\q}! @code{\q}!a literal @samp{q}_cr
+ @code{&}! @code{&}!The matched text_cr
+ @code{\\&}! @code{\&}!A literal @samp{&}_cr
+ @code{\\\\}! @code{\\}!A literal @samp{\}_cr
+ @code{\\\\&}! @code{\\&}!A literal @samp{\}, then the matched text_cr
+@code{\\\\\\&}! @code{\\\&}!A literal @samp{\&}_cr
+ @code{\\q}! @code{\q}!A literal @samp{q}_cr
}
_bigskip}
@end tex
@ifdocbook
@multitable @columnfractions .20 .20 .60
@headitem You type @tab @code{gensub()} sees @tab @code{gensub()} generates
-@item @code{&} @tab @code{&} @tab the matched text
-@item @code{\\&} @tab @code{\&} @tab a literal @samp{&}
-@item @code{\\\\} @tab @code{\\} @tab a literal @samp{\}
-@item @code{\\\\&} @tab @code{\\&} @tab a literal @samp{\}, then the matched text
-@item @code{\\\\\\&} @tab @code{\\\&} @tab a literal @samp{\&}
-@item @code{\\q} @tab @code{\q} @tab a literal @samp{q}
+@item @code{&} @tab @code{&} @tab The matched text
+@item @code{\\&} @tab @code{\&} @tab A literal @samp{&}
+@item @code{\\\\} @tab @code{\\} @tab A literal @samp{\}
+@item @code{\\\\&} @tab @code{\\&} @tab A literal @samp{\}, then the matched text
+@item @code{\\\\\\&} @tab @code{\\\&} @tab A literal @samp{\&}
+@item @code{\\q} @tab @code{\q} @tab A literal @samp{q}
@end multitable
@end ifdocbook
@ifnottex
@@ -17149,12 +17211,12 @@ _bigskip}
@display
You type @code{gensub()} sees @code{gensub()} generates
-------- ------------- ------------------
- @code{&} @code{&} the matched text
- @code{\\&} @code{\&} a literal @samp{&}
- @code{\\\\} @code{\\} a literal @samp{\}
- @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text
-@code{\\\\\\&} @code{\\\&} a literal @samp{\&}
- @code{\\q} @code{\q} a literal @samp{q}
+ @code{&} @code{&} The matched text
+ @code{\\&} @code{\&} A literal @samp{&}
+ @code{\\\\} @code{\\} A literal @samp{\}
+ @code{\\\\&} @code{\\&} A literal @samp{\}, then the matched text
+@code{\\\\\\&} @code{\\\&} A literal @samp{\&}
+ @code{\\q} @code{\q} A literal @samp{q}
@end display
@end ifnotdocbook
@end ifnottex
@@ -17236,7 +17298,7 @@ buffers its output and the @code{fflush()} function forces
@cindex extensions, common@comma{} @code{fflush()} function
@cindex Brian Kernighan's @command{awk}
-@code{fflush()} was added to Brian Kernighan's @command{awk} in
+@code{fflush()} was added to BWK @command{awk} in
April of 1992. For two decades, it was not part of the POSIX standard.
As of December, 2012, it was accepted for inclusion into the POSIX
standard.
@@ -18225,6 +18287,12 @@ them, i.e., to tell @command{awk} what they should do.
@node Definition Syntax
@subsection Function Definition Syntax
+@quotation
+It's entirely fair to say that the @command{awk} syntax for local
+variable definitions is appallingly awful.
+@author Brian Kernighan
+@end quotation
+
@c STARTOFRANGE fdef
@cindex functions, defining
Definitions of functions can appear anywhere between the rules of an
@@ -18264,7 +18332,7 @@ have a parameter with the same name as the function itself.
In addition, according to the POSIX standard, function parameters
cannot have the same name as one of the special built-in variables
(@pxref{Built-in Variables}). Not all versions of @command{awk} enforce
-this restriction.)
+this restriction.
Local variables act like the empty string if referenced where a string
value is required, and like zero if referenced where a numeric value
@@ -18394,7 +18462,8 @@ this program, using our function to format the results, prints:
21.2
@end example
-This function deletes all the elements in an array:
+This function deletes all the elements in an array (recall that the
+extra whitespace signifies the start of the local variable list):
@example
function delarray(a, i)
@@ -18417,17 +18486,18 @@ addition to the POSIX standard.)
The following is an example of a recursive function. It takes a string
as an input parameter and returns the string in backwards order.
Recursive functions must always have a test that stops the recursion.
-In this case, the recursion terminates when the starting position
-is zero, i.e., when there are no more characters left in the string.
+In this case, the recursion terminates when the input string is
+already empty.
+@c 8/2014: Thanks to Mike Brennan for the improved formulation
@cindex @code{rev()} user-defined function
@example
-function rev(str, start)
+function rev(str)
@{
- if (start == 0)
+ if (str == "")
return ""
- return (substr(str, start, 1) rev(str, start - 1))
+ return (rev(substr(str, 2)) substr(str, 1, 1))
@}
@end example
@@ -18436,7 +18506,7 @@ this way:
@example
$ @kbd{echo "Don't Panic!" |}
-> @kbd{gawk --source '@{ print rev($0, length($0)) @}' -f rev.awk}
+> @kbd{gawk -e '@{ print rev($0) @}' -f rev.awk}
@print{} !cinaP t'noD
@end example
@@ -18721,7 +18791,7 @@ BEGIN @{
@noindent
prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because
-@code{changeit} stores @code{"two"} in the second element of @code{a}.
+@code{changeit()} stores @code{"two"} in the second element of @code{a}.
@end quotation
@cindex undefined functions
@@ -18897,7 +18967,7 @@ being aware of them.
@cindex pointers to functions
@cindex differences in @command{awk} and @command{gawk}, indirect function calls
-This section describes a @command{gawk}-specific extension.
+This section describes an advanced, @command{gawk}-specific extension.
Often, you may wish to defer the choice of function to call until runtime.
For example, you may have different kinds of records, each of which
@@ -18943,8 +19013,11 @@ To process the data, you might write initially:
@noindent
This style of programming works, but can be awkward. With @dfn{indirect}
function calls, you tell @command{gawk} to use the @emph{value} of a
-variable as the name of the function to call.
+variable as the @emph{name} of the function to call.
+@cindex @code{@@}-notation for indirect function calls
+@cindex indirect function calls, @code{@@}-notation
+@cindex function calls, indirect, @code{@@}-notation for
The syntax is similar to that of a regular function call: an identifier
immediately followed by a left parenthesis, any arguments, and then
a closing right parenthesis, with the addition of a leading @samp{@@}
@@ -19002,7 +19075,6 @@ Otherwise they perform the expected computations and are not unusual.
@example
@c file eg/prog/indirectcall.awk
# For each record, print the class name and the requested statistics
-
@{
class_name = $1
gsub(/_/, " ", class_name) # Replace _ with spaces
@@ -19231,10 +19303,12 @@ $ @kbd{gawk -f quicksort.awk -f indirectcall.awk class_data2}
Remember that you must supply a leading @samp{@@} in front of an indirect function call.
-Unfortunately, indirect function calls cannot be used with the built-in functions. However,
-you can generally write ``wrapper'' functions which call the built-in ones, and those can
-be called indirectly. (Other than, perhaps, the mathematical functions, there is not a lot
-of reason to try to call the built-in functions indirectly.)
+Starting with @value{PVERSION} 4.1.2 of @command{gawk}, indirect function
+calls may also be used with built-in functions and with extension functions
+(@pxref{Dynamic Extensions}). The only thing you cannot do is pass a regular
+expression constant to a built-in function through an indirect function
+call.@footnote{This may change in a future version; recheck the documentation that
+comes with your version of @command{gawk} to see if it has.}
@command{gawk} does its best to make indirect function calls efficient.
For example, in the following case:
@@ -19245,7 +19319,7 @@ for (i = 1; i <= n; i++)
@end example
@noindent
-@code{gawk} will look up the actual function to call only once.
+@command{gawk} looks up the actual function to call only once.
@node Functions Summary
@section Summary
@@ -19285,6 +19359,8 @@ from the real parameters by extra whitespace.
User-defined functions may call other user-defined (and built-in)
functions and may call themselves recursively. Function parameters
``hide'' any global variables of the same names.
+You cannot use the name of a reserved variable (such as @code{ARGC})
+as the name of a parameter in user-defined functions.
@item
Scalar values are passed to user-defined functions by value. Array
@@ -19303,7 +19379,7 @@ either scalar or array.
@item
@command{gawk} provides indirect function calls using a special syntax.
-By setting a variable to the name of a user-defined function, you can
+By setting a variable to the name of a function, you can
determine at runtime what function will be called at that point in the
program. This is equivalent to function pointers in C and C++.
@@ -19338,7 +19414,7 @@ It contains the following chapters:
@c STARTOFRANGE fudlib
@cindex functions, user-defined, library of
-@ref{User-defined}, describes how to write
+@DBREF{User-defined} describes how to write
your own @command{awk} functions. Writing functions is important, because
it allows you to encapsulate algorithms and program tasks in a single
place. It simplifies programming, making program development more
@@ -19362,7 +19438,7 @@ of good programs leads to better writing.
In fact, they felt this idea was so important that they placed this
statement on the cover of their book. Because we believe strongly
that their statement is correct, this @value{CHAPTER} and @ref{Sample
-Programs}, provide a good-sized body of code for you to read, and we hope,
+Programs}, provide a good-sized body of code for you to read and, we hope,
to learn from.
This @value{CHAPTER} presents a library of useful @command{awk} functions.
@@ -19371,7 +19447,7 @@ use these functions.
The functions are presented here in a progression from simple to complex.
@cindex Texinfo
-@ref{Extract Program},
+@DBREF{Extract Program}
presents a program that you can use to extract the source code for
these example library functions and programs from the Texinfo source
for this @value{DOCUMENT}.
@@ -19435,7 +19511,7 @@ comparisons use only lowercase letters.
* Group Functions:: Functions for getting group information.
* Walking Arrays:: A function to walk arrays of arrays.
* Library Functions Summary:: Summary of library functions.
-* Library exercises:: Exercises.
+* Library Exercises:: Exercises.
@end menu
@node Library Names
@@ -19522,7 +19598,7 @@ A different convention, common in the Tcl community, is to use a single
associative array to hold the values needed by the library function(s), or
``package.'' This significantly decreases the number of actual global names
in use. For example, the functions described in
-@ref{Passwd Functions},
+@DBREF{Passwd Functions}
might have used array elements @code{@w{PW_data["inited"]}}, @code{@w{PW_data["total"]}},
@code{@w{PW_data["count"]}}, and @code{@w{PW_data["awklib"]}}, instead of
@code{@w{_pw_inited}}, @code{@w{_pw_awklib}}, @code{@w{_pw_total}},
@@ -19583,8 +19659,9 @@ function mystrtonum(str, ret, n, i, k, c)
ret = 0
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
- if ((k = index("01234567", c)) > 0)
- k-- # adjust for 1-basing in awk
+ # index() returns 0 if c not in string,
+ # includes c == "0"
+ k = index("1234567", c)
ret = ret * 8 + k
@}
@@ -19596,6 +19673,8 @@ function mystrtonum(str, ret, n, i, k, c)
for (i = 1; i <= n; i++) @{
c = substr(str, i, 1)
c = tolower(c)
+ # index() returns 0 if c not in string,
+ # includes c == "0"
k = index("123456789abcdef", c)
ret = ret * 16 + k
@@ -19997,8 +20076,7 @@ function chr(c)
@c endfile
#### test code ####
-# BEGIN \
-# @{
+# BEGIN @{
# for (;;) @{
# printf("enter a character: ")
# if (getline var <= 0)
@@ -20083,7 +20161,7 @@ more difficult than they really need to be.}
@cindex timestamps, formatted
@cindex time, managing
The @code{systime()} and @code{strftime()} functions described in
-@ref{Time Functions},
+@DBREF{Time Functions}
provide the minimum functionality necessary for dealing with the time of day
in human readable form. While @code{strftime()} is extensive, the control
formats are not necessarily easy to remember or intuitively obvious when
@@ -20169,7 +20247,7 @@ function getlocaltime(time, ret, now, i)
The string indices are easier to use and read than the various formats
required by @code{strftime()}. The @code{alarm} program presented in
-@ref{Alarm Program},
+@DBREF{Alarm Program}
uses this function.
A more general design for the @code{getlocaltime()} function would have
allowed the user to supply an optional timestamp value to use instead
@@ -20372,7 +20450,7 @@ END @{ endfile(_filename_) @}
@c endfile
@end example
-@ref{Wc Program},
+@DBREF{Wc Program}
shows how this library function can be used and
how it simplifies writing the main program.
@@ -20843,8 +20921,7 @@ it is not an option, and it ends option processing. Continuing on:
i = index(options, thisopt)
if (i == 0) @{
if (Opterr)
- printf("%c -- invalid option\n",
- thisopt) > "/dev/stderr"
+ printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
if (_opti >= length(argv[Optind])) @{
Optind++
_opti = 0
@@ -21347,7 +21424,7 @@ once. If you are worried about squeezing every last cycle out of your
this is not necessary, since most @command{awk} programs are I/O-bound,
and such a change would clutter up the code.
-The @command{id} program in @ref{Id Program},
+The @command{id} program in @DBREF{Id Program}
uses these functions.
@c ENDOFRANGE libfudata
@c ENDOFRANGE flibudata
@@ -21373,7 +21450,7 @@ uses these functions.
@cindex group file
@cindex files, group
Much of the discussion presented in
-@ref{Passwd Functions},
+@DBREF{Passwd Functions}
applies to the group database as well. Although there has traditionally
been a well-known file (@file{/etc/group}) in a well-known format, the POSIX
standard only provides a set of C library routines
@@ -21526,8 +21603,7 @@ There are several, modeled after the C library functions of the same names:
@c line break on _gr_init for smallbook
@c file eg/lib/groupawk.in
-BEGIN \
-@{
+BEGIN @{
# Change to suit your system
_gr_awklib = "/usr/local/libexec/awk/"
@}
@@ -21713,13 +21789,13 @@ Most of the work is in scanning the database and building the various
associative arrays. The functions that the user calls are themselves very
simple, relying on @command{awk}'s associative arrays to do work.
-The @command{id} program in @ref{Id Program},
+The @command{id} program in @DBREF{Id Program}
uses these functions.
@node Walking Arrays
@section Traversing Arrays of Arrays
-@ref{Arrays of Arrays}, described how @command{gawk}
+@DBREF{Arrays of Arrays} described how @command{gawk}
provides arrays of arrays. In particular, any element of
an array may be either a scalar, or another array. The
@code{isarray()} function (@pxref{Type Functions})
@@ -21825,7 +21901,8 @@ A simple function to traverse an array of arrays to any depth.
@end itemize
-@node Library exercises
+@c EXCLUDE START
+@node Library Exercises
@section Exercises
@enumerate
@@ -21873,7 +21950,7 @@ As a related challenge, revise that code to handle the case where
an intervening value in @code{ARGV} is a variable assignment.
@item
-@ref{Walking Arrays}, presented a function that walked a multidimensional
+@DBREF{Walking Arrays} presented a function that walked a multidimensional
array to print it out. However, walking an array and processing
each element is a general-purpose operation. Generalize the
@code{walk_array()} function by adding an additional parameter named
@@ -21891,6 +21968,7 @@ Test your new version by printing the array; you should end up with
output identical to that of the original version.
@end enumerate
+@c EXCLUDE END
@c ENDOFRANGE flib
@c ENDOFRANGE fudlib
@@ -22104,8 +22182,7 @@ string:
@example
@c file eg/prog/cut.awk
-BEGIN \
-@{
+BEGIN @{
FS = "\t" # default
OFS = FS
while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) @{
@@ -22580,8 +22657,7 @@ there are no matches, the exit status is one; otherwise it is zero:
@example
@c file eg/prog/egrep.awk
-END \
-@{
+END @{
exit (total == 0)
@}
@c endfile
@@ -22605,17 +22681,6 @@ function usage( e)
The variable @code{e} is used so that the function fits nicely
on the printed page.
-@cindex @code{END} pattern, backslash continuation and
-@cindex @code{\} (backslash), continuing lines and
-@cindex backslash (@code{\}), continuing lines and
-Just a note on programming style: you may have noticed that the @code{END}
-rule uses backslash continuation, with the open brace on a line by
-itself. This is so that it more closely resembles the way functions
-are written. Many of the examples
-in this @value{CHAPTER}
-use this style. You can decide for yourself if you like writing
-your @code{BEGIN} and @code{END} rules this way
-or not.
@c ENDOFRANGE regexps
@c ENDOFRANGE sfregexp
@c ENDOFRANGE fsregexp
@@ -22682,8 +22747,7 @@ numbers:
# egid=5(blat) groups=9(nine),2(two),1(one)
@group
-BEGIN \
-@{
+BEGIN @{
uid = PROCINFO["uid"]
euid = PROCINFO["euid"]
gid = PROCINFO["gid"]
@@ -22900,6 +22964,12 @@ instead of doing it in an @code{END} rule.
It also assumes that letters are contiguous in the character set,
which isn't true for EBCDIC systems.
+@ifset FOR_PRINT
+You might want to consider how to eliminate the use of
+@code{ord()} and @code{chr()}; this can be done in such a
+way as to solve the EBCDIC issue as well.
+@end ifset
+
@c ENDOFRANGE filspl
@c ENDOFRANGE split
@@ -22953,8 +23023,7 @@ Finally, @command{awk} is forced to read the standard input by setting
@c endfile
@end ignore
@c file eg/prog/tee.awk
-BEGIN \
-@{
+BEGIN @{
for (i = 1; i < ARGC; i++)
copy[i] = ARGV[i]
@@ -23016,8 +23085,7 @@ Finally, the @code{END} rule cleans up by closing all the output files:
@example
@c file eg/prog/tee.awk
-END \
-@{
+END @{
for (i in copy)
close(copy[i])
@}
@@ -23134,8 +23202,7 @@ function usage( e)
# -n skip n fields
# +n skip n characters, skip fields first
-BEGIN \
-@{
+BEGIN @{
count = 1
outputfile = "/dev/stdout"
opts = "udc0:1:2:3:4:5:6:7:8:9:"
@@ -23147,7 +23214,7 @@ BEGIN \
else if (c == "c")
do_count++
else if (index("0123456789", c) != 0) @{
- # getopt requires args to options
+ # getopt() requires args to options
# this messes us up for things like -5
if (Optarg ~ /^[[:digit:]]+$/)
fcount = (c Optarg) + 0
@@ -23284,6 +23351,22 @@ END @{
@}
@c endfile
@end example
+
+@ifset FOR_PRINT
+The logic for choosing which lines to print represents a @dfn{state
+machine}, which is ``a device that can be in one of a set number of stable
+conditions depending on its previous condition and on the present values
+of its inputs.''@footnote{This is the definition returned from entering
+@code{define: state machine} into Google.}
+Brian Kernighan suggests that
+``an alternative approach to state machines is to just read
+the input into an array, then use indexing. It's almost always
+easier code, and for most inputs where you would use this, just
+as fast.'' Consider how to rewrite the logic to follow this
+suggestion.
+@end ifset
+
+
@c ENDOFRANGE prunt
@c ENDOFRANGE tpul
@c ENDOFRANGE uniq
@@ -23654,8 +23737,7 @@ Here is the program:
@c file eg/prog/alarm.awk
# usage: alarm time [ "message" [ count [ delay ] ] ]
-BEGIN \
-@{
+BEGIN @{
# Initial argument sanity checking
usage1 = "usage: alarm time ['message' [count [delay]]]"
usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
@@ -23810,7 +23892,7 @@ of standard @command{awk}: dealing with individual characters is very
painful, requiring repeated use of the @code{substr()}, @code{index()},
and @code{gsub()} built-in functions
(@pxref{String Functions}).@footnote{This
-program was written before @command{gawk} acquired the ability to
+program was also written before @command{gawk} acquired the ability to
split each character in a string into separate array elements.}
There are two functions. The first, @code{stranslate()}, takes three
arguments:
@@ -23918,6 +24000,12 @@ An obvious improvement to this program would be to set up the
@code{t_ar} array only once, in a @code{BEGIN} rule. However, this
assumes that the ``from'' and ``to'' lists
will never change throughout the lifetime of the program.
+
+Another obvious improvement is to enable the use of ranges,
+such as @samp{a-z}, as allowed by the @command{tr} utility.
+Look at the code for @file{cut.awk} (@pxref{Cut Program})
+for inspiration.
+
@c ENDOFRANGE chtra
@c ENDOFRANGE tr
@@ -24050,8 +24138,7 @@ function printpage( i, j)
Count++
@}
-END \
-@{
+END @{
printpage()
@}
@c endfile
@@ -24702,7 +24789,7 @@ a shell variable that will be expanded. There are two cases:
@enumerate a
@item
-Literal text, provided with @option{--source} or @option{--source=}. This
+Literal text, provided with @option{-e} or @option{--source}. This
text is just appended directly.
@item
@@ -25047,7 +25134,7 @@ The program should exit without reading any @value{DF}s.
However, suppose that an included library file defines an @code{END}
rule of its own. In this case, @command{gawk} will hang, reading standard
input. In order to avoid this, @file{/dev/null} is explicitly added to the
-command-line. Reading from @file{/dev/null} always returns an immediate
+command line. Reading from @file{/dev/null} always returns an immediate
end of file indication.
@c Hmm. Add /dev/null if $# is 0? Still messes up ARGV. Sigh.
@@ -25390,6 +25477,7 @@ mailing labels, and finding anagrams.
@end itemize
+@c EXCLUDE START
@node Programs Exercises
@section Exercises
@@ -25413,17 +25501,27 @@ information is printed. Modify the @command{awk} version
same way.
@item
-The @code{split.awk} program (@pxref{Split Program}) uses
-the @code{chr()} and @code{ord()} functions to move through the
-letters of the alphabet.
-Modify the program to instead use only the @command{awk}
-built-in functions, such as @code{index()} and @code{substr()}.
-
-@item
The @code{split.awk} program (@pxref{Split Program}) assumes
that letters are contiguous in the character set,
which isn't true for EBCDIC systems.
Fix this problem.
+(Hint: Consider a different way to work through the alphabet,
+without relying on @code{ord()} and @code{chr()}.)
+
+@item
+In @file{uniq.awk} (@pxref{Uniq Program}), the
+logic for choosing which lines to print represents a @dfn{state
+machine}, which is ``a device that can be in one of a set number of stable
+conditions depending on its previous condition and on the present values
+of its inputs.''@footnote{This is the definition returned from entering
+@code{define: state machine} into Google.}
+Brian Kernighan suggests that
+``an alternative approach to state machines is to just read
+the input into an array, then use indexing. It's almost always
+easier code, and for most inputs where you would use this, just
+as fast.'' Rewrite the logic to follow this
+suggestion.
+
@item
Why can't the @file{wc.awk} program (@pxref{Wc Program}) just
@@ -25519,6 +25617,7 @@ Modify @file{anagram.awk} (@pxref{Anagram Program}), to avoid
the use of the external @command{sort} utility.
@end enumerate
+@c EXCLUDE END
@ifnotinfo
@part @value{PART3}Moving Beyond Standard @command{awk} With @command{gawk}
@@ -25700,7 +25799,7 @@ Often, though, it is desirable to be able to loop over the elements
in a particular order that you, the programmer, choose. @command{gawk}
lets you do this.
-@ref{Controlling Scanning}, describes how you can assign special,
+@DBREF{Controlling Scanning} describes how you can assign special,
pre-defined values to @code{PROCINFO["sorted_in"]} in order to
control the order in which @command{gawk} traverses an array
during a @code{for} loop.
@@ -26069,6 +26168,9 @@ Caveat Emptor.
@node Two-way I/O
@section Two-Way Communications with Another Process
+
+@c 8/2014. Neither Mike nor BWK saw this as relevant. Commenting it out.
+@ignore
@cindex Brennan, Michael
@cindex programmers, attractiveness of
@smallexample
@@ -26098,6 +26200,7 @@ the scent of perl programmers.
Mike Brennan
@c brennan@@whidbey.com
@end smallexample
+@end ignore
@cindex advanced features, processes@comma{} communicating with
@cindex processes, two-way communications with
@@ -26124,7 +26227,10 @@ system("rm " tempfile)
This works, but not elegantly. Among other things, it requires that
the program be run in a directory that cannot be shared among users;
for example, @file{/tmp} will not do, as another user might happen
-to be using a temporary file with the same name.
+to be using a temporary file with the same name.@footnote{Michael
+Brennan suggests the use of @code{rand()} to generate unique
+@value{FN}s. This is a valid point; nevertheless, temporary files
+remain more difficult than two-way pipes.} @c 8/2014
@cindex coprocesses
@cindex input/output, two-way
@@ -26279,7 +26385,7 @@ You can think of this as just a @emph{very long} two-way pipeline to
a coprocess.
The way @command{gawk} decides that you want to use TCP/IP networking is
by recognizing special @value{FN}s that begin with one of @samp{/inet/},
-@samp{/inet4/} or @samp{/inet6}.
+@samp{/inet4/} or @samp{/inet6/}.
The full syntax of the special @value{FN} is
@file{/@var{net-type}/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}}.
@@ -26930,7 +27036,16 @@ and/or groups of characters sort in a given language.
@cindex @code{LC_CTYPE} locale category
@item LC_CTYPE
Character-type information (alphabetic, digit, upper- or lowercase, and
-so on).
+so on) as well as character encoding.
+@ignore
+In June 2001 Bruno Haible wrote:
+- Description of LC_CTYPE: It determines both
+ 1. character encoding,
+ 2. character type information.
+ (For example, in both KOI8-R and ISO-8859-5 the character type information
+ is the same - cyrillic letters count as 'alpha' - but the encoding is
+ different.)
+@end ignore
This information is accessed via the
POSIX character classes in regular expressions,
such as @code{/[[:alnum:]]/}
@@ -26951,11 +27066,6 @@ use a comma every three decimal places and a period for the decimal
point, while many Europeans do exactly the opposite:
1,234.56 versus 1.234,56.}
-@cindex @code{LC_RESPONSE} locale category
-@item LC_RESPONSE
-Response information, such as how ``yes'' and ``no'' appear in the
-local language, and possibly other information as well.
-
@cindex time, localization and
@cindex dates, information related to@comma{} localization
@cindex @code{LC_TIME} locale category
@@ -27090,18 +27200,33 @@ printf(_"Number of users is %d\n", nusers)
@item
If you are creating strings dynamically, you can
still translate them, using the @code{dcgettext()}
-built-in function:
+built-in function:@footnote{Thanks to Bruno Haible for this
+example.}
@example
-message = nusers " users logged in"
-message = dcgettext(message, "adminprog")
-print message
+if (groggy)
+ message = dcgettext("%d customers disturbing me\n", "adminprog")
+else
+ message = dcgettext("enjoying %d customers\n", "adminprog")
+printf(message, ncustomers)
@end example
Here, the call to @code{dcgettext()} supplies a different
text domain (@code{"adminprog"}) in which to find the
message, but it uses the default @code{"LC_MESSAGES"} category.
+The previous example only works if @code{ncustomers} is greater than one.
+This example would be better done with @code{dcngettext()}:
+
+@example
+if (groggy)
+ message = dcngettext("%d customer disturbing me\n", "%d customers disturbing me\n", ncustomers, "adminprog")
+else
+ message = dcngettext("enjoying %d customer\n", "enjoying %d customers\n", ncustomers, "adminprog")
+printf(message, ncustomers)
+@end example
+
+
@cindex @code{LC_MESSAGES} locale category, @code{bindtextdomain()} function (@command{gawk})
@item
During development, you might want to put the @file{.gmo}
@@ -27181,6 +27306,9 @@ appear as the first argument to @code{dcgettext()} or as the first and
second argument to @code{dcngettext()}.@footnote{The
@command{xgettext} utility that comes with GNU
@command{gettext} can handle @file{.awk} files.}
+You should distribute the generated @file{.pot} file with
+your @command{awk} program; translators will eventually use it
+to provide you translations that you can also then distribute.
@xref{I18N Example},
for the full list of steps to go through to create and test
translations for @command{guide}.
@@ -27471,8 +27599,7 @@ This file must be renamed and placed in the proper directory so that
@command{gawk} can find it:
@example
-$ @kbd{msgfmt guide-mellow.po}
-$ @kbd{mv messages en_US.UTF-8/LC_MESSAGES/guide.mo}
+$ @kbd{msgfmt guide-mellow.po -o en_US.UTF-8/LC_MESSAGES/guide.mo}
@end example
Finally, we run the program to test it:
@@ -27739,7 +27866,7 @@ to debug command-line programs, only programs contained in files.)
In our case, we invoke the debugger like this:
@example
-$ @kbd{gawk -D -f getopt.awk -f join.awk -f uniq.awk inputfile}
+$ @kbd{gawk -D -f getopt.awk -f join.awk -f uniq.awk -1 inputfile}
@end example
@noindent
@@ -27801,7 +27928,7 @@ the breakpoint, use the @code{b} (breakpoint) command:
@example
gawk> @kbd{b are_equal}
-@print{} Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 64
+@print{} Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 63
@end example
The debugger tells us the file and line number where the breakpoint is.
@@ -27813,8 +27940,8 @@ gawk> @kbd{r}
@print{} Starting program:
@print{} Stopping in Rule ...
@print{} Breakpoint 1, are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':64
-@print{} 64 if (fcount == 0 && charcount == 0)
+ at `awklib/eg/prog/uniq.awk':63
+@print{} 63 if (fcount == 0 && charcount == 0)
gawk>
@end example
@@ -27826,12 +27953,12 @@ listing of the current stack frames:
@example
gawk> @kbd{bt}
@print{} #0 are_equal(n, m, clast, cline, alast, aline)
- at `awklib/eg/prog/uniq.awk':69
-@print{} #1 in main() at `awklib/eg/prog/uniq.awk':89
+ at `awklib/eg/prog/uniq.awk':68
+@print{} #1 in main() at `awklib/eg/prog/uniq.awk':88
@end example
This tells us that @code{are_equal()} was called by the main program at
-line 89 of @file{uniq.awk}. (This is not a big surprise, since this
+line 88 of @file{uniq.awk}. (This is not a big surprise, since this
is the only call to @code{are_equal()} in the program, but in more complex
programs, knowing who called a function and with what parameters can be
the key to finding the source of the problem.)
@@ -27855,7 +27982,7 @@ A more useful variable to display might be the current record:
@example
gawk> @kbd{p $0}
-@print{} $0 = string ("gawk is a wonderful program!")
+@print{} $0 = "gawk is a wonderful program!"
@end example
@noindent
@@ -27864,7 +27991,7 @@ our test input above. Let's look at @code{NR}:
@example
gawk> @kbd{p NR}
-@print{} NR = number (2)
+@print{} NR = 2
@end example
@noindent
@@ -27883,7 +28010,7 @@ OK, let's just check that that rule worked correctly:
@example
gawk> @kbd{p last}
-@print{} last = string ("awk is a wonderful program!")
+@print{} last = "awk is a wonderful program!"
@end example
Everything we have done so far has verified that the program has worked as
@@ -27894,13 +28021,13 @@ be inside this function. To investigate further, we must begin
@example
gawk> @kbd{n}
-@print{} 67 if (fcount > 0) @{
+@print{} 66 if (fcount > 0) @{
@end example
-This tells us that @command{gawk} is now ready to execute line 67, which
+This tells us that @command{gawk} is now ready to execute line 66, which
decides whether to give the lines the special ``field skipping'' treatment
-indicated by the @option{-f} command-line option. (Notice that we skipped
-from where we were before at line 64 to here, since the condition in line 64
+indicated by the @option{-1} command-line option. (Notice that we skipped
+from where we were before at line 63 to here, since the condition in line 63
@samp{if (fcount == 0 && charcount == 0)} was false.)
Continuing to step, we now get to the splitting of the current and
@@ -27908,9 +28035,9 @@ last records:
@example
gawk> @kbd{n}
-@print{} 68 n = split(last, alast)
+@print{} 67 n = split(last, alast)
gawk> @kbd{n}
-@print{} 69 m = split($0, aline)
+@print{} 68 m = split($0, aline)
@end example
At this point, we should be curious to see what our records were split
@@ -27918,10 +28045,10 @@ into, so we try to look:
@example
gawk> @kbd{p n m alast aline}
-@print{} n = number (5)
-@print{} m = number (5)
+@print{} n = 5
+@print{} m = untyped variable
@print{} alast = array, 5 elements
-@print{} aline = array, 5 elements
+@print{} aline = untyped variable
@end example
@noindent
@@ -27929,7 +28056,9 @@ gawk> @kbd{p n m alast aline}
@command{awk}'s @code{print} statement.)
This is kind of disappointing, though. All we found out is that there
-are five elements in each of our arrays. Useful enough (we now know that
+are five elements in @code{alast}; @code{m} and @code{aline} don't have
+values yet, since we are at line 68 but haven't executed it.
+This information is useful enough (we now know that
none of the words were accidentally left out), but what if we want to see
inside the array?
@@ -27945,7 +28074,7 @@ Oops!
@example
gawk> @kbd{p alast[1]}
-@print{} alast["1"] = string ("awk")
+@print{} alast["1"] = "awk"
@end example
This would be kind of slow for a 100-member array, though, so
@@ -27954,11 +28083,11 @@ not to be mentioned):
@example
gawk> @kbd{p @@alast}
-@print{} alast["1"] = string ("awk")
-@print{} alast["2"] = string ("is")
-@print{} alast["3"] = string ("a")
-@print{} alast["4"] = string ("wonderful")
-@print{} alast["5"] = string ("program!")
+@print{} alast["1"] = "awk"
+@print{} alast["2"] = "is"
+@print{} alast["3"] = "a"
+@print{} alast["4"] = "wonderful"
+@print{} alast["5"] = "program!"
@end example
It looks like we got this far OK. Let's take another step
@@ -27966,9 +28095,9 @@ or two:
@example
gawk> @kbd{n}
-@print{} 70 clast = join(alast, fcount, n)
+@print{} 69 clast = join(alast, fcount, n)
gawk> @kbd{n}
-@print{} 71 cline = join(aline, fcount, m)
+@print{} 70 cline = join(aline, fcount, m)
@end example
Well, here we are at our error (sorry to spoil the suspense). What we
@@ -27978,8 +28107,8 @@ this would work. Let's look at what we've got:
@example
gawk> @kbd{p cline clast}
-@print{} cline = string ("gawk is a wonderful program!")
-@print{} clast = string ("awk is a wonderful program!")
+@print{} cline = "gawk is a wonderful program!"
+@print{} clast = "awk is a wonderful program!"
@end example
Hey, those look pretty familiar! They're just our original, unaltered,
@@ -28826,7 +28955,9 @@ responds @samp{syntax error}. When you do figure out what your mistake was,
though, you'll feel like a real guru.
@item
-If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands},
+@c NOTE: no comma after the ref{} on purpose, due to following
+@c parenthetical remark.
+If you perused the dump of opcodes in @ref{Miscellaneous Debugger Commands}
(or if you are already familiar with @command{gawk} internals),
you will realize that much of the internal manipulation of data
in @command{gawk}, as in many interpreters, is done on a stack.
@@ -28874,7 +29005,7 @@ similarly to the GNU Debugger, GDB.
@item
Debuggers let you step through your program one statement at a time,
examine and change variable and array values, and do a number of other
-things that let understand what your program is actually doing (as
+things that let you understand what your program is actually doing (as
opposed to what it is supposed to do).
@item
@@ -28912,6 +29043,12 @@ arbitrary precision integers, and concludes with a description of some
points where @command{gawk} and the POSIX standard are not quite in
agreement.
+@quotation NOTE
+Most users of @command{gawk} can safely skip this chapter.
+But if you want to do scientific calculations with @command{gawk},
+this is the place to be.
+@end quotation
+
@menu
* Computer Arithmetic:: A quick intro to computer math.
* Math Definitions:: Defining terms used.
@@ -29031,8 +29168,23 @@ A special value representing infinity. Operations involving another
number and infinity produce infinity.
@item NaN
-``Not A Number.'' A special value indicating a result that can't
-happen in real math, but that can happen in floating-point computations.
+``Not A Number.''@footnote{Thanks
+to Michael Brennan for this description, which I have paraphrased, and
+for the examples.}
+A special value that results from attempting a
+calculation that has no answer as a real number. In such a case,
+programs can either receive a floating-point exception, or get @code{NaN}
+back as the result. The IEEE 754 standard recommends that systems return
+@code{NaN}. Some examples:
+
+@table @code
+@item sqrt(-1)
+This makes sense in the range of complex numbers, but not in the
+range of real numbers, so the result is @code{NaN}.
+
+@item log(-8)
+@minus{}8 is out of the domain of @code{log()}, so the result is @code{NaN}.
+@end table
@item Normalized
How the significand (see later in this list) is usually stored. The
@@ -29139,8 +29291,8 @@ array to provide information about the MPFR and GMP libraries
The MPFR library provides precise control over precisions and rounding
modes, and gives correctly rounded, reproducible, platform-independent
-results. With either of the command-line options @option{--bignum} or
-@option{-M}, all floating-point arithmetic operators and numeric functions
+results. With the @option{-M} command-line option,
+all floating-point arithmetic operators and numeric functions
can yield results to any desired precision level supported by MPFR.
Two built-in variables, @code{PREC} and @code{ROUNDMODE},
@@ -29154,7 +29306,7 @@ to follow.
@quotation
Math class is tough!
-@author Late 1980's Barbie
+@author Teen Talk Barbie, July 1992
@end quotation
This @value{SECTION} provides a high level overview of the issues
@@ -29450,7 +29602,7 @@ internally as a MPFR number. Changing the precision using @code{PREC}
in the program text does @emph{not} change the precision of a constant.
If you need to represent a floating-point constant at a higher precision
-than the default and cannot use a command line assignment to @code{PREC},
+than the default and cannot use a command-line assignment to @code{PREC},
you should either specify the constant as a string, or as a rational
number, whenever possible. The following example illustrates the
differences among various ways to print a floating-point constant:
@@ -29566,7 +29718,7 @@ output when you change the rounding mode to be sure.
@cindex integers, arbitrary precision
@cindex arbitrary precision integers
-When given one of the options @option{--bignum} or @option{-M},
+When given the @option{-M} option,
@command{gawk} performs all integer arithmetic using GMP arbitrary
precision integers. Any number that looks like an integer in a source
or @value{DF} is stored as an arbitrary precision integer. The size
@@ -29680,8 +29832,20 @@ You can simulate the @code{div()} function in standard @command{awk}
using this user-defined function:
@example
+@c file eg/lib/div.awk
# div --- do integer division
+@c endfile
+@ignore
+@c file eg/lib/div.awk
+#
+# Arnold Robbins, arnold@@skeeve.com, Public Domain
+# July, 2014
+
+@c endfile
+
+@end ignore
+@c file eg/lib/div.awk
function div(numerator, denominator, result, i)
@{
split("", result)
@@ -29693,6 +29857,7 @@ function div(numerator, denominator, result, i)
return 0.0
@}
+@c endfile
@end example
@node POSIX Floating Point Problems
@@ -29806,7 +29971,7 @@ values. The default for @command{awk} is to use double-precision
floating-point values.
@item
-In the 1980's, Barbie mistakenly said ``Math class is tough!''
+In the early 1990's, Barbie mistakenly said ``Math class is tough!''
While math isn't tough, floating-point arithmetic isn't the same
as pencil and paper math, and care must be taken:
@@ -29834,12 +29999,12 @@ Often, increasing the accuracy and then rounding to the desired
number of digits produces reasonable results.
@item
-Use either @option{-M} or @option{--bignum} to enable MPFR
+Use @option{-M} (or @option{--bignum}) to enable MPFR
arithmetic. Use @code{PREC} to set the precision in bits, and
@code{ROUNDMODE} to set the IEEE 754 rounding mode.
@item
-With @option{-M} or @option{--bignum}, @command{gawk} performs
+With @option{-M}, @command{gawk} performs
arbitrary precision integer arithmetic using the GMP library.
This is faster and more space efficient than using MPFR for
the same calculations.
@@ -30071,7 +30236,7 @@ Some other bits and pieces:
@itemize @value{BULLET}
@item
The API provides access to @command{gawk}'s @code{do_@var{xxx}} values,
-reflecting command line options, like @code{do_lint}, @code{do_profiling}
+reflecting command-line options, like @code{do_lint}, @code{do_profiling}
and so on (@pxref{Extension API Variables}).
These are informational: an extension cannot affect their values
inside @command{gawk}. In addition, attempting to assign to them
@@ -30222,7 +30387,7 @@ does not support this keyword, you should either place
@file{config.h} file in your extensions.
@item
-All pointers filled in by @command{gawk} are to memory
+All pointers filled in by @command{gawk} point to memory
managed by @command{gawk} and should be treated by the extension as
read-only. Memory for @emph{all} strings passed into @command{gawk}
from the extension @emph{must} come from calling the API-provided function
@@ -30756,8 +30921,8 @@ empty string (@code{""}). The @code{func} pointer is the address of a
An @dfn{exit callback} function is a function that
@command{gawk} calls before it exits.
Such functions are useful if you have general ``cleanup'' tasks
-that should be performed in your extension (such as closing data
-base connections or other resource deallocations).
+that should be performed in your extension (such as closing database
+connections or other resource deallocations).
You can register such
a function with @command{gawk} using the following function.
@@ -33835,6 +34000,7 @@ should be the place to do so.
@end itemize
+@c EXCLUDE START
@node Extension Exercises
@section Exercises
@@ -33857,6 +34023,7 @@ Write a wrapper script that provides an interface similar to
@ref{Extension Sample Inplace}.
@end enumerate
+@c EXCLUDE END
@ifnotinfo
@part @value{PART4}Appendices
@@ -34287,7 +34454,7 @@ Indirect function calls
@item
Directories on the command line produce a warning and are skipped
-(@pxref{Command line directories}).
+(@pxref{Command-line directories}).
@end itemize
@item
@@ -34371,8 +34538,7 @@ functions for internationalization
(@pxref{Programmer i18n}).
@item
-The @code{fflush()} function from Brian Kernighan's
-version of @command{awk}
+The @code{fflush()} function from BWK @command{awk}
(@pxref{I/O Functions}).
@item
@@ -34436,7 +34602,7 @@ and the
@option{--copyright},
@option{--debug},
@option{--dump-variables},
-@option{--execle},
+@option{--exec},
@option{--field-separator},
@option{--file},
@option{--gen-pot},
@@ -34517,6 +34683,10 @@ and the documentation for @command{gawk} @value{PVERSION} 4.1:
Ultrix
@end itemize
+@item
+@c FIXME: Verify the version here.
+Support for MirBSD was removed at @command{gawk} @value{PVERSION} 4.2.
+
@end itemize
@c XXX ADD MORE STUFF HERE
@@ -34634,7 +34804,7 @@ The ability to delete all of an array at once with @samp{delete @var{array}}
(@pxref{Delete}).
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
@itemize @value{MINUS}
@@ -34692,12 +34862,12 @@ The @code{next file} statement became @code{nextfile}
@item
The @code{fflush()} function from
-Brian Kernighan's @command{awk}
+BWK @command{awk}
(then at Bell Laboratories;
@pxref{I/O Functions}).
@item
-New command line options:
+New command-line options:
@itemize @value{MINUS}
@item
@@ -34707,7 +34877,7 @@ the original Version 7 Unix version of @command{awk}
(@pxref{V7/SVR3.1}).
@item
-The @option{-m} option from Brian Kernighan's @command{awk}. (He was
+The @option{-m} option from BWK @command{awk}. (Brian was
still at Bell Laboratories at the time.) This was later removed from
both his @command{awk} and from @command{gawk}.
@@ -34949,7 +35119,7 @@ An optional third argument to
(@pxref{String Functions}).
@item
-The behavior of @code{fflush()} changed to match Brian Kernighan's @command{awk}
+The behavior of @code{fflush()} changed to match BWK @command{awk}
and for POSIX; now both @samp{fflush()} and @samp{fflush("")}
flush all open output redirections
(@pxref{I/O Functions}).
@@ -34987,7 +35157,7 @@ Indirect function calls
(@pxref{Switch Statement}).
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
@itemize @value{MINUS}
@@ -35012,7 +35182,7 @@ All long options acquired corresponding short options, for use in @samp{#!} scri
@item
Directories named on the command line now produce a warning, not a fatal
error, unless @option{--posix} or @option{--traditional} are used
-(@pxref{Command line directories}).
+(@pxref{Command-line directories}).
@item
The @command{gawk} internals were rewritten, bringing the @command{dgawk}
@@ -35088,10 +35258,10 @@ Three new arrays:
@item
The three executables @command{gawk}, @command{pgawk}, and @command{dgawk}, were merged into
-one, named just @command{gawk}. As a result the command line options changed.
+one, named just @command{gawk}. As a result the command-line options changed.
@item
-Command line option changes
+Command-line option changes
(@pxref{Options}):
@itemize @value{MINUS}
@@ -36433,7 +36603,7 @@ The following changes the record separator to @code{"\r\n"} and sets binary
mode on reads, but does not affect the mode on standard input:
@example
-gawk -v RS="\r\n" --source "BEGIN @{ BINMODE = 1 @}" @dots{}
+gawk -v RS="\r\n" -e "BEGIN @{ BINMODE = 1 @}" @dots{}
@end example
@noindent
@@ -37046,7 +37216,7 @@ since approximately 2003.
@cindex source code, @command{pawk}
@item @command{pawk}
Nelson H.F.@: Beebe at the University of Utah has modified
-Brian Kernighan's @command{awk} to provide timing and profiling information.
+BWK @command{awk} to provide timing and profiling information.
It is different from @command{gawk} with the @option{--profile} option
(@pxref{Profiling}),
in that it uses CPU-based profiling, not line-count
@@ -37109,8 +37279,7 @@ This is an embeddable @command{awk} interpreter derived from
This is a Python module that claims to bring @command{awk}-like
features to Python. See @uref{https://github.com/alecthomas/pawk}
for more information. (This is not related to Nelson Beebe's
-modified version of Brian Kernighan's @command{awk},
-described earlier.)
+modified version of BWK @command{awk}, described earlier.)
@item @w{QSE Awk}
@cindex QSE Awk
@@ -37249,7 +37418,7 @@ as well as any considerations you should bear in mind.
@appendixsubsec Accessing The @command{gawk} Git Repository
As @command{gawk} is Free Software, the source code is always available.
-@ref{Gawk Distribution}, describes how to get and build the formal,
+@DBREF{Gawk Distribution} describes how to get and build the formal,
released versions of @command{gawk}.
@cindex @command{git} utility
@@ -38131,7 +38300,7 @@ compiled with @samp{-DDEBUG}.
@item
The source code for @command{gawk} is maintained in a publicly
-accessable Git repository. Anyone may check it out and view the source.
+accessible Git repository. Anyone may check it out and view the source.
@item
Contributions to @command{gawk} are welcome. Following the steps
@@ -40469,13 +40638,14 @@ Consistency issues:
Use "zeros" instead of "zeroes".
Use "nonzero" not "non-zero".
Use "runtime" not "run time" or "run-time".
- Use "command-line" not "command line".
+ Use "command-line" as an adjective and "command line" as a noun.
Use "online" not "on-line".
Use "whitespace" not "white space".
Use "Input/Output", not "input/output". Also "I/O", not "i/o".
Use "lefthand"/"righthand", not "left-hand"/"right-hand".
Use "workaround", not "work-around".
Use "startup"/"cleanup", not "start-up"/"clean-up"
+ Use "filesystem", not "file system"
Use @code{do}, and not @code{do}-@code{while}, except where
actually discussing the do-while.
Use "versus" in text and "vs." in index entries
@@ -40490,8 +40660,6 @@ Consistency issues:
The numbers zero through ten should be spelled out, except when
talking about file descriptor numbers. > 10 and < 0, it's
ok to use numbers.
- In tables, put command-line options in @code, while in the text,
- put them in @option.
For most cases, do NOT put a comma before "and", "or" or "but".
But exercise taste with this rule.
Don't show the awk command with a program in quotes when it's
diff --git a/eval.c b/eval.c
index ac9e7729..5649797f 100644
--- a/eval.c
+++ b/eval.c
@@ -241,6 +241,7 @@ static const char *const nodetypes[] = {
"Node_func",
"Node_ext_func",
"Node_old_ext_func",
+ "Node_builtin_func",
"Node_array_ref",
"Node_array_tree",
"Node_array_leaf",
@@ -803,9 +804,35 @@ set_BINMODE()
void
set_OFS()
{
+ static bool first = true;
+ size_t new_ofs_len;
+
+ if (first) /* true when called from init_vars() in main() */
+ first = false;
+ else {
+ /* rebuild $0 using OFS that was current when $0 changed */
+ if (! field0_valid) {
+ get_field(UNLIMITED - 1, NULL);
+ rebuild_record();
+ }
+ }
+
+ /*
+ * Save OFS value for use in building record and in printing.
+ * Can't just have OFS point into the OFS_node since it's
+ * already updated when we come into this routine, and we need
+ * the old value to rebuild the record (see above).
+ */
OFS_node->var_value = force_string(OFS_node->var_value);
- OFS = OFS_node->var_value->stptr;
- OFSlen = OFS_node->var_value->stlen;
+ new_ofs_len = OFS_node->var_value->stlen;
+
+ if (OFS == NULL)
+ emalloc(OFS, char *, new_ofs_len + 2, "set_OFS");
+ else if (OFSlen < new_ofs_len)
+ erealloc(OFS, char *, new_ofs_len + 2, "set_OFS");
+
+ memcpy(OFS, OFS_node->var_value->stptr, OFS_node->var_value->stlen);
+ OFSlen = new_ofs_len;
OFS[OFSlen] = '\0';
}
diff --git a/extension/CMakeLists.txt b/extension/CMakeLists.txt
new file mode 100644
index 00000000..1bb4ceb1
--- /dev/null
+++ b/extension/CMakeLists.txt
@@ -0,0 +1,84 @@
+#
+# extension/CMakeLists.txt --- CMake input file for gawk
+#
+# Copyright (C) 2013
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+# Remove the definition of GAWK because of gawkapi.h.
+remove_definitions(-DGAWK)
+
+# BuildExtension(name source1 [source2 ...])
+#
+# Builds the gawk extension <name> as a loadable module (without the
+# usual "lib" file name prefix) from the given sources, links it
+# against ${EXTRA_LIBS} and installs it into lib.
+MACRO(BuildExtension name sources)
+ add_library (${name} MODULE ${sources} ${ARGN})
+ target_link_libraries(${name} ${EXTRA_LIBS})
+ set_target_properties(${name} PROPERTIES PREFIX "")
+ # Install the target itself instead of a hand-built file name:
+ # MODULE libraries are named with CMAKE_SHARED_MODULE_SUFFIX, which
+ # is not on every platform equal to CMAKE_SHARED_LIBRARY_SUFFIX, and
+ # the output directory need not be ${CMAKE_BINARY_DIR}/extension.
+ install(TARGETS ${name} LIBRARY DESTINATION lib)
+ENDMACRO(BuildExtension)
+
+# Each extension is only built when the configure checks found the
+# features it needs. The HAVE_* variables are tested by name so that
+# an unset or empty variable simply evaluates to false.
+if (HAVE_STRUCT_STAT_ST_BLKSIZE)
+ BuildExtension(filefuncs filefuncs.c stack.c gawkfts.c)
+else()
+ message(STATUS "extension filefuncs cannot be built because HAVE_STRUCT_STAT_ST_BLKSIZE is missing")
+endif()
+
+if (HAVE_FNMATCH AND HAVE_FNMATCH_H)
+ BuildExtension(fnmatch fnmatch.c)
+else()
+ message(STATUS "extension fnmatch cannot be built because function fnmatch or fnmatch.h is missing")
+endif()
+
+if (HAVE_SYS_WAIT_H)
+ BuildExtension(fork fork.c)
+else()
+ message(STATUS "extension fork cannot be built because HAVE_SYS_WAIT_H is missing")
+endif()
+
+if (HAVE_MKSTEMP)
+ BuildExtension(inplace inplace.c)
+else()
+ message(STATUS "extension inplace cannot be built because HAVE_MKSTEMP is missing")
+endif()
+
+BuildExtension(ordchr ordchr.c)
+
+if (HAVE_DIRENT_H AND HAVE_DIRFD)
+ BuildExtension(readdir readdir.c)
+else()
+ message(STATUS "extension readdir cannot be built because dirent.h or function dirfd is missing")
+endif()
+
+BuildExtension(readfile readfile.c)
+
+BuildExtension(revoutput revoutput.c)
+
+if (HAVE_GETDTABLESIZE)
+ BuildExtension(revtwoway revtwoway.c)
+else()
+ message(STATUS "extension revtwoway cannot be built because function getdtablesize is missing")
+endif()
+
+# These extensions have no configure-time prerequisites.
+BuildExtension(rwarray rwarray.c)
+
+BuildExtension(time time.c)
+
+BuildExtension(testext testext.c)
+
diff --git a/extension/ChangeLog b/extension/ChangeLog
index ab3d62c0..f324bdeb 100644
--- a/extension/ChangeLog
+++ b/extension/ChangeLog
@@ -1,3 +1,9 @@
+2014-08-12 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (RM): Define for makes that don't have it,
+ such as on OpenBSD. Thanks to Jeremie Courreges-Anglas
+ <jca@wxcvbn.org> for the report.
+
2014-06-13 Paul Gortmaker <paul.gortmaker@windriver.com>
* Makefile.am (uninstall-so): Came across below bug while cross
diff --git a/extension/Makefile.am b/extension/Makefile.am
index b6beaee3..e6678c54 100644
--- a/extension/Makefile.am
+++ b/extension/Makefile.am
@@ -29,6 +29,9 @@ AM_CPPFLAGS = -I$(srcdir)/..
# correctly after changing configure.ac
ACLOCAL_AMFLAGS = -I m4
+# For some make's, e.g. OpenBSD, that don't define this
+RM = rm -f
+
# Note: rwarray does not currently compile.
pkgextension_LTLIBRARIES = \
diff --git a/extension/Makefile.in b/extension/Makefile.in
index 294e4f88..46168e4e 100644
--- a/extension/Makefile.in
+++ b/extension/Makefile.in
@@ -513,6 +513,9 @@ AM_CPPFLAGS = -I$(srcdir)/..
# correctly after changing configure.ac
ACLOCAL_AMFLAGS = -I m4
+# For some make's, e.g. OpenBSD, that don't define this
+RM = rm -f
+
# Note: rwarray does not currently compile.
pkgextension_LTLIBRARIES = \
filefuncs.la \
diff --git a/field.c b/field.c
index 64ee1f02..4819ea94 100644
--- a/field.c
+++ b/field.c
@@ -40,7 +40,6 @@ typedef void (* Setfunc)(long, char *, long, NODE *);
static long (*parse_field)(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
-static void rebuild_record(void);
static long re_parse_field(long, char **, int, NODE *,
Regexp *, Setfunc, NODE *, NODE *, bool);
static long def_parse_field(long, char **, int, NODE *,
@@ -140,7 +139,7 @@ set_field(long num,
/* rebuild_record --- Someone assigned a value to $(something).
Fix up $0 to be right */
-static void
+void
rebuild_record()
{
/*
@@ -148,9 +147,7 @@ rebuild_record()
* a size_t isn't big enough.
*/
unsigned long tlen;
- unsigned long ofslen;
NODE *tmp;
- NODE *ofs;
char *ops;
char *cops;
long i;
@@ -158,14 +155,12 @@ rebuild_record()
assert(NF != -1);
tlen = 0;
- ofs = force_string(OFS_node->var_value);
- ofslen = ofs->stlen;
for (i = NF; i > 0; i--) {
tmp = fields_arr[i];
tmp = force_string(tmp);
tlen += tmp->stlen;
}
- tlen += (NF - 1) * ofslen;
+ tlen += (NF - 1) * OFSlen;
if ((long) tlen < 0)
tlen = 0;
emalloc(ops, char *, tlen + 2, "rebuild_record");
@@ -183,11 +178,11 @@ rebuild_record()
}
/* copy OFS */
if (i != NF) {
- if (ofslen == 1)
- *cops++ = ofs->stptr[0];
- else if (ofslen != 0) {
- memcpy(cops, ofs->stptr, ofslen);
- cops += ofslen;
+ if (OFSlen == 1)
+ *cops++ = *OFS;
+ else if (OFSlen != 0) {
+ memcpy(cops, OFS, OFSlen);
+ cops += OFSlen;
}
}
}
@@ -231,7 +226,7 @@ rebuild_record()
fields_arr[i] = n;
assert((n->flags & WSTRCUR) == 0);
}
- cops += fields_arr[i]->stlen + ofslen;
+ cops += fields_arr[i]->stlen + OFSlen;
}
unref(fields_arr[0]);
diff --git a/helpers/ChangeLog b/helpers/ChangeLog
index c9121403..a5bbafb1 100644
--- a/helpers/ChangeLog
+++ b/helpers/ChangeLog
@@ -1,3 +1,7 @@
+2014-09-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * chlistref.awk: New file. Finds @ref{} to non-chapters.
+
2014-06-08 Arnold D. Robbins <arnold@skeeve.com>
* testdfa.c: Minor improvements.
diff --git a/helpers/chlistref.awk b/helpers/chlistref.awk
new file mode 100644
index 00000000..49f63f59
--- /dev/null
+++ b/helpers/chlistref.awk
@@ -0,0 +1,31 @@
+# chlistref.awk --- report @ref{...} cross references, followed by a
+# comma, whose target is not one of the chapter-level nodes listed
+# in the chapters array below.
+#
+# Output is one line per offending input line, in the form
+#	FILENAME:LINENUMBER: text of the line
+#
+# Uses gensub().
+
+BEGIN {
+ # Node names that are accepted as the target of "@ref{...},".
+ chapters["Getting Started"]++
+ chapters["Invoking Gawk"]++
+ chapters["Regexp"]++
+ chapters["Reading Files"]++
+ chapters["Printing"]++
+ chapters["Expressions"]++
+ chapters["Patterns and Actions"]++
+ chapters["Arrays"]++
+ chapters["Functions"]++
+ chapters["Library Functions"]++
+ chapters["Sample Programs"]++
+ chapters["Advanced Features"]++
+ chapters["Internationalization"]++
+ chapters["Debugger"]++
+ chapters["Arbitrary Precision Arithmetic"]++
+ chapters["Dynamic Extensions"]++
+ chapters["Language History"]++
+ chapters["Installation"]++
+ chapters["Notes"]++
+ chapters["Basic Concepts"]++
+
+ # Matches a whole line containing "@ref{NAME}," and captures NAME
+ # (one or more characters other than "}").
+ Pattern = ".*@ref\\{([^}]+)\\},.*"
+}
+
+# For every line with such a reference, reduce the line to the captured
+# node name and print the line if that name is not a known chapter.
+$0 ~ Pattern {
+ ref = gensub(Pattern, "\\1", 1, $0)
+ if (! (ref in chapters))
+ printf("%s:%d: %s\n", FILENAME, FNR, $0)
+}
+
diff --git a/interpret.h b/interpret.h
index c26a9d46..28804330 100644
--- a/interpret.h
+++ b/interpret.h
@@ -1038,10 +1038,44 @@ match_re:
f = lookup(t1->stptr);
}
- if (f == NULL || f->type != Node_func) {
- if (f->type == Node_ext_func || f->type == Node_old_ext_func)
- fatal(_("cannot (yet) call extension functions indirectly"));
- else
+ if (f == NULL) {
+ fatal(_("`%s' is not a function, so it cannot be called indirectly"),
+ t1->stptr);
+ } else if (f->type == Node_builtin_func) {
+ int arg_count = (pc + 1)->expr_count;
+ builtin_func_t the_func = lookup_builtin(t1->stptr);
+
+ assert(the_func != NULL);
+
+ /* call it */
+ r = the_func(arg_count);
+ PUSH(r);
+ break;
+ } else if (f->type != Node_func) {
+ if ( f->type == Node_ext_func
+ || f->type == Node_old_ext_func) {
+ /* code copied from below, keep in sync */
+ INSTRUCTION *bc;
+ char *fname = pc->func_name;
+ int arg_count = (pc + 1)->expr_count;
+ static INSTRUCTION npc[2];
+
+ npc[0] = *pc;
+
+ bc = f->code_ptr;
+ assert(bc->opcode == Op_symbol);
+ if (f->type == Node_ext_func)
+ npc[0].opcode = Op_ext_builtin; /* self modifying code */
+ else
+ npc[0].opcode = Op_old_ext_builtin; /* self modifying code */
+ npc[0].extfunc = bc->extfunc;
+ npc[0].expr_count = arg_count; /* actual argument count */
+ npc[1] = pc[1];
+ npc[1].func_name = fname; /* name of the builtin */
+ npc[1].expr_count = bc->expr_count; /* defined max # of arguments */
+ ni = npc;
+ JUMPTO(ni);
+ } else
fatal(_("function called indirectly through `%s' does not exist"),
pc->func_name);
}
@@ -1065,6 +1099,7 @@ match_re:
}
if (f->type == Node_ext_func || f->type == Node_old_ext_func) {
+ /* keep in sync with indirect call code */
INSTRUCTION *bc;
char *fname = pc->func_name;
int arg_count = (pc + 1)->expr_count;
diff --git a/main.c b/main.c
index 7f1595b8..3da0703e 100644
--- a/main.c
+++ b/main.c
@@ -33,6 +33,16 @@
#include <mcheck.h>
#endif
+#ifdef HAVE_LIBSIGSEGV
+#include <sigsegv.h>
+#else
+typedef void *stackoverflow_context_t;
+/* the argument to this macro is purposely not used */
+#define sigsegv_install_handler(catchsegv) signal(SIGSEGV, catchsig)
+/* define as 0 rather than empty so that (void) cast on it works */
+#define stackoverflow_install_handler(catchstackoverflow, extra_stack, STACK_SIZE) 0
+#endif
+
#define DEFAULT_PROFILE "awkprof.out" /* where to put profile */
#define DEFAULT_VARFILE "awkvars.out" /* where to put vars */
#define DEFAULT_PREC 53
@@ -262,17 +272,6 @@ main(int argc, char **argv)
*/
gawk_mb_cur_max = MB_CUR_MAX;
/* Without MBS_SUPPORT, gawk_mb_cur_max is 1. */
-#ifdef LIBC_IS_BORKED
-{
- const char *env_lc;
-
- env_lc = getenv("LC_ALL");
- if (env_lc == NULL)
- env_lc = getenv("LANG");
- if (env_lc != NULL && env_lc[1] == '\0' && tolower(env_lc[0]) == 'c')
- gawk_mb_cur_max = 1;
-}
-#endif
/* init the cache for checking bytes if they're characters */
init_btowc_cache();
@@ -705,6 +704,8 @@ out:
if (do_intl)
exit(EXIT_SUCCESS);
+ install_builtins();
+
if (do_lint)
shadow_funcs();
@@ -825,7 +826,7 @@ usage(int exitval, FILE *fp)
fputs(_("\t-h\t\t\t--help\n"), fp);
fputs(_("\t-i includefile\t\t--include=includefile\n"), fp);
fputs(_("\t-l library\t\t--load=library\n"), fp);
- fputs(_("\t-L [fatal]\t\t--lint[=fatal]\n"), fp);
+ fputs(_("\t-L[fatal|invalid]\t--lint[=fatal|invalid]\n"), fp);
fputs(_("\t-M\t\t\t--bignum\n"), fp);
fputs(_("\t-N\t\t\t--use-lc-numeric\n"), fp);
fputs(_("\t-n\t\t\t--non-decimal-data\n"), fp);
diff --git a/node.c b/node.c
index 1c896342..213b5335 100644
--- a/node.c
+++ b/node.c
@@ -557,9 +557,8 @@ parse_escape(const char **string_ptr)
warning(_("no hex digits in `\\x' escape sequence"));
return ('x');
}
- i = j = 0;
start = *string_ptr;
- for (;; j++) {
+ for (i = j = 0; j < 2; j++) {
/* do outside test to avoid multiple side effects */
c = *(*string_ptr)++;
if (isxdigit(c)) {
diff --git a/po/CMakeLists.txt b/po/CMakeLists.txt
new file mode 100644
index 00000000..cd930077
--- /dev/null
+++ b/po/CMakeLists.txt
@@ -0,0 +1,133 @@
+# Most of this copied from the repository of Stellarium
+# http://sourceforge.net/projects/stellarium/
+
+# Special targets for translations:
+#
+# translations
+# Converts all PO files to GMO files. Note that it does *not* update
+# the PO files or the PO templates -- in fact, these files are never
+# updated automatically.
+#
+# generate-pot
+# Re-creates all POT files unconditionally.
+#
+# update-po
+# Updates all PO files unconditionally. Note that it takes care of
+# updating the POT files.
+#
+# translations-<DOMAIN>
+# generate-pot-<DOMAIN>
+# update-po-<DOMAIN>
+# Same as above, but only affect the files in the corresponding
+# po/<DOMAIN> directory. (DOMAIN is actually the base name of the POT
+# file in the subdirectory, but that should match the directory name
+# anyway.)
+
+ADD_CUSTOM_TARGET(translations)
+ADD_CUSTOM_TARGET(generate-pot)
+ADD_CUSTOM_TARGET(update-po)
+
+# GETTEXT_CREATE_TRANSLATIONS(domain [DEFAULT_TARGET] lang1 ... langN)
+#
+# Creates custom build rules to create and install (G)MO files for the
+# specified languages. If the DEFAULT_TARGET option is used, the
+# translations will also be created when building the default target.
+#
+# "domain" is the translation domain, eg. "gawk". A POT file
+# with the name ${domain}.pot must exist in the directory of the
+# CMakeLists.txt file invoking the macro.
+#
+# This macro also creates the "translations-${domain}" and
+# "update-po-${domain}" targets (see above for an explanation).
+#
+MACRO(GETTEXT_CREATE_TRANSLATIONS _domain _firstLang)
+
+  SET(_gmoFiles)
+  GET_FILENAME_COMPONENT(_absPotFile ${_domain}.pot ABSOLUTE)
+
+  # Update these PO files when building the "update-po-<DOMAIN>" and
+  # "update-po" targets.
+  ADD_CUSTOM_TARGET(update-po-${_domain})
+  ADD_DEPENDENCIES(update-po update-po-${_domain})
+
+  # Make sure the POT file is updated before updating the PO files.
+  ADD_DEPENDENCIES(update-po-${_domain} generate-pot-${_domain})
+
+  # The argument after the domain is either the DEFAULT_TARGET option
+  # or already the first language.  Macro arguments are plain text
+  # substitutions and cannot be reassigned, so the list of languages
+  # is collected in a real variable; otherwise the first language
+  # would be lost whenever DEFAULT_TARGET is not given.
+  SET(_addToAll)
+  IF("${_firstLang}" STREQUAL "DEFAULT_TARGET")
+    SET(_addToAll "ALL")
+    SET(_langs ${ARGN})
+  ELSE()
+    SET(_langs ${_firstLang} ${ARGN})
+  ENDIF()
+
+  FOREACH (_lang ${_langs})
+    GET_FILENAME_COMPONENT(_absFile ${_lang}.po ABSOLUTE)
+    FILE(RELATIVE_PATH _relFile ${PROJECT_SOURCE_DIR} ${_absFile})
+    SET(_gmoFile ${CMAKE_CURRENT_BINARY_DIR}/${_lang}.gmo)
+
+    # Convert a PO file into a GMO file.
+    ADD_CUSTOM_COMMAND(
+      OUTPUT ${_gmoFile}
+      COMMAND ${GETTEXT_MSGFMT_EXECUTABLE} -o ${_gmoFile} ${_absFile}
+      DEPENDS ${_absFile}
+      VERBATIM
+    )
+
+    # Update the PO file unconditionally when building the
+    # "update-po-<DOMAIN>" target.  Note that to see the file being
+    # processed, we have to run "cmake -E echo", because the
+    # COMMENT is not displayed by cmake...
+    ADD_CUSTOM_COMMAND(
+      TARGET update-po-${_domain}
+      POST_BUILD
+      COMMAND ${CMAKE_COMMAND} -E echo "** Updating ${_relFile}"
+      COMMAND ${GETTEXT_MSGMERGE_EXECUTABLE}
+        --quiet --update -m --backup=none -s
+        ${_absFile} ${_absPotFile}
+      VERBATIM
+    )
+
+    # Install the compiled catalog under the name of the domain.
+    INSTALL(FILES ${_gmoFile} DESTINATION share/locale/${_lang}/LC_MESSAGES RENAME ${_domain}.mo)
+    SET(_gmoFiles ${_gmoFiles} ${_gmoFile})
+
+  ENDFOREACH (_lang)
+
+  # Create the GMO files when building the "translations-<DOMAIN>" and
+  # "translations" targets.
+  ADD_CUSTOM_TARGET(translations-${_domain} ${_addToAll} DEPENDS ${_gmoFiles})
+  ADD_DEPENDENCIES(translations translations-${_domain})
+
+ENDMACRO(GETTEXT_CREATE_TRANSLATIONS )
+
+SET(gawk_DOMAIN gawk)
+SET(gawk_POT ${gawk_DOMAIN}.pot)
+
+# LINGUAS names the available translations.  Split it on any run of
+# whitespace, so that one-language-per-line and space-separated
+# layouts both yield a proper CMake list.  The input is quoted so that
+# an empty file does not turn into a missing argument.
+file(READ ${CMAKE_CURRENT_SOURCE_DIR}/LINGUAS linguas)
+string(REGEX REPLACE "[ \t\r\n]+" ";" linguas "${linguas}")
+GETTEXT_CREATE_TRANSLATIONS(${gawk_DOMAIN} DEFAULT_TARGET ${linguas})
+
+# Re-create the POT file from the sources listed in POTFILES.in.
+ADD_CUSTOM_TARGET(
+ generate-pot-${gawk_DOMAIN}
+ ${GETTEXT_XGETTEXT_EXECUTABLE}
+ -o ${CMAKE_CURRENT_SOURCE_DIR}/${gawk_POT}
+ -C
+ --keyword=_
+ --keyword=N_
+ --keyword=q_
+ --keyword=translate:2
+ --add-comments=TRANSLATORS:
+ --directory=${CMAKE_BINARY_DIR}
+ --directory=${CMAKE_SOURCE_DIR}
+ --output-dir=${CMAKE_BINARY_DIR}
+ --files-from=${CMAKE_CURRENT_SOURCE_DIR}/POTFILES.in
+ --copyright-holder=FSF
+ WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+ COMMENT "Generating ${gawk_POT}"
+ VERBATIM
+)
+# TODO: It would be nice to just depend on the exact files in POTFILES.in
+# Generate this POT file when building the "generate-pot" target.
+ADD_DEPENDENCIES(generate-pot generate-pot-${gawk_DOMAIN})
diff --git a/profile.c b/profile.c
index eae24b1c..d07bea4a 100644
--- a/profile.c
+++ b/profile.c
@@ -731,20 +731,28 @@ cleanup:
ip = pc + 1;
indent(ip->forloop_body->exec_count);
fprintf(prof_fp, "%s (", op2str(pc->opcode));
- pprint(pc->nexti, ip->forloop_cond, true);
- fprintf(prof_fp, "; ");
- if (ip->forloop_cond->opcode == Op_no_op &&
- ip->forloop_cond->nexti == ip->forloop_body)
+ /* If empty for loop header, print it a little more nicely. */
+ if ( pc->nexti->opcode == Op_no_op
+ && ip->forloop_cond == pc->nexti
+ && pc->target_continue->opcode == Op_jmp) {
+ fprintf(prof_fp, ";;");
+ } else {
+ pprint(pc->nexti, ip->forloop_cond, true);
fprintf(prof_fp, "; ");
- else {
- pprint(ip->forloop_cond, ip->forloop_body, true);
- t1 = pp_pop();
- fprintf(prof_fp, "%s; ", t1->pp_str);
- pp_free(t1);
- }
- pprint(pc->target_continue, pc->target_break, true);
+ if (ip->forloop_cond->opcode == Op_no_op &&
+ ip->forloop_cond->nexti == ip->forloop_body)
+ fprintf(prof_fp, "; ");
+ else {
+ pprint(ip->forloop_cond, ip->forloop_body, true);
+ t1 = pp_pop();
+ fprintf(prof_fp, "%s; ", t1->pp_str);
+ pp_free(t1);
+ }
+
+ pprint(pc->target_continue, pc->target_break, true);
+ }
fprintf(prof_fp, ") {\n");
indent_in();
pprint(ip->forloop_body->nexti, pc->target_continue, false);
diff --git a/regcomp.c b/regcomp.c
index a62364c9..1f6d978a 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -856,10 +856,6 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
#ifndef _LIBC
char *codeset_name;
#endif
-#if defined(GAWK) && defined(LIBC_IS_BORKED)
- /* Needed for brain damaged systems */
- extern int gawk_mb_cur_max;
-#endif
memset (dfa, '\0', sizeof (re_dfa_t));
@@ -881,11 +877,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
dfa->state_hash_mask = table_size - 1;
-#if defined(GAWK) && defined(LIBC_IS_BORKED)
- dfa->mb_cur_max = gawk_mb_cur_max;
-#else
dfa->mb_cur_max = MB_CUR_MAX;
-#endif
#ifdef _LIBC
if (dfa->mb_cur_max == 6
&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
@@ -907,24 +899,9 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
codeset_name = strchr (codeset_name, '.') + 1;
# endif
- /* strcasecmp isn't a standard interface. brute force check */
-#ifndef GAWK
if (strcasecmp (codeset_name, "UTF-8") == 0
|| strcasecmp (codeset_name, "UTF8") == 0)
dfa->is_utf8 = 1;
-#else
- if ( (codeset_name[0] == 'U' || codeset_name[0] == 'u')
- && (codeset_name[1] == 'T' || codeset_name[1] == 't')
- && (codeset_name[2] == 'F' || codeset_name[2] == 'f')
- && (codeset_name[3] == '-'
- ? codeset_name[4] == '8' && codeset_name[5] == '\0'
- : codeset_name[3] == '8' && codeset_name[4] == '\0'))
- dfa->is_utf8 = 1;
-#if defined(GAWK) && defined(LIBC_IS_BORKED)
- if (gawk_mb_cur_max == 1)
- dfa->is_utf8 = 0;
-#endif /* defined(GAWK) && defined(LIBC_IS_BORKED) */
-#endif
/* We check exhaustively in the loop below if this charset is a
superset of ASCII. */
@@ -2215,7 +2192,11 @@ parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
{
branch = parse_branch (regexp, preg, token, syntax, nest, err);
if (BE (*err != REG_NOERROR && branch == NULL, 0))
- return NULL;
+ {
+ if (tree != NULL)
+ postorder (tree, free_tree, NULL);
+ return NULL;
+ }
}
else
branch = NULL;
@@ -2476,8 +2457,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS
|| token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)
{
- bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token,
- syntax, err);
+ bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
if (BE (*err != REG_NOERROR && dup_tree == NULL, 0))
{
if (tree != NULL)
@@ -2640,6 +2620,8 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
/* Duplicate ELEM before it is marked optional. */
elem = duplicate_tree (elem, dfa);
+ if (BE (elem == NULL, 0))
+ goto parse_dup_op_espace;
old_tree = tree;
}
else
@@ -3136,8 +3118,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
if (BE (sbcset == NULL, 0))
#endif /* RE_ENABLE_I18N */
{
-#ifdef RE_ENABLE_I18N
re_free (sbcset);
+#ifdef RE_ENABLE_I18N
re_free (mbcset);
#endif
*err = REG_ESPACE;
diff --git a/regex.h b/regex.h
index 56602961..3d26a606 100644
--- a/regex.h
+++ b/regex.h
@@ -470,7 +470,7 @@ typedef struct
#ifdef __USE_GNU
/* Sets the current default syntax to SYNTAX, and return the old syntax.
You can also simply assign to the `re_syntax_options' variable. */
-extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
+extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);
/* Compile the regular expression PATTERN, with length LENGTH
and syntax given by the global `re_syntax_options', into the buffer
@@ -480,14 +480,14 @@ extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
Note that the translate table must either have been initialised by
`regcomp', with a malloc'ed value, or set to NULL before calling
`regfree'. */
-extern const char *re_compile_pattern (const char *__pattern, size_t __length,
- struct re_pattern_buffer *__buffer);
+extern const char *re_compile_pattern (const char *pattern, size_t length,
+ struct re_pattern_buffer *buffer);
/* Compile a fastmap for the compiled pattern in BUFFER; used to
accelerate searches. Return 0 if successful and -2 if was an
internal error. */
-extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
+extern int re_compile_fastmap (struct re_pattern_buffer *buffer);
/* Search in the string STRING (with length LENGTH) for the pattern
@@ -495,30 +495,30 @@ extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
characters. Return the starting position of the match, -1 for no
match, or -2 for an internal error. Also return register
information in REGS (if REGS and BUFFER->no_sub are nonzero). */
-extern int re_search (struct re_pattern_buffer *__buffer, const char *__cstring,
- int __length, int __start, int __range,
- struct re_registers *__regs);
+extern int re_search (struct re_pattern_buffer *buffer, const char *c_string,
+ int length, int start, int range,
+ struct re_registers *regs);
/* Like `re_search', but search in the concatenation of STRING1 and
STRING2. Also, stop searching at index START + STOP. */
-extern int re_search_2 (struct re_pattern_buffer *__buffer,
- const char *__string1, int __length1,
- const char *__string2, int __length2, int __start,
- int __range, struct re_registers *__regs, int __stop);
+extern int re_search_2 (struct re_pattern_buffer *buffer,
+ const char *string1, int length1,
+ const char *string2, int length2, int start,
+ int range, struct re_registers *regs, int stop);
/* Like `re_search', but return how many characters in STRING the regexp
in BUFFER matched, starting at position START. */
-extern int re_match (struct re_pattern_buffer *__buffer, const char *__cstring,
- int __length, int __start, struct re_registers *__regs);
+extern int re_match (struct re_pattern_buffer *buffer, const char *c_string,
+ int length, int start, struct re_registers *regs);
/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
-extern int re_match_2 (struct re_pattern_buffer *__buffer,
- const char *__string1, int __length1,
- const char *__string2, int __length2, int __start,
- struct re_registers *__regs, int __stop);
+extern int re_match_2 (struct re_pattern_buffer *buffer,
+ const char *string1, int length1,
+ const char *string2, int length2, int start,
+ struct re_registers *regs, int stop);
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
@@ -533,10 +533,10 @@ extern int re_match_2 (struct re_pattern_buffer *__buffer,
Unless this function is called, the first search or match using
PATTERN_BUFFER will allocate its own register data, without
freeing the old data. */
-extern void re_set_registers (struct re_pattern_buffer *__buffer,
- struct re_registers *__regs,
- unsigned int __num_regs,
- regoff_t *__starts, regoff_t *__ends);
+extern void re_set_registers (struct re_pattern_buffer *buffer,
+ struct re_registers *regs,
+ unsigned int num_regs,
+ regoff_t *starts, regoff_t *ends);
#endif /* Use GNU */
#if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_MISC)
@@ -569,19 +569,19 @@ extern int re_exec (const char *);
#endif
/* POSIX compatibility. */
-extern int regcomp (regex_t *__restrict __preg,
- const char *__restrict __pattern,
- int __cflags);
+extern int regcomp (regex_t *__restrict preg,
+ const char *__restrict pattern,
+ int cflags);
-extern int regexec (const regex_t *__restrict __preg,
- const char *__restrict __cstring, size_t __nmatch,
- regmatch_t __pmatch[__restrict_arr],
- int __eflags);
+extern int regexec (const regex_t *__restrict preg,
+ const char *__restrict c_string, size_t nmatch,
+ regmatch_t pmatch[__restrict_arr],
+ int eflags);
-extern size_t regerror (int __errcode, const regex_t *__restrict __preg,
- char *__restrict __errbuf, size_t __errbuf_size);
+extern size_t regerror (int errcode, const regex_t *__restrict preg,
+ char *__restrict errbuf, size_t errbuf_size);
-extern void regfree (regex_t *__preg);
+extern void regfree (regex_t *preg);
#ifdef __cplusplus
diff --git a/regex_internal.c b/regex_internal.c
index 056cff3d..9e427081 100644
--- a/regex_internal.c
+++ b/regex_internal.c
@@ -545,7 +545,10 @@ build_upper_buffer (re_string_t *pstr)
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
if (BE (pstr->trans != NULL, 0))
ch = pstr->trans[ch];
- pstr->mbs[char_idx] = toupper (ch);
+ if (islower (ch))
+ pstr->mbs[char_idx] = toupper (ch);
+ else
+ pstr->mbs[char_idx] = ch;
}
pstr->valid_len = char_idx;
pstr->valid_raw_len = char_idx;
@@ -683,7 +686,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
pstr->valid_len - offset);
pstr->valid_len -= offset;
pstr->valid_raw_len -= offset;
-#if DEBUG
+#if defined DEBUG && DEBUG
assert (pstr->valid_len > 0);
#endif
}
@@ -940,7 +943,7 @@ re_string_context_at (const re_string_t *input, int idx, int eflags)
int wc_idx = idx;
while(input->wcs[wc_idx] == WEOF)
{
-#ifdef DEBUG
+#if defined DEBUG && DEBUG
/* It must not happen. */
assert (wc_idx >= 0);
#endif
diff --git a/replace.c b/replace.c
index 559de014..71a8dc51 100644
--- a/replace.c
+++ b/replace.c
@@ -50,7 +50,7 @@
#include "missing_d/memmove.c"
#endif /* HAVE_MEMMOVE */
-#ifndef HAVE_STRNCASECMP
+#if !defined(HAVE_STRNCASECMP) || !defined(HAVE_STRCASECMP)
#include "missing_d/strncasecmp.c"
#endif /* HAVE_STRCASE */
diff --git a/symbol.c b/symbol.c
index 7ecdfe85..e89214c0 100644
--- a/symbol.c
+++ b/symbol.c
@@ -35,8 +35,8 @@ static int var_count; /* total number of global variables and functions */
static NODE *symbol_list;
static void (*install_func)(NODE *) = NULL;
-static NODE *make_symbol(char *name, NODETYPE type);
-static NODE *install(char *name, NODE *parm, NODETYPE type);
+static NODE *make_symbol(const char *name, NODETYPE type);
+static NODE *install(const char *name, NODE *parm, NODETYPE type);
static void free_bcpool(INSTRUCTION *pl);
static AWK_CONTEXT *curr_ctxt = NULL;
@@ -75,7 +75,7 @@ init_symbol_table()
*/
NODE *
-install_symbol(char *name, NODETYPE type)
+install_symbol(const char *name, NODETYPE type)
{
return install(name, NULL, type);
}
@@ -112,14 +112,12 @@ lookup(const char *name)
continue;
n = in_array(tables[i], tmp);
- if (n != NULL) {
- unref(tmp);
- return n;
- }
+ if (n != NULL)
+ break;
}
unref(tmp);
- return n; /* NULL */
+ return n; /* NULL or new place */
}
/* make_params --- allocate function parameters for the symbol table */
@@ -155,11 +153,13 @@ install_params(NODE *func)
if (func == NULL)
return;
+
assert(func->type == Node_func);
- if ((pcount = func->param_cnt) <= 0
- || (parms = func->fparms) == NULL
- )
+
+ if ( (pcount = func->param_cnt) <= 0
+ || (parms = func->fparms) == NULL)
return;
+
for (i = 0; i < pcount; i++)
(void) install(parms[i].param, parms + i, Node_param_list);
}
@@ -177,10 +177,11 @@ remove_params(NODE *func)
if (func == NULL)
return;
+
assert(func->type == Node_func);
- if ((pcount = func->param_cnt) <= 0
- || (parms = func->fparms) == NULL
- )
+
+ if ( (pcount = func->param_cnt) <= 0
+ || (parms = func->fparms) == NULL)
return;
for (i = pcount - 1; i >= 0; i--) {
@@ -191,11 +192,11 @@ remove_params(NODE *func)
assert(p->type == Node_param_list);
tmp = make_string(p->vname, strlen(p->vname));
tmp2 = in_array(param_table, tmp);
- if (tmp2 != NULL && tmp2->dup_ent != NULL) {
+ if (tmp2 != NULL && tmp2->dup_ent != NULL)
tmp2->dup_ent = tmp2->dup_ent->dup_ent;
- } else {
+ else
(void) assoc_remove(param_table, tmp);
- }
+
unref(tmp);
}
@@ -274,7 +275,7 @@ destroy_symbol(NODE *r)
/* make_symbol --- allocates a global symbol for the symbol table. */
static NODE *
-make_symbol(char *name, NODETYPE type)
+make_symbol(const char *name, NODETYPE type)
{
NODE *r;
@@ -284,7 +285,7 @@ make_symbol(char *name, NODETYPE type)
null_array(r);
else if (type == Node_var)
r->var_value = dupnode(Nnull_string);
- r->vname = name;
+ r->vname = (char *) name;
r->type = type;
return r;
@@ -293,7 +294,7 @@ make_symbol(char *name, NODETYPE type)
/* install --- install a global name or function parameter in the symbol table */
static NODE *
-install(char *name, NODE *parm, NODETYPE type)
+install(const char *name, NODE *parm, NODETYPE type)
{
NODE *r;
NODE **aptr;
@@ -306,20 +307,22 @@ install(char *name, NODE *parm, NODETYPE type)
if (type == Node_param_list) {
table = param_table;
- } else if (type == Node_func || type == Node_ext_func) {
+ } else if ( type == Node_func
+ || type == Node_ext_func
+ || type == Node_builtin_func) {
table = func_table;
} else if (installing_specials) {
table = global_table;
}
- if (parm != NULL) {
+ if (parm != NULL)
r = parm;
- } else {
+ else {
/* global symbol */
r = make_symbol(name, type);
if (type == Node_func)
func_count++;
- if (type != Node_ext_func && table != global_table)
+ if (type != Node_ext_func && type != Node_builtin_func && table != global_table)
var_count++; /* total, includes Node_func */
}
@@ -344,7 +347,6 @@ simple:
return r;
}
-
/* comp_symbol --- compare two (variable or function) names */
static int
@@ -393,7 +395,7 @@ get_symbols(SYMBOL_TYPE what, bool sort)
for (i = count = 0; i < max; i += 2) {
r = list[i+1];
- if (r->type == Node_ext_func)
+ if (r->type == Node_ext_func || r->type == Node_builtin_func)
continue;
assert(r->type == Node_func);
table[count++] = r;
@@ -517,7 +519,8 @@ release_symbols(NODE *symlist, int keep_globals)
for (p = symlist->rnode; p != NULL; p = next) {
if (! keep_globals) {
- /* destroys globals, function, and params
+ /*
+ * destroys globals, function, and params
* if still in symbol table
*/
destroy_symbol(p->lnode);
@@ -538,7 +541,7 @@ load_symbols()
NODE *sym_array;
NODE **aptr;
long i, j, max;
- NODE *user, *extension, *untyped, *scalar, *array;
+ NODE *user, *extension, *untyped, *scalar, *array, *built_in;
NODE **list;
NODE *tables[4];
@@ -569,6 +572,7 @@ load_symbols()
scalar = make_string("scalar", 6);
untyped = make_string("untyped", 7);
array = make_string("array", 5);
+ built_in = make_string("builtin", 7);
for (i = 0; tables[i] != NULL; i++) {
list = assoc_list(tables[i], "@unsorted", ASORTI);
@@ -579,6 +583,7 @@ load_symbols()
r = list[j+1];
if ( r->type == Node_ext_func
|| r->type == Node_func
+ || r->type == Node_builtin_func
|| r->type == Node_var
|| r->type == Node_var_array
|| r->type == Node_var_new) {
@@ -593,6 +598,9 @@ load_symbols()
case Node_func:
*aptr = dupnode(user);
break;
+ case Node_builtin_func:
+ *aptr = dupnode(built_in);
+ break;
case Node_var:
*aptr = dupnode(scalar);
break;
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 00000000..fee5eeca
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,90 @@
+#
+# test/CMakeLists.txt --- CMake input file for gawk
+#
+# Copyright (C) 2013
+# the Free Software Foundation, Inc.
+#
+# This file is part of GAWK, the GNU implementation of the
+# AWK Programming Language.
+#
+# GAWK is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# GAWK is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+## process this file with CMake to produce Makefile
+
+if(WIN32 AND NOT DEFINED SHELL_PREFIX) # shell that runs cmake/basictest; default is the MSYS sh, override with -DSHELL_PREFIX=<path>
+ set(SHELL_PREFIX "C:\\MinGW\\msys\\1.0\\bin\\sh")
+endif()
+
+# Read Makefile.am once; it names the groups of tests (<GROUP>_TESTS) and their test cases.
+file(READ ${CMAKE_CURRENT_SOURCE_DIR}/Makefile.am MAKEFILE_AM)
+string(REGEX MATCHALL "[A-Z_]*_TESTS " ALL_GROUPS "${MAKEFILE_AM}")
+string(REGEX REPLACE "_TESTS " ";" ALL_GROUPS "${ALL_GROUPS}")
+# For each group of test cases, search through Makefile.am and find the test cases.
+foreach(testgroup ${ALL_GROUPS})
+ # Isolate the "<group>_TESTS = ..." assignment with its continuation lines, then turn it into a list.
+ string(REGEX MATCH "${testgroup}_TESTS = [a-z0-9_ \\\n\t]*" ONE_GROUP "${MAKEFILE_AM}")
+ string(REGEX REPLACE "${testgroup}_TESTS = " "" ONE_GROUP "${ONE_GROUP}")
+ string(REGEX REPLACE "[\\\n\t]" "" ONE_GROUP "${ONE_GROUP}")
+ string(REGEX REPLACE " " ";" ONE_GROUP "${ONE_GROUP}")
+ # Use each name of a test case to start a script that executes the test case.
+ foreach(testcase ${ONE_GROUP})
+ add_test("${testgroup}.${testcase}" ${SHELL_PREFIX} ${CMAKE_SOURCE_DIR}/cmake/basictest ${CMAKE_BINARY_DIR}/gawk${CMAKE_EXECUTABLE_SUFFIX} ${testcase})
+ endforeach()
+endforeach()
+
+# Generate the configuration file that customizes test execution.
+set(CTestCustom ${CMAKE_BINARY_DIR}/CTestCustom.cmake)
+# Test case SHLIB.filefuncs needs a gawkapi.o entry in the source directory; the generated file(COPY) creates it.
+file(WRITE ${CTestCustom} "# DO NOT EDIT, THIS FILE WILL BE OVERWRITTEN\n"
+ "file(COPY ${CMAKE_SOURCE_DIR}/README DESTINATION ${CMAKE_SOURCE_DIR}/gawkapi.o)\n"
+ "set(CTEST_CUSTOM_TESTS_IGNORE\n")
+# Exclude test cases from execution that make no sense on a certain platform.
+if(WIN32)
+ file(APPEND ${CTestCustom} " BASIC.exitval2\n"
+ " BASIC.hsprint\n"
+ " BASIC.rstest4\n"
+ " BASIC.rstest5\n"
+ " UNIX.getlnhd\n"
+ " UNIX.pid\n"
+ " GAWK_EXT.beginfile1\n"
+ " GAWK_EXT.beginfile2\n"
+ " GAWK_EXT.clos1way\n"
+ " GAWK_EXT.devfd\n"
+ " GAWK_EXT.devfd1\n"
+ " GAWK_EXT.devfd2\n"
+ " GAWK_EXT.getlndir\n"
+ " GAWK_EXT.posix\n"
+ " GAWK_EXT.pty1\n"
+ " INET.inetdayu\n"
+ " INET.inetdayt\n"
+ " INET.inetechu\n"
+ " INET.inetecht\n"
+ " MACHINE.double2\n"
+ " LOCALE_CHARSET.fmttest\n"
+ " LOCALE_CHARSET.lc_num1\n"
+ " LOCALE_CHARSET.mbfw1\n"
+ " SHLIB.filefuncs\n"
+ " SHLIB.fnmatch\n"
+ " SHLIB.fork\n"
+ " SHLIB.fork2\n"
+ " SHLIB.fts\n"
+ " SHLIB.functab4\n"
+ " SHLIB.readdir\n"
+ " SHLIB.revtwoway\n"
+ " SHLIB.rwarray\n")
+endif()
+file(APPEND ${CTestCustom} ")\n")
+
diff --git a/test/ChangeLog b/test/ChangeLog
index db9b1e35..68cc18e6 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,28 @@
+2014-09-05 Arnold D. Robbins <arnold@skeeve.com>
+
+ * functab4.awk: Changed to use stat instead of chdir since
+ /tmp isn't /tmp on all systems (e.g. Mac OS X). Thanks to
+ Hermann Peifer for the report.
+
+ Sort of related:
+
+ * indirectcall2.awk, indirectcall2.ok: New files.
+ * id.ok: Updated.
+
+2014-09-04 Arnold D. Robbins <arnold@skeeve.com>
+
+ * profile2.ok: Update after code improvement in profiler.
+ * functab4.ok: Update after making indirect calls of
+ extension functions work. :-)
+
+2014-08-15 Arnold D. Robbins <arnold@skeeve.com>
+
+ * badargs.ok: Adjust after revising text for -L option.
+
+2014-08-12 Arnold D. Robbins <arnold@skeeve.com>
+
+ * ofs1.ok: Updated to match corrected behavior in gawk.
+
2014-08-05 Arnold D. Robbins <arnold@skeeve.com>
* Makefile.am (mpfrsqrt): New test.
diff --git a/test/Makefile.am b/test/Makefile.am
index f28b381e..3be8b7cb 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -421,6 +421,8 @@ EXTRA_DIST = \
indirectcall.awk \
indirectcall.in \
indirectcall.ok \
+ indirectcall2.awk \
+ indirectcall2.ok \
inftest.awk \
inftest.ok \
inplace.in \
@@ -1010,7 +1012,7 @@ GAWK_EXT_TESTS = \
gensub gensub2 getlndir gnuops2 gnuops3 gnureops \
icasefs icasers id igncdym igncfs ignrcas2 ignrcase \
incdupe incdupe2 incdupe3 incdupe4 incdupe5 incdupe6 incdupe7 \
- include include2 indirectcall \
+ include include2 indirectcall indirectcall2 \
lint lintold lintwarn \
manyfiles match1 match2 match3 mbstr1 \
nastyparm next nondec nondec2 \
diff --git a/test/Makefile.in b/test/Makefile.in
index f3b537f3..b9753369 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -667,6 +667,8 @@ EXTRA_DIST = \
indirectcall.awk \
indirectcall.in \
indirectcall.ok \
+ indirectcall2.awk \
+ indirectcall2.ok \
inftest.awk \
inftest.ok \
inplace.in \
@@ -1255,7 +1257,7 @@ GAWK_EXT_TESTS = \
gensub gensub2 getlndir gnuops2 gnuops3 gnureops \
icasefs icasers id igncdym igncfs ignrcas2 ignrcase \
incdupe incdupe2 incdupe3 incdupe4 incdupe5 incdupe6 incdupe7 \
- include include2 indirectcall \
+ include include2 indirectcall indirectcall2 \
lint lintold lintwarn \
manyfiles match1 match2 match3 mbstr1 \
nastyparm next nondec nondec2 \
@@ -3476,6 +3478,11 @@ indirectcall:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+indirectcall2:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
lint:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index 0841ae77..5c34af9e 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -1082,6 +1082,11 @@ indirectcall:
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk < "$(srcdir)"/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
@-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+indirectcall2:
+ @echo $@
+ @AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
lint:
@echo $@
@AWKPATH="$(srcdir)" $(AWK) -f $@.awk >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/badargs.ok b/test/badargs.ok
index 8d34be1f..1664ec1c 100644
--- a/test/badargs.ok
+++ b/test/badargs.ok
@@ -17,7 +17,7 @@ Short options: GNU long options: (extensions)
-h --help
-i includefile --include=includefile
-l library --load=library
- -L [fatal] --lint[=fatal]
+ -L[fatal|invalid] --lint[=fatal|invalid]
-M --bignum
-N --use-lc-numeric
-n --non-decimal-data
diff --git a/test/functab4.awk b/test/functab4.awk
index 0d9d4267..196fcc6d 100644
--- a/test/functab4.awk
+++ b/test/functab4.awk
@@ -6,9 +6,25 @@ function foo()
}
BEGIN {
- x = FUNCTAB["chdir"]
- print "x =", x
- @x("/tmp")
- printf "we are now in --> "
- system("/bin/pwd || /usr/bin/pwd")
+ f = FUNCTAB["foo"]
+ @f()
+
+ ret1 = stat(".", data1)
+ print "ret1 =", ret1
+
+ f = "stat"
+ ret2 = @f(".", data2)
+ print "ret2 =", ret2
+
+ problems = 0 # count of scalar elements that differ between data1 (direct stat) and data2 (indirect stat)
+ for (i in data1) {
+ if (! isarray(data1[i])) {
+# print i, data1[i]
+ if (! (i in data2) || data1[i] != data2[i]) {
+ printf("mismatch element \"%s\"\n", i)
+ problems++
+ }
+ }
+ }
+ print(problems ? (problems+0) " encountered" : "no problems encountered") # a non-zero count is printed before " encountered"
}
diff --git a/test/functab4.ok b/test/functab4.ok
index 70a520b7..2b76cd88 100644
--- a/test/functab4.ok
+++ b/test/functab4.ok
@@ -1,3 +1,4 @@
-x = chdir
-gawk: functab4.awk:11: fatal: cannot (yet) call extension functions indirectly
-EXIT CODE: 2
+foo!
+ret1 = 0
+ret2 = 0
+no problems encountered
diff --git a/test/id.ok b/test/id.ok
index d31573de..4cb39b32 100644
--- a/test/id.ok
+++ b/test/id.ok
@@ -1,32 +1,73 @@
-FUNCTAB -> array
-ARGV -> array
-SYMTAB -> array
-ORS -> scalar
-ROUNDMODE -> scalar
-i -> untyped
OFS -> scalar
+rand -> builtin
+ARGC -> scalar
+dcgettext -> builtin
+gsub -> builtin
+PREC -> scalar
+match -> builtin
+ARGIND -> scalar
+int -> builtin
ERRNO -> scalar
+ARGV -> array
+log -> builtin
+sprintf -> builtin
+ROUNDMODE -> scalar
+strftime -> builtin
+systime -> builtin
+and -> builtin
+srand -> builtin
FNR -> scalar
+asort -> builtin
+atan2 -> builtin
+cos -> builtin
+TEXTDOMAIN -> scalar
+ORS -> scalar
+split -> builtin
+div -> builtin
+RSTART -> scalar
+compl -> builtin
+bindtextdomain -> builtin
+exp -> builtin
+or -> builtin
+fflush -> builtin
+gensub -> builtin
LINT -> scalar
+dcngettext -> builtin
+index -> builtin
IGNORECASE -> scalar
-NR -> scalar
-function1 -> user
-ARGIND -> scalar
-NF -> scalar
-TEXTDOMAIN -> scalar
+system -> builtin
CONVFMT -> scalar
+sqrt -> builtin
+rshift -> builtin
+tolower -> builtin
+FS -> scalar
+BINMODE -> scalar
+sin -> builtin
+asorti -> builtin
FIELDWIDTHS -> scalar
-ARGC -> scalar
+function1 -> user
+FILENAME -> scalar
+close -> builtin
+mktime -> builtin
+FUNCTAB -> array
+NF -> scalar
+isarray -> builtin
an_array -> untyped
-PROCINFO -> array
-PREC -> scalar
+patsplit -> builtin
+NR -> scalar
SUBSEP -> scalar
-FPAT -> scalar
-RS -> scalar
-FS -> scalar
+extension -> builtin
+i -> untyped
+sub -> builtin
OFMT -> scalar
RLENGTH -> scalar
+substr -> builtin
+FPAT -> scalar
+RS -> scalar
+xor -> builtin
RT -> scalar
-BINMODE -> scalar
-FILENAME -> scalar
-RSTART -> scalar
+PROCINFO -> array
+lshift -> builtin
+SYMTAB -> array
+strtonum -> builtin
+toupper -> builtin
diff --git a/test/indirectcall2.awk b/test/indirectcall2.awk
new file mode 100644
index 00000000..8f3c9483
--- /dev/null
+++ b/test/indirectcall2.awk
@@ -0,0 +1,11 @@
+BEGIN { # call built-in functions directly and indirectly; each pair must print the same value
+ Quarter_pi = 3.1415927 / 4
+ print sin(Quarter_pi) # direct call of a one-argument built-in
+
+ f = "sin"
+ print @f(Quarter_pi) # the same call made indirectly through the name stored in f
+
+ print substr("abcdefgh", 2, 3) # direct call of a three-argument built-in
+ f = "substr"
+ print @f("abcdefgh", 2, 3) # the same call made indirectly through the name stored in f
+}
diff --git a/test/indirectcall2.ok b/test/indirectcall2.ok
new file mode 100644
index 00000000..05bee4b1
--- /dev/null
+++ b/test/indirectcall2.ok
@@ -0,0 +1,4 @@
+0.707107
+0.707107
+bcd
+bcd
diff --git a/test/ofs1.ok b/test/ofs1.ok
index a3a8ca7b..d01fa161 100644
--- a/test/ofs1.ok
+++ b/test/ofs1.ok
@@ -1,7 +1,7 @@
-a:x:c
+a x c
a x c
a x c
a
-a:x:c
a x c
-a:x:c
+a x c
+a x c
diff --git a/test/profile2.ok b/test/profile2.ok
index fe76a2c9..50c7e190 100644
--- a/test/profile2.ok
+++ b/test/profile2.ok
@@ -7,7 +7,7 @@
1 asplit("BEGIN:END:atan2:break:close:continue:cos:delete:" "do:else:exit:exp:for:getline:gsub:if:in:index:int:" "length:log:match:next:print:printf:rand:return:sin:" "split:sprintf:sqrt:srand:sub:substr:system:while", keywords, ":")
1 split("00:00:00:00:00:00:00:00:00:00:" "20:10:10:12:12:11:07:00:00:00:" "08:08:08:08:08:33:08:00:00:00:" "08:44:08:36:08:08:08:00:00:00:" "08:44:45:42:42:41:08", machine, ":")
1 state = 1
- 571 for (; ; ) {
+ 571 for (;;) {
571 symb = lex()
571 nextstate = substr(machine[state symb], 1, 1)
571 act = substr(machine[state symb], 2, 1)
@@ -109,7 +109,7 @@
571 function lex()
{
- 1702 for (; ; ) {
+ 1702 for (;;) {
1702 if (tok == "(eof)") {
return 7
}